Extract metadata on images in a pdf

pdimg_meta(paths, ...)

Arguments

paths

(character) one or more paths to pdf files, required

...

additional params passed on to pdfimages. See pdimg_help() for docs

Value

A list of data.frames (tibbles), one per path, each holding metadata on the images in that pdf. If a path is not found, or the path is found but no images are detected in it, a warning is thrown and a zero-row data.frame is returned for that path.

Examples

# images found
x <- system.file("examples/BachmanEtal2020.pdf", package="pdfimager")
pdimg_meta(x)
#> [[1]]
#> # A tibble: 3 × 16
#>    page   num type  width height color  comp   bpc enc   interp object    ID
#>   <int> <int> <chr> <int>  <int> <chr> <int> <int> <chr> <chr>   <int> <int>
#> 1     5     0 image  1024    573 rgb       3     8 jpeg  yes       178     0
#> 2     8     1 image  1024   1001 rgb       3     8 jpeg  yes       146     0
#> 3    11     2 image  1024    988 rgb       3     8 jpeg  yes       110     0
#> # ℹ 4 more variables: `x-ppi` <int>, `y-ppi` <int>, size <chr>, ratio <chr>
#> 
z <- system.file("examples/Tierney2017JOSS.pdf", package="pdfimager")
pdimg_meta(z)
#> [[1]]
#> # A tibble: 6 × 16
#>    page   num type  width height color  comp   bpc enc   interp object    ID
#>   <int> <int> <chr> <int>  <int> <chr> <int> <int> <chr> <chr>   <int> <int>
#> 1     1     0 image   650    249 rgb       3     8 image no          6     0
#> 2     1     1 smask   650    249 gray      1     8 image no          6     0
#> 3     2     2 image   650    249 rgb       3     8 image no          6     0
#> 4     2     3 smask   650    249 gray      1     8 image no          6     0
#> 5     2     4 image   672    480 icc       3     8 image no         41     0
#> 6     2     5 smask   672    480 gray      1     8 image no         41     0
#> # ℹ 4 more variables: `x-ppi` <int>, `y-ppi` <int>, size <chr>, ratio <chr>
#> 

# many at once
pdimg_meta(c(x, z))
#> [[1]]
#> # A tibble: 3 × 16
#>    page   num type  width height color  comp   bpc enc   interp object    ID
#>   <int> <int> <chr> <int>  <int> <chr> <int> <int> <chr> <chr>   <int> <int>
#> 1     5     0 image  1024    573 rgb       3     8 jpeg  yes       178     0
#> 2     8     1 image  1024   1001 rgb       3     8 jpeg  yes       146     0
#> 3    11     2 image  1024    988 rgb       3     8 jpeg  yes       110     0
#> # ℹ 4 more variables: `x-ppi` <int>, `y-ppi` <int>, size <chr>, ratio <chr>
#> 
#> [[2]]
#> # A tibble: 6 × 16
#>    page   num type  width height color  comp   bpc enc   interp object    ID
#>   <int> <int> <chr> <int>  <int> <chr> <int> <int> <chr> <chr>   <int> <int>
#> 1     1     0 image   650    249 rgb       3     8 image no          6     0
#> 2     1     1 smask   650    249 gray      1     8 image no          6     0
#> 3     2     2 image   650    249 rgb       3     8 image no          6     0
#> 4     2     3 smask   650    249 gray      1     8 image no          6     0
#> 5     2     4 image   672    480 icc       3     8 image no         41     0
#> 6     2     5 smask   672    480 gray      1     8 image no         41     0
#> # ℹ 4 more variables: `x-ppi` <int>, `y-ppi` <int>, size <chr>, ratio <chr>
#> 

# no images found, even though the pdf actually contains images
d <- system.file("examples/LahtiEtal2017.pdf", package="pdfimager")
pdimg_meta(d)
#> Warning: no images found in pdf
#> [[1]]
#> # A tibble: 0 × 0
#> 

# no images found, and there really are no images
w <- system.file("examples/White2015.pdf", package="pdfimager")
pdimg_meta(w)
#> Warning: no images found in pdf
#> [[1]]
#> # A tibble: 0 × 0
#> 

# path not found
pdimg_meta("foo-bar")
#> Warning: path 'foo-bar' does not exist
#> [[1]]
#> # A tibble: 0 × 0
#> 

# only detects smaller images overlaid on plots; doesn't detect the plots themselves
g <- system.file("examples/vanGemert2018.pdf", package="pdfimager")
pdimg_meta(g)
#> [[1]]
#> # A tibble: 8 × 16
#>    page   num type  width height color  comp   bpc enc   interp object    ID
#>   <int> <int> <chr> <int>  <int> <chr> <int> <int> <chr> <chr>   <int> <int>
#> 1     1     0 image  1491    256 cmyk      4     8 jpeg  no        352     0
#> 2     3     1 image   121     53 sep       1     8 jpeg  no         84     0
#> 3     3     2 image   114     86 sep       1     8 jpeg  no         83     0
#> 4     3     3 image   108     39 sep       1     8 jpeg  no         82     0
#> 5     5     4 image    25    117 sep       1     8 image no        141     0
#> 6     5     5 image    17     34 sep       1     8 image no        140     0
#> 7     5     6 image    19     91 sep       1     8 image no        139     0
#> 8     5     7 image    18     69 sep       1     8 image no        138     0
#> # ℹ 4 more variables: `x-ppi` <int>, `y-ppi` <int>, size <chr>, ratio <chr>
#>