extract metadata on images in a pdf
pdimg_meta(paths, ...)
(character) one or more paths to pdf files, required
additional params passed on to pdfimages. See pdimg_help() for docs
A list of data.frames (tibbles), one per input path, each holding metadata on the images in that pdf. If a path is not found, or the path is found but no images are found, a warning is thrown and a zero-row data.frame is returned for that path.
# images found
x <- system.file("examples/BachmanEtal2020.pdf", package="pdfimager")
pdimg_meta(x)
#> [[1]]
#> # A tibble: 3 × 16
#> page num type width height color comp bpc enc interp object ID
#> <int> <int> <chr> <int> <int> <chr> <int> <int> <chr> <chr> <int> <int>
#> 1 5 0 image 1024 573 rgb 3 8 jpeg yes 178 0
#> 2 8 1 image 1024 1001 rgb 3 8 jpeg yes 146 0
#> 3 11 2 image 1024 988 rgb 3 8 jpeg yes 110 0
#> # ℹ 4 more variables: `x-ppi` <int>, `y-ppi` <int>, size <chr>, ratio <chr>
#>
z <- system.file("examples/Tierney2017JOSS.pdf", package="pdfimager")
pdimg_meta(z)
#> [[1]]
#> # A tibble: 6 × 16
#> page num type width height color comp bpc enc interp object ID
#> <int> <int> <chr> <int> <int> <chr> <int> <int> <chr> <chr> <int> <int>
#> 1 1 0 image 650 249 rgb 3 8 image no 6 0
#> 2 1 1 smask 650 249 gray 1 8 image no 6 0
#> 3 2 2 image 650 249 rgb 3 8 image no 6 0
#> 4 2 3 smask 650 249 gray 1 8 image no 6 0
#> 5 2 4 image 672 480 icc 3 8 image no 41 0
#> 6 2 5 smask 672 480 gray 1 8 image no 41 0
#> # ℹ 4 more variables: `x-ppi` <int>, `y-ppi` <int>, size <chr>, ratio <chr>
#>
# many at once
pdimg_meta(c(x, z))
#> [[1]]
#> # A tibble: 3 × 16
#> page num type width height color comp bpc enc interp object ID
#> <int> <int> <chr> <int> <int> <chr> <int> <int> <chr> <chr> <int> <int>
#> 1 5 0 image 1024 573 rgb 3 8 jpeg yes 178 0
#> 2 8 1 image 1024 1001 rgb 3 8 jpeg yes 146 0
#> 3 11 2 image 1024 988 rgb 3 8 jpeg yes 110 0
#> # ℹ 4 more variables: `x-ppi` <int>, `y-ppi` <int>, size <chr>, ratio <chr>
#>
#> [[2]]
#> # A tibble: 6 × 16
#> page num type width height color comp bpc enc interp object ID
#> <int> <int> <chr> <int> <int> <chr> <int> <int> <chr> <chr> <int> <int>
#> 1 1 0 image 650 249 rgb 3 8 image no 6 0
#> 2 1 1 smask 650 249 gray 1 8 image no 6 0
#> 3 2 2 image 650 249 rgb 3 8 image no 6 0
#> 4 2 3 smask 650 249 gray 1 8 image no 6 0
#> 5 2 4 image 672 480 icc 3 8 image no 41 0
#> 6 2 5 smask 672 480 gray 1 8 image no 41 0
#> # ℹ 4 more variables: `x-ppi` <int>, `y-ppi` <int>, size <chr>, ratio <chr>
#>
# no images found, but there are actually images
d <- system.file("examples/LahtiEtal2017.pdf", package="pdfimager")
pdimg_meta(d)
#> Warning: no images found in pdf
#> [[1]]
#> # A tibble: 0 × 0
#>
# no images found, and there really are no images
w <- system.file("examples/White2015.pdf", package="pdfimager")
pdimg_meta(w)
#> Warning: no images found in pdf
#> [[1]]
#> # A tibble: 0 × 0
#>
# path not found
pdimg_meta("foo-bar")
#> Warning: path 'foo-bar' does not exist
#> [[1]]
#> # A tibble: 0 × 0
#>
# only detects the smaller images overlaid on plots; doesn't detect the plots themselves
g <- system.file("examples/vanGemert2018.pdf", package="pdfimager")
pdimg_meta(g)
#> [[1]]
#> # A tibble: 8 × 16
#> page num type width height color comp bpc enc interp object ID
#> <int> <int> <chr> <int> <int> <chr> <int> <int> <chr> <chr> <int> <int>
#> 1 1 0 image 1491 256 cmyk 4 8 jpeg no 352 0
#> 2 3 1 image 121 53 sep 1 8 jpeg no 84 0
#> 3 3 2 image 114 86 sep 1 8 jpeg no 83 0
#> 4 3 3 image 108 39 sep 1 8 jpeg no 82 0
#> 5 5 4 image 25 117 sep 1 8 image no 141 0
#> 6 5 5 image 17 34 sep 1 8 image no 140 0
#> 7 5 6 image 19 91 sep 1 8 image no 139 0
#> 8 5 7 image 18 69 sep 1 8 image no 138 0
#> # ℹ 4 more variables: `x-ppi` <int>, `y-ppi` <int>, size <chr>, ratio <chr>
#>