simple caching of HTTP requests/responses, hooking into webmockr (https://github.com/ropensci/webmockr) for the HTTP request matching
A midden is a debris pile constructed by a woodrat/pack rat (https://en.wikipedia.org/wiki/Pack_rat#Midden)
vcr is meant really for testing, or script use. I don’t think it fits well into a use case where another pkg wants to cache responses.
memoise seems close-ish but doesn’t fit needs, e.g., no expiry, not specific to HTTP requests, etc.
webmockr
to match requests (works with crul; soon httr)
Expiry example: if the expiry is set to 2019-03-08 00:00:00 and it’s 2019-03-07 23:00:00, then 1 hr from now the cache will expire, and a new real HTTP request will need to be made (i.e., the cache will be deleted whenever the next HTTP request is made)
remotes::install_github("sckott/webmiddens")
Let’s say you have some function http_request() that does an HTTP request that you re-use in various parts of your project or package:
# Send a GET request to the `get` endpoint of https://httpbin.org.
# Everything supplied via `...` is collected into a list and handed to the
# crul client as its `opts`.
http_request <- function(...) {
  client <- crul::HttpClient$new(url = "https://httpbin.org", opts = list(...))
  client$get(path = "get")
}
And you have a function some_fxn() that uses http_request() to do the HTTP request, then processes the results into a data.frame or list, etc. This is a super common pattern in a project or R package that deals with web resources.
# Run http_request() with the supplied arguments, read the response body as
# UTF-8 text, and convert that JSON text into an R object.
some_fxn <- function(...) {
  response <- http_request(...)
  body_text <- response$parse("UTF-8")
  jsonlite::fromJSON(body_text)
}
Without webmiddens
the HTTP request happens as usual and all is good
some_fxn()
#> $args
#> named list()
#>
#> $headers
#> $headers$Accept
#> [1] "application/json, text/xml, application/xml, */*"
#>
#> $headers$`Accept-Encoding`
#> [1] "gzip, deflate"
#>
#> $headers$Host
#> [1] "httpbin.org"
#>
#> $headers$`User-Agent`
#> [1] "libcurl/7.74.0 r-curl/4.3 crul/1.0.2.92"
#>
#> $headers$`X-Amzn-Trace-Id`
#> [1] "Root=1-5fd29de8-0e978093689e02246d0b3d92"
#>
#>
#> $origin
#> [1] "24.21.229.59"
#>
#> $url
#> [1] "https://httpbin.org/get"
Now, with webmiddens
Run wm_configuration() first to set the path where HTTP requests will be cached
wm_configuration("foo1")
#> configuring midden from $path
first request is a real HTTP request
res1 <- use_midden(some_fxn())
res1
#> $args
#> named list()
#>
#> $headers
#> $headers$Accept
#> [1] "application/json, text/xml, application/xml, */*"
#>
#> $headers$`Accept-Encoding`
#> [1] "gzip, deflate"
#>
#> $headers$Host
#> [1] "httpbin.org"
#>
#> $headers$`User-Agent`
#> [1] "libcurl/7.74.0 r-curl/4.3 crul/1.0.2.92"
#>
#> $headers$`X-Amzn-Trace-Id`
#> [1] "Root=1-5fd29de8-3ad69a2f59e45afc48446e85"
#>
#>
#> $origin
#> [1] "24.21.229.59"
#>
#> $url
#> [1] "https://httpbin.org/get"
second request uses the cached response from the first request
res2 <- use_midden(some_fxn())
res2
#> $args
#> named list()
#>
#> $headers
#> $headers$Accept
#> [1] "application/json, text/xml, application/xml, */*"
#>
#> $headers$`Accept-Encoding`
#> [1] "gzip, deflate"
#>
#> $headers$Host
#> [1] "httpbin.org"
#>
#> $headers$`User-Agent`
#> [1] "libcurl/7.74.0 r-curl/4.3 crul/1.0.2.92"
#>
#> $headers$`X-Amzn-Trace-Id`
#> [1] "Root=1-5fd29de8-65506d0055d8b5c874949851"
#>
#>
#> $origin
#> [1] "24.21.229.59"
#>
#> $url
#> [1] "https://httpbin.org/get"
# Create a new midden object; at this point no cache path has been set
x <- midden$new()
x # no path
#> <midden>
#> path:
#> expiry (sec): not set
# Run $init() to set the path
x$init(path = "forest")
# Printing again shows the path filled in; the expiry is still not set
x
#> <midden>
#> path: /Users/sckott/Library/Caches/R/forest
#> expiry (sec): not set
The cache slot has a hoardr object which you can use to fiddle with files; see ?hoardr::hoard
x$cache
#> <hoard>
#> path: forest
#> cache path: /Users/sckott/Library/Caches/R/forest
Use expire() to set the expiry time (in seconds). You can set it by passing a value to expire() or through the environment variable WEBMIDDENS_EXPIRY_SEC
# With no expiry configured, expire() returns NULL
x$expire()
#> NULL
# Passing a number sets the expiry (in seconds) and returns it
x$expire(5)
#> [1] 5
# Calling with no arguments now reports the value set above
x$expire()
#> [1] 5
# reset = TRUE clears the expiry again
x$expire(reset = TRUE)
#> NULL
x$expire()
#> NULL
# The expiry can also be supplied via the WEBMIDDENS_EXPIRY_SEC env var
Sys.setenv(WEBMIDDENS_EXPIRY_SEC = 35)
x$expire()
#> [1] 35
# NOTE(review): after this reset expire() prints NULL even though the env var
# was set above -- presumably reset also clears the env var; confirm
x$expire(reset = TRUE)
#> NULL
x$expire()
#> NULL
FIXME: The code below is not working right now - figure out why
# NOTE(review): presumably switches on webmiddens' request interception before
# any requests are made -- confirm against the wm_enable() documentation
wm_enable()
# crul client pointed at httpbin.org; reused by the requests below
con <- crul::HttpClient$new("https://httpbin.org")
# first request is a real HTTP request
x$r(con$get("get", query = list(stuff = "bananas")))
# following requests use the cached response
x$r(con$get("get", query = list(stuff = "bananas")))
verbose output
x <- midden$new(verbose = TRUE)
x$init(path = "rainforest")
x$r(con$get("get", query = list(stuff = "bananas")))
set expiration time
x <- midden$new()
x$init(path = "grass")
x$expire(3)
x
Delete all the files in your “midden” (the folder with cached files)
x$cleanup()
Delete the “midden” (the folder with cached files)
x$destroy()
Get citation information for webmiddens in R by running citation(package = 'webmiddens')