Skip to content

Commit aee956a

Browse files
frenkiboy and claude committed
Add AST path spec detection for automatic file tracking
- B3: Scan function body AST for calls to list.files(), readRDS(), read.csv(), readLines(), and other file-reading functions
- Extract string literal path arguments and hash their contents
- Include path_specs_hash in cache key so cache invalidates when files referenced in function source code change
- Add "AST-detected path contents" to verbose miss reasons

Co-Authored-By: Claude Opus 4.6 <[email protected]>
1 parent cc6c7b2 commit aee956a

1 file changed

Lines changed: 48 additions & 2 deletions

File tree

R/cacheFile.R

Lines changed: 48 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -631,6 +631,35 @@ track_file <- function(path, cache_dir = NULL) {
631631
vapply(pkgs, function(p) tryCatch(as.character(utils::packageVersion(p)), error = function(e) "NA"), character(1))
632632
}
633633

634+
#' Scan function AST for path-like function calls
#'
#' Recursively walks a language object and, for every call to a known
#' file-reading or directory-listing function, records the argument that
#' carries the path. String literals and bare variable names are collected
#' separately; any other kind of argument expression (e.g. a nested call such
#' as `file.path(...)`) is not recorded for that call, although the walker
#' still descends into it.
#'
#' The path argument is chosen as follows: an argument explicitly named
#' `file`, `path`, `con`, `description` or `input` wins; otherwise the first
#' positional (unnamed) argument is used. Arguments supplied under any other
#' name (e.g. `pattern =`, `header =`, `text =`) are never mistaken for a path.
#'
#' @param expr A language object to scan, e.g. `body(f)`. Non-call input
#'   yields empty results.
#' @return A list with two character vectors: `literals`, the unique string
#'   literals found in path position, and `symbols`, the unique names of
#'   variables found in path position.
#' @keywords internal
.find_path_specs <- function(expr) {
  target_fns <- c(
    "list.files", "list.dirs", "readRDS", "read.csv", "read.table",
    "readLines", "scan", "file", "read_csv", "read_tsv", "read_delim",
    "fread", "read.delim", "source", "load"
  )
  # Formal names under which the target functions accept their path.
  path_arg_names <- c("file", "path", "con", "description", "input")

  # Accumulators updated by the recursive walker below.
  literals <- character()
  symbols <- character()

  # Position (within the call) of the argument holding the path, or NA when
  # the call has no argument that can be identified as one.
  path_arg_index <- function(e) {
    if (length(e) < 2L) {
      return(NA_integer_)
    }
    arg_names <- names(e)
    if (is.null(arg_names)) {
      # No named arguments at all: the first argument is the path.
      return(2L)
    }
    arg_names <- arg_names[-1L]
    named_hit <- which(arg_names %in% path_arg_names)
    if (length(named_hit) > 0L) {
      return(named_hit[[1L]] + 1L)
    }
    positional <- which(!nzchar(arg_names))
    if (length(positional) > 0L) {
      return(positional[[1L]] + 1L)
    }
    NA_integer_
  }

  walker <- function(e) {
    if (!is.call(e)) {
      return(invisible(NULL))
    }
    fn_name <- tryCatch(as.character(e[[1L]]), error = function(z) "")
    # Namespaced calls like readr::read_csv deparse to several strings;
    # the function name is the last one.
    if (length(fn_name) > 1L) fn_name <- fn_name[length(fn_name)]
    if (length(fn_name) == 1L && fn_name %in% target_fns) {
      idx <- path_arg_index(e)
      if (!is.na(idx)) {
        # Access by index instead of binding to a local: an empty argument
        # (as in `scan(, what = "")`) cannot be read back from a variable.
        if (is.character(e[[idx]])) {
          literals <<- c(literals, e[[idx]])
        } else if (is.symbol(e[[idx]])) {
          sym <- as.character(e[[idx]])
          # An empty argument deparses to ""; it names no variable.
          if (nzchar(sym)) symbols <<- c(symbols, sym)
        }
      }
    }
    # Descend into every component, including the arguments just inspected.
    lapply(e, walker)
    invisible(NULL)
  }

  walker(expr)
  list(literals = unique(literals), symbols = unique(symbols))
}
662+
634663
#' Robust Path Hashing
635664
#' @keywords internal
636665
.get_path_hash <- function(path, file_pattern = NULL, algo = "xxhash64") {
@@ -810,7 +839,8 @@ cacheFile <- function(cache_dir = NULL,
810839
# Sync Graph from disk on init
811840
cacheTree_sync(cache_dir)
812841
ast_deps <- .scan_ast_deps(body(f))
813-
842+
ast_path_specs <- .find_path_specs(body(f))
843+
814844
# The actual wrapped function
815845
wrapper <- function(..., .load = TRUE, .force = FALSE, .skip_save = FALSE) {
816846
# --- 1. CAPTURE & CANONICALIZE ARGUMENTS ---
@@ -881,6 +911,20 @@ cacheFile <- function(cache_dir = NULL,
881911
explicit_vars_hash <- digest::digest(depends_on_vars, algo = algo)
882912
}
883913

914+
# Hash literal paths detected by AST scan (e.g., list.files("/data"))
915+
path_specs_hash <- NULL
916+
if (length(ast_path_specs$literals) > 0) {
917+
spec_hashes <- vapply(ast_path_specs$literals, function(p) {
918+
if (file.exists(p) || dir.exists(p)) {
919+
.get_path_hash(p, file_pattern = file_pattern, algo = algo)
920+
} else {
921+
""
922+
}
923+
}, character(1L))
924+
path_specs_hash <- spec_hashes[nzchar(spec_hashes)]
925+
if (length(path_specs_hash) == 0) path_specs_hash <- NULL
926+
}
927+
884928
hashlist <- list(
885929
input_hash = input_hash,
886930
env_hash = env_hash,
@@ -891,7 +935,8 @@ cacheFile <- function(cache_dir = NULL,
891935
dir_states = dir_states_key,
892936
version = version,
893937
explicit_deps = explicit_deps_hash,
894-
explicit_vars = explicit_vars_hash
938+
explicit_vars = explicit_vars_hash,
939+
path_specs = path_specs_hash
895940
)
896941

897942
master_key <- digest::digest(hashlist, algo = algo)
@@ -966,6 +1011,7 @@ cacheFile <- function(cache_dir = NULL,
9661011
if (!identical(sm$version, version)) changes <- c(changes, "version")
9671012
if (!identical(sm$explicit_deps, explicit_deps_hash)) changes <- c(changes, "explicit file dependencies")
9681013
if (!identical(sm$explicit_vars, explicit_vars_hash)) changes <- c(changes, "explicit variable dependencies")
1014+
if (!identical(sm$path_specs, path_specs_hash)) changes <- c(changes, "AST-detected path contents")
9691015
if (length(changes) == 0) changes <- "unknown (possibly new argument combination)"
9701016
message(sprintf("cacheR: miss for %s() -- changed: %s", fname, paste(changes, collapse = ", ")))
9711017
} else {

0 commit comments

Comments
 (0)