use crate::cache_key::FileHasher;
use std::path::{Component, Path, PathBuf};
/// File name of the per-crate config; `digest` looks for it directly inside the crate directory.
const COLOCATED_NAME: &str = "kache.toml";
/// Matched-file count above which `fold_extra_inputs` warns that a glob is likely over-broad.
const OVER_BROAD_FILE_WARN: usize = 1000;
/// Deserialized shape of the colocated config file.
///
/// `default` lets the file omit `extra_inputs` (it then deserializes to an empty list), while
/// `deny_unknown_fields` turns any other key into a parse error.
#[derive(serde::Deserialize, Default)]
#[serde(default, deny_unknown_fields)]
struct ColocatedConfig {
// Glob patterns naming additional files whose contents are folded into the digest.
extra_inputs: Vec<String>,
}
/// Computes the extra-inputs digest declared by the crate's colocated config file.
///
/// The crate directory is the nearest ancestor of `source_file` that contains a `Cargo.toml`;
/// the config is the file named [`COLOCATED_NAME`] inside it.
///
/// Returns `None` (nothing to fold) when:
/// - `is_primary` is false,
/// - `source_file` is `None` or no crate directory can be located,
/// - the config file does not exist or cannot be read,
/// - the config declares an empty `extra_inputs` list.
///
/// Returns `Some(hex_digest)` otherwise. A config that is not valid UTF-8 or not valid TOML
/// (including unknown keys) is folded as an opaque digest of its raw bytes rather than ignored.
///
/// `crate_name` is only used to tag log messages.
pub(crate) fn digest(
    source_file: Option<&Path>,
    crate_name: &str,
    is_primary: bool,
    file_hasher: &FileHasher<'_>,
) -> Option<String> {
    if !is_primary {
        return None;
    }
    let crate_dir = crate_dir_from_source(source_file?)?;
    let config_path = crate_dir.join(COLOCATED_NAME);
    let raw = match std::fs::read(&config_path) {
        Ok(bytes) => bytes,
        // A missing file is the ordinary opt-out and stays silent.
        Err(e) if e.kind() == std::io::ErrorKind::NotFound => return None,
        // Any other failure (permissions, the path being a directory, ...) means a declared
        // config is being ignored; surface it instead of swallowing it.
        Err(e) => {
            tracing::warn!(
                "[key:{crate_name}] cannot read {} ({e}); no extra inputs are folded for \
                 this crate",
                config_path.display()
            );
            return None;
        }
    };
    let text = match std::str::from_utf8(&raw) {
        Ok(t) => t,
        Err(e) => {
            // Mirror the invalid-TOML branch: the file is present but unusable, so say so.
            tracing::warn!(
                "[key:{crate_name}] {} is not valid UTF-8 ({e}); folding it as an opaque input",
                config_path.display()
            );
            return Some(unparseable_digest(crate_name, &config_path, &raw));
        }
    };
    let config: ColocatedConfig = match toml::from_str(text) {
        Ok(c) => c,
        Err(e) => {
            tracing::warn!(
                "[key:{crate_name}] {} is invalid ({e}); folding it as an opaque \
                 input so the crate rebuilds until fixed",
                config_path.display()
            );
            return Some(unparseable_digest(crate_name, &config_path, &raw));
        }
    };
    fold_extra_inputs(crate_name, &crate_dir, &config.extra_inputs, file_hasher)
}
/// Folds the crate's extra-inputs digest into `base` under the `"extra_inputs"` label.
///
/// When `digest` yields nothing for these arguments, `base` is handed back untouched.
pub(crate) fn apply_extra_inputs(
    base: String,
    source_file: Option<&Path>,
    crate_name: &str,
    is_primary: bool,
    file_hasher: &FileHasher<'_>,
) -> String {
    let Some(extra) = digest(source_file, crate_name, is_primary, file_hasher) else {
        return base;
    };
    crate::cache_key::fold_labeled(base, "extra_inputs", &extra)
}
/// Locates the crate directory that owns `source_file`.
///
/// A relative `source_file` is resolved against the current working directory (yielding `None`
/// if that cannot be determined). The result is the closest ancestor directory of the file that
/// contains a regular file named `Cargo.toml`, or `None` when no ancestor does.
fn crate_dir_from_source(source_file: &Path) -> Option<PathBuf> {
    let cwd = std::env::current_dir().ok();
    let absolute = match (source_file.is_absolute(), cwd) {
        (true, _) => source_file.to_path_buf(),
        (false, Some(base)) => base.join(source_file),
        (false, None) => return None,
    };
    // `ancestors()` starts with the path itself; skip it so the search begins at the parent.
    absolute
        .ancestors()
        .skip(1)
        .find(|candidate| candidate.join("Cargo.toml").is_file())
        .map(Path::to_path_buf)
}
/// Hashes the declared `extra_inputs` patterns together with the files they match.
///
/// Returns `None` when `patterns` is empty; otherwise returns the hex BLAKE3 digest of, in order:
/// the normalized pattern set, every readable matched file as `relative_path=content_hash`,
/// every matched-but-unreadable file, and every path at which glob enumeration failed.
/// If every pattern is rejected by `normalize_pattern`, the raw patterns are hashed instead
/// under a separate label, so the result is still `Some`.
///
/// `crate_name` is only used to tag log messages. Relative patterns are resolved under
/// `crate_dir`; file contents are hashed through `file_hasher`.
fn fold_extra_inputs(
crate_name: &str,
crate_dir: &Path,
patterns: &[String],
file_hasher: &FileHasher<'_>,
) -> Option<String> {
if patterns.is_empty() {
return None;
}
// Sorting and de-duplicating makes the digest independent of declaration order and repeats.
let mut normalized: Vec<String> = patterns
.iter()
.filter_map(|p| normalize_pattern(crate_name, crate_dir, p))
.collect();
normalized.sort();
normalized.dedup();
if normalized.is_empty() {
tracing::warn!(
"[key:{crate_name}] every extra_inputs pattern was rejected; folding the raw \
declaration so the crate stays distinct from an unconfigured one"
);
// Fallback digest over the raw (sorted, de-duplicated) declaration.
let mut hasher = blake3::Hasher::new();
let mut raw: Vec<&String> = patterns.iter().collect();
raw.sort();
raw.dedup();
for p in raw {
hasher.update(b"extra_input_all_rejected:");
hasher.update(p.as_bytes());
hasher.update(b"\x1f");
}
return Some(hasher.finalize().to_hex().to_string());
}
// The pattern set itself is hashed first, so two different declarations that both match
// zero files still produce different digests.
let mut hasher = blake3::Hasher::new();
for pat in &normalized {
hasher.update(b"extra_input_pattern:");
hasher.update(pat.as_bytes());
hasher.update(b"\x1f");
}
let mut matched: Vec<PathBuf> = Vec::new();
let mut glob_errors: Vec<String> = Vec::new();
for pat in &normalized {
// Absolute patterns are used as-is; relative ones are anchored at the crate directory,
// whose own path is escaped so glob metacharacters in it are matched literally.
let full = if Path::new(pat).is_absolute() {
pat.clone()
} else {
format!(
"{}/{}",
glob::Pattern::escape(&crate_dir.to_string_lossy()),
pat
)
};
if walks_filesystem_root(&full) {
tracing::warn!(
"[key:{crate_name}] extra_inputs pattern {pat:?} walks from the filesystem \
root — this enumerates the entire filesystem on every compile; narrow it"
);
}
let entries = match glob::glob(&full) {
Ok(entries) => entries,
Err(e) => {
// A syntactically invalid glob is skipped; its text was already folded above.
tracing::warn!("[key:{crate_name}] bad extra_inputs glob {pat:?}: {e}");
continue;
}
};
for entry in entries {
match entry {
// Only regular files are hashed; directories and other entries are ignored.
Ok(p) if p.is_file() => matched.push(p),
Ok(_) => {}
Err(e) => {
// Enumeration failures are recorded by path and folded into the digest below.
let rel = crate_relative_path(crate_dir, e.path());
tracing::warn!(
"[key:{crate_name}] extra_inputs enumeration error at {rel:?}: {e}"
);
glob_errors.push(rel);
}
}
}
}
// Overlapping patterns can yield the same file more than once.
matched.sort();
matched.dedup();
if matched.len() > OVER_BROAD_FILE_WARN {
tracing::warn!(
"[key:{crate_name}] extra_inputs matched {} files — likely an over-broad glob; \
it busts the key on every change and walks a large tree each compile. Narrow it.",
matched.len()
);
}
let paths: Vec<&Path> = matched.iter().map(|p| p.as_path()).collect();
// NOTE(review): presumably warms the hasher's cache ahead of the per-file `hash` calls —
// confirm against `FileHasher::prefetch`.
file_hasher.prefetch(&paths);
let mut readable: Vec<String> = Vec::new();
let mut unreadable: Vec<String> = Vec::new();
for path in &matched {
let rel = crate_relative_path(crate_dir, path);
match file_hasher.hash(path) {
// Binding each hash to its relative path means swapping contents between two files
// changes the digest.
Ok(h) => readable.push(format!("{rel}={h}")),
Err(e) => {
tracing::warn!("[key:{crate_name}] extra_input unreadable {rel:?}: {e}");
unreadable.push(rel);
}
}
}
readable.sort();
unreadable.sort();
glob_errors.sort();
glob_errors.dedup();
for entry in &readable {
hasher.update(b"extra_input:");
hasher.update(entry.as_bytes());
hasher.update(b"\x1f");
}
// Unreadable files get their own label, so they hash differently from an absent file.
for u in &unreadable {
hasher.update(b"extra_input_unreadable:");
hasher.update(u.as_bytes());
hasher.update(b"\x1f");
}
for g in &glob_errors {
hasher.update(b"extra_input_glob_error:");
hasher.update(g.as_bytes());
hasher.update(b"\x1f");
}
// The byte total needs a metadata call per file, so it is only computed when DEBUG is on.
if tracing::enabled!(tracing::Level::DEBUG) {
let total_bytes: u64 = matched
.iter()
.filter_map(|p| std::fs::metadata(p).ok().map(|m| m.len()))
.sum();
tracing::debug!(
"[key:{crate_name}] extra_inputs: {} pattern(s), {} file(s), {} unreadable, \
{} glob-error(s), {} bytes",
normalized.len(),
readable.len(),
unreadable.len(),
glob_errors.len(),
total_bytes
);
}
tracing::info!(
"[key:{crate_name}] extra_inputs: folded {} file(s) from {} pattern(s)",
readable.len(),
normalized.len()
);
Some(hasher.finalize().to_hex().to_string())
}
/// Renders `path` as a `/`-separated string for hashing and log output.
///
/// The `crate_dir` prefix is removed when `path` lies under it; any other path is rendered in
/// full. Components are converted lossily, so non-UTF-8 names do not fail.
fn crate_relative_path(crate_dir: &Path, path: &Path) -> String {
    let shown = match path.strip_prefix(crate_dir) {
        Ok(inside) => inside,
        Err(_) => path,
    };
    let mut rendered = String::new();
    for (index, part) in shown.components().enumerate() {
        if index > 0 {
            rendered.push('/');
        }
        rendered.push_str(&part.as_os_str().to_string_lossy());
    }
    rendered
}
/// Canonicalizes one declared `extra_inputs` pattern into the glob text that is hashed and walked.
///
/// Returns `None` (pattern rejected, with a warning) when the expanded pattern is empty or
/// contains the `\x1f` byte that the digest uses as a field separator.
///
/// Otherwise returns the pattern reshaped so directory-style spellings agree:
/// - a pattern naming an existing directory under `crate_dir` (with or without a trailing `/`)
///   becomes `<escaped dir>/**/*`;
/// - any other pattern ending in `/` gets `**/*` appended;
/// - everything else is returned as expanded.
///
/// Absolute patterns and patterns containing `..` are accepted, but a warning is logged.
/// `crate_name` is only used to tag log messages.
fn normalize_pattern(crate_name: &str, crate_dir: &Path, pattern: &str) -> Option<String> {
    let normalized = crate::config::expand_exclude_pattern(pattern);
    // An empty pattern would pass the directory check below (`crate_dir.join("")` is the crate
    // directory itself) and be reshaped into "/**/*" — an absolute glob over the entire
    // filesystem. Reject it like any other unusable pattern.
    if normalized.is_empty() {
        tracing::warn!("[key:{crate_name}] extra_inputs pattern {pattern:?} is empty; skipping");
        return None;
    }
    if normalized.contains('\x1f') {
        tracing::warn!(
            "[key:{crate_name}] extra_inputs pattern {pattern:?} contains a control \
             separator (\\x1f); skipping"
        );
        return None;
    }
    let as_path = Path::new(&normalized);
    if as_path.is_absolute()
        || as_path
            .components()
            .any(|c| matches!(c, Component::ParentDir))
    {
        tracing::warn!(
            "[key:{crate_name}] extra_inputs pattern {pattern:?} reaches outside the crate \
             (absolute or `..`); folding it anyway, but this crate's key is now \
             host-/layout-specific and won't share across machines or worktrees"
        );
    }
    let trimmed = normalized.strip_suffix('/').unwrap_or(&normalized);
    let reshaped = if crate_dir.join(trimmed).is_dir() {
        // The directory name is literal text, so escape any glob metacharacters in it.
        format!("{}/**/*", glob::Pattern::escape(trimmed))
    } else if normalized.ends_with('/') {
        format!("{normalized}**/*")
    } else {
        normalized
    };
    Some(reshaped)
}
/// Reports whether a glob's literal directory prefix is the filesystem root.
///
/// The prefix is everything up to and including the last `/` that precedes the first glob
/// metacharacter (`*`, `?` or `[`). A pattern with no `/` in that span is never a root walk.
fn walks_filesystem_root(glob_pattern: &str) -> bool {
    let literal_prefix = match glob_pattern.find(['*', '?', '[']) {
        Some(first_meta) => &glob_pattern[..first_meta],
        None => glob_pattern,
    };
    match literal_prefix.rfind('/') {
        Some(last_slash) => {
            let walk_base = Path::new(&literal_prefix[..=last_slash]);
            // An absolute path with no parent is the root itself.
            walk_base.parent().is_none() && walk_base.is_absolute()
        }
        None => false,
    }
}
/// Digest used when the config file exists but cannot be interpreted.
///
/// Hashes the file's raw bytes behind a fixed label, so any edit to the broken file changes the
/// result. `crate_name` and `config_path` only appear in the debug log line.
fn unparseable_digest(crate_name: &str, config_path: &Path, raw: &[u8]) -> String {
    tracing::debug!(
        "[key:{crate_name}] folding {} as opaque (unparseable)",
        config_path.display()
    );
    let mut opaque = blake3::Hasher::new();
    opaque.update(b"extra_inputs_unparseable:");
    opaque.update(raw);
    let hex = opaque.finalize().to_hex();
    hex.to_string()
}
// Unit tests for the colocated-config digest. Each test builds a throwaway crate on disk and
// checks when the digest is absent, present, stable, or changed.
#[cfg(test)]
mod tests {
use super::*;
// Creates a temporary crate containing `Cargo.toml`, `src/lib.rs`, and every
// `(relative path, contents)` pair in `files`. Returns the temp-dir guard together with the
// path to `src/lib.rs`.
fn crate_fixture(files: &[(&str, &str)]) -> (tempfile::TempDir, PathBuf) {
let dir = tempfile::tempdir().unwrap();
let root = dir.path();
std::fs::write(root.join("Cargo.toml"), "[package]\nname = \"x\"\n").unwrap();
std::fs::create_dir_all(root.join("src")).unwrap();
let src = root.join("src/lib.rs");
std::fs::write(&src, "// crate\n").unwrap();
for (rel, contents) in files {
let p = root.join(rel);
std::fs::create_dir_all(p.parent().unwrap()).unwrap();
std::fs::write(p, contents).unwrap();
}
(dir, src)
}
// Runs `digest` for a primary crate named "x" with a fresh `FileHasher`.
fn dig(src: &Path) -> Option<String> {
let fh = FileHasher::new();
digest(Some(src), "x", true, &fh)
}
// A crate without a config file yields no digest.
#[test]
fn no_colocated_file_is_noop() {
let (_d, src) = crate_fixture(&[]);
assert_eq!(dig(&src), None);
}
// A non-primary crate yields no digest even when a config and matching file exist.
#[test]
fn non_primary_is_noop() {
let (_d, src) = crate_fixture(&[
("kache.toml", "extra_inputs = [\".sqlx/**/*.json\"]"),
(".sqlx/q.json", "v1"),
]);
let fh = FileHasher::new();
assert_eq!(digest(Some(&src), "x", false, &fh), None);
}
// An explicitly empty `extra_inputs` list yields no digest.
#[test]
fn empty_list_is_noop() {
let (_d, src) = crate_fixture(&[("kache.toml", "extra_inputs = []")]);
assert_eq!(dig(&src), None);
}
// Editing a matched file changes the digest, and the digest is repeatable afterwards.
#[test]
fn declared_input_change_rekeys() {
let (d, src) = crate_fixture(&[
("kache.toml", "extra_inputs = [\".sqlx/**/*.json\"]"),
(".sqlx/q.json", "v1"),
]);
let before = dig(&src).expect("declared input folds a digest");
std::fs::write(d.path().join(".sqlx/q.json"), "v2").unwrap();
let after = dig(&src).expect("still folds after edit");
assert_ne!(before, after);
assert_eq!(after, dig(&src).unwrap());
}
// With zero matching files the pattern text alone still distinguishes two declarations.
#[test]
fn zero_match_still_folds_pattern_set() {
let (_d, src) = crate_fixture(&[("kache.toml", "extra_inputs = [\".sqlx/**/*.json\"]")]);
let only_pattern = dig(&src).expect("pattern set folds even at zero matches");
let (_d2, src2) = crate_fixture(&[("kache.toml", "extra_inputs = [\"other/**/*.sql\"]")]);
let other_pattern = dig(&src2).unwrap();
assert_ne!(only_pattern, other_pattern);
}
// `dir/`, `dir`, and `dir/**/*` all normalize to the same pattern for an existing directory.
#[test]
fn dir_shaped_patterns_are_equivalent() {
let (d, _src) = crate_fixture(&[(".sqlx/q.json", "v1")]);
let root = d.path();
let p1 = normalize_pattern("x", root, ".sqlx/").unwrap();
let p2 = normalize_pattern("x", root, ".sqlx").unwrap();
let p3 = normalize_pattern("x", root, ".sqlx/**/*").unwrap();
assert_eq!(p1, p3);
assert_eq!(p2, p3);
}
// `..` and absolute patterns are accepted; only a pattern containing `\x1f` is rejected.
#[test]
fn out_of_crate_patterns_are_folded_not_rejected() {
let (d, _src) = crate_fixture(&[]);
let root = d.path();
assert!(normalize_pattern("x", root, "../shared/**").is_some());
assert!(normalize_pattern("x", root, "/etc/**").is_some());
assert!(normalize_pattern("x", root, ".sqlx/**/*.json").is_some());
assert!(normalize_pattern("x", root, "\u{1f}bad").is_none());
}
// A file outside the crate, declared by absolute path, is hashed and re-keys on edit.
#[test]
fn absolute_external_input_folds_and_rekeys() {
let ext = tempfile::tempdir().unwrap();
let ext_file = ext.path().join("shared.json");
std::fs::write(&ext_file, "v1").unwrap();
let toml = format!("extra_inputs = [\"{}\"]", ext_file.display());
let (_d, src) = crate_fixture(&[("kache.toml", toml.as_str())]);
let before = dig(&src).expect("absolute external input folds");
std::fs::write(&ext_file, "v2").unwrap();
let after = dig(&src).expect("still folds after edit");
assert_ne!(
before, after,
"editing an external declared input must re-key"
);
}
// Only globs whose literal prefix is `/` itself count as walking the filesystem root.
#[test]
fn walks_filesystem_root_detects_root_globs() {
assert!(walks_filesystem_root("/**"));
assert!(walks_filesystem_root("/**/*.json"));
assert!(!walks_filesystem_root("/usr/**"));
assert!(!walks_filesystem_root("/home/me/proto/**/*.proto"));
assert!(!walks_filesystem_root("proto/**/*.proto")); }
// A crate with the same files but no config file is not affected.
#[test]
fn sibling_crate_without_file_is_unaffected() {
let (_d1, src1) = crate_fixture(&[
("kache.toml", "extra_inputs = [\".sqlx/**/*.json\"]"),
(".sqlx/q.json", "v1"),
]);
let (_d2, src2) = crate_fixture(&[(".sqlx/q.json", "v1")]);
assert!(dig(&src1).is_some());
assert_eq!(dig(&src2), None);
}
// Identical crates in different temp directories produce the same digest.
#[test]
fn relocation_is_stable() {
let files = &[
("kache.toml", "extra_inputs = [\".sqlx/**/*.json\"]"),
(".sqlx/q.json", "v1"),
];
let (_d1, src1) = crate_fixture(files);
let (_d2, src2) = crate_fixture(files);
assert_eq!(dig(&src1), dig(&src2));
assert!(dig(&src1).is_some());
}
// Invalid TOML still yields a digest, and editing the broken file changes it.
#[test]
fn unparseable_file_folds_opaque_and_rekeys_on_edit() {
let (d, src) = crate_fixture(&[("kache.toml", "this is = not valid toml [[[")]);
let before = dig(&src).expect("broken config folds opaque, never silently ignored");
std::fs::write(d.path().join("kache.toml"), "still = broken ]]]").unwrap();
let after = dig(&src).unwrap();
assert_ne!(before, after);
}
// An unknown key makes the config a parse error, so a digest is produced even though the
// `extra_inputs` list is empty.
#[test]
fn stray_key_is_rejected_as_unparseable() {
let (_d, src) =
crate_fixture(&[("kache.toml", "extra_inputs = []\nlocal_store = \"/tmp\"")]);
assert!(dig(&src).is_some());
}
// Swapping the contents of two matched files changes the digest.
#[test]
fn content_swap_between_matched_files_rekeys() {
let (d, src) = crate_fixture(&[
("kache.toml", "extra_inputs = [\"migrations/**/*.sql\"]"),
("migrations/0001_init.sql", "CREATE A;"),
("migrations/0002_add.sql", "CREATE B;"),
]);
let before = dig(&src).expect("two matched files fold a digest");
std::fs::write(d.path().join("migrations/0001_init.sql"), "CREATE B;").unwrap();
std::fs::write(d.path().join("migrations/0002_add.sql"), "CREATE A;").unwrap();
let after = dig(&src).expect("still folds after swap");
assert_ne!(
before, after,
"content swap between matched files must re-key (false-hit guard)"
);
}
// A directory whose name contains glob metacharacters is still enumerated.
#[test]
fn metachar_dir_name_still_enumerates() {
let (d, src) = crate_fixture(&[
("kache.toml", "extra_inputs = [\"gen[1]\"]"),
("gen[1]/data.bin", "v1"),
]);
let before = dig(&src).expect("metachar-named dir folds its files");
std::fs::write(d.path().join("gen[1]/data.bin"), "v2").unwrap();
let after = dig(&src).expect("still folds");
assert_ne!(
before, after,
"a file inside a metachar-named dir must re-key (false-hit guard)"
);
}
// When every pattern is rejected, a digest is still produced and it tracks the raw text.
#[test]
fn all_rejected_patterns_fold_distinct_from_no_config_and_rekey() {
let (d, src) = crate_fixture(&[("kache.toml", "extra_inputs = [\"\\u001Fa\"]")]);
let folded = dig(&src).expect("all-rejected declaration still folds, never None");
let (_n, none_src) = crate_fixture(&[]);
assert!(
dig(&none_src).is_none(),
"no-config baseline is None (opt-out)"
);
std::fs::write(d.path().join("kache.toml"), "extra_inputs = [\"\\u001Fb\"]").unwrap();
let after = dig(&src).expect("still folds after edit");
assert_ne!(folded, after, "editing a rejected declaration must re-key");
}
// An empty list gives no digest, whereas an all-rejected list gives one.
#[test]
fn empty_list_stays_distinct_from_all_rejected() {
let (_e, empty) = crate_fixture(&[("kache.toml", "extra_inputs = []")]);
let (_r, rejected) = crate_fixture(&[("kache.toml", "extra_inputs = [\"\\u001Fx\"]")]);
assert_eq!(dig(&empty), None);
assert!(dig(&rejected).is_some());
}
// A pattern containing the `\x1f` separator is rejected by `normalize_pattern`.
#[test]
fn control_separator_in_pattern_is_rejected() {
let (d, _src) = crate_fixture(&[]);
let root = d.path();
assert!(normalize_pattern("x", root, "a\u{1f}b").is_none());
assert!(normalize_pattern("x", root, ".sqlx/**/*.json").is_some());
}
// A config that is not valid UTF-8 still yields a digest that tracks its raw bytes.
#[test]
fn non_utf8_config_folds_opaque_and_rekeys() {
let (d, src) = crate_fixture(&[]);
std::fs::write(d.path().join("kache.toml"), b"\xff\xfe extra_inputs").unwrap();
let before = dig(&src).expect("non-utf8 config folds opaque, never None");
std::fs::write(d.path().join("kache.toml"), b"\xff\xfe extra_input").unwrap();
let after = dig(&src).expect("still folds");
assert_ne!(before, after);
}
// A syntactically invalid glob does not stop a valid sibling pattern from being hashed.
#[test]
fn invalid_glob_pattern_does_not_abort_other_patterns() {
let (d, src) = crate_fixture(&[
(
"kache.toml",
"extra_inputs = [\"a[b\", \".sqlx/**/*.json\"]",
),
(".sqlx/q.json", "v1"),
]);
let before = dig(&src).expect("valid pattern still folds despite a bad sibling");
std::fs::write(d.path().join(".sqlx/q.json"), "v2").unwrap();
let after = dig(&src).unwrap();
assert_ne!(before, after, "the valid pattern's file still re-keys");
}
// Declaring the same pattern twice gives the same digest as declaring it once.
#[test]
fn duplicate_pattern_folds_same_as_single() {
let (_d1, s1) = crate_fixture(&[
(
"kache.toml",
"extra_inputs = [\".sqlx/**/*\", \".sqlx/**/*\"]",
),
(".sqlx/q.json", "v1"),
]);
let (_d2, s2) = crate_fixture(&[
("kache.toml", "extra_inputs = [\".sqlx/**/*\"]"),
(".sqlx/q.json", "v1"),
]);
assert_eq!(dig(&s1), dig(&s2));
}
// Two overlapping patterns give the same digest regardless of declaration order.
#[test]
fn overlapping_patterns_are_order_independent() {
let files: &[(&str, &str)] = &[
(
"kache.toml",
"extra_inputs = [\".sqlx/**/*\", \".sqlx/q.json\"]",
),
(".sqlx/q.json", "v1"),
];
let files_rev: &[(&str, &str)] = &[
(
"kache.toml",
"extra_inputs = [\".sqlx/q.json\", \".sqlx/**/*\"]",
),
(".sqlx/q.json", "v1"),
];
let (_d1, s1) = crate_fixture(files);
let (_d2, s2) = crate_fixture(files_rev);
assert_eq!(dig(&s1), dig(&s2));
}
// The source file need not be Rust: a C file under `src/` locates the same crate directory.
#[test]
fn cc_style_c_source_folds_extra_inputs() {
let dir = tempfile::tempdir().unwrap();
let root = dir.path();
std::fs::write(root.join("Cargo.toml"), "[package]\nname = \"x\"\n").unwrap();
std::fs::write(root.join("kache.toml"), "extra_inputs = [\"include/*.h\"]").unwrap();
std::fs::create_dir_all(root.join("include")).unwrap();
std::fs::write(root.join("include/api.h"), "v1").unwrap();
let csrc = root.join("src/ffi.c");
std::fs::create_dir_all(csrc.parent().unwrap()).unwrap();
std::fs::write(&csrc, "/* c */\n").unwrap();
let fh = FileHasher::new();
let before = digest(Some(&csrc), "x", true, &fh).expect("C source folds extra inputs");
std::fs::write(root.join("include/api.h"), "v2").unwrap();
let fh2 = FileHasher::new();
let after = digest(Some(&csrc), "x", true, &fh2).unwrap();
assert_ne!(
before, after,
"editing a declared header must re-key the cc crate"
);
}
// A matched file that cannot be read hashes differently from both its readable state and its
// absence.
#[cfg(unix)]
#[test]
fn unreadable_file_folds_sentinel_distinct_from_absent() {
use std::os::unix::fs::PermissionsExt;
let (d, src) = crate_fixture(&[
("kache.toml", "extra_inputs = [\"data/**/*\"]"),
("data/secret.bin", "v1"),
]);
let readable = dig(&src).expect("folds the readable file");
let p = d.path().join("data/secret.bin");
std::fs::set_permissions(&p, std::fs::Permissions::from_mode(0o000)).unwrap();
// If the file is still readable after dropping all permission bits (e.g. when running as
// root), the scenario cannot be reproduced, so the test bails out.
if std::fs::read(&p).is_ok() {
return;
}
let unreadable = dig(&src).expect("unreadable file still folds a sentinel");
assert_ne!(readable, unreadable, "unreadable must differ from readable");
std::fs::set_permissions(&p, std::fs::Permissions::from_mode(0o644)).unwrap();
std::fs::remove_file(&p).unwrap();
let absent = dig(&src).expect("zero matches still folds the pattern set");
assert_ne!(
unreadable, absent,
"unreadable must not alias absent (false-hit guard)"
);
}
}