use std::collections::HashSet;
/// Where the effective value of a configuration key was found.
///
/// When several origins apply to the same key, `provenance_for_with_env`
/// reports `Env` in preference to `File`, and `File` in preference to
/// `Default`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Source {
    /// The key is not set by the environment or by the TOML text.
    Default,
    /// The key appears as a leaf in the parsed TOML text.
    File,
    /// The environment variable mapped to the key is set and non-empty.
    Env,
}
/// One line of a provenance report: a configuration key and its origin.
#[derive(Debug, Clone)]
pub struct ProvenanceRow {
    /// Dotted path of the key, e.g. `"ssrf.level"`.
    pub dotted: String,
    /// Origin that supplies the key's value.
    pub source: Source,
}
/// Builds the provenance report for `file_toml` using the built-in
/// environment-override table returned by `env_overrides`.
///
/// This is a convenience wrapper around `provenance_for_with_env`.
pub fn provenance_for(file_toml: &str) -> Vec<ProvenanceRow> {
    let builtin_env_table = env_overrides();
    provenance_for_with_env(file_toml, builtin_env_table)
}
pub fn provenance_for_with_env(
file_toml: &str,
env_table: &[(&'static str, &'static str)],
) -> Vec<ProvenanceRow> {
let v: toml::Value =
toml::from_str(file_toml).unwrap_or(toml::Value::Table(Default::default()));
let mut file_leaves: HashSet<String> = HashSet::new();
walk_leaves(&v, "", &mut file_leaves);
let env_leaves: HashSet<String> = env_table
.iter()
.filter(|(_, var)| std::env::var(var).map(|s| !s.is_empty()).unwrap_or(false))
.map(|(key, _)| (*key).to_string())
.collect();
let mut rows = Vec::new();
for dotted in known_leaves() {
let source = if env_leaves.contains(*dotted) {
Source::Env
} else if file_leaves.contains(*dotted) {
Source::File
} else {
Source::Default
};
rows.push(ProvenanceRow {
dotted: (*dotted).to_string(),
source,
});
}
rows
}
/// Collects the dotted path of every leaf beneath `v` into `out`.
///
/// Nested tables are descended into, joining key names with `.`. Any value
/// that is not a table, arrays included, is recorded as a leaf. `prefix` is
/// the dotted path of `v` itself, or `""` at the root. If `v` is not a table,
/// nothing is recorded.
fn walk_leaves(v: &toml::Value, prefix: &str, out: &mut HashSet<String>) {
    let toml::Value::Table(table) = v else {
        return;
    };
    for (name, value) in table {
        let path = match prefix {
            "" => name.clone(),
            parent => format!("{parent}.{name}"),
        };
        if matches!(value, toml::Value::Table(_)) {
            walk_leaves(value, &path, out);
        } else {
            out.insert(path);
        }
    }
}
/// Returns the dotted keys that a provenance report covers.
///
/// `provenance_for_with_env` emits exactly one row per entry, in the order
/// listed here.
pub fn known_leaves() -> &'static [&'static str] {
    const LEAVES: &[&str] = &[
        "fetch.user_agent",
        "fetch.timeout_secs",
        "ssrf.level",
        "ssrf.project_root",
        "cache.default_ttl",
        "cache.min_ttl",
        "cache.max_ttl",
        "cache.override_no_store",
        "cache.store_raw_html",
        "robots.respect",
        "robots.default_ttl",
        "rate_limit.requests_per_minute_per_domain",
        "rate_limit.per_domain_concurrency",
        "rate_limit.global_concurrency",
        "tokenizer.default",
        "output.dir",
        "summarization.default_backend",
        "summarization.default_mode",
        "summarization.default_style",
        "summarization.fallback_to_extractive",
        "summarization.tables.target_tokens",
        "summarization.tables.focus",
        "debug.har_path",
        "debug.har_body_cap",
        "debug.log_level",
    ];
    LEAVES
}
/// Returns the built-in `(dotted key, environment variable name)` pairs used
/// by `provenance_for` to detect environment overrides.
pub fn env_overrides() -> &'static [(&'static str, &'static str)] {
    const OVERRIDES: &[(&str, &str)] = &[
        ("debug.log_level", "ROVER_LOG_LEVEL"),
        ("output.dir", "ROVER_OUTPUT_DIR"),
    ];
    OVERRIDES
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Finds the report row for `key`, if the report contains one.
    fn row_for<'a>(rows: &'a [ProvenanceRow], key: &str) -> Option<&'a ProvenanceRow> {
        rows.iter().find(|row| row.dotted == key)
    }

    /// A key present in the TOML text is reported as `File`, while a sibling
    /// key that is absent is reported as `Default`.
    #[test]
    fn provenance_marks_defaults_vs_file() {
        let toml = r#"
[ssrf]
level = "loopback"
"#;
        let rows = provenance_for(toml);

        let level = row_for(&rows, "ssrf.level").expect("ssrf.level present");
        assert_eq!(level.source, Source::File);

        let project_root =
            row_for(&rows, "ssrf.project_root").expect("ssrf.project_root present");
        assert_eq!(project_root.source, Source::Default);
    }

    /// A non-empty environment variable listed in the override table marks
    /// its key as `Env`.
    #[test]
    fn provenance_recognizes_env_override() {
        // A dedicated variable name keeps this test from clashing with the
        // real override variables read elsewhere in this file.
        const PROBE_VAR: &str = "ROVER_LOG_LEVEL_TEST_OVERRIDE_PROBE";

        // SAFETY: environment mutation is only hazardous when other threads
        // access the environment outside Rust's std accessors. Nothing in
        // this file does so. NOTE(review): confirm for the wider test binary.
        unsafe { std::env::set_var(PROBE_VAR, "debug") };

        let rows = provenance_for_with_env("", &[("debug.log_level", PROBE_VAR)]);
        let row = row_for(&rows, "debug.log_level").unwrap();
        assert_eq!(row.source, Source::Env);

        // SAFETY: same reasoning as for `set_var` above.
        unsafe { std::env::remove_var(PROBE_VAR) };
    }
}