use std::fs;
use std::path::Path;
use rag_rat_core::{Config, IndexDatabase};
use crate::cli::{OracleArgs, OracleCommand, OracleReportArgs, OracleRunArgs, OracleStatusArgs};
use crate::{open_index, print_output};
/// Entry point for the `oracle` subcommand family: routes the parsed
/// [`OracleCommand`] to its handler.
///
/// `Run` and `Report` receive the configuration and open the index themselves
/// (through `with_oracle_write_lock`). `Status` opens the index right here via
/// `open_index` and hands the handle to `oracle_status`, so it does not go
/// through `with_oracle_write_lock`.
///
/// # Errors
/// Propagates whatever the selected handler (or `open_index`) returns.
pub(crate) fn oracle(config: &Config, args: &OracleArgs) -> anyhow::Result<()> {
    match &args.command {
        OracleCommand::Run(run) => oracle_run(config, run),
        OracleCommand::Report(report) => oracle_report(config, report),
        OracleCommand::Status(status) => {
            let index = open_index(config)?;
            oracle_status(&index, status)
        },
    }
}
/// Runs `body` against a freshly opened index while holding the write lock for
/// `config.database`.
///
/// The lock is taken with `WriteLock::acquire_blocking` *before* the index is
/// opened. Because the guard is declared first it is dropped last, so the index
/// handle goes away before the guard does (presumably releasing the lock on
/// drop — the tests in this file rely on the lock being free after return).
///
/// # Errors
/// Propagates failures from acquiring the lock, opening the index, or `body`.
pub(crate) fn with_oracle_write_lock<T>(
    config: &Config,
    body: impl FnOnce(&IndexDatabase) -> anyhow::Result<T>,
) -> anyhow::Result<T> {
    use rag_rat_core::locks::WriteLock;

    // Keep the guard bound to a name: `let _ = ...` would drop it immediately.
    let _write_guard = WriteLock::acquire_blocking(&config.database)?;
    let index = open_index(config)?;
    body(&index)
}
/// Handles `oracle run`.
///
/// Two modes:
///
/// * `--scip <file>`: the SCIP index is read from disk, a `scip-file:<name>@<fingerprint>`
///   tool version is derived from it, and the index is fed to
///   `run_oracle_from_scip` under the write lock.
/// * otherwise: the oracle tool is probed; if it is blocked, the hint is echoed
///   on stderr and a `Blocked` outcome is printed. If available, a pre-spawn
///   snapshot is taken under the write lock, the tool produces a SCIP file
///   (the write lock is *not* held while it runs), and the produced bytes are
///   applied with `run_oracle_at` under the write lock again.
///
/// # Errors
/// Fails when the SCIP file cannot be read, when SCIP production fails, or when
/// any locked database step or `print_output` fails. A blocked tool is reported
/// as a normal (printed) outcome, not as an error.
fn oracle_run(config: &Config, args: &OracleRunArgs) -> anyhow::Result<()> {
    use rag_rat_core::index::OracleShaSnapshots;
    use rag_rat_core::index::oracle::{self, OracleRunOutcome, ScipProduction, ToolAvailability};

    let tool = args.tool.core();

    // Pre-built SCIP index supplied by the caller: no tool probing or spawning.
    if let Some(scip_path) = &args.scip {
        let scip_bytes = fs::read(scip_path).map_err(|err| {
            anyhow::anyhow!("failed to read SCIP index {}: {err}", scip_path.display())
        })?;
        let file_label =
            scip_path.file_name().and_then(|name| name.to_str()).unwrap_or("index.scip");
        let tool_version = format!(
            "scip-file:{file_label}@{}",
            oracle::scip_content_fingerprint(&scip_bytes),
        );
        let report = with_oracle_write_lock(config, |index| {
            index.run_oracle_from_scip(tool, &tool_version, &scip_bytes)
        })?;
        return print_output(&serde_json::json!({
            "outcome": "completed",
            "tool": tool.as_db_str(),
            "tool_version": tool_version,
            "report": report,
        }));
    }

    if let ToolAvailability::Blocked { tool, program, hint } = oracle::probe_oracle_tool(tool) {
        eprintln!("oracle: {hint}");
        return print_output(&OracleRunOutcome::Blocked { tool, program, hint });
    }

    // Start time is sampled before the snapshot, both under the write lock.
    let (started_at_ms, pre_spawn_sha) = with_oracle_write_lock(config, |index| {
        let started_at_ms = crate::now_epoch_ms();
        let pre_spawn_sha = index.oracle_pre_spawn_snapshot()?;
        Ok((started_at_ms, pre_spawn_sha))
    })?;

    // Scratch file lives next to the database, or in the temp dir when the
    // database path has no parent.
    let scratch_dir =
        config.database.parent().map(Path::to_path_buf).unwrap_or_else(std::env::temp_dir);
    let scip_output = scratch_dir.join(format!("rag-rat-oracle-{}.scip", std::process::id()));

    let production = oracle::produce_scip_with_tool(tool, &config.root, &scip_output);
    // Best-effort cleanup, done before the production result is inspected so the
    // scratch file is removed on failure too.
    let _ = fs::remove_file(&scip_output);

    let (version, bytes, production_sha) = match production? {
        ScipProduction::Blocked { tool, program, hint } => {
            eprintln!("oracle: {hint}");
            return print_output(&OracleRunOutcome::Blocked { tool, program, hint });
        },
        ScipProduction::Produced { version, bytes, production_sha } => {
            (version, bytes, production_sha)
        },
    };

    let report = with_oracle_write_lock(config, |index| {
        index.run_oracle_at(
            tool,
            &version,
            &bytes,
            OracleShaSnapshots {
                production: Some(&production_sha),
                pre_spawn: Some(&pre_spawn_sha),
            },
            started_at_ms,
        )
    })?;
    print_output(&serde_json::json!({
        "outcome": "completed",
        "tool": tool.as_db_str(),
        "tool_version": version,
        "report": report,
    }))
}
/// Handles `oracle status`: prints one JSON entry per oracle tool.
///
/// When `args.tool` is set only that tool is reported; otherwise every tool in
/// `OracleTool::ALL` is. Each entry carries the tool's database name, its
/// availability probe result, and — when a run version is recorded for the
/// tool — the status for that latest version (`null` otherwise).
///
/// # Errors
/// Stops at the first failing database query and returns its error; also
/// propagates `print_output` failures.
fn oracle_status(db: &IndexDatabase, args: &OracleStatusArgs) -> anyhow::Result<()> {
    use rag_rat_core::index::oracle::OracleTool;

    let tools: Vec<OracleTool> = match args.tool {
        None => OracleTool::ALL.to_vec(),
        Some(selected) => vec![selected.core()],
    };

    let entries = tools
        .into_iter()
        .map(|tool| -> anyhow::Result<serde_json::Value> {
            // NOTE(review): availability is probed through the `IndexDatabase`
            // method here, while the run/report paths call the free function
            // `oracle::probe_oracle_tool` — confirm the two agree.
            let availability = db.probe_oracle_tool(tool);
            let verdicts = db
                .latest_oracle_run_version(tool)?
                .map(|version| db.oracle_status(tool, &version))
                .transpose()?;
            Ok(serde_json::json!({
                "tool": tool.as_db_str(),
                "tool_available": availability,
                "verdicts": verdicts,
            }))
        })
        .collect::<anyhow::Result<Vec<_>>>()?;

    print_output(&entries)
}
/// Handles `oracle report`: runs the oracle for one corpus profile and prints
/// the resulting report, failing if the corpus health thresholds are violated.
///
/// Steps:
/// 1. Load the corpora TOML (`--corpora`, defaulting to
///    `<root>/tools/oracle-corpora.toml`) and look up `args.corpus`.
/// 2. Resolve the profile's oracle tool and verify the active checkout matches
///    the profile's bindings.
/// 3. Obtain SCIP bytes — either from `--scip <file>` or by running the tool
///    (the write lock is not held while the tool runs) — and call
///    `run_oracle_report` under the write lock.
/// 4. Print the report; then, if there are violations, list them on stderr and
///    return an error.
///
/// # Errors
/// Fails on unreadable inputs, an unknown corpus or tool, mismatched checkout
/// bindings, an unavailable tool, failed SCIP production, any database step,
/// or one or more health violations.
fn oracle_report(config: &Config, args: &OracleReportArgs) -> anyhow::Result<()> {
    use rag_rat_core::index::OracleShaSnapshots;
    use rag_rat_core::index::oracle;

    let corpora_path = match &args.corpora {
        Some(explicit) => explicit.clone(),
        None => config.root.join("tools").join("oracle-corpora.toml"),
    };
    let toml_str = fs::read_to_string(&corpora_path).map_err(|err| {
        anyhow::anyhow!("failed to read corpora file {}: {err}", corpora_path.display())
    })?;
    let corpora = oracle::load_corpora(&toml_str)?;

    let profile = match oracle::corpus_by_id(&corpora, &args.corpus) {
        Some(found) => found.clone(),
        None => {
            anyhow::bail!("no corpus `{}` in {}", args.corpus, corpora_path.display());
        },
    };
    let tool = match oracle::OracleTool::from_db_str(&profile.tool) {
        Some(known) => known,
        None => {
            anyhow::bail!(
                "corpus `{}` names unknown oracle tool `{}`",
                profile.corpus_id,
                profile.tool
            );
        },
    };

    ensure_checkout_matches_corpus(config, &profile)?;
    let rag_rat_commit = rag_rat_commit_provenance();

    let (report, violations) = match &args.scip {
        // Pre-built SCIP index: no snapshots, start time sampled under the lock.
        Some(scip_path) => {
            let scip_bytes = fs::read(scip_path).map_err(|err| {
                anyhow::anyhow!("failed to read SCIP index {}: {err}", scip_path.display())
            })?;
            let file_label =
                scip_path.file_name().and_then(|name| name.to_str()).unwrap_or("index.scip");
            let tool_version = format!(
                "scip-file:{file_label}@{}",
                oracle::scip_content_fingerprint(&scip_bytes),
            );
            let content_fingerprint = oracle::scip_content_fingerprint(&scip_bytes);

            with_oracle_write_lock(config, |index| {
                // The worktree id is only known once the index is open.
                let provenance = oracle::RunProvenance {
                    tool_version,
                    rag_rat_commit: rag_rat_commit.clone(),
                    worktree_id: index.active_worktree_id.clone(),
                    production_sha: content_fingerprint,
                };
                index.run_oracle_report(
                    &profile,
                    &provenance,
                    tool,
                    &scip_bytes,
                    OracleShaSnapshots::default(),
                    crate::now_epoch_ms(),
                )
            })?
        },
        // Produce the SCIP index with the corpus's tool.
        None => {
            if let oracle::ToolAvailability::Blocked { hint, .. } = oracle::probe_oracle_tool(tool)
            {
                anyhow::bail!("oracle tool for corpus `{}` unavailable: {hint}", profile.corpus_id);
            }

            let (started_at_ms, pre_spawn_sha) = with_oracle_write_lock(config, |index| {
                let started_at_ms = crate::now_epoch_ms();
                let pre_spawn_sha = index.oracle_pre_spawn_snapshot()?;
                Ok((started_at_ms, pre_spawn_sha))
            })?;

            let scratch_dir =
                config.database.parent().map(Path::to_path_buf).unwrap_or_else(std::env::temp_dir);
            let scip_output =
                scratch_dir.join(format!("rag-rat-oracle-report-{}.scip", std::process::id()));

            let production = oracle::produce_scip_with_tool(tool, &config.root, &scip_output);
            // Best-effort cleanup before the production result is inspected.
            let _ = fs::remove_file(&scip_output);

            let (version, bytes, production_sha) = match production? {
                oracle::ScipProduction::Blocked { hint, .. } => {
                    anyhow::bail!(
                        "oracle tool for corpus `{}` unavailable: {hint}",
                        profile.corpus_id
                    );
                },
                oracle::ScipProduction::Produced { version, bytes, production_sha } => {
                    (version, bytes, production_sha)
                },
            };

            let tool_version = with_oracle_tool_version_suffix(version);
            let content_fingerprint = oracle::scip_content_fingerprint(&bytes);

            with_oracle_write_lock(config, |index| {
                let provenance = oracle::RunProvenance {
                    tool_version,
                    rag_rat_commit: rag_rat_commit.clone(),
                    worktree_id: index.active_worktree_id.clone(),
                    production_sha: content_fingerprint,
                };
                index.run_oracle_report(
                    &profile,
                    &provenance,
                    tool,
                    &bytes,
                    OracleShaSnapshots {
                        production: Some(&production_sha),
                        pre_spawn: Some(&pre_spawn_sha),
                    },
                    started_at_ms,
                )
            })?
        },
    };

    // The report is always printed, even when the health check then fails.
    print_output(&report)?;
    if violations.is_empty() {
        return Ok(());
    }
    for violation in &violations {
        eprintln!("corpus health [{}]: {}", violation.check, violation.detail);
    }
    Err(anyhow::anyhow!(
        "corpus `{}` failed {} health threshold(s)",
        profile.corpus_id,
        violations.len()
    ))
}
/// Verifies that the active checkout's targets line up with a corpus profile.
///
/// Two checks, in order:
/// 1. The per-language set of target directories in `config.targets` must equal
///    the per-language directory sets in `profile.bindings`.
/// 2. Every target must have no `exclude` entries and an `include` set equal to
///    its language's default include globs.
///
/// # Errors
/// Returns an error describing the first check that fails.
fn ensure_checkout_matches_corpus(
    config: &Config,
    profile: &rag_rat_core::index::oracle::CorpusProfile,
) -> anyhow::Result<()> {
    use std::collections::{BTreeMap, BTreeSet};

    // language name -> directories bound to it in the active configuration
    let mut actual: BTreeMap<String, BTreeSet<String>> = BTreeMap::new();
    for target in &config.targets {
        let language = target.language.as_str().to_string();
        let bound_dirs = actual.entry(language).or_default();
        for dir in &target.directories {
            bound_dirs.insert(dir.to_string_lossy().into_owned());
        }
    }

    // language name -> directories the corpus profile declares
    let mut expected: BTreeMap<String, BTreeSet<String>> = BTreeMap::new();
    for (language, dirs) in profile.bindings.iter() {
        expected.insert(language.clone(), dirs.iter().cloned().collect());
    }

    anyhow::ensure!(
        actual == expected,
        "active checkout target bindings {actual:?} do not match corpus `{}` bindings \
         {expected:?} — run `oracle report` against a checkout indexed with the corpus's bindings \
         (the corpus runner does this) so the report measures the population its profile hash \
         claims",
        profile.corpus_id,
    );

    for target in &config.targets {
        let default_include: BTreeSet<String> =
            target.language.default_include_globs().into_iter().collect();
        let include: BTreeSet<String> = target.include.iter().cloned().collect();
        // Order and duplicates in `include` are irrelevant: sets are compared.
        let uses_default_filters = target.exclude.is_empty() && include == default_include;
        anyhow::ensure!(
            uses_default_filters,
            "target `{}` ({}) has custom include/exclude filters (include {:?}, exclude {:?}) — \
             `oracle report --corpus {}` requires the corpus's plain bindings (default filters) \
             so the report's profile hash matches the file population it measured",
            target.name,
            target.language.as_str(),
            target.include,
            target.exclude,
            profile.corpus_id,
        );
    }

    Ok(())
}
/// Returns the identifier recorded as the rag-rat commit in run provenance.
///
/// Uses the `RAG_RAT_COMMIT` environment variable when it is set, valid
/// Unicode, and not blank (the value is returned as-is, untrimmed); otherwise
/// falls back to the crate version baked in at compile time.
fn rag_rat_commit_provenance() -> String {
    match std::env::var("RAG_RAT_COMMIT") {
        Ok(commit) if !commit.trim().is_empty() => commit,
        _ => env!("CARGO_PKG_VERSION").to_string(),
    }
}
/// Appends the optional `RAG_RAT_ORACLE_TOOL_VERSION_SUFFIX` environment value
/// to `version` (see `join_tool_version_suffix` for the joining rules).
///
/// An unset or non-Unicode variable is treated as "no suffix".
fn with_oracle_tool_version_suffix(version: String) -> String {
    let suffix = std::env::var("RAG_RAT_ORACLE_TOOL_VERSION_SUFFIX").ok();
    join_tool_version_suffix(version, suffix)
}
/// Joins a tool version with an optional suffix as `<version>+<suffix>`.
///
/// The suffix is trimmed first; a missing or blank suffix leaves `version`
/// unchanged.
fn join_tool_version_suffix(version: String, suffix: Option<String>) -> String {
    match suffix.as_deref().map(str::trim) {
        Some(trimmed) if !trimmed.is_empty() => format!("{version}+{trimmed}"),
        _ => version,
    }
}
#[cfg(test)]
mod tests {
    use std::path::PathBuf;
    use std::sync::atomic::{AtomicU64, Ordering};
    use std::sync::mpsc;
    use std::time::Duration;

    use rag_rat_core::config::{ResolvedTarget, TargetKind};
    use rag_rat_core::language::Language;
    use rag_rat_core::locks::{FileLock, write_lock_path};
    use rag_rat_core::{Config, IndexDatabase};

    use crate::cli::{OracleArgs, OracleCommand, OracleRunArgs, OracleToolArg};

    /// Per-process sequence number that keeps temp roots distinct between tests.
    static NEXT_TEMP_ID: AtomicU64 = AtomicU64::new(0);

    /// Builds a `Config` with a single Rust source target over `src`; every
    /// other section uses its default.
    fn single_rust_target_config(
        root: PathBuf,
        database: PathBuf,
        include: Vec<String>,
        exclude: Vec<String>,
    ) -> Config {
        let rust_target = ResolvedTarget {
            name: "rust".to_string(),
            language: Language::Rust,
            directories: vec![PathBuf::from("src")],
            include,
            exclude,
            kind: TargetKind::Source,
        };
        Config {
            root,
            database,
            targets: vec![rust_target],
            llm: Default::default(),
            watch: Default::default(),
            version_check: Default::default(),
            oracle: Default::default(),
            search: Default::default(),
        }
    }

    /// Creates a fresh on-disk project (one tiny `src/lib.rs`) under the system
    /// temp dir and returns its root together with a matching `Config`.
    fn temp_config() -> (PathBuf, Config) {
        let seq = NEXT_TEMP_ID.fetch_add(1, Ordering::Relaxed);
        let root = std::env::temp_dir()
            .join(format!("rag-rat-cli-oracle-lock-{}-{seq}", std::process::id()));
        // Start from a clean slate if a previous run left the directory behind.
        let _ = std::fs::remove_dir_all(&root);
        std::fs::create_dir_all(root.join("src")).unwrap();
        std::fs::write(root.join("src/lib.rs"), "fn caller() { target(); } fn target() {}\n")
            .unwrap();
        let config = single_rust_target_config(
            root.clone(),
            root.join(".rag-rat/index.sqlite"),
            vec!["src/".to_string()],
            Vec::new(),
        );
        (root, config)
    }

    /// `oracle run` arguments for rust-analyzer, optionally with a SCIP file.
    fn run_args(scip: Option<PathBuf>) -> OracleArgs {
        OracleArgs {
            command: OracleCommand::Run(OracleRunArgs { tool: OracleToolArg::RustAnalyzer, scip }),
        }
    }

    /// Rebuilds the index for a fresh temp project and prepares `oracle run`
    /// arguments pointing at an empty SCIP file inside it.
    fn indexed_fixture_with_empty_scip() -> (PathBuf, Config, OracleArgs) {
        let (root, config) = temp_config();
        IndexDatabase::rebuild(&config).unwrap();
        let scip_path = root.join("empty.scip");
        std::fs::write(&scip_path, []).unwrap();
        (root, config, run_args(Some(scip_path)))
    }

    #[test]
    fn tool_version_suffix_folds_in_the_toolchain_fingerprint() {
        let join = |suffix: Option<&str>| {
            super::join_tool_version_suffix("0.4.0".into(), suffix.map(Into::into))
        };
        // Missing and blank suffixes leave the version untouched.
        assert_eq!(join(None), "0.4.0");
        assert_eq!(join(Some(" ")), "0.4.0");
        assert_eq!(join(Some("toolchain:abc123")), "0.4.0+toolchain:abc123");
        // Different fingerprints must yield different versions.
        assert_ne!(join(Some("toolchain:aaa")), join(Some("toolchain:bbb")));
    }

    #[test]
    fn checkout_bindings_must_match_corpus() {
        use std::collections::BTreeMap;

        use rag_rat_core::index::oracle::{CorpusHealth, CorpusProfile};

        let config_with = |include: Vec<String>, exclude: Vec<String>| {
            single_rust_target_config(PathBuf::from("/x"), PathBuf::from("/x/db"), include, exclude)
        };
        let profile_for = |dirs: &[&str]| {
            let mut bindings = BTreeMap::new();
            bindings.insert("rust".to_string(), dirs.iter().map(|dir| dir.to_string()).collect());
            CorpusProfile {
                corpus_id: "rust-semver".to_string(),
                tier: "small".to_string(),
                repo: "r".to_string(),
                rev: "v".to_string(),
                tool: "rust-analyzer".to_string(),
                prepare: Vec::new(),
                bindings,
                health: CorpusHealth {
                    expected_min_heuristic_edges: 1,
                    expected_min_oracle_examined: 1,
                    expected_max_skipped_drifted: 0,
                    expected_min_symbols_with_moniker: 1,
                    expected_min_resolved_external: None,
                    timeout_minutes: 1,
                },
            }
        };

        // Same language, same directory, default filters: accepted.
        let plain = config_with(vec!["**/*.rs".to_string()], Vec::new());
        assert!(super::ensure_checkout_matches_corpus(&plain, &profile_for(&["src"])).is_ok());

        // A different directory is rejected.
        assert!(super::ensure_checkout_matches_corpus(&plain, &profile_for(&["lib"])).is_err());

        // A profile with an extra language is rejected.
        let mut two_langs = profile_for(&["src"]);
        two_langs.bindings.insert("python".to_string(), vec!["pkg".to_string()]);
        assert!(super::ensure_checkout_matches_corpus(&plain, &two_langs).is_err());

        // Any exclude filter is rejected.
        let excluded =
            config_with(vec!["**/*.rs".to_string()], vec!["**/generated/**".to_string()]);
        assert!(super::ensure_checkout_matches_corpus(&excluded, &profile_for(&["src"])).is_err());

        // A non-default include filter is rejected.
        let narrowed = config_with(vec!["src/lib.rs".to_string()], Vec::new());
        assert!(super::ensure_checkout_matches_corpus(&narrowed, &profile_for(&["src"])).is_err());
    }

    #[test]
    fn oracle_run_blocks_on_write_lock() {
        let (root, config, args) = indexed_fixture_with_empty_scip();

        // Hold the write lock on this thread, then start `oracle run` on another.
        let held = FileLock::acquire_blocking(&write_lock_path(&config.database)).unwrap();
        let (done_tx, done_rx) = mpsc::channel();
        let worker = std::thread::spawn(move || {
            let succeeded = super::oracle(&config, &args).is_ok();
            let _ = done_tx.send(succeeded);
        });

        assert!(
            done_rx.recv_timeout(Duration::from_millis(300)).is_err(),
            "oracle run completed while the write lock was held — it must block on the lock"
        );

        drop(held);
        let succeeded = done_rx
            .recv_timeout(Duration::from_secs(20))
            .expect("oracle run completes after unlock");
        assert!(succeeded, "oracle run should succeed once the lock is free");
        worker.join().unwrap();
        let _ = std::fs::remove_dir_all(&root);
    }

    #[test]
    fn oracle_run_releases_write_lock_after_completion() {
        let (root, config, args) = indexed_fixture_with_empty_scip();

        super::oracle(&config, &args).unwrap();

        // A non-blocking acquire must succeed once the command has returned.
        let reacquired = FileLock::try_acquire(&write_lock_path(&config.database)).unwrap();
        assert!(reacquired.is_some(), "oracle run must release the write lock when it returns");
        let _ = std::fs::remove_dir_all(&root);
    }
}