use std::collections::{HashMap, HashSet};
use std::path::{Path, PathBuf};
use crate::store::{ChunkSummary, NoteSummary, SearchFilter};
use crate::{normalize_slashes, AnalysisError, Embedder, Store};
/// Role assigned to a chunk in a scout result.
///
/// Serialized in `snake_case`; `ChunkRole::as_str` returns the same spellings.
#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize)]
#[serde(rename_all = "snake_case")]
pub enum ChunkRole {
/// Non-test chunk whose search score is at or above the modify threshold.
ModifyTarget,
/// Chunk that `crate::is_test_chunk` identifies as a test, regardless of score.
TestToUpdate,
/// Non-test chunk whose search score is below the modify threshold.
Dependency,
}
impl ChunkRole {
pub fn as_str(&self) -> &'static str {
match self {
ChunkRole::ModifyTarget => "modify_target",
ChunkRole::TestToUpdate => "test_to_update",
ChunkRole::Dependency => "dependency",
}
}
}
/// One search hit inside a `FileGroup`, annotated with its role and impact hints.
#[derive(Debug, Clone, serde::Serialize)]
pub struct ScoutChunk {
/// Chunk name, copied from the matching `ChunkSummary`.
pub name: String,
/// Chunk kind, copied from the matching `ChunkSummary`.
pub chunk_type: crate::language::ChunkType,
/// Signature text, copied from the matching `ChunkSummary`.
pub signature: String,
/// Starting line, copied from the matching `ChunkSummary`.
pub line_start: u32,
/// Role assigned by `classify_role`.
pub role: ChunkRole,
/// Caller count from the impact hints looked up by chunk name (0 when no hints exist).
pub caller_count: usize,
/// Test count from the impact hints looked up by chunk name (0 when no hints exist).
pub test_count: usize,
/// Score the search reported for this chunk.
pub search_score: f32,
}
/// All search hits that belong to one file.
#[derive(Debug, Clone, serde::Serialize)]
pub struct FileGroup {
/// File path with the scout `root` prefix stripped when the path lies under it.
/// Serialized through `crate::serialize_path_normalized`.
#[serde(serialize_with = "crate::serialize_path_normalized")]
pub file: PathBuf,
/// Arithmetic mean of the search scores of `chunks`.
pub relevance_score: f32,
/// The hits found in this file.
pub chunks: Vec<ScoutChunk>,
/// `true` when the store's staleness check reported this file.
pub is_stale: bool,
}
/// Aggregate counts over the file groups returned by a scout run.
#[derive(Debug, Clone, serde::Serialize)]
pub struct ScoutSummary {
/// Number of file groups returned (after truncation to the requested limit).
pub total_files: usize,
/// Total number of chunks across the returned file groups.
pub total_functions: usize,
/// Chunks with `test_count == 0` whose role is not `ChunkRole::TestToUpdate`.
pub untested_count: usize,
/// Number of returned file groups flagged `is_stale`.
pub stale_count: usize,
}
/// Complete output of a scout run.
#[derive(Debug, Clone, serde::Serialize)]
pub struct ScoutResult {
/// File groups sorted by descending `relevance_score`, ties broken by path.
pub file_groups: Vec<FileGroup>,
/// Notes with at least one mention matching a returned file.
pub relevant_notes: Vec<NoteSummary>,
/// Counts derived from `file_groups`.
pub summary: ScoutSummary,
}
/// Default for `ScoutOptions::min_gap_ratio`: the smallest relative score drop that
/// `compute_modify_threshold` accepts as a split point.
const MIN_GAP_RATIO: f32 = 0.10;
/// Default for `ScoutOptions::search_limit`.
pub const DEFAULT_SCOUT_SEARCH_LIMIT: usize = 15;
/// Default for `ScoutOptions::search_threshold`.
pub const DEFAULT_SCOUT_SEARCH_THRESHOLD: f32 = 0.2;
/// Tunable parameters for a scout run; `Default` uses the module-level constants.
#[derive(Debug, Clone)]
pub struct ScoutOptions {
/// Forwarded to `Store::search_filtered` as its limit argument
/// (presumably the maximum number of raw hits -- confirm against the store API).
pub search_limit: usize,
/// Forwarded to `Store::search_filtered` as its threshold argument
/// (presumably a minimum score -- confirm against the store API).
pub search_threshold: f32,
/// Minimum relative gap between adjacent scores for `compute_modify_threshold`
/// to split there; below it only the top score qualifies as a modify target.
pub min_gap_ratio: f32,
}
impl Default for ScoutOptions {
fn default() -> Self {
Self {
search_limit: DEFAULT_SCOUT_SEARCH_LIMIT,
search_threshold: DEFAULT_SCOUT_SEARCH_THRESHOLD,
min_gap_ratio: MIN_GAP_RATIO,
}
}
}
/// Runs a scout query using the default [`ScoutOptions`].
///
/// Thin wrapper around [`scout_with_options`]; every argument is forwarded unchanged.
///
/// # Errors
///
/// Returns whatever error [`scout_with_options`] returns.
pub fn scout<Mode>(
    store: &Store<Mode>,
    embedder: &Embedder,
    task: &str,
    root: &Path,
    limit: usize,
) -> Result<ScoutResult, AnalysisError> {
    let default_opts = ScoutOptions::default();
    scout_with_options(store, embedder, task, root, limit, &default_opts)
}
/// Runs a scout query with explicit [`ScoutOptions`].
///
/// Embeds `task`, loads the call graph and the test chunks from `store`, then hands
/// everything to [`scout_core`]. A failure to load test chunks is logged and replaced
/// by an empty list, so the run continues without test analysis.
///
/// # Errors
///
/// Propagates errors from embedding the task, loading the call graph, and from
/// [`scout_core`].
pub fn scout_with_options<Mode>(
    store: &Store<Mode>,
    embedder: &Embedder,
    task: &str,
    root: &Path,
    limit: usize,
    opts: &ScoutOptions,
) -> Result<ScoutResult, AnalysisError> {
    let _span = tracing::info_span!("scout", task_len = task.len(), limit).entered();

    let query_embedding = embedder.embed_query(task)?;
    let graph = store.get_call_graph()?;

    // Test analysis is best-effort: fall back to "no tests known" on failure.
    let test_chunks = store.find_test_chunks().unwrap_or_else(|e| {
        tracing::warn!(error = %e, "Failed to load test chunks, scout will skip test analysis");
        std::sync::Arc::new(Vec::new())
    });

    let resources = ScoutResources {
        store,
        query_embedding: &query_embedding,
        task,
        root,
        limit,
        opts,
        graph: &graph,
        test_chunks: &test_chunks,
    };
    scout_core(&resources)
}
/// Borrowed inputs consumed by `scout_core`.
///
/// `scout_with_options` fills this in after embedding the task and loading the
/// call graph and test chunks from the store.
pub(crate) struct ScoutResources<'a, Mode> {
/// Store used for the search, caller counts, staleness checks, and notes.
pub store: &'a Store<Mode>,
/// Embedding of `task`, used as the search query vector.
pub query_embedding: &'a crate::Embedding,
/// Task text; also used as the search filter's `query_text` and recorded on the tracing span.
pub task: &'a str,
/// Root path passed to the staleness check and stripped from result file paths.
pub root: &'a Path,
/// Maximum number of file groups to return.
pub limit: usize,
/// Search and threshold tuning parameters.
pub opts: &'a ScoutOptions,
/// Call graph passed to `crate::impact::compute_hints_batch`.
pub graph: &'a crate::store::CallGraph,
/// Test chunks passed to `crate::impact::compute_hints_batch`.
pub test_chunks: &'a [ChunkSummary],
}
/// Runs the scout pipeline over pre-loaded resources.
///
/// Steps, in order:
/// 1. Search the store with the task embedding (RRF disabled, `task` as query text).
/// 2. Group the hits by file.
/// 3. Look up caller counts and stale files; a failure in either lookup is logged
///    and treated as "no data" instead of aborting the run.
/// 4. Compute the modify threshold and the per-name impact hints.
/// 5. Build one [`FileGroup`] per file, rank by mean search score (ties broken by
///    path) and keep the first `limit` groups.
/// 6. Collect notes that mention a returned file and fill in the summary.
///
/// # Errors
///
/// Propagates the error from `Store::search_filtered`. Every other store failure
/// is downgraded to a warning.
pub(crate) fn scout_core<Mode>(
    res: &ScoutResources<'_, Mode>,
) -> Result<ScoutResult, AnalysisError> {
    let store = res.store;
    let task = res.task;
    let root = res.root;
    let limit = res.limit;
    let opts = res.opts;
    let _span = tracing::info_span!("scout_core", %task, limit).entered();

    let filter = SearchFilter {
        enable_rrf: false,
        query_text: task.to_string(),
        ..SearchFilter::default()
    };
    let results = store.search_filtered(
        res.query_embedding,
        &filter,
        opts.search_limit,
        opts.search_threshold,
    )?;
    tracing::debug!(search_results = results.len(), "Scout search complete");

    if results.is_empty() {
        return Ok(ScoutResult {
            file_groups: Vec::new(),
            relevant_notes: Vec::new(),
            summary: ScoutSummary {
                total_files: 0,
                total_functions: 0,
                untested_count: 0,
                stale_count: 0,
            },
        });
    }

    // Group hits by the file they live in, keeping each hit's score.
    let mut file_map: HashMap<PathBuf, Vec<(f32, &ChunkSummary)>> = HashMap::new();
    for r in &results {
        file_map
            .entry(r.chunk.file.clone())
            .or_default()
            .push((r.score, &r.chunk));
    }

    // A single name list feeds both the caller-count query and the hint batch.
    let chunk_names: Vec<&str> = results.iter().map(|r| r.chunk.name.as_str()).collect();
    let caller_counts = match store.get_caller_counts_batch(&chunk_names) {
        Ok(c) => c,
        Err(e) => {
            tracing::warn!(error = %e, "Failed to fetch caller counts");
            HashMap::new()
        }
    };

    let origins: Vec<String> = file_map
        .keys()
        .map(|p| p.to_string_lossy().into_owned())
        .collect();
    let origin_refs: Vec<&str> = origins.iter().map(|s| s.as_str()).collect();
    let stale_set = match store.check_origins_stale(&origin_refs, root) {
        Ok(s) => s,
        Err(e) => {
            tracing::warn!(error = %e, "Failed to check staleness");
            HashSet::new()
        }
    };

    let modify_threshold = compute_modify_threshold(&results, opts.min_gap_ratio);
    tracing::debug!(modify_threshold, "Gap-based threshold computed");

    // Hints come back in the same order as `chunk_names`, so zip them into a
    // name -> hints lookup.
    let hints_batch = crate::impact::compute_hints_batch(
        res.graph,
        res.test_chunks,
        &chunk_names,
        &caller_counts,
    );
    let hints_map: HashMap<&str, &crate::impact::FunctionHints> = chunk_names
        .iter()
        .copied()
        .zip(hints_batch.iter())
        .collect();

    let mut groups: Vec<FileGroup> = file_map
        .into_iter()
        .map(|(file, chunks)| {
            // Every map entry holds at least one hit, so the mean is well defined.
            let relevance_score =
                chunks.iter().map(|(s, _)| s).sum::<f32>() / chunks.len() as f32;
            let is_stale = stale_set.contains(&file.to_string_lossy().into_owned());
            let scout_chunks: Vec<ScoutChunk> = chunks
                .iter()
                .map(|&(score, chunk)| {
                    // Names without hints count as having no callers and no tests.
                    let (caller_count, test_count) = hints_map
                        .get(chunk.name.as_str())
                        .map_or((0, 0), |h| (h.caller_count, h.test_count));
                    let role = classify_role(
                        score,
                        &chunk.name,
                        &chunk.file.to_string_lossy(),
                        modify_threshold,
                    );
                    ScoutChunk {
                        name: chunk.name.clone(),
                        chunk_type: chunk.chunk_type,
                        signature: chunk.signature.clone(),
                        line_start: chunk.line_start,
                        role,
                        caller_count,
                        test_count,
                        search_score: score,
                    }
                })
                .collect();
            FileGroup {
                file: file.strip_prefix(root).unwrap_or(&file).to_path_buf(),
                relevance_score,
                chunks: scout_chunks,
                is_stale,
            }
        })
        .collect();

    // Highest mean score first; the path tie-break keeps the order deterministic.
    groups.sort_by(|a, b| {
        b.relevance_score
            .total_cmp(&a.relevance_score)
            .then(a.file.cmp(&b.file))
    });
    groups.truncate(limit);

    let result_files: HashSet<String> = groups
        .iter()
        .map(|g| crate::rel_display(&g.file, root))
        .collect();
    let relevant_notes = find_relevant_notes(store, &result_files);

    let total_functions: usize = groups.iter().map(|g| g.chunks.len()).sum();
    let untested_count: usize = groups
        .iter()
        .flat_map(|g| &g.chunks)
        .filter(|c| c.test_count == 0 && c.role != ChunkRole::TestToUpdate)
        .count();
    let stale_count = groups.iter().filter(|g| g.is_stale).count();

    Ok(ScoutResult {
        // `summary` is built before `groups` is moved into `file_groups`.
        summary: ScoutSummary {
            total_files: groups.len(),
            total_functions,
            untested_count,
            stale_count,
        },
        file_groups: groups,
        relevant_notes,
    })
}
/// Picks the score at or above which a non-test chunk counts as a modify target.
///
/// Test chunks (per `crate::is_test_chunk`) are ignored. The remaining scores are
/// sorted in descending order and the largest relative drop between neighbours is
/// searched within the top half of the list. The score just above that drop is the
/// threshold. When the best drop is smaller than `min_gap_ratio`, only the top score
/// qualifies.
///
/// Returns `f32::MAX` when there are no non-test results, and the lone score when
/// there is exactly one.
fn compute_modify_threshold(results: &[crate::store::SearchResult], min_gap_ratio: f32) -> f32 {
    if results.is_empty() {
        return f32::MAX;
    }

    let mut ranked: Vec<f32> = Vec::new();
    for hit in results {
        let path = hit.chunk.file.to_string_lossy();
        if !crate::is_test_chunk(&hit.chunk.name, &path) {
            ranked.push(hit.score);
        }
    }
    ranked.sort_by(|lhs, rhs| rhs.total_cmp(lhs));

    match ranked.as_slice() {
        [] => return f32::MAX,
        [only] => return *only,
        _ => {}
    }

    // Only splits that leave at most half of the scores above the cut are considered.
    let candidate_splits = ranked.len() / 2;
    let mut widest_gap = 0.0f32;
    let mut cut = 0usize;
    for (idx, pair) in ranked.windows(2).take(candidate_splits).enumerate() {
        let (upper, lower) = (pair[0], pair[1]);
        if upper > 0.0 {
            let relative_drop = (upper - lower) / upper;
            if relative_drop > widest_gap {
                widest_gap = relative_drop;
                cut = idx;
            }
        }
    }

    if widest_gap < min_gap_ratio {
        ranked[0]
    } else {
        ranked[cut]
    }
}
/// Assigns a role to a chunk.
///
/// Anything `crate::is_test_chunk` recognises is a test to update, whatever its
/// score. Otherwise the chunk is a modify target when `score >= modify_threshold`
/// and a dependency when it is not.
fn classify_role(score: f32, name: &str, file: &str, modify_threshold: f32) -> ChunkRole {
    let is_test = crate::is_test_chunk(name, file);
    let meets_threshold = score >= modify_threshold;
    match (is_test, meets_threshold) {
        (true, _) => ChunkRole::TestToUpdate,
        (false, true) => ChunkRole::ModifyTarget,
        (false, false) => ChunkRole::Dependency,
    }
}
/// Returns the notes that mention at least one of `result_files`.
///
/// A note qualifies when any of its mentions matches any result file according to
/// `note_mention_matches_file`. If the notes cannot be listed, the failure is logged
/// and an empty list is returned.
fn find_relevant_notes<Mode>(
    store: &Store<Mode>,
    result_files: &HashSet<String>,
) -> Vec<NoteSummary> {
    let notes = match store.list_notes_summaries() {
        Err(e) => {
            tracing::warn!(error = %e, "Failed to list notes");
            return Vec::new();
        }
        Ok(notes) => notes,
    };

    let mut relevant = Vec::new();
    for note in notes {
        let touches_result = note.mentions.iter().any(|mention| {
            result_files
                .iter()
                .any(|path| note_mention_matches_file(mention, path))
        });
        if touches_result {
            relevant.push(note);
        }
    }
    relevant
}
/// Reports whether a note mention refers to `file`.
///
/// Both arguments are passed through `normalize_slashes` first. A mention must look
/// like a path (contain `.` or `/`). It matches when it equals the file path or is a
/// suffix of it that begins right after a `/`, so `mod.rs` matches `src/cli/mod.rs`
/// but `od.rs` does not.
fn note_mention_matches_file(mention: &str, file: &str) -> bool {
    let needle = normalize_slashes(mention);
    let haystack = normalize_slashes(file);

    let looks_like_path = needle.contains('.') || needle.contains('/');
    if !looks_like_path {
        return false;
    }

    // Whatever precedes the suffix must be empty or end on a path separator.
    match haystack.strip_suffix(&needle) {
        Some(leading) => leading.is_empty() || leading.ends_with('/'),
        None => false,
    }
}
// Unit tests for the pure helpers in this module (role classification, threshold
// computation, note-mention matching) plus basic checks on the result types.
#[cfg(test)]
mod tests {
use super::*;
// Scores at or above the threshold classify non-test chunks as modify targets.
#[test]
fn test_classify_role_modify_target() {
assert_eq!(
classify_role(0.6, "search_filtered", "src/search.rs", 0.5),
ChunkRole::ModifyTarget
);
assert_eq!(
classify_role(0.5, "do_something", "src/lib.rs", 0.5),
ChunkRole::ModifyTarget
);
}
// Scores below the threshold classify non-test chunks as dependencies.
#[test]
fn test_classify_role_dependency() {
assert_eq!(
classify_role(0.49, "helper_fn", "src/lib.rs", 0.5),
ChunkRole::Dependency
);
assert_eq!(
classify_role(0.3, "utility", "src/lib.rs", 0.5),
ChunkRole::Dependency
);
}
// Test detection wins over the score; it depends on both the name and the file path.
#[test]
fn test_classify_role_test() {
assert_eq!(
classify_role(0.9, "test_search", "src/lib.rs", 0.5),
ChunkRole::TestToUpdate
);
assert_eq!(
classify_role(0.3, "test_helper", "src/lib.rs", 0.5),
ChunkRole::TestToUpdate
);
// "TestSuite" in a src/ file is not treated as a test...
assert_eq!(
classify_role(0.8, "TestSuite", "src/lib.rs", 0.5),
ChunkRole::ModifyTarget
);
// ...but the same name under tests/ is.
assert_eq!(
classify_role(0.8, "TestSuite", "tests/lib.rs", 0.5),
ChunkRole::TestToUpdate
);
assert_eq!(
classify_role(0.9, "helper_fn", "tests/integration.rs", 0.5),
ChunkRole::TestToUpdate
);
}
// Builds a minimal Rust function search result with the given name, file and score.
fn mock_result(name: &str, file: &str, score: f32) -> crate::store::SearchResult {
crate::store::SearchResult {
chunk: ChunkSummary {
id: name.to_string(),
file: std::path::PathBuf::from(file),
language: crate::language::Language::Rust,
chunk_type: crate::language::ChunkType::Function,
name: name.to_string(),
signature: String::new(),
content: String::new(),
doc: None,
line_start: 1,
line_end: 10,
parent_id: None,
parent_type_name: None,
content_hash: String::new(),
window_idx: None,
},
score,
}
}
// The largest relative drop is 0.030 -> 0.016, so the threshold lands in the top cluster.
#[test]
fn test_compute_modify_threshold_clear_gap() {
let results = vec![
mock_result("a", "src/a.rs", 0.033),
mock_result("b", "src/b.rs", 0.031),
mock_result("c", "src/c.rs", 0.030),
mock_result("d", "src/d.rs", 0.016), mock_result("e", "src/e.rs", 0.015),
mock_result("f", "src/f.rs", 0.014),
];
let threshold = compute_modify_threshold(&results, MIN_GAP_RATIO);
assert!(threshold >= 0.030);
assert!(threshold <= 0.033);
}
// Every neighbouring drop is below MIN_GAP_RATIO, so only the top score qualifies.
#[test]
fn test_compute_modify_threshold_no_gap() {
let results = vec![
mock_result("a", "src/a.rs", 0.020),
mock_result("b", "src/b.rs", 0.019),
mock_result("c", "src/c.rs", 0.018),
mock_result("d", "src/d.rs", 0.017),
];
let threshold = compute_modify_threshold(&results, MIN_GAP_RATIO);
assert!((threshold - 0.020).abs() < f32::EPSILON);
}
// A single result yields its own score as the threshold.
#[test]
fn test_compute_modify_threshold_single() {
let results = vec![mock_result("a", "src/a.rs", 0.05)];
assert!((compute_modify_threshold(&results, MIN_GAP_RATIO) - 0.05).abs() < f32::EPSILON);
}
// No results at all yields f32::MAX.
#[test]
fn test_compute_modify_threshold_empty() {
assert_eq!(compute_modify_threshold(&[], MIN_GAP_RATIO), f32::MAX);
}
// The top-scoring hit is a test chunk and must not influence the threshold.
#[test]
fn test_compute_modify_threshold_skips_tests() {
let results = vec![
mock_result("test_foo", "src/a.rs", 0.050), mock_result("bar", "src/b.rs", 0.020),
mock_result("baz", "src/c.rs", 0.010),
];
let threshold = compute_modify_threshold(&results, MIN_GAP_RATIO);
assert!((threshold - 0.020).abs() < f32::EPSILON);
}
// Same gap logic with larger score magnitudes: the drop 0.90 -> 0.50 is the split.
#[test]
fn test_compute_modify_threshold_cosine_scale() {
let results = vec![
mock_result("a", "src/a.rs", 0.95),
mock_result("b", "src/b.rs", 0.90),
mock_result("c", "src/c.rs", 0.50), mock_result("d", "src/d.rs", 0.45),
];
let threshold = compute_modify_threshold(&results, MIN_GAP_RATIO);
assert!(threshold >= 0.90);
}
// Mentions match whole trailing path components only, and must look like a path.
#[test]
fn test_note_mention_matches_file() {
assert!(note_mention_matches_file("search.rs", "src/search.rs"));
assert!(note_mention_matches_file("src/search.rs", "src/search.rs"));
assert!(note_mention_matches_file("cli/mod.rs", "src/cli/mod.rs"));
assert!(note_mention_matches_file("mod.rs", "src/cli/mod.rs"));
// Partial file names must not match.
assert!(!note_mention_matches_file("od.rs", "src/cli/mod.rs"));
assert!(!note_mention_matches_file("earch.rs", "src/search.rs"));
// Mentions without '.' or '/' are never treated as file references.
assert!(!note_mention_matches_file("audit", "src/audit.rs"));
assert!(!note_mention_matches_file("search", "src/search.rs"));
// A mention longer than the file path cannot match.
assert!(!note_mention_matches_file(
"extra/src/search.rs",
"search.rs"
));
assert!(note_mention_matches_file("src/scout.rs", "src/scout.rs"));
assert!(!note_mention_matches_file(
"other/search.rs",
"src/search.rs"
));
}
// Backslash-separated paths behave like their forward-slash equivalents.
#[test]
fn test_note_mention_matches_file_backslash() {
assert!(note_mention_matches_file("scout.rs", "src\\scout.rs"));
assert!(note_mention_matches_file("cli\\mod.rs", "src\\cli\\mod.rs"));
assert!(!note_mention_matches_file("od.rs", "src\\cli\\mod.rs"));
}
// Field round-trip check on a hand-built summary.
#[test]
fn test_scout_summary_nonzero() {
let summary = ScoutSummary {
total_files: 3,
total_functions: 15,
untested_count: 4,
stale_count: 2,
};
assert_eq!(summary.total_files, 3);
assert_eq!(summary.total_functions, 15);
assert_eq!(summary.untested_count, 4);
assert_eq!(summary.stale_count, 2);
}
// An empty result serializes to empty arrays and a zeroed summary.
#[test]
fn test_scout_result_serialization_empty() {
let result = ScoutResult {
file_groups: Vec::new(),
relevant_notes: Vec::new(),
summary: ScoutSummary {
total_files: 0,
total_functions: 0,
untested_count: 0,
stale_count: 0,
},
};
let json = serde_json::to_value(&result).unwrap();
assert_eq!(json["file_groups"].as_array().unwrap().len(), 0);
assert_eq!(json["relevant_notes"].as_array().unwrap().len(), 0);
assert_eq!(json["summary"]["total_files"], 0);
}
// Derived equality distinguishes the role variants.
#[test]
fn test_chunk_role_equality() {
assert_eq!(ChunkRole::ModifyTarget, ChunkRole::ModifyTarget);
assert_ne!(ChunkRole::ModifyTarget, ChunkRole::Dependency);
assert_ne!(ChunkRole::TestToUpdate, ChunkRole::Dependency);
}
// as_str returns the snake_case labels.
#[test]
fn test_chunk_role_as_str() {
assert_eq!(ChunkRole::ModifyTarget.as_str(), "modify_target");
assert_eq!(ChunkRole::TestToUpdate.as_str(), "test_to_update");
assert_eq!(ChunkRole::Dependency.as_str(), "dependency");
}
// When every hit is a test chunk there are no candidate scores, so the result is f32::MAX.
#[test]
fn test_compute_modify_threshold_all_tests() {
let results = vec![
mock_result("test_a", "src/a.rs", 0.9),
mock_result("test_b", "src/b.rs", 0.8),
mock_result("test_c", "src/c.rs", 0.7),
];
let threshold = compute_modify_threshold(&results, MIN_GAP_RATIO);
assert_eq!(threshold, f32::MAX);
}
// At exactly the threshold, a test-named chunk is still a test; a regular one is a modify target.
#[test]
fn test_classify_role_exact_threshold_test_name() {
assert_eq!(
classify_role(0.5, "test_foo", "src/lib.rs", 0.5),
ChunkRole::TestToUpdate
);
assert_eq!(
classify_role(0.5, "process_data", "src/lib.rs", 0.5),
ChunkRole::ModifyTarget
);
assert_eq!(
classify_role(0.3, "test_bar", "src/lib.rs", 0.5),
ChunkRole::TestToUpdate
);
}
// Empty mention and/or empty file never match.
#[test]
fn test_note_mention_matches_file_empty() {
assert!(!note_mention_matches_file("", "src/lib.rs"));
assert!(!note_mention_matches_file("lib.rs", ""));
assert!(!note_mention_matches_file("", ""));
}
}