impl AgentContextIndex {
    /// Scores indexed documents against `query`, returning `(document index, score)` pairs.
    ///
    /// When `self.db_path` is set, the SQLite FTS5 search is attempted first and its
    /// results are returned unchanged if there are any. If no database path is
    /// configured, the FTS5 lookup fails, or it yields no rows, the in-memory
    /// term-frequency scorer is used instead.
    ///
    /// # Errors
    ///
    /// Returns whatever error the term-frequency fallback produces. FTS5 errors are
    /// never surfaced; they only trigger the fallback.
    pub(crate) fn calculate_relevance_scores(
        &self,
        query: &str,
    ) -> Result<Vec<(usize, f32)>, String> {
        if let Some(ref db_path) = self.db_path {
            // Best-effort: a failing or empty FTS5 lookup falls through to the TF scorer.
            if let Ok(results) = self.calculate_relevance_scores_fts5(db_path, query) {
                if !results.is_empty() {
                    return Ok(results);
                }
            }
        }
        self.calculate_relevance_scores_tf(query)
    }

    /// Opens the database at `db_path` and runs `fts5_search` for `query`,
    /// asking for at most 500 rows.
    ///
    /// # Errors
    ///
    /// Propagates the error from `open_db` or `fts5_search`.
    fn calculate_relevance_scores_fts5(
        &self,
        db_path: &std::path::Path,
        query: &str,
    ) -> Result<Vec<(usize, f32)>, String> {
        use crate::services::agent_context::function_index::sqlite_backend::{
            fts5_search, open_db,
        };
        let conn = open_db(db_path)?;
        fts5_search(&conn, query, 500)
    }

    /// Splits `query` into lowercase terms.
    ///
    /// Any character that is neither alphanumeric nor `_` acts as a separator;
    /// empty fragments are dropped.
    fn relevance_query_terms(query: &str) -> Vec<String> {
        query
            .split(|c: char| !c.is_alphanumeric() && c != '_')
            .filter(|fragment| !fragment.is_empty())
            .map(str::to_lowercase)
            .collect()
    }

    /// Computes the raw (not yet normalized) term-frequency score of one document.
    ///
    /// For every query term that occurs in the lowercased document, the term
    /// contributes `(1 + ln(occurrences)) / (1 + ln(document length in bytes))`.
    /// The contributions are summed and divided by the number of query terms.
    ///
    /// Returns `None` when `doc_idx` is missing from either `corpus` or
    /// `corpus_lower`, or when the resulting score is not positive (no term matched).
    // The `usize -> f32` casts below are intentionally lossy: counts and lengths
    // only feed a logarithmic ranking heuristic.
    #[allow(clippy::cast_possible_truncation, clippy::cast_precision_loss)]
    fn relevance_raw_tf_score(&self, doc_idx: usize, query_terms: &[String]) -> Option<f32> {
        // Checked lookups: the two corpora are indexed in parallel, and a length
        // mismatch should skip the document rather than panic.
        let doc_lower = self.corpus_lower.get(doc_idx)?;
        let doc = self.corpus.get(doc_idx)?;

        let doc_len_factor = 1.0 + (doc.len() as f32).ln();
        let term_score: f32 = query_terms
            .iter()
            .map(|term| doc_lower.matches(term.as_str()).count())
            .filter(|&count| count > 0)
            .map(|count| (1.0 + (count as f32).ln()) / doc_len_factor)
            .sum();

        let score = term_score / query_terms.len() as f32;
        if score > 0.0 {
            Some(score)
        } else {
            None
        }
    }

    /// Rescales `results` in place so that the highest score becomes `1.0`.
    ///
    /// Leaves the scores untouched when there is no positive score to divide by.
    fn relevance_normalize_in_place(results: &mut [(usize, f32)]) {
        let max_score = results
            .iter()
            .map(|&(_, score)| score)
            .fold(0.0f32, f32::max);
        if max_score > 0.0 {
            for (_, score) in results.iter_mut() {
                *score /= max_score;
            }
        }
    }

    /// Scores every in-memory document against `query` using term frequency.
    ///
    /// Only documents in which at least one query term occurs are returned, in
    /// corpus order, with scores normalized so the best match is `1.0`.
    /// Returns an empty list when the corpus is empty or the query contains no terms.
    ///
    /// # Errors
    ///
    /// This implementation never returns `Err`; the `Result` matches the other scorers.
    fn calculate_relevance_scores_tf(&self, query: &str) -> Result<Vec<(usize, f32)>, String> {
        if self.corpus.is_empty() {
            return Ok(Vec::new());
        }
        let query_terms = Self::relevance_query_terms(query);
        if query_terms.is_empty() {
            return Ok(Vec::new());
        }

        let mut results: Vec<(usize, f32)> = (0..self.corpus_lower.len())
            .filter_map(|doc_idx| {
                self.relevance_raw_tf_score(doc_idx, &query_terms)
                    .map(|score| (doc_idx, score))
            })
            .collect();
        Self::relevance_normalize_in_place(&mut results);
        Ok(results)
    }

    /// Scores only the documents listed in `candidates` against `query`.
    ///
    /// * An empty `candidates` slice yields an empty result.
    /// * A query without any terms yields every candidate, unfiltered, with score `1.0`.
    /// * Otherwise candidates are scored like the term-frequency scorer does,
    ///   in the order given; candidates that are out of range or match no term
    ///   are omitted, and scores are normalized so the best match is `1.0`.
    ///
    /// # Errors
    ///
    /// This implementation never returns `Err`; the `Result` matches the other scorers.
    pub(crate) fn calculate_relevance_scores_scoped(
        &self,
        query: &str,
        candidates: &[usize],
    ) -> Result<Vec<(usize, f32)>, String> {
        if candidates.is_empty() {
            return Ok(Vec::new());
        }
        let query_terms = Self::relevance_query_terms(query);
        if query_terms.is_empty() {
            // Nothing to rank by: every candidate is considered equally relevant.
            return Ok(candidates.iter().map(|&idx| (idx, 1.0)).collect());
        }

        let mut results: Vec<(usize, f32)> = candidates
            .iter()
            .filter_map(|&doc_idx| {
                self.relevance_raw_tf_score(doc_idx, &query_terms)
                    .map(|score| (doc_idx, score))
            })
            .collect();
        Self::relevance_normalize_in_place(&mut results);
        Ok(results)
    }

    /// Returns `true` when the function at `idx` satisfies every filter set in `options`.
    ///
    /// Filters that are `None` are ignored. The checks are:
    ///
    /// * `min_grade` - rejects functions whose grade ranks worse than `min_grade`
    ///   in the order `A, B, C, D, F`. If either grade is not one of these five
    ///   strings the check is skipped.
    /// * `max_complexity` / `max_loc` - reject functions exceeding the limit.
    /// * `language` - ASCII case-insensitive equality with the function's language.
    /// * `path_pattern` - the file path must contain the pattern.
    /// * `exclude_pattern` - rejects when the lowercased
    ///   `"<name> <signature> <source>"` text contains the lowercased pattern.
    /// * `exclude_file_pattern` - rejects when the file path contains the pattern
    ///   or `glob_matches` reports a match.
    ///
    /// Empty exclude patterns are ignored (they would otherwise match everything),
    /// and an `idx` outside `self.functions` yields `false` instead of panicking.
    fn passes_filters(&self, idx: usize, options: &QueryOptions) -> bool {
        const GRADE_ORDER: [&str; 5] = ["A", "B", "C", "D", "F"];

        let func = match self.functions.get(idx) {
            Some(func) => func,
            None => return false,
        };

        if let Some(min_grade) = &options.min_grade {
            let min_rank = GRADE_ORDER.iter().position(|g| *g == min_grade);
            let func_rank = GRADE_ORDER
                .iter()
                .position(|g| *g == func.quality.tdg_grade.as_str());
            // A later position means a worse grade; unknown grades leave the filter open.
            if let (Some(min_rank), Some(func_rank)) = (min_rank, func_rank) {
                if func_rank > min_rank {
                    return false;
                }
            }
        }

        if let Some(max_complexity) = options.max_complexity {
            if func.quality.complexity > max_complexity {
                return false;
            }
        }

        if let Some(max_loc) = options.max_loc {
            if func.quality.loc > max_loc {
                return false;
            }
        }

        if let Some(lang) = &options.language {
            if !func.language.eq_ignore_ascii_case(lang) {
                return false;
            }
        }

        if let Some(pattern) = &options.path_pattern {
            if !func.file_path.contains(pattern) {
                return false;
            }
        }

        if let Some(exclude) = &options.exclude_pattern {
            // `contains("")` is always true, so an empty pattern must not exclude anything.
            if !exclude.is_empty() {
                let exclude_lower = exclude.to_lowercase();
                let haystack =
                    format!("{} {} {}", func.function_name, func.signature, func.source)
                        .to_lowercase();
                if haystack.contains(&exclude_lower) {
                    return false;
                }
            }
        }

        if let Some(exclude_file) = &options.exclude_file_pattern {
            if !exclude_file.is_empty()
                && (func.file_path.contains(exclude_file)
                    || glob_matches(exclude_file, &func.file_path))
            {
                return false;
            }
        }

        true
    }
}