use crate::args::Cli;
use crate::commands::graph::loader::{GraphLoadConfig, load_unified_graph_for_cli};
use crate::index_discovery::find_nearest_index;
use crate::output::OutputStreams;
use anyhow::{Context, Result, anyhow};
use serde::Serialize;
/// Complete result produced by `run_similar`.
///
/// Serialized directly when JSON output is requested and rendered by
/// `format_similar_text` otherwise.
#[derive(Debug, Serialize)]
struct SimilarOutput {
/// The symbol that the other symbols were compared against.
reference: NodeRef,
/// Symbols judged similar to `reference`, in the order `run_similar` leaves them.
similar: Vec<SimilarSymbol>,
/// Summary figures describing this search.
stats: SimilarStats,
}
/// Description of the reference symbol located by `run_similar`.
#[derive(Debug, Serialize)]
struct NodeRef {
/// Simple name of the symbol (empty if the name could not be resolved).
name: String,
/// Qualified name; `run_similar` falls back to `name` when none is recorded.
qualified_name: String,
/// Symbol kind, rendered with the kind's `Debug` formatting.
kind: String,
/// Display form of the file path (empty if the file could not be resolved).
file: String,
/// Start line of the symbol as stored in the graph entry.
line: u32,
}
/// One symbol that `run_similar` considers similar to the reference symbol.
#[derive(Debug, Serialize)]
struct SimilarSymbol {
/// Simple name of the symbol; this is the text the similarity score is based on.
name: String,
/// Qualified name; `run_similar` falls back to `name` when none is recorded.
qualified_name: String,
/// Symbol kind, rendered with the kind's `Debug` formatting.
kind: String,
/// Display form of the file path (empty if the file could not be resolved).
file: String,
/// Start line of the symbol as stored in the graph entry.
line: u32,
/// Score returned by `compute_similarity`; 1.0 means the names are equal
/// ignoring case.
similarity: f64,
}
/// Summary figures attached to a `SimilarOutput`.
#[derive(Debug, Serialize)]
struct SimilarStats {
/// Number of matching symbols reported by `run_similar`.
total_found: usize,
/// Minimum similarity score a symbol needed in order to be reported.
threshold: f64,
}
/// Finds symbols whose names resemble a reference symbol and prints them.
///
/// The reference symbol is the first graph node whose file matches `file_path`
/// (exact path or path suffix) and whose simple or qualified name equals
/// `symbol_name`. Every other node of the same kind is scored by comparing its
/// simple name with the reference's simple name via `compute_similarity`;
/// nodes scoring at least `threshold` are kept, ordered by descending score and
/// capped at `max_results`.
///
/// * `cli` - parsed CLI options; `cli.json` selects JSON instead of text output.
/// * `file_path` - file that contains the reference symbol.
/// * `symbol_name` - simple or qualified name of the reference symbol.
/// * `path` - where to start looking for the index; defaults to the current
///   working directory.
/// * `threshold` - minimum similarity score for a symbol to be reported.
/// * `max_results` - maximum number of symbols to list.
///
/// If no index is found, a diagnostic is written and `Ok(())` is returned.
///
/// # Errors
///
/// Returns an error if the graph cannot be loaded, the reference symbol is not
/// found, JSON serialization fails, or writing to the output streams fails.
// One linear pipeline (locate index, load graph, resolve reference, score,
// render); kept in a single function, hence the lint exemption.
#[allow(clippy::too_many_lines)]
pub fn run_similar(
    cli: &Cli,
    file_path: &str,
    symbol_name: &str,
    path: Option<&str>,
    threshold: f64,
    max_results: usize,
) -> Result<()> {
    let mut streams = OutputStreams::new();

    let search_path = path.map_or_else(
        || std::env::current_dir().unwrap_or_default(),
        std::path::PathBuf::from,
    );
    let index_location = find_nearest_index(&search_path);
    let Some(ref loc) = index_location else {
        streams
            .write_diagnostic("No .sqry-index found. Run 'sqry index' first to build the index.")?;
        return Ok(());
    };

    let config = GraphLoadConfig::default();
    let graph = load_unified_graph_for_cli(&loc.index_root, &config, cli)
        .context("Failed to load graph. Run 'sqry index' to build the graph.")?;
    let strings = graph.strings();
    let files_registry = graph.files();

    // Locate the reference symbol: the file must match first, then either the
    // simple or the qualified name must equal `symbol_name`.
    let target_file = std::path::Path::new(file_path);
    let (ref_node_id, ref_entry) = graph
        .nodes()
        .iter()
        .find(|(_, entry)| {
            let sym_file = files_registry.resolve(entry.file);
            let file_matches = sym_file
                .as_ref()
                .is_some_and(|p| p.as_ref() == target_file || p.ends_with(file_path));
            if !file_matches {
                return false;
            }
            let name = strings.resolve(entry.name);
            let qname = entry.qualified_name.and_then(|id| strings.resolve(id));
            name.is_some_and(|n| n.as_ref() == symbol_name)
                || qname.is_some_and(|q| q.as_ref() == symbol_name)
        })
        .ok_or_else(|| anyhow!("Symbol '{symbol_name}' not found in '{file_path}'"))?;

    let ref_name = strings
        .resolve(ref_entry.name)
        .map(|s| s.to_string())
        .unwrap_or_default();
    let ref_qualified_name = ref_entry
        .qualified_name
        .and_then(|id| strings.resolve(id))
        .map_or_else(|| ref_name.clone(), |s| s.to_string());
    let ref_file_path = files_registry
        .resolve(ref_entry.file)
        .map(|p| p.display().to_string())
        .unwrap_or_default();
    let reference = NodeRef {
        name: ref_name,
        qualified_name: ref_qualified_name,
        kind: format!("{:?}", ref_entry.kind),
        file: ref_file_path,
        line: ref_entry.start_line,
    };

    // Score every other node of the same kind against the reference name.
    let mut similar_symbols: Vec<_> = graph
        .nodes()
        .iter()
        .filter(|(node_id, entry)| *node_id != ref_node_id && entry.kind == ref_entry.kind)
        .filter_map(|(_, entry)| {
            let name = strings.resolve(entry.name)?;
            let similarity = compute_similarity(&reference.name, &name);
            // `>=` (rather than a negated `<`) keeps a NaN threshold matching nothing.
            (similarity >= threshold).then(|| {
                let file = files_registry
                    .resolve(entry.file)
                    .map(|p| p.display().to_string())
                    .unwrap_or_default();
                let qualified_name = entry
                    .qualified_name
                    .and_then(|id| strings.resolve(id))
                    .map_or_else(|| name.to_string(), |s| s.to_string());
                SimilarSymbol {
                    name: name.to_string(),
                    qualified_name,
                    kind: format!("{:?}", entry.kind),
                    file,
                    line: entry.start_line,
                    similarity,
                }
            })
        })
        .collect();

    // Capture the count before truncating: taken afterwards it could never
    // exceed `max_results` and would merely duplicate `similar.len()`.
    let total_found = similar_symbols.len();

    // Best matches first. The sort is stable, so ties keep graph order.
    similar_symbols.sort_by(|a, b| b.similarity.total_cmp(&a.similarity));
    similar_symbols.truncate(max_results);

    let output = SimilarOutput {
        reference,
        similar: similar_symbols,
        stats: SimilarStats {
            total_found,
            threshold,
        },
    };

    if cli.json {
        let json = serde_json::to_string_pretty(&output).context("Failed to serialize to JSON")?;
        streams.write_result(&json)?;
    } else {
        let text = format_similar_text(&output);
        streams.write_result(&text)?;
    }
    Ok(())
}
/// Scores how alike two names are, ignoring case.
///
/// Both inputs are lowercased; equal strings score `1.0`. Otherwise the score
/// is `1.0 - distance / max_len`, where `distance` is the Levenshtein distance
/// between the lowercased strings and `max_len` is the longer string's length
/// in characters. The result lies in `0.0..=1.0`.
fn compute_similarity(a: &str, b: &str) -> f64 {
    let a_lower = a.to_lowercase();
    let b_lower = b.to_lowercase();
    if a_lower == b_lower {
        return 1.0;
    }

    // Normalize by character count. `str::len` counts bytes, while the edit
    // distance counts characters, so byte lengths would inflate the score of
    // any name containing multi-byte characters.
    let max_len = a_lower.chars().count().max(b_lower.chars().count());
    if max_len == 0 {
        // Defensive guard against division by zero.
        return 1.0;
    }

    let distance = levenshtein_distance(&a_lower, &b_lower);
    // Go through `u32` so the conversion to `f64` is lossless; absurdly long
    // inputs saturate instead of wrapping.
    let distance_f = f64::from(u32::try_from(distance).unwrap_or(u32::MAX));
    let max_len_f = f64::from(u32::try_from(max_len).unwrap_or(u32::MAX));
    1.0 - (distance_f / max_len_f)
}

/// Returns the Levenshtein (edit) distance between `a` and `b`, measured in
/// `char`s: the minimum number of single-character insertions, deletions and
/// substitutions needed to turn one string into the other.
fn levenshtein_distance(a: &str, b: &str) -> usize {
    let a_chars: Vec<char> = a.chars().collect();
    let b_chars: Vec<char> = b.chars().collect();
    if a_chars.is_empty() {
        return b_chars.len();
    }
    if b_chars.is_empty() {
        return a_chars.len();
    }

    // Only the previous and the current row of the dynamic-programming table
    // are ever read, so two rows suffice instead of the full matrix.
    let mut prev: Vec<usize> = (0..=b_chars.len()).collect();
    let mut curr = vec![0usize; b_chars.len() + 1];

    for (i, &a_ch) in a_chars.iter().enumerate() {
        // Turning the first `i + 1` chars of `a` into "" takes `i + 1` deletions.
        curr[0] = i + 1;
        for (j, &b_ch) in b_chars.iter().enumerate() {
            let substitution = prev[j] + usize::from(a_ch != b_ch);
            let deletion = prev[j + 1] + 1;
            let insertion = curr[j] + 1;
            curr[j + 1] = substitution.min(deletion).min(insertion);
        }
        std::mem::swap(&mut prev, &mut curr);
    }

    // After the final swap `prev` holds the last completed row.
    prev[b_chars.len()]
}
/// Renders a `SimilarOutput` as the plain-text report.
///
/// The report starts with a headline naming the reference symbol, a line with
/// the threshold (as a whole percentage) and the found count, and a blank
/// line. It then contains either a "no results" notice or a heading followed
/// by two lines per similar symbol (name with score, then `file:line`). Lines
/// are joined with `\n` and there is no trailing newline.
fn format_similar_text(output: &SimilarOutput) -> String {
    let reference = &output.reference;
    let stats = &output.stats;

    let mut report = vec![
        format!(
            "Finding symbols similar to: {} [{}]",
            reference.qualified_name, reference.kind
        ),
        format!(
            "Threshold: {:.0}%, Found: {}",
            stats.threshold * 100.0,
            stats.total_found
        ),
        String::new(),
    ];

    if output.similar.is_empty() {
        report.push("No similar symbols found.".to_string());
        return report.join("\n");
    }

    report.push("Similar symbols:".to_string());
    // Each match contributes a name/score line followed by its location line.
    report.extend(output.similar.iter().flat_map(|candidate| {
        [
            format!(
                " {} ({:.0}% similar)",
                candidate.qualified_name,
                candidate.similarity * 100.0
            ),
            format!(" {}:{}", candidate.file, candidate.line),
        ]
    }));
    report.join("\n")
}