use crate::args::Cli;
use crate::commands::graph::loader::{GraphLoadConfig, load_unified_graph_for_cli};
use crate::index_discovery::find_nearest_index;
use crate::output::OutputStreams;
use anyhow::{Context, Result};
use serde::Serialize;
use sqry_core::query::{DuplicateConfig, DuplicateType, build_duplicate_groups_graph};
/// One group of duplicate symbols as emitted by the `duplicates` command.
///
/// Serialized directly for JSON output and consumed by
/// `format_duplicates_text` for the plain-text report.
#[derive(Debug, Serialize)]
struct DuplicateGroupOutput {
/// Identifier of the group. `run_duplicates` fills it from the group's
/// `body_hash_128` value when present, otherwise from the group's `hash`
/// rendered as zero-padded, 16-digit lowercase hex.
group_id: String,
/// Number of entries in `symbols`.
count: usize,
/// The symbols that were resolved for this group.
symbols: Vec<DuplicateSymbol>,
}
/// A single symbol belonging to a duplicate group, in display-ready form.
#[derive(Debug, Serialize)]
struct DuplicateSymbol {
/// Symbol name resolved from the graph's string table; empty when the
/// name cannot be resolved.
name: String,
/// Qualified name of the symbol; falls back to `name` when the node has no
/// resolvable qualified name.
qualified_name: String,
/// `Debug` rendering of the node's kind.
kind: String,
/// Path of the file containing the symbol, rendered via `Path::display`;
/// empty when the file cannot be resolved.
file: String,
/// The node's `start_line` value.
line: u32,
/// Language reported for the file, or `"Unknown"` when none is available.
language: String,
}
/// Runs the `duplicates` command: loads the graph from the nearest index and
/// reports groups of duplicate symbols.
///
/// # Arguments
///
/// * `cli` - Global CLI options; `cli.json` selects JSON output instead of the
///   plain-text report.
/// * `path` - Location to start looking for an index from. The current
///   working directory is used when `None`.
/// * `dup_type` - Textual duplicate kind, parsed into a [`DuplicateType`].
/// * `threshold` - Similarity threshold as a percentage. A value of 100 or
///   more switches on exact-only matching.
/// * `max_results` - Maximum number of groups to report.
/// * `exact` - Forces the threshold to `1.0` and exact-only matching.
///
/// # Errors
///
/// Fails when `dup_type` cannot be parsed, the graph cannot be loaded, JSON
/// serialization fails, or writing to the output streams fails. A missing
/// index is not an error: a diagnostic is written and `Ok(())` is returned.
pub fn run_duplicates(
    cli: &Cli,
    path: Option<&str>,
    dup_type: &str,
    threshold: u32,
    max_results: usize,
    exact: bool,
) -> Result<()> {
    let mut streams = OutputStreams::new();

    let duplicate_type: DuplicateType = dup_type
        .parse()
        .with_context(|| format!("Invalid duplicate type: {dup_type}"))?;

    // Start from the caller-supplied path, or the working directory otherwise.
    let search_path = match path {
        Some(dir) => std::path::PathBuf::from(dir),
        None => std::env::current_dir().unwrap_or_default(),
    };

    let Some(loc) = find_nearest_index(&search_path) else {
        streams
            .write_diagnostic("No .sqry-index found. Run 'sqry index' first to build the index.")?;
        return Ok(());
    };

    let graph = load_unified_graph_for_cli(&loc.index_root, &GraphLoadConfig::default(), cli)
        .context("Failed to load graph. Run 'sqry index' to build the graph.")?;

    // The percentage is converted to a ratio; `exact` pins it to 1.0.
    let similarity = if exact {
        1.0
    } else {
        f64::from(threshold) / 100.0
    };
    let config = DuplicateConfig {
        threshold: similarity,
        max_results,
        is_exact_only: exact || threshold >= 100,
        ..Default::default()
    };

    let groups = build_duplicate_groups_graph(duplicate_type, &graph, &config);
    let strings = graph.strings();
    let files = graph.files();

    let mut output_groups: Vec<DuplicateGroupOutput> = Vec::new();
    for group in groups {
        // A group needs at least two members to be a duplicate.
        if group.node_ids.len() <= 1 {
            continue;
        }

        let mut symbols: Vec<DuplicateSymbol> = Vec::new();
        for &node_id in group.node_ids.iter() {
            // Node ids without a matching entry are skipped.
            let Some(entry) = graph.nodes().get(node_id) else {
                continue;
            };

            let name = strings
                .resolve(entry.name)
                .map_or_else(String::new, |s| s.to_string());
            let qualified_name = match entry.qualified_name.and_then(|id| strings.resolve(id)) {
                Some(qualified) => qualified.to_string(),
                None => name.clone(),
            };
            let file = files
                .resolve(entry.file)
                .map_or_else(String::new, |p| p.display().to_string());
            let language = match files.language_for_file(entry.file) {
                Some(lang) => lang.to_string(),
                None => "Unknown".to_string(),
            };

            symbols.push(DuplicateSymbol {
                name,
                qualified_name,
                kind: format!("{:?}", entry.kind),
                file,
                line: entry.start_line,
                language,
            });
        }

        // Skipped nodes may have shrunk the group below two members.
        if symbols.len() <= 1 {
            continue;
        }

        let group_id = match group.body_hash_128 {
            Some(body_hash) => format!("{body_hash}"),
            None => format!("{:016x}", group.hash),
        };

        output_groups.push(DuplicateGroupOutput {
            group_id,
            count: symbols.len(),
            symbols,
        });
    }

    // Largest groups first; ties are ordered by group id.
    output_groups.sort_by(|lhs, rhs| {
        (std::cmp::Reverse(lhs.count), &lhs.group_id)
            .cmp(&(std::cmp::Reverse(rhs.count), &rhs.group_id))
    });
    output_groups.truncate(max_results);

    let rendered = if cli.json {
        serde_json::to_string_pretty(&output_groups).context("Failed to serialize to JSON")?
    } else {
        format_duplicates_text(&output_groups, duplicate_type)
    };
    streams.write_result(&rendered)?;

    Ok(())
}
/// Renders duplicate groups as a plain-text report.
///
/// The report starts with a summary line (group count and duplicate type)
/// followed by a blank line. Each group contributes a numbered heading, one
/// line per symbol in the form `  qualified_name [kind] file:line`, and a
/// trailing blank line. When `groups` is empty, a final
/// `No duplicates found.` line is appended. Lines are joined with `\n`.
fn format_duplicates_text(groups: &[DuplicateGroupOutput], dup_type: DuplicateType) -> String {
    let type_name = match dup_type {
        DuplicateType::Body => "body",
        DuplicateType::Signature => "signature",
        DuplicateType::Struct => "struct",
    };

    let summary = format!(
        "Found {} duplicate groups (type: {})",
        groups.len(),
        type_name
    );
    let mut report = vec![summary, String::new()];

    // Each group expands to: heading, one line per symbol, blank separator.
    report.extend(groups.iter().enumerate().flat_map(|(index, group)| {
        let heading = format!("Group {} ({} duplicates):", index + 1, group.count);
        let members = group.symbols.iter().map(|sym| {
            format!(
                "  {} [{}] {}:{}",
                sym.qualified_name, sym.kind, sym.file, sym.line
            )
        });
        std::iter::once(heading)
            .chain(members)
            .chain(std::iter::once(String::new()))
    }));

    if groups.is_empty() {
        report.push(String::from("No duplicates found."));
    }

    report.join("\n")
}