sqry_cli/commands/
duplicates.rs1use crate::args::Cli;
6use crate::commands::graph::loader::{GraphLoadConfig, load_unified_graph_for_cli};
7use crate::index_discovery::find_nearest_index;
8use crate::output::OutputStreams;
9use anyhow::{Context, Result};
10use serde::Serialize;
11use sqry_core::query::{DuplicateConfig, DuplicateType, build_duplicate_groups_graph};
12
13#[derive(Debug, Serialize)]
15struct DuplicateGroupOutput {
16 group_id: String,
22 count: usize,
24 symbols: Vec<DuplicateSymbol>,
26}
27
28#[derive(Debug, Serialize)]
30struct DuplicateSymbol {
31 name: String,
32 qualified_name: String,
33 kind: String,
34 file: String,
35 line: u32,
36 language: String,
37}
38
39pub fn run_duplicates(
44 cli: &Cli,
45 path: Option<&str>,
46 dup_type: &str,
47 threshold: u32,
48 max_results: usize,
49 exact: bool,
50) -> Result<()> {
51 let mut streams = OutputStreams::new();
52
53 let duplicate_type: DuplicateType = dup_type
55 .parse()
56 .with_context(|| format!("Invalid duplicate type: {dup_type}"))?;
57
58 let search_path = path.map_or_else(
60 || std::env::current_dir().unwrap_or_default(),
61 std::path::PathBuf::from,
62 );
63
64 let index_location = find_nearest_index(&search_path);
65 let Some(ref loc) = index_location else {
66 streams
67 .write_diagnostic("No .sqry-index found. Run 'sqry index' first to build the index.")?;
68 return Ok(());
69 };
70
71 let graph_config = GraphLoadConfig::default();
73 let graph = load_unified_graph_for_cli(&loc.index_root, &graph_config, cli)
74 .context("Failed to load graph. Run 'sqry index' to build the graph.")?;
75
76 let config = DuplicateConfig {
78 threshold: if exact {
79 1.0
80 } else {
81 f64::from(threshold) / 100.0
82 },
83 max_results,
84 is_exact_only: exact || threshold >= 100,
85 ..Default::default()
86 };
87
88 let groups = build_duplicate_groups_graph(duplicate_type, &graph, &config);
90
91 let strings = graph.strings();
92 let files = graph.files();
93
94 let mut output_groups: Vec<DuplicateGroupOutput> = groups
96 .into_iter()
97 .filter(|g| g.node_ids.len() > 1)
98 .map(|group| {
99 let symbols: Vec<DuplicateSymbol> = group
100 .node_ids
101 .iter()
102 .filter_map(|&node_id| {
103 let entry = graph.nodes().get(node_id)?;
104
105 let name = strings
106 .resolve(entry.name)
107 .map(|s| s.to_string())
108 .unwrap_or_default();
109
110 let qualified_name = entry
111 .qualified_name
112 .and_then(|id| strings.resolve(id))
113 .map_or_else(|| name.clone(), |s| s.to_string());
114
115 let file_path = files
116 .resolve(entry.file)
117 .map(|p| p.display().to_string())
118 .unwrap_or_default();
119
120 let language = files
121 .language_for_file(entry.file)
122 .map_or_else(|| "Unknown".to_string(), |l| l.to_string());
123
124 Some(DuplicateSymbol {
125 name,
126 qualified_name,
127 kind: format!("{:?}", entry.kind),
128 file: file_path,
129 line: entry.start_line,
130 language,
131 })
132 })
133 .collect();
134
135 let group_id = if let Some(body_hash) = group.body_hash_128 {
139 format!("{body_hash}") } else {
141 format!("{:016x}", group.hash)
142 };
143
144 DuplicateGroupOutput {
145 group_id,
146 count: symbols.len(),
147 symbols,
148 }
149 })
150 .filter(|g| g.count > 1)
151 .collect();
152
153 output_groups.sort_by(|a, b| {
156 b.count
157 .cmp(&a.count)
158 .then_with(|| a.group_id.cmp(&b.group_id))
159 });
160 output_groups.truncate(max_results);
161
162 if cli.json {
164 let json =
165 serde_json::to_string_pretty(&output_groups).context("Failed to serialize to JSON")?;
166 streams.write_result(&json)?;
167 } else {
168 let output = format_duplicates_text(&output_groups, duplicate_type);
169 streams.write_result(&output)?;
170 }
171
172 Ok(())
173}
174
175fn format_duplicates_text(groups: &[DuplicateGroupOutput], dup_type: DuplicateType) -> String {
177 let mut lines = Vec::new();
178
179 let type_name = match dup_type {
180 DuplicateType::Body => "body",
181 DuplicateType::Signature => "signature",
182 DuplicateType::Struct => "struct",
183 };
184
185 lines.push(format!(
186 "Found {} duplicate groups (type: {})",
187 groups.len(),
188 type_name
189 ));
190 lines.push(String::new());
191
192 for (i, group) in groups.iter().enumerate() {
193 lines.push(format!("Group {} ({} duplicates):", i + 1, group.count));
194 for sym in &group.symbols {
195 lines.push(format!(
196 " {} [{}] {}:{}",
197 sym.qualified_name, sym.kind, sym.file, sym.line
198 ));
199 }
200 lines.push(String::new());
201 }
202
203 if groups.is_empty() {
204 lines.push("No duplicates found.".to_string());
205 }
206
207 lines.join("\n")
208}