sqry_cli/commands/
duplicates.rs1use crate::args::Cli;
6use crate::commands::graph::loader::{GraphLoadConfig, load_unified_graph_for_cli};
7use crate::index_discovery::find_nearest_index;
8use crate::output::OutputStreams;
9use anyhow::{Context, Result};
10use serde::Serialize;
11use sqry_core::query::{DuplicateConfig, DuplicateType, build_duplicate_groups_graph};
12
13#[derive(Debug, Serialize)]
15struct DuplicateGroupOutput {
16 group_id: String,
22 count: usize,
24 symbols: Vec<DuplicateSymbol>,
26}
27
28#[derive(Debug, Serialize)]
30struct DuplicateSymbol {
31 name: String,
32 qualified_name: String,
33 kind: String,
34 file: String,
35 line: u32,
36 language: String,
37}
38
39pub fn run_duplicates(
44 cli: &Cli,
45 path: Option<&str>,
46 dup_type: &str,
47 threshold: u32,
48 max_results: usize,
49 exact: bool,
50) -> Result<()> {
51 let mut streams = OutputStreams::new();
52
53 let duplicate_type: DuplicateType = dup_type
55 .parse()
56 .with_context(|| format!("Invalid duplicate type: {dup_type}"))?;
57
58 let search_path = path.map_or_else(
60 || std::env::current_dir().unwrap_or_default(),
61 std::path::PathBuf::from,
62 );
63
64 let index_location = find_nearest_index(&search_path);
65 let Some(ref loc) = index_location else {
66 streams
67 .write_diagnostic("No .sqry-index found. Run 'sqry index' first to build the index.")?;
68 return Ok(());
69 };
70
71 let graph_config = GraphLoadConfig::default();
73 let graph = load_unified_graph_for_cli(&loc.index_root, &graph_config, cli)
74 .context("Failed to load graph. Run 'sqry index' to build the graph.")?;
75
76 let config = DuplicateConfig {
78 threshold: if exact {
79 1.0
80 } else {
81 f64::from(threshold) / 100.0
82 },
83 max_results,
84 is_exact_only: exact || threshold >= 100,
85 };
86
87 let groups = build_duplicate_groups_graph(duplicate_type, &graph, &config);
89
90 let strings = graph.strings();
91 let files = graph.files();
92
93 let mut output_groups: Vec<DuplicateGroupOutput> = groups
95 .into_iter()
96 .filter(|g| g.node_ids.len() > 1)
97 .map(|group| {
98 let symbols: Vec<DuplicateSymbol> = group
99 .node_ids
100 .iter()
101 .filter_map(|&node_id| {
102 let entry = graph.nodes().get(node_id)?;
103
104 let name = strings
105 .resolve(entry.name)
106 .map(|s| s.to_string())
107 .unwrap_or_default();
108
109 let qualified_name = entry
110 .qualified_name
111 .and_then(|id| strings.resolve(id))
112 .map_or_else(|| name.clone(), |s| s.to_string());
113
114 let file_path = files
115 .resolve(entry.file)
116 .map(|p| p.display().to_string())
117 .unwrap_or_default();
118
119 let language = files
120 .language_for_file(entry.file)
121 .map_or_else(|| "Unknown".to_string(), |l| l.to_string());
122
123 Some(DuplicateSymbol {
124 name,
125 qualified_name,
126 kind: format!("{:?}", entry.kind),
127 file: file_path,
128 line: entry.start_line,
129 language,
130 })
131 })
132 .collect();
133
134 let group_id = if let Some(body_hash) = group.body_hash_128 {
138 format!("{body_hash}") } else {
140 format!("{:016x}", group.hash)
141 };
142
143 DuplicateGroupOutput {
144 group_id,
145 count: symbols.len(),
146 symbols,
147 }
148 })
149 .filter(|g| g.count > 1)
150 .collect();
151
152 output_groups.sort_by(|a, b| {
155 b.count
156 .cmp(&a.count)
157 .then_with(|| a.group_id.cmp(&b.group_id))
158 });
159 output_groups.truncate(max_results);
160
161 if cli.json {
163 let json =
164 serde_json::to_string_pretty(&output_groups).context("Failed to serialize to JSON")?;
165 streams.write_result(&json)?;
166 } else {
167 let output = format_duplicates_text(&output_groups, duplicate_type);
168 streams.write_result(&output)?;
169 }
170
171 Ok(())
172}
173
174fn format_duplicates_text(groups: &[DuplicateGroupOutput], dup_type: DuplicateType) -> String {
176 let mut lines = Vec::new();
177
178 let type_name = match dup_type {
179 DuplicateType::Body => "body",
180 DuplicateType::Signature => "signature",
181 DuplicateType::Struct => "struct",
182 };
183
184 lines.push(format!(
185 "Found {} duplicate groups (type: {})",
186 groups.len(),
187 type_name
188 ));
189 lines.push(String::new());
190
191 for (i, group) in groups.iter().enumerate() {
192 lines.push(format!("Group {} ({} duplicates):", i + 1, group.count));
193 for sym in &group.symbols {
194 lines.push(format!(
195 " {} [{}] {}:{}",
196 sym.qualified_name, sym.kind, sym.file, sym.line
197 ));
198 }
199 lines.push(String::new());
200 }
201
202 if groups.is_empty() {
203 lines.push("No duplicates found.".to_string());
204 }
205
206 lines.join("\n")
207}