Skip to main content

sqry_cli/commands/
duplicates.rs

1//! Duplicates command implementation
2//!
3//! Provides CLI interface for finding duplicate code in the codebase.
4
5use crate::args::Cli;
6use crate::commands::graph::loader::{GraphLoadConfig, load_unified_graph_for_cli};
7use crate::index_discovery::find_nearest_index;
8use crate::output::OutputStreams;
9use anyhow::{Context, Result};
10use serde::Serialize;
11use sqry_core::query::{DuplicateConfig, DuplicateType, build_duplicate_groups_graph};
12
13/// Duplicate group for output
14#[derive(Debug, Serialize)]
15struct DuplicateGroupOutput {
16    /// Group identifier (hash as 32-char hex string for body duplicates, 16-char for others)
17    ///
18    /// For body duplicates with 128-bit `body_hash`, this is formatted as a 32-character
19    /// lowercase hexadecimal string (e.g., "000000000000000012345678abcdef01").
20    /// For signature/struct duplicates, this is a 16-character hex string from the u64 hash.
21    group_id: String,
22    /// Number of duplicates in this group
23    count: usize,
24    /// Symbols in this group
25    symbols: Vec<DuplicateSymbol>,
26}
27
28/// Symbol info for duplicate output
29#[derive(Debug, Serialize)]
30struct DuplicateSymbol {
31    name: String,
32    qualified_name: String,
33    kind: String,
34    file: String,
35    line: u32,
36    language: String,
37}
38
39/// Run the duplicates command.
40///
41/// # Errors
42/// Returns an error if the graph cannot be loaded or duplicates cannot be found.
43pub fn run_duplicates(
44    cli: &Cli,
45    path: Option<&str>,
46    dup_type: &str,
47    threshold: u32,
48    max_results: usize,
49    exact: bool,
50) -> Result<()> {
51    let mut streams = OutputStreams::new();
52
53    // Parse duplicate type
54    let duplicate_type: DuplicateType = dup_type
55        .parse()
56        .with_context(|| format!("Invalid duplicate type: {dup_type}"))?;
57
58    // Find index
59    let search_path = path.map_or_else(
60        || std::env::current_dir().unwrap_or_default(),
61        std::path::PathBuf::from,
62    );
63
64    let index_location = find_nearest_index(&search_path);
65    let Some(ref loc) = index_location else {
66        streams
67            .write_diagnostic("No .sqry-index found. Run 'sqry index' first to build the index.")?;
68        return Ok(());
69    };
70
71    // Load unified graph
72    let graph_config = GraphLoadConfig::default();
73    let graph = load_unified_graph_for_cli(&loc.index_root, &graph_config, cli)
74        .context("Failed to load graph. Run 'sqry index' to build the graph.")?;
75
76    // Build config
77    let config = DuplicateConfig {
78        threshold: if exact {
79            1.0
80        } else {
81            f64::from(threshold) / 100.0
82        },
83        max_results,
84        is_exact_only: exact || threshold >= 100,
85    };
86
87    // Find duplicates using graph-based detection
88    let groups = build_duplicate_groups_graph(duplicate_type, &graph, &config);
89
90    let strings = graph.strings();
91    let files = graph.files();
92
93    // Convert to output format
94    let mut output_groups: Vec<DuplicateGroupOutput> = groups
95        .into_iter()
96        .filter(|g| g.node_ids.len() > 1)
97        .map(|group| {
98            let symbols: Vec<DuplicateSymbol> = group
99                .node_ids
100                .iter()
101                .filter_map(|&node_id| {
102                    let entry = graph.nodes().get(node_id)?;
103
104                    let name = strings
105                        .resolve(entry.name)
106                        .map(|s| s.to_string())
107                        .unwrap_or_default();
108
109                    let qualified_name = entry
110                        .qualified_name
111                        .and_then(|id| strings.resolve(id))
112                        .map_or_else(|| name.clone(), |s| s.to_string());
113
114                    let file_path = files
115                        .resolve(entry.file)
116                        .map(|p| p.display().to_string())
117                        .unwrap_or_default();
118
119                    let language = files
120                        .language_for_file(entry.file)
121                        .map_or_else(|| "Unknown".to_string(), |l| l.to_string());
122
123                    Some(DuplicateSymbol {
124                        name,
125                        qualified_name,
126                        kind: format!("{:?}", entry.kind),
127                        file: file_path,
128                        line: entry.start_line,
129                        language,
130                    })
131                })
132                .collect();
133
134            // Format group_id as hex string
135            // - For body duplicates with 128-bit hash: 32-char hex
136            // - For others: 16-char hex from u64
137            let group_id = if let Some(body_hash) = group.body_hash_128 {
138                format!("{body_hash}") // BodyHash128::Display is 32-char hex
139            } else {
140                format!("{:016x}", group.hash)
141            };
142
143            DuplicateGroupOutput {
144                group_id,
145                count: symbols.len(),
146                symbols,
147            }
148        })
149        .filter(|g| g.count > 1)
150        .collect();
151
152    // Sort by group size (largest first) for deterministic output
153    // Secondary sort by group_id string for stable ordering
154    output_groups.sort_by(|a, b| {
155        b.count
156            .cmp(&a.count)
157            .then_with(|| a.group_id.cmp(&b.group_id))
158    });
159    output_groups.truncate(max_results);
160
161    // Output
162    if cli.json {
163        let json =
164            serde_json::to_string_pretty(&output_groups).context("Failed to serialize to JSON")?;
165        streams.write_result(&json)?;
166    } else {
167        let output = format_duplicates_text(&output_groups, duplicate_type);
168        streams.write_result(&output)?;
169    }
170
171    Ok(())
172}
173
174/// Format duplicates as human-readable text
175fn format_duplicates_text(groups: &[DuplicateGroupOutput], dup_type: DuplicateType) -> String {
176    let mut lines = Vec::new();
177
178    let type_name = match dup_type {
179        DuplicateType::Body => "body",
180        DuplicateType::Signature => "signature",
181        DuplicateType::Struct => "struct",
182    };
183
184    lines.push(format!(
185        "Found {} duplicate groups (type: {})",
186        groups.len(),
187        type_name
188    ));
189    lines.push(String::new());
190
191    for (i, group) in groups.iter().enumerate() {
192        lines.push(format!("Group {} ({} duplicates):", i + 1, group.count));
193        for sym in &group.symbols {
194            lines.push(format!(
195                "  {} [{}] {}:{}",
196                sym.qualified_name, sym.kind, sym.file, sym.line
197            ));
198        }
199        lines.push(String::new());
200    }
201
202    if groups.is_empty() {
203        lines.push("No duplicates found.".to_string());
204    }
205
206    lines.join("\n")
207}