1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
//! Clones command - Detect code clones in a codebase
//!
//! Identifies duplicated code fragments using token-based similarity analysis.
//! Supports Type-1 (exact), Type-2 (parameterized), and Type-3 (gapped) clones.
use std::path::PathBuf;
use anyhow::Result;
use clap::Args;
use tldr_core::analysis::{detect_clones, CloneType, ClonesOptions, NormalizationMode};
use tldr_core::Language;
use crate::output::{
format_clones_dot, format_clones_sarif, format_clones_text, OutputFormat, OutputWriter,
};
/// Detect code clones in a codebase
#[derive(Debug, Args)]
pub struct ClonesArgs {
/// Path to analyze (default: current directory)
#[arg(default_value = ".")]
pub path: PathBuf,
/// Minimum tokens for a clone (default: 25)
#[arg(long, default_value = "25")]
pub min_tokens: usize,
/// Minimum lines for a clone (default: 5)
#[arg(long, default_value = "5")]
pub min_lines: usize,
/// Similarity threshold (0.0-1.0, default: 0.7)
#[arg(short = 't', long, default_value = "0.7")]
pub threshold: f64,
/// Filter by clone type: 1, 2, 3, or all (default: all)
#[arg(long, default_value = "all")]
pub type_filter: String,
/// Normalization mode: none, identifiers, literals, all (default: all)
#[arg(long, default_value = "all")]
pub normalize: String,
/// Filter by language: python, typescript, go, rust
#[arg(long = "language")]
pub language: Option<String>,
/// Output format: json, text, sarif (default: json)
/// Use sarif for IDE/CI integration (GitHub, VS Code, etc.)
#[arg(short, long, default_value = "json")]
pub output: String,
/// Show clone classes (transitive grouping)
#[arg(long)]
pub show_classes: bool,
/// Include clones within the same file
#[arg(long)]
pub include_within_file: bool,
/// Maximum clones to report (default: 20)
#[arg(long, default_value = "20")]
pub max_clones: usize,
/// Maximum files to analyze (default: 1000)
#[arg(long, default_value = "1000")]
pub max_files: usize,
/// Exclude generated files (e.g., *.pb.go, *_generated.ts, vendor/, etc.)
#[arg(long)]
pub exclude_generated: bool,
/// Exclude test files (e.g., test_*.py, *_test.go, *_spec.rb, tests/, __tests__/)
#[arg(long)]
pub exclude_tests: bool,
}
impl ClonesArgs {
/// Run the clones command
pub fn run(
&self,
format: OutputFormat,
quiet: bool,
global_lang: Option<Language>,
) -> Result<()> {
let writer = OutputWriter::new(format, quiet);
writer.progress(&format!("Detecting clones in {}...", self.path.display()));
let normalization =
NormalizationMode::parse(&self.normalize).unwrap_or(NormalizationMode::All);
let type_filter = parse_type_filter(&self.type_filter);
// language-adapter-fixes-v1 (P13.AGG13-10): the global `-l/--lang`
// flag (defined in `Cli` and honoured by 30+ sibling commands) was
// silently ignored by `clones` because the dispatcher discarded
// `cli.lang` when invoking `Clones(args).run(...)`. Honour it here
// by mapping the parsed `Language` enum back to the wire-format
// string `ClonesOptions` already accepts. The local `--language`
// flag wins when both are set so existing scripts keep working.
let effective_language = self
.language
.clone()
.or_else(|| global_lang.map(|l| l.as_str().to_string()));
let options = ClonesOptions {
min_tokens: self.min_tokens,
min_lines: self.min_lines,
threshold: self.threshold,
type_filter,
normalization,
language: effective_language,
show_classes: self.show_classes,
include_within_file: self.include_within_file,
max_clones: self.max_clones,
max_files: self.max_files,
exclude_generated: self.exclude_generated,
exclude_tests: self.exclude_tests,
};
let report = detect_clones(&self.path, &options)?;
// Determine output format from argument or global format
let effective_format = match self.output.as_str() {
"text" => OutputFormat::Text,
"sarif" => OutputFormat::Sarif,
"dot" => {
// DOT format for graph visualization
let dot = format_clones_dot(&report);
writer.write_text(&dot)?;
return Ok(());
}
"json" => format,
_ => format,
};
match effective_format {
OutputFormat::Text => {
let text = format_clones_text(&report);
writer.write_text(&text)?;
}
OutputFormat::Sarif => {
let sarif = format_clones_sarif(&report);
writer.write_text(&sarif)?;
}
// schema-cleanup-v2 (P2.BUG-6): the global `--format dot`
// path previously fell through to the JSON arm below, so
// `tldr clones --format dot` silently emitted JSON with
// exit 0 — even though the per-command DOT validator
// (`validate_format_for_command`) and the `secure --format
// dot` error message both advertised clones as DOT-supported.
// The dedicated emitter (`format_clones_dot`) was already
// present and reachable via the legacy `--output dot` flag;
// this arm wires the canonical `--format dot` route to it.
OutputFormat::Dot => {
let dot = format_clones_dot(&report);
writer.write_text(&dot)?;
}
_ => {
writer.write(&report)?;
}
}
Ok(())
}
}
/// Parse type filter string into CloneType
fn parse_type_filter(s: &str) -> Option<CloneType> {
match s {
"1" => Some(CloneType::Type1),
"2" => Some(CloneType::Type2),
"3" => Some(CloneType::Type3),
"all" | "" => None,
_ => None,
}
}
// Note: format_clones_text and format_clones_dot are imported from crate::output
// They provide improved formatting with:
// - S8-P3-T5: Human-readable clone type descriptions
// - S8-P3-T7: Helpful hints for empty results
// - S8-P3-T11: DOT output with proper escaping