1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
//! Clones command - Detect code clones in a codebase
//!
//! Identifies duplicated code fragments using token-based similarity analysis.
//! Supports Type-1 (exact), Type-2 (parameterized), and Type-3 (gapped) clones.
use std::path::PathBuf;
use anyhow::Result;
use clap::Args;
use tldr_core::analysis::{
detect_clones, CloneType, ClonesOptions, NormalizationMode,
};
use crate::output::{
format_clones_dot, format_clones_sarif, format_clones_text, OutputFormat, OutputWriter,
};
/// Detect code clones in a codebase
// NOTE: the `///` comments on this struct and its fields are picked up by clap's
// derive and shown as `--help` text; plain `//` comments are maintainer notes only.
#[derive(Debug, Args)]
pub struct ClonesArgs {
    /// Path to analyze (default: current directory)
    #[arg(default_value = ".")]
    pub path: PathBuf,

    /// Minimum tokens for a clone (default: 25)
    #[arg(long, default_value = "25")]
    pub min_tokens: usize,

    /// Minimum lines for a clone (default: 5)
    #[arg(long, default_value = "5")]
    pub min_lines: usize,

    /// Similarity threshold (0.0-1.0, default: 0.7)
    // The documented 0.0-1.0 range is not enforced here; the value is forwarded as-is.
    #[arg(short = 't', long, default_value = "0.7")]
    pub threshold: f64,

    /// Filter by clone type: 1, 2, 3, or all (default: all)
    // Values other than "1", "2" or "3" are treated like "all" by `parse_type_filter`.
    #[arg(long, default_value = "all")]
    pub type_filter: String,

    /// Normalization mode: none, identifiers, literals, all (default: all)
    // `run` falls back to `NormalizationMode::All` when this string does not parse.
    #[arg(long, default_value = "all")]
    pub normalize: String,

    /// Filter by language: python, typescript, go, rust
    #[arg(long = "language")]
    pub language: Option<String>,

    /// Output format: json, text, sarif, dot (default: json)
    /// Use sarif for IDE/CI integration (GitHub, VS Code, etc.)
    /// and dot for graph visualization
    // "json" (and any unrecognized value) defers to the globally selected output
    // format passed to `run`; "text", "sarif" and "dot" override it.
    #[arg(short, long, default_value = "json")]
    pub output: String,

    /// Show clone classes (transitive grouping)
    #[arg(long)]
    pub show_classes: bool,

    /// Include clones within the same file
    #[arg(long)]
    pub include_within_file: bool,

    /// Maximum clones to report (default: 20)
    #[arg(long, default_value = "20")]
    pub max_clones: usize,

    /// Maximum files to analyze (default: 1000)
    #[arg(long, default_value = "1000")]
    pub max_files: usize,

    /// Exclude generated files (e.g., *.pb.go, *_generated.ts, vendor/, etc.)
    #[arg(long)]
    pub exclude_generated: bool,

    /// Exclude test files (e.g., test_*.py, *_test.go, *_spec.rb, tests/, __tests__/)
    #[arg(long)]
    pub exclude_tests: bool,
}
impl ClonesArgs {
    /// Execute clone detection and emit the resulting report.
    ///
    /// Builds a `ClonesOptions` from the parsed arguments, runs `detect_clones`
    /// on `self.path`, and writes the report through an `OutputWriter`.
    ///
    /// * `format` - globally selected output format; used unless `--output`
    ///   names `text`, `sarif` or `dot`.
    /// * `quiet` - forwarded to `OutputWriter::new`.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by `detect_clones` or by the writer.
    pub fn run(&self, format: OutputFormat, quiet: bool) -> Result<()> {
        let writer = OutputWriter::new(format, quiet);
        writer.progress(&format!("Detecting clones in {}...", self.path.display()));

        let options = ClonesOptions {
            // An unparseable `--normalize` value falls back to full normalization.
            normalization: NormalizationMode::parse(&self.normalize)
                .unwrap_or(NormalizationMode::All),
            type_filter: parse_type_filter(&self.type_filter),
            min_tokens: self.min_tokens,
            min_lines: self.min_lines,
            threshold: self.threshold,
            language: self.language.clone(),
            show_classes: self.show_classes,
            include_within_file: self.include_within_file,
            max_clones: self.max_clones,
            max_files: self.max_files,
            exclude_generated: self.exclude_generated,
            exclude_tests: self.exclude_tests,
        };

        let report = detect_clones(&self.path, &options)?;

        // DOT (graph visualization) has no `OutputFormat` counterpart here, so it is
        // handled up front and bypasses the global format entirely.
        if self.output.as_str() == "dot" {
            let graph = format_clones_dot(&report);
            writer.write_text(&graph)?;
            return Ok(());
        }

        // A command-level `--output text|sarif` overrides the global format; every
        // other value (including the default "json") defers to it.
        let chosen = match self.output.as_str() {
            "text" => OutputFormat::Text,
            "sarif" => OutputFormat::Sarif,
            _ => format,
        };

        if matches!(chosen, OutputFormat::Text) {
            let rendered = format_clones_text(&report);
            writer.write_text(&rendered)?;
        } else if matches!(chosen, OutputFormat::Sarif) {
            let rendered = format_clones_sarif(&report);
            writer.write_text(&rendered)?;
        } else {
            // Any remaining format is left to the writer's own handling of the report.
            writer.write(&report)?;
        }

        Ok(())
    }
}
/// Translate the `--type-filter` argument into an optional `CloneType`.
///
/// `"1"`, `"2"` and `"3"` select the matching clone type. Every other input —
/// including `"all"`, the empty string, and unrecognized values — yields `None`,
/// meaning no type filter is applied.
fn parse_type_filter(s: &str) -> Option<CloneType> {
    let selected = match s {
        "1" => CloneType::Type1,
        "2" => CloneType::Type2,
        "3" => CloneType::Type3,
        // "all", "" and anything unrecognized: no filtering.
        _ => return None,
    };
    Some(selected)
}
// Note: format_clones_text and format_clones_dot are imported from crate::output
// They provide improved formatting with:
// - S8-P3-T5: Human-readable clone type descriptions
// - S8-P3-T7: Helpful hints for empty results
// - S8-P3-T11: DOT output with proper escaping