use std::collections::HashMap;
use std::fmt::Write as _;
use std::process::Command;
use anyhow::{bail, Context, Result};
use serde::Deserialize;
use crate::CpitdCommands;
use crate::db::Database;
/// Entry point for the `cpitd` sub-command family.
///
/// Routes the parsed [`CpitdCommands`] variant to its handler:
/// `Scan` → [`scan`], `Status` → [`status`], `Clear` → [`clear`].
/// `quiet` is only forwarded to `scan`; the other handlers always print.
///
/// # Errors
/// Returns whatever error the selected handler returns.
pub fn run(command: CpitdCommands, db: &Database, quiet: bool) -> Result<()> {
    match command {
        CpitdCommands::Status => status(db),
        CpitdCommands::Clear => clear(db),
        CpitdCommands::Scan {
            paths,
            min_tokens,
            ignore: ignore_patterns,
            dry_run,
        } => scan(db, &paths, min_tokens, &ignore_patterns, dry_run, quiet),
    }
}
use crate::utils::format_issue_id;
/// Top-level shape of the JSON document that `cpitd --format json` writes to
/// stdout; `run_cpitd` deserializes the tool's output into this type.
#[derive(Debug, Deserialize)]
struct CpitdOutput {
    /// One report per pair of files in which cpitd found cloned code.
    clone_reports: Vec<CpitdCloneReport>,
    /// Pair count as reported by cpitd; `scan` prints it in its
    /// "Found N clone pair(s)." line.
    total_pairs: usize,
}
/// One entry of `clone_reports`: the clones detected between two files.
#[derive(Debug, Deserialize)]
struct CpitdCloneReport {
    /// Path of the first file of the pair, as reported by cpitd.
    file_a: String,
    /// Path of the second file of the pair, as reported by cpitd.
    file_b: String,
    /// Total cloned-line figure reported by cpitd for this pair; shown in
    /// issue titles, descriptions and rescan comments.
    total_cloned_lines: usize,
    /// The individual clone groups that make up this report.
    groups: Vec<CpitdCloneGroup>,
}
/// One contiguous clone group inside a [`CpitdCloneReport`].
#[derive(Debug, Deserialize)]
struct CpitdCloneGroup {
    /// Line span in `file_a`. The description formatter renders elements 0
    /// and 1 as a `start-end` range — presumably `[start, end]`; confirm
    /// against cpitd's output schema.
    lines_a: Vec<usize>,
    /// Line span in `file_b`, read the same way as `lines_a`.
    lines_b: Vec<usize>,
    /// Line count reported by cpitd for this group.
    line_count: usize,
    /// Token count reported by cpitd for this group.
    token_count: usize,
}
/// Reports whether a working `cpitd` executable can be launched.
///
/// Runs `cpitd --version` and returns `true` only when the process could be
/// spawned *and* exited successfully; a spawn failure or a non-zero exit
/// both yield `false`.
fn find_cpitd() -> bool {
    let probe = Command::new("cpitd").arg("--version").output();
    match probe {
        Ok(finished) => finished.status.success(),
        Err(_) => false,
    }
}
/// Builds the "cpitd is missing" error shown when the tool cannot be run.
///
/// # Errors
/// Always returns `Err`; the message explains what cpitd is and how to
/// install it.
fn suggest_install() -> Result<()> {
    // `\x20` keeps the leading space of the indented `pip` line: a `\`
    // line continuation would otherwise swallow all leading whitespace.
    const INSTALL_HINT: &str = "cpitd is not installed or not found in PATH.\n\n\
        cpitd (Copy Paste Is The Devil) is a language-agnostic code clone\n\
        detector that integrates with crosslink to track duplicated code\n\
        as issues.\n\n\
        Run `crosslink init --force` to auto-install, or install manually:\n\n\
        \x20 pip install cpitd\n\n\
        Or visit: https://github.com/scythia-marrow/cpitd";
    bail!(INSTALL_HINT)
}
/// Invokes `cpitd` and parses its JSON report.
///
/// The command line is `cpitd <paths…> --format json --min-tokens <n>`
/// followed by one `--ignore <pattern>` pair per ignore pattern. When
/// `paths` is empty the current directory (`.`) is scanned.
///
/// # Errors
/// Fails when the process cannot be executed, when stdout is not valid
/// UTF-8, when stdout is blank but stderr is not (stderr is included in the
/// message), or when stdout is not a valid cpitd JSON document.
fn run_cpitd(paths: &[String], min_tokens: u32, ignore_patterns: &[String]) -> Result<CpitdOutput> {
    let mut cmd = Command::new("cpitd");
    if paths.is_empty() {
        cmd.arg(".");
    } else {
        cmd.args(paths);
    }
    cmd.args(["--format", "json"]);
    cmd.arg("--min-tokens").arg(min_tokens.to_string());
    for pattern in ignore_patterns {
        cmd.args(["--ignore", pattern.as_str()]);
    }

    let output = cmd.output().context("Failed to execute cpitd")?;
    let stdout = String::from_utf8(output.stdout).context("cpitd output is not valid UTF-8")?;

    // Normal case: there is something on stdout, so it must be the report.
    if !stdout.trim().is_empty() {
        return serde_json::from_str(&stdout).context("Failed to parse cpitd JSON output");
    }

    // Blank stdout: silence on stderr too is treated as "nothing found",
    // anything else is surfaced as an error.
    let stderr = String::from_utf8_lossy(&output.stderr);
    let diagnostics = stderr.trim();
    if diagnostics.is_empty() {
        return Ok(CpitdOutput {
            clone_reports: Vec::new(),
            total_pairs: 0,
        });
    }
    bail!("cpitd produced no output. stderr: {}", diagnostics)
}
/// Builds the hidden HTML-comment marker that identifies a file pair.
///
/// The two paths are placed in ascending string order, so the marker is the
/// same regardless of which file is passed first.
fn dedup_marker(file_a: &str, file_b: &str) -> String {
    let mut pair = [file_a, file_b];
    pair.sort_unstable();
    let [a, b] = pair;
    format!("<!-- cpitd:file_a={a}:file_b={b} -->")
}
/// Looks for an open `cpitd`-labelled issue already tracking this file pair.
///
/// An issue matches when its description contains the pair's
/// [`dedup_marker`]. Returns the id of the first match, or `None` when no
/// open issue carries the marker.
///
/// # Errors
/// Propagates any error from listing issues.
fn find_existing_clone_issue(db: &Database, file_a: &str, file_b: &str) -> Result<Option<i64>> {
    let marker = dedup_marker(file_a, file_b);
    let open_issues = db.list_issues(Some("open"), Some("cpitd"), None)?;
    let matching_id = open_issues.into_iter().find_map(|issue| {
        // Issues without a description can never carry the marker.
        let desc = issue.description.as_ref()?;
        desc.contains(&marker).then_some(issue.id)
    });
    Ok(matching_id)
}
/// Reduces a path to its final component for compact display.
///
/// Falls back to the full input when the path has no file name (for example
/// it is empty or ends in `..`) or the name is not valid UTF-8.
fn shorten_path(path: &str) -> &str {
    match std::path::Path::new(path).file_name() {
        Some(name) => name.to_str().unwrap_or(path),
        None => path,
    }
}
/// Builds the issue description for one clone report.
///
/// The text starts with the pair's [`dedup_marker`] (an HTML comment used to
/// recognise the issue on later scans), then lists both files, the total
/// cloned-line figure, one numbered line per clone group, and a closing
/// refactoring hint.
///
/// Line spans come from externally produced JSON and are not length-checked
/// on deserialization, so they are rendered defensively: a span with at
/// least two elements prints as `start-end`, a single element prints alone,
/// and an empty span prints as `?` instead of panicking on an index.
fn format_clone_description(report: &CpitdCloneReport) -> String {
    /// Renders a line span without assuming it holds two elements.
    fn format_line_range(lines: &[usize]) -> String {
        match lines {
            [start, end, ..] => format!("{start}-{end}"),
            [only] => only.to_string(),
            [] => "?".to_owned(),
        }
    }

    let marker = dedup_marker(&report.file_a, &report.file_b);
    let mut desc = format!(
        "{}\n\n\
         Detected code clones between:\n\
         - `{}`\n\
         - `{}`\n\n\
         Total cloned lines: {}\n\n\
         Clone groups:\n",
        marker, report.file_a, report.file_b, report.total_cloned_lines,
    );
    for (i, group) in report.groups.iter().enumerate() {
        // Writing into a `String` cannot fail, so the `fmt::Result` is dropped.
        let _ = writeln!(
            desc,
            "{}. Lines {} <-> Lines {} ({} lines, {} tokens)",
            i + 1,
            format_line_range(&group.lines_a),
            format_line_range(&group.lines_b),
            group.line_count,
            group.token_count,
        );
    }
    desc.push_str("\nConsider extracting shared logic into a common function or module.");
    desc
}
/// Files a new low-priority issue for a clone report.
///
/// The title shows only the two file names (not full paths) plus the total
/// cloned-line figure; the body comes from [`format_clone_description`].
/// The issue is labelled `cpitd` and `refactor`. Unless `quiet` is set, a
/// confirmation line is printed.
///
/// # Errors
/// Propagates any error from creating the issue or attaching a label.
fn create_clone_issue(db: &Database, report: &CpitdCloneReport, quiet: bool) -> Result<i64> {
    let name_a = shorten_path(&report.file_a);
    let name_b = shorten_path(&report.file_b);
    let title = format!(
        "Code clone: {} <-> {} ({} lines)",
        name_a, name_b, report.total_cloned_lines,
    );
    let description = format_clone_description(report);

    let id = db.create_issue(&title, Some(&description), "low")?;
    for label in ["cpitd", "refactor"] {
        db.add_label(id, label)?;
    }

    if !quiet {
        println!(" Created issue {}: {}", format_issue_id(id), title);
    }
    Ok(id)
}
/// Links newly created clone issues that involve a common file.
///
/// `created` holds `(issue id, file_a, file_b)` triples. Issues are grouped
/// by every file they mention, and within each group every pair of issues is
/// passed to `add_relation`. This is best-effort: the result of each
/// `add_relation` call is deliberately discarded.
fn relate_clone_issues(db: &Database, created: &[(i64, String, String)]) {
    let mut issues_by_file: HashMap<&str, Vec<i64>> = HashMap::new();
    for (id, file_a, file_b) in created {
        for file in [file_a, file_b] {
            issues_by_file.entry(file.as_str()).or_default().push(*id);
        }
    }

    for ids in issues_by_file.values() {
        for (pos, &earlier) in ids.iter().enumerate() {
            for &later in &ids[pos + 1..] {
                let _ = db.add_relation(earlier, later);
            }
        }
    }
}
/// Runs a cpitd scan and mirrors its findings into the issue database.
///
/// Steps:
/// 1. Verify `cpitd` is runnable; otherwise return the install-hint error.
/// 2. Run cpitd over `paths` with `min_tokens` and `ignore_patterns`.
/// 3. With `dry_run`, list what would be filed and stop without touching
///    the database.
/// 4. Otherwise, for each reported pair: add a rescan comment to the open
///    issue already tracking it, or create a new issue.
/// 5. When more than one issue was created, relate those sharing a file.
///
/// `quiet` suppresses progress and summary lines; the dry-run listing is
/// printed regardless.
///
/// # Errors
/// Fails when cpitd is unavailable or its output cannot be obtained or
/// parsed, or when a database operation fails.
pub fn scan(
    db: &Database,
    paths: &[String],
    min_tokens: u32,
    ignore_patterns: &[String],
    dry_run: bool,
    quiet: bool,
) -> Result<()> {
    let verbose = !quiet;

    if !find_cpitd() {
        return suggest_install();
    }
    if verbose {
        println!("Running cpitd clone detection...");
    }

    let output = run_cpitd(paths, min_tokens, ignore_patterns)?;
    let reports = &output.clone_reports;
    if reports.is_empty() {
        if verbose {
            println!("No code clones detected.");
        }
        return Ok(());
    }
    if verbose {
        println!("Found {} clone pair(s).\n", output.total_pairs);
    }

    if dry_run {
        // Not gated on `quiet`: this listing is the dry run's only result.
        for report in reports {
            println!(
                " Would create: {} <-> {} ({} lines, {} group(s))",
                report.file_a,
                report.file_b,
                report.total_cloned_lines,
                report.groups.len(),
            );
        }
        return Ok(());
    }

    let mut updated_count = 0usize;
    let mut created_ids: Vec<(i64, String, String)> = Vec::new();
    for report in reports {
        match find_existing_clone_issue(db, &report.file_a, &report.file_b)? {
            Some(existing_id) => {
                let comment = format!(
                    "[cpitd rescan] {} total cloned lines, {} group(s)",
                    report.total_cloned_lines,
                    report.groups.len(),
                );
                db.add_comment(existing_id, &comment, "note")?;
                updated_count += 1;
                if verbose {
                    println!(
                        " Updated issue {} (clone still present)",
                        format_issue_id(existing_id)
                    );
                }
            }
            None => {
                let id = create_clone_issue(db, report, quiet)?;
                created_ids.push((id, report.file_a.clone(), report.file_b.clone()));
            }
        }
    }

    // Every created issue is recorded exactly once, so the list length is
    // the created count.
    let created_count = created_ids.len();
    if created_count > 1 {
        relate_clone_issues(db, &created_ids);
    }
    if verbose {
        println!("\ncpitd scan complete: {created_count} created, {updated_count} updated");
    }
    Ok(())
}
/// Prints the open issues labelled `cpitd`.
///
/// Shows a count header followed by one `id  title` line per issue, or a
/// single "none" message when there are no such issues.
///
/// # Errors
/// Propagates any error from listing issues.
pub fn status(db: &Database) -> Result<()> {
    let open_issues = db.list_issues(Some("open"), Some("cpitd"), None)?;
    if open_issues.is_empty() {
        println!("No open cpitd clone issues.");
        return Ok(());
    }

    println!("{} open clone issue(s):\n", open_issues.len());
    for issue in &open_issues {
        println!(" {:<5} {}", format_issue_id(issue.id), issue.title);
    }
    Ok(())
}
/// Closes every open issue labelled `cpitd` and reports how many were closed.
///
/// # Errors
/// Propagates any error from listing or closing issues. Closing stops at the
/// first failure, so earlier issues in the list may already be closed.
pub fn clear(db: &Database) -> Result<()> {
    let issues = db.list_issues(Some("open"), Some("cpitd"), None)?;
    match issues.len() {
        0 => println!("No open cpitd clone issues to close."),
        count => {
            for issue in &issues {
                db.close_issue(issue.id)?;
            }
            println!("Closed {count} cpitd clone issue(s).");
        }
    }
    Ok(())
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A complete cpitd JSON document deserializes into the expected tree.
    #[test]
    fn test_parse_cpitd_json() {
        // The literal's inner lines are intentionally left unindented so the
        // string content stays exactly as it was.
        let json = r#"{
"clone_reports": [
{
"file_a": "src/foo.rs",
"file_b": "src/bar.rs",
"total_cloned_lines": 15,
"groups": [
{
"lines_a": [10, 24],
"lines_b": [30, 44],
"line_count": 15,
"token_count": 120
}
]
}
],
"total_pairs": 1
}"#;
        let parsed: CpitdOutput = serde_json::from_str(json).unwrap();
        assert_eq!(parsed.total_pairs, 1);
        assert_eq!(parsed.clone_reports.len(), 1);

        let report = &parsed.clone_reports[0];
        assert_eq!(report.file_a, "src/foo.rs");
        assert_eq!(report.file_b, "src/bar.rs");
        assert_eq!(report.total_cloned_lines, 15);
        assert_eq!(report.groups.len(), 1);

        let group = &report.groups[0];
        assert_eq!(group.lines_a, vec![10, 24]);
        assert_eq!(group.line_count, 15);
        assert_eq!(group.token_count, 120);
    }

    /// A document with no reports parses to an empty result.
    #[test]
    fn test_parse_cpitd_empty() {
        let parsed: CpitdOutput =
            serde_json::from_str(r#"{"clone_reports": [], "total_pairs": 0}"#).unwrap();
        assert_eq!(parsed.total_pairs, 0);
        assert!(parsed.clone_reports.is_empty());
    }

    /// Swapping the two paths must not change the marker.
    #[test]
    fn test_dedup_marker_commutative() {
        let forward = dedup_marker("src/a.rs", "src/b.rs");
        let reversed = dedup_marker("src/b.rs", "src/a.rs");
        assert_eq!(forward, reversed);
    }

    /// The marker embeds both paths inside the HTML-comment envelope.
    #[test]
    fn test_dedup_marker_contains_paths() {
        let marker = dedup_marker("foo.py", "bar.py");
        for path in ["bar.py", "foo.py"] {
            assert!(marker.contains(path));
        }
        assert!(marker.starts_with("<!-- cpitd:"));
        assert!(marker.ends_with(" -->"));
    }

    /// Only the final path component is kept.
    #[test]
    fn test_shorten_path_basic() {
        let cases = [
            ("src/commands/foo.rs", "foo.rs"),
            ("bar.py", "bar.py"),
            ("a/b/c/d.txt", "d.txt"),
        ];
        for (input, expected) in cases {
            assert_eq!(shorten_path(input), expected);
        }
    }

    /// The description carries the marker, both paths and the group figures.
    #[test]
    fn test_format_clone_description() {
        let group = CpitdCloneGroup {
            lines_a: vec![1, 10],
            lines_b: vec![20, 29],
            line_count: 10,
            token_count: 80,
        };
        let report = CpitdCloneReport {
            file_a: "src/foo.rs".to_string(),
            file_b: "src/bar.rs".to_string(),
            total_cloned_lines: 10,
            groups: vec![group],
        };
        let desc = format_clone_description(&report);
        for fragment in ["<!-- cpitd:", "src/bar.rs", "src/foo.rs", "10", "80 tokens"] {
            assert!(desc.contains(fragment));
        }
    }
}