use std::path::PathBuf;
use anyhow::{anyhow, Result};
use clap::Args;
use serde::Serialize;
use tldr_core::analysis::{
compute_dice_similarity, interpret_similarity, normalize_tokens, NormalizationMode,
};
use crate::output::{OutputFormat, OutputWriter};
/// Compare two code targets using the Dice similarity coefficient
///
/// Each target is given as PATH (whole file), PATH::FUNCTION, or
/// PATH:START:END (1-based line range).
#[derive(Debug, Args)]
pub struct DiceArgs {
    /// First target: PATH, PATH::FUNCTION, or PATH:START:END
    pub target1: String,

    /// Second target: PATH, PATH::FUNCTION, or PATH:START:END
    pub target2: String,

    /// Token normalization mode; unrecognized values fall back to "all"
    #[arg(long, default_value = "all")]
    pub normalize: String,

    /// Language to use for both targets instead of detecting it from the file path
    #[arg(long = "language")]
    pub language: Option<String>,

    /// Use "text" for a human-readable report; any other value keeps the globally selected format
    #[arg(short, long, default_value = "json")]
    pub output: String,
}
/// A comparison target as parsed from a command-line argument by `parse_target`.
#[derive(Debug)]
enum Target {
/// A whole file, identified by its path.
File(PathBuf),
/// A named item in a file, written as `path::name`: the path and the name.
Function(PathBuf, String),
/// A line range in a file, written as `path:start:end`: the path, the
/// 1-based first line, and the last line (inclusive).
Block(PathBuf, usize, usize),
}
/// Serializable result of comparing two targets, built by `DiceArgs::run`.
#[derive(Debug, Serialize)]
struct DiceSimilarityReport {
/// First target exactly as it was given on the command line.
target1: String,
/// Second target exactly as it was given on the command line.
target2: String,
/// Score returned by `compute_dice_similarity` for the two token lists.
/// Presumably in `0.0..=1.0` (the text output shows it as a percentage) — confirm.
dice_coefficient: f64,
/// Description of the score as produced by `interpret_similarity`.
interpretation: String,
/// Number of normalized tokens obtained from the first target.
tokens1_count: usize,
/// Number of normalized tokens obtained from the second target.
tokens2_count: usize,
}
impl DiceArgs {
    /// Compares the two targets and writes a similarity report.
    ///
    /// `format` and `quiet` configure the `OutputWriter`. The report is written
    /// as text when `--output text` was given or when `format` is already
    /// `OutputFormat::Text`; otherwise it is written through `writer.write`.
    ///
    /// An unrecognized `--normalize` value falls back to
    /// `NormalizationMode::All`.
    ///
    /// # Errors
    ///
    /// Returns an error when a target cannot be loaded, when tokenization of
    /// either target fails, or when writing the report fails.
    pub fn run(&self, format: OutputFormat, quiet: bool) -> Result<()> {
        let writer = OutputWriter::new(format, quiet);
        let banner = format!(
            "Comparing similarity between {} and {}...",
            self.target1, self.target2
        );
        writer.progress(&banner);

        let first = parse_target(&self.target1)?;
        let second = parse_target(&self.target2)?;
        let mode = NormalizationMode::parse(&self.normalize).unwrap_or(NormalizationMode::All);

        // The same optional language hint applies to both targets.
        let hint = self.language.as_deref();
        let (first_source, first_lang) = get_source(&first, hint)?;
        let (second_source, second_lang) = get_source(&second, hint)?;

        let first_tokens = normalize_tokens(&first_source, &first_lang, mode)
            .map_err(|err| anyhow!("Failed to tokenize target1: {}", err))?;
        let second_tokens = normalize_tokens(&second_source, &second_lang, mode)
            .map_err(|err| anyhow!("Failed to tokenize target2: {}", err))?;

        let score = compute_dice_similarity(&first_tokens, &second_tokens);
        let report = DiceSimilarityReport {
            target1: self.target1.clone(),
            target2: self.target2.clone(),
            dice_coefficient: score,
            interpretation: interpret_similarity(score),
            tokens1_count: first_tokens.len(),
            tokens2_count: second_tokens.len(),
        };

        // Only "text" overrides the caller's format; every other value of
        // `--output` (including the default "json") defers to `format`.
        let wants_text = self.output == "text" || matches!(format, OutputFormat::Text);
        if wants_text {
            let rendered = format_dice_text(&report);
            writer.write_text(&rendered)?;
        } else {
            writer.write(&report)?;
        }
        Ok(())
    }
}
/// Parses a command-line target string into a [`Target`].
///
/// Recognized forms, checked in this order:
///
/// 1. `path::name` — split at the first `::` into [`Target::Function`].
/// 2. `path:start:end` — the last two `:`-separated fields must both parse as
///    `usize`; yields [`Target::Block`].
/// 3. Anything else is taken verbatim as a file path ([`Target::File`]).
///
/// This function never returns `Err`; strings that fit neither special form
/// fall through to the file case.
fn parse_target(s: &str) -> Result<Target> {
    if let Some((file, name)) = s.split_once("::") {
        return Ok(Target::Function(PathBuf::from(file), name.to_string()));
    }

    // Peel the two trailing fields off the right so that any earlier colons
    // stay part of the path.
    let block = s.rsplit_once(':').and_then(|(rest, end_text)| {
        let (file, start_text) = rest.rsplit_once(':')?;
        let end = end_text.parse::<usize>().ok()?;
        let start = start_text.parse::<usize>().ok()?;
        Some(Target::Block(PathBuf::from(file), start, end))
    });

    Ok(block.unwrap_or_else(|| Target::File(PathBuf::from(s))))
}
/// Reads the text to compare for `target` and determines its language.
///
/// Returns `(text, language)`. The language is `lang_hint` when one is given,
/// otherwise whatever `detect_language` derives from the target's path.
///
/// * [`Target::File`] yields the whole file.
/// * [`Target::Function`] currently also yields the whole file; the function
///   name is not used to narrow the text.
/// * [`Target::Block`] yields lines `start..=end` (1-based). A `start` of 0 is
///   treated as line 1 and `end` is clamped to the last line of the file.
///
/// # Errors
///
/// Fails when the file cannot be read, when a block's range selects no lines
/// (start past the end of the file, or start greater than end), or when no
/// hint is given and the language cannot be detected.
fn get_source(target: &Target, lang_hint: Option<&str>) -> Result<(String, String)> {
    let path = match target {
        Target::File(path) | Target::Function(path, _) | Target::Block(path, _, _) => path,
    };
    let source = std::fs::read_to_string(path)
        .map_err(|e| anyhow!("Failed to read {}: {}", path.display(), e))?;

    let text = match target {
        Target::File(_) => source,
        // NOTE(review): the function name is ignored, so `path::name` targets
        // are compared as whole files. Narrowing to the named function needs
        // an extraction step that is not available in this function.
        Target::Function(_, _func_name) => source,
        Target::Block(_, start, end) => {
            let lines: Vec<&str> = source.lines().collect();
            let start_idx = start.saturating_sub(1);
            let end_idx = (*end).min(lines.len());
            match lines.get(start_idx..end_idx) {
                Some(selected) if !selected.is_empty() => selected.join("\n"),
                // An empty selection would silently yield a meaningless
                // similarity score, so report it instead.
                _ => {
                    return Err(anyhow!(
                        "Line range {}-{} selects no lines in {} ({} lines)",
                        start,
                        end,
                        path.display(),
                        lines.len()
                    ));
                }
            }
        }
    };

    let lang = lang_hint
        .map(String::from)
        .or_else(|| detect_language(path))
        .ok_or_else(|| anyhow!("Could not detect language for {}", path.display()))?;
    Ok((text, lang))
}
/// Returns the name of the language that `tldr_core::Language::from_path`
/// associates with `path`, or `None` when it does not recognize the path.
fn detect_language(path: &std::path::Path) -> Option<String> {
    let language = tldr_core::Language::from_path(path)?;
    Some(language.to_string())
}
/// Renders `report` as a human-readable, newline-terminated text block.
///
/// The layout is a two-line header, one line per target with its token count,
/// then the coefficient as a percentage with two decimals and the
/// interpretation.
fn format_dice_text(report: &DiceSimilarityReport) -> String {
    let percent = report.dice_coefficient * 100.0;
    let lines = [
        String::from("Similarity Comparison"),
        String::from("====================="),
        String::new(),
        format!(
            "Target 1: {} ({} tokens)",
            report.target1, report.tokens1_count
        ),
        format!(
            "Target 2: {} ({} tokens)",
            report.target2, report.tokens2_count
        ),
        String::new(),
        format!("Dice coefficient: {:.2}%", percent),
        format!("Interpretation: {}", report.interpretation),
    ];

    // Every line, including the last, ends with a newline.
    let mut rendered = lines.join("\n");
    rendered.push('\n');
    rendered
}