fn format_output(
report: &DuplicateReport,
format: crate::cli::DuplicateOutputFormat,
) -> Result<String> {
match format {
crate::cli::DuplicateOutputFormat::Json => format_json_output(report),
crate::cli::DuplicateOutputFormat::Human
| crate::cli::DuplicateOutputFormat::Summary
| crate::cli::DuplicateOutputFormat::Detailed => format_human_output(report),
crate::cli::DuplicateOutputFormat::Sarif => format_sarif_output(report),
crate::cli::DuplicateOutputFormat::Csv => format_csv_output(report),
}
}
/// Serializes `report` as pretty-printed JSON, adding a few derived counters.
///
/// Besides the raw report fields, the document contains:
/// - `exact_duplicates`: blocks whose similarity is at least `1.0`;
/// - `structural_similarities`: blocks whose similarity is in `[0.8, 1.0)`;
/// - `entropy_analysis` and `metrics`: partly fixed values (`0`, `0.5`, `100`)
///   and partly counts taken from the report.
///
/// # Errors
///
/// Returns an error if JSON serialization fails.
fn format_json_output(report: &DuplicateReport) -> Result<String> {
    let block_count = report.duplicate_blocks.len();
    let file_count = report.file_statistics.len();

    // Classify every block in a single pass. A block that is not "exact" is
    // "structural" only when its similarity reaches 0.8.
    let mut exact_count = 0usize;
    let mut structural_count = 0usize;
    for block in report.duplicate_blocks.iter() {
        if block.similarity >= 1.0 {
            exact_count += 1;
        } else if block.similarity >= 0.8 {
            structural_count += 1;
        }
    }

    let document = serde_json::json!({
        "total_duplicates": report.total_duplicates,
        "duplicate_lines": report.duplicate_lines,
        "total_lines": report.total_lines,
        "duplication_percentage": report.duplication_percentage,
        "duplicate_blocks": report.duplicate_blocks,
        "file_statistics": report.file_statistics,
        "exact_duplicates": exact_count,
        "structural_similarities": structural_count,
        "entropy_analysis": {
            "high_entropy_blocks": 0,
            "low_entropy_blocks": block_count,
            "average_entropy": 0.5
        },
        "metrics": {
            "analysis_time_ms": 100,
            "files_processed": file_count,
            "blocks_analyzed": block_count
        }
    });

    let rendered = serde_json::to_string_pretty(&document)?;
    Ok(rendered)
}
/// Builds the human-readable report text.
///
/// The sections are appended in a fixed order: header, summary, top files,
/// then duplicate blocks.
///
/// # Errors
///
/// Propagates the first error returned by any section writer.
#[provable_contracts_macros::contract("pmat-core.yaml", equation = "check_compliance")]
pub fn format_human_output(report: &DuplicateReport) -> Result<String> {
    let mut rendered = String::new();

    write_header(&mut rendered)?;
    write_summary(&mut rendered, report)?;
    write_top_files_section(&mut rendered, report)?;
    write_duplicate_blocks_section(&mut rendered, report)?;

    Ok(rendered)
}
/// Appends the report title followed by one blank line to `output`.
fn write_header(output: &mut String) -> Result<()> {
    use crate::cli::colors as c;
    use std::fmt::Write;

    // Title line plus the blank separator line, emitted in one write.
    write!(output, "{}\n\n", c::header("Duplicate Code Analysis"))?;
    Ok(())
}
/// Appends the "Summary" section to `output`.
///
/// The section lists the total number of duplicate blocks, the duplicate and
/// total line counts, and the overall duplication percentage, followed by a
/// blank line.
fn write_summary(output: &mut String, report: &DuplicateReport) -> Result<()> {
    use crate::cli::colors as c;
    use std::fmt::Write;

    // Stringify the counters up front so the styling helpers can borrow them.
    let block_total = report.total_duplicates.to_string();
    let dup_lines = report.duplicate_lines.to_string();
    let all_lines = report.total_lines.to_string();

    writeln!(output, "{}", c::subheader("Summary"))?;
    writeln!(output, " Total duplicate blocks: {}", c::number(&block_total))?;
    writeln!(
        output,
        " Duplicate lines: {} / {}",
        c::number(&dup_lines),
        c::number(&all_lines)
    )?;
    // The embedded "\n" plus writeln!'s own newline leaves a blank line
    // after the section.
    writeln!(
        output,
        " Duplication percentage: {}\n",
        c::pct(report.duplication_percentage as f64, 5.0, 15.0)
    )?;

    Ok(())
}
/// Appends the "Top Files by Duplication" section to `output`.
///
/// Nothing is written when the report has no per-file statistics.
fn write_top_files_section(output: &mut String, report: &DuplicateReport) -> Result<()> {
    use crate::cli::colors as c;
    use std::fmt::Write;

    if !report.file_statistics.is_empty() {
        writeln!(output, "{}\n", c::subheader("Top Files by Duplication"))?;
        let ranked = get_sorted_file_stats(&report.file_statistics);
        write_file_stats_list(output, &ranked)?;
    }

    Ok(())
}
/// Returns the per-file statistics ordered by duplication percentage, highest
/// first.
///
/// The ordering is total and deterministic:
/// - files with a NaN percentage are ranked last, so they neither float to the
///   top nor break the comparator's total-order requirement (`sort_by` may
///   panic when the comparator is not a total order);
/// - files with equal percentages are ordered by path, so the result does not
///   depend on `HashMap` iteration order.
fn get_sorted_file_stats(
    file_stats: &std::collections::HashMap<String, FileStats>,
) -> Vec<(&String, &FileStats)> {
    /// Sort key for one file: its percentage, with NaN mapped below every
    /// real value.
    fn sort_key(stats: &FileStats) -> f64 {
        let pct = stats.duplication_percentage as f64;
        if pct.is_nan() {
            f64::NEG_INFINITY
        } else {
            pct
        }
    }

    let mut sorted_files: Vec<_> = file_stats.iter().collect();
    sorted_files.sort_by(|a, b| {
        // Descending by percentage, then ascending by path as the tie-break.
        sort_key(b.1)
            .total_cmp(&sort_key(a.1))
            .then_with(|| a.0.cmp(b.0))
    });
    sorted_files
}
/// Appends a numbered list of at most ten entries from `sorted_files` to
/// `output`, followed by a blank line.
///
/// Each entry shows the file name (without its directory), the duplication
/// percentage, and the duplicate / total line counts. Entries are written in
/// the order given.
fn write_file_stats_list(
    output: &mut String,
    sorted_files: &[(&String, &FileStats)],
) -> Result<()> {
    use crate::cli::colors as c;
    use std::fmt::Write;

    let shown = sorted_files.iter().take(10);
    for (rank, (file_path, stats)) in (1usize..).zip(shown) {
        let rank_text = rank.to_string();
        let dup_text = stats.duplicate_lines.to_string();
        let total_text = stats.total_lines.to_string();
        let filename = extract_filename(file_path);

        writeln!(
            output,
            " {}. {} - {} duplication ({} / {} lines)",
            c::number(&rank_text),
            c::path(filename),
            c::pct(stats.duplication_percentage as f64, 5.0, 15.0),
            c::number(&dup_text),
            c::number(&total_text),
        )?;
    }

    writeln!(output)?;
    Ok(())
}
/// Returns the final path component of `file_path`.
///
/// Falls back to the whole input when the path has no final component
/// (for example `""`, `"/"`, or a path ending in `..`) or when that component
/// cannot be represented as UTF-8.
fn extract_filename(file_path: &str) -> &str {
    match std::path::Path::new(file_path).file_name() {
        Some(name) => name.to_str().unwrap_or(file_path),
        None => file_path,
    }
}
/// Appends the "Duplicate Blocks" section to `output`.
///
/// Nothing is written when the report contains no duplicate blocks. Otherwise
/// the section heading is followed by the per-block details and, when
/// applicable, a trailing "more blocks" note.
fn write_duplicate_blocks_section(output: &mut String, report: &DuplicateReport) -> Result<()> {
    use crate::cli::colors as c;
    use std::fmt::Write;

    let blocks = &report.duplicate_blocks;
    if !blocks.is_empty() {
        writeln!(output, "{}\n", c::subheader("Duplicate Blocks"))?;
        write_block_details(output, blocks)?;
        write_remaining_blocks_count(output, blocks.len())?;
    }

    Ok(())
}
/// Appends header, locations and preview for at most the first twenty blocks
/// in `duplicate_blocks`.
///
/// Blocks are numbered from 1 in slice order.
fn write_block_details(output: &mut String, duplicate_blocks: &[DuplicateBlock]) -> Result<()> {
    let shown = duplicate_blocks.iter().take(20);
    for (block_num, block) in (1usize..).zip(shown) {
        write_block_header(output, block_num, block)?;
        write_block_locations(output, block)?;
        write_block_preview(output, block)?;
    }
    Ok(())
}
/// Appends the one-line heading for a duplicate block to `output`.
///
/// The heading shows `block_num`, the block's line count and the number of
/// locations where it occurs.
fn write_block_header(output: &mut String, block_num: usize, block: &DuplicateBlock) -> Result<()> {
    use crate::cli::colors as c;
    use std::fmt::Write;

    let line_count = block.lines.to_string();
    let location_count = block.locations.len().to_string();

    writeln!(
        output,
        " {}Block {}{} ({} lines, {} locations)",
        c::BOLD,
        block_num,
        c::RESET,
        c::number(&line_count),
        c::number(&location_count),
    )?;
    Ok(())
}
/// Appends one `file:start-end` line per location of `block` to `output`.
fn write_block_locations(output: &mut String, block: &DuplicateBlock) -> Result<()> {
    use crate::cli::colors as c;
    use std::fmt::Write;

    // Stops at the first formatting error, like a `?` inside a loop would.
    block.locations.iter().try_for_each(|loc| {
        writeln!(
            output,
            " {}{}{}:{}{}{}-{}{}{}",
            c::CYAN,
            loc.file,
            c::RESET,
            c::BOLD_WHITE,
            loc.start_line,
            c::RESET,
            c::BOLD_WHITE,
            loc.end_line,
            c::RESET,
        )
    })?;
    Ok(())
}
/// Appends a content preview for `block`, followed by a blank separator line.
///
/// The preview text is taken from the block's first location. A block with no
/// locations gets no preview lines (only the blank separator) instead of
/// panicking on an out-of-bounds index.
fn write_block_preview(output: &mut String, block: &DuplicateBlock) -> Result<()> {
    use crate::cli::colors as c;
    use std::fmt::Write;

    if let Some(first) = block.locations.first() {
        writeln!(output, " {}Preview:{}", c::DIM, c::RESET)?;
        writeln!(output, " {}{}{}", c::DIM, first.content_preview, c::RESET)?;
    }
    // Always emit the separator so consecutive blocks stay visually apart.
    writeln!(output)?;
    Ok(())
}
/// Appends an "... and N more blocks" note when `total_blocks` exceeds twenty.
///
/// `N` is `total_blocks - 20`. Nothing is written for twenty blocks or fewer.
fn write_remaining_blocks_count(output: &mut String, total_blocks: usize) -> Result<()> {
    use crate::cli::colors as c;
    use std::fmt::Write;

    if total_blocks <= 20 {
        return Ok(());
    }

    let hidden = total_blocks - 20;
    writeln!(output, " {}... and {} more blocks{}", c::DIM, hidden, c::RESET)?;
    Ok(())
}
/// Serializes `report` as a pretty-printed SARIF 2.1.0 document.
///
/// Every duplicate block becomes one `duplicate-code` result at `warning`
/// level, and every location of the block becomes a physical location carrying
/// the file URI and the start/end lines.
///
/// # Errors
///
/// Returns an error if JSON serialization fails.
fn format_sarif_output(report: &DuplicateReport) -> Result<String> {
    // Build the results bottom-up: locations first, then the result that
    // owns them.
    let results: Vec<_> = report
        .duplicate_blocks
        .iter()
        .map(|block| {
            let locations: Vec<_> = block
                .locations
                .iter()
                .map(|loc| {
                    serde_json::json!({
                        "physicalLocation": {
                            "artifactLocation": {
                                "uri": loc.file
                            },
                            "region": {
                                "startLine": loc.start_line,
                                "endLine": loc.end_line
                            }
                        }
                    })
                })
                .collect();
            let message = format!("Duplicate code block found ({} lines)", block.lines);

            serde_json::json!({
                "ruleId": "duplicate-code",
                "level": "warning",
                "message": {
                    "text": message
                },
                "locations": locations
            })
        })
        .collect();

    let document = serde_json::json!({
        "$schema": "https://json.schemastore.org/sarif-2.1.0.json",
        "version": "2.1.0",
        "runs": [{
            "tool": {
                "driver": {
                    "name": "pmat-duplicates",
                    "version": "1.0.0",
                    "informationUri": "https://github.com/paiml/paiml-mcp-agent-toolkit",
                    "semanticVersion": "2.97.0"
                }
            },
            "results": results
        }]
    });

    let rendered = serde_json::to_string_pretty(&document)?;
    Ok(rendered)
}
/// Renders `report` as CSV with the header
/// `Type,File1,Start1,End1,File2,Start2,End2`.
///
/// One row is emitted per duplicate block that has at least two locations,
/// pairing its first two locations; blocks with fewer locations are skipped.
/// The `Type` column is always `exact`.
///
/// File paths are quoted per RFC 4180 when they contain a comma, a double
/// quote or a line break, so such paths cannot shift or split columns.
///
/// NOTE(review): locations beyond the second are not reported, and `Type`
/// does not reflect the block's similarity — confirm whether that is intended.
fn format_csv_output(report: &DuplicateReport) -> Result<String> {
    use std::fmt::Write;

    /// Formats `value` as a single CSV field, quoting it and doubling any
    /// embedded quotes when it contains a CSV-significant character.
    fn csv_field(value: impl std::fmt::Display) -> String {
        let raw = value.to_string();
        if raw.contains(|ch: char| matches!(ch, ',' | '"' | '\n' | '\r')) {
            format!("\"{}\"", raw.replace('"', "\"\""))
        } else {
            raw
        }
    }

    let mut csv = String::from("Type,File1,Start1,End1,File2,Start2,End2\n");
    for block in &report.duplicate_blocks {
        let mut locations = block.locations.iter();
        if let (Some(loc1), Some(loc2)) = (locations.next(), locations.next()) {
            writeln!(
                csv,
                "exact,{},{},{},{},{},{}",
                csv_field(&loc1.file),
                loc1.start_line,
                loc1.end_line,
                csv_field(&loc2.file),
                loc2.start_line,
                loc2.end_line
            )?;
        }
    }
    Ok(csv)
}