use super::{Collector, CollectorError, CollectorOutput};
use crate::context::Context;
use std::collections::HashMap;
use std::fs;
/// Collector that reports lines repeated across the Rust source files of a workspace.
///
/// The type carries no state; every instance behaves identically.
pub struct DuplicatesCollector;

impl DuplicatesCollector {
    /// Creates a new, stateless collector.
    pub fn new() -> Self {
        DuplicatesCollector
    }
}
impl Collector for DuplicatesCollector {
    /// Returns the identifier of this collector: `"duplicates"`.
    fn name(&self) -> &'static str {
        "duplicates"
    }

    /// Always returns `true`.
    fn is_available(&self) -> bool {
        true
    }

    /// Counts lines that occur more than once across the `.rs` files located
    /// directly inside `ctx.workspace_root` (sub-directories are not visited).
    ///
    /// Lines are compared after trimming surrounding whitespace. Blank lines and
    /// lines that start with `//` or `/*` are ignored for duplicate detection but
    /// still contribute to `totalLines`.
    ///
    /// The returned output carries a JSON object in `stdout` with the keys
    /// `totalLines`, `duplicateLines`, `filesWithDuplicates` and `duplicateFiles`
    /// (sorted, so the report is reproducible). The status is `Fail` when at least
    /// one file contains a repeated line and `Pass` otherwise.
    ///
    /// The scan is best-effort: an unreadable directory, unreadable entries and
    /// files that cannot be read as UTF-8 text are skipped silently, which means
    /// an unreadable workspace root yields zero counts and a `Pass` status.
    ///
    /// # Errors
    ///
    /// This implementation always returns `Ok`.
    fn collect(&self, ctx: &Context) -> Result<CollectorOutput, CollectorError> {
        /// A trimmed line takes part in duplicate detection unless it is blank
        /// or begins a comment.
        fn is_significant(line: &str) -> bool {
            !line.is_empty() && !line.starts_with("//") && !line.starts_with("/*")
        }

        /// Converts a count to `u32`, saturating instead of silently truncating.
        fn saturating_u32(count: usize) -> u32 {
            u32::try_from(count).unwrap_or(u32::MAX)
        }

        let start = std::time::Instant::now();

        // Phase 1: load every readable `.rs` file as (display path, content).
        let mut sources: Vec<(String, String)> = Vec::new();
        if let Ok(entries) = fs::read_dir(&ctx.workspace_root) {
            for entry in entries.flatten() {
                let path = entry.path();
                if !path.is_file() || !path.extension().is_some_and(|ext| ext == "rs") {
                    continue;
                }
                // Best-effort: skip files that cannot be read as UTF-8 text.
                let Ok(content) = fs::read_to_string(&path) else {
                    continue;
                };
                sources.push((path.to_string_lossy().into_owned(), content));
            }
        }

        // Phase 2: count how often each significant line occurs over all files.
        // Keys borrow from `sources`, so no per-line allocation is needed.
        let mut total_lines: u32 = 0;
        let mut line_counts: HashMap<&str, u32> = HashMap::new();
        for (_, content) in &sources {
            let mut lines_in_file: usize = 0;
            for line in content.lines().map(str::trim) {
                lines_in_file += 1;
                if is_significant(line) {
                    let seen = line_counts.entry(line).or_insert(0);
                    *seen = seen.saturating_add(1);
                }
            }
            total_lines = total_lines.saturating_add(saturating_u32(lines_in_file));
        }

        // Phase 3: per file, tally the lines whose global count exceeds one.
        // Only significant lines were inserted into `line_counts`, so the
        // lookup alone filters out blank and comment lines.
        let mut duplicate_lines: u32 = 0;
        let mut duplicate_files: Vec<String> = Vec::new();
        for (file, content) in &sources {
            let repeated = content
                .lines()
                .map(str::trim)
                .filter(|line| line_counts.get(line).is_some_and(|&seen| seen > 1))
                .count();
            if repeated > 0 {
                duplicate_lines = duplicate_lines.saturating_add(saturating_u32(repeated));
                duplicate_files.push(file.clone());
            }
        }
        // Directory listing order is platform-dependent; sort for a stable report.
        duplicate_files.sort_unstable();
        let files_with_duplicates = saturating_u32(duplicate_files.len());

        let duration_ms = u64::try_from(start.elapsed().as_millis()).unwrap_or(u64::MAX);
        let status = if files_with_duplicates > 0 {
            crate::schema::CollectorStatus::Fail
        } else {
            crate::schema::CollectorStatus::Pass
        };
        let details = serde_json::json!({
            "totalLines": total_lines,
            "duplicateLines": duplicate_lines,
            "filesWithDuplicates": files_with_duplicates,
            "duplicateFiles": duplicate_files,
        });

        Ok(CollectorOutput {
            status,
            duration_ms,
            stdout: serde_json::to_string(&details).unwrap_or_default(),
            stderr: String::new(),
        })
    }
}
impl Default for DuplicatesCollector {
fn default() -> Self {
Self::new()
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// The collector identifies itself as `"duplicates"`.
    #[test]
    fn test_duplicates_collector_name() {
        let reported_name = DuplicatesCollector::new().name();
        assert_eq!(reported_name, "duplicates");
    }

    /// The collector reports itself as available.
    #[test]
    fn test_duplicates_collector_available() {
        let available = DuplicatesCollector::new().is_available();
        assert!(available);
    }
}