use crate::violation::Violation;
use std::collections::HashMap;
/// Settings that control how duplicate violations are collapsed.
#[derive(Debug, Clone)]
pub struct DeduplicationConfig {
    /// Master switch: when `false`, `deduplicate_violations` returns its input unchanged.
    pub enabled: bool,
    /// Precedence per rule id. Among overlapping violations at one location the
    /// rule with the highest value is kept; rules absent from the map count as 0.
    pub rule_precedence: HashMap<String, u32>,
}

impl Default for DeduplicationConfig {
    /// Deduplication enabled, with `MDBOOK001` (100) ranked above `MD040` (50).
    fn default() -> Self {
        let defaults: [(&str, u32); 2] = [("MDBOOK001", 100), ("MD040", 50)];
        let rule_precedence = defaults
            .iter()
            .map(|&(rule_id, precedence)| (rule_id.to_string(), precedence))
            .collect();

        Self {
            enabled: true,
            rule_precedence,
        }
    }
}
/// Table of rule groups that report the same underlying problem.
///
/// Each entry maps a signature (a name for the shared problem) to the ids of
/// the rules that detect it.
#[derive(Debug, Clone)]
pub struct RuleOverlaps {
    /// Signature -> ids of the rules that overlap on that signature.
    overlaps: HashMap<String, Vec<String>>,
}

impl Default for RuleOverlaps {
    /// Builds the built-in table: `MD040` and `MDBOOK001` both report
    /// `missing_code_block_language`.
    fn default() -> Self {
        let mut overlaps = HashMap::new();
        overlaps.insert(
            "missing_code_block_language".to_string(),
            vec!["MD040".to_string(), "MDBOOK001".to_string()],
        );
        Self { overlaps }
    }
}
impl RuleOverlaps {
    /// Looks up the overlap group that lists `rule_id`.
    ///
    /// Returns the group's signature together with its rule ids, or `None`
    /// when no group mentions the rule. Should a rule be listed under several
    /// signatures, the lexicographically smallest signature is chosen so the
    /// answer never depends on hash-map iteration order.
    fn overlap_group_for(&self, rule_id: &str) -> Option<(&String, &Vec<String>)> {
        self.overlaps
            .iter()
            .filter(|(_, rule_ids)| rule_ids.iter().any(|id| id.as_str() == rule_id))
            .min_by_key(|(signature, _)| *signature)
    }

    /// Returns the ids of the other rules that share an overlap group with
    /// `rule_id`, in table order; empty when the rule overlaps with nothing.
    pub fn get_overlapping_rules(&self, rule_id: &str) -> Vec<String> {
        self.overlap_group_for(rule_id)
            .map(|(_, rule_ids)| {
                rule_ids
                    .iter()
                    .filter(|id| id.as_str() != rule_id)
                    .cloned()
                    .collect()
            })
            .unwrap_or_default()
    }

    /// Returns the overlap signature of `violation` when its rule belongs to
    /// an overlap group, and `None` otherwise.
    pub fn is_overlapping_violation(&self, violation: &Violation) -> Option<String> {
        self.overlap_group_for(&violation.rule_id)
            .map(|(signature, _)| signature.clone())
    }

    /// Computes the grouping signature of `violation`.
    ///
    /// Rules found in the overlap table share their group's signature; every
    /// other rule gets `unique_<rule_id>`. The signature is derived from the
    /// table itself so the two cannot drift apart.
    fn get_violation_signature(&self, violation: &Violation) -> String {
        match self.overlap_group_for(&violation.rule_id) {
            Some((signature, _)) => signature.clone(),
            None => format!("unique_{}", violation.rule_id),
        }
    }
}
/// Collapses violations that overlapping rules report for the same location.
///
/// Violations are grouped by line, column and overlap signature. When a group
/// whose signature is in the overlap table holds more than one violation, only
/// the violation whose rule has the highest value in `config.rule_precedence`
/// survives (rules missing from the map count as 0; on a tie the violation that
/// appeared first in the input wins). Every other violation is kept as is.
///
/// The result is ordered by line, then column. Violations at the same position
/// keep their relative input order, so the output is deterministic.
///
/// When `config.enabled` is `false` the input is returned untouched.
pub fn deduplicate_violations(
    violations: Vec<Violation>,
    config: &DeduplicationConfig,
) -> Vec<Violation> {
    if !config.enabled {
        return violations;
    }

    let overlaps = RuleOverlaps::default();

    // Every violation carries its input index: hash-map iteration order is
    // arbitrary, so the index is what makes the final ordering reproducible.
    let mut violation_groups: HashMap<_, Vec<(usize, Violation)>> = HashMap::new();
    for (index, violation) in violations.into_iter().enumerate() {
        let group_key = (
            violation.line,
            violation.column,
            overlaps.get_violation_signature(&violation),
        );
        violation_groups
            .entry(group_key)
            .or_default()
            .push((index, violation));
    }

    let mut deduplicated: Vec<(usize, Violation)> = Vec::new();
    for ((_, _, signature), group) in violation_groups {
        if group.len() > 1 && overlaps.overlaps.contains_key(&signature) {
            // `min_by_key` returns the first minimum, so with a reversed key it
            // yields the earliest violation among those with the top precedence.
            let winner = group.into_iter().min_by_key(|(_, violation)| {
                let precedence = config
                    .rule_precedence
                    .get(&violation.rule_id)
                    .copied()
                    .unwrap_or(0);
                std::cmp::Reverse(precedence)
            });
            deduplicated.extend(winner);
        } else {
            deduplicated.extend(group);
        }
    }

    // The input index makes every key distinct, so an unstable sort is safe.
    deduplicated
        .sort_unstable_by_key(|(index, violation)| (violation.line, violation.column, *index));
    deduplicated
        .into_iter()
        .map(|(_, violation)| violation)
        .collect()
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::violation::{Severity, Violation};

    /// Builds a warning-severity violation named "test", without a fix, at the
    /// given position.
    fn create_test_violation(
        rule_id: &str,
        line: usize,
        column: usize,
        message: &str,
    ) -> Violation {
        Violation {
            rule_id: String::from(rule_id),
            rule_name: String::from("test"),
            message: String::from(message),
            line,
            column,
            severity: Severity::Warning,
            fix: None,
        }
    }

    /// Runs deduplication with the default configuration.
    fn dedup_with_defaults(violations: Vec<Violation>) -> Vec<Violation> {
        deduplicate_violations(violations, &DeduplicationConfig::default())
    }

    /// Violations from unrelated rules at different lines pass through unchanged.
    #[test]
    fn test_no_duplicates() {
        let input = vec![
            create_test_violation("MD001", 1, 1, "Test message 1"),
            create_test_violation("MD002", 2, 1, "Test message 2"),
        ];

        let output = dedup_with_defaults(input.clone());

        assert_eq!(output.len(), 2);
        assert_eq!(output, input);
    }

    /// With default precedence, MDBOOK001 wins over MD040 at the same location.
    #[test]
    fn test_md040_mdbook001_deduplication() {
        let md040 = create_test_violation(
            "MD040",
            5,
            1,
            "Fenced code block is missing language specification",
        );
        let mdbook001 = create_test_violation(
            "MDBOOK001",
            5,
            1,
            "Code block is missing language tag for syntax highlighting",
        );

        let output = dedup_with_defaults(vec![md040, mdbook001]);

        assert_eq!(output.len(), 1);
        let survivor = &output[0];
        assert_eq!(survivor.rule_id, "MDBOOK001");
        assert_eq!(survivor.line, 5);
    }

    /// Each location is deduplicated independently and the result is line-ordered.
    #[test]
    fn test_multiple_locations() {
        let input = vec![
            create_test_violation("MD040", 5, 1, "Missing language at line 5"),
            create_test_violation("MDBOOK001", 5, 1, "Missing language at line 5"),
            create_test_violation("MD040", 10, 1, "Missing language at line 10"),
            create_test_violation("MDBOOK001", 10, 1, "Missing language at line 10"),
        ];

        let output = dedup_with_defaults(input);

        assert_eq!(output.len(), 2);
        assert!(output.iter().all(|v| v.rule_id == "MDBOOK001"));
        let lines: Vec<usize> = output.iter().map(|v| v.line).collect();
        assert_eq!(lines, vec![5, 10]);
    }

    /// A disabled configuration returns the input untouched.
    #[test]
    fn test_deduplication_disabled() {
        let input = vec![
            create_test_violation("MD040", 5, 1, "Message 1"),
            create_test_violation("MDBOOK001", 5, 1, "Message 2"),
        ];
        let mut config = DeduplicationConfig::default();
        config.enabled = false;

        let output = deduplicate_violations(input.clone(), &config);

        assert_eq!(output.len(), 2);
        assert_eq!(output, input);
    }

    /// Raising MD040 above MDBOOK001 flips which violation survives.
    #[test]
    fn test_custom_precedence() {
        let input = vec![
            create_test_violation("MD040", 5, 1, "MD040 message"),
            create_test_violation("MDBOOK001", 5, 1, "MDBOOK001 message"),
        ];
        let mut config = DeduplicationConfig::default();
        config.rule_precedence.extend(vec![
            ("MD040".to_string(), 200),
            ("MDBOOK001".to_string(), 100),
        ]);

        let output = deduplicate_violations(input, &config);

        assert_eq!(output.len(), 1);
        assert_eq!(output[0].rule_id, "MD040");
    }

    /// The built-in table pairs MD040 with MDBOOK001 and nothing else.
    #[test]
    fn test_rule_overlaps() {
        let overlaps = RuleOverlaps::default();

        let expected_partners = [("MD040", "MDBOOK001"), ("MDBOOK001", "MD040")];
        for &(rule_id, partner) in expected_partners.iter() {
            assert_eq!(overlaps.get_overlapping_rules(rule_id), vec![partner]);
        }

        assert!(overlaps.get_overlapping_rules("MD001").is_empty());
    }
}