use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::path::Path;
/// Aggregated statistics for a corpus of C source files: size counters plus
/// histograms of compiler error codes, coarse error categories, and the C
/// constructs observed in the sources.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct ErrorHistogram {
    /// Number of `.c` files scanned (incremented per file by `analyze_corpus`).
    pub total_files: usize,
    /// Total number of text lines across all scanned files.
    pub total_loc: usize,
    /// Occurrence count per raw error code string (e.g. "E0382").
    pub by_error_code: HashMap<String, usize>,
    /// Occurrence count per coarse category (derived via `categorize_error`).
    pub by_category: HashMap<ErrorCategory, usize>,
    /// Occurrence count per C construct detected in the corpus.
    pub construct_coverage: HashMap<CConstruct, usize>,
}
/// Coarse grouping of compiler error codes, as assigned by `categorize_error`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum ErrorCategory {
    /// Move/ownership errors (e.g. E0382, E0505, E0506, E0507).
    Ownership,
    /// Borrow-conflict errors (e.g. E0499, E0502, E0503, E0500).
    Borrowing,
    /// Lifetime errors (e.g. E0597, E0515, E0716, E0623, E0106).
    Lifetime,
    /// Type mismatches and trait-bound errors (e.g. E0308, E0277, E0369).
    Type,
    /// Any error code not covered by the categories above.
    Other,
}
/// C language constructs tracked for corpus coverage.
///
/// NOTE(review): only a subset of these variants is currently detected by
/// `analyze_corpus` (e.g. `Array`, `FunctionPointer`, `Union`, `Enum`,
/// `Macro`, and `DoWhile` are never recorded there).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum CConstruct {
    RawPointer,
    Array,
    MallocFree,
    FunctionPointer,
    Struct,
    Union,
    Enum,
    Typedef,
    Macro,
    Goto,
    Switch,
    ForLoop,
    WhileLoop,
    DoWhile,
}
impl ErrorHistogram {
    /// Creates an empty histogram with all counters at zero.
    pub fn new() -> Self {
        Self::default()
    }

    /// Tallies one occurrence of `error_code`, updating both the per-code
    /// count and the count of its derived `ErrorCategory`.
    pub fn record_error(&mut self, error_code: &str) {
        let code_slot = self.by_error_code.entry(error_code.to_string()).or_default();
        *code_slot += 1;

        let category = categorize_error(error_code);
        let category_slot = self.by_category.entry(category).or_default();
        *category_slot += 1;
    }

    /// Tallies one sighting of a C construct.
    pub fn record_construct(&mut self, construct: CConstruct) {
        let slot = self.construct_coverage.entry(construct).or_default();
        *slot += 1;
    }

    /// Total number of recorded errors across every error code.
    pub fn total_errors(&self) -> usize {
        self.by_error_code.values().fold(0, |acc, count| acc + count)
    }

    /// Relative frequency of each error code.
    ///
    /// Returns an empty map when no errors have been recorded.
    pub fn error_distribution(&self) -> HashMap<String, f64> {
        let total = self.total_errors();
        if total == 0 {
            return HashMap::new();
        }
        let mut dist = HashMap::new();
        for (code, count) in &self.by_error_code {
            dist.insert(code.clone(), *count as f64 / total as f64);
        }
        dist
    }

    /// Relative frequency of each error category.
    ///
    /// Returns an empty map when no errors have been recorded.
    pub fn category_distribution(&self) -> HashMap<ErrorCategory, f64> {
        let total: usize = self.by_category.values().sum();
        if total == 0 {
            return HashMap::new();
        }
        let mut dist = HashMap::new();
        for (category, count) in &self.by_category {
            dist.insert(*category, *count as f64 / total as f64);
        }
        dist
    }
}
/// Maps a compiler error code string to its coarse `ErrorCategory`.
///
/// Any code not in the known tables falls back to `ErrorCategory::Other`.
pub fn categorize_error(error_code: &str) -> ErrorCategory {
    const OWNERSHIP: &[&str] = &["E0382", "E0505", "E0506", "E0507"];
    const BORROWING: &[&str] = &["E0499", "E0502", "E0503", "E0500"];
    const LIFETIME: &[&str] = &["E0597", "E0515", "E0716", "E0623", "E0106"];
    const TYPES: &[&str] = &["E0308", "E0277", "E0369"];

    if OWNERSHIP.contains(&error_code) {
        ErrorCategory::Ownership
    } else if BORROWING.contains(&error_code) {
        ErrorCategory::Borrowing
    } else if LIFETIME.contains(&error_code) {
        ErrorCategory::Lifetime
    } else if TYPES.contains(&error_code) {
        ErrorCategory::Type
    } else {
        ErrorCategory::Other
    }
}
/// Pairwise diversity measurements between a primary and a comparison
/// corpus, as produced by `compare_histograms`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DiversityMetrics {
    /// Jensen–Shannon divergence between the two error-code distributions
    /// (computed with natural logarithms; 0 means identical distributions).
    pub js_divergence: f64,
    /// KL(P || Q) of the error-code distributions, with epsilon smoothing.
    pub kl_divergence: f64,
    /// Shared error codes divided by the size of the smaller code set.
    pub overlap: f64,
    /// Number of distinct error codes in the primary corpus.
    pub primary_unique_errors: usize,
    /// Number of distinct error codes in the comparison corpus.
    pub comparison_unique_errors: usize,
    /// Number of error codes present in both corpora.
    pub shared_error_codes: usize,
    /// Jaccard-style ratio: shared codes divided by the union of all codes.
    pub coverage_ratio: f64,
    /// Snapshot of `is_acceptable()` taken when the metrics were computed.
    pub passes_threshold: bool,
}
impl DiversityMetrics {
    /// Whether this comparison is within acceptable diversity bounds.
    ///
    /// NOTE: these thresholds mirror the values in
    /// `DiversityConfig::default()`; keep the two in sync.
    pub fn is_acceptable(&self) -> bool {
        const MAX_JS_DIVERGENCE: f64 = 0.15;
        const MIN_COVERAGE_RATIO: f64 = 0.6;
        self.js_divergence < MAX_JS_DIVERGENCE && self.coverage_ratio > MIN_COVERAGE_RATIO
    }
}
/// Computes diversity metrics between a primary and a comparison corpus
/// based on their error-code distributions.
///
/// - `overlap` is shared codes over the smaller code set (0.0 when either
///   corpus has no errors).
/// - `coverage_ratio` is shared codes over the union of codes (defined as
///   1.0 when both corpora are empty, so empty-vs-empty compares as equal).
/// - `passes_threshold` records `is_acceptable()` at computation time.
pub fn compare_histograms(
    primary: &ErrorHistogram,
    comparison: &ErrorHistogram,
) -> DiversityMetrics {
    let p_dist = primary.error_distribution();
    let q_dist = comparison.error_distribution();
    let all_codes: std::collections::HashSet<_> =
        p_dist.keys().chain(q_dist.keys()).cloned().collect();
    let shared: usize = p_dist.keys().filter(|k| q_dist.contains_key(*k)).count();

    let smaller = p_dist.len().min(q_dist.len());
    let overlap = if smaller > 0 {
        shared as f64 / smaller as f64
    } else {
        0.0
    };
    let coverage_ratio = if all_codes.is_empty() {
        1.0
    } else {
        shared as f64 / all_codes.len() as f64
    };

    // Build the metrics once, then fill in the threshold flag — previously
    // the struct was constructed with a placeholder and rebuilt via struct
    // update syntax, which cloned intent across two literals.
    let mut metrics = DiversityMetrics {
        js_divergence: jensen_shannon_divergence(&p_dist, &q_dist, &all_codes),
        kl_divergence: kl_divergence_safe(&p_dist, &q_dist, &all_codes),
        overlap,
        primary_unique_errors: p_dist.len(),
        comparison_unique_errors: q_dist.len(),
        shared_error_codes: shared,
        coverage_ratio,
        passes_threshold: false,
    };
    metrics.passes_threshold = metrics.is_acceptable();
    metrics
}
/// Jensen–Shannon divergence between two discrete distributions over
/// `all_keys`, using natural logarithms (so the value lies in [0, ln 2]).
///
/// Missing keys are treated as probability 0.0, and the mixture
/// M = (P + Q) / 2 is floored at a small epsilon — exactly the smoothing
/// `kl_divergence_safe` applies — to avoid ln(0).
fn jensen_shannon_divergence(
    p: &HashMap<String, f64>,
    q: &HashMap<String, f64>,
    all_keys: &std::collections::HashSet<String>,
) -> f64 {
    if all_keys.is_empty() {
        return 0.0;
    }
    let epsilon = 1e-10;
    let mut kl_pm = 0.0; // KL(P || M)
    let mut kl_qm = 0.0; // KL(Q || M)
    for key in all_keys {
        let p_val = p.get(key).copied().unwrap_or(0.0);
        let q_val = q.get(key).copied().unwrap_or(0.0);
        let m_val = ((p_val + q_val) / 2.0).max(epsilon);
        if p_val > epsilon {
            kl_pm += p_val * (p_val / m_val).ln();
        }
        if q_val > epsilon {
            kl_qm += q_val * (q_val / m_val).ln();
        }
    }
    (kl_pm + kl_qm) / 2.0
}
/// Kullback–Leibler divergence KL(P || Q) over `all_keys`, with smoothing.
///
/// Missing keys contribute probability 0.0; terms with negligible `p` mass
/// are skipped, and `q` is floored at a small epsilon so a zero denominator
/// never produces infinity.
fn kl_divergence_safe(
    p: &HashMap<String, f64>,
    q: &HashMap<String, f64>,
    all_keys: &std::collections::HashSet<String>,
) -> f64 {
    if all_keys.is_empty() {
        return 0.0;
    }
    const EPSILON: f64 = 1e-10;
    all_keys
        .iter()
        .filter_map(|key| {
            let p_val = p.get(key).copied().unwrap_or(0.0);
            if p_val <= EPSILON {
                return None;
            }
            let q_smooth = q.get(key).copied().unwrap_or(0.0).max(EPSILON);
            Some(p_val * (p_val / q_smooth).ln())
        })
        .sum::<f64>()
}
/// Thresholds used when validating corpus diversity.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DiversityConfig {
    /// Maximum allowed Jensen–Shannon divergence between corpora.
    pub max_js_divergence: f64,
    /// Minimum required shared-code coverage ratio.
    pub min_coverage_ratio: f64,
    /// Minimum number of distinct error codes expected.
    /// NOTE(review): this field is not currently enforced by
    /// `DiversityValidation::add_comparison` — confirm intended use.
    pub min_error_codes: usize,
}
impl Default for DiversityConfig {
fn default() -> Self {
Self {
max_js_divergence: 0.15,
min_coverage_ratio: 0.6,
min_error_codes: 5,
}
}
}
/// Accumulated result of validating a primary corpus against one or more
/// comparison corpora.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DiversityValidation {
    /// Histogram of the corpus being validated.
    pub primary_histogram: ErrorHistogram,
    /// Each comparison: (corpus name, its histogram, computed metrics).
    pub comparisons: Vec<(String, ErrorHistogram, DiversityMetrics)>,
    /// True until any comparison violates a configured threshold.
    pub passed: bool,
    /// Human-readable descriptions of every threshold violation.
    pub issues: Vec<String>,
}
impl DiversityValidation {
    /// Starts a validation run for `primary`; the run passes until a
    /// comparison violates a threshold.
    pub fn new(primary: ErrorHistogram) -> Self {
        Self {
            primary_histogram: primary,
            comparisons: Vec::new(),
            passed: true,
            issues: Vec::new(),
        }
    }

    /// Compares `histogram` against the primary corpus and records the
    /// result under `name`.
    ///
    /// Appends an issue (and fails the whole validation) when the JS
    /// divergence exceeds `config.max_js_divergence` or the coverage ratio
    /// falls below `config.min_coverage_ratio`.
    ///
    /// NOTE(review): `config.min_error_codes` is not checked here — confirm
    /// whether that threshold should also be enforced.
    pub fn add_comparison(
        &mut self,
        name: &str,
        histogram: ErrorHistogram,
        config: &DiversityConfig,
    ) {
        let metrics = compare_histograms(&self.primary_histogram, &histogram);
        if metrics.js_divergence > config.max_js_divergence {
            self.issues.push(format!(
                "{}: JS divergence {:.3} exceeds threshold {:.3}",
                name, metrics.js_divergence, config.max_js_divergence
            ));
            self.passed = false;
        }
        if metrics.coverage_ratio < config.min_coverage_ratio {
            self.issues.push(format!(
                "{}: Coverage ratio {:.2} below threshold {:.2}",
                name, metrics.coverage_ratio, config.min_coverage_ratio
            ));
            self.passed = false;
        }
        self.comparisons
            .push((name.to_string(), histogram, metrics));
    }

    /// Renders a Markdown report: primary-corpus summary, a per-comparison
    /// results table, and the overall pass/fail status with any issues.
    pub fn to_report(&self) -> String {
        let mut report = String::new();
        report.push_str("## Corpus Diversity Validation Report\n\n");
        report.push_str(&format!(
            "**Primary Corpus**: {} files, {} LOC, {} errors\n\n",
            self.primary_histogram.total_files,
            self.primary_histogram.total_loc,
            self.primary_histogram.total_errors()
        ));
        if self.comparisons.is_empty() {
            report.push_str("*No comparison corpora provided*\n\n");
        } else {
            report.push_str("### Comparison Results\n\n");
            report.push_str("| Corpus | JS Divergence | Coverage | Status |\n");
            report.push_str("|--------|---------------|----------|--------|\n");
            for (name, _, metrics) in &self.comparisons {
                // Fixed: the status markers were mojibake with a literal
                // newline embedded in the string, which split the Markdown
                // table row across two lines.
                let status = if metrics.passes_threshold {
                    "✅ PASS"
                } else {
                    "❌ FAIL"
                };
                report.push_str(&format!(
                    "| {} | {:.3} | {:.1}% | {} |\n",
                    name,
                    metrics.js_divergence,
                    metrics.coverage_ratio * 100.0,
                    status
                ));
            }
        }
        report.push_str("\n### Overall Status: ");
        if self.passed {
            report.push_str("✅ PASSED\n");
        } else {
            report.push_str("❌ FAILED\n\n");
            report.push_str("**Issues:**\n");
            for issue in &self.issues {
                report.push_str(&format!("- {}\n", issue));
            }
        }
        report
    }
}
/// Scans `path` recursively for `.c` files, counting files and lines and
/// recording which C constructs appear, via naive substring heuristics.
///
/// Best-effort by design: unreadable directory entries and files that fail
/// `read_to_string` are silently skipped rather than aborting the scan, so
/// the `Result` is currently always `Ok`. The heuristics do not exclude
/// comments or string literals, and several `CConstruct` variants (`Array`,
/// `FunctionPointer`, `Union`, `Enum`, `Macro`, `DoWhile`) are never
/// detected here.
pub fn analyze_corpus(path: &Path) -> Result<ErrorHistogram, std::io::Error> {
    use std::fs;
    let mut histogram = ErrorHistogram::new();
    for entry in walkdir::WalkDir::new(path)
        .into_iter()
        .filter_map(|e| e.ok())
        .filter(|e| e.path().extension().and_then(|s| s.to_str()) == Some("c"))
    {
        histogram.total_files += 1;
        if let Ok(content) = fs::read_to_string(entry.path()) {
            histogram.total_loc += content.lines().count();
            scan_constructs(&content, &mut histogram);
        }
    }
    Ok(histogram)
}

/// Records each C construct whose textual signature appears in `content`.
fn scan_constructs(content: &str, histogram: &mut ErrorHistogram) {
    // Heap allocation calls.
    if content.contains("malloc(") || content.contains("calloc(") {
        histogram.record_construct(CConstruct::MallocFree);
    }
    // Pointer declarations. (The original also required a bare "*", which is
    // implied by either pattern below, so that redundant check was dropped.)
    if content.contains("int *") || content.contains("char *") {
        histogram.record_construct(CConstruct::RawPointer);
    }
    if content.contains("struct ") {
        histogram.record_construct(CConstruct::Struct);
    }
    if content.contains("typedef ") {
        histogram.record_construct(CConstruct::Typedef);
    }
    if content.contains("switch (") || content.contains("switch(") {
        histogram.record_construct(CConstruct::Switch);
    }
    if content.contains("for (") || content.contains("for(") {
        histogram.record_construct(CConstruct::ForLoop);
    }
    if content.contains("while (") || content.contains("while(") {
        histogram.record_construct(CConstruct::WhileLoop);
    }
    if content.contains("goto ") {
        histogram.record_construct(CConstruct::Goto);
    }
}
#[cfg(test)]
mod tests {
    // Unit tests covering: histogram bookkeeping, error categorization,
    // distribution math, JS/KL divergence, corpus comparison, validation
    // and report generation, corpus scanning, and serde round-trips.
    use super::*;

    // --- ErrorHistogram construction and recording ---

    #[test]
    fn test_error_histogram_new() {
        let hist = ErrorHistogram::new();
        assert_eq!(hist.total_files, 0);
        assert_eq!(hist.total_errors(), 0);
    }

    #[test]
    fn test_record_error() {
        let mut hist = ErrorHistogram::new();
        hist.record_error("E0382");
        hist.record_error("E0382");
        hist.record_error("E0499");
        assert_eq!(hist.total_errors(), 3);
        assert_eq!(hist.by_error_code.get("E0382"), Some(&2));
        assert_eq!(hist.by_error_code.get("E0499"), Some(&1));
    }

    // --- categorize_error mapping ---

    #[test]
    fn test_categorize_ownership_errors() {
        assert_eq!(categorize_error("E0382"), ErrorCategory::Ownership);
        assert_eq!(categorize_error("E0505"), ErrorCategory::Ownership);
        assert_eq!(categorize_error("E0506"), ErrorCategory::Ownership);
    }

    #[test]
    fn test_categorize_borrowing_errors() {
        assert_eq!(categorize_error("E0499"), ErrorCategory::Borrowing);
        assert_eq!(categorize_error("E0502"), ErrorCategory::Borrowing);
        assert_eq!(categorize_error("E0503"), ErrorCategory::Borrowing);
    }

    #[test]
    fn test_categorize_lifetime_errors() {
        assert_eq!(categorize_error("E0597"), ErrorCategory::Lifetime);
        assert_eq!(categorize_error("E0515"), ErrorCategory::Lifetime);
        assert_eq!(categorize_error("E0716"), ErrorCategory::Lifetime);
    }

    #[test]
    fn test_categorize_type_errors() {
        assert_eq!(categorize_error("E0308"), ErrorCategory::Type);
        assert_eq!(categorize_error("E0277"), ErrorCategory::Type);
    }

    #[test]
    fn test_categorize_other_errors() {
        assert_eq!(categorize_error("E9999"), ErrorCategory::Other);
        assert_eq!(categorize_error("unknown"), ErrorCategory::Other);
    }

    // --- distributions ---

    #[test]
    fn test_error_distribution() {
        let mut hist = ErrorHistogram::new();
        hist.record_error("E0382");
        hist.record_error("E0382");
        hist.record_error("E0499");
        hist.record_error("E0499");
        let dist = hist.error_distribution();
        assert!((dist.get("E0382").unwrap() - 0.5).abs() < 0.001);
        assert!((dist.get("E0499").unwrap() - 0.5).abs() < 0.001);
    }

    #[test]
    fn test_category_distribution() {
        let mut hist = ErrorHistogram::new();
        hist.record_error("E0382");
        hist.record_error("E0499");
        let dist = hist.category_distribution();
        assert!((dist.get(&ErrorCategory::Ownership).unwrap() - 0.5).abs() < 0.001);
        assert!((dist.get(&ErrorCategory::Borrowing).unwrap() - 0.5).abs() < 0.001);
    }

    // --- divergence math ---

    #[test]
    fn test_jensen_shannon_identical() {
        let mut p = HashMap::new();
        p.insert("E0382".to_string(), 0.5);
        p.insert("E0499".to_string(), 0.5);
        let keys: std::collections::HashSet<_> = p.keys().cloned().collect();
        let js = jensen_shannon_divergence(&p, &p, &keys);
        assert!(js < 0.001);
    }

    #[test]
    fn test_jensen_shannon_different() {
        let mut p = HashMap::new();
        p.insert("E0382".to_string(), 1.0);
        let mut q = HashMap::new();
        q.insert("E0499".to_string(), 1.0);
        let keys: std::collections::HashSet<_> = p.keys().chain(q.keys()).cloned().collect();
        let js = jensen_shannon_divergence(&p, &q, &keys);
        assert!(js > 0.5);
    }

    // --- compare_histograms ---

    #[test]
    fn test_compare_histograms_identical() {
        let mut hist = ErrorHistogram::new();
        hist.record_error("E0382");
        hist.record_error("E0499");
        let metrics = compare_histograms(&hist, &hist);
        assert!(metrics.js_divergence < 0.001);
        assert!((metrics.coverage_ratio - 1.0).abs() < 0.001);
        assert!(metrics.is_acceptable());
    }

    #[test]
    fn test_compare_histograms_partial_overlap() {
        let mut hist1 = ErrorHistogram::new();
        hist1.record_error("E0382");
        hist1.record_error("E0499");
        hist1.record_error("E0597");
        let mut hist2 = ErrorHistogram::new();
        hist2.record_error("E0382");
        hist2.record_error("E0499");
        hist2.record_error("E0308");
        let metrics = compare_histograms(&hist1, &hist2);
        assert_eq!(metrics.shared_error_codes, 2);
        assert!(metrics.js_divergence < 0.5);
    }

    // --- configuration and metric thresholds ---

    #[test]
    fn test_diversity_config_default() {
        let config = DiversityConfig::default();
        assert!((config.max_js_divergence - 0.15).abs() < 0.001);
        assert!((config.min_coverage_ratio - 0.6).abs() < 0.001);
        assert_eq!(config.min_error_codes, 5);
    }

    #[test]
    fn test_diversity_metrics_acceptable() {
        let metrics = DiversityMetrics {
            js_divergence: 0.1,
            kl_divergence: 0.2,
            overlap: 0.8,
            primary_unique_errors: 10,
            comparison_unique_errors: 10,
            shared_error_codes: 8,
            coverage_ratio: 0.8,
            passes_threshold: true,
        };
        assert!(metrics.is_acceptable());
    }

    #[test]
    fn test_diversity_metrics_unacceptable_divergence() {
        let metrics = DiversityMetrics {
            js_divergence: 0.25,
            kl_divergence: 0.3,
            overlap: 0.8,
            primary_unique_errors: 10,
            comparison_unique_errors: 10,
            shared_error_codes: 8,
            coverage_ratio: 0.8,
            passes_threshold: false,
        };
        assert!(!metrics.is_acceptable());
    }

    #[test]
    fn test_diversity_metrics_unacceptable_coverage() {
        let metrics = DiversityMetrics {
            js_divergence: 0.1,
            kl_divergence: 0.2,
            overlap: 0.3,
            primary_unique_errors: 10,
            comparison_unique_errors: 10,
            shared_error_codes: 3,
            coverage_ratio: 0.3,
            passes_threshold: false,
        };
        assert!(!metrics.is_acceptable());
    }

    // --- DiversityValidation ---

    #[test]
    fn test_diversity_validation_new() {
        let hist = ErrorHistogram::new();
        let validation = DiversityValidation::new(hist);
        assert!(validation.passed);
        assert!(validation.issues.is_empty());
        assert!(validation.comparisons.is_empty());
    }

    #[test]
    fn test_diversity_validation_add_comparison_pass() {
        let mut primary = ErrorHistogram::new();
        primary.record_error("E0382");
        primary.record_error("E0499");
        let mut comparison = ErrorHistogram::new();
        comparison.record_error("E0382");
        comparison.record_error("E0499");
        let mut validation = DiversityValidation::new(primary);
        validation.add_comparison("test", comparison, &DiversityConfig::default());
        assert!(validation.passed);
        assert!(validation.issues.is_empty());
    }

    #[test]
    fn test_diversity_validation_add_comparison_fail() {
        let mut primary = ErrorHistogram::new();
        primary.record_error("E0382");
        let mut comparison = ErrorHistogram::new();
        comparison.record_error("E9999");
        let mut validation = DiversityValidation::new(primary);
        validation.add_comparison("test", comparison, &DiversityConfig::default());
        assert!(!validation.passed);
        assert!(!validation.issues.is_empty());
    }

    #[test]
    fn test_diversity_validation_report() {
        let mut hist = ErrorHistogram::new();
        hist.total_files = 10;
        hist.total_loc = 1000;
        hist.record_error("E0382");
        let validation = DiversityValidation::new(hist);
        let report = validation.to_report();
        assert!(report.contains("Corpus Diversity Validation Report"));
        assert!(report.contains("10 files"));
        assert!(report.contains("1000 LOC"));
    }

    // --- construct recording and empty-input edge cases ---

    #[test]
    fn test_record_construct() {
        let mut hist = ErrorHistogram::new();
        hist.record_construct(CConstruct::RawPointer);
        hist.record_construct(CConstruct::RawPointer);
        hist.record_construct(CConstruct::Struct);
        assert_eq!(
            hist.construct_coverage.get(&CConstruct::RawPointer),
            Some(&2)
        );
        assert_eq!(hist.construct_coverage.get(&CConstruct::Struct), Some(&1));
    }

    #[test]
    fn test_empty_distribution() {
        let hist = ErrorHistogram::new();
        let dist = hist.error_distribution();
        assert!(dist.is_empty());
    }

    #[test]
    fn test_empty_category_distribution() {
        let hist = ErrorHistogram::new();
        let dist = hist.category_distribution();
        assert!(dist.is_empty());
    }

    #[test]
    fn test_compare_empty_histograms() {
        let hist1 = ErrorHistogram::new();
        let hist2 = ErrorHistogram::new();
        let metrics = compare_histograms(&hist1, &hist2);
        assert!(metrics.js_divergence < 0.001);
        assert!((metrics.coverage_ratio - 1.0).abs() < 0.001);
    }

    // --- analyze_corpus (filesystem scanning heuristics) ---

    #[test]
    fn test_analyze_corpus_with_c_files() {
        let dir = tempfile::tempdir().unwrap();
        std::fs::write(
            dir.path().join("test.c"),
            r#"
#include <stdlib.h>
struct Point { int x; int y; };
typedef int myint;
int main() {
int *p = malloc(sizeof(int));
char *s = "hello";
for (int i = 0; i < 10; i++) {}
while (1) { break; }
switch (0) { case 0: break; }
goto end;
end:
free(p);
return 0;
}
"#,
        )
        .unwrap();
        let hist = analyze_corpus(dir.path()).unwrap();
        assert_eq!(hist.total_files, 1);
        assert!(hist.total_loc > 0);
        assert!(hist.construct_coverage.contains_key(&CConstruct::MallocFree));
        assert!(hist.construct_coverage.contains_key(&CConstruct::RawPointer));
        assert!(hist.construct_coverage.contains_key(&CConstruct::Struct));
        assert!(hist.construct_coverage.contains_key(&CConstruct::Typedef));
        assert!(hist.construct_coverage.contains_key(&CConstruct::ForLoop));
        assert!(hist.construct_coverage.contains_key(&CConstruct::WhileLoop));
        assert!(hist.construct_coverage.contains_key(&CConstruct::Switch));
        assert!(hist.construct_coverage.contains_key(&CConstruct::Goto));
    }

    #[test]
    fn test_analyze_corpus_empty_dir() {
        let dir = tempfile::tempdir().unwrap();
        let hist = analyze_corpus(dir.path()).unwrap();
        assert_eq!(hist.total_files, 0);
        assert_eq!(hist.total_loc, 0);
    }

    #[test]
    fn test_analyze_corpus_no_constructs() {
        let dir = tempfile::tempdir().unwrap();
        std::fs::write(dir.path().join("simple.c"), "int main() { return 0; }\n").unwrap();
        let hist = analyze_corpus(dir.path()).unwrap();
        assert_eq!(hist.total_files, 1);
        assert!(hist.total_loc > 0);
        assert!(!hist.construct_coverage.contains_key(&CConstruct::MallocFree));
    }

    // --- report rendering ---

    #[test]
    fn test_report_no_comparisons() {
        let hist = ErrorHistogram::new();
        let validation = DiversityValidation::new(hist);
        let report = validation.to_report();
        assert!(report.contains("No comparison corpora provided"));
        assert!(report.contains("PASSED"));
    }

    #[test]
    fn test_report_with_passing_comparison() {
        let mut primary = ErrorHistogram::new();
        primary.total_files = 5;
        primary.total_loc = 500;
        primary.record_error("E0382");
        primary.record_error("E0499");
        let mut comparison = ErrorHistogram::new();
        comparison.record_error("E0382");
        comparison.record_error("E0499");
        let mut validation = DiversityValidation::new(primary);
        validation.add_comparison("similar", comparison, &DiversityConfig::default());
        let report = validation.to_report();
        assert!(report.contains("Comparison Results"));
        assert!(report.contains("similar"));
        assert!(report.contains("PASS"));
        assert!(report.contains("PASSED"));
    }

    #[test]
    fn test_report_with_failing_comparison() {
        let mut primary = ErrorHistogram::new();
        primary.record_error("E0382");
        let mut comparison = ErrorHistogram::new();
        comparison.record_error("E9999");
        let config = DiversityConfig {
            max_js_divergence: 0.01,
            min_coverage_ratio: 0.99,
            min_error_codes: 1,
        };
        let mut validation = DiversityValidation::new(primary);
        validation.add_comparison("different", comparison, &config);
        let report = validation.to_report();
        assert!(report.contains("FAILED"));
        assert!(report.contains("Issues:"));
    }

    // --- threshold violations ---

    #[test]
    fn test_add_comparison_coverage_ratio_below_threshold() {
        let mut primary = ErrorHistogram::new();
        primary.record_error("E0382");
        primary.record_error("E0499");
        primary.record_error("E0597");
        let mut comparison = ErrorHistogram::new();
        comparison.record_error("E0308");
        let config = DiversityConfig {
            max_js_divergence: 1.0,
            min_coverage_ratio: 0.9,
            min_error_codes: 1,
        };
        let mut validation = DiversityValidation::new(primary);
        validation.add_comparison("low_coverage", comparison, &config);
        assert!(!validation.passed);
        assert!(validation.issues.iter().any(|i| i.contains("Coverage ratio")));
    }

    #[test]
    fn test_add_comparison_js_divergence_above_threshold() {
        let mut primary = ErrorHistogram::new();
        primary.record_error("E0382");
        let mut comparison = ErrorHistogram::new();
        comparison.record_error("E0308");
        let config = DiversityConfig {
            max_js_divergence: 0.001,
            min_coverage_ratio: 0.0,
            min_error_codes: 1,
        };
        let mut validation = DiversityValidation::new(primary);
        validation.add_comparison("divergent", comparison, &config);
        assert!(!validation.passed);
        assert!(validation.issues.iter().any(|i| i.contains("JS divergence")));
    }

    // --- asymmetric and disjoint comparisons ---

    #[test]
    fn test_compare_one_empty_one_nonempty() {
        let hist1 = ErrorHistogram::new();
        let mut hist2 = ErrorHistogram::new();
        hist2.record_error("E0382");
        let metrics = compare_histograms(&hist1, &hist2);
        assert_eq!(metrics.overlap, 0.0);
        assert_eq!(metrics.shared_error_codes, 0);
    }

    #[test]
    fn test_compare_completely_disjoint() {
        let mut hist1 = ErrorHistogram::new();
        hist1.record_error("E0382");
        hist1.record_error("E0505");
        let mut hist2 = ErrorHistogram::new();
        hist2.record_error("E0308");
        hist2.record_error("E0277");
        let metrics = compare_histograms(&hist1, &hist2);
        assert_eq!(metrics.shared_error_codes, 0);
        assert!(metrics.coverage_ratio < 0.01);
        assert!(!metrics.is_acceptable());
    }

    // --- KL/JS edge cases ---

    #[test]
    fn test_kl_divergence_with_zeros_in_p() {
        let mut p = HashMap::new();
        p.insert("A".to_string(), 0.0);
        p.insert("B".to_string(), 1.0);
        let mut q = HashMap::new();
        q.insert("A".to_string(), 0.5);
        q.insert("B".to_string(), 0.5);
        let keys: std::collections::HashSet<_> = p.keys().chain(q.keys()).cloned().collect();
        let kl = kl_divergence_safe(&p, &q, &keys);
        assert!(kl > 0.0);
    }

    #[test]
    fn test_kl_divergence_empty_keys() {
        let p = HashMap::new();
        let q = HashMap::new();
        let keys = std::collections::HashSet::new();
        let kl = kl_divergence_safe(&p, &q, &keys);
        assert!(kl < 0.001);
    }

    #[test]
    fn test_js_divergence_empty_keys() {
        let p = HashMap::new();
        let q = HashMap::new();
        let keys = std::collections::HashSet::new();
        let js = jensen_shannon_divergence(&p, &q, &keys);
        assert!(js < 0.001);
    }

    // --- remaining categorize_error codes ---

    #[test]
    fn test_categorize_error_e0507() {
        assert_eq!(categorize_error("E0507"), ErrorCategory::Ownership);
    }

    #[test]
    fn test_categorize_error_e0500() {
        assert_eq!(categorize_error("E0500"), ErrorCategory::Borrowing);
    }

    #[test]
    fn test_categorize_error_e0623() {
        assert_eq!(categorize_error("E0623"), ErrorCategory::Lifetime);
    }

    #[test]
    fn test_categorize_error_e0106() {
        assert_eq!(categorize_error("E0106"), ErrorCategory::Lifetime);
    }

    #[test]
    fn test_categorize_error_e0369() {
        assert_eq!(categorize_error("E0369"), ErrorCategory::Type);
    }

    #[test]
    fn test_all_construct_variants_recordable() {
        let mut hist = ErrorHistogram::new();
        let constructs = vec![
            CConstruct::RawPointer,
            CConstruct::Array,
            CConstruct::MallocFree,
            CConstruct::FunctionPointer,
            CConstruct::Struct,
            CConstruct::Union,
            CConstruct::Enum,
            CConstruct::Typedef,
            CConstruct::Macro,
            CConstruct::Goto,
            CConstruct::Switch,
            CConstruct::ForLoop,
            CConstruct::WhileLoop,
            CConstruct::DoWhile,
        ];
        for c in &constructs {
            hist.record_construct(*c);
        }
        assert_eq!(hist.construct_coverage.len(), 14);
    }

    #[test]
    fn test_compare_histograms_passes_threshold_true() {
        let mut h1 = ErrorHistogram::new();
        h1.record_error("E0382");
        h1.record_error("E0499");
        let metrics = compare_histograms(&h1, &h1);
        assert!(metrics.passes_threshold);
    }

    #[test]
    fn test_compare_histograms_passes_threshold_false() {
        let mut h1 = ErrorHistogram::new();
        h1.record_error("E0382");
        let mut h2 = ErrorHistogram::new();
        h2.record_error("E9999");
        let metrics = compare_histograms(&h1, &h2);
        assert!(!metrics.passes_threshold);
    }

    // --- serde round-trips ---

    #[test]
    fn test_diversity_config_serialize() {
        let config = DiversityConfig::default();
        let json = serde_json::to_string(&config).unwrap();
        assert!(json.contains("max_js_divergence"));
        let deserialized: DiversityConfig = serde_json::from_str(&json).unwrap();
        assert!((deserialized.max_js_divergence - 0.15).abs() < 0.001);
    }

    #[test]
    fn test_error_histogram_serialize() {
        let mut hist = ErrorHistogram::new();
        hist.total_files = 10;
        hist.total_loc = 500;
        hist.record_error("E0382");
        hist.record_construct(CConstruct::RawPointer);
        let json = serde_json::to_string(&hist).unwrap();
        let deserialized: ErrorHistogram = serde_json::from_str(&json).unwrap();
        assert_eq!(deserialized.total_files, 10);
        assert_eq!(deserialized.total_errors(), 1);
    }

    #[test]
    fn test_diversity_metrics_serialize() {
        let metrics = DiversityMetrics {
            js_divergence: 0.1,
            kl_divergence: 0.2,
            overlap: 0.8,
            primary_unique_errors: 5,
            comparison_unique_errors: 5,
            shared_error_codes: 4,
            coverage_ratio: 0.8,
            passes_threshold: true,
        };
        let json = serde_json::to_string(&metrics).unwrap();
        let deserialized: DiversityMetrics = serde_json::from_str(&json).unwrap();
        assert!((deserialized.js_divergence - 0.1).abs() < 0.001);
    }

    #[test]
    fn test_diversity_validation_serialize() {
        let mut primary = ErrorHistogram::new();
        primary.record_error("E0382");
        let validation = DiversityValidation::new(primary);
        let json = serde_json::to_string(&validation).unwrap();
        let deserialized: DiversityValidation = serde_json::from_str(&json).unwrap();
        assert!(deserialized.passed);
    }
}