mod similarity_detector_tests {
use super::*;
/// Builds the detector shared by most tests in this module.
///
/// The configuration overrides `min_lines` (2), `min_tokens` (3) and
/// `similarity_threshold` (0.5); every other field keeps the value supplied
/// by `SimilarityConfig::default()`.
fn create_detector_with_low_thresholds() -> SimilarityDetector {
    let relaxed_config = SimilarityConfig {
        min_lines: 2,
        min_tokens: 3,
        similarity_threshold: 0.5,
        ..SimilarityConfig::default()
    };
    SimilarityDetector::new(relaxed_config)
}
/// Smoke test: a detector built from the default configuration can run
/// exact-duplicate detection over an empty file list.
#[test]
fn test_detector_new_default_config() {
    let no_files: Vec<(PathBuf, String)> = Vec::new();
    let detector = SimilarityDetector::new(SimilarityConfig::default());
    // Only completion of the call matters here; the result is discarded.
    let _ = detector.detect_exact_duplicates(&no_files);
}
/// Smoke test: a detector can be constructed from a configuration whose
/// fields are all spelled out explicitly (no `..default()` fallback).
#[test]
fn test_detector_new_custom_config() {
    let _detector = SimilarityDetector::new(SimilarityConfig {
        min_lines: 10,
        min_tokens: 100,
        similarity_threshold: 0.9,
        enable_entropy: false,
        enable_ast: false,
        enable_semantic: false,
        window_size: 50,
        k_gram_size: 20,
    });
}
/// Smoke test: exact-duplicate detection over a single three-line file
/// returns without panicking.
///
/// No expectation is placed on the returned collection. An assertion of the
/// form `is_empty() || len() >= 1` holds for every possible result, so it
/// would only give the appearance of a check; the result is discarded
/// explicitly instead.
#[test]
fn test_detect_exact_duplicates_single_file() {
    let detector = create_detector_with_low_thresholds();
    let files = vec![(
        PathBuf::from("test.rs"),
        "line1\nline2\nline3\n".to_string(),
    )];
    // TODO(review): pin the expected outcome for a lone file once the
    // intended behaviour is confirmed against the detector implementation.
    let _ = detector.detect_exact_duplicates(&files);
}
/// Smoke test: exact-duplicate detection over two files holding the same
/// text. The detector uses `min_lines` 2 and `min_tokens` 3 with all other
/// settings left at their defaults; the result is discarded.
#[test]
fn test_detect_exact_duplicates_identical_files() {
    let config = SimilarityConfig {
        min_lines: 2,
        min_tokens: 3,
        ..SimilarityConfig::default()
    };
    let detector = SimilarityDetector::new(config);

    let content = "fn test() {\n let x = 1;\n let y = 2;\n}\n";
    // Same body under two different paths.
    let files: Vec<(PathBuf, String)> = ["file1.rs", "file2.rs"]
        .iter()
        .map(|&name| (PathBuf::from(name), content.to_string()))
        .collect();

    let _ = detector.detect_exact_duplicates(&files);
}
/// Smoke test: exact-duplicate detection over two files that contain the
/// same tokens but differ in whitespace.
///
/// The second file adds repeated spaces, a leading tab and trailing blanks
/// so that the inputs are not byte-identical; otherwise this test would
/// merely repeat the identical-files case. The result is discarded because
/// this test does not fix whether whitespace-only differences are reported
/// as exact duplicates.
#[test]
fn test_detect_exact_duplicates_whitespace_difference() {
    let detector = create_detector_with_low_thresholds();
    let files = vec![
        (
            PathBuf::from("file1.rs"),
            "let x = 1;\nlet y = 2;\n".to_string(),
        ),
        (
            PathBuf::from("file2.rs"),
            // Same statements, different spacing.
            "let  x  =  1;\n\tlet y = 2;   \n".to_string(),
        ),
    ];
    let _ = detector.detect_exact_duplicates(&files);
}
/// Smoke test: exact-duplicate detection over ten files (`file0.rs` through
/// `file9.rs`) that all hold the same one-line function. The result is
/// discarded.
#[test]
fn test_detect_exact_duplicates_many_files() {
    let content = "fn foo() { let a = 1; }\n";

    let mut files: Vec<(PathBuf, String)> = Vec::with_capacity(10);
    for index in 0..10 {
        let path = PathBuf::from(format!("file{}.rs", index));
        files.push((path, content.to_string()));
    }

    let detector = create_detector_with_low_thresholds();
    let _ = detector.detect_exact_duplicates(&files);
}
/// Smoke test: structural similarity detection accepts a threshold of 0.0.
///
/// Each input consists of three `let` statements; the two files differ only
/// in the variable names. The result is discarded.
#[test]
fn test_detect_structural_similarity_threshold_zero() {
    let first = "let a = 1;\nlet b = 2;\nlet c = 3;\n";
    let second = "let x = 1;\nlet y = 2;\nlet z = 3;\n";
    let files = vec![
        (PathBuf::from("file1.rs"), String::from(first)),
        (PathBuf::from("file2.rs"), String::from(second)),
    ];

    let detector = create_detector_with_low_thresholds();
    let _ = detector.detect_structural_similarity(&files, 0.0);
}
/// With a threshold of 1.0, every reported structural match must carry a
/// `similarity` of at least 1.0. An empty result also passes.
#[test]
fn test_detect_structural_similarity_threshold_one() {
    let first = "let a = 1;\nlet b = 2;\nlet c = 3;\n";
    let second = "let x = 1;\nlet y = 2;\nlet z = 3;\n";
    let files = vec![
        (PathBuf::from("file1.rs"), String::from(first)),
        (PathBuf::from("file2.rs"), String::from(second)),
    ];

    let detector = create_detector_with_low_thresholds();
    let similar = detector.detect_structural_similarity(&files, 1.0);

    // `all` is vacuously true for an empty result, so no separate
    // emptiness check is needed.
    assert!(similar.iter().all(|found| found.similarity >= 1.0));
}
/// Smoke test: structural similarity detection at threshold 0.6 over two
/// functions of the same shape whose function name and local variable names
/// differ. The result is discarded.
#[test]
fn test_detect_structural_similarity_renamed_variables() {
    let original = "fn process() {\n let data = vec![1,2,3];\n for item in data { println!(\"{}\", item); }\n}\n";
    let renamed = "fn handle() {\n let values = vec![1,2,3];\n for elem in values { println!(\"{}\", elem); }\n}\n";

    let files = vec![
        (PathBuf::from("file1.rs"), original.to_string()),
        (PathBuf::from("file2.rs"), renamed.to_string()),
    ];

    let detector = create_detector_with_low_thresholds();
    let _ = detector.detect_structural_similarity(&files, 0.6);
}
/// Smoke test: semantic similarity detection accepts a threshold of 0.0 for
/// two files that share only the word `code`. The result is discarded.
#[test]
fn test_detect_semantic_similarity_threshold_zero() {
    let greeting = "hello world test code\nhello world test code\n";
    let farewell = "goodbye moon different code\ngoodbye moon different code\n";
    let files = vec![
        (PathBuf::from("file1.rs"), String::from(greeting)),
        (PathBuf::from("file2.rs"), String::from(farewell)),
    ];

    let detector = create_detector_with_low_thresholds();
    let _ = detector.detect_semantic_similarity(&files, 0.0);
}
/// Smoke test: semantic similarity detection at threshold 0.5 over two files
/// with identical content. Only the length of the result is read, and it is
/// not asserted on.
#[test]
fn test_detect_semantic_similarity_same_tokens() {
    let content = "fn test() println hello world\nfn test() println hello world\n";
    let files: Vec<(PathBuf, String)> = ["file1.rs", "file2.rs"]
        .iter()
        .map(|&name| (PathBuf::from(name), content.to_string()))
        .collect();

    let detector = create_detector_with_low_thresholds();
    let _match_count = detector.detect_semantic_similarity(&files, 0.5).len();
}
/// Entropy analysis of a single file made of two alphabet lines must report
/// a non-negative `average_entropy`.
#[test]
fn test_analyze_entropy_single_file() {
    let alphabet_twice = "abcdefghijklmnopqrstuvwxyz\nabcdefghijklmnopqrstuvwxyz\n";
    let files = vec![(PathBuf::from("test.rs"), String::from(alphabet_twice))];

    let report = create_detector_with_low_thresholds().analyze_entropy(&files);
    let average = report.average_entropy;
    assert!(average >= 0.0);
}
/// Entropy analysis of content drawn from a wide character set (letters,
/// digits and punctuation) is expected to report a strictly positive
/// `average_entropy`.
///
/// Currently ignored; see the reason recorded in the `#[ignore]` attribute.
#[test]
#[ignore = "Entropy analysis edge case - needs investigation"]
fn test_analyze_entropy_high_entropy_content() {
    let high_entropy = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789!@#$%^&*()\nabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789!@#$%^&*()\n";
    let files = vec![(PathBuf::from("test.rs"), String::from(high_entropy))];

    let config = SimilarityConfig {
        min_lines: 2,
        min_tokens: 3,
        ..SimilarityConfig::default()
    };
    let report = SimilarityDetector::new(config).analyze_entropy(&files);

    let average = report.average_entropy;
    assert!(average > 0.0);
}
/// Entropy analysis of content consisting of one repeated character (plus
/// line breaks) must report a non-negative `average_entropy`.
#[test]
fn test_analyze_entropy_low_entropy_content() {
    let low_entropy = "aaaaaaaaaa\naaaaaaaaaa\naaaaaaaaaa\n";
    let files = vec![(PathBuf::from("test.rs"), String::from(low_entropy))];

    let config = SimilarityConfig {
        min_lines: 2,
        min_tokens: 3,
        ..SimilarityConfig::default()
    };
    let report = SimilarityDetector::new(config).analyze_entropy(&files);

    let average = report.average_entropy;
    assert!(average >= 0.0);
}
/// Smoke test: entropy analysis over two files with different content. The
/// report is discarded.
#[test]
fn test_analyze_entropy_multiple_files() {
    let varied = "abc def ghi\nabc def ghi\n";
    let repetitive = "xyz xyz xyz\nxyz xyz xyz\n";
    let files = vec![
        (PathBuf::from("file1.rs"), String::from(varied)),
        (PathBuf::from("file2.rs"), String::from(repetitive)),
    ];

    let detector = create_detector_with_low_thresholds();
    let _ = detector.analyze_entropy(&files);
}
/// A string made of one repeated character is expected to have an entropy
/// of zero (to within `f64::EPSILON`).
#[test]
fn test_calculate_entropy_single_repeated_char() {
    let entropy = SimilarityDetector::new(SimilarityConfig::default())
        .calculate_entropy("aaaaaaaaaa");
    let distance_from_zero = entropy.abs();
    assert!(distance_from_zero < f64::EPSILON);
}
/// Two distinct characters occurring equally often are expected to yield an
/// entropy within 0.01 of 1.0.
#[test]
fn test_calculate_entropy_two_equal_chars() {
    let entropy = SimilarityDetector::new(SimilarityConfig::default())
        .calculate_entropy("aabb");
    let deviation = (entropy - 1.0).abs();
    assert!(deviation < 0.01);
}
/// Eight distinct characters, each occurring once, are expected to yield an
/// entropy within 0.01 of 3.0.
#[test]
fn test_calculate_entropy_all_unique() {
    let entropy = SimilarityDetector::new(SimilarityConfig::default())
        .calculate_entropy("abcdefgh");
    let deviation = (entropy - 3.0).abs();
    assert!(deviation < 0.01);
}
/// Entropy of a string that contains non-ASCII characters must be strictly
/// positive.
///
/// The input is written with `\u{..}` escapes so that the multi-byte
/// characters (U+00E9, "e" with acute accent) survive any tooling that
/// normalises source text to ASCII; a pure-ASCII input would not exercise
/// Unicode handling at all.
#[test]
fn test_calculate_entropy_unicode() {
    let detector = SimilarityDetector::new(SimilarityConfig::default());
    // Several distinct characters, two of them outside the ASCII range.
    let entropy = detector.calculate_entropy("h\u{e9}llo caf\u{e9}");
    assert!(entropy > 0.0);
}
/// Entropy of a whitespace-only string (space, tabs, newlines) must be
/// non-negative.
#[test]
fn test_calculate_entropy_whitespace() {
    let blank_input = " \t\t\n\n";
    let entropy = SimilarityDetector::new(SimilarityConfig::default())
        .calculate_entropy(blank_input);
    assert!(entropy >= 0.0);
}
/// With strict settings (`min_lines` 10, `min_tokens` 100, threshold 0.99)
/// and two unrelated one-word files, no refactoring hints may be produced.
#[test]
fn test_find_refactoring_opportunities_no_matches() {
    let strict_config = SimilarityConfig {
        min_lines: 10,
        min_tokens: 100,
        similarity_threshold: 0.99,
        ..SimilarityConfig::default()
    };
    let detector = SimilarityDetector::new(strict_config);

    let files = vec![
        (PathBuf::from("file1.rs"), String::from("short\n")),
        (PathBuf::from("file2.rs"), String::from("brief\n")),
    ];

    let hints = detector.find_refactoring_opportunities(&files);
    assert!(hints.is_empty());
}
/// Smoke test: searching for refactoring opportunities in a single file
/// with two empty functions. The hints are discarded.
#[test]
fn test_find_refactoring_opportunities_single_file() {
    let source = "fn foo() {}\nfn bar() {}\n";
    let files = vec![(PathBuf::from("test.rs"), String::from(source))];

    let detector = create_detector_with_low_thresholds();
    let _ = detector.find_refactoring_opportunities(&files);
}
/// With the entropy, AST and semantic switches all enabled, the
/// comprehensive report must contain an entropy analysis.
#[test]
fn test_comprehensive_analysis_all_enabled() {
    let detector = SimilarityDetector::new(SimilarityConfig {
        min_lines: 2,
        min_tokens: 3,
        enable_entropy: true,
        enable_ast: true,
        enable_semantic: true,
        ..SimilarityConfig::default()
    });

    let source = "fn test() {\n let x = 1;\n let y = 2;\n}\n";
    let files = vec![(PathBuf::from("test.rs"), String::from(source))];

    let report = detector.comprehensive_analysis(&files);
    let entropy_section = &report.entropy_analysis;
    assert!(entropy_section.is_some());
}
/// With the entropy, AST and semantic switches all disabled, the
/// comprehensive report must not contain an entropy analysis.
#[test]
fn test_comprehensive_analysis_all_disabled() {
    let detector = SimilarityDetector::new(SimilarityConfig {
        enable_entropy: false,
        enable_ast: false,
        enable_semantic: false,
        ..SimilarityConfig::default()
    });

    let files = vec![(PathBuf::from("test.rs"), String::from("content\n"))];

    let report = detector.comprehensive_analysis(&files);
    let entropy_section = &report.entropy_analysis;
    assert!(entropy_section.is_none());
}
/// The metrics section of a comprehensive report must expose a non-negative
/// `duplication_percentage` and `average_entropy`, and a readable
/// `total_clones` field.
#[test]
fn test_comprehensive_analysis_metrics() {
    let first = "fn test() let x\nfn test() let x\n";
    let second = "fn test() let y\nfn test() let y\n";
    let files = vec![
        (PathBuf::from("file1.rs"), String::from(first)),
        (PathBuf::from("file2.rs"), String::from(second)),
    ];

    let report = create_detector_with_low_thresholds().comprehensive_analysis(&files);
    let metrics = &report.metrics;

    assert!(metrics.duplication_percentage >= 0.0);
    assert!(metrics.average_entropy >= 0.0);
    // Field access only; no particular clone count is expected.
    let _ = metrics.total_clones;
}
}