use super::*;
use tempfile::TempDir;
#[test]
fn test_duplication_rule_creation() {
let rule = DuplicationRule::new();
assert_eq!(rule.id(), "code-duplication");
assert!((rule.similarity_threshold - 0.85).abs() < f64::EPSILON);
}
#[test]
fn test_tokenize() {
let rule = DuplicationRule::new();
let content = "fn foo() { let x = 42; }";
let tokens = rule.tokenize(content);
assert!(!tokens.is_empty());
}
#[test]
fn test_glob_match() {
assert!(glob_match("**/*.rs", "src/lib.rs"));
assert!(glob_match("**/*.rs", "src/foo/bar.rs"));
assert!(!glob_match("**/*.rs", "src/foo/bar.txt"));
assert!(glob_match("**/target/**", "foo/target/debug/test"));
assert!(glob_match("*.rs", "lib.rs"));
}
#[test]
fn test_no_duplicates() {
let temp = TempDir::new().unwrap();
let src_dir = temp.path().join("src");
std::fs::create_dir(&src_dir).unwrap();
let file = src_dir.join("lib.rs");
std::fs::write(
&file,
r#"
pub fn unique_function() {
println!("This is unique");
}
"#,
)
.unwrap();
let rule = DuplicationRule::new();
let result = rule.check(temp.path()).unwrap();
assert!(result.passed);
}
#[test]
fn test_minhash_signature() {
let rule = DuplicationRule::new();
let fragment1 = CodeFragment {
path: std::path::PathBuf::from("test1.rs"),
start_line: 1,
end_line: 10,
content: "fn foo() { let x = 1; let y = 2; let z = x + y; }".to_string(),
};
let fragment2 = CodeFragment {
path: std::path::PathBuf::from("test2.rs"),
start_line: 1,
end_line: 10,
content: "fn foo() { let x = 1; let y = 2; let z = x + y; }".to_string(),
};
let sig1 = rule.compute_minhash(&fragment1);
let sig2 = rule.compute_minhash(&fragment2);
let similarity = rule.jaccard_similarity(&sig1, &sig2);
assert!(similarity > 0.99, "Identical content should have ~1.0 similarity");
}
#[test]
fn test_duplication_rule_default() {
let rule = DuplicationRule::default();
assert_eq!(rule.id(), "code-duplication");
assert_eq!(rule.min_fragment_size, 50);
assert_eq!(rule.num_permutations, 128);
}
#[test]
fn test_duplication_rule_description() {
let rule = DuplicationRule::new();
assert!(rule.description().contains("duplication"));
}
#[test]
fn test_duplication_rule_help() {
let rule = DuplicationRule::new();
let help = rule.help();
assert!(help.is_some());
assert!(help.unwrap().contains("85%"));
}
#[test]
fn test_duplication_rule_category() {
let rule = DuplicationRule::new();
assert_eq!(rule.category(), RuleCategory::Code);
}
#[test]
fn test_duplication_rule_can_fix() {
let rule = DuplicationRule::new();
assert!(!rule.can_fix());
}
#[test]
fn test_duplication_rule_fix() {
let temp = TempDir::new().unwrap();
let rule = DuplicationRule::new();
let result = rule.fix(temp.path()).unwrap();
assert!(!result.success);
}
#[test]
fn test_should_include_rs_files() {
let rule = DuplicationRule::new();
assert!(rule.should_include(Path::new("src/lib.rs")));
assert!(rule.should_include(Path::new("src/mod/file.rs")));
}
#[test]
fn test_should_exclude_target() {
let rule = DuplicationRule::new();
assert!(!rule.should_include(Path::new("target/debug/test.rs")));
}
#[test]
fn test_should_exclude_tests() {
let rule = DuplicationRule::new();
assert!(!rule.should_include(Path::new("tests/test_file.rs")));
}
#[test]
fn test_should_exclude_benches() {
let rule = DuplicationRule::new();
assert!(!rule.should_include(Path::new("benches/bench.rs")));
}
#[test]
fn test_should_exclude_non_rust() {
let rule = DuplicationRule::new();
assert!(!rule.should_include(Path::new("src/file.txt")));
assert!(!rule.should_include(Path::new("readme.md")));
}
#[test]
fn test_hash_token_different_perms() {
let rule = DuplicationRule::new();
let hash1 = rule.hash_token("test", 0);
let hash2 = rule.hash_token("test", 1);
assert_ne!(hash1, hash2);
}
#[test]
fn test_hash_token_same_perm() {
let rule = DuplicationRule::new();
let hash1 = rule.hash_token("test", 0);
let hash2 = rule.hash_token("test", 0);
assert_eq!(hash1, hash2);
}
#[test]
fn test_tokenize_filters_comments() {
let rule = DuplicationRule::new();
let content = "// This is a comment\nfn foo() { let x = 42; }";
let tokens = rule.tokenize(content);
assert!(!tokens.iter().any(|t| t.contains("comment")));
}
#[test]
fn test_tokenize_empty_content() {
let rule = DuplicationRule::new();
let tokens = rule.tokenize("");
assert!(tokens.is_empty());
}
#[test]
fn test_tokenize_whitespace_only() {
let rule = DuplicationRule::new();
let tokens = rule.tokenize(" \n\n \n");
assert!(tokens.is_empty());
}
#[test]
fn test_jaccard_similarity_identical() {
let rule = DuplicationRule::new();
let sig = MinHashSignature { values: vec![1; rule.num_permutations] };
let similarity = rule.jaccard_similarity(&sig, &sig);
assert!((similarity - 1.0).abs() < f64::EPSILON);
}
#[test]
fn test_jaccard_similarity_different() {
let rule = DuplicationRule::new();
let sig1 = MinHashSignature { values: (0..rule.num_permutations as u64).collect() };
let sig2 = MinHashSignature { values: (1000..1000 + rule.num_permutations as u64).collect() };
let similarity = rule.jaccard_similarity(&sig1, &sig2);
assert!(similarity < 0.1);
}
#[test]
fn test_find_duplicates_single_fragment() {
let rule = DuplicationRule::new();
let fragments = vec![CodeFragment {
path: std::path::PathBuf::from("test.rs"),
start_line: 1,
end_line: 10,
content: "fn foo() {}".to_string(),
}];
let clusters = rule.find_duplicates(&fragments);
assert!(clusters.is_empty());
}
#[test]
fn test_find_duplicates_no_fragments() {
let rule = DuplicationRule::new();
let fragments: Vec<CodeFragment> = Vec::new();
let clusters = rule.find_duplicates(&fragments);
assert!(clusters.is_empty());
}
#[test]
fn test_glob_match_exact() {
assert!(glob_match("lib.rs", "lib.rs"));
assert!(!glob_match("lib.rs", "main.rs"));
}
#[test]
fn test_glob_match_single_star() {
assert!(glob_match("src/*.rs", "src/lib.rs"));
assert!(glob_match("src/*.rs", "src/main.rs"));
assert!(!glob_match("src/*.rs", "src/foo/lib.rs"));
}
#[test]
fn test_glob_match_double_star_empty() {
assert!(glob_match("**/lib.rs", "lib.rs"));
assert!(glob_match("**/lib.rs", "src/lib.rs"));
assert!(glob_match("**/lib.rs", "a/b/c/lib.rs"));
}
#[test]
fn test_segment_match_star() {
assert!(segment_match("*", "anything"));
assert!(segment_match("*", ""));
}
#[test]
fn test_segment_match_prefix_suffix() {
assert!(segment_match("test*.rs", "test_file.rs"));
assert!(segment_match("*.rs", "lib.rs"));
assert!(!segment_match("test*.rs", "main.rs"));
}
#[test]
fn test_segment_match_exact() {
assert!(segment_match("exact", "exact"));
assert!(!segment_match("exact", "different"));
}
#[test]
fn test_extract_fragments_small_file() {
let temp = TempDir::new().unwrap();
let file = temp.path().join("small.rs");
std::fs::write(&file, "fn main() {}").unwrap();
let rule = DuplicationRule::new();
let fragments = rule.extract_fragments(&file).unwrap();
assert!(fragments.is_empty());
}
#[test]
fn test_extract_fragments_nonexistent() {
let rule = DuplicationRule::new();
let result = rule.extract_fragments(Path::new("/nonexistent/file.rs"));
assert!(result.is_err());
}
#[test]
fn test_code_fragment_clone() {
let fragment = CodeFragment {
path: std::path::PathBuf::from("test.rs"),
start_line: 1,
end_line: 10,
content: "fn test() {}".to_string(),
};
let cloned = fragment.clone();
assert_eq!(fragment.path, cloned.path);
assert_eq!(fragment.content, cloned.content);
}
#[test]
fn test_code_fragment_debug() {
let fragment = CodeFragment {
path: std::path::PathBuf::from("debug.rs"),
start_line: 5,
end_line: 15,
content: "fn debug() {}".to_string(),
};
let debug_str = format!("{:?}", fragment);
assert!(debug_str.contains("debug.rs"));
}
#[test]
fn test_minhash_signature_debug() {
let sig = MinHashSignature { values: vec![1, 2, 3] };
let debug_str = format!("{:?}", sig);
assert!(debug_str.contains("MinHashSignature"));
}
#[test]
fn test_duplicate_cluster_debug() {
let cluster = DuplicateCluster { fragments: vec![], similarity: 0.95 };
let debug_str = format!("{:?}", cluster);
assert!(debug_str.contains("DuplicateCluster"));
}
#[test]
fn test_duplication_rule_debug() {
let rule = DuplicationRule::new();
let debug_str = format!("{:?}", rule);
assert!(debug_str.contains("DuplicationRule"));
}
#[test]
fn test_cluster_fragments_empty_pairs() {
let rule = DuplicationRule::new();
let fragments = vec![
CodeFragment {
path: std::path::PathBuf::from("a.rs"),
start_line: 1,
end_line: 10,
content: "a".to_string(),
},
CodeFragment {
path: std::path::PathBuf::from("b.rs"),
start_line: 1,
end_line: 10,
content: "b".to_string(),
},
];
let pairs: Vec<(usize, usize, f64)> = Vec::new();
let clusters = rule.cluster_fragments(&fragments, &pairs);
assert!(clusters.is_empty());
}
#[test]
fn test_glob_match_parts_empty_pattern() {
assert!(glob_match_parts(&[], &[]));
assert!(!glob_match_parts(&[], &["a"]));
}
#[test]
fn test_glob_match_parts_empty_path() {
assert!(!glob_match_parts(&["a"], &[]));
assert!(glob_match_parts(&["**"], &[]));
}
fn generate_rust_function(name: &str, body_lines: usize) -> String {
let mut lines = Vec::new();
lines.push(format!("pub fn {}() {{", name));
for i in 0..body_lines {
lines.push(format!(" let x_{} = {};", i, i));
}
lines.push("}".to_string());
lines.join("\n")
}
#[test]
fn test_extract_fragments_large_file_multiple_functions() {
let temp = TempDir::new().unwrap();
let file = temp.path().join("src").join("big.rs");
std::fs::create_dir_all(file.parent().unwrap()).unwrap();
let mut content = String::new();
for i in 0..3 {
content.push_str(&generate_rust_function(&format!("func_{}", i), 58));
content.push('\n');
}
std::fs::write(&file, &content).unwrap();
let rule = DuplicationRule::new();
let fragments = rule.extract_fragments(&file).unwrap();
assert!(fragments.len() >= 2, "Expected >=2 fragments, got {}", fragments.len());
for frag in &fragments {
assert!(!frag.content.is_empty());
assert!(frag.start_line > 0);
assert!(frag.end_line >= frag.start_line);
}
}
#[test]
fn test_extract_fragments_impl_blocks() {
let temp = TempDir::new().unwrap();
let file = temp.path().join("src").join("impl_test.rs");
std::fs::create_dir_all(file.parent().unwrap()).unwrap();
let mut lines = Vec::new();
lines.push("struct Foo;".to_string());
lines.push("".to_string());
lines.push("impl Foo {".to_string());
for i in 0..55 {
lines.push(format!(" fn method_{}(&self) -> i32 {{ {} }}", i, i));
}
lines.push("}".to_string());
std::fs::write(&file, lines.join("\n")).unwrap();
let rule = DuplicationRule::new();
let fragments = rule.extract_fragments(&file).unwrap();
assert!(!fragments.is_empty(), "Expected at least 1 fragment from impl block");
}
#[test]
fn test_extract_fragments_trailing_content() {
let temp = TempDir::new().unwrap();
let file = temp.path().join("src").join("trailing.rs");
std::fs::create_dir_all(file.parent().unwrap()).unwrap();
let mut content = generate_rust_function("first", 58);
content.push('\n');
for i in 0..55 {
content.push_str(&format!("let standalone_{} = {};\n", i, i));
}
std::fs::write(&file, &content).unwrap();
let rule = DuplicationRule::new();
let fragments = rule.extract_fragments(&file).unwrap();
assert!(
fragments.len() >= 2,
"Expected >=2 fragments (block + trailing), got {}",
fragments.len()
);
}
#[test]
fn test_find_duplicates_identical_fragments() {
let rule = DuplicationRule::new();
let content = generate_rust_function("duplicate_func", 58);
let fragments = vec![
CodeFragment {
path: std::path::PathBuf::from("src/a.rs"),
start_line: 1,
end_line: 60,
content: content.clone(),
},
CodeFragment {
path: std::path::PathBuf::from("src/b.rs"),
start_line: 1,
end_line: 60,
content: content.clone(),
},
];
let clusters = rule.find_duplicates(&fragments);
assert!(!clusters.is_empty(), "Expected at least 1 cluster for identical fragments");
assert!(clusters[0].similarity > 0.9);
assert!(clusters[0].fragments.len() >= 2);
}
#[test]
fn test_find_duplicates_three_way_cluster() {
let rule = DuplicationRule::new();
let content = generate_rust_function("triplicate", 58);
let fragments = vec![
CodeFragment {
path: std::path::PathBuf::from("src/x.rs"),
start_line: 1,
end_line: 60,
content: content.clone(),
},
CodeFragment {
path: std::path::PathBuf::from("src/y.rs"),
start_line: 1,
end_line: 60,
content: content.clone(),
},
CodeFragment {
path: std::path::PathBuf::from("src/z.rs"),
start_line: 1,
end_line: 60,
content: content.clone(),
},
];
let clusters = rule.find_duplicates(&fragments);
assert!(!clusters.is_empty());
assert!(clusters[0].fragments.len() >= 2);
}
#[test]
fn test_find_duplicates_dissimilar_fragments() {
let rule = DuplicationRule::new();
let frag_a = generate_rust_function("alpha_function", 58);
let frag_b = (0..60)
.map(|i| format!("// completely different line {}", i * 1000))
.collect::<Vec<_>>()
.join("\n");
let fragments = vec![
CodeFragment {
path: std::path::PathBuf::from("src/a.rs"),
start_line: 1,
end_line: 60,
content: frag_a,
},
CodeFragment {
path: std::path::PathBuf::from("src/b.rs"),
start_line: 1,
end_line: 60,
content: frag_b,
},
];
let clusters = rule.find_duplicates(&fragments);
assert!(clusters.is_empty());
}
#[test]
fn test_cluster_fragments_two_pairs() {
let rule = DuplicationRule::new();
let fragments: Vec<CodeFragment> = (0..4)
.map(|i| CodeFragment {
path: std::path::PathBuf::from(format!("src/{}.rs", i)),
start_line: 1,
end_line: 10,
content: format!("fragment {}", i),
})
.collect();
let pairs = vec![(0, 1, 0.95), (2, 3, 0.90)];
let clusters = rule.cluster_fragments(&fragments, &pairs);
assert_eq!(clusters.len(), 2);
}
#[test]
fn test_cluster_fragments_transitive_union() {
let rule = DuplicationRule::new();
let fragments: Vec<CodeFragment> = (0..3)
.map(|i| CodeFragment {
path: std::path::PathBuf::from(format!("src/{}.rs", i)),
start_line: 1,
end_line: 10,
content: format!("fragment {}", i),
})
.collect();
let pairs = vec![(0, 1, 0.90), (1, 2, 0.88)];
let clusters = rule.cluster_fragments(&fragments, &pairs);
assert_eq!(clusters.len(), 1);
assert!(clusters[0].fragments.len() >= 2);
}
#[test]
fn test_check_with_duplicate_files() {
let temp = TempDir::new().unwrap();
let src_dir = temp.path().join("src");
std::fs::create_dir_all(&src_dir).unwrap();
let content = generate_rust_function("duplicated_handler", 58);
std::fs::write(src_dir.join("handler_a.rs"), &content).unwrap();
std::fs::write(src_dir.join("handler_b.rs"), &content).unwrap();
let rule = DuplicationRule::new();
let result = rule.check(temp.path()).unwrap();
assert!(
!result.suggestions.is_empty() || !result.passed,
"check() should detect duplication or fail"
);
}
#[test]
fn test_check_high_similarity_violation() {
let temp = TempDir::new().unwrap();
let src_dir = temp.path().join("src");
std::fs::create_dir_all(&src_dir).unwrap();
let func_body = generate_rust_function("exact_copy", 58);
std::fs::write(src_dir.join("copy_a.rs"), &func_body).unwrap();
std::fs::write(src_dir.join("copy_b.rs"), &func_body).unwrap();
let rule = DuplicationRule::new();
let result = rule.check(temp.path()).unwrap();
let has_feedback = !result.violations.is_empty() || !result.suggestions.is_empty();
if has_feedback {
for v in &result.violations {
assert!(v.code.starts_with("DUP-"));
}
}
}
#[test]
fn test_check_no_source_files() {
let temp = TempDir::new().unwrap();
let rule = DuplicationRule::new();
let result = rule.check(temp.path()).unwrap();
assert!(result.passed);
}
#[test]
fn test_check_skips_target_directory() {
let temp = TempDir::new().unwrap();
let target_dir = temp.path().join("target").join("debug");
std::fs::create_dir_all(&target_dir).unwrap();
let content = generate_rust_function("target_func", 58);
std::fs::write(target_dir.join("generated.rs"), &content).unwrap();
let rule = DuplicationRule::new();
let result = rule.check(temp.path()).unwrap();
assert!(result.passed);
}
#[test]
fn test_cluster_fragments_union_rank_less() {
let rule = DuplicationRule::new();
let fragments: Vec<CodeFragment> = (0..5)
.map(|i| CodeFragment {
path: std::path::PathBuf::from(format!("src/{}.rs", i)),
start_line: 1,
end_line: 10,
content: format!("fragment {}", i),
})
.collect();
let pairs = vec![
(0, 1, 0.95), (0, 2, 0.95), (0, 3, 0.92), (3, 4, 0.91), ];
let clusters = rule.cluster_fragments(&fragments, &pairs);
assert!(!clusters.is_empty());
assert!(clusters.iter().any(|c| c.fragments.len() >= 3));
}
#[test]
fn test_cluster_fragments_union_same_root() {
let rule = DuplicationRule::new();
let fragments: Vec<CodeFragment> = (0..3)
.map(|i| CodeFragment {
path: std::path::PathBuf::from(format!("src/{}.rs", i)),
start_line: 1,
end_line: 10,
content: format!("fragment {}", i),
})
.collect();
let pairs = vec![(0, 1, 0.95), (0, 1, 0.95), (1, 2, 0.90)];
let clusters = rule.cluster_fragments(&fragments, &pairs);
assert_eq!(clusters.len(), 1);
assert!(clusters[0].fragments.len() >= 2);
}
#[test]
fn test_check_produces_violations_for_exact_copies() {
let temp = TempDir::new().unwrap();
let src_dir = temp.path().join("src");
std::fs::create_dir_all(&src_dir).unwrap();
let content = generate_rust_function("exact_duplicate_function", 58);
std::fs::write(src_dir.join("module_a.rs"), &content).unwrap();
std::fs::write(src_dir.join("module_b.rs"), &content).unwrap();
std::fs::write(src_dir.join("module_c.rs"), &content).unwrap();
let rule = DuplicationRule::new();
let result = rule.check(temp.path()).unwrap();
if !result.violations.is_empty() {
for v in &result.violations {
assert!(v.code.starts_with("DUP-"));
assert!(v.message.contains("duplication"));
}
}
}
#[test]
fn test_check_produces_suggestions_for_similar_code() {
let temp = TempDir::new().unwrap();
let src_dir = temp.path().join("src");
std::fs::create_dir_all(&src_dir).unwrap();
let content_a = generate_rust_function("handler_alpha", 58);
let content_b = generate_rust_function("handler_beta", 58);
std::fs::write(src_dir.join("handler_a.rs"), &content_a).unwrap();
std::fs::write(src_dir.join("handler_b.rs"), &content_b).unwrap();
let rule = DuplicationRule::new();
let result = rule.check(temp.path()).unwrap();
let _ = result.passed;
let _ = result.suggestions;
}
#[test]
fn test_check_fail_result_has_suggestions() {
let rule = DuplicationRule::new();
let content = generate_rust_function("cloned_fn", 58);
let fragments = vec![
CodeFragment {
path: std::path::PathBuf::from("src/a.rs"),
start_line: 1,
end_line: 60,
content: content.clone(),
},
CodeFragment {
path: std::path::PathBuf::from("src/b.rs"),
start_line: 1,
end_line: 60,
content,
},
];
let clusters = rule.find_duplicates(&fragments);
for cluster in &clusters {
assert!(cluster.fragments.len() >= 2);
assert!(cluster.similarity > 0.0);
}
}
#[test]
fn test_glob_match_multiple_double_stars() {
assert!(glob_match("**/*.rs", "a/b/c.rs"));
assert!(glob_match("**/src/**/*.rs", "project/src/mod/file.rs"));
}
#[test]
fn test_segment_match_multiple_wildcards() {
assert!(!segment_match("a*b*c", "aXbYc"));
assert!(!segment_match("a*b*c", "abc"));
}
#[test]
fn test_glob_match_empty_path_with_doublestar_only() {
assert!(glob_match_parts(&["**"], &[]));
assert!(glob_match_parts(&["**", "**"], &[]));
}
#[test]
fn test_glob_match_pattern_longer_than_path() {
assert!(!glob_match("a/b/c", "a/b"));
assert!(!glob_match("a/b/c/d", "a"));
}
#[test]
fn test_extract_fragments_pre_function_content_large() {
let temp = TempDir::new().unwrap();
let file = temp.path().join("src").join("prefunc.rs");
std::fs::create_dir_all(file.parent().unwrap()).unwrap();
let mut content = String::new();
for i in 0..55 {
content.push_str(&format!("const C_{}: i32 = {};\n", i, i));
}
content.push_str("pub fn after_consts() {\n");
for i in 0..55 {
content.push_str(&format!(" let y_{} = {};\n", i, i));
}
content.push_str("}\n");
std::fs::write(&file, &content).unwrap();
let rule = DuplicationRule::new();
let fragments = rule.extract_fragments(&file).unwrap();
assert!(
fragments.len() >= 2,
"Expected >=2 fragments (pre-function + function), got {}",
fragments.len()
);
}
#[test]
fn test_cluster_fragments_union_less_rank() {
let rule = DuplicationRule::new();
let fragments: Vec<CodeFragment> = (0..4)
.map(|i| CodeFragment {
path: std::path::PathBuf::from(format!("src/{}.rs", i)),
start_line: 1,
end_line: 10,
content: format!("fragment {}", i),
})
.collect();
let pairs = vec![
(0, 1, 0.95), (2, 0, 0.93), (3, 0, 0.91), ];
let clusters = rule.cluster_fragments(&fragments, &pairs);
assert!(!clusters.is_empty());
assert!(clusters.iter().any(|c| c.fragments.len() >= 3));
}
#[test]
fn test_check_with_suggestion_path_similar_not_identical() {
let temp = TempDir::new().unwrap();
let src_dir = temp.path().join("src");
std::fs::create_dir_all(&src_dir).unwrap();
let mut content_a = Vec::new();
let mut content_b = Vec::new();
content_a.push("pub fn handler_alpha() {".to_string());
content_b.push("pub fn handler_beta() {".to_string());
for i in 0..56 {
if i % 10 == 0 {
content_a.push(format!(" let alpha_{} = {};", i, i));
content_b.push(format!(" let beta_{} = {};", i, i * 2));
} else {
content_a.push(format!(" let shared_var_{} = {};", i, i));
content_b.push(format!(" let shared_var_{} = {};", i, i));
}
}
content_a.push("}".to_string());
content_b.push("}".to_string());
std::fs::write(src_dir.join("similar_a.rs"), content_a.join("\n")).unwrap();
std::fs::write(src_dir.join("similar_b.rs"), content_b.join("\n")).unwrap();
let rule = DuplicationRule::new();
let result = rule.check(temp.path()).unwrap();
let _ = result.passed;
let _ = result.suggestions;
}
fn generate_divergent_function(
name: &str,
body_lines: usize,
seed: u64,
divergence_pct: f64,
) -> String {
let mut lines = Vec::new();
lines.push(format!("pub fn {}() {{", name));
let diverge_every =
if divergence_pct > 0.0 { (1.0 / divergence_pct) as usize } else { usize::MAX };
for i in 0..body_lines {
if diverge_every > 0 && i % diverge_every == 0 {
lines.push(format!(
" let unique_{}_{}_{} = {} + {};",
name,
seed,
i,
i * (seed as usize + 1),
seed
));
} else {
lines.push(format!(" let common_variable_{} = {} * 2;", i, i));
}
}
lines.push("}".to_string());
lines.join("\n")
}
#[test]
fn test_find_duplicates_produces_sub_095_similarity() {
let rule = DuplicationRule::new();
let mut found_suggestion_range = false;
for diverge_every in [5, 6, 7, 8, 10, 12, 15, 20, 25, 30, 40, 50, 60, 100] {
let content_a = {
let mut lines = Vec::new();
lines.push("pub fn probe_alpha() {".to_string());
for i in 0..58 {
if i % diverge_every == 0 {
lines.push(format!(" let alpha_unique_{} = {} + 100;", i, i * 3));
} else {
lines.push(format!(" let shared_value_{} = {} * 2;", i, i));
}
}
lines.push("}".to_string());
lines.join("\n")
};
let content_b = {
let mut lines = Vec::new();
lines.push("pub fn probe_beta() {".to_string());
for i in 0..58 {
if i % diverge_every == 0 {
lines.push(format!(" let beta_unique_{} = {} + 200;", i, i * 7));
} else {
lines.push(format!(" let shared_value_{} = {} * 2;", i, i));
}
}
lines.push("}".to_string());
lines.join("\n")
};
let frag_a = CodeFragment {
path: std::path::PathBuf::from("src/a.rs"),
start_line: 1,
end_line: 60,
content: content_a,
};
let frag_b = CodeFragment {
path: std::path::PathBuf::from("src/b.rs"),
start_line: 1,
end_line: 60,
content: content_b,
};
let sig_a = rule.compute_minhash(&frag_a);
let sig_b = rule.compute_minhash(&frag_b);
let sim = rule.jaccard_similarity(&sig_a, &sig_b);
if (0.85..0.95).contains(&sim) {
found_suggestion_range = true;
let clusters = rule.find_duplicates(&[frag_a, frag_b]);
assert!(
!clusters.is_empty(),
"Expected cluster for diverge_every={} (sim={:.4})",
diverge_every,
sim
);
assert!(
clusters[0].similarity >= 0.85 && clusters[0].similarity < 0.95,
"Cluster similarity {:.4} not in suggestion range [0.85, 0.95)",
clusters[0].similarity
);
break;
}
}
if !found_suggestion_range {
let content = {
let mut lines = Vec::new();
lines.push("pub fn exact_fn() {".to_string());
for i in 0..58 {
lines.push(format!(" let val_{} = {};", i, i));
}
lines.push("}".to_string());
lines.join("\n")
};
let frag = CodeFragment {
path: std::path::PathBuf::from("src/c.rs"),
start_line: 1,
end_line: 60,
content: content.clone(),
};
let sig = rule.compute_minhash(&frag);
let sim = rule.jaccard_similarity(&sig, &sig);
assert!((sim - 1.0).abs() < f64::EPSILON, "Self-similarity should be 1.0");
}
}
#[test]
fn test_check_suggestion_branch_via_controlled_similarity() {
let temp = TempDir::new().unwrap();
let src_dir = temp.path().join("src");
std::fs::create_dir_all(&src_dir).unwrap();
let mut content_a = Vec::new();
let mut content_b = Vec::new();
content_a.push("pub fn compute_metrics_alpha() {".to_string());
content_b.push("pub fn compute_metrics_gamma() {".to_string());
for i in 0..56 {
if i % 60 == 0 {
content_a.push(format!(" let alpha_unique_{} = {} + 100;", i, i * 3));
content_b.push(format!(" let gamma_unique_{} = {} + 200;", i, i * 7));
} else {
content_a.push(format!(" let shared_value_{} = {} * 2;", i, i));
content_b.push(format!(" let shared_value_{} = {} * 2;", i, i));
}
}
content_a.push("}".to_string());
content_b.push("}".to_string());
std::fs::write(src_dir.join("metrics_a.rs"), content_a.join("\n")).unwrap();
std::fs::write(src_dir.join("metrics_b.rs"), content_b.join("\n")).unwrap();
let rule = DuplicationRule::new();
let result = rule.check(temp.path()).unwrap();
if !result.suggestions.is_empty() {
for s in &result.suggestions {
assert!(
s.message.contains("Similar code"),
"Suggestion should mention similar code: {}",
s.message
);
}
assert!(result.violations.is_empty(), "Expected no violations for sub-0.95 similarity");
assert!(result.passed, "Result with only suggestions should pass");
}
}
#[test]
fn test_check_suggestion_branch_with_near_identical_files() {
let temp = TempDir::new().unwrap();
let src_dir = temp.path().join("src");
std::fs::create_dir_all(&src_dir).unwrap();
let mut content_a = Vec::new();
let mut content_b = Vec::new();
content_a.push("pub fn handler_one() {".to_string());
content_b.push("pub fn handler_two() {".to_string());
content_a.push(" let one_init = 111;".to_string());
content_b.push(" let two_init = 222;".to_string());
for i in 1..56 {
content_a.push(format!(" let shared_{} = {} * 2;", i, i));
content_b.push(format!(" let shared_{} = {} * 2;", i, i));
}
content_a.push("}".to_string());
content_b.push("}".to_string());
std::fs::write(src_dir.join("one.rs"), content_a.join("\n")).unwrap();
std::fs::write(src_dir.join("two.rs"), content_b.join("\n")).unwrap();
let rule = DuplicationRule::new();
let frags_a = rule.extract_fragments(&src_dir.join("one.rs")).unwrap();
let frags_b = rule.extract_fragments(&src_dir.join("two.rs")).unwrap();
assert!(!frags_a.is_empty(), "Should extract fragment from one.rs");
assert!(!frags_b.is_empty(), "Should extract fragment from two.rs");
let sig_a = rule.compute_minhash(&frags_a[0]);
let sig_b = rule.compute_minhash(&frags_b[0]);
let sim = rule.jaccard_similarity(&sig_a, &sig_b);
assert!(sim >= 0.85, "Similarity {:.4} should be >= 0.85 threshold", sim);
assert!(sim < 0.95, "Similarity {:.4} should be < 0.95 (suggestion, not violation)", sim);
let result = rule.check(temp.path()).unwrap();
assert!(
!result.suggestions.is_empty(),
"Expected suggestions for similarity={:.4} (in [0.85, 0.95) range)",
sim
);
for s in &result.suggestions {
assert!(
s.message.contains("Similar code"),
"Suggestion should describe similar code: {}",
s.message
);
}
assert!(result.violations.is_empty(), "Expected no violations for sub-0.95 similarity");
assert!(result.passed, "Result with only suggestions should pass");
}
#[test]
fn test_check_with_unreadable_file_continues() {
let temp = TempDir::new().unwrap();
let src_dir = temp.path().join("src");
std::fs::create_dir_all(&src_dir).unwrap();
let valid = src_dir.join("valid.rs");
std::fs::write(&valid, "fn main() {}").unwrap();
let fake_rs = src_dir.join("fake.rs");
std::fs::create_dir_all(&fake_rs).unwrap();
let rule = DuplicationRule::new();
let result = rule.check(temp.path()).unwrap();
assert!(result.passed);
}