mod v2_fixtures {
use std::path::{Path, PathBuf};
use tempfile::TempDir;
/// Scratch directory used by the clone-detection tests.
///
/// Wraps a [`TempDir`] so fixtures can be written beneath a unique root.
pub struct V2TestDir {
    /// Handle owning the temporary directory.
    pub dir: TempDir,
}

impl V2TestDir {
    /// Creates a new temporary directory.
    ///
    /// # Errors
    /// Propagates the I/O error reported by `TempDir::new`.
    pub fn new() -> std::io::Result<Self> {
        TempDir::new().map(|dir| Self { dir })
    }

    /// Returns the root path of the temporary directory.
    pub fn path(&self) -> &Path {
        self.dir.path()
    }

    /// Writes `content` to `rel_path` below the root and returns the full path.
    ///
    /// Missing parent directories are created first.
    ///
    /// # Errors
    /// Propagates any I/O error from creating the directories or writing the file.
    pub fn write_file(&self, rel_path: &str, content: &str) -> std::io::Result<PathBuf> {
        let target = self.path().join(rel_path);
        // A path without a parent needs no directory creation.
        target.parent().map_or(Ok(()), std::fs::create_dir_all)?;
        std::fs::write(&target, content)?;
        Ok(target)
    }
}
/// Python source defining three small functions: `add`, `multiply` and `factorial`,
/// separated by blank lines.
pub const PYTHON_THREE_FUNCTIONS: &str = "def add(a, b):\n return a + b\n\ndef multiply(a, b):\n result = a * b\n return result\n\ndef factorial(n):\n if n <= 1:\n return 1\n return n * factorial(n - 1)\n";
/// Python `process_records` function; first half of an identical pair
/// (see `PYTHON_LONG_FUNCTION_B`).
pub const PYTHON_LONG_FUNCTION_A: &str = "\
def process_records(records):
results = []
for record in records:
name = record.get('name', '')
age = record.get('age', 0)
email = record.get('email', '')
if not name:
continue
if age < 0:
continue
processed = {'name': name, 'age': age, 'email': email}
results.append(processed)
return results
";
/// Same text as `PYTHON_LONG_FUNCTION_A`; second half of the identical pair.
pub const PYTHON_LONG_FUNCTION_B: &str = "\
def process_records(records):
results = []
for record in records:
name = record.get('name', '')
age = record.get('age', 0)
email = record.get('email', '')
if not name:
continue
if age < 0:
continue
processed = {'name': name, 'age': age, 'email': email}
results.append(processed)
return results
";
/// Same statement sequence as `PYTHON_LONG_FUNCTION_A` but with the function,
/// variables and dictionary keys renamed.
pub const PYTHON_LONG_FUNCTION_RENAMED: &str = "\
def handle_entries(entries):
output = []
for entry in entries:
label = entry.get('label', '')
count = entry.get('count', 0)
addr = entry.get('addr', '')
if not label:
continue
if count < 0:
continue
item = {'label': label, 'count': count, 'addr': addr}
output.append(item)
return output
";
/// Python `fibonacci` function, used as code unrelated to the record-processing fixtures.
pub const PYTHON_UNRELATED_FUNCTION: &str = "\
def fibonacci(n):
if n <= 0:
return 0
elif n == 1:
return 1
a, b = 0, 1
for _ in range(2, n + 1):
a, b = b, a + b
return b
";
/// Python `check_permissions` function; built from `if`/`for`/`return` like
/// `PYTHON_KEYWORD_OVERLAP_B` but with different identifiers and conditions.
pub const PYTHON_KEYWORD_OVERLAP_A: &str = "\
def check_permissions(user, resource):
if user.is_admin:
return True
if resource.is_public:
return True
for role in user.roles:
if role.has_access(resource):
return True
return False
";
/// Python `validate_input` function; keyword-level counterpart of
/// `PYTHON_KEYWORD_OVERLAP_A`.
pub const PYTHON_KEYWORD_OVERLAP_B: &str = "\
def validate_input(data, schema):
if data is None:
return False
if schema is None:
return True
for field in schema.fields:
if field.required and field.name not in data:
return False
return True
";
/// Four `from ... import ...` lines followed by a one-statement `compute` function.
pub const PYTHON_IMPORT_HEAVY_A: &str = "\
from os import path
from sys import argv
from collections import defaultdict
from typing import List, Dict, Optional
def compute(x):
return x * 2
";
/// Four different `from ... import ...` lines followed by a one-statement
/// `transform` function.
pub const PYTHON_IMPORT_HEAVY_B: &str = "\
from json import loads
from io import StringIO
from functools import reduce
from typing import Tuple, Set, Any
def transform(y):
return y + 1
";
/// Three-line Python function `tiny`.
pub const PYTHON_SHORT_3_LINES: &str = "\
def tiny(x):
y = x + 1
return y
";
/// Five-line Python function `medium`.
pub const PYTHON_EXACTLY_5_LINES: &str = "\
def medium(data):
result = []
for item in data:
result.append(item * 2)
return result
";
/// Copy of `PYTHON_EXACTLY_5_LINES` with the function renamed to `medium_copy`.
pub const PYTHON_EXACTLY_5_LINES_COPY: &str = "\
def medium_copy(data):
result = []
for item in data:
result.append(item * 2)
return result
";
/// One Python file holding two near-identical request handlers,
/// `handler_create` and `handler_update`.
pub const PYTHON_TWO_FUNCTIONS_SAME_FILE: &str = "\
def handler_create(request):
data = request.get_json()
if not data:
return {'error': 'No data provided'}, 400
if 'name' not in data:
return {'error': 'Name required'}, 400
result = create_item(data)
return {'id': result.id}, 201
def handler_update(request, item_id):
data = request.get_json()
if not data:
return {'error': 'No data provided'}, 400
if 'name' not in data:
return {'error': 'Name required'}, 400
result = update_item(item_id, data)
return {'id': result.id}, 200
";
/// Rust source: a `Rectangle` struct with an `impl` block providing
/// `new`, `area` and `perimeter`.
pub const RUST_IMPL_BLOCK: &str = "\
struct Rectangle {
width: f64,
height: f64,
}
impl Rectangle {
fn new(width: f64, height: f64) -> Self {
Self {
width,
height,
}
}
fn area(&self) -> f64 {
self.width * self.height
}
fn perimeter(&self) -> f64 {
2.0 * (self.width + self.height)
}
}
";
/// Python `process_data` function; baseline for the `PYTHON_TYPE3_*` fixtures.
pub const PYTHON_TYPE3_BASE: &str = "\
def process_data(data):
result = []
for item in data:
if item is None:
continue
processed = transform(item)
if processed.is_valid():
result.append(processed)
return result
";
/// `PYTHON_TYPE3_BASE` renamed to `process_data_logged` with two `print` statements inserted.
pub const PYTHON_TYPE3_WITH_LOGGING: &str = "\
def process_data_logged(data):
print('Starting processing')
result = []
for item in data:
if item is None:
continue
processed = transform(item)
print(f'Processed: {processed}')
if processed.is_valid():
result.append(processed)
return result
";
/// Python `render_template` function, unrelated to the `process_data` fixtures.
pub const PYTHON_TYPE3_UNRELATED: &str = "\
def render_template(name, context):
loader = FileSystemLoader('templates')
env = Environment(loader=loader)
template = env.get_template(name)
output = template.render(context)
return output
";
/// Python `compute_sum` function; first half of an identical pair.
pub const SEQ_IDENTICAL_A: &str = "\
def compute_sum(values):
total = 0
for v in values:
total += v
return total
";
/// Same text as `SEQ_IDENTICAL_A`.
pub const SEQ_IDENTICAL_B: &str = "\
def compute_sum(values):
total = 0
for v in values:
total += v
return total
";
/// Summation loop named `calculate_total`; renamed counterpart of `SEQ_RENAMED_B`.
pub const SEQ_RENAMED_A: &str = "\
def calculate_total(numbers):
accumulator = 0
for num in numbers:
accumulator += num
return accumulator
";
/// Summation loop named `sum_values`; same statements as `SEQ_RENAMED_A`
/// with different identifiers.
pub const SEQ_RENAMED_B: &str = "\
def sum_values(items):
result = 0
for item in items:
result += item
return result
";
/// Python `fetch_data` function: one request, status check, JSON decode.
pub const SEQ_GAPPED_A: &str = "\
def fetch_data(url):
response = requests.get(url)
if response.status_code != 200:
raise Exception('Failed')
data = response.json()
return data
";
/// Retry variant of `SEQ_GAPPED_A`: the request is made inside a
/// `for attempt in range(3)` loop and the failure is raised after the loop.
pub const SEQ_GAPPED_B: &str = "\
def fetch_data_with_retry(url):
for attempt in range(3):
response = requests.get(url)
if response.status_code != 200:
continue
data = response.json()
return data
raise Exception('All retries failed')
";
/// Python function `sort_descending` with a single `return` statement.
pub const SEQ_DIFFERENT_A: &str = "\
def sort_descending(items):
return sorted(items, reverse=True)
";
/// Python class `DatabaseConnection` with an `__init__` method; unrelated to `SEQ_DIFFERENT_A`.
pub const SEQ_DIFFERENT_B: &str = "\
class DatabaseConnection:
def __init__(self, host, port):
self.host = host
self.port = port
self.connected = False
";
}
#[cfg(test)]
mod accurate_line_numbers {
    //! Checks that reported fragment line numbers stay within the fixture files
    //! (the failure messages refer to this as BUG-1).
    use super::v2_fixtures::*;
    use crate::analysis::clones::{detect_clones, ClonesOptions};

    /// Options shared by every test in this module: the given language with
    /// `min_tokens = 5` and `min_lines = 1`.
    fn options_for(language: &str) -> ClonesOptions {
        ClonesOptions {
            language: Some(language.to_string()),
            min_tokens: 5,
            min_lines: 1,
            ..Default::default()
        }
    }

    /// Two copies of the three-function Python fixture must produce pairs whose
    /// line numbers lie in `1..=11` and are correctly ordered.
    #[test]
    fn test_python_function_line_numbers_match_tree_sitter() {
        let workspace = V2TestDir::new().unwrap();
        workspace.write_file("a.py", PYTHON_THREE_FUNCTIONS).unwrap();
        workspace.write_file("b.py", PYTHON_THREE_FUNCTIONS).unwrap();
        let report = detect_clones(workspace.path(), &options_for("python")).unwrap();
        assert!(
            !report.clone_pairs.is_empty(),
            "Expected clone pairs between identical files"
        );
        for pair in &report.clone_pairs {
            let first = &pair.fragment1;
            let second = &pair.fragment2;
            assert!(
                first.start_line >= 1,
                "start_line must be >= 1, got {}",
                first.start_line
            );
            assert!(
                first.end_line <= 11,
                "end_line must be <= 11 (actual file length), got {}. \
                 This suggests fabricated line numbers (BUG-1).",
                first.end_line
            );
            assert!(
                first.start_line <= first.end_line,
                "start_line ({}) must be <= end_line ({})",
                first.start_line,
                first.end_line
            );
            assert!(
                second.start_line >= 1 && second.end_line <= 11,
                "fragment2 line numbers out of bounds: {}-{}",
                second.start_line,
                second.end_line
            );
        }
    }

    /// Fragments reported for two copies of the Rust `impl` fixture must not
    /// end beyond line 22. An empty result passes vacuously, as before.
    #[test]
    fn test_rust_impl_block_method_line_ranges() {
        let workspace = V2TestDir::new().unwrap();
        workspace.write_file("a.rs", RUST_IMPL_BLOCK).unwrap();
        workspace.write_file("b.rs", RUST_IMPL_BLOCK).unwrap();
        let report = detect_clones(workspace.path(), &options_for("rust")).unwrap();
        for pair in &report.clone_pairs {
            assert!(
                pair.fragment1.end_line <= 22,
                "Rust fragment end_line {} exceeds file length 22 (BUG-1 fabrication)",
                pair.fragment1.end_line
            );
            assert!(
                pair.fragment2.end_line <= 22,
                "Rust fragment2 end_line {} exceeds file length 22",
                pair.fragment2.end_line
            );
        }
    }

    /// Fragments for the long Python function must end within the file and
    /// span at least five lines.
    #[test]
    fn test_line_numbers_not_derived_from_token_count() {
        let workspace = V2TestDir::new().unwrap();
        workspace.write_file("a.py", PYTHON_LONG_FUNCTION_A).unwrap();
        workspace.write_file("b.py", PYTHON_LONG_FUNCTION_B).unwrap();
        let report = detect_clones(workspace.path(), &options_for("python")).unwrap();
        assert!(
            !report.clone_pairs.is_empty(),
            "Expected at least one clone pair"
        );
        for pair in &report.clone_pairs {
            let frag = &pair.fragment1;
            assert!(
                frag.end_line <= 14,
                "Fragment end_line {} exceeds actual file line count 14. \
                 Likely derived from token count, not tree-sitter. (BUG-1)",
                frag.end_line
            );
            // Validate ordering before subtracting: with an inverted range the
            // subtraction below would abort with an arithmetic-overflow panic
            // (in builds with overflow checks) instead of a useful message.
            assert!(
                frag.start_line <= frag.end_line,
                "start_line ({}) must be <= end_line ({})",
                frag.start_line,
                frag.end_line
            );
            let line_count = frag.end_line - frag.start_line + 1;
            assert!(
                line_count >= 5,
                "A 14-line function should produce a fragment of at least 5 lines, got {}",
                line_count
            );
        }
    }
}
#[cfg(test)]
mod function_level_extraction {
    //! Tests that clones are detected per function: identical and renamed
    //! functions match, unrelated ones and import blocks do not.
    use super::v2_fixtures::*;
    use crate::analysis::clones::{detect_clones, CloneType, ClonesOptions};

    /// Python-only options; each test sets its own thresholds via struct update.
    fn python_defaults() -> ClonesOptions {
        ClonesOptions {
            language: Some("python".to_string()),
            ..Default::default()
        }
    }

    /// Two files with the same function yield exactly one Type-1 pair with similarity ~1.0.
    #[test]
    fn test_identical_functions_detected_as_clone() {
        let workspace = V2TestDir::new().unwrap();
        workspace
            .write_file("src/a.py", PYTHON_LONG_FUNCTION_A)
            .unwrap();
        workspace
            .write_file("src/b.py", PYTHON_LONG_FUNCTION_B)
            .unwrap();
        let options = ClonesOptions {
            min_tokens: 10,
            min_lines: 3,
            ..python_defaults()
        };
        let report = detect_clones(workspace.path(), &options).unwrap();
        assert_eq!(
            report.clone_pairs.len(),
            1,
            "Two identical functions should produce exactly 1 clone pair"
        );
        let only_pair = &report.clone_pairs[0];
        assert_eq!(
            only_pair.clone_type,
            CloneType::Type1,
            "Identical functions should be Type-1"
        );
        let distance_from_one = (only_pair.similarity - 1.0).abs();
        assert!(
            distance_from_one < 1e-6,
            "Type-1 clone should have similarity ~1.0, got {}",
            only_pair.similarity
        );
    }

    /// The record-processing function and `fibonacci` must not be paired.
    #[test]
    fn test_different_functions_not_detected() {
        let workspace = V2TestDir::new().unwrap();
        workspace
            .write_file("src/a.py", PYTHON_LONG_FUNCTION_A)
            .unwrap();
        workspace
            .write_file("src/b.py", PYTHON_UNRELATED_FUNCTION)
            .unwrap();
        let options = ClonesOptions {
            min_tokens: 10,
            min_lines: 3,
            ..python_defaults()
        };
        let report = detect_clones(workspace.path(), &options).unwrap();
        assert_eq!(
            report.clone_pairs.len(),
            0,
            "Completely different functions should NOT be detected as clones"
        );
    }

    /// Every reported fragment must start on a `def` line of the fixture (lines 1, 4 or 8).
    #[test]
    fn test_fragment_boundaries_are_syntactic() {
        let workspace = V2TestDir::new().unwrap();
        workspace.write_file("a.py", PYTHON_THREE_FUNCTIONS).unwrap();
        workspace.write_file("b.py", PYTHON_THREE_FUNCTIONS).unwrap();
        let options = ClonesOptions {
            min_tokens: 3,
            min_lines: 1,
            ..python_defaults()
        };
        let report = detect_clones(workspace.path(), &options).unwrap();
        let valid_starts = [1, 4, 8];
        for pair in &report.clone_pairs {
            let start = pair.fragment1.start_line;
            assert!(
                valid_starts.contains(&start),
                "Fragment start_line {} does not align with any function definition \
                 start (expected one of {:?}). This suggests token-window fragmentation \
                 instead of syntactic extraction (BUG-6).",
                start,
                valid_starts
            );
        }
    }

    /// Files that resemble each other only in their import blocks must not be paired.
    #[test]
    fn test_import_differences_not_false_positive() {
        let workspace = V2TestDir::new().unwrap();
        workspace
            .write_file("src/a.py", PYTHON_IMPORT_HEAVY_A)
            .unwrap();
        workspace
            .write_file("src/b.py", PYTHON_IMPORT_HEAVY_B)
            .unwrap();
        let options = ClonesOptions {
            min_tokens: 3,
            min_lines: 1,
            ..python_defaults()
        };
        let report = detect_clones(workspace.path(), &options).unwrap();
        assert_eq!(
            report.clone_pairs.len(),
            0,
            "Files with different functions but similar import patterns \
             should NOT be detected as clones"
        );
    }

    /// A function with renamed identifiers yields one Type-1/Type-2 pair with similarity >= 0.9.
    #[test]
    fn test_renamed_identifiers_detected_as_type2() {
        let workspace = V2TestDir::new().unwrap();
        workspace
            .write_file("src/a.py", PYTHON_LONG_FUNCTION_A)
            .unwrap();
        workspace
            .write_file("src/b.py", PYTHON_LONG_FUNCTION_RENAMED)
            .unwrap();
        let options = ClonesOptions {
            min_tokens: 10,
            min_lines: 3,
            ..python_defaults()
        };
        let report = detect_clones(workspace.path(), &options).unwrap();
        assert_eq!(
            report.clone_pairs.len(),
            1,
            "Functions with renamed identifiers should produce 1 clone pair"
        );
        let only_pair = &report.clone_pairs[0];
        assert!(
            matches!(only_pair.clone_type, CloneType::Type1 | CloneType::Type2),
            "Renamed identifiers should be at least Type-2, got {:?}",
            only_pair.clone_type
        );
        assert!(
            only_pair.similarity >= 0.9,
            "Type-2 clone should have similarity >= 0.9, got {}",
            only_pair.similarity
        );
    }
}
#[cfg(test)]
mod no_false_positives {
    //! Negative tests: inputs that share only keywords, import shapes or
    //! nothing at all must produce zero clone pairs.
    use super::v2_fixtures::*;
    use crate::analysis::clones::{detect_clones, ClonesOptions, NormalizationMode};

    /// Python-only options; each test sets its own thresholds via struct update.
    fn python_defaults() -> ClonesOptions {
        ClonesOptions {
            language: Some("python".to_string()),
            ..Default::default()
        }
    }

    /// Shared `if`/`for`/`return` keywords alone must not produce a match.
    #[test]
    fn test_keyword_overlap_below_threshold() {
        let workspace = V2TestDir::new().unwrap();
        workspace
            .write_file("src/a.py", PYTHON_KEYWORD_OVERLAP_A)
            .unwrap();
        workspace
            .write_file("src/b.py", PYTHON_KEYWORD_OVERLAP_B)
            .unwrap();
        let options = ClonesOptions {
            min_tokens: 10,
            min_lines: 3,
            ..python_defaults()
        };
        let report = detect_clones(workspace.path(), &options).unwrap();
        assert_eq!(
            report.clone_pairs.len(),
            0,
            "Functions with same keywords but different structure/identifiers \
             should NOT be detected as clones. This is the BUG-4 false positive."
        );
    }

    /// Import-heavy files with different function bodies must not match.
    #[test]
    fn test_import_pattern_not_clones() {
        let workspace = V2TestDir::new().unwrap();
        workspace
            .write_file("src/a.py", PYTHON_IMPORT_HEAVY_A)
            .unwrap();
        workspace
            .write_file("src/b.py", PYTHON_IMPORT_HEAVY_B)
            .unwrap();
        let options = ClonesOptions {
            min_tokens: 3,
            min_lines: 1,
            ..python_defaults()
        };
        let report = detect_clones(workspace.path(), &options).unwrap();
        assert_eq!(
            report.clone_pairs.len(),
            0,
            "Import-heavy files with different function bodies must not match"
        );
    }

    /// Even with `NormalizationMode::All`, the keyword-overlap fixtures must not match.
    #[test]
    fn test_different_structure_same_keywords_not_clones() {
        let workspace = V2TestDir::new().unwrap();
        workspace
            .write_file("src/check.py", PYTHON_KEYWORD_OVERLAP_A)
            .unwrap();
        workspace
            .write_file("src/validate.py", PYTHON_KEYWORD_OVERLAP_B)
            .unwrap();
        let options = ClonesOptions {
            min_tokens: 5,
            min_lines: 3,
            normalization: NormalizationMode::All,
            ..python_defaults()
        };
        let report = detect_clones(workspace.path(), &options).unwrap();
        assert_eq!(
            report.clone_pairs.len(),
            0,
            "Even with normalization=All, structurally different functions \
             should NOT be matched. v2 REQ-7 requires raw tokens for Dice."
        );
    }

    /// Three mutually unrelated functions must yield no pairs.
    #[test]
    fn test_unrelated_functions_no_match() {
        let workspace = V2TestDir::new().unwrap();
        workspace
            .write_file("src/process.py", PYTHON_LONG_FUNCTION_A)
            .unwrap();
        workspace
            .write_file("src/fibonacci.py", PYTHON_UNRELATED_FUNCTION)
            .unwrap();
        workspace
            .write_file("src/render.py", PYTHON_TYPE3_UNRELATED)
            .unwrap();
        let options = ClonesOptions {
            min_tokens: 5,
            min_lines: 3,
            ..python_defaults()
        };
        let report = detect_clones(workspace.path(), &options).unwrap();
        assert_eq!(
            report.clone_pairs.len(),
            0,
            "Three unrelated functions should produce zero clone pairs"
        );
    }
}
#[cfg(test)]
mod preview_populated {
    //! Checks that clone fragments carry a source preview (the failure
    //! messages refer to a missing preview as BUG-5).
    use super::v2_fixtures::*;
    use crate::analysis::clones::{detect_clones, ClonesOptions};

    /// Options used by every test here: Python, `min_tokens = 10`, `min_lines = 3`.
    fn python_options() -> ClonesOptions {
        ClonesOptions {
            language: Some("python".to_string()),
            min_tokens: 10,
            min_lines: 3,
            ..Default::default()
        }
    }

    /// Creates a workspace holding the identical long-function pair as
    /// `src/a.py` and `src/b.py`.
    fn identical_pair_workspace() -> V2TestDir {
        let workspace = V2TestDir::new().unwrap();
        workspace
            .write_file("src/a.py", PYTHON_LONG_FUNCTION_A)
            .unwrap();
        workspace
            .write_file("src/b.py", PYTHON_LONG_FUNCTION_B)
            .unwrap();
        workspace
    }

    /// Both fragments of every pair must have `preview` set.
    #[test]
    fn test_all_fragments_have_preview() {
        let workspace = identical_pair_workspace();
        let report = detect_clones(workspace.path(), &python_options()).unwrap();
        assert!(
            !report.clone_pairs.is_empty(),
            "Need at least one pair to test preview"
        );
        for (i, pair) in report.clone_pairs.iter().enumerate() {
            assert!(
                pair.fragment1.preview.is_some(),
                "Clone pair {} fragment1 has preview=None (BUG-5 not fixed)",
                i + 1
            );
            assert!(
                pair.fragment2.preview.is_some(),
                "Clone pair {} fragment2 has preview=None (BUG-5 not fixed)",
                i + 1
            );
        }
    }

    /// The preview of the first fragment must mention the fixture's function name.
    #[test]
    fn test_preview_contains_source_code() {
        let workspace = identical_pair_workspace();
        let report = detect_clones(workspace.path(), &python_options()).unwrap();
        assert!(!report.clone_pairs.is_empty());
        let preview = report.clone_pairs[0].fragment1.preview.as_ref().unwrap();
        // A single substring check suffices: any preview containing
        // "def process_records" also contains "process_records".
        assert!(
            preview.contains("process_records"),
            "Preview should contain actual source code from the fragment, \
             got: {:?}",
            preview
        );
    }

    /// Previews, when present, must be at most 103 characters long on both
    /// fragments of every pair.
    #[test]
    fn test_preview_truncated_to_100_chars() {
        let workspace = identical_pair_workspace();
        let report = detect_clones(workspace.path(), &python_options()).unwrap();
        assert!(!report.clone_pairs.is_empty());
        for pair in &report.clone_pairs {
            for fragment in [&pair.fragment1, &pair.fragment2] {
                if let Some(preview) = &fragment.preview {
                    // Count characters, not bytes, so the check matches the
                    // unit named in the message.
                    // NOTE(review): 103 presumably allows 100 chars plus a
                    // three-character ellipsis - confirm against the truncation code.
                    let char_count = preview.chars().count();
                    assert!(
                        char_count <= 103,
                        "Preview should be truncated to ~100 chars, got {} chars",
                        char_count
                    );
                }
            }
        }
    }
}
#[cfg(test)]
mod include_within_file {
    //! Tests for the `include_within_file` option (the failure messages refer
    //! to leaked same-file pairs as BUG-3).
    use super::v2_fixtures::*;
    use crate::analysis::clones::{detect_clones, ClonesOptions};

    /// Creates a workspace whose only file, `src/handlers.py`, holds two
    /// near-identical handlers.
    fn handlers_workspace() -> V2TestDir {
        let workspace = V2TestDir::new().unwrap();
        workspace
            .write_file("src/handlers.py", PYTHON_TWO_FUNCTIONS_SAME_FILE)
            .unwrap();
        workspace
    }

    /// With the option off, no reported pair may have both fragments in one file.
    #[test]
    fn test_within_file_false_excludes_all_same_file() {
        let workspace = handlers_workspace();
        let options = ClonesOptions {
            language: Some("python".to_string()),
            min_tokens: 10,
            min_lines: 3,
            include_within_file: false,
            ..Default::default()
        };
        let report = detect_clones(workspace.path(), &options).unwrap();
        for pair in &report.clone_pairs {
            assert_ne!(
                pair.fragment1.file, pair.fragment2.file,
                "include_within_file=false should exclude ALL same-file pairs, \
                 but found pair with both fragments in {:?} (BUG-3)",
                pair.fragment1.file
            );
        }
    }

    /// With the option on, the two handlers must be reported as a same-file pair.
    #[test]
    fn test_within_file_true_includes_same_file_pairs() {
        let workspace = handlers_workspace();
        let options = ClonesOptions {
            language: Some("python".to_string()),
            min_tokens: 10,
            min_lines: 3,
            include_within_file: true,
            ..Default::default()
        };
        let report = detect_clones(workspace.path(), &options).unwrap();
        assert!(
            !report.clone_pairs.is_empty(),
            "include_within_file=true should detect similar non-overlapping \
             functions in the same file"
        );
        let mut pairs = report.clone_pairs.iter();
        let has_same_file_pair = pairs.any(|pair| pair.fragment1.file == pair.fragment2.file);
        assert!(
            has_same_file_pair,
            "Expected at least one same-file clone pair with include_within_file=true"
        );
    }

    /// Same-file pairs must never have overlapping line ranges.
    #[test]
    fn test_overlapping_same_file_always_excluded() {
        let workspace = handlers_workspace();
        let options = ClonesOptions {
            language: Some("python".to_string()),
            min_tokens: 5,
            min_lines: 1,
            include_within_file: true,
            ..Default::default()
        };
        let report = detect_clones(workspace.path(), &options).unwrap();
        let same_file_pairs = report
            .clone_pairs
            .iter()
            .filter(|pair| pair.fragment1.file == pair.fragment2.file);
        for pair in same_file_pairs {
            let (f1_start, f1_end) = (pair.fragment1.start_line, pair.fragment1.end_line);
            let (f2_start, f2_end) = (pair.fragment2.start_line, pair.fragment2.end_line);
            // Two inclusive ranges are disjoint when one ends before the other starts.
            let disjoint = f1_start > f2_end || f2_start > f1_end;
            assert!(
                disjoint,
                "Same-file overlapping fragments must always be excluded. \
                 Got overlap: [{}-{}] and [{}-{}] in {:?}",
                f1_start, f1_end, f2_start, f2_end, pair.fragment1.file
            );
        }
    }
}
#[cfg(test)]
mod min_lines_enforced {
    //! Tests that `min_lines` filters out short fragments (the failure
    //! messages refer to an ignored `min_lines` as BUG-2).
    use super::v2_fixtures::*;
    use crate::analysis::clones::{detect_clones, ClonesOptions};

    /// Options used by every test here: Python, `min_tokens = 3`, `min_lines = 5`.
    fn min_five_lines_options() -> ClonesOptions {
        ClonesOptions {
            language: Some("python".to_string()),
            min_tokens: 3,
            min_lines: 5,
            ..Default::default()
        }
    }

    /// Two copies of a 3-line function must not be reported when `min_lines = 5`.
    #[test]
    fn test_min_lines_excludes_short_fragments() {
        let workspace = V2TestDir::new().unwrap();
        workspace
            .write_file("src/a.py", PYTHON_SHORT_3_LINES)
            .unwrap();
        workspace
            .write_file("src/b.py", PYTHON_SHORT_3_LINES)
            .unwrap();
        let options = ClonesOptions {
            include_within_file: false,
            ..min_five_lines_options()
        };
        let report = detect_clones(workspace.path(), &options).unwrap();
        assert_eq!(
            report.clone_pairs.len(),
            0,
            "A 3-line clone pair should NOT be reported when min_lines=5. \
             This suggests min_lines is being ignored (BUG-2)."
        );
    }

    /// Same check with inline fixtures whose function names differ.
    #[test]
    fn test_3_line_clone_not_reported_with_min_lines_5() {
        let workspace = V2TestDir::new().unwrap();
        let short_a = "def tiny_a(x):\n y = x + 1\n return y\n";
        let short_b = "def tiny_b(x):\n y = x + 1\n return y\n";
        workspace.write_file("src/a.py", short_a).unwrap();
        workspace.write_file("src/b.py", short_b).unwrap();
        let report = detect_clones(workspace.path(), &min_five_lines_options()).unwrap();
        assert_eq!(
            report.clone_pairs.len(),
            0,
            "3-line functions must be excluded when min_lines=5"
        );
    }

    /// Boundary case: functions of exactly five lines must still be reported.
    #[test]
    fn test_exactly_min_lines_included() {
        let workspace = V2TestDir::new().unwrap();
        workspace
            .write_file("src/a.py", PYTHON_EXACTLY_5_LINES)
            .unwrap();
        workspace
            .write_file("src/b.py", PYTHON_EXACTLY_5_LINES_COPY)
            .unwrap();
        let report = detect_clones(workspace.path(), &min_five_lines_options()).unwrap();
        assert!(
            !report.clone_pairs.is_empty(),
            "5-line functions should be included when min_lines=5 (boundary condition)"
        );
    }

    /// Every fragment of every reported pair must span at least five lines.
    #[test]
    fn test_all_fragments_respect_min_lines() {
        let workspace = V2TestDir::new().unwrap();
        workspace
            .write_file("src/a.py", PYTHON_LONG_FUNCTION_A)
            .unwrap();
        workspace
            .write_file("src/b.py", PYTHON_LONG_FUNCTION_B)
            .unwrap();
        workspace
            .write_file("src/tiny.py", PYTHON_SHORT_3_LINES)
            .unwrap();
        let report = detect_clones(workspace.path(), &min_five_lines_options()).unwrap();
        for pair in &report.clone_pairs {
            let labelled = [("Fragment1", &pair.fragment1), ("Fragment2", &pair.fragment2)];
            for (label, fragment) in labelled {
                // Guard the subtraction below: an inverted range would
                // otherwise abort with an arithmetic-overflow panic (in builds
                // with overflow checks) rather than a readable failure.
                assert!(
                    fragment.start_line <= fragment.end_line,
                    "{} has start_line ({}) after end_line ({})",
                    label,
                    fragment.start_line,
                    fragment.end_line
                );
                let span = fragment.end_line - fragment.start_line + 1;
                assert!(
                    span >= 5,
                    "{} has {} lines, below min_lines=5 (BUG-2)",
                    label,
                    span
                );
            }
        }
    }
}
#[cfg(test)]
mod sequence_matching {
    //! Tests for clone classification (Type-1/2/3) and similarity ranges on
    //! small Python fixtures.
    use super::v2_fixtures::*;
    use crate::analysis::clones::{detect_clones, CloneType, ClonesOptions};

    /// Python-only options; each test sets its own thresholds via struct update.
    fn python_defaults() -> ClonesOptions {
        ClonesOptions {
            language: Some("python".to_string()),
            ..Default::default()
        }
    }

    /// Identical functions: exactly one Type-1 pair with similarity 1.0.
    #[test]
    fn test_identical_sequences_type1() {
        let workspace = V2TestDir::new().unwrap();
        workspace.write_file("src/a.py", SEQ_IDENTICAL_A).unwrap();
        workspace.write_file("src/b.py", SEQ_IDENTICAL_B).unwrap();
        let options = ClonesOptions {
            min_tokens: 5,
            min_lines: 3,
            ..python_defaults()
        };
        let report = detect_clones(workspace.path(), &options).unwrap();
        assert_eq!(report.clone_pairs.len(), 1, "Expected exactly 1 clone pair");
        let only_pair = &report.clone_pairs[0];
        assert_eq!(only_pair.clone_type, CloneType::Type1);
        let distance_from_one = (only_pair.similarity - 1.0).abs();
        assert!(
            distance_from_one < 1e-6,
            "Type-1 should have similarity == 1.0, got {}",
            only_pair.similarity
        );
    }

    /// Renamed identifiers: exactly one Type-1/Type-2 pair with similarity >= 0.9.
    #[test]
    fn test_renamed_identifiers_type2() {
        let workspace = V2TestDir::new().unwrap();
        workspace.write_file("src/a.py", SEQ_RENAMED_A).unwrap();
        workspace.write_file("src/b.py", SEQ_RENAMED_B).unwrap();
        let options = ClonesOptions {
            min_tokens: 5,
            min_lines: 3,
            ..python_defaults()
        };
        let report = detect_clones(workspace.path(), &options).unwrap();
        assert_eq!(report.clone_pairs.len(), 1, "Expected exactly 1 clone pair");
        let only_pair = &report.clone_pairs[0];
        assert!(
            matches!(only_pair.clone_type, CloneType::Type1 | CloneType::Type2),
            "Renamed identifiers should be Type-2 (or Type-1 after normalization), got {:?}",
            only_pair.clone_type
        );
        assert!(
            only_pair.similarity >= 0.9,
            "Type-2 should have similarity >= 0.9, got {}",
            only_pair.similarity
        );
    }

    /// If the gapped pair is detected at all, it must be Type-3 with
    /// similarity in `[0.7, 0.9)`. No detection is accepted.
    #[test]
    fn test_gapped_sequences_type3() {
        let workspace = V2TestDir::new().unwrap();
        workspace.write_file("src/a.py", SEQ_GAPPED_A).unwrap();
        workspace.write_file("src/b.py", SEQ_GAPPED_B).unwrap();
        let options = ClonesOptions {
            min_tokens: 5,
            min_lines: 3,
            ..python_defaults()
        };
        let report = detect_clones(workspace.path(), &options).unwrap();
        if let Some(first_pair) = report.clone_pairs.first() {
            assert_eq!(
                first_pair.clone_type,
                CloneType::Type3,
                "Gapped clone should be Type-3, got {:?}",
                first_pair.clone_type
            );
            assert!(
                first_pair.similarity >= 0.7 && first_pair.similarity < 0.9,
                "Type-3 should have 0.7 <= similarity < 0.9, got {}",
                first_pair.similarity
            );
        }
    }

    /// A one-line function and an unrelated class must not be paired.
    #[test]
    fn test_completely_different_no_match() {
        let workspace = V2TestDir::new().unwrap();
        workspace.write_file("src/a.py", SEQ_DIFFERENT_A).unwrap();
        workspace.write_file("src/b.py", SEQ_DIFFERENT_B).unwrap();
        let options = ClonesOptions {
            min_tokens: 3,
            min_lines: 1,
            ..python_defaults()
        };
        let report = detect_clones(workspace.path(), &options).unwrap();
        assert_eq!(
            report.clone_pairs.len(),
            0,
            "Completely different code should produce zero clone pairs"
        );
    }

    /// If the logging-augmented pair is detected (threshold 0.7), it must be
    /// Type-2/Type-3 with similarity >= 0.7. No detection is accepted.
    #[test]
    fn test_type3_logging_augmented() {
        let workspace = V2TestDir::new().unwrap();
        workspace
            .write_file("src/base.py", PYTHON_TYPE3_BASE)
            .unwrap();
        workspace
            .write_file("src/logged.py", PYTHON_TYPE3_WITH_LOGGING)
            .unwrap();
        let options = ClonesOptions {
            min_tokens: 5,
            min_lines: 3,
            threshold: 0.7,
            ..python_defaults()
        };
        let report = detect_clones(workspace.path(), &options).unwrap();
        if let Some(first_pair) = report.clone_pairs.first() {
            assert!(
                first_pair.similarity >= 0.7,
                "Logging-augmented function should have similarity >= 0.7, got {}",
                first_pair.similarity
            );
            assert!(
                matches!(first_pair.clone_type, CloneType::Type2 | CloneType::Type3),
                "Expected Type-3 or Type-2 for logging-augmented clone"
            );
        }
    }
}
#[cfg(test)]
mod json_serialization {
    //! Tests pinning the JSON shape of the clone report types: serde renames,
    //! omitted optional fields, and a full round trip.
    use crate::analysis::clones::{
        CloneConfig, CloneFragment, ClonePair, CloneStats, CloneType, ClonesReport,
        NormalizationMode,
    };
    use std::path::PathBuf;

    /// An empty report omits `clone_classes` but keeps the other top-level keys.
    #[test]
    fn test_clones_report_serialization_format() {
        let empty_report = ClonesReport {
            root: PathBuf::from("/tmp/test"),
            language: "auto".to_string(),
            clone_pairs: Vec::new(),
            clone_classes: Vec::new(),
            stats: CloneStats::default(),
            config: CloneConfig::default(),
        };
        let json = serde_json::to_value(&empty_report).unwrap();
        let classes = json.get("clone_classes");
        assert!(
            classes.is_none(),
            "clone_classes should be absent from JSON when empty, not {:?}",
            classes
        );
        for key in ["root", "language", "clone_pairs", "stats", "config"] {
            assert!(json.get(key).is_some(), "missing top-level key {:?}", key);
        }
    }

    /// Each `CloneType` variant serializes to its hyphenated name and parses back from it.
    #[test]
    fn test_clone_type_serde_renames() {
        let cases = [
            (
                CloneType::Type1,
                "\"Type-1\"",
                "Type1 should serialize as \"Type-1\"",
            ),
            (
                CloneType::Type2,
                "\"Type-2\"",
                "Type2 should serialize as \"Type-2\"",
            ),
            (
                CloneType::Type3,
                "\"Type-3\"",
                "Type3 should serialize as \"Type-3\"",
            ),
        ];
        for (variant, wire, message) in cases {
            let encoded = serde_json::to_string(&variant).unwrap();
            assert_eq!(encoded, wire, "{}", message);
            let decoded: CloneType = serde_json::from_str(wire).unwrap();
            assert_eq!(decoded, variant);
        }
    }

    /// `NormalizationMode` variants serialize to lowercase names.
    #[test]
    fn test_normalization_mode_serde() {
        let cases = [
            (NormalizationMode::None, "\"none\""),
            (NormalizationMode::Identifiers, "\"identifiers\""),
            (NormalizationMode::Literals, "\"literals\""),
            (NormalizationMode::All, "\"all\""),
        ];
        for (mode, wire) in cases {
            let encoded = serde_json::to_string(&mode).unwrap();
            assert_eq!(encoded, wire);
        }
    }

    /// A fresh fragment omits `function` and `preview` but includes `lines`.
    #[test]
    fn test_optional_fields_absent_not_null() {
        let bare_fragment = CloneFragment::new(PathBuf::from("test.py"), 1, 10, 50);
        let json = serde_json::to_value(&bare_fragment).unwrap();
        let omitted = [
            ("function", "function=None should be absent, not null"),
            ("preview", "preview=None should be absent, not null"),
        ];
        for (key, message) in omitted {
            assert!(json.get(key).is_none(), "{}", message);
        }
        assert!(
            json.get("lines").is_some(),
            "lines should be present when computed"
        );
    }

    /// Despite its name, this asserts that a pair built with `ClonePair::new`
    /// serializes WITH an `interpretation` field.
    #[test]
    fn test_interpretation_absent_when_none() {
        let left = CloneFragment::new(PathBuf::from("a.py"), 1, 5, 20);
        let right = CloneFragment::new(PathBuf::from("b.py"), 1, 5, 20);
        let pair = ClonePair::new(1, CloneType::Type1, 1.0, left, right);
        let json = serde_json::to_value(&pair).unwrap();
        assert!(
            json.get("interpretation").is_some(),
            "interpretation should be set by ClonePair::new"
        );
    }

    /// Default stats omit `class_count`.
    #[test]
    fn test_class_count_absent_when_none() {
        let json = serde_json::to_value(&CloneStats::default()).unwrap();
        assert!(
            json.get("class_count").is_none(),
            "class_count=None should be absent from JSON"
        );
    }

    /// Default config omits `type_filter`.
    #[test]
    fn test_type_filter_absent_when_none() {
        let json = serde_json::to_value(&CloneConfig::default()).unwrap();
        assert!(
            json.get("type_filter").is_none(),
            "type_filter=None should be absent from JSON"
        );
    }

    /// A populated report survives pretty-printing and parsing back.
    #[test]
    fn test_full_report_round_trip() {
        let left = CloneFragment::new(PathBuf::from("a.py"), 1, 10, 30)
            .with_preview("def foo():".to_string());
        let right = CloneFragment::new(PathBuf::from("b.py"), 5, 14, 30)
            .with_preview("def bar():".to_string());
        let original = ClonesReport {
            root: PathBuf::from("/tmp/project"),
            language: "python".to_string(),
            clone_pairs: vec![ClonePair::new(1, CloneType::Type2, 0.95, left, right)],
            clone_classes: Vec::new(),
            stats: CloneStats {
                files_analyzed: 2,
                total_tokens: 60,
                clones_found: 1,
                type1_count: 0,
                type2_count: 1,
                type3_count: 0,
                class_count: None,
                detection_time_ms: 42,
            },
            config: CloneConfig::default(),
        };
        let encoded = serde_json::to_string_pretty(&original).unwrap();
        let restored: ClonesReport = serde_json::from_str(&encoded).unwrap();
        assert_eq!(restored.root, original.root);
        assert_eq!(restored.language, original.language);
        assert_eq!(restored.clone_pairs.len(), 1);
        assert_eq!(restored.clone_pairs[0].clone_type, CloneType::Type2);
        assert_eq!(restored.stats.files_analyzed, 2);
        assert_eq!(restored.stats.clones_found, 1);
    }
}
#[cfg(test)]
mod preserved_behaviors {
use crate::analysis::clones::{
classify_clone_type, is_generated_file, is_test_file, CloneType, ClonesOptions,
NormalizationMode,
};
use std::path::Path;
/// Paths expected to be classified as test files; per the test name, the
/// match is meant to come from a directory component.
#[test]
fn test_is_test_file_directory_patterns() {
    let expected_test_paths = [
        "project/tests/test_foo.py",
        "project/test/helper.py",
        "src/__tests__/component.test.js",
        "spec/models/user_spec.rb",
        "testing/integration.py",
    ];
    for candidate in expected_test_paths {
        assert!(
            is_test_file(Path::new(candidate)),
            "expected {:?} to be classified as a test file",
            candidate
        );
    }
}
/// Bare file names expected to be classified as test files.
#[test]
fn test_is_test_file_name_patterns() {
    let expected_test_names = [
        "test_utils.py",
        "auth_test.py",
        "handler_test.go",
        "parser_test.rs",
        "model_spec.rb",
        "button.test.ts",
        "button.test.js",
        "api.spec.ts",
        "api.spec.js",
        "UserTest.java",
        "UserTests.cs",
    ];
    for candidate in expected_test_names {
        assert!(
            is_test_file(Path::new(candidate)),
            "expected {:?} to be classified as a test file",
            candidate
        );
    }
}
/// Ordinary source paths must not be classified as test files.
#[test]
fn test_is_test_file_non_test_files() {
    let ordinary_sources = [
        "src/main.py",
        "src/utils.rs",
        "lib/handler.go",
        "app/models/user.py",
    ];
    for candidate in ordinary_sources {
        assert!(
            !is_test_file(Path::new(candidate)),
            "expected {:?} NOT to be classified as a test file",
            candidate
        );
    }
}
/// Paths expected to be classified as generated; per the test name, the
/// match is meant to come from the leading directory.
#[test]
fn test_is_generated_file_directory_patterns() {
    let expected_generated_paths = [
        "vendor/lib/foo.py",
        "node_modules/pkg/index.js",
        "__pycache__/module.pyc",
        "dist/bundle.js",
        "build/output.js",
        "target/debug/main.rs",
        "gen/proto.go",
        "generated/types.ts",
        ".gen/schema.rs",
        "third_party/lib.go",
        "external/dep.rs",
    ];
    for candidate in expected_generated_paths {
        assert!(
            is_generated_file(Path::new(candidate)),
            "expected {:?} to be classified as generated",
            candidate
        );
    }
}
/// File names expected to be classified as generated; per the test name,
/// the match is meant to come from the name's suffix.
#[test]
fn test_is_generated_file_suffix_patterns() {
    let expected_generated_names = [
        "api.pb.go",
        "schema_pb2.py",
        "types.pb.ts",
        "types.pb.js",
        "types.pb.rs",
        "api_grpc.pb.go",
        "api_pb2_grpc.py",
        "schema.generated.ts",
        "schema.generated.tsx",
        "schema.generated.js",
        "query.graphql.ts",
        "types_generated.go",
        "types_generated.ts",
        "types_generated.rs",
        "types_generated.py",
        "schema.gen.go",
        "schema.gen.ts",
        "schema.gen.rs",
        "client_mock.go",
        "service_mocks.go",
        "service.thrift.go",
    ];
    for candidate in expected_generated_names {
        assert!(
            is_generated_file(Path::new(candidate)),
            "expected {:?} to be classified as generated",
            candidate
        );
    }
}
/// Bare file names starting with a generated/mock prefix must be reported as
/// generated; the last two entries repeat earlier prefixes in mixed and upper
/// case.
#[test]
fn test_is_generated_file_prefix_patterns() {
    let generated_prefix_names = [
        "generated_types.py",
        "auto_generated_schema.ts",
        "autogenerated_client.go",
        "mock_service.py",
        "mocks_handler.py",
        "Generated_Types.py",
        "AUTO_GENERATED_SCHEMA.ts",
    ];
    for file_name in generated_prefix_names {
        assert!(
            is_generated_file(Path::new(file_name)),
            "expected `{}` to be classified as generated",
            file_name
        );
    }
}
/// Ordinary source paths must not be reported as generated files.
#[test]
fn test_is_generated_file_non_generated() {
    let handwritten_paths = ["src/main.py", "lib/utils.ts", "cmd/server.go"];
    for candidate in handwritten_paths {
        assert!(
            !is_generated_file(Path::new(candidate)),
            "`{}` must not be classified as generated",
            candidate
        );
    }
}
/// Pins the value of each `ClonesOptions::default()` field checked below.
#[test]
fn test_clones_options_defaults() {
    let defaults = ClonesOptions::default();

    // Size thresholds and similarity cut-off.
    assert_eq!(defaults.min_tokens, 25, "Default min_tokens should be 25");
    assert_eq!(defaults.min_lines, 5, "Default min_lines should be 5");
    let threshold_delta = (defaults.threshold - 0.7).abs();
    assert!(threshold_delta < 1e-9, "Default threshold should be 0.7");

    // Filters and normalization.
    assert!(
        defaults.type_filter.is_none(),
        "Default type_filter should be None"
    );
    assert_eq!(
        defaults.normalization,
        NormalizationMode::All,
        "Default normalization should be All"
    );
    assert!(
        defaults.language.is_none(),
        "Default language should be None"
    );

    // Output switches.
    assert!(
        !defaults.show_classes,
        "Default show_classes should be false"
    );
    assert!(
        !defaults.include_within_file,
        "Default include_within_file should be false"
    );

    // Limits.
    assert_eq!(defaults.max_clones, 100, "Default max_clones should be 100");
    assert_eq!(defaults.max_files, 1000, "Default max_files should be 1000");

    // Exclusion switches.
    assert!(
        !defaults.exclude_generated,
        "Default exclude_generated should be false"
    );
    assert!(
        !defaults.exclude_tests,
        "Default exclude_tests should be false"
    );
}
/// `ClonesOptions::new()` must yield the same configuration as
/// `ClonesOptions::default()`.
///
/// All twelve option fields are compared (previously only five were), so a
/// divergence in any of them fails the test with the field named in the
/// message.
#[test]
fn test_clones_options_new_equals_default() {
    let new = ClonesOptions::new();
    let default = ClonesOptions::default();

    assert_eq!(new.min_tokens, default.min_tokens, "min_tokens differs");
    assert_eq!(new.min_lines, default.min_lines, "min_lines differs");
    // Floating-point field: compare with a tolerance rather than `==`.
    assert!(
        (new.threshold - default.threshold).abs() < 1e-9,
        "threshold differs"
    );
    assert_eq!(new.type_filter, default.type_filter, "type_filter differs");
    assert_eq!(
        new.normalization, default.normalization,
        "normalization differs"
    );
    assert_eq!(new.language, default.language, "language differs");
    assert_eq!(
        new.show_classes, default.show_classes,
        "show_classes differs"
    );
    assert_eq!(
        new.include_within_file, default.include_within_file,
        "include_within_file differs"
    );
    assert_eq!(new.max_clones, default.max_clones, "max_clones differs");
    assert_eq!(new.max_files, default.max_files, "max_files differs");
    assert_eq!(
        new.exclude_generated, default.exclude_generated,
        "exclude_generated differs"
    );
    assert_eq!(
        new.exclude_tests, default.exclude_tests,
        "exclude_tests differs"
    );
}
/// A similarity of exactly 1.0, or within 1e-10 of it, classifies as Type-1.
#[test]
fn test_classify_clone_type_type1() {
    for score in [1.0, 0.9999999999] {
        assert_eq!(
            classify_clone_type(score),
            CloneType::Type1,
            "similarity {} should classify as Type-1",
            score
        );
    }
}
/// Similarities of 0.95, 0.9 and just above 0.9 classify as Type-2.
#[test]
fn test_classify_clone_type_type2() {
    for score in [0.95, 0.9, 0.9000000001] {
        assert_eq!(
            classify_clone_type(score),
            CloneType::Type2,
            "similarity {} should classify as Type-2",
            score
        );
    }
}
/// Similarities of 0.89, 0.7 and 0.5 classify as Type-3.
#[test]
fn test_classify_clone_type_type3() {
    for score in [0.89, 0.7, 0.5] {
        assert_eq!(
            classify_clone_type(score),
            CloneType::Type3,
            "similarity {} should classify as Type-3",
            score
        );
    }
}
/// `CloneType::as_str` yields the hyphenated label for each variant.
#[test]
fn test_clone_type_as_str() {
    let labelled = [
        (CloneType::Type1, "Type-1"),
        (CloneType::Type2, "Type-2"),
        (CloneType::Type3, "Type-3"),
    ];
    for (kind, label) in labelled {
        assert_eq!(kind.as_str(), label);
    }
}
/// `CloneType::min_similarity` returns 1.0, 0.9 and 0.7 for Type-1, Type-2 and
/// Type-3 respectively (compared with a 1e-9 tolerance).
#[test]
fn test_clone_type_min_similarity() {
    let floors = [
        (CloneType::Type1, 1.0),
        (CloneType::Type2, 0.9),
        (CloneType::Type3, 0.7),
    ];
    for (kind, floor) in floors {
        let actual = kind.min_similarity();
        assert!(
            (actual - floor).abs() < 1e-9,
            "min_similarity should be {}, got {}",
            floor,
            actual
        );
    }
}
/// The `Display` rendering of each `CloneType` variant is its hyphenated label.
#[test]
fn test_clone_type_display() {
    assert_eq!(CloneType::Type1.to_string(), "Type-1");
    assert_eq!(CloneType::Type2.to_string(), "Type-2");
    assert_eq!(CloneType::Type3.to_string(), "Type-3");
}
/// `NormalizationMode::as_str` yields the lowercase name of each variant.
#[test]
fn test_normalization_mode_as_str() {
    let named = [
        (NormalizationMode::None, "none"),
        (NormalizationMode::Identifiers, "identifiers"),
        (NormalizationMode::Literals, "literals"),
        (NormalizationMode::All, "all"),
    ];
    for (mode, name) in named {
        assert_eq!(mode.as_str(), name);
    }
}
/// `NormalizationMode::parse` maps each known name to its variant and returns
/// `None` for an unknown name.
#[test]
fn test_normalization_mode_from_str() {
    let cases = [
        ("none", Some(NormalizationMode::None)),
        ("identifiers", Some(NormalizationMode::Identifiers)),
        ("literals", Some(NormalizationMode::Literals)),
        ("all", Some(NormalizationMode::All)),
        ("bogus", None),
    ];
    for (text, expected) in cases {
        assert_eq!(
            NormalizationMode::parse(text),
            expected,
            "unexpected result for parse({:?})",
            text
        );
    }
}
/// Checks the `(normalize_identifiers, normalize_literals)` flag pair of every
/// `NormalizationMode` variant.
#[test]
fn test_normalization_mode_flags() {
    assert_eq!(
        (
            NormalizationMode::None.normalize_identifiers(),
            NormalizationMode::None.normalize_literals(),
        ),
        (false, false)
    );
    assert_eq!(
        (
            NormalizationMode::Identifiers.normalize_identifiers(),
            NormalizationMode::Identifiers.normalize_literals(),
        ),
        (true, false)
    );
    assert_eq!(
        (
            NormalizationMode::Literals.normalize_identifiers(),
            NormalizationMode::Literals.normalize_literals(),
        ),
        (false, true)
    );
    assert_eq!(
        (
            NormalizationMode::All.normalize_identifiers(),
            NormalizationMode::All.normalize_literals(),
        ),
        (true, true)
    );
}
/// The `Default` value of `NormalizationMode` is `All`.
#[test]
fn test_normalization_mode_default() {
    let mode: NormalizationMode = Default::default();
    assert_eq!(mode, NormalizationMode::All);
}
}
#[cfg(test)]
mod edge_cases {
use super::v2_fixtures::*;
use crate::analysis::clones::{detect_clones, ClonesOptions};
#[test]
fn test_empty_directory_returns_empty_report() {
let td = V2TestDir::new().unwrap();
let opts = ClonesOptions::default();
let report = detect_clones(td.path(), &opts).unwrap();
assert_eq!(report.clone_pairs.len(), 0);
assert_eq!(report.stats.files_analyzed, 0);
assert_eq!(report.stats.clones_found, 0);
}
#[test]
fn test_single_file_no_within_file() {
let td = V2TestDir::new().unwrap();
td.write_file("src/only.py", PYTHON_LONG_FUNCTION_A)
.unwrap();
let opts = ClonesOptions {
language: Some("python".to_string()),
include_within_file: false,
..Default::default()
};
let report = detect_clones(td.path(), &opts).unwrap();
assert_eq!(
report.clone_pairs.len(),
0,
"Single file with include_within_file=false should have no pairs"
);
}
#[test]
fn test_file_below_min_tokens_no_fragments() {
let td = V2TestDir::new().unwrap();
td.write_file("src/a.py", "def f(): pass\n").unwrap();
td.write_file("src/b.py", "def f(): pass\n").unwrap();
let opts = ClonesOptions {
language: Some("python".to_string()),
min_tokens: 25, ..Default::default()
};
let report = detect_clones(td.path(), &opts).unwrap();
assert_eq!(
report.clone_pairs.len(),
0,
"Files with fewer tokens than min_tokens should produce no pairs"
);
}
#[test]
fn test_max_clones_limit() {
let td = V2TestDir::new().unwrap();
for i in 0..10 {
td.write_file(&format!("src/file_{}.py", i), PYTHON_LONG_FUNCTION_A)
.unwrap();
}
let mut opts = crate::analysis::clones::ClonesOptions { language: Some("python".to_string()), min_tokens: 10, min_lines: 3, ..Default::default() };
opts.max_clones = 5;
let report = detect_clones(td.path(), &opts).unwrap();
assert!(
report.clone_pairs.len() <= 5,
"max_clones=5 should limit output to at most 5 pairs, got {}",
report.clone_pairs.len()
);
}
#[test]
fn test_stats_consistency() {
let td = V2TestDir::new().unwrap();
td.write_file("src/a.py", PYTHON_LONG_FUNCTION_A).unwrap();
td.write_file("src/b.py", PYTHON_LONG_FUNCTION_B).unwrap();
let opts = crate::analysis::clones::ClonesOptions { language: Some("python".to_string()), min_tokens: 10, min_lines: 3, ..Default::default() };
let report = detect_clones(td.path(), &opts).unwrap();
let expected_total =
report.stats.type1_count + report.stats.type2_count + report.stats.type3_count;
assert_eq!(
report.stats.clones_found, expected_total,
"clones_found ({}) should equal type1 + type2 + type3 ({})",
report.stats.clones_found, expected_total
);
assert_eq!(
report.stats.clones_found,
report.clone_pairs.len(),
"clones_found ({}) should equal clone_pairs.len() ({})",
report.stats.clones_found,
report.clone_pairs.len()
);
}
#[test]
fn test_clone_pair_ids_sequential() {
let td = V2TestDir::new().unwrap();
for i in 0..5 {
td.write_file(&format!("src/file_{}.py", i), PYTHON_LONG_FUNCTION_A)
.unwrap();
}
let opts = crate::analysis::clones::ClonesOptions { language: Some("python".to_string()), min_tokens: 10, min_lines: 3, ..Default::default() };
let report = detect_clones(td.path(), &opts).unwrap();
for (idx, pair) in report.clone_pairs.iter().enumerate() {
assert_eq!(
pair.id,
idx + 1,
"Clone pair ID should be 1-indexed sequential: expected {}, got {}",
idx + 1,
pair.id
);
}
}
#[test]
fn test_canonical_pair_ordering() {
let td = V2TestDir::new().unwrap();
td.write_file("src/z_file.py", PYTHON_LONG_FUNCTION_A)
.unwrap();
td.write_file("src/a_file.py", PYTHON_LONG_FUNCTION_B)
.unwrap();
let opts = crate::analysis::clones::ClonesOptions { language: Some("python".to_string()), min_tokens: 10, min_lines: 3, ..Default::default() };
let report = detect_clones(td.path(), &opts).unwrap();
for pair in &report.clone_pairs {
if pair.fragment1.file != pair.fragment2.file {
assert!(
pair.fragment1.file <= pair.fragment2.file,
"Canonical ordering violated: {:?} should come before {:?}",
pair.fragment1.file,
pair.fragment2.file
);
} else {
assert!(
pair.fragment1.start_line <= pair.fragment2.start_line,
"Same-file canonical ordering violated: start_line {} > {}",
pair.fragment1.start_line,
pair.fragment2.start_line
);
}
}
}
#[test]
fn test_config_snapshot() {
let td = V2TestDir::new().unwrap();
td.write_file("src/a.py", PYTHON_LONG_FUNCTION_A).unwrap();
let opts = ClonesOptions {
language: Some("python".to_string()),
min_tokens: 15,
min_lines: 4,
threshold: 0.8,
..Default::default()
};
let report = detect_clones(td.path(), &opts).unwrap();
assert_eq!(report.config.min_tokens, 15);
assert_eq!(report.config.min_lines, 4);
assert!((report.config.similarity_threshold - 0.8).abs() < 1e-9);
}
#[test]
fn test_exclude_tests_option() {
let td = V2TestDir::new().unwrap();
td.write_file("src/main.py", PYTHON_LONG_FUNCTION_A)
.unwrap();
td.write_file("tests/test_main.py", PYTHON_LONG_FUNCTION_B)
.unwrap();
let mut opts = crate::analysis::clones::ClonesOptions { language: Some("python".to_string()), min_tokens: 10, min_lines: 3, ..Default::default() };
opts.exclude_tests = true;
let report = detect_clones(td.path(), &opts).unwrap();
assert_eq!(
report.clone_pairs.len(),
0,
"With exclude_tests=true, test files should be excluded"
);
}
#[test]
fn test_exclude_generated_option() {
let td = V2TestDir::new().unwrap();
td.write_file("src/main.py", PYTHON_LONG_FUNCTION_A)
.unwrap();
td.write_file("generated/types_generated.py", PYTHON_LONG_FUNCTION_B)
.unwrap();
let mut opts = crate::analysis::clones::ClonesOptions { language: Some("python".to_string()), min_tokens: 10, min_lines: 3, ..Default::default() };
opts.exclude_generated = true;
let report = detect_clones(td.path(), &opts).unwrap();
assert_eq!(
report.clone_pairs.len(),
0,
"With exclude_generated=true, generated files should be excluded"
);
}
#[test]
fn test_no_panic_on_empty_files() {
let td = V2TestDir::new().unwrap();
td.write_file("src/empty.py", "").unwrap();
td.write_file("src/comment_only.py", "# just a comment\n")
.unwrap();
td.write_file("src/whitespace.py", "\n\n\n\n").unwrap();
let opts = ClonesOptions {
language: Some("python".to_string()),
..Default::default()
};
let result = detect_clones(td.path(), &opts);
assert!(
result.is_ok(),
"detect_clones should not panic on empty/comment-only files"
);
}
}
#[cfg(test)]
mod multi_language_discovery {
use super::v2_fixtures::*;
use crate::analysis::clones::filter::{discover_source_files, get_language_from_path};
use std::path::Path;
#[test]
fn test_discover_c_files() {
let td = V2TestDir::new().unwrap();
td.write_file("src/main.c", "int main() { return 0; }")
.unwrap();
td.write_file("src/util.h", "void util();").unwrap();
let files = discover_source_files(td.path(), Some("c"), 100, false, false);
assert!(
files.len() >= 2,
"Expected at least 2 C files (.c, .h), found {}",
files.len()
);
}
#[test]
fn test_discover_csharp_files() {
let td = V2TestDir::new().unwrap();
td.write_file("src/Program.cs", "class Program { static void Main() {} }")
.unwrap();
let files = discover_source_files(td.path(), Some("csharp"), 100, false, false);
assert_eq!(files.len(), 1, "Expected 1 C# file, found {}", files.len());
}
#[test]
fn test_discover_elixir_files() {
let td = V2TestDir::new().unwrap();
td.write_file("lib/app.ex", "defmodule App do\nend")
.unwrap();
td.write_file("lib/app_helper.exs", "defmodule AppHelper do\nend")
.unwrap();
let files = discover_source_files(td.path(), Some("elixir"), 100, false, false);
assert!(
files.len() >= 2,
"Expected at least 2 Elixir files (.ex, .exs), found {}",
files.len()
);
}
#[test]
fn test_discover_lua_files() {
let td = V2TestDir::new().unwrap();
td.write_file("src/main.lua", "print('hello')").unwrap();
let files = discover_source_files(td.path(), Some("lua"), 100, false, false);
assert_eq!(files.len(), 1, "Expected 1 Lua file, found {}", files.len());
}
#[test]
fn test_discover_ocaml_files() {
let td = V2TestDir::new().unwrap();
td.write_file("src/main.ml", "let () = print_endline \"hello\"")
.unwrap();
td.write_file("src/main.mli", "val main : unit -> unit")
.unwrap();
let files = discover_source_files(td.path(), Some("ocaml"), 100, false, false);
assert!(
files.len() >= 2,
"Expected at least 2 OCaml files (.ml, .mli), found {}",
files.len()
);
}
#[test]
fn test_discover_php_files() {
let td = V2TestDir::new().unwrap();
td.write_file("src/index.php", "<?php echo 'hello'; ?>")
.unwrap();
let files = discover_source_files(td.path(), Some("php"), 100, false, false);
assert_eq!(files.len(), 1, "Expected 1 PHP file, found {}", files.len());
}
#[test]
fn test_discover_ruby_files() {
let td = V2TestDir::new().unwrap();
td.write_file("lib/app.rb", "puts 'hello'").unwrap();
let files = discover_source_files(td.path(), Some("ruby"), 100, false, false);
assert_eq!(
files.len(),
1,
"Expected 1 Ruby file, found {}",
files.len()
);
}
#[test]
fn test_discover_scala_files() {
let td = V2TestDir::new().unwrap();
td.write_file(
"src/Main.scala",
"object Main { def main(args: Array[String]) = {} }",
)
.unwrap();
let files = discover_source_files(td.path(), Some("scala"), 100, false, false);
assert_eq!(
files.len(),
1,
"Expected 1 Scala file, found {}",
files.len()
);
}
#[test]
fn test_discover_swift_files() {
let td = V2TestDir::new().unwrap();
td.write_file("Sources/main.swift", "print(\"hello\")")
.unwrap();
let files = discover_source_files(td.path(), Some("swift"), 100, false, false);
assert_eq!(
files.len(),
1,
"Expected 1 Swift file, found {}",
files.len()
);
}
#[test]
fn test_discover_kotlin_files() {
let td = V2TestDir::new().unwrap();
td.write_file("src/Main.kt", "fun main() { println(\"hello\") }")
.unwrap();
let files = discover_source_files(td.path(), Some("kotlin"), 100, false, false);
assert_eq!(
files.len(),
1,
"Expected 1 Kotlin file, found {}",
files.len()
);
}
#[test]
fn test_discover_all_supported_extensions() {
let td = V2TestDir::new().unwrap();
let language_files: Vec<(&str, &str, &str)> = vec![
("python", "a.py", "def f(): pass"),
("typescript", "a.ts", "function f() {}"),
("javascript", "a.js", "function f() {}"),
("go", "a.go", "package main\nfunc main() {}"),
("rust", "a.rs", "fn main() {}"),
("java", "A.java", "class A { void f() {} }"),
("c", "a.c", "int main() { return 0; }"),
("csharp", "a.cs", "class A { void F() {} }"),
("elixir", "a.ex", "defmodule A do\nend"),
("lua", "a.lua", "print('hello')"),
("ocaml", "a.ml", "let () = ()"),
("php", "a.php", "<?php echo 1; ?>"),
("ruby", "a.rb", "puts 'hello'"),
("scala", "a.scala", "object A {}"),
("swift", "a.swift", "print(\"hello\")"),
("kotlin", "a.kt", "fun main() {}"),
];
for (lang, filename, content) in &language_files {
td.write_file(&format!("src/{}", filename), content)
.unwrap();
let files = discover_source_files(td.path(), Some(lang), 100, false, false);
assert!(
!files.is_empty(),
"Language '{}' with extension '{}' should discover at least 1 file, found 0",
lang,
filename
);
}
}
#[test]
fn test_get_language_from_path_all_languages() {
let cases = vec![
("test.py", "python"),
("test.ts", "typescript"),
("test.js", "javascript"),
("test.go", "go"),
("test.rs", "rust"),
("test.java", "java"),
("test.c", "c"),
("test.h", "c"),
("test.cs", "csharp"),
("test.ex", "elixir"),
("test.exs", "elixir"),
("test.lua", "lua"),
("test.ml", "ocaml"),
("test.mli", "ocaml"),
("test.php", "php"),
("test.rb", "ruby"),
("test.scala", "scala"),
("test.swift", "swift"),
("test.kt", "kotlin"),
];
for (path_str, expected_lang) in cases {
let lang = get_language_from_path(Path::new(path_str));
assert_eq!(
lang,
Some(expected_lang),
"get_language_from_path('{}') should return Some('{}'), got {:?}",
path_str,
expected_lang,
lang
);
}
}
}
#[cfg(test)]
mod multi_language_function_extraction {
use super::v2_fixtures::*;
use crate::analysis::clones::detect_clones;
#[test]
fn test_function_extraction_c() {
let td = V2TestDir::new().unwrap();
let c_source = r#"
#include <stdio.h>
int add(int a, int b) {
int result = a + b;
return result;
}
int multiply(int a, int b) {
int result = a * b;
return result;
}
"#;
td.write_file("src/a.c", c_source).unwrap();
td.write_file("src/b.c", c_source).unwrap();
let opts = crate::analysis::clones::ClonesOptions { language: Some("c".to_string()), min_tokens: 5, min_lines: 3, ..Default::default() };
let report = detect_clones(td.path(), &opts).unwrap();
assert!(
!report.clone_pairs.is_empty(),
"Expected clone pairs between identical C files, found 0. \
Likely missing C function extraction support."
);
}
#[test]
fn test_function_extraction_ruby() {
let td = V2TestDir::new().unwrap();
let ruby_source = r#"
def process_data(input)
result = []
input.each do |item|
result << item.to_s
end
result
end
def transform_data(input)
output = []
input.each do |item|
output << item.to_i
end
output
end
"#;
td.write_file("lib/a.rb", ruby_source).unwrap();
td.write_file("lib/b.rb", ruby_source).unwrap();
let opts = crate::analysis::clones::ClonesOptions { language: Some("ruby".to_string()), min_tokens: 5, min_lines: 3, ..Default::default() };
let report = detect_clones(td.path(), &opts).unwrap();
assert!(
!report.clone_pairs.is_empty(),
"Expected clone pairs between identical Ruby files, found 0. \
Likely missing Ruby method extraction support."
);
}
#[test]
fn test_function_extraction_php() {
let td = V2TestDir::new().unwrap();
let php_source = r#"<?php
function processData($input) {
$result = [];
foreach ($input as $item) {
$result[] = $item * 2;
}
return $result;
}
function transformData($input) {
$result = [];
foreach ($input as $item) {
$result[] = $item + 1;
}
return $result;
}
"#;
td.write_file("src/a.php", php_source).unwrap();
td.write_file("src/b.php", php_source).unwrap();
let opts = crate::analysis::clones::ClonesOptions { language: Some("php".to_string()), min_tokens: 5, min_lines: 3, ..Default::default() };
let report = detect_clones(td.path(), &opts).unwrap();
assert!(
!report.clone_pairs.is_empty(),
"Expected clone pairs between identical PHP files, found 0. \
Likely missing PHP function extraction support."
);
}
#[test]
fn test_function_extraction_swift() {
let td = V2TestDir::new().unwrap();
let swift_source = r#"
import Foundation
func processData(input: [Int]) -> [Int] {
var result: [Int] = []
for item in input {
result.append(item * 2)
}
return result
}
func transformData(input: [Int]) -> [Int] {
var result: [Int] = []
for item in input {
result.append(item + 1)
}
return result
}
"#;
td.write_file("Sources/a.swift", swift_source).unwrap();
td.write_file("Sources/b.swift", swift_source).unwrap();
let opts = crate::analysis::clones::ClonesOptions { language: Some("swift".to_string()), min_tokens: 5, min_lines: 3, ..Default::default() };
let report = detect_clones(td.path(), &opts).unwrap();
assert!(
!report.clone_pairs.is_empty(),
"Expected clone pairs between identical Swift files, found 0. \
Likely missing Swift function extraction support."
);
}
#[test]
fn test_function_extraction_kotlin() {
let td = V2TestDir::new().unwrap();
let kotlin_source = r#"
fun processData(input: List<Int>): List<Int> {
val result = mutableListOf<Int>()
for (item in input) {
result.add(item * 2)
}
return result
}
fun transformData(input: List<Int>): List<Int> {
val result = mutableListOf<Int>()
for (item in input) {
result.add(item + 1)
}
return result
}
"#;
td.write_file("src/a.kt", kotlin_source).unwrap();
td.write_file("src/b.kt", kotlin_source).unwrap();
let opts = crate::analysis::clones::ClonesOptions { language: Some("kotlin".to_string()), min_tokens: 5, min_lines: 3, ..Default::default() };
let report = detect_clones(td.path(), &opts).unwrap();
assert!(
!report.clone_pairs.is_empty(),
"Expected clone pairs between identical Kotlin files, found 0. \
Likely missing Kotlin function extraction support."
);
}
#[test]
fn test_function_extraction_scala() {
let td = V2TestDir::new().unwrap();
let scala_source = r#"
object DataProcessor {
def processData(input: List[Int]): List[Int] = {
val result = input.map(_ * 2)
result.filter(_ > 0)
}
def transformData(input: List[Int]): List[Int] = {
val result = input.map(_ + 1)
result.filter(_ > 0)
}
}
"#;
td.write_file("src/a.scala", scala_source).unwrap();
td.write_file("src/b.scala", scala_source).unwrap();
let opts = crate::analysis::clones::ClonesOptions { language: Some("scala".to_string()), min_tokens: 5, min_lines: 3, ..Default::default() };
let report = detect_clones(td.path(), &opts).unwrap();
assert!(
!report.clone_pairs.is_empty(),
"Expected clone pairs between identical Scala files, found 0. \
Likely missing Scala function extraction support."
);
}
#[test]
fn test_function_extraction_csharp() {
let td = V2TestDir::new().unwrap();
let csharp_source = r#"
using System;
using System.Collections.Generic;
public class DataProcessor {
public List<int> ProcessData(List<int> input) {
var result = new List<int>();
foreach (var item in input) {
result.Add(item * 2);
}
return result;
}
public List<int> TransformData(List<int> input) {
var result = new List<int>();
foreach (var item in input) {
result.Add(item + 1);
}
return result;
}
}
"#;
td.write_file("src/a.cs", csharp_source).unwrap();
td.write_file("src/b.cs", csharp_source).unwrap();
let opts = crate::analysis::clones::ClonesOptions { language: Some("csharp".to_string()), min_tokens: 5, min_lines: 3, ..Default::default() };
let report = detect_clones(td.path(), &opts).unwrap();
assert!(
!report.clone_pairs.is_empty(),
"Expected clone pairs between identical C# files, found 0. \
Likely missing C# method extraction support."
);
}
#[test]
fn test_function_extraction_lua() {
let td = V2TestDir::new().unwrap();
let lua_source = r#"
function processData(input)
local result = {}
for i, item in ipairs(input) do
result[i] = item * 2
end
return result
end
function transformData(input)
local result = {}
for i, item in ipairs(input) do
result[i] = item + 1
end
return result
end
"#;
td.write_file("src/a.lua", lua_source).unwrap();
td.write_file("src/b.lua", lua_source).unwrap();
let opts = crate::analysis::clones::ClonesOptions { language: Some("lua".to_string()), min_tokens: 5, min_lines: 3, ..Default::default() };
let report = detect_clones(td.path(), &opts).unwrap();
assert!(
!report.clone_pairs.is_empty(),
"Expected clone pairs between identical Lua files, found 0. \
Likely missing Lua function extraction support."
);
}
}