pub mod fixtures {
use std::path::{Path, PathBuf};
use tempfile::TempDir;
/// Scratch directory that test cases populate with source files.
pub struct TestDir {
    /// Handle to the underlying temporary directory.
    pub dir: TempDir,
}

impl TestDir {
    /// Creates a fresh temporary directory, forwarding any I/O error
    /// reported by `TempDir::new`.
    pub fn new() -> std::io::Result<Self> {
        TempDir::new().map(|dir| Self { dir })
    }

    /// Returns the path of the wrapped temporary directory.
    pub fn path(&self) -> &Path {
        self.dir.path()
    }

    /// Writes `content` to `name`, joined onto the directory path, creating
    /// any missing parent directories first.
    ///
    /// Returns the full path of the written file.
    pub fn add_file(&self, name: &str, content: &str) -> std::io::Result<PathBuf> {
        let target = self.path().join(name);
        if let Some(folder) = target.parent() {
            std::fs::create_dir_all(folder)?;
        }
        std::fs::write(&target, content).map(|()| target)
    }
}
/// Python snippet defining `process_data`, which appends `transform(item)`
/// for every item to a result list. Baseline for most comparisons.
pub const PYTHON_FUNC_A: &str = r#"
def process_data(items):
result = []
for item in items:
processed = transform(item)
result.append(processed)
return result
"#;
/// Same text as `PYTHON_FUNC_A`; stands in for an exact duplicate.
pub const PYTHON_FUNC_A_COPY: &str = r#"
def process_data(items):
result = []
for item in items:
processed = transform(item)
result.append(processed)
return result
"#;
/// Same statement sequence as `PYTHON_FUNC_A` with the function, parameter
/// and local names changed; the `transform` call is kept.
pub const PYTHON_FUNC_B_SIMILAR: &str = r#"
def handle_items(data):
output = []
for element in data:
converted = transform(element)
output.append(converted)
return output
"#;
/// The `PYTHON_FUNC_A` loop under the name `process_data_logged`, with three
/// added `print` calls.
pub const PYTHON_FUNC_C_WITH_LOGGING: &str = r#"
def process_data_logged(items):
print("Starting processing")
result = []
for item in items:
processed = transform(item)
print(f"Processed: {processed}")
result.append(processed)
print("Done")
return result
"#;
/// Python function `calculate_average`, unrelated to the `process_data`
/// snippets.
pub const PYTHON_FUNC_DIFFERENT: &str = r#"
def calculate_average(numbers):
if not numbers:
return 0
total = sum(numbers)
count = len(numbers)
return total / count
"#;
/// Python class `DatabaseConnection` with `__init__` and `connect`; a class
/// rather than a plain function.
pub const PYTHON_FUNC_VERY_DIFFERENT: &str = r#"
class DatabaseConnection:
def __init__(self, host, port):
self.host = host
self.port = port
self.connected = False
def connect(self):
self.connected = True
return self
"#;
/// Python text with three functions: `first_function`, `second_function`
/// and `third_function`.
pub const PYTHON_MULTI_FUNCTION_FILE: &str = r#"
def first_function(a, b):
return a + b
def second_function(x, y, z):
result = x * y
result = result + z
return result
def third_function(items):
total = 0
for item in items:
total += item
return total
"#;
/// Counterpart of `PYTHON_MULTI_FUNCTION_FILE`: the same three bodies under
/// the names `add_numbers`, `multiply_and_add` and `sum_items`, with some
/// variables renamed.
pub const PYTHON_MULTI_FUNCTION_FILE_B: &str = r#"
def add_numbers(a, b):
return a + b
def multiply_and_add(x, y, z):
product = x * y
product = product + z
return product
def sum_items(elements):
total = 0
for element in elements:
total += element
return total
"#;
/// TypeScript snippet: exported `processData`, which pushes
/// `transform(item)` for every item onto a result array.
pub const TS_FUNC_A: &str = r#"
export function processData(items: any[]): any[] {
const result: any[] = [];
for (const item of items) {
const processed = transform(item);
result.push(processed);
}
return result;
}
"#;
/// Same statement sequence as `TS_FUNC_A` with the function, parameter and
/// local names changed.
pub const TS_FUNC_B_SIMILAR: &str = r#"
export function handleItems(data: any[]): any[] {
const output: any[] = [];
for (const element of data) {
const converted = transform(element);
output.push(converted);
}
return output;
}
"#;
/// Go snippet: `ProcessData`, which appends `transform(item)` for every item
/// to a result slice.
pub const GO_FUNC_A: &str = r#"
func ProcessData(items []interface{}) []interface{} {
result := make([]interface{}, 0)
for _, item := range items {
processed := transform(item)
result = append(result, processed)
}
return result
}
"#;
/// Same statement sequence as `GO_FUNC_A` with the function, parameter and
/// local names changed.
pub const GO_FUNC_B_SIMILAR: &str = r#"
func HandleItems(data []interface{}) []interface{} {
output := make([]interface{}, 0)
for _, element := range data {
converted := transform(element)
output = append(output, converted)
}
return output
}
"#;
/// Rust snippet: `process_data`, which pushes `transform(item)` for every
/// item onto a result vector.
pub const RUST_FUNC_A: &str = r#"
pub fn process_data(items: &[Item]) -> Vec<Item> {
let mut result = Vec::new();
for item in items {
let processed = transform(item);
result.push(processed);
}
result
}
"#;
/// Same statement sequence as `RUST_FUNC_A` with the function, parameter and
/// local names changed.
pub const RUST_FUNC_B_SIMILAR: &str = r#"
pub fn handle_items(data: &[Item]) -> Vec<Item> {
let mut output = Vec::new();
for element in data {
let converted = transform(element);
output.push(converted);
}
output
}
"#;
/// Small Python function whose function name, variable name and string
/// literal do not occur in `NO_SHARED_TOKENS`. The `def`/`return` keywords
/// and punctuation are still common to both.
pub const UNIQUE_TOKENS_ONLY: &str = r#"
def unique_function_xyz():
alpha_var = "unique_string_123"
return alpha_var
"#;
/// Counterpart of `UNIQUE_TOKENS_ONLY` with a different function name,
/// variable name and string literal.
pub const NO_SHARED_TOKENS: &str = r#"
def another_beta_function():
omega_value = "different_text_456"
return omega_value
"#;
/// Python text with three functions, each preceded by a `# Block X` comment.
/// `block_a` and `block_b` differ only in their function and variable names;
/// `block_c` computes an average instead of a sum.
///
/// NOTE(review): the "lines N-M" annotations inside the literal are not
/// guaranteed to match the real line positions of each block — confirm
/// against the tests that slice this fixture by line range.
pub const FILE_WITH_BLOCKS: &str = r#"
# Block A: lines 1-10
def block_a():
x = 1
y = 2
z = 3
result = x + y + z
return result
# Block B: lines 12-21
def block_b():
a = 1
b = 2
c = 3
result = a + b + c
return result
# Block C: lines 23-32 (different)
def block_c():
items = [1, 2, 3]
total = sum(items)
average = total / len(items)
return average
"#;
}
#[cfg(test)]
mod dice_coefficient_tests {
    //! Checks on the `dice` score reported by `compute_similarity`.

    use super::fixtures::*;
    use crate::analysis::similarity::{compute_similarity, SimilarityMetric, SimilarityOptions};

    /// Writes `first` to `a.py` and `second` to `b.py` in a fresh `TestDir`.
    ///
    /// The `TestDir` is handed back with both paths so the caller keeps it in
    /// scope while the files are in use.
    fn python_pair(first: &str, second: &str) -> (TestDir, std::path::PathBuf, std::path::PathBuf) {
        let workspace = TestDir::new().unwrap();
        let left = workspace.add_file("a.py", first).unwrap();
        let right = workspace.add_file("b.py", second).unwrap();
        (workspace, left, right)
    }

    /// Two files with the same text must score Dice = 1.0 (within 0.001).
    #[test]
    #[ignore = "similarity module not yet implemented"]
    fn test_dice_identical_code() {
        let (_workspace, left, right) = python_pair(PYTHON_FUNC_A, PYTHON_FUNC_A_COPY);
        let opts = SimilarityOptions {
            metric: SimilarityMetric::Dice,
            ..Default::default()
        };
        let outcome = compute_similarity(&left, &right, &opts).unwrap();
        let dice = outcome.similarity.dice;
        assert!(
            (dice - 1.0).abs() < 0.001,
            "Identical code should have Dice = 1.0, got {}",
            dice
        );
    }

    /// Swapping the two inputs must not change the Dice score.
    #[test]
    #[ignore = "similarity module not yet implemented"]
    fn test_dice_symmetry() {
        let (_workspace, left, right) = python_pair(PYTHON_FUNC_A, PYTHON_FUNC_B_SIMILAR);
        let opts = SimilarityOptions::default();
        let forward = compute_similarity(&left, &right, &opts).unwrap();
        let backward = compute_similarity(&right, &left, &opts).unwrap();
        let gap = (forward.similarity.dice - backward.similarity.dice).abs();
        assert!(gap < 0.001, "Dice should be symmetric");
    }

    /// Snippets with distinct identifiers and literals must score below 0.3.
    #[test]
    #[ignore = "similarity module not yet implemented"]
    fn test_dice_disjoint_code() {
        let (_workspace, left, right) = python_pair(UNIQUE_TOKENS_ONLY, NO_SHARED_TOKENS);
        let opts = SimilarityOptions::default();
        let outcome = compute_similarity(&left, &right, &opts).unwrap();
        let dice = outcome.similarity.dice;
        assert!(
            dice < 0.3,
            "Disjoint code should have low Dice, got {}",
            dice
        );
    }

    /// An empty file compared against a non-empty one must score Dice = 0.0.
    #[test]
    #[ignore = "similarity module not yet implemented"]
    fn test_dice_empty_input() {
        let workspace = TestDir::new().unwrap();
        let blank = workspace.add_file("empty.py", "").unwrap();
        let filled = workspace.add_file("normal.py", PYTHON_FUNC_A).unwrap();
        let opts = SimilarityOptions::default();
        let outcome = compute_similarity(&blank, &filled, &opts).unwrap();
        assert!(
            outcome.similarity.dice.abs() < 0.001,
            "Empty input should give Dice = 0.0"
        );
    }

    /// Renamed-but-equivalent code must land in the range [0.5, 1.0].
    #[test]
    #[ignore = "similarity module not yet implemented"]
    fn test_dice_similar_code() {
        let (_workspace, left, right) = python_pair(PYTHON_FUNC_A, PYTHON_FUNC_B_SIMILAR);
        let opts = SimilarityOptions::default();
        let outcome = compute_similarity(&left, &right, &opts).unwrap();
        let dice = outcome.similarity.dice;
        assert!(
            (0.5..=1.0).contains(&dice),
            "Similar code should have moderate-high Dice, got {}",
            dice
        );
    }

    /// A snippet that reuses one variable and one that uses three distinct
    /// variables must report at least one token unique to either side.
    #[test]
    #[ignore = "similarity module not yet implemented"]
    fn test_dice_multiset_handling() {
        let repeated = r#"
def func():
x = 1
x = 2
x = 3
return x
"#;
        let not_repeated = r#"
def func():
a = 1
b = 2
c = 3
return a
"#;
        let (_workspace, left, right) = python_pair(repeated, not_repeated);
        let opts = SimilarityOptions::default();
        let outcome = compute_similarity(&left, &right, &opts).unwrap();
        let breakdown = &outcome.token_breakdown;
        assert!(breakdown.unique_to_fragment1 > 0 || breakdown.unique_to_fragment2 > 0);
    }

    /// The Dice score must never be negative, even for unrelated code.
    #[test]
    #[ignore = "similarity module not yet implemented"]
    fn test_dice_non_negative() {
        let (_workspace, left, right) = python_pair(PYTHON_FUNC_A, PYTHON_FUNC_DIFFERENT);
        let opts = SimilarityOptions::default();
        let outcome = compute_similarity(&left, &right, &opts).unwrap();
        assert!(outcome.similarity.dice >= 0.0, "Dice must be non-negative");
    }
}
#[cfg(test)]
mod jaccard_coefficient_tests {
    //! Checks on the `jaccard` score and its relation to the `dice` score.

    use super::fixtures::*;
    use crate::analysis::similarity::{compute_similarity, SimilarityMetric, SimilarityOptions};

    /// Writes `first` to `a.py` and `second` to `b.py` in a fresh `TestDir`,
    /// returning the directory together with both paths.
    fn python_pair(first: &str, second: &str) -> (TestDir, std::path::PathBuf, std::path::PathBuf) {
        let workspace = TestDir::new().unwrap();
        let left = workspace.add_file("a.py", first).unwrap();
        let right = workspace.add_file("b.py", second).unwrap();
        (workspace, left, right)
    }

    /// Two files with the same text must score Jaccard = 1.0 (within 0.001).
    #[test]
    #[ignore = "similarity module not yet implemented"]
    fn test_jaccard_identical_code() {
        let (_workspace, left, right) = python_pair(PYTHON_FUNC_A, PYTHON_FUNC_A_COPY);
        let opts = SimilarityOptions {
            metric: SimilarityMetric::Jaccard,
            ..Default::default()
        };
        let outcome = compute_similarity(&left, &right, &opts).unwrap();
        assert!((outcome.similarity.jaccard - 1.0).abs() < 0.001);
    }

    /// Snippets with distinct identifiers and literals must score below 0.3.
    #[test]
    #[ignore = "similarity module not yet implemented"]
    fn test_jaccard_disjoint_code() {
        let (_workspace, left, right) = python_pair(UNIQUE_TOKENS_ONLY, NO_SHARED_TOKENS);
        let opts = SimilarityOptions::default();
        let outcome = compute_similarity(&left, &right, &opts).unwrap();
        assert!(
            outcome.similarity.jaccard < 0.3,
            "Disjoint code should have low Jaccard"
        );
    }

    /// Jaccard must not exceed Dice (with a 0.001 tolerance).
    #[test]
    #[ignore = "similarity module not yet implemented"]
    fn test_jaccard_less_than_dice() {
        let (_workspace, left, right) = python_pair(PYTHON_FUNC_A, PYTHON_FUNC_B_SIMILAR);
        let opts = SimilarityOptions {
            metric: SimilarityMetric::All,
            ..Default::default()
        };
        let outcome = compute_similarity(&left, &right, &opts).unwrap();
        let scores = &outcome.similarity;
        assert!(
            scores.jaccard <= scores.dice + 0.001,
            "Jaccard ({}) should be <= Dice ({})",
            scores.jaccard,
            scores.dice
        );
    }

    /// The reported Jaccard must match `dice / (2 - dice)` within 0.01.
    #[test]
    #[ignore = "similarity module not yet implemented"]
    fn test_jaccard_dice_relationship() {
        let (_workspace, left, right) = python_pair(PYTHON_FUNC_A, PYTHON_FUNC_C_WITH_LOGGING);
        let opts = SimilarityOptions {
            metric: SimilarityMetric::All,
            ..Default::default()
        };
        let outcome = compute_similarity(&left, &right, &opts).unwrap();
        let dice = outcome.similarity.dice;
        let expected_jaccard = dice / (2.0 - dice);
        assert!(
            (outcome.similarity.jaccard - expected_jaccard).abs() < 0.01,
            "Jaccard/Dice relationship broken"
        );
    }

    /// Swapping the two inputs must not change the Jaccard score.
    #[test]
    #[ignore = "similarity module not yet implemented"]
    fn test_jaccard_symmetry() {
        let (_workspace, left, right) = python_pair(PYTHON_FUNC_A, PYTHON_FUNC_B_SIMILAR);
        let opts = SimilarityOptions::default();
        let forward = compute_similarity(&left, &right, &opts).unwrap();
        let backward = compute_similarity(&right, &left, &opts).unwrap();
        assert!((forward.similarity.jaccard - backward.similarity.jaccard).abs() < 0.001);
    }
}
#[cfg(test)]
mod cosine_similarity_tests {
    //! Checks on the optional `cosine` score produced when the cosine metric
    //! is requested.

    use super::fixtures::*;
    use crate::analysis::similarity::{compute_similarity, SimilarityMetric, SimilarityOptions};

    /// Default options with the metric switched to `SimilarityMetric::Cosine`.
    fn cosine_options() -> SimilarityOptions {
        SimilarityOptions {
            metric: SimilarityMetric::Cosine,
            ..Default::default()
        }
    }

    /// Two files with the same text must yield a cosine score of 1.0.
    #[test]
    #[ignore = "similarity module not yet implemented"]
    fn test_cosine_identical_code() {
        let workspace = TestDir::new().unwrap();
        let left = workspace.add_file("a.py", PYTHON_FUNC_A).unwrap();
        let right = workspace.add_file("b.py", PYTHON_FUNC_A_COPY).unwrap();
        let outcome = compute_similarity(&left, &right, &cosine_options()).unwrap();
        let cosine = outcome.similarity.cosine;
        assert!(cosine.is_some());
        assert!((cosine.unwrap() - 1.0).abs() < 0.001);
    }

    /// Snippets with distinct identifiers and literals must score below 0.3.
    #[test]
    #[ignore = "similarity module not yet implemented"]
    fn test_cosine_disjoint_code() {
        let workspace = TestDir::new().unwrap();
        let left = workspace.add_file("a.py", UNIQUE_TOKENS_ONLY).unwrap();
        let right = workspace.add_file("b.py", NO_SHARED_TOKENS).unwrap();
        let outcome = compute_similarity(&left, &right, &cosine_options()).unwrap();
        assert!(outcome.similarity.cosine.unwrap() < 0.3);
    }

    /// The cosine score must lie in the closed interval [0, 1].
    #[test]
    #[ignore = "similarity module not yet implemented"]
    fn test_cosine_valid_range() {
        let workspace = TestDir::new().unwrap();
        let left = workspace.add_file("a.py", PYTHON_FUNC_A).unwrap();
        let right = workspace.add_file("b.py", PYTHON_FUNC_DIFFERENT).unwrap();
        let outcome = compute_similarity(&left, &right, &cosine_options()).unwrap();
        let cosine = outcome.similarity.cosine.unwrap();
        assert!(
            (0.0..=1.0).contains(&cosine),
            "Cosine must be in [0,1], got {}",
            cosine
        );
    }

    /// Only checks that a cosine score is produced for similar code; the
    /// weighting itself is not asserted here.
    #[test]
    #[ignore = "similarity module not yet implemented"]
    fn test_cosine_weights_rare_tokens() {
        let workspace = TestDir::new().unwrap();
        let left = workspace.add_file("a.py", PYTHON_FUNC_A).unwrap();
        let right = workspace.add_file("b.py", PYTHON_FUNC_B_SIMILAR).unwrap();
        let outcome = compute_similarity(&left, &right, &cosine_options()).unwrap();
        assert!(outcome.similarity.cosine.is_some());
    }

    /// An empty file compared against a non-empty one must score 0.0.
    #[test]
    #[ignore = "similarity module not yet implemented"]
    fn test_cosine_empty_input() {
        let workspace = TestDir::new().unwrap();
        let blank = workspace.add_file("empty.py", "").unwrap();
        let filled = workspace.add_file("normal.py", PYTHON_FUNC_A).unwrap();
        let outcome = compute_similarity(&blank, &filled, &cosine_options()).unwrap();
        assert!(outcome.similarity.cosine.unwrap().abs() < 0.001);
    }
}
#[cfg(test)]
mod function_level_tests {
    //! Tests for `path::function` targets: parsing, comparison, error
    //! reporting and fragment metadata.

    use super::fixtures::*;
    use crate::analysis::similarity::{
        compute_similarity, parse_target, ComparisonLevel, SimilarityOptions,
    };
    use std::path::PathBuf;

    /// `file::name` must parse into a file path plus a function name, with no
    /// line range.
    #[test]
    #[ignore = "similarity module not yet implemented"]
    fn test_parse_function_target() {
        let parsed = parse_target("src/auth.py::login").unwrap();
        assert_eq!(parsed.file, PathBuf::from("src/auth.py"));
        assert_eq!(parsed.function.as_deref(), Some("login"));
        assert!(parsed.line_range.is_none());
    }

    /// Two structurally equal functions from different files must score a
    /// Dice above 0.7 when compared at function level.
    #[test]
    #[ignore = "similarity module not yet implemented"]
    fn test_function_level_comparison() {
        let workspace = TestDir::new().unwrap();
        let file_a = workspace
            .add_file("a.py", PYTHON_MULTI_FUNCTION_FILE)
            .unwrap();
        let file_b = workspace
            .add_file("b.py", PYTHON_MULTI_FUNCTION_FILE_B)
            .unwrap();
        let opts = SimilarityOptions {
            level: Some(ComparisonLevel::Function),
            ..Default::default()
        };
        let spec_a = PathBuf::from(format!("{}::third_function", file_a.display()));
        let spec_b = PathBuf::from(format!("{}::sum_items", file_b.display()));
        let outcome = compute_similarity(&spec_a, &spec_b, &opts).unwrap();
        assert!(
            outcome.similarity.dice > 0.7,
            "Similar functions should have high Dice"
        );
    }

    /// Naming a function that is absent from the file must produce an error
    /// whose text mentions "not found" or "Function".
    #[test]
    #[ignore = "similarity module not yet implemented"]
    fn test_function_not_found_error() {
        let workspace = TestDir::new().unwrap();
        let file = workspace.add_file("a.py", PYTHON_FUNC_A).unwrap();
        let opts = SimilarityOptions::default();
        let spec = PathBuf::from(format!("{}::nonexistent_function", file.display()));
        let attempt = compute_similarity(&spec, &file, &opts);
        assert!(attempt.is_err());
        let message = attempt.unwrap_err().to_string();
        assert!(
            message.contains("not found") || message.contains("Function"),
            "Error should mention function not found"
        );
    }

    /// The first fragment of the report must carry the requested function
    /// name.
    #[test]
    #[ignore = "similarity module not yet implemented"]
    fn test_fragment_includes_function_name() {
        let workspace = TestDir::new().unwrap();
        let file = workspace
            .add_file("a.py", PYTHON_MULTI_FUNCTION_FILE)
            .unwrap();
        let opts = SimilarityOptions::default();
        let spec = PathBuf::from(format!("{}::first_function", file.display()));
        let outcome = compute_similarity(&spec, &spec, &opts).unwrap();
        assert_eq!(
            outcome.fragment1.function.as_deref(),
            Some("first_function")
        );
    }
}
#[cfg(test)]
mod file_level_tests {
    //! Tests for whole-file comparisons and the fragment metadata they report.

    use super::fixtures::*;
    use crate::analysis::similarity::{compute_similarity, SimilarityOptions};

    /// A plain file target must produce a fragment without a function name or
    /// a line range.
    #[test]
    #[ignore = "similarity module not yet implemented"]
    fn test_file_level_comparison() {
        let workspace = TestDir::new().unwrap();
        let left = workspace.add_file("a.py", PYTHON_FUNC_A).unwrap();
        let right = workspace.add_file("b.py", PYTHON_FUNC_B_SIMILAR).unwrap();
        let outcome = compute_similarity(&left, &right, &SimilarityOptions::default()).unwrap();
        let fragment = &outcome.fragment1;
        assert!(
            fragment.function.is_none(),
            "File-level should not have function"
        );
        assert!(
            fragment.line_range.is_none(),
            "File-level should not have line range"
        );
    }

    /// A non-empty file must report positive token and line counts.
    #[test]
    #[ignore = "similarity module not yet implemented"]
    fn test_file_token_count() {
        let workspace = TestDir::new().unwrap();
        let file = workspace.add_file("a.py", PYTHON_FUNC_A).unwrap();
        let outcome = compute_similarity(&file, &file, &SimilarityOptions::default()).unwrap();
        assert!(outcome.fragment1.tokens > 0);
        assert!(outcome.fragment1.lines > 0);
    }

    /// A five-line file must be reported with at least five lines.
    #[test]
    #[ignore = "similarity module not yet implemented"]
    fn test_file_line_count() {
        let five_lines = "line1\nline2\nline3\nline4\nline5\n";
        let workspace = TestDir::new().unwrap();
        let file = workspace.add_file("a.py", five_lines).unwrap();
        let outcome = compute_similarity(&file, &file, &SimilarityOptions::default()).unwrap();
        assert!(outcome.fragment1.lines >= 5, "Should count at least 5 lines");
    }

    /// Comparing a file with itself must score Dice = 1.0 (within 0.001).
    #[test]
    #[ignore = "similarity module not yet implemented"]
    fn test_file_compared_to_self() {
        let workspace = TestDir::new().unwrap();
        let file = workspace.add_file("a.py", PYTHON_FUNC_A).unwrap();
        let outcome = compute_similarity(&file, &file, &SimilarityOptions::default()).unwrap();
        assert!((outcome.similarity.dice - 1.0).abs() < 0.001);
    }
}
#[cfg(test)]
mod block_level_tests {
    //! Tests for `path:start:end` targets: parsing, comparison of line-range
    //! fragments, range validation and fragment metadata.

    use super::fixtures::*;
    use crate::analysis::similarity::{compute_similarity, parse_target, SimilarityOptions};
    use std::path::{Path, PathBuf};

    /// Locates a block of `FILE_WITH_BLOCKS` by the start of its `# Block X`
    /// header line.
    ///
    /// Returns 1-indexed, inclusive `(first, last)` line numbers running from
    /// the header to the last non-blank line before the next `# Block` header
    /// (or the end of the fixture). Deriving the span from the fixture keeps
    /// the tests aligned with the literal even if its layout changes.
    ///
    /// NOTE(review): assumes line targets are 1-indexed and inclusive, as the
    /// `2:8` target used for Block A suggests — confirm against `parse_target`.
    ///
    /// # Panics
    /// Panics if no line of the fixture starts with `header`.
    fn block_span(header: &str) -> (usize, usize) {
        let lines: Vec<&str> = FILE_WITH_BLOCKS.lines().collect();
        let first = lines
            .iter()
            .position(|line| line.trim_start().starts_with(header))
            .expect("fixture should contain the requested block header");
        // Exclusive index of the next block header, or the end of the text.
        let stop = lines[first + 1..]
            .iter()
            .position(|line| line.trim_start().starts_with("# Block"))
            .map_or(lines.len(), |offset| first + 1 + offset);
        // Trim blank separator lines off the end of the block.
        let last = lines[first..stop]
            .iter()
            .rposition(|line| !line.trim().is_empty())
            .map_or(first, |offset| first + offset);
        (first + 1, last + 1)
    }

    /// Builds a `path:start:end` target covering the named block of `file`.
    fn block_target(file: &Path, header: &str) -> PathBuf {
        let (first, last) = block_span(header);
        PathBuf::from(format!("{}:{}:{}", file.display(), first, last))
    }

    /// `file:start:end` must parse into a file path plus a line range, with no
    /// function name.
    #[test]
    #[ignore = "similarity module not yet implemented"]
    fn test_parse_block_target() {
        let target = "src/code.py:10:50";
        let parsed = parse_target(target).unwrap();
        assert_eq!(parsed.file, PathBuf::from("src/code.py"));
        assert_eq!(parsed.line_range, Some((10, 50)));
        assert!(parsed.function.is_none());
    }

    /// Blocks A and B differ only in names, so their Dice must exceed 0.6.
    #[test]
    #[ignore = "similarity module not yet implemented"]
    fn test_block_level_comparison() {
        let test_dir = TestDir::new().unwrap();
        let path = test_dir.add_file("code.py", FILE_WITH_BLOCKS).unwrap();
        let options = SimilarityOptions::default();
        let target_a = block_target(&path, "# Block A");
        let target_b = block_target(&path, "# Block B");
        let report = compute_similarity(&target_a, &target_b, &options).unwrap();
        assert!(
            report.similarity.dice > 0.6,
            "Similar blocks should have high similarity"
        );
    }

    /// Blocks A and C contain different logic, so their Dice must stay below
    /// 0.8.
    #[test]
    #[ignore = "similarity module not yet implemented"]
    fn test_block_different_content() {
        let test_dir = TestDir::new().unwrap();
        let path = test_dir.add_file("code.py", FILE_WITH_BLOCKS).unwrap();
        let options = SimilarityOptions::default();
        let target_a = block_target(&path, "# Block A");
        let target_c = block_target(&path, "# Block C");
        let report = compute_similarity(&target_a, &target_c, &options).unwrap();
        assert!(
            report.similarity.dice < 0.8,
            "Different blocks should have lower similarity"
        );
    }

    /// A range whose start exceeds its end must be rejected with an error
    /// mentioning "Invalid" or "range".
    #[test]
    #[ignore = "similarity module not yet implemented"]
    fn test_invalid_line_range() {
        let target = "file.py:50:10";
        let result = parse_target(target);
        assert!(result.is_err());
        let err_msg = result.unwrap_err().to_string();
        assert!(
            err_msg.contains("Invalid") || err_msg.contains("range"),
            "Should indicate invalid range"
        );
    }

    /// A range running past the end of a three-line file must not report more
    /// than five lines. An error result is also accepted: the assertion only
    /// runs when the comparison succeeds.
    #[test]
    #[ignore = "similarity module not yet implemented"]
    fn test_line_range_clamped() {
        let short_file = "line1\nline2\nline3\n";
        let test_dir = TestDir::new().unwrap();
        let path = test_dir.add_file("short.py", short_file).unwrap();
        let options = SimilarityOptions::default();
        let target = format!("{}:1:100", path.display());
        let result = compute_similarity(&PathBuf::from(&target), &path, &options);
        if let Ok(report) = result {
            assert!(report.fragment1.lines <= 5);
        }
    }

    /// The first fragment of the report must echo the requested line range.
    #[test]
    #[ignore = "similarity module not yet implemented"]
    fn test_fragment_includes_line_range() {
        let test_dir = TestDir::new().unwrap();
        let path = test_dir.add_file("code.py", FILE_WITH_BLOCKS).unwrap();
        let options = SimilarityOptions::default();
        let target = PathBuf::from(format!("{}:2:8", path.display()));
        let report = compute_similarity(&target, &target, &options).unwrap();
        assert_eq!(report.fragment1.line_range, Some((2, 8)));
    }
}
#[cfg(test)]
mod ngram_tests {
    //! Tests for the `ngram_size` option and its effect on the Dice score.

    use super::fixtures::*;
    use crate::analysis::similarity::{compute_similarity, SimilarityOptions};

    /// The default options must use an n-gram size of 1.
    #[test]
    #[ignore = "similarity module not yet implemented"]
    fn test_default_ngram_size() {
        assert_eq!(SimilarityOptions::default().ngram_size, 1);
    }

    /// With bigrams the Dice score must still lie in [0, 1].
    #[test]
    #[ignore = "similarity module not yet implemented"]
    fn test_bigram_similarity() {
        let workspace = TestDir::new().unwrap();
        let left = workspace.add_file("a.py", PYTHON_FUNC_A).unwrap();
        let right = workspace.add_file("b.py", PYTHON_FUNC_B_SIMILAR).unwrap();
        let bigrams = SimilarityOptions {
            ngram_size: 2,
            ..Default::default()
        };
        let outcome = compute_similarity(&left, &right, &bigrams).unwrap();
        assert!((0.0..=1.0).contains(&outcome.similarity.dice));
    }

    /// Trigram Dice must not exceed unigram Dice by more than 0.1.
    #[test]
    #[ignore = "similarity module not yet implemented"]
    fn test_higher_n_stricter() {
        let workspace = TestDir::new().unwrap();
        let left = workspace.add_file("a.py", PYTHON_FUNC_A).unwrap();
        let right = workspace.add_file("b.py", PYTHON_FUNC_B_SIMILAR).unwrap();
        let unigrams = SimilarityOptions {
            ngram_size: 1,
            ..Default::default()
        };
        let unigram_outcome = compute_similarity(&left, &right, &unigrams).unwrap();
        let trigrams = SimilarityOptions {
            ngram_size: 3,
            ..Default::default()
        };
        let trigram_outcome = compute_similarity(&left, &right, &trigrams).unwrap();
        assert!(
            trigram_outcome.similarity.dice <= unigram_outcome.similarity.dice + 0.1,
            "Higher n should not dramatically increase similarity"
        );
    }

    /// Single-token inputs compared with trigrams must score Dice = 0.0.
    #[test]
    #[ignore = "similarity module not yet implemented"]
    fn test_ngram_short_input() {
        let workspace = TestDir::new().unwrap();
        let left = workspace.add_file("a.py", "x").unwrap();
        let right = workspace.add_file("b.py", "y").unwrap();
        let trigrams = SimilarityOptions {
            ngram_size: 3,
            ..Default::default()
        };
        let outcome = compute_similarity(&left, &right, &trigrams).unwrap();
        assert!(
            outcome.similarity.dice.abs() < 0.001,
            "Short input with large n should give 0 similarity"
        );
    }

    /// The report must echo the n-gram size it was asked to use.
    #[test]
    #[ignore = "similarity module not yet implemented"]
    fn test_ngram_in_config() {
        let workspace = TestDir::new().unwrap();
        let file = workspace.add_file("a.py", PYTHON_FUNC_A).unwrap();
        let bigrams = SimilarityOptions {
            ngram_size: 2,
            ..Default::default()
        };
        let outcome = compute_similarity(&file, &file, &bigrams).unwrap();
        assert_eq!(outcome.config.ngram_size, 2);
    }
}
#[cfg(test)]
mod pairwise_matrix_tests {
    //! Tests for `compute_pairwise_similarity` over a directory of files.

    use super::fixtures::*;
    use crate::analysis::similarity::{compute_pairwise_similarity, SimilarityOptions};

    /// Three files must yield exactly three pairs.
    #[test]
    #[ignore = "similarity module not yet implemented"]
    fn test_pairwise_all_pairs() {
        let workspace = TestDir::new().unwrap();
        workspace.add_file("a.py", PYTHON_FUNC_A).unwrap();
        workspace.add_file("b.py", PYTHON_FUNC_B_SIMILAR).unwrap();
        workspace.add_file("c.py", PYTHON_FUNC_DIFFERENT).unwrap();
        let opts = SimilarityOptions::default();
        let matrix = compute_pairwise_similarity(workspace.path(), &opts).unwrap();
        assert_eq!(matrix.pairs.len(), 3);
    }

    /// Every pair must carry Dice and Jaccard scores inside [0, 1].
    #[test]
    #[ignore = "similarity module not yet implemented"]
    fn test_pairwise_valid_scores() {
        let workspace = TestDir::new().unwrap();
        workspace.add_file("a.py", PYTHON_FUNC_A).unwrap();
        workspace.add_file("b.py", PYTHON_FUNC_B_SIMILAR).unwrap();
        let opts = SimilarityOptions::default();
        let matrix = compute_pairwise_similarity(workspace.path(), &opts).unwrap();
        for entry in &matrix.pairs {
            assert!((0.0..=1.0).contains(&entry.dice));
            assert!((0.0..=1.0).contains(&entry.jaccard));
        }
    }

    /// With default options the matrix over three files must not be empty.
    ///
    /// NOTE(review): despite the name, no threshold is configured here —
    /// confirm whether a filtered run was meant to be asserted as well.
    #[test]
    #[ignore = "similarity module not yet implemented"]
    fn test_pairwise_threshold_filter() {
        let workspace = TestDir::new().unwrap();
        workspace.add_file("a.py", PYTHON_FUNC_A).unwrap();
        workspace.add_file("b.py", PYTHON_FUNC_A_COPY).unwrap();
        workspace
            .add_file("c.py", PYTHON_FUNC_VERY_DIFFERENT)
            .unwrap();
        let opts = SimilarityOptions::default();
        let unfiltered = compute_pairwise_similarity(workspace.path(), &opts).unwrap();
        assert!(!unfiltered.pairs.is_empty());
    }

    /// A directory holding a single file must produce no pairs.
    #[test]
    #[ignore = "similarity module not yet implemented"]
    fn test_pairwise_single_file() {
        let workspace = TestDir::new().unwrap();
        workspace.add_file("only.py", PYTHON_FUNC_A).unwrap();
        let opts = SimilarityOptions::default();
        let matrix = compute_pairwise_similarity(workspace.path(), &opts).unwrap();
        assert!(matrix.pairs.is_empty());
    }

    /// Pairs must be ordered by non-increasing Dice score.
    #[test]
    #[ignore = "similarity module not yet implemented"]
    fn test_pairwise_sorted() {
        let workspace = TestDir::new().unwrap();
        workspace.add_file("a.py", PYTHON_FUNC_A).unwrap();
        workspace.add_file("b.py", PYTHON_FUNC_A_COPY).unwrap();
        workspace.add_file("c.py", PYTHON_FUNC_DIFFERENT).unwrap();
        let opts = SimilarityOptions::default();
        let matrix = compute_pairwise_similarity(workspace.path(), &opts).unwrap();
        // Walk each pair alongside its successor.
        for (earlier, later) in matrix.pairs.iter().zip(matrix.pairs.iter().skip(1)) {
            assert!(
                earlier.dice >= later.dice,
                "Pairs should be sorted by similarity descending"
            );
        }
    }
}
#[cfg(test)]
mod score_interpretation_tests {
    //! Tests for the wording returned by `interpret_similarity_score` and its
    //! presence in the report. Matching is done on the lower-cased text.

    use super::fixtures::*;
    use crate::analysis::similarity::{
        compute_similarity, interpret_similarity_score, SimilarityOptions,
    };

    /// A score of 0.98 must be described with "identical" or "near".
    #[test]
    #[ignore = "similarity module not yet implemented"]
    fn test_interpretation_near_identical() {
        let lowered = interpret_similarity_score(0.98).to_lowercase();
        assert!(
            lowered.contains("identical") || lowered.contains("near"),
            "High score should indicate near-identical"
        );
    }

    /// A score of 0.88 must be described with "high" or "similar".
    #[test]
    #[ignore = "similarity module not yet implemented"]
    fn test_interpretation_high_similarity() {
        let lowered = interpret_similarity_score(0.88).to_lowercase();
        assert!(
            lowered.contains("high") || lowered.contains("similar"),
            "Score 0.88 should indicate high similarity"
        );
    }

    /// A score of 0.75 must be described with "moderate" or "possible".
    #[test]
    #[ignore = "similarity module not yet implemented"]
    fn test_interpretation_moderate_similarity() {
        let lowered = interpret_similarity_score(0.75).to_lowercase();
        assert!(
            lowered.contains("moderate") || lowered.contains("possible"),
            "Score 0.75 should indicate moderate similarity"
        );
    }

    /// A score of 0.55 must be described with "some" or "shared".
    #[test]
    #[ignore = "similarity module not yet implemented"]
    fn test_interpretation_some_similarity() {
        let lowered = interpret_similarity_score(0.55).to_lowercase();
        assert!(
            lowered.contains("some") || lowered.contains("shared"),
            "Score 0.55 should indicate some similarity"
        );
    }

    /// A score of 0.15 must be described with "different" or "low".
    #[test]
    #[ignore = "similarity module not yet implemented"]
    fn test_interpretation_very_different() {
        let lowered = interpret_similarity_score(0.15).to_lowercase();
        assert!(
            lowered.contains("different") || lowered.contains("low"),
            "Score 0.15 should indicate very different"
        );
    }

    /// A computed report must carry a non-empty interpretation.
    #[test]
    #[ignore = "similarity module not yet implemented"]
    fn test_interpretation_in_report() {
        let workspace = TestDir::new().unwrap();
        let left = workspace.add_file("a.py", PYTHON_FUNC_A).unwrap();
        let right = workspace.add_file("b.py", PYTHON_FUNC_B_SIMILAR).unwrap();
        let opts = SimilarityOptions::default();
        let outcome = compute_similarity(&left, &right, &opts).unwrap();
        assert!(
            !outcome.similarity.interpretation.is_empty(),
            "Report should include interpretation"
        );
    }

    /// The boundary score 0.95 must already read as "identical" or "near".
    #[test]
    #[ignore = "similarity module not yet implemented"]
    fn test_interpretation_boundary_095() {
        let lowered = interpret_similarity_score(0.95).to_lowercase();
        assert!(lowered.contains("identical") || lowered.contains("near"));
    }

    /// The boundary score 0.70 must already read as "moderate" or "possible".
    #[test]
    #[ignore = "similarity module not yet implemented"]
    fn test_interpretation_boundary_070() {
        let lowered = interpret_similarity_score(0.70).to_lowercase();
        assert!(lowered.contains("moderate") || lowered.contains("possible"));
    }
}
#[cfg(test)]
mod token_breakdown_tests {
    //! Consistency checks on the `token_breakdown` section of the report.

    use super::fixtures::*;
    use crate::analysis::similarity::{compute_similarity, SimilarityOptions};

    /// Writes `first` to `a.py` and `second` to `b.py` in a fresh `TestDir`,
    /// returning the directory together with both paths.
    fn python_pair(first: &str, second: &str) -> (TestDir, std::path::PathBuf, std::path::PathBuf) {
        let workspace = TestDir::new().unwrap();
        let left = workspace.add_file("a.py", first).unwrap();
        let right = workspace.add_file("b.py", second).unwrap();
        (workspace, left, right)
    }

    /// Comparing two non-empty files must report a positive `total_unique`.
    #[test]
    #[ignore = "similarity module not yet implemented"]
    fn test_token_breakdown_computed() {
        let (_workspace, left, right) = python_pair(PYTHON_FUNC_A, PYTHON_FUNC_B_SIMILAR);
        let opts = SimilarityOptions::default();
        let outcome = compute_similarity(&left, &right, &opts).unwrap();
        assert!(outcome.token_breakdown.total_unique > 0);
    }

    /// `shared_tokens` must not exceed the token count of the smaller
    /// fragment.
    #[test]
    #[ignore = "similarity module not yet implemented"]
    fn test_shared_tokens_valid() {
        let (_workspace, left, right) = python_pair(PYTHON_FUNC_A, PYTHON_FUNC_B_SIMILAR);
        let opts = SimilarityOptions::default();
        let outcome = compute_similarity(&left, &right, &opts).unwrap();
        let smaller = outcome.fragment1.tokens.min(outcome.fragment2.tokens);
        assert!(
            outcome.token_breakdown.shared_tokens <= smaller,
            "Shared tokens can't exceed smaller fragment"
        );
    }

    /// `total_unique` must equal the shared count plus both unique counts.
    #[test]
    #[ignore = "similarity module not yet implemented"]
    fn test_unique_tokens_formula() {
        let (_workspace, left, right) = python_pair(PYTHON_FUNC_A, PYTHON_FUNC_DIFFERENT);
        let opts = SimilarityOptions::default();
        let outcome = compute_similarity(&left, &right, &opts).unwrap();
        let breakdown = &outcome.token_breakdown;
        let expected_total =
            breakdown.shared_tokens + breakdown.unique_to_fragment1 + breakdown.unique_to_fragment2;
        assert_eq!(breakdown.total_unique, expected_total);
    }

    /// Files with the same text must report fewer than three tokens unique to
    /// the first fragment.
    #[test]
    #[ignore = "similarity module not yet implemented"]
    fn test_identical_no_unique() {
        let (_workspace, left, right) = python_pair(PYTHON_FUNC_A, PYTHON_FUNC_A_COPY);
        let opts = SimilarityOptions::default();
        let outcome = compute_similarity(&left, &right, &opts).unwrap();
        assert!(
            outcome.token_breakdown.unique_to_fragment1 < 3,
            "Identical files should have no/minimal unique tokens"
        );
    }
}
#[cfg(test)]
mod multi_language_similarity_tests {
    //! Runs the "renamed but equivalent" comparison for each fixture language.

    use super::fixtures::*;
    use crate::analysis::similarity::{compute_similarity, SimilarityOptions};

    /// Writes both sources into a fresh `TestDir`, compares them with the
    /// `language` option set, and asserts a Dice score above 0.5.
    fn assert_similar_in(
        language: &str,
        name_a: &str,
        source_a: &str,
        name_b: &str,
        source_b: &str,
    ) {
        let workspace = TestDir::new().unwrap();
        let left = workspace.add_file(name_a, source_a).unwrap();
        let right = workspace.add_file(name_b, source_b).unwrap();
        let opts = SimilarityOptions {
            language: Some(language.to_string()),
            ..Default::default()
        };
        let outcome = compute_similarity(&left, &right, &opts).unwrap();
        assert!(outcome.similarity.dice > 0.5);
    }

    /// Python pair with the language set to "python".
    #[test]
    #[ignore = "similarity module not yet implemented"]
    fn test_python_similarity() {
        assert_similar_in("python", "a.py", PYTHON_FUNC_A, "b.py", PYTHON_FUNC_B_SIMILAR);
    }

    /// TypeScript pair with the language set to "typescript".
    #[test]
    #[ignore = "similarity module not yet implemented"]
    fn test_typescript_similarity() {
        assert_similar_in("typescript", "a.ts", TS_FUNC_A, "b.ts", TS_FUNC_B_SIMILAR);
    }

    /// Go pair with the language set to "go".
    #[test]
    #[ignore = "similarity module not yet implemented"]
    fn test_go_similarity() {
        assert_similar_in("go", "a.go", GO_FUNC_A, "b.go", GO_FUNC_B_SIMILAR);
    }

    /// Rust pair with the language set to "rust".
    #[test]
    #[ignore = "similarity module not yet implemented"]
    fn test_rust_similarity() {
        assert_similar_in("rust", "a.rs", RUST_FUNC_A, "b.rs", RUST_FUNC_B_SIMILAR);
    }

    /// With no language set, a `.py` file compared with itself must score
    /// Dice = 1.0 (within 0.001).
    #[test]
    #[ignore = "similarity module not yet implemented"]
    fn test_auto_detect_language() {
        let workspace = TestDir::new().unwrap();
        let file = workspace.add_file("code.py", PYTHON_FUNC_A).unwrap();
        let opts = SimilarityOptions::default();
        let outcome = compute_similarity(&file, &file, &opts).unwrap();
        assert!((outcome.similarity.dice - 1.0).abs() < 0.001);
    }
}
/// Edge cases for `compute_similarity`: empty inputs, a missing file,
/// non-text content, and a large input with a time budget.
#[cfg(test)]
mod edge_case_similarity_tests {
    use super::fixtures::*;
    use crate::analysis::similarity::{compute_similarity, SimilarityOptions};

    /// An empty file compared with a non-empty one should score 0.0
    /// (within a 0.001 tolerance).
    #[test]
    #[ignore = "similarity module not yet implemented"]
    fn test_empty_file() {
        let test_dir = TestDir::new().unwrap();
        let path_empty = test_dir.add_file("empty.py", "").unwrap();
        let path_normal = test_dir.add_file("normal.py", PYTHON_FUNC_A).unwrap();
        let options = SimilarityOptions::default();
        let report = compute_similarity(&path_empty, &path_normal, &options).unwrap();
        assert!(
            report.similarity.dice.abs() < 0.001,
            "empty vs. non-empty file should score 0.0, got {}",
            report.similarity.dice
        );
    }

    /// Two empty files: the score is allowed to be either exactly 0.0 or
    /// exactly 1.0; this test accepts both conventions.
    #[test]
    #[ignore = "similarity module not yet implemented"]
    fn test_both_files_empty() {
        let test_dir = TestDir::new().unwrap();
        let path_a = test_dir.add_file("a.py", "").unwrap();
        let path_b = test_dir.add_file("b.py", "").unwrap();
        let options = SimilarityOptions::default();
        let report = compute_similarity(&path_a, &path_b, &options).unwrap();
        let dice = report.similarity.dice;
        assert!(
            dice == 0.0 || dice == 1.0,
            "two empty files should score exactly 0.0 or 1.0, got {}",
            dice
        );
    }

    /// Comparing against a path that does not exist must return an error.
    #[test]
    #[ignore = "similarity module not yet implemented"]
    fn test_file_not_found() {
        let test_dir = TestDir::new().unwrap();
        let path_existing = test_dir.add_file("exists.py", PYTHON_FUNC_A).unwrap();
        // Build the missing path inside the fresh temp dir so the test does not
        // depend on what happens to exist in the current working directory.
        let path_missing = test_dir.path().join("nonexistent.py");
        let options = SimilarityOptions::default();
        let result = compute_similarity(&path_existing, &path_missing, &options);
        assert!(
            result.is_err(),
            "comparing against a missing file should return an error"
        );
    }

    /// A file whose bytes are not valid UTF-8 may either be rejected with an
    /// error or, if a report is produced, must score below 0.1.
    #[test]
    #[ignore = "similarity module not yet implemented"]
    fn test_binary_file() {
        let test_dir = TestDir::new().unwrap();
        let path_normal = test_dir.add_file("code.py", PYTHON_FUNC_A).unwrap();
        // `add_file` only accepts `&str` content, so the raw bytes are written
        // directly.
        let binary_path = test_dir.path().join("binary.bin");
        std::fs::write(&binary_path, [0u8, 159, 146, 150]).unwrap();
        let options = SimilarityOptions::default();
        let result = compute_similarity(&path_normal, &binary_path, &options);
        // An `Err` is an acceptable outcome here; only a successful report is
        // constrained.
        if let Ok(report) = result {
            assert!(
                report.similarity.dice < 0.1,
                "binary content should not resemble source code, got {}",
                report.similarity.dice
            );
        }
    }

    /// Comparing two identical 1000-function files must finish in under five
    /// seconds and score 1.0 (within a 0.001 tolerance).
    #[test]
    #[ignore = "similarity module not yet implemented"]
    fn test_long_file_performance() {
        let long_content: String = (0..1000)
            .map(|i| format!("def func{}(): return {}\n", i, i))
            .collect();
        let test_dir = TestDir::new().unwrap();
        let path_a = test_dir.add_file("long_a.py", &long_content).unwrap();
        let path_b = test_dir.add_file("long_b.py", &long_content).unwrap();
        let options = SimilarityOptions::default();
        // Only the comparison itself is timed, not the fixture setup.
        let start = std::time::Instant::now();
        let report = compute_similarity(&path_a, &path_b, &options).unwrap();
        let duration = start.elapsed();
        assert!(
            duration < std::time::Duration::from_secs(5),
            "Long file comparison took too long"
        );
        assert!(
            (report.similarity.dice - 1.0).abs() < 0.001,
            "identical long files should score 1.0, got {}",
            report.similarity.dice
        );
    }
}
/// JSON round-trip checks for the report returned by `compute_similarity`.
#[cfg(test)]
mod serialization_tests {
    use super::fixtures::*;
    use crate::analysis::similarity::{compute_similarity, SimilarityOptions, SimilarityReport};

    /// The report must serialize to a JSON string without error.
    #[test]
    #[ignore = "similarity module not yet implemented"]
    fn test_json_serialization() {
        let test_dir = TestDir::new().unwrap();
        let path_a = test_dir.add_file("a.py", PYTHON_FUNC_A).unwrap();
        let path_b = test_dir.add_file("b.py", PYTHON_FUNC_B_SIMILAR).unwrap();
        let options = SimilarityOptions::default();
        let report = compute_similarity(&path_a, &path_b, &options).unwrap();
        // `expect` surfaces the serde error on failure, which a bare
        // `assert!(.. .is_ok())` would hide.
        serde_json::to_string(&report).expect("report should serialize to JSON");
    }

    /// A serialized report must deserialize back into a `SimilarityReport`
    /// whose `similarity.dice` matches the original within 0.001.
    #[test]
    #[ignore = "similarity module not yet implemented"]
    fn test_json_deserialization() {
        let test_dir = TestDir::new().unwrap();
        let path_a = test_dir.add_file("a.py", PYTHON_FUNC_A).unwrap();
        let path_b = test_dir.add_file("b.py", PYTHON_FUNC_B_SIMILAR).unwrap();
        let options = SimilarityOptions::default();
        let report = compute_similarity(&path_a, &path_b, &options).unwrap();
        let json = serde_json::to_string(&report).unwrap();
        let restored: SimilarityReport =
            serde_json::from_str(&json).expect("serialized report should deserialize");
        assert!(
            (restored.similarity.dice - report.similarity.dice).abs() < 0.001,
            "similarity.dice changed across the JSON round trip"
        );
    }

    /// The serialized report should mention the n-gram configuration.
    #[test]
    #[ignore = "similarity module not yet implemented"]
    fn test_config_in_json() {
        let test_dir = TestDir::new().unwrap();
        let path = test_dir.add_file("a.py", PYTHON_FUNC_A).unwrap();
        let options = SimilarityOptions {
            ngram_size: 3,
            ..Default::default()
        };
        let report = compute_similarity(&path, &path, &options).unwrap();
        let json = serde_json::to_string(&report).unwrap();
        // Any key containing "ngram" is accepted; this also matches the more
        // specific "ngram_size", so a single substring check suffices.
        assert!(json.contains("ngram"), "JSON should include config");
    }
}