1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
use crate::apted::{compute_edit_distance, APTEDOptions};
use crate::tree::TreeNode;
use std::rc::Rc;
#[derive(Debug, Clone)]
pub struct TSEDOptions {
pub apted_options: APTEDOptions,
pub min_lines: u32, // Minimum number of lines for a function to be considered
pub min_tokens: Option<u32>, // Minimum number of tokens (AST nodes) for a function to be considered
pub size_penalty: bool, // Apply penalty for short functions
pub skip_test: bool, // Skip test functions (language-specific)
}
impl Default for TSEDOptions {
fn default() -> Self {
TSEDOptions {
apted_options: APTEDOptions {
rename_cost: 0.3, // Default from the TypeScript implementation
delete_cost: 1.0,
insert_cost: 1.0,
compare_values: false, // TypeScript default: structural comparison only
},
min_lines: 5, // Increased default to better filter trivial matches
min_tokens: None, // No token limit by default
size_penalty: true, // Enable size penalty by default
skip_test: false, // Don't skip test functions by default
}
}
}
/// Calculate TSED (Tree Structure Edit Distance) similarity between two trees
/// Returns a value between 0.0 and 1.0, where 1.0 means identical
#[must_use]
#[allow(clippy::cast_precision_loss)]
pub fn calculate_tsed(tree1: &Rc<TreeNode>, tree2: &Rc<TreeNode>, options: &TSEDOptions) -> f64 {
let distance = compute_edit_distance(tree1, tree2, &options.apted_options);
let size1 = tree1.get_subtree_size() as f64;
let size2 = tree2.get_subtree_size() as f64;
// TSED normalization: Use the larger tree size
// This ensures that when comparing trees of different sizes,
// the similarity reflects how much of the larger tree matches
let max_size = size1.max(size2);
// Calculate base TSED similarity
let tsed_similarity = if max_size > 0.0 { (1.0 - distance / max_size).max(0.0) } else { 1.0 };
// If distance is 0 but trees have different sizes, check more carefully
// This can happen when compare_values is false and structure is similar
let tsed_similarity = if distance == 0.0 && size1 != size2 {
let size_ratio = size1.min(size2) / size1.max(size2);
let size_diff = (size1 - size2).abs();
// Apply penalty based on both ratio and absolute difference
if size_diff > 10.0 {
// Strong penalty for large absolute differences
tsed_similarity * 0.5
} else if size_ratio < 0.95 || size_diff > 3.0 {
// Moderate penalty for noticeable differences
tsed_similarity * size_ratio.powf(0.5)
} else {
tsed_similarity // Very minor differences are OK
}
} else {
tsed_similarity
};
// For very small trees, even small differences should matter more
let tsed_similarity = if options.size_penalty {
if max_size < 10.0 && distance > 0.0 {
tsed_similarity * 0.8 // Reduce similarity for small trees with any differences
} else if max_size < 30.0 && distance > 0.0 {
// For moderately small trees, apply a smaller penalty
tsed_similarity * 0.9
} else {
tsed_similarity
}
} else {
tsed_similarity
};
// Apply additional penalties for structural differences
let mut similarity = tsed_similarity;
// Size ratio penalty: penalize when trees have very different sizes
let size_ratio = size1.min(size2) / size1.max(size2);
if options.size_penalty {
// For short functions, make differences more pronounced
let min_size = size1.min(size2);
if min_size < 30.0 {
// Short function penalty: the shorter, the more sensitive to differences
let short_function_factor = (min_size / 30.0).powf(0.5);
similarity *= short_function_factor;
// Additional penalty for very short functions
if min_size < 10.0 {
similarity *= 0.5; // Strong penalty for very short functions
} else if min_size < 20.0 {
similarity *= 0.7; // Moderate penalty for short functions
}
}
// Size difference penalty
if size_ratio < 0.5 {
// If one tree is less than half the size of the other,
// they're likely fundamentally different
similarity *= size_ratio.powf(0.5);
}
}
similarity
}
/// Calculate TSED from TypeScript code strings
///
/// # Errors
///
/// Returns an error if parsing fails for either code string
pub fn calculate_tsed_from_code(
code1: &str,
code2: &str,
filename1: &str,
filename2: &str,
options: &TSEDOptions,
) -> Result<f64, String> {
use crate::parser::parse_and_convert_to_tree;
let tree1 = parse_and_convert_to_tree(filename1, code1)?;
let tree2 = parse_and_convert_to_tree(filename2, code2)?;
Ok(calculate_tsed(&tree1, &tree2, options))
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_identical_code() {
let code = "function add(a: number, b: number) { return a + b; }";
let options = TSEDOptions {
size_penalty: false, // Disable for small test functions
..Default::default()
};
let similarity =
calculate_tsed_from_code(code, code, "test1.ts", "test2.ts", &options).unwrap();
assert!((similarity - 1.0).abs() < f64::EPSILON);
}
#[test]
fn test_renamed_function() {
let code1 = "function add(a: number, b: number) { return a + b; }";
let code2 = "function sum(x: number, y: number) { return x + y; }";
let options = TSEDOptions {
size_penalty: false, // Disable for small test functions
..Default::default()
};
let similarity =
calculate_tsed_from_code(code1, code2, "test1.ts", "test2.ts", &options).unwrap();
// Should have high similarity due to low rename cost
assert!(similarity > 0.8);
}
#[test]
fn test_different_structure() {
let code1 = "function test() { return 1; }";
let code2 = "class Test { method() { return 1; } }";
let options = TSEDOptions::default();
let similarity =
calculate_tsed_from_code(code1, code2, "test1.ts", "test2.ts", &options).unwrap();
// Should have lower similarity due to structural differences
assert!(similarity < 0.7);
}
}