1use std::path::Path;
6
7use objects::object::{ChangeImportance, ModificationKind};
8
9use super::analysis_similarity::{SimilarityMethod, compute_similarity};
10use crate::parser::{Language, ParsedFile};
11
/// Outcome of classifying a modification: the kind of change, its importance,
/// and an `f64` score that `classify_modification_with_confidence` reports as
/// the confidence of the verdict.
pub type ClassificationResult = (ModificationKind, ChangeImportance, f64);
14
15pub fn classify_modification(
23 path: &Path,
24 old_content: &str,
25 new_content: &str,
26) -> (ModificationKind, ChangeImportance) {
27 let (kind, importance, _confidence) =
28 classify_modification_with_confidence(path, old_content, new_content);
29 (kind, importance)
30}
31
32pub fn classify_modification_with_confidence(
34 path: &Path,
35 old_content: &str,
36 new_content: &str,
37) -> ClassificationResult {
38 if old_content == new_content {
40 return (
41 ModificationKind::WhitespaceOnly,
42 ChangeImportance::Noise,
43 1.0,
44 );
45 }
46
47 let token_sim = compute_similarity(old_content, new_content, SimilarityMethod::Tokens);
49 if token_sim >= 1.0 {
50 return (
53 ModificationKind::FormattingOnly,
54 ChangeImportance::Noise,
55 0.95,
56 );
57 }
58
59 let language = Language::from_path(path);
60
61 let old_parsed = ParsedFile::parse(old_content, language);
63 let new_parsed = ParsedFile::parse(new_content, language);
64
65 match (&old_parsed, &new_parsed) {
66 (Some(old_ast), Some(new_ast)) => {
67 classify_with_ast(old_content, new_content, old_ast, new_ast)
68 }
69 _ => {
70 classify_without_ast(old_content, new_content, token_sim)
72 }
73 }
74}
75
76fn classify_with_ast(
78 old_content: &str,
79 new_content: &str,
80 old_ast: &ParsedFile,
81 new_ast: &ParsedFile,
82) -> ClassificationResult {
83 let old_funcs = old_ast.extract_functions();
84 let new_funcs = new_ast.extract_functions();
85 let old_imports = old_ast.extract_imports();
86 let new_imports = new_ast.extract_imports();
87
88 let funcs_identical = are_functions_identical(&old_funcs, &new_funcs);
89 let imports_identical = old_imports.len() == new_imports.len()
90 && old_imports
91 .iter()
92 .zip(new_imports.iter())
93 .all(|(a, b)| a.raw == b.raw);
94
95 let old_stripped = strip_comments(old_ast);
97 let new_stripped = strip_comments(new_ast);
98 let non_comment_identical = old_stripped == new_stripped;
99
100 if non_comment_identical {
101 return (ModificationKind::CommentsOnly, ChangeImportance::Low, 0.92);
102 }
103
104 if funcs_identical && !imports_identical {
105 let old_body = strip_imports_and_functions(old_ast);
108 let new_body = strip_imports_and_functions(new_ast);
109 if old_body == new_body {
110 return (ModificationKind::ImportsOnly, ChangeImportance::Low, 0.93);
111 }
112 }
113
114 let token_sim = compute_similarity(old_content, new_content, SimilarityMethod::Tokens);
116 if token_sim >= 1.0 {
117 return (
118 ModificationKind::FormattingOnly,
119 ChangeImportance::Noise,
120 0.97,
121 );
122 }
123
124 let line_sim = compute_similarity(old_content, new_content, SimilarityMethod::Lines);
127 if token_sim > 0.9 && line_sim < 0.7 {
128 return (ModificationKind::Mixed, ChangeImportance::Medium, 0.75);
130 }
131
132 (ModificationKind::Logic, ChangeImportance::High, 0.85)
134}
135
136fn classify_without_ast(
138 old_content: &str,
139 new_content: &str,
140 token_sim: f64,
141) -> ClassificationResult {
142 if token_sim >= 1.0 {
143 return (
144 ModificationKind::FormattingOnly,
145 ChangeImportance::Noise,
146 0.9,
147 );
148 }
149
150 let line_sim = compute_similarity(old_content, new_content, SimilarityMethod::Lines);
151
152 if token_sim > 0.95 && line_sim < 0.8 {
154 return (
155 ModificationKind::FormattingOnly,
156 ChangeImportance::Noise,
157 0.7,
158 );
159 }
160
161 if token_sim > 0.9 {
162 return (ModificationKind::Mixed, ChangeImportance::Medium, 0.6);
163 }
164
165 (ModificationKind::Logic, ChangeImportance::High, 0.5)
167}
168
169fn are_functions_identical(
171 old_funcs: &[crate::parser::FunctionDef],
172 new_funcs: &[crate::parser::FunctionDef],
173) -> bool {
174 if old_funcs.len() != new_funcs.len() {
175 return false;
176 }
177 let mut old_sorted: Vec<_> = old_funcs.iter().collect();
179 let mut new_sorted: Vec<_> = new_funcs.iter().collect();
180 old_sorted.sort_by_key(|f| &f.name);
181 new_sorted.sort_by_key(|f| &f.name);
182
183 old_sorted
184 .iter()
185 .zip(new_sorted.iter())
186 .all(|(a, b)| a.name == b.name && a.content == b.content)
187}
188
189fn strip_comments(parsed: &ParsedFile) -> String {
191 let mut result = String::new();
192 collect_non_comment_text(parsed.root_node(), &parsed.source, &mut result);
193 result
194}
195
196fn collect_non_comment_text(node: tree_sitter::Node<'_>, source: &str, out: &mut String) {
197 let mut stack = vec![node];
198
199 while let Some(current) = stack.pop() {
200 if is_comment_node(current.kind()) {
201 continue;
202 }
203
204 if current.child_count() == 0 {
205 out.push_str(&source[current.byte_range()]);
206 out.push(' ');
207 continue;
208 }
209
210 let child_count = current.child_count();
211 for index in (0..child_count).rev() {
212 if let Some(child) = current.child(index as u32) {
213 stack.push(child);
214 }
215 }
216 }
217}
218
/// Returns `true` when `kind` is one of the node kind names this module
/// treats as a comment.
fn is_comment_node(kind: &str) -> bool {
    const COMMENT_KINDS: [&str; 5] = [
        "comment",
        "line_comment",
        "block_comment",
        "doc_comment",
        "string_comment",
    ];

    COMMENT_KINDS.iter().any(|&candidate| candidate == kind)
}
225
226fn strip_imports_and_functions(parsed: &ParsedFile) -> String {
228 let mut result = String::new();
229 let root = parsed.root_node();
230 for i in 0..root.child_count() {
231 if let Some(child) = root.child(i as u32) {
232 let kind = child.kind();
233 if matches!(
235 kind,
236 "use_declaration"
237 | "extern_crate_declaration"
238 | "import_statement"
239 | "import_from_statement"
240 | "import_declaration"
241 ) {
242 continue;
243 }
244 if ParsedFile::is_function_kind(kind, parsed.language) {
246 continue;
247 }
248 result.push_str(&parsed.source[child.byte_range()]);
249 result.push('\n');
250 }
251 }
252 result
253}
254
#[cfg(test)]
mod tests {
    use super::*;

    /// Re-indenting a body changes whitespace only. The inputs must differ,
    /// otherwise the identical-content short-circuit reports `WhitespaceOnly`.
    /// NOTE(review): assumes token similarity ignores whitespace differences.
    #[test]
    fn test_whitespace_only() {
        let old = "fn foo() {\n    bar();\n}\n";
        let new = "fn foo() {\n  bar();\n}\n";
        let (kind, importance) = classify_modification(Path::new("test.rs"), old, new);
        assert_eq!(kind, ModificationKind::FormattingOnly);
        assert_eq!(importance, ChangeImportance::Noise);
    }

    /// Changing a literal inside a function body is a logic change.
    #[test]
    fn test_logic_change() {
        let old = "fn foo() -> i32 {\n    42\n}\n";
        let new = "fn foo() -> i32 {\n    43\n}\n";
        let (kind, importance) = classify_modification(Path::new("test.rs"), old, new);
        assert_eq!(kind, ModificationKind::Logic);
        assert_eq!(importance, ChangeImportance::High);
    }

    /// Only the leading comment differs between the two versions.
    #[test]
    fn test_comments_only() {
        let old = "// old comment\nfn foo() {\n    bar();\n}\n";
        let new = "// new comment\nfn foo() {\n    bar();\n}\n";
        let (kind, importance) = classify_modification(Path::new("test.rs"), old, new);
        assert_eq!(kind, ModificationKind::CommentsOnly);
        assert_eq!(importance, ChangeImportance::Low);
    }

    /// A `use` declaration is added; the function is untouched.
    #[test]
    fn test_imports_only() {
        let old = "use std::io;\n\nfn foo() {\n    bar();\n}\n";
        let new = "use std::io;\nuse std::fs;\n\nfn foo() {\n    bar();\n}\n";
        let (kind, importance) = classify_modification(Path::new("test.rs"), old, new);
        assert_eq!(kind, ModificationKind::ImportsOnly);
        assert_eq!(importance, ChangeImportance::Low);
    }

    /// An unrecognised extension exercises the text-only fallback.
    #[test]
    fn test_parse_error_fallback() {
        let old = "some content here\n";
        let new = "some content here\nwith additions\n";
        let (kind, importance) = classify_modification(Path::new("test.xyz"), old, new);
        assert_eq!(kind, ModificationKind::Logic);
        assert_eq!(importance, ChangeImportance::High);
    }

    /// Same tokens with different spacing in a file of unknown language.
    /// The inputs must differ, otherwise the identical-content short-circuit
    /// reports `WhitespaceOnly`.
    /// NOTE(review): assumes token similarity ignores runs of spaces.
    #[test]
    fn test_formatting_only_unknown_lang() {
        let old = "foo  bar   baz\n";
        let new = "foo bar baz\n";
        let (kind, importance) = classify_modification(Path::new("test.xyz"), old, new);
        assert_eq!(kind, ModificationKind::FormattingOnly);
        assert_eq!(importance, ChangeImportance::Noise);
    }
}