use std::path::Path;
use objects::object::{ChangeImportance, ModificationKind};
use super::analysis_similarity::{SimilarityMethod, compute_similarity};
use crate::parser::{Language, ParsedFile};
pub type ClassificationResult = (ModificationKind, ChangeImportance, f64);
pub fn classify_modification(
path: &Path,
old_content: &str,
new_content: &str,
) -> (ModificationKind, ChangeImportance) {
let (kind, importance, _confidence) =
classify_modification_with_confidence(path, old_content, new_content);
(kind, importance)
}
pub fn classify_modification_with_confidence(
path: &Path,
old_content: &str,
new_content: &str,
) -> ClassificationResult {
if old_content == new_content {
return (
ModificationKind::WhitespaceOnly,
ChangeImportance::Noise,
1.0,
);
}
let token_sim = compute_similarity(old_content, new_content, SimilarityMethod::Tokens);
if token_sim >= 1.0 {
return (
ModificationKind::FormattingOnly,
ChangeImportance::Noise,
0.95,
);
}
let language = Language::from_path(path);
let old_parsed = ParsedFile::parse(old_content, language);
let new_parsed = ParsedFile::parse(new_content, language);
match (&old_parsed, &new_parsed) {
(Some(old_ast), Some(new_ast)) => {
classify_with_ast(old_content, new_content, old_ast, new_ast)
}
_ => {
classify_without_ast(old_content, new_content, token_sim)
}
}
}
fn classify_with_ast(
old_content: &str,
new_content: &str,
old_ast: &ParsedFile,
new_ast: &ParsedFile,
) -> ClassificationResult {
let old_funcs = old_ast.extract_functions();
let new_funcs = new_ast.extract_functions();
let old_imports = old_ast.extract_imports();
let new_imports = new_ast.extract_imports();
let funcs_identical = are_functions_identical(&old_funcs, &new_funcs);
let imports_identical = old_imports.len() == new_imports.len()
&& old_imports
.iter()
.zip(new_imports.iter())
.all(|(a, b)| a.raw == b.raw);
let old_stripped = strip_comments(old_ast);
let new_stripped = strip_comments(new_ast);
let non_comment_identical = old_stripped == new_stripped;
if non_comment_identical {
return (ModificationKind::CommentsOnly, ChangeImportance::Low, 0.92);
}
if funcs_identical && !imports_identical {
let old_body = strip_imports_and_functions(old_ast);
let new_body = strip_imports_and_functions(new_ast);
if old_body == new_body {
return (ModificationKind::ImportsOnly, ChangeImportance::Low, 0.93);
}
}
let token_sim = compute_similarity(old_content, new_content, SimilarityMethod::Tokens);
if token_sim >= 1.0 {
return (
ModificationKind::FormattingOnly,
ChangeImportance::Noise,
0.97,
);
}
let line_sim = compute_similarity(old_content, new_content, SimilarityMethod::Lines);
if token_sim > 0.9 && line_sim < 0.7 {
return (ModificationKind::Mixed, ChangeImportance::Medium, 0.75);
}
(ModificationKind::Logic, ChangeImportance::High, 0.85)
}
fn classify_without_ast(
old_content: &str,
new_content: &str,
token_sim: f64,
) -> ClassificationResult {
if token_sim >= 1.0 {
return (
ModificationKind::FormattingOnly,
ChangeImportance::Noise,
0.9,
);
}
let line_sim = compute_similarity(old_content, new_content, SimilarityMethod::Lines);
if token_sim > 0.95 && line_sim < 0.8 {
return (
ModificationKind::FormattingOnly,
ChangeImportance::Noise,
0.7,
);
}
if token_sim > 0.9 {
return (ModificationKind::Mixed, ChangeImportance::Medium, 0.6);
}
(ModificationKind::Logic, ChangeImportance::High, 0.5)
}
fn are_functions_identical(
old_funcs: &[crate::parser::FunctionDef],
new_funcs: &[crate::parser::FunctionDef],
) -> bool {
if old_funcs.len() != new_funcs.len() {
return false;
}
let mut old_sorted: Vec<_> = old_funcs.iter().collect();
let mut new_sorted: Vec<_> = new_funcs.iter().collect();
old_sorted.sort_by_key(|f| &f.name);
new_sorted.sort_by_key(|f| &f.name);
old_sorted
.iter()
.zip(new_sorted.iter())
.all(|(a, b)| a.name == b.name && a.content == b.content)
}
fn strip_comments(parsed: &ParsedFile) -> String {
let mut result = String::new();
collect_non_comment_text(parsed.root_node(), &parsed.source, &mut result);
result
}
fn collect_non_comment_text(node: tree_sitter::Node<'_>, source: &str, out: &mut String) {
let mut stack = vec![node];
while let Some(current) = stack.pop() {
if is_comment_node(current.kind()) {
continue;
}
if current.child_count() == 0 {
out.push_str(&source[current.byte_range()]);
out.push(' ');
continue;
}
let child_count = current.child_count();
for index in (0..child_count).rev() {
if let Some(child) = current.child(index as u32) {
stack.push(child);
}
}
}
}
fn is_comment_node(kind: &str) -> bool {
matches!(
kind,
"comment" | "line_comment" | "block_comment" | "doc_comment" | "string_comment"
)
}
fn strip_imports_and_functions(parsed: &ParsedFile) -> String {
let mut result = String::new();
let root = parsed.root_node();
for i in 0..root.child_count() {
if let Some(child) = root.child(i as u32) {
let kind = child.kind();
if matches!(
kind,
"use_declaration"
| "extern_crate_declaration"
| "import_statement"
| "import_from_statement"
| "import_declaration"
) {
continue;
}
if ParsedFile::is_function_kind(kind, parsed.language) {
continue;
}
result.push_str(&parsed.source[child.byte_range()]);
result.push('\n');
}
}
result
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_whitespace_only() {
let old = "fn foo() {\n bar();\n}\n";
let new = "fn foo() {\n bar();\n}\n";
let (kind, importance) = classify_modification(Path::new("test.rs"), old, new);
assert_eq!(kind, ModificationKind::FormattingOnly);
assert_eq!(importance, ChangeImportance::Noise);
}
#[test]
fn test_logic_change() {
let old = "fn foo() -> i32 {\n 42\n}\n";
let new = "fn foo() -> i32 {\n 43\n}\n";
let (kind, importance) = classify_modification(Path::new("test.rs"), old, new);
assert_eq!(kind, ModificationKind::Logic);
assert_eq!(importance, ChangeImportance::High);
}
#[test]
fn test_comments_only() {
let old = "// old comment\nfn foo() {\n bar();\n}\n";
let new = "// new comment\nfn foo() {\n bar();\n}\n";
let (kind, importance) = classify_modification(Path::new("test.rs"), old, new);
assert_eq!(kind, ModificationKind::CommentsOnly);
assert_eq!(importance, ChangeImportance::Low);
}
#[test]
fn test_imports_only() {
let old = "use std::io;\n\nfn foo() {\n bar();\n}\n";
let new = "use std::io;\nuse std::fs;\n\nfn foo() {\n bar();\n}\n";
let (kind, importance) = classify_modification(Path::new("test.rs"), old, new);
assert_eq!(kind, ModificationKind::ImportsOnly);
assert_eq!(importance, ChangeImportance::Low);
}
#[test]
fn test_parse_error_fallback() {
let old = "some content here\n";
let new = "some content here\nwith additions\n";
let (kind, importance) = classify_modification(Path::new("test.xyz"), old, new);
assert_eq!(kind, ModificationKind::Logic);
assert_eq!(importance, ChangeImportance::High);
}
#[test]
fn test_formatting_only_unknown_lang() {
let old = "foo bar baz\n";
let new = "foo bar baz\n";
let (kind, importance) = classify_modification(Path::new("test.xyz"), old, new);
assert_eq!(kind, ModificationKind::FormattingOnly);
assert_eq!(importance, ChangeImportance::Noise);
}
}