use std::collections::HashMap;
use std::path::{Path, PathBuf};
use petgraph::Direction;
use petgraph::visit::EdgeRef;
use crate::graph::{
CodeGraph,
edge::EdgeKind,
node::{GraphNode, SymbolInfo, SymbolKind},
};
/// Structural fingerprint of a single symbol, used to decide whether two
/// symbols are clone candidates.
///
/// Only shape-related facts are recorded; the symbol's name and location are
/// deliberately not part of the signature.
#[derive(Debug, Clone, PartialEq, Eq)]
struct StructuralSignature {
/// Kind of the symbol (function, class, ...), copied from the symbol info.
kind: SymbolKind,
/// `line_end - line` of the symbol, saturating at zero.
body_size: usize,
/// Number of edges of any kind leaving the symbol's node.
outgoing_edges: usize,
/// Number of edges of any kind entering the symbol's node.
incoming_edges: usize,
/// Number of decorators recorded on the symbol.
decorator_count: usize,
}
/// Hashes a [`StructuralSignature`] into a 64-bit value that is stable across
/// runs and platforms.
///
/// The mixing scheme is FNV-1a-like, but operates on whole 64-bit words
/// instead of bytes: starting from the 64-bit FNV offset basis, each word is
/// XOR-ed into the state, which is then multiplied (wrapping) by the 64-bit
/// FNV prime. The words are, in order: a fixed numeric code for the symbol
/// kind, the body size, the outgoing edge count, the incoming edge count and
/// the decorator count.
fn deterministic_signature_hash(sig: &StructuralSignature) -> u64 {
    const OFFSET_BASIS: u64 = 0xcbf29ce484222325;
    const PRIME: u64 = 0x100000001b3;

    // Explicit codes (rather than the enum discriminant) keep the hash
    // independent of the declaration order of `SymbolKind`.
    let kind_code: u64 = match sig.kind {
        SymbolKind::Function => 1,
        SymbolKind::Class => 2,
        SymbolKind::Interface => 3,
        SymbolKind::Variable => 4,
        SymbolKind::TypeAlias => 5,
        SymbolKind::Enum => 6,
        SymbolKind::Trait => 7,
        SymbolKind::Method => 8,
        SymbolKind::Struct => 9,
        SymbolKind::Component => 10,
        SymbolKind::Property => 11,
        SymbolKind::ImplMethod => 12,
        SymbolKind::Const => 13,
        SymbolKind::Static => 14,
        SymbolKind::Macro => 15,
    };

    let words = [
        kind_code,
        sig.body_size as u64,
        sig.outgoing_edges as u64,
        sig.incoming_edges as u64,
        sig.decorator_count as u64,
    ];

    words
        .iter()
        .fold(OFFSET_BASIS, |state, &word| (state ^ word).wrapping_mul(PRIME))
}
/// One symbol that belongs to a clone group.
#[derive(Debug, Clone, serde::Serialize)]
pub struct CloneMember {
/// Name of the symbol.
pub name: String,
/// Symbol kind rendered as a string via `crate::query::find::kind_to_str`.
pub kind: String,
/// Path of the file node that contains the symbol.
pub file: PathBuf,
/// Line recorded as the symbol's start line.
pub line: usize,
/// `line_end - line` of the symbol, saturating at zero.
pub body_size: usize,
}
/// A set of symbols reported together as structural clones.
#[derive(Debug, Clone, serde::Serialize)]
pub struct CloneGroup {
/// Value of `deterministic_signature_hash` for the group's signature.
pub hash: u64,
/// Human-readable signature in the form
/// `kind=.. body=.. out=.. in=.. decorators=..`.
pub signature: String,
/// Symbols in the group, sorted by file path and then by line.
pub members: Vec<CloneMember>,
}
/// Outcome of a `find_clones` run.
#[derive(Debug, Clone, serde::Serialize)]
pub struct CloneGroupResult {
/// Reported groups, largest first; ties are ordered by ascending hash.
pub groups: Vec<CloneGroup>,
/// Number of in-scope symbols that were fingerprinted, whether or not they
/// ended up in a reported group.
pub total_symbols_analyzed: usize,
}
pub fn find_clones(
graph: &CodeGraph,
root: &Path,
scope: Option<&Path>,
min_group: usize,
) -> CloneGroupResult {
let abs_scope: Option<PathBuf> = scope.map(|s| {
if s.is_absolute() {
s.to_path_buf()
} else {
root.join(s)
}
});
let in_scope = |path: &Path| -> bool {
match &abs_scope {
None => true,
Some(scope_path) => path.starts_with(scope_path),
}
};
let mut hash_groups: HashMap<u64, (StructuralSignature, Vec<CloneMember>)> = HashMap::new();
let mut total_symbols_analyzed: usize = 0;
for node_idx in graph.graph.node_indices() {
let sym = match &graph.graph[node_idx] {
GraphNode::Symbol(s) => s,
_ => continue,
};
let file_path = graph
.graph
.edges_directed(node_idx, Direction::Incoming)
.find_map(|edge| {
if matches!(edge.weight(), EdgeKind::Contains)
&& let GraphNode::File(fi) = &graph.graph[edge.source()]
{
return Some(fi.path.clone());
}
None
});
let file_path = match file_path {
Some(p) => p,
None => continue, };
if !in_scope(&file_path) {
continue;
}
total_symbols_analyzed += 1;
let sig = compute_signature(graph, node_idx, sym);
let hash = deterministic_signature_hash(&sig);
let kind_str = crate::query::find::kind_to_str(&sym.kind).to_string();
let body_size = sig.body_size;
let member = CloneMember {
name: sym.name.clone(),
kind: kind_str,
file: file_path,
line: sym.line,
body_size,
};
hash_groups
.entry(hash)
.or_insert_with(|| (sig, Vec::new()))
.1
.push(member);
}
let mut groups: Vec<CloneGroup> = hash_groups
.into_iter()
.filter(|(_, (_, members))| members.len() >= min_group)
.map(|(hash, (sig, mut members))| {
members.sort_by(|a, b| a.file.cmp(&b.file).then(a.line.cmp(&b.line)));
CloneGroup {
hash,
signature: format_signature(&sig),
members,
}
})
.collect();
groups.sort_by(|a, b| {
b.members
.len()
.cmp(&a.members.len())
.then(a.hash.cmp(&b.hash))
});
CloneGroupResult {
groups,
total_symbols_analyzed,
}
}
fn compute_signature(
graph: &CodeGraph,
node_idx: petgraph::stable_graph::NodeIndex,
sym: &SymbolInfo,
) -> StructuralSignature {
let body_size = sym.line_end.saturating_sub(sym.line);
let outgoing_edges = graph
.graph
.edges_directed(node_idx, Direction::Outgoing)
.count();
let incoming_edges = graph
.graph
.edges_directed(node_idx, Direction::Incoming)
.count();
let decorator_count = sym.decorators.len();
StructuralSignature {
kind: sym.kind.clone(),
body_size,
outgoing_edges,
incoming_edges,
decorator_count,
}
}
fn format_signature(sig: &StructuralSignature) -> String {
format!(
"kind={} body={} out={} in={} decorators={}",
crate::query::find::kind_to_str(&sig.kind),
sig.body_size,
sig.outgoing_edges,
sig.incoming_edges,
sig.decorator_count,
)
}
#[cfg(test)]
mod tests {
    use std::path::PathBuf;

    use super::*;
    use crate::graph::{
        CodeGraph,
        node::{DecoratorInfo, SymbolInfo, SymbolKind},
    };

    /// Root directory shared by every fixture graph.
    fn project_root() -> PathBuf {
        PathBuf::from("/project")
    }

    /// Names of a group's members, in reported order.
    fn member_names(group: &CloneGroup) -> Vec<&str> {
        group
            .members
            .iter()
            .map(|member| member.name.as_str())
            .collect()
    }

    /// Symbol with the given name, kind and line span; everything else is default.
    fn make_symbol(name: &str, kind: SymbolKind, line: usize, line_end: usize) -> SymbolInfo {
        SymbolInfo {
            name: name.into(),
            kind,
            line,
            line_end,
            ..Default::default()
        }
    }

    /// Like `make_symbol`, but with `decorator_count` synthetic decorators attached.
    fn make_symbol_with_decorators(
        name: &str,
        kind: SymbolKind,
        line: usize,
        line_end: usize,
        decorator_count: usize,
    ) -> SymbolInfo {
        let mut decorators: Vec<DecoratorInfo> = Vec::with_capacity(decorator_count);
        for i in 0..decorator_count {
            decorators.push(DecoratorInfo {
                name: format!("decorator_{}", i),
                ..Default::default()
            });
        }

        SymbolInfo {
            name: name.into(),
            kind,
            line,
            line_end,
            decorators,
            ..Default::default()
        }
    }

    #[test]
    fn test_identical_symbols_grouped() {
        let root = project_root();
        let mut graph = CodeGraph::new();

        let utils = graph.add_file(root.join("src/utils.rs"), "rust");
        let helpers = graph.add_file(root.join("src/helpers.rs"), "rust");
        graph.add_symbol(
            utils,
            make_symbol("process_data", SymbolKind::Function, 1, 11),
        );
        graph.add_symbol(
            helpers,
            make_symbol("transform_data", SymbolKind::Function, 1, 11),
        );

        let outcome = find_clones(&graph, &root, None, 2);

        assert_eq!(
            outcome.groups.len(),
            1,
            "Two identical symbols should form one clone group"
        );
        assert_eq!(
            outcome.groups[0].members.len(),
            2,
            "Clone group should have 2 members"
        );
        let names = member_names(&outcome.groups[0]);
        assert!(names.contains(&"process_data"));
        assert!(names.contains(&"transform_data"));
    }

    #[test]
    fn test_distinct_symbols_not_grouped() {
        let root = project_root();
        let mut graph = CodeGraph::new();

        let lib = graph.add_file(root.join("src/lib.rs"), "rust");
        let fixtures = [
            make_symbol("my_fn", SymbolKind::Function, 1, 11),
            make_symbol("my_class", SymbolKind::Class, 1, 51),
            make_symbol("small_fn", SymbolKind::Function, 1, 4),
        ];
        for symbol in fixtures {
            graph.add_symbol(lib, symbol);
        }

        let outcome = find_clones(&graph, &root, None, 2);

        assert_eq!(
            outcome.groups.len(),
            0,
            "Distinct symbols should not form clone groups"
        );
    }

    #[test]
    fn test_min_group_filter() {
        let root = project_root();
        let mut graph = CodeGraph::new();

        let module = graph.add_file(root.join("src/mod.rs"), "rust");
        graph.add_symbol(module, make_symbol("fn_a", SymbolKind::Function, 1, 6));
        graph.add_symbol(module, make_symbol("fn_b", SymbolKind::Function, 10, 15));
        graph.add_symbol(module, make_symbol("fn_c", SymbolKind::Function, 20, 25));

        // Threshold below the group size: the triple is reported.
        let permissive = find_clones(&graph, &root, None, 2);
        assert_eq!(permissive.groups.len(), 1);
        assert_eq!(permissive.groups[0].members.len(), 3);

        // Threshold above the group size: nothing is reported.
        let strict = find_clones(&graph, &root, None, 4);
        assert_eq!(
            strict.groups.len(),
            0,
            "Group of 3 should be filtered out when min_group=4"
        );
    }

    #[test]
    fn test_scope_filter() {
        let root = project_root();
        let mut graph = CodeGraph::new();

        let inside_a = graph.add_file(root.join("src/module/a.rs"), "rust");
        graph.add_symbol(
            inside_a,
            make_symbol("fn_in_scope_1", SymbolKind::Function, 1, 11),
        );

        let inside_b = graph.add_file(root.join("src/module/b.rs"), "rust");
        graph.add_symbol(
            inside_b,
            make_symbol("fn_in_scope_2", SymbolKind::Function, 1, 11),
        );

        let outside = graph.add_file(root.join("other/c.rs"), "rust");
        graph.add_symbol(
            outside,
            make_symbol("fn_out_scope", SymbolKind::Function, 1, 11),
        );

        // Relative scope: resolved against `root` by `find_clones`.
        let scope = PathBuf::from("src/module");
        let outcome = find_clones(&graph, &root, Some(&scope), 2);

        assert_eq!(
            outcome.groups.len(),
            1,
            "Should find 1 clone group within scope"
        );
        assert_eq!(
            outcome.groups[0].members.len(),
            2,
            "Clone group should have 2 in-scope members"
        );
        let names = member_names(&outcome.groups[0]);
        assert!(names.contains(&"fn_in_scope_1"));
        assert!(names.contains(&"fn_in_scope_2"));
        assert!(
            !names.contains(&"fn_out_scope"),
            "Out-of-scope symbol should be excluded"
        );
    }

    #[test]
    fn test_decorator_count_differentiates() {
        let root = project_root();
        let mut graph = CodeGraph::new();

        let app = graph.add_file(root.join("src/app.rs"), "rust");
        graph.add_symbol(
            app,
            make_symbol_with_decorators("fn_no_dec", SymbolKind::Function, 1, 11, 0),
        );
        graph.add_symbol(
            app,
            make_symbol_with_decorators("fn_with_dec", SymbolKind::Function, 15, 25, 2),
        );

        let outcome = find_clones(&graph, &root, None, 2);

        assert_eq!(
            outcome.groups.len(),
            0,
            "Different decorator counts should prevent grouping"
        );
    }

    #[test]
    fn test_total_symbols_analyzed() {
        let root = project_root();
        let mut graph = CodeGraph::new();

        let lib = graph.add_file(root.join("src/lib.rs"), "rust");
        graph.add_symbol(lib, make_symbol("fn_a", SymbolKind::Function, 1, 6));
        graph.add_symbol(lib, make_symbol("fn_b", SymbolKind::Function, 10, 15));
        graph.add_symbol(lib, make_symbol("cls_a", SymbolKind::Class, 20, 50));

        let outcome = find_clones(&graph, &root, None, 2);

        assert_eq!(
            outcome.total_symbols_analyzed, 3,
            "Should count all analyzed symbols"
        );
    }
}