use std::collections::{HashMap, HashSet};
use std::path::{Path, PathBuf};
use sqry_core::graph::Language;
use sqry_core::graph::unified::concurrent::GraphSnapshot;
use sqry_core::graph::unified::node::kind::NodeKind;
use sqry_core::graph::unified::resolution::display_graph_qualified_name;
// Tuning knobs for the rename heuristic (`is_likely_rename` / `detect_renames`).
// Confidence = signature score * SIGNATURE_WEIGHT + location score * LOCATION_WEIGHT.

/// Share of the rename confidence contributed by the signature-similarity score.
const SIGNATURE_WEIGHT: f64 = 0.7;
/// Share of the rename confidence contributed by the location score.
const LOCATION_WEIGHT: f64 = 0.3;
/// Signature similarity below which a removed/added pair is rejected outright.
const SIGNATURE_MIN_SCORE: f64 = 0.7;
/// Minimum combined confidence for a removed/added pair to be reported as a rename.
const RENAME_CONFIDENCE_THRESHOLD: f64 = 0.9;
/// Largest same-file start-line distance that still receives the graded "nearby" score.
const SAME_FILE_LINE_WINDOW: i32 = 50;
/// Divisor that converts a same-file line distance into a penalty.
const SAME_FILE_LINE_NORMALIZER: f64 = 100.0;
/// Upper bound applied to the same-file line-distance penalty.
const SAME_FILE_MAX_PENALTY: f64 = 0.5;
/// Location score for same-file pairs that are farther apart than the line window.
const SAME_FILE_FAR_SCORE: f64 = 0.3;
/// Location score for pairs whose worktree-relative file paths differ.
const CROSS_FILE_LOCATION_SCORE: f64 = 0.7;
/// Classification assigned to a symbol when two graph snapshots are compared.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ChangeType {
    /// The qualified name exists only in the target snapshot.
    Added,
    /// The qualified name exists only in the base snapshot.
    Removed,
    /// Same qualified name and signature, but the line span or relative file differs.
    Modified,
    /// A removed/added pair that the rename heuristic matched together.
    Renamed,
    /// Same qualified name on both sides, but the signature differs.
    SignatureChanged,
    /// No difference; the diff code visible in this file never emits this variant.
    Unchanged,
}

impl ChangeType {
    /// Returns the lowercase `snake_case` label for this change type.
    #[must_use]
    pub fn as_str(&self) -> &'static str {
        match *self {
            Self::Added => "added",
            Self::Removed => "removed",
            Self::Modified => "modified",
            Self::Renamed => "renamed",
            Self::SignatureChanged => "signature_changed",
            Self::Unchanged => "unchanged",
        }
    }
}
/// Source position of a symbol: file, language label, and line/column span.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct NodeLocation {
/// File containing the symbol; `build_node_map` joins it onto the worktree root when one is set.
pub file_path: PathBuf,
/// Language label for the file; `build_node_map` uses `"unknown"` when none is recorded.
pub language: String,
/// Start line, copied verbatim from the graph entry.
pub start_line: u32,
/// End line, copied verbatim from the graph entry.
pub end_line: u32,
/// Start column, copied verbatim from the graph entry.
pub start_column: u32,
/// End column, copied verbatim from the graph entry.
pub end_column: u32,
}
/// One entry of a diff: what happened to a single symbol between base and target.
#[derive(Debug, Clone)]
pub struct NodeChange {
/// Short symbol name (the target-side name for modifications and renames).
pub symbol_name: String,
/// Display form of the qualified name, as produced by `NodeSnap::display_qualified_name`.
pub qualified_name: String,
/// Node kind label produced by `node_kind_to_string` (e.g. `"function"`).
pub kind: String,
/// How the symbol changed.
pub change_type: ChangeType,
/// Location in the base snapshot; `None` for added symbols.
pub base_location: Option<NodeLocation>,
/// Location in the target snapshot; `None` for removed symbols.
pub target_location: Option<NodeLocation>,
/// Signature in the base snapshot, if one was recorded.
pub signature_before: Option<String>,
/// Signature in the target snapshot, if one was recorded.
pub signature_after: Option<String>,
}
/// Per-category counts of the changes contained in a diff.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct DiffSummary {
    /// Number of [`ChangeType::Added`] entries.
    pub added: u64,
    /// Number of [`ChangeType::Removed`] entries.
    pub removed: u64,
    /// Number of [`ChangeType::Modified`] entries.
    pub modified: u64,
    /// Number of [`ChangeType::Renamed`] entries.
    pub renamed: u64,
    /// Number of [`ChangeType::SignatureChanged`] entries.
    pub signature_changed: u64,
    /// Number of [`ChangeType::Unchanged`] entries.
    pub unchanged: u64,
}

impl DiffSummary {
    /// Tallies `changes` by their `change_type`, one counter per variant.
    ///
    /// An empty slice yields the all-zero default summary.
    #[must_use]
    pub fn from_changes(changes: &[NodeChange]) -> Self {
        changes.iter().fold(Self::default(), |mut tally, change| {
            // Pick the counter that belongs to this change, then bump it once.
            let bucket = match change.change_type {
                ChangeType::Added => &mut tally.added,
                ChangeType::Removed => &mut tally.removed,
                ChangeType::Modified => &mut tally.modified,
                ChangeType::Renamed => &mut tally.renamed,
                ChangeType::SignatureChanged => &mut tally.signature_changed,
                ChangeType::Unchanged => &mut tally.unchanged,
            };
            *bucket += 1;
            tally
        })
    }
}
/// Result of `compute_diff`: the individual changes plus their per-category counts.
#[derive(Debug, Clone, Default)]
pub struct DiffOutput {
/// Change records; `compute_diff` emits modifications, then renames, then removals, then additions.
pub changes: Vec<NodeChange>,
/// Counts derived from `changes` via `DiffSummary::from_changes`.
pub summary: DiffSummary,
}
/// Worktree roots for the two sides of a diff.
///
/// A non-empty root is joined in front of every file path of that side by
/// `build_node_map`, and stripped again by `strip_worktree_prefix` before paths
/// are compared. An empty path (the default) leaves snapshot paths untouched.
#[derive(Debug, Clone, Default)]
pub struct DiffOptions {
/// Root directory for files of the base (`old`) snapshot.
pub old_worktree_path: PathBuf,
/// Root directory for files of the target (`new`) snapshot.
pub new_worktree_path: PathBuf,
}
/// Owned copy of the per-node data the diff needs, detached from the graph snapshot.
#[derive(Clone)]
struct NodeSnap {
/// Short symbol name (empty when the name id could not be resolved).
name: String,
/// Qualified name; falls back to `name` when the node has none. Used as the map key.
qualified_name: String,
/// Node kind as stored in the graph.
kind: NodeKind,
/// String label for `kind`, from `node_kind_to_string`.
kind_str: String,
/// Static flag copied from the graph entry; forwarded to the display-name formatter.
is_static: bool,
/// Signature text, when the node has one.
signature: Option<String>,
/// File path, already prefixed with the worktree root when one is configured.
file_path: PathBuf,
/// Language label for the file, or `"unknown"`.
language: String,
/// Start line copied from the graph entry.
start_line: u32,
/// End line copied from the graph entry.
end_line: u32,
/// Start column copied from the graph entry.
start_column: u32,
/// End column copied from the graph entry.
end_column: u32,
}
impl NodeSnap {
    /// Returns the qualified name formatted for display.
    ///
    /// When the stored language label maps to a known [`Language`], formatting is
    /// delegated to `display_graph_qualified_name`; otherwise the raw qualified
    /// name is returned unchanged.
    fn display_qualified_name(&self) -> String {
        match Language::from_id(&self.language) {
            Some(language) => display_graph_qualified_name(
                language,
                &self.qualified_name,
                self.kind,
                self.is_static,
            ),
            None => self.qualified_name.clone(),
        }
    }

    /// Consumes the snapshot and moves its position fields into a [`NodeLocation`].
    fn into_location(self) -> NodeLocation {
        let Self {
            file_path,
            language,
            start_line,
            end_line,
            start_column,
            end_column,
            ..
        } = self;
        NodeLocation {
            file_path,
            language,
            start_line,
            end_line,
            start_column,
            end_column,
        }
    }

    /// Builds a [`NodeLocation`] from this snapshot, cloning the path and language.
    fn to_location(&self) -> NodeLocation {
        let file_path = self.file_path.clone();
        let language = self.language.clone();
        NodeLocation {
            file_path,
            language,
            start_line: self.start_line,
            end_line: self.end_line,
            start_column: self.start_column,
            end_column: self.end_column,
        }
    }
}
/// Computes the symbol-level diff between the `old` and `new` graph snapshots.
///
/// Nodes are matched by qualified name. The returned changes are grouped in this
/// order: modifications / signature changes, renames, removals, additions.
/// Removed and added nodes that were paired as a rename are reported only once,
/// as the rename. `opts` supplies the worktree roots used for file paths.
#[must_use]
pub fn compute_diff(old: &GraphSnapshot, new: &GraphSnapshot, opts: &DiffOptions) -> DiffOutput {
    let before = build_node_map(old, &opts.old_worktree_path);
    let after = build_node_map(new, &opts.new_worktree_path);

    // Names present on one side only, plus in-place modifications of shared names.
    let removed = collect_removed_nodes(&before, &after);
    let (added, mut changes) = collect_added_and_modified(&before, &after, opts);

    // Pair up removed/added nodes that look like renames before reporting the rest.
    let (renames, paired_qnames) = collect_renames(&removed, &added, opts);
    changes.extend(renames);
    append_removed_changes(&mut changes, &removed, &paired_qnames);
    append_added_changes(&mut changes, &added, &paired_qnames);

    DiffOutput {
        summary: DiffSummary::from_changes(&changes),
        changes,
    }
}
/// Indexes the nodes of `snapshot` by qualified name.
///
/// Nodes flagged by `is_unified_loser()` are skipped, as are nodes whose
/// qualified name (falling back to the plain name) resolves to an empty string.
/// File paths are joined onto `worktree_path` unless that path is empty. When
/// several nodes share a qualified name, the one visited last wins.
fn build_node_map(snapshot: &GraphSnapshot, worktree_path: &Path) -> HashMap<String, NodeSnap> {
    let interner = snapshot.strings();
    let file_table = snapshot.files();
    // Loop-invariant: with no root configured, paths are kept as stored.
    let keep_stored_paths = worktree_path.as_os_str().is_empty();

    let mut nodes = HashMap::new();
    for (_node_id, entry) in snapshot.iter_nodes() {
        if entry.is_unified_loser() {
            continue;
        }

        let name = match interner.resolve(entry.name) {
            Some(s) => s.to_string(),
            None => String::new(),
        };
        let qualified_name = entry
            .qualified_name
            .and_then(|sid| interner.resolve(sid))
            .map_or_else(|| name.clone(), |s| s.to_string());
        if qualified_name.is_empty() {
            continue;
        }

        let signature = entry
            .signature
            .and_then(|sid| interner.resolve(sid))
            .map(|s| s.to_string());
        let file_path = file_table
            .resolve(entry.file)
            .map(|p| {
                if keep_stored_paths {
                    PathBuf::from(p.as_ref())
                } else {
                    worktree_path.join(p.as_ref())
                }
            })
            .unwrap_or_default();
        let language = file_table
            .language_for_file(entry.file)
            .map_or_else(|| "unknown".to_string(), |l| l.to_string());

        nodes.insert(
            qualified_name.clone(),
            NodeSnap {
                name,
                qualified_name,
                kind: entry.kind,
                kind_str: node_kind_to_string(entry.kind),
                is_static: entry.is_static,
                signature,
                file_path,
                language,
                start_line: entry.start_line,
                end_line: entry.end_line,
                start_column: entry.start_column,
                end_column: entry.end_column,
            },
        );
    }
    nodes
}
/// Splits the target snapshot's nodes into additions and in-place modifications.
///
/// Returns `(added, changes)`:
/// * `added` holds every target node whose qualified name is absent from `base_map`;
/// * `changes` holds one record per shared name for which `detect_modification`
///   reports a difference.
///
/// Both vectors are in ascending qualified-name order. `HashMap` iteration order
/// differs from run to run, so walking the map directly would make the diff
/// output order unstable.
fn collect_added_and_modified(
    base_map: &HashMap<String, NodeSnap>,
    target_map: &HashMap<String, NodeSnap>,
    opts: &DiffOptions,
) -> (Vec<NodeSnap>, Vec<NodeChange>) {
    // Keys are unique, so an unstable sort cannot reorder equal elements.
    let mut ordered: Vec<(&String, &NodeSnap)> = target_map.iter().collect();
    ordered.sort_unstable_by(|left, right| left.0.cmp(right.0));

    let mut added = Vec::new();
    let mut changes = Vec::new();
    for (qname, target_snap) in ordered {
        match base_map.get(qname) {
            None => added.push(target_snap.clone()),
            Some(base_snap) => {
                if let Some(change) = detect_modification(base_snap, target_snap, opts) {
                    changes.push(change);
                }
            }
        }
    }
    (added, changes)
}
/// Returns clones of the base nodes whose qualified name is absent from `target_map`.
///
/// The result is in ascending qualified-name order. `detect_renames` matches
/// greedily in the order of this list, and `HashMap` iteration order differs from
/// run to run, so an unsorted list could make both the rename pairing and the
/// order of the reported removals vary between runs.
fn collect_removed_nodes(
    base_map: &HashMap<String, NodeSnap>,
    target_map: &HashMap<String, NodeSnap>,
) -> Vec<NodeSnap> {
    let mut gone: Vec<(&String, &NodeSnap)> = base_map
        .iter()
        .filter(|(qname, _)| !target_map.contains_key(*qname))
        .collect();
    // Keys are unique, so an unstable sort cannot reorder equal elements.
    gone.sort_unstable_by(|left, right| left.0.cmp(right.0));
    gone.into_iter().map(|(_, snap)| snap.clone()).collect()
}
/// Compares two snapshots of the same qualified name and classifies the difference.
///
/// * Different signature: [`ChangeType::SignatureChanged`] (takes precedence).
/// * Same signature but a different start line, end line, or worktree-relative
///   file path: [`ChangeType::Modified`]. This check is purely positional; the
///   symbol's body text is not compared.
/// * Otherwise `None`.
fn detect_modification(
    base_snap: &NodeSnap,
    target_snap: &NodeSnap,
    opts: &DiffOptions,
) -> Option<NodeChange> {
    let change_type = if base_snap.signature != target_snap.signature {
        ChangeType::SignatureChanged
    } else {
        let same_span = base_snap.start_line == target_snap.start_line
            && base_snap.end_line == target_snap.end_line;
        let same_file = strip_worktree_prefix(&base_snap.file_path, opts)
            == strip_worktree_prefix(&target_snap.file_path, opts);
        if same_span && same_file {
            return None;
        }
        ChangeType::Modified
    };

    // Both kinds of change share the same record layout; only the type differs.
    Some(NodeChange {
        symbol_name: target_snap.name.clone(),
        qualified_name: target_snap.display_qualified_name(),
        kind: target_snap.kind_str.clone(),
        change_type,
        base_location: Some(base_snap.to_location()),
        target_location: Some(target_snap.to_location()),
        signature_before: base_snap.signature.clone(),
        signature_after: target_snap.signature.clone(),
    })
}
/// Runs rename detection and packages the result for `compute_diff`.
///
/// Returns the rename change records together with the set of raw qualified
/// names (both the old and the new name of every pair) so that callers can
/// leave those nodes out of the plain removed/added lists.
fn collect_renames(
    removed: &[NodeSnap],
    added: &[NodeSnap],
    opts: &DiffOptions,
) -> (Vec<NodeChange>, HashSet<String>) {
    let pairs = detect_renames(removed, added, opts);

    let rename_changes: Vec<NodeChange> = pairs
        .iter()
        .map(|(old_snap, new_snap)| create_renamed_change(old_snap, new_snap))
        .collect();
    let paired_qnames: HashSet<String> = pairs
        .iter()
        .flat_map(|(old_snap, new_snap)| {
            [
                old_snap.qualified_name.clone(),
                new_snap.qualified_name.clone(),
            ]
        })
        .collect();

    (rename_changes, paired_qnames)
}
/// Greedily pairs removed nodes with added nodes that look like renames.
///
/// Removed nodes are processed in slice order. For each one, the not-yet-claimed
/// added node with the highest `is_likely_rename` score is selected (the earliest
/// candidate wins ties); the pair is accepted only when that score reaches
/// `RENAME_CONFIDENCE_THRESHOLD`. An added node is paired at most once.
fn detect_renames(
    removed: &[NodeSnap],
    added: &[NodeSnap],
    opts: &DiffOptions,
) -> Vec<(NodeSnap, NodeSnap)> {
    let mut pairs = Vec::new();
    let mut claimed: HashSet<usize> = HashSet::new();

    for old_snap in removed {
        let strongest = added
            .iter()
            .enumerate()
            .filter(|(idx, _)| !claimed.contains(idx))
            .filter_map(|(idx, candidate)| {
                is_likely_rename(old_snap, candidate, opts).map(|score| (idx, score))
            })
            .fold(None, |leader: Option<(usize, f64)>, (idx, score)| {
                match leader {
                    // Strictly greater only: the first of several equal scores is kept.
                    Some((_, top)) if score > top => Some((idx, score)),
                    Some(_) => leader,
                    None => Some((idx, score)),
                }
            });

        let accepted = strongest.filter(|&(_, score)| score >= RENAME_CONFIDENCE_THRESHOLD);
        if let Some((idx, _)) = accepted {
            claimed.insert(idx);
            pairs.push((old_snap.clone(), added[idx].clone()));
        }
    }
    pairs
}
fn is_likely_rename(base: &NodeSnap, target: &NodeSnap, opts: &DiffOptions) -> Option<f64> {
if base.kind != target.kind {
return None;
}
let sig_score = match (&base.signature, &target.signature) {
(Some(base_sig), Some(target_sig)) => {
if base_sig == target_sig {
1.0
} else {
levenshtein_similarity(base_sig, target_sig)
}
}
(None, None) => 1.0,
_ => return None,
};
if sig_score < SIGNATURE_MIN_SCORE {
return None;
}
let mut confidence = sig_score * SIGNATURE_WEIGHT;
let base_rel = strip_worktree_prefix(&base.file_path, opts);
let target_rel = strip_worktree_prefix(&target.file_path, opts);
let location_score = if base_rel == target_rel {
let base_line: i32 = base.start_line.try_into().unwrap_or(i32::MAX);
let target_line: i32 = target.start_line.try_into().unwrap_or(i32::MAX);
let line_diff = (base_line - target_line).abs();
if line_diff <= SAME_FILE_LINE_WINDOW {
1.0 - (f64::from(line_diff) / SAME_FILE_LINE_NORMALIZER).min(SAME_FILE_MAX_PENALTY)
} else {
SAME_FILE_FAR_SCORE
}
} else {
CROSS_FILE_LOCATION_SCORE
};
confidence += location_score * LOCATION_WEIGHT;
Some(confidence)
}
/// Builds the [`ChangeType::Renamed`] record for a matched pair.
///
/// Name, display qualified name and kind come from `target` (the new symbol);
/// locations and signatures are reported for both sides.
fn create_renamed_change(base: &NodeSnap, target: &NodeSnap) -> NodeChange {
    let old_location = base.to_location();
    let new_location = target.to_location();
    NodeChange {
        change_type: ChangeType::Renamed,
        symbol_name: target.name.clone(),
        qualified_name: target.display_qualified_name(),
        kind: target.kind_str.clone(),
        base_location: Some(old_location),
        target_location: Some(new_location),
        signature_before: base.signature.clone(),
        signature_after: target.signature.clone(),
    }
}
/// Appends a [`ChangeType::Removed`] record for every removed node that was not
/// paired as a rename.
///
/// `renamed_qnames` holds raw qualified names; nodes listed there are skipped
/// because they are already reported as renames.
fn append_removed_changes(
    changes: &mut Vec<NodeChange>,
    removed: &[NodeSnap],
    renamed_qnames: &HashSet<String>,
) {
    changes.extend(
        removed
            .iter()
            .filter(|snap| !renamed_qnames.contains(&snap.qualified_name))
            .map(|snap| NodeChange {
                symbol_name: snap.name.clone(),
                qualified_name: snap.display_qualified_name(),
                kind: snap.kind_str.clone(),
                change_type: ChangeType::Removed,
                base_location: Some(snap.clone().into_location()),
                target_location: None,
                signature_before: snap.signature.clone(),
                signature_after: None,
            }),
    );
}
/// Appends a [`ChangeType::Added`] record for every added node that was not
/// paired as a rename.
///
/// `renamed_qnames` holds raw qualified names; nodes listed there are skipped
/// because they are already reported as renames.
fn append_added_changes(
    changes: &mut Vec<NodeChange>,
    added: &[NodeSnap],
    renamed_qnames: &HashSet<String>,
) {
    changes.extend(
        added
            .iter()
            .filter(|snap| !renamed_qnames.contains(&snap.qualified_name))
            .map(|snap| NodeChange {
                symbol_name: snap.name.clone(),
                qualified_name: snap.display_qualified_name(),
                kind: snap.kind_str.clone(),
                change_type: ChangeType::Added,
                base_location: None,
                target_location: Some(snap.clone().into_location()),
                signature_before: None,
                signature_after: snap.signature.clone(),
            }),
    );
}
fn strip_worktree_prefix(path: &Path, opts: &DiffOptions) -> PathBuf {
if !opts.old_worktree_path.as_os_str().is_empty()
&& let Ok(relative) = path.strip_prefix(&opts.old_worktree_path)
{
return relative.to_path_buf();
}
if !opts.new_worktree_path.as_os_str().is_empty()
&& let Ok(relative) = path.strip_prefix(&opts.new_worktree_path)
{
return relative.to_path_buf();
}
path.to_path_buf()
}
/// Returns the normalised Levenshtein similarity of `a` and `b` in `[0.0, 1.0]`.
///
/// `1.0` means identical (two empty strings included); `0.0` means every
/// character differs. The edit distance is divided by the longer input's length
/// in characters, the same unit `strsim::levenshtein` counts edits in. Dividing
/// by the byte length would overstate similarity for non-ASCII text.
fn levenshtein_similarity(a: &str, b: &str) -> f64 {
    let longest = a.chars().count().max(b.chars().count());
    if longest == 0 {
        return 1.0;
    }
    let distance = strsim::levenshtein(a, b);
    // Go through u32 for a lossless conversion to f64; oversized values saturate.
    let distance = f64::from(u32::try_from(distance).unwrap_or(u32::MAX));
    let longest = f64::from(u32::try_from(longest).unwrap_or(u32::MAX));
    1.0 - (distance / longest)
}
/// Maps a [`NodeKind`] to the lowercase label stored in `NodeChange::kind`.
///
/// Kinds without a dedicated label are reported as `"other"`.
fn node_kind_to_string(kind: NodeKind) -> String {
    let label: &'static str = match kind {
        NodeKind::Function => "function",
        NodeKind::Method => "method",
        NodeKind::Class => "class",
        NodeKind::Interface => "interface",
        NodeKind::Trait => "trait",
        NodeKind::Module => "module",
        NodeKind::Variable => "variable",
        NodeKind::Constant => "constant",
        NodeKind::Type => "type",
        NodeKind::Struct => "struct",
        NodeKind::Enum => "enum",
        NodeKind::EnumVariant => "enum_variant",
        NodeKind::Macro => "macro",
        NodeKind::Parameter => "parameter",
        NodeKind::Property => "property",
        NodeKind::Import => "import",
        NodeKind::Export => "export",
        NodeKind::Component => "component",
        NodeKind::Service => "service",
        NodeKind::Resource => "resource",
        NodeKind::Endpoint => "endpoint",
        NodeKind::Test => "test",
        _ => "other",
    };
    String::from(label)
}
// Unit tests for the helpers in this module that can run without a populated graph.
#[cfg(test)]
mod tests {
use super::*;
// Identical and empty inputs score 1.0; near and far strings fall on the expected side.
#[test]
fn levenshtein_similarity_bounds() {
assert!((levenshtein_similarity("hello", "hello") - 1.0).abs() < 1e-10);
assert!((levenshtein_similarity("", "") - 1.0).abs() < 1e-10);
assert!(levenshtein_similarity("hello", "hallo") > 0.7);
assert!(levenshtein_similarity("hello", "world") < 0.5);
}
// Pins the exact strings returned by `ChangeType::as_str`.
#[test]
fn change_type_wire_strings_match_pre_db20() {
assert_eq!(ChangeType::Added.as_str(), "added");
assert_eq!(ChangeType::Removed.as_str(), "removed");
assert_eq!(ChangeType::Modified.as_str(), "modified");
assert_eq!(ChangeType::Renamed.as_str(), "renamed");
assert_eq!(ChangeType::SignatureChanged.as_str(), "signature_changed");
assert_eq!(ChangeType::Unchanged.as_str(), "unchanged");
}
// Each change type increments only its own counter; the others stay at zero.
#[test]
fn diff_summary_from_changes_tallies_each_bucket() {
let changes = vec![
NodeChange {
symbol_name: "a".into(),
qualified_name: "a".into(),
kind: "function".into(),
change_type: ChangeType::Added,
base_location: None,
target_location: None,
signature_before: None,
signature_after: None,
},
NodeChange {
symbol_name: "b".into(),
qualified_name: "b".into(),
kind: "function".into(),
change_type: ChangeType::Removed,
base_location: None,
target_location: None,
signature_before: None,
signature_after: None,
},
NodeChange {
symbol_name: "c".into(),
qualified_name: "c".into(),
kind: "function".into(),
change_type: ChangeType::SignatureChanged,
base_location: None,
target_location: None,
signature_before: None,
signature_after: None,
},
];
let summary = DiffSummary::from_changes(&changes);
assert_eq!(summary.added, 1);
assert_eq!(summary.removed, 1);
assert_eq!(summary.signature_changed, 1);
assert_eq!(summary.modified, 0);
assert_eq!(summary.renamed, 0);
assert_eq!(summary.unchanged, 0);
}
// Diffing two empty graphs through the parent module's `ComparativeQueryDb` yields no changes.
#[test]
fn empty_snapshots_produce_empty_diff() {
use std::sync::Arc;
use sqry_core::graph::unified::concurrent::CodeGraph;
let old = Arc::new(CodeGraph::new().snapshot());
let new = Arc::new(CodeGraph::new().snapshot());
let cmp = super::super::ComparativeQueryDb::new(old, new);
let out = cmp.diff_default();
assert!(out.changes.is_empty());
assert_eq!(out.summary, DiffSummary::default());
}
// With no worktree roots configured, the path is returned unchanged.
#[test]
fn strip_worktree_prefix_falls_back_when_empty() {
let p = PathBuf::from("/tmp/foo/bar.rs");
let out = strip_worktree_prefix(&p, &DiffOptions::default());
assert_eq!(out, p);
}
// A path under the old root is made relative to that root.
#[test]
fn strip_worktree_prefix_strips_old_root() {
let opts = DiffOptions {
old_worktree_path: PathBuf::from("/tmp/old"),
new_worktree_path: PathBuf::from("/tmp/new"),
};
let p = PathBuf::from("/tmp/old/src/foo.rs");
let out = strip_worktree_prefix(&p, &opts);
assert_eq!(out, PathBuf::from("src/foo.rs"));
}
}