use std::collections::{HashMap, HashSet};
use std::path::Path;
use super::fingerprint::FileFingerprint;
/// Changes to one file's public surface between a base git ref and the
/// current fingerprint of that file.
#[derive(Debug, Clone)]
pub struct SymbolDiff {
    /// Path of the changed file this diff describes.
    pub file: String,
    /// Exports that existed at the base ref and have no counterpart in the
    /// current version.
    pub removed_exports: Vec<String>,
    /// Exports that only exist in the current version and were not paired
    /// with a removed export as a rename.
    // NOTE(review): the `dead_code` allowance suggests nothing outside the
    // tests reads this yet — confirm before removing the attribute.
    #[allow(dead_code)]
    pub added_exports: Vec<String>,
    /// `(old_name, new_name)` pairs of exports treated as renames.
    pub renamed_exports: Vec<(String, String)>,
    /// `(old_name, new_name)` of the file's type name when both versions
    /// declare one and the names differ.
    pub type_renamed: Option<(String, String)>,
    /// Names of hooks referenced at the base ref but not in the current version.
    pub removed_hooks: Vec<String>,
    /// Names of hooks referenced only in the current version.
    // NOTE(review): same `dead_code` allowance as `added_exports`.
    #[allow(dead_code)]
    pub added_hooks: Vec<String>,
}
/// A file that was not itself changed but references something a changed
/// file removed or renamed.
#[derive(Debug, Clone)]
pub struct AffectedFile {
    /// Path of the impacted file.
    pub file: String,
    /// Path of the changed file whose diff first marked this file as impacted.
    pub source_file: String,
    /// Every reason collected for this file, in discovery order.
    pub reasons: Vec<AffectReason>,
}
/// Why an otherwise untouched file is considered impacted by a symbol change.
///
/// Where present, `new_name` carries the replacement name when the symbol was
/// renamed; `None` is rendered as "removed" by the `Display` implementation.
#[derive(Debug, Clone)]
pub enum AffectReason {
    /// The file imports a type whose name changed.
    ImportsChangedType {
        old_name: String,
        new_name: Option<String>,
    },
    /// The file calls a function that was removed or renamed.
    CallsRemovedFunction {
        old_name: String,
        new_name: Option<String>,
    },
    /// The file references a hook that no longer exists under that name.
    HooksRemovedAction { old_name: String },
    /// The file extends a class whose name changed.
    ExtendsChangedClass {
        old_name: String,
        new_name: Option<String>,
    },
}
impl std::fmt::Display for AffectReason {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
AffectReason::ImportsChangedType { old_name, new_name } => {
if let Some(new) = new_name {
write!(f, "imports '{}' (renamed to '{}')", old_name, new)
} else {
write!(f, "imports '{}' (removed)", old_name)
}
}
AffectReason::CallsRemovedFunction { old_name, new_name } => {
if let Some(new) = new_name {
write!(f, "calls '{}' (renamed to '{}')", old_name, new)
} else {
write!(f, "calls '{}' (removed)", old_name)
}
}
AffectReason::HooksRemovedAction { old_name } => {
write!(f, "hooks '{}' (removed/renamed)", old_name)
}
AffectReason::ExtendsChangedClass { old_name, new_name } => {
if let Some(new) = new_name {
write!(f, "extends '{}' (renamed to '{}')", old_name, new)
} else {
write!(f, "extends '{}' (removed)", old_name)
}
}
}
}
}
/// Builds a [`FileFingerprint`] for `relative_path` as it existed at `git_ref`.
///
/// The historical content is read with `git show <git_ref>:<relative_path>`,
/// run through `run_in_optional` with `source_path`, and then handed to the
/// fingerprint script of the extension found for the file's extension.
///
/// Returns `None` when any step yields nothing:
/// - the path has no file extension, or it is not valid UTF-8;
/// - `find_extension_for_file_ext(ext, "fingerprint")` finds no extension;
/// - the `git show` call produces no content (presumably, among other
///   failures, when the file did not exist at `git_ref`);
/// - the fingerprint script returns no output.
pub fn fingerprint_from_git_ref(
    source_path: &str,
    git_ref: &str,
    relative_path: &str,
) -> Option<FileFingerprint> {
    use crate::extension;

    // Resolve the file-extension lookups before touching git: paths that can
    // never be fingerprinted should not cost a `git show` subprocess each.
    // NOTE(review): assumes `find_extension_for_file_ext` does not depend on
    // the git call having run first — confirm.
    let ext = Path::new(relative_path).extension()?.to_str()?;
    let matched_extension = extension::find_extension_for_file_ext(ext, "fingerprint")?;

    let git_spec = format!("{}:{}", git_ref, relative_path);
    let content =
        crate::engine::command::run_in_optional(source_path, "git", &["show", &git_spec])?;

    let output = extension::run_fingerprint_script(&matched_extension, relative_path, &content)?;
    let language = super::conventions::Language::from_extension(ext);

    Some(FileFingerprint {
        relative_path: relative_path.to_string(),
        language,
        methods: output.methods,
        registrations: output.registrations,
        type_name: output.type_name,
        type_names: output.type_names,
        extends: output.extends,
        implements: output.implements,
        namespace: output.namespace,
        imports: output.imports,
        content,
        method_hashes: output.method_hashes,
        structural_hashes: output.structural_hashes,
        visibility: output.visibility,
        properties: output.properties,
        hooks: output.hooks,
        unused_parameters: output.unused_parameters,
        dead_code_markers: output.dead_code_markers,
        internal_calls: output.internal_calls,
        public_api: output.public_api,
    })
}
/// Computes a [`SymbolDiff`] for each changed file that lost or renamed
/// something other files may depend on.
///
/// Each path in `changed_files` is fingerprinted at `git_ref` and compared to
/// its entry in `current_fingerprints` (matched by `relative_path`):
/// - no base fingerprint: the file is skipped;
/// - base but no current fingerprint: every base export and hook counts as removed;
/// - both: the two fingerprints are diffed.
///
/// Diffs with no removed exports, renamed exports, type rename or removed
/// hooks are dropped. Results follow the order of `changed_files`.
pub fn diff_changed_files(
    source_path: &str,
    git_ref: &str,
    changed_files: &[String],
    current_fingerprints: &[&FileFingerprint],
) -> Vec<SymbolDiff> {
    let current_by_path: HashMap<&str, &FileFingerprint> = current_fingerprints
        .iter()
        .map(|fp| (fp.relative_path.as_str(), *fp))
        .collect();

    changed_files
        .iter()
        .filter_map(|path| {
            // Without a base fingerprint there is nothing to compare against.
            let base = fingerprint_from_git_ref(source_path, git_ref, path)?;

            let diff = match current_by_path.get(path.as_str()) {
                Some(current) => diff_fingerprints(path, &base, current),
                // No current fingerprint: everything the base exposed is gone.
                None => SymbolDiff {
                    file: path.clone(),
                    removed_exports: base.public_api.clone(),
                    added_exports: Vec::new(),
                    renamed_exports: Vec::new(),
                    type_renamed: None,
                    removed_hooks: base.hooks.iter().map(|hook| hook.name.clone()).collect(),
                    added_hooks: Vec::new(),
                },
            };

            // Pure additions cannot break other files, so they are not reported.
            let harmless = diff.removed_exports.is_empty()
                && diff.renamed_exports.is_empty()
                && diff.type_renamed.is_none()
                && diff.removed_hooks.is_empty();
            (!harmless).then_some(diff)
        })
        .collect()
}
/// Compares two fingerprints of the same file and reports what changed in its
/// exports, type name and hooks.
///
/// - Exports are compared by name via `public_api`. Names only in `base` and
///   names only in `current` are passed to `match_renames`, which splits them
///   into renames, removals and additions.
/// - `type_renamed` is set only when both fingerprints have a `type_name` and
///   the two differ.
/// - Hooks are compared by `name`.
///
/// All name lists are sorted before use. `HashSet` iteration order is
/// arbitrary, and the rename pairing is greedy over its input order, so
/// unsorted input would make both the pairing and the output order vary from
/// run to run.
fn diff_fingerprints(file: &str, base: &FileFingerprint, current: &FileFingerprint) -> SymbolDiff {
    // Names in `left` that are absent from `right`, in sorted order.
    fn sorted_difference(left: &HashSet<&str>, right: &HashSet<&str>) -> Vec<String> {
        let mut names: Vec<String> = left
            .difference(right)
            .map(|name| name.to_string())
            .collect();
        names.sort_unstable();
        names
    }

    let base_exports: HashSet<&str> = base.public_api.iter().map(|s| s.as_str()).collect();
    let current_exports: HashSet<&str> = current.public_api.iter().map(|s| s.as_str()).collect();
    let removed = sorted_difference(&base_exports, &current_exports);
    let added = sorted_difference(&current_exports, &base_exports);
    let (renamed, truly_removed, truly_added) = match_renames(&removed, &added);

    let type_renamed = match (&base.type_name, &current.type_name) {
        (Some(old), Some(new)) if old != new => Some((old.clone(), new.clone())),
        _ => None,
    };

    let base_hooks: HashSet<&str> = base.hooks.iter().map(|h| h.name.as_str()).collect();
    let current_hooks: HashSet<&str> = current.hooks.iter().map(|h| h.name.as_str()).collect();
    let removed_hooks = sorted_difference(&base_hooks, &current_hooks);
    let added_hooks = sorted_difference(&current_hooks, &base_hooks);

    SymbolDiff {
        file: file.to_string(),
        removed_exports: truly_removed,
        added_exports: truly_added,
        renamed_exports: renamed,
        type_renamed,
        removed_hooks,
        added_hooks,
    }
}
/// Greedily pairs removed names with added names that look like renames.
///
/// For each entry of `removed`, in order, the still-unclaimed entry of `added`
/// with the highest `similarity` score above `0.5` is taken as its new name.
/// On equal scores the earliest entry of `added` wins. Each added name can be
/// claimed at most once.
///
/// Returns `(renames, truly_removed, truly_added)`:
/// - `renames`: `(old, new)` pairs, in `removed` order;
/// - `truly_removed`: removed names that found no partner, in `removed` order;
/// - `truly_added`: added names nobody claimed, in `added` order.
fn match_renames(
    removed: &[String],
    added: &[String],
) -> (Vec<(String, String)>, Vec<String>, Vec<String>) {
    // claimed[i] is true once added[i] has been paired with a removed name.
    let mut claimed = vec![false; added.len()];
    let mut pairs = Vec::new();
    let mut unmatched_old = Vec::new();

    for old_name in removed {
        let winner = added
            .iter()
            .enumerate()
            .filter(|(idx, _)| !claimed[*idx])
            .map(|(idx, candidate)| (idx, similarity(old_name, candidate)))
            .filter(|&(_, score)| score > 0.5)
            .fold(None, |best: Option<(usize, f64)>, contender| match best {
                // Only a strictly better score displaces the current best, so
                // the earliest candidate wins ties.
                Some((_, top)) if contender.1 > top => Some(contender),
                Some(_) => best,
                None => Some(contender),
            });

        match winner {
            Some((idx, _)) => {
                pairs.push((old_name.clone(), added[idx].clone()));
                claimed[idx] = true;
            }
            None => unmatched_old.push(old_name.clone()),
        }
    }

    let unmatched_new: Vec<String> = added
        .iter()
        .zip(&claimed)
        .filter(|(_, taken)| !**taken)
        .map(|(name, _)| name.clone())
        .collect();

    (pairs, unmatched_old, unmatched_new)
}
/// Scores how alike two names are, from `0.0` (nothing in common) to `1.0`
/// (identical).
///
/// The score is the length of the longest common subsequence of the two
/// strings divided by the length of the longer one, both measured in `char`s.
/// Working on `char`s rather than UTF-8 bytes keeps distinct non-ASCII
/// characters that share a lead byte from counting as a partial match, and
/// keeps the normalization from being skewed by multi-byte characters.
/// Scores for ASCII input are the same as a byte-wise comparison.
///
/// Returns `1.0` for equal strings (including two empty strings) and `0.0`
/// when exactly one of them is empty.
fn similarity(a: &str, b: &str) -> f64 {
    if a == b {
        return 1.0;
    }
    if a.is_empty() || b.is_empty() {
        return 0.0;
    }

    let a_chars: Vec<char> = a.chars().collect();
    let b_chars: Vec<char> = b.chars().collect();
    let width = b_chars.len();

    // Rolling two-row LCS table: `prev` is the finished row for the previous
    // character of `a`, `curr` is the row being filled. Column 0 is never
    // written and therefore stays 0 in both rows.
    let mut prev = vec![0usize; width + 1];
    let mut curr = vec![0usize; width + 1];
    for &a_ch in &a_chars {
        for (j, &b_ch) in b_chars.iter().enumerate() {
            curr[j + 1] = if a_ch == b_ch {
                prev[j] + 1
            } else {
                prev[j + 1].max(curr[j])
            };
        }
        std::mem::swap(&mut prev, &mut curr);
    }

    // After the final swap the completed last row lives in `prev`.
    let lcs_len = prev[width];
    lcs_len as f64 / a_chars.len().max(width) as f64
}
pub fn find_affected_files(
diffs: &[SymbolDiff],
all_fingerprints: &[&FileFingerprint],
changed_files: &HashSet<&str>,
) -> Vec<AffectedFile> {
let mut affected: HashMap<String, AffectedFile> = HashMap::new();
for diff in diffs {
for fp in all_fingerprints {
if changed_files.contains(fp.relative_path.as_str()) {
continue;
}
let mut reasons = Vec::new();
for removed in &diff.removed_exports {
if fp.internal_calls.contains(removed) {
let new_name = diff
.renamed_exports
.iter()
.find(|(old, _)| old == removed)
.map(|(_, new)| new.clone());
reasons.push(AffectReason::CallsRemovedFunction {
old_name: removed.clone(),
new_name,
});
}
}
for (old_name, new_name) in &diff.renamed_exports {
if fp.internal_calls.contains(old_name) {
reasons.push(AffectReason::CallsRemovedFunction {
old_name: old_name.clone(),
new_name: Some(new_name.clone()),
});
}
}
if let Some((old_type, new_type)) = &diff.type_renamed {
let imports_old = fp.imports.iter().any(|imp| imp.contains(old_type.as_str()));
if imports_old {
reasons.push(AffectReason::ImportsChangedType {
old_name: old_type.clone(),
new_name: Some(new_type.clone()),
});
}
}
if let Some((old_type, new_type)) = &diff.type_renamed {
if fp.extends.as_deref() == Some(old_type.as_str()) {
reasons.push(AffectReason::ExtendsChangedClass {
old_name: old_type.clone(),
new_name: Some(new_type.clone()),
});
}
}
for removed_hook in &diff.removed_hooks {
let hooks_it = fp.hooks.iter().any(|h| h.name == *removed_hook);
let registers_it = fp
.registrations
.iter()
.any(|r| r.contains(removed_hook.as_str()));
if hooks_it || registers_it {
reasons.push(AffectReason::HooksRemovedAction {
old_name: removed_hook.clone(),
});
}
}
if !reasons.is_empty() {
let entry =
affected
.entry(fp.relative_path.clone())
.or_insert_with(|| AffectedFile {
file: fp.relative_path.clone(),
source_file: diff.file.clone(),
reasons: Vec::new(),
});
entry.reasons.extend(reasons);
}
}
}
let mut result: Vec<AffectedFile> = affected.into_values().collect();
result.sort_by(|a, b| a.file.cmp(&b.file));
result
}
/// Widens a set of changed files to include files impacted by their symbol
/// changes.
///
/// Returns the expanded scope (the changed files plus every affected file)
/// together with the affected-file details. When no changed file produced a
/// reportable diff, the scope is just `changed_files` and the detail list is
/// empty.
pub fn expand_scope(
    source_path: &str,
    git_ref: &str,
    changed_files: &[String],
    all_fingerprints: &[&FileFingerprint],
) -> (HashSet<String>, Vec<AffectedFile>) {
    // The changed files are always in scope, whatever the diffs say.
    let mut scope: HashSet<String> = changed_files.iter().cloned().collect();

    let diffs = diff_changed_files(source_path, git_ref, changed_files, all_fingerprints);
    if diffs.is_empty() {
        return (scope, Vec::new());
    }

    let changed_set: HashSet<&str> = changed_files.iter().map(String::as_str).collect();
    let affected = find_affected_files(&diffs, all_fingerprints, &changed_set);
    scope.extend(affected.iter().map(|entry| entry.file.clone()));

    (scope, affected)
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::core::code_audit::conventions::Language;

    /// Builds a PHP fingerprint with only the fields these tests care about;
    /// everything else comes from `Default`. `public_api` is also used for
    /// `methods`, and `hooks` entries are `(hook_type, name)` pairs.
    fn make_fingerprint(
        path: &str,
        public_api: Vec<&str>,
        internal_calls: Vec<&str>,
        imports: Vec<&str>,
        type_name: Option<&str>,
        extends: Option<&str>,
        hooks: Vec<(&str, &str)>,
    ) -> FileFingerprint {
        FileFingerprint {
            relative_path: path.to_string(),
            language: Language::Php,
            methods: public_api.iter().map(|s| s.to_string()).collect(),
            type_name: type_name.map(|s| s.to_string()),
            extends: extends.map(|s| s.to_string()),
            imports: imports.iter().map(|s| s.to_string()).collect(),
            hooks: hooks
                .iter()
                .map(|(t, n)| crate::extension::HookRef {
                    hook_type: t.to_string(),
                    name: n.to_string(),
                })
                .collect(),
            internal_calls: internal_calls.iter().map(|s| s.to_string()).collect(),
            public_api: public_api.iter().map(|s| s.to_string()).collect(),
            ..Default::default()
        }
    }

    #[test]
    fn test_similarity_identical() {
        assert!((similarity("doThing", "doThing") - 1.0).abs() < f64::EPSILON);
    }

    #[test]
    fn test_similarity_empty() {
        assert!((similarity("", "anything")).abs() < f64::EPSILON);
        assert!((similarity("anything", "")).abs() < f64::EPSILON);
    }

    #[test]
    fn test_similarity_renamed() {
        let score = similarity("doThing", "doStuff");
        assert!(score > 0.2, "score should be > 0.2, got {}", score);
        assert!(score < 0.8, "score should be < 0.8, got {}", score);
    }

    #[test]
    fn test_similarity_prefixed() {
        let score = similarity("getUser", "getUserById");
        assert!(score > 0.5, "score should be > 0.5, got {}", score);
    }

    #[test]
    fn test_match_renames_exact_pair() {
        let removed = vec!["doThing".to_string()];
        let added = vec!["doStuff".to_string(), "completelyNew".to_string()];
        let (renames, truly_removed, truly_added) = match_renames(&removed, &added);
        // The single removed name ends up in exactly one bucket.
        assert!(renames.len() + truly_removed.len() == 1);
        assert!(!truly_added.is_empty());
    }

    #[test]
    fn test_match_renames_clear_rename() {
        let removed = vec!["processRequest".to_string()];
        let added = vec!["processApiRequest".to_string()];
        let (renames, truly_removed, _) = match_renames(&removed, &added);
        assert_eq!(renames.len(), 1, "should detect rename");
        assert!(truly_removed.is_empty());
        assert_eq!(renames[0].0, "processRequest");
        assert_eq!(renames[0].1, "processApiRequest");
    }

    #[test]
    fn test_diff_fingerprints_detects_removed_export() {
        let base = make_fingerprint(
            "Foo.php",
            vec!["doThing", "doOther"],
            vec![],
            vec![],
            Some("Foo"),
            None,
            vec![],
        );
        let current = make_fingerprint(
            "Foo.php",
            vec!["doOther"],
            vec![],
            vec![],
            Some("Foo"),
            None,
            vec![],
        );
        let diff = diff_fingerprints("Foo.php", &base, &current);
        assert!(
            diff.removed_exports.contains(&"doThing".to_string())
                || diff.renamed_exports.iter().any(|(old, _)| old == "doThing"),
            "doThing should be in removed or renamed"
        );
    }

    #[test]
    fn test_diff_fingerprints_detects_type_rename() {
        let base = make_fingerprint(
            "Foo.php",
            vec!["run"],
            vec![],
            vec![],
            Some("FooHandler"),
            None,
            vec![],
        );
        let current = make_fingerprint(
            "Foo.php",
            vec!["run"],
            vec![],
            vec![],
            Some("BarHandler"),
            None,
            vec![],
        );
        let diff = diff_fingerprints("Foo.php", &base, &current);
        assert_eq!(
            diff.type_renamed,
            Some(("FooHandler".to_string(), "BarHandler".to_string()))
        );
    }

    #[test]
    fn test_diff_fingerprints_detects_removed_hook() {
        let base = make_fingerprint(
            "Foo.php",
            vec![],
            vec![],
            vec![],
            None,
            None,
            vec![("action", "my_custom_action")],
        );
        let current = make_fingerprint(
            "Foo.php",
            vec![],
            vec![],
            vec![],
            None,
            None,
            vec![("action", "my_renamed_action")],
        );
        let diff = diff_fingerprints("Foo.php", &base, &current);
        assert!(diff.removed_hooks.contains(&"my_custom_action".to_string()));
        assert!(diff.added_hooks.contains(&"my_renamed_action".to_string()));
    }

    #[test]
    fn test_find_affected_calls_removed_function() {
        let diff = SymbolDiff {
            file: "Foo.php".to_string(),
            removed_exports: vec!["doThing".to_string()],
            added_exports: vec![],
            renamed_exports: vec![],
            type_renamed: None,
            removed_hooks: vec![],
            added_hooks: vec![],
        };
        // Bar calls the removed function; Baz does not.
        let bar = make_fingerprint(
            "Bar.php",
            vec!["run"],
            vec!["doThing"],
            vec![],
            None,
            None,
            vec![],
        );
        let baz = make_fingerprint(
            "Baz.php",
            vec!["run"],
            vec!["somethingElse"],
            vec![],
            None,
            None,
            vec![],
        );
        let all_fps: Vec<&FileFingerprint> = vec![&bar, &baz];
        let changed: HashSet<&str> = HashSet::from(["Foo.php"]);
        let affected = find_affected_files(&[diff], &all_fps, &changed);
        assert_eq!(affected.len(), 1);
        assert_eq!(affected[0].file, "Bar.php");
        assert_eq!(affected[0].source_file, "Foo.php");
        assert!(matches!(
            &affected[0].reasons[0],
            AffectReason::CallsRemovedFunction { old_name, .. } if old_name == "doThing"
        ));
    }

    #[test]
    fn test_find_affected_imports_renamed_type() {
        let diff = SymbolDiff {
            file: "Foo.php".to_string(),
            removed_exports: vec![],
            added_exports: vec![],
            renamed_exports: vec![],
            type_renamed: Some(("FooHandler".to_string(), "BarHandler".to_string())),
            removed_hooks: vec![],
            added_hooks: vec![],
        };
        let consumer = make_fingerprint(
            "Consumer.php",
            vec!["run"],
            vec![],
            vec!["use App\\FooHandler"],
            None,
            None,
            vec![],
        );
        let all_fps: Vec<&FileFingerprint> = vec![&consumer];
        let changed: HashSet<&str> = HashSet::from(["Foo.php"]);
        let affected = find_affected_files(&[diff], &all_fps, &changed);
        assert_eq!(affected.len(), 1);
        assert_eq!(affected[0].file, "Consumer.php");
        assert!(matches!(
            &affected[0].reasons[0],
            AffectReason::ImportsChangedType { old_name, .. } if old_name == "FooHandler"
        ));
    }

    #[test]
    fn test_find_affected_extends_renamed_class() {
        let diff = SymbolDiff {
            file: "Base.php".to_string(),
            removed_exports: vec![],
            added_exports: vec![],
            renamed_exports: vec![],
            type_renamed: Some(("BaseTask".to_string(), "AbstractTask".to_string())),
            removed_hooks: vec![],
            added_hooks: vec![],
        };
        let child = make_fingerprint(
            "Child.php",
            vec!["run"],
            vec![],
            vec![],
            Some("ChildTask"),
            Some("BaseTask"),
            vec![],
        );
        let all_fps: Vec<&FileFingerprint> = vec![&child];
        let changed: HashSet<&str> = HashSet::from(["Base.php"]);
        let affected = find_affected_files(&[diff], &all_fps, &changed);
        assert_eq!(affected.len(), 1);
        assert_eq!(affected[0].file, "Child.php");
        assert!(matches!(
            &affected[0].reasons[0],
            AffectReason::ExtendsChangedClass { old_name, .. } if old_name == "BaseTask"
        ));
    }

    #[test]
    fn test_find_affected_hooks_removed_action() {
        let diff = SymbolDiff {
            file: "Provider.php".to_string(),
            removed_exports: vec![],
            added_exports: vec![],
            renamed_exports: vec![],
            type_renamed: None,
            removed_hooks: vec!["my_custom_hook".to_string()],
            added_hooks: vec![],
        };
        let listener = make_fingerprint(
            "Listener.php",
            vec!["onHook"],
            vec![],
            vec![],
            None,
            None,
            vec![("filter", "my_custom_hook")],
        );
        let all_fps: Vec<&FileFingerprint> = vec![&listener];
        let changed: HashSet<&str> = HashSet::from(["Provider.php"]);
        let affected = find_affected_files(&[diff], &all_fps, &changed);
        assert_eq!(affected.len(), 1);
        assert_eq!(affected[0].file, "Listener.php");
        assert!(matches!(
            &affected[0].reasons[0],
            AffectReason::HooksRemovedAction { old_name } if old_name == "my_custom_hook"
        ));
    }

    #[test]
    fn test_find_affected_skips_changed_files() {
        let diff = SymbolDiff {
            file: "Foo.php".to_string(),
            removed_exports: vec!["doThing".to_string()],
            added_exports: vec![],
            renamed_exports: vec![],
            type_renamed: None,
            removed_hooks: vec![],
            added_hooks: vec![],
        };
        // Foo calls the removed function itself, but it is in the changed set.
        let foo = make_fingerprint(
            "Foo.php",
            vec!["doOther"],
            vec!["doThing"],
            vec![],
            None,
            None,
            vec![],
        );
        let all_fps: Vec<&FileFingerprint> = vec![&foo];
        let changed: HashSet<&str> = HashSet::from(["Foo.php"]);
        let affected = find_affected_files(&[diff], &all_fps, &changed);
        assert!(
            affected.is_empty(),
            "changed file should not be in affected"
        );
    }

    #[test]
    fn test_find_affected_renamed_function_in_calls() {
        let diff = SymbolDiff {
            file: "Foo.php".to_string(),
            removed_exports: vec![],
            added_exports: vec![],
            renamed_exports: vec![("doThing".to_string(), "doStuff".to_string())],
            type_renamed: None,
            removed_hooks: vec![],
            added_hooks: vec![],
        };
        let bar = make_fingerprint(
            "Bar.php",
            vec!["run"],
            vec!["doThing"],
            vec![],
            None,
            None,
            vec![],
        );
        let all_fps: Vec<&FileFingerprint> = vec![&bar];
        let changed: HashSet<&str> = HashSet::from(["Foo.php"]);
        let affected = find_affected_files(&[diff], &all_fps, &changed);
        assert_eq!(affected.len(), 1);
        assert!(matches!(
            &affected[0].reasons[0],
            AffectReason::CallsRemovedFunction { old_name, new_name }
                if old_name == "doThing" && new_name.as_deref() == Some("doStuff")
        ));
    }

    // Despite its name, this test does not call `expand_scope`; it only checks
    // that `find_affected_files` reports nothing when given no diffs.
    #[test]
    fn test_expand_scope_no_diffs_returns_changed_only() {
        let changed = ["Foo.php".to_string()];
        let foo = make_fingerprint("Foo.php", vec!["run"], vec![], vec![], None, None, vec![]);
        let all_fps: Vec<&FileFingerprint> = vec![&foo];
        let changed_set: HashSet<&str> = changed.iter().map(|s| s.as_str()).collect();
        let affected = find_affected_files(&[], &all_fps, &changed_set);
        assert!(affected.is_empty());
    }

    #[test]
    fn test_affect_reason_display() {
        let renamed = AffectReason::CallsRemovedFunction {
            old_name: "doThing".to_string(),
            new_name: Some("doStuff".to_string()),
        };
        assert_eq!(renamed.to_string(), "calls 'doThing' (renamed to 'doStuff')");

        let removed = AffectReason::ImportsChangedType {
            old_name: "FooHandler".to_string(),
            new_name: None,
        };
        assert_eq!(removed.to_string(), "imports 'FooHandler' (removed)");

        let hook = AffectReason::HooksRemovedAction {
            old_name: "my_custom_hook".to_string(),
        };
        assert_eq!(hook.to_string(), "hooks 'my_custom_hook' (removed/renamed)");
    }
}