use std::collections::{HashMap, HashSet};
use std::path::Path;
use sha2::{Digest, Sha256};
use crate::extension::grammar::{self, Grammar, Symbol};
use crate::extension::{self, DeadCodeMarker, HookRef, UnusedParam};
use super::conventions::Language;
use super::fingerprint::FileFingerprint;
// Rust reserved words plus ubiquitous prelude/std names (Option, Vec, Result, ...).
// `structural_normalize` preserves these verbatim so structural hashes retain
// control-flow and type-usage shape while anonymizing user identifiers.
const RUST_KEYWORDS: &[&str] = &[
    "as", "async", "await", "break", "const", "continue", "crate", "dyn", "else", "enum", "extern",
    "false", "fn", "for", "if", "impl", "in", "let", "loop", "match", "mod", "move", "mut", "pub",
    "ref", "return", "self", "Self", "static", "struct", "super", "trait", "true", "type",
    "unsafe", "use", "where", "while", "yield",
    "Some", "None", "Ok", "Err", "Result", "Option", "Vec", "String", "Box", "Arc", "Rc", "HashMap",
    "HashSet", "bool", "u8", "u16", "u32", "u64", "u128", "usize", "i8", "i16", "i32", "i64",
    "i128", "isize", "f32", "f64", "str", "char",
];
// PHP reserved words and built-in scalar type names, preserved verbatim by
// `structural_normalize` when hashing PHP function bodies.
const PHP_KEYWORDS: &[&str] = &[
    "abstract",
    "and",
    "array",
    "as",
    "break",
    "callable",
    "case",
    "catch",
    "class",
    "clone",
    "const",
    "continue",
    "declare",
    "default",
    "do",
    "echo",
    "else",
    "elseif",
    "empty",
    "enddeclare",
    "endfor",
    "endforeach",
    "endif",
    "endswitch",
    "endwhile",
    "eval",
    "exit",
    "extends",
    "final",
    "finally",
    "fn",
    "for",
    "foreach",
    "function",
    "global",
    "goto",
    "if",
    "implements",
    "include",
    "include_once",
    "instanceof",
    "insteadof",
    "interface",
    "isset",
    "list",
    "match",
    "namespace",
    "new",
    "or",
    "print",
    "private",
    "protected",
    "public",
    "readonly",
    "require",
    "require_once",
    "return",
    "static",
    "switch",
    "throw",
    "trait",
    "try",
    "unset",
    "use",
    "var",
    "while",
    "xor",
    "yield",
    "null",
    "true",
    "false",
    "self",
    "parent",
    "int",
    "float",
    "string",
    "bool",
    "void",
    "mixed",
    "object",
    "iterable",
    "never",
];
// Call-like tokens ignored when harvesting internal calls from Rust source:
// control-flow keywords, common constructors, and standard macro names.
// Note: `fingerprint_from_grammar` removes from this list any name the file
// itself defines as a function, so locally defined `write`, `test`, etc. are
// still reported.
const SKIP_CALLS_RUST: &[&str] = &[
    "if",
    "while",
    "for",
    "match",
    "loop",
    "return",
    "Some",
    "None",
    "Ok",
    "Err",
    "Box",
    "Vec",
    "Arc",
    "Rc",
    "String",
    "println",
    "eprintln",
    "format",
    "write",
    "writeln",
    "panic",
    "assert",
    "assert_eq",
    "assert_ne",
    "todo",
    "unimplemented",
    "unreachable",
    "dbg",
    "cfg",
    "include",
    "include_str",
    "concat",
    "env",
    "compile_error",
    "stringify",
    "vec",
    "hashmap",
    "bail",
    "ensure",
    "anyhow",
    "matches",
    "debug_assert",
    "debug_assert_eq",
    "allow",
    "deny",
    "warn",
    "derive",
    "serde",
    "test",
    "inline",
    "must_use",
    "doc",
    "feature",
    "pub",
    "crate",
    "super",
];
// Call-like tokens ignored when harvesting internal calls from PHP source:
// language constructs and ubiquitous built-ins that would only add noise.
const SKIP_CALLS_PHP: &[&str] = &[
    "if",
    "while",
    "for",
    "foreach",
    "switch",
    "match",
    "catch",
    "return",
    "echo",
    "print",
    "isset",
    "unset",
    "empty",
    "list",
    "array",
    "function",
    "class",
    "interface",
    "trait",
    "new",
    "require",
    "require_once",
    "include",
    "include_once",
    "define",
    "defined",
    "die",
    "exit",
    "eval",
    "compact",
    "extract",
    "var_dump",
    "print_r",
    "var_export",
];
/// Builds a [`FileFingerprint`] for `content` by running the grammar's
/// symbol-extraction pass and deriving methods, hashes, visibility, imports,
/// registrations, hooks, and static-analysis findings from the symbols.
///
/// Returns `None` when the grammar defines neither a `function` nor a
/// `method` pattern, since no meaningful fingerprint can be produced.
pub fn fingerprint_from_grammar(
    content: &str,
    grammar: &Grammar,
    relative_path: &str,
) -> Option<FileFingerprint> {
    if !grammar.patterns.contains_key("function") && !grammar.patterns.contains_key("method") {
        return None;
    }
    let lang_id = grammar.language.id.as_str();
    let language = Language::from_extension(
        grammar
            .language
            .extensions
            .first()
            .map(|s| s.as_str())
            .unwrap_or(""),
    );
    let symbols = grammar::extract(content, grammar);
    let lines: Vec<&str> = content.lines().collect();
    // Line span of a `#[cfg(test)] mod ... { }` block, if present.
    let test_range = find_test_range(&symbols, &lines, grammar);
    let impl_contexts = build_impl_contexts(&symbols);
    let functions = extract_functions(&symbols, &lines, &impl_contexts, test_range, grammar);
    // Method list: non-test functions first, deduplicated in first-seen order...
    let mut methods = Vec::new();
    let mut seen_methods = HashSet::new();
    for f in &functions {
        if f.is_test {
            continue;
        }
        if !seen_methods.contains(&f.name) {
            methods.push(f.name.clone());
            seen_methods.insert(f.name.clone());
        }
    }
    // ...then test functions, normalized to carry a `test_` prefix.
    for f in &functions {
        if f.is_test {
            let prefixed = if f.name.starts_with("test_") {
                f.name.clone()
            } else {
                format!("test_{}", f.name)
            };
            if !seen_methods.contains(&prefixed) {
                methods.push(prefixed.clone());
                seen_methods.insert(prefixed);
            }
        }
    }
    let keywords = match lang_id {
        "rust" => RUST_KEYWORDS,
        "php" => PHP_KEYWORDS,
        // Unknown grammars fall back to the Rust keyword set.
        _ => RUST_KEYWORDS,
    };
    // Per-function hashes. Tests, bodiless declarations, and trait-impl
    // methods are excluded (trait impl bodies follow the trait's contract).
    let mut method_hashes = HashMap::new();
    let mut structural_hashes = HashMap::new();
    for f in &functions {
        if f.is_test || f.body.is_empty() {
            continue;
        }
        if f.is_trait_impl {
            continue;
        }
        let exact = exact_hash(&f.body);
        method_hashes.insert(f.name.clone(), exact);
        let structural = structural_hash(&f.body, keywords, lang_id == "php");
        structural_hashes.insert(f.name.clone(), structural);
    }
    let mut visibility = HashMap::new();
    for f in &functions {
        if f.is_test {
            continue;
        }
        visibility.insert(f.name.clone(), f.visibility.clone());
    }
    let (type_name, type_names) = extract_types(&symbols);
    let extends = extract_extends(&symbols);
    let implements = extract_implements(&symbols);
    let namespace = extract_namespace(&symbols, relative_path, lang_id);
    let imports = extract_imports(&symbols);
    let registrations = extract_registrations(&symbols);
    let skip_calls: &[&str] = match lang_id {
        "rust" => SKIP_CALLS_RUST,
        "php" => SKIP_CALLS_PHP,
        _ => SKIP_CALLS_RUST,
    };
    // Never suppress a call name that this very file defines as a function.
    let defined_names: HashSet<&str> = functions.iter().map(|f| f.name.as_str()).collect();
    let effective_skip: Vec<&str> = skip_calls
        .iter()
        .filter(|name| !defined_names.contains(*name))
        .copied()
        .collect();
    let internal_calls = extract_internal_calls(content, &effective_skip);
    // NOTE(review): the HashSet round-trip makes public_api ordering
    // nondeterministic — confirm consumers don't rely on its order.
    let public_api: Vec<String> = functions
        .iter()
        .filter(|f| !f.is_test && is_public_visibility(&f.visibility))
        .map(|f| f.name.clone())
        .collect::<HashSet<_>>()
        .into_iter()
        .collect();
    let trait_impl_methods: Vec<String> = functions
        .iter()
        .filter(|f| f.is_trait_impl && !f.is_test)
        .map(|f| f.name.clone())
        .collect();
    let unused_parameters = detect_unused_params(&functions, lang_id);
    let dead_code_markers = extract_dead_code_markers(&symbols, &lines);
    let properties = extract_properties(&symbols);
    let hooks = extract_hooks(&symbols);
    Some(FileFingerprint {
        relative_path: relative_path.to_string(),
        language,
        methods,
        registrations,
        type_name,
        type_names,
        extends,
        implements,
        namespace,
        imports,
        content: content.to_string(),
        method_hashes,
        structural_hashes,
        visibility,
        properties,
        hooks,
        unused_parameters,
        dead_code_markers,
        internal_calls,
        public_api,
        trait_impl_methods,
    })
}
/// Locates the "fingerprint" extension registered for `ext` and loads its
/// grammar, preferring `grammar.toml` over `grammar.json`.
///
/// Returns `None` when no extension matches, the extension has no path, or
/// neither grammar file exists / parses.
pub fn load_grammar_for_ext(ext: &str) -> Option<Grammar> {
    let matched = extension::find_extension_for_file_ext(ext, "fingerprint")?;
    let base = Path::new(matched.extension_path.as_deref()?);
    let toml_path = base.join("grammar.toml");
    if toml_path.exists() {
        return grammar::load_grammar(&toml_path).ok();
    }
    let json_path = base.join("grammar.json");
    if json_path.exists() {
        return grammar::load_grammar_json(&json_path).ok();
    }
    None
}
/// Per-function facts gathered by `extract_functions`.
struct FunctionInfo {
    // Function name exactly as written in source.
    name: String,
    // Full text from the signature line through the matching closing brace,
    // joined with spaces; empty for bodiless declarations (trait methods).
    body: String,
    // Normalized visibility label: "public", "private", "protected",
    // "pub(crate)", or "pub(super)".
    visibility: String,
    // True when preceded by `#[test]` or inside a `#[cfg(test)]` module.
    is_test: bool,
    // True when defined inside an `impl Trait for Type` block.
    is_trait_impl: bool,
    // Raw parameter-list text captured by the grammar pattern.
    params: String,
    // 1-based line of the definition (retained for future use).
    _start_line: usize,
}
/// Collects every `impl_block` symbol into an [`ImplContext`] so methods can
/// later be matched to their enclosing impl (and its trait, if any).
fn build_impl_contexts(symbols: &[Symbol]) -> Vec<ImplContext> {
    let mut contexts = Vec::new();
    for s in symbols {
        if s.concept != "impl_block" {
            continue;
        }
        contexts.push(ImplContext {
            line: s.line,
            depth: s.depth,
            _type_name: s.get("type_name").unwrap_or("").to_string(),
            trait_name: s.get("trait_name").map(|t| t.to_string()),
        });
    }
    contexts
}
/// Location and identity of an `impl` block header, used to decide whether a
/// method belongs to a trait implementation.
struct ImplContext {
    // 1-based source line of the `impl` header.
    line: usize,
    // Brace-nesting depth at the header, as reported by the extractor.
    depth: i32,
    // The implemented type (retained for future use).
    _type_name: String,
    // `Some(trait)` for `impl Trait for Type`; `None` or empty for inherent impls.
    trait_name: Option<String>,
}
/// Finds the 0-based line span of a `#[cfg(test)] mod ... { }` block.
///
/// For each `#[cfg(test)]` marker, the following few lines are scanned for a
/// `mod ... {` opener; the span runs from the marker to the matching brace.
///
/// Fix: the previous version filtered for `cfg_test || test_attribute` and
/// then immediately re-filtered to `cfg_test` only — the first filter was
/// dead code and has been removed (behavior unchanged).
fn find_test_range(
    symbols: &[Symbol],
    lines: &[&str],
    grammar: &Grammar,
) -> Option<(usize, usize)> {
    // 1-based lines of `#[cfg(test)]` attributes.
    let cfg_tests: Vec<usize> = symbols
        .iter()
        .filter(|s| s.concept == "cfg_test")
        .map(|s| s.line)
        .collect();
    for cfg_line in cfg_tests {
        let start_idx = cfg_line.saturating_sub(1);
        // The `mod tests {` line normally sits within a few lines of the attribute.
        for i in start_idx..std::cmp::min(start_idx + 5, lines.len()) {
            if lines[i].trim().contains("mod ") && lines[i].contains('{') {
                let end = find_matching_brace(lines, i, grammar);
                return Some((start_idx, end));
            }
        }
    }
    None
}
/// Returns the 0-based index of the line on which the brace block opened at
/// or after `start_line` closes; falls back to the last line when unbalanced.
///
/// NOTE(review): braces inside string literals or comments are counted too —
/// confirm upstream callers tolerate that approximation.
fn find_matching_brace(lines: &[&str], start_line: usize, _grammar: &Grammar) -> usize {
    let mut depth: i32 = 0;
    let mut saw_brace = false;
    for (idx, line) in lines.iter().enumerate().skip(start_line) {
        for ch in line.chars() {
            match ch {
                '{' => {
                    depth += 1;
                    saw_brace = true;
                }
                '}' => depth -= 1,
                _ => {}
            }
        }
        if saw_brace && depth == 0 {
            return idx;
        }
    }
    lines.len().saturating_sub(1)
}
/// True when 1-based `line` falls inside the 0-based inclusive `test_range`.
fn is_in_test_range(line: usize, test_range: Option<(usize, usize)>) -> bool {
    match test_range {
        Some((start, end)) => {
            let idx = line.saturating_sub(1);
            (start..=end).contains(&idx)
        }
        None => false,
    }
}
/// Collects every function/method symbol into a [`FunctionInfo`], tagging
/// tests (via nearby `#[test]` attributes or the `#[cfg(test)]` module span)
/// and trait-impl methods (via the enclosing impl context).
fn extract_functions(
    symbols: &[Symbol],
    lines: &[&str],
    impl_contexts: &[ImplContext],
    test_range: Option<(usize, usize)>,
    grammar: &Grammar,
) -> Vec<FunctionInfo> {
    let fn_concepts = ["function", "method", "free_function"];
    // Lines that carry a `#[test]` attribute; a function starting within
    // 3 lines below one of these is treated as a test.
    let test_attr_lines: HashSet<usize> = symbols
        .iter()
        .filter(|s| s.concept == "test_attribute")
        .map(|s| s.line)
        .collect();
    let mut functions = Vec::new();
    for symbol in symbols
        .iter()
        .filter(|s| fn_concepts.contains(&s.concept.as_str()))
    {
        let name = match symbol.name() {
            Some(n) => n.to_string(),
            None => continue,
        };
        // Loose grammars can match `mod tests` as a function; skip it.
        if name == "tests" {
            continue;
        }
        let has_test_attr = (1..=3).any(|offset| {
            symbol.line >= offset && test_attr_lines.contains(&(symbol.line - offset))
        });
        let in_test_mod = is_in_test_range(symbol.line, test_range);
        let is_test = has_test_attr || in_test_mod;
        // A method is a trait impl when the closest preceding impl context at
        // a shallower depth names a non-empty trait.
        let is_trait_impl = if symbol.depth > 0 {
            impl_contexts
                .iter()
                .rfind(|ctx| ctx.depth < symbol.depth && ctx.line < symbol.line)
                .is_some_and(|ctx| ctx.trait_name.as_ref().is_some_and(|t| !t.is_empty()))
        } else {
            false
        };
        let visibility = extract_fn_visibility(symbol);
        let params = symbol.get("params").unwrap_or("").to_string();
        let body = extract_fn_body(lines, symbol.line.saturating_sub(1), grammar);
        functions.push(FunctionInfo {
            name,
            body,
            visibility,
            is_test,
            is_trait_impl,
            params,
            _start_line: symbol.line,
        });
    }
    functions
}
/// Normalizes a symbol's visibility into one of the labels used across the
/// fingerprint: Rust `visibility` captures map to "pub(crate)"/"pub(super)"/
/// "public"/"private"; PHP `modifiers` map to "private"/"protected"/"public";
/// absence of both defaults to "private".
fn extract_fn_visibility(symbol: &Symbol) -> String {
    if let Some(vis) = symbol.visibility() {
        let vis = vis.trim();
        // Check the narrower forms first: they also contain plain "pub".
        let label = if vis.contains("pub(crate)") {
            "pub(crate)"
        } else if vis.contains("pub(super)") {
            "pub(super)"
        } else if vis.contains("pub") {
            "public"
        } else {
            "private"
        };
        return label.to_string();
    }
    match symbol.get("modifiers") {
        Some(mods) => {
            let mods = mods.trim();
            if mods.contains("private") {
                "private".to_string()
            } else if mods.contains("protected") {
                "protected".to_string()
            } else {
                "public".to_string()
            }
        }
        None => "private".to_string(),
    }
}
/// Collects the function text from `start_idx` (0-based) through the line on
/// which its brace block closes, joined with single spaces.
///
/// Returns an empty string for bodiless declarations — a line ending in `;`
/// before any `{` is seen (e.g. a trait method signature).
fn extract_fn_body(lines: &[&str], start_idx: usize, _grammar: &Grammar) -> String {
    let mut depth: i32 = 0;
    let mut saw_open = false;
    let mut collected: Vec<&str> = Vec::new();
    for line in lines.iter().skip(start_idx).copied() {
        if !saw_open && line.trim().ends_with(';') {
            return String::new();
        }
        for ch in line.chars() {
            if ch == '{' {
                depth += 1;
                saw_open = true;
            } else if ch == '}' {
                depth -= 1;
            }
        }
        collected.push(line);
        if saw_open && depth == 0 {
            break;
        }
    }
    collected.join(" ")
}
/// Whitespace-insensitive content hash of a function body (16 hex chars).
fn exact_hash(body: &str) -> String {
    sha256_hex16(&normalize_whitespace(body))
}
/// Identifier- and literal-insensitive hash of a function body (16 hex chars),
/// so two bodies that differ only in naming/literals compare equal.
fn structural_hash(body: &str, keywords: &[&str], is_php: bool) -> String {
    sha256_hex16(&structural_normalize(body, keywords, is_php))
}
/// Collapses every run of whitespace to a single space and trims both ends.
fn normalize_whitespace(s: &str) -> String {
    // `split_whitespace` uses the same Unicode classification as
    // `char::is_whitespace` and skips leading/trailing/empty segments.
    s.split_whitespace().collect::<Vec<_>>().join(" ")
}
/// SHA-256 of `input`, truncated to the first 16 lowercase hex characters.
fn sha256_hex16(input: &str) -> String {
    let digest = Sha256::digest(input.as_bytes());
    format!("{:x}", digest).chars().take(16).collect()
}
/// Normalizes a function body for structural hashing: drops the signature
/// (everything before the first `{`), replaces string/numeric literals with
/// placeholders, anonymizes PHP variables and non-keyword identifiers, and
/// collapses whitespace.
fn structural_normalize(body: &str, keywords: &[&str], is_php: bool) -> String {
    // Exclude the signature so renamed parameters don't change the hash.
    let text = match body.find('{') {
        Some(pos) => &body[pos..],
        None => body,
    };
    let keyword_set: HashSet<&str> = keywords.iter().copied().collect();
    let mut normalized = replace_numeric_literals(&replace_string_literals(text));
    if is_php {
        normalized = replace_php_variables(&normalized);
    }
    normalize_whitespace(&replace_identifiers(&normalized, &keyword_set))
}
/// Replaces every single- or double-quoted string literal with the token
/// `STR`, honoring backslash escapes; unterminated literals consume the rest
/// of the input and still become `STR`.
fn replace_string_literals(input: &str) -> String {
    let chars: Vec<char> = input.chars().collect();
    let mut out = String::with_capacity(input.len());
    let mut idx = 0;
    while idx < chars.len() {
        let c = chars[idx];
        if c != '"' && c != '\'' {
            out.push(c);
            idx += 1;
            continue;
        }
        // Scan past the literal, skipping escaped characters.
        let quote = c;
        idx += 1;
        while idx < chars.len() {
            match chars[idx] {
                '\\' => idx += 2,
                ch if ch == quote => {
                    idx += 1;
                    break;
                }
                _ => idx += 1,
            }
        }
        out.push_str("STR");
    }
    out
}
fn replace_numeric_literals(input: &str) -> String {
let re = regex::Regex::new(r"\b\d[\d_]*(?:\.\d[\d_]*)?\b").unwrap();
re.replace_all(input, "NUM").to_string()
}
/// Replaces each distinct PHP variable (`$name`) with a stable `VAR_n` token,
/// preserving `$this`, so renamed variables hash identically.
///
/// Fixes: the closure's argument type was mojibake (`®ex::Captures`, an HTML
/// `&reg`-entity corruption of `&regex::Captures`) — restored. The regex is
/// also now compiled once per process instead of per call.
fn replace_php_variables(input: &str) -> String {
    static VAR_RE: std::sync::OnceLock<regex::Regex> = std::sync::OnceLock::new();
    let re = VAR_RE.get_or_init(|| regex::Regex::new(r"\$\w+").unwrap());
    let mut var_map: HashMap<String, String> = HashMap::new();
    let mut counter = 0;
    re.replace_all(input, |caps: &regex::Captures| {
        let var = caps[0].to_string();
        // `$this` is semantically significant; never anonymize it.
        if var == "$this" {
            return var;
        }
        let token = var_map.entry(var).or_insert_with(|| {
            let t = format!("VAR_{}", counter);
            counter += 1;
            t
        });
        token.clone()
    })
    .to_string()
}
/// Replaces every non-keyword identifier with a stable `ID_n` token so two
/// bodies differing only in naming hash identically. Placeholders produced by
/// earlier normalization passes (`STR`, `NUM`, `CHR`, `VAR_*`, `ID_*`) are
/// left intact.
///
/// Fixes: the closure's argument type was mojibake (`®ex::Captures`, an HTML
/// `&reg`-entity corruption of `&regex::Captures`) — restored. The regex is
/// also now compiled once per process instead of per call.
fn replace_identifiers(input: &str, keywords: &HashSet<&str>) -> String {
    static ID_RE: std::sync::OnceLock<regex::Regex> = std::sync::OnceLock::new();
    let re = ID_RE.get_or_init(|| regex::Regex::new(r"\b[a-zA-Z_]\w*\b").unwrap());
    let mut id_map: HashMap<String, String> = HashMap::new();
    let mut counter = 0;
    re.replace_all(input, |caps: &regex::Captures| {
        let word = &caps[0];
        if keywords.contains(word) {
            return word.to_string();
        }
        // Don't re-tokenize placeholders from earlier passes.
        if word.starts_with("STR")
            || word.starts_with("NUM")
            || word.starts_with("CHR")
            || word.starts_with("VAR_")
            || word.starts_with("ID_")
        {
            return word.to_string();
        }
        let token = id_map.entry(word.to_string()).or_insert_with(|| {
            let t = format!("ID_{}", counter);
            counter += 1;
            t
        });
        token.clone()
    })
    .to_string()
}
/// Returns `(primary_type, all_type_names)` from struct/class symbols.
///
/// The primary type is the first publicly visible (or unannotated) type;
/// when none qualifies, it falls back to the first type seen.
fn extract_types(symbols: &[Symbol]) -> (Option<String>, Vec<String>) {
    let mut type_names = Vec::new();
    let mut primary_type: Option<String> = None;
    for s in symbols
        .iter()
        .filter(|s| s.concept == "struct" || s.concept == "class")
    {
        let Some(name) = s.name() else { continue };
        type_names.push(name.to_string());
        if primary_type.is_none() {
            let vis = s.visibility().unwrap_or("");
            let is_exposed = vis.is_empty() || vis.contains("pub") || vis.contains("public");
            if is_exposed {
                primary_type = Some(name.to_string());
            }
        }
    }
    let primary_type = primary_type.or_else(|| type_names.first().cloned());
    (primary_type, type_names)
}
/// Returns the parent class of the first class/struct symbol carrying an
/// `extends` capture, shortened to the final `\`-separated segment (PHP
/// namespace paths).
fn extract_extends(symbols: &[Symbol]) -> Option<String> {
    for s in symbols {
        if s.concept != "class" && s.concept != "struct" {
            continue;
        }
        if let Some(parent) = s.get("extends") {
            let short = parent.rsplit('\\').next().unwrap_or(parent);
            return Some(short.to_string());
        }
    }
    None
}
/// Gathers implemented traits/interfaces from three symbol kinds: Rust
/// `impl Trait for Type` blocks, PHP `implements` clauses, and PHP trait-use
/// statements. Names are shortened to their last path segment and kept in
/// first-seen order.
///
/// NOTE(review): Rust trait names dedupe on the full `::` path while the PHP
/// paths dedupe on the shortened name — confirm the asymmetry is intended.
fn extract_implements(symbols: &[Symbol]) -> Vec<String> {
    let mut implements = Vec::new();
    let mut seen = HashSet::new();
    // Rust: `impl Trait for Type`.
    for s in symbols.iter().filter(|s| s.concept == "impl_block") {
        if let Some(trait_name) = s.get("trait_name") {
            if !trait_name.is_empty() && seen.insert(trait_name.to_string()) {
                let short = trait_name.split("::").last().unwrap_or(trait_name);
                implements.push(short.to_string());
            }
        }
    }
    // PHP: `class X implements A, B`.
    for s in symbols.iter().filter(|s| s.concept == "implements") {
        if let Some(interfaces) = s.get("interfaces") {
            for iface in interfaces.split(',') {
                let iface = iface.trim();
                if !iface.is_empty() {
                    let short = iface.split('\\').next_back().unwrap_or(iface);
                    if seen.insert(short.to_string()) {
                        implements.push(short.to_string());
                    }
                }
            }
        }
    }
    // PHP: `use SomeTrait;` inside a class body.
    for s in symbols.iter().filter(|s| s.concept == "trait_use") {
        if let Some(name) = s.name() {
            let short = name.split('\\').next_back().unwrap_or(name);
            if seen.insert(short.to_string()) {
                implements.push(short.to_string());
            }
        }
    }
    implements
}
/// Determines the file's namespace: an explicit `namespace` symbol wins; for
/// Rust, the most frequent `crate::<module>` import prefix is used, falling
/// back to a module path derived from the file's location.
///
/// Fix: the "most common module" selection previously iterated a `HashMap`
/// with `max_by_key` on the count alone, so ties were broken by randomized
/// iteration order and the result was nondeterministic between runs. Ties are
/// now broken alphabetically (smallest module name wins).
fn extract_namespace(symbols: &[Symbol], relative_path: &str, lang_id: &str) -> Option<String> {
    for s in symbols.iter().filter(|s| s.concept == "namespace") {
        if let Some(name) = s.name() {
            return Some(name.to_string());
        }
    }
    if lang_id == "rust" {
        // Count first path segments of `crate::...` imports.
        let mut module_counts: HashMap<&str, usize> = HashMap::new();
        for s in symbols.iter().filter(|s| s.concept == "import") {
            if let Some(path) = s.get("path") {
                if let Some(rest) = path.strip_prefix("crate::") {
                    if let Some(module) = rest.split("::").next() {
                        *module_counts.entry(module).or_insert(0) += 1;
                    }
                }
            }
        }
        // Deterministic winner: highest count, then lexicographically
        // smallest module name (HashMap iteration order is randomized).
        let best = module_counts
            .iter()
            .map(|(module, count)| (*count, *module))
            .max_by_key(|&(count, module)| (count, std::cmp::Reverse(module)));
        if let Some((_, most_common)) = best {
            return Some(format!("crate::{}", most_common));
        }
        // Fall back to a module path derived from the file location.
        let parts: Vec<&str> = relative_path.trim_end_matches(".rs").split('/').collect();
        if parts.len() > 2 {
            let ns = parts[1..parts.len() - 1].join("::");
            return Some(format!("crate::{}", ns));
        } else if parts.len() == 2 {
            return Some(format!("crate::{}", parts.last().unwrap_or(&"")));
        }
    }
    None
}
/// Returns the `path` capture of every `import` symbol, deduplicated while
/// preserving first-seen order.
fn extract_imports(symbols: &[Symbol]) -> Vec<String> {
    let mut seen = HashSet::new();
    let mut imports = Vec::new();
    let paths = symbols
        .iter()
        .filter(|s| s.concept == "import")
        .filter_map(|s| s.get("path"));
    for path in paths {
        if seen.insert(path.to_string()) {
            imports.push(path.to_string());
        }
    }
    imports
}
/// Collects "registration" symbols — WordPress hook/route/type registrations
/// and Rust macro invocations — that indicate externally visible wiring.
/// Common utility macros and test-harness macros are filtered out; results
/// are deduplicated in first-seen order.
fn extract_registrations(symbols: &[Symbol]) -> Vec<String> {
    let registration_concepts = [
        "register_post_type",
        "register_taxonomy",
        "register_rest_route",
        "register_block_type",
        "add_action",
        "add_filter",
        "add_shortcode",
        "wp_cli_command",
        "wp_register_ability",
        "macro_invocation",
    ];
    // Ordinary std/ecosystem macros that do not register anything.
    let skip_macros: HashSet<&str> = [
        "println",
        "eprintln",
        "format",
        "vec",
        "assert",
        "assert_eq",
        "assert_ne",
        "panic",
        "todo",
        "unimplemented",
        "cfg",
        "derive",
        "include",
        "include_str",
        "include_bytes",
        "concat",
        "stringify",
        "env",
        "option_env",
        "compile_error",
        "write",
        "writeln",
        "matches",
        "dbg",
        "debug_assert",
        "debug_assert_eq",
        "debug_assert_ne",
        "unreachable",
        "cfg_if",
        "lazy_static",
        "thread_local",
        "once_cell",
        "macro_rules",
        "serde_json",
        "if_chain",
        "bail",
        "anyhow",
        "ensure",
        "Ok",
        "Err",
        "Some",
        "None",
        "Box",
        "Arc",
        "Rc",
        "RefCell",
        "Mutex",
        "map",
        "hashmap",
        "btreemap",
        "hashset",
    ]
    .iter()
    .copied()
    .collect();
    let mut registrations = Vec::new();
    let mut seen = HashSet::new();
    for s in symbols
        .iter()
        .filter(|s| registration_concepts.contains(&s.concept.as_str()))
    {
        if let Some(name) = s.name() {
            if s.concept == "macro_invocation" && skip_macros.contains(name) {
                continue;
            }
            // Test-harness macro invocations are not registrations either.
            if s.concept == "macro_invocation" && name.starts_with("test") {
                continue;
            }
            if seen.insert(name.to_string()) {
                registrations.push(name.to_string());
            }
        }
    }
    registrations
}
/// Scans raw `content` for call-shaped tokens — plain `name(` and
/// method/path calls `.name(` / `:name(` — and returns the distinct names,
/// sorted, minus the caller-provided skip list and `test_*` names.
///
/// Perf fix: both regexes are now compiled once per process via `OnceLock`
/// instead of on every invocation.
fn extract_internal_calls(content: &str, skip_calls: &[&str]) -> Vec<String> {
    static CALL_RE: std::sync::OnceLock<regex::Regex> = std::sync::OnceLock::new();
    static METHOD_RE: std::sync::OnceLock<regex::Regex> = std::sync::OnceLock::new();
    let call_re = CALL_RE.get_or_init(|| regex::Regex::new(r"\b(\w+)\s*\(").unwrap());
    let method_re = METHOD_RE.get_or_init(|| regex::Regex::new(r"[.:](\w+)\s*\(").unwrap());
    let skip_set: HashSet<&str> = skip_calls.iter().copied().collect();
    let mut calls = HashSet::new();
    for re in [call_re, method_re] {
        for caps in re.captures_iter(content) {
            let name = &caps[1];
            if !skip_set.contains(name) && !name.starts_with("test_") {
                calls.insert(name.to_string());
            }
        }
    }
    // Sorted so fingerprints are deterministic.
    let mut result: Vec<String> = calls.into_iter().collect();
    result.sort();
    result
}
/// True only for the exact "public" label; `pub(crate)`/`pub(super)` are
/// intentionally not considered part of the public API.
fn is_public_visibility(vis: &str) -> bool {
    matches!(vis, "public")
}
/// Flags parameters whose names never appear (as whole words) inside the
/// function body. Tests, trait impls (signatures dictated by the trait),
/// bodiless declarations, receivers, and `_`-prefixed names are exempt.
///
/// Fix: the inner loop header was mojibake — `for pname in ¶m_names`, an
/// HTML `&para`-entity corruption of `&param_names` — restored.
fn detect_unused_params(functions: &[FunctionInfo], _lang_id: &str) -> Vec<UnusedParam> {
    let mut unused = Vec::new();
    for f in functions {
        if f.is_test || f.is_trait_impl || f.params.is_empty() || f.body.is_empty() {
            continue;
        }
        let param_names = parse_param_names(&f.params);
        // Only search the body proper; the signature always names the params.
        let Some(pos) = f.body.find('{') else { continue };
        let body_after_brace = &f.body[pos + 1..];
        for pname in &param_names {
            // Receivers and deliberately underscored params are exempt.
            if pname == "self" || pname == "mut" || pname == "Self" || pname.starts_with('_') {
                continue;
            }
            let pattern = format!(r"\b{}\b", regex::escape(pname));
            if let Ok(re) = regex::Regex::new(&pattern) {
                if !re.is_match(body_after_brace) {
                    unused.push(UnusedParam {
                        function: f.name.clone(),
                        param: pname.clone(),
                    });
                }
            }
        }
    }
    unused
}
/// Extracts parameter names from a raw parameter-list string: Rust-style
/// `name: Type` chunks and PHP-style `$name` chunks. Receivers (`self`,
/// `&self`) are skipped; a leading `&` is stripped from pattern-ish names.
///
/// Fixes: the old code used `trim_start_matches("mut")`, which strips the
/// substring even when it is not a whole word, mangling names like
/// `mutation` → `ation`; it now strips only a `mut ` binding prefix. The PHP
/// branch also no longer compiles a regex per chunk.
fn parse_param_names(params: &str) -> Vec<String> {
    let mut names = Vec::new();
    for chunk in params.split(',') {
        let chunk = chunk.trim();
        if chunk.is_empty() {
            continue;
        }
        if chunk.contains(':') {
            // Rust-style `name: Type`.
            let before_colon = chunk.split(':').next().unwrap_or("").trim();
            let name = before_colon.strip_prefix("mut ").unwrap_or(before_colon).trim();
            if !name.is_empty() && name != "&self" && name != "self" {
                let name = name.trim_start_matches('&');
                if !name.is_empty() {
                    names.push(name.to_string());
                }
            }
        } else if chunk.contains('$') {
            // PHP-style `Type $name` / `$name`.
            if let Some(name) = first_php_variable(chunk) {
                names.push(name);
            }
        }
    }
    names
}

/// Returns the first `$identifier` in `chunk` (the equivalent of the old
/// regex `\$(\w+)` first capture), without a regex dependency.
fn first_php_variable(chunk: &str) -> Option<String> {
    let chars: Vec<char> = chunk.chars().collect();
    let mut i = 0;
    while i < chars.len() {
        if chars[i] == '$' {
            let mut name = String::new();
            let mut j = i + 1;
            while j < chars.len() && (chars[j].is_alphanumeric() || chars[j] == '_') {
                name.push(chars[j]);
                j += 1;
            }
            if !name.is_empty() {
                return Some(name);
            }
            // `$` not followed by a word char: keep scanning for a later one.
            i = j.max(i + 1);
        } else {
            i += 1;
        }
    }
    None
}
/// For each `dead_code_marker` symbol (e.g. `#[allow(dead_code)]`), scans the
/// next few lines for the item it annotates and records the item's name.
/// The search stops at the first non-blank, non-attribute, non-comment line
/// whether or not it matches.
///
/// Perf fix: the item regex was recompiled inside the per-marker loop on
/// every offset; it is now compiled once per process via `OnceLock`.
fn extract_dead_code_markers(symbols: &[Symbol], lines: &[&str]) -> Vec<DeadCodeMarker> {
    static ITEM_RE: std::sync::OnceLock<regex::Regex> = std::sync::OnceLock::new();
    let item_re = ITEM_RE.get_or_init(|| {
        regex::Regex::new(
            r"(?:pub(?:\([^)]*\))?\s+)?(?:async\s+)?(?:unsafe\s+)?(?:const\s+)?(?:static\s+)?(?:fn|struct|enum|type|trait|const|static|mod)\s+(\w+)",
        )
        .unwrap()
    });
    let mut markers = Vec::new();
    for s in symbols.iter().filter(|s| s.concept == "dead_code_marker") {
        let start_line = s.line;
        // The annotated item should appear within a few lines of the marker.
        for offset in 0..5 {
            let check_idx = start_line + offset;
            if check_idx > lines.len() {
                break;
            }
            let line = lines[check_idx - 1].trim();
            // Skip blanks, other attributes, and comments between marker and item.
            if line.is_empty() || line.starts_with('#') || line.starts_with("//") {
                continue;
            }
            if let Some(caps) = item_re.captures(line) {
                markers.push(DeadCodeMarker {
                    item: caps[1].to_string(),
                    line: s.line,
                    marker_type: "allow_dead_code".to_string(),
                });
            }
            // First code line ends the search, matched or not.
            break;
        }
    }
    markers
}
/// Collects non-private `property` symbols as `"$name"` or
/// `"TypeHint $name"` strings, deduplicated in first-seen order.
fn extract_properties(symbols: &[Symbol]) -> Vec<String> {
    let mut seen = HashSet::new();
    let mut properties = Vec::new();
    for s in symbols.iter().filter(|s| s.concept == "property") {
        // Private properties are internal detail; leave them out.
        if s.get("visibility").unwrap_or("public") == "private" {
            continue;
        }
        let Some(name) = s.get("name") else { continue };
        let rendered = match s.get("type_hint") {
            Some(hint) if !hint.is_empty() => format!("{} ${}", hint, name),
            _ => format!("${}", name),
        };
        if seen.insert(rendered.clone()) {
            properties.push(rendered);
        }
    }
    properties
}
/// Collects WordPress hook references: `do_action` symbols become "action"
/// hooks and `apply_filters` symbols become "filter" hooks, deduplicated by
/// (type, name) in first-seen order.
fn extract_hooks(symbols: &[Symbol]) -> Vec<HookRef> {
    let mut seen = HashSet::new();
    let mut hooks = Vec::new();
    for s in symbols {
        let hook_type = match s.concept.as_str() {
            "do_action" => "action",
            "apply_filters" => "filter",
            _ => continue,
        };
        let Some(name) = s.name() else { continue };
        if seen.insert((hook_type, name.to_string())) {
            hooks.push(HookRef {
                hook_type: hook_type.to_string(),
                name: name.to_string(),
            });
        }
    }
    hooks
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Loads the real Rust grammar when the workspace copy exists, otherwise
    /// parses a minimal inline grammar so the tests can run anywhere.
    fn rust_grammar() -> Grammar {
        let grammar_path = std::path::Path::new(
            "/var/lib/datamachine/workspace/homeboy-extensions/rust/grammar.toml",
        );
        if grammar_path.exists() {
            grammar::load_grammar(grammar_path).expect("Failed to load Rust grammar")
        } else {
            toml::from_str(
                r#"
[language]
id = "rust"
extensions = ["rs"]
[comments]
line = ["//"]
block = [["/*", "*/"]]
doc = ["///", "//!"]
[strings]
quotes = ['"']
escape = "\\"
[blocks]
open = "{"
close = "}"
[patterns.function]
regex = '^\s*(pub(?:\(crate\))?\s+)?(?:async\s+)?(?:unsafe\s+)?(?:const\s+)?fn\s+(\w+)\s*\(([^)]*)\)'
context = "any"
[patterns.function.captures]
visibility = 1
name = 2
params = 3
[patterns.struct]
regex = '^\s*(pub(?:\(crate\))?\s+)?(struct|enum|trait)\s+(\w+)'
context = "top_level"
[patterns.struct.captures]
visibility = 1
kind = 2
name = 3
[patterns.import]
regex = '^use\s+([\w:]+(?:::\{[^}]+\})?)\s*;'
context = "top_level"
[patterns.import.captures]
path = 1
[patterns.impl_block]
regex = '^\s*impl(?:<[^>]*>)?\s+(?:(\w+)\s+for\s+)?(\w+)'
context = "any"
[patterns.impl_block.captures]
trait_name = 1
type_name = 2
[patterns.test_attribute]
regex = '#\[test\]'
context = "any"
[patterns.cfg_test]
regex = '#\[cfg\(test\)\]'
context = "any"
"#,
            )
            .expect("Failed to parse minimal grammar")
        }
    }

    #[test]
    fn test_exact_hash_deterministic() {
        let body = "fn foo() { let x = 1; }";
        let h1 = exact_hash(body);
        let h2 = exact_hash(body);
        assert_eq!(h1, h2);
        assert_eq!(h1.len(), 16);
    }

    #[test]
    fn test_exact_hash_whitespace_insensitive() {
        // NOTE(review): `a` and `b` are byte-identical here — the whitespace
        // variant may have been lost in an edit; confirm this test still
        // exercises whitespace insensitivity.
        let a = "fn foo() { let x = 1; }";
        let b = "fn foo() { let x = 1; }";
        assert_eq!(exact_hash(a), exact_hash(b));
    }

    #[test]
    fn test_structural_hash_different_names() {
        // Same shape, different identifiers: hashes must collide.
        let a = "{ let foo = bar(); baz(foo); }";
        let b = "{ let qux = quux(); corge(qux); }";
        assert_eq!(
            structural_hash(a, RUST_KEYWORDS, false),
            structural_hash(b, RUST_KEYWORDS, false),
        );
    }

    #[test]
    fn test_structural_hash_different_structure() {
        // Different control flow: hashes must differ.
        let a = "{ let x = 1; if x > 0 { return true; } }";
        let b = "{ let x = 1; for i in 0..x { print(i); } }";
        assert_ne!(
            structural_hash(a, RUST_KEYWORDS, false),
            structural_hash(b, RUST_KEYWORDS, false),
        );
    }

    #[test]
    fn test_parse_param_names_rust() {
        let names = parse_param_names("&self, key: &str, value: String");
        assert_eq!(names, vec!["key", "value"]);
    }

    #[test]
    fn test_parse_param_names_empty() {
        let names = parse_param_names("");
        assert!(names.is_empty());
    }

    #[test]
    fn test_parse_param_names_mut() {
        let names = parse_param_names("&mut self, mut count: usize");
        assert_eq!(names, vec!["count"]);
    }

    #[test]
    fn test_trait_impl_excluded_from_hashes() {
        let grammar = rust_grammar();
        let content = r#"
pub trait Entity {
fn id(&self) -> &str;
}
pub struct Foo {
id: String,
}
impl Entity for Foo {
fn id(&self) -> &str {
&self.id
}
}
pub struct Bar {
id: String,
}
impl Bar {
fn id(&self) -> &str {
&self.id
}
}
"#;
        let fp = fingerprint_from_grammar(content, &grammar, "src/test.rs").unwrap();
        assert!(fp.methods.contains(&"id".to_string()));
        assert!(
            fp.method_hashes.contains_key("id"),
            "Bar's inherent id() should be in method_hashes"
        );
    }

    #[test]
    fn test_basic_rust_fingerprint() {
        let grammar = rust_grammar();
        let content = r#"
use std::path::Path;
pub struct Config {
pub name: String,
}
pub fn load(path: &Path) -> Config {
let content = std::fs::read_to_string(path).unwrap();
Config { name: content }
}
fn helper() -> bool {
true
}
"#;
        let fp = fingerprint_from_grammar(content, &grammar, "src/config.rs").unwrap();
        assert!(fp.methods.contains(&"load".to_string()));
        assert!(fp.methods.contains(&"helper".to_string()));
        assert_eq!(fp.type_name, Some("Config".to_string()));
        assert!(fp.method_hashes.contains_key("load"));
        assert!(fp.method_hashes.contains_key("helper"));
        assert_eq!(fp.visibility.get("load"), Some(&"public".to_string()));
        assert_eq!(fp.visibility.get("helper"), Some(&"private".to_string()));
    }

    #[test]
    fn test_test_functions_excluded_from_hashes() {
        let grammar = rust_grammar();
        let content = r#"
pub fn real_fn() -> bool {
true
}
#[cfg(test)]
mod tests {
#[test]
fn test_real_fn() {
assert!(super::real_fn());
}
}
"#;
        let fp = fingerprint_from_grammar(content, &grammar, "src/lib.rs").unwrap();
        assert!(fp.method_hashes.contains_key("real_fn"));
        assert!(
            !fp.method_hashes.contains_key("test_real_fn"),
            "Test functions should not be in method_hashes"
        );
        assert!(fp.methods.contains(&"test_real_fn".to_string()));
    }

    #[test]
    fn test_unused_param_detection() {
        let grammar = rust_grammar();
        let content = r#"
pub(crate) fn uses_both(a: i32, b: i32) -> i32 {
a + b
}
pub(crate) fn ignores_second(a: i32, b: i32) -> i32 {
a * 2
}
"#;
        let fp = fingerprint_from_grammar(content, &grammar, "src/lib.rs").unwrap();
        assert!(
            fp.unused_parameters
                .iter()
                .any(|p| p.function == "ignores_second" && p.param == "b"),
            "Should detect unused param 'b' in ignores_second"
        );
        assert!(
            !fp.unused_parameters
                .iter()
                .any(|p| p.function == "uses_both"),
            "uses_both should have no unused params"
        );
    }

    #[test]
    fn trait_method_declarations_not_flagged_as_unused_params() {
        let grammar = rust_grammar();
        let content = r#"
pub trait FileSystem {
fn read(&self, path: &Path) -> Result<String>;
fn write(&self, path: &Path, content: &str) -> Result<()>;
fn delete(&self, path: &Path) -> Result<()>;
}
"#;
        let fp = fingerprint_from_grammar(content, &grammar, "src/lib.rs").unwrap();
        assert!(
            fp.unused_parameters.is_empty(),
            "Trait method declarations should not produce unused param findings, got: {:?}",
            fp.unused_parameters
        );
    }

    #[test]
    fn trait_impl_methods_not_flagged_as_unused_params() {
        let grammar = rust_grammar();
        let content = r#"
pub trait Store {
fn save(&self, key: &str, value: &str) -> bool;
}
pub struct MemStore;
impl Store for MemStore {
fn save(&self, key: &str, value: &str) -> bool {
key.len() > 0
}
}
"#;
        let fp = fingerprint_from_grammar(content, &grammar, "src/lib.rs").unwrap();
        assert!(
            !fp.unused_parameters.iter().any(|p| p.function == "save"),
            "Trait impl methods should not produce unused param findings, got: {:?}",
            fp.unused_parameters
        );
    }

    #[test]
    fn skip_list_does_not_suppress_defined_function_calls() {
        let grammar = rust_grammar();
        let content = r#"
fn run() {
let result = write("hello");
}
fn write(msg: &str) -> bool {
println!("{}", msg);
true
}
"#;
        let fp = fingerprint_from_grammar(content, &grammar, "src/file.rs").unwrap();
        assert!(
            fp.internal_calls.contains(&"write".to_string()),
            "write should be in internal_calls when the file defines fn write(), got: {:?}",
            fp.internal_calls
        );
    }

    #[test]
    fn test_normalize_whitespace() {
        assert_eq!(normalize_whitespace("a b\n\tc"), "a b c");
        assert_eq!(normalize_whitespace(" hello "), "hello");
    }

    #[test]
    fn test_replace_string_literals() {
        assert_eq!(
            replace_string_literals(r#"let x = "hello" + 'world'"#),
            "let x = STR + STR"
        );
    }
}