1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
//! fingerprint — extracted from conventions.rs.
use std::collections::HashMap;
use std::path::Path;
use super::conventions::Language;
/// A structural fingerprint extracted from a single source file.
#[derive(Debug, Clone)]
#[allow(dead_code)]
pub struct FileFingerprint {
/// Path relative to component root.
pub relative_path: String,
/// Language detected from extension.
pub language: Language,
/// Method/function names found in the file.
pub methods: Vec<String>,
/// Registration calls found (e.g., add_action, register_rest_route).
pub registrations: Vec<String>,
/// Class or struct name if found.
pub type_name: Option<String>,
/// Parent class name (e.g., "WC_Abstract_Order").
pub extends: Option<String>,
/// Interfaces or traits implemented.
pub implements: Vec<String>,
/// Namespace declaration (PHP namespace, Rust mod path).
pub namespace: Option<String>,
/// Import/use statements.
pub imports: Vec<String>,
/// Raw file content (for import usage analysis).
pub content: String,
/// Method name → normalized body hash for duplication detection.
/// Populated by extension scripts that support it; empty otherwise.
pub method_hashes: HashMap<String, String>,
/// Method name → structural hash for near-duplicate detection.
/// Identifiers/literals replaced with positional tokens before hashing.
/// Populated by extension scripts that support it; empty otherwise.
pub structural_hashes: HashMap<String, String>,
/// Method name → visibility ("public", "protected", "private").
pub visibility: HashMap<String, String>,
/// Public/protected class properties (e.g., ["string $name", "$data"]).
pub properties: Vec<String>,
/// Hook references: do_action() and apply_filters() calls.
pub hooks: Vec<crate::extension::HookRef>,
/// Function parameters that are declared but never used in the function body.
pub unused_parameters: Vec<crate::extension::UnusedParam>,
/// Dead code suppression markers (e.g., `#[allow(dead_code)]`).
pub dead_code_markers: Vec<crate::extension::DeadCodeMarker>,
/// Function/method names called within this file.
pub internal_calls: Vec<String>,
/// Public functions/methods exported from this file.
pub public_api: Vec<String>,
}
/// Extract a structural fingerprint from a source file.
///
/// Dispatches to an installed extension extension that handles the file's extension
/// and has a fingerprint script configured. No extension = no fingerprint.
pub fn fingerprint_file(path: &Path, root: &Path) -> Option<FileFingerprint> {
use crate::extension;
let ext = path.extension()?.to_str()?;
let content = std::fs::read_to_string(path).ok()?;
let relative_path = path
.strip_prefix(root)
.unwrap_or(path)
.to_string_lossy()
.to_string();
let matched_extension = extension::find_extension_for_file_ext(ext, "fingerprint")?;
let output = extension::run_fingerprint_script(&matched_extension, &relative_path, &content)?;
let language = Language::from_extension(ext);
Some(FileFingerprint {
relative_path,
language,
methods: output.methods,
registrations: output.registrations,
type_name: output.type_name,
extends: output.extends,
implements: output.implements,
namespace: output.namespace,
imports: output.imports,
content,
method_hashes: output.method_hashes,
structural_hashes: output.structural_hashes,
visibility: output.visibility,
properties: output.properties,
hooks: output.hooks,
unused_parameters: output.unused_parameters,
dead_code_markers: output.dead_code_markers,
internal_calls: output.internal_calls,
public_api: output.public_api,
})
}