sdivi_patterns/fingerprint.rs
1//! [`PatternFingerprint`] — keyed blake3 digest of a pattern's structural shape.
2
3use serde::{Deserialize, Serialize};
4
/// The fixed 32-byte blake3 key shared by every pattern fingerprint.
///
/// The value is frozen for the lifetime of a `snapshot_version`: altering it
/// invalidates all existing snapshot fingerprints, so any change requires a
/// snapshot version bump per CLAUDE.md Rule 19.
///
/// NOTE: the literal deliberately reads `sdi-rust`, not `sdivi-rust`. It is
/// the on-the-wire blake3 key and is unrelated to the project's display name;
/// rotating it would invalidate the fingerprints of every published snapshot.
/// The 32-byte length is load-bearing.
pub const FINGERPRINT_KEY: [u8; 32] = {
    // Typed as `&[u8; 32]` so a literal of the wrong length fails to compile.
    const KEY_BYTES: &[u8; 32] = b"sdi-rust::patterns::fingerprint!";
    *KEY_BYTES
};
16
/// A keyed blake3 digest representing the structural shape of a pattern.
///
/// Fingerprints are computed from the `node_kind` of a [`sdivi_parsing::feature_record::PatternHint`],
/// producing a stable, content-independent shape identifier. Two pattern instances
/// with the same `node_kind` produce the same [`PatternFingerprint`].
///
/// Serialized as a 64-character lowercase hex string for JSON key compatibility.
///
/// # Examples
///
/// ```rust
/// use sdivi_patterns::fingerprint::fingerprint_node_kind;
///
/// let fp1 = fingerprint_node_kind("try_expression");
/// let fp2 = fingerprint_node_kind("try_expression");
/// let fp3 = fingerprint_node_kind("match_expression");
///
/// assert_eq!(fp1, fp2);
/// assert_ne!(fp1, fp3);
/// assert_eq!(fp1.to_hex().len(), 64);
/// ```
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct PatternFingerprint([u8; 32]);

impl PatternFingerprint {
    /// Constructs a [`PatternFingerprint`] from raw digest bytes.
    pub fn from_bytes(bytes: [u8; 32]) -> Self {
        PatternFingerprint(bytes)
    }

    /// Parses a [`PatternFingerprint`] from a 64-character hex string.
    ///
    /// The canonical form (as produced by [`PatternFingerprint::to_hex`]) is
    /// lowercase; uppercase hex digits are accepted as well.
    ///
    /// Returns `None` if the input is not exactly 64 ASCII hex digits. This
    /// includes inputs of the wrong length, inputs containing sign characters
    /// such as `+`, and non-ASCII inputs. This function never panics.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use sdivi_patterns::fingerprint::{fingerprint_node_kind, PatternFingerprint};
    ///
    /// let fp = fingerprint_node_kind("try_expression");
    /// let hex = fp.to_hex();
    /// let parsed = PatternFingerprint::from_hex(&hex).unwrap();
    /// assert_eq!(fp, parsed);
    ///
    /// assert!(PatternFingerprint::from_hex("abc").is_none());
    /// assert!(PatternFingerprint::from_hex(&"+f".repeat(32)).is_none());
    /// ```
    pub fn from_hex(hex: &str) -> Option<Self> {
        // Decode from raw bytes rather than `str` sub-slices: slicing a `str`
        // at a fixed byte offset panics when the offset falls inside a
        // multi-byte UTF-8 character, and the input may be untrusted.
        let digits = hex.as_bytes();
        if digits.len() != 64 {
            return None;
        }
        let mut bytes = [0u8; 32];
        for (out, pair) in bytes.iter_mut().zip(digits.chunks_exact(2)) {
            let hi = Self::hex_nibble(pair[0])?;
            let lo = Self::hex_nibble(pair[1])?;
            *out = (hi << 4) | lo;
        }
        Some(PatternFingerprint(bytes))
    }

    /// Decodes one ASCII hex digit into its 4-bit value.
    ///
    /// Only `0-9`, `a-f` and `A-F` are accepted; every other byte (including
    /// `+`, `-` and any non-ASCII byte) yields `None`.
    fn hex_nibble(digit: u8) -> Option<u8> {
        match digit {
            b'0'..=b'9' => Some(digit - b'0'),
            b'a'..=b'f' => Some(digit - b'a' + 10),
            b'A'..=b'F' => Some(digit - b'A' + 10),
            _ => None,
        }
    }

    /// Returns the raw digest bytes.
    pub fn as_bytes(&self) -> &[u8; 32] {
        &self.0
    }

    /// Returns a 64-character lowercase hex string.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use sdivi_patterns::fingerprint::fingerprint_node_kind;
    ///
    /// let fp = fingerprint_node_kind("try_expression");
    /// assert_eq!(fp.to_hex().len(), 64);
    /// ```
    pub fn to_hex(&self) -> String {
        // Table lookup per nibble: infallible, so no `unwrap` is needed.
        const HEX_DIGITS: &[u8; 16] = b"0123456789abcdef";
        let mut out = String::with_capacity(64);
        for &byte in &self.0 {
            out.push(char::from(HEX_DIGITS[usize::from(byte >> 4)]));
            out.push(char::from(HEX_DIGITS[usize::from(byte & 0x0f)]));
        }
        out
    }
}
96
97impl Serialize for PatternFingerprint {
98 fn serialize<S: serde::Serializer>(&self, s: S) -> Result<S::Ok, S::Error> {
99 s.serialize_str(&self.to_hex())
100 }
101}
102
103impl<'de> Deserialize<'de> for PatternFingerprint {
104 fn deserialize<D: serde::Deserializer<'de>>(d: D) -> Result<Self, D::Error> {
105 let hex: String = Deserialize::deserialize(d)?;
106 Self::from_hex(&hex)
107 .ok_or_else(|| serde::de::Error::custom("expected 64-char ASCII hex fingerprint"))
108 }
109}
110
111/// Computes a [`PatternFingerprint`] for the given `node_kind` using the fixed key.
112///
113/// Equivalent to `normalize_and_hash(node_kind, &[])` — the algorithm for a
114/// leaf node (empty children) is byte-identical to this function.
115///
116/// # Examples
117///
118/// ```rust
119/// use sdivi_patterns::fingerprint::fingerprint_node_kind;
120///
121/// let fp = fingerprint_node_kind("await_expression");
122/// assert_eq!(fp.to_hex().len(), 64);
123/// ```
124pub fn fingerprint_node_kind(node_kind: &str) -> PatternFingerprint {
125 let hash = blake3::keyed_hash(&FINGERPRINT_KEY, node_kind.as_bytes());
126 PatternFingerprint::from_bytes(*hash.as_bytes())
127}
128
#[cfg(test)]
mod tests {
    use super::*;

    /// Hashing the same node kind twice yields equal fingerprints.
    #[test]
    fn same_node_kind_same_fingerprint() {
        let first = fingerprint_node_kind("try_expression");
        let second = fingerprint_node_kind("try_expression");
        assert_eq!(first, second);
    }

    /// Distinct node kinds yield distinct fingerprints.
    #[test]
    fn different_node_kinds_different_fingerprints() {
        let try_fp = fingerprint_node_kind("try_expression");
        let match_fp = fingerprint_node_kind("match_expression");
        assert_ne!(try_fp, match_fp);
    }

    /// The hex rendering is always 64 characters long.
    #[test]
    fn hex_is_64_chars() {
        let hex = fingerprint_node_kind("closure_expression").to_hex();
        assert_eq!(hex.len(), 64);
    }

    /// A fingerprint survives a JSON encode/decode cycle unchanged.
    #[test]
    fn serde_round_trip() {
        let original = fingerprint_node_kind("await_expression");
        let encoded = serde_json::to_string(&original).unwrap();
        let restored: PatternFingerprint = serde_json::from_str(&encoded).unwrap();
        assert_eq!(original, restored);
    }

    /// The key length is load-bearing; pin it.
    #[test]
    fn fingerprint_key_is_32_bytes() {
        assert_eq!(FINGERPRINT_KEY.len(), 32);
    }

    /// `from_hex` inverts `to_hex`.
    #[test]
    fn from_hex_round_trips() {
        let original = fingerprint_node_kind("try_expression");
        let hex = original.to_hex();
        assert_eq!(PatternFingerprint::from_hex(&hex), Some(original));
    }

    /// Inputs that are not 64 bytes long are rejected.
    #[test]
    fn from_hex_invalid_length_returns_none() {
        for input in ["abc", ""] {
            assert!(PatternFingerprint::from_hex(input).is_none());
        }
    }

    /// A 64-byte non-ASCII UTF-8 string should be rejected during
    /// deserialization, not panic.
    #[test]
    fn serde_deserialize_non_ascii_returns_err() {
        // é is 2 bytes each; 32 × 2 = 64 bytes but not 64 chars
        let non_ascii = "é".repeat(32);
        let encoded = serde_json::to_string(&non_ascii).unwrap();
        let outcome = serde_json::from_str::<PatternFingerprint>(&encoded);
        assert!(outcome.is_err());
    }
}