Skip to main content

sdivi_patterns/
fingerprint.rs

1//! [`PatternFingerprint`] — keyed blake3 digest of a pattern's structural shape.
2
3use serde::{Deserialize, Serialize};
4
/// Fixed 32-byte blake3 key shared by every pattern fingerprint.
///
/// Treat this value as frozen within a `snapshot_version`: changing it
/// invalidates all existing snapshot fingerprints and requires a snapshot
/// version bump per CLAUDE.md Rule 19.
///
/// NOTE: the byte literal spells `sdi-rust` rather than `sdivi-rust` on
/// purpose. These bytes are the on-the-wire blake3 key and are unrelated to
/// the project's display name; rotating them would invalidate every published
/// snapshot's fingerprints. The literal must remain exactly 32 bytes long —
/// the array type `[u8; 32]` enforces that at compile time.
pub const FINGERPRINT_KEY: [u8; 32] = *b"sdi-rust::patterns::fingerprint!";
16
/// A keyed blake3 digest representing the structural shape of a pattern.
///
/// Fingerprints are computed from the `node_kind` of a [`sdivi_parsing::feature_record::PatternHint`],
/// producing a stable, content-independent shape identifier. Two pattern instances
/// with the same `node_kind` produce the same [`PatternFingerprint`].
///
/// Serialized as a 64-character lowercase hex string for JSON key compatibility.
///
/// # Examples
///
/// ```rust
/// use sdivi_patterns::fingerprint::fingerprint_node_kind;
///
/// let fp1 = fingerprint_node_kind("try_expression");
/// let fp2 = fingerprint_node_kind("try_expression");
/// let fp3 = fingerprint_node_kind("match_expression");
///
/// assert_eq!(fp1, fp2);
/// assert_ne!(fp1, fp3);
/// assert_eq!(fp1.to_hex().len(), 64);
/// ```
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct PatternFingerprint([u8; 32]);

impl PatternFingerprint {
    /// Constructs a [`PatternFingerprint`] from raw digest bytes.
    pub fn from_bytes(bytes: [u8; 32]) -> Self {
        PatternFingerprint(bytes)
    }

    /// Parses a [`PatternFingerprint`] from a 64-character hex string.
    ///
    /// The canonical form (as produced by [`PatternFingerprint::to_hex`]) is
    /// lowercase; uppercase digits `A`–`F` are accepted as well.
    ///
    /// Returns `None` if the string is not exactly 64 ASCII hex digits. This
    /// function never panics: non-ASCII input, sign characters such as `+`,
    /// and whitespace are all rejected with `None`.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use sdivi_patterns::fingerprint::{fingerprint_node_kind, PatternFingerprint};
    ///
    /// let fp = fingerprint_node_kind("try_expression");
    /// let hex = fp.to_hex();
    /// let parsed = PatternFingerprint::from_hex(&hex).unwrap();
    /// assert_eq!(fp, parsed);
    /// ```
    pub fn from_hex(hex: &str) -> Option<Self> {
        /// Decodes a single ASCII hex digit into its 4-bit value.
        fn nibble(digit: u8) -> Option<u8> {
            match digit {
                b'0'..=b'9' => Some(digit - b'0'),
                b'a'..=b'f' => Some(digit - b'a' + 10),
                b'A'..=b'F' => Some(digit - b'A' + 10),
                _ => None,
            }
        }

        // Work on raw bytes rather than `str` slices: slicing a `&str` at a
        // byte offset that falls inside a multi-byte character panics, and
        // this function is reachable from deserialization of untrusted input.
        let digits = hex.as_bytes();
        if digits.len() != 64 {
            return None;
        }

        let mut bytes = [0u8; 32];
        for (byte, pair) in bytes.iter_mut().zip(digits.chunks_exact(2)) {
            // Decoding each digit explicitly (instead of `u8::from_str_radix`)
            // also rejects pairs like "+f", which the integer parser accepts.
            *byte = (nibble(pair[0])? << 4) | nibble(pair[1])?;
        }
        Some(PatternFingerprint(bytes))
    }

    /// Returns the raw digest bytes.
    pub fn as_bytes(&self) -> &[u8; 32] {
        &self.0
    }

    /// Returns a 64-character lowercase hex string.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use sdivi_patterns::fingerprint::fingerprint_node_kind;
    ///
    /// let fp = fingerprint_node_kind("try_expression");
    /// assert_eq!(fp.to_hex().len(), 64);
    /// ```
    pub fn to_hex(&self) -> String {
        // Lookup table indexed by nibble value; avoids the formatting
        // machinery and the `unwrap()` that `write!` would require.
        const DIGITS: &[u8; 16] = b"0123456789abcdef";

        let mut out = String::with_capacity(64);
        for &byte in &self.0 {
            out.push(char::from(DIGITS[usize::from(byte >> 4)]));
            out.push(char::from(DIGITS[usize::from(byte & 0x0f)]));
        }
        out
    }
}
96
97impl Serialize for PatternFingerprint {
98    fn serialize<S: serde::Serializer>(&self, s: S) -> Result<S::Ok, S::Error> {
99        s.serialize_str(&self.to_hex())
100    }
101}
102
103impl<'de> Deserialize<'de> for PatternFingerprint {
104    fn deserialize<D: serde::Deserializer<'de>>(d: D) -> Result<Self, D::Error> {
105        let hex: String = Deserialize::deserialize(d)?;
106        Self::from_hex(&hex)
107            .ok_or_else(|| serde::de::Error::custom("expected 64-char ASCII hex fingerprint"))
108    }
109}
110
111/// Computes a [`PatternFingerprint`] for the given `node_kind` using the fixed key.
112///
113/// Equivalent to `normalize_and_hash(node_kind, &[])` — the algorithm for a
114/// leaf node (empty children) is byte-identical to this function.
115///
116/// # Examples
117///
118/// ```rust
119/// use sdivi_patterns::fingerprint::fingerprint_node_kind;
120///
121/// let fp = fingerprint_node_kind("await_expression");
122/// assert_eq!(fp.to_hex().len(), 64);
123/// ```
124pub fn fingerprint_node_kind(node_kind: &str) -> PatternFingerprint {
125    let hash = blake3::keyed_hash(&FINGERPRINT_KEY, node_kind.as_bytes());
126    PatternFingerprint::from_bytes(*hash.as_bytes())
127}
128
#[cfg(test)]
mod tests {
    use super::*;

    /// Hashing the same node kind twice yields equal fingerprints.
    #[test]
    fn same_node_kind_same_fingerprint() {
        let first = fingerprint_node_kind("try_expression");
        let second = fingerprint_node_kind("try_expression");
        assert_eq!(first, second);
    }

    /// Distinct node kinds yield distinct fingerprints.
    #[test]
    fn different_node_kinds_different_fingerprints() {
        let try_fp = fingerprint_node_kind("try_expression");
        let match_fp = fingerprint_node_kind("match_expression");
        assert_ne!(try_fp, match_fp);
    }

    /// The hex rendering is always 64 characters long.
    #[test]
    fn hex_is_64_chars() {
        let hex = fingerprint_node_kind("closure_expression").to_hex();
        assert_eq!(hex.len(), 64);
    }

    /// A fingerprint survives a JSON encode/decode cycle unchanged.
    #[test]
    fn serde_round_trip() {
        let original = fingerprint_node_kind("await_expression");
        let encoded = serde_json::to_string(&original).unwrap();
        let restored = serde_json::from_str::<PatternFingerprint>(&encoded).unwrap();
        assert_eq!(original, restored);
    }

    /// The blake3 key is exactly 32 bytes.
    #[test]
    fn fingerprint_key_is_32_bytes() {
        assert_eq!(FINGERPRINT_KEY.len(), 32);
    }

    /// `from_hex` inverts `to_hex`.
    #[test]
    fn from_hex_round_trips() {
        let original = fingerprint_node_kind("try_expression");
        let hex = original.to_hex();
        let reparsed = PatternFingerprint::from_hex(&hex).unwrap();
        assert_eq!(original, reparsed);
    }

    /// Inputs of the wrong length are rejected.
    #[test]
    fn from_hex_invalid_length_returns_none() {
        for input in ["abc", ""] {
            assert!(PatternFingerprint::from_hex(input).is_none());
        }
    }

    /// A 64-byte string made of non-ASCII characters must be reported as a
    /// deserialization error rather than causing a panic.
    #[test]
    fn serde_deserialize_non_ascii_returns_err() {
        // `é` encodes to 2 bytes in UTF-8, so 32 copies occupy 64 bytes while
        // being only 32 characters.
        let non_ascii = "é".repeat(32);
        let encoded = serde_json::to_string(&non_ascii).unwrap();
        let outcome = serde_json::from_str::<PatternFingerprint>(&encoded);
        assert!(outcome.is_err());
    }
}