Skip to main content

oxihuman_core/
asset_hash.rs

1// Copyright (C) 2026 COOLJAPAN OU (Team KitaSan)
2// SPDX-License-Identifier: Apache-2.0
3
4//! Content-addressed asset tracking using a rolling FNV-inspired hash.
5
6// ── Hash type ─────────────────────────────────────────────────────────────────
7
/// Fixed-size 32-byte digest used to identify an asset by its content.
///
/// The wrapped array is public, so the raw digest bytes are reachable as `.0`.
/// Two hashes compare equal exactly when all 32 bytes match.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
#[allow(dead_code)]
pub struct AssetHash(pub [u8; 32]);
12
13// ── Hasher ────────────────────────────────────────────────────────────────────
14
15/// FNV-inspired 256-bit rolling hasher (eight 32-bit lanes).
16#[allow(dead_code)]
17pub struct AssetHasher {
18    state: [u32; 8],
19}
20
21/// FNV-1a prime (32-bit).
22const FNV_PRIME: u32 = 0x0100_0193;
23
24impl AssetHasher {
25    /// Initialise with FNV offset basis split across eight lanes.
26    #[allow(dead_code)]
27    pub fn new() -> Self {
28        // Use eight distinct constants inspired by FNV offset basis
29        Self {
30            state: [
31                0x811c_9dc5,
32                0x811c_9dc5 ^ 0x5a82_7999,
33                0x811c_9dc5 ^ 0x9e37_79b9,
34                0x811c_9dc5 ^ 0xf1bb_cdcb,
35                0x811c_9dc5 ^ 0x27c4_acdd,
36                0x811c_9dc5 ^ 0x6c62_272e,
37                0x811c_9dc5 ^ 0xa09e_667f,
38                0x811c_9dc5 ^ 0xcd3d_4f0d,
39            ],
40        }
41    }
42
43    /// Process a slice of bytes.
44    #[allow(dead_code)]
45    pub fn update(&mut self, data: &[u8]) {
46        for (i, &byte) in data.iter().enumerate() {
47            let lane = i & 7;
48            self.state[lane] ^= byte as u32;
49            self.state[lane] = self.state[lane].wrapping_mul(FNV_PRIME);
50            // Mix adjacent lanes for better diffusion
51            self.state[lane] ^= self.state[(lane + 1) & 7].rotate_right(13);
52        }
53    }
54
55    /// Produce the final 32-byte digest.
56    #[allow(dead_code)]
57    pub fn finalize(&self) -> AssetHash {
58        let mut out = [0u8; 32];
59        for (i, &word) in self.state.iter().enumerate() {
60            let bytes = word.to_le_bytes();
61            out[i * 4..(i + 1) * 4].copy_from_slice(&bytes);
62        }
63        AssetHash(out)
64    }
65}
66
67impl Default for AssetHasher {
68    fn default() -> Self {
69        Self::new()
70    }
71}
72
73// ── Convenience helpers ───────────────────────────────────────────────────────
74
75/// Hash a byte slice.
76#[allow(dead_code)]
77pub fn hash_bytes(data: &[u8]) -> AssetHash {
78    let mut h = AssetHasher::new();
79    h.update(data);
80    h.finalize()
81}
82
83/// Hash string content.
84#[allow(dead_code)]
85pub fn hash_file_content(content: &str) -> AssetHash {
86    hash_bytes(content.as_bytes())
87}
88
89// ── AssetHash helpers ─────────────────────────────────────────────────────────
90
91impl AssetHash {
92    /// Encode as a 64-character lowercase hex string.
93    #[allow(dead_code)]
94    pub fn to_hex(&self) -> String {
95        self.0.iter().map(|b| format!("{b:02x}")).collect()
96    }
97
98    /// Decode from a 64-character hex string.
99    #[allow(dead_code)]
100    pub fn from_hex(s: &str) -> Result<AssetHash, String> {
101        if s.len() != 64 {
102            return Err(format!("Expected 64 hex chars, got {}", s.len()));
103        }
104        let mut out = [0u8; 32];
105        for (i, chunk) in s.as_bytes().chunks(2).enumerate() {
106            let hi = hex_nibble(chunk[0])?;
107            let lo = hex_nibble(chunk[1])?;
108            out[i] = (hi << 4) | lo;
109        }
110        Ok(AssetHash(out))
111    }
112}
113
114fn hex_nibble(b: u8) -> Result<u8, String> {
115    match b {
116        b'0'..=b'9' => Ok(b - b'0'),
117        b'a'..=b'f' => Ok(b - b'a' + 10),
118        b'A'..=b'F' => Ok(b - b'A' + 10),
119        _ => Err(format!("Invalid hex character: {}", b as char)),
120    }
121}
122
123// ── Registry ──────────────────────────────────────────────────────────────────
124
125/// A single record in the content-addressed registry.
126#[allow(dead_code)]
127#[derive(Debug, Clone)]
128pub struct AssetRecord {
129    pub hash: AssetHash,
130    pub path: String,
131    pub size_bytes: usize,
132    pub kind: String,
133}
134
135/// Content-addressed asset registry.
136#[allow(dead_code)]
137pub struct AssetRegistry {
138    records: Vec<AssetRecord>,
139}
140
141impl AssetRegistry {
142    /// Create an empty registry.
143    #[allow(dead_code)]
144    pub fn new() -> Self {
145        Self {
146            records: Vec::new(),
147        }
148    }
149
150    /// Register an asset by its byte content. Returns the hash.
151    #[allow(dead_code)]
152    pub fn register(&mut self, path: &str, content: &[u8], kind: &str) -> AssetHash {
153        let hash = hash_bytes(content);
154        self.records.push(AssetRecord {
155            hash: hash.clone(),
156            path: path.to_string(),
157            size_bytes: content.len(),
158            kind: kind.to_string(),
159        });
160        hash
161    }
162
163    /// Find a record by its hash.
164    #[allow(dead_code)]
165    pub fn find_by_hash(&self, hash: &AssetHash) -> Option<&AssetRecord> {
166        self.records.iter().find(|r| &r.hash == hash)
167    }
168
169    /// Find a record by path (first match).
170    #[allow(dead_code)]
171    pub fn find_by_path(&self, path: &str) -> Option<&AssetRecord> {
172        self.records.iter().find(|r| r.path == path)
173    }
174
175    /// All hashes in registration order.
176    #[allow(dead_code)]
177    pub fn all_hashes(&self) -> Vec<AssetHash> {
178        self.records.iter().map(|r| r.hash.clone()).collect()
179    }
180
181    /// Count of unique hashes (deduplicated).
182    #[allow(dead_code)]
183    pub fn dedup_count(&self) -> usize {
184        let mut seen = std::collections::HashSet::new();
185        for r in &self.records {
186            seen.insert(r.hash.clone());
187        }
188        seen.len()
189    }
190}
191
192impl Default for AssetRegistry {
193    fn default() -> Self {
194        Self::new()
195    }
196}
197
198// ── Tests ─────────────────────────────────────────────────────────────────────
199
#[cfg(test)]
mod tests {
    //! Unit tests for hashing, hex encoding/decoding and the asset registry.

    use super::*;

    #[test]
    fn test_hash_bytes_deterministic() {
        let data = b"hello oxihuman";
        assert_eq!(hash_bytes(data), hash_bytes(data));
    }

    #[test]
    fn test_hash_bytes_different_inputs_differ() {
        assert_ne!(hash_bytes(b"abc"), hash_bytes(b"xyz"));
    }

    #[test]
    fn test_to_hex_length_64() {
        let h = hash_bytes(b"test");
        assert_eq!(h.to_hex().len(), 64);
    }

    #[test]
    fn test_to_hex_lowercase() {
        let hex = hash_bytes(b"test").to_hex();
        // Only the sixteen lowercase hex digits are acceptable; a bare
        // `is_lowercase` check would also let letters such as 'z' through.
        assert!(hex.chars().all(|c| matches!(c, '0'..='9' | 'a'..='f')));
    }

    #[test]
    fn test_to_hex_known_value() {
        assert_eq!(AssetHash([0xab; 32]).to_hex(), "ab".repeat(32));
        assert_eq!(AssetHash([0x00; 32]).to_hex(), "00".repeat(32));
    }

    #[test]
    fn test_from_hex_roundtrip() {
        let h = hash_bytes(b"roundtrip test");
        let decoded = AssetHash::from_hex(&h.to_hex()).expect("own hex output must decode");
        assert_eq!(h, decoded);
    }

    #[test]
    fn test_from_hex_accepts_uppercase() {
        let h = hash_bytes(b"uppercase");
        let upper = h.to_hex().to_uppercase();
        assert_eq!(AssetHash::from_hex(&upper), Ok(h));
    }

    #[test]
    fn test_from_hex_bad_length_error() {
        assert!(AssetHash::from_hex("abc").is_err());
    }

    #[test]
    fn test_from_hex_invalid_char_error() {
        let bad = "z".repeat(64);
        assert!(AssetHash::from_hex(&bad).is_err());
    }

    #[test]
    fn test_register_and_find_by_hash() {
        let mut reg = AssetRegistry::new();
        let h = reg.register("mesh/body.obj", b"obj data", "mesh");
        let found = reg
            .find_by_hash(&h)
            .expect("registered hash must be found");
        assert_eq!(found.path, "mesh/body.obj");
        assert_eq!(found.size_bytes, b"obj data".len());
    }

    #[test]
    fn test_find_by_path() {
        let mut reg = AssetRegistry::new();
        reg.register("tex/skin.png", b"png data", "texture");
        let found = reg
            .find_by_path("tex/skin.png")
            .expect("registered path must be found");
        assert_eq!(found.kind, "texture");
    }

    #[test]
    fn test_find_by_hash_missing_returns_none() {
        let reg = AssetRegistry::new();
        let h = hash_bytes(b"ghost");
        assert!(reg.find_by_hash(&h).is_none());
    }

    #[test]
    fn test_dedup_count_same_content() {
        let mut reg = AssetRegistry::new();
        reg.register("a.obj", b"same", "mesh");
        reg.register("b.obj", b"same", "mesh");
        assert_eq!(reg.dedup_count(), 1);
    }

    #[test]
    fn test_dedup_count_different_content() {
        let mut reg = AssetRegistry::new();
        reg.register("a.obj", b"aaa", "mesh");
        reg.register("b.obj", b"bbb", "mesh");
        assert_eq!(reg.dedup_count(), 2);
    }

    #[test]
    fn test_empty_registry_dedup_zero() {
        let reg = AssetRegistry::new();
        assert_eq!(reg.dedup_count(), 0);
    }

    #[test]
    fn test_all_hashes_length() {
        let mut reg = AssetRegistry::new();
        reg.register("a", b"1", "t");
        reg.register("b", b"2", "t");
        assert_eq!(reg.all_hashes().len(), 2);
    }

    #[test]
    fn test_hash_file_content_same_as_bytes() {
        let s = "hello world";
        assert_eq!(hash_file_content(s), hash_bytes(s.as_bytes()));
    }
}