1#![allow(dead_code)]
10
/// Textual identifier for a piece of content.
///
/// This is a thin newtype over `String`. The inner field is public, so any
/// text can be wrapped; identifiers built by `ContentId::generate` are always
/// 32 lowercase hexadecimal characters.
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
pub struct ContentId(pub String);
18
19impl ContentId {
20 #[must_use]
24 pub fn generate(data: &[u8]) -> Self {
25 let hash = fnv128(data);
26 let s = format!("{:016x}{:016x}", hash.0, hash.1);
28 Self(s)
29 }
30
31 #[must_use]
33 pub fn as_str(&self) -> &str {
34 &self.0
35 }
36}
37
38impl std::fmt::Display for ContentId {
39 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
40 write!(f, "{}", self.0)
41 }
42}
43
/// Hashes `data` into 128 bits, returned as `(high_word, low_word)`.
///
/// The state is kept as two 64-bit words. For every input byte the byte is
/// XOR-ed into the low word, after which the state is multiplied by a
/// two-word prime using wrapping 64-bit arithmetic. Empty input returns the
/// initial offset words unchanged.
///
/// NOTE(review): the multiplication works word by word and does not carry the
/// upper half of `low * PRIME_LO` into the high word, so the output may not
/// match a reference FNV-1a 128 implementation. TODO: confirm whether that is
/// intended. The arithmetic is kept as is because changing it would change
/// every identifier produced so far.
fn fnv128(data: &[u8]) -> (u64, u64) {
    // Initial state (low / high word).
    const OFFSET_LO: u64 = 0x62b8_2175_6295_c58d;
    const OFFSET_HI: u64 = 0x6c62_272e_07bb_0142;
    // Multiplier (low / high word).
    const PRIME_LO: u64 = 0x0000_0000_0000_013b;
    const PRIME_HI: u64 = 0x0000_0001_0000_0000;

    let (lo, hi) = data
        .iter()
        .fold((OFFSET_LO, OFFSET_HI), |(lo, hi), &byte| {
            // Mix the byte in first; both products use the mixed low word.
            let mixed = lo ^ u64::from(byte);
            let next_hi = mixed
                .wrapping_mul(PRIME_HI)
                .wrapping_add(hi.wrapping_mul(PRIME_LO));
            (mixed.wrapping_mul(PRIME_LO), next_hi)
        });

    (hi, lo)
}
72
73#[derive(Debug, Clone)]
79pub struct ContentFingerprint {
80 pub id: ContentId,
82 pub audio_fingerprint: Option<Vec<u32>>,
84 pub visual_fingerprint: Option<Vec<u64>>,
86 pub metadata_hash: u64,
88}
89
90impl ContentFingerprint {
91 #[must_use]
93 pub fn new(
94 id: ContentId,
95 audio_fingerprint: Option<Vec<u32>>,
96 visual_fingerprint: Option<Vec<u64>>,
97 metadata_hash: u64,
98 ) -> Self {
99 Self {
100 id,
101 audio_fingerprint,
102 visual_fingerprint,
103 metadata_hash,
104 }
105 }
106}
107
108pub struct ContentIdRegistry {
114 fingerprints: Vec<ContentFingerprint>,
116 stats: ContentIdStats,
118}
119
120impl ContentIdRegistry {
121 #[must_use]
123 pub fn new() -> Self {
124 Self {
125 fingerprints: Vec::new(),
126 stats: ContentIdStats::default(),
127 }
128 }
129
130 pub fn register(&mut self, fingerprint: ContentFingerprint) {
134 let is_duplicate = self.fingerprints.iter().any(|fp| fp.id == fingerprint.id);
135
136 if is_duplicate {
137 self.stats.duplicates_found += 1;
138 } else {
139 self.stats.total_registered += 1;
140 self.fingerprints.push(fingerprint);
141 }
142 }
143
144 #[must_use]
146 pub fn lookup(&self, id: &ContentId) -> Option<&ContentFingerprint> {
147 self.fingerprints.iter().find(|fp| &fp.id == id)
148 }
149
150 #[must_use]
154 pub fn find_by_audio(&self, query: &[u32], min_match: f32) -> Vec<(ContentId, f32)> {
155 self.fingerprints
156 .iter()
157 .filter_map(|fp| {
158 fp.audio_fingerprint.as_ref().map(|audio| {
159 let sim = audio_code_similarity(query, audio);
160 (fp.id.clone(), sim)
161 })
162 })
163 .filter(|(_, sim)| *sim >= min_match)
164 .collect()
165 }
166
167 #[must_use]
171 pub fn find_by_visual(&self, query: &[u64], min_match: f32) -> Vec<(ContentId, f32)> {
172 self.fingerprints
173 .iter()
174 .filter_map(|fp| {
175 fp.visual_fingerprint.as_ref().map(|visual| {
176 let sim = visual_hash_similarity(query, visual);
177 (fp.id.clone(), sim)
178 })
179 })
180 .filter(|(_, sim)| *sim >= min_match)
181 .collect()
182 }
183
184 #[must_use]
186 pub fn stats(&self) -> &ContentIdStats {
187 &self.stats
188 }
189
190 #[must_use]
192 pub fn len(&self) -> usize {
193 self.fingerprints.len()
194 }
195
196 #[must_use]
198 pub fn is_empty(&self) -> bool {
199 self.fingerprints.is_empty()
200 }
201}
202
203impl Default for ContentIdRegistry {
204 fn default() -> Self {
205 Self::new()
206 }
207}
208
/// Scores how much of `query` is present in `candidate`, in `0.0..=1.0`.
///
/// The score is the number of query codes that occur anywhere in
/// `candidate`, divided by the length of the longer of the two slices.
/// A code repeated in `query` is counted once per occurrence. Two empty
/// slices are treated as a perfect match (`1.0`).
fn audio_code_similarity(query: &[u32], candidate: &[u32]) -> f32 {
    let longest = query.len().max(candidate.len());
    if longest == 0 {
        return 1.0;
    }

    // Index the candidate once so each membership test is O(1) instead of a
    // linear scan of `candidate` per query code.
    let known: std::collections::HashSet<u32> = candidate.iter().copied().collect();
    let matches = query.iter().filter(|code| known.contains(*code)).count();

    matches as f32 / longest as f32
}
222
/// Jaccard similarity of the two hash lists, in `0.0..=1.0`.
///
/// Both slices are treated as sets: the score is the number of distinct
/// hashes present in both, divided by the number of distinct hashes present
/// in either. Repeated hashes therefore cannot push the score above `1.0`,
/// and the result does not depend on argument order. Two empty slices are
/// treated as a perfect match (`1.0`).
fn visual_hash_similarity(query: &[u64], candidate: &[u64]) -> f32 {
    // Deduplicate first; counting repeated hashes in the intersection would
    // make |A ∩ B| larger than either set and break the [0, 1] range.
    let query_set: std::collections::HashSet<u64> = query.iter().copied().collect();
    let candidate_set: std::collections::HashSet<u64> = candidate.iter().copied().collect();

    let intersection = query_set.intersection(&candidate_set).count();
    // |A ∪ B| = |A| + |B| - |A ∩ B|; cannot underflow because the
    // intersection is no larger than either set.
    let union = query_set.len() + candidate_set.len() - intersection;
    if union == 0 {
        return 1.0;
    }

    intersection as f32 / union as f32
}
232
/// Running counters kept by the registry. All fields start at zero.
#[derive(Clone, Debug, Default)]
pub struct ContentIdStats {
    /// How many fingerprints with a previously unseen id were stored.
    pub total_registered: u64,
    /// How many registrations were rejected because the id was already known.
    pub duplicates_found: u64,
    /// Bytes saved through deduplication. No code in this file updates it, so
    /// it stays at its initial value unless a caller with write access sets it.
    pub storage_saved_bytes: u64,
}
247
#[cfg(test)]
mod tests {
    use super::*;

    // Two 64-bit words rendered as 16 hex digits each.
    const ID_HEX_LEN: usize = 32;

    // ---- ContentId -------------------------------------------------------

    #[test]
    fn test_content_id_generate_length() {
        let generated = ContentId::generate(b"Hello, World!");
        assert_eq!(generated.0.len(), ID_HEX_LEN);
    }

    #[test]
    fn test_content_id_generate_hex_chars() {
        let generated = ContentId::generate(b"test data");
        let only_hex = generated.0.chars().all(|ch| ch.is_ascii_hexdigit());
        assert!(only_hex);
    }

    #[test]
    fn test_content_id_deterministic() {
        let first = ContentId::generate(b"same input");
        let second = ContentId::generate(b"same input");
        assert_eq!(first, second);
    }

    #[test]
    fn test_content_id_different_inputs() {
        let from_a = ContentId::generate(b"input A");
        let from_b = ContentId::generate(b"input B");
        assert_ne!(from_a, from_b);
    }

    #[test]
    fn test_content_id_empty_data() {
        let generated = ContentId::generate(b"");
        assert_eq!(generated.0.len(), ID_HEX_LEN);
    }

    #[test]
    fn test_content_id_display() {
        let id = ContentId::generate(b"display test");
        let rendered = format!("{id}");
        assert_eq!(rendered.len(), ID_HEX_LEN);
    }

    // ---- helpers ---------------------------------------------------------

    /// Builds a fingerprint whose id is derived from `data`; the audio codes,
    /// visual hashes and metadata hash are fixed dummy values.
    fn make_fp(data: &[u8]) -> ContentFingerprint {
        let id = ContentId::generate(data);
        let audio = vec![1u32, 2, 3, 4];
        let visual = vec![10u64, 20, 30];
        ContentFingerprint::new(id, Some(audio), Some(visual), 0xDEAD_BEEF)
    }

    // ---- ContentIdRegistry -----------------------------------------------

    #[test]
    fn test_registry_empty() {
        let registry = ContentIdRegistry::new();
        assert!(registry.is_empty());
        assert_eq!(registry.len(), 0);
    }

    #[test]
    fn test_registry_register_and_lookup() {
        let mut registry = ContentIdRegistry::new();
        let fingerprint = make_fp(b"video1.mp4");
        let expected_id = fingerprint.id.clone();
        registry.register(fingerprint);

        let hit = registry.lookup(&expected_id);
        assert!(hit.is_some());
        assert_eq!(hit.expect("operation should succeed").id, expected_id);
    }

    #[test]
    fn test_registry_duplicate_not_stored() {
        let mut registry = ContentIdRegistry::new();
        // Same source bytes twice, so both fingerprints carry the same id.
        let original = make_fp(b"video1.mp4");
        let repeat = make_fp(b"video1.mp4");
        registry.register(original);
        registry.register(repeat);

        assert_eq!(registry.len(), 1);
        assert_eq!(registry.stats().duplicates_found, 1);
    }

    #[test]
    fn test_registry_find_by_audio_match() {
        let mut registry = ContentIdRegistry::new();
        registry.register(ContentFingerprint::new(
            ContentId::generate(b"audio test"),
            Some(vec![1u32, 2, 3, 4, 5]),
            None,
            0,
        ));

        let results = registry.find_by_audio(&[1, 2, 3, 4, 5], 0.9);
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].1, 1.0);
    }

    #[test]
    fn test_registry_find_by_audio_no_match() {
        let mut registry = ContentIdRegistry::new();
        registry.register(ContentFingerprint::new(
            ContentId::generate(b"audio test"),
            Some(vec![100u32, 200, 300]),
            None,
            0,
        ));

        let results = registry.find_by_audio(&[1, 2, 3], 0.5);
        assert!(results.is_empty());
    }

    #[test]
    fn test_registry_find_by_visual_match() {
        let mut registry = ContentIdRegistry::new();
        registry.register(ContentFingerprint::new(
            ContentId::generate(b"visual test"),
            None,
            Some(vec![10u64, 20, 30, 40]),
            0,
        ));

        let results = registry.find_by_visual(&[10, 20, 30, 40], 0.9);
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].1, 1.0);
    }

    #[test]
    fn test_registry_stats_initial() {
        let registry = ContentIdRegistry::new();
        let stats = registry.stats();
        assert_eq!(stats.total_registered, 0);
        assert_eq!(stats.duplicates_found, 0);
    }

    #[test]
    fn test_registry_multiple_unique() {
        let mut registry = ContentIdRegistry::new();
        (0u8..5).for_each(|seed| registry.register(make_fp(&[seed])));

        assert_eq!(registry.len(), 5);
        assert_eq!(registry.stats().total_registered, 5);
        assert_eq!(registry.stats().duplicates_found, 0);
    }

    // ---- similarity helpers ----------------------------------------------

    #[test]
    fn test_audio_code_similarity_empty() {
        let score = audio_code_similarity(&[], &[]);
        assert_eq!(score, 1.0);
    }

    #[test]
    fn test_audio_code_similarity_disjoint() {
        let score = audio_code_similarity(&[1, 2, 3], &[4, 5, 6]);
        assert_eq!(score, 0.0);
    }
}