recoco_utils/
fingerprint.rs

1// SPDX-FileCopyrightText: 2026 CocoIndex (upstream)
2// SPDX-FileCopyrightText: 2026 Knitli Inc. (ReCoco)
3// SPDX-FileContributor: Adam Poulemanos <adam@knit.li>
4// SPDX-FileContributor: CocoIndex Contributors
5//
6// SPDX-License-Identifier: Apache-2.0
7
8use crate::{
9    client_bail,
10    error::{Error, Result},
11};
12use base64::prelude::*;
13use serde::Deserialize;
14use serde::ser::{
15    Serialize, SerializeMap, SerializeSeq, SerializeStruct, SerializeStructVariant, SerializeTuple,
16    SerializeTupleStruct, SerializeTupleVariant, Serializer,
17};
18
/// Error reported while serializing a value into a [`Fingerprinter`].
///
/// It carries nothing but a human-readable message.
#[derive(Debug)]
pub struct FingerprinterError {
    /// Human-readable description of what went wrong.
    msg: String,
}

impl std::fmt::Display for FingerprinterError {
    /// Renders the error as `FingerprinterError: <msg>`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self { msg } = self;
        write!(f, "FingerprinterError: {msg}")
    }
}

// No underlying cause is tracked, so the trait's default methods are sufficient.
impl std::error::Error for FingerprinterError {}
30impl serde::ser::Error for FingerprinterError {
31    fn custom<T>(msg: T) -> Self
32    where
33        T: std::fmt::Display,
34    {
35        FingerprinterError {
36            msg: format!("{msg}"),
37        }
38    }
39}
40
/// A fixed-size, 16-byte fingerprint value.
///
/// The wrapped array is public, so the raw bytes can be read or supplied directly.
#[derive(Clone, Copy, PartialEq, Eq)]
pub struct Fingerprint(pub [u8; 16]);
43
44impl Fingerprint {
45    #[inline(always)]
46    pub fn to_base64(self) -> String {
47        BASE64_STANDARD.encode(self.0)
48    }
49
50    #[inline(always)]
51    pub fn from_base64(s: &str) -> Result<Self> {
52        let bytes = match s.len() {
53            24 => BASE64_STANDARD.decode(s)?,
54
55            // For backward compatibility. Some old version (<= v0.1.2) is using hex encoding.
56            32 => hex::decode(s)?,
57            _ => client_bail!("Encoded fingerprint length is unexpected: {}", s.len()),
58        };
59        let bytes: [u8; 16] = bytes.try_into().map_err(|e: Vec<u8>| {
60            Error::client(format!(
61                "Fingerprint bytes length is unexpected: {}",
62                e.len()
63            ))
64        })?;
65        Ok(Fingerprint(bytes))
66    }
67
68    #[inline(always)]
69    pub fn as_slice(&self) -> &[u8] {
70        &self.0
71    }
72}
73
74impl std::fmt::Display for Fingerprint {
75    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
76        write!(f, "#")?;
77        for byte in self.0.iter() {
78            write!(f, "{:02x}", byte)?;
79        }
80        Ok(())
81    }
82}
83
84impl std::fmt::Debug for Fingerprint {
85    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
86        write!(f, "{}", self)
87    }
88}
89
90impl AsRef<[u8]> for Fingerprint {
91    fn as_ref(&self) -> &[u8] {
92        &self.0
93    }
94}
95
96impl std::hash::Hash for Fingerprint {
97    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
98        // Fingerprint is already evenly distributed, so we can just use the first few bytes.
99        const N: usize = size_of::<usize>();
100        state.write(&self.0[..N]);
101    }
102}
103
104impl Serialize for Fingerprint {
105    fn serialize<S>(&self, serializer: S) -> std::result::Result<S::Ok, S::Error>
106    where
107        S: serde::Serializer,
108    {
109        serializer.serialize_str(&self.to_base64())
110    }
111}
112
113impl<'de> Deserialize<'de> for Fingerprint {
114    fn deserialize<D>(deserializer: D) -> std::result::Result<Self, D::Error>
115    where
116        D: serde::Deserializer<'de>,
117    {
118        let s = String::deserialize(deserializer)?;
119        Self::from_base64(&s).map_err(serde::de::Error::custom)
120    }
121}
/// Incremental fingerprint builder backed by a `blake3::Hasher`.
#[derive(Clone, Default)]
pub struct Fingerprinter {
    /// Running hash state that all written data is fed into.
    hasher: blake3::Hasher,
}
126
127impl Fingerprinter {
128    #[inline(always)]
129    pub fn into_fingerprint(self) -> Fingerprint {
130        let mut output = [0u8; 16];
131        self.hasher.finalize_xof().fill(&mut output);
132        Fingerprint(output)
133    }
134
135    #[inline(always)]
136    pub fn with<S: Serialize + ?Sized>(
137        self,
138        value: &S,
139    ) -> std::result::Result<Self, FingerprinterError> {
140        let mut fingerprinter = self;
141        value.serialize(&mut fingerprinter)?;
142        Ok(fingerprinter)
143    }
144
145    #[inline(always)]
146    pub fn write<S: Serialize + ?Sized>(
147        &mut self,
148        value: &S,
149    ) -> std::result::Result<(), FingerprinterError> {
150        value.serialize(self)
151    }
152
153    #[inline(always)]
154    pub fn write_raw_bytes(&mut self, bytes: &[u8]) {
155        self.hasher.update(bytes);
156    }
157
158    #[inline(always)]
159    fn write_type_tag(&mut self, tag: &str) {
160        self.hasher.update(tag.as_bytes());
161        self.hasher.update(b";");
162    }
163
164    #[inline(always)]
165    fn write_end_tag(&mut self) {
166        self.hasher.update(b".");
167    }
168
169    #[inline(always)]
170    fn write_varlen_bytes(&mut self, bytes: &[u8]) {
171        self.write_usize(bytes.len());
172        self.hasher.update(bytes);
173    }
174
175    #[inline(always)]
176    fn write_usize(&mut self, value: usize) {
177        self.hasher.update(&(value as u32).to_le_bytes());
178    }
179}
180
181impl Serializer for &mut Fingerprinter {
182    type Ok = ();
183    type Error = FingerprinterError;
184
185    type SerializeSeq = Self;
186    type SerializeTuple = Self;
187    type SerializeTupleStruct = Self;
188    type SerializeTupleVariant = Self;
189    type SerializeMap = Self;
190    type SerializeStruct = Self;
191    type SerializeStructVariant = Self;
192
193    fn serialize_bool(self, v: bool) -> std::result::Result<(), Self::Error> {
194        self.write_type_tag(if v { "t" } else { "f" });
195        Ok(())
196    }
197
198    fn serialize_i8(self, v: i8) -> std::result::Result<(), Self::Error> {
199        self.write_type_tag("i1");
200        self.hasher.update(&v.to_le_bytes());
201        Ok(())
202    }
203
204    fn serialize_i16(self, v: i16) -> std::result::Result<(), Self::Error> {
205        self.write_type_tag("i2");
206        self.hasher.update(&v.to_le_bytes());
207        Ok(())
208    }
209
210    fn serialize_i32(self, v: i32) -> std::result::Result<(), Self::Error> {
211        self.write_type_tag("i4");
212        self.hasher.update(&v.to_le_bytes());
213        Ok(())
214    }
215
216    fn serialize_i64(self, v: i64) -> std::result::Result<(), Self::Error> {
217        self.write_type_tag("i8");
218        self.hasher.update(&v.to_le_bytes());
219        Ok(())
220    }
221
222    fn serialize_u8(self, v: u8) -> std::result::Result<(), Self::Error> {
223        self.write_type_tag("u1");
224        self.hasher.update(&v.to_le_bytes());
225        Ok(())
226    }
227
228    fn serialize_u16(self, v: u16) -> std::result::Result<(), Self::Error> {
229        self.write_type_tag("u2");
230        self.hasher.update(&v.to_le_bytes());
231        Ok(())
232    }
233
234    fn serialize_u32(self, v: u32) -> std::result::Result<(), Self::Error> {
235        self.write_type_tag("u4");
236        self.hasher.update(&v.to_le_bytes());
237        Ok(())
238    }
239
240    fn serialize_u64(self, v: u64) -> std::result::Result<(), Self::Error> {
241        self.write_type_tag("u8");
242        self.hasher.update(&v.to_le_bytes());
243        Ok(())
244    }
245
246    fn serialize_f32(self, v: f32) -> std::result::Result<(), Self::Error> {
247        self.write_type_tag("f4");
248        self.hasher.update(&v.to_le_bytes());
249        Ok(())
250    }
251
252    fn serialize_f64(self, v: f64) -> std::result::Result<(), Self::Error> {
253        self.write_type_tag("f8");
254        self.hasher.update(&v.to_le_bytes());
255        Ok(())
256    }
257
258    fn serialize_char(self, v: char) -> std::result::Result<(), Self::Error> {
259        self.write_type_tag("c");
260        self.write_usize(v as usize);
261        Ok(())
262    }
263
264    fn serialize_str(self, v: &str) -> std::result::Result<(), Self::Error> {
265        self.write_type_tag("s");
266        self.write_varlen_bytes(v.as_bytes());
267        Ok(())
268    }
269
270    fn serialize_bytes(self, v: &[u8]) -> std::result::Result<(), Self::Error> {
271        self.write_type_tag("b");
272        self.write_varlen_bytes(v);
273        Ok(())
274    }
275
276    fn serialize_none(self) -> std::result::Result<(), Self::Error> {
277        self.write_type_tag("");
278        Ok(())
279    }
280
281    fn serialize_some<T>(self, value: &T) -> std::result::Result<(), Self::Error>
282    where
283        T: ?Sized + Serialize,
284    {
285        value.serialize(self)
286    }
287
288    fn serialize_unit(self) -> std::result::Result<(), Self::Error> {
289        self.write_type_tag("()");
290        Ok(())
291    }
292
293    fn serialize_unit_struct(self, name: &'static str) -> std::result::Result<(), Self::Error> {
294        self.write_type_tag("US");
295        self.write_varlen_bytes(name.as_bytes());
296        Ok(())
297    }
298
299    fn serialize_unit_variant(
300        self,
301        name: &'static str,
302        _variant_index: u32,
303        variant: &'static str,
304    ) -> std::result::Result<(), Self::Error> {
305        self.write_type_tag("UV");
306        self.write_varlen_bytes(name.as_bytes());
307        self.write_varlen_bytes(variant.as_bytes());
308        Ok(())
309    }
310
311    fn serialize_newtype_struct<T>(
312        self,
313        name: &'static str,
314        value: &T,
315    ) -> std::result::Result<(), Self::Error>
316    where
317        T: ?Sized + Serialize,
318    {
319        self.write_type_tag("NS");
320        self.write_varlen_bytes(name.as_bytes());
321        value.serialize(self)
322    }
323
324    fn serialize_newtype_variant<T>(
325        self,
326        name: &'static str,
327        _variant_index: u32,
328        variant: &'static str,
329        value: &T,
330    ) -> std::result::Result<(), Self::Error>
331    where
332        T: ?Sized + Serialize,
333    {
334        self.write_type_tag("NV");
335        self.write_varlen_bytes(name.as_bytes());
336        self.write_varlen_bytes(variant.as_bytes());
337        value.serialize(self)
338    }
339
340    fn serialize_seq(
341        self,
342        _len: Option<usize>,
343    ) -> std::result::Result<Self::SerializeSeq, Self::Error> {
344        self.write_type_tag("L");
345        Ok(self)
346    }
347
348    fn serialize_tuple(
349        self,
350        _len: usize,
351    ) -> std::result::Result<Self::SerializeTuple, Self::Error> {
352        self.write_type_tag("T");
353        Ok(self)
354    }
355
356    fn serialize_tuple_struct(
357        self,
358        name: &'static str,
359        _len: usize,
360    ) -> std::result::Result<Self::SerializeTupleStruct, Self::Error> {
361        self.write_type_tag("TS");
362        self.write_varlen_bytes(name.as_bytes());
363        Ok(self)
364    }
365
366    fn serialize_tuple_variant(
367        self,
368        name: &'static str,
369        _variant_index: u32,
370        variant: &'static str,
371        _len: usize,
372    ) -> std::result::Result<Self::SerializeTupleVariant, Self::Error> {
373        self.write_type_tag("TV");
374        self.write_varlen_bytes(name.as_bytes());
375        self.write_varlen_bytes(variant.as_bytes());
376        Ok(self)
377    }
378
379    fn serialize_map(
380        self,
381        _len: Option<usize>,
382    ) -> std::result::Result<Self::SerializeMap, Self::Error> {
383        self.write_type_tag("M");
384        Ok(self)
385    }
386
387    fn serialize_struct(
388        self,
389        name: &'static str,
390        _len: usize,
391    ) -> std::result::Result<Self::SerializeStruct, Self::Error> {
392        self.write_type_tag("S");
393        self.write_varlen_bytes(name.as_bytes());
394        Ok(self)
395    }
396
397    fn serialize_struct_variant(
398        self,
399        name: &'static str,
400        _variant_index: u32,
401        variant: &'static str,
402        _len: usize,
403    ) -> std::result::Result<Self::SerializeStructVariant, Self::Error> {
404        self.write_type_tag("SV");
405        self.write_varlen_bytes(name.as_bytes());
406        self.write_varlen_bytes(variant.as_bytes());
407        Ok(self)
408    }
409}
410
411impl SerializeSeq for &mut Fingerprinter {
412    type Ok = ();
413    type Error = FingerprinterError;
414
415    fn serialize_element<T>(&mut self, value: &T) -> std::result::Result<(), Self::Error>
416    where
417        T: ?Sized + Serialize,
418    {
419        value.serialize(&mut **self)
420    }
421
422    fn end(self) -> std::result::Result<(), Self::Error> {
423        self.write_end_tag();
424        Ok(())
425    }
426}
427
428impl SerializeTuple for &mut Fingerprinter {
429    type Ok = ();
430    type Error = FingerprinterError;
431
432    fn serialize_element<T>(&mut self, value: &T) -> std::result::Result<(), Self::Error>
433    where
434        T: ?Sized + Serialize,
435    {
436        value.serialize(&mut **self)
437    }
438
439    fn end(self) -> std::result::Result<(), Self::Error> {
440        self.write_end_tag();
441        Ok(())
442    }
443}
444
445impl SerializeTupleStruct for &mut Fingerprinter {
446    type Ok = ();
447    type Error = FingerprinterError;
448
449    fn serialize_field<T>(&mut self, value: &T) -> std::result::Result<(), Self::Error>
450    where
451        T: ?Sized + Serialize,
452    {
453        value.serialize(&mut **self)
454    }
455
456    fn end(self) -> std::result::Result<(), Self::Error> {
457        self.write_end_tag();
458        Ok(())
459    }
460}
461
462impl SerializeTupleVariant for &mut Fingerprinter {
463    type Ok = ();
464    type Error = FingerprinterError;
465
466    fn serialize_field<T>(&mut self, value: &T) -> std::result::Result<(), Self::Error>
467    where
468        T: ?Sized + Serialize,
469    {
470        value.serialize(&mut **self)
471    }
472
473    fn end(self) -> std::result::Result<(), Self::Error> {
474        self.write_end_tag();
475        Ok(())
476    }
477}
478
479impl SerializeMap for &mut Fingerprinter {
480    type Ok = ();
481    type Error = FingerprinterError;
482
483    fn serialize_key<T>(&mut self, key: &T) -> std::result::Result<(), Self::Error>
484    where
485        T: ?Sized + Serialize,
486    {
487        key.serialize(&mut **self)
488    }
489
490    fn serialize_value<T>(&mut self, value: &T) -> std::result::Result<(), Self::Error>
491    where
492        T: ?Sized + Serialize,
493    {
494        value.serialize(&mut **self)
495    }
496
497    fn end(self) -> std::result::Result<(), Self::Error> {
498        self.write_end_tag();
499        Ok(())
500    }
501}
502
503impl SerializeStruct for &mut Fingerprinter {
504    type Ok = ();
505    type Error = FingerprinterError;
506
507    fn serialize_field<T>(
508        &mut self,
509        key: &'static str,
510        value: &T,
511    ) -> std::result::Result<(), Self::Error>
512    where
513        T: ?Sized + Serialize,
514    {
515        self.hasher.update(key.as_bytes());
516        self.hasher.update(b"\n");
517        value.serialize(&mut **self)
518    }
519
520    fn end(self) -> std::result::Result<(), Self::Error> {
521        self.write_end_tag();
522        Ok(())
523    }
524}
525
526impl SerializeStructVariant for &mut Fingerprinter {
527    type Ok = ();
528    type Error = FingerprinterError;
529
530    fn serialize_field<T>(
531        &mut self,
532        key: &'static str,
533        value: &T,
534    ) -> std::result::Result<(), Self::Error>
535    where
536        T: ?Sized + Serialize,
537    {
538        self.hasher.update(key.as_bytes());
539        self.hasher.update(b"\n");
540        value.serialize(&mut **self)
541    }
542
543    fn end(self) -> std::result::Result<(), Self::Error> {
544        self.write_end_tag();
545        Ok(())
546    }
547}