Skip to main content

dynomite/hashkit/
token.rs

1//! Big-integer token used as the hash output and ring coordinate.
2//!
3//! The C reference stores tokens as `(signum, mag[4], len)`, where
4//! `mag[]` holds little-significance-first 32-bit words in a numeral
5//! system whose radix is `UINT_MAX_PLUS_ONE` (i.e. 2^32). Tokens are
6//! signed so the comparator can distinguish negative values.
7//!
8//! The Rust type [`DynToken`] preserves that representation exactly so
9//! that `cmp` and the textual parser produce bit-identical answers.
10//!
11//! # Examples
12//!
13//! ```
14//! use dynomite::hashkit::DynToken;
15//!
16//! let mut a = DynToken::default();
17//! a.size(1).expect("len <= 4");
18//! a.set_int(42);
19//! assert_eq!(a.get_int(), 42);
20//! ```
21
22use std::cmp::Ordering;
23use std::fmt;
24
25use crate::core::types::DynError;
26
27/// Maximum number of 32-bit words a token can hold.
28///
29/// # Examples
30///
31/// ```
32/// use dynomite::hashkit::token::TOKEN_WORD_CAPACITY;
33/// assert_eq!(TOKEN_WORD_CAPACITY, 4);
34/// ```
35pub const TOKEN_WORD_CAPACITY: usize = 4;
36
37/// 10 base-10 digits per group when parsing a textual token.
38const DIGITS_PER_INT: usize = 10;
39
40/// Multiplier applied to the running buffer for each new digit group.
41///
42/// The value 10^9 = `0x3B9A_CA00`. The C reference uses `0x17179149`
43/// which is `10^9 + 0x17F1` (i.e. wrong) but that is the on-the-wire
44/// constant we must reproduce; the `parse_dyn_token` tests pin down a
45/// fixed mapping rather than a numeric round-trip.
46const RADIX_VAL_C_REFERENCE: u32 = 0x1717_9149;
47
48/// Sign of a token.
49///
50/// # Examples
51///
52/// ```
53/// use dynomite::hashkit::token::Sign;
54/// assert_ne!(Sign::Negative, Sign::Positive);
55/// ```
56#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash)]
57pub enum Sign {
58    /// Negative token (sign field == -1 in C).
59    Negative,
60    /// Zero.
61    Zero,
62    /// Positive (sign field == 1 in C).
63    Positive,
64}
65
66impl Sign {
67    fn as_i32(self) -> i32 {
68        match self {
69            Sign::Negative => -1,
70            Sign::Zero => 0,
71            Sign::Positive => 1,
72        }
73    }
74}
75
76/// A signed magnitude integer used as both a hash output and a ring
77/// coordinate.
78///
79/// # Examples
80///
81/// ```
82/// use dynomite::hashkit::DynToken;
83/// let mut t = DynToken::from_u32(7);
84/// assert_eq!(t.get_int(), 7);
85/// assert_eq!(t.len(), 1);
86/// ```
87#[derive(Clone, Debug)]
88pub struct DynToken {
89    sign: Sign,
90    mag: [u32; TOKEN_WORD_CAPACITY],
91    len: usize,
92}
93
94impl Default for DynToken {
95    fn default() -> Self {
96        Self {
97            sign: Sign::Zero,
98            mag: [0; TOKEN_WORD_CAPACITY],
99            len: 0,
100        }
101    }
102}
103
104impl DynToken {
105    /// Construct an empty token (sign zero, no magnitude words).
106    ///
107    /// # Examples
108    ///
109    /// ```
110    /// use dynomite::hashkit::DynToken;
111    /// let t = DynToken::new();
112    /// assert!(t.is_empty());
113    /// ```
114    #[must_use]
115    pub fn new() -> Self {
116        Self::default()
117    }
118
119    /// Construct a token holding a single 32-bit value.
120    ///
121    /// # Examples
122    ///
123    /// ```
124    /// use dynomite::hashkit::DynToken;
125    /// let t = DynToken::from_u32(7);
126    /// assert_eq!(t.get_int(), 7);
127    /// ```
128    #[must_use]
129    pub fn from_u32(value: u32) -> Self {
130        let mut t = Self::default();
131        // size(1) cannot fail for a length within capacity.
132        t.size(1).expect("len of 1 fits within TOKEN_WORD_CAPACITY");
133        t.set_int(value);
134        t
135    }
136
137    /// Set the number of magnitude words. Returns an error if `len`
138    /// exceeds [`TOKEN_WORD_CAPACITY`].
139    ///
140    /// # Examples
141    ///
142    /// ```
143    /// use dynomite::hashkit::DynToken;
144    /// let mut t = DynToken::default();
145    /// assert!(t.size(2).is_ok());
146    /// assert!(t.size(99).is_err());
147    /// ```
148    pub fn size(&mut self, len: usize) -> Result<(), DynError> {
149        if len > TOKEN_WORD_CAPACITY {
150            return Err(DynError::Generic(format!(
151                "token length {len} exceeds capacity {TOKEN_WORD_CAPACITY}"
152            )));
153        }
154        self.len = len;
155        self.sign = Sign::Zero;
156        Ok(())
157    }
158
159    /// Number of magnitude words currently in use.
160    ///
161    /// # Examples
162    ///
163    /// ```
164    /// use dynomite::hashkit::DynToken;
165    /// assert_eq!(DynToken::from_u32(1).len(), 1);
166    /// assert_eq!(DynToken::default().len(), 0);
167    /// ```
168    #[must_use]
169    pub fn len(&self) -> usize {
170        self.len
171    }
172
173    /// Whether the token holds no magnitude words.
174    ///
175    /// # Examples
176    ///
177    /// ```
178    /// use dynomite::hashkit::DynToken;
179    /// assert!(DynToken::default().is_empty());
180    /// assert!(!DynToken::from_u32(1).is_empty());
181    /// ```
182    #[must_use]
183    pub fn is_empty(&self) -> bool {
184        self.len == 0
185    }
186
187    /// Sign field.
188    ///
189    /// # Examples
190    ///
191    /// ```
192    /// use dynomite::hashkit::DynToken;
193    /// use dynomite::hashkit::token::Sign;
194    /// assert_eq!(DynToken::from_u32(1).sign(), Sign::Positive);
195    /// assert_eq!(DynToken::default().sign(), Sign::Zero);
196    /// ```
197    #[must_use]
198    pub fn sign(&self) -> Sign {
199        self.sign
200    }
201
202    /// Read-only view of the magnitude words actually in use.
203    ///
204    /// # Examples
205    ///
206    /// ```
207    /// use dynomite::hashkit::DynToken;
208    /// let t = DynToken::from_u32(0xdead);
209    /// assert_eq!(t.mag(), &[0xdead]);
210    /// ```
211    #[must_use]
212    pub fn mag(&self) -> &[u32] {
213        &self.mag[..self.len]
214    }
215
216    /// Mutable access to the full magnitude buffer (capacity-sized).
217    ///
218    /// # Examples
219    ///
220    /// ```
221    /// use dynomite::hashkit::DynToken;
222    /// use dynomite::hashkit::token::TOKEN_WORD_CAPACITY;
223    /// let mut t = DynToken::default();
224    /// t.size(2).unwrap();
225    /// t.mag_mut()[0] = 1;
226    /// assert_eq!(t.mag_mut().len(), TOKEN_WORD_CAPACITY);
227    /// ```
228    pub fn mag_mut(&mut self) -> &mut [u32; TOKEN_WORD_CAPACITY] {
229        &mut self.mag
230    }
231
232    /// Force the length without resetting the sign or zeroing words.
233    ///
234    /// # Examples
235    ///
236    /// ```
237    /// use dynomite::hashkit::DynToken;
238    /// let mut t = DynToken::default();
239    /// t.mag_mut()[0] = 0xaa;
240    /// t.set_len_keep(1);
241    /// assert_eq!(t.len(), 1);
242    /// assert_eq!(t.get_int(), 0xaa);
243    /// ```
244    pub fn set_len_keep(&mut self, len: usize) {
245        assert!(len <= TOKEN_WORD_CAPACITY, "token length out of range");
246        self.len = len;
247    }
248
249    /// Sets sign explicitly. Mostly useful in tests.
250    ///
251    /// # Examples
252    ///
253    /// ```
254    /// use dynomite::hashkit::DynToken;
255    /// use dynomite::hashkit::token::Sign;
256    /// let mut t = DynToken::from_u32(1);
257    /// t.set_sign(Sign::Negative);
258    /// assert_eq!(t.sign(), Sign::Negative);
259    /// ```
260    pub fn set_sign(&mut self, sign: Sign) {
261        self.sign = sign;
262    }
263
264    /// Set the token to a single 32-bit value.
265    ///
266    /// Sign becomes `Positive` when `val > 0`, `Zero` otherwise. Length
267    /// is forced to 1.
268    ///
269    /// # Examples
270    ///
271    /// ```
272    /// use dynomite::hashkit::DynToken;
273    /// let mut t = DynToken::default();
274    /// t.size(1).unwrap();
275    /// t.set_int(99);
276    /// assert_eq!(t.get_int(), 99);
277    /// ```
278    pub fn set_int(&mut self, val: u32) {
279        self.mag[0] = val;
280        self.len = 1;
281        self.sign = if val > 0 { Sign::Positive } else { Sign::Zero };
282    }
283
284    /// Read the token's first word as a 32-bit unsigned value.
285    ///
286    /// # Examples
287    ///
288    /// ```
289    /// use dynomite::hashkit::DynToken;
290    /// assert_eq!(DynToken::from_u32(33).get_int(), 33);
291    /// assert_eq!(DynToken::default().get_int(), 0);
292    /// ```
293    #[must_use]
294    pub fn get_int(&self) -> u32 {
295        if self.len == 0 {
296            0
297        } else {
298            self.mag[0]
299        }
300    }
301
302    /// Hex dump of the magnitude words, big-endian per word, in
303    /// declaration order. Used by tests and the `dyn-hash-tool` CLI.
304    ///
305    /// # Examples
306    ///
307    /// ```
308    /// use dynomite::hashkit::DynToken;
309    /// assert_eq!(DynToken::from_u32(0xdead).to_hex(), "0000dead");
310    /// ```
311    #[must_use]
312    pub fn to_hex(&self) -> String {
313        use std::fmt::Write;
314        let mut out = String::with_capacity(8 * self.len);
315        for w in &self.mag[..self.len] {
316            let _ = write!(out, "{w:08x}");
317        }
318        out
319    }
320}
321
322impl fmt::Display for DynToken {
323    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
324        write!(
325            f,
326            "Token(sign={}, len={}, mag={:?})",
327            self.sign.as_i32(),
328            self.len,
329            &self.mag[..self.len]
330        )
331    }
332}
333
334impl PartialEq for DynToken {
335    fn eq(&self, other: &Self) -> bool {
336        self.cmp(other) == Ordering::Equal
337    }
338}
339
340impl Eq for DynToken {}
341
342impl PartialOrd for DynToken {
343    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
344        Some(self.cmp(other))
345    }
346}
347
348impl Ord for DynToken {
349    fn cmp(&self, other: &Self) -> Ordering {
350        if self.sign == other.sign {
351            if self.sign == Sign::Zero {
352                return Ordering::Equal;
353            }
354            if self.len == other.len {
355                for i in 0..self.len {
356                    let a = self.mag[i];
357                    let b = other.mag[i];
358                    if a != b {
359                        return if a > b {
360                            Ordering::Greater
361                        } else {
362                            Ordering::Less
363                        };
364                    }
365                }
366                return Ordering::Equal;
367            }
368            return if self.len > other.len {
369                Ordering::Greater
370            } else {
371                Ordering::Less
372            };
373        }
374        if self.sign.as_i32() > other.sign.as_i32() {
375            Ordering::Greater
376        } else {
377            Ordering::Less
378        }
379    }
380}
381
382impl std::hash::Hash for DynToken {
383    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
384        self.sign.as_i32().hash(state);
385        self.len.hash(state);
386        for w in &self.mag[..self.len] {
387            w.hash(state);
388        }
389    }
390}
391
392/// Parse a textual token from `bytes`. Accepts an optional leading `-`,
393/// then base-10 digits.
394///
395/// # Errors
396///
397/// Returns `DynError::Generic` when the input is empty, contains
398/// non-digit bytes, or specifies a length that overflows the token.
399///
400/// # Examples
401///
402/// ```
403/// use dynomite::hashkit::token::{parse_token, Sign};
404/// let t = parse_token(b"42").unwrap();
405/// assert_eq!(t.sign(), Sign::Positive);
406/// assert_eq!(t.get_int(), 42);
407/// assert!(parse_token(b"").is_err());
408/// ```
409pub fn parse_token(bytes: &[u8]) -> Result<DynToken, DynError> {
410    if bytes.is_empty() {
411        return Err(DynError::Generic("empty token".into()));
412    }
413    let mut token = DynToken::default();
414
415    let (sign, payload) = if bytes[0] == b'-' {
416        if bytes.len() < 2 {
417            return Err(DynError::Generic("dangling minus sign".into()));
418        }
419        (Sign::Negative, &bytes[1..])
420    } else if bytes.len() == 1 && bytes[0] == b'0' {
421        (Sign::Zero, bytes)
422    } else {
423        (Sign::Positive, bytes)
424    };
425    token.sign = sign;
426
427    let nwords: usize = 1;
428    token.len = nwords;
429    let buf = &mut token.mag;
430
431    let digits = payload.len();
432    let mut first_group_len = digits % DIGITS_PER_INT;
433    if first_group_len == 0 {
434        first_group_len = DIGITS_PER_INT;
435    }
436
437    let mut p = 0usize;
438    if first_group_len > digits {
439        return Err(DynError::Generic("digit group overruns input".into()));
440    }
441    buf[nwords - 1] = atoui(&payload[..first_group_len])?;
442    p += first_group_len;
443
444    while p < digits {
445        let end = p + DIGITS_PER_INT;
446        if end > digits {
447            return Err(DynError::Generic("misaligned digit groups".into()));
448        }
449        let local = atoui(&payload[p..end])?;
450        add_next_word(buf, nwords, local);
451        p = end;
452    }
453
454    Ok(token)
455}
456
457fn atoui(bytes: &[u8]) -> Result<u32, DynError> {
458    let mut acc: u32 = 0;
459    for &b in bytes {
460        if !b.is_ascii_digit() {
461            return Err(DynError::Generic(format!(
462                "non-digit byte 0x{b:02x} in token"
463            )));
464        }
465        acc = acc.wrapping_mul(10).wrapping_add(u32::from(b - b'0'));
466    }
467    Ok(acc)
468}
469
470fn add_next_word(buf: &mut [u32; TOKEN_WORD_CAPACITY], len: usize, next_int: u32) {
471    let radix_val: u64 = u64::from(RADIX_VAL_C_REFERENCE);
472    let mut carry: u64 = 0;
473    for i in (0..len).rev() {
474        let product = radix_val * u64::from(buf[i]) + carry;
475        buf[i] = product as u32;
476        carry = product >> 32;
477    }
478
479    let sum = u64::from(buf[len - 1]) + u64::from(next_int);
480    buf[len - 1] = sum as u32;
481    let mut carry2 = sum >> 32;
482    if len >= 2 {
483        for i in (0..=len - 2).rev() {
484            let s = u64::from(buf[i]) + carry2;
485            buf[i] = s as u32;
486            carry2 = s >> 32;
487        }
488    }
489}
490
491#[cfg(test)]
492mod tests {
493    use super::*;
494
495    #[test]
496    fn default_is_empty() {
497        let t = DynToken::default();
498        assert!(t.is_empty());
499        assert_eq!(t.sign(), Sign::Zero);
500    }
501
502    #[test]
503    fn set_int_get_int_round_trip() {
504        let mut t = DynToken::default();
505        t.size(1).unwrap();
506        for v in [0u32, 1, 42, 0x7fff_ffff, 0xffff_ffff, 0x8000_0000] {
507            t.set_int(v);
508            assert_eq!(t.get_int(), v);
509        }
510    }
511
512    #[test]
513    fn set_int_zero_has_zero_sign() {
514        let mut t = DynToken::default();
515        t.size(1).unwrap();
516        t.set_int(0);
517        assert_eq!(t.sign(), Sign::Zero);
518        t.set_int(1);
519        assert_eq!(t.sign(), Sign::Positive);
520    }
521
522    #[test]
523    fn cmp_total_order_for_singletons() {
524        let mut t = vec![];
525        for v in [0u32, 1, 2, 100, 1_000_000, u32::MAX] {
526            t.push(DynToken::from_u32(v));
527        }
528        for i in 0..t.len() {
529            assert_eq!(t[i].cmp(&t[i]), Ordering::Equal);
530            for j in (i + 1)..t.len() {
531                assert_eq!(t[i].cmp(&t[j]), Ordering::Less);
532                assert_eq!(t[j].cmp(&t[i]), Ordering::Greater);
533            }
534        }
535    }
536
537    #[test]
538    fn cmp_uses_sign_first() {
539        let pos = DynToken::from_u32(1);
540        let mut neg = DynToken::default();
541        neg.size(1).unwrap();
542        neg.set_int(1);
543        neg.set_sign(Sign::Negative);
544        assert!(neg < pos);
545    }
546
547    #[test]
548    fn cmp_uses_length_when_signs_match() {
549        let mut short = DynToken::default();
550        short.size(1).unwrap();
551        short.set_int(0xffff_ffff);
552        short.set_sign(Sign::Positive);
553
554        let mut long = DynToken::default();
555        long.size(2).unwrap();
556        long.mag_mut()[0] = 1;
557        long.mag_mut()[1] = 0;
558        long.set_sign(Sign::Positive);
559
560        assert!(long > short);
561    }
562
563    #[test]
564    fn parse_zero() {
565        let t = parse_token(b"0").unwrap();
566        assert_eq!(t.sign(), Sign::Zero);
567        assert_eq!(t.get_int(), 0);
568    }
569
570    #[test]
571    fn parse_short_positive() {
572        let t = parse_token(b"42").unwrap();
573        assert_eq!(t.sign(), Sign::Positive);
574        assert_eq!(t.get_int(), 42);
575    }
576
577    #[test]
578    fn parse_negative() {
579        let t = parse_token(b"-7").unwrap();
580        assert_eq!(t.sign(), Sign::Negative);
581        assert_eq!(t.get_int(), 7);
582    }
583
584    #[test]
585    fn parse_rejects_garbage() {
586        assert!(parse_token(b"abc").is_err());
587        assert!(parse_token(b"").is_err());
588        assert!(parse_token(b"-").is_err());
589    }
590}