dynomite/hashkit/token.rs
1//! Big-integer token used as the hash output and ring coordinate.
2//!
3//! The C reference stores tokens as `(signum, mag[4], len)`, where
4//! `mag[]` holds little-significance-first 32-bit words in a numeral
5//! system whose radix is `UINT_MAX_PLUS_ONE` (i.e. 2^32). Tokens are
6//! signed so the comparator can distinguish negative values.
7//!
8//! The Rust type [`DynToken`] preserves that representation exactly so
9//! that `cmp` and the textual parser produce bit-identical answers.
10//!
11//! # Examples
12//!
13//! ```
14//! use dynomite::hashkit::DynToken;
15//!
16//! let mut a = DynToken::default();
17//! a.size(1).expect("len <= 4");
18//! a.set_int(42);
19//! assert_eq!(a.get_int(), 42);
20//! ```
21
22use std::cmp::Ordering;
23use std::fmt;
24
25use crate::core::types::DynError;
26
27/// Maximum number of 32-bit words a token can hold.
28///
29/// # Examples
30///
31/// ```
32/// use dynomite::hashkit::token::TOKEN_WORD_CAPACITY;
33/// assert_eq!(TOKEN_WORD_CAPACITY, 4);
34/// ```
35pub const TOKEN_WORD_CAPACITY: usize = 4;
36
37/// 10 base-10 digits per group when parsing a textual token.
38const DIGITS_PER_INT: usize = 10;
39
40/// Multiplier applied to the running buffer for each new digit group.
41///
42/// The value 10^9 = `0x3B9A_CA00`. The C reference uses `0x17179149`
43/// which is `10^9 + 0x17F1` (i.e. wrong) but that is the on-the-wire
44/// constant we must reproduce; the `parse_dyn_token` tests pin down a
45/// fixed mapping rather than a numeric round-trip.
46const RADIX_VAL_C_REFERENCE: u32 = 0x1717_9149;
47
48/// Sign of a token.
49///
50/// # Examples
51///
52/// ```
53/// use dynomite::hashkit::token::Sign;
54/// assert_ne!(Sign::Negative, Sign::Positive);
55/// ```
56#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash)]
57pub enum Sign {
58 /// Negative token (sign field == -1 in C).
59 Negative,
60 /// Zero.
61 Zero,
62 /// Positive (sign field == 1 in C).
63 Positive,
64}
65
66impl Sign {
67 fn as_i32(self) -> i32 {
68 match self {
69 Sign::Negative => -1,
70 Sign::Zero => 0,
71 Sign::Positive => 1,
72 }
73 }
74}
75
76/// A signed magnitude integer used as both a hash output and a ring
77/// coordinate.
78///
79/// # Examples
80///
81/// ```
82/// use dynomite::hashkit::DynToken;
83/// let mut t = DynToken::from_u32(7);
84/// assert_eq!(t.get_int(), 7);
85/// assert_eq!(t.len(), 1);
86/// ```
87#[derive(Clone, Debug)]
88pub struct DynToken {
89 sign: Sign,
90 mag: [u32; TOKEN_WORD_CAPACITY],
91 len: usize,
92}
93
94impl Default for DynToken {
95 fn default() -> Self {
96 Self {
97 sign: Sign::Zero,
98 mag: [0; TOKEN_WORD_CAPACITY],
99 len: 0,
100 }
101 }
102}
103
104impl DynToken {
105 /// Construct an empty token (sign zero, no magnitude words).
106 ///
107 /// # Examples
108 ///
109 /// ```
110 /// use dynomite::hashkit::DynToken;
111 /// let t = DynToken::new();
112 /// assert!(t.is_empty());
113 /// ```
114 #[must_use]
115 pub fn new() -> Self {
116 Self::default()
117 }
118
119 /// Construct a token holding a single 32-bit value.
120 ///
121 /// # Examples
122 ///
123 /// ```
124 /// use dynomite::hashkit::DynToken;
125 /// let t = DynToken::from_u32(7);
126 /// assert_eq!(t.get_int(), 7);
127 /// ```
128 #[must_use]
129 pub fn from_u32(value: u32) -> Self {
130 let mut t = Self::default();
131 // size(1) cannot fail for a length within capacity.
132 t.size(1).expect("len of 1 fits within TOKEN_WORD_CAPACITY");
133 t.set_int(value);
134 t
135 }
136
137 /// Set the number of magnitude words. Returns an error if `len`
138 /// exceeds [`TOKEN_WORD_CAPACITY`].
139 ///
140 /// # Examples
141 ///
142 /// ```
143 /// use dynomite::hashkit::DynToken;
144 /// let mut t = DynToken::default();
145 /// assert!(t.size(2).is_ok());
146 /// assert!(t.size(99).is_err());
147 /// ```
148 pub fn size(&mut self, len: usize) -> Result<(), DynError> {
149 if len > TOKEN_WORD_CAPACITY {
150 return Err(DynError::Generic(format!(
151 "token length {len} exceeds capacity {TOKEN_WORD_CAPACITY}"
152 )));
153 }
154 self.len = len;
155 self.sign = Sign::Zero;
156 Ok(())
157 }
158
159 /// Number of magnitude words currently in use.
160 ///
161 /// # Examples
162 ///
163 /// ```
164 /// use dynomite::hashkit::DynToken;
165 /// assert_eq!(DynToken::from_u32(1).len(), 1);
166 /// assert_eq!(DynToken::default().len(), 0);
167 /// ```
168 #[must_use]
169 pub fn len(&self) -> usize {
170 self.len
171 }
172
173 /// Whether the token holds no magnitude words.
174 ///
175 /// # Examples
176 ///
177 /// ```
178 /// use dynomite::hashkit::DynToken;
179 /// assert!(DynToken::default().is_empty());
180 /// assert!(!DynToken::from_u32(1).is_empty());
181 /// ```
182 #[must_use]
183 pub fn is_empty(&self) -> bool {
184 self.len == 0
185 }
186
187 /// Sign field.
188 ///
189 /// # Examples
190 ///
191 /// ```
192 /// use dynomite::hashkit::DynToken;
193 /// use dynomite::hashkit::token::Sign;
194 /// assert_eq!(DynToken::from_u32(1).sign(), Sign::Positive);
195 /// assert_eq!(DynToken::default().sign(), Sign::Zero);
196 /// ```
197 #[must_use]
198 pub fn sign(&self) -> Sign {
199 self.sign
200 }
201
202 /// Read-only view of the magnitude words actually in use.
203 ///
204 /// # Examples
205 ///
206 /// ```
207 /// use dynomite::hashkit::DynToken;
208 /// let t = DynToken::from_u32(0xdead);
209 /// assert_eq!(t.mag(), &[0xdead]);
210 /// ```
211 #[must_use]
212 pub fn mag(&self) -> &[u32] {
213 &self.mag[..self.len]
214 }
215
216 /// Mutable access to the full magnitude buffer (capacity-sized).
217 ///
218 /// # Examples
219 ///
220 /// ```
221 /// use dynomite::hashkit::DynToken;
222 /// use dynomite::hashkit::token::TOKEN_WORD_CAPACITY;
223 /// let mut t = DynToken::default();
224 /// t.size(2).unwrap();
225 /// t.mag_mut()[0] = 1;
226 /// assert_eq!(t.mag_mut().len(), TOKEN_WORD_CAPACITY);
227 /// ```
228 pub fn mag_mut(&mut self) -> &mut [u32; TOKEN_WORD_CAPACITY] {
229 &mut self.mag
230 }
231
232 /// Force the length without resetting the sign or zeroing words.
233 ///
234 /// # Examples
235 ///
236 /// ```
237 /// use dynomite::hashkit::DynToken;
238 /// let mut t = DynToken::default();
239 /// t.mag_mut()[0] = 0xaa;
240 /// t.set_len_keep(1);
241 /// assert_eq!(t.len(), 1);
242 /// assert_eq!(t.get_int(), 0xaa);
243 /// ```
244 pub fn set_len_keep(&mut self, len: usize) {
245 assert!(len <= TOKEN_WORD_CAPACITY, "token length out of range");
246 self.len = len;
247 }
248
249 /// Sets sign explicitly. Mostly useful in tests.
250 ///
251 /// # Examples
252 ///
253 /// ```
254 /// use dynomite::hashkit::DynToken;
255 /// use dynomite::hashkit::token::Sign;
256 /// let mut t = DynToken::from_u32(1);
257 /// t.set_sign(Sign::Negative);
258 /// assert_eq!(t.sign(), Sign::Negative);
259 /// ```
260 pub fn set_sign(&mut self, sign: Sign) {
261 self.sign = sign;
262 }
263
264 /// Set the token to a single 32-bit value.
265 ///
266 /// Sign becomes `Positive` when `val > 0`, `Zero` otherwise. Length
267 /// is forced to 1.
268 ///
269 /// # Examples
270 ///
271 /// ```
272 /// use dynomite::hashkit::DynToken;
273 /// let mut t = DynToken::default();
274 /// t.size(1).unwrap();
275 /// t.set_int(99);
276 /// assert_eq!(t.get_int(), 99);
277 /// ```
278 pub fn set_int(&mut self, val: u32) {
279 self.mag[0] = val;
280 self.len = 1;
281 self.sign = if val > 0 { Sign::Positive } else { Sign::Zero };
282 }
283
284 /// Read the token's first word as a 32-bit unsigned value.
285 ///
286 /// # Examples
287 ///
288 /// ```
289 /// use dynomite::hashkit::DynToken;
290 /// assert_eq!(DynToken::from_u32(33).get_int(), 33);
291 /// assert_eq!(DynToken::default().get_int(), 0);
292 /// ```
293 #[must_use]
294 pub fn get_int(&self) -> u32 {
295 if self.len == 0 {
296 0
297 } else {
298 self.mag[0]
299 }
300 }
301
302 /// Hex dump of the magnitude words, big-endian per word, in
303 /// declaration order. Used by tests and the `dyn-hash-tool` CLI.
304 ///
305 /// # Examples
306 ///
307 /// ```
308 /// use dynomite::hashkit::DynToken;
309 /// assert_eq!(DynToken::from_u32(0xdead).to_hex(), "0000dead");
310 /// ```
311 #[must_use]
312 pub fn to_hex(&self) -> String {
313 use std::fmt::Write;
314 let mut out = String::with_capacity(8 * self.len);
315 for w in &self.mag[..self.len] {
316 let _ = write!(out, "{w:08x}");
317 }
318 out
319 }
320}
321
322impl fmt::Display for DynToken {
323 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
324 write!(
325 f,
326 "Token(sign={}, len={}, mag={:?})",
327 self.sign.as_i32(),
328 self.len,
329 &self.mag[..self.len]
330 )
331 }
332}
333
334impl PartialEq for DynToken {
335 fn eq(&self, other: &Self) -> bool {
336 self.cmp(other) == Ordering::Equal
337 }
338}
339
340impl Eq for DynToken {}
341
342impl PartialOrd for DynToken {
343 fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
344 Some(self.cmp(other))
345 }
346}
347
348impl Ord for DynToken {
349 fn cmp(&self, other: &Self) -> Ordering {
350 if self.sign == other.sign {
351 if self.sign == Sign::Zero {
352 return Ordering::Equal;
353 }
354 if self.len == other.len {
355 for i in 0..self.len {
356 let a = self.mag[i];
357 let b = other.mag[i];
358 if a != b {
359 return if a > b {
360 Ordering::Greater
361 } else {
362 Ordering::Less
363 };
364 }
365 }
366 return Ordering::Equal;
367 }
368 return if self.len > other.len {
369 Ordering::Greater
370 } else {
371 Ordering::Less
372 };
373 }
374 if self.sign.as_i32() > other.sign.as_i32() {
375 Ordering::Greater
376 } else {
377 Ordering::Less
378 }
379 }
380}
381
382impl std::hash::Hash for DynToken {
383 fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
384 self.sign.as_i32().hash(state);
385 self.len.hash(state);
386 for w in &self.mag[..self.len] {
387 w.hash(state);
388 }
389 }
390}
391
392/// Parse a textual token from `bytes`. Accepts an optional leading `-`,
393/// then base-10 digits.
394///
395/// # Errors
396///
397/// Returns `DynError::Generic` when the input is empty, contains
398/// non-digit bytes, or specifies a length that overflows the token.
399///
400/// # Examples
401///
402/// ```
403/// use dynomite::hashkit::token::{parse_token, Sign};
404/// let t = parse_token(b"42").unwrap();
405/// assert_eq!(t.sign(), Sign::Positive);
406/// assert_eq!(t.get_int(), 42);
407/// assert!(parse_token(b"").is_err());
408/// ```
409pub fn parse_token(bytes: &[u8]) -> Result<DynToken, DynError> {
410 if bytes.is_empty() {
411 return Err(DynError::Generic("empty token".into()));
412 }
413 let mut token = DynToken::default();
414
415 let (sign, payload) = if bytes[0] == b'-' {
416 if bytes.len() < 2 {
417 return Err(DynError::Generic("dangling minus sign".into()));
418 }
419 (Sign::Negative, &bytes[1..])
420 } else if bytes.len() == 1 && bytes[0] == b'0' {
421 (Sign::Zero, bytes)
422 } else {
423 (Sign::Positive, bytes)
424 };
425 token.sign = sign;
426
427 let nwords: usize = 1;
428 token.len = nwords;
429 let buf = &mut token.mag;
430
431 let digits = payload.len();
432 let mut first_group_len = digits % DIGITS_PER_INT;
433 if first_group_len == 0 {
434 first_group_len = DIGITS_PER_INT;
435 }
436
437 let mut p = 0usize;
438 if first_group_len > digits {
439 return Err(DynError::Generic("digit group overruns input".into()));
440 }
441 buf[nwords - 1] = atoui(&payload[..first_group_len])?;
442 p += first_group_len;
443
444 while p < digits {
445 let end = p + DIGITS_PER_INT;
446 if end > digits {
447 return Err(DynError::Generic("misaligned digit groups".into()));
448 }
449 let local = atoui(&payload[p..end])?;
450 add_next_word(buf, nwords, local);
451 p = end;
452 }
453
454 Ok(token)
455}
456
457fn atoui(bytes: &[u8]) -> Result<u32, DynError> {
458 let mut acc: u32 = 0;
459 for &b in bytes {
460 if !b.is_ascii_digit() {
461 return Err(DynError::Generic(format!(
462 "non-digit byte 0x{b:02x} in token"
463 )));
464 }
465 acc = acc.wrapping_mul(10).wrapping_add(u32::from(b - b'0'));
466 }
467 Ok(acc)
468}
469
470fn add_next_word(buf: &mut [u32; TOKEN_WORD_CAPACITY], len: usize, next_int: u32) {
471 let radix_val: u64 = u64::from(RADIX_VAL_C_REFERENCE);
472 let mut carry: u64 = 0;
473 for i in (0..len).rev() {
474 let product = radix_val * u64::from(buf[i]) + carry;
475 buf[i] = product as u32;
476 carry = product >> 32;
477 }
478
479 let sum = u64::from(buf[len - 1]) + u64::from(next_int);
480 buf[len - 1] = sum as u32;
481 let mut carry2 = sum >> 32;
482 if len >= 2 {
483 for i in (0..=len - 2).rev() {
484 let s = u64::from(buf[i]) + carry2;
485 buf[i] = s as u32;
486 carry2 = s >> 32;
487 }
488 }
489}
490
491#[cfg(test)]
492mod tests {
493 use super::*;
494
495 #[test]
496 fn default_is_empty() {
497 let t = DynToken::default();
498 assert!(t.is_empty());
499 assert_eq!(t.sign(), Sign::Zero);
500 }
501
502 #[test]
503 fn set_int_get_int_round_trip() {
504 let mut t = DynToken::default();
505 t.size(1).unwrap();
506 for v in [0u32, 1, 42, 0x7fff_ffff, 0xffff_ffff, 0x8000_0000] {
507 t.set_int(v);
508 assert_eq!(t.get_int(), v);
509 }
510 }
511
512 #[test]
513 fn set_int_zero_has_zero_sign() {
514 let mut t = DynToken::default();
515 t.size(1).unwrap();
516 t.set_int(0);
517 assert_eq!(t.sign(), Sign::Zero);
518 t.set_int(1);
519 assert_eq!(t.sign(), Sign::Positive);
520 }
521
522 #[test]
523 fn cmp_total_order_for_singletons() {
524 let mut t = vec![];
525 for v in [0u32, 1, 2, 100, 1_000_000, u32::MAX] {
526 t.push(DynToken::from_u32(v));
527 }
528 for i in 0..t.len() {
529 assert_eq!(t[i].cmp(&t[i]), Ordering::Equal);
530 for j in (i + 1)..t.len() {
531 assert_eq!(t[i].cmp(&t[j]), Ordering::Less);
532 assert_eq!(t[j].cmp(&t[i]), Ordering::Greater);
533 }
534 }
535 }
536
537 #[test]
538 fn cmp_uses_sign_first() {
539 let pos = DynToken::from_u32(1);
540 let mut neg = DynToken::default();
541 neg.size(1).unwrap();
542 neg.set_int(1);
543 neg.set_sign(Sign::Negative);
544 assert!(neg < pos);
545 }
546
547 #[test]
548 fn cmp_uses_length_when_signs_match() {
549 let mut short = DynToken::default();
550 short.size(1).unwrap();
551 short.set_int(0xffff_ffff);
552 short.set_sign(Sign::Positive);
553
554 let mut long = DynToken::default();
555 long.size(2).unwrap();
556 long.mag_mut()[0] = 1;
557 long.mag_mut()[1] = 0;
558 long.set_sign(Sign::Positive);
559
560 assert!(long > short);
561 }
562
563 #[test]
564 fn parse_zero() {
565 let t = parse_token(b"0").unwrap();
566 assert_eq!(t.sign(), Sign::Zero);
567 assert_eq!(t.get_int(), 0);
568 }
569
570 #[test]
571 fn parse_short_positive() {
572 let t = parse_token(b"42").unwrap();
573 assert_eq!(t.sign(), Sign::Positive);
574 assert_eq!(t.get_int(), 42);
575 }
576
577 #[test]
578 fn parse_negative() {
579 let t = parse_token(b"-7").unwrap();
580 assert_eq!(t.sign(), Sign::Negative);
581 assert_eq!(t.get_int(), 7);
582 }
583
584 #[test]
585 fn parse_rejects_garbage() {
586 assert!(parse_token(b"abc").is_err());
587 assert!(parse_token(b"").is_err());
588 assert!(parse_token(b"-").is_err());
589 }
590}