// axsys_noun/atom.rs

use std::{
    collections::hash_map::DefaultHasher,
    ffi::OsStr,
    fmt::{Display, Error, Formatter},
    hash::Hasher,
    str::{self, Utf8Error},
};

use ibig::UBig;

use crate::{sept::{date, Aura, FormatError, ParseError}, Noun};

/// Returns the length in bits of a little-endian sequence of bytes.
///
/// The length is the position of the most significant set bit, so most-significant zero bytes at
/// the end of the slice do not contribute, and an empty or all-zero slice has a length of `0`.
fn bit_len(bytes: &[u8]) -> usize {
    // `u8::BITS` is 8, so widening it to `usize` is lossless on every target.
    const BYTE_BITS: usize = u8::BITS as usize;
    match bytes.iter().rposition(|&byte| byte != 0) {
        // All arithmetic stays in `usize`: routing the byte count through `u32` panicked for
        // slices longer than `u32::MAX` bytes and overflowed once the bit count passed `u32::MAX`.
        Some(top) => top * BYTE_BITS + (BYTE_BITS - bytes[top].leading_zeros() as usize),
        None => 0,
    }
}

24/// A bitwise [`Atom`] builder.
25pub struct Builder {
26    bytes: Vec<u8>,
27    bit_idx: usize,
28}
29
30impl Builder {
31    /// Creates an empty atom builder.
32    pub fn new() -> Self {
33        Self {
34            bytes: Vec::new(),
35            bit_idx: 0,
36        }
37    }
38
39    /// Returns the current bitwise position of the end of this builder.
40    pub fn pos(&self) -> usize {
41        self.bit_idx
42    }
43
44    /// Pushes a bit onto the end of this builder.
45    pub fn push_bit(&mut self, bit: bool) {
46        let u8_bits = usize::try_from(u8::BITS).expect("u32 to usize");
47        let byte_idx = self.bit_idx / u8_bits;
48        if byte_idx == self.bytes.len() {
49            self.bytes.push(0);
50        }
51        let byte = &mut self.bytes[byte_idx];
52        let shift = self.bit_idx % u8_bits;
53        if bit {
54            *byte |= 1 << shift;
55        } else {
56            *byte &= !(1 << shift);
57        }
58        self.bit_idx += 1;
59    }
60
61    /// Converts this builder into an `Atom`, consuming the builder.
62    pub fn into_atom(self) -> Atom {
63        let bytes = self.bytes;
64        let bit_len = bit_len(&bytes[..]);
65        Atom { bytes, bit_len }
66    }
67}
68
69impl Default for Builder {
70    fn default() -> Self {
71        Self::new()
72    }
73}
74
/// An unsigned integer of arbitrary size.
///
/// An [atom] stores its value as a contiguous, little-endian sequence of bytes. Atoms support the
/// following operations:
/// - construction one bit at a time (see [`Builder`]) or by conversion from primitive unsigned
///   integers, strings, and string slices;
/// - bit-by-bit iteration;
/// - comparison with other atoms and with atom-like types;
/// - pretty-printing as a hexadecimal number;
/// - conversion into a noun, a primitive unsigned integer type, or a string slice.
///
/// [atom]: https://developers.urbit.org/reference/glossary/atom
///
/// # Examples
///
/// To create a new atom, use one of the `From<T>` implementations:
///
/// ```
/// # use axsys_noun::atom::Atom;
/// let atom = Atom::from("hello");
/// assert_eq!(atom, "hello");
/// ```
///
/// ```
/// # use axsys_noun::atom::Atom;
/// let atom = Atom::from(0u8);
/// assert_eq!(atom, 0u8);
/// ```
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub struct Atom {
    /// Value of the atom, least significant byte first.
    bytes: Vec<u8>,
    /// Cached length of the value in bits.
    bit_len: usize,
}

/// Reads an atom as the unsigned integer type `$uint`.
///
/// Evaluates to `Some` holding the little-endian value of the atom's bytes, or to `None` when the
/// atom occupies more bytes than `$uint` does.
macro_rules! atom_as_uint {
    ($atom:expr, $uint:ty) => {{
        const WIDTH: usize = std::mem::size_of::<$uint>();
        let src = $atom.as_bytes();
        if src.len() > WIDTH {
            None
        } else {
            // Zero-extend the atom up to the full width of the target type.
            let mut buf = [0u8; WIDTH];
            buf[..src.len()].copy_from_slice(src);
            Some(<$uint>::from_le_bytes(buf))
        }
    }};
}

126impl Atom {
127    /// Creates an empty atom builder.
128    ///
129    /// This method is equivalent to `Builder::new()`.
130    pub fn builder() -> Builder {
131        Builder::new()
132    }
133
134    /// Creates the atom `0`.
135    pub const fn null() -> Self {
136        Self {
137            bytes: Vec::new(),
138            bit_len: 0,
139        }
140    }
141
142    /// Returns `true` if this atom is null (i.e. the atom `0`).
143    pub const fn is_null(&self) -> bool {
144        self.bit_len() == 0
145    }
146
147    /// Returns the length in bits of this atom.
148    pub const fn bit_len(&self) -> usize {
149        self.bit_len
150    }
151
152    /// Computes the hash of this atom.
153    pub fn hash(&self) -> u64 {
154        let mut hasher = DefaultHasher::new();
155        hasher.write(self.as_bytes());
156        hasher.finish()
157    }
158
159    /// Converts this atom into a byte slice.
160    pub fn as_bytes(&self) -> &[u8] {
161        &self.bytes
162    }
163
164    /// Converts this atom into a string slice, returning an error if the atom is not composed of
165    /// valid UTF-8 bytes.
166    pub fn as_str(&self) -> Result<&str, Utf8Error> {
167        str::from_utf8(self.as_bytes())
168    }
169
170    /// Converts this atom into an 8-bit unsigned integer, returning `None` if the atom is greater
171    /// than `u8::MAX`.
172    ///
173    /// # Examples
174    /// ```
175    /// # use axsys_noun::atom::Atom;
176    /// let uint = u8::MAX;
177    /// let atom = Atom::from(uint);
178    /// assert_eq!(atom.as_u8().unwrap(), uint);
179    /// ```
180    pub fn as_u8(&self) -> Option<u8> {
181        atom_as_uint!(self, u8)
182    }
183
184    /// Converts this atom into an 16-bit unsigned integer, returning `None` if the atom is greater
185    /// than `u16::MAX`.
186    ///
187    /// # Examples
188    /// ```
189    /// # use axsys_noun::atom::Atom;
190    /// let uint = u16::MAX;
191    /// let atom = Atom::from(uint);
192    /// assert_eq!(atom.as_u16().unwrap(), uint);
193    /// ```
194    pub fn as_u16(&self) -> Option<u16> {
195        atom_as_uint!(self, u16)
196    }
197
198    /// Converts this atom into an 32-bit unsigned integer, returning `None` if the atom is greater
199    /// than `u32::MAX`.
200    ///
201    /// # Examples
202    /// ```
203    /// # use axsys_noun::atom::Atom;
204    /// let uint = u32::MAX;
205    /// let atom = Atom::from(uint);
206    /// assert_eq!(atom.as_u32().unwrap(), uint);
207    /// ```
208    pub fn as_u32(&self) -> Option<u32> {
209        atom_as_uint!(self, u32)
210    }
211
212    /// Converts this atom into an 64-bit unsigned integer, returning `None` if the atom is greater
213    /// than `u64::MAX`.
214    ///
215    /// # Examples
216    /// ```
217    /// # use axsys_noun::atom::Atom;
218    /// let uint = u64::MAX;
219    /// let atom = Atom::from(uint);
220    /// assert_eq!(atom.as_u64().unwrap(), uint);
221    /// ```
222    pub fn as_u64(&self) -> Option<u64> {
223        atom_as_uint!(self, u64)
224    }
225
226    /// Converts this atom into an 128-bit unsigned integer, returning `None` if the atom is greater
227    /// than `u128::MAX`.
228    ///
229    /// # Examples
230    /// ```
231    /// # use axsys_noun::atom::Atom;
232    /// let uint = u128::MAX;
233    /// let atom = Atom::from(uint);
234    /// assert_eq!(atom.as_u128().unwrap(), uint);
235    /// ```
236    pub fn as_u128(&self) -> Option<u128> {
237        atom_as_uint!(self, u128)
238    }
239
240    /// Converts this atom into a pointer-sized unsigned integer, returning `None` if the atom is
241    /// greater than `usize::MAX`.
242    ///
243    /// # Examples
244    /// ```
245    /// # use axsys_noun::atom::Atom;
246    /// let uint = usize::MAX;
247    /// let atom = Atom::from(uint);
248    /// assert_eq!(atom.as_usize().unwrap(), uint);
249    /// ```
250    pub fn as_usize(&self) -> Option<usize> {
251        atom_as_uint!(self, usize)
252    }
253
254    /// Copies this atom into a byte vector.
255    pub fn to_vec(&self) -> Vec<u8> {
256        Vec::from(self.as_bytes())
257    }
258
259    /// Converts this atom into a byte vector, consuming the atom.
260    ///
261    /// This method does not allocate on the heap.
262    pub fn into_vec(self) -> Vec<u8> {
263        self.bytes
264    }
265
266    pub fn as_noun(&self) -> Noun {
267        Noun::Atom(self.clone())
268    }
269
270    pub fn into_noun(self) -> Noun {
271        Noun::Atom(self)
272    }
273
274    /// Returns a bitwise iterator over this atom.
275    pub fn iter(&self) -> Iter {
276        Iter {
277            atom: self,
278            bit_idx: 0,
279            bit_mask: 0b1,
280        }
281    }
282
283    /// Returns the atom as a big integer
284    pub fn as_big(&self) -> UBig {
285        UBig::from_le_bytes(&self.bytes)
286    }
287
288    fn format_decimal(&self) -> String {
289        let big = self.as_big();
290        let s = big.to_string();
291        let mut chs = s.chars().rev().collect::<Vec<char>>();
292        let len = chs.len();
293        let gap = len % 3;
294        if gap != 0 {
295            chs.resize(len + (3 - gap), ' ');
296        }
297        let mut res: Vec<String> = vec![];
298        for chunk in chs.chunks_exact(3) {
299            let bloq = chunk.iter().rev().collect::<String>();
300            res.push(bloq);
301        }
302        res.reverse();
303        res.join(".").trim().to_string()
304    }
305
306    /// Formats an atom into a string using the given aura
307    ///
308    /// # Examples
309    /// ```
310    /// # use axsys_noun::atom::Atom;
311    /// # use axsys_noun::sept::Aura;
312    /// let atom = Atom::from(123434910u64);
313    /// let s = atom.format_aura(Aura::U).unwrap();
314    /// assert_eq!(s, "123.434.910");
315    /// ```
316    pub fn format_aura(&self, aura: Aura) -> Result<String, FormatError> {
317        match aura {
318            Aura::T => self
319                .as_str()
320                .map(|s| s.to_string())
321                .map_err(FormatError::Utf8),
322            // TODO: sanity checks
323            Aura::Ta => self
324                .as_str()
325                .map(|s| s.to_string())
326                .map_err(FormatError::Utf8),
327            Aura::Tas => self
328                .as_str()
329                .map(|s| s.to_string())
330                .map_err(FormatError::Utf8),
331
332            Aura::U => Ok(self.format_decimal()),
333            Aura::Ud => Ok(self.format_decimal()),
334            Aura::Ux => unimplemented!(),
335            Aura::Uv => unimplemented!(),
336            Aura::Uw => unimplemented!(),
337            Aura::Da => self
338                .as_u128()
339                .map(date::format_da)
340                .ok_or(FormatError::TooLarge),
341            Aura::Dr => unimplemented!(),
342        }
343    }
344
345    fn parse_decimal(s: &str) -> Result<Self, ParseError> {
346        let bloqs = s.split('.').collect::<Vec<&str>>().join("");
347        let big = UBig::from_str_radix(&bloqs, 10).map_err(|_| ParseError::Invalid)?;
348        Ok(Atom::from(big.to_le_bytes()))
349    }
350
351    /// Parses a string into an atom using the given aura
352    ///
353    /// # Examples
354    /// ```
355    /// # use axsys_noun::atom::Atom;
356    /// # use axsys_noun::sept::Aura;
357    /// let atom = Atom::parse_aura(Aura::U, "123.434.910").unwrap();
358    /// assert_eq!(atom, Atom::from(123434910u64));
359    /// ```
360    pub fn parse_aura(aura: Aura, s: &str) -> Result<Self, ParseError> {
361        match aura {
362            Aura::T | Aura::Ta | Aura::Tas => Ok(Atom::from(s)),
363            Aura::U | Aura::Ud => Self::parse_decimal(s),
364            Aura::Da => Ok(Self::from(
365                date::parse_da(s).map_err(|_e| ParseError::Invalid)?,
366            )),
367
368            _ => unimplemented!(),
369        }
370    }
371}
372
373impl Display for Atom {
374    fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
375        write!(f, "0x")?;
376        if self.bytes.is_empty() {
377            write!(f, "0")
378        } else {
379            for (i, byte) in (self.bytes).iter().enumerate() {
380                if i > 0 && i % 4 == 0 {
381                    write!(f, ".")?;
382                }
383                write!(f, "{:x}", byte)?;
384            }
385            Ok(())
386        }
387    }
388}
389
390impl TryFrom<&OsStr> for Atom {
391    type Error = ();
392
393    fn try_from(string: &OsStr) -> Result<Self, Self::Error> {
394        Ok(Self::from(string.to_str().ok_or(())?))
395    }
396}
397
398impl From<&str> for Atom {
399    fn from(string: &str) -> Self {
400        let bytes = string.as_bytes().to_vec();
401        let bit_len = bit_len(&bytes[..]);
402        Self { bytes, bit_len }
403    }
404}
405
406impl From<String> for Atom {
407    fn from(string: String) -> Self {
408        Self::from(string.into_bytes())
409    }
410}
411
412/// Convert an unsigned integer primitive into an atom.
413macro_rules! impl_from_uint_for_atom {
414    ($uint:ty) => {
415        impl From<$uint> for Atom {
416            fn from(uint: $uint) -> Self {
417                Atom::from(Vec::from(uint.to_le_bytes()))
418            }
419        }
420    };
421}
422
423impl_from_uint_for_atom!(u8);
424impl_from_uint_for_atom!(u16);
425impl_from_uint_for_atom!(u32);
426impl_from_uint_for_atom!(u64);
427impl_from_uint_for_atom!(u128);
428impl_from_uint_for_atom!(usize);
429
430impl From<Vec<u8>> for Atom {
431    fn from(mut vec: Vec<u8>) -> Self {
432        let len = match vec.iter().rposition(|x| *x != 0) {
433            Some(idx) => idx + 1,
434            None => 0,
435        };
436        vec.truncate(len);
437        let bit_len = bit_len(&vec[..]);
438        Self {
439            bytes: vec,
440            bit_len,
441        }
442    }
443}
444
445impl PartialEq<&Self> for Atom {
446    fn eq(&self, other: &&Self) -> bool {
447        self.bytes == other.bytes
448    }
449}
450
451impl PartialEq<str> for Atom {
452    fn eq(&self, other: &str) -> bool {
453        if let Ok(string) = str::from_utf8(self.as_bytes()) {
454            string == other
455        } else {
456            false
457        }
458    }
459}
460
461impl PartialEq<&str> for Atom {
462    fn eq(&self, other: &&str) -> bool {
463        if let Ok(string) = str::from_utf8(self.as_bytes()) {
464            string == *other
465        } else {
466            false
467        }
468    }
469}
470
471/// Compares an atom to an unsigned integer primitive.
472macro_rules! impl_partial_eq_uint_for_atom {
473    ($uint:ty, $as_uint:ident) => {
474        impl PartialEq<$uint> for Atom {
475            fn eq(&self, other: &$uint) -> bool {
476                if let Some(uint) = self.$as_uint() {
477                    uint == *other
478                } else {
479                    false
480                }
481            }
482        }
483    };
484}
485
486impl_partial_eq_uint_for_atom!(u8, as_u8);
487impl_partial_eq_uint_for_atom!(u16, as_u16);
488impl_partial_eq_uint_for_atom!(u32, as_u32);
489impl_partial_eq_uint_for_atom!(u64, as_u64);
490impl_partial_eq_uint_for_atom!(u128, as_u128);
491impl_partial_eq_uint_for_atom!(usize, as_usize);
492
493/// An iterator over the bits of an [`Atom`].
494///
495/// Iteration starts with the least significant bit of the [`Atom`] and ends with the most
496/// significant bit.
497pub struct Iter<'a> {
498    /// Atom being interated over.
499    atom: &'a Atom,
500    /// Index of the current bit.
501    bit_idx: usize,
502    /// Mask to access current bit.
503    bit_mask: u8,
504}
505
506impl Iter<'_> {
507    /// Returns the current bitwise position of this iterator.
508    pub fn pos(&self) -> usize {
509        self.bit_idx
510    }
511}
512
513impl Iterator for Iter<'_> {
514    type Item = bool;
515
516    fn next(&mut self) -> Option<Self::Item> {
517        if self.bit_idx == self.atom.bit_len {
518            return None;
519        }
520        let byte_idx = self.bit_idx / usize::try_from(u8::BITS).expect("u32 to usize");
521        let bit = (self.atom.bytes[byte_idx] & self.bit_mask) != 0;
522        self.bit_mask = self.bit_mask.rotate_left(1);
523        self.bit_idx += 1;
524        Some(bit)
525    }
526}
527
#[cfg(test)]
mod tests {
    use super::*;

    /// Collects the bits of `atom`, least significant first.
    fn bits(atom: &Atom) -> Vec<bool> {
        atom.iter().collect()
    }

    #[test]
    fn bit_len() {
        assert_eq!(super::bit_len(&0b111u8.to_le_bytes()[..]), 3);
        assert_eq!(super::bit_len(&0b1000_1011u8.to_le_bytes()[..]), 8);
        assert_eq!(super::bit_len(&0b1_0000_0000u16.to_le_bytes()[..]), 9);

        // Sixteen full bytes followed by 0x37, which needs six bits: 16 * 8 + 6 == 134.
        let bytes: [u8; 17] = [
            0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf,
            0x37,
        ];
        assert_eq!(super::bit_len(&bytes[..]), 134);
    }

    #[test]
    fn is_null() {
        assert!(Atom::from(0u8).is_null());
        assert!(!Atom::from(1u8).is_null());
    }

    #[test]
    fn iter() {
        assert_eq!(bits(&Atom::from(0b0u8)), Vec::<bool>::new());
        assert_eq!(bits(&Atom::from(0b10u8)), [false, true]);

        // 0x2f004 == 0b10_1111_0000_0000_0100, listed here least significant bit first.
        let expected = [
            false, false, true, false, // 0x4
            false, false, false, false, // 0x0
            false, false, false, false, // 0x0
            true, true, true, true, // 0xf
            false, true, // 0x2 without its leading zeros
        ];
        assert_eq!(bits(&Atom::from(0x2f004u32)), expected);
    }

    #[test]
    fn partial_eq() {
        // Atom vs. atom.
        {
            let lh = Atom::from("The Importance of Being Ernest");
            let rh = Atom::from("The Importance of Being Ernest");
            assert_eq!(lh, rh);
        }
        {
            let lh = Atom::from("Oh, to be a glove");
            let rh = Atom::from("upon that hand.");
            assert_ne!(lh, rh);
        }

        // Atom vs. string slice.
        {
            let string = "hello";
            assert_eq!(Atom::from(string), string);
            assert_ne!(Atom::from("hello"), "goodbye");
        }

        // Atom vs. the unsigned integer it was built from.
        {
            macro_rules! uint_eq_test {
                ($uint:expr) => {
                    let atom = Atom::from($uint);
                    assert_eq!(atom, $uint);
                };
            }

            uint_eq_test!(0u8);
            uint_eq_test!(107u8);
            uint_eq_test!(16_000u16);
            uint_eq_test!(949_543_111u32);
            uint_eq_test!(184_884_819u64);
            uint_eq_test!(19_595_184_881_994_188_181u128);
            uint_eq_test!(10_101_044_481_818usize);
        }

        // Atom vs. a different unsigned integer, including integers of other widths.
        {
            macro_rules! uint_ne_test {
                ($atom:expr, $uint:expr) => {
                    let atom = Atom::from($atom);
                    assert_ne!(atom, $uint);
                };
            }

            uint_ne_test!(97u8, 103u8);
            uint_ne_test!(98u8, 64_222u16);
            uint_ne_test!(99u8, 777_919_400u32);
            uint_ne_test!(100u8, 881_944_000_887u64);
            uint_ne_test!(881_944_000_887u64, 21_601_185_860_100_176_183u128);
            uint_ne_test!(64_222u16, 127usize);
        }
    }

    #[test]
    fn format_aura() {
        let s = Atom::from(123_434_910u64).format_aura(Aura::U).unwrap();
        assert_eq!(s, "123.434.910");

        let s = Atom::from(1_234u64).format_aura(Aura::Ud).unwrap();
        assert_eq!(s, "1.234");

        // Exactly three digits: a single group with no separator.
        let s = Atom::from(999u64).format_aura(Aura::Ud).unwrap();
        assert_eq!(s, "999");

        // Zero digits inside a group must be preserved.
        let s = Atom::from(1_000u64).format_aura(Aura::U).unwrap();
        assert_eq!(s, "1.000");
    }
}