smol_symbol/
lib.rs

1//! # smol-symbol 💠
2//!
3//! [![Crates.io](https://img.shields.io/crates/v/smol-symbol)](https://crates.io/crates/smol-symbol)
4//! [![docs.rs](https://img.shields.io/docsrs/smol-symbol?label=docs)](https://docs.rs/smol-symbol/latest/smol_symbol/)
5//! [![Build Status](https://img.shields.io/github/actions/workflow/status/sam0x17/smol-symbol/ci.yaml)](https://github.com/sam0x17/smol-symbol/actions/workflows/ci.yaml?query=branch%3Amain)
6//! [![MIT License](https://img.shields.io/github/license/sam0x17/smol-symbol)](https://github.com/sam0x17/smol-symbol/blob/main/LICENSE)
7//!
8//! This crate provides the ability to create globally unique (per input value),
9//! human-readable [`Symbol`]s at compile-time as well as at run-time, that are meant to be
10//! reminiscent of the `Symbol` type in the Crystal programming language.
11//!
12//! Where this crate differs is the alphabet and length of our [`Symbol`] is a bit more
13//! restrictive, allowing us to encode the entire text of each [`Symbol`] as a [`u128`]
14//! internally. The only caveat is we are limited to 25 characters of length and an alphabet
15//! consisting of lowercase a-z as well as `_`.
16//!
//! The [`Symbol`] type can be created at compile-time using the convenient [`s!`] macro, and
//! can also be created at runtime using the fallible [`TryFrom`] impls for `&str` and
//! [`String`], though this is not as efficient as using the [`s!`] macro.
20//!
21//! The [`Symbol`] type can also be turned into a [`String`] via a convenient [`Into<String>`].
22//!
23//! We also provide the ability to define custom alphabets that use the more general
24//! [`CustomSymbol`] type via a handy [`custom_alphabet!`] macro, allowing you to alter these
25//! restrictions directly (smaller alphabet = larger max length for a symbol) and add support
26//! for other languages or less restrictive character sets. The only invariant that can't be
27//! customized at the moment is [`CustomSymbol`] will always use a [`u128`] as its backing data
28//! store.
29//!
30//! ### Example
31#![doc = docify::embed_run!("tests/tests.rs", symbol_type_example)]
32//!
33//! See the docs for [`Symbol`] and [`s!`] for more detailed information.
34
35#![no_std]
36
37#[cfg(all(doc, feature = "generate-readme"))]
38docify::compile_markdown!("README.docify.md", "README.md");
39
40extern crate alloc;
41
42use alloc::string::String;
43use core::{
44    fmt::{Debug, Display, Formatter, Result},
45    hash::Hash,
46    marker::PhantomData,
47};
48
49pub use smol_symbol_macros::*;
50
/// A compact representation for a (maximum of) 25-character identifier consisting of only
/// lowercase a-z as well as `_`. Internally this data is converted to a [`u128`], allowing for
/// trivial comparison operations between symbols.
///
/// [`Symbol`]s can be created _at compile time_ using the powerful [`s!`] macro. This is the
/// preferred way of creating symbols as it incurs zero overhead at runtime.
///
/// [`Symbol`]s can also be created at runtime, albeit slower than using the [`s!`] macro, via
/// the fallible [`TryFrom`] impls for `&str`, [`String`] and `&String`. These return a
/// [`SymbolParsingError`] when the input is not a valid symbol.
///
/// The [`Symbol`] type itself impls many useful traits, including [`Copy`], [`Clone`],
/// [`Eq`], [`Ord`], [`Hash`], [`Display`], [`Debug`], [`Send`], and [`Sync`], allowing for a
/// variety of scenarios and use-cases.
///
/// ### Example
#[doc = docify::embed_run!("tests/tests.rs", test_basics)]
pub type Symbol = CustomSymbol<{ DefaultAlphabet::LEN }, DefaultAlphabet>;
68
69/// Represents a custom alphabet for use with [`CustomSymbol`]. To create one of these you
70/// should use the [`custom_alphabet!`] macro, as there are several functions you need to
71/// define in addition to implementing the trait.
72pub trait Alphabet<const N: usize>: Copy + Clone + PartialEq + Eq {
73    /// An array of [`char`]'s representing the raw UTF-8 characters that are allowed in this
74    /// [`Alphabet`]. All characters in this array should be unique and should be valid
75    /// characters that could appear in an [identifier](https://doc.rust-lang.org/reference/identifiers.html).
76    const ALPHABET: [char; N];
77
78    /// Auto-generated constant that provides easy access to the size/length of this [`Alphabet`].
79    const LEN: usize = N;
80
81    /// Auto-generated constant that provides easy access to the size/length of this
82    /// [`Alphabet`] as a [`u128`], for performance reasons.
83    const LEN_U218: u128 = Self::LEN as u128;
84
85    /// Auto-generated constant that determines the maximum length a [`CustomSymbol`] using
86    /// this [`Alphabet`] could be, based on the number of bits used per symbol character.
87    const MAX_SYMBOL_LEN: usize = 128 / ceil_log2(Self::LEN + 1);
88
89    /// Returns the 1-based (0 is reserved) index of this [`char`] in this [`Alphabet`]. An
90    /// automatic implementation of this is provided by the [`custom_alphabet!`] macro.
91    fn invert_char(c: char) -> core::result::Result<u128, SymbolParsingError>;
92}
93
// Generates `DefaultAlphabet`, the alphabet used by the `Symbol` type alias: lowercase
// `a`-`z` plus `_` (27 characters in total).
custom_alphabet!(DefaultAlphabet, abcdefghijklmnopqrstuvwxyz_);
95
96/// The base type used for [`Symbol`] and any custom [`Alphabet`]'s that have been created
97/// using [`custom_alphabet!`].
98///
99/// Typically to create a [`Symbol`] or [`CustomSymbol`], you will want to use the [`s!`] macro.
100#[derive(Copy, Clone)]
101#[repr(transparent)]
102pub struct CustomSymbol<const N: usize, A: Alphabet<N>> {
103    _alphabet: PhantomData<A>,
104    data: u128,
105}
106
107impl<const N: usize, A: Alphabet<N>> CustomSymbol<N, A> {
108    /// Used internally by the [`s!`] macro to create a [`Symbol`] or [`CustomSymbol`] from a
109    /// raw [`u128`] generated by the macro's interaction with some const fns.
110    pub const fn from_raw(data: u128) -> Self {
111        CustomSymbol {
112            _alphabet: PhantomData,
113            data,
114        }
115    }
116
117    /// Converts this [`Symbol`] or [`CustomSymbol`] into a human-readable [`String`]
118    /// representation. This is only possible because the [`u128`] used as the backing for
119    /// [`CustomSymbol`] encodes all bits of information for each character in the
120    /// [`CustomSymbol`].
121    pub fn name(&self) -> String {
122        let mut rem = self.data;
123        let char_size = (A::ALPHABET.len() + 1) as u128;
124        let mut result = String::with_capacity(A::MAX_SYMBOL_LEN);
125        while rem != 0 {
126            let it = rem % char_size;
127            rem -= it;
128            rem /= char_size;
129            result.push(A::ALPHABET[it as usize - 1]);
130        }
131        result
132    }
133}
134
135impl<const N: usize, A: Alphabet<N>> PartialEq for CustomSymbol<N, A> {
136    fn eq(&self, other: &Self) -> bool {
137        self.data == other.data
138    }
139}
140impl<const N: usize, A: Alphabet<N>> Eq for CustomSymbol<N, A> {}
141impl<const N: usize, A: Alphabet<N>> Hash for CustomSymbol<N, A> {
142    fn hash<H: core::hash::Hasher>(&self, state: &mut H) {
143        self.data.hash(state);
144    }
145}
146impl<const N: usize, A: Alphabet<N>> PartialOrd for CustomSymbol<N, A> {
147    fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
148        Some(self.cmp(other))
149    }
150}
151impl<const N: usize, A: Alphabet<N>> Ord for CustomSymbol<N, A> {
152    fn cmp(&self, other: &Self) -> core::cmp::Ordering {
153        self.data.cmp(&other.data)
154    }
155}
156
157impl<const N: usize, A: Alphabet<N>> From<CustomSymbol<N, A>> for u128 {
158    fn from(value: CustomSymbol<N, A>) -> Self {
159        value.data
160    }
161}
162
/// Returned when an attempt was made to parse an invalid [`CustomSymbol`] / [`Symbol`]. This
/// can occur when the underlying ident or string is too long, too short, or contains invalid
/// characters (characters not in the specified [`Alphabet`]).
///
/// Both the [`Debug`] and [`Display`] representations print [`PARSING_ERROR_MSG`].
#[derive(Clone, Copy, PartialEq, Eq)]
pub struct SymbolParsingError;

/// The human-readable explanation printed for every [`SymbolParsingError`], describing the
/// rules a valid [`Symbol`] / [`CustomSymbol`] must follow.
pub const PARSING_ERROR_MSG: &str =
    "To be a valid `Symbol` or `CustomSymbol`, the provided ident or string must be at least one \
    character long, at most `Alphabet::MAX_SYMBOL_LEN` characters long, and consist only of \
    characters that are included in the `Alphabet`. No other characters are permitted, nor is \
    whitespace of any kind.";

impl Debug for SymbolParsingError {
    // Deliberately prints the explanation instead of the type name, so that `unwrap()` /
    // `expect()` failures tell the user what a valid symbol looks like.
    fn fmt(&self, f: &mut Formatter<'_>) -> Result {
        f.write_str(PARSING_ERROR_MSG)
    }
}

impl Display for SymbolParsingError {
    /// Writes [`PARSING_ERROR_MSG`], making the error usable with `{}` and `to_string()`.
    fn fmt(&self, f: &mut Formatter<'_>) -> Result {
        f.write_str(PARSING_ERROR_MSG)
    }
}
179
180impl<const N: usize, A: Alphabet<N>> TryFrom<&str> for CustomSymbol<N, A> {
181    type Error = SymbolParsingError;
182
183    /// Attempts to interpret the provided string as a valid [`Symbol`] / [`CustomSymbol`]. The usual parsing
184    /// rules for [`CustomSymbol`] apply, namely:
185    /// - At least one character
186    /// - At most `Alphabet::MAX_SYMBOL_LEN` characters
187    /// - Only characters that are contained in the [`Alphabet`].
188    ///
189    /// If any of these requirements are violated, a generic [`SymbolParsingError`] is returned
190    /// and parsing will abort.
191    fn try_from(value: &str) -> core::result::Result<Self, Self::Error> {
192        if value.is_empty() || value.len() > A::MAX_SYMBOL_LEN {
193            return Err(SymbolParsingError {});
194        }
195        let mut data: u128 = 0;
196        for c in value.chars().rev() {
197            data *= A::LEN_U218 + 1;
198            data += A::invert_char(c)?;
199        }
200        Ok(CustomSymbol {
201            _alphabet: PhantomData,
202            data,
203        })
204    }
205}
206
207impl<const N: usize, A: Alphabet<N>> TryFrom<String> for CustomSymbol<N, A> {
208    type Error = SymbolParsingError;
209
210    fn try_from(value: String) -> core::result::Result<Self, Self::Error> {
211        CustomSymbol::try_from(value.as_str())
212    }
213}
214
215impl<const N: usize, A: Alphabet<N>> TryFrom<&String> for CustomSymbol<N, A> {
216    type Error = SymbolParsingError;
217
218    fn try_from(value: &String) -> core::result::Result<Self, Self::Error> {
219        CustomSymbol::try_from(value.as_str())
220    }
221}
222
223impl<const N: usize, A: Alphabet<N>> From<CustomSymbol<N, A>> for String {
224    fn from(value: CustomSymbol<N, A>) -> Self {
225        value.name()
226    }
227}
228
229impl<const N: usize, A: Alphabet<N>> From<&CustomSymbol<N, A>> for String {
230    fn from(value: &CustomSymbol<N, A>) -> Self {
231        (*value).into()
232    }
233}
234
235impl<const N: usize, A: Alphabet<N>> Debug for CustomSymbol<N, A> {
236    fn fmt(&self, f: &mut Formatter<'_>) -> Result {
237        f.debug_struct("Symbol")
238            .field("data", &self.data)
239            .field("symbol", &String::from(*self))
240            .finish()
241    }
242}
243
244impl<const N: usize, A: Alphabet<N>> Display for CustomSymbol<N, A> {
245    fn fmt(&self, f: &mut Formatter<'_>) -> Result {
246        let value: String = self.name();
247        f.write_str(&value)
248    }
249}
250
/// Internal function used to calculate the `ceil(log2(x))` when determining the
/// `MAX_SYMBOL_LEN` of an [`Alphabet`].
///
/// Returns `0` for both `x == 0` and `x == 1`. Works for every `usize`, including
/// `usize::MAX`.
const fn ceil_log2(x: usize) -> usize {
    let mut n = x;
    let mut log = 0;
    while n > 1 {
        // Halve, rounding up. Written as `(n - 1) / 2 + 1` instead of `(n + 1) / 2` so that
        // the intermediate value cannot overflow when `n == usize::MAX`.
        n = (n - 1) / 2 + 1;
        log += 1;
    }
    log
}