smol_symbol/lib.rs
//! # smol-symbol 💠
2//!
//! [crates.io](https://crates.io/crates/smol-symbol) |
//! [docs.rs](https://docs.rs/smol-symbol/latest/smol_symbol/) |
//! [CI status](https://github.com/sam0x17/smol-symbol/actions/workflows/ci.yaml?query=branch%3Amain) |
//! [license](https://github.com/sam0x17/smol-symbol/blob/main/LICENSE)
7//!
8//! This crate provides the ability to create globally unique (per input value),
9//! human-readable [`Symbol`]s at compile-time as well as at run-time, that are meant to be
10//! reminiscent of the `Symbol` type in the Crystal programming language.
11//!
12//! Where this crate differs is the alphabet and length of our [`Symbol`] is a bit more
13//! restrictive, allowing us to encode the entire text of each [`Symbol`] as a [`u128`]
14//! internally. The only caveat is we are limited to 25 characters of length and an alphabet
15//! consisting of lowercase a-z as well as `_`.
16//!
//! The [`Symbol`] type can be created at compile-time using the convenient [`s!`] macro, and
//! can also be created at runtime using the fallible `TryFrom<&str>` impl (also provided for
//! [`String`] and `&String`), though this is not as efficient as using the [`s!`] macro.
20//!
21//! The [`Symbol`] type can also be turned into a [`String`] via a convenient [`Into<String>`].
22//!
23//! We also provide the ability to define custom alphabets that use the more general
24//! [`CustomSymbol`] type via a handy [`custom_alphabet!`] macro, allowing you to alter these
25//! restrictions directly (smaller alphabet = larger max length for a symbol) and add support
26//! for other languages or less restrictive character sets. The only invariant that can't be
27//! customized at the moment is [`CustomSymbol`] will always use a [`u128`] as its backing data
28//! store.
29//!
30//! ### Example
31#![doc = docify::embed_run!("tests/tests.rs", symbol_type_example)]
32//!
33//! See the docs for [`Symbol`] and [`s!`] for more detailed information.
34
35#![no_std]
36
37#[cfg(all(doc, feature = "generate-readme"))]
38docify::compile_markdown!("README.docify.md", "README.md");
39
40extern crate alloc;
41
42use alloc::string::String;
43use core::{
44 fmt::{Debug, Display, Formatter, Result},
45 hash::Hash,
46 marker::PhantomData,
47};
48
49pub use smol_symbol_macros::*;
50
/// A compact representation for a (maximum of) 25-character identifier consisting of only
/// lowercase a-z as well as `_`. Internally this data is converted to a [`u128`], allowing for
/// trivial comparison operations between symbols.
///
/// This is [`CustomSymbol`] instantiated with [`DefaultAlphabet`].
///
/// [`Symbol`]s can be created _at compile time_ using the powerful [`s!`] macro. This is the
/// preferred way of creating symbols as it incurs zero overhead at runtime.
///
/// [`Symbol`]s can also be created at runtime, albeit slower than using the [`s!`] macro, via
/// the fallible `TryFrom<&str>`, `TryFrom<String>`, and `TryFrom<&String>` impls on
/// [`CustomSymbol`], which return a [`SymbolParsingError`] when the input is not a valid symbol.
///
/// The [`Symbol`] type itself impls many useful traits, including [`Copy`], [`Clone`],
/// [`Eq`], [`Ord`], [`Hash`], [`Display`], [`Debug`], [`Send`], and [`Sync`], allowing for a
/// variety of scenarios and use-cases.
///
/// ### Example
#[doc = docify::embed_run!("tests/tests.rs", test_basics)]
pub type Symbol = CustomSymbol<{ DefaultAlphabet::LEN }, DefaultAlphabet>;
68
69/// Represents a custom alphabet for use with [`CustomSymbol`]. To create one of these you
70/// should use the [`custom_alphabet!`] macro, as there are several functions you need to
71/// define in addition to implementing the trait.
72pub trait Alphabet<const N: usize>: Copy + Clone + PartialEq + Eq {
73 /// An array of [`char`]'s representing the raw UTF-8 characters that are allowed in this
74 /// [`Alphabet`]. All characters in this array should be unique and should be valid
75 /// characters that could appear in an [identifier](https://doc.rust-lang.org/reference/identifiers.html).
76 const ALPHABET: [char; N];
77
78 /// Auto-generated constant that provides easy access to the size/length of this [`Alphabet`].
79 const LEN: usize = N;
80
81 /// Auto-generated constant that provides easy access to the size/length of this
82 /// [`Alphabet`] as a [`u128`], for performance reasons.
83 const LEN_U218: u128 = Self::LEN as u128;
84
85 /// Auto-generated constant that determines the maximum length a [`CustomSymbol`] using
86 /// this [`Alphabet`] could be, based on the number of bits used per symbol character.
87 const MAX_SYMBOL_LEN: usize = 128 / ceil_log2(Self::LEN + 1);
88
89 /// Returns the 1-based (0 is reserved) index of this [`char`] in this [`Alphabet`]. An
90 /// automatic implementation of this is provided by the [`custom_alphabet!`] macro.
91 fn invert_char(c: char) -> core::result::Result<u128, SymbolParsingError>;
92}
93
// Declares `DefaultAlphabet`, the alphabet used by the `Symbol` type alias: the 26 lowercase
// letters `a`-`z` plus `_` (27 characters in total).
custom_alphabet!(DefaultAlphabet, abcdefghijklmnopqrstuvwxyz_);
95
/// The base type used for [`Symbol`] and any custom [`Alphabet`]'s that have been created
/// using [`custom_alphabet!`].
///
/// Typically to create a [`Symbol`] or [`CustomSymbol`], you will want to use the [`s!`] macro.
///
/// `N` is the number of characters in the alphabet `A`. Equality, ordering and hashing are all
/// defined purely in terms of the encoded [`u128`] value.
#[derive(Copy, Clone)]
#[repr(transparent)]
pub struct CustomSymbol<const N: usize, A: Alphabet<N>> {
    // Zero-sized marker that ties this symbol to its alphabet `A` without storing a value.
    _alphabet: PhantomData<A>,
    // The encoded symbol text: written by `from_raw` / `TryFrom<&str>` and decoded by `name`.
    data: u128,
}
106
107impl<const N: usize, A: Alphabet<N>> CustomSymbol<N, A> {
108 /// Used internally by the [`s!`] macro to create a [`Symbol`] or [`CustomSymbol`] from a
109 /// raw [`u128`] generated by the macro's interaction with some const fns.
110 pub const fn from_raw(data: u128) -> Self {
111 CustomSymbol {
112 _alphabet: PhantomData,
113 data,
114 }
115 }
116
117 /// Converts this [`Symbol`] or [`CustomSymbol`] into a human-readable [`String`]
118 /// representation. This is only possible because the [`u128`] used as the backing for
119 /// [`CustomSymbol`] encodes all bits of information for each character in the
120 /// [`CustomSymbol`].
121 pub fn name(&self) -> String {
122 let mut rem = self.data;
123 let char_size = (A::ALPHABET.len() + 1) as u128;
124 let mut result = String::with_capacity(A::MAX_SYMBOL_LEN);
125 while rem != 0 {
126 let it = rem % char_size;
127 rem -= it;
128 rem /= char_size;
129 result.push(A::ALPHABET[it as usize - 1]);
130 }
131 result
132 }
133}
134
135impl<const N: usize, A: Alphabet<N>> PartialEq for CustomSymbol<N, A> {
136 fn eq(&self, other: &Self) -> bool {
137 self.data == other.data
138 }
139}
140impl<const N: usize, A: Alphabet<N>> Eq for CustomSymbol<N, A> {}
141impl<const N: usize, A: Alphabet<N>> Hash for CustomSymbol<N, A> {
142 fn hash<H: core::hash::Hasher>(&self, state: &mut H) {
143 self.data.hash(state);
144 }
145}
146impl<const N: usize, A: Alphabet<N>> PartialOrd for CustomSymbol<N, A> {
147 fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
148 Some(self.cmp(other))
149 }
150}
151impl<const N: usize, A: Alphabet<N>> Ord for CustomSymbol<N, A> {
152 fn cmp(&self, other: &Self) -> core::cmp::Ordering {
153 self.data.cmp(&other.data)
154 }
155}
156
157impl<const N: usize, A: Alphabet<N>> From<CustomSymbol<N, A>> for u128 {
158 fn from(value: CustomSymbol<N, A>) -> Self {
159 value.data
160 }
161}
162
/// Thrown when an attempt was made to parse an invalid [`CustomSymbol`] / [`Symbol`]. This can
/// occur when the underlying ident or string is too long, too short, or contains invalid
/// characters (characters not in the specified [`Alphabet`]).
///
/// Both the [`Debug`] and [`Display`] representations are [`PARSING_ERROR_MSG`].
#[derive(Clone, Copy, PartialEq, Eq)]
pub struct SymbolParsingError;

/// The human-readable explanation emitted when formatting a [`SymbolParsingError`].
pub const PARSING_ERROR_MSG: &str =
    "To be a valid `Symbol` or `CustomSymbol`, the provided ident or string must be at least one \
    character long, at most `Alphabet::MAX_SYMBOL_LEN` characters long, and consist only of \
    characters that are included in the `Alphabet`. No other characters are permitted, nor is \
    whitespace of any kind.";

impl Debug for SymbolParsingError {
    /// Writes [`PARSING_ERROR_MSG`] so that `unwrap()` / `expect()` failures explain the rules.
    fn fmt(&self, f: &mut Formatter<'_>) -> Result {
        f.write_str(PARSING_ERROR_MSG)
    }
}

impl Display for SymbolParsingError {
    /// Writes [`PARSING_ERROR_MSG`], making the error usable in user-facing output.
    fn fmt(&self, f: &mut Formatter<'_>) -> Result {
        f.write_str(PARSING_ERROR_MSG)
    }
}
179
180impl<const N: usize, A: Alphabet<N>> TryFrom<&str> for CustomSymbol<N, A> {
181 type Error = SymbolParsingError;
182
183 /// Attempts to interpret the provided string as a valid [`Symbol`] / [`CustomSymbol`]. The usual parsing
184 /// rules for [`CustomSymbol`] apply, namely:
185 /// - At least one character
186 /// - At most `Alphabet::MAX_SYMBOL_LEN` characters
187 /// - Only characters that are contained in the [`Alphabet`].
188 ///
189 /// If any of these requirements are violated, a generic [`SymbolParsingError`] is returned
190 /// and parsing will abort.
191 fn try_from(value: &str) -> core::result::Result<Self, Self::Error> {
192 if value.is_empty() || value.len() > A::MAX_SYMBOL_LEN {
193 return Err(SymbolParsingError {});
194 }
195 let mut data: u128 = 0;
196 for c in value.chars().rev() {
197 data *= A::LEN_U218 + 1;
198 data += A::invert_char(c)?;
199 }
200 Ok(CustomSymbol {
201 _alphabet: PhantomData,
202 data,
203 })
204 }
205}
206
207impl<const N: usize, A: Alphabet<N>> TryFrom<String> for CustomSymbol<N, A> {
208 type Error = SymbolParsingError;
209
210 fn try_from(value: String) -> core::result::Result<Self, Self::Error> {
211 CustomSymbol::try_from(value.as_str())
212 }
213}
214
215impl<const N: usize, A: Alphabet<N>> TryFrom<&String> for CustomSymbol<N, A> {
216 type Error = SymbolParsingError;
217
218 fn try_from(value: &String) -> core::result::Result<Self, Self::Error> {
219 CustomSymbol::try_from(value.as_str())
220 }
221}
222
223impl<const N: usize, A: Alphabet<N>> From<CustomSymbol<N, A>> for String {
224 fn from(value: CustomSymbol<N, A>) -> Self {
225 value.name()
226 }
227}
228
229impl<const N: usize, A: Alphabet<N>> From<&CustomSymbol<N, A>> for String {
230 fn from(value: &CustomSymbol<N, A>) -> Self {
231 (*value).into()
232 }
233}
234
235impl<const N: usize, A: Alphabet<N>> Debug for CustomSymbol<N, A> {
236 fn fmt(&self, f: &mut Formatter<'_>) -> Result {
237 f.debug_struct("Symbol")
238 .field("data", &self.data)
239 .field("symbol", &String::from(*self))
240 .finish()
241 }
242}
243
244impl<const N: usize, A: Alphabet<N>> Display for CustomSymbol<N, A> {
245 fn fmt(&self, f: &mut Formatter<'_>) -> Result {
246 let value: String = self.name();
247 f.write_str(&value)
248 }
249}
250
/// Internal function used to calculate the `ceil(log2(x))` when determining the
/// `MAX_SYMBOL_LEN` of an [`Alphabet`].
///
/// Returns `0` for `x <= 1`. For larger `x` the result is the number of bits needed to
/// represent `x - 1`, which equals `ceil(log2(x))`. Unlike repeated `(n + 1) / 2` halving,
/// this form cannot overflow, even for `usize::MAX`.
const fn ceil_log2(x: usize) -> usize {
    if x <= 1 {
        return 0;
    }
    // Bit length of `x - 1` is the smallest `k` such that `x <= 2^k`.
    (usize::BITS - (x - 1).leading_zeros()) as usize
}