smol_symbol_macros/
lib.rs

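//! This crate houses the [`s!`] macro, used to create `Symbol` / `CustomSymbol` instances at
//! const-eval time from a provided ident and (if applicable) `Alphabet`, as well as the
//! [`custom_alphabet!`] macro, used to define custom `Alphabet`s for use with [`s!`].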
3
4use derive_syn_parse::Parse;
5use proc_macro::TokenStream;
6use quote::quote;
7use syn::{parse_macro_input, parse_quote, Ident, Token, TypePath};
8
/// Used to parse input to [`s!`]: a symbol ident, optionally followed by a comma and the path
/// of the alphabet type to use.
#[derive(Parse)]
struct SymbolInput {
    // The identifier whose characters make up the symbol.
    ident: Ident,
    // Separator between the symbol ident and the alphabet path, if one was written.
    _comma: Option<Token![,]>,
    // Alphabet type path; only parsed when the comma above was present.
    #[parse_if(_comma.is_some())]
    alphabet_path: Option<TypePath>,
}
16
17/// Generates a `Symbol` or `CustomSymbol` at const-eval time based on the provided ident and
18/// (optional) path to a custom `Alphabet`., e.g.:
19///
20/// ```ignore
21/// let my_sym = s!(hello_world); // uses Symbol / DefaultAlphabet
22/// let my_custom_sym = s!(OtHeR, MyCustomAlphabet); // uses the custom alphabet `MyCustomAlphabet`
23/// ```
24///
25/// Your symbol ident should be constrained to a minimum of one character and should be no
26/// longer than the `MAX_SYMBOL_LEN` for your chosen alphabet (this is 25 for `DefaultAlphabet`).
27///
28/// At runtime, each unique`Symbol` is represented internally as a unique [`u128`] that encodes
29/// the bits of the symbol (5 bits per character when using `DefaultAlphabet`), and enough
30/// information is preserved in this representation that the [`u128`] can be converted back
31/// into a [`String`] during at runtime, if desired. In other words, encoding your symbol as a
32/// [`u128`] is a non-destructive action that can be reversed.
33///
34/// These are great for scenarios where you need a human-readable globally unique identifier.
35/// The `Symbol` / `CustomSymbol` type is intended to be very loosely similar to the `Symbol`
36/// type in the Crystal programming language, though it is strictly much more powerful, with
37/// the additional capability that `Symbol`s can be created and runtime in addition to
38/// compile-time, and can be directly sorted, hashed, etc., in lexically consistent way.
39#[proc_macro]
40pub fn s(tokens: TokenStream) -> TokenStream {
41    let input = parse_macro_input!(tokens as SymbolInput);
42    let ident = input.ident.to_string();
43    let chars = ident.chars();
44    let alphabet_path = input
45        .alphabet_path
46        .unwrap_or_else(|| parse_quote!(::smol_symbol::DefaultAlphabet));
47    quote! {
48        #alphabet_path::parse_chars_panic(&[#(#chars),*])
49    }
50    .into()
51}
52
/// Used to parse input to [`custom_alphabet`]: two idents separated by a mandatory comma.
#[derive(Parse)]
struct CustomAlphabetInput {
    // Name of the alphabet type to generate.
    name: Ident,
    // Mandatory separator between the two idents.
    _comma: Token![,],
    // Ident whose characters, in order, form the alphabet.
    alphabet: Ident,
}
60
61/// Allows you to define a custom alphabet for use with `CustomSymbol` and the [`s!`] macro.
62/// The macro takes two idents separated by a comma as input. The first ident should be the
63/// name of the alphabet you would like to create, and the second ident should contain all of
64/// the characters you would like to use in your alphabet (symbols must be comprised only of
65/// characters that are valid in an
66/// [ident](https://doc.rust-lang.org/reference/identifiers.html).
67///
68/// For example, this would define `MyAlphabet` to consist of uppercase A-Z, lowercase a-z, and
69/// digits, and would have a resulting `MAX_SYMBOL_LEN` of 21 characters long:
70///
71/// ```ignore
72/// custom_alphabet!(MyAlphabet, abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789);
73///
74/// let my_sym = s!(SoMeThInG33, MyAlphabet);
75/// ```
76///
77/// It is worth noting that in general, the longer an alphabet is, the lower the
78/// `MAX_SYMBOL_LEN` bound will be for that alphabet, since a [`u128`] is always used as the
79/// backing for `CustomSymbol`.
80#[proc_macro]
81pub fn custom_alphabet(tokens: TokenStream) -> TokenStream {
82    let crate_path = match std::env::var("CARGO_PKG_NAME") {
83        Ok(crate_path) => match crate_path.as_str() {
84            "smol-symbol" => quote!(crate),
85            _ => quote!(::smol_symbol),
86        },
87        _ => quote!(::smol_symbol),
88    };
89    let input = parse_macro_input!(tokens as CustomAlphabetInput);
90    let name = input.name;
91    let alphabet = input.alphabet.to_string().chars().collect::<Vec<char>>();
92    let alphabet_len = alphabet.len();
93    let alphabet_map_u128 = alphabet.iter().enumerate().map(|(i, c)| {
94        let i = i + 1;
95        let i = i as u128;
96        quote!(#c => #i)
97    });
98    let alphabet_map_u128_clone = alphabet_map_u128.clone();
99    quote! {
100        #[derive(Copy, Clone, PartialEq, Eq)]
101        pub struct #name;
102
103        impl #crate_path::Alphabet<#alphabet_len> for #name {
104            const ALPHABET: [char; #alphabet_len] = [#(#alphabet),*];
105
106            fn invert_char(c: char) -> core::result::Result<u128, #crate_path::SymbolParsingError> {
107                let i = match c {
108                    #(#alphabet_map_u128),*,
109                    _ => return Err(#crate_path::SymbolParsingError),
110                };
111                Ok(i as u128)
112            }
113        }
114
115        impl #name {
116            pub const fn invert_char(c: char) -> core::result::Result<u128, #crate_path::SymbolParsingError> {
117                let i = match c {
118                    #(#alphabet_map_u128_clone),*,
119                    _ => return Err(#crate_path::SymbolParsingError),
120                };
121                Ok(i as u128)
122            }
123
124            pub const fn parse_chars(chars: &[char]) -> core::result::Result<
125                #crate_path::CustomSymbol<#alphabet_len, #name>,
126                #crate_path::SymbolParsingError
127            > {
128                let mut i = chars.len() - 1;
129                let mut data: u128 = 0;
130                loop {
131                    let c = chars[i];
132                    let inverted = Self::invert_char(c);
133                    data *= #name::LEN_U218 + 1;
134                    data += match inverted {
135                        Ok(val) => val,
136                        Err(err) => return Err(err),
137                    };
138                    if i == 0 {
139                        break;
140                    }
141                    i -= 1;
142                }
143                Ok(#crate_path::CustomSymbol::from_raw(data))
144            }
145
146            pub const fn parse_chars_panic(chars: &[char]) -> #crate_path::CustomSymbol<#alphabet_len, #name> {
147                match Self::parse_chars(chars) {
148                    Ok(sym) => sym,
149                    Err(err) => panic!("{}", #crate_path::PARSING_ERROR_MSG),
150                }
151            }
152        }
153    }
154    .into()
155}