smol_symbol_macros/lib.rs
1//! This crate houses the [`s!`] macro, used to create `Symbol` / `CustomSymbol` instances at
2//! const-eval time from a provided ident and (if applicable) `Alphabet`.
3
4use derive_syn_parse::Parse;
5use proc_macro::TokenStream;
6use quote::quote;
7use syn::{parse_macro_input, parse_quote, Ident, Token, TypePath};
8
9#[derive(Parse)]
10struct SymbolInput {
11 ident: Ident,
12 _comma: Option<Token![,]>,
13 #[parse_if(_comma.is_some())]
14 alphabet_path: Option<TypePath>,
15}
16
17/// Generates a `Symbol` or `CustomSymbol` at const-eval time based on the provided ident and
18/// (optional) path to a custom `Alphabet`., e.g.:
19///
20/// ```ignore
21/// let my_sym = s!(hello_world); // uses Symbol / DefaultAlphabet
22/// let my_custom_sym = s!(OtHeR, MyCustomAlphabet); // uses the custom alphabet `MyCustomAlphabet`
23/// ```
24///
25/// Your symbol ident should be constrained to a minimum of one character and should be no
26/// longer than the `MAX_SYMBOL_LEN` for your chosen alphabet (this is 25 for `DefaultAlphabet`).
27///
28/// At runtime, each unique`Symbol` is represented internally as a unique [`u128`] that encodes
29/// the bits of the symbol (5 bits per character when using `DefaultAlphabet`), and enough
30/// information is preserved in this representation that the [`u128`] can be converted back
31/// into a [`String`] during at runtime, if desired. In other words, encoding your symbol as a
32/// [`u128`] is a non-destructive action that can be reversed.
33///
34/// These are great for scenarios where you need a human-readable globally unique identifier.
35/// The `Symbol` / `CustomSymbol` type is intended to be very loosely similar to the `Symbol`
36/// type in the Crystal programming language, though it is strictly much more powerful, with
37/// the additional capability that `Symbol`s can be created and runtime in addition to
38/// compile-time, and can be directly sorted, hashed, etc., in lexically consistent way.
39#[proc_macro]
40pub fn s(tokens: TokenStream) -> TokenStream {
41 let input = parse_macro_input!(tokens as SymbolInput);
42 let ident = input.ident.to_string();
43 let chars = ident.chars();
44 let alphabet_path = input
45 .alphabet_path
46 .unwrap_or_else(|| parse_quote!(::smol_symbol::DefaultAlphabet));
47 quote! {
48 #alphabet_path::parse_chars_panic(&[#(#chars),*])
49 }
50 .into()
51}
52
53/// Used to parse input to [`custom_alphabet`].
54#[derive(Parse)]
55struct CustomAlphabetInput {
56 name: Ident,
57 _comma: Token![,],
58 alphabet: Ident,
59}
60
61/// Allows you to define a custom alphabet for use with `CustomSymbol` and the [`s!`] macro.
62/// The macro takes two idents separated by a comma as input. The first ident should be the
63/// name of the alphabet you would like to create, and the second ident should contain all of
64/// the characters you would like to use in your alphabet (symbols must be comprised only of
65/// characters that are valid in an
66/// [ident](https://doc.rust-lang.org/reference/identifiers.html).
67///
68/// For example, this would define `MyAlphabet` to consist of uppercase A-Z, lowercase a-z, and
69/// digits, and would have a resulting `MAX_SYMBOL_LEN` of 21 characters long:
70///
71/// ```ignore
72/// custom_alphabet!(MyAlphabet, abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789);
73///
74/// let my_sym = s!(SoMeThInG33, MyAlphabet);
75/// ```
76///
77/// It is worth noting that in general, the longer an alphabet is, the lower the
78/// `MAX_SYMBOL_LEN` bound will be for that alphabet, since a [`u128`] is always used as the
79/// backing for `CustomSymbol`.
80#[proc_macro]
81pub fn custom_alphabet(tokens: TokenStream) -> TokenStream {
82 let crate_path = match std::env::var("CARGO_PKG_NAME") {
83 Ok(crate_path) => match crate_path.as_str() {
84 "smol-symbol" => quote!(crate),
85 _ => quote!(::smol_symbol),
86 },
87 _ => quote!(::smol_symbol),
88 };
89 let input = parse_macro_input!(tokens as CustomAlphabetInput);
90 let name = input.name;
91 let alphabet = input.alphabet.to_string().chars().collect::<Vec<char>>();
92 let alphabet_len = alphabet.len();
93 let alphabet_map_u128 = alphabet.iter().enumerate().map(|(i, c)| {
94 let i = i + 1;
95 let i = i as u128;
96 quote!(#c => #i)
97 });
98 let alphabet_map_u128_clone = alphabet_map_u128.clone();
99 quote! {
100 #[derive(Copy, Clone, PartialEq, Eq)]
101 pub struct #name;
102
103 impl #crate_path::Alphabet<#alphabet_len> for #name {
104 const ALPHABET: [char; #alphabet_len] = [#(#alphabet),*];
105
106 fn invert_char(c: char) -> core::result::Result<u128, #crate_path::SymbolParsingError> {
107 let i = match c {
108 #(#alphabet_map_u128),*,
109 _ => return Err(#crate_path::SymbolParsingError),
110 };
111 Ok(i as u128)
112 }
113 }
114
115 impl #name {
116 pub const fn invert_char(c: char) -> core::result::Result<u128, #crate_path::SymbolParsingError> {
117 let i = match c {
118 #(#alphabet_map_u128_clone),*,
119 _ => return Err(#crate_path::SymbolParsingError),
120 };
121 Ok(i as u128)
122 }
123
124 pub const fn parse_chars(chars: &[char]) -> core::result::Result<
125 #crate_path::CustomSymbol<#alphabet_len, #name>,
126 #crate_path::SymbolParsingError
127 > {
128 let mut i = chars.len() - 1;
129 let mut data: u128 = 0;
130 loop {
131 let c = chars[i];
132 let inverted = Self::invert_char(c);
133 data *= #name::LEN_U218 + 1;
134 data += match inverted {
135 Ok(val) => val,
136 Err(err) => return Err(err),
137 };
138 if i == 0 {
139 break;
140 }
141 i -= 1;
142 }
143 Ok(#crate_path::CustomSymbol::from_raw(data))
144 }
145
146 pub const fn parse_chars_panic(chars: &[char]) -> #crate_path::CustomSymbol<#alphabet_len, #name> {
147 match Self::parse_chars(chars) {
148 Ok(sym) => sym,
149 Err(err) => panic!("{}", #crate_path::PARSING_ERROR_MSG),
150 }
151 }
152 }
153 }
154 .into()
155}