miden_assembly/ast/
ident.rs

1use alloc::{string::ToString, sync::Arc};
2use core::{
3    fmt,
4    hash::{Hash, Hasher},
5    str::FromStr,
6};
7
8use crate::{SourceSpan, Span, Spanned};
9
10/// Represents the types of errors that can occur when parsing/validating an [Ident]
11#[derive(Debug, thiserror::Error)]
12pub enum IdentError {
13    #[error("invalid identifier: cannot be empty")]
14    Empty,
15    #[error(
16        "invalid identifier '{ident}': must contain only unicode alphanumeric or ascii graphic characters"
17    )]
18    InvalidChars { ident: Arc<str> },
19    #[error("invalid identifier: length exceeds the maximum of {max} bytes")]
20    InvalidLength { max: usize },
21    #[error("invalid identifier: {0}")]
22    Casing(CaseKindError),
23}
24
25/// Represents the various types of casing errors that can occur, e.g. using an identifier
26/// with `SCREAMING_CASE` where one with `snake_case` is expected.
27#[derive(Debug, thiserror::Error)]
28pub enum CaseKindError {
29    #[error(
30        "only uppercase characters or underscores are allowed, and must start with an alphabetic character"
31    )]
32    Screaming,
33    #[error(
34        "only lowercase characters or underscores are allowed, and must start with an alphabetic character"
35    )]
36    Snake,
37    #[error(
38        "only alphanumeric characters are allowed, and must start with a lowercase alphabetic character"
39    )]
40    Camel,
41}
42
43/// Represents a generic identifier in Miden Assembly source code.
44///
45/// This type is used internally by all other specialized identifier types, e.g.
46/// [super::ProcedureName], and enforces the baseline rules for identifiers in Miden Assembly.
47///
48/// All identifiers are associated with a source span, and are interned to the extent possible, i.e.
49/// rather than allocating a new `String` for every use of the same identifier, we attempt to have
50/// all such uses share a single reference-counted allocation. This interning is not perfect or
51/// guaranteed globally, but generally holds within a given module. In the future we may make these
52/// actually interned strings with a global interner, but for now it is simply best-effort.
53#[derive(Clone)]
54pub struct Ident {
55    /// The source span associated with this identifier.
56    ///
57    /// NOTE: To make use of this span, we need to know the context in which it was used, i.e.,
58    /// either the containing module or procedure, both of which have a source file which we can
59    /// use to render a source snippet for this span.
60    ///
61    /// If a span is not known, the default value is used, which has zero-length and thus will not
62    /// be rendered as a source snippet.
63    span: SourceSpan,
64    /// The actual content of the identifier
65    name: Arc<str>,
66}
67
68impl Ident {
69    /// Creates an [Ident] from `source`.
70    ///
71    /// This can fail if:
72    ///
73    /// * The identifier exceeds the maximum allowed identifier length
74    /// * The identifier contains something other than Unicode alphanumeric or ASCII graphic
75    ///   characters (e.g. whitespace, control)
76    pub fn new(source: impl AsRef<str>) -> Result<Self, IdentError> {
77        source.as_ref().parse()
78    }
79
80    /// Creates an [Ident] from `source`.
81    ///
82    /// This can fail if:
83    ///
84    /// * The identifier exceeds the maximum allowed identifier length
85    /// * The identifier contains something other than Unicode alphanumeric or ASCII graphic
86    ///   characters (e.g. whitespace, control)
87    pub fn new_with_span(span: SourceSpan, source: impl AsRef<str>) -> Result<Self, IdentError> {
88        source.as_ref().parse::<Self>().map(|id| id.with_span(span))
89    }
90
91    /// Sets the span for this identifier.
92    pub fn with_span(mut self, span: SourceSpan) -> Self {
93        self.span = span;
94        self
95    }
96
97    /// This allows constructing an [Ident] directly from a ref-counted string that is known to be
98    /// a valid identifier, and so does not require re-parsing/re-validating.
99    ///
100    /// This should _not_ be used to bypass validation, as other parts of the assembler still may
101    /// re-validate identifiers, notably during deserialization, and may result in a panic being
102    /// raised.
103    ///
104    /// NOTE: This function is perma-unstable, it may be removed or modified at any time.
105    pub fn from_raw_parts(name: Span<Arc<str>>) -> Self {
106        let (span, name) = name.into_parts();
107        Self { span, name }
108    }
109
110    /// Unwraps this [Ident], extracting the inner [`Arc<str>`].
111    pub fn into_inner(self) -> Arc<str> {
112        self.name
113    }
114
115    /// Returns the content of this identifier as a `str`.
116    pub fn as_str(&self) -> &str {
117        self.name.as_ref()
118    }
119
120    /// Applies the default [Ident] validation rules to `source`.
121    pub fn validate(source: impl AsRef<str>) -> Result<(), IdentError> {
122        let source = source.as_ref();
123        if source.is_empty() {
124            return Err(IdentError::Empty);
125        }
126        if !source.chars().all(|c| c.is_ascii_graphic() || c.is_alphanumeric()) {
127            return Err(IdentError::InvalidChars { ident: source.into() });
128        }
129        Ok(())
130    }
131}
132
133impl fmt::Debug for Ident {
134    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
135        f.debug_tuple("Ident").field(&self.name).finish()
136    }
137}
138
139impl Eq for Ident {}
140
141impl PartialEq for Ident {
142    fn eq(&self, other: &Self) -> bool {
143        self.name == other.name
144    }
145}
146
147impl Ord for Ident {
148    fn cmp(&self, other: &Self) -> core::cmp::Ordering {
149        self.name.cmp(&other.name)
150    }
151}
152
153impl PartialOrd for Ident {
154    fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
155        Some(self.cmp(other))
156    }
157}
158
159impl Hash for Ident {
160    fn hash<H: Hasher>(&self, state: &mut H) {
161        self.name.hash(state);
162    }
163}
164
165impl Spanned for Ident {
166    fn span(&self) -> SourceSpan {
167        self.span
168    }
169}
170
171impl core::ops::Deref for Ident {
172    type Target = str;
173
174    fn deref(&self) -> &Self::Target {
175        self.name.as_ref()
176    }
177}
178
179impl AsRef<str> for Ident {
180    #[inline]
181    fn as_ref(&self) -> &str {
182        &self.name
183    }
184}
185
186impl fmt::Display for Ident {
187    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
188        fmt::Display::fmt(&self.name, f)
189    }
190}
191
192impl FromStr for Ident {
193    type Err = IdentError;
194
195    fn from_str(s: &str) -> Result<Self, Self::Err> {
196        Self::validate(s)?;
197        let name = Arc::from(s.to_string().into_boxed_str());
198        Ok(Self { span: SourceSpan::default(), name })
199    }
200}
201
/// Property-testing support for [Ident]: an `Arbitrary` implementation, and the strategies it is
/// built from.
#[cfg(feature = "testing")]
pub(crate) mod testing {
    use alloc::string::String;

    use proptest::{char::CharStrategy, collection::vec, prelude::*};

    use super::*;

    impl Arbitrary for Ident {
        type Parameters = ();
        type Strategy = BoxedStrategy<Self>;

        fn arbitrary_with(_args: Self::Parameters) -> Self::Strategy {
            ident_any_random_length().boxed()
        }
    }

    // Our dictionary includes all ASCII graphic characters (0x21..0x7E), as well as a variety
    // of unicode alphanumerics.

    /// The ASCII graphic characters which are not alphanumeric, i.e. the punctuation ranges.
    const SPECIAL_RANGES: &[core::ops::RangeInclusive<char>] =
        &['!'..='/', ':'..='@', '['..='`', '{'..='~'];
    /// The ASCII letters.
    const PREFERRED_RANGES: &[core::ops::RangeInclusive<char>] = &['a'..='z', 'A'..='Z'];
    /// The ASCII digits, plus a selection of non-ASCII alphabetic characters.
    const EXTRA_RANGES: &[core::ops::RangeInclusive<char>] = &['0'..='9', 'à'..='ö', 'ø'..='ÿ'];

    /// Every character of `SPECIAL_RANGES`, flattened into an array at compile time.
    const SPECIAL: [char; 32] = const {
        // The initial fill value is a placeholder; the assertion below guarantees that every slot
        // has been overwritten by the time the array is returned.
        let mut buf = ['a'; 32];
        let mut idx = 0;
        let mut range_idx = 0;
        while range_idx < SPECIAL_RANGES.len() {
            let range = &SPECIAL_RANGES[range_idx];
            range_idx += 1;
            let mut j = *range.start() as u32;
            let end = *range.end() as u32;
            while j <= end {
                // Use the checked conversion: it is usable in const context, and turns an
                // invalid code point into a compile-time error rather than undefined behavior.
                buf[idx] = match char::from_u32(j) {
                    Some(c) => c,
                    None => panic!("SPECIAL_RANGES must only span valid unicode scalar values"),
                };
                idx += 1;
                j += 1;
            }
        }
        // Too many characters already fails above (out-of-bounds index); too few would silently
        // leave placeholder values in the dictionary, so reject that as well.
        assert!(idx == buf.len(), "SPECIAL_RANGES must cover exactly `SPECIAL.len()` characters");
        buf
    };

    prop_compose! {
        /// A strategy to produce a random character from our valid dictionary, using the rules
        /// for selection provided by `CharStrategy`
        fn ident_chars()
                      (c in CharStrategy::new_borrowed(
                          &SPECIAL,
                          PREFERRED_RANGES,
                          EXTRA_RANGES
                      )) -> char {
            c
        }
    }

    prop_compose! {
        /// A strategy to produce a raw String of at most `length` characters, containing any
        /// characters from our dictionary.
        ///
        /// The returned string will always contain at least 1 character: a `length` of zero is
        /// treated as 1, so that the size range `1..=length` is never empty.
        fn ident_raw_any(length: u32)
                        (chars in vec(ident_chars(), 1..=(length.max(1) as usize))) -> String {
            String::from_iter(chars)
        }
    }

    prop_compose! {
        /// Generate a random identifier of at most `length` characters (and at least 1),
        /// containing any characters from our dictionary
        pub fn ident_any(length: u32)
                    (raw in ident_raw_any(length)
                        .prop_filter(
                            "identifiers must be valid",
                            |s| Ident::validate(s).is_ok()
                        )
                    ) -> Ident {
            Ident::from_raw_parts(Span::new(SourceSpan::UNKNOWN, raw.into_boxed_str().into()))
        }
    }

    prop_compose! {
        /// Generate a random identifier containing any characters from our dictionary, whose
        /// maximum length is itself chosen at random from `1..u8::MAX`
        pub fn ident_any_random_length()
            (length in 1..u8::MAX)
            (id in ident_any(length as u32)) -> Ident {
            id
        }
    }
}