Skip to main content

miden_assembly_syntax/ast/
ident.rs

1use alloc::{string::ToString, sync::Arc};
2use core::{
3    fmt,
4    hash::{Hash, Hasher},
5    str::FromStr,
6};
7
8use miden_core::serde::{
9    ByteReader, ByteWriter, Deserializable, DeserializationError, Serializable,
10};
11use miden_debug_types::{SourceSpan, Span, Spanned};
12
/// Represents the types of errors that can occur when parsing/validating an [Ident]
#[derive(Debug, thiserror::Error)]
pub enum IdentError {
    /// The identifier was the empty string.
    #[error("invalid identifier: cannot be empty")]
    Empty,
    /// The identifier contained a character rejected by validation; `ident` carries the
    /// offending input so it can be shown in the error message.
    #[error(
        "invalid identifier '{ident}': must contain only unicode alphanumeric or ascii graphic characters"
    )]
    InvalidChars { ident: Arc<str> },
    /// The identifier was longer than `max` bytes.
    ///
    /// NOTE(review): nothing visible in this file constructs this variant; presumably it is
    /// raised by more specialized identifier types elsewhere - confirm.
    #[error("invalid identifier: length exceeds the maximum of {max} bytes")]
    InvalidLength { max: usize },
    /// The identifier did not follow the casing convention required by its context.
    #[error("invalid identifier: {0}")]
    Casing(CaseKindError),
}
27
/// Represents the various types of casing errors that can occur, e.g. using an identifier
/// with `SCREAMING_CASE` where one with `snake_case` is expected.
///
/// Each variant names the casing convention that was expected; its message describes the
/// rules of that convention.
#[derive(Debug, thiserror::Error)]
pub enum CaseKindError {
    /// A `SCREAMING_CASE` identifier was expected.
    #[error(
        "only uppercase characters or underscores are allowed, and must start with an alphabetic character"
    )]
    Screaming,
    /// A `snake_case` identifier was expected.
    #[error(
        "only lowercase characters or underscores are allowed, and must start with an alphabetic character"
    )]
    Snake,
    /// A `camelCase` identifier was expected.
    #[error(
        "only alphanumeric characters are allowed, and must start with a lowercase alphabetic character"
    )]
    Camel,
}
45
/// Represents a generic identifier in Miden Assembly source code.
///
/// This type is used internally by all other specialized identifier types, e.g.
/// [super::ProcedureName], and enforces the baseline rules for identifiers in Miden Assembly.
///
/// All identifiers are associated with a source span, and are interned to the extent possible, i.e.
/// rather than allocating a new `String` for every use of the same identifier, we attempt to have
/// all such uses share a single reference-counted allocation. This interning is not perfect or
/// guaranteed globally, but generally holds within a given module. In the future we may make these
/// actually interned strings with a global interner, but for now it is simply best-effort.
///
/// NOTE: The `PartialEq`, `Ord` and `Hash` implementations in this file consider only the
/// identifier's name; the span never participates in comparisons or hashing.
#[derive(Clone)]
#[cfg_attr(
    all(feature = "arbitrary", test),
    miden_test_serde_macros::serde_test(binary_serde(true))
)]
pub struct Ident {
    /// The source span associated with this identifier.
    ///
    /// NOTE: To make use of this span, we need to know the context in which it was used, i.e.,
    /// either the containing module or procedure, both of which have a source file which we can
    /// use to render a source snippet for this span.
    ///
    /// If a span is not known, the default value is used, which has zero-length and thus will not
    /// be rendered as a source snippet.
    span: SourceSpan,
    /// The actual content of the identifier, stored in a shared, reference-counted allocation
    /// so that cloning an [Ident] does not copy the string.
    name: Arc<str>,
}
74
75impl Ident {
76    /// Reserved name for a main procedure.
77    pub const MAIN: &'static str = "$main";
78
79    /// Creates an [Ident] from `source`.
80    ///
81    /// This can fail if:
82    ///
83    /// * The identifier exceeds the maximum allowed identifier length
84    /// * The identifier contains something other than Unicode alphanumeric or ASCII graphic
85    ///   characters (e.g. whitespace, control)
86    pub fn new(source: impl AsRef<str>) -> Result<Self, IdentError> {
87        source.as_ref().parse()
88    }
89
90    /// Creates an [Ident] from `source`.
91    ///
92    /// This can fail if:
93    ///
94    /// * The identifier exceeds the maximum allowed identifier length
95    /// * The identifier contains something other than Unicode alphanumeric or ASCII graphic
96    ///   characters (e.g. whitespace, control)
97    pub fn new_with_span(span: SourceSpan, source: impl AsRef<str>) -> Result<Self, IdentError> {
98        source.as_ref().parse::<Self>().map(|id| id.with_span(span))
99    }
100
101    /// Sets the span for this identifier.
102    pub fn with_span(mut self, span: SourceSpan) -> Self {
103        self.span = span;
104        self
105    }
106
107    /// This allows constructing an [Ident] directly from a ref-counted string that is known to be
108    /// a valid identifier, and so does not require re-parsing/re-validating.
109    ///
110    /// This should _not_ be used to bypass validation, as other parts of the assembler still may
111    /// re-validate identifiers, notably during deserialization, and may result in a panic being
112    /// raised.
113    ///
114    /// NOTE: This function is perma-unstable, it may be removed or modified at any time.
115    pub fn from_raw_parts(name: Span<Arc<str>>) -> Self {
116        let (span, name) = name.into_parts();
117        Self { span, name }
118    }
119
120    /// Unwraps this [Ident], extracting the inner [`Arc<str>`].
121    pub fn into_inner(self) -> Arc<str> {
122        self.name
123    }
124
125    /// Returns the content of this identifier as a `str`.
126    pub fn as_str(&self) -> &str {
127        self.name.as_ref()
128    }
129
130    /// Returns true if this identifier is a valid constant identifier
131    pub fn is_constant_ident(&self) -> bool {
132        self.name
133            .chars()
134            .all(|c| c.is_ascii_uppercase() || c.is_ascii_digit() || c == '_')
135    }
136
137    /// Returns true if this identifier must be quoted in Miden Assembly syntax
138    pub fn requires_quoting(ident: impl AsRef<str>) -> bool {
139        match ident.as_ref() {
140            crate::Path::KERNEL_PATH
141            | crate::Path::EXEC_PATH
142            | crate::ast::ProcedureName::MAIN_PROC_NAME => false,
143            ident => !ident.chars().all(|c| c.is_ascii_alphanumeric() || c == '_'),
144        }
145    }
146
147    /// Applies the default [Ident] validation rules to `source`.
148    pub fn validate(source: impl AsRef<str>) -> Result<(), IdentError> {
149        let source = source.as_ref();
150        if source.is_empty() {
151            return Err(IdentError::Empty);
152        }
153        if !source
154            .chars()
155            .all(|c| (c.is_ascii_graphic() || c.is_alphanumeric()) && c != '#')
156        {
157            return Err(IdentError::InvalidChars { ident: source.into() });
158        }
159        Ok(())
160    }
161}
162
163impl fmt::Debug for Ident {
164    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
165        f.debug_tuple("Ident").field(&self.name).finish()
166    }
167}
168
169impl Eq for Ident {}
170
171impl PartialEq for Ident {
172    fn eq(&self, other: &Self) -> bool {
173        self.name == other.name
174    }
175}
176
177impl Ord for Ident {
178    fn cmp(&self, other: &Self) -> core::cmp::Ordering {
179        self.name.cmp(&other.name)
180    }
181}
182
183impl PartialOrd for Ident {
184    fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
185        Some(self.cmp(other))
186    }
187}
188
189impl Hash for Ident {
190    fn hash<H: Hasher>(&self, state: &mut H) {
191        self.name.hash(state);
192    }
193}
194
195impl Spanned for Ident {
196    fn span(&self) -> SourceSpan {
197        self.span
198    }
199}
200
201impl core::ops::Deref for Ident {
202    type Target = str;
203
204    fn deref(&self) -> &Self::Target {
205        self.name.as_ref()
206    }
207}
208
209impl AsRef<str> for Ident {
210    #[inline]
211    fn as_ref(&self) -> &str {
212        &self.name
213    }
214}
215
216impl fmt::Display for Ident {
217    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
218        if Self::requires_quoting(&self.name) {
219            write!(f, "\"{}\"", &self.name.escape_debug())
220        } else {
221            f.write_str(&self.name)
222        }
223    }
224}
225
226impl crate::prettier::PrettyPrint for Ident {
227    fn render(&self) -> crate::prettier::Document {
228        use crate::prettier::*;
229        display(self)
230    }
231}
232
233impl FromStr for Ident {
234    type Err = IdentError;
235
236    fn from_str(s: &str) -> Result<Self, Self::Err> {
237        Self::validate(s)?;
238        let name = Arc::from(s.to_string().into_boxed_str());
239        Ok(Self { span: SourceSpan::default(), name })
240    }
241}
242
243impl From<Ident> for miden_utils_diagnostics::miette::SourceSpan {
244    fn from(value: Ident) -> Self {
245        value.span.into()
246    }
247}
248
#[cfg(feature = "serde")]
impl serde::Serialize for Ident {
    /// Serializes the identifier as a plain string containing its name; the span is not
    /// serialized.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        let name: &str = &self.name;
        serializer.serialize_str(name)
    }
}
258
#[cfg(feature = "serde")]
impl<'de> serde::Deserialize<'de> for Ident {
    /// Deserializes an identifier from a string and validates it with [Ident::new].
    ///
    /// A visitor is used rather than deserializing to `&'de str`, because a borrowed `&str`
    /// can only be produced when the input holds the string verbatim. Formats that must
    /// unescape the value (e.g. a JSON string containing `\"`, which is a legal identifier
    /// character) or that read from a stream hand out transient or owned strings, which the
    /// `&str` implementation rejects. The visitor accepts borrowed, transient and owned
    /// strings alike, as well as UTF-8 encoded bytes.
    ///
    /// # Errors
    ///
    /// Returns a custom deserializer error if the value is not a string (or valid UTF-8
    /// bytes), or if the string is not a valid identifier.
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        /// Visitor that turns any string-like input into a validated [Ident].
        struct IdentVisitor;

        impl<'v> serde::de::Visitor<'v> for IdentVisitor {
            type Value = Ident;

            fn expecting(&self, f: &mut fmt::Formatter) -> fmt::Result {
                f.write_str("a string containing a valid identifier")
            }

            // `visit_borrowed_str` and `visit_string` forward here by default, so this single
            // method covers every way a deserializer may present a string.
            fn visit_str<E>(self, value: &str) -> Result<Self::Value, E>
            where
                E: serde::de::Error,
            {
                Ident::new(value).map_err(E::custom)
            }

            // The `&str` implementation also accepted borrowed UTF-8 bytes; keep accepting
            // bytes so formats that encode strings that way continue to work.
            fn visit_bytes<E>(self, bytes: &[u8]) -> Result<Self::Value, E>
            where
                E: serde::de::Error,
            {
                match core::str::from_utf8(bytes) {
                    Ok(value) => self.visit_str(value),
                    Err(_) => Err(E::invalid_value(serde::de::Unexpected::Bytes(bytes), &self)),
                }
            }
        }

        deserializer.deserialize_str(IdentVisitor)
    }
}
269
270impl Serializable for Ident {
271    fn write_into<W: ByteWriter>(&self, target: &mut W) {
272        target.write_usize(self.len());
273        target.write_bytes(self.as_bytes());
274    }
275}
276
277impl Deserializable for Ident {
278    fn read_from<R: ByteReader>(source: &mut R) -> Result<Self, DeserializationError> {
279        use alloc::string::ToString;
280
281        let len = source.read_usize()?;
282        let bytes = source.read_slice(len)?;
283        let id = core::str::from_utf8(bytes)
284            .map_err(|err| DeserializationError::InvalidValue(err.to_string()))?;
285        Self::new(id).map_err(|err| DeserializationError::InvalidValue(err.to_string()))
286    }
287}
288
#[cfg(test)]
mod tests {
    use super::*;

    /// An identifier containing a double quote must be rendered quoted, with the inner quote
    /// escaped by a backslash.
    #[test]
    fn ident_with_quotes_is_properly_escaped() {
        let rendered = Ident::new("a\"b").unwrap().to_string();
        let expected = "\"a\\\"b\"";
        assert_eq!(rendered, expected);
    }
}
300
301#[cfg(feature = "arbitrary")]
302pub mod arbitrary {
303    use alloc::{borrow::Cow, string::String};
304
305    use proptest::{char::CharStrategy, collection::vec, prelude::*};
306
307    use super::*;
308
309    impl Arbitrary for Ident {
310        type Parameters = ();
311
312        fn arbitrary_with(_args: Self::Parameters) -> Self::Strategy {
313            ident_any_random_length().boxed()
314        }
315
316        type Strategy = BoxedStrategy<Self>;
317    }
318
    // Our dictionary includes all ASCII graphic characters (0x21..0x7E), as well as a variety
    // of unicode alphanumerics.
    //
    // `SPECIAL` is the ASCII punctuation part of that dictionary: it is built at compile time
    // by flattening every range in `SPECIAL_RANGES` into a single array. The ranges contain
    // 15 + 7 + 6 + 4 = 32 characters, which is exactly the array length.
    const SPECIAL: [char; 32] = const {
        // 'a' is only a placeholder; every slot is overwritten by the loops below.
        let mut buf = ['a'; 32];
        // Next free slot in `buf`.
        let mut idx = 0;
        // `while` loops are used because this block is evaluated in a const context.
        let mut range_idx = 0;
        while range_idx < SPECIAL_RANGES.len() {
            let range = &SPECIAL_RANGES[range_idx];
            range_idx += 1;
            let mut j = *range.start() as u32;
            let end = *range.end() as u32;
            while j <= end {
                // SAFETY: `j` lies between the two `char` bounds of a range from
                // `SPECIAL_RANGES`, all of which are ASCII, so it is a valid `char`.
                unsafe {
                    buf[idx] = char::from_u32_unchecked(j);
                }
                idx += 1;
                j += 1;
            }
        }
        buf
    };
340
    // The four runs of ASCII punctuation/symbol characters; flattened into `SPECIAL`.
    const SPECIAL_RANGES: &[core::ops::RangeInclusive<char>] =
        &['!'..='/', ':'..='@', '['..='`', '{'..='~'];
    // ASCII letters, passed as the "preferred" ranges of the character strategies below.
    const PREFERRED_RANGES: &[core::ops::RangeInclusive<char>] = &['a'..='z', 'A'..='Z'];
    // ASCII digits plus two runs of accented Latin-1 letters, passed as the general ranges of
    // `ident_chars`.
    const EXTRA_RANGES: &[core::ops::RangeInclusive<char>] = &['0'..='9', 'à'..='ö', 'ø'..='ÿ'];

    // Constant identifiers are restricted to uppercase ASCII letters and ASCII digits (the
    // underscore is supplied separately by `const_ident_chars`).
    const PREFERRED_CONSTANT_RANGES: &[core::ops::RangeInclusive<char>] = &['A'..='Z'];
    const EXTRA_CONSTANT_RANGES: &[core::ops::RangeInclusive<char>] = &['0'..='9'];
348
349    prop_compose! {
350        /// A strategy to produce a random character from a more restricted dictionary for bare
351        /// identifiers
352        fn bare_ident_chars()
353                      (c in CharStrategy::new_borrowed(
354                          &['_'],
355                          PREFERRED_RANGES,
356                          &['0'..='9']
357                      )) -> char {
358            c
359        }
360    }
361
362    prop_compose! {
363        /// A strategy to produce a random character from our valid dictionary, using the rules
364        /// for selection provided by `CharStrategy`
365        fn ident_chars()
366                      (c in CharStrategy::new_borrowed(
367                          &SPECIAL,
368                          PREFERRED_RANGES,
369                          EXTRA_RANGES
370                      )) -> char {
371            c
372        }
373    }
374
375    prop_compose! {
376        /// Like `ident_chars`, but for constants
377        fn const_ident_chars()
378                      (c in CharStrategy::new_borrowed(
379                          &['_'],
380                          PREFERRED_CONSTANT_RANGES,
381                          EXTRA_CONSTANT_RANGES
382                      )) -> char {
383            c
384        }
385    }
386
387    prop_compose! {
388        /// A strategy to produce a raw String of no more than length `length` bytes, containing any
389        /// characters from our dictionary.
390        ///
391        /// The returned string will always be at least 1 characters.
392        fn ident_raw_any(length: u32)
393                        ((leading_char, rest) in (
394                            proptest::char::ranges(Cow::Borrowed(&['a'..='z', '_'..='_'])),
395                            vec(ident_chars(), 0..=(length as usize))
396                        )) -> String {
397            let mut buf = String::with_capacity(length as usize);
398            buf.push(leading_char);
399            for c in rest {
400                if !buf.is_empty() && buf.len() + c.len_utf8() > length as usize {
401                    break;
402                }
403                buf.push(c);
404            }
405            buf
406        }
407    }
408
409    prop_compose! {
410        /// Like `ident_raw_any`, but for bare identifiers
411        fn bare_ident_raw_any(length: u32)
412                        ((leading_char, rest) in (
413                            proptest::char::range('a', 'z'),
414                            vec(bare_ident_chars(), 0..=(length as usize))
415                        )) -> String {
416            let mut buf = String::with_capacity(length as usize);
417            buf.push(leading_char);
418            for c in rest {
419                if !buf.is_empty() && buf.len() + c.len_utf8() > length as usize {
420                    break;
421                }
422                buf.push(c);
423            }
424            buf
425        }
426    }
427
428    prop_compose! {
429        /// Like `ident_raw_any`, but for constants
430        fn const_ident_raw_any(length: u32)
431                        ((leading_char, rest) in (
432                            proptest::char::range('A', 'Z'),
433                            vec(const_ident_chars(), 0..=(length as usize))
434                        )) -> String {
435            let mut buf = String::with_capacity(length as usize);
436            buf.push(leading_char);
437            for c in rest {
438                if !buf.is_empty() && buf.len() + c.len_utf8() > length as usize {
439                    break;
440                }
441                buf.push(c);
442            }
443            buf
444        }
445    }
446
447    prop_compose! {
448        /// Generate a random identifier of `length` containing any characters from our dictionary
449        pub fn ident_any(length: u32)
450                    (raw in ident_raw_any(length)
451                        .prop_filter(
452                            "identifiers must be valid",
453                            |s| Ident::validate(s).is_ok()
454                        )
455                    ) -> Ident {
456            Ident::from_raw_parts(Span::new(SourceSpan::UNKNOWN, raw.into_boxed_str().into()))
457        }
458    }
459
460    prop_compose! {
461        /// Generate a random bare identifier of `length` containing any characters from our
462        /// dictionary
463        pub fn bare_ident_any(length: u32)
464                    (raw in bare_ident_raw_any(length)
465                        .prop_filter(
466                            "identifiers must be valid",
467                            |s| Ident::validate(s).is_ok()
468                        )
469                    ) -> Ident {
470            Ident::from_raw_parts(Span::new(SourceSpan::UNKNOWN, raw.into_boxed_str().into()))
471        }
472    }
473
474    prop_compose! {
475        /// Generate a random constant identifier of `length` containing any characters from our
476        /// constant dictionary
477        pub fn const_ident_any(length: u32)
478                    (raw in const_ident_raw_any(length)
479                        .prop_filter(
480                            "identifiers must be valid",
481                            |s| Ident::validate(s).is_ok()
482                        )
483                    ) -> Ident {
484            let id = Ident::from_raw_parts(Span::new(SourceSpan::UNKNOWN, raw.into_boxed_str().into()));
485            assert!(id.is_constant_ident());
486            id
487        }
488    }
489
490    prop_compose! {
491        /// Generate a random type identifier corresponding to of one of the built-in types
492        pub fn builtin_type_any()
493                                (name in prop_oneof![
494                                    Just(crate::ast::types::Type::I1),
495                                    Just(crate::ast::types::Type::I8),
496                                    Just(crate::ast::types::Type::U8),
497                                    Just(crate::ast::types::Type::I16),
498                                    Just(crate::ast::types::Type::U16),
499                                    Just(crate::ast::types::Type::I32),
500                                    Just(crate::ast::types::Type::U32),
501                                    Just(crate::ast::types::Type::I64),
502                                    Just(crate::ast::types::Type::U64),
503                                    Just(crate::ast::types::Type::I128),
504                                    Just(crate::ast::types::Type::U128),
505                                    Just(crate::ast::types::Type::Felt),
506                                ]) -> Ident {
507            Ident::from_raw_parts(Span::new(SourceSpan::UNKNOWN, name.to_string().into_boxed_str().into()))
508        }
509    }
510
511    prop_compose! {
512        /// Generate a random identifier of `length` containing any characters from our dictionary
513        pub fn ident_any_random_length()
514            (length in 1..u8::MAX)
515            (id in ident_any(length as u32)) -> Ident {
516            id
517        }
518    }
519
520    prop_compose! {
521        /// Generate a random bare identifier of `length` containing any characters from our
522        /// dictionary
523        pub fn bare_ident_any_random_length()
524            (length in 1..u8::MAX)
525            (id in ident_any(length as u32)) -> Ident {
526            id
527        }
528    }
529
530    prop_compose! {
531        /// Generate a random constant identifier of `length` containing any characters from our
532        /// dictionary
533        pub fn const_ident_any_random_length()
534            (length in 1..u8::MAX)
535            (id in const_ident_any(length as u32)) -> Ident {
536            id
537        }
538    }
539}