miden_assembly_syntax/ast/
ident.rs

1use alloc::{string::ToString, sync::Arc};
2use core::{
3    fmt,
4    hash::{Hash, Hasher},
5    str::FromStr,
6};
7
8use miden_core::utils::{
9    ByteReader, ByteWriter, Deserializable, DeserializationError, Serializable,
10};
11use miden_debug_types::{SourceSpan, Span, Spanned};
12
/// Represents the types of errors that can occur when parsing/validating an [Ident].
///
/// The `#[error(...)]` strings below are the user-facing messages rendered by the
/// `thiserror`-generated `Display` implementation.
#[derive(Debug, thiserror::Error)]
pub enum IdentError {
    /// The identifier was the empty string.
    #[error("invalid identifier: cannot be empty")]
    Empty,
    /// The identifier contains at least one character that is neither Unicode alphanumeric
    /// nor ASCII graphic. The offending identifier text is carried in `ident`.
    #[error(
        "invalid identifier '{ident}': must contain only unicode alphanumeric or ascii graphic characters"
    )]
    InvalidChars { ident: Arc<str> },
    /// The identifier is longer than `max` bytes.
    ///
    /// NOTE(review): [Ident::validate] does not construct this variant; presumably it is raised
    /// by more specialized identifier types - confirm against callers.
    #[error("invalid identifier: length exceeds the maximum of {max} bytes")]
    InvalidLength { max: usize },
    /// The identifier does not follow the casing convention required in its context; see
    /// [CaseKindError] for the individual conventions.
    ///
    /// NOTE(review): [Ident::validate] does not construct this variant; presumably it is raised
    /// by more specialized identifier types - confirm against callers.
    #[error("invalid identifier: {0}")]
    Casing(CaseKindError),
}
27
/// Represents the various types of casing errors that can occur, e.g. using an identifier
/// with `SCREAMING_CASE` where one with `snake_case` is expected.
///
/// Each variant names the convention that was _expected_; the message describes the rules of
/// that convention.
#[derive(Debug, thiserror::Error)]
pub enum CaseKindError {
    /// A `SCREAMING_CASE` identifier was expected.
    #[error(
        "only uppercase characters or underscores are allowed, and must start with an alphabetic character"
    )]
    Screaming,
    /// A `snake_case` identifier was expected.
    #[error(
        "only lowercase characters or underscores are allowed, and must start with an alphabetic character"
    )]
    Snake,
    /// A `camelCase` identifier (starting with a lowercase letter) was expected.
    #[error(
        "only alphanumeric characters are allowed, and must start with a lowercase alphabetic character"
    )]
    Camel,
}
45
/// Represents a generic identifier in Miden Assembly source code.
///
/// This type is used internally by all other specialized identifier types, e.g.
/// [super::ProcedureName], and enforces the baseline rules for identifiers in Miden Assembly.
///
/// All identifiers are associated with a source span, and are interned to the extent possible, i.e.
/// rather than allocating a new `String` for every use of the same identifier, we attempt to have
/// all such uses share a single reference-counted allocation. This interning is not perfect or
/// guaranteed globally, but generally holds within a given module. In the future we may make these
/// actually interned strings with a global interner, but for now it is simply best-effort.
///
/// Equality, ordering and hashing of an [Ident] consider only the identifier text; the source
/// span is ignored, so two uses of the same name at different locations compare equal.
#[derive(Clone)]
#[cfg_attr(
    all(feature = "arbitrary", test),
    miden_test_serde_macros::serde_test(winter_serde(true))
)]
pub struct Ident {
    /// The source span associated with this identifier.
    ///
    /// NOTE: To make use of this span, we need to know the context in which it was used, i.e.,
    /// either the containing module or procedure, both of which have a source file which we can
    /// use to render a source snippet for this span.
    ///
    /// If a span is not known, the default value is used, which has zero-length and thus will not
    /// be rendered as a source snippet.
    span: SourceSpan,
    /// The actual content of the identifier, stored in a reference-counted allocation so that
    /// cloning an [Ident] does not copy the text.
    name: Arc<str>,
}
74
75impl Ident {
76    /// Creates an [Ident] from `source`.
77    ///
78    /// This can fail if:
79    ///
80    /// * The identifier exceeds the maximum allowed identifier length
81    /// * The identifier contains something other than Unicode alphanumeric or ASCII graphic
82    ///   characters (e.g. whitespace, control)
83    pub fn new(source: impl AsRef<str>) -> Result<Self, IdentError> {
84        source.as_ref().parse()
85    }
86
87    /// Creates an [Ident] from `source`.
88    ///
89    /// This can fail if:
90    ///
91    /// * The identifier exceeds the maximum allowed identifier length
92    /// * The identifier contains something other than Unicode alphanumeric or ASCII graphic
93    ///   characters (e.g. whitespace, control)
94    pub fn new_with_span(span: SourceSpan, source: impl AsRef<str>) -> Result<Self, IdentError> {
95        source.as_ref().parse::<Self>().map(|id| id.with_span(span))
96    }
97
98    /// Sets the span for this identifier.
99    pub fn with_span(mut self, span: SourceSpan) -> Self {
100        self.span = span;
101        self
102    }
103
104    /// This allows constructing an [Ident] directly from a ref-counted string that is known to be
105    /// a valid identifier, and so does not require re-parsing/re-validating.
106    ///
107    /// This should _not_ be used to bypass validation, as other parts of the assembler still may
108    /// re-validate identifiers, notably during deserialization, and may result in a panic being
109    /// raised.
110    ///
111    /// NOTE: This function is perma-unstable, it may be removed or modified at any time.
112    pub fn from_raw_parts(name: Span<Arc<str>>) -> Self {
113        let (span, name) = name.into_parts();
114        Self { span, name }
115    }
116
117    /// Unwraps this [Ident], extracting the inner [`Arc<str>`].
118    pub fn into_inner(self) -> Arc<str> {
119        self.name
120    }
121
122    /// Returns the content of this identifier as a `str`.
123    pub fn as_str(&self) -> &str {
124        self.name.as_ref()
125    }
126
127    /// Applies the default [Ident] validation rules to `source`.
128    pub fn validate(source: impl AsRef<str>) -> Result<(), IdentError> {
129        let source = source.as_ref();
130        if source.is_empty() {
131            return Err(IdentError::Empty);
132        }
133        if !source.chars().all(|c| c.is_ascii_graphic() || c.is_alphanumeric()) {
134            return Err(IdentError::InvalidChars { ident: source.into() });
135        }
136        Ok(())
137    }
138}
139
140impl fmt::Debug for Ident {
141    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
142        f.debug_tuple("Ident").field(&self.name).finish()
143    }
144}
145
146impl Eq for Ident {}
147
148impl PartialEq for Ident {
149    fn eq(&self, other: &Self) -> bool {
150        self.name == other.name
151    }
152}
153
154impl Ord for Ident {
155    fn cmp(&self, other: &Self) -> core::cmp::Ordering {
156        self.name.cmp(&other.name)
157    }
158}
159
160impl PartialOrd for Ident {
161    fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
162        Some(self.cmp(other))
163    }
164}
165
166impl Hash for Ident {
167    fn hash<H: Hasher>(&self, state: &mut H) {
168        self.name.hash(state);
169    }
170}
171
172impl Spanned for Ident {
173    fn span(&self) -> SourceSpan {
174        self.span
175    }
176}
177
178impl core::ops::Deref for Ident {
179    type Target = str;
180
181    fn deref(&self) -> &Self::Target {
182        self.name.as_ref()
183    }
184}
185
186impl AsRef<str> for Ident {
187    #[inline]
188    fn as_ref(&self) -> &str {
189        &self.name
190    }
191}
192
193impl fmt::Display for Ident {
194    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
195        fmt::Display::fmt(&self.name, f)
196    }
197}
198
199impl FromStr for Ident {
200    type Err = IdentError;
201
202    fn from_str(s: &str) -> Result<Self, Self::Err> {
203        Self::validate(s)?;
204        let name = Arc::from(s.to_string().into_boxed_str());
205        Ok(Self { span: SourceSpan::default(), name })
206    }
207}
208
/// Serializes the identifier as a plain string; the source span is not serialized.
#[cfg(feature = "serde")]
impl serde::Serialize for Ident {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        let text: &str = &self.name;
        serializer.serialize_str(text)
    }
}
218
/// Deserializes an identifier from a string, re-validating it with [`Ident::new`].
///
/// The string is consumed through a visitor rather than as a borrowed `&'de str`, so this works
/// with deserializers that can only hand out transient or owned strings (e.g. reader-based
/// deserializers, or text formats that must unescape characters such as `"` and `\`, both of
/// which are valid identifier characters). Borrowed strings and UTF-8 byte slices are accepted
/// as well. The resulting identifier carries the default span.
#[cfg(feature = "serde")]
impl<'de> serde::Deserialize<'de> for Ident {
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        /// Visitor which validates any string-like input as an identifier.
        struct IdentVisitor;

        impl serde::de::Visitor<'_> for IdentVisitor {
            type Value = Ident;

            fn expecting(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
                f.write_str("a string containing a valid identifier")
            }

            // `visit_borrowed_str` and `visit_string` forward here by default, so this single
            // method covers borrowed, transient and owned strings.
            fn visit_str<E>(self, value: &str) -> Result<Self::Value, E>
            where
                E: serde::de::Error,
            {
                Ident::new(value).map_err(E::custom)
            }

            // Accept UTF-8 encoded bytes too, for parity with `&str` deserialization.
            fn visit_bytes<E>(self, value: &[u8]) -> Result<Self::Value, E>
            where
                E: serde::de::Error,
            {
                match core::str::from_utf8(value) {
                    Ok(text) => self.visit_str(text),
                    Err(_) => Err(E::invalid_value(serde::de::Unexpected::Bytes(value), &self)),
                }
            }
        }

        deserializer.deserialize_str(IdentVisitor)
    }
}
229
230impl Serializable for Ident {
231    fn write_into<W: ByteWriter>(&self, target: &mut W) {
232        target.write_usize(self.len());
233        target.write_bytes(self.as_bytes());
234    }
235}
236
237impl Deserializable for Ident {
238    fn read_from<R: ByteReader>(source: &mut R) -> Result<Self, DeserializationError> {
239        use alloc::string::ToString;
240
241        let len = source.read_usize()?;
242        let bytes = source.read_slice(len)?;
243        let id = core::str::from_utf8(bytes)
244            .map_err(|err| DeserializationError::InvalidValue(err.to_string()))?;
245        Self::new(id).map_err(|err| DeserializationError::InvalidValue(err.to_string()))
246    }
247}
248
/// Property-testing support: an `Arbitrary` implementation for [Ident] and the proptest
/// strategies used to generate random, valid identifiers.
#[cfg(feature = "arbitrary")]
pub(crate) mod testing {
    use alloc::string::String;

    use proptest::{char::CharStrategy, collection::vec, prelude::*};

    use super::*;

    /// Arbitrary identifiers are produced by [ident_any_random_length].
    impl Arbitrary for Ident {
        type Parameters = ();

        fn arbitrary_with(_args: Self::Parameters) -> Self::Strategy {
            ident_any_random_length().boxed()
        }

        type Strategy = BoxedStrategy<Self>;
    }

    // Our dictionary includes all ASCII graphic characters (0x21..=0x7E), as well as a variety
    // of unicode alphanumerics.
    //
    // `SPECIAL` is the flattened list of every character in `SPECIAL_RANGES`, i.e. the ASCII
    // punctuation characters (15 + 7 + 6 + 4 = 32 entries), computed at compile time.
    const SPECIAL: [char; 32] = const {
        // Pre-fill with a placeholder; every slot is overwritten as long as the ranges
        // contain exactly 32 characters in total.
        let mut buf = ['a'; 32];
        let mut idx = 0;
        let mut range_idx = 0;
        // `while` loops are used because iterators are not available in const evaluation.
        while range_idx < SPECIAL_RANGES.len() {
            let range = &SPECIAL_RANGES[range_idx];
            range_idx += 1;
            let mut j = *range.start() as u32;
            let end = *range.end() as u32;
            while j <= end {
                // SAFETY: `j` lies between the start and end of one of `SPECIAL_RANGES`, all of
                // which consist solely of ASCII code points, so `j` is a valid `char`.
                unsafe {
                    buf[idx] = char::from_u32_unchecked(j);
                }
                idx += 1;
                j += 1;
            }
        }
        buf
    };

    // ASCII punctuation: the ASCII graphic characters that are not letters or digits.
    const SPECIAL_RANGES: &[core::ops::RangeInclusive<char>] =
        &['!'..='/', ':'..='@', '['..='`', '{'..='~'];
    // ASCII letters.
    const PREFERRED_RANGES: &[core::ops::RangeInclusive<char>] = &['a'..='z', 'A'..='Z'];
    // ASCII digits plus a sample of non-ASCII lowercase letters; the gap between 'ö' and 'ø'
    // skips '÷' (U+00F7), which is not alphanumeric.
    const EXTRA_RANGES: &[core::ops::RangeInclusive<char>] = &['0'..='9', 'à'..='ö', 'ø'..='ÿ'];

    prop_compose! {
        /// A strategy to produce a random character from our valid dictionary, using the rules
        /// for selection provided by `CharStrategy`.
        fn ident_chars()
                      (c in CharStrategy::new_borrowed(
                          &SPECIAL,
                          PREFERRED_RANGES,
                          EXTRA_RANGES
                      )) -> char {
            c
        }
    }

    prop_compose! {
        /// A strategy to produce a raw `String` of at most `length` characters, containing any
        /// characters from our dictionary.
        ///
        /// The returned string will always contain at least 1 character.
        fn ident_raw_any(length: u32)
                        (chars in vec(ident_chars(), 1..=(length as usize))) -> String {
            String::from_iter(chars)
        }
    }

    prop_compose! {
        /// Generate a random identifier of between 1 and `length` characters, containing any
        /// characters from our dictionary.
        ///
        /// Candidates that fail [Ident::validate] are filtered out; the resulting identifier
        /// has an unknown source span.
        pub fn ident_any(length: u32)
                    (raw in ident_raw_any(length)
                        .prop_filter(
                            "identifiers must be valid",
                            |s| Ident::validate(s).is_ok()
                        )
                    ) -> Ident {
            Ident::from_raw_parts(Span::new(SourceSpan::UNKNOWN, raw.into_boxed_str().into()))
        }
    }

    prop_compose! {
        /// Generate a random identifier whose maximum length is itself chosen at random from
        /// `1..u8::MAX` (i.e. 1 to 254 characters), containing any characters from our
        /// dictionary.
        pub fn ident_any_random_length()
            (length in 1..u8::MAX)
            (id in ident_any(length as u32)) -> Ident {
            id
        }
    }
}