Skip to main content

eure_document/
identifier.rs

1use alloc::borrow::Cow;
2use alloc::string::String;
3use alloc::string::ToString;
4use core::fmt::{self, Display};
5use regex::Regex;
6use thiserror::Error;
7
// Shared parser used by the `FromStr` impl when the `std` feature is enabled.
// `LazyLock` runs `IdentifierParser::init` (and therefore the regex compilation)
// on first access only, so repeated parses reuse the same compiled regex.
#[cfg(feature = "std")]
static IDENTIFIER_PARSER: std::sync::LazyLock<IdentifierParser> =
    std::sync::LazyLock::new(IdentifierParser::init);
11
/// A reusable parser and factory for [`Identifier`] values, intended for `no_std`
/// environments.
///
/// It wraps the compiled identifier regex: build it once with [`IdentifierParser::init`]
/// and reuse it for every [`IdentifierParser::parse`] call.
/// If you are using `std`, prefer `Identifier::from_str` or `str::parse` instead.
pub struct IdentifierParser(Regex);
15
16impl IdentifierParser {
17    /// Initialize the parser. This internally compiles a regex, so don't call this in a hot path.
18    /// Prefer using `FromStr` impl for `Identifier` if you are using `std`.
19    pub fn init() -> Self {
20        Self(Regex::new(r"^[\p{XID_Start}_][\p{XID_Continue}-]*").unwrap())
21    }
22
23    pub fn parse(&self, s: &str) -> Result<Identifier, IdentifierError> {
24        // Check if starts with $ (would be parsed as extension)
25        if s.starts_with('$') {
26            return Err(IdentifierError::InvalidChar {
27                at: 0,
28                invalid_char: '$',
29            });
30        }
31
32        let Some(matches) = self.0.find(s) else {
33            if let Some(c) = s.chars().next() {
34                return Err(IdentifierError::InvalidChar {
35                    at: 0,
36                    invalid_char: c,
37                });
38            } else {
39                return Err(IdentifierError::Empty);
40            }
41        };
42        if matches.len() == s.len() {
43            Ok(Identifier(Cow::Owned(matches.as_str().to_string())))
44        } else {
45            // matches.end() is a byte index, but we need a character index for the error.
46            // Count how many characters are in the matched portion.
47            let char_index = matches.as_str().chars().count();
48            // Get the invalid character from the remainder of the string.
49            let invalid_char = s[matches.end()..].chars().next().unwrap();
50            Err(IdentifierError::InvalidChar {
51                at: char_index,
52                invalid_char,
53            })
54        }
55    }
56}
57
58impl core::str::FromStr for Identifier {
59    type Err = IdentifierError;
60
61    fn from_str(s: &str) -> Result<Self, Self::Err> {
62        #[cfg(feature = "std")]
63        {
64            IDENTIFIER_PARSER.parse(s)
65        }
66        #[cfg(not(feature = "std"))]
67        {
68            IdentifierParser::init().parse(s)
69        }
70    }
71}
72
/// An Eure identifier.
///
/// The text is stored as a `Cow<'static, str>`: borrowed when created through
/// `Identifier::new_unchecked`, owned when produced by parsing. Equality, ordering and
/// hashing are all derived from that wrapped value.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Identifier(Cow<'static, str>);
75
/// Reasons a string can be rejected as an identifier.
#[derive(Debug, Clone, PartialEq, Eq, Error)]
pub enum IdentifierError {
    /// The input string was empty.
    #[error("Empty identifier")]
    Empty,
    /// The input contains a character that is not allowed at its position.
    #[error("Invalid character for identifier: {invalid_char} at {at}")]
    InvalidChar {
        /// Zero-based index of the offending character within the input string.
        /// `IdentifierParser::parse` reports this as a character index, not a byte index.
        at: usize,
        /// The character that was rejected.
        invalid_char: char,
    },
}
88
89impl Identifier {
90    // Don't write any extension not in the language spec e.g. $optional defined in Eure Schema
91    pub const VARIANT: Self = Self::new_unchecked("variant");
92    pub const SCHEMA: Self = Self::new_unchecked("schema");
93
94    /// Creates a new Identifier without validation.
95    ///
96    /// This function is intended for creating compile-time constants where the
97    /// identifier string is known to be valid. The caller should ensure that the
98    /// string is a valid identifier according to Eure rules:
99    /// - Must start with XID_Start character or underscore
100    /// - Can contain XID_Continue characters or hyphens
101    /// - Must not start with $
102    ///
103    /// Note: This function is not marked `unsafe` because passing an invalid string
104    /// does not cause memory unsafety - it only results in a logically invalid Identifier.
105    pub const fn new_unchecked(s: &'static str) -> Self {
106        Identifier(Cow::Borrowed(s))
107    }
108
109    pub fn into_string(self) -> String {
110        self.0.into()
111    }
112}
113
114impl Display for Identifier {
115    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
116        write!(f, "{}", self.0)
117    }
118}
119
120impl AsRef<str> for Identifier {
121    fn as_ref(&self) -> &str {
122        &self.0
123    }
124}
125
#[cfg(test)]
mod tests {
    use core::str::FromStr;

    use super::*;

    /// The owned identifier that a successful parse of `text` is expected to yield.
    fn owned(text: &str) -> Identifier {
        Identifier(Cow::Owned(String::from(text)))
    }

    #[test]
    fn test_identifier() {
        let parsed = Identifier::from_str("hello");
        assert_eq!(parsed, Ok(owned("hello")));
    }

    #[test]
    fn test_identifier_with_hyphen() {
        let parsed = Identifier::from_str("hello-world");
        assert_eq!(parsed, Ok(owned("hello-world")));
    }

    #[test]
    fn test_identifier_おーい() {
        let parsed = Identifier::from_str("おーい");
        assert_eq!(parsed, Ok(owned("おーい")));
    }

    #[test]
    fn test_identifier_error() {
        // The space sits right after the seven characters of "invalid".
        let expected = IdentifierError::InvalidChar {
            at: 7,
            invalid_char: ' ',
        };
        assert_eq!(Identifier::from_str("invalid identifier"), Err(expected));
    }

    #[test]
    fn test_identifier_invalid_first_char() {
        let expected = IdentifierError::InvalidChar {
            at: 0,
            invalid_char: '1',
        };
        assert_eq!(Identifier::from_str("1hello"), Err(expected));
    }

    #[test]
    fn test_identifier_error_empty() {
        let parsed = Identifier::from_str("");
        assert_eq!(parsed, Err(IdentifierError::Empty));
    }

    #[test]
    fn test_identifier_accept_literals() {
        // Literal keywords are ordinary identifiers as far as this parser is concerned.
        for literal in ["true", "false", "null"] {
            assert_eq!(Identifier::from_str(literal), Ok(owned(literal)));
        }
    }

    #[test]
    fn test_identifier_reject_dollar_prefix() {
        let expected = IdentifierError::InvalidChar {
            at: 0,
            invalid_char: '$',
        };
        assert_eq!(Identifier::from_str("$id"), Err(expected));
    }

    #[test]
    fn test_identifier_new_unchecked() {
        // Construction must be usable in a `const` context.
        const TEST_ID: Identifier = Identifier::new_unchecked("test-const");
        assert_eq!(TEST_ID.as_ref(), "test-const");

        // The same constructor also works at runtime.
        let runtime_id = Identifier::new_unchecked("borrowed");
        assert_eq!(runtime_id.as_ref(), "borrowed");
    }

    #[test]
    fn test_empty_string_returns_empty_error() {
        assert_eq!(Identifier::from_str(""), Err(IdentifierError::Empty));
    }
}
226
// Property-based tests for identifier parsing, driven by `proptest`.
#[cfg(test)]
mod proptests {
    // Link `std` explicitly for the test-only helpers imported below
    // (presumably because the crate itself is `no_std` — confirm against the crate root).
    extern crate std;

    use super::*;
    use core::str::FromStr;
    use proptest::prelude::*;
    use std::format;
    use std::string::String;
    // NOTE(review): `vec` is not referenced by name in this module; presumably it keeps
    // `vec!` resolvable inside proptest macro expansions — confirm before removing.
    use std::vec;

    /// Characters valid as the first character of an identifier (XID_Start or underscore).
    /// We use a representative subset of XID_Start for efficiency.
    fn xid_start_char() -> impl Strategy<Value = char> {
        prop_oneof![
            // ASCII letters
            prop::char::range('a', 'z'),
            prop::char::range('A', 'Z'),
            // Underscore (explicitly allowed)
            Just('_'),
            // Some Unicode XID_Start characters
            Just('α'), // Greek
            Just('β'),
            Just('お'), // Japanese hiragana
            Just('日'), // CJK
            Just('é'),  // Latin extended
            Just('ñ'),
        ]
    }

    /// Characters valid in the continuation of an identifier (XID_Continue or hyphen).
    /// Like `xid_start_char`, this is a representative subset rather than the full set.
    fn xid_continue_char() -> impl Strategy<Value = char> {
        prop_oneof![
            // ASCII letters and digits
            prop::char::range('a', 'z'),
            prop::char::range('A', 'Z'),
            prop::char::range('0', '9'),
            // Underscore and hyphen
            Just('_'),
            Just('-'),
            // Some Unicode XID_Continue characters
            Just('α'),
            Just('β'),
            Just('ー'), // Japanese prolonged sound mark (XID_Continue)
            Just('日'),
            Just('é'),
        ]
    }

    /// Strategy to generate valid identifiers: one start character followed by
    /// 0 to 19 continuation characters.
    fn valid_identifier() -> impl Strategy<Value = String> {
        (
            xid_start_char(),
            proptest::collection::vec(xid_continue_char(), 0..20),
        )
            .prop_map(|(first, rest)| {
                let mut s = String::with_capacity(1 + rest.len());
                s.push(first);
                s.extend(rest);
                s
            })
    }

    /// Characters that are invalid as the first character of an identifier.
    fn invalid_first_char() -> impl Strategy<Value = char> {
        prop_oneof![
            // Digits
            prop::char::range('0', '9'),
            // Dollar sign (reserved for extensions)
            Just('$'),
            // Common invalid punctuation
            Just(' '),
            Just('\t'),
            Just('\n'),
            Just('.'),
            Just(','),
            Just('!'),
            Just('@'),
            Just('#'),
            Just('%'),
            Just('^'),
            Just('&'),
            Just('*'),
            Just('('),
            Just(')'),
            Just('='),
            Just('+'),
            Just('['),
            Just(']'),
            Just('{'),
            Just('}'),
            Just('|'),
            Just('\\'),
            Just('/'),
            Just('<'),
            Just('>'),
            Just('?'),
            Just(':'),
            Just(';'),
            Just('"'),
            Just('\''),
        ]
    }

    /// Characters that are invalid in the continuation of an identifier.
    ///
    /// This is the same whitespace/punctuation set as `invalid_first_char`, without the
    /// digits (the parser accepts them after the first character).
    fn invalid_continue_char() -> impl Strategy<Value = char> {
        prop_oneof![
            // Common invalid characters
            Just(' '),
            Just('\t'),
            Just('\n'),
            Just('.'),
            Just(','),
            Just('!'),
            Just('@'),
            Just('#'),
            Just('$'),
            Just('%'),
            Just('^'),
            Just('&'),
            Just('*'),
            Just('('),
            Just(')'),
            Just('='),
            Just('+'),
            Just('['),
            Just(']'),
            Just('{'),
            Just('}'),
            Just('|'),
            Just('\\'),
            Just('/'),
            Just('<'),
            Just('>'),
            Just('?'),
            Just(':'),
            Just(';'),
            Just('"'),
            Just('\''),
        ]
    }

    proptest! {
        /// Valid identifiers should always parse successfully.
        #[test]
        fn valid_identifiers_parse_successfully(s in valid_identifier()) {
            let result = Identifier::from_str(&s);
            prop_assert!(result.is_ok(), "Failed to parse valid identifier: {:?}", s);
        }

        /// Parsed identifiers should round-trip correctly (parse -> to_string -> parse).
        #[test]
        fn round_trip_stability(s in valid_identifier()) {
            let id1 = Identifier::from_str(&s).expect("should parse");
            let string_repr = id1.to_string();
            let id2 = Identifier::from_str(&string_repr).expect("should re-parse");
            prop_assert_eq!(id1.as_ref(), id2.as_ref(), "Round-trip failed for: {:?}", s);
        }

        /// Identifiers starting with invalid characters should be rejected with error at position 0.
        #[test]
        fn invalid_first_char_rejected(
            first in invalid_first_char(),
            rest in proptest::collection::vec(xid_continue_char(), 0..10)
        ) {
            let mut s = String::with_capacity(1 + rest.len());
            s.push(first);
            s.extend(rest);

            let result = Identifier::from_str(&s);
            prop_assert!(result.is_err(), "Should reject invalid first char: {:?}", s);

            if let Err(IdentifierError::InvalidChar { at, invalid_char }) = result {
                prop_assert_eq!(at, 0, "Error position should be 0 for invalid first char");
                prop_assert_eq!(invalid_char, first, "Reported char should match first char");
            } else {
                prop_assert!(false, "Expected InvalidChar error, got {:?}", result);
            }
        }

        /// Identifiers with invalid characters in the middle should be rejected at the correct position.
        #[test]
        fn invalid_middle_char_rejected(
            prefix_len in 1usize..10,
            invalid in invalid_continue_char()
        ) {
            // Build a valid ASCII prefix (`a` followed by `b`s), so `prefix_len` is also
            // the character index at which the invalid character ends up.
            let prefix: String = (0..prefix_len)
                .map(|i| if i == 0 { 'a' } else { 'b' })
                .collect();

            let mut s = prefix.clone();
            s.push(invalid);
            s.push_str("suffix");

            let result = Identifier::from_str(&s);
            prop_assert!(result.is_err(), "Should reject invalid middle char: {:?}", s);

            if let Err(IdentifierError::InvalidChar { at, invalid_char }) = result {
                prop_assert_eq!(at, prefix_len, "Error position should be at the invalid char position");
                prop_assert_eq!(invalid_char, invalid, "Reported char should match invalid char");
            } else {
                prop_assert!(false, "Expected InvalidChar error, got {:?}", result);
            }
        }

        /// Dollar prefix should always be rejected with InvalidChar at position 0.
        #[test]
        fn dollar_prefix_always_rejected(rest in "[a-zA-Z0-9_-]*") {
            let s = format!("${}", rest);
            let result = Identifier::from_str(&s);

            match result {
                Err(IdentifierError::InvalidChar { at: 0, invalid_char: '$' }) => {
                    // Expected
                }
                _ => {
                    prop_assert!(false, "Dollar prefix should return InvalidChar at 0, got {:?}", result);
                }
            }
        }

        /// For InvalidChar errors, the position should always be within character bounds.
        /// Note: `at` is a character index (not byte index), so we compare against chars().count().
        #[test]
        fn error_position_within_bounds(s in ".+") {
            // Inputs that parse successfully or fail with `Empty` are simply skipped here.
            if let Err(IdentifierError::InvalidChar { at, invalid_char }) = Identifier::from_str(&s) {
                let char_count = s.chars().count();
                prop_assert!(at < char_count, "Error position {} out of bounds for string with {} chars", at, char_count);
                // Verify the character at that position matches
                let actual_char = s.chars().nth(at);
                prop_assert_eq!(actual_char, Some(invalid_char), "Char at position {} should match reported char", at);
            }
        }

        /// AsRef<str> should return the same string that was parsed.
        #[test]
        fn as_ref_returns_original_string(s in valid_identifier()) {
            let id = Identifier::from_str(&s).expect("should parse");
            prop_assert_eq!(id.as_ref(), s.as_str());
        }

        /// Display implementation should match AsRef<str>.
        #[test]
        fn display_matches_as_ref(s in valid_identifier()) {
            let id = Identifier::from_str(&s).expect("should parse");
            prop_assert_eq!(id.to_string(), id.as_ref());
        }
    }
}