eure_document/
identifier.rs

1use alloc::borrow::Cow;
2use alloc::string::String;
3use alloc::string::ToString;
4use core::fmt::{self, Display};
5use regex::Regex;
6use thiserror::Error;
7
/// Shared parser used by the `FromStr` impl for `Identifier` when the `std` feature is enabled.
///
/// Wrapped in `LazyLock` so the regex is compiled once, on first access, instead of per parse.
#[cfg(feature = "std")]
static IDENTIFIER_PARSER: std::sync::LazyLock<IdentifierParser> =
    std::sync::LazyLock::new(IdentifierParser::init);
11
/// A parser and factory API for identifiers intended for no_std environments.
/// Prefer using `Identifier::from_str` and `.parse()` methods if you are using `std`.
///
/// Holds the compiled regex that recognizes an identifier prefix; build it once with
/// `IdentifierParser::init` and reuse it for every `parse` call.
pub struct IdentifierParser(Regex);
15
16impl IdentifierParser {
17    /// Initialize the parser. This internally compiles a regex, so don't call this in a hot path.
18    /// Prefer using `FromStr` impl for `Identifier` if you are using `std`.
19    pub fn init() -> Self {
20        Self(Regex::new(r"^[\p{XID_Start}_][\p{XID_Continue}-]*").unwrap())
21    }
22
23    pub fn parse(&self, s: &str) -> Result<Identifier, IdentifierError> {
24        // Check if starts with $ (would be parsed as extension)
25        if s.starts_with('$') {
26            return Err(IdentifierError::InvalidChar {
27                at: 0,
28                invalid_char: '$',
29            });
30        }
31
32        let Some(matches) = self.0.find(s) else {
33            if let Some(c) = s.chars().next() {
34                return Err(IdentifierError::InvalidChar {
35                    at: 0,
36                    invalid_char: c,
37                });
38            } else {
39                return Err(IdentifierError::Empty);
40            }
41        };
42        if matches.len() == s.len() {
43            Ok(Identifier(Cow::Owned(matches.as_str().to_string())))
44        } else {
45            Err(IdentifierError::InvalidChar {
46                at: matches.end(),
47                invalid_char: s.chars().nth(matches.end()).unwrap(),
48            })
49        }
50    }
51}
52
53impl core::str::FromStr for Identifier {
54    type Err = IdentifierError;
55
56    fn from_str(s: &str) -> Result<Self, Self::Err> {
57        #[cfg(feature = "std")]
58        {
59            IDENTIFIER_PARSER.parse(s)
60        }
61        #[cfg(not(feature = "std"))]
62        {
63            IdentifierParser::init().parse(s)
64        }
65    }
66}
67
/// An identifier string.
///
/// The text is stored as `Cow<'static, str>`, so it can either borrow a `'static` string
/// (see `Identifier::new_unchecked`) or own a `String` (as produced by parsing).
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Identifier(Cow<'static, str>);
70
/// Reasons a string is rejected as an identifier.
#[derive(Debug, Clone, PartialEq, Eq, Error)]
pub enum IdentifierError {
    /// The input string was empty.
    #[error("Empty identifier")]
    Empty,
    /// The input contained a character that is not allowed at its position.
    #[error("Invalid character for identifier: {invalid_char} at {at}")]
    InvalidChar {
        /// byte offset in the input string at which the problem was found
        at: usize,
        /// the invalid character
        invalid_char: char,
    },
}
83
84impl Identifier {
85    /// Creates a new Identifier without validation.
86    ///
87    /// This function is intended for creating compile-time constants where the
88    /// identifier string is known to be valid. The caller should ensure that the
89    /// string is a valid identifier according to Eure rules:
90    /// - Must start with XID_Start character or underscore
91    /// - Can contain XID_Continue characters or hyphens
92    /// - Must not start with $
93    ///
94    /// Note: This function is not marked `unsafe` because passing an invalid string
95    /// does not cause memory unsafety - it only results in a logically invalid Identifier.
96    pub const fn new_unchecked(s: &'static str) -> Self {
97        Identifier(Cow::Borrowed(s))
98    }
99
100    pub fn into_string(self) -> String {
101        self.0.into()
102    }
103}
104
105impl Display for Identifier {
106    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
107        write!(f, "{}", self.0)
108    }
109}
110
111impl AsRef<str> for Identifier {
112    fn as_ref(&self) -> &str {
113        &self.0
114    }
115}
116
#[cfg(test)]
mod tests {
    use core::str::FromStr;

    use super::*;

    /// The result a successful parse of `text` is expected to produce.
    fn accepted(text: &str) -> Result<Identifier, IdentifierError> {
        Ok(Identifier(Cow::Owned(text.to_string())))
    }

    /// The result a parse rejected at offset `at` because of `invalid_char` is expected to produce.
    fn rejected(at: usize, invalid_char: char) -> Result<Identifier, IdentifierError> {
        Err(IdentifierError::InvalidChar { at, invalid_char })
    }

    #[test]
    fn test_identifier() {
        assert_eq!(Identifier::from_str("hello"), accepted("hello"));
    }

    #[test]
    fn test_identifier_with_hyphen() {
        assert_eq!(Identifier::from_str("hello-world"), accepted("hello-world"));
    }

    #[test]
    fn test_identifier_おーい() {
        assert_eq!(Identifier::from_str("おーい"), accepted("おーい"));
    }

    #[test]
    fn test_identifier_error() {
        assert_eq!(Identifier::from_str("invalid identifier"), rejected(7, ' '));
    }

    #[test]
    fn test_identifier_invalid_first_char() {
        assert_eq!(Identifier::from_str("1hello"), rejected(0, '1'));
    }

    #[test]
    fn test_identifier_error_empty() {
        assert_eq!(Identifier::from_str(""), Err(IdentifierError::Empty));
    }

    #[test]
    fn test_identifier_accept_literals() {
        // Keyword-like words are still plain identifiers at this level.
        for literal in ["true", "false", "null"] {
            assert_eq!(Identifier::from_str(literal), accepted(literal));
        }
    }

    #[test]
    fn test_identifier_reject_dollar_prefix() {
        assert_eq!(Identifier::from_str("$id"), rejected(0, '$'));
    }

    #[test]
    fn test_identifier_new_unchecked() {
        // Construction must be usable in a const context.
        const TEST_ID: Identifier = Identifier::new_unchecked("test-const");
        assert_eq!(TEST_ID.as_ref(), "test-const");

        // The same constructor also works at runtime with a borrowed literal.
        let runtime_id = Identifier::new_unchecked("borrowed");
        assert_eq!(runtime_id.as_ref(), "borrowed");
    }
}