proof_of_sql_parser/
identifier.rs

1use crate::{sql::IdentifierParser, ParseError, ParseResult};
2use alloc::{format, string::ToString};
3use arrayvec::ArrayString;
4use core::{cmp::Ordering, fmt, ops::Deref, str::FromStr};
5use sqlparser::ast::Ident;
6
7/// Top-level unique identifier.
8#[derive(Debug, PartialEq, Eq, Clone, Hash, Ord, PartialOrd, Copy)]
9pub struct Identifier {
10    name: ArrayString<64>,
11}
12
13impl Identifier {
14    /// Constructor for [Identifier]
15    ///
16    /// Note: this constructor should be private within the `proof_of_sql_parser` crate.
17    /// This is necessary to guarantee that no one outside the crate
18    /// can create Names, thus securing that [`ResourceId`]s and [`Identifier`]s
19    /// are always valid postgresql identifiers.
20    ///
21    /// # Panics
22    ///
23    /// This function will panic if:
24    /// - The provided string is too long to fit into the internal `ArrayString`.
25    pub(crate) fn new<S: AsRef<str>>(string: S) -> Self {
26        Self {
27            name: ArrayString::from(&string.as_ref().to_lowercase()).expect("Identifier too long"),
28        }
29    }
30
31    /// An alias for [`Identifier::from_str`], provided for convenience.
32    ///
33    /// # Errors
34    /// Returns a `ParseResult::Err` if the input string does not meet the requirements for a valid identifier.
35    /// This may include errors such as invalid characters or incorrect formatting based on the specific rules
36    /// that `Identifier::from_str` enforces.
37    pub fn try_new<S: AsRef<str>>(string: S) -> ParseResult<Self> {
38        Self::from_str(string.as_ref())
39    }
40
41    /// The name of this [Identifier]
42    /// It already implements [Deref] to [str], so this method is not necessary for most use cases.
43    #[must_use]
44    pub fn name(&self) -> &str {
45        self.name.as_str()
46    }
47
48    /// An alias for [`Identifier::name`], provided for convenience.
49    #[must_use]
50    pub fn as_str(&self) -> &str {
51        self.name()
52    }
53}
54
55impl FromStr for Identifier {
56    type Err = ParseError;
57
58    fn from_str(string: &str) -> ParseResult<Self> {
59        let name = IdentifierParser::new()
60            .parse(string)
61            .map_err(|e| ParseError::IdentifierParseError{ error:
62                format!("failed to parse identifier, (you may have used a reserved keyword as an ID, i.e. 'timestamp') {e:?}")})?;
63
64        Ok(Identifier::new(name))
65    }
66}
67crate::impl_serde_from_str!(Identifier);
68
69impl fmt::Display for Identifier {
70    fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
71        self.name.fmt(f)
72    }
73}
74
75// TryFrom<Ident> for Identifier
76impl TryFrom<Ident> for Identifier {
77    type Error = ParseError;
78
79    fn try_from(ident: Ident) -> ParseResult<Self> {
80        // Convert Ident's value to Identifier
81        Identifier::try_new(ident.value)
82    }
83}
84
85impl PartialEq<str> for Identifier {
86    fn eq(&self, other: &str) -> bool {
87        other.eq_ignore_ascii_case(&self.name)
88    }
89}
90
91impl PartialOrd<str> for Identifier {
92    fn partial_cmp(&self, other: &str) -> Option<Ordering> {
93        self.name.partial_cmp(other.to_lowercase().as_str())
94    }
95}
96
97impl Deref for Identifier {
98    type Target = str;
99
100    fn deref(&self) -> &Self::Target {
101        self.name.as_str()
102    }
103}
104
105impl AsRef<str> for Identifier {
106    fn as_ref(&self) -> &str {
107        self.name.as_str()
108    }
109}
110
/// Unit tests for [`Identifier`]: parsing, display, serde round-trips, the
/// crate-internal constructor and the conversion from `sqlparser` idents.
#[cfg(test)]
mod tests {
    use super::*;
    use alloc::{borrow::ToOwned, vec, vec::Vec};

    /// Parses `input` into an [`Identifier`], panicking when it is rejected.
    #[track_caller]
    fn parsed(input: &str) -> Identifier {
        Identifier::from_str(input).unwrap()
    }

    /// Returns `true` when `Identifier::from_str` rejects `input`.
    fn is_rejected(input: &str) -> bool {
        Identifier::from_str(input).is_err()
    }

    #[test]
    fn from_str_identifier() {
        let identifier = parsed("GOOD_IDENTIFIER13AD_IDENTIFIER");
        assert_eq!(identifier.name(), "good_identifier13ad_identifier");
    }

    #[test]
    fn from_str_identifier_with_additional_characters_fails() {
        assert!(is_rejected("GOOD_IDENTIFIER."));
        assert!(is_rejected("BAD$IDENTIFIER"));
        assert!(is_rejected("BAD IDENTIFIER"));
    }

    #[test]
    fn identifier_from_str() {
        let identifier = parsed("G00d_identifier");
        assert_eq!(identifier.name(), "g00d_identifier");
    }

    #[test]
    fn display_identifier() {
        let upper_case = parsed("GOOD_IDENTIFIER");
        assert_eq!(upper_case.to_string(), "good_identifier");

        let leading_underscore = parsed("_can_Start_with_underscore");
        assert_eq!(leading_underscore.to_string(), "_can_start_with_underscore");
    }

    #[test]
    fn we_can_parse_valid_identifiers_with_white_spaces_at_beginning_or_end() {
        let leading_space = parsed(" GOOD_IDENTIFIER");
        assert_eq!(leading_space.name(), "good_identifier");

        let trailing_space = parsed("GOOD_IDENTIFIER ");
        assert_eq!(trailing_space.name(), "good_identifier");
    }

    #[test]
    fn we_cannot_parse_invalid_identifiers() {
        assert!(is_rejected(""));
        assert!(is_rejected("."));
        assert!(is_rejected("GOOD_IDENTIFIER:GOOD_IDENTIFIER"));
        assert!(is_rejected("BAD$IDENTIFIER"));
        assert!(is_rejected("BAD_IDENT!FIER"));
        assert!(is_rejected("BAD IDENTIFIER"));
        assert!(is_rejected("13AD_IDENTIFIER"));
        assert!(is_rejected("$AD_IDENTIFIER"));
        assert!(is_rejected("GOOD_IDENTIFIER."));
        assert!(is_rejected(".GOOD_IDENTIFIER"));

        let over_long = "LONG_IDENTIFIER_OVER_64_CHARACTERS".repeat(12);
        assert!(is_rejected(&over_long));

        // Reserved keywords must not be accepted as identifiers.
        let keywords: &[&str] = &[
            "all",
            "asc",
            "desc",
            "as",
            "and",
            "from",
            "not",
            "or",
            "select",
            "where",
            "order",
            "by",
            "limit",
            "offset",
            "group",
            "min",
            "max",
            "count",
            "sum",
            "true",
            "false",
            "timestamp",
            "to_timestamp",
        ];

        for keyword in keywords {
            assert!(
                is_rejected(keyword),
                "Should not parse keyword as identifier: {keyword}"
            );
        }
    }

    #[test]
    fn serialize_works() {
        let json = serde_json::to_string(&parsed("GOOD_IDENTIFIER")).unwrap();
        assert_eq!(json, r#""good_identifier""#);
    }

    #[test]
    fn deserialize_works() {
        let expected = parsed("GOOD_IDENTIFIER");
        let from_json: Identifier = serde_json::from_str(r#""good_identifier""#).unwrap();
        assert_eq!(expected, from_json);
    }

    #[test]
    fn deserialize_fails_on_invalid_identifier() {
        let outcome = serde_json::from_str::<Identifier>(r#""BAD IDENTIFIER""#);
        assert!(outcome.is_err());
    }

    #[test]
    fn deserialize_fails_on_empty_string() {
        let outcome = serde_json::from_str::<Identifier>(r#""""#);
        assert!(outcome.is_err());
    }

    #[test]
    fn deserialize_fails_on_long_identifier() {
        let over_long = "LONG_IDENTIFIER_OVER_64_CHARACTERS".repeat(12);
        let json = format!(r#""{over_long}""#);
        let outcome = serde_json::from_str::<Identifier>(&json);
        assert!(outcome.is_err());
    }

    #[test]
    fn deserialize_works_in_a_type_parameter() {
        let from_json: Vec<Identifier> =
            serde_json::from_str(r#"[ "good_identifier" ]"#).unwrap();
        let expected = vec![parsed("GOOD_IDENTIFIER")];
        assert_eq!(from_json, expected);
    }

    #[test]
    fn strings_are_lower_case_when_converted_to_names() {
        let from_lower = Identifier::new("sxt");
        let from_upper = Identifier::new("SXT");
        let from_mixed = Identifier::new("sXt".to_owned());
        // Whatever the input case, the stored name is lower case.
        assert_eq!(from_lower, from_upper);
        assert_eq!(from_lower, from_mixed);
        assert_eq!(from_lower.name(), "sxt");
    }

    #[test]
    #[should_panic(expected = "Identifier too long: CapacityError: insufficient capacity")]
    fn long_names_panic() {
        // 65 bytes: one more than the capacity.
        let too_long = "t".repeat(65);
        Identifier::new(too_long);
    }

    #[test]
    #[should_panic(expected = "Identifier too long: CapacityError: insufficient capacity")]
    fn long_unicode_names_panic() {
        // 22 three-byte characters = 66 bytes.
        let too_long = "茶".repeat(22);
        Identifier::new(too_long);
    }

    #[test]
    fn short_names_are_fine() {
        // Exactly 64 bytes of ASCII, and 21 three-byte characters (63 bytes).
        let ascii = "t".repeat(64);
        let unicode = "茶".repeat(21);
        Identifier::new(ascii);
        Identifier::new(unicode);
    }

    #[test]
    fn try_from_ident() {
        let converted = Identifier::try_from(Ident::new("ValidIdentifier")).unwrap();
        assert_eq!(converted.name(), "valididentifier");

        let rejected = Identifier::try_from(Ident::new("INVALID$IDENTIFIER"));
        assert!(rejected.is_err());
    }
}