Skip to main content

spacetimedb_schema/
identifier.rs

1use crate::error::IdentifierError;
2use spacetimedb_data_structures::map::{Equivalent, HashSet};
3use spacetimedb_sats::raw_identifier::RawIdentifier;
4use spacetimedb_sats::{impl_deserialize, impl_serialize, impl_st};
5use std::fmt::{self, Debug, Display};
6use std::ops::Deref;
7use unicode_ident::{is_xid_continue, is_xid_start};
8use unicode_normalization::UnicodeNormalization;
9
lazy_static::lazy_static! {
    /// The set of names that [`Identifier::new`] rejects as reserved.
    ///
    /// The contents of `reserved_identifiers.txt` are embedded at compile time via
    /// `include_str!`, and every line of that file becomes one entry of the set.
    /// [`Identifier::is_reserved`] uppercases its argument before the lookup, so the
    /// entries are presumably stored in uppercase -- TODO confirm against the file.
    ///
    /// TODO(1.0): Pull in the rest of the reserved identifiers from the Identifier Proposal once that's merged.
    static ref RESERVED_IDENTIFIERS: HashSet<&'static str> = include_str!("reserved_identifiers.txt").lines().collect();
}
14
/// A valid SpacetimeDB Identifier.
///
/// Identifiers must be normalized according to [Unicode Standard Annex 15](https://www.unicode.org/reports/tr15/), normalization form C
/// (Canonical Decomposition followed by Canonical Composition).
/// Following Rust, we use the identifier rules defined by [Unicode Standard Annex 31](https://www.unicode.org/reports/tr31/tr31-37.html) to validate identifiers.
/// We allow underscores as well as any XID_Start character to start an identifier.
///
/// In addition, we forbid the use of any identifier reserved by [PostgreSQL](https://www.postgresql.org/docs/current/sql-keywords-appendix.html).
/// Any string that is converted into a reserved word by the Rust function
/// [`String::to_uppercase`](https://doc.rust-lang.org/std/string/struct.String.html#method.to_uppercase) will be rejected.
///
/// The list of reserved words is embedded at compile time from the file `reserved_identifiers.txt`,
/// which is resolved relative to this source file (it is pulled in with `include_str!`).
///
/// Internally, this is just a raw identifier with some validation on construction.
/// Note that [`Identifier::new_assume_valid`] bypasses that validation entirely.
#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Identifier {
    /// The underlying name; validated only when the value was built through [`Identifier::new`].
    id: RawIdentifier,
}
33
// SATS integration for `Identifier`.
// Its type is whatever `RawIdentifier::make_type` produces, and it is serialized as a plain string.
impl_st!([] Identifier, ts => RawIdentifier::make_type(ts));
impl_serialize!([] Identifier, (self, ser) => ser.serialize_str(&self.id));
// NOTE(review): deserialization wraps the raw value with `new_assume_valid`, so none of the
// checks performed by `Identifier::new` are applied here -- presumably deserialized input is
// trusted or validated elsewhere; confirm.
impl_deserialize!([] Identifier, de => RawIdentifier::deserialize(de).map(Self::new_assume_valid));
37
38impl Identifier {
39    /// Returns a new identifier without validating the input.
40    pub fn new_assume_valid(name: RawIdentifier) -> Self {
41        Self { id: name }
42    }
43
44    /// Validates that the input string is a valid identifier.
45    ///
46    /// Currently, this rejects non-canonicalized identifiers.
47    /// Eventually, it will be changed to canonicalize the input string.
48    pub fn new(name: RawIdentifier) -> Result<Self, IdentifierError> {
49        if name.is_empty() {
50            return Err(IdentifierError::Empty {});
51        }
52
53        // Convert to Unicode Normalization Form C (canonical decomposition followed by composition).
54        if name.nfc().zip(name.chars()).any(|(a, b)| a != b) {
55            return Err(IdentifierError::NotCanonicalized { name });
56        }
57
58        let mut chars = name.chars();
59
60        let start = chars.next().ok_or(IdentifierError::Empty {})?;
61        if !is_xid_start(start) && start != '_' {
62            return Err(IdentifierError::InvalidStart {
63                name,
64                invalid_start: start,
65            });
66        }
67
68        for char_ in chars {
69            if !is_xid_continue(char_) {
70                return Err(IdentifierError::InvalidContinue {
71                    name,
72                    invalid_continue: char_,
73                });
74            }
75        }
76
77        if Identifier::is_reserved(&name) {
78            return Err(IdentifierError::Reserved { name });
79        }
80
81        Ok(Identifier { id: name })
82    }
83
84    pub fn for_test(name: impl AsRef<str>) -> Self {
85        Identifier::new(RawIdentifier::new(name.as_ref())).unwrap()
86    }
87
88    /// Returns the raw identifier of this identifier.
89    pub fn as_raw(&self) -> &RawIdentifier {
90        &self.id
91    }
92
93    /// Check if a string is a reserved identifier.
94    pub fn is_reserved(name: &str) -> bool {
95        RESERVED_IDENTIFIERS.contains(&*name.to_uppercase())
96    }
97}
98
99impl Debug for Identifier {
100    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
101        Debug::fmt(&self.id, f)
102    }
103}
104
105impl Display for Identifier {
106    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
107        Display::fmt(&self.id, f)
108    }
109}
110
111impl Deref for Identifier {
112    type Target = str;
113
114    fn deref(&self) -> &str {
115        &self.id
116    }
117}
118
119impl Equivalent<Identifier> for str {
120    fn equivalent(&self, other: &Identifier) -> bool {
121        self == &other.id[..]
122    }
123}
124
125impl From<Identifier> for RawIdentifier {
126    fn from(id: Identifier) -> Self {
127        id.id
128    }
129}
130
#[cfg(test)]
mod tests {
    use super::*;
    use proptest::prelude::*;

    /// Shorthand for running [`Identifier::new`] on a string slice.
    fn new(s: &str) -> Result<Identifier, IdentifierError> {
        Identifier::new(RawIdentifier::new(s))
    }

    #[test]
    fn test_a_bunch_of_identifiers() {
        let accepted = [
            "friends",
            "Oysters",
            "_hello",
            "bananas_there_",
            "Москва",
            "東京",
            "bees123",
        ];
        for name in &accepted {
            assert!(new(name).is_ok(), "expected {:?} to be accepted", name);
        }

        let rejected = [
            "",
            "123bees",
            "\u{200B}hello", // zero-width space
            " hello",
            "hello ",
            "🍌", // ;-; the unicode committee is no fun
        ];
        for name in &rejected {
            assert!(new(name).is_err(), "expected {:?} to be rejected", name);
        }
    }

    #[test]
    fn test_canonicalization() {
        // `A` followed by a combining ring above: decomposed, hence not in NFC.
        assert!(new("_\u{0041}\u{030A}").is_err());
        // canonicalized version of the above.
        assert!(new("_\u{00C5}").is_ok());
    }

    proptest! {
        #[test]
        fn test_standard_ascii_identifiers(s in "[a-zA-Z_][a-zA-Z0-9_]*") {
            // Ha! Proptest will reliably find these.
            prop_assume!(!Identifier::is_reserved(&s));

            prop_assert!(new(&s).is_ok());
        }
    }
}