moverox_types/ident_str.rs
1// Copyright (c) The Diem Core Contributors
2// Copyright (c) The Move Contributors
3// Copyright (c) Mysten Labs, Inc.
4// SPDX-License-Identifier: Apache-2.0
5
6use std::borrow::Borrow;
7use std::fmt;
8
9use ref_cast::RefCast;
10
11use crate::Identifier;
12
13// =============================================================================
14// IdentStr
15// =============================================================================
16
/// A borrowed identifier.
///
/// An identifier is the name of an entity (module, resource, function, etc) in Move.
///
/// A valid identifier consists of an ASCII string which satisfies any of the conditions:
///
/// * The first character is a letter and the remaining characters are letters, digits or
///   underscores.
/// * The first character is an underscore, and there is at least one further letter, digit or
///   underscore.
/// * The string is exactly `<SELF>`, which the validation in this module accepts as a special
///   case.
///
/// The spec for allowed identifiers is similar to Rust's spec
/// ([as of version 1.38](https://doc.rust-lang.org/1.38.0/reference/identifiers.html)).
///
/// Allowed identifiers are currently restricted to ASCII due to unresolved issues with Unicode
/// normalization. See [Rust issue #55467](https://github.com/rust-lang/rust/issues/55467) and the
/// associated RFC for some discussion. Unicode identifiers may eventually be supported once these
/// issues are worked out.
///
/// This module only determines allowed identifiers at the bytecode level. Move source code will
/// likely be more restrictive than even this, with a "raw identifier" escape hatch similar to
/// Rust's `r#` identifiers.
///
/// Among other things, identifiers are used to:
/// * specify keys for lookups in storage
/// * do cross-module lookups while executing transactions
///
/// The type is a `#[repr(transparent)]` wrapper around `str`; the `RefCast` derive and the
/// reference transmutes in this module depend on that layout.
#[derive(Debug, Eq, Hash, Ord, PartialEq, PartialOrd, RefCast)]
#[repr(transparent)]
pub struct IdentStr(str);
46
47impl IdentStr {
48 pub fn new(s: &str) -> Result<&Self, InvalidIdentifierError> {
49 if Self::is_valid(s) {
50 Ok(Self::ref_cast(s))
51 } else {
52 Err(InvalidIdentifierError(s.to_owned()))
53 }
54 }
55
56 /// Compile-time validated constructor from static string slice.
57 ///
58 /// ### Example
59 ///
60 /// Creating a valid static or const [`IdentStr`]:
61 ///
62 /// ```rust
63 /// use moverox_types::IdentStr;
64 /// const VALID_IDENT: &'static IdentStr = IdentStr::cast("MyCoolIdentifier");
65 ///
66 /// const THING_NAME: &'static str = "thing_name";
67 /// const THING_IDENT: &'static IdentStr = IdentStr::cast(THING_NAME);
68 /// ```
69 ///
70 /// In contrast, creating an invalid [`IdentStr`] will fail at compile time:
71 ///
72 /// ```rust,compile_fail
73 /// use moverox_types::IdentStr;
74 /// const INVALID_IDENT: &'static IdentStr = IdentStr::cast("123Foo"); // Fails to compile!
75 /// ```
76 pub const fn cast(s: &'static str) -> &'static Self {
77 // Only valid identifier strings are allowed.
78 if !is_valid(s) {
79 panic!("String is not a valid Move identifier")
80 }
81
82 // SAFETY: the following transmute is safe because
83 // (1) it's equivalent to the unsafe-reborrow inside IdentStr::ref_cast()
84 // (which we can't use b/c it's not const).
85 // (2) we've just asserted that IdentStr impls RefCast<From = str>, which
86 // already guarantees the transmute is safe (RefCast checks that
87 // IdentStr(str) is #[repr(transparent)]).
88 // (3) both in and out lifetimes are 'static, so we're not widening the lifetime.
89 // (4) we've just asserted that the IdentStr passes the is_valid check.
90 unsafe { ::std::mem::transmute::<&'static str, &'static Self>(s) }
91 }
92
93 /// Returns true if this string is a valid identifier.
94 pub fn is_valid(s: impl AsRef<str>) -> bool {
95 is_valid(s.as_ref())
96 }
97
98 /// Returns the length of `self` in bytes.
99 pub const fn len(&self) -> usize {
100 self.0.len()
101 }
102
103 /// Returns `true` if `self` has a length of zero bytes.
104 pub const fn is_empty(&self) -> bool {
105 self.0.is_empty()
106 }
107
108 /// Converts `self` to a `&str`.
109 ///
110 /// This is not implemented as a `From` trait to discourage automatic conversions -- these
111 /// conversions should not typically happen.
112 pub const fn as_str(&self) -> &str {
113 &self.0
114 }
115
116 /// Converts `self` to a byte slice.
117 pub const fn as_bytes(&self) -> &[u8] {
118 self.0.as_bytes()
119 }
120}
121
122impl Borrow<IdentStr> for Identifier {
123 fn borrow(&self) -> &IdentStr {
124 let s = self.as_str();
125 // SAFETY: same reason as in `IdentStr::cast`
126 unsafe { ::std::mem::transmute::<&str, &IdentStr>(s) }
127 }
128}
129
130impl ToOwned for IdentStr {
131 type Owned = Identifier;
132
133 fn to_owned(&self) -> Identifier {
134 Identifier::new(&self.0).expect("Identifier validity ensured by IdentStr")
135 }
136}
137
138impl fmt::Display for IdentStr {
139 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
140 write!(f, "{}", &self.0)
141 }
142}
143
144// =============================================================================
145// Errors
146// =============================================================================
147
/// Error returned by [`IdentStr::new`] when the input is not a valid identifier.
///
/// The wrapped `String` is an owned copy of the rejected input and is interpolated into the
/// error message.
#[derive(thiserror::Error, Debug)]
#[error("Invalid identifier '{0}'")]
pub struct InvalidIdentifierError(pub String);
151
152// =============================================================================
153// Other
154// =============================================================================
155
/// Reports whether `c` may appear in a Move identifier: an underscore, an ASCII letter or an
/// ASCII digit.
///
/// Note: the rules for the *first* character of an identifier are stricter (a digit may not
/// come first, and a lone underscore is not an identifier); those are enforced by `is_valid`,
/// not here.
#[inline]
const fn is_valid_identifier_char(c: char) -> bool {
    c == '_' || c.is_ascii_alphanumeric()
}
164
165/// Returns `true` if all bytes in `b` after the offset `start_offset` are valid
166/// ASCII identifier characters.
167const fn all_bytes_valid(b: &[u8], start_offset: usize) -> bool {
168 let mut i = start_offset;
169 // TODO: use for loop instead of while loop when it's stable in const fn's.
170 while i < b.len() {
171 if !is_valid_identifier_char(b[i] as char) {
172 return false;
173 }
174 i += 1;
175 }
176 true
177}
178
179/// Describes what identifiers are allowed.
180///
181/// For now this is deliberately restrictive -- we would like to evolve this in the future.
182// TODO: "<SELF>" is coded as an exception. It should be removed once CompiledScript goes away.
183const fn is_valid(s: &str) -> bool {
184 // Rust const fn's don't currently support slicing or indexing &str's, so we
185 // have to operate on the underlying byte slice. This is not a problem as
186 // valid identifiers are (currently) ASCII-only.
187 let b = s.as_bytes();
188 match b {
189 b"<SELF>" => true,
190 [b'a'..=b'z', ..] | [b'A'..=b'Z', ..] => all_bytes_valid(b, 1),
191 [b'_', ..] if b.len() > 1 => all_bytes_valid(b, 1),
192 _ => false,
193 }
194}
195
196// const assert that IdentStr impls RefCast<From = str>
197// This assertion is what guarantees the unsafe transmute is safe.
198const _: fn() = || {
199 const fn assert_impl_all<T: ?Sized + ::ref_cast::RefCast<From = str>>() {}
200 assert_impl_all::<IdentStr>();
201};
202
#[cfg(test)]
mod tests {
    use std::str::FromStr as _;

    use super::*;

    /// A leading underscore followed by further characters parses as an `Identifier`.
    #[test]
    fn with_leading_underscore() {
        for name in ["_jeet", "_JEET"] {
            let _: Identifier = name.parse().unwrap();
        }
    }

    /// The same behavior as `sui_types::Identifier` as of `testnet-v1.39.3`.
    #[test]
    fn underscores_only() {
        // A lone underscore is rejected by both the owned and the borrowed type...
        assert!(Identifier::from_str("_").is_err());
        assert!(IdentStr::new("_").is_err());

        // ...while two or more underscores are accepted by both.
        for name in ["__", "___"] {
            assert!(Identifier::from_str(name).is_ok());
            assert!(IdentStr::new(name).is_ok());
        }
    }
}
225}