outmove_common/types/identifier.rs
1// Copyright (c) The Diem Core Contributors
2// SPDX-License-Identifier: Apache-2.0
3
4//! An identifier is the name of an entity (module, resource, function, etc) in Move.
5//!
//! A valid identifier consists of an ASCII string which satisfies either of the following conditions:
7//!
8//! * The first character is a letter and the remaining characters are letters, digits or
9//! underscores.
10//! * The first character is an underscore, and there is at least one further letter, digit or
11//! underscore.
12//!
13//! The spec for allowed identifiers is similar to Rust's spec
14//! ([as of version 1.38](https://doc.rust-lang.org/1.38.0/reference/identifiers.html)).
15//!
16//! Allowed identifiers are currently restricted to ASCII due to unresolved issues with Unicode
17//! normalization. See [Rust issue #55467](https://github.com/rust-lang/rust/issues/55467) and the
18//! associated RFC for some discussion. Unicode identifiers may eventually be supported once these
19//! issues are worked out.
20//!
21//! This module only determines allowed identifiers at the bytecode level. Move source code will
22//! likely be more restrictive than even this, with a "raw identifier" escape hatch similar to
23//! Rust's `r#` identifiers.
24//!
25//! Among other things, identifiers are used to:
26//! * specify keys for lookups in storage
27//! * do cross-module lookups while executing transactions
28
29use anyhow::{bail, Result};
30use ref_cast::RefCast;
31use serde::{Deserialize, Serialize};
32use std::{borrow::Borrow, fmt, ops::Deref};
33
/// Reports whether `c` may appear somewhere in a Move identifier.
///
/// The accepted characters are the ASCII letters, the ASCII digits and the underscore.
///
/// Note: the first character of an identifier is restricted further -- it must be a
/// letter, or an underscore that is followed by at least one more valid character.
pub fn is_valid_identifier_char(c: char) -> bool {
    c == '_' || c.is_ascii_alphanumeric()
}
41
42/// Describes what identifiers are allowed.
43///
44/// For now this is deliberately restrictive -- we would like to evolve this in the future.
45// TODO: "<SELF>" is coded as an exception. It should be removed once CompiledScript goes away.
46fn is_valid(s: &str) -> bool {
47 if s == "<SELF>" {
48 return true;
49 }
50 let len = s.len();
51 let mut chars = s.chars();
52 match chars.next() {
53 Some('a'..='z') | Some('A'..='Z') => chars.all(is_valid_identifier_char),
54 Some('_') if len > 1 => chars.all(is_valid_identifier_char),
55 _ => false,
56 }
57}
58
59/// An owned identifier.
60///
61/// For more details, see the module level documentation.
62#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd, Serialize, Deserialize)]
63pub struct Identifier(Box<str>);
64// An identifier cannot be mutated so use Box<str> instead of String -- it is 1 word smaller.
65
66impl Identifier {
67 /// Creates a new `Identifier` instance.
68 pub fn new(s: impl Into<Box<str>>) -> Result<Self> {
69 let s = s.into();
70 if Self::is_valid(&s) {
71 Ok(Self(s))
72 } else {
73 bail!("Invalid identifier '{}'", s);
74 }
75 }
76
77 /// Returns true if this string is a valid identifier.
78 pub fn is_valid(s: impl AsRef<str>) -> bool {
79 is_valid(s.as_ref())
80 }
81
82 /// Returns if this identifier is "<SELF>".
83 /// TODO: remove once we fully separate CompiledScript & CompiledModule.
84 pub fn is_self(&self) -> bool {
85 &*self.0 == "<SELF>"
86 }
87
88 /// Converts a vector of bytes to an `Identifier`.
89 pub fn from_utf8(vec: Vec<u8>) -> Result<Self> {
90 let s = String::from_utf8(vec)?;
91 Self::new(s)
92 }
93
94 /// Creates a borrowed version of `self`.
95 pub fn as_ident_str(&self) -> &IdentStr {
96 self
97 }
98
99 /// Converts this `Identifier` into a `String`.
100 ///
101 /// This is not implemented as a `From` trait to discourage automatic conversions -- these
102 /// conversions should not typically happen.
103 pub fn into_string(self) -> String {
104 self.0.into()
105 }
106
107 /// Converts this `Identifier` into a UTF-8-encoded byte sequence.
108 pub fn into_bytes(self) -> Vec<u8> {
109 self.into_string().into_bytes()
110 }
111}
112
113impl<'a> From<&'a IdentStr> for Identifier {
114 fn from(ident_str: &'a IdentStr) -> Self {
115 ident_str.to_owned()
116 }
117}
118
119impl AsRef<IdentStr> for Identifier {
120 fn as_ref(&self) -> &IdentStr {
121 self
122 }
123}
124
125impl Deref for Identifier {
126 type Target = IdentStr;
127
128 fn deref(&self) -> &IdentStr {
129 // Identifier and IdentStr maintain the same invariants, so it is safe to
130 // convert.
131 IdentStr::ref_cast(&self.0)
132 }
133}
134
135impl fmt::Display for Identifier {
136 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
137 write!(f, "{}", &self.0)
138 }
139}
140
141/// A borrowed identifier.
142///
143/// For more details, see the module level documentation.
144#[derive(Debug, Eq, Hash, Ord, PartialEq, PartialOrd, RefCast)]
145#[repr(transparent)]
146pub struct IdentStr(str);
147
148impl IdentStr {
149 pub fn new(s: &str) -> Result<&IdentStr> {
150 if Self::is_valid(s) {
151 Ok(IdentStr::ref_cast(s))
152 } else {
153 bail!("Invalid identifier '{}'", s);
154 }
155 }
156
157 /// Returns true if this string is a valid identifier.
158 pub fn is_valid(s: impl AsRef<str>) -> bool {
159 is_valid(s.as_ref())
160 }
161
162 /// Returns the length of `self` in bytes.
163 pub fn len(&self) -> usize {
164 self.0.len()
165 }
166
167 /// Returns `true` if `self` has a length of zero bytes.
168 pub fn is_empty(&self) -> bool {
169 self.0.is_empty()
170 }
171
172 /// Converts `self` to a `&str`.
173 ///
174 /// This is not implemented as a `From` trait to discourage automatic conversions -- these
175 /// conversions should not typically happen.
176 pub fn as_str(&self) -> &str {
177 &self.0
178 }
179
180 /// Converts `self` to a byte slice.
181 pub fn as_bytes(&self) -> &[u8] {
182 self.0.as_bytes()
183 }
184}
185
186impl Borrow<IdentStr> for Identifier {
187 fn borrow(&self) -> &IdentStr {
188 self
189 }
190}
191
192impl ToOwned for IdentStr {
193 type Owned = Identifier;
194
195 fn to_owned(&self) -> Identifier {
196 Identifier(self.0.into())
197 }
198}
199
200impl fmt::Display for IdentStr {
201 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
202 write!(f, "{}", &self.0)
203 }
204}