pliron/
identifier.rs

1//! [Identifier]s are strings used to name entities in programming languages.
2
3use std::{
4    fmt::Display,
5    ops::{Add, Deref},
6};
7
8use combine::{Parser, token};
9use rustc_hash::FxHashMap;
10use thiserror::Error;
11
12use crate::{
13    builtin::attributes::StringAttr,
14    impl_printable_for_display,
15    parsable::{self, Parsable, ParseResult},
16    result::{self, Result},
17    verify_err_noloc,
18};
19
/// A string used as a name, restricted to a fixed ASCII character set.
///
/// An [Identifier] must satisfy the regex `[a-zA-Z_][a-zA-Z0-9_]*`.
/// Also see [module description](module@crate::identifier).
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Identifier(String);
24
25impl Identifier {
26    /// Attempt to construct a new [Identifier] from a [String].
27    /// Examples:
28    /// ```
29    /// use pliron::identifier::Identifier;
30    /// let _: Identifier = "hi12".try_into().expect("Identifier creation error");
31    /// let _: Identifier = "A12ab".try_into().expect("Identifier creation error");
32    /// TryInto::<Identifier>::try_into("hi12.").expect_err("Malformed identifier not caught");
33    /// TryInto::<Identifier>::try_into("12ab").expect_err("Malformed identifier not caught");
34    /// TryInto::<Identifier>::try_into(".a12ab").expect_err("Malformed identifier not caught");
35    /// ```
36    pub fn try_new(value: String) -> Result<Self> {
37        let mut chars_iter = value.chars();
38        match chars_iter.next() {
39            Some(first_char) if (first_char.is_ascii_alphabetic() || first_char == '_') => {
40                if !chars_iter.all(|c| c.is_ascii_alphanumeric() || c == '_') {
41                    return verify_err_noloc!(MalformedIdentifierErr(value.clone()));
42                }
43            }
44            _ => {
45                return verify_err_noloc!(MalformedIdentifierErr(value.clone()));
46            }
47        }
48        Ok(Identifier(value))
49    }
50}
51
52impl Add for Identifier {
53    type Output = Identifier;
54
55    fn add(self, rhs: Self) -> Self::Output {
56        Identifier(self.0 + &rhs.0)
57    }
58}
59
// NOTE(review): presumably implements the crate's printing support for `Identifier` by
// delegating to its `Display` impl; the macro is defined elsewhere in the crate — confirm there.
impl_printable_for_display!(Identifier);
61
62impl Display for Identifier {
63    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
64        write!(f, "{}", self.0)
65    }
66}
67
68impl TryFrom<String> for Identifier {
69    type Error = result::Error;
70
71    fn try_from(value: String) -> std::result::Result<Self, Self::Error> {
72        Self::try_new(value)
73    }
74}
75
76impl TryFrom<&str> for Identifier {
77    type Error = result::Error;
78
79    fn try_from(value: &str) -> std::result::Result<Self, Self::Error> {
80        Self::try_new(value.to_string())
81    }
82}
83
84impl TryFrom<StringAttr> for Identifier {
85    type Error = result::Error;
86
87    fn try_from(value: StringAttr) -> std::result::Result<Self, Self::Error> {
88        Self::try_new(value.into())
89    }
90}
91
92impl From<Identifier> for String {
93    fn from(value: Identifier) -> Self {
94        value.0
95    }
96}
97
98impl Deref for Identifier {
99    type Target = String;
100
101    fn deref(&self) -> &Self::Target {
102        &self.0
103    }
104}
105
106/// A fast way to get just the "_" character as a string.
107pub fn underscore() -> Identifier {
108    Identifier("_".to_string())
109}
110
/// Error describing a string that is not a well-formed [Identifier].
///
/// The single field is the rejected string; it is interpolated into the message as `{0}`.
#[derive(Debug, Error)]
#[error("Malformed identifier {0}")]
struct MalformedIdentifierErr(String);
114
115impl Parsable for Identifier {
116    type Arg = ();
117    type Parsed = Identifier;
118
119    fn parse<'a>(
120        state_stream: &mut parsable::StateStream<'a>,
121        _arg: Self::Arg,
122    ) -> ParseResult<'a, Self::Parsed> {
123        use combine::{many, parser::char};
124        let parser = (char::letter().or(token('_')))
125            .and(many::<String, _, _>(char::alpha_num().or(char::char('_'))))
126            .map(|(c, rest)| c.to_string() + &rest);
127
128        parser
129            .map(|str| {
130                str.try_into()
131                    .expect("Something is wrong in our Identifier parser")
132            })
133            .parse_stream(state_stream)
134            .into()
135    }
136}
137
/// A utility to safely (i.e., without collisions) legalise identifiers.
/// Generated [Identifier]s are unique only within this object.
/// ```
/// use pliron::identifier::{Legaliser, Identifier};
/// let mut legaliser = Legaliser::default();
/// let id1 = legaliser.legalise("hello_");
/// assert_eq!(*id1, "hello_");
/// assert_eq!(legaliser.source_name(&id1).unwrap(), "hello_");
/// let id2 = legaliser.legalise("hello.");
/// assert_eq!(*id2, "hello__0");
/// assert_eq!(legaliser.source_name(&id2).unwrap(), "hello.");
/// let id3 = legaliser.legalise("hello__0");
/// assert_eq!(*id3, "hello__0_1");
/// assert_eq!(legaliser.source_name(&id3).unwrap(), "hello__0");
/// let id4 = legaliser.legalise("");
/// assert_eq!(*id4, "_");
/// assert_eq!(legaliser.source_name(&id4).unwrap(), "");
/// let id5 = legaliser.legalise("_");
/// assert_eq!(*id5, "__2");
/// assert_eq!(legaliser.source_name(&id5).unwrap(), "_");
///
/// let mut another_legaliser = Legaliser::default();
/// let id6 = another_legaliser.legalise("_");
/// assert_eq!(*id6, "_");
/// assert_eq!(another_legaliser.source_name(&id6).unwrap(), "_");
/// let id7 = another_legaliser.legalise("");
/// assert_eq!(*id7, "__0");
/// assert_eq!(another_legaliser.source_name(&id7).unwrap(), "");
///
/// ```
#[derive(Default)]
pub struct Legaliser {
    /// A map from the source strings to [Identifier]s.
    str_to_id: FxHashMap<String, Identifier>,
    /// Reverse map from the text of each generated [Identifier] to its source string.
    rev_str_to_id: FxHashMap<String, String>,
    /// A counter to generate unique (within this object) ids.
    counter: usize,
}
177
178impl Legaliser {
179    /// Replace illegal characters with '_'.
180    fn replace_illegal_chars(name: &str) -> String {
181        if TryInto::<Identifier>::try_into(name).is_ok() {
182            return name.to_string();
183        }
184
185        if name.is_empty() {
186            return String::from("_");
187        }
188
189        let mut char_iter = name.chars();
190        let first_char = char_iter.next().unwrap();
191        let mut result = if first_char.is_alphabetic() {
192            String::from(first_char)
193        } else {
194            String::from("_")
195        };
196
197        let rest = char_iter.map(|c| if c.is_ascii_alphanumeric() { c } else { '_' });
198        result.extend(rest);
199
200        result
201    }
202
203    /// Get a legal [Identifier] for input name.
204    pub fn legalise(&mut self, name: &str) -> Identifier {
205        // If we've already mapped this before, just return that.
206        if let Some(id) = self.str_to_id.get(name) {
207            return id.clone();
208        }
209
210        let legal_name = Self::replace_illegal_chars(name);
211        let mut legal_name_unique = legal_name.clone();
212        // Until this is not already a mapped identifier, create unique ones.
213        while self.rev_str_to_id.contains_key(&legal_name_unique) {
214            legal_name_unique = legal_name.clone() + &format!("_{}", self.counter);
215            self.counter += 1;
216        }
217
218        let legal_name_id = Identifier(legal_name_unique.clone());
219        self.str_to_id
220            .insert(name.to_string(), legal_name_id.clone());
221        self.rev_str_to_id
222            .insert(legal_name_unique.clone(), name.to_string());
223
224        legal_name_id
225    }
226
227    /// Get the source name from which this [Identifier] was mapped to.
228    pub fn source_name(&self, id: &Identifier) -> Option<String> {
229        self.rev_str_to_id.get(&id.0).cloned()
230    }
231}