tiger_lib/
token.rs

1//! Contains the core [`Token`] and [`Loc`] types, which represent pieces of game script and where
2//! in the game files they came from.
3
4use std::borrow::{Borrow, Cow};
5use std::cmp::Ordering;
6use std::ffi::OsStr;
7use std::fmt::{Debug, Display, Error, Formatter};
8use std::hash::Hash;
9use std::mem::ManuallyDrop;
10use std::ops::{Bound, Range, RangeBounds};
11use std::path::{Path, PathBuf};
12use std::slice::SliceIndex;
13
14use bumpalo::Bump;
15
16use crate::date::Date;
17use crate::fileset::{FileEntry, FileKind};
18use crate::macros::{MACRO_MAP, MacroMapIndex};
19use crate::pathtable::{PathTable, PathTableIndex};
20use crate::report::{ErrorKey, err, untidy};
21
/// A location in the game files: which file, and the line and column within it.
///
/// The file's paths are not stored here; they are looked up in the [`PathTable`] through `idx`.
#[derive(Clone, Copy, Eq, PartialEq, Hash)]
pub struct Loc {
    /// Index of this file's entry in the [`PathTable`].
    pub(crate) idx: PathTableIndex,
    /// The [`FileKind`] recorded for the file this loc points into.
    pub kind: FileKind,
    /// line 0 means the loc applies to the file as a whole.
    pub line: u32,
    /// Column within the line. The `Token` helpers in this file advance it by one per `char`.
    pub column: u32,
    /// Used in macro expansions to point to the macro invocation
    /// in the macro table
    pub link_idx: Option<MacroMapIndex>,
}
33
34impl Loc {
35    #[must_use]
36    pub(crate) fn for_file(pathname: PathBuf, kind: FileKind, fullpath: PathBuf) -> Self {
37        let idx = PathTable::store(pathname, fullpath);
38        Loc { idx, kind, line: 0, column: 0, link_idx: None }
39    }
40
41    pub fn filename(self) -> Cow<'static, str> {
42        PathTable::lookup_path(self.idx)
43            .file_name()
44            .unwrap_or_else(|| OsStr::new(""))
45            .to_string_lossy()
46    }
47
48    pub fn pathname(self) -> &'static Path {
49        PathTable::lookup_path(self.idx)
50    }
51
52    pub fn fullpath(self) -> &'static Path {
53        PathTable::lookup_fullpath(self.idx)
54    }
55
56    #[inline]
57    pub fn same_file(self, other: Loc) -> bool {
58        self.idx == other.idx
59    }
60}
61
62impl PartialOrd for Loc {
63    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
64        Some(self.cmp(other))
65    }
66}
67
68impl Ord for Loc {
69    fn cmp(&self, other: &Self) -> Ordering {
70        self.idx
71            .cmp(&other.idx)
72            .then(self.line.cmp(&other.line))
73            .then(self.column.cmp(&other.column))
74            .then(
75                self.link_idx
76                    .map(|link| MACRO_MAP.get_loc(link))
77                    .cmp(&other.link_idx.map(|link| MACRO_MAP.get_loc(link))),
78            )
79    }
80}
81
82impl From<&FileEntry> for Loc {
83    fn from(entry: &FileEntry) -> Self {
84        if let Some(idx) = entry.path_idx() {
85            Loc { idx, kind: entry.kind(), line: 0, column: 0, link_idx: None }
86        } else {
87            Self::for_file(entry.path().to_path_buf(), entry.kind(), entry.fullpath().to_path_buf())
88        }
89    }
90}
91
92impl From<&mut FileEntry> for Loc {
93    fn from(entry: &mut FileEntry) -> Self {
94        (&*entry).into()
95    }
96}
97
98impl From<FileEntry> for Loc {
99    fn from(entry: FileEntry) -> Self {
100        (&entry).into()
101    }
102}
103
104impl Debug for Loc {
105    /// Roll our own `Debug` implementation to handle the path field
106    fn fmt(&self, f: &mut Formatter) -> Result<(), Error> {
107        f.debug_struct("Loc")
108            .field("pathindex", &self.idx)
109            .field("pathname", &self.pathname())
110            .field("fullpath", &self.fullpath())
111            .field("kind", &self.kind)
112            .field("line", &self.line)
113            .field("column", &self.column)
114            .field("linkindex", &self.link_idx)
115            .finish()
116    }
117}
118
/// Leak the string, including any excess capacity, and return a `'static` reference to it.
///
/// The allocation is never freed. It should only be used for large strings, rather than for
/// small, individual strings, due to the memory overhead. Use [`bump`] instead for those,
/// which uses a bump allocator to store the strings.
pub(crate) fn leak(s: String) -> &'static str {
    // `String::leak` keeps the buffer as-is (excess capacity included) and needs no `unsafe`.
    s.leak()
}
131
132thread_local!(static STR_BUMP: ManuallyDrop<Bump> = ManuallyDrop::new(Bump::new()));
133
134/// Allocate the string on heap with a bump allocator.
135///
136/// SAFETY: This is safe as long as no `Bump::reset` is called to deallocate memory
137/// and `STR_BUMP` is not dropped when thread exits.
138pub(crate) fn bump(s: &str) -> &'static str {
139    STR_BUMP.with(|bump| {
140        let s = bump.alloc_str(s);
141        unsafe {
142            let s_ptr: *const str = s;
143            &*s_ptr
144        }
145    })
146}
147
/// A Token consists of a string and its location in the parsed files.
///
/// Equality and hashing of tokens (implemented further down in this file) look only at the
/// string, not at the location.
// Both fields are `Copy`, so the lint would otherwise suggest deriving `Copy`.
// NOTE(review): presumably `Token` is kept non-`Copy` on purpose — confirm.
#[allow(missing_copy_implementations)]
#[derive(Clone, Debug)]
pub struct Token {
    /// The token's text. It is `'static`: either a static string or one stored via `bump`.
    s: &'static str,
    /// Where the token came from.
    pub loc: Loc,
}
155
156impl Token {
157    #[must_use]
158    pub fn new(s: &str, loc: Loc) -> Self {
159        Token { s: bump(s), loc }
160    }
161
162    #[must_use]
163    pub fn from_static_str(s: &'static str, loc: Loc) -> Self {
164        Token { s, loc }
165    }
166
167    /// Create a `Token` from a substring of the given `Token`.
168    #[must_use]
169    pub fn subtoken<R>(&self, range: R, loc: Loc) -> Token
170    where
171        R: RangeBounds<usize> + SliceIndex<str, Output = str>,
172    {
173        Token { s: &self.s[range], loc }
174    }
175
176    /// Create a `Token` from a subtring of the given `Token`,
177    /// stripping any whitespace from the created token.
178    #[must_use]
179    pub fn subtoken_stripped(&self, mut range: Range<usize>, mut loc: Loc) -> Token {
180        let mut start = match range.start_bound() {
181            Bound::Included(&i) => i,
182            Bound::Excluded(&i) => i + 1,
183            Bound::Unbounded => 0,
184        };
185        let mut end = match range.end_bound() {
186            Bound::Included(&i) => i + 1,
187            Bound::Excluded(&i) => i,
188            Bound::Unbounded => self.s.len(),
189        };
190        for (i, c) in self.s[range.clone()].char_indices() {
191            if !c.is_whitespace() {
192                start += i;
193                range = start..end;
194                break;
195            }
196            loc.column += 1;
197        }
198        for (i, c) in self.s[range.clone()].char_indices().rev() {
199            if !c.is_whitespace() {
200                end = start + i + c.len_utf8();
201                range = start..end;
202                break;
203            }
204        }
205        Token { s: &self.s[range], loc }
206    }
207
    /// Return the token's text. The returned reference is `'static` and does not borrow from
    /// the token.
    pub fn as_str(&self) -> &'static str {
        self.s
    }
211
212    pub fn is(&self, s: &str) -> bool {
213        self.s == s
214    }
215
216    pub fn lowercase_is(&self, s: &str) -> bool {
217        self.s.to_ascii_lowercase() == s
218    }
219
220    pub fn starts_with(&self, s: &str) -> bool {
221        self.s.starts_with(s)
222    }
223
224    #[must_use]
225    /// Split the token into one or more subtokens, with `ch` as the delimiter.
226    /// Updates the locs for the created subtokens.
227    /// This is not meant for multiline tokens.
228    /// # Panics
229    /// May panic if the token's column location exceeds 4,294,967,296.
230    pub fn split(&self, ch: char) -> Vec<Token> {
231        let mut pos = 0;
232        let mut vec = Vec::new();
233        let mut loc = self.loc;
234        let mut lines: u32 = 0;
235        for (cols, (i, c)) in self.s.char_indices().enumerate() {
236            let cols = u32::try_from(cols).expect("internal error: 2^32 columns");
237            if c == ch {
238                vec.push(self.subtoken(pos..i, loc));
239                pos = i + 1;
240                loc.column = self.loc.column + cols + 1;
241                loc.line = self.loc.line + lines;
242            }
243            if c == '\n' {
244                lines += 1;
245            }
246        }
247        vec.push(self.subtoken(pos.., loc));
248        vec
249    }
250
251    #[must_use]
252    pub fn strip_suffix(&self, sfx: &str) -> Option<Token> {
253        self.s.strip_suffix(sfx).map(|pfx| Token::from_static_str(pfx, self.loc))
254    }
255
256    #[must_use]
257    pub fn strip_prefix(&self, pfx: &str) -> Option<Token> {
258        #[allow(clippy::cast_possible_truncation)]
259        self.s.strip_prefix(pfx).map(|sfx| {
260            let mut loc = self.loc;
261            loc.column += pfx.chars().count() as u32;
262            Token::from_static_str(sfx, loc)
263        })
264    }
265
266    #[must_use]
267    /// Split the token into two subtokens, with the split at the first occurrence of `ch`.
268    /// Updates the locs for the created subtokens.
269    /// This is not meant for multiline tokens.
270    /// Returns `None` if `ch` was not found in the token.
271    /// # Panics
272    /// May panic if the token's column location exceeds 4,294,967,296.
273    pub fn split_once(&self, ch: char) -> Option<(Token, Token)> {
274        for (cols, (i, c)) in self.s.char_indices().enumerate() {
275            let cols = u32::try_from(cols).expect("internal error: 2^32 columns");
276            if c == ch {
277                let token1 = self.subtoken(..i, self.loc);
278                let mut loc = self.loc;
279                loc.column += cols + 1;
280                let token2 = self.subtoken(i + 1.., loc);
281                return Some((token1, token2));
282            }
283        }
284        None
285    }
286
287    /// Split the token into two subtokens, with the split at the first instance of `ch`, such that `ch` is part of the first returned token.
288    /// Updates the locs for the created subtokens.
289    /// This is not meant for multiline tokens.
290    /// Returns `None` if `ch` was not found in the token.
291    /// # Panics
292    /// May panic if the token's column location exceeds 4,294,967,296.
293    #[must_use]
294    pub fn split_after(&self, ch: char) -> Option<(Token, Token)> {
295        for (cols, (i, c)) in self.s.char_indices().enumerate() {
296            let cols = u32::try_from(cols).expect("internal error: 2^32 columns");
297            #[allow(clippy::cast_possible_truncation)] // chlen can't be more than 6
298            if c == ch {
299                let chlen = ch.len_utf8();
300                let token1 = self.subtoken(..i + chlen, self.loc);
301                let mut loc = self.loc;
302                loc.column += cols + chlen as u32;
303                let token2 = self.subtoken(i + chlen.., loc);
304                return Some((token1, token2));
305            }
306        }
307        None
308    }
309
310    /// Create a new token that is a concatenation of this token and `other`, with `c` between them.
311    pub fn combine(&mut self, other: &Token, c: char) {
312        let mut s = self.s.to_string();
313        s.push(c);
314        s.push_str(other.s);
315        self.s = bump(&s);
316    }
317
318    #[must_use]
319    /// Return a subtoken of this token, such that all whitespace is removed from the start and end.
320    /// Will update the loc of the subtoken.
321    /// This is not meant for multiline tokens.
322    /// # Panics
323    /// May panic if the token's column location exceeds 4,294,967,296.
324    pub fn trim(&self) -> Token {
325        let mut real_start = None;
326        let mut real_end = self.s.len();
327        for (cols, (i, c)) in self.s.char_indices().enumerate() {
328            let cols = u32::try_from(cols).expect("internal error: 2^32 columns");
329            if c != ' ' {
330                real_start = Some((cols, i));
331                break;
332            }
333        }
334        // looping over the indices is safe here because we're only skipping spaces
335        while real_end > 0 && &self.s[real_end - 1..real_end] == " " {
336            real_end -= 1;
337        }
338        if let Some((cols, i)) = real_start {
339            let mut loc = self.loc;
340            loc.column += cols;
341            self.subtoken(i..real_end, loc)
342        } else {
343            // all spaces
344            Token::from_static_str("", self.loc)
345        }
346    }
347
348    pub fn expect_number(&self) -> Option<f64> {
349        self.check_number();
350        // Trim "f" from the end of numbers
351        let s = self.s.trim_end_matches('f');
352        if let Ok(v) = s.parse::<f64>() {
353            Some(v)
354        } else {
355            err(ErrorKey::Validation).msg("expected number").loc(self).push();
356            None
357        }
358    }
359
360    /// Gets the field as a fixed-width decimal, specifically the value multiplied by 100,000
361    pub fn get_fixed_number(&self) -> Option<i64> {
362        if !self.s.contains('.') {
363            return Some(self.s.parse::<i64>().ok()? * 100_000);
364        }
365
366        let r = self.s.find('.')?;
367        let whole = &self.s[..r];
368        let fraction = &self.s[r + 1..];
369
370        if fraction.len() > 5 {
371            return None;
372        }
373        format!("{whole}{fraction:0<5}").parse::<i64>().ok()
374    }
375
376    pub fn get_number(&self) -> Option<f64> {
377        self.s.parse::<f64>().ok()
378    }
379
380    pub fn is_number(&self) -> bool {
381        self.s.parse::<f64>().is_ok()
382    }
383
384    pub fn check_number(&self) {
385        if let Some(idx) = self.s.find('.') {
386            if self.s.len() - idx > 6 {
387                let msg = "only 5 decimals are supported";
388                let info =
389                    "if you give more decimals, you get an error and the number is read as 0";
390                err(ErrorKey::Validation).msg(msg).info(info).loc(self).push();
391            }
392        }
393    }
394
395    /// Some files seem not to have the 5-decimal limitation
396    pub fn expect_precise_number(&self) -> Option<f64> {
397        // Trim "f" from the end of precise numbers
398        let s = self.s.trim_end_matches('f');
399        if let Ok(v) = s.parse::<f64>() {
400            Some(v)
401        } else {
402            err(ErrorKey::Validation).msg("expected number").loc(self).push();
403            None
404        }
405    }
406
407    pub fn expect_integer(&self) -> Option<i64> {
408        if let Ok(v) = self.s.parse::<i64>() {
409            Some(v)
410        } else {
411            err(ErrorKey::Validation).msg("expected integer").loc(self).push();
412            None
413        }
414    }
415
416    pub fn get_integer(&self) -> Option<i64> {
417        self.s.parse::<i64>().ok()
418    }
419
420    pub fn is_integer(&self) -> bool {
421        self.s.parse::<i64>().is_ok()
422    }
423
424    pub fn expect_date(&self) -> Option<Date> {
425        if let Ok(v) = self.s.parse::<Date>() {
426            if self.s.ends_with('.') {
427                untidy(ErrorKey::Validation).msg("trailing dot on date").loc(self).push();
428            }
429            Some(v)
430        } else {
431            err(ErrorKey::Validation).msg("expected date").loc(self).push();
432            None
433        }
434    }
435
436    pub fn get_date(&self) -> Option<Date> {
437        self.s.parse::<Date>().ok()
438    }
439
440    pub fn is_date(&self) -> bool {
441        self.s.parse::<Date>().is_ok()
442    }
443
444    /// Tests if the taken is lowercase
445    pub fn is_lowercase(&self) -> bool {
446        !self.s.chars().any(char::is_uppercase)
447    }
448
449    #[must_use]
450    pub fn linked(mut self, link_idx: Option<MacroMapIndex>) -> Self {
451        self.loc.link_idx = link_idx;
452        self
453    }
454}
455
456impl From<&Token> for Token {
457    fn from(token: &Token) -> Token {
458        token.clone()
459    }
460}
461
462/// Tokens are compared for equality regardless of their loc.
463impl PartialEq for Token {
464    fn eq(&self, other: &Self) -> bool {
465        self.s == other.s
466    }
467}
468
469impl Eq for Token {}
470
471impl Hash for Token {
472    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
473        self.s.hash(state);
474    }
475}
476
477impl Borrow<str> for Token {
478    fn borrow(&self) -> &str {
479        self.s
480    }
481}
482
483impl Borrow<str> for &Token {
484    fn borrow(&self) -> &str {
485        self.s
486    }
487}
488
489impl From<Loc> for Token {
490    fn from(loc: Loc) -> Self {
491        Token { s: "", loc }
492    }
493}
494
495impl Display for Token {
496    fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
497        write!(f, "{}", self.s)
498    }
499}