tiger_lib/
token.rs

1//! Contains the core [`Token`] and [`Loc`] types, which represent pieces of game script and where
2//! in the game files they came from.
3
4use std::borrow::{Borrow, Cow};
5use std::ffi::OsStr;
6use std::fmt::{Debug, Display, Error, Formatter};
7use std::hash::Hash;
8use std::mem::ManuallyDrop;
9use std::ops::{Bound, Range, RangeBounds};
10use std::path::{Path, PathBuf};
11use std::slice::SliceIndex;
12
13use bumpalo::Bump;
14
15use crate::date::Date;
16use crate::fileset::{FileEntry, FileKind};
17use crate::macros::MacroMapIndex;
18use crate::pathtable::{PathTable, PathTableIndex};
19use crate::report::{err, untidy, ErrorKey};
20
/// A position in one of the game files.
///
/// The file itself is identified by an index into the [`PathTable`], which keeps `Loc` small
/// and `Copy`. The derived ordering compares the fields in declaration order: first the path
/// index, then the file kind, then line and column, and finally the macro link.
#[derive(Clone, Copy, Eq, PartialEq, Ord, PartialOrd, Hash)]
pub struct Loc {
    /// Index of this file's paths in the [`PathTable`].
    pub(crate) idx: PathTableIndex,
    /// The [`FileKind`] of the file this loc points into.
    pub kind: FileKind,
    /// line 0 means the loc applies to the file as a whole.
    pub line: u32,
    /// Column within the line. It is 0 when the loc applies to the file as a whole.
    pub column: u32,
    /// Used in macro expansions to point to the macro invocation
    /// in the macro table
    pub link_idx: Option<MacroMapIndex>,
}
32
33impl Loc {
34    #[must_use]
35    pub(crate) fn for_file(pathname: PathBuf, kind: FileKind, fullpath: PathBuf) -> Self {
36        let idx = PathTable::store(pathname, fullpath);
37        Loc { idx, kind, line: 0, column: 0, link_idx: None }
38    }
39
40    pub fn filename(self) -> Cow<'static, str> {
41        PathTable::lookup_path(self.idx)
42            .file_name()
43            .unwrap_or_else(|| OsStr::new(""))
44            .to_string_lossy()
45    }
46
47    pub fn pathname(self) -> &'static Path {
48        PathTable::lookup_path(self.idx)
49    }
50
51    pub fn fullpath(self) -> &'static Path {
52        PathTable::lookup_fullpath(self.idx)
53    }
54
55    #[inline]
56    pub fn same_file(self, other: Loc) -> bool {
57        self.idx == other.idx
58    }
59}
60
61impl From<&FileEntry> for Loc {
62    fn from(entry: &FileEntry) -> Self {
63        if let Some(idx) = entry.path_idx() {
64            Loc { idx, kind: entry.kind(), line: 0, column: 0, link_idx: None }
65        } else {
66            Self::for_file(entry.path().to_path_buf(), entry.kind(), entry.fullpath().to_path_buf())
67        }
68    }
69}
70
71impl From<&mut FileEntry> for Loc {
72    fn from(entry: &mut FileEntry) -> Self {
73        (&*entry).into()
74    }
75}
76
77impl From<FileEntry> for Loc {
78    fn from(entry: FileEntry) -> Self {
79        (&entry).into()
80    }
81}
82
83impl Debug for Loc {
84    /// Roll our own `Debug` implementation to handle the path field
85    fn fmt(&self, f: &mut Formatter) -> Result<(), Error> {
86        f.debug_struct("Loc")
87            .field("pathindex", &self.idx)
88            .field("pathname", &self.pathname())
89            .field("fullpath", &self.fullpath())
90            .field("kind", &self.kind)
91            .field("line", &self.line)
92            .field("column", &self.column)
93            .field("linkindex", &self.link_idx)
94            .finish()
95    }
96}
97
/// Turn an owned `String` into a `&'static str` by never freeing its buffer.
///
/// The buffer is leaked as-is, including any unused capacity, so this is only appropriate
/// for large strings. For small, individual strings use [`bump`] instead, which packs them
/// into a bump allocator and avoids the per-string overhead.
pub(crate) fn leak(s: String) -> &'static str {
    // Wrapping in `ManuallyDrop` guarantees the destructor never runs.
    let leaked = ManuallyDrop::new(s);
    let raw: *const str = leaked.as_str();
    // SAFETY: `raw` points into the heap buffer of `leaked`, which is never dropped or
    // mutated again, so the bytes stay valid and unchanged for the rest of the program.
    unsafe { &*raw }
}
110
111thread_local!(static STR_BUMP: ManuallyDrop<Bump> = ManuallyDrop::new(Bump::new()));
112
113/// Allocate the string on heap with a bump allocator.
114///
115/// SAFETY: This is safe as long as no `Bump::reset` is called to deallocate memory
116/// and `STR_BUMP` is not dropped when thread exits.
117pub(crate) fn bump(s: &str) -> &'static str {
118    STR_BUMP.with(|bump| {
119        let s = bump.alloc_str(s);
120        unsafe {
121            let s_ptr: *const str = s;
122            &*s_ptr
123        }
124    })
125}
126
/// A Token consists of a string and its location in the parsed files.
///
/// The string is always a `&'static str` (leaked, bump-allocated, or a literal), so cloning a
/// `Token` never copies text. Equality and hashing look only at the string, not at `loc`.
// `Copy` is deliberately not derived even though it could be, hence the `allow`.
#[allow(missing_copy_implementations)]
#[derive(Clone, Debug)]
pub struct Token {
    /// The text of the token.
    s: &'static str,
    /// Where the first character of the token was found.
    pub loc: Loc,
}
134
135impl Token {
136    #[must_use]
137    pub fn new(s: &str, loc: Loc) -> Self {
138        Token { s: bump(s), loc }
139    }
140
141    #[must_use]
142    pub fn from_static_str(s: &'static str, loc: Loc) -> Self {
143        Token { s, loc }
144    }
145
146    /// Create a `Token` from a substring of the given `Token`.
147    #[must_use]
148    pub fn subtoken<R>(&self, range: R, loc: Loc) -> Token
149    where
150        R: RangeBounds<usize> + SliceIndex<str, Output = str>,
151    {
152        Token { s: &self.s[range], loc }
153    }
154
155    /// Create a `Token` from a subtring of the given `Token`,
156    /// stripping any whitespace from the created token.
157    #[must_use]
158    pub fn subtoken_stripped(&self, mut range: Range<usize>, mut loc: Loc) -> Token {
159        let mut start = match range.start_bound() {
160            Bound::Included(&i) => i,
161            Bound::Excluded(&i) => i + 1,
162            Bound::Unbounded => 0,
163        };
164        let mut end = match range.end_bound() {
165            Bound::Included(&i) => i + 1,
166            Bound::Excluded(&i) => i,
167            Bound::Unbounded => self.s.len(),
168        };
169        for (i, c) in self.s[range.clone()].char_indices() {
170            if !c.is_whitespace() {
171                start += i;
172                range = start..end;
173                break;
174            }
175            loc.column += 1;
176        }
177        for (i, c) in self.s[range.clone()].char_indices().rev() {
178            if !c.is_whitespace() {
179                end = start + i + c.len_utf8();
180                range = start..end;
181                break;
182            }
183        }
184        Token { s: &self.s[range], loc }
185    }
186
187    pub fn as_str(&self) -> &'static str {
188        self.s
189    }
190
191    pub fn is(&self, s: &str) -> bool {
192        self.s == s
193    }
194
195    pub fn lowercase_is(&self, s: &str) -> bool {
196        self.s.to_ascii_lowercase() == s
197    }
198
199    pub fn starts_with(&self, s: &str) -> bool {
200        self.s.starts_with(s)
201    }
202
203    #[must_use]
204    /// Split the token into one or more subtokens, with `ch` as the delimiter.
205    /// Updates the locs for the created subtokens.
206    /// This is not meant for multiline tokens.
207    /// # Panics
208    /// May panic if the token's column location exceeds 4,294,967,296.
209    pub fn split(&self, ch: char) -> Vec<Token> {
210        let mut pos = 0;
211        let mut vec = Vec::new();
212        let mut loc = self.loc;
213        let mut lines: u32 = 0;
214        for (cols, (i, c)) in self.s.char_indices().enumerate() {
215            let cols = u32::try_from(cols).expect("internal error: 2^32 columns");
216            if c == ch {
217                vec.push(self.subtoken(pos..i, loc));
218                pos = i + 1;
219                loc.column = self.loc.column + cols + 1;
220                loc.line = self.loc.line + lines;
221            }
222            if c == '\n' {
223                lines += 1;
224            }
225        }
226        vec.push(self.subtoken(pos.., loc));
227        vec
228    }
229
230    #[must_use]
231    pub fn strip_suffix(&self, sfx: &str) -> Option<Token> {
232        self.s.strip_suffix(sfx).map(|pfx| Token::from_static_str(pfx, self.loc))
233    }
234
235    #[must_use]
236    pub fn strip_prefix(&self, pfx: &str) -> Option<Token> {
237        #[allow(clippy::cast_possible_truncation)]
238        self.s.strip_prefix(pfx).map(|sfx| {
239            let mut loc = self.loc;
240            loc.column += pfx.chars().count() as u32;
241            Token::from_static_str(sfx, loc)
242        })
243    }
244
245    #[must_use]
246    /// Split the token into two subtokens, with the split at the first occurrence of `ch`.
247    /// Updates the locs for the created subtokens.
248    /// This is not meant for multiline tokens.
249    /// Returns `None` if `ch` was not found in the token.
250    /// # Panics
251    /// May panic if the token's column location exceeds 4,294,967,296.
252    pub fn split_once(&self, ch: char) -> Option<(Token, Token)> {
253        for (cols, (i, c)) in self.s.char_indices().enumerate() {
254            let cols = u32::try_from(cols).expect("internal error: 2^32 columns");
255            if c == ch {
256                let token1 = self.subtoken(..i, self.loc);
257                let mut loc = self.loc;
258                loc.column += cols + 1;
259                let token2 = self.subtoken(i + 1.., loc);
260                return Some((token1, token2));
261            }
262        }
263        None
264    }
265
266    /// Split the token into two subtokens, with the split at the first instance of `ch`, such that `ch` is part of the first returned token.
267    /// Updates the locs for the created subtokens.
268    /// This is not meant for multiline tokens.
269    /// Returns `None` if `ch` was not found in the token.
270    /// # Panics
271    /// May panic if the token's column location exceeds 4,294,967,296.
272    #[must_use]
273    pub fn split_after(&self, ch: char) -> Option<(Token, Token)> {
274        for (cols, (i, c)) in self.s.char_indices().enumerate() {
275            let cols = u32::try_from(cols).expect("internal error: 2^32 columns");
276            #[allow(clippy::cast_possible_truncation)] // chlen can't be more than 6
277            if c == ch {
278                let chlen = ch.len_utf8();
279                let token1 = self.subtoken(..i + chlen, self.loc);
280                let mut loc = self.loc;
281                loc.column += cols + chlen as u32;
282                let token2 = self.subtoken(i + chlen.., loc);
283                return Some((token1, token2));
284            }
285        }
286        None
287    }
288
289    /// Create a new token that is a concatenation of this token and `other`, with `c` between them.
290    pub fn combine(&mut self, other: &Token, c: char) {
291        let mut s = self.s.to_string();
292        s.push(c);
293        s.push_str(other.s);
294        self.s = bump(&s);
295    }
296
297    #[must_use]
298    /// Return a subtoken of this token, such that all whitespace is removed from the start and end.
299    /// Will update the loc of the subtoken.
300    /// This is not meant for multiline tokens.
301    /// # Panics
302    /// May panic if the token's column location exceeds 4,294,967,296.
303    pub fn trim(&self) -> Token {
304        let mut real_start = None;
305        let mut real_end = self.s.len();
306        for (cols, (i, c)) in self.s.char_indices().enumerate() {
307            let cols = u32::try_from(cols).expect("internal error: 2^32 columns");
308            if c != ' ' {
309                real_start = Some((cols, i));
310                break;
311            }
312        }
313        // looping over the indices is safe here because we're only skipping spaces
314        while real_end > 0 && &self.s[real_end - 1..real_end] == " " {
315            real_end -= 1;
316        }
317        if let Some((cols, i)) = real_start {
318            let mut loc = self.loc;
319            loc.column += cols;
320            self.subtoken(i..real_end, loc)
321        } else {
322            // all spaces
323            Token::from_static_str("", self.loc)
324        }
325    }
326
327    pub fn expect_number(&self) -> Option<f64> {
328        self.check_number();
329        // Trim "f" from the end of numbers
330        let s = self.s.trim_end_matches('f');
331        if let Ok(v) = s.parse::<f64>() {
332            Some(v)
333        } else {
334            err(ErrorKey::Validation).msg("expected number").loc(self).push();
335            None
336        }
337    }
338
339    /// Gets the field as a fixed-width decimal, specifically the value multiplied by 100,000
340    pub fn get_fixed_number(&self) -> Option<i64> {
341        if !self.s.contains('.') {
342            return Some(self.s.parse::<i64>().ok()? * 100_000);
343        }
344
345        let r = self.s.find('.')?;
346        let whole = &self.s[..r];
347        let fraction = &self.s[r + 1..];
348
349        if fraction.len() > 5 {
350            return None;
351        }
352        format!("{whole}{fraction:0<5}").parse::<i64>().ok()
353    }
354
355    pub fn get_number(&self) -> Option<f64> {
356        self.s.parse::<f64>().ok()
357    }
358
359    pub fn is_number(&self) -> bool {
360        self.s.parse::<f64>().is_ok()
361    }
362
363    pub fn check_number(&self) {
364        if let Some(idx) = self.s.find('.') {
365            if self.s.len() - idx > 6 {
366                let msg = "only 5 decimals are supported";
367                let info =
368                    "if you give more decimals, you get an error and the number is read as 0";
369                err(ErrorKey::Validation).msg(msg).info(info).loc(self).push();
370            }
371        }
372    }
373
374    /// Some files seem not to have the 5-decimal limitation
375    pub fn expect_precise_number(&self) -> Option<f64> {
376        if let Ok(v) = self.s.parse::<f64>() {
377            Some(v)
378        } else {
379            err(ErrorKey::Validation).msg("expected number").loc(self).push();
380            None
381        }
382    }
383
384    pub fn expect_integer(&self) -> Option<i64> {
385        if let Ok(v) = self.s.parse::<i64>() {
386            Some(v)
387        } else {
388            err(ErrorKey::Validation).msg("expected integer").loc(self).push();
389            None
390        }
391    }
392
393    pub fn get_integer(&self) -> Option<i64> {
394        self.s.parse::<i64>().ok()
395    }
396
397    pub fn is_integer(&self) -> bool {
398        self.s.parse::<i64>().is_ok()
399    }
400
401    pub fn expect_date(&self) -> Option<Date> {
402        if let Ok(v) = self.s.parse::<Date>() {
403            if self.s.ends_with('.') {
404                untidy(ErrorKey::Validation).msg("trailing dot on date").loc(self).push();
405            }
406            Some(v)
407        } else {
408            err(ErrorKey::Validation).msg("expected date").loc(self).push();
409            None
410        }
411    }
412
413    pub fn get_date(&self) -> Option<Date> {
414        self.s.parse::<Date>().ok()
415    }
416
417    pub fn is_date(&self) -> bool {
418        self.s.parse::<Date>().is_ok()
419    }
420
421    #[must_use]
422    pub fn linked(mut self, link_idx: Option<MacroMapIndex>) -> Self {
423        self.loc.link_idx = link_idx;
424        self
425    }
426}
427
428impl From<&Token> for Token {
429    fn from(token: &Token) -> Token {
430        token.clone()
431    }
432}
433
434/// Tokens are compared for equality regardless of their loc.
435impl PartialEq for Token {
436    fn eq(&self, other: &Self) -> bool {
437        self.s == other.s
438    }
439}
440
441impl Eq for Token {}
442
443impl Hash for Token {
444    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
445        self.s.hash(state);
446    }
447}
448
449impl Borrow<str> for Token {
450    fn borrow(&self) -> &str {
451        self.s
452    }
453}
454
455impl Borrow<str> for &Token {
456    fn borrow(&self) -> &str {
457        self.s
458    }
459}
460
461impl From<Loc> for Token {
462    fn from(loc: Loc) -> Self {
463        Token { s: "", loc }
464    }
465}
466
467impl Display for Token {
468    fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
469        write!(f, "{}", self.s)
470    }
471}
tiger_lib/token.rs

tiger_lib/
token.rs