titokens/lib.rs

use crate::tokenizer::TokenBoundaries;
use std::fmt::{Debug, Formatter};
pub use tokenizer::Tokenizer;
pub use version::{Model, Version};

pub mod tokenizer;
mod version;

mod xmlparse;

#[cfg(feature = "deku-8xp")]
pub mod ti_connect_file;

/// A single TI-BASIC token, one or two bytes long.
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum Token {
    OneByte(u8),
    /// upper byte, lower byte
    TwoByte(u8, u8),
}

impl Token {
    /// Returns `true` if this token ends a line of TI-BASIC.
    #[must_use]
    pub fn is_eol(&self) -> bool {
        matches!(
            self,
            // 0x00/EOF is for completeness, but we shouldn't encounter it.
            Token::OneByte(0x00 | 0x3E | 0x3F)
        )
    }

    /// Returns `true` if this token is a letter. Includes θ.
    #[must_use]
    pub fn is_alpha(&self) -> bool {
        matches!(self, Token::OneByte(0x41..=0x5B))
    }

    /// Returns `true` if this token is a digit.
    #[must_use]
    pub fn is_numeric(&self) -> bool {
        matches!(self, Token::OneByte(0x30..=0x39))
    }

    /// Returns `true` if this token is a letter, θ, or a digit.
    #[must_use]
    pub fn is_alphanumeric(&self) -> bool {
        self.is_alpha() || self.is_numeric()
    }

    /// Returns the least-significant byte in the token. For one-byte tokens this
    /// is the whole token, but for two-byte tokens this is the second byte.
    #[must_use]
    pub fn byte(&self) -> u8 {
        match *self {
            Token::TwoByte(_, x) | Token::OneByte(x) => x,
        }
    }

    /// Formats this token as a `\x{..}` escape sequence in lowercase hex.
    #[must_use]
    pub fn string_escaped(&self) -> String {
        match self {
            Token::OneByte(a) => format!("\\x{{{a:02x}}}"),
            Token::TwoByte(a, b) => format!("\\x{{{a:02x}{b:02x}}}"),
        }
    }
}

impl From<Token> for u16 {
    fn from(value: Token) -> Self {
        match value {
            Token::OneByte(a) => a as u16,
            Token::TwoByte(a, b) => ((a as u16) << 8) | (b as u16),
        }
    }
}
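
// A minimal sketch of the expected `Token` behavior, written as unit tests
// against the implementations above. The byte values used (0x41, 0x5B, 0x30,
// 0x3F, 0xBB) are taken from the ranges and comments in this file, not from
// any external token table.
#[cfg(test)]
mod token_tests {
    use super::Token;

    #[test]
    fn classification_follows_the_documented_ranges() {
        assert!(Token::OneByte(0x41).is_alpha()); // start of the letter range
        assert!(Token::OneByte(0x5B).is_alpha()); // θ is included
        assert!(Token::OneByte(0x30).is_numeric());
        assert!(!Token::OneByte(0x3F).is_alphanumeric()); // EOL is neither
    }

    #[test]
    fn two_byte_tokens_pack_big_endian_into_u16() {
        assert_eq!(u16::from(Token::OneByte(0x41)), 0x0041);
        assert_eq!(u16::from(Token::TwoByte(0xBB, 0x01)), 0xBB01);
    }

    #[test]
    fn escaping_uses_lowercase_hex() {
        assert_eq!(Token::OneByte(0x0A).string_escaped(), "\\x{0a}");
        assert_eq!(Token::TwoByte(0xBB, 0x0A).string_escaped(), "\\x{bb0a}");
    }
}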

impl Debug for Token {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        match self {
            Token::OneByte(a) => write!(f, "0x{a:02x}"),
            Token::TwoByte(a, b) => write!(f, "0x{a:02x}{b:02x}"),
        }
    }
}

/// An owned token stream with a cursor; iteration advances the cursor.
#[derive(Clone)]
pub struct Tokens {
    tokens: Vec<Token>,
    pos: usize,
    version: Option<Version>,
}

impl Iterator for Tokens {
    type Item = Token;

    fn next(&mut self) -> Option<Self::Item> {
        let tok = self.tokens.get(self.pos);
        self.pos += 1;
        tok.copied()
    }
}

#[cfg(feature = "itertools")]
impl itertools::PeekingNext for Tokens {
    fn peeking_next<F>(&mut self, accept: F) -> Option<Self::Item>
    where
        Self: Sized,
        F: FnOnce(&Self::Item) -> bool,
    {
        accept(&self.peek()?).then(|| self.next().unwrap())
    }
}
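
// A sketch of how `PeekingNext` behaves here: the token is consumed only when
// the predicate accepts it. Compiled only when the `itertools` feature is on.
#[cfg(all(test, feature = "itertools"))]
mod peeking_tests {
    use super::{Token, Tokens};
    use itertools::PeekingNext;

    #[test]
    fn peeking_next_consumes_only_on_accept() {
        let mut tokens = Tokens::from_bytes(&[0x41, 0x42], None);
        // Rejected: the cursor stays put.
        assert_eq!(tokens.peeking_next(|t| t.is_numeric()), None);
        assert_eq!(tokens.peek(), Some(Token::OneByte(0x41)));
        // Accepted: the token is consumed.
        assert_eq!(
            tokens.peeking_next(|t| t.is_alpha()),
            Some(Token::OneByte(0x41))
        );
        assert_eq!(tokens.peek(), Some(Token::OneByte(0x42)));
    }
}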

impl Tokens {
    /// Tokenizes a raw byte stream, pairing up two-byte tokens by their first byte.
    ///
    /// # Panics
    /// Panics if the stream ends in the middle of a two-byte token.
    #[must_use]
    pub fn from_bytes(bytes: &[u8], version: Option<Version>) -> Self {
        let mut iter = bytes.iter();
        let mut tokens = vec![];

        while let Some(&first) = iter.next() {
            let token = match first {
                // These first bytes introduce two-byte tokens.
                0x5C..=0x5E | 0x60..=0x63 | 0x7E | 0xAA | 0xBB | 0xEF => {
                    Token::TwoByte(first, *iter.next().unwrap())
                }

                _ => Token::OneByte(first),
            };

            tokens.push(token);
        }

        Tokens::from_vec(tokens, version)
    }

    #[must_use]
    pub fn from_vec(tokens: Vec<Token>, version: Option<Version>) -> Self {
        Tokens {
            tokens,
            pos: 0,
            version,
        }
    }

    /// Returns the token at the cursor without advancing it.
    #[must_use]
    pub fn peek(&self) -> Option<Token> {
        self.tokens.get(self.pos).copied()
    }

    /// Moves the cursor back one token.
    ///
    /// # Panics
    /// Panics in debug builds if the cursor is already at the start.
    pub fn backtrack_once(&mut self) {
        self.pos -= 1;
    }

    #[must_use]
    pub fn current_position(&self) -> usize {
        self.pos
    }

    pub fn to_string(&self, tokenizer: &Tokenizer) -> String {
        tokenizer.stringify(&self.tokens).to_string()
    }

    pub fn stringify_with_boundaries(&self, tokenizer: &Tokenizer) -> TokenBoundaries {
        tokenizer.stringify(&self.tokens)
    }

    /// Returns the version these tokens were tokenized with.
    ///
    /// # Panics
    /// Panics if no version was provided when these tokens were constructed.
    pub fn version(&self) -> &Version {
        self.version.as_ref().unwrap()
    }
}
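
// A sketch of the cursor behavior, assuming 0xBB introduces a two-byte token
// (it is listed in the `from_bytes` match above).
#[cfg(test)]
mod cursor_tests {
    use super::{Token, Tokens};

    #[test]
    fn from_bytes_pairs_two_byte_tokens() {
        let mut tokens = Tokens::from_bytes(&[0x41, 0xBB, 0x01], None);
        assert_eq!(tokens.next(), Some(Token::OneByte(0x41)));
        assert_eq!(tokens.next(), Some(Token::TwoByte(0xBB, 0x01)));
        assert_eq!(tokens.next(), None);
    }

    #[test]
    fn backtrack_once_rewinds_the_cursor() {
        let mut tokens = Tokens::from_bytes(&[0x41], None);
        assert_eq!(tokens.next(), Some(Token::OneByte(0x41)));
        tokens.backtrack_once();
        assert_eq!(tokens.peek(), Some(Token::OneByte(0x41)));
        assert_eq!(tokens.current_position(), 0);
    }
}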

impl From<Tokens> for Vec<u8> {
    fn from(value: Tokens) -> Self {
        let mut result = vec![];
        for tok in value {
            match tok {
                Token::OneByte(a) => result.push(a),
                Token::TwoByte(a, b) => {
                    result.push(a);
                    result.push(b);
                }
            }
        }

        result
    }
}
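
// A sketch of the byte round-trip: splitting with `from_bytes` and flattening
// back through `Vec<u8>` should reproduce the input exactly.
#[cfg(test)]
mod roundtrip_tests {
    use super::Tokens;

    #[test]
    fn bytes_survive_a_tokenize_flatten_round_trip() {
        let input = vec![0x41, 0xBB, 0x01, 0x3F, 0x30];
        let tokens = Tokens::from_bytes(&input, None);
        let output: Vec<u8> = tokens.into();
        assert_eq!(output, input);
    }
}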