1use crate::char_stream::TextInterval;
2use std::fmt;
3use std::rc::Rc;
4
/// Token type carried by the end-of-file token that `CommonToken::eof` builds.
pub const TOKEN_EOF: i32 = -1;
/// Token type value `0`, set aside under the name "invalid".
pub const INVALID_TOKEN_TYPE: i32 = 0;
/// Channel number that `CommonToken::new` assigns to a fresh token.
pub const DEFAULT_CHANNEL: i32 = 0;
/// Channel number `1`, named for hidden tokens.
pub const HIDDEN_CHANNEL: i32 = 1;
9
/// Typed view of a raw token channel number.
///
/// `Default` and `Hidden` stand for `DEFAULT_CHANNEL` and `HIDDEN_CHANNEL`;
/// every other number is carried verbatim inside `Custom`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TokenChannel {
    /// The channel numbered `DEFAULT_CHANNEL`.
    Default,
    /// The channel numbered `HIDDEN_CHANNEL`.
    Hidden,
    /// A channel identified only by the raw number it wraps.
    Custom(i32),
}
16
17impl TokenChannel {
18 pub const fn value(self) -> i32 {
19 match self {
20 Self::Default => DEFAULT_CHANNEL,
21 Self::Hidden => HIDDEN_CHANNEL,
22 Self::Custom(channel) => channel,
23 }
24 }
25}
26
27impl From<i32> for TokenChannel {
28 fn from(value: i32) -> Self {
29 match value {
30 DEFAULT_CHANNEL => Self::Default,
31 HIDDEN_CHANNEL => Self::Hidden,
32 other => Self::Custom(other),
33 }
34 }
35}
36
37pub trait Token: fmt::Debug {
38 fn token_type(&self) -> i32;
39 fn channel(&self) -> i32;
40 fn start(&self) -> usize;
41 fn stop(&self) -> usize;
42 fn token_index(&self) -> isize;
43 fn line(&self) -> usize;
44 fn column(&self) -> usize;
45 fn text(&self) -> Option<&str>;
46 fn source_name(&self) -> &str;
47
48 fn interval(&self) -> TextInterval {
49 TextInterval::new(self.start(), self.stop())
50 }
51}
52
/// Owned token record that stores every attribute exposed by the `Token` trait.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CommonToken {
    /// Numeric token type.
    token_type: i32,
    /// Raw channel number.
    channel: i32,
    /// Start position of the token.
    start: usize,
    /// Stop position of the token; `usize::MAX` is displayed as `-1`.
    stop: usize,
    /// Assigned index; `-1` until `set_token_index` is called.
    token_index: isize,
    /// Line recorded for the token.
    line: usize,
    /// Column recorded for the token.
    column: usize,
    /// Optional text attached to the token.
    text: Option<String>,
    /// Name of the source the token is attributed to.
    source_name: String,
}
65
/// Bundle of attribute values handed to `TokenFactory::create`.
///
/// The source name is borrowed for `'a`; the optional text is owned.
#[derive(Debug)]
pub struct TokenSpec<'a> {
    /// Numeric token type for the new token.
    pub token_type: i32,
    /// Raw channel number for the new token.
    pub channel: i32,
    /// Start position of the new token.
    pub start: usize,
    /// Stop position of the new token.
    pub stop: usize,
    /// Line to record on the new token.
    pub line: usize,
    /// Column to record on the new token.
    pub column: usize,
    /// Text to attach, if any.
    pub text: Option<String>,
    /// Borrowed name of the source.
    pub source_name: &'a str,
}
77
78impl CommonToken {
79 pub const fn new(token_type: i32) -> Self {
80 Self {
81 token_type,
82 channel: DEFAULT_CHANNEL,
83 start: 0,
84 stop: 0,
85 token_index: -1,
86 line: 1,
87 column: 0,
88 text: None,
89 source_name: String::new(),
90 }
91 }
92
93 pub fn eof(source_name: impl Into<String>, index: usize, line: usize, column: usize) -> Self {
94 Self {
95 token_type: TOKEN_EOF,
96 channel: DEFAULT_CHANNEL,
97 start: index,
98 stop: index.checked_sub(1).unwrap_or(usize::MAX),
99 token_index: -1,
100 line,
101 column,
102 text: Some("<EOF>".to_owned()),
103 source_name: source_name.into(),
104 }
105 }
106
107 #[must_use]
108 pub fn with_text(mut self, text: impl Into<String>) -> Self {
109 self.text = Some(text.into());
110 self
111 }
112
113 #[must_use]
114 pub const fn with_span(mut self, start: usize, stop: usize) -> Self {
115 self.start = start;
116 self.stop = stop;
117 self
118 }
119
120 #[must_use]
121 pub const fn with_position(mut self, line: usize, column: usize) -> Self {
122 self.line = line;
123 self.column = column;
124 self
125 }
126
127 #[must_use]
128 pub const fn with_channel(mut self, channel: i32) -> Self {
129 self.channel = channel;
130 self
131 }
132
133 #[must_use]
134 pub fn with_source_name(mut self, source_name: impl Into<String>) -> Self {
135 self.source_name = source_name.into();
136 self
137 }
138
139 pub const fn set_token_index(&mut self, token_index: isize) {
140 self.token_index = token_index;
141 }
142}
143
144impl Token for CommonToken {
145 fn token_type(&self) -> i32 {
146 self.token_type
147 }
148
149 fn channel(&self) -> i32 {
150 self.channel
151 }
152
153 fn start(&self) -> usize {
154 self.start
155 }
156
157 fn stop(&self) -> usize {
158 self.stop
159 }
160
161 fn token_index(&self) -> isize {
162 self.token_index
163 }
164
165 fn line(&self) -> usize {
166 self.line
167 }
168
169 fn column(&self) -> usize {
170 self.column
171 }
172
173 fn text(&self) -> Option<&str> {
174 self.text.as_deref()
175 }
176
177 fn source_name(&self) -> &str {
178 &self.source_name
179 }
180}
181
182impl fmt::Display for CommonToken {
183 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
184 let text = self.text().unwrap_or("");
185 let channel = if self.channel() == DEFAULT_CHANNEL {
186 String::new()
187 } else {
188 format!(",channel={}", self.channel())
189 };
190 write!(
191 f,
192 "[@{},{}:{}='{}',<{}>{},{}:{}]",
193 self.token_index(),
194 display_token_boundary(self.start()),
195 display_token_boundary(self.stop()),
196 display_text(text),
197 self.token_type(),
198 channel,
199 self.line(),
200 self.column()
201 )
202 }
203}
204
205fn display_token_boundary(value: usize) -> String {
207 if value == usize::MAX {
208 "-1".to_owned()
209 } else {
210 value.to_string()
211 }
212}
213
214fn display_text(text: &str) -> String {
220 let mut out = String::new();
221 for ch in text.chars() {
222 match ch {
223 '\n' => out.push_str("\\n"),
224 '\r' => out.push_str("\\r"),
225 '\t' => out.push_str("\\t"),
226 other => out.push(other),
227 }
228 }
229 out
230}
231
232pub type TokenRef = Rc<CommonToken>;
233
234pub trait TokenFactory {
235 fn create(&self, spec: TokenSpec<'_>) -> CommonToken;
236}
237
/// Field-less factory type; it carries no configuration or state.
#[derive(Debug, Default, Clone)]
pub struct CommonTokenFactory;
240
241impl TokenFactory for CommonTokenFactory {
242 fn create(&self, spec: TokenSpec<'_>) -> CommonToken {
243 let mut token = CommonToken::new(spec.token_type)
244 .with_channel(spec.channel)
245 .with_span(spec.start, spec.stop)
246 .with_position(spec.line, spec.column)
247 .with_source_name(spec.source_name);
248 if let Some(text) = spec.text {
249 token = token.with_text(text);
250 }
251 token
252 }
253}
254
/// Error record reported by a `TokenSource`: a line, a column, and a message.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TokenSourceError {
    /// Line the error is attributed to.
    pub line: usize,
    /// Column the error is attributed to.
    pub column: usize,
    /// Description of the error.
    pub message: String,
}
265
266impl TokenSourceError {
267 pub fn new(line: usize, column: usize, message: impl Into<String>) -> Self {
269 Self {
270 line,
271 column,
272 message: message.into(),
273 }
274 }
275}
276
277pub trait TokenSource {
278 fn next_token(&mut self) -> CommonToken;
279 fn line(&self) -> usize;
280 fn column(&self) -> usize;
281 fn source_name(&self) -> &str;
282 fn drain_errors(&mut self) -> Vec<TokenSourceError> {
284 Vec::new()
285 }
286
287 fn lexer_dfa_string(&self) -> String {
289 String::new()
290 }
291}
292
#[cfg(test)]
mod tests {
    use super::*;

    /// A token with explicit index, span, and position prints every field in
    /// the ANTLR order.
    #[test]
    fn common_token_display_matches_antlr_shape() {
        let mut token = CommonToken::new(7)
            .with_span(2, 4)
            .with_position(3, 9)
            .with_text("abc");
        token.set_token_index(5);

        let rendered = token.to_string();
        assert_eq!(rendered, "[@5,2:4='abc',<7>,3:9]");
    }

    /// Only newline-style control characters are escaped; quotes and
    /// backslashes pass through unchanged.
    #[test]
    fn common_token_display_matches_antlr_escaping() {
        let cases = [
            ("\"", "[@-1,0:0='\"',<1>,1:0]"),
            ("\n", "[@-1,0:0='\\n',<1>,1:0]"),
            ("\\", "[@-1,0:0='\\',<1>,1:0]"),
        ];

        for (text, expected) in cases {
            let token = CommonToken::new(1).with_text(text);
            assert_eq!(token.to_string(), expected);
        }
    }

    /// A non-default channel adds a `,channel=N` segment after the type.
    #[test]
    fn common_token_display_includes_non_default_channel() {
        let rendered = CommonToken::new(2)
            .with_channel(2)
            .with_text("b")
            .to_string();
        assert_eq!(rendered, "[@-1,0:0='b',<2>,channel=2,1:0]");
    }

    /// EOF on empty input stops one before index 0, which prints as `-1`.
    #[test]
    fn eof_display_uses_antlr_empty_input_stop_index() {
        let rendered = CommonToken::eof("", 0, 1, 0).to_string();
        assert_eq!(rendered, "[@-1,0:-1='<EOF>',<-1>,1:0]");
    }
}