Skip to main content

antlr4_runtime/
token.rs

1use crate::char_stream::TextInterval;
2use std::fmt;
3use std::rc::Rc;
4
/// Token type carried by the end-of-input token built by `CommonToken::eof`.
pub const TOKEN_EOF: i32 = -1;
/// Token type value set aside as "invalid"; nothing in this file reads it.
pub const INVALID_TOKEN_TYPE: i32 = 0;
/// Channel number a `CommonToken` starts on unless another one is requested.
pub const DEFAULT_CHANNEL: i32 = 0;
/// Channel number that [`TokenChannel::Hidden`] maps to.
pub const HIDDEN_CHANNEL: i32 = 1;

/// Typed view of a raw channel number.
///
/// Equality is structural: a hand-built `Custom(0)` is *not* equal to
/// `Default` even though both report the same [`value`](Self::value).
/// Going through `TokenChannel::from` never produces such a `Custom`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TokenChannel {
    /// The channel numbered [`DEFAULT_CHANNEL`].
    Default,
    /// The channel numbered [`HIDDEN_CHANNEL`].
    Hidden,
    /// Any other channel, carrying its raw number verbatim.
    Custom(i32),
}

impl TokenChannel {
    /// Returns the raw channel number this variant stands for.
    pub const fn value(self) -> i32 {
        match self {
            TokenChannel::Custom(raw) => raw,
            TokenChannel::Hidden => HIDDEN_CHANNEL,
            TokenChannel::Default => DEFAULT_CHANNEL,
        }
    }
}

impl From<i32> for TokenChannel {
    /// Classifies a raw channel number, using the named variants for the two
    /// well-known channels and `Custom` for everything else.
    fn from(value: i32) -> Self {
        if value == DEFAULT_CHANNEL {
            TokenChannel::Default
        } else if value == HIDDEN_CHANNEL {
            TokenChannel::Hidden
        } else {
            TokenChannel::Custom(value)
        }
    }
}
36
37pub trait Token: fmt::Debug {
38    fn token_type(&self) -> i32;
39    fn channel(&self) -> i32;
40    fn start(&self) -> usize;
41    fn stop(&self) -> usize;
42    fn token_index(&self) -> isize;
43    fn line(&self) -> usize;
44    fn column(&self) -> usize;
45    fn text(&self) -> Option<&str>;
46    fn source_name(&self) -> &str;
47
48    fn interval(&self) -> TextInterval {
49        TextInterval::new(self.start(), self.stop())
50    }
51}
52
/// Owned token record: type, channel, span, position, optional text and the
/// name of the source it came from.
///
/// Fields are private; values are built with `CommonToken::new` /
/// `CommonToken::eof` plus the `with_*` builders and read through the `Token`
/// trait.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CommonToken {
    /// Numeric token type.
    token_type: i32,
    /// Raw channel number.
    channel: i32,
    /// Index at which the token starts in its input.
    start: usize,
    /// Index at which the token stops; `usize::MAX` stands in for `-1`.
    stop: usize,
    /// Assigned token index, `-1` while unassigned.
    token_index: isize,
    /// Line number of the token.
    line: usize,
    /// Column of the token within its line.
    column: usize,
    /// Text attached to the token, if any.
    text: Option<String>,
    /// Name of the source the token is attributed to.
    source_name: String,
}
65
/// Everything a `TokenFactory` needs to build one token.
///
/// The spec owns its optional `text` but only borrows `source_name`, so the
/// factory decides whether and how to copy the name. It derives `Clone`,
/// `PartialEq` and `Eq` in addition to `Debug` so a spec can be reused for
/// several tokens and compared in assertions.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TokenSpec<'a> {
    /// Numeric token type.
    pub token_type: i32,
    /// Raw channel number.
    pub channel: i32,
    /// Index at which the token starts in its input.
    pub start: usize,
    /// Index at which the token stops in its input.
    pub stop: usize,
    /// Line number of the token.
    pub line: usize,
    /// Column of the token within its line.
    pub column: usize,
    /// Text to attach to the token; `None` leaves the token without text.
    pub text: Option<String>,
    /// Name of the source the token is attributed to.
    pub source_name: &'a str,
}
77
78impl CommonToken {
79    pub const fn new(token_type: i32) -> Self {
80        Self {
81            token_type,
82            channel: DEFAULT_CHANNEL,
83            start: 0,
84            stop: 0,
85            token_index: -1,
86            line: 1,
87            column: 0,
88            text: None,
89            source_name: String::new(),
90        }
91    }
92
93    pub fn eof(source_name: impl Into<String>, index: usize, line: usize, column: usize) -> Self {
94        Self {
95            token_type: TOKEN_EOF,
96            channel: DEFAULT_CHANNEL,
97            start: index,
98            stop: index.checked_sub(1).unwrap_or(usize::MAX),
99            token_index: -1,
100            line,
101            column,
102            text: Some("<EOF>".to_owned()),
103            source_name: source_name.into(),
104        }
105    }
106
107    #[must_use]
108    pub fn with_text(mut self, text: impl Into<String>) -> Self {
109        self.text = Some(text.into());
110        self
111    }
112
113    #[must_use]
114    pub const fn with_span(mut self, start: usize, stop: usize) -> Self {
115        self.start = start;
116        self.stop = stop;
117        self
118    }
119
120    #[must_use]
121    pub const fn with_position(mut self, line: usize, column: usize) -> Self {
122        self.line = line;
123        self.column = column;
124        self
125    }
126
127    #[must_use]
128    pub const fn with_channel(mut self, channel: i32) -> Self {
129        self.channel = channel;
130        self
131    }
132
133    #[must_use]
134    pub fn with_source_name(mut self, source_name: impl Into<String>) -> Self {
135        self.source_name = source_name.into();
136        self
137    }
138
139    pub const fn set_token_index(&mut self, token_index: isize) {
140        self.token_index = token_index;
141    }
142}
143
144impl Token for CommonToken {
145    fn token_type(&self) -> i32 {
146        self.token_type
147    }
148
149    fn channel(&self) -> i32 {
150        self.channel
151    }
152
153    fn start(&self) -> usize {
154        self.start
155    }
156
157    fn stop(&self) -> usize {
158        self.stop
159    }
160
161    fn token_index(&self) -> isize {
162        self.token_index
163    }
164
165    fn line(&self) -> usize {
166        self.line
167    }
168
169    fn column(&self) -> usize {
170        self.column
171    }
172
173    fn text(&self) -> Option<&str> {
174        self.text.as_deref()
175    }
176
177    fn source_name(&self) -> &str {
178        &self.source_name
179    }
180}
181
182impl fmt::Display for CommonToken {
183    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
184        let text = self.text().unwrap_or("");
185        let channel = if self.channel() == DEFAULT_CHANNEL {
186            String::new()
187        } else {
188            format!(",channel={}", self.channel())
189        };
190        write!(
191            f,
192            "[@{},{}:{}='{}',<{}>{},{}:{}]",
193            self.token_index(),
194            display_token_boundary(self.start()),
195            display_token_boundary(self.stop()),
196            display_text(text),
197            self.token_type(),
198            channel,
199            self.line(),
200            self.column()
201        )
202    }
203}
204
/// Renders a token boundary index for display.
///
/// `usize::MAX` is the stand-in for ANTLR's `-1` boundary on synthetic
/// tokens and is printed as `-1`; every other value is printed in decimal.
fn display_token_boundary(value: usize) -> String {
    match value {
        usize::MAX => String::from("-1"),
        index => index.to_string(),
    }
}
213
/// Escapes token text the way ANTLR's token display format expects.
///
/// Debug escaping is close but not identical: ANTLR leaves ordinary
/// backslashes and quotes unescaped, and only normalizes the control
/// characters that would otherwise break the one-line token representation.
/// Newline, carriage return and tab become the two-character sequences
/// `\n`, `\r` and `\t`; every other character is copied through unchanged.
fn display_text(text: &str) -> String {
    // Escaping only ever lengthens the text, so the input length is a lower
    // bound on the output size and saves the early reallocations.
    let mut out = String::with_capacity(text.len());
    for ch in text.chars() {
        match ch {
            '\n' => out.push_str("\\n"),
            '\r' => out.push_str("\\r"),
            '\t' => out.push_str("\\t"),
            other => out.push(other),
        }
    }
    out
}
231
232pub type TokenRef = Rc<CommonToken>;
233
234pub trait TokenFactory {
235    fn create(&self, spec: TokenSpec<'_>) -> CommonToken;
236}
237
238#[derive(Clone, Debug, Default)]
239pub struct CommonTokenFactory;
240
241impl TokenFactory for CommonTokenFactory {
242    fn create(&self, spec: TokenSpec<'_>) -> CommonToken {
243        let mut token = CommonToken::new(spec.token_type)
244            .with_channel(spec.channel)
245            .with_span(spec.start, spec.stop)
246            .with_position(spec.line, spec.column)
247            .with_source_name(spec.source_name);
248        if let Some(text) = spec.text {
249            token = token.with_text(text);
250        }
251        token
252    }
253}
254
/// Diagnostic that a token source buffered while it was producing tokens.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TokenSourceError {
    /// Input line on which the diagnostic starts (one-based).
    pub line: usize,
    /// Column within `line` at which the diagnostic starts (zero-based).
    pub column: usize,
    /// ANTLR-compatible message text, without the leading line/column.
    pub message: String,
}

impl TokenSourceError {
    /// Builds a diagnostic for the given input position, taking ownership of
    /// the message.
    pub fn new(line: usize, column: usize, message: impl Into<String>) -> Self {
        let message = message.into();
        Self {
            line,
            column,
            message,
        }
    }
}
276
277pub trait TokenSource {
278    fn next_token(&mut self) -> CommonToken;
279    fn line(&self) -> usize;
280    fn column(&self) -> usize;
281    fn source_name(&self) -> &str;
282    /// Returns and clears diagnostics emitted while fetching tokens.
283    fn drain_errors(&mut self) -> Vec<TokenSourceError> {
284        Vec::new()
285    }
286
287    /// Serializes lexer DFA cache state when the token source exposes one.
288    fn lexer_dfa_string(&self) -> String {
289        String::new()
290    }
291}
292
#[cfg(test)]
mod tests {
    use super::*;

    /// A fully populated token renders as
    /// `[@index,start:stop='text',<type>,line:column]`.
    #[test]
    fn common_token_display_matches_antlr_shape() {
        let mut token = CommonToken::new(7)
            .with_text("abc")
            .with_span(2, 4)
            .with_position(3, 9);
        token.set_token_index(5);
        assert_eq!(token.to_string(), "[@5,2:4='abc',<7>,3:9]");
    }

    /// Quotes and backslashes pass through untouched; newline, carriage
    /// return and tab are each rewritten to a two-character escape.
    #[test]
    fn common_token_display_matches_antlr_escaping() {
        let cases = [
            ("\"", "[@-1,0:0='\"',<1>,1:0]"),
            ("\n", "[@-1,0:0='\\n',<1>,1:0]"),
            ("\r", "[@-1,0:0='\\r',<1>,1:0]"),
            ("\t", "[@-1,0:0='\\t',<1>,1:0]"),
            ("\\", "[@-1,0:0='\\',<1>,1:0]"),
            ("a\r\nb", "[@-1,0:0='a\\r\\nb',<1>,1:0]"),
        ];
        for (text, expected) in cases {
            let token = CommonToken::new(1).with_text(text);
            assert_eq!(token.to_string(), expected, "text {text:?}");
        }
    }

    /// The channel suffix appears only for tokens off the default channel.
    #[test]
    fn common_token_display_includes_non_default_channel() {
        let token = CommonToken::new(2).with_text("b").with_channel(2);
        assert_eq!(token.to_string(), "[@-1,0:0='b',<2>,channel=2,1:0]");
    }

    /// EOF on empty input has no character before it, so its stop prints as `-1`.
    #[test]
    fn eof_display_uses_antlr_empty_input_stop_index() {
        let token = CommonToken::eof("", 0, 1, 0);
        assert_eq!(token.to_string(), "[@-1,0:-1='<EOF>',<-1>,1:0]");
    }

    /// EOF after some input stops one index before where it starts.
    #[test]
    fn eof_display_stops_one_before_start_on_non_empty_input() {
        let token = CommonToken::eof("input", 3, 2, 1);
        assert_eq!(token.to_string(), "[@-1,3:2='<EOF>',<-1>,2:1]");
    }
}