antlr_rust_sleagon/
token.rs

1//! Symbols that parser works on
2use std::borrow::{Borrow, Cow};
3
4use std::fmt::Formatter;
5use std::fmt::{Debug, Display};
6
7use std::sync::atomic::{AtomicIsize, Ordering};
8
9use crate::char_stream::InputData;
10use crate::int_stream::EOF;
11use crate::token_factory::{INVALID_COMMON, INVALID_OWNING};
12
13use better_any::type_id;
14
15/// Type of tokens that parser considers invalid
16pub const TOKEN_INVALID_TYPE: isize = 0;
17/// Type of tokens that DFA can use to advance to next state without consuming actual input token.
18/// Should not be created by downstream implementations.
19pub const TOKEN_EPSILON: isize = -2;
20/// Min token type that can be assigned to tokens created by downstream implementations.
21pub const TOKEN_MIN_USER_TOKEN_TYPE: isize = 1;
22/// Type of EOF token
23pub const TOKEN_EOF: isize = EOF;
24/// Default channel lexer emits tokens to
25pub const TOKEN_DEFAULT_CHANNEL: isize = 0;
26/// Predefined additional channel for lexer to assign tokens to
27pub const TOKEN_HIDDEN_CHANNEL: isize = 1;
28/// Shorthand for TOKEN_HIDDEN_CHANNEL
29pub const HIDDEN: isize = TOKEN_HIDDEN_CHANNEL;
30
31/// Implemented by tokens that are produced by a `TokenFactory`
32#[allow(missing_docs)]
33pub trait Token: Debug + Display {
34    /// Type of the underlying data this token refers to
35    type Data: ?Sized + InputData;
36    // fn get_source(&self) -> Option<(Box<dyn TokenSource>, Box<dyn CharStream>)>;
37    fn get_token_type(&self) -> isize;
38    fn get_channel(&self) -> isize {
39        TOKEN_DEFAULT_CHANNEL
40    }
41    fn get_start(&self) -> isize {
42        0
43    }
44    fn get_stop(&self) -> isize {
45        0
46    }
47    fn get_line(&self) -> isize {
48        0
49    }
50    fn get_column(&self) -> isize {
51        0
52    }
53
54    fn get_text(&self) -> &Self::Data;
55    fn set_text(&mut self, _text: <Self::Data as ToOwned>::Owned) {}
56
57    fn get_token_index(&self) -> isize {
58        0
59    }
60    fn set_token_index(&self, _v: isize) {}
61
62    // fn get_token_source(&self) -> &dyn TokenSource;
63    // fn get_input_stream(&self) -> &dyn CharStream;
64
65    /// returns fully owned representation of this token
66    fn to_owned(&self) -> OwningToken {
67        OwningToken {
68            token_type: self.get_token_type(),
69            channel: self.get_channel(),
70            start: self.get_start(),
71            stop: self.get_stop(),
72            token_index: AtomicIsize::from(self.get_token_index()),
73            line: self.get_line(),
74            column: self.get_column(),
75            text: self.get_text().to_display(),
76            read_only: true,
77        }
78    }
79}
80
81/// Token that owns its data
82pub type OwningToken = GenericToken<String>;
83/// Most versatile Token that uses Cow to save data
84/// Can be used seamlessly switch from owned to zero-copy parsing
85pub type CommonToken<'a> = GenericToken<Cow<'a, str>>;
86
87type_id!(OwningToken);
88type_id!(CommonToken<'a>);
89
/// Token layout behind both `OwningToken` and `CommonToken`.
///
/// `T` is the storage used for the token text (`String` and `Cow<str>` for the
/// two aliases declared in this file).
#[derive(Debug)]
#[allow(missing_docs)]
pub struct GenericToken<T> {
    //    source: Option<(Box<TokenSource>,Box<CharStream>)>,
    /// Token type, reported by `get_token_type`
    pub token_type: isize,
    /// Channel the token belongs to, reported by `get_channel`
    pub channel: isize,
    /// Start value, reported by `get_start`
    pub start: isize,
    /// Stop value, reported by `get_stop`
    pub stop: isize,
    /// Token index; atomic so that `set_token_index` can update it through `&self`
    pub token_index: AtomicIsize,
    /// Line, reported by `get_line`
    pub line: isize,
    /// Column, reported by `get_column`
    pub column: isize,
    /// Stored token text
    pub text: T,
    /// Read-only marker; `Clone` always resets it to `false`.
    /// NOTE(review): nothing in this file reads the flag - confirm how callers use it.
    pub read_only: bool,
}
104
105impl<T: Clone> Clone for GenericToken<T>
106where
107    Self: Token,
108{
109    fn clone(&self) -> Self {
110        Self {
111            token_type: self.token_type,
112            channel: self.channel,
113            start: self.start,
114            stop: self.stop,
115            token_index: AtomicIsize::new(self.get_token_index()),
116            line: self.line,
117            column: self.column,
118            text: self.text.clone(),
119            read_only: false,
120        }
121    }
122}
123
124impl<T: Borrow<str> + Debug> Display for GenericToken<T> {
125    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
126        let txt = if self.token_type == TOKEN_EOF {
127            "<EOF>"
128        } else {
129            self.text.borrow()
130        };
131        let txt = txt.replace("\n", "\\n");
132        let txt = txt.replace("\r", "\\r");
133        let txt = txt.replace("\t", "\\t");
134        //        let txt = escape_whitespaces(txt,false);
135        f.write_fmt(format_args!(
136            "[@{},{}:{}='{}',<{}>{},{}:{}]",
137            self.get_token_index(),
138            self.start,
139            self.stop,
140            txt,
141            self.token_type,
142            if self.channel > 0 {
143                self.channel.to_string()
144            } else {
145                String::new()
146            },
147            self.line,
148            self.column
149        ))
150    }
151}
152
153// impl<T: Borrow<str> + Debug> TokenWrapper for GenericToken<T> { type Inner = Self; }
154
155impl<T: Borrow<str> + Debug> Token for GenericToken<T> {
156    type Data = str;
157
158    fn get_token_type(&self) -> isize {
159        self.token_type
160    }
161
162    fn get_channel(&self) -> isize {
163        self.channel
164    }
165
166    fn get_start(&self) -> isize {
167        self.start
168    }
169
170    fn get_stop(&self) -> isize {
171        self.stop
172    }
173
174    fn get_line(&self) -> isize {
175        self.line
176    }
177
178    fn get_column(&self) -> isize {
179        self.column
180    }
181
182    // fn get_source(&self) -> Option<(Box<dyn TokenSource>, Box<dyn CharStream>)> {
183    //     unimplemented!()
184    // }
185
186    fn get_text(&self) -> &str {
187        if self.token_type == EOF {
188            "<EOF>"
189        } else {
190            self.text.borrow()
191        }
192    }
193
194    fn set_text(&mut self, _text: String) {
195        unimplemented!()
196    }
197
198    fn get_token_index(&self) -> isize {
199        self.token_index.load(Ordering::Relaxed)
200    }
201
202    fn set_token_index(&self, _v: isize) {
203        self.token_index.store(_v, Ordering::Relaxed)
204    }
205
206    fn to_owned(&self) -> OwningToken {
207        OwningToken {
208            token_type: self.token_type,
209            channel: self.channel,
210            start: self.start,
211            stop: self.stop,
212            token_index: AtomicIsize::new(self.get_token_index()),
213            line: self.line,
214            column: self.column,
215            text: self.text.borrow().to_owned(),
216            read_only: self.read_only,
217        }
218    }
219}
220
221impl Default for &'_ OwningToken {
222    fn default() -> Self {
223        &**INVALID_OWNING
224    }
225}
226
227impl Default for &'_ CommonToken<'_> {
228    fn default() -> Self {
229        &**INVALID_COMMON
230    }
231}
232
233//
234// impl CommonToken {
235//     fn new_common_token(
236//         _source: Option<(Box<dyn TokenSource>, Box<dyn CharStream>)>,
237//         _token_type: isize,
238//         _channel: isize,
239//         _start: isize,
240//         _stop: isize,
241//     ) -> CommonToken {
242//         unimplemented!()
243//     }
244//
245//     fn clone(&self) -> CommonToken {
246//         unimplemented!()
247//     }
248// }