do_not_use_antlr_rust/
token.rs

//! Symbols (tokens) that the parser works on
2use std::borrow::{Borrow, Cow};
3
4use std::fmt::Formatter;
5use std::fmt::{Debug, Display};
6
7use std::sync::atomic::{AtomicIsize, Ordering};
8
9use crate::char_stream::InputData;
10use crate::int_stream::EOF;
11use crate::token_factory::{INVALID_COMMON, INVALID_OWNING};
12
13use better_any::type_id;
14
15/// Type of tokens that parser considers invalid
16pub const TOKEN_INVALID_TYPE: isize = 0;
17/// Type of tokens that DFA can use to advance to next state without consuming actual input token.
18/// Should not be created by downstream implementations.
19pub const TOKEN_EPSILON: isize = -2;
20/// Min token type that can be assigned to tokens created by downstream implementations.
21pub const TOKEN_MIN_USER_TOKEN_TYPE: isize = 1;
22/// Type of EOF token
23pub const TOKEN_EOF: isize = EOF;
24/// Default channel lexer emits tokens to
25pub const TOKEN_DEFAULT_CHANNEL: isize = 0;
26/// Predefined additional channel for lexer to assign tokens to
27pub const TOKEN_HIDDEN_CHANNEL: isize = 1;
28/// Shorthand for TOKEN_HIDDEN_CHANNEL
29pub const HIDDEN: isize = TOKEN_HIDDEN_CHANNEL;
30
31/// Implemented by tokens that are produced by a `TokenFactory`
32#[allow(missing_docs)]
33pub trait Token: Debug + Display {
34    /// Type of the underlying data this token refers to
35    type Data: ?Sized + InputData;
36    // fn get_source(&self) -> Option<(Box<dyn TokenSource>, Box<dyn CharStream>)>;
37    fn get_token_type(&self) -> isize;
38    fn get_channel(&self) -> isize { TOKEN_DEFAULT_CHANNEL }
39    fn get_start(&self) -> isize { 0 }
40    fn get_stop(&self) -> isize { 0 }
41    fn get_line(&self) -> isize { 0 }
42    fn get_column(&self) -> isize { 0 }
43
44    fn get_text(&self) -> &Self::Data;
45    fn set_text(&mut self, _text: <Self::Data as ToOwned>::Owned) {}
46
47    fn get_token_index(&self) -> isize { 0 }
48    fn set_token_index(&self, _v: isize) {}
49
50    // fn get_token_source(&self) -> &dyn TokenSource;
51    // fn get_input_stream(&self) -> &dyn CharStream;
52
53    /// returns fully owned representation of this token
54    fn to_owned(&self) -> OwningToken {
55        OwningToken {
56            token_type: self.get_token_type(),
57            channel: self.get_channel(),
58            start: self.get_start(),
59            stop: self.get_stop(),
60            token_index: AtomicIsize::from(self.get_token_index()),
61            line: self.get_line(),
62            column: self.get_column(),
63            text: self.get_text().to_display(),
64            read_only: true,
65        }
66    }
67}
68
69/// Token that owns its data
70pub type OwningToken = GenericToken<String>;
71/// Most versatile Token that uses Cow to save data
72/// Can be used seamlessly switch from owned to zero-copy parsing
73pub type CommonToken<'a> = GenericToken<Cow<'a, str>>;
74
75type_id!(OwningToken);
76type_id!(CommonToken<'a>);
77
/// Token representation that is generic over the storage `T` used for its text.
///
/// `OwningToken` and `CommonToken` are aliases of this struct for `String` and
/// `Cow<str>` text respectively.
#[derive(Debug)]
#[allow(missing_docs)]
pub struct GenericToken<T> {
    //    source: Option<(Box<TokenSource>,Box<CharStream>)>,
    /// Type of the token, returned by `Token::get_token_type`.
    pub token_type: isize,
    /// Channel of the token, returned by `Token::get_channel`.
    pub channel: isize,
    /// Start position, returned by `Token::get_start`.
    pub start: isize,
    /// Stop position, returned by `Token::get_stop`.
    pub stop: isize,
    /// Index of the token. Atomic so that `Token::set_token_index` can update it
    /// through a shared reference.
    pub token_index: AtomicIsize,
    /// Line, returned by `Token::get_line`.
    pub line: isize,
    /// Column, returned by `Token::get_column`.
    pub column: isize,
    /// Text of the token.
    pub text: T,
    /// Read-only flag. Nothing in this module enforces it; `Clone` resets it to `false`.
    pub read_only: bool,
}
92
93impl<T: Clone> Clone for GenericToken<T>
94where
95    Self: Token,
96{
97    fn clone(&self) -> Self {
98        Self {
99            token_type: self.token_type,
100            channel: self.channel,
101            start: self.start,
102            stop: self.stop,
103            token_index: AtomicIsize::new(self.get_token_index()),
104            line: self.line,
105            column: self.column,
106            text: self.text.clone(),
107            read_only: false,
108        }
109    }
110}
111
112impl<T: Borrow<str> + Debug> Display for GenericToken<T> {
113    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
114        let txt = if self.token_type == TOKEN_EOF {
115            "<EOF>"
116        } else {
117            self.text.borrow()
118        };
119        let txt = txt.replace("\n", "\\n");
120        let txt = txt.replace("\r", "\\r");
121        let txt = txt.replace("\t", "\\t");
122        //        let txt = escape_whitespaces(txt,false);
123        f.write_fmt(format_args!(
124            "[@{},{}:{}='{}',<{}>{},{}:{}]",
125            self.get_token_index(),
126            self.start,
127            self.stop,
128            txt,
129            self.token_type,
130            if self.channel > 0 {
131                self.channel.to_string()
132            } else {
133                String::new()
134            },
135            self.line,
136            self.column
137        ))
138    }
139}
140
141// impl<T: Borrow<str> + Debug> TokenWrapper for GenericToken<T> { type Inner = Self; }
142
143impl<T: Borrow<str> + Debug> Token for GenericToken<T> {
144    type Data = str;
145
146    fn get_token_type(&self) -> isize { self.token_type }
147
148    fn get_channel(&self) -> isize { self.channel }
149
150    fn get_start(&self) -> isize { self.start }
151
152    fn get_stop(&self) -> isize { self.stop }
153
154    fn get_line(&self) -> isize { self.line }
155
156    fn get_column(&self) -> isize { self.column }
157
158    // fn get_source(&self) -> Option<(Box<dyn TokenSource>, Box<dyn CharStream>)> {
159    //     unimplemented!()
160    // }
161
162    fn get_text(&self) -> &str {
163        if self.token_type == EOF {
164            "<EOF>"
165        } else {
166            self.text.borrow()
167        }
168    }
169
170    fn set_text(&mut self, _text: String) { unimplemented!() }
171
172    fn get_token_index(&self) -> isize { self.token_index.load(Ordering::Relaxed) }
173
174    fn set_token_index(&self, _v: isize) { self.token_index.store(_v, Ordering::Relaxed) }
175
176    fn to_owned(&self) -> OwningToken {
177        OwningToken {
178            token_type: self.token_type,
179            channel: self.channel,
180            start: self.start,
181            stop: self.stop,
182            token_index: AtomicIsize::new(self.get_token_index()),
183            line: self.line,
184            column: self.column,
185            text: self.text.borrow().to_owned(),
186            read_only: self.read_only,
187        }
188    }
189}
190
191impl Default for &'_ OwningToken {
192    fn default() -> Self { &**INVALID_OWNING }
193}
194
195impl Default for &'_ CommonToken<'_> {
196    fn default() -> Self { &**INVALID_COMMON }
197}
198
199//
200// impl CommonToken {
201//     fn new_common_token(
202//         _source: Option<(Box<dyn TokenSource>, Box<dyn CharStream>)>,
203//         _token_type: isize,
204//         _channel: isize,
205//         _start: isize,
206//         _stop: isize,
207//     ) -> CommonToken {
208//         unimplemented!()
209//     }
210//
211//     fn clone(&self) -> CommonToken {
212//         unimplemented!()
213//     }
214// }