do_not_use_antlr_rust/
token_factory.rs

1//! How Lexer should produce tokens
2use std::borrow::Cow::{Borrowed, Owned};
3use std::borrow::{Borrow, Cow};
4
5use std::fmt::{Debug, Formatter};
6use std::marker::PhantomData;
7
8use std::sync::atomic::AtomicIsize;
9
10use typed_arena::Arena;
11
12use crate::char_stream::{CharStream, InputData};
13use crate::token::Token;
14use crate::token::{CommonToken, OwningToken, TOKEN_INVALID_TYPE};
15use better_any::{Tid, TidAble};
16
17#[allow(non_upper_case_globals)]
18lazy_static! {
19    pub(crate) static ref COMMON_TOKEN_FACTORY_DEFAULT: Box<CommonTokenFactory> =
20        Box::new(CommonTokenFactory {});
21    pub(crate) static ref INVALID_OWNING: Box<OwningToken> = Box::new(OwningToken {
22        token_type: TOKEN_INVALID_TYPE,
23        channel: 0,
24        start: -1,
25        stop: -1,
26        token_index: AtomicIsize::new(-1),
27        line: -1,
28        column: -1,
29        text: "<invalid>".to_owned(),
30        read_only: true,
31    });
32    pub(crate) static ref INVALID_COMMON: Box<CommonToken<'static>> = Box::new(CommonToken {
33        token_type: TOKEN_INVALID_TYPE,
34        channel: 0,
35        start: -1,
36        stop: -1,
37        token_index: AtomicIsize::new(-1),
38        line: -1,
39        column: -1,
40        text: Borrowed("<invalid>"),
41        read_only: true,
42    });
43}
44
45/// Trait for creating tokens.
46pub trait TokenFactory<'a>: TidAble<'a> + Sized {
47    /// Type of tokens emitted by this factory.
48    type Inner: Token<Data = Self::Data> + ?Sized + 'a;
49    /// Ownership of the emitted tokens
50    type Tok: Borrow<Self::Inner> + Clone + 'a + Debug;
51    // can relax InputData to just ToOwned here?
52    /// Type of the underlying storage
53    type Data: InputData + ?Sized;
54    /// Type of the `CharStream` that factory can produce tokens from
55    type From;
56
57    /// Creates token either from `sourse` or from pure data in `text`
58    /// Either `source` or `text` are not None
59    fn create<T>(
60        &'a self,
61        source: Option<&mut T>,
62        ttype: isize,
63        text: Option<<Self::Data as ToOwned>::Owned>,
64        channel: isize,
65        start: isize,
66        stop: isize,
67        line: isize,
68        column: isize,
69    ) -> Self::Tok
70    where
71        T: CharStream<Self::From> + ?Sized;
72
73    /// Creates invalid token
74    /// Invalid tokens must have `TOKEN_INVALID_TYPE` token type.
75    fn create_invalid() -> Self::Tok;
76
77    /// Creates `Self::Data` representation for `from` for lexer to work with
78    /// when it does not need to create full token   
79    fn get_data(from: Self::From) -> Cow<'a, Self::Data>;
80}
81
82/// Default token factory
83#[derive(Default, Debug)]
84pub struct CommonTokenFactory;
85
86better_any::tid! {CommonTokenFactory}
87
88impl Default for &'_ CommonTokenFactory {
89    fn default() -> Self { &**COMMON_TOKEN_FACTORY_DEFAULT }
90}
91
92impl<'a> TokenFactory<'a> for CommonTokenFactory {
93    type Inner = CommonToken<'a>;
94    type Tok = Box<Self::Inner>;
95    type Data = str;
96    type From = Cow<'a, str>;
97
98    #[inline]
99    fn create<T>(
100        &'a self,
101        source: Option<&mut T>,
102        ttype: isize,
103        text: Option<String>,
104        channel: isize,
105        start: isize,
106        stop: isize,
107        line: isize,
108        column: isize,
109    ) -> Self::Tok
110    where
111        T: CharStream<Self::From> + ?Sized,
112    {
113        let text = match (text, source) {
114            (Some(t), _) => Owned(t),
115            (None, Some(x)) => {
116                if stop >= x.size() || start >= x.size() {
117                    Borrowed("<EOF>")
118                } else {
119                    x.get_text(start, stop).into()
120                }
121            }
122            _ => Borrowed(""),
123        };
124        Box::new(CommonToken {
125            token_type: ttype,
126            channel,
127            start,
128            stop,
129            token_index: AtomicIsize::new(-1),
130            line,
131            column,
132            text,
133            read_only: false,
134        })
135    }
136
137    fn create_invalid() -> Self::Tok { INVALID_COMMON.clone() }
138
139    fn get_data(from: Self::From) -> Cow<'a, Self::Data> { from }
140}
141
142/// Token factory that produces heap allocated
143/// `OwningToken`s
144#[derive(Default, Debug)]
145pub struct OwningTokenFactory;
146
147better_any::tid! {OwningTokenFactory}
148
149impl<'a> TokenFactory<'a> for OwningTokenFactory {
150    type Inner = OwningToken;
151    type Tok = Box<Self::Inner>;
152    type Data = str;
153    type From = String;
154
155    #[inline]
156    fn create<T>(
157        &'a self,
158        source: Option<&mut T>,
159        ttype: isize,
160        text: Option<String>,
161        channel: isize,
162        start: isize,
163        stop: isize,
164        line: isize,
165        column: isize,
166    ) -> Self::Tok
167    where
168        T: CharStream<String> + ?Sized,
169    {
170        let text = match (text, source) {
171            (Some(t), _) => t,
172            (None, Some(x)) => {
173                if stop >= x.size() || start >= x.size() {
174                    "<EOF>".to_owned()
175                } else {
176                    x.get_text(start, stop)
177                }
178            }
179            _ => String::new(),
180        };
181        Box::new(OwningToken {
182            token_type: ttype,
183            channel,
184            start,
185            stop,
186            token_index: AtomicIsize::new(-1),
187            line,
188            column,
189            text,
190            read_only: false,
191        })
192    }
193
194    fn create_invalid() -> Self::Tok { INVALID_OWNING.clone() }
195
196    fn get_data(from: Self::From) -> Cow<'a, Self::Data> { from.into() }
197}
198
// pub struct DynFactory<'input,TF:TokenFactory<'input>>(TF) where TF::Tok:CoerceUnsized<Box<dyn Token+'input>>;
200// impl <'input,TF:TokenFactory<'input>> TokenFactory<'input> for DynFactory<'input,TF>
201// where TF::Tok:CoerceUnsized<Box<dyn Token+'input>>
202// {
203//
204// }
205
206///Arena token factory that contains `OwningToken`s
207pub type ArenaOwningFactory<'a> = ArenaFactory<'a, OwningTokenFactory, OwningToken>;
208///Arena token factory that contains `CommonToken`s
209pub type ArenaCommonFactory<'a> = ArenaFactory<'a, CommonTokenFactory, CommonToken<'a>>;
210
211/// This is a wrapper for Token factory that allows to allocate tokens in separate arena.
212/// It can allow to significantly improve performance by passing Tokens by references everywhere.
213///
214/// Requires `&'a Tok: Default` bound to produce invalid tokens, which can be easily implemented
215/// like this:
216/// ```text
217/// lazy_static!{ static ref INVALID_TOKEN:Box<CustomToken> = ... }
218/// impl Default for &'_ CustomToken {
219///     fn default() -> Self { &**INVALID_TOKEN }
220/// }
221/// ```
222/// or if possible just
223/// ```text
224/// const INVALID_TOKEN:CustomToken = ...
225/// ```
226// Box is used here because it is almost always should be used for token factory
227pub struct ArenaFactory<'input, TF, T> {
228    arena: Arena<T>,
229    factory: TF,
230    pd: PhantomData<&'input str>,
231}
232
233better_any::tid! {impl<'input,TF,T> TidAble<'input> for ArenaFactory<'input,TF,T>}
234
235impl<'input, TF: Debug, T> Debug for ArenaFactory<'input, TF, T> {
236    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
237        f.debug_struct("ArenaFactory")
238            .field("arena", &"Arena")
239            .field("factory", &self.factory)
240            .finish()
241    }
242}
243
244impl<'input, TF, T> Default for ArenaFactory<'input, TF, T>
245where
246    TF: Default,
247{
248    fn default() -> Self {
249        Self {
250            arena: Default::default(),
251            factory: Default::default(),
252            pd: Default::default(),
253        }
254    }
255}
256
257impl<'input, TF, Tok> TokenFactory<'input> for ArenaFactory<'input, TF, Tok>
258where
259    TF: TokenFactory<'input, Tok = Box<Tok>, Inner = Tok>,
260    Tok: Token<Data = TF::Data> + Clone + TidAble<'input>,
261    for<'a> &'a Tok: Default,
262{
263    type Inner = Tok;
264    type Tok = &'input Tok;
265    type Data = TF::Data;
266    type From = TF::From;
267
268    #[inline]
269    fn create<T>(
270        &'input self,
271        source: Option<&mut T>,
272        ttype: isize,
273        text: Option<<Self::Data as ToOwned>::Owned>,
274        channel: isize,
275        start: isize,
276        stop: isize,
277        line: isize,
278        column: isize,
279    ) -> Self::Tok
280    where
281        T: CharStream<Self::From> + ?Sized,
282    {
283        // todo remove redundant allocation
284        let token = self
285            .factory
286            .create(source, ttype, text, channel, start, stop, line, column);
287        self.arena.alloc(*token)
288    }
289
290    fn create_invalid() -> &'input Tok { <&Tok as Default>::default() }
291
292    fn get_data(from: Self::From) -> Cow<'input, Self::Data> { TF::get_data(from) }
293}
294
295#[doc(hidden)]
296pub trait TokenAware<'input> {
297    type TF: TokenFactory<'input> + 'input;
298}