antlr_rust_sleagon/
token_factory.rs

1//! How Lexer should produce tokens
2use std::borrow::Cow::{Borrowed, Owned};
3use std::borrow::{Borrow, Cow};
4
5use std::fmt::{Debug, Formatter};
6use std::marker::PhantomData;
7
8use std::sync::atomic::AtomicIsize;
9
10use typed_arena::Arena;
11
12use crate::char_stream::{CharStream, InputData};
13use crate::token::Token;
14use crate::token::{CommonToken, OwningToken, TOKEN_INVALID_TYPE};
15use better_any::{Tid, TidAble};
16
17#[allow(non_upper_case_globals)]
18lazy_static! {
19    pub(crate) static ref COMMON_TOKEN_FACTORY_DEFAULT: Box<CommonTokenFactory> =
20        Box::new(CommonTokenFactory {});
21    pub(crate) static ref INVALID_OWNING: Box<OwningToken> = Box::new(OwningToken {
22        token_type: TOKEN_INVALID_TYPE,
23        channel: 0,
24        start: -1,
25        stop: -1,
26        token_index: AtomicIsize::new(-1),
27        line: -1,
28        column: -1,
29        text: "<invalid>".to_owned(),
30        read_only: true,
31    });
32    pub(crate) static ref INVALID_COMMON: Box<CommonToken<'static>> = Box::new(CommonToken {
33        token_type: TOKEN_INVALID_TYPE,
34        channel: 0,
35        start: -1,
36        stop: -1,
37        token_index: AtomicIsize::new(-1),
38        line: -1,
39        column: -1,
40        text: Borrowed("<invalid>"),
41        read_only: true,
42    });
43}
44
45/// Trait for creating tokens.
46pub trait TokenFactory<'a>: TidAble<'a> + Sized {
47    /// Type of tokens emitted by this factory.
48    type Inner: Token<Data = Self::Data> + ?Sized + 'a;
49    /// Ownership of the emitted tokens
50    type Tok: Borrow<Self::Inner> + Clone + 'a + Debug;
51    // can relax InputData to just ToOwned here?
52    /// Type of the underlying storage
53    type Data: InputData + ?Sized;
54    /// Type of the `CharStream` that factory can produce tokens from
55    type From;
56
57    /// Creates token either from `sourse` or from pure data in `text`
58    /// Either `source` or `text` are not None
59    fn create<T>(
60        &'a self,
61        source: Option<&mut T>,
62        ttype: isize,
63        text: Option<<Self::Data as ToOwned>::Owned>,
64        channel: isize,
65        start: isize,
66        stop: isize,
67        line: isize,
68        column: isize,
69    ) -> Self::Tok
70    where
71        T: CharStream<Self::From> + ?Sized;
72
73    /// Creates invalid token
74    /// Invalid tokens must have `TOKEN_INVALID_TYPE` token type.
75    fn create_invalid() -> Self::Tok;
76
77    /// Creates `Self::Data` representation for `from` for lexer to work with
78    /// when it does not need to create full token   
79    fn get_data(from: Self::From) -> Cow<'a, Self::Data>;
80}
81
82/// Default token factory
83#[derive(Default, Debug)]
84pub struct CommonTokenFactory;
85
86better_any::tid! {CommonTokenFactory}
87
88impl Default for &'_ CommonTokenFactory {
89    fn default() -> Self {
90        &**COMMON_TOKEN_FACTORY_DEFAULT
91    }
92}
93
94impl<'a> TokenFactory<'a> for CommonTokenFactory {
95    type Inner = CommonToken<'a>;
96    type Tok = Box<Self::Inner>;
97    type Data = str;
98    type From = Cow<'a, str>;
99
100    #[inline]
101    fn create<T>(
102        &'a self,
103        source: Option<&mut T>,
104        ttype: isize,
105        text: Option<String>,
106        channel: isize,
107        start: isize,
108        stop: isize,
109        line: isize,
110        column: isize,
111    ) -> Self::Tok
112    where
113        T: CharStream<Self::From> + ?Sized,
114    {
115        let text = match (text, source) {
116            (Some(t), _) => Owned(t),
117            (None, Some(x)) => {
118                if stop >= x.size() || start >= x.size() {
119                    Borrowed("<EOF>")
120                } else {
121                    x.get_text(start, stop).into()
122                }
123            }
124            _ => Borrowed(""),
125        };
126        Box::new(CommonToken {
127            token_type: ttype,
128            channel,
129            start,
130            stop,
131            token_index: AtomicIsize::new(-1),
132            line,
133            column,
134            text,
135            read_only: false,
136        })
137    }
138
139    fn create_invalid() -> Self::Tok {
140        INVALID_COMMON.clone()
141    }
142
143    fn get_data(from: Self::From) -> Cow<'a, Self::Data> {
144        from
145    }
146}
147
148/// Token factory that produces heap allocated
149/// `OwningToken`s
150#[derive(Default, Debug)]
151pub struct OwningTokenFactory;
152
153better_any::tid! {OwningTokenFactory}
154
155impl<'a> TokenFactory<'a> for OwningTokenFactory {
156    type Inner = OwningToken;
157    type Tok = Box<Self::Inner>;
158    type Data = str;
159    type From = String;
160
161    #[inline]
162    fn create<T>(
163        &'a self,
164        source: Option<&mut T>,
165        ttype: isize,
166        text: Option<String>,
167        channel: isize,
168        start: isize,
169        stop: isize,
170        line: isize,
171        column: isize,
172    ) -> Self::Tok
173    where
174        T: CharStream<String> + ?Sized,
175    {
176        let text = match (text, source) {
177            (Some(t), _) => t,
178            (None, Some(x)) => {
179                if stop >= x.size() || start >= x.size() {
180                    "<EOF>".to_owned()
181                } else {
182                    x.get_text(start, stop)
183                }
184            }
185            _ => String::new(),
186        };
187        Box::new(OwningToken {
188            token_type: ttype,
189            channel,
190            start,
191            stop,
192            token_index: AtomicIsize::new(-1),
193            line,
194            column,
195            text,
196            read_only: false,
197        })
198    }
199
200    fn create_invalid() -> Self::Tok {
201        INVALID_OWNING.clone()
202    }
203
204    fn get_data(from: Self::From) -> Cow<'a, Self::Data> {
205        from.into()
206    }
207}
208
// pub struct DynFactory<'input,TF:TokenFactory<'input>>(TF) where TF::Tok:CoerceUnsized<Box<dyn Token+'input>>;
210// impl <'input,TF:TokenFactory<'input>> TokenFactory<'input> for DynFactory<'input,TF>
211// where TF::Tok:CoerceUnsized<Box<dyn Token+'input>>
212// {
213//
214// }
215
216///Arena token factory that contains `OwningToken`s
217pub type ArenaOwningFactory<'a> = ArenaFactory<'a, OwningTokenFactory, OwningToken>;
218///Arena token factory that contains `CommonToken`s
219pub type ArenaCommonFactory<'a> = ArenaFactory<'a, CommonTokenFactory, CommonToken<'a>>;
220
221/// This is a wrapper for Token factory that allows to allocate tokens in separate arena.
222/// It can allow to significantly improve performance by passing Tokens by references everywhere.
223///
224/// Requires `&'a Tok: Default` bound to produce invalid tokens, which can be easily implemented
225/// like this:
226/// ```text
227/// lazy_static!{ static ref INVALID_TOKEN:Box<CustomToken> = ... }
228/// impl Default for &'_ CustomToken {
229///     fn default() -> Self { &**INVALID_TOKEN }
230/// }
231/// ```
232/// or if possible just
233/// ```text
234/// const INVALID_TOKEN:CustomToken = ...
235/// ```
236// Box is used here because it is almost always should be used for token factory
237pub struct ArenaFactory<'input, TF, T> {
238    arena: Arena<T>,
239    factory: TF,
240    pd: PhantomData<&'input str>,
241}
242
243better_any::tid! {impl<'input,TF,T> TidAble<'input> for ArenaFactory<'input,TF,T>}
244
245impl<'input, TF: Debug, T> Debug for ArenaFactory<'input, TF, T> {
246    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
247        f.debug_struct("ArenaFactory")
248            .field("arena", &"Arena")
249            .field("factory", &self.factory)
250            .finish()
251    }
252}
253
254impl<'input, TF, T> Default for ArenaFactory<'input, TF, T>
255where
256    TF: Default,
257{
258    fn default() -> Self {
259        Self {
260            arena: Default::default(),
261            factory: Default::default(),
262            pd: Default::default(),
263        }
264    }
265}
266
267impl<'input, TF, Tok> TokenFactory<'input> for ArenaFactory<'input, TF, Tok>
268where
269    TF: TokenFactory<'input, Tok = Box<Tok>, Inner = Tok>,
270    Tok: Token<Data = TF::Data> + Clone + TidAble<'input>,
271    for<'a> &'a Tok: Default,
272{
273    type Inner = Tok;
274    type Tok = &'input Tok;
275    type Data = TF::Data;
276    type From = TF::From;
277
278    #[inline]
279    fn create<T>(
280        &'input self,
281        source: Option<&mut T>,
282        ttype: isize,
283        text: Option<<Self::Data as ToOwned>::Owned>,
284        channel: isize,
285        start: isize,
286        stop: isize,
287        line: isize,
288        column: isize,
289    ) -> Self::Tok
290    where
291        T: CharStream<Self::From> + ?Sized,
292    {
293        // todo remove redundant allocation
294        let token = self
295            .factory
296            .create(source, ttype, text, channel, start, stop, line, column);
297        self.arena.alloc(*token)
298    }
299
300    fn create_invalid() -> &'input Tok {
301        <&Tok as Default>::default()
302    }
303
304    fn get_data(from: Self::From) -> Cow<'input, Self::Data> {
305        TF::get_data(from)
306    }
307}
308
309#[doc(hidden)]
310pub trait TokenAware<'input> {
311    type TF: TokenFactory<'input> + 'input;
312}