Skip to main content

oak_rust/lexer/
mod.rs

1pub use self::token_type::{RustToken, RustTokenType};
2use crate::language::RustLanguage;
3use oak_core::{Lexer, LexerCache, LexerState, lexer::LexOutput, source::Source};
4
5mod lex;
6mod token_type;
7
8/// A lexer for the Rust programming language.
9///
10/// The `RustLexer` is responsible for tokenizing Rust source code into a sequence of tokens
11/// that can be used by the parser. It handles all Rust syntax including modern features like
12/// raw strings, byte strings, lifetimes, and all standard Rust keywords.
13///
14/// # Examples
15///
16/// Basic usage:
17///
18/// ```rust,ignore
19/// use oak_core::{Lexer, LexerState, SourceText};
20/// use oak_rust::{RustLanguage, RustLexer};
21///
22/// let language = RustLanguage::default();
23/// let lexer = RustLexer::new(&language);
24/// let source = SourceText::new("fn main() { println!(\"Hello, world!\"); }");
25/// let mut cache = oak_core::parser::session::ParseSession::<RustLanguage>::default();
26/// let output = lexer.lex(&source, &[], &mut cache);
27///
28/// // The output contains tokens for the entire source code
29/// assert!(output.result.is_ok());
30/// ```
31///
32/// Tokenizing different Rust constructs:
33///
34/// ```rust,ignore
35/// use oak_core::{Lexer, LexerState, SourceText};
36/// use oak_rust::{RustLanguage, RustLexer};
37///
38/// let language = RustLanguage::default();
39/// let lexer = RustLexer::new(&language);
40///
41/// // Tokenize a function with various Rust features
42/// let source = SourceText::new(
43///     r#"
44/// fn calculate<'a>(x: &'a i32, y: i32) -> i32 {
45///     let result = x + y;
46///     println!("Result: {}", result);
47///     result
48/// }
49/// "#,
50/// );
51/// let mut cache = oak_core::parser::session::ParseSession::<RustLanguage>::default();
52/// let output = lexer.lex(&source, &[], &mut cache);
53///
54/// // Verify that tokens were generated
55/// assert!(output.result.is_ok());
56/// ```
57#[derive(Clone)]
58pub struct RustLexer<'config> {
59    _config: &'config RustLanguage,
60}
61
62type State<'a, S> = LexerState<'a, S, RustLanguage>;
63
64impl<'config> Lexer<RustLanguage> for RustLexer<'config> {
65    fn lex<'a, S: Source + ?Sized>(&self, source: &'a S, _edits: &[oak_core::TextEdit], cache: &'a mut impl LexerCache<RustLanguage>) -> LexOutput<RustLanguage> {
66        let mut state = State::new_with_cache(source, 0, cache);
67        let result = self.run(&mut state);
68        if result.is_ok() {
69            state.add_eof();
70        }
71        state.finish_with_cache(result, cache)
72    }
73}
74
75impl<'config> RustLexer<'config> {
76    /// Creates a new `RustLexer` with the given language configuration.
77    ///
78    /// # Parameters
79    ///
80    /// * `config` - A `RustLanguage` configuration that controls
81    ///   language-specific parsing behavior.
82    ///
83    /// # Examples
84    ///
85    /// ```
86    /// # use oak_rust::{RustLexer, RustLanguage};
87    ///
88    /// let language = RustLanguage::default();
89    /// let lexer = RustLexer::new(&language);
90    /// ```
91    pub fn new(config: &'config RustLanguage) -> Self {
92        Self { _config: config }
93    }
94
95    /// Internal method to run the lexer on the given state.
96    /// This delegates to the implementation in the `lex` module.
97    pub(crate) fn run<'s, S: Source + ?Sized>(&self, state: &mut LexerState<'s, S, RustLanguage>) -> Result<(), oak_core::OakError> {
98        lex::run(self, state)
99    }
100}