oak_rust/lexer/mod.rs
1use crate::language::RustLanguage;
2use oak_core::{
3 lexer::{CommentLine, LexOutput, StringConfig, WhitespaceConfig}, source::Source, IncrementalCache,
4 Lexer,
5 LexerState,
6};
7
8mod lex;
9
10/// A lexer for the Rust programming language.
11///
12/// The `RustLexer` is responsible for tokenizing Rust source code into a sequence of tokens
13/// that can be used by the parser. It handles all Rust syntax including modern features like
14/// raw strings, byte strings, lifetimes, and all standard Rust keywords.
15///
16/// # Examples
17///
18/// Basic usage:
19///
20/// ```
21/// use oak_core::{Lexer, SourceText};
22/// use oak_rust::{RustLanguage, RustLexer};
23///
24/// let language = RustLanguage::default();
25/// let lexer = RustLexer::new(&language);
26/// let source = SourceText::new("fn main() { println!(\"Hello, world!\"); }");
27/// let output = lexer.lex(source, 0);
28///
29/// // The output contains tokens for the entire source code
30/// assert!(!output.tokens.is_empty());
31/// ```
32///
33/// Tokenizing different Rust constructs:
34///
35/// ```
36/// use oak_core::{Lexer, SourceText};
37/// use oak_rust::{RustLanguage, RustLexer, SourceText};
38///
39/// let language = RustLanguage::default();
40/// let lexer = RustLexer::new(&language);
41///
42/// // Tokenize a function with various Rust features
43/// let source = SourceText::new(
44/// r#"
45/// fn calculate<'a>(x: &'a i32, y: i32) -> i32 {
46/// let result = x + y;
47/// println!("Result: {}", result);
48/// result
49/// }
50/// "#,
51/// );
52/// let output = lexer.lex(source, 0);
53///
54/// // Verify that tokens were generated
55/// assert!(output.tokens.len() > 10);
56/// ```
57#[derive(Clone)]
58pub struct RustLexer<'config> {
59 config: &'config RustLanguage,
60}
61
62impl<'config> Lexer<RustLanguage> for RustLexer<'config> {
63 fn lex_incremental(
64 &self,
65 source: impl Source,
66 changed: usize,
67 cache: IncrementalCache<RustLanguage>,
68 ) -> LexOutput<RustLanguage> {
69 let mut state = LexerState::new_with_cache(source, changed, cache);
70 let result = self.run(&mut state);
71 state.finish(result)
72 }
73}
74
75impl<'config> RustLexer<'config> {
76 /// Creates a new `RustLexer` with the given language configuration.
77 ///
78 /// # Parameters
79 ///
80 /// * `config` - A reference to the `RustLanguage` configuration that controls
81 /// language-specific parsing behavior.
82 ///
83 /// # Examples
84 ///
85 /// ```
86 /// # use oak_rust::{RustLexer, RustLanguage};
87 ///
88 /// let language = RustLanguage::default();
89 /// let lexer = RustLexer::new(&language);
90 /// ```
91 pub fn new(config: &'config RustLanguage) -> Self {
92 Self { config }
93 }
94
95 /// Returns the whitespace configuration for the lexer.
96 ///
97 /// This method defines how the lexer should handle whitespace characters.
98 /// The configuration enables Unicode whitespace support, allowing the lexer
99 /// to recognize all Unicode whitespace characters, not just ASCII spaces.
100 pub fn whitespace_rules(&self) -> &WhitespaceConfig {
101 &WhitespaceConfig { unicode_whitespace: true }
102 }
103
104 /// Returns the comment configuration for the lexer.
105 ///
106 /// This method defines how the lexer should handle line comments in Rust code.
107 /// Rust uses double slashes (//) for line comments, which continue to the end of the line.
108 pub fn comment_rules(&self) -> &CommentLine {
109 &CommentLine { line_markers: &["//"] }
110 }
111
112 /// Returns the string literal configuration for the lexer.
113 ///
114 /// This method defines how the lexer should handle string literals in Rust code.
115 /// Rust strings are enclosed in double quotes (") and support escape sequences
116 /// using backslash (\) as the escape character.
117 pub fn string_rules(&self) -> &StringConfig {
118 &StringConfig { quotes: &['"'], escape: Some('\\') }
119 }
120
121 /// Returns the character literal configuration for the lexer.
122 ///
123 /// This method defines how the lexer should handle character literals in Rust code.
124 /// Rust character literals are enclosed in single quotes (') and do not use
125 /// escape characters in the same way as strings.
126 pub fn char_rules(&self) -> &StringConfig {
127 &StringConfig { quotes: &['\''], escape: None }
128 }
129}