1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
//! The main lexer for Lambdust source code.
use crate::diagnostics::{Error, Result, Span, SourceMap};
use std::sync::Arc;
use super::{Token, TokenKind, InternalLexer};
/// The main lexer for Lambdust source code.
///
/// A thin, borrowing front-end: it holds references to the source text and an
/// optional filename for the lifetime `'a` and does not own either of them.
#[derive(Debug)]
pub struct Lexer<'a> {
/// The complete source text this lexer operates on.
source: &'a str,
/// Name of the file the source came from, if the caller supplied one.
filename: Option<&'a str>,
/// Initialized to 0 by `new`; the leading underscore marks it as currently
/// unused. NOTE(review): presumably reserved for incremental lexing — confirm.
_position: usize,
}
impl<'a> Lexer<'a> {
    /// Creates a new lexer for the given source code.
    ///
    /// `source` is the text to tokenize and `filename` is an optional name for
    /// it. Both are borrowed for `'a`; nothing is copied.
    pub fn new(source: &'a str, filename: Option<&'a str>) -> Self {
        Self {
            source,
            filename,
            _position: 0,
        }
    }

    /// Tokenizes the entire source code.
    ///
    /// Every call builds a fresh [`InternalLexer`] from the stored source and
    /// filename and delegates to it, so repeated calls start from the beginning.
    ///
    /// # Errors
    ///
    /// Returns whatever error the internal lexer's `tokenize` reports.
    pub fn tokenize(&mut self) -> Result<Vec<Token>> {
        let mut internal_lexer = InternalLexer::new(self.source, self.filename);
        internal_lexer.tokenize()
    }

    /// Gets the current filename (if any).
    ///
    /// The returned slice borrows from the original input (`'a`), not from the
    /// lexer, so it may outlive this `Lexer` value.
    pub fn filename(&self) -> Option<&'a str> {
        self.filename
    }

    /// Gets the source code.
    ///
    /// The returned slice borrows from the original input (`'a`), not from the
    /// lexer, so it may outlive this `Lexer` value.
    pub fn source(&self) -> &'a str {
        self.source
    }

    /// Gets the source map if available (placeholder implementation).
    ///
    /// Currently always returns `None`.
    pub fn get_source_map(&self) -> Option<Arc<SourceMap>> {
        // TODO: Implement source map integration
        None
    }

    /// Validates the entire source for common issues before tokenizing.
    ///
    /// Performs a single pass over the characters and collects, without
    /// stopping at the first problem:
    ///
    /// * closing parentheses that have no matching opening parenthesis,
    /// * string literals that run into a line break,
    /// * opening parentheses still unmatched at the end of the input,
    /// * a string literal still open at the end of the input.
    ///
    /// Inside a string literal, parentheses are ignored and a backslash
    /// escapes the following character, so `\"` does not close the string and
    /// `\\` does not escape the quote after it. A line break inside a string is
    /// always reported, even directly after a backslash.
    ///
    /// NOTE(review): comments and character literals are not recognized here,
    /// so delimiters appearing in them are still counted — confirm whether the
    /// language needs that handled.
    ///
    /// Returns the collected errors in the order they were detected; an empty
    /// vector means no issue was found.
    pub fn validate_source(&self) -> Vec<Error> {
        let mut errors = Vec::new();
        let mut open_parens: usize = 0;
        let mut in_string = false;
        // True when the previous character inside a string was an unescaped
        // backslash.
        let mut escaped = false;
        // Byte offset of the most recently visited character; used to anchor
        // the end-of-input diagnostics below (stays 0 for empty input).
        let mut position = 0;

        for (i, ch) in self.source.char_indices() {
            position = i;

            if in_string {
                match ch {
                    // Checked first so that a line break is reported even when
                    // it directly follows a backslash.
                    '\n' => {
                        errors.push(Error::lex_error(
                            "Unterminated string literal".to_string(),
                            Span::new(i, 1),
                        ));
                        in_string = false;
                        escaped = false;
                    }
                    // The escaped character is consumed without interpretation.
                    _ if escaped => escaped = false,
                    '\\' => escaped = true,
                    '"' => in_string = false,
                    _ => {}
                }
                continue;
            }

            match ch {
                '(' => open_parens += 1,
                ')' => {
                    if open_parens == 0 {
                        errors.push(Error::lex_error(
                            "Unmatched closing parenthesis".to_string(),
                            Span::new(i, 1),
                        ));
                    } else {
                        open_parens -= 1;
                    }
                }
                '"' => in_string = true,
                _ => {}
            }
        }

        // Check for unmatched opening parentheses
        if open_parens > 0 {
            errors.push(Error::lex_error(
                format!("{open_parens} unmatched opening parenthesis(es)"),
                Span::new(position, 0),
            ));
        }

        // Check for unterminated string
        if in_string {
            errors.push(Error::lex_error(
                "Unterminated string literal at end of file".to_string(),
                Span::new(position, 0),
            ));
        }

        errors
    }

    /// Tokenizes with validation, returning both tokens and validation errors.
    ///
    /// Validation runs first, then tokenization. The two results are
    /// independent: validation errors do not prevent tokenization, and a
    /// tokenization failure does not discard the validation errors.
    pub fn tokenize_with_validation(&mut self) -> (Result<Vec<Token>>, Vec<Error>) {
        let validation_errors = self.validate_source();
        let tokenize_result = self.tokenize();
        (tokenize_result, validation_errors)
    }
}