1use crate::{
7 error::{BudgetTracker, ParseBudget},
8 error_recovery::ParseError,
9 position::{Position, Range},
10 token_wrapper::TokenWithPosition,
11};
12use perl_ast_v2::NodeIdGenerator;
13use perl_lexer::TokenType;
14use perl_position_tracking::LineStartsCache;
15use std::collections::VecDeque;
16
/// Shared state for a single parse: the full token stream, a cursor into
/// it, accumulated recoverable errors, and parse-budget accounting.
pub struct ParserContext {
    /// All tokens produced up front by the lexer (the EOF marker is excluded).
    tokens: VecDeque<TokenWithPosition>,
    /// Cursor: index of the current token within `tokens` (== len at end of input).
    current: usize,
    /// Generator for unique AST node identifiers.
    pub id_generator: NodeIdGenerator,
    /// Parse errors accumulated via `add_error` / `add_error_unchecked`.
    errors: Vec<ParseError>,
    /// The original source text, used for `source_slice`.
    source: String,
    /// Byte-offset -> line/column mapper; retained after construction but
    /// not currently consulted again (hence the underscore prefix).
    _position_tracker: PositionTracker,
    /// Limits on error count and recursion depth for this parse.
    budget: ParseBudget,
    /// Running counters checked against `budget`.
    budget_tracker: BudgetTracker,
}
36
/// Maps byte offsets in the source to line/column positions using a
/// precomputed cache of line-start offsets.
struct PositionTracker {
    /// Cached line-start offsets built from `source`.
    line_cache: LineStartsCache,
    /// Owned copy of the source the cache was built from; the cache's
    /// `offset_to_position` needs the text alongside the offsets.
    source: String,
}
48
49impl PositionTracker {
50 fn new(source: String) -> Self {
51 let line_cache = LineStartsCache::new(&source);
52 PositionTracker { line_cache, source }
53 }
54
55 fn byte_to_position(&self, byte_offset: usize) -> Position {
57 let (line, character) = self.line_cache.offset_to_position(&self.source, byte_offset);
58 Position::new(byte_offset, line + 1, character + 1)
60 }
61}
62
63impl ParserContext {
64 pub fn new(source: String) -> Self {
66 let mut tokens = VecDeque::new();
67 let position_tracker = PositionTracker::new(source.clone());
68
69 let mut lexer = perl_lexer::PerlLexer::new(&source);
71 loop {
72 match lexer.next_token() {
73 Some(token) => {
74 if matches!(token.token_type, TokenType::EOF) {
76 break;
77 }
78
79 let start = token.start;
80 let end = token.end;
81
82 let start_pos = position_tracker.byte_to_position(start);
84 let end_pos = position_tracker.byte_to_position(end);
85
86 tokens.push_back(TokenWithPosition::new(token, start_pos, end_pos));
87 }
88 None => break,
89 }
90 }
91
92 ParserContext {
93 tokens,
94 current: 0,
95 id_generator: NodeIdGenerator::new(),
96 errors: Vec::new(),
97 source,
98 _position_tracker: position_tracker,
99 budget: ParseBudget::default(),
100 budget_tracker: BudgetTracker::new(),
101 }
102 }
103
104 pub fn with_budget(source: String, budget: ParseBudget) -> Self {
106 let mut ctx = Self::new(source);
107 ctx.budget = budget;
108 ctx
109 }
110
111 pub fn budget(&self) -> &ParseBudget {
113 &self.budget
114 }
115
116 pub fn budget_tracker(&self) -> &BudgetTracker {
118 &self.budget_tracker
119 }
120
121 pub fn budget_tracker_mut(&mut self) -> &mut BudgetTracker {
123 &mut self.budget_tracker
124 }
125
126 pub fn errors_exhausted(&self) -> bool {
128 self.budget_tracker.errors_exhausted(&self.budget)
129 }
130
131 pub fn depth_would_exceed(&self) -> bool {
133 self.budget_tracker.depth_would_exceed(&self.budget)
134 }
135
136 pub fn enter_depth(&mut self) -> bool {
138 if self.depth_would_exceed() {
139 return false;
140 }
141 self.budget_tracker.enter_depth();
142 true
143 }
144
145 pub fn exit_depth(&mut self) {
147 self.budget_tracker.exit_depth();
148 }
149
150 pub fn current_token(&self) -> Option<&TokenWithPosition> {
152 self.tokens.get(self.current)
153 }
154
155 pub fn peek_token(&self, offset: usize) -> Option<&TokenWithPosition> {
157 self.tokens.get(self.current + offset)
158 }
159
160 pub fn advance(&mut self) -> Option<&TokenWithPosition> {
162 if self.current < self.tokens.len() {
163 self.current += 1;
164 }
165 self.current_token()
166 }
167
168 pub fn is_eof(&self) -> bool {
170 self.current >= self.tokens.len()
171 }
172
173 pub fn current_position(&self) -> Position {
175 if let Some(token) = self.current_token() {
176 token.range().start
177 } else if let Some(last_token) = self.tokens.back() {
178 last_token.range().end
180 } else {
181 Position::new(0, 1, 1)
182 }
183 }
184
185 pub fn current_position_range(&self) -> Range {
187 if let Some(token) = self.current_token() {
188 token.range()
189 } else {
190 let pos = self.current_position();
191 Range::new(pos, pos)
192 }
193 }
194
195 pub fn add_error(&mut self, error: ParseError) -> bool {
199 if self.errors_exhausted() {
200 return false;
201 }
202 self.errors.push(error);
203 self.budget_tracker.record_error();
204 true
205 }
206
207 pub fn add_error_unchecked(&mut self, error: ParseError) {
209 self.errors.push(error);
210 self.budget_tracker.record_error();
211 }
212
213 pub fn take_errors(&mut self) -> Vec<ParseError> {
215 std::mem::take(&mut self.errors)
216 }
217
218 pub fn current_index(&self) -> usize {
220 self.current
221 }
222
223 pub fn set_index(&mut self, index: usize) {
225 self.current = index.min(self.tokens.len());
226 }
227
228 pub fn expect(&mut self, expected: TokenType) -> Result<&TokenWithPosition, ParseError> {
230 match self.current_token() {
231 Some(token) if token.token.token_type == expected => {
232 self.advance();
233 Ok(&self.tokens[self.current - 1])
234 }
235 Some(token) => Err(ParseError::new(
236 format!("Expected {:?}, found {:?}", expected, token.token.token_type),
237 token.range(),
238 )
239 .with_expected(vec![format!("{:?}", expected)])
240 .with_found(format!("{:?}", token.token.token_type))),
241 None => Err(ParseError::new(
242 format!("Expected {:?}, found end of file", expected),
243 self.current_position_range(),
244 )
245 .with_expected(vec![format!("{:?}", expected)])
246 .with_found("EOF".to_string())),
247 }
248 }
249
250 pub fn check(&self, token_type: &TokenType) -> bool {
252 self.current_token().map(|t| &t.token.token_type == token_type).unwrap_or(false)
253 }
254
255 pub fn consume(&mut self, token_type: &TokenType) -> bool {
257 if self.check(token_type) {
258 self.advance();
259 true
260 } else {
261 false
262 }
263 }
264
265 pub fn source_slice(&self, range: &Range) -> &str {
267 &self.source[range.start.byte..range.end.byte]
268 }
269}
270
#[cfg(test)]
mod tests {
    use super::*;
    use perl_tdd_support::must_some;

    /// Returns the token whose range starts at byte offset `byte`,
    /// panicking via `must_some` when no such token exists.
    fn token_starting_at(ctx: &ParserContext, byte: usize) -> &TokenWithPosition {
        must_some(ctx.tokens.iter().find(|t| t.range().start.byte == byte))
    }

    #[test]
    fn test_parser_context_creation() {
        let ctx = ParserContext::new("my $x = 42;".to_string());

        assert!(!ctx.is_eof());
        assert!(!ctx.tokens.is_empty());
    }

    #[test]
    fn test_token_advancement() {
        let mut ctx = ParserContext::new("my $x".to_string());

        let leading = ctx.current_token().map(|t| &t.token.token_type);
        assert!(matches!(leading, Some(TokenType::Keyword(k)) if k.as_ref() == "my"));

        ctx.advance();
        assert!(ctx.current_token().is_some());
    }

    #[test]
    fn test_error_accumulation() {
        let mut ctx = ParserContext::new("test".to_string());

        for message in ["Error 1", "Error 2"] {
            let range = ctx.current_position_range();
            ctx.add_error(ParseError::new(message.to_string(), range));
        }

        let errors = ctx.take_errors();
        assert_eq!(errors.len(), 2);
        assert_eq!(errors[0].message, "Error 1");
        assert_eq!(errors[1].message, "Error 2");
    }

    #[test]
    fn test_multiline_positions() {
        let source = "my $x = 42;\nmy $y = 43;".to_string();
        let ctx = ParserContext::new(source.clone());

        let first = token_starting_at(&ctx, must_some(source.find("my")));
        assert_eq!(first.range().start.line, 1);
        assert_eq!(first.range().start.column, 1);
        assert_eq!(first.range().end.line, 1);
        assert_eq!(first.range().end.column, 3);

        let second = token_starting_at(&ctx, must_some(source.rfind("my")));
        assert_eq!(second.range().start.line, 2);
        assert_eq!(second.range().start.column, 1);
        assert_eq!(second.range().end.line, 2);
        assert_eq!(second.range().end.column, 3);
    }

    #[test]
    fn test_multiline_string_token_positions() {
        let source = "my $s = \"a\nb\";".to_string();
        let ctx = ParserContext::new(source.clone());

        // The string literal spans a newline, so its end lands on line 2.
        let token = token_starting_at(&ctx, must_some(source.find('"')));
        assert_eq!(token.range().start.line, 1);
        assert_eq!(token.range().start.column, 9);
        assert_eq!(token.range().end.line, 2);
        assert_eq!(token.range().end.column, 3);
    }

    #[test]
    fn test_utf16_position_mapping() {
        let source = "my $emoji = 😀;".to_string();
        let ctx = ParserContext::new(source.clone());

        let equals = token_starting_at(&ctx, must_some(source.find('=')));
        assert_eq!(equals.range().start.line, 1);
        assert!(equals.range().start.column > 0);
    }

    #[test]
    fn test_crlf_line_endings() {
        let source = "my $x = 42;\r\nmy $y = 43;".to_string();
        let ctx = ParserContext::new(source.clone());

        let first = token_starting_at(&ctx, must_some(source.find("my")));
        assert_eq!(first.range().start.line, 1);
        assert_eq!(first.range().start.column, 1);

        let second = token_starting_at(&ctx, must_some(source.rfind("my")));
        assert_eq!(second.range().start.line, 2);
        assert_eq!(second.range().start.column, 1);
    }

    #[test]
    fn test_empty_source() {
        let ctx = ParserContext::new(String::new());

        assert!(ctx.tokens.is_empty());
        assert!(ctx.is_eof());
    }

    #[test]
    fn test_single_token() {
        let ctx = ParserContext::new("42".to_string());
        assert_eq!(ctx.tokens.len(), 1);

        let range = ctx.tokens[0].range();
        assert_eq!(range.start.byte, 0);
        assert_eq!(range.start.line, 1);
        assert_eq!(range.start.column, 1);
        assert_eq!(range.end.byte, 2);
        assert_eq!(range.end.line, 1);
        assert_eq!(range.end.column, 3);
    }
}