Skip to main content

ass_core/tokenizer/scanner/
navigator.rs

//! Character navigation helper for the ASS tokenizer.
//!
//! Provides character-level navigation with position tracking and lookahead
//! capabilities for efficient tokenization of ASS subtitle scripts.
5
6use crate::{utils::CoreError, Result};
7use alloc::{format, string::ToString};
8use core::str::Chars;
9
10#[cfg(not(feature = "std"))]
11extern crate alloc;
12
13/// Character navigation helper for tokenizer
14///
15/// Provides character-level navigation with position tracking and
16/// lookahead capabilities for efficient tokenization.
17#[derive(Debug, Clone)]
18pub struct CharNavigator<'a> {
19    /// Source text being scanned
20    source: &'a str,
21    /// Current byte position in source
22    pub(super) position: usize,
23    /// Current line number (1-based)
24    line: usize,
25    /// Current column number (1-based)
26    column: usize,
27    /// Character iterator for the source
28    pub(super) chars: Chars<'a>,
29    /// Lookahead character for peeking
30    pub(super) peek_char: Option<char>,
31    /// Last character processed (for \r\n handling)
32    pub(super) last_char: Option<char>,
33}
34
35impl<'a> CharNavigator<'a> {
36    /// Create new character navigator
37    #[must_use]
38    pub fn new(source: &'a str, position: usize, line: usize, column: usize) -> Self {
39        Self {
40            source,
41            position,
42            line,
43            column,
44            chars: source[position..].chars(),
45            peek_char: None,
46            last_char: None,
47        }
48    }
49
50    /// Get current position
51    #[must_use]
52    pub const fn position(&self) -> usize {
53        self.position
54    }
55
56    /// Get current line
57    #[must_use]
58    pub const fn line(&self) -> usize {
59        self.line
60    }
61
62    /// Get current column
63    #[must_use]
64    pub const fn column(&self) -> usize {
65        self.column
66    }
67
68    /// Peek at current character without advancing
69    ///
70    /// # Errors
71    ///
72    /// Returns an error if the current position contains invalid UTF-8 or is at end of input.
73    pub fn peek_char(&mut self) -> Result<char> {
74        if let Some(ch) = self.peek_char {
75            Ok(ch)
76        } else if self.position < self.source.len() {
77            let ch = self.source[self.position..].chars().next().ok_or_else(|| {
78                CoreError::parse(format!("Invalid UTF-8 at position {}", self.position))
79            })?;
80            self.peek_char = Some(ch);
81            Ok(ch)
82        } else {
83            Err(CoreError::parse("Unexpected end of input".to_string()))
84        }
85    }
86
87    /// Peek at next character without advancing
88    ///
89    /// # Errors
90    ///
91    /// Returns an error if the next position is at end of input.
92    pub fn peek_next(&self) -> Result<char> {
93        let mut chars = self.source[self.position..].chars();
94        chars.next(); // Skip current
95        chars
96            .next()
97            .ok_or_else(|| CoreError::parse("Unexpected end of input".to_string()))
98    }
99
100    /// Advance by one character
101    ///
102    /// # Errors
103    ///
104    /// Returns an error if unable to peek at the current character.
105    pub fn advance_char(&mut self) -> Result<char> {
106        let ch = self.peek_char()?;
107        self.peek_char = None;
108
109        let _ = self.chars.next();
110        self.position += ch.len_utf8();
111
112        match ch {
113            '\r' => {
114                self.line += 1;
115                self.column = 1;
116            }
117            '\n' => {
118                // Only increment line if previous char wasn't \r (to handle \r\n properly)
119                if self.last_char != Some('\r') {
120                    self.line += 1;
121                }
122                self.column = 1;
123            }
124            _ => {
125                self.column += 1;
126            }
127        }
128
129        self.last_char = Some(ch);
130        Ok(ch)
131    }
132
133    /// Skip whitespace (excluding newlines)
134    pub fn skip_whitespace(&mut self) {
135        while self.position < self.source.len() {
136            if let Ok(ch) = self.peek_char() {
137                if ch.is_whitespace() && ch != '\n' && ch != '\r' {
138                    let _ = self.advance_char();
139                } else {
140                    break;
141                }
142            } else {
143                break;
144            }
145        }
146    }
147
148    /// Check if at end of source
149    #[must_use]
150    pub const fn is_at_end(&self) -> bool {
151        self.position >= self.source.len()
152    }
153}