// ass_core/tokenizer/state.rs

1//! Tokenizer state management and issue reporting
2//!
3//! Provides context tracking and error reporting for the ASS tokenizer.
4//! Maintains parsing state and accumulates issues during lexical analysis.
5
6use alloc::{format, string::String, vec::Vec};
7
8#[cfg(not(feature = "std"))]
9extern crate alloc;
/// Tokenization context for state-aware parsing
///
/// Tracks current parsing context to enable context-sensitive tokenization
/// of ASS script elements that have different lexical rules in different
/// contexts.
///
/// The initial context is [`TokenContext::Document`], which is also the
/// `Default` value.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TokenContext {
    /// Top-level document parsing
    ///
    /// Default state for processing section headers, comments, and
    /// top-level document structure.
    Document,

    /// Inside section header like `[Events]`
    ///
    /// Special tokenization rules for section names within square brackets.
    SectionHeader,

    /// Inside field definition line
    ///
    /// Field values have different whitespace and delimiter handling than
    /// other contexts.
    FieldValue,

    /// Inside style override block like `{\b1}`
    ///
    /// Override tags use backslash prefixes and have special syntax rules.
    StyleOverride,

    /// Inside drawing commands (`\p1`)
    ///
    /// Drawing commands use vector graphics syntax with different
    /// coordinate and command parsing rules.
    DrawingCommands,

    /// Inside UU-encoded data (fonts/graphics)
    ///
    /// Binary data sections use different character validation and
    /// line parsing rules.
    UuEncodedData,
}
51
52impl TokenContext {
53    /// Check if context allows whitespace skipping
54    #[must_use]
55    pub const fn allows_whitespace_skipping(self) -> bool {
56        !matches!(self, Self::FieldValue | Self::UuEncodedData)
57    }
58
59    /// Check if context is inside a delimited block
60    #[must_use]
61    pub const fn is_delimited_block(self) -> bool {
62        matches!(self, Self::SectionHeader | Self::StyleOverride)
63    }
64
65    /// Get expected closing delimiter for context
66    #[must_use]
67    pub const fn closing_delimiter(self) -> Option<char> {
68        match self {
69            Self::SectionHeader => Some(']'),
70            Self::StyleOverride => Some('}'),
71            _ => None,
72        }
73    }
74
75    /// Transition to field value context after colon
76    #[must_use]
77    pub const fn enter_field_value(self) -> Self {
78        match self {
79            Self::Document => Self::FieldValue,
80            other => other,
81        }
82    }
83
84    /// Reset to document context (typically after newline)
85    #[must_use]
86    pub const fn reset_to_document(self) -> Self {
87        Self::Document
88    }
89}
90
91impl Default for TokenContext {
92    fn default() -> Self {
93        Self::Document
94    }
95}
96
/// Token issue severity levels
///
/// Categorizes tokenization issues by severity to enable appropriate
/// error handling and recovery strategies. Both [`IssueLevel::Error`]
/// and [`IssueLevel::Critical`] count as error conditions; only
/// `Critical` signals that tokenization should stop.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum IssueLevel {
    /// Warning that doesn't prevent tokenization
    ///
    /// Indicates potential problems that don't break parsing but may
    /// indicate authoring errors or compatibility issues.
    Warning,

    /// Error that may affect parsing
    ///
    /// Indicates problems that could cause incorrect parsing but allow
    /// tokenization to continue with error recovery.
    Error,

    /// Critical error requiring recovery
    ///
    /// Indicates severe problems that require special handling to
    /// continue tokenization safely.
    Critical,
}
121
122impl IssueLevel {
123    /// Check if issue level indicates an error condition
124    #[must_use]
125    pub const fn is_error(self) -> bool {
126        matches!(self, Self::Error | Self::Critical)
127    }
128
129    /// Check if issue level should stop tokenization
130    #[must_use]
131    pub const fn should_abort(self) -> bool {
132        matches!(self, Self::Critical)
133    }
134
135    /// Get string representation for display
136    #[must_use]
137    pub const fn as_str(self) -> &'static str {
138        match self {
139            Self::Warning => "warning",
140            Self::Error => "error",
141            Self::Critical => "critical",
142        }
143    }
144}
145
/// Tokenization issue for error reporting
///
/// Represents a problem encountered during tokenization with location
/// information and severity level for appropriate handling.
///
/// The lifetime `'a` ties the issue to the source text it was found in,
/// since `span` borrows directly from that text.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TokenIssue<'a> {
    /// Issue severity level
    pub level: IssueLevel,

    /// Human-readable error message
    pub message: String,

    /// Source span where issue occurred (borrowed from the input text)
    pub span: &'a str,

    /// Line number where issue occurred (1-based)
    pub line: usize,

    /// Column number where issue occurred (1-based)
    pub column: usize,
}
167
168impl<'a> TokenIssue<'a> {
169    /// Create new tokenization issue
170    ///
171    /// # Arguments
172    ///
173    /// * `level` - Severity level of the issue
174    /// * `message` - Human-readable description
175    /// * `span` - Source text span where issue occurred
176    /// * `line` - Line number (1-based)
177    /// * `column` - Column number (1-based)
178    #[must_use]
179    pub const fn new(
180        level: IssueLevel,
181        message: String,
182        span: &'a str,
183        line: usize,
184        column: usize,
185    ) -> Self {
186        Self {
187            level,
188            message,
189            span,
190            line,
191            column,
192        }
193    }
194
195    /// Create warning issue
196    #[must_use]
197    pub const fn warning(message: String, span: &'a str, line: usize, column: usize) -> Self {
198        Self::new(IssueLevel::Warning, message, span, line, column)
199    }
200
201    /// Create error issue
202    #[must_use]
203    pub const fn error(message: String, span: &'a str, line: usize, column: usize) -> Self {
204        Self::new(IssueLevel::Error, message, span, line, column)
205    }
206
207    /// Create critical issue
208    #[must_use]
209    pub const fn critical(message: String, span: &'a str, line: usize, column: usize) -> Self {
210        Self::new(IssueLevel::Critical, message, span, line, column)
211    }
212
213    /// Check if this is an error-level issue
214    #[must_use]
215    pub const fn is_error(&self) -> bool {
216        self.level.is_error()
217    }
218
219    /// Get formatted location string
220    #[must_use]
221    pub fn location_string(&self) -> String {
222        format!("{}:{}", self.line, self.column)
223    }
224
225    /// Get formatted issue string for display
226    #[must_use]
227    pub fn format_issue(&self) -> String {
228        format!(
229            "{}: {} at {}:{}",
230            self.level.as_str(),
231            self.message,
232            self.line,
233            self.column
234        )
235    }
236}
237
/// Issue collector for accumulating tokenization problems
///
/// Provides convenient methods for collecting and managing tokenization
/// issues during lexical analysis. `Default` yields an empty collector,
/// equivalent to [`IssueCollector::new`].
#[derive(Debug, Clone, Default)]
pub struct IssueCollector<'a> {
    /// Collection of tokenization issues found during parsing
    issues: Vec<TokenIssue<'a>>,
}
247
248impl<'a> IssueCollector<'a> {
249    /// Create new empty issue collector
250    #[must_use]
251    pub const fn new() -> Self {
252        Self { issues: Vec::new() }
253    }
254
255    /// Add issue to collection
256    pub fn add_issue(&mut self, issue: TokenIssue<'a>) {
257        self.issues.push(issue);
258    }
259
260    /// Add warning issue
261    pub fn add_warning(&mut self, message: String, span: &'a str, line: usize, column: usize) {
262        self.add_issue(TokenIssue::warning(message, span, line, column));
263    }
264
265    /// Add error issue
266    pub fn add_error(&mut self, message: String, span: &'a str, line: usize, column: usize) {
267        self.add_issue(TokenIssue::error(message, span, line, column));
268    }
269
270    /// Add critical issue
271    pub fn add_critical(&mut self, message: String, span: &'a str, line: usize, column: usize) {
272        self.add_issue(TokenIssue::critical(message, span, line, column));
273    }
274
275    /// Get all collected issues
276    #[must_use]
277    pub fn issues(&self) -> &[TokenIssue<'a>] {
278        &self.issues
279    }
280
281    /// Check if any issues were collected
282    #[must_use]
283    pub fn has_issues(&self) -> bool {
284        !self.issues.is_empty()
285    }
286
287    /// Check if any error-level issues were collected
288    pub fn has_errors(&self) -> bool {
289        self.issues.iter().any(TokenIssue::is_error)
290    }
291
292    /// Get count of issues
293    #[must_use]
294    pub fn issue_count(&self) -> usize {
295        self.issues.len()
296    }
297
298    /// Clear all issues
299    pub fn clear(&mut self) {
300        self.issues.clear();
301    }
302
303    /// Take all issues, leaving collector empty
304    pub fn take_issues(&mut self) -> Vec<TokenIssue<'a>> {
305        core::mem::take(&mut self.issues)
306    }
307}
308
#[cfg(test)]
mod tests {
    //! Unit tests for context transitions, issue levels, issue
    //! construction/formatting, and collector behavior.

    use super::*;
    #[cfg(not(feature = "std"))]
    use alloc::string::ToString;

    // --- TokenContext -----------------------------------------------------

    #[test]
    fn token_context_transitions() {
        let mut context = TokenContext::Document;
        assert!(context.allows_whitespace_skipping());
        assert!(!context.is_delimited_block());

        context = context.enter_field_value();
        assert_eq!(context, TokenContext::FieldValue);
        assert!(!context.allows_whitespace_skipping());

        context = context.reset_to_document();
        assert_eq!(context, TokenContext::Document);
    }

    #[test]
    fn token_context_delimiters() {
        assert_eq!(TokenContext::SectionHeader.closing_delimiter(), Some(']'));
        assert_eq!(TokenContext::StyleOverride.closing_delimiter(), Some('}'));
        assert_eq!(TokenContext::Document.closing_delimiter(), None);
    }

    // --- IssueLevel -------------------------------------------------------

    #[test]
    fn issue_level_properties() {
        assert!(!IssueLevel::Warning.is_error());
        assert!(IssueLevel::Error.is_error());
        assert!(IssueLevel::Critical.is_error());

        assert!(!IssueLevel::Warning.should_abort());
        assert!(!IssueLevel::Error.should_abort());
        assert!(IssueLevel::Critical.should_abort());
    }

    // --- TokenIssue -------------------------------------------------------

    #[test]
    fn token_issue_creation() {
        let span = "test span";
        let issue = TokenIssue::warning("Test warning".to_string(), span, 5, 10);

        assert_eq!(issue.level, IssueLevel::Warning);
        assert_eq!(issue.message, "Test warning");
        assert_eq!(issue.span, span);
        assert_eq!(issue.line, 5);
        assert_eq!(issue.column, 10);
        assert!(!issue.is_error());
    }

    // --- IssueCollector ---------------------------------------------------

    #[test]
    fn issue_collector_operations() {
        let mut collector = IssueCollector::new();
        assert!(!collector.has_issues());
        assert!(!collector.has_errors());

        collector.add_warning("Warning".to_string(), "span", 1, 1);
        assert!(collector.has_issues());
        assert!(!collector.has_errors());

        collector.add_error("Error".to_string(), "span", 2, 2);
        assert!(collector.has_errors());
        assert_eq!(collector.issue_count(), 2);

        let issues = collector.take_issues();
        assert_eq!(issues.len(), 2);
        assert!(!collector.has_issues());
    }

    // --- Exhaustive per-variant coverage ----------------------------------

    #[test]
    fn token_context_all_variants() {
        // Test all TokenContext variants for is_delimited_block
        assert!(!TokenContext::Document.is_delimited_block());
        assert!(TokenContext::SectionHeader.is_delimited_block());
        assert!(!TokenContext::FieldValue.is_delimited_block());
        assert!(TokenContext::StyleOverride.is_delimited_block());
        assert!(!TokenContext::DrawingCommands.is_delimited_block());
        assert!(!TokenContext::UuEncodedData.is_delimited_block());
    }

    #[test]
    fn token_context_whitespace_skipping_all_variants() {
        // Test all TokenContext variants for allows_whitespace_skipping
        assert!(TokenContext::Document.allows_whitespace_skipping());
        assert!(TokenContext::SectionHeader.allows_whitespace_skipping());
        assert!(!TokenContext::FieldValue.allows_whitespace_skipping());
        assert!(TokenContext::StyleOverride.allows_whitespace_skipping());
        assert!(TokenContext::DrawingCommands.allows_whitespace_skipping());
        assert!(!TokenContext::UuEncodedData.allows_whitespace_skipping());
    }

    #[test]
    fn token_context_closing_delimiters_all_variants() {
        // Test all TokenContext variants for closing_delimiter
        assert_eq!(TokenContext::Document.closing_delimiter(), None);
        assert_eq!(TokenContext::SectionHeader.closing_delimiter(), Some(']'));
        assert_eq!(TokenContext::FieldValue.closing_delimiter(), None);
        assert_eq!(TokenContext::StyleOverride.closing_delimiter(), Some('}'));
        assert_eq!(TokenContext::DrawingCommands.closing_delimiter(), None);
        assert_eq!(TokenContext::UuEncodedData.closing_delimiter(), None);
    }

    #[test]
    fn token_context_enter_field_value_all_variants() {
        // Test enter_field_value from all contexts
        assert_eq!(
            TokenContext::Document.enter_field_value(),
            TokenContext::FieldValue
        );
        assert_eq!(
            TokenContext::SectionHeader.enter_field_value(),
            TokenContext::SectionHeader
        );
        assert_eq!(
            TokenContext::FieldValue.enter_field_value(),
            TokenContext::FieldValue
        );
        assert_eq!(
            TokenContext::StyleOverride.enter_field_value(),
            TokenContext::StyleOverride
        );
        assert_eq!(
            TokenContext::DrawingCommands.enter_field_value(),
            TokenContext::DrawingCommands
        );
        assert_eq!(
            TokenContext::UuEncodedData.enter_field_value(),
            TokenContext::UuEncodedData
        );
    }

    #[test]
    fn token_context_reset_to_document_all_variants() {
        // Test reset_to_document from all contexts
        assert_eq!(
            TokenContext::Document.reset_to_document(),
            TokenContext::Document
        );
        assert_eq!(
            TokenContext::SectionHeader.reset_to_document(),
            TokenContext::Document
        );
        assert_eq!(
            TokenContext::FieldValue.reset_to_document(),
            TokenContext::Document
        );
        assert_eq!(
            TokenContext::StyleOverride.reset_to_document(),
            TokenContext::Document
        );
        assert_eq!(
            TokenContext::DrawingCommands.reset_to_document(),
            TokenContext::Document
        );
        assert_eq!(
            TokenContext::UuEncodedData.reset_to_document(),
            TokenContext::Document
        );
    }

    #[test]
    fn token_context_default() {
        assert_eq!(TokenContext::default(), TokenContext::Document);
    }

    #[test]
    fn issue_level_as_str() {
        assert_eq!(IssueLevel::Warning.as_str(), "warning");
        assert_eq!(IssueLevel::Error.as_str(), "error");
        assert_eq!(IssueLevel::Critical.as_str(), "critical");
    }

    #[test]
    fn token_issue_all_constructors() {
        let span = "test span";

        let warning = TokenIssue::warning("Warning message".to_string(), span, 10, 5);
        assert_eq!(warning.level, IssueLevel::Warning);
        assert_eq!(warning.message, "Warning message");
        assert!(!warning.is_error());

        let error = TokenIssue::error("Error message".to_string(), span, 15, 8);
        assert_eq!(error.level, IssueLevel::Error);
        assert_eq!(error.message, "Error message");
        assert!(error.is_error());

        let critical = TokenIssue::critical("Critical message".to_string(), span, 20, 12);
        assert_eq!(critical.level, IssueLevel::Critical);
        assert_eq!(critical.message, "Critical message");
        assert!(critical.is_error());
    }

    #[test]
    fn token_issue_location_string() {
        let issue = TokenIssue::new(IssueLevel::Warning, "Test".to_string(), "span", 42, 13);
        assert_eq!(issue.location_string(), "42:13");
    }

    #[test]
    fn token_issue_format_issue() {
        let issue = TokenIssue::error("Test error message".to_string(), "span", 5, 10);
        let formatted = issue.format_issue();
        assert!(formatted.contains("error"));
        assert!(formatted.contains("Test error message"));
        assert!(formatted.contains("5:10"));
    }

    #[test]
    fn issue_collector_new_vs_default() {
        let collector1 = IssueCollector::new();
        let collector2 = IssueCollector::default();

        assert_eq!(collector1.issue_count(), collector2.issue_count());
        assert_eq!(collector1.has_issues(), collector2.has_issues());
    }

    #[test]
    fn issue_collector_add_issue_directly() {
        let mut collector = IssueCollector::new();
        let issue = TokenIssue::warning("Direct issue".to_string(), "span", 1, 1);

        collector.add_issue(issue.clone());
        assert_eq!(collector.issue_count(), 1);
        assert_eq!(collector.issues()[0], issue);
    }

    #[test]
    fn issue_collector_add_critical() {
        let mut collector = IssueCollector::new();
        collector.add_critical("Critical issue".to_string(), "span", 3, 7);

        assert!(collector.has_issues());
        assert!(collector.has_errors());
        assert_eq!(collector.issues()[0].level, IssueLevel::Critical);
        assert!(collector.issues()[0].level.should_abort());
    }

    #[test]
    fn issue_collector_clear() {
        let mut collector = IssueCollector::new();
        collector.add_warning("Warning".to_string(), "span", 1, 1);
        collector.add_error("Error".to_string(), "span", 2, 2);

        assert!(collector.has_issues());
        assert_eq!(collector.issue_count(), 2);

        collector.clear();
        assert!(!collector.has_issues());
        assert_eq!(collector.issue_count(), 0);
    }

    #[test]
    fn issue_collector_mixed_issue_types() {
        let mut collector = IssueCollector::new();

        collector.add_warning("First warning".to_string(), "span1", 1, 1);
        collector.add_error("First error".to_string(), "span2", 2, 2);
        collector.add_critical("Critical issue".to_string(), "span3", 3, 3);
        collector.add_warning("Second warning".to_string(), "span4", 4, 4);

        assert_eq!(collector.issue_count(), 4);
        assert!(collector.has_issues());
        assert!(collector.has_errors());

        let issues = collector.issues();
        assert_eq!(issues[0].level, IssueLevel::Warning);
        assert_eq!(issues[1].level, IssueLevel::Error);
        assert_eq!(issues[2].level, IssueLevel::Critical);
        assert_eq!(issues[3].level, IssueLevel::Warning);
    }

    #[test]
    fn token_issue_equality() {
        let issue1 = TokenIssue::warning("Same message".to_string(), "same span", 5, 10);
        let issue2 = TokenIssue::warning("Same message".to_string(), "same span", 5, 10);
        let issue3 = TokenIssue::error("Same message".to_string(), "same span", 5, 10);

        assert_eq!(issue1, issue2);
        assert_ne!(issue1, issue3); // Different levels
    }

    // --- Copy semantics ---------------------------------------------------

    #[test]
    fn issue_level_clone_and_copy() {
        let level1 = IssueLevel::Warning;
        let level2 = level1;
        let level3 = level1;

        assert_eq!(level1, level2);
        assert_eq!(level1, level3);
    }

    #[test]
    fn token_context_clone_and_copy() {
        let context1 = TokenContext::StyleOverride;
        let context2 = context1;
        let context3 = context1;

        assert_eq!(context1, context2);
        assert_eq!(context1, context3);
    }
}