Skip to main content

ass_core/parser/streaming/
parser.rs

1//! High-performance streaming parser state machine
2//!
3//! Defines [`StreamingParser`], which incrementally consumes input chunks,
4//! buffers partial lines, and dispatches complete lines to the
5//! [`super::LineProcessor`] before producing a [`super::StreamingResult`].
6
7use super::{LineProcessor, ParseDelta, StreamingResult};
8use crate::{utils::CoreError, Result, ScriptVersion};
9use alloc::{
10    format,
11    string::{String, ToString},
12    vec::Vec,
13};
14
15/// High-performance streaming parser for ASS scripts
16///
17/// Processes input chunks incrementally using a state machine approach.
18/// Supports partial lines, incomplete sections, and memory-efficient parsing.
19pub struct StreamingParser {
20    /// Line processor for parsing individual lines
21    processor: LineProcessor,
22    /// Buffer for incomplete lines
23    pub(super) buffer: String,
24    /// Parsed sections in document order
25    pub(super) sections: Vec<String>,
26
27    #[cfg(feature = "benches")]
28    /// Peak memory usage for benchmarking
29    peak_memory: usize,
30}
31
32impl StreamingParser {
33    /// Create new streaming parser
34    #[must_use]
35    pub const fn new() -> Self {
36        Self {
37            processor: LineProcessor::new(),
38            buffer: String::new(),
39            sections: Vec::new(),
40
41            #[cfg(feature = "benches")]
42            peak_memory: 0,
43        }
44    }
45
46    /// Create parser with custom capacity
47    #[must_use]
48    pub fn with_capacity(capacity: usize) -> Self {
49        Self {
50            processor: LineProcessor::new(),
51            buffer: String::new(),
52            sections: Vec::with_capacity(capacity),
53
54            #[cfg(feature = "benches")]
55            peak_memory: 0,
56        }
57    }
58
59    /// Feed chunk of data to parser
60    ///
61    /// # Errors
62    ///
63    /// Returns an error if the chunk contains invalid UTF-8 or parsing fails.
64    pub fn feed_chunk(&mut self, chunk: &[u8]) -> Result<Vec<ParseDelta<'static>>> {
65        if chunk.is_empty() {
66            return Ok(Vec::new());
67        }
68
69        let chunk_str = core::str::from_utf8(chunk)
70            .map_err(|e| CoreError::parse(format!("Invalid UTF-8: {e}")))?;
71
72        self.buffer.push_str(chunk_str);
73
74        let mut all_deltas = Vec::new();
75        let lines: Vec<String> = self.buffer.lines().map(str::to_string).collect();
76        let ends_with_newline = self.buffer.ends_with('\n') || self.buffer.ends_with('\r');
77
78        let complete_lines = if ends_with_newline {
79            lines.len()
80        } else {
81            lines.len().saturating_sub(1)
82        };
83
84        // Process complete lines
85        for line in &lines[..complete_lines] {
86            let deltas = self.processor.process_line(line)?;
87            all_deltas.extend(deltas.into_deltas());
88        }
89
90        // Update buffer with incomplete line
91        if complete_lines < lines.len() {
92            self.buffer.clone_from(&lines[complete_lines]);
93        } else {
94            self.buffer.clear();
95        }
96
97        #[cfg(feature = "benches")]
98        {
99            let current_memory = self.calculate_memory_usage();
100            if current_memory > self.peak_memory {
101                self.peak_memory = current_memory;
102            }
103        }
104
105        Ok(all_deltas)
106    }
107
108    /// Finish parsing and return final result
109    ///
110    /// # Errors
111    ///
112    /// Returns an error if the final line processing fails.
113    pub fn finish(mut self) -> Result<StreamingResult> {
114        if !self.buffer.trim().is_empty() {
115            let _deltas = self.processor.process_line(&self.buffer.clone())?;
116        }
117
118        Ok(StreamingResult {
119            sections: self.sections,
120            version: ScriptVersion::AssV4,
121            issues: Vec::new(),
122        })
123    }
124
125    /// Reset parser state for reuse
126    pub fn reset(&mut self) {
127        self.processor.reset();
128        self.buffer.clear();
129        self.sections.clear();
130
131        #[cfg(feature = "benches")]
132        {
133            self.peak_memory = 0;
134        }
135    }
136
137    /// Get peak memory usage (benchmarks only)
138    #[cfg(feature = "benches")]
139    #[must_use]
140    pub const fn peak_memory(&self) -> usize {
141        self.peak_memory
142    }
143
144    #[cfg(feature = "benches")]
145    /// Calculate current memory usage for benchmarking
146    fn calculate_memory_usage(&self) -> usize {
147        core::mem::size_of::<Self>()
148            + self.buffer.capacity()
149            + self.sections.capacity() * core::mem::size_of::<String>()
150    }
151}
152
153impl Default for StreamingParser {
154    fn default() -> Self {
155        Self::new()
156    }
157}