vt_push_parser/
capture.rs

1//! Raw-input-capturing push parser.
2
3use crate::{VT_PARSER_INTEREST_DEFAULT, VTEvent, VTPushParser};
4
5pub trait VTInputCaptureCallback {
6    fn event(&mut self, event: VTCaptureEvent<'_>) -> VTInputCapture;
7}
8
9impl<F: FnMut(VTCaptureEvent<'_>) -> VTInputCapture> VTInputCaptureCallback for F {
10    #[inline(always)]
11    fn event(&mut self, event: VTCaptureEvent<'_>) -> VTInputCapture {
12        self(event)
13    }
14}
15
/// The capture mode to enter after the current event has been handled.
///
/// A callback returns this from [`VTInputCaptureCallback::event`]. While a
/// capture is active the captured bytes are delivered as
/// [`VTCaptureEvent::Capture`] events (possibly several of them), followed by
/// a single [`VTCaptureEvent::CaptureEnd`] once the capture is complete.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum VTInputCapture {
    /// No capture mode. This must also be returned from any
    /// [`VTCaptureEvent::Capture`] or [`VTCaptureEvent::CaptureEnd`] event.
    None,
    /// Capture a fixed number of bytes.
    Count(usize),
    /// Capture a fixed number of UTF-8 chars.
    ///
    /// Characters are counted by their lead byte; continuation bytes
    /// (`0b10xx_xxxx`) are attributed to the character they follow.
    CountUtf8(usize),
    /// Capture bytes until a terminator is found. The terminator itself is
    /// consumed but is not part of the captured data.
    Terminator(&'static [u8]),
}
31
32#[cfg_attr(feature = "serde", derive(serde::Serialize))]
33#[derive(Debug)]
34pub enum VTCaptureEvent<'a> {
35    VTEvent(VTEvent<'a>),
36    Capture(&'a [u8]),
37    CaptureEnd,
38}
39
/// Running state of an in-progress capture.
///
/// [`VTInputCapture`] is what a callback asks for; this enum is the state the
/// parser keeps while the requested bytes are being collected.
enum VTCaptureInternal {
    /// No capture is active.
    None,
    /// Number of raw bytes that still have to be captured.
    Count(usize),
    /// Number of UTF-8 characters whose lead byte has not been seen yet.
    CountUtf8(usize),
    /// The terminator being searched for, and how many of its leading bytes
    /// have been matched (and withheld from the output) so far.
    Terminator(&'static [u8], usize),
    /// Every requested character has started, but the input ran out in the
    /// middle of the last one: at most this many continuation bytes are still
    /// owed to it.
    Utf8Tail(usize),
}

impl VTCaptureInternal {
    /// Returns `true` for UTF-8 continuation bytes (`0b10xx_xxxx`).
    fn is_utf8_continuation(byte: u8) -> bool {
        byte & 0xC0 == 0x80
    }

    /// Total encoded length announced by a UTF-8 lead byte. Bytes that cannot
    /// start a multi-byte sequence are treated as one-byte characters.
    fn utf8_sequence_len(lead: u8) -> usize {
        match lead {
            0xC0..=0xDF => 2,
            0xE0..=0xEF => 3,
            0xF0..=0xF7 => 4,
            _ => 1,
        }
    }

    /// Feeds `input` to the active capture.
    ///
    /// Consumed bytes are removed from the front of `input`. The return value
    /// is the data to report as captured by this step (it may be empty), or
    /// `None` when there is nothing to report. The capture is finished once
    /// `self` has become [`VTCaptureInternal::None`].
    ///
    /// Every call on non-empty input either consumes bytes, shrinks the
    /// partial terminator match, or ends the capture, so a caller looping
    /// until the input is empty always makes progress.
    fn feed<'a>(&mut self, input: &mut &'a [u8]) -> Option<&'a [u8]> {
        match self {
            VTCaptureInternal::None => None,
            VTCaptureInternal::Count(count) => {
                let data: &'a [u8] = *input;
                if data.is_empty() && *count > 0 {
                    return None;
                }
                // Take what is available; a capture may span several chunks.
                let take = (*count).min(data.len());
                let (capture, rest) = data.split_at(take);
                *input = rest;
                *count -= take;
                if *count == 0 {
                    *self = VTCaptureInternal::None;
                }
                Some(capture)
            }
            VTCaptureInternal::CountUtf8(count) => {
                let data: &'a [u8] = *input;
                if *count == 0 {
                    // Nothing was requested: finish at once with an empty capture.
                    *self = VTCaptureInternal::None;
                    return Some(&data[..0]);
                }
                if data.is_empty() {
                    return None;
                }

                let mut remaining = *count;
                for (i, &byte) in data.iter().enumerate() {
                    if Self::is_utf8_continuation(byte) {
                        continue;
                    }
                    remaining -= 1;
                    if remaining > 0 {
                        continue;
                    }

                    // Lead byte of the last requested character: take the
                    // continuation bytes that directly follow it.
                    let tail = data[i + 1..]
                        .iter()
                        .take_while(|&&b| Self::is_utf8_continuation(b))
                        .count();
                    let end = i + 1 + tail;
                    let owed = Self::utf8_sequence_len(byte).saturating_sub(1 + tail);
                    // If the chunk ended inside that character, keep waiting
                    // for the rest of it instead of leaking it to the parser.
                    *self = if end == data.len() && owed > 0 {
                        VTCaptureInternal::Utf8Tail(owed)
                    } else {
                        VTCaptureInternal::None
                    };
                    let (capture, rest) = data.split_at(end);
                    *input = rest;
                    return Some(capture);
                }

                // The chunk ran out first: hand over all of it and remember
                // how many characters are still missing.
                *count = remaining;
                *input = &data[data.len()..];
                Some(data)
            }
            VTCaptureInternal::Utf8Tail(owed) => {
                let data: &'a [u8] = *input;
                let tail = data
                    .iter()
                    .take(*owed)
                    .take_while(|&&b| Self::is_utf8_continuation(b))
                    .count();
                let (capture, rest) = data.split_at(tail);
                *input = rest;
                *owed -= tail;
                // Done when the character is complete, or when a
                // non-continuation byte shows that it never will be.
                if *owed == 0 || !rest.is_empty() {
                    *self = VTCaptureInternal::None;
                }
                if capture.is_empty() {
                    None
                } else {
                    Some(capture)
                }
            }
            VTCaptureInternal::Terminator(terminator, found) => {
                let terminator: &'static [u8] = *terminator;
                if terminator.is_empty() {
                    // An empty terminator matches immediately.
                    *self = VTCaptureInternal::None;
                    return None;
                }
                let data: &'a [u8] = *input;

                // Ground state: everything before the first possible
                // terminator byte is plain captured data.
                if *found == 0 {
                    return match data.iter().position(|&b| b == terminator[0]) {
                        Some(position) => {
                            *input = &data[position + 1..];
                            if terminator.len() == 1 {
                                // A one-byte terminator is complete already.
                                *self = VTCaptureInternal::None;
                            } else {
                                *found = 1;
                            }
                            Some(&data[..position])
                        }
                        None => {
                            *input = &data[data.len()..];
                            Some(data)
                        }
                    };
                }

                // Part of the terminator has been matched; keep going.
                while *found < terminator.len() {
                    let data: &'a [u8] = *input;
                    let (&byte, rest) = match data.split_first() {
                        Some(parts) => parts,
                        None => return None,
                    };

                    if byte == terminator[*found] {
                        *found += 1;
                        *input = rest;
                    } else {
                        // Mismatch. Keep the longest proper prefix of the
                        // terminator that is also a suffix of what was matched
                        // (so self-overlapping terminators such as "aab" in
                        // "aaab" are still found) and release the bytes in
                        // front of it as captured data. The offending byte is
                        // left in `input` and re-examined on the next call.
                        let matched = &terminator[..*found];
                        let keep = (1..*found)
                            .rev()
                            .find(|&k| matched.ends_with(&matched[..k]))
                            .unwrap_or(0);
                        let released = &terminator[..*found - keep];
                        *found = keep;
                        return Some(released);
                    }
                }

                // The entire terminator has been matched.
                *self = VTCaptureInternal::None;
                None
            }
        }
    }
}
132
133/// A parser that allows for "capturing" of input data, ie: temporarily
134/// transferring control of the parser to unparsed data events.
135///
136/// This functions in the same way as [`VTPushParser`], but emits
137/// [`VTCaptureEvent`]s instead of [`VTEvent`]s.
138pub struct VTCapturePushParser<const INTEREST: u8 = VT_PARSER_INTEREST_DEFAULT> {
139    parser: VTPushParser<INTEREST>,
140    capture: VTCaptureInternal,
141}
142
143impl Default for VTCapturePushParser {
144    fn default() -> Self {
145        Self::new()
146    }
147}
148
149impl VTCapturePushParser {
150    pub const fn new() -> VTCapturePushParser {
151        VTCapturePushParser::new_with_interest::<VT_PARSER_INTEREST_DEFAULT>()
152    }
153
154    pub const fn new_with_interest<const INTEREST: u8>() -> VTCapturePushParser<INTEREST> {
155        VTCapturePushParser::new_with()
156    }
157}
158
159impl<const INTEREST: u8> VTCapturePushParser<INTEREST> {
160    const fn new_with() -> Self {
161        Self {
162            parser: VTPushParser::new_with(),
163            capture: VTCaptureInternal::None,
164        }
165    }
166
167    pub fn is_ground(&self) -> bool {
168        self.parser.is_ground()
169    }
170
171    pub fn idle(&mut self) -> Option<VTCaptureEvent<'static>> {
172        self.parser.idle().map(VTCaptureEvent::VTEvent)
173    }
174
175    pub fn feed_with<F: VTInputCaptureCallback>(&mut self, mut input: &[u8], mut cb: F) {
176        while !input.is_empty() {
177            match &mut self.capture {
178                VTCaptureInternal::None => {
179                    // Normal parsing mode - feed to the underlying parser
180                    let count = self
181                        .parser
182                        .feed_with_abortable(input, &mut |event: VTEvent| {
183                            let capture_mode = cb.event(VTCaptureEvent::VTEvent(event));
184                            match capture_mode {
185                                VTInputCapture::None => {
186                                    // Stay in normal mode
187                                }
188                                VTInputCapture::Count(count) => {
189                                    self.capture = VTCaptureInternal::Count(count);
190                                }
191                                VTInputCapture::CountUtf8(count) => {
192                                    self.capture = VTCaptureInternal::CountUtf8(count);
193                                }
194                                VTInputCapture::Terminator(terminator) => {
195                                    self.capture = VTCaptureInternal::Terminator(terminator, 0);
196                                }
197                            }
198                            false // Don't abort parsing
199                        });
200
201                    input = &input[count..];
202                }
203                capture => {
204                    // Capture mode - collect data until capture is complete
205                    if let Some(captured_data) = capture.feed(&mut input) {
206                        cb.event(VTCaptureEvent::Capture(captured_data));
207                    }
208
209                    // Check if capture is complete
210                    if matches!(self.capture, VTCaptureInternal::None) {
211                        cb.event(VTCaptureEvent::CaptureEnd);
212                    }
213                }
214            }
215        }
216    }
217}
218
#[cfg(test)]
mod tests {
    use super::*;

    /// Bracketed-paste content is captured up to the `ESC [ 201 ~` terminator.
    #[test]
    fn test_capture_paste() {
        let mut parser = VTCapturePushParser::new();
        let mut output = String::new();
        parser.feed_with(
            b"raw\x1b[200~paste\x1b[201~raw",
            &mut |event: VTCaptureEvent| {
                output.push_str(format!("{event:?}\n").as_str());
                let starts_paste = match event {
                    VTCaptureEvent::VTEvent(VTEvent::Csi(csi)) => {
                        csi.params.try_parse::<usize>(0).unwrap_or(0) == 200
                    }
                    _ => false,
                };
                if starts_paste {
                    VTInputCapture::Terminator(b"\x1b[201~")
                } else {
                    VTInputCapture::None
                }
            },
        );

        let expected = r#"
VTEvent(Raw('raw'))
VTEvent(Csi('200', '', '~'))
Capture([112, 97, 115, 116, 101])
CaptureEnd
VTEvent(Raw('raw'))
"#;
        assert_eq!(output.trim(), expected.trim());
    }

    /// A fixed byte count is captured after `CSI X`.
    #[test]
    fn test_capture_count() {
        let mut parser = VTCapturePushParser::new();
        let mut output = String::new();
        parser.feed_with(b"raw\x1b[Xpaste\x1b[Yraw", &mut |event: VTCaptureEvent| {
            output.push_str(format!("{event:?}\n").as_str());
            let is_trigger = match event {
                VTCaptureEvent::VTEvent(VTEvent::Csi(csi)) => csi.final_byte == b'X',
                _ => false,
            };
            if is_trigger {
                VTInputCapture::Count(5)
            } else {
                VTInputCapture::None
            }
        });

        let expected = r#"
VTEvent(Raw('raw'))
VTEvent(Csi('', 'X'))
Capture([112, 97, 115, 116, 101])
CaptureEnd
VTEvent(Csi('', 'Y'))
VTEvent(Raw('raw'))
"#;
        assert_eq!(output.trim(), expected.trim());
    }

    /// Counting UTF-8 chars over pure ASCII behaves like counting bytes.
    #[test]
    fn test_capture_count_utf8_but_ascii() {
        let mut parser = VTCapturePushParser::new();
        let mut output = String::new();
        parser.feed_with(b"raw\x1b[Xpaste\x1b[Yraw", &mut |event: VTCaptureEvent| {
            output.push_str(format!("{event:?}\n").as_str());
            let is_trigger = match event {
                VTCaptureEvent::VTEvent(VTEvent::Csi(csi)) => csi.final_byte == b'X',
                _ => false,
            };
            if is_trigger {
                VTInputCapture::CountUtf8(5)
            } else {
                VTInputCapture::None
            }
        });

        let expected = r#"
VTEvent(Raw('raw'))
VTEvent(Csi('', 'X'))
Capture([112, 97, 115, 116, 101])
CaptureEnd
VTEvent(Csi('', 'Y'))
VTEvent(Raw('raw'))
"#;
        assert_eq!(output.trim(), expected.trim());
    }

    /// Multi-byte characters are counted as characters, not bytes.
    #[test]
    fn test_capture_count_utf8() {
        let mut parser = VTCapturePushParser::new();
        let mut output = String::new();
        let input = "raw\u{001b}[X🤖🦕✅😀🕓\u{001b}[Yraw".as_bytes();
        parser.feed_with(input, &mut |event: VTCaptureEvent| {
            output.push_str(format!("{event:?}\n").as_str());
            let is_trigger = match event {
                VTCaptureEvent::VTEvent(VTEvent::Csi(csi)) => csi.final_byte == b'X',
                _ => false,
            };
            if is_trigger {
                VTInputCapture::CountUtf8(5)
            } else {
                VTInputCapture::None
            }
        });

        let expected = r#"
VTEvent(Raw('raw'))
VTEvent(Csi('', 'X'))
Capture([240, 159, 164, 150, 240, 159, 166, 149, 226, 156, 133, 240, 159, 152, 128, 240, 159, 149, 147])
CaptureEnd
VTEvent(Csi('', 'Y'))
VTEvent(Raw('raw'))
"#;
        assert_eq!(output.trim(), expected.trim());
    }

    /// A partially matched terminator is handed back as captured data.
    #[test]
    fn test_capture_terminator_partial_match() {
        let mut parser = VTCapturePushParser::new();
        let mut output = String::new();

        parser.feed_with(
            b"start\x1b[200~part\x1b[201ial\x1b[201~end",
            &mut |event: VTCaptureEvent| {
                output.push_str(format!("{event:?}\n").as_str());
                let starts_paste = match event {
                    VTCaptureEvent::VTEvent(VTEvent::Csi(csi)) => {
                        csi.final_byte == b'~'
                            && csi.params.try_parse::<usize>(0).unwrap_or(0) == 200
                    }
                    _ => false,
                };
                if starts_paste {
                    VTInputCapture::Terminator(b"\x1b[201~")
                } else {
                    VTInputCapture::None
                }
            },
        );

        let expected = r#"VTEvent(Raw('start'))
VTEvent(Csi('200', '', '~'))
Capture([112, 97, 114, 116])
Capture([27, 91, 50, 48, 49])
Capture([105, 97, 108])
CaptureEnd
VTEvent(Raw('end'))"#;
        assert_eq!(output.trim(), expected);
    }

    /// The same capture must be reassembled when the input arrives in small
    /// chunks (1 to 4 bytes at a time).
    #[test]
    fn test_capture_terminator_partial_match_single_byte() {
        let input = b"start\x1b[200~part\x1b[201ial\x1b[201~end";

        for chunk_size in 1..5 {
            let (captured, output) = capture_chunk_size(input, chunk_size);
            assert_eq!(captured, b"part\x1b[201ial", "{output}",);
        }
    }

    /// Feeds `input` in `chunk_size`-byte pieces and returns the concatenated
    /// captured bytes together with a debug log of every event.
    fn capture_chunk_size(input: &'static [u8; 32], chunk_size: usize) -> (Vec<u8>, String) {
        let mut parser = VTCapturePushParser::new();
        let mut captured = Vec::new();
        let mut output = String::new();
        for chunk in input.chunks(chunk_size) {
            parser.feed_with(chunk, &mut |event: VTCaptureEvent| {
                output.push_str(format!("{event:?}\n").as_str());
                let starts_paste = match event {
                    VTCaptureEvent::Capture(data) => {
                        captured.extend_from_slice(data);
                        false
                    }
                    VTCaptureEvent::VTEvent(VTEvent::Csi(csi)) => {
                        csi.final_byte == b'~'
                            && csi.params.try_parse::<usize>(0).unwrap_or(0) == 200
                    }
                    _ => false,
                };
                if starts_paste {
                    VTInputCapture::Terminator(b"\x1b[201~")
                } else {
                    VTInputCapture::None
                }
            });
        }
        (captured, output)
    }
}
419}