vt_push_parser/
capture.rs

1//! Raw-input-capturing push parser.
2
3use crate::{VT_PARSER_INTEREST_DEFAULT, VTEvent, VTPushParser};
4
/// Receiver for the events produced by [`VTCapturePushParser::feed_with`].
///
/// Any `FnMut(VTCaptureEvent<'_>) -> VTInputCapture` closure implements this
/// trait through the blanket impl in this module.
pub trait VTInputCaptureCallback {
    /// Handles a single event and returns the capture mode to switch to.
    ///
    /// The returned mode is only acted upon for [`VTCaptureEvent::VTEvent`]
    /// events; `feed_with` discards the value returned for
    /// [`VTCaptureEvent::Capture`] and [`VTCaptureEvent::CaptureEnd`].
    fn event(&mut self, event: VTCaptureEvent<'_>) -> VTInputCapture;
}
8
/// Lets plain closures be used as capture callbacks: the event is passed
/// straight to the closure and its result is returned unchanged.
impl<F: FnMut(VTCaptureEvent<'_>) -> VTInputCapture> VTInputCaptureCallback for F {
    #[inline(always)]
    fn event(&mut self, event: VTCaptureEvent<'_>) -> VTInputCapture {
        // Invoke the closure itself; there is no additional state.
        self(event)
    }
}
15
/// The type of capture mode to use after this event has been emitted.
///
/// The data will be emitted as [`VTCaptureEvent::Capture`] events, followed
/// by a [`VTCaptureEvent::CaptureEnd`] event once the capture is complete.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum VTInputCapture {
    /// No capture mode. This should also be returned from any
    /// [`VTCaptureEvent::Capture`] or [`VTCaptureEvent::CaptureEnd`] event
    /// (the value returned for those events is not inspected by the parser).
    None,
    /// Capture a fixed number of bytes.
    Count(usize),
    /// Capture a fixed number of UTF-8 chars.
    CountUtf8(usize),
    /// Capture bytes until a terminator is found. The terminator itself is
    /// consumed but not reported as captured data.
    Terminator(&'static [u8]),
}
31
/// Events delivered to a [`VTInputCaptureCallback`].
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
#[derive(Debug)]
pub enum VTCaptureEvent<'a> {
    /// A regular event produced by the wrapped [`VTPushParser`].
    VTEvent(VTEvent<'a>),
    /// A chunk of raw bytes collected while a capture mode is active.
    Capture(&'a [u8]),
    /// The active capture has completed; emitted once the capture state
    /// returns to "not capturing".
    CaptureEnd,
}
39
/// Internal capture state machine driven by `VTCapturePushParser::feed_with`.
enum VTCaptureInternal {
    /// Not capturing.
    None,
    /// Number of raw bytes that still have to be captured.
    Count(usize),
    /// Number of UTF-8 characters whose leading byte has not been consumed
    /// yet. A value of `0` means that only trailing continuation bytes of the
    /// final character are still accepted before the capture ends.
    CountUtf8(usize),
    /// The terminator to look for and how many of its leading bytes have
    /// already been matched (and consumed) from the input.
    Terminator(&'static [u8], usize),
}

impl VTCaptureInternal {
    /// Returns `true` for UTF-8 continuation bytes (`0b10xx_xxxx`).
    fn is_continuation(byte: u8) -> bool {
        byte & 0xC0 == 0x80
    }

    /// Encoded length announced by a UTF-8 leading byte. Bytes that are not a
    /// valid multi-byte lead are treated as one-byte characters.
    fn utf8_width(lead: u8) -> usize {
        match lead {
            0xC0..=0xDF => 2,
            0xE0..=0xEF => 3,
            0xF0..=0xF7 => 4,
            _ => 1,
        }
    }

    /// Advances the capture with the bytes in `input`.
    ///
    /// Consumed bytes are removed from the front of `input`. The return value
    /// is the slice (possibly empty) that should be reported as captured
    /// data, or `None` when there is nothing to report for this step. When
    /// the capture completes, `self` is reset to [`VTCaptureInternal::None`].
    ///
    /// Every call with non-empty input in an active capture state either
    /// consumes input or changes the state, so a caller looping on
    /// "input not empty" always terminates. Captures that span several input
    /// chunks are reported piecewise, one slice per step.
    fn feed<'a>(&mut self, input: &mut &'a [u8]) -> Option<&'a [u8]> {
        match self {
            VTCaptureInternal::None => None,
            VTCaptureInternal::Count(count) => {
                let data: &'a [u8] = *input;
                // Take what is available; the rest arrives with later chunks.
                let take = (*count).min(data.len());
                let (capture, rest) = data.split_at(take);
                *input = rest;
                *count -= take;
                if *count == 0 {
                    *self = VTCaptureInternal::None;
                }
                Some(capture)
            }
            VTCaptureInternal::CountUtf8(remaining) => {
                let data: &'a [u8] = *input;
                let mut end = data.len();
                let mut finished = false;
                let mut pos = 0;

                while pos < data.len() {
                    let byte = data[pos];
                    if Self::is_continuation(byte) {
                        // Belongs to a character that was already counted.
                        pos += 1;
                        continue;
                    }
                    if *remaining == 0 {
                        // A new character starts here, so the capture ended
                        // right before it.
                        end = pos;
                        finished = true;
                        break;
                    }
                    *remaining -= 1;
                    pos += 1;
                    if *remaining == 0 {
                        // Final character: take its continuation bytes too.
                        let tail = data[pos..]
                            .iter()
                            .take_while(|&&b| Self::is_continuation(b))
                            .count();
                        pos += tail;
                        // The character is known to be complete when another
                        // byte follows it, or when it already has the length
                        // announced by its leading byte. Otherwise the chunk
                        // ended mid-character and we stay in `CountUtf8(0)`.
                        if pos < data.len() || tail + 1 >= Self::utf8_width(byte) {
                            end = pos;
                            finished = true;
                        }
                        break;
                    }
                }

                let (capture, rest) = data.split_at(end);
                *input = rest;
                if finished {
                    *self = VTCaptureInternal::None;
                }
                Some(capture)
            }
            VTCaptureInternal::Terminator(terminator, found) => {
                let needle: &'static [u8] = *terminator;

                // An empty terminator matches immediately.
                if needle.is_empty() {
                    *self = VTCaptureInternal::None;
                    return None;
                }

                // Ground state: nothing of the terminator matched so far.
                if *found == 0 {
                    let data: &'a [u8] = *input;
                    return match data.iter().position(|&b| b == needle[0]) {
                        Some(at) => {
                            // Report everything before the candidate match
                            // and consume its first byte.
                            *input = &data[at + 1..];
                            if needle.len() == 1 {
                                // One-byte terminator: already complete.
                                *self = VTCaptureInternal::None;
                            } else {
                                *found = 1;
                            }
                            Some(&data[..at])
                        }
                        None => {
                            *input = &[];
                            Some(data)
                        }
                    };
                }

                // Part of the terminator is matched; keep comparing.
                while *found < needle.len() {
                    let data: &'a [u8] = *input;
                    let byte = match data.first() {
                        Some(&b) => b,
                        // Out of input in the middle of a match; wait for more.
                        None => return None,
                    };

                    if byte == needle[*found] {
                        *found += 1;
                        *input = &data[1..];
                    } else {
                        // Mismatch: the matched prefix was ordinary data after
                        // all. Release the shortest part of it such that what
                        // remains is still a prefix of the terminator, so an
                        // overlapping terminator start is not lost. The
                        // mismatching byte is left in `input` and re-examined
                        // on the next call.
                        let matched = *found;
                        let shift = (1..matched)
                            .find(|&s| needle[s..matched] == needle[..matched - s])
                            .unwrap_or(matched);
                        *found = matched - shift;
                        return Some(&needle[..shift]);
                    }
                }

                // We've matched the entire terminator.
                *self = VTCaptureInternal::None;
                None
            }
        }
    }
}
132
/// A parser that allows for "capturing" of input data, i.e. temporarily
/// handing the input over to the callback as unparsed data events.
///
/// This functions in the same way as [`VTPushParser`], but emits
/// [`VTCaptureEvent`]s instead of [`VTEvent`]s.
pub struct VTCapturePushParser<const INTEREST: u8 = VT_PARSER_INTEREST_DEFAULT> {
    // The wrapped parser that handles input while no capture is active.
    parser: VTPushParser<INTEREST>,
    // The current capture state; `VTCaptureInternal::None` when not capturing.
    capture: VTCaptureInternal,
}
142
/// The default parser is the one built by [`VTCapturePushParser::new`].
impl Default for VTCapturePushParser {
    fn default() -> Self {
        Self::new()
    }
}
148
149impl VTCapturePushParser {
150    pub const fn new() -> VTCapturePushParser {
151        VTCapturePushParser::new_with_interest::<VT_PARSER_INTEREST_DEFAULT>()
152    }
153
154    pub const fn new_with_interest<const INTEREST: u8>() -> VTCapturePushParser<INTEREST> {
155        VTCapturePushParser::new_with()
156    }
157}
158
159impl<const INTEREST: u8> VTCapturePushParser<INTEREST> {
160    const fn new_with() -> Self {
161        Self {
162            parser: VTPushParser::new_with(),
163            capture: VTCaptureInternal::None,
164        }
165    }
166
167    pub fn is_ground(&self) -> bool {
168        self.parser.is_ground()
169    }
170
171    pub fn idle(&mut self) -> Option<VTCaptureEvent<'static>> {
172        self.parser.idle().map(VTCaptureEvent::VTEvent)
173    }
174
175    pub fn feed_with<'this, 'input, F: VTInputCaptureCallback>(
176        &'this mut self,
177        mut input: &'input [u8],
178        mut cb: F,
179    ) {
180        while !input.is_empty() {
181            match &mut self.capture {
182                VTCaptureInternal::None => {
183                    // Normal parsing mode - feed to the underlying parser
184                    let count = self
185                        .parser
186                        .feed_with_abortable(input, &mut |event: VTEvent| {
187                            let capture_mode = cb.event(VTCaptureEvent::VTEvent(event));
188                            match capture_mode {
189                                VTInputCapture::None => {
190                                    // Stay in normal mode
191                                }
192                                VTInputCapture::Count(count) => {
193                                    self.capture = VTCaptureInternal::Count(count);
194                                }
195                                VTInputCapture::CountUtf8(count) => {
196                                    self.capture = VTCaptureInternal::CountUtf8(count);
197                                }
198                                VTInputCapture::Terminator(terminator) => {
199                                    self.capture = VTCaptureInternal::Terminator(terminator, 0);
200                                }
201                            }
202                            false // Don't abort parsing
203                        });
204
205                    input = &input[count..];
206                }
207                capture => {
208                    // Capture mode - collect data until capture is complete
209                    if let Some(captured_data) = capture.feed(&mut input) {
210                        cb.event(VTCaptureEvent::Capture(captured_data));
211                    }
212
213                    // Check if capture is complete
214                    if matches!(self.capture, VTCaptureInternal::None) {
215                        cb.event(VTCaptureEvent::CaptureEnd);
216                    }
217                }
218            }
219        }
220    }
221}
222
// Each test records the `Debug` output of every event, one per line, and
// compares that log against the expected event sequence.
#[cfg(test)]
mod tests {
    use super::*;

    /// A `CSI 200 ~` event starts a terminator capture that ends at
    /// `ESC [ 201 ~`; the terminator itself is not reported as captured data.
    #[test]
    fn test_capture_paste() {
        let mut output = String::new();
        let mut parser = VTCapturePushParser::new();
        parser.feed_with(
            b"raw\x1b[200~paste\x1b[201~raw",
            &mut |event: VTCaptureEvent| {
                output.push_str(&format!("{event:?}\n"));
                match event {
                    VTCaptureEvent::VTEvent(VTEvent::Csi(csi)) => {
                        if csi.params.try_parse::<usize>(0).unwrap_or(0) == 200 {
                            VTInputCapture::Terminator(b"\x1b[201~")
                        } else {
                            VTInputCapture::None
                        }
                    }
                    _ => VTInputCapture::None,
                }
            },
        );
        assert_eq!(
            output.trim(),
            r#"
VTEvent(Raw('raw'))
VTEvent(Csi('200', '', '~'))
Capture([112, 97, 115, 116, 101])
CaptureEnd
VTEvent(Raw('raw'))
"#
            .trim()
        );
    }

    /// A `CSI X` event starts a five-byte capture; parsing resumes with the
    /// `CSI Y` sequence that follows the captured bytes.
    #[test]
    fn test_capture_count() {
        let mut output = String::new();
        let mut parser = VTCapturePushParser::new();
        parser.feed_with(b"raw\x1b[Xpaste\x1b[Yraw", &mut |event: VTCaptureEvent| {
            output.push_str(&format!("{event:?}\n"));
            match event {
                VTCaptureEvent::VTEvent(VTEvent::Csi(csi)) => {
                    if csi.final_byte == b'X' {
                        VTInputCapture::Count(5)
                    } else {
                        VTInputCapture::None
                    }
                }
                _ => VTInputCapture::None,
            }
        });
        assert_eq!(
            output.trim(),
            r#"
VTEvent(Raw('raw'))
VTEvent(Csi('', 'X'))
Capture([112, 97, 115, 116, 101])
CaptureEnd
VTEvent(Csi('', 'Y'))
VTEvent(Raw('raw'))
"#
            .trim()
        );
    }

    /// Same input as `test_capture_count`, but counting UTF-8 characters; for
    /// pure ASCII the result is identical to counting bytes.
    #[test]
    fn test_capture_count_utf8_but_ascii() {
        let mut output = String::new();
        let mut parser = VTCapturePushParser::new();
        parser.feed_with(b"raw\x1b[Xpaste\x1b[Yraw", &mut |event: VTCaptureEvent| {
            output.push_str(&format!("{event:?}\n"));
            match event {
                VTCaptureEvent::VTEvent(VTEvent::Csi(csi)) => {
                    if csi.final_byte == b'X' {
                        VTInputCapture::CountUtf8(5)
                    } else {
                        VTInputCapture::None
                    }
                }
                _ => VTInputCapture::None,
            }
        });
        assert_eq!(
            output.trim(),
            r#"
VTEvent(Raw('raw'))
VTEvent(Csi('', 'X'))
Capture([112, 97, 115, 116, 101])
CaptureEnd
VTEvent(Csi('', 'Y'))
VTEvent(Raw('raw'))
"#
            .trim()
        );
    }

    /// Five multi-byte characters (19 bytes in total) are captured as a unit
    /// when counting UTF-8 characters.
    #[test]
    fn test_capture_count_utf8() {
        let mut output = String::new();
        let mut parser = VTCapturePushParser::new();
        let input = "raw\u{001b}[X🤖🦕✅😀🕓\u{001b}[Yraw".as_bytes();
        parser.feed_with(input, &mut |event: VTCaptureEvent| {
            output.push_str(&format!("{event:?}\n"));
            match event {
                VTCaptureEvent::VTEvent(VTEvent::Csi(csi)) => {
                    if csi.final_byte == b'X' {
                        VTInputCapture::CountUtf8(5)
                    } else {
                        VTInputCapture::None
                    }
                }
                _ => VTInputCapture::None,
            }
        });
        assert_eq!(output.trim(), r#"
VTEvent(Raw('raw'))
VTEvent(Csi('', 'X'))
Capture([240, 159, 164, 150, 240, 159, 166, 149, 226, 156, 133, 240, 159, 152, 128, 240, 159, 149, 147])
CaptureEnd
VTEvent(Csi('', 'Y'))
VTEvent(Raw('raw'))
"#.trim());
    }

    /// A sequence that matches only part of the terminator (`ESC [ 201`
    /// followed by `i`) is handed back as captured data and the search for
    /// the terminator continues.
    #[test]
    fn test_capture_terminator_partial_match() {
        let mut output = String::new();
        let mut parser = VTCapturePushParser::new();

        parser.feed_with(
            b"start\x1b[200~part\x1b[201ial\x1b[201~end",
            &mut |event: VTCaptureEvent| {
                output.push_str(&format!("{event:?}\n"));
                match event {
                    VTCaptureEvent::VTEvent(VTEvent::Csi(csi)) => {
                        if csi.final_byte == b'~'
                            && csi.params.try_parse::<usize>(0).unwrap_or(0) == 200
                        {
                            VTInputCapture::Terminator(b"\x1b[201~")
                        } else {
                            VTInputCapture::None
                        }
                    }
                    _ => VTInputCapture::None,
                }
            },
        );

        assert_eq!(
            output.trim(),
            r#"VTEvent(Raw('start'))
VTEvent(Csi('200', '', '~'))
Capture([112, 97, 114, 116])
Capture([27, 91, 50, 48, 49])
Capture([105, 97, 108])
CaptureEnd
VTEvent(Raw('end'))"#
        );
    }

    /// Feeds the partial-match input in chunks of 1 to 4 bytes and checks
    /// that the concatenated captured data is the same for every chunk size.
    #[test]
    fn test_capture_terminator_partial_match_single_byte() {
        let input = b"start\x1b[200~part\x1b[201ial\x1b[201~end";

        for chunk_size in 1..5 {
            let (captured, output) = capture_chunk_size(input, chunk_size);
            assert_eq!(captured, b"part\x1b[201ial", "{output}",);
        }
    }

    /// Feeds `input` to a fresh parser in `chunk_size`-byte pieces. Returns
    /// all bytes reported through `Capture` events together with the event
    /// log (used as the assertion message by the caller).
    fn capture_chunk_size(input: &'static [u8; 32], chunk_size: usize) -> (Vec<u8>, String) {
        let mut output = String::new();
        let mut parser = VTCapturePushParser::new();
        let mut captured = Vec::new();
        for chunk in input.chunks(chunk_size) {
            parser.feed_with(chunk, &mut |event: VTCaptureEvent| {
                output.push_str(&format!("{event:?}\n"));
                match event {
                    VTCaptureEvent::Capture(data) => {
                        // Accumulate the pieces; a capture may be reported in
                        // several `Capture` events.
                        captured.extend_from_slice(data);
                        VTInputCapture::None
                    }
                    VTCaptureEvent::VTEvent(VTEvent::Csi(csi)) => {
                        if csi.final_byte == b'~'
                            && csi.params.try_parse::<usize>(0).unwrap_or(0) == 200
                        {
                            VTInputCapture::Terminator(b"\x1b[201~")
                        } else {
                            VTInputCapture::None
                        }
                    }
                    _ => VTInputCapture::None,
                }
            });
        }
        (captured, output)
    }
}
423}