// anstyle_stream/adapter/strip.rs

1use anstyle_parse::state::state_change;
2use anstyle_parse::state::Action;
3use anstyle_parse::state::State;
4
5/// Strip ANSI escapes from a `&str`, returning the printable content
6///
7/// This can be used to take output from a program that includes escape sequences and write it
8/// somewhere that does not easily support them, such as a log file.
9///
10/// For non-contiguous data, see [`StripStr`].
11///
12/// # Example
13///
14/// ```rust
15/// use std::io::Write as _;
16///
17/// let styled_text = "\x1b[32mfoo\x1b[m bar";
18/// let plain_str = anstyle_stream::adapter::strip_str(&styled_text).to_string();
19/// assert_eq!(plain_str, "foo bar");
20/// ```
21#[inline]
22pub fn strip_str(data: &str) -> StrippedStr<'_> {
23    StrippedStr::new(data)
24}
25
26/// See [`strip_str`]
27#[derive(Default, Clone, Debug, PartialEq, Eq)]
28pub struct StrippedStr<'s> {
29    bytes: &'s [u8],
30    state: State,
31}
32
33impl<'s> StrippedStr<'s> {
34    #[inline]
35    fn new(data: &'s str) -> Self {
36        Self {
37            bytes: data.as_bytes(),
38            state: State::Ground,
39        }
40    }
41
42    /// Create a [`String`] of the printable content
43    #[inline]
44    #[allow(clippy::inherent_to_string_shadow_display)] // Single-allocation implementation
45    pub fn to_string(&self) -> String {
46        use std::fmt::Write as _;
47        let mut stripped = String::with_capacity(self.bytes.len());
48        let _ = write!(&mut stripped, "{}", self);
49        stripped
50    }
51}
52
53impl<'s> std::fmt::Display for StrippedStr<'s> {
54    /// **Note:** this does *not* exhaust the [`Iterator`]
55    #[inline]
56    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
57        let iter = Self {
58            bytes: self.bytes,
59            state: self.state,
60        };
61        for printable in iter {
62            printable.fmt(f)?;
63        }
64        Ok(())
65    }
66}
67
68impl<'s> Iterator for StrippedStr<'s> {
69    type Item = &'s str;
70
71    #[inline]
72    fn next(&mut self) -> Option<Self::Item> {
73        next_str(&mut self.bytes, &mut self.state)
74    }
75}
76
77/// Incrementally strip non-contiguous data
78#[derive(Default, Clone, Debug, PartialEq, Eq)]
79pub struct StripStr {
80    state: State,
81}
82
83impl StripStr {
84    /// Initial state
85    pub fn new() -> Self {
86        Default::default()
87    }
88
89    /// Strip the next segment of data
90    pub fn strip_next<'s>(&'s mut self, data: &'s str) -> StripStrIter<'s> {
91        StripStrIter {
92            bytes: data.as_bytes(),
93            state: &mut self.state,
94        }
95    }
96}
97
98/// See [`StripStr`]
99#[derive(Debug, PartialEq, Eq)]
100pub struct StripStrIter<'s> {
101    bytes: &'s [u8],
102    state: &'s mut State,
103}
104
105impl<'s> Iterator for StripStrIter<'s> {
106    type Item = &'s str;
107
108    #[inline]
109    fn next(&mut self) -> Option<Self::Item> {
110        next_str(&mut self.bytes, self.state)
111    }
112}
113
114#[inline]
115fn next_str<'s>(bytes: &mut &'s [u8], state: &mut State) -> Option<&'s str> {
116    let offset = bytes.iter().copied().position(|b| {
117        let (next_state, action) = state_change(*state, b);
118        if next_state != State::Anywhere {
119            *state = next_state;
120        }
121        is_printable_str(action, b)
122    });
123    let (_, next) = bytes.split_at(offset.unwrap_or(bytes.len()));
124    *bytes = next;
125    *state = State::Ground;
126
127    let offset = bytes.iter().copied().position(|b| {
128        let (_next_state, action) = state_change(State::Ground, b);
129        !is_printable_str(action, b)
130    });
131    let (printable, next) = bytes.split_at(offset.unwrap_or(bytes.len()));
132    *bytes = next;
133    if printable.is_empty() {
134        None
135    } else {
136        let printable = unsafe {
137            from_utf8_unchecked(
138                printable,
139                "`bytes` was validated as UTF-8, the parser preserves UTF-8 continuations",
140            )
141        };
142        Some(printable)
143    }
144}
145
/// Reinterpret `bytes` as a `&str`, skipping validation unless debug assertions are enabled
///
/// With debug assertions enabled the bytes are still validated, and invalid input panics with
/// `safety_justification` as the message, to catch problems more quickly when testing.
///
/// # Safety
///
/// `bytes` must be valid UTF-8.
#[inline]
unsafe fn from_utf8_unchecked<'b>(bytes: &'b [u8], safety_justification: &'static str) -> &'b str {
    if !cfg!(debug_assertions) {
        return std::str::from_utf8_unchecked(bytes);
    }
    // Catch problems more quickly when testing
    std::str::from_utf8(bytes).expect(safety_justification)
}
155
156#[inline]
157fn is_printable_str(action: Action, byte: u8) -> bool {
158    // VT320 considered 0x7f to be `Print`able but we expect to be working in UTF-8 systems and not
159    // ISO Latin-1, making it DEL and non-printable
160    const DEL: u8 = 0x7f;
161    (action == Action::Print && byte != DEL)
162        || action == Action::BeginUtf8
163        // since we know the input is valid UTF-8, the only thing  we can do with
164        // continuations is to print them
165        || is_utf8_continuation(byte)
166        || (action == Action::Execute && byte.is_ascii_whitespace())
167}
168
/// Whether `b` lies in `0x80..=0xbf`, i.e. has the `10xxxxxx` bit pattern
#[inline]
fn is_utf8_continuation(b: u8) -> bool {
    (b & 0b1100_0000) == 0b1000_0000
}
173
174/// Strip ANSI escapes from bytes, returning the printable content
175///
176/// This can be used to take output from a program that includes escape sequences and write it
177/// somewhere that does not easily support them, such as a log file.
178///
179/// # Example
180///
181/// ```rust
182/// use std::io::Write as _;
183///
184/// let styled_text = "\x1b[32mfoo\x1b[m bar";
185/// let plain_str = anstyle_stream::adapter::strip_bytes(styled_text.as_bytes()).into_vec();
186/// assert_eq!(plain_str.as_slice(), &b"foo bar"[..]);
187/// ```
188#[inline]
189pub fn strip_bytes(data: &[u8]) -> StrippedBytes<'_> {
190    StrippedBytes::new(data)
191}
192
193/// See [`strip_bytes`]
194#[derive(Default, Clone, Debug, PartialEq, Eq)]
195pub struct StrippedBytes<'s> {
196    bytes: &'s [u8],
197    state: State,
198    utf8parser: Utf8Parser,
199}
200
201impl<'s> StrippedBytes<'s> {
202    /// See [`strip_bytes`]
203    #[inline]
204    pub fn new(bytes: &'s [u8]) -> Self {
205        Self {
206            bytes,
207            state: State::Ground,
208            utf8parser: Default::default(),
209        }
210    }
211
212    /// Strip the next slice of bytes
213    ///
214    /// Used when the content is in several non-contiguous slices
215    ///
216    /// # Panic
217    ///
218    /// May panic if it is not exhausted / empty
219    #[inline]
220    pub fn extend(&mut self, bytes: &'s [u8]) {
221        debug_assert!(
222            self.is_empty(),
223            "current bytes must be processed to ensure we end at the right state"
224        );
225        self.bytes = bytes;
226    }
227
228    /// Report the bytes has been exhausted
229    #[inline]
230    pub fn is_empty(&self) -> bool {
231        self.bytes.is_empty()
232    }
233
234    /// Create a [`Vec`] of the printable content
235    #[inline]
236    pub fn into_vec(self) -> Vec<u8> {
237        let mut stripped = Vec::with_capacity(self.bytes.len());
238        for printable in self {
239            stripped.extend(printable);
240        }
241        stripped
242    }
243}
244
245impl<'s> Iterator for StrippedBytes<'s> {
246    type Item = &'s [u8];
247
248    #[inline]
249    fn next(&mut self) -> Option<Self::Item> {
250        next_bytes(&mut self.bytes, &mut self.state, &mut self.utf8parser)
251    }
252}
253
254/// Incrementally strip non-contiguous data
255#[derive(Default, Clone, Debug, PartialEq, Eq)]
256pub struct StripBytes {
257    state: State,
258    utf8parser: Utf8Parser,
259}
260
261impl StripBytes {
262    /// Initial state
263    pub fn new() -> Self {
264        Default::default()
265    }
266
267    /// Strip the next segment of data
268    pub fn strip_next<'s>(&'s mut self, bytes: &'s [u8]) -> StripBytesIter<'s> {
269        StripBytesIter {
270            bytes,
271            state: &mut self.state,
272            utf8parser: &mut self.utf8parser,
273        }
274    }
275}
276
277/// See [`StripBytes`]
278#[derive(Debug, PartialEq, Eq)]
279pub struct StripBytesIter<'s> {
280    bytes: &'s [u8],
281    state: &'s mut State,
282    utf8parser: &'s mut Utf8Parser,
283}
284
285impl<'s> Iterator for StripBytesIter<'s> {
286    type Item = &'s [u8];
287
288    #[inline]
289    fn next(&mut self) -> Option<Self::Item> {
290        next_bytes(&mut self.bytes, self.state, self.utf8parser)
291    }
292}
293
294#[inline]
295fn next_bytes<'s>(
296    bytes: &mut &'s [u8],
297    state: &mut State,
298    utf8parser: &mut Utf8Parser,
299) -> Option<&'s [u8]> {
300    let offset = bytes.iter().copied().position(|b| {
301        if *state == State::Utf8 {
302            true
303        } else {
304            let (next_state, action) = state_change(*state, b);
305            if next_state != State::Anywhere {
306                *state = next_state;
307            }
308            is_printable_bytes(action, b)
309        }
310    });
311    let (_, next) = bytes.split_at(offset.unwrap_or(bytes.len()));
312    *bytes = next;
313
314    let offset = bytes.iter().copied().position(|b| {
315        if *state == State::Utf8 {
316            if utf8parser.add(b) {
317                *state = State::Ground;
318            }
319            false
320        } else {
321            let (next_state, action) = state_change(State::Ground, b);
322            if next_state != State::Anywhere {
323                *state = next_state;
324            }
325            if *state == State::Utf8 {
326                utf8parser.add(b);
327                false
328            } else {
329                !is_printable_bytes(action, b)
330            }
331        }
332    });
333    let (printable, next) = bytes.split_at(offset.unwrap_or(bytes.len()));
334    *bytes = next;
335    if printable.is_empty() {
336        None
337    } else {
338        Some(printable)
339    }
340}
341
342#[derive(Default, Clone, Debug, PartialEq, Eq)]
343pub struct Utf8Parser {
344    utf8_parser: utf8parse::Parser,
345}
346
347impl Utf8Parser {
348    fn add(&mut self, byte: u8) -> bool {
349        let mut b = false;
350        let mut receiver = VtUtf8Receiver(&mut b);
351        self.utf8_parser.advance(&mut receiver, byte);
352        b
353    }
354}
355
356struct VtUtf8Receiver<'a>(&'a mut bool);
357
358impl<'a> utf8parse::Receiver for VtUtf8Receiver<'a> {
359    fn codepoint(&mut self, _: char) {
360        *self.0 = true;
361    }
362
363    fn invalid_sequence(&mut self) {
364        *self.0 = true;
365    }
366}
367
368#[inline]
369fn is_printable_bytes(action: Action, byte: u8) -> bool {
370    // VT320 considered 0x7f to be `Print`able but we expect to be working in UTF-8 systems and not
371    // ISO Latin-1, making it DEL and non-printable
372    const DEL: u8 = 0x7f;
373
374    // Continuations aren't included as they may also be control codes, requiring more context
375    (action == Action::Print && byte != DEL)
376        || action == Action::BeginUtf8
377        || (action == Action::Execute && byte.is_ascii_whitespace())
378}
379
#[cfg(test)]
mod test {
    use super::*;
    use proptest::prelude::*;

    /// Model based off full parser
    ///
    /// Collects every printed character, plus executed control bytes that are ASCII whitespace.
    fn parser_strip(bytes: &[u8]) -> String {
        #[derive(Default)]
        struct Strip(String);
        impl Strip {
            fn with_capacity(capacity: usize) -> Self {
                Self(String::with_capacity(capacity))
            }
        }
        impl anstyle_parse::Perform for Strip {
            fn print(&mut self, c: char) {
                self.0.push(c);
            }

            fn execute(&mut self, byte: u8) {
                if byte.is_ascii_whitespace() {
                    self.0.push(byte as char);
                }
            }
        }

        let mut stripped = Strip::with_capacity(bytes.len());
        let mut parser = anstyle_parse::Parser::<anstyle_parse::DefaultCharAccumulator>::new();
        for byte in bytes {
            parser.advance(&mut stripped, *byte);
        }
        stripped.0
    }

    /// Model verifying incremental parsing: feeds the input one `char` at a time
    fn strip_char(mut s: &str) -> String {
        let mut result = String::new();
        let mut state = StripStr::new();
        while let Some(c) = s.chars().next() {
            let (current, remainder) = s.split_at(c.len_utf8());
            for printable in state.strip_next(current) {
                result.push_str(printable);
            }
            s = remainder;
        }
        result
    }

    /// Model verifying incremental parsing: feeds the input one byte at a time
    fn strip_byte(s: &[u8]) -> Vec<u8> {
        let mut result = Vec::new();
        let mut state = StripBytes::default();
        for current in s.chunks(1) {
            for printable in state.strip_next(current) {
                result.extend(printable);
            }
        }
        result
    }

    #[test]
    fn test_strip_bytes_multibyte() {
        let bytes = [240, 145, 141, 139];
        let expected = parser_strip(&bytes);
        let actual = String::from_utf8(strip_bytes(&bytes).into_vec()).unwrap();
        assert_eq!(expected, actual);
    }

    #[test]
    fn test_strip_byte_multibyte() {
        let bytes = [240, 145, 141, 139];
        let expected = parser_strip(&bytes);
        let actual = String::from_utf8(strip_byte(&bytes)).unwrap();
        assert_eq!(expected, actual);
    }

    #[test]
    fn test_strip_str_del() {
        let input = std::str::from_utf8(&[0x7f]).unwrap();
        let expected = "";
        let actual = strip_str(input).to_string();
        assert_eq!(expected, actual);
    }

    #[test]
    fn test_strip_byte_del() {
        let bytes = [0x7f];
        let expected = "";
        let actual = String::from_utf8(strip_byte(&bytes)).unwrap();
        assert_eq!(expected, actual);
    }

    proptest! {
        #[test]
        #[cfg_attr(miri, ignore)]  // See https://github.com/AltSysrq/proptest/issues/253
        fn strip_str_no_escapes(s in "\\PC*") {
            let expected = parser_strip(s.as_bytes());
            let actual = strip_str(&s).to_string();
            assert_eq!(expected, actual);
        }

        #[test]
        #[cfg_attr(miri, ignore)]  // See https://github.com/AltSysrq/proptest/issues/253
        fn strip_char_no_escapes(s in "\\PC*") {
            let expected = parser_strip(s.as_bytes());
            let actual = strip_char(&s);
            assert_eq!(expected, actual);
        }

        #[test]
        #[cfg_attr(miri, ignore)]  // See https://github.com/AltSysrq/proptest/issues/253
        fn strip_bytes_no_escapes(s in "\\PC*") {
            let expected = parser_strip(s.as_bytes());
            let actual = String::from_utf8(strip_bytes(s.as_bytes()).into_vec()).unwrap();
            assert_eq!(expected, actual);
        }

        #[test]
        #[cfg_attr(miri, ignore)]  // See https://github.com/AltSysrq/proptest/issues/253
        fn strip_byte_no_escapes(s in "\\PC*") {
            let expected = parser_strip(s.as_bytes());
            let actual = String::from_utf8(strip_byte(s.as_bytes())).unwrap();
            assert_eq!(expected, actual);
        }
    }
}