/// Selects which whitespace control characters survive stripping.
///
/// The derived `Default` yields both flags `false`, which is the same value
/// as `StripPolicy::STRICT`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub struct StripPolicy {
/// When `true`, line feed (U+000A) is kept; when `false` it is removed.
pub keep_newline: bool,
/// When `true`, horizontal tab (U+0009) is kept; when `false` it is removed.
pub keep_tab: bool,
}
impl StripPolicy {
    /// Keeps neither line feed nor tab.
    pub const STRICT: Self = Self::with_flags(false, false);

    /// Keeps line feed but not tab.
    pub const KEEP_NEWLINE: Self = Self::with_flags(true, false);

    /// Keeps both line feed and tab.
    pub const KEEP_BOTH: Self = Self::with_flags(true, true);

    /// Builds a policy from its two flags; `const` so the presets above can use it.
    const fn with_flags(keep_newline: bool, keep_tab: bool) -> Self {
        Self {
            keep_newline,
            keep_tab,
        }
    }
}
/// Returns a copy of `s` containing only the characters `keep_char` accepts
/// under `policy`.
///
/// Characters are judged one at a time, so the printable remainder of an
/// escape sequence (for example `[31m` after the ESC is dropped) is left in
/// the output.
pub fn strip_controls(s: &str, policy: StripPolicy) -> String {
    // The result can never be longer than the input.
    let mut cleaned = String::with_capacity(s.len());
    for ch in s.chars() {
        if keep_char(ch, policy) {
            cleaned.push(ch);
        }
    }
    cleaned
}
/// Removes whole escape sequences from `s` and then filters the remaining
/// characters through `keep_char` under `policy`.
///
/// Sequences recognised after an ESC (`\x1b`):
/// - CSI: `ESC [` up to and including a final byte in `0x40..=0x7E`.
/// - OSC: `ESC ]` up to and including BEL (`\x07`) or `ESC \`.
/// - DCS / SOS / PM / APC: `ESC P`, `ESC X`, `ESC ^`, `ESC _` up to and
///   including `ESC \`.
/// - nF sequences (e.g. charset designation `ESC ( B`): one or more
///   intermediate bytes in `0x20..=0x2F` followed by one final byte in
///   `0x30..=0x7E`.
/// - Any other `ESC x` pair is dropped as a two-character sequence.
///
/// Every sequence scan is capped (256 characters, 4096 for the string-type
/// sequences) so an unterminated sequence cannot swallow the rest of the
/// input; once the cap is hit the remaining characters are treated as
/// ordinary text again. Input that ends in the middle of a sequence simply
/// ends the output.
pub fn strip_escapes(s: &str, policy: StripPolicy) -> String {
    let mut result = String::with_capacity(s.len());
    let mut chars = s.chars();
    while let Some(c) = chars.next() {
        if c != '\x1b' {
            if keep_char(c, policy) {
                result.push(c);
            }
            continue;
        }
        match chars.next() {
            Some('[') => {
                let mut n = 0;
                for next in &mut chars {
                    let cp = next as u32;
                    if (0x40..=0x7e).contains(&cp) {
                        break;
                    }
                    n += 1;
                    if n >= 256 {
                        break;
                    }
                }
            }
            Some(']') => {
                let mut n = 0;
                while let Some(next) = chars.next() {
                    if next == '\x07' {
                        break;
                    }
                    if next == '\x1b' {
                        // Only `ESC \` (ST) ends the OSC; look ahead on a
                        // clone so any other ESC stays part of the payload.
                        let mut peek = chars.clone();
                        if peek.next() == Some('\\') {
                            chars = peek;
                            break;
                        }
                    }
                    n += 1;
                    if n >= 256 {
                        break;
                    }
                }
            }
            Some('P') | Some('X') | Some('^') | Some('_') => {
                let mut prev = '\0';
                let mut n = 0;
                for next in &mut chars {
                    if prev == '\x1b' && next == '\\' {
                        break;
                    }
                    prev = next;
                    n += 1;
                    if n >= 4096 {
                        break;
                    }
                }
            }
            Some('\x20'..='\x2f') => {
                // nF sequence: swallow further intermediates and exactly one
                // final byte. Without this the final byte (the `B` in
                // `ESC ( B`) would leak into the output as text.
                let mut n = 0;
                loop {
                    let mut peek = chars.clone();
                    match peek.next() {
                        Some('\x20'..='\x2f') => {
                            chars = peek;
                            n += 1;
                            if n >= 256 {
                                break;
                            }
                        }
                        Some('\x30'..='\x7e') => {
                            chars = peek;
                            break;
                        }
                        // Not a valid final byte: leave it for the main loop.
                        _ => break,
                    }
                }
            }
            Some(_) => {}
            None => break,
        }
    }
    result
}
/// Removes every escape sequence from `s` except SGR (`ESC [ params m`),
/// leaving all other characters untouched.
///
/// Returns `Cow::Borrowed(s)` when `s` contains no ESC at all; otherwise an
/// owned string is built (even if nothing ends up being removed).
///
/// A CSI sequence is kept only when its final byte is `m` **and** everything
/// between `ESC [` and the `m` is a digit, `;` or `:`. CSI sequences that end
/// in `m` but carry a private marker or intermediate byte (for example
/// `ESC [ > 4 ; 2 m`) are not SGR and are dropped.
///
/// Dropped sequences:
/// - every other CSI (`ESC [` ... final byte in `0x40..=0x7E`),
/// - OSC (`ESC ]` ... BEL or `ESC \`),
/// - DCS / SOS / PM / APC (`ESC P`, `ESC X`, `ESC ^`, `ESC _` ... `ESC \`),
/// - nF sequences such as `ESC ( B` (intermediates `0x20..=0x2F`, then one
///   final byte in `0x30..=0x7E`),
/// - any other two-character `ESC x` pair.
///
/// Scans are capped (256 characters, 4096 for the string-type sequences);
/// when a cap is hit the partial sequence is dropped and the remaining
/// characters are treated as ordinary text.
pub fn strip_non_sgr_escapes(s: &str) -> std::borrow::Cow<'_, str> {
    if !s.contains('\x1b') {
        return std::borrow::Cow::Borrowed(s);
    }
    let mut out = String::with_capacity(s.len());
    let mut chars = s.chars();
    while let Some(c) = chars.next() {
        if c != '\x1b' {
            out.push(c);
            continue;
        }
        match chars.next() {
            Some('[') => {
                // Remember where the CSI body starts so a kept sequence can be
                // copied straight from the input instead of being rebuilt.
                let body = chars.as_str();
                let mut params_only = true;
                let mut sgr = false;
                let mut n = 0;
                for next in &mut chars {
                    if (0x40..=0x7e).contains(&(next as u32)) {
                        sgr = next == 'm' && params_only;
                        break;
                    }
                    if !matches!(next, '0'..='9' | ';' | ':') {
                        params_only = false;
                    }
                    n += 1;
                    if n >= 256 {
                        break;
                    }
                }
                if sgr {
                    let consumed = body.len() - chars.as_str().len();
                    out.push_str("\x1b[");
                    out.push_str(&body[..consumed]);
                }
            }
            Some(']') => {
                let mut n = 0;
                while let Some(next) = chars.next() {
                    if next == '\x07' {
                        break;
                    }
                    if next == '\x1b' {
                        // Only `ESC \` (ST) ends the OSC; look ahead on a
                        // clone so any other ESC stays part of the payload.
                        let mut peek = chars.clone();
                        if peek.next() == Some('\\') {
                            chars = peek;
                            break;
                        }
                    }
                    n += 1;
                    if n >= 256 {
                        break;
                    }
                }
            }
            Some('P') | Some('X') | Some('^') | Some('_') => {
                let mut prev = '\0';
                let mut n = 0;
                for next in &mut chars {
                    if prev == '\x1b' && next == '\\' {
                        break;
                    }
                    prev = next;
                    n += 1;
                    if n >= 4096 {
                        break;
                    }
                }
            }
            Some('\x20'..='\x2f') => {
                // nF sequence: swallow further intermediates and exactly one
                // final byte so the final (e.g. `B` in `ESC ( B`) does not
                // leak into the output as text.
                let mut n = 0;
                loop {
                    let mut peek = chars.clone();
                    match peek.next() {
                        Some('\x20'..='\x2f') => {
                            chars = peek;
                            n += 1;
                            if n >= 256 {
                                break;
                            }
                        }
                        Some('\x30'..='\x7e') => {
                            chars = peek;
                            break;
                        }
                        // Not a valid final byte: leave it for the main loop.
                        _ => break,
                    }
                }
            }
            Some(_) => {}
            None => break,
        }
    }
    std::borrow::Cow::Owned(out)
}
/// Removes every escape sequence from `s`, returning only the text payload.
///
/// Sequences recognised after an ESC (`\x1b`):
/// - CSI: `ESC [` up to and including a final byte in `0x40..=0x7E`.
/// - OSC: `ESC ]` up to and including BEL (`\x07`) or `ESC \`.
/// - DCS / SOS / PM / APC: `ESC P`, `ESC X`, `ESC ^`, `ESC _` up to and
///   including `ESC \`.
/// - nF sequences (e.g. charset designation `ESC ( B`): one or more
///   intermediate bytes in `0x20..=0x2F` followed by one final byte in
///   `0x30..=0x7E`.
/// - Any other `ESC x` pair is dropped as a two-character sequence.
///
/// Characters that are not part of an escape sequence are copied unchanged;
/// other control characters are not filtered here.
///
/// Every sequence scan is capped (256 characters, 4096 for the string-type
/// sequences); once the cap is hit the remaining characters are treated as
/// ordinary text. Input that ends in the middle of a sequence simply ends
/// the output.
pub fn strip_ansi(s: &str) -> String {
    if !s.contains('\x1b') {
        return s.to_string();
    }
    let mut out = String::with_capacity(s.len());
    let mut chars = s.chars();
    while let Some(c) = chars.next() {
        if c != '\x1b' {
            out.push(c);
            continue;
        }
        match chars.next() {
            Some('[') => {
                let mut n = 0;
                for next in &mut chars {
                    if (0x40..=0x7e).contains(&(next as u32)) {
                        break;
                    }
                    n += 1;
                    if n >= 256 {
                        break;
                    }
                }
            }
            Some(']') => {
                let mut n = 0;
                while let Some(next) = chars.next() {
                    if next == '\x07' {
                        break;
                    }
                    if next == '\x1b' {
                        // Only `ESC \` (ST) ends the OSC; look ahead on a
                        // clone so any other ESC stays part of the payload.
                        let mut peek = chars.clone();
                        if peek.next() == Some('\\') {
                            chars = peek;
                            break;
                        }
                    }
                    n += 1;
                    if n >= 256 {
                        break;
                    }
                }
            }
            Some('P') | Some('X') | Some('^') | Some('_') => {
                let mut prev = '\0';
                let mut n = 0;
                for next in &mut chars {
                    if prev == '\x1b' && next == '\\' {
                        break;
                    }
                    prev = next;
                    n += 1;
                    if n >= 4096 {
                        break;
                    }
                }
            }
            Some('\x20'..='\x2f') => {
                // nF sequence: swallow further intermediates and exactly one
                // final byte. Without this the final byte (the `B` in
                // `ESC ( B`) would leak into the output as text.
                let mut n = 0;
                loop {
                    let mut peek = chars.clone();
                    match peek.next() {
                        Some('\x20'..='\x2f') => {
                            chars = peek;
                            n += 1;
                            if n >= 256 {
                                break;
                            }
                        }
                        Some('\x30'..='\x7e') => {
                            chars = peek;
                            break;
                        }
                        // Not a valid final byte: leave it for the main loop.
                        _ => break,
                    }
                }
            }
            Some(_) => {}
            None => break,
        }
    }
    out
}
/// Decides whether a single character survives stripping under `policy`.
///
/// Line feed and tab follow the corresponding policy flag. Every other C0
/// control (U+0000..=U+001F), DEL (U+007F) and the C1 controls
/// (U+0080..=U+009F) are rejected; all remaining characters are kept.
fn keep_char(c: char, policy: StripPolicy) -> bool {
    match c {
        '\n' => policy.keep_newline,
        '\t' => policy.keep_tab,
        // C0 controls, then DEL and the C1 block (contiguous: 0x7F..=0x9F).
        '\u{00}'..='\u{1f}' | '\u{7f}'..='\u{9f}' => false,
        _ => true,
    }
}
// Unit tests for the stripping helpers. Inputs are written with explicit
// escapes (`\x1b`, `\x07`, `\u{9b}`) so the control characters are visible.
#[cfg(test)]
mod tests {
use super::*;
// `strip_controls` drops control characters one by one, so the ESC goes but
// the printable `[31m` that followed it stays.
#[test]
fn strict_blocks_all_controls() {
let s = "hello\x1b[31m world\u{9b}\x07\x00\t\n!";
let out = strip_controls(s, StripPolicy::STRICT);
assert_eq!(out, "hello[31m world!");
}
// KEEP_NEWLINE keeps `\n` while tab and ESC are still removed.
#[test]
fn keep_newline_preserves_lf_only() {
let s = "line1\nline2\x1b[31m\tend";
let out = strip_controls(s, StripPolicy::KEEP_NEWLINE);
assert_eq!(out, "line1\nline2[31mend");
}
// KEEP_BOTH keeps `\n` and `\t`; ESC is still removed.
#[test]
fn keep_both_preserves_lf_and_tab() {
let s = "a\tb\nc\x1b[0md";
let out = strip_controls(s, StripPolicy::KEEP_BOTH);
assert_eq!(out, "a\tb\nc[0md");
}
// The single-character C1 CSI (U+009B) must be removed under every policy.
#[test]
fn c1_csi_blocked() {
let s = "before\u{9b}5;31mafter";
for policy in [
StripPolicy::STRICT,
StripPolicy::KEEP_NEWLINE,
StripPolicy::KEEP_BOTH,
] {
let out = strip_controls(s, policy);
assert!(
!out.contains('\u{9b}'),
"C1 CSI survived policy {policy:?}: {out:?}"
);
}
}
// SGR sequences vanish entirely; the text between them is kept.
#[test]
fn strip_ansi_removes_sgr_sequences_keeps_payload() {
let s = "hello \x1b[31mred\x1b[0m world";
assert_eq!(strip_ansi(s), "hello red world");
}
// Back-to-back sequences are each consumed in full.
#[test]
fn strip_ansi_handles_consecutive_and_nested_escapes() {
let s = "\x1b[1m\x1b[31mbold-red\x1b[0m\x1b[0m";
assert_eq!(strip_ansi(s), "bold-red");
}
// `ESC c` is dropped as a two-character sequence.
#[test]
fn strip_ansi_drops_two_byte_esc_sequence() {
assert_eq!(strip_ansi("a\x1bcb"), "ab");
}
// OSC (BEL-terminated), DCS (`ESC \`-terminated) and a private-mode CSI are
// removed together with their payloads.
#[test]
fn strip_ansi_drops_osc_and_dcs_fully() {
assert_eq!(strip_ansi("a\x1b]0;title\x07b"), "ab");
assert_eq!(strip_ansi("a\x1bPq...data\x1b\\b"), "ab");
assert_eq!(strip_ansi("x\x1b[?1000l\x1b[32mok\x1b[0m"), "xok");
}
// Stripping directly must give the same text as first running
// `strip_non_sgr_escapes` and then `strip_ansi` on its result.
#[test]
fn strip_ansi_matches_painter_glyphs() {
for s in [
"plain",
"a\x1b[31mred\x1b[0mb",
"a\x1b]0;t\x07b", "a\x1bcb", "a\x1b[?1000lb", "a\x1b[2;5Hb", ] {
let painter = strip_ansi(&strip_non_sgr_escapes(s));
assert_eq!(strip_ansi(s), painter, "mismatch for {s:?}");
}
}
// Multi-byte characters outside escape sequences are left intact.
#[test]
fn strip_ansi_preserves_unicode_payload() {
let s = "\x1b[32m日本語\x1b[0m 🚀";
assert_eq!(strip_ansi(s), "日本語 🚀");
}
// CSI sequences with final bytes other than `m` are removed too.
#[test]
fn strip_ansi_handles_non_sgr_csi() {
let s = "before\x1b[2;5Hafter\x1b[Kend";
assert_eq!(strip_ansi(s), "beforeafterend");
}
// A CSI cut off by end of input is dropped rather than emitted.
#[test]
fn strip_ansi_handles_truncated_escape() {
let s = "abc\x1b[31";
assert_eq!(strip_ansi(s), "abc");
}
// Non-ASCII text is not a control character and passes every policy.
#[test]
fn non_ascii_letters_pass_through() {
let s = "naïve 日本語 🚀";
for policy in [
StripPolicy::STRICT,
StripPolicy::KEEP_NEWLINE,
StripPolicy::KEEP_BOTH,
] {
assert_eq!(strip_controls(s, policy), s);
}
}
// Unlike `strip_controls`, `strip_escapes` removes the OSC payload as well.
#[test]
fn strip_escapes_strips_osc_sequence_with_payload() {
let s = "hello\x1b]0;EVIL\x07world";
let out = strip_escapes(s, StripPolicy::STRICT);
assert_eq!(out, "helloworld");
}
// A complete CSI sequence is removed including parameters and final byte.
#[test]
fn strip_escapes_strips_csi_sequence() {
let s = "before\x1b[2Jafter";
let out = strip_escapes(s, StripPolicy::STRICT);
assert_eq!(out, "beforeafter");
}
// SGR receives no special treatment in `strip_escapes`; it is removed.
#[test]
fn strip_escapes_strips_sgr_sequence() {
let s = "\x1b[31mred\x1b[0m";
let out = strip_escapes(s, StripPolicy::STRICT);
assert_eq!(out, "red");
}
// DCS is consumed up to and including the `ESC \` terminator.
#[test]
fn strip_escapes_strips_dcs_sequence() {
let s = "start\x1bP0;data\x1b\\end";
let out = strip_escapes(s, StripPolicy::STRICT);
assert_eq!(out, "startend");
}
// With KEEP_BOTH, `\n` and `\t` survive while BEL and the SGR are removed.
#[test]
fn strip_escapes_preserves_newline_and_tab_with_keep_both() {
let s = "line1\n\tindented\x07line2\x1b[31mstyled";
let out = strip_escapes(s, StripPolicy::KEEP_BOTH);
assert_eq!(out, "line1\n\tindentedline2styled");
}
// A CSI with no final byte before end of input is dropped entirely.
#[test]
fn strip_escapes_handles_truncated_csi() {
let s = "ab\x1b[9999999999";
let out = strip_escapes(s, StripPolicy::STRICT);
assert_eq!(out, "ab");
}
// An ESC inside an OSC that is not followed by `\` does not end the OSC;
// the payload runs on to the BEL.
#[test]
fn strip_escapes_handles_esc_inside_osc_not_st() {
let s = "a\x1b]0;payload\x1b[2J\x07b";
let out = strip_escapes(s, StripPolicy::STRICT);
assert_eq!(out, "ab");
}
// SGR sequences pass through unchanged; private-mode CSI, cursor CSI,
// `ESC c` and OSC are removed.
#[test]
fn non_sgr_strip_keeps_sgr_drops_the_rest() {
let s = "\x1b[1mbold\x1b[0m plain \x1b[31mred\x1b[m";
assert_eq!(strip_non_sgr_escapes(s), s, "pure SGR is untouched");
assert_eq!(
strip_non_sgr_escapes("before\x1b[?1000lafter"),
"beforeafter"
);
assert_eq!(
strip_non_sgr_escapes("\x1b[?1049h\x1b[Hx\x1bc\x1b]0;t\x07\x1b[32mok\x1b[0m"),
"x\x1b[32mok\x1b[0m"
);
}
// Input without any ESC is returned as a borrow, not a new allocation.
#[test]
fn non_sgr_strip_borrows_when_no_escape() {
assert!(matches!(
strip_non_sgr_escapes("plain text"),
std::borrow::Cow::Borrowed(_)
));
}
}