// C0 controls (7-bit).

/// BEL (U+0007).
const BELL: char = '\x07';
/// ESC (U+001B), the introducer of 7-bit escape sequences.
const ESC: char = '\x1b';

// C1 controls (single-character 8-bit forms), listed in code-point order.

/// DCS, Device Control String introducer (U+0090).
const C1_DCS: char = '\u{90}';
/// SOS, Start Of String introducer (U+0098).
const C1_SOS: char = '\u{98}';
/// CSI, Control Sequence Introducer (U+009B).
const C1_CSI: char = '\u{9B}';
/// ST, String Terminator (U+009C).
const ST: char = '\u{9C}';
/// OSC, Operating System Command introducer (U+009D).
const C1_OSC: char = '\u{9D}';
/// PM, Privacy Message introducer (U+009E).
const C1_PM: char = '\u{9E}';
/// APC, Application Program Command introducer (U+009F).
const C1_APC: char = '\u{9F}';
/// Returns `true` when `c` lies in the C1 control range U+0080..=U+009F.
fn is_c1_control(c: char) -> bool {
    matches!(c, '\u{80}'..='\u{9F}')
}
/// Returns `true` when `c` is a CSI parameter byte, i.e. in `0x30..=0x3F` (`0` through `?`).
fn is_csi_parameter(c: char) -> bool {
    matches!(c, '0'..='?')
}
/// Returns `true` when `c` is an intermediate byte, i.e. in `0x20..=0x2F` (space through `/`).
fn is_intermediate(c: char) -> bool {
    matches!(c, ' '..='/')
}
/// Returns `true` when `c` can terminate a CSI sequence, i.e. is in `0x40..=0x7E` (`@` through `~`).
fn is_csi_final(c: char) -> bool {
    matches!(c, '@'..='~')
}
/// Returns `true` when `c` can terminate a plain (non-CSI) escape sequence,
/// i.e. is in `0x30..=0x7E` (`0` through `~`).
fn is_escape_final(c: char) -> bool {
    matches!(c, '0'..='~')
}
/// Reports whether `text` contains an ESC character or any C1 control character.
///
/// `sanitize_ansi` uses this as its fast-path check: text for which this
/// returns `false` is returned unchanged.
pub(crate) fn has_ansi_control_characters(text: &str) -> bool {
    text.contains(|c: char| c == ESC || is_c1_control(c))
}
/// Layout of one parsed CSI sequence body, as produced by `read_csi`.
///
/// Every position is an index into the `chars` slice that was scanned.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct CsiBody {
    /// Index one past the final byte, i.e. where scanning resumes after the sequence.
    end: usize,
    /// Index one past the last parameter byte (equal to the scan start when there are none).
    params_end: usize,
    /// Index one past the last intermediate byte; equal to `params_end` when there are none.
    intermediates_end: usize,
    /// The final byte that terminates the sequence.
    final_char: char,
}
fn read_csi(chars: &[char], from: usize) -> Option<CsiBody> {
let mut i = from;
while i < chars.len() && is_csi_parameter(chars[i]) {
i += 1;
}
let params_end = i;
while i < chars.len() && is_intermediate(chars[i]) {
i += 1;
}
let intermediates_end = i;
let final_char = *chars.get(i)?;
if !is_csi_final(final_char) {
return None;
}
Some(CsiBody {
end: i + 1,
params_end,
intermediates_end,
final_char,
})
}
/// Scans `chars` from index `from` for the end of a control string.
///
/// Recognised terminators are the C1 `ST` character, the two-character
/// `ESC \` pair, and, only when `allow_bell_terminator` is set, `BEL`.
/// An `ESC ESC` pair is stepped over as a unit, so the second `ESC` of such a
/// pair is never combined with a following `\`.
///
/// Returns the index one past the terminator, or `None` if the input ends
/// before any terminator is found.
fn find_control_string_terminator(
    chars: &[char],
    from: usize,
    allow_bell_terminator: bool,
) -> Option<usize> {
    let mut pos = from;
    while let Some(&current) = chars.get(pos) {
        match current {
            BELL if allow_bell_terminator => return Some(pos + 1),
            ST => return Some(pos + 1),
            ESC => match chars.get(pos + 1) {
                Some('\\') => return Some(pos + 2),
                Some(&ESC) => pos += 2,
                _ => pos += 1,
            },
            _ => pos += 1,
        }
    }
    None
}
/// Parses a plain escape sequence body starting at `chars[from]` (the position
/// right after `ESC`): any number of intermediate bytes followed by one final
/// byte.
///
/// Returns the index one past the final byte, or `None` when the input ends
/// first or the character after the intermediates is not a valid final byte.
fn read_escape_sequence(chars: &[char], from: usize) -> Option<usize> {
    let tail = chars.get(from..)?;
    // The first non-intermediate character is the only candidate final byte.
    let final_offset = tail.iter().position(|&c| !is_intermediate(c))?;
    is_escape_final(tail[final_offset]).then_some(from + final_offset + 1)
}
/// Kind of control string opened by an introducer.
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
enum ControlString {
    /// Operating System Command: the only kind `sanitize_ansi` may pass
    /// through, and the only kind for which `BEL` is accepted as a terminator.
    Osc,
    /// Any other control string (DCS, PM, APC, SOS); always stripped.
    Other,
}
/// Classifies the character following `ESC` as a control-string introducer.
///
/// `]` opens an OSC; `P`, `X`, `^` and `_` open the other control strings.
/// Any other character is not a control-string introducer and yields `None`.
fn control_string_from_escape_introducer(c: char) -> Option<ControlString> {
    let kind = match c {
        ']' => ControlString::Osc,
        'P' | 'X' | '^' | '_' => ControlString::Other,
        _ => return None,
    };
    Some(kind)
}
fn control_string_from_c1_introducer(c: char) -> Option<ControlString> {
match c {
C1_OSC => Some(ControlString::Osc),
C1_DCS | C1_PM | C1_APC | C1_SOS => Some(ControlString::Other),
_ => None,
}
}
/// Returns `true` when every parameter character is an ASCII digit, `:` or `;`.
///
/// An empty parameter list is accepted.
fn params_are_sgr(params: &[char]) -> bool {
    !params
        .iter()
        .any(|&c| !matches!(c, '0'..='9' | ':' | ';'))
}
pub(crate) fn sanitize_ansi(text: String) -> String {
if !has_ansi_control_characters(&text) {
return text;
}
let chars: Vec<char> = text.chars().collect();
let mut out = String::with_capacity(text.len());
let mut i = 0;
while i < chars.len() {
let c = chars[i];
if c == ESC {
let Some(&following) = chars.get(i + 1) else {
return out;
};
if following == '[' {
let Some(csi) = read_csi(&chars, i + 2) else {
return out;
};
if csi.final_char == 'm'
&& csi.intermediates_end == csi.params_end
&& params_are_sgr(&chars[i + 2..csi.params_end])
{
out.extend(&chars[i..csi.end]);
}
i = csi.end;
continue;
}
if let Some(kind) = control_string_from_escape_introducer(following) {
let Some(end) =
find_control_string_terminator(&chars, i + 2, kind == ControlString::Osc)
else {
return out;
};
if kind == ControlString::Osc {
out.extend(&chars[i..end]);
}
i = end;
continue;
}
match read_escape_sequence(&chars, i + 1) {
Some(end) => {
i = end;
continue;
}
None => {
if is_intermediate(following) {
return out;
}
i += 1;
continue;
}
}
}
if c == C1_CSI {
let Some(csi) = read_csi(&chars, i + 1) else {
return out;
};
if csi.final_char == 'm'
&& csi.intermediates_end == csi.params_end
&& params_are_sgr(&chars[i + 1..csi.params_end])
{
out.extend(&chars[i..csi.end]);
}
i = csi.end;
continue;
}
if let Some(kind) = control_string_from_c1_introducer(c) {
let Some(end) =
find_control_string_terminator(&chars, i + 1, kind == ControlString::Osc)
else {
return out;
};
if kind == ControlString::Osc {
out.extend(&chars[i..end]);
}
i = end;
continue;
}
if is_c1_control(c) {
i += 1;
continue;
}
out.push(c);
i += 1;
}
out
}
// Unit tests for `sanitize_ansi`: which sequences are stripped, how malformed
// input is handled, and which sequences are passed through unchanged.
#[cfg(test)]
mod tests {
use super::*;
// Runs `sanitize_ansi` on an owned copy of `input`.
fn s(input: &str) -> String {
sanitize_ansi(input.to_string())
}
// --- Sequences that must be removed while surrounding text is kept ---
#[test]
fn strips_csi_clear_screen() {
assert_eq!(s("A\x1b[2JB"), "AB");
}
#[test]
fn strips_csi_cursor_up() {
assert_eq!(s("A\x1b[1AB"), "AB");
}
#[test]
fn strips_csi_erase_line() {
assert_eq!(s("A\x1b[2KB"), "AB");
}
// `?` is a CSI parameter byte but not a digit, `:` or `;`, and the final byte is `l`.
#[test]
fn strips_private_mode_csi() {
assert_eq!(s("A\x1b[?25lB"), "AB");
}
// A final `m` alone is not enough: the space is an intermediate byte.
#[test]
fn strips_csi_with_intermediate_even_when_final_is_m() {
assert_eq!(s("A\x1b[1 mB"), "AB");
}
// Two-character escapes `ESC 7` and `ESC 8`.
#[test]
fn strips_decsc_decrc() {
assert_eq!(s("A\x1b7B"), "AB");
assert_eq!(s("A\x1b8B"), "AB");
}
// `B` is a valid escape final byte, so it is consumed together with the ESC.
#[test]
fn strips_esc_with_letter_final_consuming_it() {
assert_eq!(s("A\x1bB"), "A");
}
// DCS payload and its `ESC \` terminator are removed.
#[test]
fn strips_dcs_with_payload() {
assert_eq!(s("A\x1bPq\x1b\\B"), "AB");
}
// U+0085 is a C1 control that introduces no sequence.
#[test]
fn strips_standalone_c1_nel() {
assert_eq!(s("A\u{85}B"), "AB");
}
#[test]
fn strips_c1_csi_clear_screen() {
assert_eq!(s("A\u{9b}2JB"), "AB");
}
// A string terminator with no open control string is just another C1 control.
#[test]
fn strips_lone_st() {
assert_eq!(s("A\u{9c}B"), "AB");
}
// --- Malformed or truncated input ---
// U+0085 is neither an intermediate nor a final byte, so only the ESC is
// dropped; the C1 character is then removed on its own.
#[test]
fn lone_esc_before_non_sequence_char_drops_only_the_esc() {
assert_eq!(s("A\x1b\u{85}B"), "AB");
}
#[test]
fn malformed_csi_at_eof_drops_remainder() {
assert_eq!(s("A\x1b[31"), "A");
}
#[test]
fn lone_esc_at_eof_drops_it() {
assert_eq!(s("A\x1b"), "A");
}
// NOTE(review): in the second assertion `B` (0x42) is a valid escape final
// byte, so `ESC SP B` is consumed as a complete sequence rather than taking
// the "no final byte" path; the expected "A" cannot tell the two apart.
#[test]
fn esc_intermediate_without_final_drops_remainder() {
assert_eq!(s("A\x1b "), "A");
assert_eq!(s("A\x1b B"), "A");
}
#[test]
fn unterminated_osc_drops_remainder() {
assert_eq!(s("A\x1b]8;;x"), "A");
}
// --- Sequences that must pass through unchanged ---
#[test]
fn keeps_sgr_pair() {
assert_eq!(s("A\x1b[31mR\x1b[39mB"), "A\x1b[31mR\x1b[39mB");
}
#[test]
fn keeps_colon_parameter_sgr() {
assert_eq!(s("A\x1b[38:5:196mR\x1b[39mB"), "A\x1b[38:5:196mR\x1b[39mB");
}
// The C1 introducer character itself is preserved, not rewritten to `ESC [`.
#[test]
fn keeps_c1_csi_sgr_with_introducer_byte() {
assert_eq!(s("A\u{9b}31mR\u{9b}39mB"), "A\u{9b}31mR\u{9b}39mB");
}
#[test]
fn keeps_osc8_hyperlink_bel_terminated() {
let t = "A\x1b]8;;https://x\x07L\x1b]8;;\x07B";
assert_eq!(s(t), t);
}
#[test]
fn keeps_osc8_hyperlink_esc_backslash_terminated() {
let t = "A\x1b]8;;https://x\x1b\\L\x1b]8;;\x1b\\B";
assert_eq!(s(t), t);
}
#[test]
fn keeps_osc0_title() {
let t = "A\x1b]0;title\x07B";
assert_eq!(s(t), t);
}
#[test]
fn keeps_c1_osc_with_introducer_byte() {
let t = "A\u{9d}0;t\x07B";
assert_eq!(s(t), t);
}
// Text without ESC or C1 controls, including newlines, tabs and non-BMP
// characters, is returned as-is.
#[test]
fn plain_text_passthrough() {
assert_eq!(s("AB"), "AB");
assert_eq!(
s("multi\nline\ttext \u{1F600}"),
"multi\nline\ttext \u{1F600}"
);
}
}