use std::rc::Rc;
use unicode_segmentation::UnicodeSegmentation;
use super::ansi_codes::ansi_codes_to_string;
use super::diff::diff_ansi_codes;
use super::reduce::reduce_ansi_codes_in_place;
use super::tokenize::is_fullwidth_grapheme;
use super::types::{AnsiToken, StyledChar, Token, empty_styles};
/// Converts a token stream into a flat list of styled characters.
///
/// Tokens are processed in order while tracking the set of active ANSI codes:
///
/// - `Token::Ansi` is folded into the active set with `reduce_ansi_codes_in_place`,
///   after which a new shared snapshot of the set is taken.
/// - `Token::Char` produces one [`StyledChar`] that shares the current snapshot.
/// - `Token::Control` produces nothing.
///
/// Characters that are not separated by an ANSI token share the same `Rc`
/// allocation for their `styles`.
pub fn styled_chars_from_tokens(tokens: &[Token]) -> Vec<StyledChar> {
    // Mutable working set of active codes; `current` is its immutable snapshot.
    let mut codes: Vec<AnsiToken> = Vec::new();
    let mut current: Rc<[AnsiToken]> = empty_styles();
    // Every token yields at most one styled char, so `tokens.len()` is an upper bound.
    let mut ret = Vec::with_capacity(tokens.len());
    for token in tokens {
        match token {
            Token::Ansi(a) => {
                reduce_ansi_codes_in_place(&mut codes, std::slice::from_ref(a));
                // Re-snapshot only when the style set changes; when nothing is active,
                // use `empty_styles()` rather than allocating a new empty slice.
                current = if codes.is_empty() {
                    empty_styles()
                } else {
                    Rc::from(codes.as_slice())
                };
            }
            Token::Char(c) => {
                ret.push(StyledChar {
                    value: c.value.into(),
                    full_width: c.full_width,
                    // Reference-count bump only; the style slice itself is not copied.
                    styles: Rc::clone(&current),
                });
            }
            // Control tokens carry no visible text and do not affect the active styles.
            Token::Control(_) => {}
        }
    }
    ret
}
/// Builds styled characters directly from text that contains no ANSI escapes.
///
/// The input is split into extended grapheme clusters; each cluster becomes one
/// [`StyledChar`] whose `styles` is a clone of a single `empty_styles()` handle.
/// The first code point of the cluster (or `0` if there is none) is passed to
/// `is_fullwidth_grapheme` to decide `full_width`.
///
/// In debug builds this asserts that `input` contains neither ESC (`U+001B`)
/// nor the C1 CSI character (`U+009B`).
pub(crate) fn styled_chars_from_plain(input: &str) -> Vec<StyledChar> {
    debug_assert!(
        !input.contains(['\u{1B}', '\u{9B}']),
        "styled_chars_from_plain requires ANSI-free input"
    );
    // One shared handle, cloned per character.
    let shared_styles = empty_styles();
    let mut out = Vec::new();
    for grapheme in input.graphemes(true) {
        let first_cp = match grapheme.chars().next() {
            Some(ch) => u32::from(ch),
            None => 0,
        };
        out.push(StyledChar {
            value: grapheme.into(),
            full_width: is_fullwidth_grapheme(grapheme, first_cp),
            styles: Rc::clone(&shared_styles),
        });
    }
    out
}
pub fn styled_chars_to_string(chars: &[StyledChar]) -> String {
styled_chars_to_string_borrowed(chars.iter())
}
/// Serializes any iterable of borrowed styled characters into a new `String`.
///
/// Allocates an empty buffer, lets [`styled_chars_to_string_into`] fill it,
/// and returns the buffer.
pub(crate) fn styled_chars_to_string_borrowed<'a, I>(chars: I) -> String
where
    I: IntoIterator<Item = &'a StyledChar>,
{
    let mut buffer = String::new();
    styled_chars_to_string_into(chars, &mut buffer);
    buffer
}
/// Appends the ANSI-encoded rendering of `chars` to `out`.
///
/// - Before the first character, its full style set is written.
/// - Before each later character, only the difference from the previous
///   character's styles is written. The diff is skipped when both characters
///   share the same `Rc` allocation.
/// - After the last character, the difference from its styles to the empty
///   set is written.
///
/// Nothing is appended when `chars` yields no items.
pub(crate) fn styled_chars_to_string_into<'a, I>(chars: I, out: &mut String)
where
    I: IntoIterator<Item = &'a StyledChar>,
{
    let mut previous: Option<&StyledChar> = None;
    for current in chars {
        if let Some(before) = previous {
            // Pointer equality is a cheap test that the styles are unchanged.
            if !Rc::ptr_eq(&before.styles, &current.styles) {
                let transition = diff_ansi_codes(&before.styles, &current.styles);
                out.push_str(&ansi_codes_to_string(&transition));
            }
        } else {
            // First character: emit its full style set.
            out.push_str(&ansi_codes_to_string(&current.styles));
        }
        out.push_str(&current.value);
        previous = Some(current);
    }
    // Close whatever the final character left active.
    if let Some(last) = previous {
        let closing = diff_ansi_codes(&last.styles, &[]);
        out.push_str(&ansi_codes_to_string(&closing));
    }
}
// Unit tests for the token -> styled-char -> string pipeline in this module.
#[cfg(test)]
mod tests {
use super::*;
use crate::text::ansi_tokenize::tokenize::tokenize;
// Full round trip: tokenize the input, build styled chars, serialize back to a string.
fn pipeline(input: &str) -> String {
styled_chars_to_string(&styled_chars_from_tokens(&tokenize(input, None)))
}
// Input that already uses the specific close code must come back byte-for-byte.
#[test]
fn round_trip_normalized_literal() {
assert_eq!(pipeline("\x1B[31mred\x1B[39m"), "\x1B[31mred\x1B[39m");
}
// A generic reset (`\x1B[0m`) is re-emitted as the specific close code for the open style.
#[test]
fn reset_normalizes_to_specific_close() {
assert_eq!(pipeline("\x1B[31mred\x1B[0m"), "\x1B[31mred\x1B[39m");
}
// A compound SGR (`1;3;31`) is emitted as separate codes and closed in reverse order.
#[test]
fn compound_round_trip_exact() {
assert_eq!(
pipeline("\x1B[1;3;31mred\x1B[0m"),
"\x1B[1m\x1B[3m\x1B[31mred\x1B[39m\x1B[23m\x1B[22m"
);
}
// Feeding the pipeline its own output must not change it further.
#[test]
fn round_trip_is_idempotent() {
let once = pipeline("\x1B[1;31mhi\x1B[0m");
let twice = pipeline(&once);
assert_eq!(once, twice, "normalized output must be a fixed point");
}
// Text without any styling passes through without added escape codes.
#[test]
fn unstyled_string_unchanged() {
assert_eq!(pipeline("hello"), "hello");
}
// `a` and `b` sit inside the red span and carry one style; `c` follows the reset and has none.
#[test]
fn styled_chars_from_tokens_correct_styles() {
let tokens = tokenize("\x1B[31mab\x1B[0mc", None);
let styled = styled_chars_from_tokens(&tokens);
assert_eq!(styled.len(), 3);
assert_eq!(styled[0].styles.len(), 1);
assert_eq!(styled[0].styles[0].code, "\x1B[31m");
assert_eq!(styled[1].styles.len(), 1);
assert!(styled[2].styles.is_empty());
}
// The ANSI-free fast path must produce exactly what the tokenizer path produces.
#[test]
fn styled_chars_from_plain_matches_tokenizer() {
// Cases: empty, ASCII, box drawing, CJK, mixed-width text, emoji with a skin-tone
// modifier, and a regional-indicator flag.
let cases = [
"",
"hello",
"* item one",
"╭──────────╮",
"中文字",
"a中b",
"👍🏽 ok",
"🇺🇸 flag",
// Next line: combining mark, tab, the C1 character U+009D (not rejected by the
// fast path's debug assertion), and a variation selector after a CJK character.
"e\u{0301}x", "tab\tend", "C1-OSC:\u{9D}here", "mix 中\u{FE0F}!", ];
for input in cases {
let via_tokenizer = styled_chars_from_tokens(&tokenize(input, None));
let via_plain = styled_chars_from_plain(input);
assert_eq!(
via_plain, via_tokenizer,
"plain fast path diverged from tokenizer for {input:?}"
);
}
}
// The serializer's `Rc::ptr_eq` shortcut must not change the output: giving every
// character its own `Rc` allocation (so the shortcut never applies) must serialize
// to the same string.
#[test]
fn serializer_rc_sharing_is_unobservable() {
// Inputs: nested SGR with reset, plain text, truecolor plus underline, an OSC 8
// hyperlink, sequences introduced by U+009B, and the empty string.
let inputs = [
"\x1B[31mred and \x1B[1mbold\x1B[0m plain",
"plain only",
"\x1B[38;2;1;2;3mtruecolor\x1B[39m mix \x1B[4mu\x1B[24m",
"\x1B]8;;https://e.com\x07link\x1B]8;;\x07 tail",
"\u{9B}[31mc1 \u{9B}[1;4mcompound\u{9B}[0m tail",
"",
];
for input in inputs {
let shared = styled_chars_from_tokens(&tokenize(input, None));
// Copy each style slice into a fresh allocation so that no two chars share an `Rc`.
let unshared: Vec<StyledChar> = shared
.iter()
.map(|c| StyledChar {
value: c.value.clone(),
full_width: c.full_width,
styles: Rc::from(c.styles.to_vec()),
})
.collect();
assert_eq!(
styled_chars_to_string(&shared),
styled_chars_to_string(&unshared),
"Rc-sharing fast path diverged for {input:?}"
);
}
}
// The `\x1B]0;t\x07` sequence between `a` and `b` (expected to tokenize as a control
// token) must yield no styled char and must leave the active red style on `b`.
#[test]
fn control_tokens_dropped() {
let tokens = tokenize("\x1B[31ma\x1B]0;t\x07b\x1B[0m", None);
let styled = styled_chars_from_tokens(&tokens);
assert_eq!(styled.len(), 2);
assert_eq!(styled[0].value, "a");
assert_eq!(styled[1].value, "b");
assert_eq!(styled[1].styles[0].code, "\x1B[31m");
}
}