farben-core 0.14.0-beta.4

The internal core library for Farben
Documentation
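//! Utilities for stripping ANSI escape sequences and farben markup from strings.
//!
//! ANSI stripping covers CSI sequences only: the `ESC [ ... <letter>` form
//! used by SGR color codes (e.g. `\x1b[31m`, `\x1b[0m`). OSC, DCS, and
//! other escape sequence types are passed through unchanged.
//!
//! The module also provides [`strip_markup`], which removes farben markup tags,
//! and [`escape_tags`], which doubles `[` and `]` so they render literally.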

// Unit tests for `strip_ansi` and `escape_tags`. Each test pins one input/output
// pair so a failure names the exact scenario that regressed.
#[cfg(test)]
mod tests {
    use super::*;

    // ---- strip_ansi -------------------------------------------------------

    // Empty input yields empty output.
    #[test]
    fn test_strip_ansi_empty_string() {
        assert_eq!(strip_ansi(""), "");
    }

    // Text without any escape byte is returned unchanged.
    #[test]
    fn test_strip_ansi_plain_text_unchanged() {
        assert_eq!(strip_ansi("hello world"), "hello world");
    }

    // A color-on / reset pair around text leaves only the text.
    #[test]
    fn test_strip_ansi_single_color_sequence() {
        assert_eq!(strip_ansi("\x1b[31mred\x1b[0m"), "red");
    }

    // ESC followed by something other than `[` is not a CSI sequence and is kept.
    #[test]
    fn test_strip_ansi_bare_esc_not_followed_by_bracket_is_preserved() {
        assert_eq!(strip_ansi("\x1bhello"), "\x1bhello");
    }

    // A trailing ESC with nothing after it is kept.
    #[test]
    fn test_strip_ansi_bare_esc_at_end_is_preserved() {
        assert_eq!(strip_ansi("text\x1b"), "text\x1b");
    }

    // Input made up solely of CSI sequences collapses to the empty string.
    #[test]
    fn test_strip_ansi_sequences_only_produces_empty() {
        assert_eq!(strip_ansi("\x1b[1m\x1b[31m\x1b[0m"), "");
    }

    // Sequences interleaved with text are removed; the last sequence here has no
    // matching reset, and the text after it is still kept.
    #[test]
    fn test_strip_ansi_mixed_content_preserves_text() {
        assert_eq!(
            strip_ansi("\x1b[1mhello\x1b[0m world\x1b[32m!"),
            "hello world!"
        );
    }

    // Multi-parameter sequence (`38;2;r;g;b`) with `;` separators is stripped whole.
    #[test]
    fn test_strip_ansi_rgb_sequence_stripped() {
        assert_eq!(strip_ansi("\x1b[38;2;255;0;0mred\x1b[0m"), "red");
    }

    // Multi-parameter sequence (`38;5;n`) is stripped whole.
    #[test]
    fn test_strip_ansi_ansi256_sequence_stripped() {
        assert_eq!(strip_ansi("\x1b[38;5;200mcolor\x1b[0m"), "color");
    }

    // ---- escape_tags ------------------------------------------------------

    // Empty input yields empty output.
    #[test]
    fn test_escape_tags_empty_string() {
        assert_eq!(escape_tags(""), "");
    }

    // Text without brackets is returned unchanged.
    #[test]
    fn test_escape_tags_no_brackets_unchanged() {
        assert_eq!(escape_tags("hello world"), "hello world");
    }

    // Both the opening and the closing bracket of a tag are doubled.
    #[test]
    fn test_escape_tags_doubles_opening_bracket() {
        assert_eq!(escape_tags("[bold]"), "[[bold]]");
    }

    // Same doubling applies to a `[/]` tag.
    #[test]
    fn test_escape_tags_doubles_closing_bracket() {
        assert_eq!(escape_tags("[/]"), "[[/]]");
    }

    // Characters around and inside the brackets are left untouched.
    #[test]
    fn test_escape_tags_mixed_text_and_tags() {
        assert_eq!(escape_tags("a[b]c"), "a[[b]]c");
    }
}

/// Remove all CSI ANSI escape sequences from `input` and return the plain text.
///
/// A CSI sequence is recognised as `ESC [`, followed by any number of parameter
/// or intermediate bytes (`0x20..=0x3F`, e.g. digits, `;`, `?`, space), and
/// terminated by one final byte in `0x40..=0x7E`. The final byte is usually a
/// letter (`m` for SGR colors) but may also be a symbol such as `~` or `@`.
/// Each such sequence is consumed and dropped.
///
/// Everything else is passed through as-is:
///
/// * A bare `ESC` that is not followed by `[` is preserved. The character after
///   it is scanned normally, so `ESC ESC [ 3 1 m` keeps the first `ESC` and
///   strips the CSI sequence that starts at the second one.
/// * A sequence cut short by the end of input is dropped.
/// * A sequence interrupted by a character that cannot appear inside a CSI
///   sequence (a control character or a non-ASCII character) is dropped up to
///   that point; the interrupting character and the text after it are kept.
///
/// Typical uses: measuring display width of colored strings, writing plain-text
/// log lines from pre-colored output, or feeding output to tools that do not
/// interpret ANSI codes.
///
/// # Arguments
///
/// * `input` - A string slice that may contain CSI ANSI escape sequences.
///
/// # Returns
///
/// A new [`String`] with all CSI sequences removed and all other content preserved.
///
/// # Examples
///
/// ```
/// use farben_core::strip::strip_ansi;
///
/// let colored = "\x1b[31mred text\x1b[0m";
/// assert_eq!(strip_ansi(colored), "red text");
///
/// // Sequences whose final byte is not a letter are stripped as well.
/// let pasted = "\x1b[200~pasted\x1b[201~";
/// assert_eq!(strip_ansi(pasted), "pasted");
///
/// // Bare ESC bytes not followed by '[' are preserved.
/// let bare_esc = "\x1bhello";
/// assert_eq!(strip_ansi(bare_esc), "\x1bhello");
/// ```
#[must_use]
pub fn strip_ansi(input: &str) -> String {
    // The output can never be longer than the input, so one allocation suffices.
    let mut output = String::with_capacity(input.len());
    let mut chars = input.chars().peekable();

    while let Some(c) = chars.next() {
        if c != '\x1b' {
            output.push(c);
            continue;
        }

        // Only peek here: if the next character is not '[', it must go through
        // the main loop again because it may itself be an ESC that starts a
        // real CSI sequence.
        if chars.peek() != Some(&'[') {
            output.push('\x1b');
            continue;
        }
        chars.next(); // consume the '[' of the CSI introducer

        while let Some(&next) = chars.peek() {
            match next {
                // Parameter and intermediate bytes: part of the sequence.
                '\x20'..='\x3f' => {
                    chars.next();
                }
                // Final byte: ends the sequence and is dropped along with it.
                '\x40'..='\x7e' => {
                    chars.next();
                    break;
                }
                // Not valid inside a CSI sequence. Leave it unconsumed so the
                // main loop emits it (or starts a new sequence if it is ESC).
                _ => break,
            }
        }
    }

    output
}

/// Returns `input` with farben markup tags removed.
///
/// The input is run through the crate lexer. Text tokens are copied to the
/// result, the string carried by a `TagType::Prefix` tag is copied as well, and
/// every other tag token is discarded. If the lexer reports an error, the
/// input is returned unchanged; this function does not panic on invalid markup.
///
/// # Example
/// ```
/// use farben_core::strip::strip_markup;
///
/// let stripped = strip_markup("[bold red]Just the text");
/// assert_eq!("Just the text", stripped);
///
/// let invalid = strip_markup("[I'm unclosed");
/// assert_eq!("[I'm unclosed", invalid);
/// ```
#[must_use]
pub fn strip_markup(input: &str) -> String {
    // Markup the lexer rejects is handed back verbatim.
    let tokens = match crate::lexer::tokenize(input) {
        Ok(tokens) => tokens,
        Err(_) => return input.to_owned(),
    };

    let mut plain = String::new();
    for token in tokens {
        match token {
            crate::lexer::Token::Text(text) => plain.push_str(&text),
            // Prefix tags carry text that belongs in the output.
            crate::lexer::Token::Tag(crate::lexer::TagType::Prefix(prefix)) => {
                plain.push_str(&prefix);
            }
            // Any other tag contributes nothing.
            crate::lexer::Token::Tag(_) => {}
        }
    }
    plain
}

/// Makes `input` safe to embed in farben markup as literal text.
///
/// Every `[` becomes `[[` and every `]` becomes `]]`; all other characters are
/// copied unchanged. The lexer reads a doubled bracket as one literal bracket,
/// so the escaped string contains no parseable tags.
///
/// # Example
/// ```
/// use farben_core::strip::escape_tags;
///
/// let safe = escape_tags("[bold]hello[/]");
/// assert_eq!(safe, "[[bold]]hello[[/]]");
/// ```
#[must_use]
pub fn escape_tags(input: &str) -> String {
    input
        .chars()
        .fold(String::with_capacity(input.len()), |mut escaped, ch| {
            // A bracket is written twice; anything else is written once.
            if ch == '[' || ch == ']' {
                escaped.push(ch);
            }
            escaped.push(ch);
            escaped
        })
}