llm-kernel 0.2.5

Foundation library for Rust AI-native apps — provider catalog, LLM client, MCP server, search, telemetry, and safety
Documentation
//! Output sanitization — secret masking and control character removal.

/// Mask known secret patterns in a string.
///
/// Every occurrence of the following is replaced with `****`:
///
/// - the value following `Bearer ` (the scheme word itself is kept),
/// - the value following `password=`, `token=`, `key=` or `secret=`
///   (the `name=` part is kept),
/// - standalone `sk-*` API keys, i.e. an `sk-` that is not immediately
///   preceded by an alphanumeric character, so ordinary words such as
///   `task-runner` or `disk-usage` are left untouched.
///
/// A value extends up to the next whitespace character or the end of the
/// input, whichever comes first. Matching is case-sensitive.
pub fn mask_secrets(input: &str) -> String {
    // Markers whose *following* value is secret. `Bearer ` is listed first so
    // bearer tokens are masked before the `name=` passes run.
    const VALUE_PREFIXES: &[&str] = &["Bearer ", "password=", "token=", "key=", "secret="];
    const SK_PREFIX: &str = "sk-";

    let mut result = input.to_string();

    for &prefix in VALUE_PREFIXES {
        let mut search_from = 0;
        while let Some(rel_pos) = result[search_from..].find(prefix) {
            let value_start = search_from + rel_pos + prefix.len();
            match mask_value_at(&mut result, value_start) {
                Some(next) => search_from = next,
                None => break,
            }
        }
    }

    // Standalone `sk-*` keys: here the prefix itself is part of the secret.
    let mut search_from = 0;
    while let Some(rel_pos) = result[search_from..].find(SK_PREFIX) {
        let key_start = search_from + rel_pos;
        // An `sk-` glued to a preceding letter or digit is the tail of a
        // normal word ("task-", "risk-"), not the start of a key.
        let inside_word = matches!(
            result[..key_start].chars().next_back(),
            Some(prev) if prev.is_alphanumeric()
        );
        if inside_word {
            search_from = key_start + SK_PREFIX.len();
            continue;
        }
        match mask_value_at(&mut result, key_start) {
            Some(next) => search_from = next,
            None => break,
        }
    }

    result
}

/// Replace the run of non-whitespace text starting at byte offset
/// `value_start` with `****`.
///
/// Returns the offset just past the inserted mask when whitespace follows the
/// value, or `None` when the value ran to the end of `text` (nothing is left
/// to scan). An empty remainder is left unchanged.
fn mask_value_at(text: &mut String, value_start: usize) -> Option<usize> {
    const MASK: &str = "****";

    match text[value_start..].find(char::is_whitespace) {
        Some(value_len) => {
            text.replace_range(value_start..value_start + value_len, MASK);
            Some(value_start + MASK.len())
        }
        None => {
            if value_start < text.len() {
                text.replace_range(value_start.., MASK);
            }
            None
        }
    }
}

/// Strip ANSI CSI escape sequences (`ESC [ … final`) from `input`.
///
/// After `ESC [`, any run of parameter/intermediate bytes (`0x20..=0x3F`) is
/// discarded, followed by at most one final byte (`0x40..=0x7E`). A sequence
/// that is cut short or interrupted by some other character simply ends
/// there; that character is kept.
///
/// An `ESC` that is not followed by `[` is dropped on its own — whatever
/// comes after it is treated as ordinary text.
pub fn strip_ansi(input: &str) -> String {
    const ESC: char = '\x1B';

    let mut cleaned = String::with_capacity(input.len());
    let mut rest = input.chars().peekable();

    while let Some(current) = rest.next() {
        if current != ESC {
            cleaned.push(current);
            continue;
        }

        // Only `ESC [` opens a sequence we know how to skip; a bare ESC is
        // dropped and scanning resumes with the next character.
        if rest.next_if_eq(&'[').is_none() {
            continue;
        }

        // Parameter bytes (0x30-0x3F) and intermediate bytes (0x20-0x2F).
        while rest.next_if(|c| ('\x20'..='\x3F').contains(c)).is_some() {}

        // Optional final byte (0x40-0x7E) terminating the sequence.
        let _ = rest.next_if(|c| ('\x40'..='\x7E').contains(c));
    }

    cleaned
}

/// Sanitize output for safe display by dropping the following characters:
///
/// - Null (U+0000)
/// - All C1 control characters (U+0080–U+009F), including NEL (U+0085)
/// - Bidi embedding/override characters (U+202A–U+202E) — invisible text
///   direction attacks
/// - Line/paragraph separators (U+2028, U+2029)
/// - Plane-14 tag characters (U+E0000–U+E007F) — LLM injection vectors
///
/// Every other character, including ordinary ASCII whitespace, is passed
/// through unchanged and in its original order.
pub fn sanitize_output(input: &str) -> String {
    /// True for characters that must not reach the display.
    fn is_unsafe(ch: char) -> bool {
        matches!(
            ch,
            // Null byte
            '\u{0000}'
                // C1 controls
                | '\u{0080}'..='\u{009F}'
                // Bidi embeddings and overrides
                | '\u{202A}'..='\u{202E}'
                // Line / paragraph separators
                | '\u{2028}'
                | '\u{2029}'
                // Plane-14 tags
                | '\u{E0000}'..='\u{E007F}'
        )
    }

    let mut clean = String::new();
    for ch in input.chars() {
        if !is_unsafe(ch) {
            clean.push(ch);
        }
    }
    clean
}

/// Unit tests for secret masking, ANSI stripping and output sanitization.
///
/// Masking tests assert the exact output wherever it is fully determined, so
/// a regression that masks only part of the input cannot slip through a loose
/// `contains("****")` check.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn mask_bearer_token() {
        let input = "Authorization: Bearer sk-longtoken123456";
        let masked = mask_secrets(input);
        // The whole token must be gone, not merely accompanied by a mask.
        assert_eq!(masked, "Authorization: Bearer ****");
        assert!(!masked.contains("longtoken"), "got: {masked}");
    }

    #[test]
    fn mask_password_value() {
        let input = "password=supersecret other=value";
        let masked = mask_secrets(input);
        assert!(masked.contains("password=****"));
        assert!(masked.contains("other=value"));
        assert!(!masked.contains("supersecret"), "got: {masked}");
    }

    #[test]
    fn mask_token_value() {
        let masked = mask_secrets("token=abc123");
        assert!(masked.contains("token=****"));
    }

    #[test]
    fn mask_key_value() {
        let masked = mask_secrets("key=my-api-key-here");
        assert!(masked.contains("key=****"));
    }

    #[test]
    fn mask_secret_value() {
        let masked = mask_secrets("secret=data rest=ok");
        assert!(masked.contains("secret=****"));
        assert!(masked.contains("rest=ok"));
    }

    #[test]
    fn no_secrets_unchanged() {
        let input = "hello world 123";
        assert_eq!(mask_secrets(input), input);
    }

    #[test]
    fn mask_multiple_passwords() {
        let masked = mask_secrets("password=a password=b");
        // Exact match: both occurrences must be masked, not just the first.
        assert_eq!(masked, "password=**** password=****", "got: {masked}");
    }

    #[test]
    fn mask_multiple_bearer() {
        let masked = mask_secrets("Bearer token1 and Bearer token2");
        assert_eq!(masked, "Bearer **** and Bearer ****");
    }

    #[test]
    fn mask_standalone_sk_key() {
        let masked = mask_secrets("key is sk-proj-abc123 here");
        assert_eq!(masked, "key is **** here", "got: {masked}");
    }

    #[test]
    fn sanitize_removes_bidi() {
        let input = "hello\u{202E}world";
        let clean = sanitize_output(input);
        assert_eq!(clean, "helloworld");
    }

    #[test]
    fn sanitize_removes_plane14() {
        let input = "text\u{E0001}hidden";
        let clean = sanitize_output(input);
        assert_eq!(clean, "texthidden");
    }

    #[test]
    fn sanitize_removes_null() {
        let clean = sanitize_output("a\u{0000}b");
        assert_eq!(clean, "ab");
    }

    #[test]
    fn sanitize_removes_line_sep() {
        let clean = sanitize_output("a\u{2028}b");
        assert_eq!(clean, "ab");
    }

    #[test]
    fn sanitize_preserves_normal() {
        let input = "Hello, 世界! 🎉";
        assert_eq!(sanitize_output(input), input);
    }

    #[test]
    fn sanitize_removes_c1_controls() {
        let clean = sanitize_output("a\u{0080}b\u{009F}c");
        assert_eq!(clean, "abc");
    }

    #[test]
    fn strip_ansi_removes_color_codes() {
        let input = "\x1B[31mHello\x1B[0m \x1B[1;32mWorld\x1B[0m";
        let clean = strip_ansi(input);
        assert_eq!(clean, "Hello World");
    }

    #[test]
    fn strip_ansi_preserves_plain_text() {
        let input = "Hello, 世界! 🎉";
        assert_eq!(strip_ansi(input), input);
    }

    #[test]
    fn strip_ansi_handles_complex_sequence() {
        // 256-color and RGB sequences
        let input = "\x1B[38;5;196mRed\x1B[0m \x1B[38;2;0;255;0mGreen\x1B[0m";
        let clean = strip_ansi(input);
        assert_eq!(clean, "Red Green");
    }
}