vtcode-core 0.100.3

Core library for VT Code - a Rust-based terminal coding agent
//! Unicode handling tests for PTY scrollback functionality
//!
//! This test file focuses specifically on testing the `push_utf8` function
//! in the PTY scrollback to identify unicode rendering errors.

// Note: PtyScrollback is not publicly exported, so we test it through
// the existing test infrastructure in the pty module itself.
// These tests are designed to be added to the pty.rs test module.

#[cfg(test)]
mod test_unicode_push_utf8 {
    // These tests would be added to the pty.rs test module since PtyScrollback is private
    // For now, we'll create a standalone test that demonstrates the unicode issues

    #[test]
    fn test_unicode_examples() {
        // This test demonstrates various unicode scenarios that should be tested
        // in the push_utf8 function. Since we can't access the private struct,
        // we document the test cases here for integration into the main codebase.

        // Basic ASCII - should work
        let ascii_text = "Hello World";
        assert_eq!(ascii_text, "Hello World");

        // Simple Unicode - should work
        let unicode_text = "Hello 世界";
        assert_eq!(unicode_text, "Hello 世界");

        // Emojis - 4-byte UTF-8 sequences
        let emoji_text = "🌍🚀✨";
        assert_eq!(emoji_text, "🌍🚀✨");

        // Accented characters - 2-byte UTF-8 sequences
        let accent_text = "café naïve";
        assert_eq!(accent_text, "café naïve");

        // CJK characters - 3-byte UTF-8 sequences
        let cjk_text = "你好世界 こんにちは 안녕하세요";
        assert_eq!(cjk_text, "你好世界 こんにちは 안녕하세요");

        // Mixed scripts
        let mixed_text = "English 中文 العربية русский";
        assert_eq!(mixed_text, "English 中文 العربية русский");

        // Test byte sequences that would cause issues if split
        // "é" is 0xC3 0xA9 in UTF-8
        let e_acute_bytes = [0xC3, 0xA9];
        let e_acute = String::from_utf8(e_acute_bytes.to_vec()).unwrap();
        assert_eq!(e_acute, "é");

        // Emoji "🌍" is 4 bytes: 0xF0 0x9F 0x8C 0x8D
        let earth_bytes = [0xF0, 0x9F, 0x8C, 0x8D];
        let earth = String::from_utf8(earth_bytes.to_vec()).unwrap();
        assert_eq!(earth, "🌍");

        // Invalid UTF-8 should be handled with replacement character
        let invalid_bytes = [0xFF, 0xFE];
        let invalid_string = String::from_utf8_lossy(&invalid_bytes);
        assert!(invalid_string.contains("\u{FFFD}")); // Replacement character

        println!("All unicode test examples passed!");
        println!("To properly test push_utf8, add these test cases to the test module in pty.rs:");
        println!();
        println!("```rust");
        println!("#[test]");
        println!("fn test_push_utf8_basic_unicode() {{");
        println!("    let mut scrollback = PtyScrollback::new(1000, 100_000);");
        println!("    let mut buffer = \"Hello 世界\".as_bytes().to_vec();");
        println!("    scrollback.push_utf8(&mut buffer, false);");
        println!("    let snapshot = scrollback.snapshot();");
        println!("    assert_eq!(snapshot, \"Hello 世界\");");
        println!("    assert!(buffer.is_empty());");
        println!("}}");
        println!();
        println!("#[test]");
        println!("fn test_push_utf8_emojis() {{");
        println!("    let mut scrollback = PtyScrollback::new(1000, 100_000);");
        println!("    let mut buffer = \"Hello 🌍🚀✨\".as_bytes().to_vec();");
        println!("    scrollback.push_utf8(&mut buffer, false);");
        println!("    let snapshot = scrollback.snapshot();");
        println!("    assert_eq!(snapshot, \"Hello 🌍🚀✨\");");
        println!("    assert!(buffer.is_empty());");
        println!("}}");
        println!();
        println!("#[test]");
        println!("fn test_push_utf8_split_utf8_across_buffers() {{");
        println!("    let mut scrollback = PtyScrollback::new(1000, 100_000);");
        println!("    // Test split UTF-8: \"é\" is 0xC3 0xA9, split it across two calls");
        println!("    let mut buffer1 = vec![0xC3]; // First byte of \"é\"");
        println!("    scrollback.push_utf8(&mut buffer1, false);");
        println!("    assert_eq!(buffer1, vec![0xC3]); // Should keep incomplete byte");
        println!("    // Add the second byte");
        println!("    let mut buffer2 = vec![0xA9, b' ', b'H']; // Second byte of \"é\" + \" H\"");
        println!("    scrollback.push_utf8(&mut buffer2, false);");
        println!("    let snapshot = scrollback.snapshot();");
        println!("    assert_eq!(snapshot, \"é H\");");
        println!("    assert!(buffer2.is_empty());");
        println!("}}");
        println!();
        println!("#[test]");
        println!("fn test_push_utf8_invalid_utf8_replacement() {{");
        println!("    let mut scrollback = PtyScrollback::new(1000, 100_000);");
        println!("    // Invalid UTF-8 sequence");
        println!("    let mut buffer = vec![0xFF, 0xFE, b'X'];");
        println!("    scrollback.push_utf8(&mut buffer, false);");
        println!("    let snapshot = scrollback.snapshot();");
        println!("    // Should contain replacement character (�) for invalid bytes");
        println!("    assert!(snapshot.contains(\"\\u{{FFFD}}\"));");
        println!("    assert!(snapshot.contains('X'));");
        println!("    assert!(buffer.is_empty());");
        println!("}}");
        println!("```");
    }

    #[test]
    fn test_unicode_edge_cases_documentation() {
        // Document edge cases that should be tested in push_utf8

        println!("Unicode edge cases to test in push_utf8:");
        println!("1. Split UTF-8 sequences across buffer boundaries");
        println!("   - 2-byte sequences like é (0xC3 0xA9)");
        println!("   - 3-byte sequences like 世 (0xE4 0B8 96)");
        println!("   - 4-byte sequences like 🌍 (0xF0 9F 8C 8D)");
        println!();
        println!("2. Invalid UTF-8 sequences");
        println!("   - Invalid continuation bytes");
        println!("   - Overlong encodings");
        println!("   - Surrogate code points");
        println!("   - Invalid start bytes");
        println!();
        println!("3. Edge conditions");
        println!("   - Empty buffer");
        println!("   - Buffer with only incomplete UTF-8 at EOF");
        println!("   - Mixed valid and invalid UTF-8");
        println!("   - Very long UTF-8 sequences");
        println!();
        println!("4. Character types");
        println!("   - Zero-width characters (\\u200B)");
        println!("   - Control characters");
        println!("   - Combining characters");
        println!("   - Right-to-left text");
    }
}

#[tokio::test]
async fn test_unicode_rendering_scenarios() {
    // This test demonstrates real-world scenarios where unicode rendering issues might occur

    let temp = tempfile::TempDir::new().expect("temp workspace");
    use vtcode_core::tools::ToolRegistry;

    let registry = ToolRegistry::new(temp.path().to_path_buf()).await;
    registry.allow_all_tools().await.ok();

    // Test 1: Command with unicode output
    println!("Testing unicode output from commands...");

    // Run the script and capture output
    let result = registry
        .execute_tool(
            "unified_exec",
            serde_json::json!({
                "action": "run",
                "command": "python3",
                "args": [
                    "-c",
                    "print('Testing unicode characters:'); print('  Emojis: 🌍🚀✨'); print('  CJK: 你好世界 こんにちは 안녕하세요'); print('  Accents: café naïve résumé'); print('  Mixed: English 中文 العربية русский')"
                ]
            }),
        )
        .await;

    match result {
        Ok(response) => {
            let output = response
                .get("output")
                .or_else(|| response.get("stdout"))
                .and_then(|value| value.as_str())
                .unwrap_or_default();
            println!("Command output:");
            println!("{}", output);

            // Check if unicode characters are preserved
            assert!(
                output.contains("🌍") || output.contains("🚀") || output.contains(""),
                "Should contain emojis"
            );
            assert!(
                output.contains("你好") || output.contains("世界"),
                "Should contain Chinese characters"
            );
            assert!(
                output.contains("café") || output.contains("naïve"),
                "Should contain accented characters"
            );

            println!("✓ Unicode characters preserved in PTY output");
        }
        Err(e) => {
            println!("Error running unicode test: {}", e);
            // Don't fail the test, just report the issue
        }
    }
    println!("Unicode rendering test completed.");
}