// collet 0.1.0 - Docs.rs

//! Message formatting utilities for remote platforms.
//!
//! Handles message splitting, code block boundary preservation,
//! tool call summaries, and ANSI stripping.

/// Split a long message into chunks that fit within `max_len` **characters**
/// (Unicode code points, not bytes — most platforms measure limits in chars).
///
/// Text that already fits is returned unchanged as a single chunk. Otherwise
/// the text is packed line by line (lines are re-joined with `\n`) against a
/// reduced budget — `max_len` minus a safety margin of up to 20 chars — which
/// leaves headroom for the fence lines added at chunk boundaries. Empty lines
/// at the very start of a chunk are dropped.
///
/// Respects code block boundaries (```) — if a split occurs inside a code
/// block, the block is closed at the end of the chunk and re-opened (with the
/// same info string) at the top of the next one. The decision to close and
/// re-open is based on the fence state *before* the current line, so an
/// opening fence never closes a block that is not open yet, and a closing
/// fence is kept in the chunk of the block it closes whenever that still fits
/// within `max_len`.
///
/// For lines that individually exceed the budget, the line is split at natural
/// semantic boundaries in priority order:
///   1. CJK sentence endings (。！？…)
///   2. Western sentence endings (`. ` `! ` `? `)
///   3. CJK clause breaks (，、；)
///   4. Comma/semicolon followed by space
///   5. Space (word boundary for Latin text)
///   6. Character boundary (last resort)
pub fn split_message(text: &str, max_len: usize) -> Vec<String> {
    if text.chars().count() <= max_len {
        return vec![text.to_string()];
    }

    // Reserve a small safety margin so the fence lines added when a chunk is
    // closed have room. Clamp to at least 1 so very small limits (tests,
    // unusual platforms) don't produce a zero effective_max.
    let safety_margin = 20usize.min(max_len.saturating_sub(1));
    let effective_max = (max_len - safety_margin).max(1);

    let mut out = ChunkBuilder::default();
    let mut in_code_block = false;
    let mut code_lang = String::new();

    for line in text.lines() {
        let line_chars = line.chars().count();
        // `Some(info)` when the line is a fence; `info` is whatever follows the backticks.
        let fence_info = line.trim().strip_prefix("```");

        // A closing fence belongs to the chunk holding the block it closes.
        // Appending it costs the same as the "\n```" a flush would add, so it
        // is allowed to dip into the safety margin (but never past `max_len`).
        if fence_info.is_some()
            && in_code_block
            && out.has_content
            && out.fits(line_chars, max_len)
        {
            out.append(line, line_chars);
            in_code_block = false;
            continue;
        }

        // Fence state as it was *before* this line: that is what decides
        // whether a chunk boundary has to close and re-open a block.
        let reopen = if in_code_block {
            Some(code_lang.as_str())
        } else {
            None
        };

        if line_chars > effective_max {
            // Line is too long on its own — start a fresh chunk, then split the line.
            if out.has_content {
                out.flush(reopen);
            }
            for part in split_long_line(line, effective_max) {
                let part_chars = part.chars().count();
                if !out.fits(part_chars, effective_max) {
                    out.flush(reopen);
                }
                out.append(&part, part_chars);
            }
        } else {
            // Normal line: start a new chunk only when it does not fit.
            if !out.fits(line_chars, effective_max) {
                out.flush(reopen);
            }
            out.append(line, line_chars);
        }

        // Update the fence state only after the line has been placed.
        if let Some(info) = fence_info {
            if in_code_block {
                in_code_block = false;
            } else {
                in_code_block = true;
                code_lang = info.to_string();
            }
        }
    }

    out.finish()
}

/// Accumulates lines into chunks for [`split_message`], tracking the size of
/// the chunk being built in characters.
#[derive(Default)]
struct ChunkBuilder {
    /// Chunks that are already complete.
    chunks: Vec<String>,
    /// Chunk currently being built.
    current: String,
    /// Length of `current` in characters (including a re-opened fence header).
    current_chars: usize,
    /// Whether `current` holds anything besides a re-opened fence header.
    has_content: bool,
}

impl ChunkBuilder {
    /// Whether a piece of `piece_chars` characters (plus its `\n` separator)
    /// can join the current chunk without exceeding `limit`. A chunk without
    /// content accepts any piece, so flushing can never emit an empty chunk.
    fn fits(&self, piece_chars: usize, limit: usize) -> bool {
        !self.has_content || self.current_chars + 1 + piece_chars <= limit
    }

    /// Append a piece to the current chunk, separated by `\n` from whatever
    /// is already there.
    fn append(&mut self, piece: &str, piece_chars: usize) {
        if self.current_chars > 0 {
            self.current.push('\n');
            self.current_chars += 1;
        }
        self.current.push_str(piece);
        self.current_chars += piece_chars;
        self.has_content = self.current_chars > 0;
    }

    /// Finish the current chunk and start a new one. With `reopen` set to the
    /// info string of the open code block, the finished chunk is closed with
    /// a fence and the new chunk starts with a matching opening fence.
    fn flush(&mut self, reopen: Option<&str>) {
        if reopen.is_some() {
            self.current.push_str("\n```");
        }
        self.chunks.push(std::mem::take(&mut self.current));
        self.current_chars = 0;
        self.has_content = false;
        if let Some(lang) = reopen {
            // No trailing newline: the next `append` supplies the separator,
            // so the continued block does not start with a blank line.
            self.current.push_str("```");
            self.current.push_str(lang);
            self.current_chars = 3 + lang.chars().count();
        }
    }

    /// Return all chunks, including the one still being built if it has content.
    fn finish(mut self) -> Vec<String> {
        if self.has_content {
            self.chunks.push(self.current);
        }
        self.chunks
    }
}

/// Split a single line that exceeds `max_chars` at natural semantic boundaries.
///
/// Each returned part holds at most `max_chars` characters and the parts
/// concatenate back to `line` exactly (break characters stay at the end of
/// the part they terminate). A line that already fits is returned as the only
/// part. A `max_chars` of 0 is treated as 1 so the function always makes
/// progress instead of panicking on an inverted slice range.
fn split_long_line(line: &str, max_chars: usize) -> Vec<String> {
    /// How far back from the hard limit a natural break is searched for.
    const LOOKBACK: usize = 100;

    let max_chars = max_chars.max(1);
    let chars: Vec<char> = line.chars().collect();
    let total = chars.len();
    if total <= max_chars {
        return vec![line.to_string()];
    }

    let mut parts: Vec<String> = Vec::with_capacity(total / max_chars + 1);
    let mut start = 0;

    while start < total {
        let end = (start + max_chars).min(total);
        let break_at = if end == total {
            // The remainder fits: take it whole.
            total
        } else {
            // The window never starts at `start` itself, so every part keeps
            // at least one character and the loop always advances.
            let window_start = end.saturating_sub(LOOKBACK).max(start + 1);
            find_natural_break(&chars[window_start..end])
                // Inclusive: break *after* the break character.
                .map_or(end, |rel| window_start + rel + 1)
        };

        parts.push(chars[start..break_at].iter().collect());
        start = break_at;
    }

    parts
}

/// Find the best break position within a char window.
///
/// Break classes are tried from strongest to weakest; within a class the
/// match closest to the end of the window wins. Returns the index (into
/// `chars`) of the break character, or `None` if the window contains no
/// natural break.
fn find_natural_break(chars: &[char]) -> Option<usize> {
    /// Index of the last position accepted by `is_break`, scanning from the end.
    fn last_where(chars: &[char], is_break: impl Fn(usize, char) -> bool) -> Option<usize> {
        (0..chars.len()).rev().find(|&i| is_break(i, chars[i]))
    }

    /// True when position `i` is the last one in the window or is followed
    /// by one of the `allowed` characters.
    fn ends_or_precedes(chars: &[char], i: usize, allowed: &[char]) -> bool {
        chars.get(i + 1).map_or(true, |next| allowed.contains(next))
    }

    // 1. CJK sentence endings — strongest semantic break
    last_where(chars, |_, c| matches!(c, '。' | '！' | '？' | '…' | '⋯'))
        // 2. Western sentence endings at the window end or before space/tab
        .or_else(|| {
            last_where(chars, |i, c| {
                matches!(c, '.' | '!' | '?') && ends_or_precedes(chars, i, &[' ', '\t'])
            })
        })
        // 3. CJK clause breaks
        .or_else(|| last_where(chars, |_, c| matches!(c, '，' | '、' | '；')))
        // 4. Western comma/semicolon at the window end or before a space
        .or_else(|| {
            last_where(chars, |i, c| {
                matches!(c, ',' | ';') && ends_or_precedes(chars, i, &[' '])
            })
        })
        // 5. Space (word boundary for Latin / mixed text)
        .or_else(|| last_where(chars, |_, c| c == ' '))
}

/// Generate a one-line tool call summary of the form `🔧 <name>: <detail>`,
/// truncated to at most `max_len` characters.
///
/// `args` is the tool's JSON argument text. The detail depends on the tool
/// (tool-specific formatting inspired by remotecode's format.ts):
///   - `bash`: the command (up to 150 chars), prefixed with `[lang]` when the
///     command prefix indicates a language other than plain shell
///   - `file_read` / `file_write` / `file_edit`: the target path
///   - `search`: the pattern, wrapped in slashes
///   - `skill`: the `skill` field, falling back to `name`
///   - `subagent`: the prompt (up to 80 chars)
///   - anything else: the raw arguments (up to 60 chars)
///
/// Missing fields yield an empty detail.
pub fn tool_call_summary(name: &str, args: &str, max_len: usize) -> String {
    let detail = match name {
        "bash" => {
            let cmd = extract_json_field(args, "command").unwrap_or_default();
            let shown = truncate(&cmd, 150);
            // Detect language for better display (remotecode pattern)
            match detect_bash_lang(&cmd) {
                "bash" => shown,
                lang => format!("[{lang}] {shown}"),
            }
        }
        "file_read" | "file_write" | "file_edit" => {
            extract_json_field(args, "path").unwrap_or_default()
        }
        "search" => extract_json_field(args, "pattern")
            .map(|p| format!("/{p}/"))
            .unwrap_or_default(),
        "skill" => extract_json_field(args, "skill")
            .or_else(|| extract_json_field(args, "name"))
            .unwrap_or_default(),
        "subagent" => extract_json_field(args, "prompt")
            .map(|p| truncate(&p, 80))
            .unwrap_or_default(),
        // `truncate` measures characters and adds the ellipsis itself, so no
        // byte-length check or extra "..." is needed here.
        _ => truncate(args, 60),
    };

    truncate(&format!("🔧 {name}: {detail}"), max_len)
}

/// Detect the programming language from a shell command prefix.
/// Inspired by remotecode's detectBashLang (format.ts).
///
/// Leading whitespace is ignored. The command must start with a known tool
/// name followed by a space; anything else is reported as `"bash"`.
pub fn detect_bash_lang(command: &str) -> &'static str {
    /// Command prefixes (including the trailing space) and the language each implies.
    const PREFIXES: &[(&str, &str)] = &[
        ("python3 ", "python"),
        ("python ", "python"),
        ("node ", "javascript"),
        ("ruby ", "ruby"),
        ("go ", "go"),
        ("cargo ", "rust"),
        ("rustc ", "rust"),
        ("swift ", "swift"),
        ("swiftc ", "swift"),
        ("java ", "java"),
        ("javac ", "java"),
        ("gradle ", "java"),
        ("mvn ", "java"),
    ];

    let trimmed = command.trim_start();
    PREFIXES
        .iter()
        .find(|(prefix, _)| trimmed.starts_with(*prefix))
        .map_or("bash", |&(_, lang)| lang)
}

/// Format a response for display (strip ANSI, then trim).
///
/// Escape codes are removed *before* trimming so that whitespace hidden
/// behind a leading or trailing colour code is trimmed as well.
pub fn format_response(text: &str) -> String {
    strip_ansi(text).trim().to_string()
}

/// Format a plan with a `📋 **Plan**` header followed by a blank line.
///
/// The plan text has ANSI escape codes removed and is then trimmed, so
/// whitespace hidden behind a leading or trailing colour code is dropped too.
pub fn format_plan(plan: &str) -> String {
    format!("📋 **Plan**\n\n{}", strip_ansi(plan).trim())
}

/// Format a multi-line status message.
///
/// Inspired by remotecode's session-state.ts pattern — shows rich context
/// including queue depth, streaming level, and workspace scope.
///
/// The session id is cut to its first 8 characters and the project path is
/// shortened for display. The state line reads "Idle" when not busy,
/// "Cancelling…" when busy and `suppressed`, and "Running" otherwise. A
/// "Queued" line is appended only when `queue_len` is non-zero.
// One parameter per displayed field; a dedicated struct is not used here.
#[allow(clippy::too_many_arguments)]
pub fn format_status(
    session_id: &str,
    project: &str,
    model: &str,
    busy: bool,
    queue_len: usize,
    streaming: &str,
    workspace: &str,
    suppressed: bool,
) -> String {
    let state = match (busy, suppressed) {
        (false, _) => "⚪ Idle",
        (true, true) => "🟡 Cancelling…",
        (true, false) => "🟢 Running",
    };
    let short_id = session_id.chars().take(8).collect::<String>();
    let short_project = shorten_path_for_display(project);

    let mut status = format!(
        "**Status**\nSession: `{short_id}`\nProject: `{short_project}`\nModel: `{model}`\nStreaming: `{streaming}`\nWorkspace: `{workspace}`\nState: {state}"
    );
    if queue_len > 0 {
        status.push_str(&format!("\n  Queued: {queue_len} message(s)"));
    }
    status
}

/// Shorten a path for display.
///
/// A path inside the user's home directory is rewritten to start with `~`.
/// The home prefix only counts when it ends on a component boundary, so
/// `/home/user2/x` is not treated as being inside `/home/user`. A path that
/// already starts with `~` is kept as a home-relative path.
///
/// If the result has more than two components (counting `~`), only the last
/// two are kept and the elided part is marked with `…`: `~/…/app/src` for
/// paths under home, `/…/share/app` for other absolute paths and `…/c/d` for
/// relative ones. Shorter paths are returned as they are (e.g. `~/app`).
pub fn shorten_path_for_display(path: &str) -> String {
    let home = dirs::home_dir().map(|h| h.to_string_lossy().into_owned());
    // Ignore a trailing slash on the home directory; an empty result (home
    // is the filesystem root) is not treated as a home prefix at all.
    let home = home
        .as_deref()
        .map(|h| h.trim_end_matches('/'))
        .filter(|h| !h.is_empty());

    // The remainder after the home prefix must be empty or start a new component.
    let on_boundary = |rest: &&str| rest.is_empty() || rest.starts_with('/');
    let under_home = home
        .and_then(|h| path.strip_prefix(h))
        .filter(on_boundary)
        .or_else(|| path.strip_prefix('~').filter(on_boundary));

    let display = match under_home {
        Some(rest) => format!("~{rest}"),
        None => path.to_string(),
    };

    // Show last 2 components for readability
    let parts: Vec<&str> = display.split('/').filter(|s| !s.is_empty()).collect();
    if parts.len() <= 2 {
        return display;
    }

    let tail = parts[parts.len() - 2..].join("/");
    // Only claim `~` for paths that really are under the home directory.
    let root = if under_home.is_some() {
        "~/"
    } else if path.starts_with('/') {
        "/"
    } else {
        ""
    };
    format!("{root}…/{tail}")
}

/// Format a timestamp (ISO 8601 / RFC 3339) as a human-readable relative time.
/// Inspired by remotecode's formatTimeAgo (session-ui.ts).
///
/// Returns `"just now"` for anything less than a minute old (including
/// timestamps in the future), then `"{n}m ago"`, `"{n}h ago"` and
/// `"{n}d ago"`. If the timestamp cannot be parsed, its first 16 characters
/// are returned verbatim instead.
pub fn format_time_ago(timestamp: &str) -> String {
    // Parse the timestamp (expecting ISO 8601 like "2025-01-15T12:30:00+00:00").
    // A parse failure is handled directly rather than through a sentinel
    // value, so a timestamp at the Unix epoch is still treated as valid.
    let then = match chrono::DateTime::parse_from_rfc3339(timestamp.trim()) {
        Ok(dt) => dt.timestamp(),
        // Fallback: show raw truncated timestamp
        Err(_) => return timestamp.chars().take(16).collect(),
    };

    let diff = chrono::Utc::now().timestamp().saturating_sub(then);

    match diff {
        // Negative differences (clock skew, future timestamps) land here too.
        d if d < 60 => "just now".to_string(),
        d if d < 3600 => format!("{}m ago", d / 60),
        d if d < 86_400 => format!("{}h ago", d / 3600),
        d => format!("{}d ago", d / 86_400),
    }
}

/// Truncate a string to at most `max` **characters**.
///
/// A string that already fits is returned unchanged. Otherwise the first
/// `max - 3` characters are kept and "..." is appended. When `max` is
/// smaller than the ellipsis itself (`max < 3`), the string is cut to its
/// first `max` characters without an ellipsis so the result never exceeds
/// `max`.
///
/// Cuts are made on char boundaries, so multi-byte characters (CJK, emoji)
/// are never split.
pub fn truncate(s: &str, max: usize) -> String {
    const ELLIPSIS: &str = "...";

    // `nth(max)` is the (max + 1)-th character: if it does not exist the
    // string fits. This stops scanning after `max + 1` characters.
    if s.chars().nth(max).is_none() {
        return s.to_string();
    }

    let keep = match max.checked_sub(ELLIPSIS.len()) {
        Some(keep) => keep,
        // No room for the ellipsis: hard cut.
        None => return s.chars().take(max).collect(),
    };

    // Byte offset at which the `keep`-th character starts.
    let cut = s.char_indices().nth(keep).map_or(s.len(), |(idx, _)| idx);
    let mut out = String::with_capacity(cut + ELLIPSIS.len());
    out.push_str(&s[..cut]);
    out.push_str(ELLIPSIS);
    out
}

/// Strip ANSI escape codes from a string.
///
/// Handles:
///   - CSI sequences (`ESC [` … final byte), e.g. colours and cursor
///     movement. The sequence ends at the first byte in `0x40..=0x7E`, which
///     covers non-letter finals such as `~` and `@` as well.
///   - OSC sequences (`ESC ]` … terminated by BEL or `ESC \`), e.g. window
///     titles and hyperlinks. An unterminated OSC sequence swallows the rest
///     of the input.
///   - The string terminator `ESC \` on its own.
///
/// Any other `ESC` is dropped and the characters after it are kept.
pub fn strip_ansi(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    let mut chars = s.chars().peekable();

    while let Some(ch) = chars.next() {
        if ch != '\x1b' {
            out.push(ch);
            continue;
        }

        match chars.peek().copied() {
            Some('[') => {
                chars.next(); // consume '['
                // Skip parameter/intermediate bytes up to and including the final byte.
                for c in chars.by_ref() {
                    if ('\x40'..='\x7e').contains(&c) {
                        break;
                    }
                }
            }
            Some(']') => {
                chars.next(); // consume ']'
                while let Some(&c) = chars.peek() {
                    if c == '\x1b' {
                        // Either the `ESC \` terminator or the start of a new
                        // sequence; leave it for the outer loop.
                        break;
                    }
                    chars.next();
                    if c == '\x07' {
                        break;
                    }
                }
            }
            Some('\\') => {
                chars.next(); // string terminator (ST)
            }
            // Lone ESC: drop just the escape character.
            _ => {}
        }
    }

    out
}

/// Extract a JSON field value (simple string extraction without full parse).
///
/// Looks for `"field"` followed by a colon and returns the value after it:
///   - for a string value, the text between the quotes, with escape
///     sequences left exactly as written (e.g. `\"` stays `\"`);
///   - for any other value (number, bool, null), the trimmed text up to the
///     next `,`, `}` or `]`.
///
/// Occurrences of `"field"` that are not followed by a colon — such as a
/// string *value* that happens to equal the field name — are skipped.
/// Returns `None` when no occurrence is followed by a colon.
fn extract_json_field(json: &str, field: &str) -> Option<String> {
    let needle = format!("\"{field}\"");

    json.match_indices(needle.as_str()).find_map(|(pos, _)| {
        // Skip optional whitespace and the colon; no colon means this
        // occurrence is not a key, so move on to the next one.
        let value = json[pos + needle.len()..]
            .trim_start()
            .strip_prefix(':')?
            .trim_start();

        let extracted = match value.strip_prefix('"') {
            Some(content) => {
                // String value — find the closing quote, honouring backslash
                // escapes. Byte lengths are accumulated char by char so the
                // slice always ends on a char boundary.
                let mut end = 0;
                let mut escaped = false;
                for ch in content.chars() {
                    if escaped {
                        escaped = false;
                    } else if ch == '\\' {
                        escaped = true;
                    } else if ch == '"' {
                        break;
                    }
                    end += ch.len_utf8();
                }
                content[..end].to_string()
            }
            None => {
                // Non-string value (number, bool, null). The delimiters are
                // ASCII, so the found index is always a char boundary.
                let end = value.find([',', '}', ']']).unwrap_or(value.len());
                value[..end].trim().to_string()
            }
        };
        Some(extracted)
    })
}

// ---------------------------------------------------------------------------
// Completion and error formatting
// ---------------------------------------------------------------------------

/// Format a brief completion summary shown after agent finishes.
/// Inspired by remotecode's sendFinalResponse pattern — gives users
/// a clear signal that the task is done with key metrics.
///
/// The elapsed time is shown as `{s}s` below one minute and as `{m}m {s}s`
/// otherwise. The message count is followed by "message" when it is exactly
/// 1 and by "messages" for every other count.
pub fn format_completion_summary(message_count: usize, elapsed_secs: u64) -> String {
    let time_str = if elapsed_secs < 60 {
        format!("{elapsed_secs}s")
    } else {
        format!("{}m {}s", elapsed_secs / 60, elapsed_secs % 60)
    };
    let noun = if message_count == 1 {
        "message"
    } else {
        "messages"
    };
    format!("✅ **Done** — {message_count} {noun} · {time_str}")
}

/// Format an error message for display in remote chat.
/// Strips SDK-internal error prefixes for cleaner user-facing messages.
/// Inspired by remotecode's rewriteSdkError pattern.
///
/// The message is trimmed, a leading `Error: ` and then a leading
/// `API error: ` are removed (each only if present), and the remainder is
/// truncated to 500 characters before being prefixed with `❌ `.
pub fn format_user_error(error: &str) -> String {
    /// Maximum number of characters of the error text that is shown.
    const MAX_CHARS: usize = 500;

    // Strip common SDK error prefixes. Each step starts from the result of
    // the previous one, so a stripped "Error: " is not restored when the
    // second prefix is absent.
    let msg = error.trim();
    let msg = msg.strip_prefix("Error: ").unwrap_or(msg);
    let msg = msg.strip_prefix("API error: ").unwrap_or(msg);

    // `truncate` appends its own ellipsis when it has to cut.
    format!("❌ {}", truncate(msg, MAX_CHARS))
}

/// Unit tests for the formatting helpers in this module.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn split_short_message() {
        let chunks = split_message("hello", 100);
        assert_eq!(chunks.len(), 1);
        assert_eq!(chunks[0], "hello");
    }

    #[test]
    fn split_long_message() {
        let text = "line1\nline2\nline3\nline4";
        let chunks = split_message(text, 15);
        assert!(chunks.len() > 1);
    }

    #[test]
    fn split_preserves_code_blocks() {
        let text = "before\n```rust\nfn main() {}\nlet x = 1;\nlet y = 2;\n```\nafter";
        let chunks = split_message(text, 40);
        assert!(chunks.len() > 1, "should split into multiple chunks");
        // Reassembled content should contain all original lines
        let rejoined = chunks.join("\n");
        assert!(rejoined.contains("fn main()"));
        assert!(rejoined.contains("let y = 2;"));
        assert!(rejoined.contains("after"));
    }

    #[test]
    fn truncate_utf8_safe() {
        // 15 Korean chars — truncating to 10 chars keeps 7 whole chars + "...".
        // (A `String` is always valid UTF-8, so the exact content is checked
        // instead of re-validating the encoding.)
        let s = "가나다라마바사아자차카타파하히";
        let t = truncate(s, 10);
        assert_eq!(t, "가나다라마바사...");
        assert!(t.ends_with("..."), "must end with ellipsis, got: {t:?}");
        assert!(t.chars().count() <= 10, "must not exceed max chars");
    }

    #[test]
    fn split_long_korean_paragraph() {
        // Long Korean text with no newlines — must be split at sentence boundary
        let text = "안녕하세요. 오늘은 좋은 날씨입니다. 하늘이 맑고 바람이 시원합니다. 공원에 산책을 나가고 싶네요. 꽃도 피고 새도 울고 정말 봄 같은 날씨입니다.";
        // Force split at 30 chars
        let chunks = split_message(text, 30);
        assert!(chunks.len() > 1, "should split into multiple chunks");
        let rejoined: String = chunks.join("");
        // The first and last sentences should be present somewhere in the output
        assert!(rejoined.contains("안녕하세요"));
        assert!(rejoined.contains("봄 같은 날씨입니다"));
        // Every chunk must respect the limit, measured in characters
        for chunk in &chunks {
            assert!(
                chunk.chars().count() <= 30,
                "chunk exceeds 30 chars: {chunk:?}"
            );
        }
    }

    #[test]
    fn split_char_count_not_bytes() {
        // 10 Korean chars = 30 bytes; should NOT be split at max_len=15 chars
        let text = "가나다라마바사아자차"; // 10 chars
        let chunks = split_message(text, 15);
        assert_eq!(
            chunks.len(),
            1,
            "10-char Korean text should fit in 15-char limit"
        );
    }

    #[test]
    fn split_at_cjk_sentence_boundary() {
        let text = "첫 번째 문장입니다。두 번째 문장입니다。세 번째 문장입니다。";
        // Force split smaller than each sentence pair
        let chunks = split_long_line(text, 15);
        // Each part must respect the limit, measured in characters
        for chunk in &chunks {
            assert!(
                chunk.chars().count() <= 15,
                "part exceeds 15 chars: {chunk:?}"
            );
        }
        // Nothing may be lost or reordered
        assert_eq!(chunks.concat(), text);
        // First chunk should end at 。
        assert!(
            chunks[0].ends_with('。'),
            "should break at CJK sentence end, got: {:?}",
            chunks[0]
        );
    }

    #[test]
    fn tool_summary_bash() {
        let args = r#"{"command":"ls -la","timeout":5000}"#;
        let summary = tool_call_summary("bash", args, 100);
        assert!(summary.contains("ls -la"));
    }

    #[test]
    fn strip_ansi_codes() {
        let s = "\x1b[31mred\x1b[0m normal";
        assert_eq!(strip_ansi(s), "red normal");
    }

    #[test]
    fn format_time_ago_recent() {
        let now = chrono::Utc::now().to_rfc3339();
        assert_eq!(format_time_ago(&now), "just now");
    }

    #[test]
    fn format_time_ago_minutes() {
        let five_min_ago = (chrono::Utc::now() - chrono::Duration::minutes(5)).to_rfc3339();
        let result = format_time_ago(&five_min_ago);
        assert!(result.contains("m ago"), "expected 'Xm ago', got: {result}");
    }

    #[test]
    fn format_time_ago_hours() {
        let three_hours_ago = (chrono::Utc::now() - chrono::Duration::hours(3)).to_rfc3339();
        let result = format_time_ago(&three_hours_ago);
        assert!(result.contains("h ago"), "expected 'Xh ago', got: {result}");
    }

    #[test]
    fn format_time_ago_days() {
        let two_days_ago = (chrono::Utc::now() - chrono::Duration::days(2)).to_rfc3339();
        let result = format_time_ago(&two_days_ago);
        assert!(result.contains("d ago"), "expected 'Xd ago', got: {result}");
    }

    #[test]
    fn format_completion_summary_brief() {
        let summary = format_completion_summary(5, 30);
        assert!(summary.contains("Done"));
        assert!(summary.contains("5 messages"));
        assert!(summary.contains("30s"));
    }

    #[test]
    fn format_completion_summary_minutes() {
        let summary = format_completion_summary(10, 125);
        assert!(summary.contains("2m 5s"));
    }

    #[test]
    fn format_user_error_short() {
        let result = format_user_error("Something went wrong");
        assert!(result.starts_with("❌"));
        assert!(result.contains("Something went wrong"));
    }

    #[test]
    fn format_user_error_strips_prefix() {
        let result = format_user_error("Error: API error: timeout");
        assert!(
            !result.contains("Error: API error:"),
            "should strip prefixes"
        );
        assert!(result.contains("timeout"));
    }

    #[test]
    fn detect_bash_lang_various() {
        assert_eq!(detect_bash_lang("python3 script.py"), "python");
        assert_eq!(detect_bash_lang("node app.js"), "javascript");
        assert_eq!(detect_bash_lang("cargo build"), "rust");
        assert_eq!(detect_bash_lang("ls -la"), "bash");
        assert_eq!(detect_bash_lang("go run ."), "go");
    }

    #[test]
    fn shorten_path_display() {
        // The final component must survive shortening. (The previous check
        // also accepted any result containing a '/', which made it pass for
        // almost any output.)
        let result = shorten_path_for_display("/Users/test/projects/my-app");
        assert!(result.ends_with("my-app"), "got: {result}");
    }
}