// securegit 0.8.5

//! Compact output formatting for LLM/CI contexts.
//!
//! Token-optimized output algorithms adapted from RTK (MIT license).
//! See: <https://github.com/rtk-ai/rtk>

use std::fmt::Write;

// ── Utility Functions ───────────────────────────────────────────────────────

/// Shorten `s` to at most `max_chars` characters, marking any cut with `"..."`.
///
/// Lengths are measured in Unicode scalar values (`char`s), not bytes. A string
/// that already fits is returned as an unchanged copy. Otherwise the first
/// `max_chars - 3` characters are kept and `"..."` is appended, so the result
/// is exactly `max_chars` characters long.
///
/// `max_chars` must be >= 3 so the suffix fits; this is checked with a
/// `debug_assert!` (debug builds only).
pub fn truncate_line(s: &str, max_chars: usize) -> String {
    debug_assert!(
        max_chars >= 3,
        "max_chars must be >= 3 to fit the \"...\" suffix"
    );

    // A character at index `max_chars` exists only when the string is longer
    // than the limit; if there is none, the input already fits.
    if s.chars().nth(max_chars).is_none() {
        return s.to_owned();
    }

    // Byte offset just past the last character we keep.
    let kept_chars = max_chars.saturating_sub(3);
    let cut = s
        .char_indices()
        .nth(kept_chars)
        .map_or(s.len(), |(byte_idx, _)| byte_idx);

    let mut shortened = String::with_capacity(cut + 3);
    shortened.push_str(&s[..cut]);
    shortened.push_str("...");
    shortened
}

/// Roughly estimate how many LLM tokens `text` occupies.
///
/// Heuristic: one token per four bytes of UTF-8, rounded up, so an empty
/// string yields 0 and any 1..=4 byte string yields 1.
pub fn estimate_tokens(text: &str) -> usize {
    let byte_len = text.len();
    let full_groups = byte_len / 4;
    // A partial trailing group still costs one token.
    let has_remainder = byte_len % 4 != 0;
    full_groups + usize::from(has_remainder)
}

/// Format a token count for display: `"500"`, `"1.5K"`, `"1.5M"`.
///
/// Counts below 1 000 are printed verbatim. Larger counts are scaled to
/// thousands (`K`) or millions (`M`) and rounded to one decimal place; a
/// fractional part that rounds to zero is omitted, so the output is `"2K"`
/// rather than `"2.0K"`. A count that would round up to `"1000K"` is shown
/// as `"1M"` instead.
pub fn format_tokens(n: usize) -> String {
    // Render `n / unit` with one decimal, dropping a trailing ".0". Stripping
    // after rounding (rather than inspecting the fraction beforehand) also
    // covers values such as 1.96 that round *up* to a whole number.
    fn scaled(n: usize, unit: f64) -> String {
        let mut text = format!("{:.1}", n as f64 / unit);
        if text.ends_with(".0") {
            text.truncate(text.len() - 2);
        }
        text
    }

    if n < 1_000 {
        return n.to_string();
    }

    if n < 1_000_000 {
        let thousands = scaled(n, 1_000.0);
        // 999_950..=999_999 round up to a full thousand thousands; promote
        // to the next unit instead of printing "1000K".
        if thousands == "1000" {
            return String::from("1M");
        }
        return thousands + "K";
    }

    scaled(n, 1_000_000.0) + "M"
}

// ── Compact Status ──────────────────────────────────────────────────────────

/// Render a compact, token-optimized summary of repository status.
///
/// The first line is always `branch`. A tree with no staged, unstaged, or
/// untracked entries yields `"<branch>\nClean"`. Otherwise one line is added
/// per non-empty category, in the order `Staged`, `Modified`, `Untracked`:
///
/// ```text
/// main
/// Staged: 3 (file1, file2, file3)
/// Modified: 1 (file4)
/// ```
///
/// Each line shows the total count followed by a capped list of paths: the
/// first 5 for staged and unstaged entries, the first 3 for untracked ones,
/// with `", ... +N more"` appended when entries were left out. Only the path
/// (second tuple element) of staged/unstaged entries is used.
pub fn format_compact_status(
    branch: &str,
    staged: &[(&str, String)],
    unstaged: &[(&str, String)],
    untracked: &[String],
) -> String {
    // Append one "\n<label>: <total> (<paths>[, ... +N more])" line to `buf`.
    // Emits nothing for an empty category.
    fn push_section<'a>(
        buf: &mut String,
        label: &str,
        cap: usize,
        total: usize,
        paths: impl Iterator<Item = &'a str>,
    ) {
        if total == 0 {
            return;
        }
        let listed = paths.take(cap).collect::<Vec<_>>().join(", ");
        // Writing into a `String` cannot fail.
        let _ = write!(buf, "\n{label}: {total} ({listed}");
        if total > cap {
            let _ = write!(buf, ", ... +{} more", total - cap);
        }
        buf.push(')');
    }

    let mut summary = branch.to_owned();

    let nothing_changed = staged.is_empty() && unstaged.is_empty() && untracked.is_empty();
    if nothing_changed {
        summary.push_str("\nClean");
        return summary;
    }

    push_section(
        &mut summary,
        "Staged",
        5,
        staged.len(),
        staged.iter().map(|(_, path)| path.as_str()),
    );
    push_section(
        &mut summary,
        "Modified",
        5,
        unstaged.len(),
        unstaged.iter().map(|(_, path)| path.as_str()),
    );
    push_section(
        &mut summary,
        "Untracked",
        3,
        untracked.len(),
        untracked.iter().map(String::as_str),
    );

    summary
}

// ── Compact Diff Formatter ──────────────────────────────────────────────────

/// Stateful formatter that accumulates compact diff output.
///
/// Processes diff callbacks (file, hunk, line) and produces a token-optimized
/// representation suitable for LLM contexts. Call [`begin_file`], then
/// [`begin_hunk`] / [`add_line`] for its contents, and finally [`finish`] to
/// obtain the text.
///
/// Output format:
/// ```text
///   path/to/file.rs
///     @@ -1,3 +1,4 @@
///     +new line
///     -old line
///     ... (truncated)
///     +1 -1
///   path/to/other.rs
///     ...
/// ... (more changes truncated)
/// ```
///
/// Two limits apply:
/// - per hunk, at most `max_hunk_lines` changed (`+`/`-`) lines are shown,
///   followed by a single `... (truncated)` marker;
/// - overall, once `max_total_lines` output lines have been written, further
///   files, hunks, and lines are dropped and a single
///   `... (more changes truncated)` marker is appended at the very end.
///
/// The per-file `+N -M` summary counts every changed line reported for a file
/// that has a header in the output, including lines hidden by either limit.
///
/// [`begin_file`]: CompactDiffFormatter::begin_file
/// [`begin_hunk`]: CompactDiffFormatter::begin_hunk
/// [`add_line`]: CompactDiffFormatter::add_line
/// [`finish`]: CompactDiffFormatter::finish
pub struct CompactDiffFormatter {
    /// Maximum changed lines to emit per hunk before truncating.
    max_hunk_lines: usize,
    /// Maximum total output lines across all files.
    max_total_lines: usize,

    // ── Internal state ──────────────────────────────────────────────────
    /// Text accumulated so far.
    output: String,
    /// Number of lines written to `output`.
    total_lines: usize,
    /// Set once any file, hunk, or line has been dropped because the total
    /// budget ran out; `finish` appends the truncation marker when it is set.
    total_truncated: bool,

    // Per-file tracking
    /// Path of the file whose summary is still pending, if any.
    current_file: Option<String>,
    file_added: usize,
    file_removed: usize,

    // Per-hunk tracking
    /// Changed lines emitted for the current hunk.
    hunk_lines: usize,
    /// Whether the current hunk's `... (truncated)` marker was already written.
    hunk_truncated: bool,
}

impl CompactDiffFormatter {
    /// Create a new formatter with default limits.
    ///
    /// Defaults: `max_hunk_lines = 10`, `max_total_lines = 100`.
    pub fn new() -> Self {
        Self {
            max_hunk_lines: 10,
            max_total_lines: 100,
            output: String::new(),
            total_lines: 0,
            total_truncated: false,
            current_file: None,
            file_added: 0,
            file_removed: 0,
            hunk_lines: 0,
            hunk_truncated: false,
        }
    }

    /// Check whether we have exceeded the total output line budget.
    fn over_budget(&self) -> bool {
        self.total_truncated || self.total_lines >= self.max_total_lines
    }

    /// Flush the summary line (`+N -M`) for the current file, if any.
    ///
    /// The current file and its counters are cleared afterwards so the
    /// summary is written at most once per file, even when later calls are
    /// rejected for being over budget.
    fn flush_file_summary(&mut self) {
        let had_file = self.current_file.take().is_some();
        if had_file && (self.file_added > 0 || self.file_removed > 0) {
            // Writing into a `String` cannot fail.
            let _ = writeln!(
                self.output,
                "    +{} -{}",
                self.file_added, self.file_removed
            );
            self.total_lines += 1;
        }
        self.file_added = 0;
        self.file_removed = 0;
    }

    /// Start a new file section. Flushes the previous file's summary first.
    ///
    /// If the total budget is already exhausted the file is dropped and the
    /// output is flagged as truncated.
    pub fn begin_file(&mut self, path: &str) {
        self.flush_file_summary();

        if self.over_budget() {
            self.total_truncated = true;
            return;
        }

        let _ = writeln!(self.output, "  {path}");
        self.total_lines += 1;
        self.current_file = Some(path.to_string());
        self.hunk_lines = 0;
        self.hunk_truncated = false;
    }

    /// Start a new hunk within the current file.
    ///
    /// Trailing whitespace on `header` is removed. Dropped (and flagged as
    /// truncation) when the total budget is exhausted.
    pub fn begin_hunk(&mut self, header: &str) {
        if self.over_budget() {
            self.total_truncated = true;
            return;
        }

        let _ = writeln!(self.output, "    {}", header.trim_end());
        self.total_lines += 1;
        self.hunk_lines = 0;
        self.hunk_truncated = false;
    }

    /// Process a single diff line.
    ///
    /// `origin` is one of `'+'`, `'-'`, or `' '` (context).
    /// Only changed lines (`+`/`-`) count toward the per-hunk limit.
    /// Trailing `'\n'` and `'\r'` characters are stripped from `content`.
    pub fn add_line(&mut self, origin: char, content: &str) {
        // Count before any limit check so the file summary reflects every
        // change, not just the ones that end up being displayed.
        match origin {
            '+' => self.file_added += 1,
            '-' => self.file_removed += 1,
            _ => {}
        }

        if self.over_budget() {
            self.total_truncated = true;
            return;
        }

        let is_change = matches!(origin, '+' | '-');

        if is_change {
            if self.hunk_lines >= self.max_hunk_lines {
                if !self.hunk_truncated {
                    self.hunk_truncated = true;
                    let _ = writeln!(self.output, "    ... (truncated)");
                    self.total_lines += 1;
                }
                return;
            }
            self.hunk_lines += 1;
        } else if self.hunk_truncated {
            // Context lines are not counted toward the hunk limit, but they
            // are skipped once the hunk has been truncated.
            return;
        }

        let line = content.trim_end_matches('\n').trim_end_matches('\r');
        let _ = writeln!(self.output, "    {origin}{line}");
        self.total_lines += 1;
    }

    /// Finalize and return the complete compact diff string.
    ///
    /// Writes the pending file summary, then the
    /// `... (more changes truncated)` marker if anything was dropped because
    /// of the total line budget.
    pub fn finish(mut self) -> String {
        self.flush_file_summary();
        if self.total_truncated {
            self.output.push_str("... (more changes truncated)\n");
        }
        self.output
    }
}

impl Default for CompactDiffFormatter {
    fn default() -> Self {
        Self::new()
    }
}

// ── Tests ───────────────────────────────────────────────────────────────────

/// Unit tests for the compact formatting helpers and `CompactDiffFormatter`.
#[cfg(test)]
mod tests {
    use super::*;

    // ── truncate_line ───────────────────────────────────────────────────

    #[test]
    fn test_truncate_line_short() {
        assert_eq!(truncate_line("hello", 10), "hello");
        assert_eq!(truncate_line("abc", 3), "abc");
        assert_eq!(truncate_line("", 5), "");
    }

    #[test]
    fn test_truncate_line_long() {
        assert_eq!(truncate_line("hello world", 8), "hello...");
        assert_eq!(truncate_line("abcdefghij", 6), "abc...");
        // Exact boundary: 10 chars, max 10 => no truncation
        assert_eq!(truncate_line("0123456789", 10), "0123456789");
        // 11 chars, max 10 => truncate
        assert_eq!(truncate_line("0123456789A", 10), "0123456...");
    }

    #[test]
    fn test_truncate_line_unicode() {
        // Limits are counted in chars, so multi-byte text must not be split
        // mid-character or over-truncated.
        let s = "日本語のファイル名.rs something else padding text here";
        let result = truncate_line(s, 20);
        assert!(result.chars().count() <= 20);
        assert!(result.ends_with("..."));
    }

    // ── estimate_tokens ─────────────────────────────────────────────────

    #[test]
    fn test_estimate_tokens() {
        assert_eq!(estimate_tokens(""), 0);
        assert_eq!(estimate_tokens("a"), 1);
        assert_eq!(estimate_tokens("ab"), 1);
        assert_eq!(estimate_tokens("abc"), 1);
        assert_eq!(estimate_tokens("abcd"), 1);
        assert_eq!(estimate_tokens("abcde"), 2);
        assert_eq!(estimate_tokens("abcdefgh"), 2); // 8 bytes => 2 tokens
        assert_eq!(estimate_tokens("abcdefghi"), 3); // 9 bytes => ceil(9/4) = 3
    }

    // ── format_tokens ───────────────────────────────────────────────────

    #[test]
    fn test_format_tokens() {
        assert_eq!(format_tokens(0), "0");
        assert_eq!(format_tokens(500), "500");
        assert_eq!(format_tokens(999), "999");
        assert_eq!(format_tokens(1000), "1K");
        assert_eq!(format_tokens(1500), "1.5K");
        assert_eq!(format_tokens(10_000), "10K");
        assert_eq!(format_tokens(1_000_000), "1M");
        assert_eq!(format_tokens(1_500_000), "1.5M");
        assert_eq!(format_tokens(2_300_000), "2.3M");
    }

    // ── format_compact_status ───────────────────────────────────────────

    #[test]
    fn test_compact_status_clean() {
        let result = format_compact_status("main", &[], &[], &[]);
        assert_eq!(result, "main\nClean");
    }

    #[test]
    fn test_compact_status_with_changes() {
        let staged: Vec<(&str, String)> = vec![
            ("new", "file1.rs".into()),
            ("modified", "file2.rs".into()),
            ("modified", "file3.rs".into()),
        ];
        let unstaged: Vec<(&str, String)> = vec![("modified", "file4.rs".into())];
        let untracked: Vec<String> = vec!["file5.rs".into(), "file6.rs".into()];

        let result = format_compact_status("main", &staged, &unstaged, &untracked);

        assert!(result.starts_with("main\n"));
        assert!(result.contains("Staged: 3 (file1.rs, file2.rs, file3.rs)"));
        assert!(result.contains("Modified: 1 (file4.rs)"));
        assert!(result.contains("Untracked: 2 (file5.rs, file6.rs)"));
    }

    #[test]
    fn test_compact_status_truncated_staged() {
        let staged: Vec<(&str, String)> =
            (1..=8).map(|i| ("modified", format!("f{i}.rs"))).collect();

        let result = format_compact_status("dev", &staged, &[], &[]);

        assert!(result.contains("Staged: 8 (f1.rs, f2.rs, f3.rs, f4.rs, f5.rs, ... +3 more)"));
    }

    #[test]
    fn test_compact_status_truncated_untracked() {
        let untracked: Vec<String> = (1..=6).map(|i| format!("new{i}.txt")).collect();

        let result = format_compact_status("main", &[], &[], &untracked);

        assert!(result.contains("Untracked: 6 (new1.txt, new2.txt, new3.txt, ... +3 more)"));
    }

    // ── CompactDiffFormatter ────────────────────────────────────────────

    #[test]
    fn test_compact_diff_formatter() {
        let mut fmt = CompactDiffFormatter::new();
        fmt.begin_file("src/main.rs");
        fmt.begin_hunk("@@ -1,3 +1,4 @@");
        fmt.add_line('+', "use std::io;\n");
        fmt.add_line(' ', "\n");
        fmt.add_line('-', "old line\n");
        fmt.add_line('+', "new line\n");

        let output = fmt.finish();

        assert!(output.contains("src/main.rs"));
        assert!(output.contains("@@ -1,3 +1,4 @@"));
        assert!(output.contains("+use std::io;"));
        assert!(output.contains("-old line"));
        assert!(output.contains("+new line"));
        assert!(output.contains("+2 -1"));
    }

    #[test]
    fn test_compact_diff_hunk_truncation() {
        let mut fmt = CompactDiffFormatter::new();
        fmt.begin_file("big_file.rs");
        fmt.begin_hunk("@@ -1,20 +1,20 @@");

        // Add 15 changed lines — first 10 should appear, rest truncated
        for i in 0..15 {
            fmt.add_line('+', &format!("line {i}\n"));
        }

        let output = fmt.finish();

        // First 10 lines should appear
        assert!(output.contains("+line 0"));
        assert!(output.contains("+line 9"));
        // Lines 10-14 should be truncated
        assert!(!output.contains("+line 10"));
        assert!(!output.contains("+line 14"));
        // Truncation marker should be present
        assert!(output.contains("... (truncated)"));
        // File summary should reflect all 15 additions
        assert!(output.contains("+15 -0"));
    }

    #[test]
    fn test_compact_diff_multiple_files() {
        let mut fmt = CompactDiffFormatter::new();

        fmt.begin_file("a.rs");
        fmt.begin_hunk("@@ -1,1 +1,2 @@");
        fmt.add_line('+', "added\n");

        fmt.begin_file("b.rs");
        fmt.begin_hunk("@@ -1,2 +1,1 @@");
        fmt.add_line('-', "removed\n");

        let output = fmt.finish();

        assert!(output.contains("a.rs"));
        assert!(output.contains("+1 -0"));
        assert!(output.contains("b.rs"));
        assert!(output.contains("+0 -1"));
    }

    #[test]
    fn test_compact_diff_total_truncation() {
        let mut fmt = CompactDiffFormatter {
            max_total_lines: 10,
            ..CompactDiffFormatter::new()
        };

        // First file: 5 lines of output (file header + hunk header + 3 lines)
        fmt.begin_file("first.rs");
        fmt.begin_hunk("@@ -1,3 +1,3 @@");
        fmt.add_line('+', "a\n");
        fmt.add_line('+', "b\n");
        fmt.add_line('+', "c\n");

        // Second file: pushes past 10 total lines
        fmt.begin_file("second.rs");
        fmt.begin_hunk("@@ -1,1 +1,5 @@");
        fmt.add_line('+', "d\n");
        fmt.add_line('+', "e\n");
        fmt.add_line('+', "f\n");
        fmt.add_line('+', "g\n");

        // Third file: should be entirely truncated
        fmt.begin_file("third.rs");
        fmt.begin_hunk("@@ -1,1 +1,1 @@");
        fmt.add_line('+', "x\n");

        let output = fmt.finish();

        assert!(output.contains("first.rs"));
        assert!(output.contains("second.rs"));
        assert!(output.contains("... (more changes truncated)"));
        // third.rs should not appear
        assert!(!output.contains("third.rs"));
    }

    // ── Additional edge case tests ────────────────────────────────────

    // truncate_line boundary tests

    #[test]
    fn test_truncate_line_exact_boundary() {
        // String exactly at max_chars should NOT be truncated
        let s = "0123456789"; // 10 chars
        assert_eq!(truncate_line(s, 10), "0123456789");
    }

    #[test]
    fn test_truncate_line_one_over() {
        // String one char over max_chars should truncate
        let s = "0123456789A"; // 11 chars
        let result = truncate_line(s, 10);
        assert_eq!(result, "0123456...");
        assert_eq!(result.chars().count(), 10);
    }

    // max_chars = 0 is below the documented minimum of 3. With debug
    // assertions enabled the call trips the `debug_assert!`; without them
    // `saturating_sub(3)` keeps 0 chars and the result is just "...".
    #[test]
    #[cfg_attr(
        debug_assertions,
        should_panic(expected = "max_chars must be >= 3")
    )]
    fn test_truncate_line_width_zero() {
        assert_eq!(truncate_line("hello", 0), "...");
    }

    #[test]
    fn test_truncate_line_width_three() {
        // max_chars=3: just enough for "..."
        // "hello" (5 chars) > 3 => take(3-3=0) + "..." = "..."
        let result = truncate_line("hello", 3);
        assert_eq!(result, "...");

        // A 3-char string should NOT be truncated
        let result2 = truncate_line("abc", 3);
        assert_eq!(result2, "abc");
    }

    // max_chars = 2 is below the "..." length (3). With debug assertions
    // enabled the `debug_assert!` fires; without them the function returns
    // "...", which is 3 chars and therefore exceeds the requested width —
    // a known degenerate case that must at least not panic.
    #[test]
    #[cfg_attr(
        debug_assertions,
        should_panic(expected = "max_chars must be >= 3")
    )]
    fn test_truncate_line_width_two() {
        assert_eq!(truncate_line("hello world", 2), "...");
    }

    // estimate_tokens edge cases

    #[test]
    fn test_estimate_tokens_empty() {
        assert_eq!(estimate_tokens(""), 0);
    }

    #[test]
    fn test_estimate_tokens_one_byte() {
        // 1 byte => ceil(1/4) = 1 token
        assert_eq!(estimate_tokens("a"), 1);
    }

    #[test]
    fn test_estimate_tokens_four_bytes() {
        // Exactly 4 bytes => ceil(4/4) = 1 token
        assert_eq!(estimate_tokens("abcd"), 1);
    }

    #[test]
    fn test_estimate_tokens_five_bytes() {
        // 5 bytes => ceil(5/4) = 2 tokens
        assert_eq!(estimate_tokens("abcde"), 2);
    }

    // CompactDiffFormatter edge cases

    #[test]
    fn test_compact_diff_empty_diff() {
        // Formatter with no files added should produce empty output
        let fmt = CompactDiffFormatter::new();
        let output = fmt.finish();
        assert!(
            output.is_empty(),
            "empty diff should produce empty output, got: {:?}",
            output
        );
    }

    #[test]
    fn test_compact_diff_file_with_no_hunks() {
        // begin_file but no hunks or lines
        let mut fmt = CompactDiffFormatter::new();
        fmt.begin_file("lonely_file.rs");
        let output = fmt.finish();

        assert!(output.contains("lonely_file.rs"));
        // No summary line because file_added and file_removed are both 0
        assert!(!output.contains("+0 -0"), "should not emit +0 -0 summary");
    }

    #[test]
    fn test_compact_diff_binary_file() {
        // A "binary" file: begin_file is called, but no text hunks/lines follow.
        // This simulates a binary diff where we just note the file changed.
        let mut fmt = CompactDiffFormatter::new();
        fmt.begin_file("image.png");
        // No hunks or lines
        fmt.begin_file("code.rs");
        fmt.begin_hunk("@@ -1,1 +1,2 @@");
        fmt.add_line('+', "new line\n");

        let output = fmt.finish();

        assert!(output.contains("image.png"));
        assert!(output.contains("code.rs"));
        assert!(output.contains("+1 -0"));
    }

    // format_compact_status edge cases

    #[test]
    fn test_compact_status_all_types() {
        // All three categories present simultaneously
        let staged: Vec<(&str, String)> = vec![
            ("new", "added.rs".into()),
            ("modified", "changed.rs".into()),
        ];
        let unstaged: Vec<(&str, String)> = vec![
            ("modified", "dirty.rs".into()),
            ("deleted", "removed.rs".into()),
            ("modified", "touched.rs".into()),
        ];
        let untracked: Vec<String> = vec!["new1.txt".into(), "new2.txt".into()];

        let result = format_compact_status("feature/test", &staged, &unstaged, &untracked);

        assert!(result.starts_with("feature/test\n"));
        assert!(result.contains("Staged: 2 (added.rs, changed.rs)"));
        assert!(result.contains("Modified: 3 (dirty.rs, removed.rs, touched.rs)"));
        assert!(result.contains("Untracked: 2 (new1.txt, new2.txt)"));
        // Should NOT contain "Clean"
        assert!(!result.contains("Clean"));
    }

    #[test]
    fn test_compact_status_unicode_filenames() {
        let staged: Vec<(&str, String)> = vec![
            ("new", "\u{65e5}\u{672c}\u{8a9e}.rs".into()), // Japanese chars
        ];
        let untracked: Vec<String> = vec![
            "\u{00e9}m\u{00f8}ji.txt".into(), // accented chars
            "\u{1f600}.md".into(),            // emoji
        ];

        let result = format_compact_status("main", &staged, &[], &untracked);

        assert!(result.contains("\u{65e5}\u{672c}\u{8a9e}.rs"));
        assert!(result.contains("\u{00e9}m\u{00f8}ji.txt"));
        assert!(result.contains("\u{1f600}.md"));
        assert!(result.contains("Staged: 1"));
        assert!(result.contains("Untracked: 2"));
    }
}