lha 1.0.6

Long-Horizon Agent command-line package that installs the lha binary.
Documentation
use std::collections::BTreeSet;

use super::StrategyOutput;
use crate::product::agent::input_slimming::InputSlimmingStrategy;
use crate::product::agent::truncate::approx_token_count;

/// How many changed lines are kept from the start of each hunk, and again
/// from the end of each hunk.
const CHANGED_LINES_PER_HUNK_EDGE: usize = 4;
/// Upper bound on the keyword-flagged ("critical") changed lines kept per
/// hunk, in addition to the edge lines.
const MAX_KEYWORD_LINES_PER_HUNK: usize = 8;
/// Upper bound on how many selected lines are written to the compacted
/// output before rendering stops.
const MAX_RENDERED_BLOCKS: usize = 120;

/// One file section of a parsed diff.
///
/// Only line indices into the original input are stored; the text itself is
/// never copied.
#[derive(Debug, Clone, Default)]
struct DiffFile {
    // Indices of the header lines recorded for this file (for example the
    // `diff --git`, `---`, `+++`, `index`, `rename` and file-mode lines).
    header_indices: Vec<usize>,
    // Hunks belonging to this file, in input order.
    hunks: Vec<DiffHunk>,
    // Set when a `Binary files` or `GIT binary patch` line was seen in this
    // file section.
    is_binary: bool,
}

/// One hunk of a diff: the `@@` header plus the added/removed lines that
/// follow it.
#[derive(Debug, Clone)]
struct DiffHunk {
    // Index of the `@@` header line in the original input.
    header_index: usize,
    // Added and removed lines of the hunk, in input order. Context lines are
    // not recorded.
    changed_lines: Vec<ChangedLine>,
}

/// A single added or removed line inside a hunk.
#[derive(Debug, Clone)]
struct ChangedLine {
    // Index of the line in the original input.
    index: usize,
    // Whether the line matched one of the keywords checked by
    // `is_critical_diff_line`.
    is_critical: bool,
}

pub(super) fn diff_compact(text: &str) -> Option<StrategyOutput> {
    if approx_token_count(text) < 2_048
        || text.contains("GIT binary patch")
        || text.contains("Binary files")
    {
        return None;
    }
    let has_diff_signal = text.contains("diff --git")
        || (text.contains("--- ") && text.contains("+++ ") && text.contains("@@"));
    if !has_diff_signal {
        return None;
    }

    let lines = text.lines().collect::<Vec<_>>();
    let files = parse_diff_files(&lines);
    if files.is_empty() || files.iter().all(|file| file.hunks.is_empty()) {
        return None;
    }
    if files.iter().any(|file| file.is_binary) {
        return None;
    }

    let mut selected = BTreeSet::new();
    let mut hunks = 0usize;
    let mut changed_lines = 0usize;
    for file in &files {
        selected.extend(file.header_indices.iter().copied());
        for hunk in &file.hunks {
            hunks += 1;
            changed_lines += hunk.changed_lines.len();
            selected.insert(hunk.header_index);
            select_hunk_lines(&mut selected, hunk);
        }
    }

    let mut body = format!(
        "Input Slimming diff summary: original_lines={}, files={}, hunks={}, changed_lines={}, kept_lines={}, omitted_lines={}\n",
        lines.len(),
        files.len(),
        hunks,
        changed_lines,
        selected.len(),
        lines.len().saturating_sub(selected.len())
    );
    append_diff_selected_lines(&mut body, &lines, &selected);
    Some(StrategyOutput {
        strategy: InputSlimmingStrategy::DiffCompact,
        body,
    })
}

/// Splits the lines of a unified diff into files and hunks.
///
/// Only line indices are recorded. A new file starts at every `diff --git `
/// line. For plain unified diffs (no `diff --git` seen so far) a new file also
/// starts at a `--- ` line that is immediately followed by a `+++ ` line and
/// an `@@` line, so multi-file `diff -u` output is not collapsed into one
/// file.
///
/// Inside an open hunk, lines beginning with `+` or `-` are always treated as
/// changes, even when the changed content itself starts with `--`/`++` (which
/// makes the diff line look like `--- ...`/`+++ ...`). Path headers are only
/// recognised outside a hunk, or via the plain-diff triple described above.
///
/// Lines seen before any file marker are attached to an implicit default file.
fn parse_diff_files(lines: &[&str]) -> Vec<DiffFile> {
    let mut files = Vec::new();
    let mut current_file: Option<DiffFile> = None;
    let mut current_hunk: Option<DiffHunk> = None;
    // Once a `diff --git` line has been seen, file boundaries are taken from
    // those lines alone, so `---`/`+++` inside a hunk is unambiguous content.
    let mut git_framed = false;

    for (idx, line) in lines.iter().enumerate() {
        if line.starts_with("diff --git ") {
            flush_hunk(&mut current_file, &mut current_hunk);
            files.extend(current_file.take());
            current_file = Some(DiffFile {
                header_indices: vec![idx],
                hunks: Vec::new(),
                is_binary: false,
            });
            git_framed = true;
            continue;
        }

        let in_hunk = current_hunk.is_some();
        let is_old_path = line.starts_with("--- ");
        let is_new_path = line.starts_with("+++ ");

        // Plain unified diff: the previous file's last hunk is still open when
        // the next file's `---`/`+++`/`@@` header triple begins.
        let starts_plain_file = in_hunk
            && !git_framed
            && is_old_path
            && lines
                .get(idx + 1)
                .is_some_and(|next| next.starts_with("+++ "))
            && lines.get(idx + 2).is_some_and(|next| next.starts_with("@@"));
        if starts_plain_file {
            flush_hunk(&mut current_file, &mut current_hunk);
            files.extend(current_file.take());
            current_file = Some(DiffFile {
                header_indices: vec![idx],
                hunks: Vec::new(),
                is_binary: false,
            });
            continue;
        }

        let file = current_file.get_or_insert_with(DiffFile::default);
        // `---`/`+++` only name paths outside a hunk; inside one they are
        // removed/added content and fall through to the change check below.
        let is_path_header = !in_hunk && (is_old_path || is_new_path);
        if is_path_header
            || line.starts_with("index ")
            || line.starts_with("rename ")
            || line.starts_with("new file mode ")
            || line.starts_with("deleted file mode ")
        {
            file.header_indices.push(idx);
            continue;
        }
        if line.starts_with("Binary files") || line.starts_with("GIT binary patch") {
            file.is_binary = true;
            continue;
        }
        if line.starts_with("@@") {
            flush_hunk(&mut current_file, &mut current_hunk);
            current_hunk = Some(DiffHunk {
                header_index: idx,
                changed_lines: Vec::new(),
            });
            continue;
        }
        if let Some(hunk) = current_hunk.as_mut()
            && (line.starts_with('+') || line.starts_with('-'))
        {
            hunk.changed_lines.push(ChangedLine {
                index: idx,
                is_critical: is_critical_diff_line(line),
            });
        }
    }

    flush_hunk(&mut current_file, &mut current_hunk);
    files.extend(current_file);
    files
}

/// Moves the pending hunk, if there is one, onto the current file.
///
/// `hunk` is left as `None` afterwards. When no file is open yet, a default
/// `DiffFile` is created to receive the hunk. Does nothing when there is no
/// pending hunk.
fn flush_hunk(file: &mut Option<DiffFile>, hunk: &mut Option<DiffHunk>) {
    if let Some(finished) = hunk.take() {
        let target = file.get_or_insert_with(DiffFile::default);
        target.hunks.push(finished);
    }
}

/// Adds the line indices worth keeping from one hunk to `selected`.
///
/// Three groups are kept: the first `CHANGED_LINES_PER_HUNK_EDGE` changed
/// lines, the last `CHANGED_LINES_PER_HUNK_EDGE` changed lines, and the first
/// `MAX_KEYWORD_LINES_PER_HUNK` changed lines flagged as critical. The groups
/// may overlap; the set stores each index once.
fn select_hunk_lines(selected: &mut BTreeSet<usize>, hunk: &DiffHunk) {
    let changes = &hunk.changed_lines;

    let leading = changes.iter().take(CHANGED_LINES_PER_HUNK_EDGE);
    let trailing = changes.iter().rev().take(CHANGED_LINES_PER_HUNK_EDGE);
    let critical = changes
        .iter()
        .filter(|change| change.is_critical)
        .take(MAX_KEYWORD_LINES_PER_HUNK);

    selected.extend(
        leading
            .chain(trailing)
            .chain(critical)
            .map(|change| change.index),
    );
}

/// Reports whether a diff line mentions one of the attention keywords.
///
/// The match is a case-insensitive substring search, so `"test"` also matches
/// inside longer words. `"fn "` and `"class "` include the trailing space.
fn is_critical_diff_line(line: &str) -> bool {
    const NEEDLES: [&str; 12] = [
        "error", "unsafe", "security", "panic", "unwrap", "todo!", "fn ", "class ", "test", "auth",
        "password", "token",
    ];

    let lowered = line.to_lowercase();
    for needle in NEEDLES {
        if lowered.contains(needle) {
            return true;
        }
    }
    false
}

fn append_diff_selected_lines(body: &mut String, lines: &[&str], selected: &BTreeSet<usize>) {
    let mut last = None;
    for (rendered_blocks, idx) in selected.iter().copied().enumerate() {
        if rendered_blocks >= MAX_RENDERED_BLOCKS {
            body.push_str(&format!(
                "... omitted {} selected blocks ...\n",
                selected.len().saturating_sub(rendered_blocks)
            ));
            break;
        }
        if let Some(prev) = last
            && idx > prev + 1
        {
            body.push_str(&format!("... omitted {} lines ...\n", idx - prev - 1));
        }
        last = Some(idx);
        body.push_str(lines[idx]);
        body.push('\n');
    }

    if let Some(last_idx) = last
        && last_idx + 1 < lines.len()
    {
        body.push_str(&format!(
            "... omitted {} lines ...\n",
            lines.len() - last_idx - 1
        ));
    }
}

// Unit tests for the diff compaction strategy. Fixtures are deliberately large
// so that `diff_compact` does not bail out on its minimum-size check.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::product::agent::input_slimming::strategy::assert_strategy_retains_needles;
    use pretty_assertions::assert_eq;

    // A single-file diff with 400 small hunks: the summary counts, the file
    // header, the first hunk header and an omission marker must all appear.
    #[test]
    fn diff_strategy_preserves_headers_hunks_and_counts() {
        let mut text = String::from("diff --git a/a b/a\n--- a/a\n+++ b/a\n");
        for hunk in 0..400 {
            text.push_str(&format!("@@ -{hunk},1 +{hunk},1 @@\n"));
            // One removed line, one added line, one context line per hunk.
            text.push_str("-old line\n+new line\n context\n");
        }

        let output = diff_compact(&text).expect("strategy output");

        assert_eq!(output.strategy, InputSlimmingStrategy::DiffCompact);
        // NOTE(review): presumably checks that each needle is present in the
        // output body — confirm against the helper's definition.
        assert_strategy_retains_needles(
            &text,
            &output.body,
            &[
                "files=1",
                "hunks=400",
                "changed_lines=800",
                "diff --git a/a b/a",
                "@@ -0,1 +0,1 @@",
                "omitted",
            ],
        );
    }

    // Two files, the first with one very long hunk: both file headers and a
    // keyword-bearing line buried in the middle of the hunk must be retained.
    #[test]
    fn diff_strategy_keeps_multifile_headers_and_critical_middle_lines() {
        let mut text = String::new();
        text.push_str("diff --git a/a.rs b/a.rs\nindex 1..2\n--- a/a.rs\n+++ b/a.rs\n");
        text.push_str("@@ -1,200 +1,200 @@\n");
        for idx in 0..3_000 {
            // Line 80 is far from both hunk edges, so only the keyword rule
            // can select it.
            if idx == 80 {
                text.push_str(
                    "+fn security_critical_token_check() { unsafe { panic!(\"needle\") } }\n",
                );
            } else {
                text.push_str(&format!("+ordinary changed line {idx}\n"));
            }
        }
        text.push_str("diff --git a/b.rs b/b.rs\n--- a/b.rs\n+++ b/b.rs\n");
        text.push_str("@@ -1,2 +1,2 @@\n-old\n+new\n");

        let output = diff_compact(&text).expect("strategy output");

        assert_strategy_retains_needles(
            &text,
            &output.body,
            &[
                "diff --git a/a.rs b/a.rs",
                "diff --git a/b.rs b/b.rs",
                "security_critical_token_check",
            ],
        );
    }

    // Inputs the strategy must decline: a binary patch, and a diff that has
    // file headers but no hunks.
    #[test]
    fn diff_strategy_skips_binary_and_malformed_patches() {
        // The long filler is presumably there to clear the minimum-size check,
        // so the `None` comes from the binary marker.
        let binary = format!(
            "diff --git a/img.png b/img.png\nBinary files differ\n{}",
            "x".repeat(10_000)
        );
        assert_eq!(diff_compact(&binary), None);

        // Headers only, no `@@` line: parsing finds no hunks.
        let malformed = format!(
            "diff --git a/a b/a\n--- a/a\n+++ b/a\n{}",
            "x".repeat(10_000)
        );
        assert_eq!(diff_compact(&malformed), None);
    }
}