lha 1.0.6

Long-Horizon Agent command-line package that installs the lha binary.
Documentation
use std::collections::BTreeMap;
use std::collections::BTreeSet;

use super::StrategyOutput;
use super::append_selected_lines_collapsed;
use crate::product::agent::input_slimming::InputSlimmingStrategy;

/// Number of lines at the start of the log that `log_compact` always keeps.
const HEAD_LINES: usize = 32;
/// Number of lines at the end of the log that `log_compact` always keeps.
const TAIL_LINES: usize = 64;
/// Lines of context kept on each side of a line classified as `Error` or `Fail`.
const ERROR_CONTEXT_LINES: usize = 3;
/// Upper bound on how many distinct warning signatures get a representative line.
const MAX_WARNING_GROUPS: usize = 8;

/// Tool family a log appears to come from, as guessed by `detect_format`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum LogFormat {
    /// Pytest summary / Python traceback markers were found.
    Pytest,
    /// Cargo / rustc markers (compiler errors, `test result:`, backtraces) were found.
    Cargo,
    /// An `npm ERR!` marker was found.
    Npm,
    /// A `FAIL` header together with `at ...` stack frames was found.
    Jest,
    /// A `make: ***` or `Entering directory` marker was found.
    Make,
    /// Fallback when none of the markers above matched.
    Generic,
}

impl LogFormat {
    fn as_str(self) -> &'static str {
        match self {
            Self::Pytest => "pytest",
            Self::Cargo => "cargo",
            Self::Npm => "npm",
            Self::Jest => "jest",
            Self::Make => "make",
            Self::Generic => "generic",
        }
    }
}

/// Severity class assigned to a single line by `classify_level`.
///
/// Classification is based on substring matches against the lowercased line, checked in
/// the order the variants are listed here (first match wins).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum LogLevel {
    /// Line mentions `fatal`, `critical`, `error`, `exception` or `panic`.
    Error,
    /// Line mentions `failed` / `fail`.
    Fail,
    /// Line mentions `warning` / `warn`.
    Warn,
    /// Line mentions `info`, `compiling` or `finished`.
    Info,
    /// Line mentions `debug`.
    Debug,
    /// Line mentions `trace` / `backtrace`.
    Trace,
    /// None of the keywords matched.
    Unknown,
}

/// One input line together with the classification produced by `parse_lines`.
#[derive(Debug, Clone)]
struct LogLine<'a> {
    /// Zero-based position of the line in the input slice.
    index: usize,
    /// The original (not lowercased) line text.
    text: &'a str,
    /// Severity guessed from keywords in the line.
    level: LogLevel,
    /// Whether the line was recognised as part of a Python, Rust or JS stack trace.
    is_stack_trace: bool,
    /// Whether `is_summary_line` matched the line.
    is_summary: bool,
    /// Sum of the level, stack-trace and summary weights; zero means "not interesting".
    score: i32,
}

/// Cheap heuristic deciding whether `text` resembles build or test log output.
///
/// The whole input is lowercased once and then searched for a fixed list of marker
/// substrings; a single hit anywhere is enough to return `true`.
pub(super) fn looks_like_log(text: &str) -> bool {
    const MARKERS: [&str; 12] = [
        "error",
        "warning",
        "failed",
        "panic",
        "traceback",
        "stack backtrace",
        "compiling",
        "finished",
        "test result",
        "exit code",
        "npm err!",
        "make: ***",
    ];

    let haystack = text.to_lowercase();
    for marker in MARKERS {
        if haystack.contains(marker) {
            return true;
        }
    }
    false
}

/// Compacts a build/test log down to the lines most likely to matter.
///
/// Kept lines are:
/// * the first `HEAD_LINES` and the last `TAIL_LINES` lines,
/// * every line that `parse_lines` scored above zero or flagged as stack trace / summary,
///   plus context around it (`ERROR_CONTEXT_LINES` on each side for `Error`/`Fail` lines,
///   one line on each side otherwise),
/// * for warnings, only the representative line of each warning signature (see
///   `warning_representatives`); further warnings are dropped unless they are also stack
///   trace / summary lines or fall inside the head, tail or another line's context.
///
/// The body starts with a one-line summary header (detected format, line counts and
/// `warnings_deduped`, i.e. warning lines minus representatives) and is then filled in by
/// `append_selected_lines_collapsed`, which is defined in the parent module.
pub(super) fn log_compact(text: &str) -> StrategyOutput {
    let lines = text.lines().collect::<Vec<_>>();
    let format = detect_format(&lines);
    let parsed = parse_lines(&lines);
    let representatives = warning_representatives(&parsed);
    let warnings_deduped = parsed
        .iter()
        .filter(|line| line.level == LogLevel::Warn)
        .count()
        .saturating_sub(representatives.len());

    let mut selected = BTreeSet::new();
    selected.extend(0..lines.len().min(HEAD_LINES));
    selected.extend(lines.len().saturating_sub(TAIL_LINES)..lines.len());
    for line in &parsed {
        let is_interesting = line.score > 0 || line.is_stack_trace || line.is_summary;
        if !is_interesting {
            continue;
        }

        // Every warning carries a positive score, so without this guard all repeats
        // would be selected and the deduplication reported in the header would never
        // actually happen. Only the representative of each signature survives here.
        let is_redundant_warning = line.level == LogLevel::Warn
            && !line.is_stack_trace
            && !line.is_summary
            && !representatives.contains(&line.index);
        if is_redundant_warning {
            continue;
        }

        let context = if matches!(line.level, LogLevel::Error | LogLevel::Fail) {
            ERROR_CONTEXT_LINES
        } else {
            1
        };
        let start = line.index.saturating_sub(context);
        let end = (line.index + context + 1).min(lines.len());
        selected.extend(start..end);
    }
    // Representatives already pass through the loop above; extending again keeps the
    // guarantee explicit and independent of the scoring weights.
    selected.extend(representatives);

    let mut body = format!(
        "Input Slimming log summary: format_detected={}, original_lines={}, kept_lines={}, omitted_lines={}, warnings_deduped={}\n",
        format.as_str(),
        lines.len(),
        selected.len(),
        lines.len().saturating_sub(selected.len()),
        warnings_deduped
    );
    append_selected_lines_collapsed(&mut body, &lines, selected);
    StrategyOutput {
        strategy: InputSlimmingStrategy::LogCompact,
        body,
    }
}

/// Guesses which tool produced the log by looking for characteristic markers.
///
/// Matching is case-insensitive. Formats are checked in a fixed priority order
/// (pytest, cargo, npm, jest, make) and the first one with a matching marker wins;
/// `LogFormat::Generic` is returned when nothing matches.
///
/// The Jest check needs both a line starting with `fail ` and a line starting with a
/// four-space-indented `at ` frame. Both are tested per line, so a `FAIL` header or
/// frame on the very first line is recognised as well.
fn detect_format(lines: &[&str]) -> LogFormat {
    let lowered = lines
        .iter()
        .map(|line| line.to_lowercase())
        .collect::<Vec<_>>();

    // True when any line contains at least one of the given substrings.
    let any_line_contains = |needles: &[&str]| {
        lowered
            .iter()
            .any(|line| needles.iter().any(|needle| line.contains(*needle)))
    };
    // True when any line begins with the given prefix.
    let any_line_starts_with =
        |prefix: &str| lowered.iter().any(|line| line.starts_with(prefix));

    if any_line_contains(&[
        "short test summary info",
        "traceback (most recent call last)",
        "= failures =",
    ]) {
        LogFormat::Pytest
    } else if any_line_contains(&[
        "error[e",
        "compiling ",
        "test result:",
        "stack backtrace:",
    ]) {
        LogFormat::Cargo
    } else if any_line_contains(&["npm err!"]) {
        LogFormat::Npm
    } else if any_line_starts_with("fail ") && any_line_starts_with("    at ") {
        LogFormat::Jest
    } else if any_line_contains(&["make: ***", "entering directory"]) {
        LogFormat::Make
    } else {
        LogFormat::Generic
    }
}

/// Classifies every input line and assigns it a relevance score.
///
/// For each line this determines the severity (`classify_level`), whether it is a summary
/// line (`is_summary_line`) and whether it belongs to a stack trace. Stack traces are
/// tracked with three small pieces of state carried across lines:
///
/// * Python: a `Traceback (most recent call last)` header opens a trace that tolerates up
///   to two blank lines and is closed early by a non-indented line that is not itself a
///   trace marker. `File "..."` lines and exception-chaining sentences always count.
/// * Rust: a `stack backtrace:` header opens a trace consisting of numbered frame lines,
///   their `at <file>:<line>` location lines, and blank lines. Any other non-blank line
///   closes it.
/// * JS: an `at ...` frame opens a trace that continues over further frames and
///   `Error:`-style lines; any other non-blank line closes it.
///
/// The score is the sum of a level weight (100 error/fail, 60 warn, 40 trace, 0 otherwise),
/// 80 for stack-trace lines and 70 for summary lines.
fn parse_lines<'a>(lines: &'a [&'a str]) -> Vec<LogLine<'a>> {
    let mut parsed = Vec::with_capacity(lines.len());
    let mut python_trace_remaining_blank_lines = 0usize;
    let mut rust_backtrace = false;
    let mut js_stack = false;

    for (idx, &text) in lines.iter().enumerate() {
        let lower = text.to_lowercase();
        let level = classify_level(&lower);
        let is_summary = is_summary_line(&lower);
        let trimmed_start = text.trim_start();
        let is_blank = text.trim().is_empty();
        let is_at_frame = trimmed_start.starts_with("at ");

        // Open any trace whose header or first frame appears on this line.
        if lower.contains("traceback (most recent call last)") {
            python_trace_remaining_blank_lines = 2;
        }
        if lower.contains("stack backtrace:") {
            rust_backtrace = true;
        }
        if is_at_frame {
            js_stack = true;
        }

        let is_python_trace = python_trace_remaining_blank_lines > 0
            || trimmed_start.starts_with("File \"")
            || lower.contains("during handling of the above exception")
            || lower.contains("the above exception was the direct cause");
        // Rust prints an indented `at <file>:<line>` line under each numbered frame.
        // Those location lines must count as part of the backtrace; otherwise the first
        // one would close the trace and every later frame would lose its flag.
        let is_rust_trace = rust_backtrace
            && (trimmed_start.starts_with(|ch: char| ch.is_ascii_digit())
                || is_at_frame
                || lower.contains("stack backtrace:")
                || is_blank);
        let is_js_trace = js_stack
            && (is_at_frame
                || lower.contains("error:")
                || lower.contains("typeerror:")
                || lower.contains("referenceerror:"));
        let is_stack_trace = is_python_trace || is_rust_trace || is_js_trace;

        // Close traces that this line has ended.
        if python_trace_remaining_blank_lines > 0 {
            if is_blank {
                python_trace_remaining_blank_lines -= 1;
            } else if !is_python_trace && !text.starts_with(char::is_whitespace) {
                python_trace_remaining_blank_lines = 0;
            }
        }
        if rust_backtrace && !is_rust_trace && !is_blank {
            rust_backtrace = false;
        }
        if js_stack && !is_js_trace && !is_blank {
            js_stack = false;
        }

        let mut score = 0;
        score += match level {
            LogLevel::Error | LogLevel::Fail => 100,
            LogLevel::Warn => 60,
            LogLevel::Trace => 40,
            LogLevel::Info | LogLevel::Debug | LogLevel::Unknown => 0,
        };
        if is_stack_trace {
            score += 80;
        }
        if is_summary {
            score += 70;
        }

        parsed.push(LogLine {
            index: idx,
            text,
            level,
            is_stack_trace,
            is_summary,
            score,
        });
    }

    parsed
}

fn classify_level(lower: &str) -> LogLevel {
    if contains_word(lower, &["fatal", "critical", "error", "exception", "panic"]) {
        LogLevel::Error
    } else if contains_word(lower, &["failed", "fail"]) {
        LogLevel::Fail
    } else if contains_word(lower, &["warning", "warn"]) {
        LogLevel::Warn
    } else if contains_word(lower, &["trace", "backtrace"]) {
        LogLevel::Trace
    } else if contains_word(lower, &["debug"]) {
        LogLevel::Debug
    } else if contains_word(lower, &["info", "compiling", "finished"]) {
        LogLevel::Info
    } else {
        LogLevel::Unknown
    }
}

/// Reports whether `text` contains at least one of `needles` as a substring.
///
/// Despite the name this is plain substring matching, not word-boundary matching, so
/// `"error"` also matches inside `"typeerror"`.
fn contains_word(text: &str, needles: &[&str]) -> bool {
    for needle in needles {
        if text.contains(*needle) {
            return true;
        }
    }
    false
}

/// Reports whether an already-lowercased line looks like a summary or section divider.
///
/// A line qualifies when it starts with a `====` / `----` rule or contains one of the
/// known summary fragments anywhere.
fn is_summary_line(lower: &str) -> bool {
    const RULE_PREFIXES: [&str; 2] = ["====", "----"];
    const SUMMARY_FRAGMENTS: [&str; 7] = [
        "test result:",
        "short test summary info",
        "failures:",
        "tests:",
        "suites:",
        "exit code",
        "finished ",
    ];

    let starts_with_rule = RULE_PREFIXES
        .iter()
        .any(|prefix| lower.starts_with(*prefix));
    if starts_with_rule {
        return true;
    }
    SUMMARY_FRAGMENTS
        .iter()
        .any(|fragment| lower.contains(*fragment))
}

/// Picks one representative line index per distinct warning signature.
///
/// Only `Warn` lines are considered. The first line seen for each signature is its
/// representative. At most `MAX_WARNING_GROUPS` representatives are returned, taken in
/// the map's key order (i.e. sorted by signature string).
fn warning_representatives(lines: &[LogLine<'_>]) -> BTreeSet<usize> {
    let mut first_index_by_signature: BTreeMap<String, usize> = BTreeMap::new();
    lines
        .iter()
        .filter(|candidate| candidate.level == LogLevel::Warn)
        .for_each(|warning| {
            first_index_by_signature
                .entry(warning_signature(warning.text))
                .or_insert(warning.index);
        });

    let mut representatives = BTreeSet::new();
    for index in first_index_by_signature
        .into_values()
        .take(MAX_WARNING_GROUPS)
    {
        representatives.insert(index);
    }
    representatives
}

/// Builds a normalised key used to group warnings that differ only in details.
///
/// The text is cut right after the earliest occurrence of any of the markers `": "`,
/// `" = "`, `" at "` or `" in "`. Everything up to the cut is kept verbatim; in the
/// remainder every ASCII digit, `/`, `\` and `:` is replaced by `#`. Without a marker the
/// text is returned unchanged.
fn warning_signature(text: &str) -> String {
    const MARKERS: [&str; 4] = [": ", " = ", " at ", " in "];

    // Byte offset just past the earliest marker; all markers are ASCII, so the offset
    // always lands on a character boundary.
    let cut = MARKERS
        .iter()
        .filter_map(|marker| text.find(*marker).map(|pos| pos + marker.len()))
        .min()
        .unwrap_or(text.len());

    let mut signature = String::with_capacity(text.len());
    signature.push_str(&text[..cut]);
    for ch in text[cut..].chars() {
        let is_volatile = matches!(ch, '0'..='9' | '/' | '\\' | ':');
        signature.push(if is_volatile { '#' } else { ch });
    }
    signature
}

// Unit tests for the log-compaction strategy. Each test builds a synthetic log with a
// lot of filler lines and checks that the interesting lines survive compaction.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::product::agent::input_slimming::strategy::assert_strategy_retains_needles;
    use pretty_assertions::assert_eq;

    // A 160-line log with 25 identical warnings, an error followed by a Rust backtrace,
    // and an exit-code line at the very end.
    #[test]
    fn log_strategy_preserves_errors_tail_and_collapses_repeats() {
        let mut lines = (0..160)
            .map(|idx| format!("line {idx}"))
            .collect::<Vec<_>>();
        // Replace lines 45..70 with the same warning repeated 25 times.
        lines.splice(
            45..70,
            std::iter::repeat_n("warning: same path /tmp/a123".to_string(), 25),
        );
        lines[95] = "ERROR: failed".to_string();
        lines[96] = "stack backtrace:".to_string();
        lines[97] = "   0: crate::thing".to_string();
        lines[159] = "exit code: 101".to_string();
        let text = lines.join("\n");

        let output = log_compact(&text);

        assert_eq!(output.strategy, InputSlimmingStrategy::LogCompact);
        // The "stack backtrace:" marker is what makes the format detect as cargo.
        assert_strategy_retains_needles(
            &text,
            &output.body,
            &[
                "format_detected=cargo",
                "ERROR: failed",
                "stack backtrace:",
                "crate::thing",
                "exit code: 101",
                "warnings_deduped=",
            ],
        );
    }

    // A chained Python traceback containing blank lines, inserted into the middle of
    // 100 filler lines; the second (chained) part must still be kept.
    #[test]
    fn log_strategy_keeps_chained_python_traceback_after_blank_line() {
        let mut lines = (0..100)
            .map(|idx| format!("noise {idx}"))
            .collect::<Vec<_>>();
        // An empty range inserts at index 40 without removing anything.
        lines.splice(
            40..40,
            [
                "Traceback (most recent call last):",
                "  File \"app.py\", line 1, in <module>",
                "    run()",
                "",
                "ValueError: first failure",
                "",
                "The above exception was the direct cause of the following exception:",
                "  File \"worker.py\", line 9, in main",
                "    raise RuntimeError('needle chained')",
                "RuntimeError: needle chained",
            ]
            .into_iter()
            .map(str::to_string),
        );
        let text = lines.join("\n");

        let output = log_compact(&text);

        assert!(output.body.contains("format_detected=pytest"));
        assert!(output.body.contains("needle chained"));
        assert!(output.body.contains("worker.py"));
    }

    // An 8-line Jest-style block repeated 40 times: a FAIL header, an error line with
    // two stack frames, two warnings that differ only in file and line, and a summary.
    #[test]
    fn log_strategy_keeps_jest_stack_frames_and_distinct_warnings() {
        let text = [
            "PASS one.test.js",
            "warning: timeout at src/a.js:10",
            "warning: timeout at src/b.js:20",
            "FAIL two.test.js",
            "TypeError: cannot read property",
            "    at render (/repo/ui.tsx:42:10)",
            "    at Object.<anonymous> (/repo/ui.test.tsx:5:1)",
            "Tests: 1 failed, 1 passed",
        ]
        .repeat(40)
        .join("\n");

        let output = log_compact(&text);

        assert!(output.body.contains("format_detected=jest"));
        assert!(output.body.contains("TypeError: cannot read property"));
        assert!(output.body.contains("ui.tsx:42"));
        assert!(output.body.contains("Tests: 1 failed"));
    }
}