squeez 1.21.0

Hook-based token compressor for 5 AI CLI hosts (Claude Code, Copilot CLI, OpenCode, Gemini CLI, Codex CLI). Up to 95% bash compression, signature-mode for code reads, cross-call dedup, MCP server, self-teaching protocol. Zero runtime deps.
Documentation
// PostToolUse rewrite hook — Claude Code v2.1.119+ added `updatedToolOutput`
// to PostToolUse for all tools, not just MCP. This command reads the
// PostToolUse JSON from stdin, checks the content against SessionContext for
// exact/fuzzy redundancy, and prints a hookSpecificOutput JSON blob when the
// content can be compressed. Exits 0 with no stdout when no rewrite is needed
// (Claude Code sees the original result unchanged).
//
// Called from hooks/posttooluse.sh for Read / Grep / Glob / Edit / Write
// tool calls. Bash compression is handled at PreToolUse via `squeez wrap`.

use std::io::Read;
use std::path::Path;

use crate::config::Config;
use crate::context;
use crate::session;

const MAX_CONTENT_BYTES: usize = 256 * 1024;
/// Minimum byte saving required before emitting an Edit/Write preamble-strip
/// rewrite. Smaller wins aren't worth the rewrite plumbing overhead.
const EDIT_STRIP_MIN_SAVINGS: usize = 30;

pub fn run(tool: &str) -> i32 {
    let mut buf = String::new();
    if std::io::stdin().read_to_string(&mut buf).is_err() {
        return 0;
    }
    let dir = session::sessions_dir();
    let cfg = Config::load();
    run_with(&buf, tool, &dir, &cfg)
}

pub fn run_with(raw: &str, tool: &str, sessions_dir: &Path, cfg: &Config) -> i32 {
    if let Some(out) = compute_rewrite(raw, tool, sessions_dir, cfg) {
        emit_updated_output(&out);
    }
    0
}

/// Core logic: returns the rewritten content string if compression applies,
/// or `None` if the original should be kept as-is. Exposed for testing.
pub fn compute_rewrite(raw: &str, tool: &str, sessions_dir: &Path, cfg: &Config) -> Option<String> {
    if raw.trim().is_empty() {
        return None;
    }

    let raw_content = extract_content(raw).filter(|c| !c.trim().is_empty())?;
    let raw_content: String = raw_content.chars().take(MAX_CONTENT_BYTES).collect();

    // Edit/Write: strip the verbose cat -n preamble that Claude Code emits.
    // Saves ~70-100B per call across many edits; only applied when the gain
    // exceeds EDIT_STRIP_MIN_SAVINGS to avoid noisy rewrites for tiny results.
    if cfg.strip_edit_preamble && (tool == "Edit" || tool == "Write") {
        if let Some(stripped) = strip_edit_preamble(&raw_content) {
            let saved = raw_content.len().saturating_sub(stripped.len());
            if saved >= EDIT_STRIP_MIN_SAVINGS {
                return Some(stripped);
            }
        }
        // Edit/Write don't participate in redundancy/summarize.
        return None;
    }

    let content = raw_content;
    let lines: Vec<String> = content.lines().map(String::from).collect();

    if lines.is_empty() {
        return None;
    }

    let mut ctx = context::cache::SessionContext::load(sessions_dir);

    // Redundancy check: if we've seen this content before, replace with a note.
    if cfg.redundancy_cache_enabled {
        if let Some(hit) = context::redundancy::check(&ctx, &lines) {
            let note = match hit.similarity {
                None => format!(
                    "[squeez: identical to {} #{} — output omitted]",
                    tool, hit.call_n
                ),
                Some(j) => format!(
                    "[squeez: ~{}% similar to {} #{} — re-read if needed]",
                    (j * 100.0).round() as u32,
                    tool,
                    hit.call_n
                ),
            };
            ctx.exact_dedup_hits += 1;
            ctx.save(sessions_dir);
            return Some(note);
        }
    }

    // Summarize fallback for large outputs. Read tool uses a lower threshold
    // (default 150 lines) since most code files fall in the 80-300 range and
    // were slipping past the global default of 300.
    let rewritten = if context::summarize::should_apply_for_tool(&lines, cfg, tool) {
        let summary = context::summarize::apply(lines.clone(), tool);
        if summary.len() < lines.len() {
            Some(summary.join("\n"))
        } else {
            None
        }
    } else {
        None
    };

    // Record content so future calls can dedup against it.
    if cfg.redundancy_cache_enabled {
        context::redundancy::record(&mut ctx, tool, &lines);
        ctx.save(sessions_dir);
    }

    rewritten
}

/// Compress the Edit/Write tool preamble. Claude Code emits the boilerplate
/// `"The file <path> has been updated successfully. Here's the result of
/// running `cat -n` on a snippet of the edited file:"` (or similar for Write),
/// which costs ~70-100 tokens per call. Returns `Some(shortened)` when a
/// strip is possible, `None` if the preamble is missing.
fn strip_edit_preamble(content: &str) -> Option<String> {
    // Look for the verbose phrase. Multiple variants exist depending on
    // tool/version (Edit, Write, NotebookEdit). Match on the most stable
    // anchor: "Here's the result of running `cat -n` on".
    let anchor = "Here's the result of running `cat -n`";
    let idx = content.find(anchor)?;
    // Find end of preamble line (the colon + newline that introduces the snippet).
    let after_anchor = &content[idx..];
    let line_end_rel = after_anchor.find(":\n")?;
    let snippet_start = idx + line_end_rel + 2;
    let snippet = &content[snippet_start..];
    let prefix = content[..idx].trim_end();
    // Keep the leading status line (e.g. "The file X has been updated.") but
    // drop the verbose cat -n explanation.
    Some(format!("{}\n{}", prefix, snippet))
}

fn emit_updated_output(content: &str) {
    println!(
        r#"{{"hookSpecificOutput":{{"hookEventName":"PostToolUse","updatedToolOutput":"{}"}}}}"#,
        json_escape(content)
    );
}

fn json_escape(s: &str) -> String {
    let mut out = String::with_capacity(s.len() + 16);
    for ch in s.chars() {
        match ch {
            '"' => out.push_str("\\\""),
            '\\' => out.push_str("\\\\"),
            '\n' => out.push_str("\\n"),
            '\r' => out.push_str("\\r"),
            '\t' => out.push_str("\\t"),
            c if (c as u32) < 0x20 => {
                out.push_str(&format!("\\u{:04x}", c as u32));
            }
            c => out.push(c),
        }
    }
    out
}

/// Extract the tool result content from the PostToolUse JSON.
/// Handles both `"content":"…"` and Anthropic content-block format.
fn extract_content(raw: &str) -> Option<String> {
    // Try plain string first
    if let Some(s) = extract_string_field(raw, "content") {
        if !s.trim().is_empty() {
            return Some(unescape(&s));
        }
    }
    // Try `"text":"…"` (Anthropic content-block format)
    let mut out = String::new();
    let mut rest = raw;
    while let Some(idx) = rest.find("\"text\":") {
        let after = &rest[idx + 7..];
        let after = after.trim_start();
        if let Some(stripped) = after.strip_prefix('"') {
            if let Some(end) = stripped.find('"') {
                out.push_str(&unescape(&stripped[..end]));
                out.push('\n');
            }
        }
        rest = &rest[idx + 7..];
    }
    if out.is_empty() { None } else { Some(out) }
}

fn extract_string_field(raw: &str, key: &str) -> Option<String> {
    let pat = format!("\"{}\":", key);
    let mut rest = raw;
    while let Some(idx) = rest.find(&pat) {
        let after = &rest[idx + pat.len()..];
        let after = after.trim_start();
        if let Some(stripped) = after.strip_prefix('"') {
            if let Some(end) = stripped.find('"') {
                let val = &stripped[..end];
                if !val.is_empty() {
                    return Some(val.to_string());
                }
            }
        }
        rest = &rest[idx + pat.len()..];
    }
    None
}

fn unescape(s: &str) -> String {
    s.replace("\\n", "\n")
        .replace("\\t", "\t")
        .replace("\\\"", "\"")
        .replace("\\\\", "\\")
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::config::Config;
    use std::sync::atomic::{AtomicU64, Ordering};

    fn tmp() -> std::path::PathBuf {
        static CTR: AtomicU64 = AtomicU64::new(0);
        let n = CTR.fetch_add(1, Ordering::Relaxed);
        let d = std::env::temp_dir().join(format!(
            "squeez_compress_output_{}_{}",
            std::time::SystemTime::now()
                .duration_since(std::time::UNIX_EPOCH)
                .unwrap()
                .as_nanos(),
            n
        ));
        std::fs::create_dir_all(&d).unwrap();
        d
    }

    #[test]
    fn empty_input_exits_cleanly() {
        let dir = tmp();
        let cfg = Config::default();
        assert_eq!(run_with("", "Read", &dir, &cfg), 0);
        assert_eq!(run_with("  ", "Read", &dir, &cfg), 0);
        let _ = std::fs::remove_dir_all(&dir);
    }

    #[test]
    fn no_content_field_exits_cleanly() {
        let dir = tmp();
        let cfg = Config::default();
        let json = r#"{"tool_name":"Read","tool_result":{}}"#;
        assert_eq!(run_with(json, "Read", &dir, &cfg), 0);
        let _ = std::fs::remove_dir_all(&dir);
    }

    #[test]
    fn json_escape_handles_special_chars() {
        assert_eq!(json_escape("a\"b"), r#"a\"b"#);
        assert_eq!(json_escape("a\nb"), r"a\nb");
        assert_eq!(json_escape("a\\b"), r"a\\b");
    }

    #[test]
    fn small_content_not_compressed() {
        let dir = tmp();
        let cfg = Config::default();
        let json = r#"{"tool_name":"Read","tool_result":{"content":"hello world"}}"#;
        // Small content: no redundancy hit, no summarize → exit 0, no stdout
        assert_eq!(run_with(json, "Read", &dir, &cfg), 0);
        let _ = std::fs::remove_dir_all(&dir);
    }

    #[test]
    fn edit_preamble_is_stripped() {
        let dir = tmp();
        let cfg = Config::default();
        let content = "The file /a/b.rs has been updated successfully. Here's the result of running `cat -n` on a snippet of the edited file:\n     1\thello\n     2\tworld";
        let json = format!(
            r#"{{"tool_name":"Edit","tool_result":{{"content":"{}"}}}}"#,
            content.replace('\\', "\\\\").replace('"', "\\\"").replace('\n', "\\n")
        );
        let rewrite = compute_rewrite(&json, "Edit", &dir, &cfg);
        assert!(rewrite.is_some(), "Edit preamble should be stripped");
        let out = rewrite.unwrap();
        assert!(
            !out.contains("Here's the result of running"),
            "verbose preamble survived: {}",
            out
        );
        assert!(out.contains("hello"), "snippet must remain");
        let _ = std::fs::remove_dir_all(&dir);
    }

    #[test]
    fn edit_without_preamble_passes_through() {
        let dir = tmp();
        let cfg = Config::default();
        let json = r#"{"tool_name":"Edit","tool_result":{"content":"File written."}}"#;
        // No verbose preamble → no rewrite emitted.
        assert!(compute_rewrite(json, "Edit", &dir, &cfg).is_none());
        let _ = std::fs::remove_dir_all(&dir);
    }

    #[test]
    fn edit_strip_disabled_by_config() {
        let dir = tmp();
        let mut cfg = Config::default();
        cfg.strip_edit_preamble = false;
        let content = "The file /a.rs has been updated successfully. Here's the result of running `cat -n` on a snippet of the edited file:\n     1\thi";
        let json = format!(
            r#"{{"tool_name":"Edit","tool_result":{{"content":"{}"}}}}"#,
            content.replace('\\', "\\\\").replace('"', "\\\"").replace('\n', "\\n")
        );
        // With strip disabled: Edit returns None (no other compression for Edit).
        assert!(compute_rewrite(&json, "Edit", &dir, &cfg).is_none());
        let _ = std::fs::remove_dir_all(&dir);
    }

    #[test]
    fn read_uses_lower_summarize_threshold() {
        let dir = tmp();
        let cfg = Config::default();
        // Build 200 benign lines: passes global 300 default but triggers
        // Read-specific 150 default (×2 benign = 300; so we need >300 for benign).
        // Use error markers to ensure non-benign path (threshold = base = 150).
        let mut lines = Vec::with_capacity(200);
        lines.push("error: synthetic".to_string());
        for i in 1..200 {
            lines.push(format!("line {}", i));
        }
        let content = lines.join("\n");
        let json = format!(
            r#"{{"tool_name":"Read","tool_result":{{"content":"{}"}}}}"#,
            content.replace('\\', "\\\\").replace('"', "\\\"").replace('\n', "\\n")
        );
        let rewrite = compute_rewrite(&json, "Read", &dir, &cfg);
        assert!(
            rewrite.is_some(),
            "Read with 200 lines + error marker should trigger summarize at threshold 150"
        );
        let _ = std::fs::remove_dir_all(&dir);
    }

    #[test]
    fn read_threshold_zero_falls_back_to_global() {
        let dir = tmp();
        let mut cfg = Config::default();
        cfg.read_summarize_threshold_lines = 0; // disable Read override
        cfg.summarize_threshold_lines = 1000;
        let mut lines = Vec::with_capacity(200);
        lines.push("error: x".to_string());
        for i in 1..200 {
            lines.push(format!("l{}", i));
        }
        let content = lines.join("\n");
        let json = format!(
            r#"{{"tool_name":"Read","tool_result":{{"content":"{}"}}}}"#,
            content.replace('\\', "\\\\").replace('"', "\\\"").replace('\n', "\\n")
        );
        // 200 lines < 1000 global threshold → no rewrite.
        assert!(compute_rewrite(&json, "Read", &dir, &cfg).is_none());
        let _ = std::fs::remove_dir_all(&dir);
    }

    #[test]
    fn strip_edit_preamble_helper_handles_missing_anchor() {
        assert!(strip_edit_preamble("just a plain message").is_none());
        assert!(strip_edit_preamble("").is_none());
    }
}