use std::collections::hash_map::DefaultHasher;
use std::collections::HashSet;
use std::hash::{Hash, Hasher};
use std::path::Path;
use sqz_engine::{format_command, CompressedContent, DependencyMapper, NgramAbbreviator, SqzEngine};
use sqz_engine::stages::abbreviate_words;
/// Base names of the command-line tools this proxy recognises.
///
/// Entries are plain executable names (no path, no arguments). The category
/// comments below are only a reading aid; lookups treat the list as flat.
pub const CLI_PATTERNS: &[&str] = &[
    // Version control
    "git", "hg", "svn", "fossil",
    // Build tools
    "cargo", "make", "cmake", "ninja", "bazel", "buck", "gradle", "mvn", "ant", "sbt", "lein",
    "mix", "rebar3",
    // Language-level package managers
    "npm", "yarn", "pnpm", "bun", "pip", "pip3", "poetry", "pipenv", "conda", "gem", "bundle",
    "composer", "go", "dep", "glide",
    // System package managers
    "apt", "apt-get", "dpkg", "yum", "dnf", "rpm", "pacman", "brew", "port", "snap", "flatpak",
    "nix", "guix",
    // Containers, orchestration and service tooling
    "docker", "podman", "buildah", "skopeo", "kubectl", "helm", "k9s", "minikube", "kind", "k3s",
    "nomad", "consul", "vault",
    // Cloud and infrastructure-as-code
    "aws", "az", "gcloud", "gsutil", "terraform", "pulumi", "cdk", "serverless", "sam",
    // Runtimes and compilers
    "node", "deno", "python", "python3", "ruby", "java", "kotlin", "scala", "clojure", "elixir",
    "erlang", "ghc", "rustc", "clang", "gcc", "g++",
    // Test runners
    "jest", "mocha", "pytest", "rspec", "minitest", "phpunit", "vitest", "cypress", "playwright",
    // Linters and formatters
    "eslint", "tslint", "prettier", "black", "isort", "flake8", "mypy", "pylint", "rubocop",
    "golangci-lint", "clippy", "rustfmt",
    // Networking
    "curl", "wget", "ssh", "scp", "rsync", "nc", "netstat", "ss", "ping", "traceroute", "dig",
    "nslookup", "openssl",
    // File and text utilities
    "find", "grep", "rg", "ag", "fd", "ls", "tree", "cat", "less", "head", "tail", "wc", "sort",
    "uniq", "awk", "sed", "jq", "yq",
    // Database clients
    "psql", "mysql", "sqlite3", "mongo", "redis-cli", "influx",
    // Code-hosting and issue-tracker CLIs
    "gh", "hub", "lab", "glab", "jira", "linear",
    // Configuration management
    "ansible", "chef", "puppet", "salt",
    // Media tools
    "ffmpeg", "convert", "identify",
];
/// Returns a 64-bit hash of `content` computed with the standard library's
/// `DefaultHasher` in its default (fixed-key) configuration.
///
/// Used as a cheap in-memory fingerprint; it is not a cryptographic digest.
fn content_hash(content: &str) -> u64 {
    let mut state = DefaultHasher::default();
    Hash::hash(content, &mut state);
    Hasher::finish(&state)
}
// Lint suppression: nothing in the visible part of this file constructs or
// reads this struct.
#[allow(dead_code)]
/// Pairs a 64-bit hash with a token count.
///
/// NOTE(review): presumably meant as a cache record keyed by the value
/// `content_hash` produces — confirm before relying on it.
struct CacheEntry {
/// 64-bit hash value (presumably of the cached content — TODO confirm).
hash: u64,
/// Token count before compression, judging by the field name — TODO confirm.
tokens_original: u32,
}
/// Proxy that post-processes captured command output through an `SqzEngine`.
///
/// The `RefCell` wrappers let the methods, which all take `&self`, mutate the
/// caches and helper state.
pub struct CliProxy {
/// Engine used for compression and for access to its cache manager and
/// session store.
engine: SqzEngine,
/// Hashes (as produced by `content_hash`) of outputs that were stored in, or
/// found in, the engine's dedup cache.
l1_cache: std::cell::RefCell<HashSet<u64>>,
/// Receives the contents of files that were read and reports their
/// dependencies for pre-caching.
dep_mapper: std::cell::RefCell<DependencyMapper>,
/// Observes compressed output and produces abbreviated text from it.
abbreviator: std::cell::RefCell<NgramAbbreviator>,
}
impl CliProxy {
    /// Builds a proxy around a freshly constructed [`SqzEngine`].
    ///
    /// # Errors
    ///
    /// Returns the error reported by `SqzEngine::new` if the engine cannot be
    /// created.
    pub fn new() -> sqz_engine::Result<Self> {
        Ok(Self::with_engine(SqzEngine::new()?))
    }

    /// Builds a proxy around an existing engine, starting with an empty L1
    /// cache, dependency mapper and abbreviator.
    // The lint suppression predates `new` delegating here; kept so the
    // attribute set of the public API is unchanged.
    #[allow(dead_code)]
    pub fn with_engine(engine: SqzEngine) -> Self {
        Self {
            engine,
            l1_cache: std::cell::RefCell::new(HashSet::new()),
            dep_mapper: std::cell::RefCell::new(DependencyMapper::new()),
            abbreviator: std::cell::RefCell::new(NgramAbbreviator::new()),
        }
    }

    /// Turns the raw `output` of command `cmd` into the text that should be
    /// shown instead.
    ///
    /// Steps, in order:
    /// 1. Record the file being read (for `cat`-like commands) and advance the
    ///    cache manager's turn counter.
    /// 2. If the cache manager already knows these exact bytes, return its
    ///    inline reference.
    /// 3. If `format_command` yields a shorter rendering (by estimated token
    ///    count), store its compressed form under the raw output bytes and
    ///    return the formatted text.
    /// 4. Otherwise compress the output, store it, apply n-gram abbreviation
    ///    and return the result.
    ///
    /// If compression fails, the original `output` is returned unchanged.
    /// Progress and errors are reported on stderr.
    pub fn intercept_output(&self, cmd: &str, output: &str) -> String {
        self.track_file(cmd, output);
        self.engine.cache_manager().advance_turn();

        let fast_hash = content_hash(output);
        // The L1 set only influences the log label and whether the hash needs
        // inserting; the cache manager is the authority, so query it once.
        let seen_in_l1 = self.l1_cache.borrow().contains(&fast_hash);
        if let Ok(Some(inline_ref)) = self.engine.cache_manager().check_dedup(output.as_bytes()) {
            if seen_in_l1 {
                eprintln!("[sqz] dedup hit: {} (L1+L2)", inline_ref);
            } else {
                self.l1_cache.borrow_mut().insert(fast_hash);
                eprintln!("[sqz] dedup hit: {} (L2)", inline_ref);
            }
            return inline_ref;
        }

        if let Some(formatted) = format_command(cmd, output) {
            let tokens_original = Self::estimate_tokens(output.len());
            let tokens_compressed = Self::estimate_tokens(formatted.len());
            if tokens_compressed < tokens_original {
                // Storing is best-effort: a failure here must not prevent the
                // formatted text from being returned.
                if let Ok(compressed) = self.engine.compress(&formatted) {
                    let _ = self
                        .engine
                        .cache_manager()
                        .store_compressed(output.as_bytes(), &compressed);
                }
                self.l1_cache.borrow_mut().insert(fast_hash);
                self.log_compression(cmd, tokens_original, tokens_compressed);
                return self.apply_context_refs(&formatted);
            }
        }

        match self.compress_output(cmd, output) {
            Ok(compressed) => {
                let tokens_original = compressed.tokens_original;
                let tokens_compressed = compressed.tokens_compressed;
                let _ = self
                    .engine
                    .cache_manager()
                    .store_compressed(output.as_bytes(), &compressed);
                self.l1_cache.borrow_mut().insert(fast_hash);
                self.log_compression(cmd, tokens_original, tokens_compressed);

                let mut abbr = self.abbreviator.borrow_mut();
                abbr.observe(&compressed.data);
                // Only use the abbreviated text when it actually saves tokens.
                let abbreviated = match abbr.abbreviate(&compressed.data) {
                    Ok(result) if result.total_tokens_saved > 0 => {
                        eprintln!(
                            "[sqz] n-gram abbreviation: {} tokens saved",
                            result.total_tokens_saved
                        );
                        result.text
                    }
                    _ => compressed.data,
                };
                self.apply_context_refs(&abbreviated)
            }
            Err(e) => {
                eprintln!("[sqz] fallback: compression error for command '{cmd}': {e}");
                output.to_owned()
            }
        }
    }

    /// Estimates a token count as one token per four bytes, rounded up.
    ///
    /// The result is clamped to `u32::MAX` rather than wrapping, so very large
    /// inputs cannot overflow the estimate.
    fn estimate_tokens(byte_len: usize) -> u32 {
        let tokens = byte_len / 4 + usize::from(byte_len % 4 != 0);
        tokens.min(u32::MAX as usize) as u32
    }

    /// Returns the executable base name of `cmd`: its first whitespace-separated
    /// word with everything up to the last `/` removed, or `""` if `cmd` is
    /// blank.
    fn command_base(cmd: &str) -> &str {
        cmd.split_whitespace()
            .next()
            .and_then(|first| first.rsplit('/').next())
            .unwrap_or("")
    }

    /// Reports a compression result on stderr and records it in the session
    /// store.
    ///
    /// A failure to record is ignored; logging must never affect the output.
    fn log_compression(&self, cmd: &str, original: u32, compressed: u32) {
        let saved = original.saturating_sub(compressed);
        let pct = if original > 0 {
            (saved as f64 / original as f64 * 100.0) as u32
        } else {
            0
        };
        eprintln!(
            "[sqz] {}/{} tokens ({}% reduction) [{}]",
            compressed, original, pct, cmd
        );
        let _ = self
            .engine
            .session_store()
            .log_compression(original, compressed, &[], cmd);
    }

    /// Compresses `output` with the engine. The command name is currently
    /// unused.
    fn compress_output(
        &self,
        _cmd: &str,
        output: &str,
    ) -> sqz_engine::Result<CompressedContent> {
        self.engine.compress(output)
    }

    /// Marks references to files the session already knows about, then runs
    /// word abbreviation over the text.
    ///
    /// Two reference shapes are annotated with ` [in context]`:
    /// `--> <path>` and `at <path>:`. If the known-file list is unavailable or
    /// empty, only word abbreviation is applied.
    fn apply_context_refs(&self, text: &str) -> String {
        let known = match self.engine.session_store().known_files() {
            Ok(files) => files,
            Err(_) => return abbreviate_words(text),
        };
        if known.is_empty() {
            return abbreviate_words(text);
        }

        let mut result = text.to_string();
        for file_path in &known {
            let marker = format!("--> {}", file_path);
            // Check first so that text without the marker is not reallocated.
            if result.contains(&marker) {
                let note = format!("{} [in context]", marker);
                result = result.replace(&marker, &note);
            }
            let at_marker = format!("at {}:", file_path);
            if result.contains(&at_marker) {
                let note = format!("at {} [in context]:", file_path);
                result = result.replace(&at_marker, &note);
            }
        }
        abbreviate_words(&result)
    }

    /// For file-reading commands (`cat`, `head`, `tail`, `less`, `bat`),
    /// registers the last argument as a known file and pre-caches its
    /// dependencies.
    ///
    /// The last argument is only treated as a file when it has an extension,
    /// which filters out flags and bare numbers.
    fn track_file(&self, cmd: &str, output: &str) {
        if !matches!(
            Self::command_base(cmd),
            "cat" | "head" | "tail" | "less" | "bat"
        ) {
            return;
        }
        // Skip the command word itself; it never names the file being read.
        if let Some(path) = cmd.split_whitespace().skip(1).last() {
            if Path::new(path).extension().is_some() {
                let _ = self.engine.session_store().add_known_file(path);
                self.predictive_precache(path, output);
            }
        }
    }

    /// Registers `content` as the contents of `file_path` with the dependency
    /// mapper, then compresses and caches every dependency that can be read
    /// from disk and is not cached yet.
    ///
    /// Relative dependency paths are resolved against the directory of
    /// `file_path`. Unreadable files and compression or cache failures are
    /// skipped silently; a summary is printed to stderr when at least one
    /// dependency was cached.
    fn predictive_precache(&self, file_path: &str, content: &str) {
        let path = Path::new(file_path);
        self.dep_mapper.borrow_mut().add_file(path, content);
        let deps = self.dep_mapper.borrow().dependencies_of(path);
        if deps.is_empty() {
            return;
        }

        let mut precached = 0;
        for dep_path in &deps {
            let resolved = if dep_path.is_absolute() {
                dep_path.clone()
            } else if let Some(parent) = path.parent() {
                parent.join(dep_path)
            } else {
                dep_path.clone()
            };
            // `is_file` is already false for paths that do not exist.
            if !resolved.is_file() {
                continue;
            }
            let dep_content = match std::fs::read_to_string(&resolved) {
                Ok(text) => text,
                Err(_) => continue,
            };
            // Already cached: nothing to do for this dependency.
            if let Ok(Some(_)) = self.engine.cache_manager().check_dedup(dep_content.as_bytes()) {
                continue;
            }
            if let Ok(compressed) = self.engine.compress(&dep_content) {
                let _ = self
                    .engine
                    .cache_manager()
                    .store_compressed(dep_content.as_bytes(), &compressed);
                let hash = content_hash(&dep_content);
                self.l1_cache.borrow_mut().insert(hash);
                let dep_str = resolved.to_string_lossy().to_string();
                let _ = self.engine.session_store().add_known_file(&dep_str);
                precached += 1;
            }
        }

        if precached > 0 {
            eprintln!(
                "[sqz] predictive pre-cache: {} dependencies of {} cached",
                precached, file_path
            );
        }
    }

    /// Returns `true` if the executable named by `cmd` is listed in
    /// [`CLI_PATTERNS`].
    ///
    /// Only the first word of `cmd` is considered, with any leading path
    /// removed; the comparison ignores ASCII case.
    // Lint suppression kept from the original; no caller outside the tests is
    // visible in this file.
    #[allow(dead_code)]
    pub fn is_known_command(cmd: &str) -> bool {
        let base = Self::command_base(cmd);
        CLI_PATTERNS
            .iter()
            .any(|p| base.eq_ignore_ascii_case(p))
    }

    /// Reads all of stdin, runs it through [`CliProxy::intercept_output`] and
    /// writes the result to stdout.
    ///
    /// The command name is taken from the `SQZ_CMD` environment variable and
    /// defaults to `"stdin"`. Input is read line by line, so every line ending
    /// is normalised to `\n` and the buffer always ends with a newline.
    ///
    /// # Errors
    ///
    /// Returns `SqzError::Other` if stdin cannot be read (including non-UTF-8
    /// input) or if writing to or flushing stdout fails.
    pub fn run_proxy(&self) -> sqz_engine::Result<()> {
        use std::io::{self, BufRead, Write};

        let stdin = io::stdin();
        let mut buf = String::new();
        for line in stdin.lock().lines() {
            let line = line.map_err(|e| sqz_engine::SqzError::Other(e.to_string()))?;
            buf.push_str(&line);
            buf.push('\n');
        }

        let cmd = std::env::var("SQZ_CMD").unwrap_or_else(|_| "stdin".to_string());
        let compressed = self.intercept_output(&cmd, &buf);

        let stdout = io::stdout();
        let mut out = stdout.lock();
        out.write_all(compressed.as_bytes())
            .map_err(|e| sqz_engine::SqzError::Other(e.to_string()))?;
        // Flush explicitly so a failure (e.g. a closed pipe) is reported
        // instead of being lost when the buffer is flushed implicitly.
        out.flush()
            .map_err(|e| sqz_engine::SqzError::Other(e.to_string()))?;
        Ok(())
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a proxy backed by a default engine; panics if the engine cannot
    /// be initialised.
    fn fresh_proxy() -> CliProxy {
        CliProxy::new().expect("engine init")
    }

    /// Bare name, absolute path and name-with-arguments are all recognised.
    #[test]
    fn test_is_known_command_git() {
        assert!(CliProxy::is_known_command("git"));
        assert!(CliProxy::is_known_command("/usr/bin/git"));
        assert!(CliProxy::is_known_command("git status"));
    }

    /// A name that is not in the pattern list is rejected.
    #[test]
    fn test_is_known_command_unknown() {
        assert!(!CliProxy::is_known_command("my_custom_tool"));
    }

    /// Guards against the pattern list shrinking unexpectedly.
    #[test]
    fn test_patterns_count() {
        let count = CLI_PATTERNS.len();
        assert!(count >= 90, "expected ≥90 patterns, got {}", count);
    }

    /// Non-empty input must produce non-empty output.
    #[test]
    fn test_intercept_output_returns_string() {
        let proxy = fresh_proxy();
        let raw = "hello world\nsome output\n";
        let processed = proxy.intercept_output("echo", raw);
        assert!(!processed.is_empty());
    }

    /// Smoke test: empty output must not panic; the returned value is not
    /// checked.
    #[test]
    fn test_intercept_output_fallback_on_empty() {
        let proxy = fresh_proxy();
        let _ = proxy.intercept_output("git", "");
    }

    /// Feeding the same output twice yields a dedup reference the second time.
    #[test]
    fn test_dedup_cache_returns_ref_on_second_call() {
        let proxy = fresh_proxy();
        let repeated = "some repeated output that is long enough to be meaningful\n".repeat(5);
        let first = proxy.intercept_output("echo", &repeated);
        let second = proxy.intercept_output("echo", &repeated);
        assert!(second.starts_with("§ref:"), "expected dedup ref, got: {}", second);
        assert!(
            second.len() < first.len() || first.starts_with("§ref:"),
            "dedup ref should be shorter than original"
        );
    }

    /// `cat <file>` registers the file with the session store.
    #[test]
    fn test_file_tracking_on_cat() {
        let proxy = fresh_proxy();
        let source_text = "use std::io;\nfn main() {}\n";
        proxy.intercept_output("cat src/main.rs", source_text);
        let known = proxy.engine.session_store().known_files().unwrap();
        let expected = "src/main.rs".to_string();
        assert!(known.contains(&expected), "cat should track the file path");
    }

    /// A compiler-style `--> path` reference to a known file gets annotated.
    #[test]
    fn test_context_refs_annotate_known_files() {
        let proxy = fresh_proxy();
        let _ = proxy.engine.session_store().add_known_file("src/auth.rs");
        let diagnostic = "error[E0308]: mismatched types\n --> src/auth.rs:42:5\n";
        let annotated = proxy.apply_context_refs(diagnostic);
        assert!(
            annotated.contains("[in context]"),
            "should annotate known file: {}",
            annotated
        );
    }

    /// References to files the session has not seen are left alone.
    #[test]
    fn test_context_refs_no_annotation_for_unknown_files() {
        let proxy = fresh_proxy();
        let diagnostic = "error[E0308]: mismatched types\n --> src/unknown.rs:42:5\n";
        let annotated = proxy.apply_context_refs(diagnostic);
        assert!(!annotated.contains("[in context]"), "should not annotate unknown file");
    }
}