// bctx-weave 0.1.29

use forge::signal::compactor;
use once_cell::sync::Lazy;
use regex::Regex;

/// Matches a shell-rendered BSON ObjectId such as `ObjectId('507f1f77bcf86cd799439011')`.
///
/// Both quote styles around the id are accepted: the single-quoted form and the
/// double-quoted form printed by the legacy `mongo` shell. Capture group 1 holds the
/// 24 lowercase hex digits, so replacement strings can refer to it as `$1`.
static OBJECT_ID_RE: Lazy<Regex> =
    Lazy::new(|| Regex::new(r#"ObjectId\(['"]([a-f0-9]{24})['"]\)"#).unwrap());
/// Matches a shell-rendered date such as `ISODate('2024-01-15T10:30:00.000Z')`.
///
/// Both quote styles around the timestamp are accepted: the single-quoted form and the
/// double-quoted form printed by the legacy `mongo` shell. Capture group 1 holds the
/// text between the quotes, so replacement strings can refer to it as `$1`.
static ISO_DATE_RE: Lazy<Regex> =
    Lazy::new(|| Regex::new(r#"ISODate\(['"]([^'"]+)['"]\)"#).unwrap());
/// Pattern for whole lines whose key is `blockManager`, `catalog` or `storageStats`.
///
/// `(?m)` lets `^` anchor at every line start; leading whitespace and, when present,
/// the line's trailing newline are part of the match.
const DEBUG_LINES_PATTERN: &str = r"(?m)^\s*(?:blockManager|catalog|storageStats):[^\n]*\n?";

/// Compiled form of [`DEBUG_LINES_PATTERN`], built on first use.
static DEBUG_LINES_RE: Lazy<Regex> = Lazy::new(|| Regex::new(DEBUG_LINES_PATTERN).unwrap());

// ── query / find / aggregate results ─────────────────────────────────────────

/// Compresses the output of a query / find / aggregate command.
///
/// The text is first passed through `compactor::normalise`. Matches of `OBJECT_ID_RE`
/// are rewritten to `ObjectId(<hex>)` without quotes, matches of `ISO_DATE_RE` are
/// reduced to the bare timestamp, and blank lines are dropped.
///
/// At most 60 lines are returned. When more remain, the first 60 are followed by a
/// trailer stating how many were cut. The count is in lines, although the trailer
/// text calls them documents.
pub fn compress_query(raw: &str) -> String {
    const MAX_LINES: usize = 60;

    let normalised = compactor::normalise(raw);
    let unquoted_ids = OBJECT_ID_RE.replace_all(&normalised, "ObjectId($1)");
    let compacted = ISO_DATE_RE.replace_all(&unquoted_ids, "$1");

    let kept: Vec<&str> = compacted
        .lines()
        .filter(|line| !line.trim().is_empty())
        .collect();

    // Common case: everything fits under the cap.
    if kept.len() <= MAX_LINES {
        return kept.join("\n");
    }

    let omitted = kept.len() - MAX_LINES;
    let head = kept[..MAX_LINES].join("\n");
    format!(
        "{}\n… [{} more documents — use .limit() or .projection()] …",
        head, omitted
    )
}

// ── explain / executionStats ──────────────────────────────────────────────────

/// Compresses `explain()` / `executionStats` output down to its key fields.
///
/// After `compactor::normalise`, every match of `DEBUG_LINES_RE` is removed. Of the
/// remaining lines, only those containing one of the `KEY_FIELDS` markers survive
/// (case-sensitive substring match). Surviving lines keep their original indentation.
///
/// If no line survives, the input is treated as ordinary query output and handed to
/// [`compress_query`] instead.
pub fn compress_explain(raw: &str) -> String {
    // Substrings that mark a line as worth keeping.
    const KEY_FIELDS: [&str; 12] = [
        "stage",
        "executionTimeMillis",
        "totalDocsExamined",
        "totalKeysExamined",
        "nReturned",
        "docsExamined",
        "indexName",
        "parsedQuery",
        "winningPlan",
        "rejectedPlans",
        "error",
        "Error",
    ];

    let normalised = compactor::normalise(raw);
    let stripped = DEBUG_LINES_RE.replace_all(&normalised, "");

    // No marker contains whitespace, so testing the untrimmed line is equivalent to
    // testing the trimmed one, and a blank line can never match.
    let kept: Vec<&str> = stripped
        .lines()
        .filter(|line| KEY_FIELDS.iter().any(|field| line.contains(*field)))
        .collect();

    if kept.is_empty() {
        compress_query(raw)
    } else {
        kept.join("\n")
    }
}

// ── collection stats / db.stats() ────────────────────────────────────────────

/// Compresses collection stats / `db.stats()` output.
///
/// After `compactor::normalise`, a line is kept when it contains at least one `WANTED`
/// marker and none of the `NOISE` markers. All matching is case-sensitive substring
/// matching, and kept lines retain their original indentation.
///
/// Both `size`/`Size` and `index`/`Index` are listed so that camelCase fields such as
/// `dataSize`, `freeStorageSize` and `totalIndexSize` are retained alongside `size`,
/// `nindexes` and `indexSizes`.
///
/// If no line survives, the input is handed to [`compress_query`] instead.
pub fn compress_stats(raw: &str) -> String {
    // Storage-engine internals: a line mentioning any of these is dropped.
    const NOISE: [&str; 4] = ["wiredTiger", "blockManager", "cache", "LSM"];
    // Markers for counts, sizes, index info, namespace and status lines.
    // NOTE(review): "ns" and "ok" are short enough to match inside unrelated keys;
    // kept as-is to preserve the existing selection.
    const WANTED: [&str; 8] = [
        "count", "size", "Size", "index", "Index", "avgObj", "ns", "ok",
    ];

    let normalised = compactor::normalise(raw);

    // No marker contains whitespace, so the untrimmed line can be tested directly and
    // blank lines never match a WANTED marker.
    let kept: Vec<&str> = normalised
        .lines()
        .filter(|line| !NOISE.iter().any(|noise| line.contains(*noise)))
        .filter(|line| WANTED.iter().any(|marker| line.contains(*marker)))
        .collect();

    if kept.is_empty() {
        compress_query(raw)
    } else {
        kept.join("\n")
    }
}

// ── generic mongosh output ────────────────────────────────────────────────────

/// Compresses generic mongosh output that fits none of the specialised handlers.
///
/// The text is passed through `compactor::normalise`. Matches of `OBJECT_ID_RE` are
/// rewritten to `ObjectId(<hex>)` without quotes, matches of `ISO_DATE_RE` are reduced
/// to the bare timestamp, and blank lines are dropped.
///
/// At most 80 lines are returned. When more remain, the first 80 are followed by a
/// trailer stating how many lines were cut.
pub fn compress_mongosh(raw: &str) -> String {
    const MAX_LINES: usize = 80;

    let normalised = compactor::normalise(raw);
    let unquoted_ids = OBJECT_ID_RE.replace_all(&normalised, "ObjectId($1)");
    let compacted = ISO_DATE_RE.replace_all(&unquoted_ids, "$1");

    let kept: Vec<&str> = compacted
        .lines()
        .filter(|line| !line.trim().is_empty())
        .collect();

    // Common case: everything fits under the cap.
    if kept.len() <= MAX_LINES {
        return kept.join("\n");
    }

    let omitted = kept.len() - MAX_LINES;
    let head = kept[..MAX_LINES].join("\n");
    format!("{}\n… [{} more lines] …", head, omitted)
}

#[cfg(test)]
mod tests {
    use super::*;

    /// 70 single-line documents exceed the 60-line cap, so the truncation trailer
    /// must be present.
    #[test]
    fn query_truncates_large_result_set() {
        // 7 fixed hex digits + 17 zero-padded ones give the 24 digits of a well-formed
        // ObjectId, so the fixture also passes through the ObjectId rewrite.
        let docs: Vec<String> = (0..70)
            .map(|i| format!("{{ _id: ObjectId('6621f4b{i:017x}'), name: 'doc{i}' }}"))
            .collect();
        let out = compress_query(&docs.join("\n"));
        assert!(out.contains("more documents"), "{out}");
    }

    /// The quoted `ObjectId('…')` wrapper is rewritten to the unquoted form.
    #[test]
    fn query_compacts_object_ids() {
        let raw = "{ _id: ObjectId('507f1f77bcf86cd799439011'), name: 'Alice' }";
        let out = compress_query(raw);
        assert!(!out.contains("ObjectId('"), "{out}");
        assert!(out.contains("ObjectId(507f"), "{out}");
    }

    /// The `ISODate('…')` wrapper is removed while the timestamp itself is kept.
    #[test]
    fn query_compacts_iso_dates() {
        let raw = "{ createdAt: ISODate('2024-01-15T10:30:00.000Z') }";
        let out = compress_query(raw);
        assert!(!out.contains("ISODate('"), "{out}");
        assert!(out.contains("2024-01-15"), "{out}");
    }

    /// Key execution fields survive while the `blockManager` debug line is stripped.
    #[test]
    fn explain_keeps_execution_stats() {
        let raw = "{\n  winningPlan: { stage: 'IXSCAN', indexName: 'email_1' },\n  executionTimeMillis: 2,\n  totalDocsExamined: 1,\n  nReturned: 1,\n  blockManager: { bytesRead: 0 }\n}";
        let out = compress_explain(raw);
        assert!(out.contains("executionTimeMillis"), "{out}");
        assert!(out.contains("nReturned"), "{out}");
        assert!(!out.contains("blockManager"), "{out}");
    }

    /// Lines mentioning `wiredTiger` are dropped while count lines are kept.
    #[test]
    fn stats_strips_wiredtiger_internals() {
        let raw = "{\n  count: 1234,\n  size: 567890,\n  wiredTiger: { cache: { 'bytes read': 1000 } },\n  ok: 1\n}";
        let out = compress_stats(raw);
        assert!(!out.contains("wiredTiger"), "{out}");
        assert!(out.contains("count"), "{out}");
    }
}