// bctx-weave 0.1.16
//
// bctx-weave — FilterMesh lens pipeline, CLI interception, domain compression
// Documentation
use forge::signal::compactor;
use once_cell::sync::Lazy;
use regex::Regex;

// "INFO mlflow.tracking..." / "INFO mlflow.projects..." log lines
// MLflow's stock log format puts a "YYYY/MM/DD HH:MM:SS" timestamp in front of
// the level, so that prefix is accepted (and removed) as part of the line.
// Without it, timestamped INFO/DEBUG lines would never match the `^` anchor
// and would leak through every compressor that relies on this pattern.
// The prefix uses `[ \t]+` rather than `\s+` so it cannot span a line break.
static INFO_LOG_RE: Lazy<Regex> = Lazy::new(|| {
    Regex::new(
        r"(?m)^(?:\d{4}/\d{2}/\d{2} \d{2}:\d{2}:\d{2}[ \t]+)?(?:INFO|DEBUG)\s+mlflow\.[^\n]*\n?",
    )
    .unwrap()
});
// "2024/01/01 12:00:00" timestamp prefix
// Matched per line (multi-line mode); the trailing whitespace after the
// timestamp is consumed together with it.
static TIMESTAMP_RE: Lazy<Regex> = Lazy::new(|| {
    let pattern = r"(?m)^\d{4}/\d{2}/\d{2} \d{2}:\d{2}:\d{2}\s+";
    Regex::new(pattern).unwrap()
});
// Metric lines: "Epoch 1/10: loss=0.324 accuracy=0.891"
// A line qualifies when it starts with "Epoch <n>" or (optionally indented)
// "Step <n>" and later mentions "loss", "acc" or "metric" on the same line.
static METRIC_RE: Lazy<Regex> = Lazy::new(|| {
    let pattern = r"(?m)^(?:Epoch\s+\d+|\s*Step\s+\d+)[^\n]+(?:loss|acc|metric)[^\n]*$";
    Regex::new(pattern).unwrap()
});

// ── mlflow run ────────────────────────────────────────────────────────────────

/// Compresses the output of `mlflow run` down to the lines worth keeping.
///
/// The input is passed through `compactor::normalise`, leading timestamps are
/// removed, and MLflow INFO/DEBUG log lines are dropped. From what remains,
/// only non-blank lines are kept that mention a run ID, experiment, artifact,
/// logged value, metric or error, that start with "MLflow", or that look like
/// an epoch/step metric line.
///
/// Returns the kept lines joined with `\n`. If no line qualifies, the
/// de-noised text is returned via `compactor::collapse_blanks` instead, so
/// the caller never gets an empty result just because nothing matched.
pub fn compress_run(raw: &str) -> String {
    // Substrings that mark a line as worth keeping.
    const KEEP_MARKERS: [&str; 10] = [
        "run_id",
        "Run ID",
        "experiment",
        "artifact",
        "Logged",
        "loss",
        "accuracy",
        "metric",
        "ERROR",
        "error",
    ];

    let cleaned = compactor::normalise(raw);
    // Timestamps must be stripped first: MLflow prefixes its log lines with
    // "YYYY/MM/DD HH:MM:SS", and INFO_LOG_RE anchors on the level at the
    // start of a line. In the opposite order, timestamped INFO lines survive
    // and can then pass the keyword filter below.
    let s = TIMESTAMP_RE.replace_all(&cleaned, "");
    let s = INFO_LOG_RE.replace_all(&s, "");

    // Keep: run ID, experiment, artifact path, metrics, errors
    let useful: Vec<&str> = s
        .lines()
        .filter(|line| {
            let t = line.trim();
            !t.is_empty()
                && (KEEP_MARKERS.iter().any(|marker| t.contains(*marker))
                    || t.starts_with("MLflow")
                    || METRIC_RE.is_match(t))
        })
        .collect();

    if useful.is_empty() {
        // Nothing recognisable: hand back the de-noised text rather than "".
        compactor::collapse_blanks(&s)
    } else {
        useful.join("\n")
    }
}

// ── mlflow experiments list ───────────────────────────────────────────────────

/// Compresses the output of `mlflow experiments list`.
///
/// MLflow INFO/DEBUG log lines and blank lines are removed. At most the first
/// 25 remaining lines are returned; if more exist, a trailing
/// `… [N more experiments]` line reports how many were cut. Every non-blank
/// line counts towards the limit, including any header or separator rows.
pub fn compress_experiments(raw: &str) -> String {
    // Number of non-blank lines shown before the tail is summarised.
    const MAX_ROWS: usize = 25;

    let normalised = compactor::normalise(raw);
    let without_info = INFO_LOG_RE.replace_all(&normalised, "");
    let rows: Vec<&str> = without_info
        .lines()
        .filter(|row| !row.trim().is_empty())
        .collect();

    if rows.len() <= MAX_ROWS {
        return rows.join("\n");
    }

    let head = rows[..MAX_ROWS].join("\n");
    let hidden = rows.len() - MAX_ROWS;
    format!("{head}\n… [{hidden} more experiments]")
}

// ── mlflow models list ────────────────────────────────────────────────────────

/// Compresses the output of `mlflow models ...` commands.
///
/// The input goes through `compactor::normalise`, MLflow INFO/DEBUG log lines
/// are removed, and the result is passed to `compactor::collapse_blanks`.
/// All other content is passed through.
pub fn compress_models(raw: &str) -> String {
    let normalised = compactor::normalise(raw);
    let without_info = INFO_LOG_RE.replace_all(&normalised, "");
    compactor::collapse_blanks(&without_info)
}

// ── top-level dispatcher ──────────────────────────────────────────────────────

/// Routes MLflow CLI output to the compressor for the given subcommand.
///
/// `subcmd` is trimmed and matched by prefix, in this order: `run`,
/// `experiment`, `model`. Prefix matching means longer spellings such as
/// `runs`, `experiments` or `models` take the same route. Any other
/// subcommand only has its INFO/DEBUG log lines removed and is otherwise
/// passed through.
pub fn compress_mlflow(subcmd: &str, raw: &str) -> String {
    match subcmd.trim() {
        cmd if cmd.starts_with("run") => compress_run(raw),
        cmd if cmd.starts_with("experiment") => compress_experiments(raw),
        cmd if cmd.starts_with("model") => compress_models(raw),
        // server / ui / gc — strip INFO noise, passthrough rest
        _ => {
            let normalised = compactor::normalise(raw);
            let without_info = INFO_LOG_RE.replace_all(&normalised, "");
            compactor::collapse_blanks(&without_info)
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    // INFO log lines must disappear while the run ID and metric lines survive.
    #[test]
    fn run_strips_info_logs() {
        let raw = "INFO mlflow.tracking.fluent: Autologging successfully enabled for sklearn.\nINFO mlflow.utils.autologging_utils: Created MLflow autolog run with ID 'abc123'\nRun ID: abc123\nExperiment: my_experiment\nLogged metrics: loss=0.245, accuracy=0.923\n";
        let out = compress_run(raw);

        assert!(!out.contains("INFO mlflow"), "{out}");
        for needle in ["abc123", "accuracy"] {
            assert!(out.contains(needle), "{out}");
        }
    }

    // Thirty rows exceed the display limit, so a summary line is expected.
    #[test]
    fn experiments_truncates_long_list() {
        let listing = (0..30)
            .map(|i| format!("{i}  experiment-{i}  /path/{i}  active"))
            .collect::<Vec<String>>()
            .join("\n");
        let out = compress_experiments(&listing);
        assert!(out.contains("more experiments"), "{out}");
    }
}