use forge::signal::compactor;
use once_cell::sync::Lazy;
use regex::Regex;
/// Matches one whole MLflow logger line at `INFO` or `DEBUG` level.
///
/// In multi-line mode the pattern anchors at the start of a line, requires the
/// level name, whitespace, and a logger name beginning with `mlflow.`, then
/// consumes the rest of the line plus its trailing newline (if any) so that
/// replacing a match with `""` removes the line entirely.
///
/// Because of the `^` anchor, a line that still carries a leading timestamp
/// is not matched.
static INFO_LOG_RE: Lazy<Regex> =
Lazy::new(|| Regex::new(r"(?m)^(?:INFO|DEBUG)\s+mlflow\.[^\n]*\n?").unwrap());
/// Matches a `YYYY/MM/DD HH:MM:SS` timestamp at the start of a line, together
/// with the whitespace that follows it (multi-line mode).
///
/// Only the prefix is matched; the remainder of the line is left in place.
static TIMESTAMP_RE: Lazy<Regex> =
Lazy::new(|| Regex::new(r"(?m)^\d{4}/\d{2}/\d{2} \d{2}:\d{2}:\d{2}\s+").unwrap());
/// Matches a training-progress line that reports a metric.
///
/// The line must start with `Epoch <digits>` or (optionally indented)
/// `Step <digits>`, and must contain `loss`, `acc` or `metric` later on the
/// same line.
static METRIC_RE: Lazy<Regex> = Lazy::new(|| {
Regex::new(r"(?m)^(?:Epoch\s+\d+|\s*Step\s+\d+)[^\n]+(?:loss|acc|metric)[^\n]*$").unwrap()
});
/// Compresses the output of an `mlflow run`-style command.
///
/// The text is normalised, timestamp prefixes and MLflow `INFO`/`DEBUG`
/// logger lines are removed, and only lines that look relevant (run ids,
/// experiment/artifact mentions, metrics, errors, `MLflow…` banners, or
/// epoch/step progress lines) are kept, joined with `\n`.
///
/// If no line passes the relevance filter, the de-noised text is returned
/// after `compactor::collapse_blanks` instead of an empty string.
pub fn compress_run(raw: &str) -> String {
    /// Returns `true` when `line` carries information worth keeping.
    fn is_signal(line: &str) -> bool {
        // Substrings that mark a line as relevant wherever they occur.
        const KEYWORDS: [&str; 10] = [
            "run_id",
            "Run ID",
            "experiment",
            "artifact",
            "Logged",
            "loss",
            "accuracy",
            "metric",
            "ERROR",
            "error",
        ];

        let t = line.trim();
        !t.is_empty()
            && (KEYWORDS.iter().any(|&k| t.contains(k))
                || t.starts_with("MLflow")
                || METRIC_RE.is_match(t))
    }

    let cleaned = compactor::normalise(raw);

    // Strip timestamps before logger lines: INFO_LOG_RE is anchored at the
    // start of a line, so a timestamped line such as
    // `2024/01/01 12:00:00 INFO mlflow.…` only matches once its timestamp
    // prefix has been removed. Doing it in the opposite order leaves those
    // log lines in the output.
    let s = TIMESTAMP_RE.replace_all(&cleaned, "");
    let s = INFO_LOG_RE.replace_all(&s, "");

    let useful: Vec<&str> = s.lines().filter(|l| is_signal(l)).collect();
    if useful.is_empty() {
        // Nothing recognisable: fall back to the de-noised text rather than
        // returning an empty string.
        return compactor::collapse_blanks(&s);
    }
    useful.join("\n")
}
/// Compresses an experiment listing.
///
/// After normalisation and removal of MLflow `INFO`/`DEBUG` logger lines,
/// blank lines are dropped. At most 25 rows are kept; when more are present
/// a trailing `… [N more experiments]` line reports how many were omitted.
pub fn compress_experiments(raw: &str) -> String {
    /// Maximum number of rows shown before the listing is truncated.
    const MAX_ROWS: usize = 25;

    let normalised = compactor::normalise(raw);
    let without_logs = INFO_LOG_RE.replace_all(&normalised, "");
    let rows: Vec<&str> = without_logs
        .lines()
        .filter(|row| !row.trim().is_empty())
        .collect();

    // Zero hidden rows means the listing fits and is returned whole.
    let hidden = rows.len().saturating_sub(MAX_ROWS);
    if hidden == 0 {
        return rows.join("\n");
    }

    let (shown, _) = rows.split_at(MAX_ROWS);
    format!("{}\n… [{} more experiments]", shown.join("\n"), hidden)
}
/// Compresses model-related command output.
///
/// Normalises the text, removes MLflow `INFO`/`DEBUG` logger lines, and
/// passes the result through `compactor::collapse_blanks` (presumably
/// squeezing runs of blank lines — confirm against the helper).
pub fn compress_models(raw: &str) -> String {
    let normalised = compactor::normalise(raw);
    let without_logs = INFO_LOG_RE.replace_all(&normalised, "");
    compactor::collapse_blanks(&without_logs)
}
/// Dispatches MLflow output to the compressor matching `subcmd`.
///
/// The subcommand is trimmed and matched by prefix, in this order: `run`,
/// `experiment`, `model`. Anything else gets the generic treatment:
/// normalise, drop MLflow `INFO`/`DEBUG` logger lines, then
/// `compactor::collapse_blanks`.
pub fn compress_mlflow(subcmd: &str, raw: &str) -> String {
    let command = subcmd.trim();

    if command.starts_with("run") {
        compress_run(raw)
    } else if command.starts_with("experiment") {
        compress_experiments(raw)
    } else if command.starts_with("model") {
        compress_models(raw)
    } else {
        // Unknown subcommand: only strip logger noise and tidy blank lines.
        let normalised = compactor::normalise(raw);
        let without_logs = INFO_LOG_RE.replace_all(&normalised, "");
        compactor::collapse_blanks(&without_logs)
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Logger noise is removed while the run id and metrics survive.
    #[test]
    fn run_strips_info_logs() {
        let out = compress_run("INFO mlflow.tracking.fluent: Autologging successfully enabled for sklearn.\nINFO mlflow.utils.autologging_utils: Created MLflow autolog run with ID 'abc123'\nRun ID: abc123\nExperiment: my_experiment\nLogged metrics: loss=0.245, accuracy=0.923\n");

        assert!(!out.contains("INFO mlflow"), "{out}");
        for expected in ["abc123", "accuracy"] {
            assert!(out.contains(expected), "{out}");
        }
    }

    /// A listing longer than the row limit ends with a truncation notice.
    #[test]
    fn experiments_truncates_long_list() {
        let listing = (0..30)
            .map(|i| format!("{i} experiment-{i} /path/{i} active"))
            .collect::<Vec<String>>()
            .join("\n");

        let out = compress_experiments(&listing);
        assert!(out.contains("more experiments"), "{out}");
    }
}