miplog 0.2.0

Parse MIP/LP solver log files (Gurobi, Xpress, SCIP, HiGHS, COPT, …) into a unified, serde-serializable schema.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
//! Unified, solver-agnostic log schema.
//!
//! Fields are `Option<_>` because no single solver emits everything; parsers
//! fill in what they observe and leave the rest `None`. Solver-specific data
//! that doesn't fit the common vocabulary goes under [`SolverLog::other_data`].
//!
//! # Two tiers of fields
//!
//! The schema is one struct but the fields fall into two **tiers** by
//! reliability:
//!
//! 1. **Core (`verify_common`)** — fields we guarantee are populated when the
//!    solver log contains the corresponding information. Missing a Core field
//!    on a well-formed log is a parser bug. Downstream tooling can build
//!    cross-solver reports on these without defensive coding.
//!    - `solver` (trivially)
//!    - `termination.status` (non-`Unknown` for a complete run)
//!    - `timing.wall_seconds`
//!    - `bounds.primal` + `bounds.dual` when [`Status::Optimal`]
//!
//! 2. **Extended (best-effort)** — everything else. Parsers populate these
//!    when the log makes it easy, skip them when it doesn't. Missing an
//!    Extended field is not a bug. Examples: `version`, `solver_git_hash`,
//!    `cuts`, pre-presolve dims, root-LP times, simplex iterations.
//!
//! Promotion from Extended to Core happens with a minor version bump when
//! all active parsers reliably populate a field.
//!
//! [`Status::Optimal`]: Status::Optimal

use serde::{Deserialize, Serialize};
use std::collections::BTreeMap;

/// Closed set of supported solvers. Adding one requires a PR + minor version
/// bump — this gives `match` exhaustiveness and keeps the schema coherent.
///
/// Serialized through serde with `rename_all = "snake_case"`; every variant
/// is a single word, so the serialized name is simply the lowercase variant
/// name (`Solver::Highs` ↔ `"highs"`), the same string [`Solver::key`]
/// returns. The type is `Copy`, so it is passed around by value.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum Solver {
    /// Gurobi.
    Gurobi,
    /// Xpress.
    Xpress,
    /// SCIP.
    Scip,
    /// HiGHS.
    Highs,
    /// CPLEX.
    Cplex,
    /// CBC.
    Cbc,
    /// COPT.
    Copt,
    /// OptVerse.
    Optverse,
    /// MOSEK.
    Mosek,
}

impl Solver {
    /// Short lowercase key. Stable — treat as part of the public API.
    pub const fn key(self) -> &'static str {
        match self {
            Solver::Gurobi => "gurobi",
            Solver::Xpress => "xpress",
            Solver::Scip => "scip",
            Solver::Highs => "highs",
            Solver::Cplex => "cplex",
            Solver::Cbc => "cbc",
            Solver::Copt => "copt",
            Solver::Optverse => "optverse",
            Solver::Mosek => "mosek",
        }
    }
}

/// Which version of this crate (`miplog`) produced a given [`SolverLog`].
/// Captured so persisted parse results can be re-validated after parser
/// changes.
///
/// Not to be confused with the *solver's* own version and git hash, which
/// live in [`SolverLog::version`] and [`SolverLog::solver_git_hash`].
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct ParserInfo {
    /// Crate version (semver), e.g. "0.1.0".
    pub version: String,
    /// Short git hash of the crate build, empty string if unavailable.
    pub git_hash: String,
}

impl ParserInfo {
    /// Version + git hash of the currently-running crate.
    ///
    /// Both values are baked in at compile time:
    /// * `version` comes from `CARGO_PKG_VERSION`, which Cargo always sets.
    /// * `git_hash` comes from `MIPLOG_GIT_HASH` (presumably exported by the
    ///   crate's build script — confirm against `build.rs`). It is read with
    ///   `option_env!` so that a build environment which does not export the
    ///   variable yields the documented empty string instead of a hard
    ///   compile error.
    pub fn current() -> Self {
        Self {
            version: env!("CARGO_PKG_VERSION").to_owned(),
            git_hash: option_env!("MIPLOG_GIT_HASH").unwrap_or("").to_owned(),
        }
    }
}

/// Top-level parsed representation of a solver log.
///
/// Parsers start from [`SolverLog::new`] and fill in whatever the log
/// exposes; anything they do not observe stays `None` / empty.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SolverLog {
    /// Which solver produced the log.
    pub solver: Solver,
    /// Version of this crate that produced the parse result.
    pub parser: ParserInfo,
    /// Free-form version string as reported by the log (e.g. "11.0.0").
    pub version: Option<String>,
    /// Solver git hash, when the solver emits one (SCIP's `[GitHash: ...]`,
    /// HiGHS's `git hash: ...`). Distinct from [`ParserInfo::git_hash`].
    pub solver_git_hash: Option<String>,
    /// Problem name as the solver reported it (often the input filename stem).
    pub problem: Option<String>,

    /// Why the solver stopped.
    pub termination: Termination,
    /// Timings in seconds.
    pub timing: Timing,
    /// Objective bounds at termination.
    pub bounds: Bounds,
    /// Branch-and-bound tree statistics.
    pub tree: TreeStats,
    /// Problem dimensions before and after presolve.
    pub presolve: PresolveStats,

    /// Counts of cuts applied, keyed by solver-reported family name.
    /// (Families don't map cleanly across solvers — we preserve raw labels.)
    /// Omitted from the serialized output when empty.
    #[serde(skip_serializing_if = "BTreeMap::is_empty", default)]
    pub cuts: BTreeMap<String, u64>,

    /// Every B&B progress-line the solver emitted, in chronological order.
    /// Stored columnar (struct-of-arrays) for compression and columnar
    /// analytics; use [`ProgressTable::iter`] for row-oriented access.
    /// Omitted from the serialized output when empty.
    #[serde(skip_serializing_if = "ProgressTable::is_empty", default)]
    pub progress: ProgressTable,

    /// Everything the unified schema doesn't cover. Each entry pairs a
    /// solver-scoped name with an arbitrary JSON value. Stable names
    /// (e.g. `"scip.heuristics"`, `"scip.root_node"`) let downstream tooling
    /// pattern-match without promising cross-solver compatibility.
    ///
    /// The `Display` summary skips this field — use JSON for full fidelity.
    /// Omitted from the serialized output when empty.
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    pub other_data: Vec<NamedValue>,
}

/// A named, freeform-value entry. Used in [`SolverLog::other_data`] as the
/// escape hatch for solver-specific data that doesn't fit the common schema.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct NamedValue {
    /// Entry name, e.g. `"scip.heuristics"`.
    pub name: String,
    /// Arbitrary JSON payload; its shape is defined by whichever parser
    /// emitted the entry.
    pub value: serde_json::Value,
}

impl NamedValue {
    pub fn new(name: impl Into<String>, value: impl Into<serde_json::Value>) -> Self {
        Self {
            name: name.into(),
            value: value.into(),
        }
    }
}

/// Columnar store for B&B progress rows. Maintains an invariant: every
/// column vector has the same length. Rows are appended via [`ProgressTable::push`].
///
/// Column storage gives us:
/// * order-of-magnitude smaller size after gzip than row-oriented JSON
///   (repeated patterns in `time_seconds`, `nodes_explored`, etc. dedupe)
/// * natural shape for columnar analytics (`primal`, `dual`, `gap` as
///   time-series)
///
/// Each column mirrors the [`NodeSnapshot`] field of the same name; index
/// `i` across all columns forms row `i`. The fields are public, so the
/// equal-length invariant is only upheld as long as callers go through
/// [`ProgressTable::push`].
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct ProgressTable {
    /// Elapsed time per row, in seconds. The only non-optional column; its
    /// length defines the row count.
    pub time_seconds: Vec<f64>,
    /// Nodes explored so far, per row.
    pub nodes_explored: Vec<Option<u64>>,
    /// Primal bound per row.
    pub primal: Vec<Option<f64>>,
    /// Dual bound per row.
    pub dual: Vec<Option<f64>>,
    /// Relative gap per row, as a fraction.
    pub gap: Vec<Option<f64>>,
    /// Search depth per row.
    pub depth: Vec<Option<u32>>,
    /// LP / simplex iterations per row.
    pub lp_iterations: Vec<Option<u64>>,
    /// Optional row marker per row.
    pub event: Vec<Option<NodeEvent>>,
}

impl ProgressTable {
    /// Number of rows in the table.
    ///
    /// `time_seconds` is the only non-optional column, so its length is the
    /// row count.
    pub fn len(&self) -> usize {
        self.time_seconds.len()
    }

    /// `true` when no progress row has been recorded.
    pub fn is_empty(&self) -> bool {
        self.time_seconds.is_empty()
    }

    /// Append one row. Maintains the equal-length invariant across columns
    /// (provided it held before the call).
    pub fn push(&mut self, row: NodeSnapshot) {
        // Exhaustive destructuring: adding a field to `NodeSnapshot` without
        // adding the matching column here becomes a compile error instead of
        // a silently dropped value.
        let NodeSnapshot {
            time_seconds,
            nodes_explored,
            primal,
            dual,
            gap,
            depth,
            lp_iterations,
            event,
        } = row;
        self.time_seconds.push(time_seconds);
        self.nodes_explored.push(nodes_explored);
        self.primal.push(primal);
        self.dual.push(dual);
        self.gap.push(gap);
        self.depth.push(depth);
        self.lp_iterations.push(lp_iterations);
        self.event.push(event);
    }

    /// Iterate rows as [`NodeSnapshot`] values, in insertion order.
    ///
    /// One row is produced per `time_seconds` entry. The columns are public
    /// and can also arrive through deserialization, so the equal-length
    /// invariant is not guaranteed here: a cell missing from a shorter
    /// optional column is reported as `None` rather than panicking, and
    /// surplus cells in a longer column are ignored.
    pub fn iter(&self) -> impl Iterator<Item = NodeSnapshot> + '_ {
        self.time_seconds
            .iter()
            .enumerate()
            .map(move |(i, &time_seconds)| NodeSnapshot {
                time_seconds,
                nodes_explored: self.nodes_explored.get(i).copied().flatten(),
                primal: self.primal.get(i).copied().flatten(),
                dual: self.dual.get(i).copied().flatten(),
                gap: self.gap.get(i).copied().flatten(),
                depth: self.depth.get(i).copied().flatten(),
                lp_iterations: self.lp_iterations.get(i).copied().flatten(),
                event: self.event.get(i).cloned().flatten(),
            })
    }

    /// The last recorded time (useful for end-of-run display), or `None`
    /// when the table is empty.
    pub fn last_time(&self) -> Option<f64> {
        self.time_seconds.last().copied()
    }
}

/// Row view / input type for the B&B progress table. Solvers use different
/// column names for the same concepts: Gurobi `BestBd` ↔ Xpress/COPT/HiGHS
/// `BestBound` ↔ SCIP `dualbound`; `Incumbent` ↔ `BestSol` ↔ `primalbound`.
/// We collapse to one vocabulary.
///
/// This is the value consumed by [`ProgressTable::push`] and produced by
/// [`ProgressTable::iter`]. The derived `Default` is a row at time `0.0`
/// with every optional field `None`.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct NodeSnapshot {
    /// Elapsed wall time the solver reported at this row (seconds).
    pub time_seconds: f64,
    /// Total nodes explored so far (Gurobi "Expl", Xpress "Node", COPT "Nodes").
    pub nodes_explored: Option<u64>,
    /// Best integer-feasible objective so far (primal bound).
    pub primal: Option<f64>,
    /// Best dual bound (valid lower bound on the optimal value for minimization).
    pub dual: Option<f64>,
    /// Relative gap as a fraction (0.0423 = 4.23%).
    pub gap: Option<f64>,
    /// Current search depth, when the solver reports one.
    pub depth: Option<u32>,
    /// Simplex iterations (per-node or cumulative — solver-specific).
    pub lp_iterations: Option<u64>,
    /// Optional row marker. Solvers flag noteworthy rows (incumbent update,
    /// heuristic hit, cutoff, branch by …). We normalize the common ones and
    /// stash the rest as [`NodeEvent::Other`]. Omitted from the serialized
    /// row when `None`.
    #[serde(skip_serializing_if = "Option::is_none", default)]
    pub event: Option<NodeEvent>,
}

/// Marker attached to a noteworthy B&B progress row (see
/// [`NodeSnapshot::event`]).
///
/// The common markers are normalized into dedicated variants; anything else
/// is kept verbatim in [`NodeEvent::Other`]. Variant names are serialized in
/// `snake_case` (see the `rename_all` attribute).
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum NodeEvent {
    /// New incumbent found by a primal heuristic.
    Heuristic,
    /// New incumbent found during branching.
    BranchSolution,
    /// Node cut off by bound.
    Cutoff,
    /// Raw solver-specific marker that didn't map to the common set.
    Other(String),
}

// `Display` is implemented in `text.rs` and emits the human-readable summary.

impl SolverLog {
    /// Empty log for a given solver — parsers start here and fill fields in.
    pub fn new(solver: Solver) -> Self {
        Self {
            solver,
            parser: ParserInfo::current(),
            version: None,
            solver_git_hash: None,
            problem: None,
            termination: Termination::default(),
            timing: Timing::default(),
            bounds: Bounds::default(),
            tree: TreeStats::default(),
            presolve: PresolveStats::default(),
            cuts: BTreeMap::new(),
            progress: ProgressTable::default(),
            other_data: Vec::new(),
        }
    }

    /// Check that the **Core** fields (see module-level docs) are populated.
    /// Returns the list of missing field names, or `Ok(())` if all present.
    /// This is the strict tier — a well-formed log that fails this check
    /// indicates a parser gap worth filing.
    pub fn verify_common(&self) -> Result<(), Vec<&'static str>> {
        let mut missing = Vec::new();
        if self.termination.status == Status::Unknown {
            missing.push("termination.status");
        }
        if self.timing.wall_seconds.is_none() {
            missing.push("timing.wall_seconds");
        }
        // For Optimal runs, both bounds are expected (solver proved them equal).
        if self.termination.status == Status::Optimal {
            if self.bounds.primal.is_none() {
                missing.push("bounds.primal");
            }
            if self.bounds.dual.is_none() {
                missing.push("bounds.dual");
            }
        }
        if missing.is_empty() {
            Ok(())
        } else {
            Err(missing)
        }
    }
}

/// Why the solver stopped.
///
/// Serialized in `snake_case` (see the `rename_all` attribute). The default
/// is [`Status::Unknown`], which [`SolverLog::verify_common`] reports as a
/// missing `termination.status`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "snake_case")]
pub enum Status {
    /// Search finished and optimality was proved.
    Optimal,
    /// Problem proved infeasible.
    Infeasible,
    /// Problem proved unbounded.
    Unbounded,
    /// Problem either infeasible or unbounded (solver couldn't distinguish).
    InfeasibleOrUnbounded,
    /// Stopped by wall-time limit.
    TimeLimit,
    /// Stopped by memory limit.
    MemoryLimit,
    /// Stopped by node/iteration limit or other numeric limit.
    OtherLimit,
    /// Stopped by user (signal, callback).
    UserInterrupt,
    /// Numerical failure (ill-conditioned, unrecoverable).
    NumericalError,
    /// Parser couldn't determine a terminal status.
    #[default]
    Unknown,
}

/// How the run ended: the normalized [`Status`] plus the solver's own
/// wording, when available. The derived `Default` is `Status::Unknown` with
/// no raw reason.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct Termination {
    /// Normalized, solver-agnostic termination status.
    pub status: Status,
    /// Solver-specific termination string, if any ("Time limit reached" etc.).
    pub raw_reason: Option<String>,
}

impl Termination {
    /// `true` when the solver reached a conclusive answer — optimal,
    /// infeasible, unbounded, or infeasible-or-unbounded — and `false` when
    /// it stopped on a limit, an interrupt, a numerical failure, or the
    /// status is unknown.
    pub fn solved_to_completion(&self) -> bool {
        // Exhaustive on purpose: a new `Status` variant must be classified
        // here before the crate compiles again.
        match self.status {
            Status::Optimal
            | Status::Infeasible
            | Status::Unbounded
            | Status::InfeasibleOrUnbounded => true,
            Status::TimeLimit
            | Status::MemoryLimit
            | Status::OtherLimit
            | Status::UserInterrupt
            | Status::NumericalError
            | Status::Unknown => false,
        }
    }
}

/// Seconds. We standardize on wall time unless explicitly CPU time.
///
/// Every field is optional; parsers fill in what the log reports.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct Timing {
    /// Wall-clock time of the run. A Core field: a `None` here is reported
    /// by [`SolverLog::verify_common`] as `timing.wall_seconds`.
    pub wall_seconds: Option<f64>,
    /// CPU time, for solvers that report it explicitly.
    pub cpu_seconds: Option<f64>,
    /// Time spent reading the problem file (before presolve).
    pub reading_seconds: Option<f64>,
    /// Time attributed to presolve — presumably as printed by the solver;
    /// confirm per parser.
    pub presolve_seconds: Option<f64>,
    /// Time attributed to the root relaxation — presumably the root-LP time
    /// mentioned in the module docs; confirm per parser.
    pub root_relaxation_seconds: Option<f64>,
}

/// Objective bounds at termination. `gap` is solver-reported when available;
/// otherwise parsers may leave it `None` and let consumers compute it.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct Bounds {
    /// Final primal bound. Expected to be present for `Status::Optimal`
    /// runs (checked by [`SolverLog::verify_common`]).
    pub primal: Option<f64>,
    /// Final dual bound. Expected to be present for `Status::Optimal` runs
    /// (checked by [`SolverLog::verify_common`]).
    pub dual: Option<f64>,
    /// Gap as reported by the solver (as a fraction, `0.0423 = 4.23%`).
    /// Use [`Bounds::effective_gap`] to get a value derived from primal/dual
    /// when the solver didn't print one directly.
    pub gap: Option<f64>,
    /// Dual bound after the root LP (before branching). Equivalent to the
    /// first-LP objective on a minimization problem. Interesting quality
    /// signal independent of the final dual bound.
    pub root_dual: Option<f64>,
    /// Primal value of the first feasible solution.
    pub first_primal: Option<f64>,
    /// When the first feasible solution was found, in seconds.
    pub first_primal_time_seconds: Option<f64>,
    /// Primal-dual integral at termination. Rewards solvers that close the
    /// gap early even if they take equal total time; a direct benchmarking
    /// metric. Gurobi 11+ and SCIP 10+ report it natively.
    pub primal_dual_integral: Option<f64>,
}

impl Bounds {
    /// Reported gap if present, otherwise the gap derived from primal/dual
    /// using Gurobi's convention: `|primal - dual| / max(1e-10, |primal|)`.
    ///
    /// A solver-reported `gap` always wins and is returned unchanged. The
    /// derived value needs **both** bounds, so this returns `None` when
    /// `gap` is absent and either `primal` or `dual` is missing.
    ///
    /// The `1e-10` floor on the denominator avoids a division by zero when
    /// the primal bound is exactly `0`.
    pub fn effective_gap(&self) -> Option<f64> {
        self.gap.or_else(|| {
            let (primal, dual) = self.primal.zip(self.dual)?;
            Some((primal - dual).abs() / primal.abs().max(1e-10))
        })
    }
}

/// Branch-and-bound tree statistics. All fields are optional, best-effort
/// values; parsers leave whatever the log does not report as `None`.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct TreeStats {
    /// Number of B&B nodes explored — presumably the end-of-run total;
    /// confirm per parser.
    pub nodes_explored: Option<u64>,
    /// Simplex iteration count — presumably the end-of-run total; confirm
    /// per parser.
    pub simplex_iterations: Option<u64>,
    /// Number of solutions found — presumably feasible incumbents; confirm
    /// per parser.
    pub solutions_found: Option<u64>,
    /// Maximum depth reached in the B&B tree.
    pub max_depth: Option<u32>,
    /// Number of solver restarts (SCIP calls these "runs"; Gurobi occasionally
    /// does an internal restart; most solvers default to 0/1).
    pub restarts: Option<u32>,
}

/// Problem dimensions before (`*_before`) and after (`*_after`) presolve.
/// All fields are optional, best-effort values; parsers leave whatever the
/// log does not report as `None`.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct PresolveStats {
    /// Row (constraint) count before presolve.
    pub rows_before: Option<u64>,
    /// Column (variable) count before presolve.
    pub cols_before: Option<u64>,
    /// Nonzero count before presolve.
    pub nonzeros_before: Option<u64>,
    /// Row (constraint) count after presolve.
    pub rows_after: Option<u64>,
    /// Column (variable) count after presolve.
    pub cols_after: Option<u64>,
    /// Nonzero count after presolve.
    pub nonzeros_after: Option<u64>,
}