Skip to main content

pounce_studio_core/
iter_dump.rs

1//! POUNCEIT v1 binary iter-dump parser.
2//!
3//! Spec: `tools/iter-dump/FORMAT.md`. Writer:
4//! `crates/pounce-algorithm/src/iter_dump.rs`. Reference Python parser:
5//! `tools/iter-dump/dump_inspect.py`.
6//!
7//! Format outline (all multi-byte values are little-endian; vectors are
8//! `u32 len + len*8` bytes of f64):
9//!
10//! ```text
11//! header: "POUNCEIT" | u32 version | u32 n | u32 m | u32 nnz_jac
12//!       | u32 nnz_h | u32 name_len | [u8; name_len]
13//! per iter:
14//!   scalar block (120 bytes): u32 iter, u32 status, 14 * f64
15//!   8 vectors:                x, s, y_c, y_d, z_L, z_U, v_L, v_U
16//!   filter block:             u32 filter_count, [(f64, f64); count]
17//! ```
18//!
19//! The parser is byte-driven and copy-free for vectors of length 0 (it
20//! still allocates a `Vec<f64>` for non-empty vectors so the public API
21//! stays simple). For very large traces, see [`IterDumpTrace::lazy_iter`]
22//! which yields one record at a time without retaining prior records.
23
24use serde::{Deserialize, Serialize};
25
26use crate::report::Error;
27
/// Eight-byte ASCII signature (`POUNCEIT`) that must open every v1 stream.
pub const MAGIC: &[u8; 8] = b"POUNCEIT";
/// Sole on-disk format version understood by this parser; a header
/// carrying any other value is rejected.
pub const FORMAT_VERSION: u32 = 1;
32
33#[derive(Debug, Clone, Serialize, Deserialize)]
34pub struct IterDumpHeader {
35    pub format_version: u32,
36    pub n: u32,
37    pub m: u32,
38    pub nnz_jac: u32,
39    pub nnz_h: u32,
40    pub name: String,
41}
42
43#[derive(Debug, Clone, Serialize, Deserialize)]
44pub struct IterDumpRecord {
45    pub iter: u32,
46    pub status: u32,
47    pub mu: f64,
48    pub tau: f64,
49    pub alpha_pr: f64,
50    pub alpha_du: f64,
51    pub delta_x: f64,
52    pub delta_s: f64,
53    pub delta_c: f64,
54    pub delta_d: f64,
55    pub inf_pr: f64,
56    pub inf_du: f64,
57    pub constr_viol: f64,
58    pub dual_inf: f64,
59    pub complementarity: f64,
60    pub f: f64,
61    pub x: Vec<f64>,
62    pub s: Vec<f64>,
63    pub y_c: Vec<f64>,
64    pub y_d: Vec<f64>,
65    pub z_l: Vec<f64>,
66    pub z_u: Vec<f64>,
67    pub v_l: Vec<f64>,
68    pub v_u: Vec<f64>,
69    pub filter: Vec<(f64, f64)>,
70}
71
72#[derive(Debug, Clone, Serialize, Deserialize)]
73pub struct IterDumpTrace {
74    pub header: IterDumpHeader,
75    pub records: Vec<IterDumpRecord>,
76}
77
78impl IterDumpTrace {
79    /// Parse a complete POUNCEIT v1 stream from a byte slice.
80    ///
81    /// Reads the header, then loops reading iteration records until the
82    /// stream is exhausted. Any truncation, version mismatch, or bad
83    /// magic returns [`Error::IterDump`].
84    pub fn from_bytes(bytes: &[u8]) -> Result<Self, Error> {
85        let mut cur = Cursor::new(bytes);
86        let header = parse_header(&mut cur)?;
87        let mut records = Vec::new();
88        while cur.remaining() > 0 {
89            records.push(parse_record(&mut cur)?);
90        }
91        Ok(Self { header, records })
92    }
93
94    /// Lazy iterator over records. Each call to `next` parses one
95    /// record, so memory stays bounded by the largest single record.
96    /// Useful when the trace is hundreds of MB. The iterator is
97    /// **fused**: after the first parse error it returns `None`
98    /// forever rather than retrying against an advanced cursor.
99    pub fn lazy_iter(bytes: &[u8]) -> Result<(IterDumpHeader, LazyRecords<'_>), Error> {
100        let mut cur = Cursor::new(bytes);
101        let header = parse_header(&mut cur)?;
102        Ok((
103            header,
104            LazyRecords {
105                cur,
106                poisoned: false,
107            },
108        ))
109    }
110}
111
/// Minimal forward-only reader over a borrowed byte slice.
///
/// `std::io::Cursor` is deliberately avoided: the library is `no_fs` (and
/// meant to stay `no_std`-friendly at the lib level — only the bin target
/// touches `std::io`).
struct Cursor<'a> {
    /// The complete input being parsed.
    buf: &'a [u8],
    /// Offset of the next unread byte in `buf`.
    off: usize,
}
119
120impl<'a> Cursor<'a> {
121    fn new(buf: &'a [u8]) -> Self {
122        Self { buf, off: 0 }
123    }
124
125    fn remaining(&self) -> usize {
126        self.buf.len() - self.off
127    }
128
129    fn read(&mut self, n: usize) -> Result<&'a [u8], Error> {
130        // Use checked arithmetic: an attacker-supplied length could
131        // overflow `self.off + n` on 32-bit targets.
132        let end = self
133            .off
134            .checked_add(n)
135            .ok_or_else(|| Error::IterDump(format!("length overflow: off={} + n={n}", self.off)))?;
136        if end > self.buf.len() {
137            return Err(Error::IterDump(format!(
138                "truncated: wanted {n} bytes at offset {}, file is {} bytes",
139                self.off,
140                self.buf.len()
141            )));
142        }
143        let out = &self.buf[self.off..end];
144        self.off = end;
145        Ok(out)
146    }
147
148    fn read_u32(&mut self) -> Result<u32, Error> {
149        let bytes = self.read(4)?;
150        Ok(u32::from_le_bytes([bytes[0], bytes[1], bytes[2], bytes[3]]))
151    }
152
153    fn read_f64(&mut self) -> Result<f64, Error> {
154        let bytes = self.read(8)?;
155        Ok(f64::from_le_bytes([
156            bytes[0], bytes[1], bytes[2], bytes[3], bytes[4], bytes[5], bytes[6], bytes[7],
157        ]))
158    }
159
160    fn read_vec(&mut self) -> Result<Vec<f64>, Error> {
161        let len = self.read_u32()? as usize;
162        if len == 0 {
163            return Ok(Vec::new());
164        }
165        // Cap allocation by the bytes actually left in the buffer
166        // before allocating. A corrupt 4-byte length field could
167        // otherwise ask for tens of GiB and either OOM-crash the
168        // process or DoS the parent. The subsequent `read()` would
169        // also catch this, but only *after* we've allocated.
170        let max_possible = self.remaining() / 8;
171        if len > max_possible {
172            return Err(Error::IterDump(format!(
173                "vector length {len} exceeds remaining stream capacity ({max_possible} f64s)",
174            )));
175        }
176        let byte_len = len
177            .checked_mul(8)
178            .ok_or_else(|| Error::IterDump(format!("vector byte-size overflow at len={len}")))?;
179        let bytes = self.read(byte_len)?;
180        let mut out = Vec::with_capacity(len);
181        for chunk in bytes.chunks_exact(8) {
182            out.push(f64::from_le_bytes([
183                chunk[0], chunk[1], chunk[2], chunk[3], chunk[4], chunk[5], chunk[6], chunk[7],
184            ]));
185        }
186        Ok(out)
187    }
188}
189
190fn parse_header(cur: &mut Cursor<'_>) -> Result<IterDumpHeader, Error> {
191    let magic = cur.read(8)?;
192    if magic != MAGIC {
193        return Err(Error::IterDump(format!(
194            "bad magic: expected {MAGIC:?}, got {magic:?}",
195        )));
196    }
197    let format_version = cur.read_u32()?;
198    if format_version != FORMAT_VERSION {
199        return Err(Error::IterDump(format!(
200            "unsupported format_version {format_version} (only {FORMAT_VERSION} known)",
201        )));
202    }
203    let n = cur.read_u32()?;
204    let m = cur.read_u32()?;
205    let nnz_jac = cur.read_u32()?;
206    let nnz_h = cur.read_u32()?;
207    let name_len = cur.read_u32()? as usize;
208    let name_bytes = cur.read(name_len)?;
209    let name = std::str::from_utf8(name_bytes)
210        .map_err(|e| Error::IterDump(format!("name is not UTF-8: {e}")))?
211        .to_string();
212    Ok(IterDumpHeader {
213        format_version,
214        n,
215        m,
216        nnz_jac,
217        nnz_h,
218        name,
219    })
220}
221
222fn parse_record(cur: &mut Cursor<'_>) -> Result<IterDumpRecord, Error> {
223    let iter = cur.read_u32()?;
224    let status = cur.read_u32()?;
225    let mu = cur.read_f64()?;
226    let tau = cur.read_f64()?;
227    let alpha_pr = cur.read_f64()?;
228    let alpha_du = cur.read_f64()?;
229    let delta_x = cur.read_f64()?;
230    let delta_s = cur.read_f64()?;
231    let delta_c = cur.read_f64()?;
232    let delta_d = cur.read_f64()?;
233    let inf_pr = cur.read_f64()?;
234    let inf_du = cur.read_f64()?;
235    let constr_viol = cur.read_f64()?;
236    let dual_inf = cur.read_f64()?;
237    let complementarity = cur.read_f64()?;
238    let f = cur.read_f64()?;
239
240    let x = cur.read_vec()?;
241    let s = cur.read_vec()?;
242    let y_c = cur.read_vec()?;
243    let y_d = cur.read_vec()?;
244    let z_l = cur.read_vec()?;
245    let z_u = cur.read_vec()?;
246    let v_l = cur.read_vec()?;
247    let v_u = cur.read_vec()?;
248
249    let filter_count = cur.read_u32()? as usize;
250    // Each filter entry is two f64s (16 bytes); cap allocation by the
251    // remaining stream capacity before reserving.
252    let max_filter = cur.remaining() / 16;
253    if filter_count > max_filter {
254        return Err(Error::IterDump(format!(
255            "filter_count {filter_count} exceeds remaining stream capacity ({max_filter} entries)",
256        )));
257    }
258    let mut filter = Vec::with_capacity(filter_count);
259    for _ in 0..filter_count {
260        let theta = cur.read_f64()?;
261        let phi = cur.read_f64()?;
262        filter.push((theta, phi));
263    }
264
265    Ok(IterDumpRecord {
266        iter,
267        status,
268        mu,
269        tau,
270        alpha_pr,
271        alpha_du,
272        delta_x,
273        delta_s,
274        delta_c,
275        delta_d,
276        inf_pr,
277        inf_du,
278        constr_viol,
279        dual_inf,
280        complementarity,
281        f,
282        x,
283        s,
284        y_c,
285        y_d,
286        z_l,
287        z_u,
288        v_l,
289        v_u,
290        filter,
291    })
292}
293
294pub struct LazyRecords<'a> {
295    cur: Cursor<'a>,
296    poisoned: bool,
297}
298
299impl Iterator for LazyRecords<'_> {
300    type Item = Result<IterDumpRecord, Error>;
301
302    fn next(&mut self) -> Option<Self::Item> {
303        if self.poisoned || self.cur.remaining() == 0 {
304            return None;
305        }
306        match parse_record(&mut self.cur) {
307            Ok(rec) => Some(Ok(rec)),
308            Err(e) => {
309                self.poisoned = true;
310                Some(Err(e))
311            }
312        }
313    }
314}
315
#[cfg(test)]
mod tests {
    use super::*;

    /// Length of the synthetic header: 8 magic bytes, six `u32` fields and
    /// the 5-byte name `hs071`.
    const HEADER_LEN: usize = 8 + 6 * 4 + 5;
    /// Length of a record's scalar block: two `u32` plus fourteen `f64`.
    const SCALAR_BLOCK_LEN: usize = 2 * 4 + 14 * 8;

    /// Append `value` as a little-endian `u32`.
    fn put_u32(buf: &mut Vec<u8>, value: u32) {
        buf.extend_from_slice(&value.to_le_bytes());
    }

    /// Append every value as a little-endian `f64`, without a length prefix.
    fn put_f64s(buf: &mut Vec<u8>, values: &[f64]) {
        for v in values {
            buf.extend_from_slice(&v.to_le_bytes());
        }
    }

    /// Append a length-prefixed vector (`u32 len` followed by the values).
    fn put_vec(buf: &mut Vec<u8>, values: &[f64]) {
        put_u32(buf, values.len() as u32);
        put_f64s(buf, values);
    }

    /// Hand-assemble a minimal POUNCEIT stream (header plus one record) so
    /// the parser can be checked without depending on pounce-algorithm.
    fn synth_trace() -> Vec<u8> {
        let mut buf = Vec::new();

        // Header: magic, version, n, m, nnz_jac, nnz_h, name_len, name.
        buf.extend_from_slice(MAGIC);
        for field in [FORMAT_VERSION, 4, 2, 0, 0, 5] {
            put_u32(&mut buf, field);
        }
        buf.extend_from_slice(b"hs071");

        // One iteration record: iter = 0, status = 0, then the 14 scalars.
        put_u32(&mut buf, 0);
        put_u32(&mut buf, 0);
        put_f64s(
            &mut buf,
            &[
                0.1, 0.99, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.5, 2.0, 0.5, 2.0, 0.25, 17.0,
            ],
        );

        // x (n=4)
        put_vec(&mut buf, &[1.0, 5.0, 5.0, 1.0]);
        // s, y_c, y_d, z_L, z_U, v_L, v_U — all length 1 except v_U (0)
        let rest: [&[f64]; 7] = [&[0.5], &[1.0], &[1.0], &[1.0], &[1.0], &[1.0], &[]];
        for values in rest {
            put_vec(&mut buf, values);
        }

        // filter_count = 0
        put_u32(&mut buf, 0);
        buf
    }

    #[test]
    fn parses_header_and_one_record() {
        let IterDumpTrace { header, records } =
            IterDumpTrace::from_bytes(&synth_trace()).expect("parse");
        assert_eq!(header.format_version, 1);
        assert_eq!(header.n, 4);
        assert_eq!(header.m, 2);
        assert_eq!(header.name, "hs071");
        assert_eq!(records.len(), 1);

        let rec = &records[0];
        assert_eq!(rec.iter, 0);
        assert_eq!(rec.mu, 0.1);
        assert_eq!(rec.x, vec![1.0, 5.0, 5.0, 1.0]);
        assert_eq!(rec.v_u, Vec::<f64>::new());
    }

    #[test]
    fn rejects_bad_magic() {
        let mut bytes = synth_trace();
        bytes[0] = b'X';
        let err = IterDumpTrace::from_bytes(&bytes).expect_err("should fail");
        assert!(matches!(err, Error::IterDump(_)));
    }

    #[test]
    fn rejects_unsupported_version() {
        let mut bytes = synth_trace();
        // The version field sits directly after the 8 magic bytes.
        bytes[8..12].copy_from_slice(&99u32.to_le_bytes());
        let err = IterDumpTrace::from_bytes(&bytes).expect_err("should fail");
        let msg = format!("{err}");
        assert!(msg.contains("99"), "got: {msg}");
    }

    #[test]
    fn truncated_stream_errors_cleanly() {
        let bytes = synth_trace();
        // 40 bytes = full header plus 3 bytes of the first record.
        let err = IterDumpTrace::from_bytes(&bytes[..40]).expect_err("should fail");
        let msg = format!("{err}");
        assert!(msg.contains("truncated"), "got: {msg}");
    }

    #[test]
    fn lazy_iter_returns_same_records() {
        let bytes = synth_trace();
        let (header, mut iter) = IterDumpTrace::lazy_iter(&bytes).expect("hdr");
        assert_eq!(header.name, "hs071");
        let first = iter.next().expect("one rec").expect("ok");
        assert_eq!(first.iter, 0);
        assert!(iter.next().is_none());
    }

    /// After a parse error the lazy iterator must fuse — i.e. return
    /// `None` from every subsequent call rather than retrying against
    /// an advanced cursor.
    #[test]
    fn lazy_iter_fuses_after_error() {
        // Valid header with all five u32 fields after the version set to
        // zero (so no name), followed by 7 bytes that cannot form a
        // record. The first `next()` must yield Err, every later call None.
        let mut bytes = Vec::new();
        bytes.extend_from_slice(MAGIC);
        put_u32(&mut bytes, FORMAT_VERSION);
        bytes.extend_from_slice(&[0u8; 5 * 4]);
        bytes.extend_from_slice(&[0xff; 7]);

        let (_h, mut it) = IterDumpTrace::lazy_iter(&bytes).expect("hdr");
        let first = it.next().expect("first call yields an item");
        assert!(first.is_err(), "expected parse error, got {first:?}");
        for _ in 0..3 {
            assert!(it.next().is_none(), "iterator should fuse after error");
        }
    }

    /// Truncation mid-record (after the scalar block, partway through
    /// the iterate-vector block) should error rather than panic.
    #[test]
    fn truncated_mid_record_errors_cleanly() {
        let bytes = synth_trace();
        // Cut two bytes into the length prefix of the first vector.
        let cut = HEADER_LEN + SCALAR_BLOCK_LEN + 2;
        let err = IterDumpTrace::from_bytes(&bytes[..cut]).expect_err("should fail");
        let msg = format!("{err}");
        assert!(msg.contains("truncated"), "got: {msg}");
    }

    /// An attacker-supplied huge `len` for a vector must be rejected
    /// before we allocate. Tamper a length field to claim 1B entries —
    /// the parser should refuse, not OOM.
    #[test]
    fn rejects_oversized_vector_length() {
        let mut bytes = synth_trace();
        // x is the first vector, right after the scalar block.
        let x_len_offset = HEADER_LEN + SCALAR_BLOCK_LEN;
        bytes[x_len_offset..x_len_offset + 4].copy_from_slice(&1_000_000_000_u32.to_le_bytes());
        let err = IterDumpTrace::from_bytes(&bytes).expect_err("should fail");
        let msg = format!("{err}");
        assert!(
            msg.contains("exceeds remaining stream capacity"),
            "got: {msg}",
        );
    }

    /// Same defence for the filter block.
    #[test]
    fn rejects_oversized_filter_count() {
        let mut bytes = synth_trace();
        // The stream ends with filter_count = 0; overwrite it with the
        // largest possible count.
        let tail = bytes.len() - 4;
        bytes[tail..].copy_from_slice(&u32::MAX.to_le_bytes());
        let err = IterDumpTrace::from_bytes(&bytes).expect_err("should fail");
        let msg = format!("{err}");
        assert!(
            msg.contains("exceeds remaining stream capacity"),
            "got: {msg}",
        );
    }
}