Skip to main content

pounce_common/
utils.rs

1//! Numerical and timing utilities.
2//!
3//! Mirrors `Common/IpUtils.{hpp,cpp}`. The PRNG is a portable
4//! reimplementation of POSIX `drand48` (48-bit LCG with the
5//! glibc-default seed) so output matches a `drand48`-built upstream
6//! Ipopt deterministically. Replace with libc's `drand48` later if a
7//! particular platform's bit-equivalence target requires it.
8
9use crate::types::Number;
10use std::cell::Cell;
11use std::sync::OnceLock;
12use std::time::{Instant, SystemTime, UNIX_EPOCH};
13
14/// Equivalent to `Ipopt::IsFiniteNumber`. Returns false for NaN/±∞.
15#[inline]
16pub fn is_finite_number(val: Number) -> bool {
17    val.is_finite()
18}
19
20/// Equivalent to `Ipopt::Compare_le(lhs, rhs, BasVal)` — relaxed `<=`
21/// with a tolerance proportional to `BasVal`:
22/// `lhs - rhs <= 10 * machine_epsilon * max(1, |BasVal|)`.
23///
24/// **Deliberate deviation from upstream.** Ipopt's `IpUtils.cpp` has no
25/// `Max(1, …)` floor — its tolerance shrinks to 0 as `BasVal → 0`. The
26/// floor was in this port from the initial commit and is load-bearing:
27/// `compare_le` backs the filter line-search acceptance tests
28/// (`filter_acceptor.rs` — Armijo with `BasVal = φ`, sufficient
29/// progress and filter dominance with `BasVal = θ` or `φ`), and near a
30/// solution θ and `φ_trial − φ` sit at summation-noise scale, which is
31/// set by the magnitude of the *barrier-sum terms*, not by `|φ|` or
32/// `|θ|` themselves. A strictly relative tolerance therefore undershoots
33/// the real round-off noise once `|BasVal| < 1`, rejecting trial points
34/// that genuinely reached the optimum; the line search then backtracks
35/// into fractional-step grinds and restoration excursions while the dual
36/// infeasibility refuses to close (the PALMER/VESUVIALS class documented
37/// on `FilterLsAcceptor::armijo_holds`).
38///
39/// Removing the floor for upstream bit-equivalence (review item L30)
40/// regressed the Mittelmann ampl-nlp set 41/47 → 33/47 — exactly
41/// upstream's score — with nine Optimal→timeout tail stalls
42/// (NARX_CFy, bearing_400, nql180, qssp180, five qcqp instances). The
43/// mechanism was confirmed differentially on synthesized QCQP/NARX
44/// proxies: with the floor restored the same boundary steps are
45/// accepted and the solves close crisply (e.g. 226 grinding iterations
46/// with a ~100-iteration restoration excursion → 141 clean ones). Keep
47/// the floor; it is the documented price of beating upstream here.
48#[inline]
49pub fn compare_le(lhs: Number, rhs: Number, bas_val: Number) -> bool {
50    let tol = 10.0 * Number::EPSILON * bas_val.abs().max(1.0);
51    lhs - rhs <= tol
52}
53
54/// Equivalent to `Ipopt::IpRandom01`. 48-bit LCG matching POSIX
55/// `drand48`: X_{n+1} = (0x5DEECE66D · X_n + 0xB) mod 2^48,
56/// returning the high-order 48 bits as a `f64` in [0, 1).
57pub fn ip_random_01() -> Number {
58    LCG_STATE.with(|s| {
59        let mut x = s.get();
60        x = x.wrapping_mul(LCG_A).wrapping_add(LCG_C) & LCG_MASK;
61        s.set(x);
62        // Top 32 bits → 53-bit mantissa portion, exactly as drand48
63        // converts. Match glibc's `erand48`:
64        //   r = ((double)(x >> 16)) * 2^-32 + ((x & 0xffff) << 4) * 2^-48 ... etc.
65        // The simplest reformulation that matches: split into 16-bit
66        // chunks and accumulate.
67        let x0 = (x & 0xFFFF) as f64;
68        let x1 = ((x >> 16) & 0xFFFF) as f64;
69        let x2 = ((x >> 32) & 0xFFFF) as f64;
70        x2 / 65536.0 + x1 / (65536.0 * 65536.0) + x0 / (65536.0 * 65536.0 * 65536.0)
71    })
72}
73
74/// Reset the PRNG to glibc's default seed. Equivalent to
75/// `Ipopt::IpResetRandom01`.
76pub fn ip_reset_random_01() {
77    LCG_STATE.with(|s| s.set(LCG_DEFAULT_SEED));
78}
79
80const LCG_A: u64 = 0x5DEECE66D;
81const LCG_C: u64 = 0xB;
82const LCG_MASK: u64 = (1 << 48) - 1;
83const LCG_DEFAULT_SEED: u64 = 0x1234_ABCD_330E;
84
85thread_local! {
86    static LCG_STATE: Cell<u64> = const { Cell::new(LCG_DEFAULT_SEED) };
87}
88
89/// Wallclock time in seconds since first call. Equivalent to
90/// `Ipopt::WallclockTime`.
91pub fn wallclock_time() -> Number {
92    static EPOCH: OnceLock<Instant> = OnceLock::new();
93    let e = EPOCH.get_or_init(Instant::now);
94    e.elapsed().as_secs_f64()
95}
96
97/// CPU time in seconds. Equivalent to `Ipopt::CpuTime`.
98///
99/// On Unix this returns the process's accumulated **user** CPU time via
100/// `getrusage(RUSAGE_SELF).ru_utime`, exactly matching upstream Ipopt's
101/// `CpuTime()` (`src/Common/IpUtils.cpp`). This is what `max_cpu_time`
102/// is meant to bound — a busy solve accrues CPU time at roughly the wall
103/// rate, but time spent blocked/sleeping does not count.
104///
105/// std exposes no portable CPU-time API, so non-Unix targets (Windows)
106/// fall back to wallclock. That mirrors upstream too: its Windows path
107/// uses `clock()`, which on the MSVC runtime measures elapsed real time
108/// rather than CPU time, so the two are already equivalent there.
109pub fn cpu_time() -> Number {
110    #[cfg(unix)]
111    {
112        // SAFETY: `getrusage` only writes into the provided `rusage` out-param
113        // and reads no global state; a zeroed `rusage` is a valid input.
114        let mut usage: libc::rusage = unsafe { core::mem::zeroed() };
115        if unsafe { libc::getrusage(libc::RUSAGE_SELF, &mut usage) } == 0 {
116            return usage.ru_utime.tv_sec as Number + 1.0e-6 * usage.ru_utime.tv_usec as Number;
117        }
118        // getrusage failing is exceedingly rare (EFAULT/EINVAL only); degrade
119        // to wallclock rather than panicking in a timing helper.
120        wallclock_time()
121    }
122    #[cfg(not(unix))]
123    {
124        wallclock_time()
125    }
126}
127
128/// System time in seconds since UNIX epoch. Equivalent to
129/// `Ipopt::SysTime`.
130pub fn sys_time() -> Number {
131    SystemTime::now()
132        .duration_since(UNIX_EPOCH)
133        .map(|d| d.as_secs_f64())
134        .unwrap_or(0.0)
135}
136
137/// Equivalent to `Ipopt::ComputeMemIncrease`. Updates `len` to a new
138/// length consistent with `recommended`/`min` while clamping to
139/// `T::MAX`. Returns `Err` if even the maximum representable size
140/// would not be an increase.
141pub fn compute_mem_increase_i64(
142    len: &mut i64,
143    recommended: f64,
144    min: i64,
145    context: &str,
146) -> Result<(), String> {
147    if recommended >= i64::MAX as f64 {
148        if *len < i64::MAX {
149            *len = i64::MAX;
150            Ok(())
151        } else {
152            Err(format!(
153                "Cannot allocate more than {} bytes for {} due to integer-type limit",
154                (i64::MAX as f64) * 8.0,
155                context
156            ))
157        }
158    } else {
159        *len = min.max(recommended as i64);
160        Ok(())
161    }
162}
163
164#[cfg(test)]
165mod tests {
166    use super::*;
167
168    #[test]
169    fn finite_check() {
170        assert!(is_finite_number(0.0));
171        assert!(is_finite_number(1e300));
172        assert!(!is_finite_number(f64::NAN));
173        assert!(!is_finite_number(f64::INFINITY));
174    }
175
176    #[test]
177    fn compare_le_tolerates_eps() {
178        let v = 1.0;
179        let near = v + 5.0 * Number::EPSILON;
180        assert!(compare_le(near, v, v));
181    }
182
183    #[test]
184    fn compare_le_floors_tolerance_for_small_basval() {
185        // Deliberate deviation from upstream (see `compare_le`'s doc):
186        // the tolerance is `10*eps*max(1, |BasVal|)`, NOT upstream's
187        // `10*eps*|BasVal|`. The floor keeps a ~2.2e-15 absolute slack
188        // when the filter quantities (θ, φ) are < 1, which is the
189        // round-off regime near a solution — removing it (review item
190        // L30) regressed the Mittelmann ampl-nlp set 41/47 -> 33/47 with
191        // nine near-convergence tail-stall timeouts. This test pins the
192        // floor so a future "upstream faithfulness" pass cannot silently
193        // reintroduce that regression.
194        let bas_val = 1e-6;
195        let gap = 1e-18;
196        assert!(
197            compare_le(gap, 0.0, bas_val),
198            "gap {gap} is below the floored tolerance 10*eps*max(1, {bas_val}) \
199             and must be accepted; rejecting it reintroduces the Mittelmann \
200             tail stall"
201        );
202        // Above the floored tolerance the relaxed `<=` still rejects.
203        let big = 1e-13;
204        assert!(!compare_le(big, 0.0, bas_val));
205        // For |BasVal| >= 1 the floor is inert and the tolerance is
206        // upstream's relative one.
207        assert!(compare_le(5.0 * Number::EPSILON * 1e6, 0.0, 1e6));
208        assert!(!compare_le(3e-9, 0.0, 1e6)); // tol(1e6) = 10*eps*1e6 ~ 2.22e-9
209    }
210
211    #[test]
212    fn random_01_in_range_and_deterministic() {
213        ip_reset_random_01();
214        let a: Vec<f64> = (0..16).map(|_| ip_random_01()).collect();
215        ip_reset_random_01();
216        let b: Vec<f64> = (0..16).map(|_| ip_random_01()).collect();
217        assert_eq!(a, b);
218        for v in a {
219            assert!((0.0..1.0).contains(&v), "{v}");
220        }
221    }
222
223    #[test]
224    fn wallclock_monotonic() {
225        let a = wallclock_time();
226        let b = wallclock_time();
227        assert!(b >= a);
228    }
229
230    #[cfg(unix)]
231    #[test]
232    fn cpu_time_excludes_sleep_but_counts_compute() {
233        use std::hint::black_box;
234        use std::thread::sleep;
235        use std::time::Duration;
236
237        // (1) Sleeping consumes no user CPU time, so `cpu_time()` must lag
238        // wallclock across a sleep — the defining property of `max_cpu_time`.
239        // Before the L5 fix `cpu_time()` was a `wallclock_time()` alias, so
240        // the two advanced in lockstep and the gap below was ~0.
241        let cpu0 = cpu_time();
242        let wall0 = wallclock_time();
243        sleep(Duration::from_millis(300));
244        let wall_slept = wallclock_time() - wall0;
245        let cpu_slept = cpu_time() - cpu0;
246        assert!(
247            wall_slept - cpu_slept > 0.1,
248            "cpu_time advanced {:.3}s across a {:.3}s sleep; it must measure \
249             CPU, not wallclock (wall−cpu gap was only {:.3}s)",
250            cpu_slept,
251            wall_slept,
252            wall_slept - cpu_slept
253        );
254
255        // (2) ...but a busy loop *does* accrue CPU time, confirming the clock
256        // is live (guards against a degenerate constant implementation).
257        let cpu1 = cpu_time();
258        let mut acc = 0u64;
259        for i in 0..50_000_000u64 {
260            acc = acc.wrapping_add(i ^ (i << 1));
261        }
262        black_box(acc);
263        let cpu_busy = cpu_time() - cpu1;
264        assert!(
265            cpu_busy > 0.0,
266            "cpu_time did not advance across a busy loop (got {:.6}s)",
267            cpu_busy
268        );
269    }
270}