pounce_common/utils.rs
1//! Numerical and timing utilities.
2//!
3//! Mirrors `Common/IpUtils.{hpp,cpp}`. The PRNG is a portable
4//! reimplementation of POSIX `drand48` (48-bit LCG with the
5//! glibc-default seed) so output matches a `drand48`-built upstream
6//! Ipopt deterministically. Replace with libc's `drand48` later if a
7//! particular platform's bit-equivalence target requires it.
8
9use crate::types::Number;
10use std::cell::Cell;
11use std::sync::OnceLock;
12use std::time::{Instant, SystemTime, UNIX_EPOCH};
13
14/// Equivalent to `Ipopt::IsFiniteNumber`. Returns false for NaN/±∞.
15#[inline]
16pub fn is_finite_number(val: Number) -> bool {
17 val.is_finite()
18}
19
20/// Equivalent to `Ipopt::Compare_le(lhs, rhs, BasVal)` — relaxed `<=`
21/// with a tolerance proportional to `BasVal`:
22/// `lhs - rhs <= 10 * machine_epsilon * max(1, |BasVal|)`.
23///
24/// **Deliberate deviation from upstream.** Ipopt's `IpUtils.cpp` has no
25/// `Max(1, …)` floor — its tolerance shrinks to 0 as `BasVal → 0`. The
26/// floor was in this port from the initial commit and is load-bearing:
27/// `compare_le` backs the filter line-search acceptance tests
28/// (`filter_acceptor.rs` — Armijo with `BasVal = φ`, sufficient
29/// progress and filter dominance with `BasVal = θ` or `φ`), and near a
30/// solution θ and `φ_trial − φ` sit at summation-noise scale, which is
31/// set by the magnitude of the *barrier-sum terms*, not by `|φ|` or
32/// `|θ|` themselves. A strictly relative tolerance therefore undershoots
33/// the real round-off noise once `|BasVal| < 1`, rejecting trial points
34/// that genuinely reached the optimum; the line search then backtracks
35/// into fractional-step grinds and restoration excursions while the dual
36/// infeasibility refuses to close (the PALMER/VESUVIALS class documented
37/// on `FilterLsAcceptor::armijo_holds`).
38///
39/// Removing the floor for upstream bit-equivalence (review item L30)
40/// regressed the Mittelmann ampl-nlp set 41/47 → 33/47 — exactly
41/// upstream's score — with nine Optimal→timeout tail stalls
42/// (NARX_CFy, bearing_400, nql180, qssp180, five qcqp instances). The
43/// mechanism was confirmed differentially on synthesized QCQP/NARX
44/// proxies: with the floor restored the same boundary steps are
45/// accepted and the solves close crisply (e.g. 226 grinding iterations
46/// with a ~100-iteration restoration excursion → 141 clean ones). Keep
47/// the floor; it is the documented price of beating upstream here.
48#[inline]
49pub fn compare_le(lhs: Number, rhs: Number, bas_val: Number) -> bool {
50 let tol = 10.0 * Number::EPSILON * bas_val.abs().max(1.0);
51 lhs - rhs <= tol
52}
53
54/// Equivalent to `Ipopt::IpRandom01`. 48-bit LCG matching POSIX
55/// `drand48`: X_{n+1} = (0x5DEECE66D · X_n + 0xB) mod 2^48,
56/// returning the high-order 48 bits as a `f64` in [0, 1).
57pub fn ip_random_01() -> Number {
58 LCG_STATE.with(|s| {
59 let mut x = s.get();
60 x = x.wrapping_mul(LCG_A).wrapping_add(LCG_C) & LCG_MASK;
61 s.set(x);
62 // Top 32 bits → 53-bit mantissa portion, exactly as drand48
63 // converts. Match glibc's `erand48`:
64 // r = ((double)(x >> 16)) * 2^-32 + ((x & 0xffff) << 4) * 2^-48 ... etc.
65 // The simplest reformulation that matches: split into 16-bit
66 // chunks and accumulate.
67 let x0 = (x & 0xFFFF) as f64;
68 let x1 = ((x >> 16) & 0xFFFF) as f64;
69 let x2 = ((x >> 32) & 0xFFFF) as f64;
70 x2 / 65536.0 + x1 / (65536.0 * 65536.0) + x0 / (65536.0 * 65536.0 * 65536.0)
71 })
72}
73
74/// Reset the PRNG to glibc's default seed. Equivalent to
75/// `Ipopt::IpResetRandom01`.
76pub fn ip_reset_random_01() {
77 LCG_STATE.with(|s| s.set(LCG_DEFAULT_SEED));
78}
79
80const LCG_A: u64 = 0x5DEECE66D;
81const LCG_C: u64 = 0xB;
82const LCG_MASK: u64 = (1 << 48) - 1;
83const LCG_DEFAULT_SEED: u64 = 0x1234_ABCD_330E;
84
85thread_local! {
86 static LCG_STATE: Cell<u64> = const { Cell::new(LCG_DEFAULT_SEED) };
87}
88
89/// Wallclock time in seconds since first call. Equivalent to
90/// `Ipopt::WallclockTime`.
91pub fn wallclock_time() -> Number {
92 static EPOCH: OnceLock<Instant> = OnceLock::new();
93 let e = EPOCH.get_or_init(Instant::now);
94 e.elapsed().as_secs_f64()
95}
96
97/// CPU time in seconds. Equivalent to `Ipopt::CpuTime`.
98///
99/// On Unix this returns the process's accumulated **user** CPU time via
100/// `getrusage(RUSAGE_SELF).ru_utime`, exactly matching upstream Ipopt's
101/// `CpuTime()` (`src/Common/IpUtils.cpp`). This is what `max_cpu_time`
102/// is meant to bound — a busy solve accrues CPU time at roughly the wall
103/// rate, but time spent blocked/sleeping does not count.
104///
105/// std exposes no portable CPU-time API, so non-Unix targets (Windows)
106/// fall back to wallclock. That mirrors upstream too: its Windows path
107/// uses `clock()`, which on the MSVC runtime measures elapsed real time
108/// rather than CPU time, so the two are already equivalent there.
109pub fn cpu_time() -> Number {
110 #[cfg(unix)]
111 {
112 // SAFETY: `getrusage` only writes into the provided `rusage` out-param
113 // and reads no global state; a zeroed `rusage` is a valid input.
114 let mut usage: libc::rusage = unsafe { core::mem::zeroed() };
115 if unsafe { libc::getrusage(libc::RUSAGE_SELF, &mut usage) } == 0 {
116 return usage.ru_utime.tv_sec as Number + 1.0e-6 * usage.ru_utime.tv_usec as Number;
117 }
118 // getrusage failing is exceedingly rare (EFAULT/EINVAL only); degrade
119 // to wallclock rather than panicking in a timing helper.
120 wallclock_time()
121 }
122 #[cfg(not(unix))]
123 {
124 wallclock_time()
125 }
126}
127
128/// System time in seconds since UNIX epoch. Equivalent to
129/// `Ipopt::SysTime`.
130pub fn sys_time() -> Number {
131 SystemTime::now()
132 .duration_since(UNIX_EPOCH)
133 .map(|d| d.as_secs_f64())
134 .unwrap_or(0.0)
135}
136
137/// Equivalent to `Ipopt::ComputeMemIncrease`. Updates `len` to a new
138/// length consistent with `recommended`/`min` while clamping to
139/// `T::MAX`. Returns `Err` if even the maximum representable size
140/// would not be an increase.
141pub fn compute_mem_increase_i64(
142 len: &mut i64,
143 recommended: f64,
144 min: i64,
145 context: &str,
146) -> Result<(), String> {
147 if recommended >= i64::MAX as f64 {
148 if *len < i64::MAX {
149 *len = i64::MAX;
150 Ok(())
151 } else {
152 Err(format!(
153 "Cannot allocate more than {} bytes for {} due to integer-type limit",
154 (i64::MAX as f64) * 8.0,
155 context
156 ))
157 }
158 } else {
159 *len = min.max(recommended as i64);
160 Ok(())
161 }
162}
163
164#[cfg(test)]
165mod tests {
166 use super::*;
167
168 #[test]
169 fn finite_check() {
170 assert!(is_finite_number(0.0));
171 assert!(is_finite_number(1e300));
172 assert!(!is_finite_number(f64::NAN));
173 assert!(!is_finite_number(f64::INFINITY));
174 }
175
176 #[test]
177 fn compare_le_tolerates_eps() {
178 let v = 1.0;
179 let near = v + 5.0 * Number::EPSILON;
180 assert!(compare_le(near, v, v));
181 }
182
183 #[test]
184 fn compare_le_floors_tolerance_for_small_basval() {
185 // Deliberate deviation from upstream (see `compare_le`'s doc):
186 // the tolerance is `10*eps*max(1, |BasVal|)`, NOT upstream's
187 // `10*eps*|BasVal|`. The floor keeps a ~2.2e-15 absolute slack
188 // when the filter quantities (θ, φ) are < 1, which is the
189 // round-off regime near a solution — removing it (review item
190 // L30) regressed the Mittelmann ampl-nlp set 41/47 -> 33/47 with
191 // nine near-convergence tail-stall timeouts. This test pins the
192 // floor so a future "upstream faithfulness" pass cannot silently
193 // reintroduce that regression.
194 let bas_val = 1e-6;
195 let gap = 1e-18;
196 assert!(
197 compare_le(gap, 0.0, bas_val),
198 "gap {gap} is below the floored tolerance 10*eps*max(1, {bas_val}) \
199 and must be accepted; rejecting it reintroduces the Mittelmann \
200 tail stall"
201 );
202 // Above the floored tolerance the relaxed `<=` still rejects.
203 let big = 1e-13;
204 assert!(!compare_le(big, 0.0, bas_val));
205 // For |BasVal| >= 1 the floor is inert and the tolerance is
206 // upstream's relative one.
207 assert!(compare_le(5.0 * Number::EPSILON * 1e6, 0.0, 1e6));
208 assert!(!compare_le(3e-9, 0.0, 1e6)); // tol(1e6) = 10*eps*1e6 ~ 2.22e-9
209 }
210
211 #[test]
212 fn random_01_in_range_and_deterministic() {
213 ip_reset_random_01();
214 let a: Vec<f64> = (0..16).map(|_| ip_random_01()).collect();
215 ip_reset_random_01();
216 let b: Vec<f64> = (0..16).map(|_| ip_random_01()).collect();
217 assert_eq!(a, b);
218 for v in a {
219 assert!((0.0..1.0).contains(&v), "{v}");
220 }
221 }
222
223 #[test]
224 fn wallclock_monotonic() {
225 let a = wallclock_time();
226 let b = wallclock_time();
227 assert!(b >= a);
228 }
229
230 #[cfg(unix)]
231 #[test]
232 fn cpu_time_excludes_sleep_but_counts_compute() {
233 use std::hint::black_box;
234 use std::thread::sleep;
235 use std::time::Duration;
236
237 // (1) Sleeping consumes no user CPU time, so `cpu_time()` must lag
238 // wallclock across a sleep — the defining property of `max_cpu_time`.
239 // Before the L5 fix `cpu_time()` was a `wallclock_time()` alias, so
240 // the two advanced in lockstep and the gap below was ~0.
241 let cpu0 = cpu_time();
242 let wall0 = wallclock_time();
243 sleep(Duration::from_millis(300));
244 let wall_slept = wallclock_time() - wall0;
245 let cpu_slept = cpu_time() - cpu0;
246 assert!(
247 wall_slept - cpu_slept > 0.1,
248 "cpu_time advanced {:.3}s across a {:.3}s sleep; it must measure \
249 CPU, not wallclock (wall−cpu gap was only {:.3}s)",
250 cpu_slept,
251 wall_slept,
252 wall_slept - cpu_slept
253 );
254
255 // (2) ...but a busy loop *does* accrue CPU time, confirming the clock
256 // is live (guards against a degenerate constant implementation).
257 let cpu1 = cpu_time();
258 let mut acc = 0u64;
259 for i in 0..50_000_000u64 {
260 acc = acc.wrapping_add(i ^ (i << 1));
261 }
262 black_box(acc);
263 let cpu_busy = cpu_time() - cpu1;
264 assert!(
265 cpu_busy > 0.0,
266 "cpu_time did not advance across a busy loop (got {:.6}s)",
267 cpu_busy
268 );
269 }
270}