Skip to main content

swarmkit_sailing/
range_cache.rs

1use crate::spherical::Segment;
2use crate::units::PathXY;
3use crate::{Sailboat, WindSource};
4
5fn lerp(a: f64, b: f64, t: f64) -> f64 {
6    a + (b - a) * t
7}
8
9#[cfg(feature = "probe-stats")]
10use std::sync::atomic::{AtomicU64, Ordering};
11
12/// `locate`-call counters plus out-of-range probe count (when the
13/// queried `dep` falls outside the tabulated range, `locate` clamps
14/// silently — see item D in the accuracy writeup). Feature-gated to
15/// avoid atomic traffic in normal builds; per-instance so concurrent
16/// searches don't share counters.
17#[cfg(feature = "probe-stats")]
18#[derive(Default, Debug)]
19pub struct ProbeCounters {
20    total: AtomicU64,
21    out_of_range: AtomicU64,
22}
23
24#[cfg(feature = "probe-stats")]
25impl ProbeCounters {
26    pub fn snapshot(&self) -> ProbeStats {
27        ProbeStats {
28            total: self.total.load(Ordering::Relaxed),
29            out_of_range: self.out_of_range.load(Ordering::Relaxed),
30        }
31    }
32
33    pub fn reset(&self) {
34        self.total.store(0, Ordering::Relaxed);
35        self.out_of_range.store(0, Ordering::Relaxed);
36    }
37}
38
39#[cfg(feature = "probe-stats")]
40#[derive(Clone, Copy, Debug, Default)]
41pub struct ProbeStats {
42    pub total: u64,
43    pub out_of_range: u64,
44}
45
46/// Default departure-time samples per segment. 8 gives sub-1%
47/// interpolation error on smooth weather fields. Overridden at call
48/// site via `SegmentRangeTables::build`'s `range_k` arg, which
49/// `SearchSettings::range_k` plumbs from `SearchConfig`.
50pub const DEFAULT_RANGE_K: usize = 8;
51
52/// Default `mcr_01` samples per departure-time bucket. 8 tracks the
53/// smooth-monotone `travel_time(mcr)` curve within ~1%. Sensitivity
54/// study (`bywind/profiling/stage2-counters.md`, iter 6) — worst-case
55/// regression on the large scenario:
56///
57/// - `k_mcr=6` → ~6% wallclock saving, ≤1.3% fitness loss.
58/// - `k_mcr=4` → ~11% wallclock saving, ≤2.1% fitness loss.
59///
60/// Geographic path is xy-determined, so `k_mcr` only moves the inner
61/// PSO's `t` lookup-table resolution; land-crossing is unaffected.
62/// Overridden at call site via `SegmentRangeTables::build`'s `k_mcr`
63/// arg, which `SearchSettings::k_mcr` plumbs from `SearchConfig`.
64pub const DEFAULT_K_MCR: usize = 8;
65
66/// Tabulation-range padding. Inner-PSO particles can briefly probe past
67/// the forward-propagated `best`/`worst` extrema before the boundary
68/// clamp pulls them back; padding avoids out-of-range queries in the
69/// common case (clamping catches the rest).
70const RANGE_PADDING: f64 = 0.15;
71
72#[derive(Clone, Debug)]
73struct SegmentTable {
74    dep_min: f64,
75    dep_max: f64,
76    /// Lower endpoint of the mcr sampling range. **Uniform across all
77    /// buckets of this segment** so `times[..][k]` shares one absolute
78    /// mcr value across `b` — lerping along the dep axis is then
79    /// well-defined (see `query_mcr_for_delta_time`). 0.0 when every
80    /// probed dep yields finite travel time at `mcr=0`, 0.1 otherwise.
81    worst_mcr: f64,
82    /// Row-major `times[b * k_mcr + k]` (length `range_k * k_mcr`),
83    /// monotonically decreasing in `k` within each row. `Vec` rather
84    /// than fixed arrays so resolution stays runtime-tunable.
85    times: Vec<f64>,
86}
87
88/// Per-segment `travel_time(departure_time, mcr)` lookup tables.
89/// Replaces full wind integrations with O(1) interpolated lookups in
90/// the inner time-PSO hot path. Rebuilt once per outer `xy` change.
91/// Segment `i` covers `xy[i] -> xy[i+1]` and arrives at `t[i+1]`.
92#[derive(Debug)]
93pub struct SegmentRangeTables<const N: usize> {
94    tables: Vec<SegmentTable>,
95    range_k: usize,
96    k_mcr: usize,
97    #[cfg(feature = "probe-stats")]
98    probes: ProbeCounters,
99}
100
101impl<const N: usize> Clone for SegmentRangeTables<N> {
102    fn clone(&self) -> Self {
103        Self {
104            tables: self.tables.clone(),
105            range_k: self.range_k,
106            k_mcr: self.k_mcr,
107            // Snapshot atomic counts so each instance owns its own.
108            #[cfg(feature = "probe-stats")]
109            probes: ProbeCounters {
110                total: AtomicU64::new(self.probes.total.load(Ordering::Relaxed)),
111                out_of_range: AtomicU64::new(self.probes.out_of_range.load(Ordering::Relaxed)),
112            },
113        }
114    }
115}
116
117impl<const N: usize> SegmentRangeTables<N> {
118    /// Forward pass through `xy` from `departure_time`. Each segment's
119    /// tabulation range comes from the previous segment's best/worst
120    /// extrema, padded by `RANGE_PADDING`. Segment 0's range is
121    /// degenerate (departure time is fixed). `range_k` and `k_mcr` must
122    /// be ≥ 2; see [`DEFAULT_RANGE_K`] / [`DEFAULT_K_MCR`].
123    pub fn build<SB: Sailboat, WS: WindSource>(
124        xy: &PathXY<N>,
125        boat: &SB,
126        wind: &WS,
127        step_distance_max: f64,
128        departure_time: f64,
129        range_k: usize,
130        k_mcr: usize,
131    ) -> Self {
132        #[cfg(feature = "profile-timers")]
133        let __profile_start = std::time::Instant::now();
134
135        assert!(
136            range_k >= 2,
137            "SegmentRangeTables::build requires range_k >= 2, got {range_k}",
138        );
139        assert!(
140            k_mcr >= 2,
141            "SegmentRangeTables::build requires k_mcr >= 2, got {k_mcr}",
142        );
143
144        let seg_count = N.saturating_sub(1);
145        let mut tables = Vec::with_capacity(seg_count);
146
147        let mut dep_nominal_lo = departure_time;
148        let mut dep_nominal_hi = departure_time;
149
150        let mut per_bucket_wmcr = vec![0.0f64; range_k];
151        let mut mcr_grid = vec![0.0f64; k_mcr];
152        let mut mcr_grid_01 = vec![0.0f64; k_mcr];
153
154        for seg_i in 0..seg_count {
155            let origin = xy.lat_lon(seg_i);
156            let destination = xy.lat_lon(seg_i + 1);
157
158            let (dep_min, dep_max) = pad_range(dep_nominal_lo, dep_nominal_hi);
159
160            // Reset per-bucket scratch state; reused across segments.
161            per_bucket_wmcr.fill(0.0);
162            let mut times = vec![0.0f64; range_k * k_mcr];
163
164            // Pass 1: sample each bucket on its natural mcr grid. Probe
165            // `mcr=0.0` first to detect a bucket that needs the 0.1
166            // fallback (zero-wind / pole-locked / etc.), then call the
167            // batched `get_travel_times_for_mcrs` once per bucket to fill
168            // every `times[b * k_mcr + k]` from a single shared substep walk.
169            for b in 0..range_k {
170                let dep = dep_at(dep_min, dep_max, b, range_k);
171                let segment = Segment {
172                    origin,
173                    destination,
174                    origin_time: dep,
175                    step_distance_max,
176                };
177
178                let t_at_zero = boat.get_travel_time(wind, segment, 0.0);
179                let wm = if t_at_zero.is_infinite() { 0.1 } else { 0.0 };
180                per_bucket_wmcr[b] = wm;
181
182                for (k, slot) in mcr_grid.iter_mut().enumerate() {
183                    *slot = mcr_at(wm, k, k_mcr);
184                }
185                let row = &mut times[b * k_mcr..(b + 1) * k_mcr];
186                boat.get_travel_times_for_mcrs(wind, segment, &mcr_grid, row);
187            }
188
189            // Pass 2 (mixed only): re-sample the wmcr=0.0 buckets on
190            // [0.1, 1.0] so every `times[..][k]` shares one mcr value
191            // across `b`. Buckets already at 0.1 stay put.
192            let worst_mcr = if per_bucket_wmcr.iter().any(|&w| w > 0.0) {
193                for (k, slot) in mcr_grid_01.iter_mut().enumerate() {
194                    *slot = mcr_at(0.1, k, k_mcr);
195                }
196                for b in 0..range_k {
197                    if per_bucket_wmcr[b] < 0.1 {
198                        let dep = dep_at(dep_min, dep_max, b, range_k);
199                        let segment = Segment {
200                            origin,
201                            destination,
202                            origin_time: dep,
203                            step_distance_max,
204                        };
205                        let row = &mut times[b * k_mcr..(b + 1) * k_mcr];
206                        boat.get_travel_times_for_mcrs(wind, segment, &mcr_grid_01, row);
207                    }
208                }
209                0.1
210            } else {
211                0.0
212            };
213
214            let best_min = (0..range_k)
215                .map(|b| times[b * k_mcr + (k_mcr - 1)])
216                .fold(f64::INFINITY, f64::min);
217            let worst_max = (0..range_k)
218                .map(|b| times[b * k_mcr])
219                .fold(f64::NEG_INFINITY, f64::max);
220            dep_nominal_lo = dep_min + best_min;
221            dep_nominal_hi = dep_max + worst_max;
222
223            tables.push(SegmentTable {
224                dep_min,
225                dep_max,
226                worst_mcr,
227                times,
228            });
229        }
230
231        let result = Self {
232            tables,
233            range_k,
234            k_mcr,
235            #[cfg(feature = "probe-stats")]
236            probes: ProbeCounters::default(),
237        };
238
239        #[cfg(feature = "profile-timers")]
240        crate::profile_timers::SEGMENT_CACHE_BUILD
241            .record(__profile_start.elapsed().as_nanos() as u64);
242
243        result
244    }
245
246    /// In-range vs. out-of-range probe counts.
247    #[cfg(feature = "probe-stats")]
248    pub fn probe_stats(&self) -> ProbeStats {
249        self.probes.snapshot()
250    }
251
252    /// Reset counters; use between measurement windows on one cache.
253    #[cfg(feature = "probe-stats")]
254    pub fn reset_probe_stats(&self) {
255        self.probes.reset();
256    }
257
258    /// `dep ≈ grid[idx] + frac * (grid[idx+1] - grid[idx])`. `idx`
259    /// clamped to `[0, range_k - 2]`. Returns the unclamped `rel`
260    /// alongside so `probe-stats` callers can tally out-of-range hits.
261    fn locate(t: &SegmentTable, range_k: usize, dep: f64) -> (usize, f64, f64) {
262        if t.dep_max <= t.dep_min {
263            return (0, 0.0, 0.0);
264        }
265        let span = t.dep_max - t.dep_min;
266        let step = span / (range_k - 1) as f64;
267        let rel = (dep - t.dep_min) / step;
268        let rel_clamped = rel.clamp(0.0, (range_k - 1) as f64);
269        let idx = (rel_clamped.floor() as usize).min(range_k - 2);
270        let frac = (rel_clamped - idx as f64).clamp(0.0, 1.0);
271        (idx, frac, rel)
272    }
273
274    #[cfg(feature = "probe-stats")]
275    fn record_probe(&self, rel: f64) {
276        self.probes.total.fetch_add(1, Ordering::Relaxed);
277        if rel < 0.0 || rel > (self.range_k - 1) as f64 {
278            self.probes.out_of_range.fetch_add(1, Ordering::Relaxed);
279        }
280    }
281
282    /// `(t_min, t_max)` for segment `seg_i` at `dep`, ordered. Scans
283    /// all `k_mcr` samples because in time-variant wind
284    /// `travel_time(mcr)` isn't monotonic (each mcr samples wind at a
285    /// different timestamp). On an all-INF row (e.g. pole-locked
286    /// segment) the lerp produces NaN; we catch that with
287    /// `!(t_min <= t_max)` and return `(INF, INF)` so `f64::clamp`
288    /// accepts it and the infeasibility propagates as a poor fitness
289    /// instead of panicking.
290    pub fn query_range(&self, seg_i: usize, dep: f64) -> (f64, f64) {
291        let t = &self.tables[seg_i];
292        let (idx, frac, _rel) = Self::locate(t, self.range_k, dep);
293        #[cfg(feature = "probe-stats")]
294        self.record_probe(_rel);
295        let mut t_min = f64::INFINITY;
296        let mut t_max = f64::NEG_INFINITY;
297        let k_mcr = self.k_mcr;
298        for k in 0..k_mcr {
299            let v = lerp(
300                t.times[idx * k_mcr + k],
301                t.times[(idx + 1) * k_mcr + k],
302                frac,
303            );
304            if v < t_min {
305                t_min = v;
306            }
307            if v > t_max {
308                t_max = v;
309            }
310        }
311        if t_min.is_nan() || t_max.is_nan() || t_max < t_min {
312            return (f64::INFINITY, f64::INFINITY);
313        }
314        (t_min, t_max)
315    }
316
317    /// Invert: `mcr_01` whose travel time matches `delta_time`. Clamps
318    /// to `[worst_mcr, 1.0]` when out of tabulated range. Replaces a
319    /// 12-iter bisection in the inner fitness path; error is two
320    /// lerps of a smooth monotone curve — see [`DEFAULT_K_MCR`].
321    pub fn query_mcr_for_delta_time(&self, seg_i: usize, dep: f64, delta_time: f64) -> f64 {
322        let t = &self.tables[seg_i];
323        let (idx, frac, _rel) = Self::locate(t, self.range_k, dep);
324        #[cfg(feature = "probe-stats")]
325        self.record_probe(_rel);
326
327        let k_mcr = self.k_mcr;
328        let wmcr = t.worst_mcr;
329        let t_worst = lerp(t.times[idx * k_mcr], t.times[(idx + 1) * k_mcr], frac);
330        let t_best = lerp(
331            t.times[idx * k_mcr + (k_mcr - 1)],
332            t.times[(idx + 1) * k_mcr + (k_mcr - 1)],
333            frac,
334        );
335
336        if delta_time >= t_worst {
337            return wmcr;
338        }
339        if delta_time <= t_best {
340            return 1.0;
341        }
342
343        // Linear scan beats binary search for the typical k_mcr=8;
344        // `times` is monotone and small enough to stay cache-hot.
345        let mut k_hi = 0;
346        let mut k_lo = k_mcr - 1;
347        let mut t_at_hi = t_worst;
348        let mut t_at_lo = t_best;
349        for k in 1..k_mcr {
350            let t_k = lerp(
351                t.times[idx * k_mcr + k],
352                t.times[(idx + 1) * k_mcr + k],
353                frac,
354            );
355            if t_k <= delta_time {
356                k_hi = k - 1;
357                k_lo = k;
358                t_at_hi = lerp(
359                    t.times[idx * k_mcr + (k - 1)],
360                    t.times[(idx + 1) * k_mcr + (k - 1)],
361                    frac,
362                );
363                t_at_lo = t_k;
364                break;
365            }
366        }
367
368        let mcr_at_hi = mcr_at(wmcr, k_hi, k_mcr);
369        let mcr_at_lo = mcr_at(wmcr, k_lo, k_mcr);
370
371        let span = t_at_hi - t_at_lo;
372        let frac_t = if span > 0.0 {
373            (t_at_hi - delta_time) / span
374        } else {
375            0.0
376        };
377        lerp(mcr_at_hi, mcr_at_lo, frac_t)
378    }
379}
380
381fn pad_range(lo: f64, hi: f64) -> (f64, f64) {
382    let span = hi - lo;
383    if span <= 0.0 {
384        return (lo, hi);
385    }
386    let pad = span * RANGE_PADDING;
387    (lo - pad, hi + pad)
388}
389
390#[expect(
391    clippy::float_cmp,
392    reason = "exact equality is the precise degenerate-range check."
393)]
394fn dep_at(dep_min: f64, dep_max: f64, k: usize, range_k: usize) -> f64 {
395    if range_k <= 1 || dep_max == dep_min {
396        return dep_min;
397    }
398    let frac = k as f64 / (range_k - 1) as f64;
399    dep_min + frac * (dep_max - dep_min)
400}
401
402/// `[wmcr, 1.0]` uniformly in `k_mcr` steps.
403fn mcr_at(wmcr: f64, k: usize, k_mcr: usize) -> f64 {
404    if k_mcr <= 1 {
405        return wmcr;
406    }
407    let frac = k as f64 / (k_mcr - 1) as f64;
408    wmcr + frac * (1.0 - wmcr)
409}