Skip to main content

sublinear_solver/
contrastive.rs

1//! Contrastive search — find the rows whose solution diverged most from a
2//! baseline. ADR-001 roadmap item #6.
3//!
4//! The architectural shape RuView, Cognitum, and Ruflo's inner loops
5//! actually want: not "give me the whole solution vector", but "tell me
6//! which entries crossed a boundary big enough to wake an agent". This
7//! is the change-driven activation primitive the ADR's thesis turns on.
8//!
9//! ## API
10//!
11//! ```rust,no_run
12//! # use sublinear_solver::contrastive::{find_anomalous_rows, AnomalyRow};
13//! # let baseline: Vec<f64> = vec![];
14//! # let current: Vec<f64> = vec![];
15//! let top_k = find_anomalous_rows(&baseline, &current, 5);
16//! for AnomalyRow { row, baseline, current, anomaly } in top_k {
17//!     println!("row {row}: was {baseline}, now {current} (Δ={anomaly})");
18//! }
19//! ```
20//!
21//! ## Complexity
22//!
23//! The current implementation is `O(n log k)` — one pass over the
24//! solution vectors with a `k`-sized min-heap. That's already useful
25//! (avoids `O(n log n)` of a full sort) but not yet the `O(k · log n)`
26//! the ADR promised. The follow-up will land a *direct* path that
27//! computes only the top-k entries of the new solution via the sublinear-
28//! Neumann single-entry primitive, without ever materialising the full
29//! current solution. Tracked as a `// TODO(ADR-001 #6 phase 2):` in the
30//! source.
31
32use crate::complexity::{Complexity, ComplexityClass};
33use crate::types::Precision;
34use alloc::collections::BinaryHeap;
35use alloc::vec::Vec;
36use core::cmp::Ordering;
37
38/// One row's anomaly report.
39#[derive(Debug, Clone, PartialEq)]
40#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
41pub struct AnomalyRow {
42    /// Row index in the solution vector.
43    pub row: usize,
44    /// The baseline value at this row.
45    pub baseline: Precision,
46    /// The current value at this row.
47    pub current: Precision,
48    /// `|current - baseline|`. The score used for ranking. Higher = more
49    /// anomalous.
50    pub anomaly: Precision,
51}
52
53// Min-heap helper: we want to keep the k *largest* anomalies, so we use a
54// max-of-min wrapper that orders by inverted anomaly score (smallest at the
55// top), and evict the smallest whenever a new entry beats it.
56#[derive(Debug, Clone)]
57struct MinHeapEntry(AnomalyRow);
58
59impl PartialEq for MinHeapEntry {
60    fn eq(&self, other: &Self) -> bool {
61        self.0.anomaly == other.0.anomaly && self.0.row == other.0.row
62    }
63}
64impl Eq for MinHeapEntry {}
65impl PartialOrd for MinHeapEntry {
66    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
67        Some(self.cmp(other))
68    }
69}
70impl Ord for MinHeapEntry {
71    fn cmp(&self, other: &Self) -> Ordering {
72        // Invert so BinaryHeap (max-heap) acts as a min-heap on anomaly.
73        // Tie-break on row index ascending so the API is deterministic
74        // (same inputs always yield the same top-k ordering).
75        other
76            .0
77            .anomaly
78            .partial_cmp(&self.0.anomaly)
79            .unwrap_or(Ordering::Equal)
80            .then_with(|| other.0.row.cmp(&self.0.row))
81    }
82}
83
84/// Return the `k` rows whose `current` value diverged most from `baseline`.
85///
86/// Result is sorted by descending anomaly score (largest first). Ties are
87/// broken by row index ascending so the API is deterministic.
88///
89/// - `O(n log k)` time, `O(k)` space.
90/// - If `k >= baseline.len()`, returns *all* rows sorted by anomaly.
91/// - If `k == 0`, returns an empty vector.
92///
93/// Panics if `baseline.len() != current.len()`.
94pub fn find_anomalous_rows(
95    baseline: &[Precision],
96    current: &[Precision],
97    k: usize,
98) -> Vec<AnomalyRow> {
99    assert_eq!(
100        baseline.len(),
101        current.len(),
102        "find_anomalous_rows: baseline.len()={} != current.len()={}",
103        baseline.len(),
104        current.len(),
105    );
106
107    if k == 0 || baseline.is_empty() {
108        return Vec::new();
109    }
110
111    // TODO(ADR-001 #6 phase 2): replace the O(n) full scan with a direct
112    // top-k path that computes individual entries of `current` via the
113    // sublinear-Neumann single-entry primitive, giving O(k · log n)
114    // total. Today this is the cheap O(n log k) baseline so RuView /
115    // Cognitum have something callable while phase 2 lands.
116
117    let mut heap: BinaryHeap<MinHeapEntry> = BinaryHeap::with_capacity(k.min(baseline.len()) + 1);
118    for (row, (&b, &c)) in baseline.iter().zip(current.iter()).enumerate() {
119        let anomaly = (c - b).abs();
120        let entry = MinHeapEntry(AnomalyRow {
121            row,
122            baseline: b,
123            current: c,
124            anomaly,
125        });
126
127        if heap.len() < k {
128            heap.push(entry);
129        } else if let Some(smallest) = heap.peek() {
130            // Smallest is at the top because of the inverted Ord.
131            if anomaly > smallest.0.anomaly {
132                heap.pop();
133                heap.push(entry);
134            }
135        }
136    }
137
138    // Drain into a sorted-descending vector.
139    let mut out: Vec<AnomalyRow> = heap.into_iter().map(|e| e.0).collect();
140    out.sort_by(|a, b| {
141        b.anomaly
142            .partial_cmp(&a.anomaly)
143            .unwrap_or(Ordering::Equal)
144            .then_with(|| a.row.cmp(&b.row))
145    });
146    out
147}
148
149/// Top-k variant constrained to a caller-supplied **candidate set**. Skips
150/// the rest of the solution vector entirely.
151///
152/// This is the building block for the genuinely sub-linear contrastive
153/// recipe outlined in [ADR-001 §Roadmap item #6 phase-2]. The caller
154/// computes the candidate set once — typically the rows reachable from
155/// the support of a sparse RHS delta in a few hops of `A` — and passes
156/// it here. Cost is `O(|candidates| log k)` instead of `O(n log k)`,
157/// so when `|candidates| ≪ n` the call drops to true sub-linear in n.
158///
159/// Composes with `incremental::SparseDelta` and a per-entry solver like
160/// `SublinearNeumannSolver`:
161///
162/// ```text
163///   1. candidates = closure(delta.indices, A, depth)
164///   2. current[i] = sublinear_neumann.solve_single_entry(A, b_new, i)
165///                                                 for i in candidates
166///   3. top_k    = find_anomalous_rows_in_subset(baseline, current,
167///                                                candidates, k)
168/// ```
169///
170/// `current` must be a length-`n` vector that has the correct values at
171/// the candidate indices; entries at non-candidate indices are ignored.
172/// (We don't require sparsity — callers can pass a dense vector with
173/// stale values everywhere except the candidates.)
174///
175/// Result is sorted by descending anomaly score; ties broken by row
176/// index ascending. Returns an empty vec if `k == 0` or `candidates` is
177/// empty. Panics on `baseline.len() != current.len()`.
178pub fn find_anomalous_rows_in_subset(
179    baseline: &[Precision],
180    current: &[Precision],
181    candidates: &[usize],
182    k: usize,
183) -> Vec<AnomalyRow> {
184    assert_eq!(
185        baseline.len(),
186        current.len(),
187        "find_anomalous_rows_in_subset: dim mismatch {} vs {}",
188        baseline.len(),
189        current.len(),
190    );
191
192    if k == 0 || candidates.is_empty() || baseline.is_empty() {
193        return Vec::new();
194    }
195
196    let mut heap: BinaryHeap<MinHeapEntry> =
197        BinaryHeap::with_capacity(k.min(candidates.len()) + 1);
198
199    for &row in candidates {
200        // Skip out-of-bounds indices silently — caller may supply a
201        // closure that overshoots the matrix dimension (e.g. via a
202        // wrap-around graph traversal). Treating it as a no-op is more
203        // useful than panicking.
204        if row >= baseline.len() {
205            continue;
206        }
207        let anomaly = (current[row] - baseline[row]).abs();
208        let entry = MinHeapEntry(AnomalyRow {
209            row,
210            baseline: baseline[row],
211            current: current[row],
212            anomaly,
213        });
214        if heap.len() < k {
215            heap.push(entry);
216        } else if let Some(smallest) = heap.peek() {
217            if anomaly > smallest.0.anomaly {
218                heap.pop();
219                heap.push(entry);
220            }
221        }
222    }
223
224    let mut out: Vec<AnomalyRow> = heap.into_iter().map(|e| e.0).collect();
225    out.sort_by(|a, b| {
226        b.anomaly
227            .partial_cmp(&a.anomaly)
228            .unwrap_or(Ordering::Equal)
229            .then_with(|| a.row.cmp(&b.row))
230    });
231    out
232}
233
234/// Return only the rows whose anomaly score exceeds `threshold`. Useful as
235/// the boundary-crossing primitive for change-driven activation: an agent
236/// stays asleep until at least one entry crosses the threshold.
237///
238/// - `O(n)` time, `O(matches)` space.
239///
240/// Panics if `baseline.len() != current.len()`.
241pub fn find_rows_above_threshold(
242    baseline: &[Precision],
243    current: &[Precision],
244    threshold: Precision,
245) -> Vec<AnomalyRow> {
246    assert_eq!(
247        baseline.len(),
248        current.len(),
249        "find_rows_above_threshold: dim mismatch {} vs {}",
250        baseline.len(),
251        current.len(),
252    );
253
254    baseline
255        .iter()
256        .zip(current.iter())
257        .enumerate()
258        .filter_map(|(row, (&b, &c))| {
259            let anomaly = (c - b).abs();
260            if anomaly > threshold {
261                Some(AnomalyRow {
262                    row,
263                    baseline: b,
264                    current: c,
265                    anomaly,
266                })
267            } else {
268                None
269            }
270        })
271        .collect()
272}
273
274// ─────────────────────────────────────────────────────────────────────────
275// Complexity declaration. The current path is Linear; phase-2 will drop
276// to SubLinear (O(k · log n)). Declared as Adaptive { Linear, Linear } for
277// now so the contract is honest about today's bound.
278// ─────────────────────────────────────────────────────────────────────────
279
280/// Marker type with a `Complexity` impl for `find_anomalous_rows`.
281pub struct FindAnomalousRowsOp;
282
283impl Complexity for FindAnomalousRowsOp {
284    const CLASS: ComplexityClass = ComplexityClass::Adaptive {
285        default: &ComplexityClass::Linear,
286        worst: &ComplexityClass::Linear,
287    };
288    const DETAIL: &'static str =
289        "O(n log k) full-scan baseline today; phase-2 lowers to O(k · log n) via the \
290         sublinear-Neumann single-entry primitive.";
291}
292
293#[cfg(test)]
294mod tests {
295    use super::*;
296
297    #[test]
298    fn empty_inputs_return_empty() {
299        let v: Vec<Precision> = alloc::vec![];
300        assert_eq!(find_anomalous_rows(&v, &v, 5), alloc::vec![]);
301        assert_eq!(find_anomalous_rows(&v, &v, 0), alloc::vec![]);
302    }
303
304    #[test]
305    fn k_zero_returns_empty() {
306        let b = alloc::vec![1.0, 2.0, 3.0];
307        let c = alloc::vec![10.0, 20.0, 30.0];
308        assert_eq!(find_anomalous_rows(&b, &c, 0), alloc::vec![]);
309    }
310
311    #[test]
312    fn top_k_is_correct_for_small_case() {
313        let b = alloc::vec![1.0, 1.0, 1.0, 1.0, 1.0];
314        let c = alloc::vec![1.0, 5.0, 1.0, 9.0, 2.0];
315        // anomalies: 0, 4, 0, 8, 1 — sorted desc: 8 (row 3), 4 (row 1), 1 (row 4).
316        let top = find_anomalous_rows(&b, &c, 3);
317        assert_eq!(top.len(), 3);
318        assert_eq!(top[0].row, 3);
319        assert_eq!(top[0].anomaly, 8.0);
320        assert_eq!(top[1].row, 1);
321        assert_eq!(top[1].anomaly, 4.0);
322        assert_eq!(top[2].row, 4);
323        assert_eq!(top[2].anomaly, 1.0);
324    }
325
326    #[test]
327    fn k_larger_than_n_returns_all_sorted() {
328        let b = alloc::vec![0.0, 0.0, 0.0];
329        let c = alloc::vec![3.0, 1.0, 2.0];
330        let top = find_anomalous_rows(&b, &c, 10);
331        assert_eq!(top.len(), 3);
332        // Sorted desc by anomaly.
333        assert!(top[0].anomaly >= top[1].anomaly);
334        assert!(top[1].anomaly >= top[2].anomaly);
335    }
336
337    #[test]
338    fn tie_breaks_on_row_index_ascending() {
339        let b = alloc::vec![0.0, 0.0, 0.0];
340        let c = alloc::vec![5.0, 5.0, 5.0]; // all tied
341        let top = find_anomalous_rows(&b, &c, 2);
342        assert_eq!(top.len(), 2);
343        assert_eq!(top[0].row, 0);
344        assert_eq!(top[1].row, 1);
345    }
346
347    #[test]
348    fn anomaly_is_absolute_value() {
349        let b = alloc::vec![0.0, 10.0];
350        let c = alloc::vec![-7.0, 3.0];
351        // anomalies: 7, 7 — both tied. Tie-break: row 0 before row 1.
352        let top = find_anomalous_rows(&b, &c, 2);
353        assert_eq!(top[0].anomaly, 7.0);
354        assert_eq!(top[1].anomaly, 7.0);
355        assert_eq!(top[0].row, 0);
356    }
357
358    #[test]
359    #[should_panic(expected = "dim mismatch")]
360    fn threshold_panics_on_dim_mismatch() {
361        let b = alloc::vec![1.0, 2.0];
362        let c = alloc::vec![1.0];
363        let _ = find_rows_above_threshold(&b, &c, 0.5);
364    }
365
366    #[test]
367    fn threshold_filters_correctly() {
368        let b = alloc::vec![0.0, 0.0, 0.0, 0.0];
369        let c = alloc::vec![0.1, 0.5, 2.0, 0.05];
370        let above = find_rows_above_threshold(&b, &c, 0.3);
371        // 0.5 and 2.0 pass; 0.1 and 0.05 don't.
372        assert_eq!(above.len(), 2);
373        assert_eq!(above[0].row, 1);
374        assert_eq!(above[1].row, 2);
375    }
376
377    #[test]
378    fn threshold_returns_empty_when_nothing_crosses() {
379        let b = alloc::vec![0.0; 5];
380        let c = alloc::vec![0.01, 0.02, 0.03, 0.04, 0.05];
381        let above = find_rows_above_threshold(&b, &c, 1.0);
382        assert!(above.is_empty(), "no entry above threshold should return empty");
383    }
384
385    // ─────────────────────────────────────────────────────────────────
386    // find_anomalous_rows_in_subset (ADR-001 #6 phase-2)
387    // ─────────────────────────────────────────────────────────────────
388
389    #[test]
390    fn subset_returns_only_candidates() {
391        let baseline = alloc::vec![0.0; 10];
392        let mut current = alloc::vec![0.0; 10];
393        // Put a huge anomaly at row 7 that ISN'T in the candidate set —
394        // we expect it to be ignored.
395        current[7] = 999.0;
396        // Real candidates with smaller anomalies.
397        current[2] = 5.0;
398        current[5] = 3.0;
399        let candidates = alloc::vec![2usize, 5];
400        let top = find_anomalous_rows_in_subset(&baseline, &current, &candidates, 5);
401        assert_eq!(top.len(), 2);
402        assert_eq!(top[0].row, 2);
403        assert_eq!(top[0].anomaly, 5.0);
404        assert_eq!(top[1].row, 5);
405        assert_eq!(top[1].anomaly, 3.0);
406        // 999.0 at row 7 is OUTSIDE the candidates — must not appear.
407        assert!(top.iter().all(|r| r.row != 7));
408    }
409
410    #[test]
411    fn subset_k_limit_works() {
412        let baseline = alloc::vec![0.0; 100];
413        let mut current = alloc::vec![0.0; 100];
414        for &i in &[10, 20, 30, 40, 50] {
415            current[i] = i as Precision;
416        }
417        let candidates = alloc::vec![10usize, 20, 30, 40, 50];
418        // Ask for top-3; should get the 3 biggest anomalies (rows 50, 40, 30).
419        let top = find_anomalous_rows_in_subset(&baseline, &current, &candidates, 3);
420        assert_eq!(top.len(), 3);
421        assert_eq!(top[0].row, 50);
422        assert_eq!(top[1].row, 40);
423        assert_eq!(top[2].row, 30);
424    }
425
426    #[test]
427    fn subset_ignores_out_of_bounds_indices() {
428        let baseline = alloc::vec![0.0; 5];
429        let mut current = alloc::vec![0.0; 5];
430        current[2] = 10.0;
431        // Caller's candidate closure overshoots — out-of-bound indices
432        // must be skipped silently, not panic.
433        let candidates = alloc::vec![2usize, 100, 200];
434        let top = find_anomalous_rows_in_subset(&baseline, &current, &candidates, 5);
435        assert_eq!(top.len(), 1);
436        assert_eq!(top[0].row, 2);
437    }
438
439    #[test]
440    fn subset_empty_candidates_returns_empty() {
441        let baseline = alloc::vec![0.0; 5];
442        let current = alloc::vec![1.0, 2.0, 3.0, 4.0, 5.0];
443        let candidates: Vec<usize> = alloc::vec![];
444        let top = find_anomalous_rows_in_subset(&baseline, &current, &candidates, 10);
445        assert!(top.is_empty());
446    }
447
448    #[test]
449    fn subset_matches_full_scan_when_candidates_cover_all_rows() {
450        // Sanity check: when the candidate set IS the full row range,
451        // the result should match find_anomalous_rows.
452        let baseline = alloc::vec![0.0; 8];
453        let current = alloc::vec![1.0, 5.0, 1.0, 9.0, 2.0, 7.0, 3.0, 6.0];
454        let full = find_anomalous_rows(&baseline, &current, 4);
455        let candidates: Vec<usize> = (0..8).collect();
456        let subset = find_anomalous_rows_in_subset(&baseline, &current, &candidates, 4);
457        assert_eq!(full, subset);
458    }
459}