Skip to main content

sphereql_embed/
meta_model.rs

1//! Meta-learning across corpora: predict a [`PipelineConfig`] for a new
2//! corpus by consulting past tuner runs on similar corpora.
3//!
4//! This is Level 2 of SphereQL's self-optimization hierarchy (per the
5//! metalearning-direction memory):
6//!
7//! - **L1** (`tuner::auto_tune`): per-corpus search. Produces a best config.
8//! - **L2** (this module): cross-corpus generalization. Takes the (corpus
9//!   features, best config) pairs produced by L1 and learns a function
10//!   `CorpusFeatures → PipelineConfig` so new corpora can skip search or
11//!   warm-start it.
12//! - **L3**: online adaptation from query feedback. Deferred.
13//!
14//! Today's meta-model is a simple z-score-normalized nearest neighbor
15//! over [`CorpusFeatures::to_vec`], with two model-space adjustments:
16//! scale-type features (item/category/dim counts) are `ln(1+x)`
17//! compressed before normalization so a single 500k corpus can't
18//! dominate the statistics, and training sets mixing multiple
19//! `metric_name`s are stratified to the dominant metric at fit time
20//! (scores under different objectives are not comparable). It works
21//! with any `N ≥ 1` training records, is deterministic, and has no
22//! free hyperparameters. When you've accumulated ≥ 10 diverse corpora
23//! you can swap in something fancier (gradient-boosted trees, small
24//! MLP) against the same [`MetaModel`] trait — the storage format
25//! ([`MetaTrainingRecord`]) stays stable.
26//!
//! # Storage
//!
//! Two on-disk layouts are understood. [`MetaTrainingRecord::save_list`]
//! writes a flat, pretty-printed JSON array:
//!
//! ```json
//! [
//!   { "corpus_id": "built_in_775", "features": {...}, "best_config": {...}, ... },
//!   ...
//! ]
//! ```
//!
//! [`MetaTrainingRecord::append_to`] (and therefore the default store)
//! writes JSON Lines instead — the same objects, one per line, with no
//! enclosing array. [`MetaTrainingRecord::load_list`] reads either
//! layout. Both are plain enough to edit by hand or process with `jq`.
41
42use std::collections::HashMap;
43use std::fs;
44use std::io;
45use std::path::{Path, PathBuf};
46
47use crate::config::{PipelineConfig, ProjectionKind};
48use crate::corpus_features::{CORPUS_FEATURE_COUNT, CorpusFeatures};
49use crate::feedback::FeedbackSummary;
50use crate::tuner::TuneReport;
51use crate::util::{default_timestamp, migrate_legacy_array_to_jsonl, sphereql_home_dir};
52
53/// One observation for the meta-learner: "on this corpus profile, this
54/// config was found to be best under this metric."
55#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
56pub struct MetaTrainingRecord {
57    /// User-supplied corpus identifier. Not used by the model — just for
58    /// human-readable provenance in logs and training-set audits.
59    pub corpus_id: String,
60    /// Low-dim profile of the corpus. Input to the meta-model.
61    pub features: CorpusFeatures,
62    /// The config that won the tuner run. Target of the meta-model.
63    pub best_config: PipelineConfig,
64    /// The score achieved by `best_config` under `metric_name`.
65    pub best_score: f64,
66    /// Normalized improvement of `best_score` over the run's mean trial
67    /// score, as a fraction of the available headroom:
68    /// `(best − mean) / (1 − mean)`, clamped to `[0, 1]`.
69    ///
70    /// Unlike `best_score`, this is comparable across corpora of
71    /// different intrinsic difficulty — a 0.9 on an easy corpus and a
72    /// 0.6 on a hard one can represent the same "the tuner found real
73    /// signal" evidence. A lift near 0 means the landscape was flat
74    /// and the winning config is weak evidence.
75    /// [`DistanceWeightedMetaModel`] prefers this over `best_score`
76    /// when present. `None` for records created before this field
77    /// existed or from runs with fewer than 2 trials.
78    #[serde(default)]
79    pub score_lift: Option<f64>,
80    /// Which quality metric was being optimized. Records with different
81    /// metrics aren't directly comparable; both shipped models
82    /// stratify to the dominant metric at fit time.
83    pub metric_name: String,
84    /// Short description of the search strategy, e.g.
85    /// `"random{budget=24,seed=...}"`. Free-form — for auditing only.
86    pub strategy: String,
87    /// RFC 3339 timestamp (or any string). Free-form.
88    pub timestamp: String,
89}
90
91impl MetaTrainingRecord {
92    /// Build a record from the ingredients of one tuner run.
93    ///
94    /// `corpus_id` and `strategy_label` are free-form strings the caller
95    /// provides for provenance — the tuner doesn't know either on its
96    /// own. `timestamp` defaults to seconds-since-Unix-epoch (sortable,
97    /// unambiguous, dependency-free); swap in your own format via
98    /// [`Self::with_timestamp`] if you want human-readable.
99    pub fn from_tune_result(
100        corpus_id: impl Into<String>,
101        features: CorpusFeatures,
102        report: &TuneReport,
103        strategy_label: impl Into<String>,
104    ) -> Self {
105        Self {
106            corpus_id: corpus_id.into(),
107            features,
108            best_config: report.best_config.clone(),
109            best_score: report.best_score,
110            score_lift: score_lift_from_report(report),
111            metric_name: report.metric_name.clone(),
112            strategy: strategy_label.into(),
113            timestamp: default_timestamp(),
114        }
115    }
116
117    /// Replace the timestamp. Useful when the caller has a preferred
118    /// format (e.g. an RFC 3339 string from `chrono`).
119    pub fn with_timestamp(mut self, ts: impl Into<String>) -> Self {
120        self.timestamp = ts.into();
121        self
122    }
123
124    /// Save a list of records as a JSON array to disk. Creates parent
125    /// directories as needed.
126    ///
127    /// Kept for callers who want a pretty-printed snapshot (backups,
128    /// audits, diffs). The default on-disk store uses JSONL under
129    /// [`Self::append_to_default_store`] for O(1) appends — read it
130    /// back via [`Self::load_default_store`], which auto-detects
131    /// legacy array files as well.
132    pub fn save_list(records: &[Self], path: impl AsRef<Path>) -> io::Result<()> {
133        let path = path.as_ref();
134        if let Some(parent) = path.parent()
135            && !parent.as_os_str().is_empty()
136        {
137            fs::create_dir_all(parent)?;
138        }
139        let json = serde_json::to_string_pretty(records)
140            .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
141        fs::write(path, json)
142    }
143
144    /// Load a list of records from disk.
145    ///
146    /// Accepts both a JSON array (legacy format and what `save_list`
147    /// writes) and JSON Lines (one record per line, the new append
148    /// format). Detection is first-character based: `[` ⇒ array,
149    /// anything else ⇒ JSONL. Returns an empty vec if the file
150    /// doesn't exist.
151    pub fn load_list(path: impl AsRef<Path>) -> io::Result<Vec<Self>> {
152        let path = path.as_ref();
153        if !path.exists() {
154            return Ok(Vec::new());
155        }
156        let raw = fs::read_to_string(path)?;
157        let trimmed = raw.trim_start();
158        if trimmed.is_empty() {
159            return Ok(Vec::new());
160        }
161        if trimmed.starts_with('[') {
162            // Legacy JSON array.
163            return serde_json::from_str(trimmed)
164                .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e));
165        }
166        // JSONL: one record per non-empty line.
167        trimmed
168            .lines()
169            .filter(|l| !l.trim().is_empty())
170            .map(|l| {
171                serde_json::from_str::<Self>(l)
172                    .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))
173            })
174            .collect()
175    }
176
177    /// Default on-disk training-store path: `~/.sphereql/meta_records.json`.
178    pub fn default_store_path() -> io::Result<PathBuf> {
179        Ok(sphereql_home_dir()?.join("meta_records.json"))
180    }
181
182    /// Append this record to the user's default training store.
183    ///
184    /// Constant-time per call: opens the file in append mode and
185    /// writes one JSON-encoded line. Previously this loaded every
186    /// record, pushed the new one, and rewrote the entire file —
187    /// O(N) per append, which dominated at N → 10k.
188    ///
189    /// Existing stores written in legacy array format keep working;
190    /// on the first append we re-emit the file as JSONL (one-time
191    /// O(N) migration), then subsequent appends are O(1).
192    pub fn append_to_default_store(&self) -> io::Result<PathBuf> {
193        let path = Self::default_store_path()?;
194        self.append_to(&path)?;
195        Ok(path)
196    }
197
198    /// Append this record to an arbitrary JSONL file. Creates the
199    /// file and any missing parent directories on first call.
200    pub fn append_to(&self, path: impl AsRef<Path>) -> io::Result<()> {
201        use std::io::Write;
202
203        let path = path.as_ref();
204        if let Some(parent) = path.parent()
205            && !parent.as_os_str().is_empty()
206        {
207            fs::create_dir_all(parent)?;
208        }
209
210        // Migrate a legacy array file to JSONL on the first append —
211        // one-time cost that converts N records, after which appends
212        // are O(1). New files skip this path entirely.
213        migrate_legacy_array_to_jsonl(path, |head| {
214            let records: Vec<Self> = serde_json::from_str(head.trim_start())
215                .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
216            let mut migrated = String::with_capacity(head.len());
217            for r in &records {
218                serde_json::to_string(r)
219                    .map(|line| {
220                        migrated.push_str(&line);
221                        migrated.push('\n');
222                    })
223                    .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
224            }
225            Ok(migrated)
226        })?;
227
228        let mut f = fs::OpenOptions::new()
229            .create(true)
230            .append(true)
231            .open(path)?;
232        let line = serde_json::to_string(self)
233            .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
234        writeln!(f, "{line}")
235    }
236
237    /// Load all records from the user's default training store. Returns
238    /// an empty vec if the store doesn't exist yet.
239    pub fn load_default_store() -> io::Result<Vec<Self>> {
240        Self::load_list(Self::default_store_path()?)
241    }
242
243    /// Blend this record's automated `best_score` with a feedback
244    /// summary's `mean_score` into a single adjusted score.
245    ///
246    /// `alpha` ∈ `[0, 1]` controls how much weight to give feedback:
247    ///   - `0.0` returns `best_score` unchanged (ignore feedback).
248    ///   - `1.0` returns the feedback mean (trust feedback entirely).
249    ///   - `0.5` weights them equally.
250    ///
251    /// `alpha` is clamped to `[0, 1]`. When `summary` belongs to a
252    /// different corpus than `self` the function still computes the
253    /// blend — verifying corpus_id alignment is the caller's
254    /// responsibility; this keeps the API composable under custom
255    /// lookup schemes.
256    ///
257    /// Note: this blends `best_score`, not [`Self::score_lift`]. The
258    /// result is on the raw-score scale, which is not comparable
259    /// across corpora of different difficulty — don't substitute it
260    /// for `score_lift` in cross-corpus comparisons.
261    pub fn adjust_score_with_feedback(&self, summary: &FeedbackSummary, alpha: f64) -> f64 {
262        let a = alpha.clamp(0.0, 1.0);
263        (1.0 - a) * self.best_score + a * summary.mean_score
264    }
265}
266
267/// Compute [`MetaTrainingRecord::score_lift`] from a tuner report:
268/// `(best − mean) / (1 − mean)` — the fraction of the run's available
269/// headroom the winning config captured. `None` with fewer than 2
270/// trials (no distribution to compare against); `Some(0.0)` when every
271/// trial already scored ~1.0 (the config demonstrably didn't matter).
272fn score_lift_from_report(report: &TuneReport) -> Option<f64> {
273    if report.trials.len() < 2 {
274        return None;
275    }
276    let mean = report.mean_score();
277    let headroom = 1.0 - mean;
278    if headroom < 1e-9 {
279        return Some(0.0);
280    }
281    Some(((report.best_score - mean) / headroom).clamp(0.0, 1.0))
282}
283
284// ── Shared helpers ─────────────────────────────────────────────────
285
286/// Indices of the scale-type features in [`CorpusFeatures::to_vec`]
287/// (`n_items`, `n_categories`, `dim`, `mean_members_per_category`)
288/// that get `ln(1+x)` compression before z-scoring. Raw counts span
289/// 775 → 500,000 across real corpora; without the log transform a
290/// single large corpus either dominates the normalized distance or
291/// stretches the z-scale until every other corpus collapses together.
292const LOG_SCALED_FEATURES: [usize; 4] = [0, 1, 2, 3];
293
294/// Map a raw feature vector into model space: `ln(1+x)` on the scale
295/// features, everything else unchanged. Applied consistently to both
296/// training records and queries before normalization.
297fn to_model_space(raw: &[f64; CORPUS_FEATURE_COUNT]) -> [f64; CORPUS_FEATURE_COUNT] {
298    let mut out = *raw;
299    for &i in &LOG_SCALED_FEATURES {
300        out[i] = out[i].max(0.0).ln_1p();
301    }
302    out
303}
304
305/// Retain only the records sharing the most common `metric_name`.
306/// Scores produced under different objectives aren't comparable, so
307/// mixing them corrupts both the z-score statistics and the
308/// distance-weighted selection. Ties break toward the
309/// lexicographically largest name for determinism. Returns the input
310/// unchanged when it is empty or already single-metric.
311fn filter_dominant_metric(records: &[MetaTrainingRecord]) -> Vec<MetaTrainingRecord> {
312    if records.is_empty() {
313        return Vec::new();
314    }
315    let mut counts: HashMap<&str, usize> = HashMap::new();
316    for r in records {
317        *counts.entry(r.metric_name.as_str()).or_default() += 1;
318    }
319    if counts.len() <= 1 {
320        return records.to_vec();
321    }
322    let dominant = counts
323        .iter()
324        .max_by(|a, b| a.1.cmp(b.1).then(a.0.cmp(b.0)))
325        .map(|(k, _)| (*k).to_string())
326        .expect("counts non-empty");
327    records
328        .iter()
329        .filter(|r| r.metric_name == dominant)
330        .cloned()
331        .collect()
332}
333
334/// Per-feature mean + std computed across a training set (in model
335/// space — see [`to_model_space`]), used for z-score normalization by
336/// both meta-model implementations.
337///
338/// Returns `(means, stds)`. Features with near-zero variance get a
339/// stored std of `0.0` rather than the true tiny value, so
340/// [`normalize_features`] can detect the degenerate case and zero the
341/// feature out instead of dividing by something that blows up.
342fn compute_feature_stats(
343    records: &[MetaTrainingRecord],
344) -> ([f64; CORPUS_FEATURE_COUNT], [f64; CORPUS_FEATURE_COUNT]) {
345    let mut means = [0.0; CORPUS_FEATURE_COUNT];
346    let mut stds = [0.0; CORPUS_FEATURE_COUNT];
347    let n = records.len();
348    if n == 0 {
349        return (means, [1.0; CORPUS_FEATURE_COUNT]);
350    }
351    let vecs: Vec<[f64; CORPUS_FEATURE_COUNT]> = records
352        .iter()
353        .map(|r| to_model_space(&r.features.to_vec()))
354        .collect();
355
356    for i in 0..CORPUS_FEATURE_COUNT {
357        let mean: f64 = vecs.iter().map(|v| v[i]).sum::<f64>() / n as f64;
358        means[i] = mean;
359        let var: f64 =
360            vecs.iter().map(|v| (v[i] - mean).powi(2)).sum::<f64>() / (n - 1).max(1) as f64;
361        let sd = var.sqrt();
362        stds[i] = if sd > f64::EPSILON { sd } else { 0.0 };
363    }
364    (means, stds)
365}
366
367/// Z-score normalize a model-space feature vector against precomputed
368/// `means`/`stds`. Features whose stored std is below `f64::EPSILON`
369/// (zero-variance in the training set) map to `0.0` rather than
370/// dividing by a near-zero number.
371fn normalize_features(
372    model_space: &[f64; CORPUS_FEATURE_COUNT],
373    means: &[f64; CORPUS_FEATURE_COUNT],
374    stds: &[f64; CORPUS_FEATURE_COUNT],
375) -> [f64; CORPUS_FEATURE_COUNT] {
376    let mut out = [0.0; CORPUS_FEATURE_COUNT];
377    for i in 0..CORPUS_FEATURE_COUNT {
378        let sd = stds[i];
379        out[i] = if sd > f64::EPSILON {
380            (model_space[i] - means[i]) / sd
381        } else {
382            0.0
383        };
384    }
385    out
386}
387
388/// Euclidean distance between two z-score-normalized feature vectors.
389fn normalized_euclidean(a: &[f64; CORPUS_FEATURE_COUNT], b: &[f64; CORPUS_FEATURE_COUNT]) -> f64 {
390    a.iter()
391        .zip(b.iter())
392        .map(|(x, y)| (x - y).powi(2))
393        .sum::<f64>()
394        .sqrt()
395}
396
/// Upper median of a non-empty sequence of floats: the element at
/// index `len / 2` once the values are ordered by `total_cmp`.
///
/// # Panics
///
/// Panics if the sequence is empty.
fn median_f64(values: impl Iterator<Item = f64>) -> f64 {
    let mut pool: Vec<f64> = values.collect();
    assert!(!pool.is_empty(), "median of empty sequence");
    let mid = pool.len() / 2;
    // Only the element at `mid` is needed, so a selection suffices.
    let (_, upper_median, _) = pool.select_nth_unstable_by(mid, f64::total_cmp);
    *upper_median
}
404
/// Upper median of a non-empty sequence of integers: the element at
/// index `len / 2` of the sorted values.
///
/// # Panics
///
/// Panics if the sequence is empty.
fn median_usize(values: impl Iterator<Item = usize>) -> usize {
    let mut pool: Vec<usize> = values.collect();
    assert!(!pool.is_empty(), "median of empty sequence");
    let mid = pool.len() / 2;
    // Only the element at `mid` is needed, so a selection suffices.
    let (_, upper_median, _) = pool.select_nth_unstable(mid);
    *upper_median
}
412
413// ── Trait ──────────────────────────────────────────────────────────
414
415/// Predicts a [`PipelineConfig`] from a [`CorpusFeatures`] profile.
416///
417/// Implementers fit on a training set of [`MetaTrainingRecord`]s (pairs
418/// of (features, best_config) observed from past tuner runs) and predict
419/// a config for a new corpus.
420pub trait MetaModel {
421    /// Fit on a training set. Replacing any prior state.
422    fn fit(&mut self, records: &[MetaTrainingRecord]);
423
424    /// True once `fit` has been called with at least one usable record.
425    /// [`Self::predict`] panics when this is false, so Result-returning
426    /// boundaries (e.g. `SphereQLPipeline::new_from_metamodel`) check
427    /// this first. No default impl on purpose: every model must answer
428    /// for its own notion of "fitted" rather than inherit a guess.
429    fn is_fitted(&self) -> bool;
430
431    /// Predict the config that should work best on a corpus with the
432    /// given profile. Panics if `fit` has not been called with at least
433    /// one record — callers should treat `MetaModel` as a trained object
434    /// and front-load `fit`, or check [`Self::is_fitted`] when the
435    /// training state isn't statically known.
436    fn predict(&self, features: &CorpusFeatures) -> PipelineConfig;
437
438    /// Short name for logs and model comparison.
439    fn name(&self) -> &str;
440}
441
442// ── Nearest-neighbor baseline ─────────────────────────────────────────
443
444/// The simplest useful meta-model: given a new corpus, return the
445/// best_config of the training record whose corpus-feature vector is
446/// closest in z-score-normalized Euclidean distance (scale features
447/// log-compressed first — see [`to_model_space`]).
448///
449/// - Works with `N ≥ 1` records.
450/// - Deterministic; no hyperparameters.
451/// - Degenerate features (zero variance across training records) are
452///   dropped from the distance computation at fit time so they don't
453///   divide by zero or dominate via raw-scale inflation.
454/// - Training sets mixing multiple `metric_name`s are stratified to
455///   the dominant metric at fit time — see [`Self::records`] for what
456///   was actually retained.
457#[derive(Debug, Clone)]
458pub struct NearestNeighborMetaModel {
459    records: Vec<MetaTrainingRecord>,
460    feature_means: [f64; CORPUS_FEATURE_COUNT],
461    feature_stds: [f64; CORPUS_FEATURE_COUNT],
462}
463
464impl Default for NearestNeighborMetaModel {
465    fn default() -> Self {
466        Self {
467            records: Vec::new(),
468            feature_means: [0.0; CORPUS_FEATURE_COUNT],
469            feature_stds: [1.0; CORPUS_FEATURE_COUNT],
470        }
471    }
472}
473
474impl NearestNeighborMetaModel {
475    pub fn new() -> Self {
476        Self::default()
477    }
478
479    /// Borrow the training records — useful for introspecting what the
480    /// model considers the nearest-neighbor candidate pool (after the
481    /// dominant-metric stratification applied by `fit`).
482    pub fn records(&self) -> &[MetaTrainingRecord] {
483        &self.records
484    }
485
486    /// Distance from a given feature vector to every stored record,
487    /// sorted ascending. Returned as `(record_index, distance)` pairs.
488    pub fn rank_candidates(&self, features: &CorpusFeatures) -> Vec<(usize, f64)> {
489        let q = normalize_features(
490            &to_model_space(&features.to_vec()),
491            &self.feature_means,
492            &self.feature_stds,
493        );
494        let mut ranked: Vec<(usize, f64)> = self
495            .records
496            .iter()
497            .enumerate()
498            .map(|(i, r)| {
499                let v = normalize_features(
500                    &to_model_space(&r.features.to_vec()),
501                    &self.feature_means,
502                    &self.feature_stds,
503                );
504                (i, normalized_euclidean(&q, &v))
505            })
506            .collect();
507        // `total_cmp` sorts NaN to the end — which is what we want
508        // under a "nearest first" policy: any record whose distance
509        // is non-finite sinks to the bottom instead of silently
510        // equating with finite candidates.
511        ranked.sort_by(|a, b| a.1.total_cmp(&b.1));
512        ranked
513    }
514
515    /// k-NN prediction with per-knob aggregation, instead of copying
516    /// the single nearest record's config wholesale.
517    ///
518    /// Takes the `k` nearest records (clamped to the training-set
519    /// size), picks the majority `projection_kind` (ties break toward
520    /// the nearest record), and sets each tuned knob to the median of
521    /// the top-k values — kind-specific knobs aggregate over
522    /// kind-matching neighbors only.
523    ///
524    /// Blended knobs: `routing.num_domain_groups`,
525    /// `routing.low_evr_threshold`, `bridges.threshold_base`,
526    /// `bridges.threshold_evr_penalty`,
527    /// `bridges.overlap_artifact_territorial`,
528    /// `inner_sphere.min_evr_improvement`, plus the kind-specific
529    /// `laplacian.*` / `umap.*` knobs. Everything else
530    /// (`bridges.balanced_affinity_quantile`,
531    /// `bridges.min_evr_for_classification`,
532    /// `routing.group_routing_alpha`, the remaining `inner_sphere.*`
533    /// knobs, `spatial.*`, `min_category_size`) is inherited
534    /// nearest-neighbor from the closest record.
535    ///
536    /// With `k = 1` this is exactly [`MetaModel::predict`]. Larger `k`
537    /// trades sharpness for robustness against a single poorly-tuned
538    /// or mislabeled record. Panics like `predict` if `fit` has not
539    /// been called with at least one record.
540    pub fn predict_blended(&self, features: &CorpusFeatures, k: usize) -> PipelineConfig {
541        assert!(
542            !self.records.is_empty(),
543            "NearestNeighborMetaModel::predict_blended called before fit(); \
544             call .fit(records) with at least one record first"
545        );
546        let ranked = self.rank_candidates(features);
547        let k = k.clamp(1, ranked.len());
548        let top: Vec<&MetaTrainingRecord> =
549            ranked[..k].iter().map(|&(i, _)| &self.records[i]).collect();
550
551        // Majority projection kind; `top` is nearest-first so `find`
552        // breaks ties toward the closest record.
553        let mut kind_counts: HashMap<ProjectionKind, usize> = HashMap::new();
554        for r in &top {
555            *kind_counts
556                .entry(r.best_config.projection_kind)
557                .or_default() += 1;
558        }
559        let max_count = kind_counts.values().copied().max().unwrap_or(0);
560        let kind = top
561            .iter()
562            .map(|r| r.best_config.projection_kind)
563            .find(|kk| kind_counts[kk] == max_count)
564            .unwrap_or(top[0].best_config.projection_kind);
565
566        // Fields not blended below — bridges.balanced_affinity_quantile,
567        // bridges.min_evr_for_classification, routing.group_routing_alpha,
568        // the remaining inner_sphere.* knobs, spatial.*, and
569        // min_category_size — are intentionally inherited from the
570        // nearest record: they're outside the tuner's SearchSpace, so
571        // every record carries the same defaults and a median across
572        // neighbors would add nothing.
573        let mut cfg = top[0].best_config.clone();
574        cfg.projection_kind = kind;
575
576        // Kind-agnostic knobs: median over the full top-k.
577        cfg.routing.num_domain_groups =
578            median_usize(top.iter().map(|r| r.best_config.routing.num_domain_groups));
579        cfg.routing.low_evr_threshold =
580            median_f64(top.iter().map(|r| r.best_config.routing.low_evr_threshold));
581        cfg.bridges.threshold_base =
582            median_f64(top.iter().map(|r| r.best_config.bridges.threshold_base));
583        cfg.bridges.threshold_evr_penalty = median_f64(
584            top.iter()
585                .map(|r| r.best_config.bridges.threshold_evr_penalty),
586        );
587        cfg.bridges.overlap_artifact_territorial = median_f64(
588            top.iter()
589                .map(|r| r.best_config.bridges.overlap_artifact_territorial),
590        );
591        cfg.inner_sphere.min_evr_improvement = median_f64(
592            top.iter()
593                .map(|r| r.best_config.inner_sphere.min_evr_improvement),
594        );
595
596        // Kind-specific knobs: aggregate over kind-matching neighbors
597        // only. Non-empty by construction — `kind` won the majority
598        // vote over `top`, so at least one member matches it.
599        let kind_matching: Vec<&&MetaTrainingRecord> = top
600            .iter()
601            .filter(|r| r.best_config.projection_kind == kind)
602            .collect();
603        if !kind_matching.is_empty() {
604            match kind {
605                ProjectionKind::LaplacianEigenmap => {
606                    cfg.laplacian.k_neighbors = median_usize(
607                        kind_matching
608                            .iter()
609                            .map(|r| r.best_config.laplacian.k_neighbors),
610                    );
611                    cfg.laplacian.active_threshold = median_f64(
612                        kind_matching
613                            .iter()
614                            .map(|r| r.best_config.laplacian.active_threshold),
615                    );
616                }
617                ProjectionKind::UmapSphere => {
618                    cfg.umap.n_neighbors =
619                        median_usize(kind_matching.iter().map(|r| r.best_config.umap.n_neighbors));
620                    cfg.umap.n_epochs =
621                        median_usize(kind_matching.iter().map(|r| r.best_config.umap.n_epochs));
622                    cfg.umap.category_weight = median_f64(
623                        kind_matching
624                            .iter()
625                            .map(|r| r.best_config.umap.category_weight),
626                    );
627                    cfg.umap.min_dist =
628                        median_f64(kind_matching.iter().map(|r| r.best_config.umap.min_dist));
629                }
630                ProjectionKind::Pca | ProjectionKind::KernelPca => {}
631            }
632        }
633        cfg
634    }
635}
636
637impl MetaModel for NearestNeighborMetaModel {
638    fn fit(&mut self, records: &[MetaTrainingRecord]) {
639        self.records = filter_dominant_metric(records);
640        let (means, stds) = compute_feature_stats(&self.records);
641        self.feature_means = means;
642        self.feature_stds = if self.records.is_empty() {
643            [1.0; CORPUS_FEATURE_COUNT]
644        } else {
645            stds
646        };
647    }
648
649    fn is_fitted(&self) -> bool {
650        !self.records.is_empty()
651    }
652
653    fn predict(&self, features: &CorpusFeatures) -> PipelineConfig {
654        // Invariant: callers are expected to call fit() before predict().
655        // The trait contract documents this requirement, and the panic is
656        // intentional — a silent wrong prediction (returning Default) would
657        // be much harder to diagnose than a clear failure at the call site.
658        assert!(
659            !self.records.is_empty(),
660            "NearestNeighborMetaModel::predict called before fit(); \
661             call .fit(records) with at least one record first"
662        );
663        let ranked = self.rank_candidates(features);
664        let best_idx = ranked[0].0;
665        self.records[best_idx].best_config.clone()
666    }
667
668    fn name(&self) -> &str {
669        "nearest_neighbor"
670    }
671}
672
673// ── Distance-weighted ─────────────────────────────────────────────────
674
675/// Picks the training record that maximizes `evidence × w(distance)`,
676/// where `w(d) = 1 / (d + epsilon)` over z-score-normalized Euclidean
677/// distance and `evidence` is [`MetaTrainingRecord::score_lift`] when
678/// present, falling back to `best_score` for legacy records.
679///
680/// The distinction from [`NearestNeighborMetaModel`]: NN picks the
681/// closest record regardless of how well that record performed, so a
682/// single poorly-tuned outlier can pull predictions off. Distance-weighted
683/// folds the record's evidence into the selection — a record is "good" if
684/// it's both similar to the query AND demonstrated real tuner signal.
685/// Using lift instead of the raw score avoids the easy-corpus bias:
686/// `best_score = 0.9` on an easy corpus is weaker evidence than
687/// `best_score = 0.6` that beat its run's mean by a wide margin. At
688/// N = 1 this degenerates to NN (same record either way).
689///
690/// `epsilon` is a smoothing floor on the distance term; at `d ≈ 0` it
691/// prevents the weight from exploding and over-committing to a single
692/// near-duplicate record. Default `0.1`.
693#[derive(Debug, Clone)]
694pub struct DistanceWeightedMetaModel {
695    records: Vec<MetaTrainingRecord>,
696    feature_means: [f64; CORPUS_FEATURE_COUNT],
697    feature_stds: [f64; CORPUS_FEATURE_COUNT],
698    epsilon: f64,
699}
700
701impl Default for DistanceWeightedMetaModel {
702    fn default() -> Self {
703        Self {
704            records: Vec::new(),
705            feature_means: [0.0; CORPUS_FEATURE_COUNT],
706            feature_stds: [1.0; CORPUS_FEATURE_COUNT],
707            epsilon: 0.1,
708        }
709    }
710}
711
712impl DistanceWeightedMetaModel {
713    pub fn new() -> Self {
714        Self::default()
715    }
716
717    /// Override the smoothing constant added to distance before
718    /// inversion. Larger `epsilon` makes predictions smoother; smaller
719    /// sharpens the preference for near-duplicate records. Must be
720    /// strictly positive (silently clamped to `1e-12` if a zero or
721    /// negative value is passed).
722    pub fn with_epsilon(mut self, epsilon: f64) -> Self {
723        self.epsilon = epsilon.max(1e-12);
724        self
725    }
726
727    pub fn records(&self) -> &[MetaTrainingRecord] {
728        &self.records
729    }
730
731    /// Per-record (weighted_score, distance) pairs for the given query
732    /// features, sorted by descending weighted score. Useful for
733    /// introspecting why a particular prediction was made.
734    pub fn score_candidates(&self, features: &CorpusFeatures) -> Vec<(usize, f64, f64)> {
735        let q = normalize_features(
736            &to_model_space(&features.to_vec()),
737            &self.feature_means,
738            &self.feature_stds,
739        );
740        let mut out: Vec<(usize, f64, f64)> = self
741            .records
742            .iter()
743            .enumerate()
744            .filter_map(|(i, r)| {
745                // Cross-corpus-comparable evidence when available;
746                // raw best_score for legacy records without lift.
747                let evidence = r.score_lift.unwrap_or(r.best_score);
748                // Filter non-finite evidence at score time. NaN would
749                // otherwise propagate into `weighted`, hit the `total_cmp`
750                // below as "greatest" (NaN sorts to the end of a total
751                // order, but the *top* under a "descending" sort would
752                // put NaN first), and silently become the prediction.
753                if !evidence.is_finite() {
754                    return None;
755                }
756                let v = normalize_features(
757                    &to_model_space(&r.features.to_vec()),
758                    &self.feature_means,
759                    &self.feature_stds,
760                );
761                let d = normalized_euclidean(&q, &v);
762                let weighted = evidence / (d + self.epsilon);
763                if !weighted.is_finite() {
764                    return None;
765                }
766                Some((i, weighted, d))
767            })
768            .collect();
769        // `total_cmp` is NaN-safe; non-finite scores were already
770        // dropped above, so the ordering is total.
771        out.sort_by(|a, b| b.1.total_cmp(&a.1));
772        out
773    }
774}
775
776impl MetaModel for DistanceWeightedMetaModel {
777    fn fit(&mut self, records: &[MetaTrainingRecord]) {
778        self.records = filter_dominant_metric(records);
779        let (means, stds) = compute_feature_stats(&self.records);
780        self.feature_means = means;
781        self.feature_stds = if self.records.is_empty() {
782            [1.0; CORPUS_FEATURE_COUNT]
783        } else {
784            stds
785        };
786    }
787
788    fn is_fitted(&self) -> bool {
789        !self.records.is_empty()
790    }
791
792    fn predict(&self, features: &CorpusFeatures) -> PipelineConfig {
793        // Invariant: callers are expected to call fit() before predict().
794        // The trait contract documents this requirement, and the panic is
795        // intentional — a silent wrong prediction (returning Default) would
796        // be much harder to diagnose than a clear failure at the call site.
797        assert!(
798            !self.records.is_empty(),
799            "DistanceWeightedMetaModel::predict called before fit(); \
800             call .fit(records) with at least one record first"
801        );
802        let ranked = self.score_candidates(features);
803        // Fall back to record 0 if every record was filtered as
804        // non-finite — the records are non-empty (asserted) but none
805        // produced a comparable score.
806        let best_idx = ranked.first().map_or(0, |&(idx, _, _)| idx);
807        self.records[best_idx].best_config.clone()
808    }
809
810    fn name(&self) -> &str {
811        "distance_weighted"
812    }
813}
814
815// ── Tests ──────────────────────────────────────────────────────────
816
817#[cfg(test)]
818mod tests {
819    use super::*;
820    use crate::config::ProjectionKind;
821    use crate::tuner::TrialRecord;
822
823    fn feat(n: usize, c: usize, sparsity: f64, intra: f64) -> CorpusFeatures {
824        CorpusFeatures {
825            n_items: n,
826            n_categories: c,
827            dim: 128,
828            mean_members_per_category: n as f64 / c as f64,
829            category_size_entropy: 1.0,
830            mean_sparsity: sparsity,
831            axis_utilization_entropy: 0.9,
832            noise_estimate: 0.02,
833            mean_intra_category_similarity: intra,
834            mean_inter_category_similarity: 0.1,
835            category_separation_ratio: intra / 0.1,
836        }
837    }
838
839    fn record(id: &str, f: CorpusFeatures, kind: ProjectionKind, score: f64) -> MetaTrainingRecord {
840        MetaTrainingRecord {
841            corpus_id: id.to_string(),
842            features: f,
843            best_config: PipelineConfig {
844                projection_kind: kind,
845                ..Default::default()
846            },
847            best_score: score,
848            score_lift: None,
849            metric_name: "test_metric".to_string(),
850            strategy: "test_strategy".to_string(),
851            timestamp: "2026-04-22T00:00:00Z".to_string(),
852        }
853    }
854
855    fn trial(score: f64) -> TrialRecord {
856        TrialRecord {
857            config: PipelineConfig::default(),
858            score,
859            build_ms: 0,
860            components: Vec::new(),
861        }
862    }
863
864    #[test]
865    fn record_json_roundtrip() {
866        let r = record("r1", feat(100, 5, 0.2, 0.6), ProjectionKind::Pca, 0.5);
867        let json = serde_json::to_string(&r).unwrap();
868        let back: MetaTrainingRecord = serde_json::from_str(&json).unwrap();
869        assert_eq!(back.corpus_id, "r1");
870        assert_eq!(back.best_config.projection_kind, ProjectionKind::Pca);
871        assert!((back.best_score - 0.5).abs() < 1e-12);
872    }
873
874    #[test]
875    fn record_without_score_lift_field_still_deserializes() {
876        // Legacy stores predate `score_lift`; #[serde(default)] must
877        // accept them and produce None.
878        let r = record("r1", feat(100, 5, 0.2, 0.6), ProjectionKind::Pca, 0.5);
879        let mut json: serde_json::Value = serde_json::to_value(&r).unwrap();
880        json.as_object_mut().unwrap().remove("score_lift");
881        let back: MetaTrainingRecord = serde_json::from_value(json).unwrap();
882        assert!(back.score_lift.is_none());
883    }
884
885    #[test]
886    fn to_model_space_log_compresses_scale_features_only() {
887        let f = feat(500, 20, 0.25, 0.6);
888        let raw = f.to_vec();
889        let ms = to_model_space(&raw);
890        for &i in &LOG_SCALED_FEATURES {
891            assert!(
892                (ms[i] - raw[i].ln_1p()).abs() < 1e-12,
893                "scale feature {i} should be ln(1+x)"
894            );
895        }
896        for i in 0..CORPUS_FEATURE_COUNT {
897            if !LOG_SCALED_FEATURES.contains(&i) {
898                assert_eq!(ms[i], raw[i], "non-scale feature {i} must pass through");
899            }
900        }
901    }
902
903    #[test]
904    fn is_fitted_flips_after_fit() {
905        let mut nn = NearestNeighborMetaModel::new();
906        let mut dw = DistanceWeightedMetaModel::new();
907        assert!(!nn.is_fitted());
908        assert!(!dw.is_fitted());
909
910        let r = record("only", feat(500, 20, 0.1, 0.4), ProjectionKind::Pca, 0.7);
911        nn.fit(std::slice::from_ref(&r));
912        dw.fit(std::slice::from_ref(&r));
913        assert!(nn.is_fitted());
914        assert!(dw.is_fitted());
915
916        nn.fit(&[]);
917        assert!(
918            !nn.is_fitted(),
919            "refit on empty set must clear fitted state"
920        );
921    }
922
923    #[test]
924    fn nn_predict_single_record_returns_its_config() {
925        let r = record(
926            "only",
927            feat(500, 20, 0.1, 0.4),
928            ProjectionKind::LaplacianEigenmap,
929            0.7,
930        );
931        let mut m = NearestNeighborMetaModel::new();
932        m.fit(std::slice::from_ref(&r));
933        let predicted = m.predict(&feat(1000, 30, 0.05, 0.3));
934        assert_eq!(predicted.projection_kind, ProjectionKind::LaplacianEigenmap);
935    }
936
937    #[test]
938    fn nn_predict_picks_nearest_neighbor() {
939        // Two records with very different features. A query close to r_a
940        // should get r_a's config.
941        let r_a = record(
942            "sparse",
943            feat(500, 5, 0.05, 0.8),
944            ProjectionKind::LaplacianEigenmap,
945            0.7,
946        );
947        let r_b = record("dense", feat(500, 5, 0.50, 0.2), ProjectionKind::Pca, 0.6);
948        let mut m = NearestNeighborMetaModel::new();
949        m.fit(&[r_a.clone(), r_b.clone()]);
950
951        let query_near_a = feat(500, 5, 0.06, 0.78);
952        let query_near_b = feat(500, 5, 0.48, 0.22);
953
954        assert_eq!(
955            m.predict(&query_near_a).projection_kind,
956            ProjectionKind::LaplacianEigenmap,
957        );
958        assert_eq!(
959            m.predict(&query_near_b).projection_kind,
960            ProjectionKind::Pca,
961        );
962    }
963
964    #[test]
965    fn nn_rank_candidates_sorted_ascending() {
966        let r_a = record("a", feat(500, 5, 0.05, 0.8), ProjectionKind::Pca, 0.7);
967        let r_b = record("b", feat(500, 5, 0.50, 0.2), ProjectionKind::KernelPca, 0.6);
968        let mut m = NearestNeighborMetaModel::new();
969        m.fit(&[r_a, r_b]);
970        let q = feat(500, 5, 0.07, 0.75);
971        let ranked = m.rank_candidates(&q);
972        assert_eq!(ranked.len(), 2);
973        assert!(ranked[0].1 <= ranked[1].1);
974    }
975
976    #[test]
977    fn nn_handles_zero_variance_feature() {
978        // Both records have identical n_items/n_categories/dim — those
979        // features have zero std and should be ignored in the distance
980        // rather than produce NaN.
981        let r_a = record("a", feat(500, 5, 0.05, 0.8), ProjectionKind::Pca, 0.7);
982        let r_b = record(
983            "b",
984            feat(500, 5, 0.50, 0.2),
985            ProjectionKind::LaplacianEigenmap,
986            0.6,
987        );
988        let mut m = NearestNeighborMetaModel::new();
989        m.fit(&[r_a, r_b]);
990        let q = feat(500, 5, 0.1, 0.7);
991        let ranked = m.rank_candidates(&q);
992        assert!(ranked[0].1.is_finite());
993        assert!(ranked[1].1.is_finite());
994    }
995
996    #[test]
997    fn fit_stratifies_to_dominant_metric() {
998        // Two "m1" records + one "m2" record: fit must retain only the
999        // m1 pair, so even a query sitting exactly on the m2 record's
1000        // features predicts an m1 config.
1001        let r1 = record("a", feat(500, 5, 0.05, 0.8), ProjectionKind::Pca, 0.7);
1002        let r2 = record("b", feat(500, 5, 0.50, 0.2), ProjectionKind::Pca, 0.6);
1003        let mut alien = record("c", feat(500, 5, 0.30, 0.5), ProjectionKind::KernelPca, 0.9);
1004        alien.metric_name = "other_metric".to_string();
1005
1006        let mut m = NearestNeighborMetaModel::new();
1007        m.fit(&[r1, r2, alien.clone()]);
1008        assert_eq!(m.records().len(), 2, "dominant-metric records retained");
1009        assert!(m.records().iter().all(|r| r.metric_name == "test_metric"));
1010        let predicted = m.predict(&alien.features);
1011        assert_ne!(predicted.projection_kind, ProjectionKind::KernelPca);
1012    }
1013
1014    #[test]
1015    fn filter_dominant_metric_tie_picks_lexicographically_largest() {
1016        // Three metrics, one record each — an exact 3-way tie. The
1017        // deterministic tie-break is the lexicographically largest name.
1018        let mut r1 = record("a", feat(500, 5, 0.05, 0.8), ProjectionKind::Pca, 0.7);
1019        r1.metric_name = "alpha".to_string();
1020        let mut r2 = record("b", feat(500, 5, 0.50, 0.2), ProjectionKind::Pca, 0.6);
1021        r2.metric_name = "beta".to_string();
1022        let mut r3 = record("c", feat(500, 5, 0.30, 0.5), ProjectionKind::Pca, 0.5);
1023        r3.metric_name = "gamma".to_string();
1024
1025        let kept = filter_dominant_metric(&[r1, r2, r3]);
1026        assert_eq!(kept.len(), 1);
1027        assert_eq!(kept[0].metric_name, "gamma");
1028    }
1029
1030    #[test]
1031    fn single_metric_training_set_is_untouched() {
1032        let records = vec![
1033            record("a", feat(500, 5, 0.05, 0.8), ProjectionKind::Pca, 0.7),
1034            record("b", feat(500, 5, 0.50, 0.2), ProjectionKind::Pca, 0.6),
1035        ];
1036        let mut m = NearestNeighborMetaModel::new();
1037        m.fit(&records);
1038        assert_eq!(m.records().len(), 2);
1039    }
1040
1041    #[test]
1042    #[should_panic(expected = "called before fit")]
1043    fn nn_predict_before_fit_panics() {
1044        let m = NearestNeighborMetaModel::new();
1045        let _ = m.predict(&feat(100, 5, 0.1, 0.3));
1046    }
1047
1048    #[test]
1049    fn predict_blended_k1_matches_predict() {
1050        let r_a = record(
1051            "a",
1052            feat(500, 5, 0.05, 0.8),
1053            ProjectionKind::LaplacianEigenmap,
1054            0.7,
1055        );
1056        let r_b = record("b", feat(500, 5, 0.50, 0.2), ProjectionKind::Pca, 0.6);
1057        let mut m = NearestNeighborMetaModel::new();
1058        m.fit(&[r_a, r_b]);
1059        let q = feat(500, 5, 0.06, 0.78);
1060        let single = m.predict(&q);
1061        let blended = m.predict_blended(&q, 1);
1062        assert_eq!(blended.projection_kind, single.projection_kind);
1063        assert_eq!(
1064            blended.routing.num_domain_groups,
1065            single.routing.num_domain_groups
1066        );
1067        assert!((blended.bridges.threshold_base - single.bridges.threshold_base).abs() < 1e-12);
1068    }
1069
1070    #[test]
1071    fn predict_blended_takes_median_of_knobs() {
1072        // Three same-kind records whose num_domain_groups are 3, 5, 9:
1073        // the k=3 blend must pick the median (5), not any single
1074        // record's value.
1075        let mut r1 = record("a", feat(500, 5, 0.10, 0.70), ProjectionKind::Pca, 0.7);
1076        r1.best_config.routing.num_domain_groups = 3;
1077        let mut r2 = record("b", feat(500, 5, 0.12, 0.68), ProjectionKind::Pca, 0.6);
1078        r2.best_config.routing.num_domain_groups = 5;
1079        let mut r3 = record("c", feat(500, 5, 0.14, 0.66), ProjectionKind::Pca, 0.5);
1080        r3.best_config.routing.num_domain_groups = 9;
1081
1082        let mut m = NearestNeighborMetaModel::new();
1083        m.fit(&[r1, r2, r3]);
1084        let blended = m.predict_blended(&feat(500, 5, 0.12, 0.68), 3);
1085        assert_eq!(blended.projection_kind, ProjectionKind::Pca);
1086        assert_eq!(blended.routing.num_domain_groups, 5);
1087    }
1088
1089    #[test]
1090    fn predict_blended_majority_kind_wins() {
1091        // Two Laplacian records + one PCA record: the blend at k=3 must
1092        // pick Laplacian, and aggregate laplacian knobs over the two
1093        // kind-matching neighbors only.
1094        let mut r1 = record(
1095            "a",
1096            feat(500, 5, 0.10, 0.70),
1097            ProjectionKind::LaplacianEigenmap,
1098            0.7,
1099        );
1100        r1.best_config.laplacian.k_neighbors = 10;
1101        let mut r2 = record(
1102            "b",
1103            feat(500, 5, 0.12, 0.68),
1104            ProjectionKind::LaplacianEigenmap,
1105            0.6,
1106        );
1107        r2.best_config.laplacian.k_neighbors = 20;
1108        let r3 = record("c", feat(500, 5, 0.14, 0.66), ProjectionKind::Pca, 0.5);
1109
1110        let mut m = NearestNeighborMetaModel::new();
1111        m.fit(&[r1, r2, r3]);
1112        let blended = m.predict_blended(&feat(500, 5, 0.12, 0.68), 3);
1113        assert_eq!(blended.projection_kind, ProjectionKind::LaplacianEigenmap);
1114        // Upper median of {10, 20} = 20.
1115        assert_eq!(blended.laplacian.k_neighbors, 20);
1116    }
1117
1118    #[test]
1119    fn save_and_load_list_roundtrip() {
1120        let dir = std::env::temp_dir();
1121        let path = dir.join("sphereql_meta_test.json");
1122        let _ = fs::remove_file(&path);
1123
1124        let records = vec![
1125            record("r1", feat(100, 5, 0.2, 0.5), ProjectionKind::Pca, 0.4),
1126            record(
1127                "r2",
1128                feat(800, 30, 0.05, 0.6),
1129                ProjectionKind::LaplacianEigenmap,
1130                0.5,
1131            ),
1132        ];
1133        MetaTrainingRecord::save_list(&records, &path).unwrap();
1134
1135        let loaded = MetaTrainingRecord::load_list(&path).unwrap();
1136        assert_eq!(loaded.len(), 2);
1137        assert_eq!(loaded[0].corpus_id, "r1");
1138        assert_eq!(
1139            loaded[1].best_config.projection_kind,
1140            ProjectionKind::LaplacianEigenmap
1141        );
1142
1143        let _ = fs::remove_file(&path);
1144    }
1145
1146    #[test]
1147    fn load_nonexistent_returns_empty() {
1148        let path = std::env::temp_dir().join("sphereql_nonexistent_12345.json");
1149        let loaded = MetaTrainingRecord::load_list(&path).unwrap();
1150        assert!(loaded.is_empty());
1151    }
1152
1153    #[test]
1154    fn append_to_migrates_legacy_array_file() {
1155        let dir =
1156            std::env::temp_dir().join(format!("sphereql_meta_migrate_{}", std::process::id()));
1157        let _ = fs::remove_dir_all(&dir);
1158        let path = dir.join("records.json");
1159
1160        // Seed with a legacy array file (what `save_list` writes).
1161        let legacy = vec![
1162            record("r1", feat(100, 5, 0.2, 0.5), ProjectionKind::Pca, 0.4),
1163            record(
1164                "r2",
1165                feat(800, 30, 0.05, 0.6),
1166                ProjectionKind::LaplacianEigenmap,
1167                0.5,
1168            ),
1169        ];
1170        MetaTrainingRecord::save_list(&legacy, &path).unwrap();
1171
1172        // First append migrates the file to JSONL.
1173        record("r3", feat(200, 8, 0.1, 0.4), ProjectionKind::KernelPca, 0.6)
1174            .append_to(&path)
1175            .unwrap();
1176
1177        let loaded = MetaTrainingRecord::load_list(&path).unwrap();
1178        assert_eq!(loaded.len(), 3);
1179        assert_eq!(loaded[0].corpus_id, "r1");
1180        assert_eq!(loaded[1].corpus_id, "r2");
1181        assert_eq!(loaded[2].corpus_id, "r3");
1182        assert_eq!(
1183            loaded[1].best_config.projection_kind,
1184            ProjectionKind::LaplacianEigenmap
1185        );
1186
1187        // Post-migration shape is JSONL (one record per line).
1188        let raw = fs::read_to_string(&path).unwrap();
1189        assert!(!raw.trim_start().starts_with('['));
1190        assert_eq!(raw.lines().count(), 3);
1191
1192        let _ = fs::remove_dir_all(&dir);
1193    }
1194
1195    #[test]
1196    fn from_tune_result_copies_fields() {
1197        let cfg = PipelineConfig {
1198            projection_kind: ProjectionKind::LaplacianEigenmap,
1199            ..Default::default()
1200        };
1201        let report = TuneReport {
1202            metric_name: "connectivity_composite".to_string(),
1203            best_score: 0.42,
1204            best_config: cfg.clone(),
1205            trials: Vec::new(),
1206            failures: Vec::new(),
1207            umap_graph_builds: 0,
1208        };
1209        let r = MetaTrainingRecord::from_tune_result(
1210            "test_corpus",
1211            feat(100, 5, 0.1, 0.5),
1212            &report,
1213            "random{budget=24,seed=42}",
1214        );
1215        assert_eq!(r.corpus_id, "test_corpus");
1216        assert_eq!(r.metric_name, "connectivity_composite");
1217        assert!((r.best_score - 0.42).abs() < 1e-12);
1218        // Fewer than 2 trials → no lift evidence.
1219        assert!(r.score_lift.is_none());
1220        assert_eq!(
1221            r.best_config.projection_kind,
1222            ProjectionKind::LaplacianEigenmap
1223        );
1224        assert_eq!(r.strategy, "random{budget=24,seed=42}");
1225        // Timestamp should be epoch-seconds-ish — a non-empty numeric string.
1226        assert!(!r.timestamp.is_empty());
1227        assert!(r.timestamp.parse::<u64>().is_ok());
1228    }
1229
1230    #[test]
1231    fn from_tune_result_computes_headroom_lift() {
1232        // Trials {0.4, 0.6, 0.8}: mean = 0.6, best = 0.8, headroom = 0.4,
1233        // lift = (0.8 - 0.6) / 0.4 = 0.5.
1234        let report = TuneReport {
1235            metric_name: "m".to_string(),
1236            best_score: 0.8,
1237            best_config: PipelineConfig::default(),
1238            trials: vec![trial(0.4), trial(0.6), trial(0.8)],
1239            failures: Vec::new(),
1240            umap_graph_builds: 0,
1241        };
1242        let r = MetaTrainingRecord::from_tune_result("c", feat(10, 2, 0.1, 0.3), &report, "s");
1243        let lift = r.score_lift.expect("two or more trials produce lift");
1244        assert!((lift - 0.5).abs() < 1e-12, "got {lift}");
1245    }
1246
1247    #[test]
1248    fn from_tune_result_single_trial_has_no_lift() {
1249        // Exactly one trial: no distribution to compare against, so
1250        // the record carries no lift evidence.
1251        let report = TuneReport {
1252            metric_name: "m".to_string(),
1253            best_score: 0.7,
1254            best_config: PipelineConfig::default(),
1255            trials: vec![trial(0.7)],
1256            failures: Vec::new(),
1257            umap_graph_builds: 0,
1258        };
1259        let r = MetaTrainingRecord::from_tune_result("c", feat(10, 2, 0.1, 0.3), &report, "s");
1260        assert!(r.score_lift.is_none());
1261    }
1262
1263    #[test]
1264    fn from_tune_result_lift_zero_when_landscape_saturated() {
1265        // Every trial at ~1.0: no headroom, the config carried no signal.
1266        let report = TuneReport {
1267            metric_name: "m".to_string(),
1268            best_score: 1.0,
1269            best_config: PipelineConfig::default(),
1270            trials: vec![trial(1.0), trial(1.0)],
1271            failures: Vec::new(),
1272            umap_graph_builds: 0,
1273        };
1274        let r = MetaTrainingRecord::from_tune_result("c", feat(10, 2, 0.1, 0.3), &report, "s");
1275        assert_eq!(r.score_lift, Some(0.0));
1276    }
1277
1278    #[test]
1279    fn with_timestamp_overrides_default() {
1280        let report = TuneReport {
1281            metric_name: "m".to_string(),
1282            best_score: 0.5,
1283            best_config: PipelineConfig::default(),
1284            trials: Vec::new(),
1285            failures: Vec::new(),
1286            umap_graph_builds: 0,
1287        };
1288        let r = MetaTrainingRecord::from_tune_result("c", feat(10, 2, 0.1, 0.3), &report, "s")
1289            .with_timestamp("2026-04-22T12:00:00Z");
1290        assert_eq!(r.timestamp, "2026-04-22T12:00:00Z");
1291    }
1292
1293    #[test]
1294    fn save_list_creates_parent_dirs() {
1295        let dir = std::env::temp_dir().join(format!("sphereql_create_test_{}", std::process::id()));
1296        let _ = fs::remove_dir_all(&dir);
1297        let path = dir.join("nested").join("records.json");
1298
1299        let r = record("r1", feat(100, 5, 0.1, 0.5), ProjectionKind::Pca, 0.4);
1300        MetaTrainingRecord::save_list(&[r], &path).unwrap();
1301        assert!(path.exists());
1302
1303        let _ = fs::remove_dir_all(&dir);
1304    }
1305
1306    #[test]
1307    fn default_store_path_resolves() {
1308        // Verify the helper returns a path under $HOME or $USERPROFILE.
1309        // We can't assert the exact path (portability + test isolation),
1310        // just that it resolves and ends with the expected filename.
1311        let path = MetaTrainingRecord::default_store_path().unwrap();
1312        assert!(path.ends_with("meta_records.json"));
1313        assert!(path.iter().any(|c| c.to_string_lossy() == ".sphereql"));
1314    }
1315
1316    #[test]
1317    fn dw_predict_single_record_returns_its_config() {
1318        // At N=1 distance-weighted must agree with NN.
1319        let r = record(
1320            "only",
1321            feat(500, 20, 0.1, 0.4),
1322            ProjectionKind::LaplacianEigenmap,
1323            0.7,
1324        );
1325        let mut m = DistanceWeightedMetaModel::new();
1326        m.fit(std::slice::from_ref(&r));
1327        let predicted = m.predict(&feat(1000, 30, 0.05, 0.3));
1328        assert_eq!(predicted.projection_kind, ProjectionKind::LaplacianEigenmap);
1329    }
1330
1331    #[test]
1332    fn dw_prefers_higher_score_when_equidistant() {
1333        // Two records at identical features but different best_scores
1334        // — the high-score one should be picked.
1335        let shared_feat = feat(500, 5, 0.1, 0.5);
1336        let lo = record(
1337            "low",
1338            shared_feat.clone(),
1339            ProjectionKind::LaplacianEigenmap,
1340            0.2,
1341        );
1342        let hi = record("high", shared_feat.clone(), ProjectionKind::Pca, 0.9);
1343
1344        let mut m = DistanceWeightedMetaModel::new();
1345        m.fit(&[lo, hi]);
1346        let predicted = m.predict(&shared_feat);
1347        // Note: at perfectly identical features, distance is 0 and both
1348        // weights are 1/epsilon; the higher-score record wins.
1349        assert_eq!(predicted.projection_kind, ProjectionKind::Pca);
1350    }
1351
1352    #[test]
1353    fn dw_prefers_lift_evidence_over_raw_score() {
1354        // Same features; one record from an "easy" corpus (raw 0.9,
1355        // but the run was flat — lift 0.0) and one from a "hard"
1356        // corpus (raw 0.6, but the config beat its run's mean by a
1357        // wide margin — lift 0.8). The hard-won config is the better
1358        // evidence and must win.
1359        let shared_feat = feat(500, 5, 0.1, 0.5);
1360        let mut easy = record("easy", shared_feat.clone(), ProjectionKind::KernelPca, 0.9);
1361        easy.score_lift = Some(0.0);
1362        let mut hard = record(
1363            "hard",
1364            shared_feat.clone(),
1365            ProjectionKind::LaplacianEigenmap,
1366            0.6,
1367        );
1368        hard.score_lift = Some(0.8);
1369
1370        let mut m = DistanceWeightedMetaModel::new();
1371        m.fit(&[easy, hard]);
1372        let predicted = m.predict(&shared_feat);
1373        assert_eq!(predicted.projection_kind, ProjectionKind::LaplacianEigenmap);
1374    }
1375
1376    #[test]
1377    fn dw_all_records_without_lift_fall_back_to_best_score() {
1378        // Legacy training sets predate score_lift entirely. Evidence
1379        // must fall back to best_score and still yield a prediction —
1380        // the higher-scoring record wins at equal distance.
1381        let shared_feat = feat(500, 5, 0.1, 0.5);
1382        let lo = record("lo", shared_feat.clone(), ProjectionKind::Pca, 0.2);
1383        let hi = record(
1384            "hi",
1385            shared_feat.clone(),
1386            ProjectionKind::LaplacianEigenmap,
1387            0.9,
1388        );
1389        assert!(lo.score_lift.is_none() && hi.score_lift.is_none());
1390
1391        let mut m = DistanceWeightedMetaModel::new();
1392        m.fit(&[lo, hi]);
1393        let ranked = m.score_candidates(&shared_feat);
1394        assert_eq!(ranked.len(), 2, "no record filtered as non-finite");
1395        let predicted = m.predict(&shared_feat);
1396        assert_eq!(predicted.projection_kind, ProjectionKind::LaplacianEigenmap);
1397    }
1398
1399    #[test]
1400    fn dw_prefers_closer_when_similar_score() {
1401        // Two records with similar best_scores but very different
1402        // features — the closer one to the query should win.
1403        let close = record(
1404            "close",
1405            feat(500, 5, 0.06, 0.82),
1406            ProjectionKind::LaplacianEigenmap,
1407            0.70,
1408        );
1409        let far = record(
1410            "far",
1411            feat(500, 5, 0.55, 0.15),
1412            ProjectionKind::Pca,
1413            0.72, // only slightly better
1414        );
1415        let mut m = DistanceWeightedMetaModel::new();
1416        m.fit(&[close, far]);
1417        let q = feat(500, 5, 0.05, 0.80); // very close to "close"'s features
1418        assert_eq!(
1419            m.predict(&q).projection_kind,
1420            ProjectionKind::LaplacianEigenmap,
1421        );
1422    }
1423
1424    #[test]
1425    fn dw_score_candidates_sorted_descending() {
1426        let ra = record("a", feat(500, 5, 0.05, 0.8), ProjectionKind::Pca, 0.6);
1427        let rb = record("b", feat(500, 5, 0.50, 0.2), ProjectionKind::Pca, 0.9);
1428        let mut m = DistanceWeightedMetaModel::new();
1429        m.fit(&[ra, rb]);
1430        let ranked = m.score_candidates(&feat(500, 5, 0.07, 0.78));
1431        assert_eq!(ranked.len(), 2);
1432        assert!(ranked[0].1 >= ranked[1].1);
1433    }
1434
1435    #[test]
1436    fn dw_is_deterministic() {
1437        let records = vec![
1438            record("a", feat(500, 5, 0.05, 0.8), ProjectionKind::Pca, 0.7),
1439            record(
1440                "b",
1441                feat(500, 5, 0.50, 0.2),
1442                ProjectionKind::LaplacianEigenmap,
1443                0.6,
1444            ),
1445        ];
1446        let mut m1 = DistanceWeightedMetaModel::new();
1447        m1.fit(&records);
1448        let mut m2 = DistanceWeightedMetaModel::new();
1449        m2.fit(&records);
1450        let q = feat(500, 5, 0.10, 0.7);
1451        assert_eq!(
1452            m1.predict(&q).projection_kind,
1453            m2.predict(&q).projection_kind
1454        );
1455    }
1456
1457    #[test]
1458    fn dw_epsilon_clamps_non_positive() {
1459        let m = DistanceWeightedMetaModel::new().with_epsilon(-1.0);
1460        // Internal epsilon shouldn't be negative; we can probe via
1461        // score_candidates: at d=0 the weight is r.best_score/epsilon;
1462        // with a non-positive epsilon we'd otherwise divide by zero.
1463        let r = record("r", feat(100, 5, 0.1, 0.3), ProjectionKind::Pca, 0.5);
1464        let mut m = m;
1465        m.fit(std::slice::from_ref(&r));
1466        let ranked = m.score_candidates(&r.features);
1467        assert!(ranked[0].1.is_finite());
1468    }
1469
1470    #[test]
1471    #[should_panic(expected = "called before fit")]
1472    fn dw_predict_before_fit_panics() {
1473        let m = DistanceWeightedMetaModel::new();
1474        let _ = m.predict(&feat(100, 5, 0.1, 0.3));
1475    }
1476
1477    #[test]
1478    fn dw_name_stable() {
1479        let m = DistanceWeightedMetaModel::new();
1480        assert_eq!(m.name(), "distance_weighted");
1481    }
1482
1483    #[test]
1484    fn adjust_score_with_feedback_blends_at_alpha() {
1485        let r = record("r", feat(100, 5, 0.1, 0.3), ProjectionKind::Pca, 0.8);
1486        let summary = FeedbackSummary {
1487            corpus_id: "r".into(),
1488            n_events: 10,
1489            mean_score: 0.4,
1490            min_score: 0.1,
1491            max_score: 0.9,
1492        };
1493        // alpha = 0 → keep best_score
1494        assert!((r.adjust_score_with_feedback(&summary, 0.0) - 0.8).abs() < 1e-12);
1495        // alpha = 1 → replace with feedback
1496        assert!((r.adjust_score_with_feedback(&summary, 1.0) - 0.4).abs() < 1e-12);
1497        // alpha = 0.5 → midpoint 0.6
1498        assert!((r.adjust_score_with_feedback(&summary, 0.5) - 0.6).abs() < 1e-12);
1499        // alpha clamped: values outside [0,1] are clipped.
1500        assert!((r.adjust_score_with_feedback(&summary, 2.0) - 0.4).abs() < 1e-12);
1501        assert!((r.adjust_score_with_feedback(&summary, -1.0) - 0.8).abs() < 1e-12);
1502    }
1503}