Skip to main content

frankensearch_index/
mrl.rs

1//! Matryoshka Representation Learning (MRL) adaptive dimensionality at search time.
2//!
3//! MRL-trained embedding models (including potion-128M and many modern sentence
4//! transformers) produce vectors where the first N dimensions carry the most
5//! information. [`MrlConfig`] enables a two-phase search that exploits this:
6//!
7//! 1. **Truncated scan**: compute dot products using only the first
8//!    `search_dims` dimensions. This is 2-6x faster than a full-dimension scan
9//!    for large indices.
10//! 2. **Full-dimension rescore**: re-score the top `rescore_top_k` candidates
11//!    using the full stored dimensionality for maximum accuracy.
12//!
13//! # Performance model
14//!
15//! - Standard search (384 dims): 384 multiply-accumulate per vector.
16//! - MRL search (64 dims + rescore 30): 64*N + 384*30 = 64N + 11520 ops.
17//! - Break-even at ~36 vectors. For 10K vectors: 640K vs 3.84M ops = **6x**
18//!   speedup on the initial scan.
19//!
20//! # SIMD alignment
21//!
22//! For best SIMD throughput, `search_dims` should be a multiple of 8 (one
23//! `f32x8` operation). Common choices: 64, 128, 192, 256. Non-aligned values
24//! work correctly but with a scalar remainder tail.
25//!
26//! # Index format
27//!
28//! No FSVI changes are needed. Full-dimension vectors are stored as-is, and
29//! truncation is a runtime operation.
30
31use std::cmp::Ordering;
32use std::collections::BinaryHeap;
33
34use frankensearch_core::filter::SearchFilter;
35use frankensearch_core::{SearchError, SearchResult, VectorHit};
36use serde::{Deserialize, Serialize};
37
38use crate::wal::{from_wal_index, is_wal_index, to_wal_index};
39use crate::{
40    VectorIndex, dot_product_f16_bytes_f32, dot_product_f32_bytes_f32, dot_product_f32_f32,
41};
42
43// ---------------------------------------------------------------------------
44// Configuration
45// ---------------------------------------------------------------------------
46
47/// Configuration for MRL-accelerated search.
48#[derive(Debug, Clone, Serialize, Deserialize)]
49pub struct MrlConfig {
50    /// Number of dimensions for the initial truncated scan.
51    ///
52    /// For best SIMD alignment, use a multiple of 8 (e.g., 64, 128).
53    /// Must be at least 1 and at most the index dimension.
54    /// Default: 64.
55    pub search_dims: usize,
56
57    /// Number of dimensions for re-scoring top candidates.
58    ///
59    /// Set to 0 to use the full index dimension (recommended).
60    /// Default: 0 (full dimension).
61    pub rescore_dims: usize,
62
63    /// Number of top candidates to re-score with full dimensions.
64    ///
65    /// Set to 0 to use `3 * limit`.
66    /// Default: 0 (auto = 3x limit).
67    pub rescore_top_k: usize,
68}
69
70impl Default for MrlConfig {
71    fn default() -> Self {
72        Self {
73            search_dims: 64,
74            rescore_dims: 0,
75            rescore_top_k: 0,
76        }
77    }
78}
79
80impl MrlConfig {
81    /// Resolve `rescore_dims` to the effective value given the index dimension.
82    ///
83    /// Always returns at least `self.search_dims` so the rescore phase never
84    /// uses fewer dimensions than the initial truncated scan.
85    const fn effective_rescore_dims(&self, index_dim: usize) -> usize {
86        let dims = if self.rescore_dims == 0 || self.rescore_dims > index_dim {
87            index_dim
88        } else {
89            self.rescore_dims
90        };
91        // Rescore must use at least as many dims as the initial scan,
92        // otherwise Phase 2 is strictly worse than Phase 1.
93        if dims < self.search_dims {
94            self.search_dims
95        } else {
96            dims
97        }
98    }
99
100    /// Resolve `rescore_top_k` to the effective value given the search limit.
101    const fn effective_rescore_top_k(&self, limit: usize) -> usize {
102        if self.rescore_top_k == 0 {
103            limit.saturating_mul(3)
104        } else {
105            self.rescore_top_k
106        }
107    }
108}
109
110// ---------------------------------------------------------------------------
111// MRL search stats
112// ---------------------------------------------------------------------------
113
/// Counters describing how a single MRL search was executed.
///
/// `Default` yields all-zero counters with `fell_back_to_full == false`.
#[derive(Debug, Clone, Default)]
pub struct MrlSearchStats {
    /// Leading dimensions compared during the first-pass scan.
    pub scan_dims: usize,
    /// Leading dimensions compared during the rescore pass.
    pub rescore_dims: usize,
    /// How many first-pass candidates were handed to the rescore pass.
    pub candidates_rescored: usize,
    /// Total number of records visited by the first-pass scan.
    pub records_scanned: usize,
    /// `true` when the standard full-dimension search was used instead of
    /// the truncated scan.
    pub fell_back_to_full: bool,
}
128
129// ---------------------------------------------------------------------------
130// Heap entry (reuse search.rs pattern)
131// ---------------------------------------------------------------------------
132
/// One candidate in the bounded top-k heap: a record index and its score.
#[derive(Debug, Clone, Copy)]
struct MrlHeapEntry {
    index: usize,
    score: f32,
}

impl PartialEq for MrlHeapEntry {
    /// Entries are equal when both the index and the exact score bit pattern
    /// match; comparing bits keeps equality reflexive even for NaN scores.
    fn eq(&self, other: &Self) -> bool {
        (self.index, self.score.to_bits()) == (other.index, other.score.to_bits())
    }
}

impl Eq for MrlHeapEntry {}

impl PartialOrd for MrlHeapEntry {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}

impl Ord for MrlHeapEntry {
    /// Inverted score order, so `BinaryHeap` (a max-heap) keeps the *worst*
    /// candidate on top where it is cheap to evict. NaN scores are ranked as
    /// negative infinity; equal scores are ordered by ascending index, which
    /// makes the higher index the "worse" entry.
    fn cmp(&self, other: &Self) -> Ordering {
        nan_safe(other.score)
            .total_cmp(&nan_safe(self.score))
            .then_with(|| self.index.cmp(&other.index))
    }
}

/// Map NaN to negative infinity so it ranks below every real score;
/// all other values pass through unchanged.
const fn nan_safe(score: f32) -> f32 {
    if score.is_nan() {
        return f32::NEG_INFINITY;
    }
    score
}
171
172fn insert_mrl_candidate(
173    heap: &mut BinaryHeap<MrlHeapEntry>,
174    candidate: MrlHeapEntry,
175    limit: usize,
176) {
177    if limit == 0 {
178        return;
179    }
180    if heap.len() < limit {
181        heap.push(candidate);
182        return;
183    }
184    if let Some(&worst) = heap.peek() {
185        let better = match nan_safe(candidate.score).total_cmp(&nan_safe(worst.score)) {
186            Ordering::Greater => true,
187            Ordering::Less => false,
188            Ordering::Equal => candidate.index < worst.index,
189        };
190        if better {
191            let _ = heap.pop();
192            heap.push(candidate);
193        }
194    }
195}
196
197// ---------------------------------------------------------------------------
198// VectorIndex extension
199// ---------------------------------------------------------------------------
200
201impl VectorIndex {
202    /// Search using MRL-accelerated truncated scan with full-dimension rescore.
203    ///
204    /// If `config.search_dims >= self.dimension()`, this falls back to the
205    /// standard `search_top_k` (no truncation benefit).
206    ///
207    /// # Errors
208    ///
209    /// Returns `SearchError::DimensionMismatch` when `query.len()` does not
210    /// match index dimensionality, `SearchError::InvalidConfig` for invalid
211    /// config values, and `SearchError::IndexCorrupted` for malformed data.
212    pub fn mrl_search(
213        &self,
214        query: &[f32],
215        limit: usize,
216        config: &MrlConfig,
217        filter: Option<&dyn SearchFilter>,
218    ) -> SearchResult<Vec<VectorHit>> {
219        self.mrl_search_with_stats(query, limit, config, filter)
220            .map(|(hits, _stats)| hits)
221    }
222
223    /// Like [`mrl_search`](Self::mrl_search) but also returns diagnostic stats.
224    ///
225    /// # Errors
226    ///
227    /// Same error conditions as [`mrl_search`](Self::mrl_search).
228    pub fn mrl_search_with_stats(
229        &self,
230        query: &[f32],
231        limit: usize,
232        config: &MrlConfig,
233        filter: Option<&dyn SearchFilter>,
234    ) -> SearchResult<(Vec<VectorHit>, MrlSearchStats)> {
235        // Validate query dimension.
236        if query.len() != self.dimension() {
237            return Err(SearchError::DimensionMismatch {
238                expected: self.dimension(),
239                found: query.len(),
240            });
241        }
242
243        // Validate config.
244        if config.search_dims == 0 {
245            return Err(SearchError::InvalidConfig {
246                field: "search_dims".into(),
247                value: "0".into(),
248                reason: "search_dims must be at least 1".into(),
249            });
250        }
251
252        let dim = self.dimension();
253
254        // Fall back to standard search if truncation wouldn't help.
255        if config.search_dims >= dim {
256            let hits = self.search_top_k(query, limit, filter)?;
257            let stats = MrlSearchStats {
258                scan_dims: dim,
259                rescore_dims: dim,
260                candidates_rescored: 0,
261                records_scanned: self.record_count() + self.wal_entries.len(),
262                fell_back_to_full: true,
263            };
264            return Ok((hits, stats));
265        }
266
267        if limit == 0 || (self.record_count() == 0 && self.wal_entries.is_empty()) {
268            return Ok((Vec::new(), MrlSearchStats::default()));
269        }
270
271        let search_dims = config.search_dims;
272        let rescore_dims = config.effective_rescore_dims(dim);
273        let rescore_top_k = config.effective_rescore_top_k(limit);
274
275        // Phase 1: truncated scan.
276        let query_truncated = &query[..search_dims];
277        let mut heap =
278            self.mrl_truncated_scan(query_truncated, rescore_top_k, search_dims, filter)?;
279
280        // Also scan WAL entries.
281        self.mrl_scan_wal_truncated(
282            query_truncated,
283            &mut heap,
284            rescore_top_k,
285            search_dims,
286            filter,
287        )?;
288
289        let candidates: Vec<MrlHeapEntry> = heap.into_vec();
290        let records_scanned = self.record_count() + self.wal_entries.len();
291        let candidates_rescored = candidates.len();
292
293        // Phase 2: rescore candidates with full (or rescore_dims) dimensions.
294        let query_rescore = &query[..rescore_dims];
295        let mut rescored = Vec::with_capacity(candidates.len());
296
297        for candidate in &candidates {
298            let full_score = self.mrl_rescore(candidate.index, query_rescore, rescore_dims)?;
299            rescored.push(MrlHeapEntry {
300                index: candidate.index,
301                score: full_score,
302            });
303        }
304
305        // Select top `limit` from rescored candidates.
306        rescored.sort_by(|a, b| {
307            nan_safe(b.score)
308                .total_cmp(&nan_safe(a.score))
309                .then_with(|| a.index.cmp(&b.index))
310        });
311        rescored.truncate(limit);
312
313        // Resolve doc_ids.
314        let hits = self.resolve_mrl_hits(&rescored)?;
315
316        let stats = MrlSearchStats {
317            scan_dims: search_dims,
318            rescore_dims,
319            candidates_rescored,
320            records_scanned,
321            fell_back_to_full: false,
322        };
323
324        Ok((hits, stats))
325    }
326
327    // ── Internal: truncated scan ─────────────────────────────────────
328
329    fn mrl_truncated_scan(
330        &self,
331        query_truncated: &[f32],
332        limit: usize,
333        search_dims: usize,
334        filter: Option<&dyn SearchFilter>,
335    ) -> SearchResult<BinaryHeap<MrlHeapEntry>> {
336        let max_elements = self.record_count();
337        let mut heap = BinaryHeap::with_capacity(limit.min(max_elements).saturating_add(1));
338        let stride = match self.quantization() {
339            crate::Quantization::F16 => self.dimension() * 2,
340            crate::Quantization::F32 => self.dimension() * 4,
341        };
342
343        match self.quantization() {
344            crate::Quantization::F16 => {
345                let partial_bytes = search_dims * 2;
346                let mut record_offset = self.records_offset;
347                let mut vector_offset = self.vectors_offset;
348
349                for index in 0..self.record_count() {
350                    let flags_bytes = &self.data[record_offset + 14..record_offset + 16];
351                    let flags = u16::from_le_bytes([flags_bytes[0], flags_bytes[1]]);
352
353                    if (flags & 0x0001) != 0 {
354                        record_offset += 16;
355                        vector_offset += stride;
356                        continue;
357                    }
358
359                    let passed = if let Some(f) = filter {
360                        let hash_bytes = &self.data[record_offset..record_offset + 8];
361                        let hash = u64::from_le_bytes([
362                            hash_bytes[0],
363                            hash_bytes[1],
364                            hash_bytes[2],
365                            hash_bytes[3],
366                            hash_bytes[4],
367                            hash_bytes[5],
368                            hash_bytes[6],
369                            hash_bytes[7],
370                        ]);
371                        if let Some(matches) = f.matches_doc_id_hash(hash, None) {
372                            matches
373                        } else {
374                            let doc_id = self.doc_id_at(index)?;
375                            f.matches(doc_id, None)
376                        }
377                    } else {
378                        true
379                    };
380
381                    if passed {
382                        let vector_bytes = &self.data[vector_offset..vector_offset + partial_bytes];
383                        let score = dot_product_f16_bytes_f32(vector_bytes, query_truncated)?;
384                        insert_mrl_candidate(&mut heap, MrlHeapEntry { index, score }, limit);
385                    }
386
387                    record_offset += 16;
388                    vector_offset += stride;
389                }
390            }
391            crate::Quantization::F32 => {
392                let partial_bytes = search_dims * 4;
393                let mut record_offset = self.records_offset;
394                let mut vector_offset = self.vectors_offset;
395
396                for index in 0..self.record_count() {
397                    let flags_bytes = &self.data[record_offset + 14..record_offset + 16];
398                    let flags = u16::from_le_bytes([flags_bytes[0], flags_bytes[1]]);
399
400                    if (flags & 0x0001) != 0 {
401                        record_offset += 16;
402                        vector_offset += stride;
403                        continue;
404                    }
405
406                    let passed = if let Some(f) = filter {
407                        let hash_bytes = &self.data[record_offset..record_offset + 8];
408                        let hash = u64::from_le_bytes([
409                            hash_bytes[0],
410                            hash_bytes[1],
411                            hash_bytes[2],
412                            hash_bytes[3],
413                            hash_bytes[4],
414                            hash_bytes[5],
415                            hash_bytes[6],
416                            hash_bytes[7],
417                        ]);
418                        if let Some(matches) = f.matches_doc_id_hash(hash, None) {
419                            matches
420                        } else {
421                            let doc_id = self.doc_id_at(index)?;
422                            f.matches(doc_id, None)
423                        }
424                    } else {
425                        true
426                    };
427
428                    if passed {
429                        let vector_bytes = &self.data[vector_offset..vector_offset + partial_bytes];
430                        let score = dot_product_f32_bytes_f32(vector_bytes, query_truncated)?;
431                        insert_mrl_candidate(&mut heap, MrlHeapEntry { index, score }, limit);
432                    }
433
434                    record_offset += 16;
435                    vector_offset += stride;
436                }
437            }
438        }
439
440        Ok(heap)
441    }
442
443    fn mrl_scan_wal_truncated(
444        &self,
445        query_truncated: &[f32],
446        heap: &mut BinaryHeap<MrlHeapEntry>,
447        limit: usize,
448        search_dims: usize,
449        filter: Option<&dyn SearchFilter>,
450    ) -> SearchResult<()> {
451        for (idx, entry) in self.wal_entries.iter().enumerate() {
452            if let Some(f) = filter {
453                if let Some(matches) = f.matches_doc_id_hash(entry.doc_id_hash, None) {
454                    if !matches {
455                        continue;
456                    }
457                } else if !f.matches(&entry.doc_id, None) {
458                    continue;
459                }
460            }
461            // WAL embeddings are f32 in memory — truncate to search_dims.
462            let truncated_emb = &entry.embedding[..search_dims.min(entry.embedding.len())];
463            let truncated_query = &query_truncated[..truncated_emb.len()];
464            let score = dot_product_f32_f32(truncated_emb, truncated_query)?;
465
466            // Guard: corrupt WAL embeddings (e.g. from crash recovery) can
467            // produce NaN/Inf scores. Skip them rather than polluting results.
468            if !score.is_finite() {
469                continue;
470            }
471
472            insert_mrl_candidate(
473                heap,
474                MrlHeapEntry {
475                    index: to_wal_index(idx),
476                    score,
477                },
478                limit,
479            );
480        }
481        Ok(())
482    }
483
484    // ── Internal: rescore ────────────────────────────────────────────
485
486    fn mrl_rescore(
487        &self,
488        index: usize,
489        query_rescore: &[f32],
490        rescore_dims: usize,
491    ) -> SearchResult<f32> {
492        if is_wal_index(index) {
493            let wal_idx = from_wal_index(index);
494            let entry = &self.wal_entries[wal_idx];
495            let emb_slice = &entry.embedding[..rescore_dims.min(entry.embedding.len())];
496            let q_slice = &query_rescore[..emb_slice.len()];
497            return dot_product_f32_f32(emb_slice, q_slice);
498        }
499
500        match self.quantization() {
501            crate::Quantization::F16 => {
502                let byte_count = rescore_dims.checked_mul(2).ok_or_else(|| {
503                    crate::index_corrupted(&self.path, "f16 truncated byte length overflow")
504                })?;
505                let bytes = self.raw_vector_bytes_partial(index, byte_count)?;
506                dot_product_f16_bytes_f32(bytes, query_rescore)
507            }
508            crate::Quantization::F32 => {
509                let byte_count = rescore_dims.checked_mul(4).ok_or_else(|| {
510                    crate::index_corrupted(&self.path, "f32 truncated byte length overflow")
511                })?;
512                let bytes = self.raw_vector_bytes_partial(index, byte_count)?;
513                dot_product_f32_bytes_f32(bytes, query_rescore)
514            }
515        }
516    }
517
518    /// Read the first `byte_count` bytes of a stored vector (without reading
519    /// the full stride).
520    fn raw_vector_bytes_partial(&self, index: usize, byte_count: usize) -> SearchResult<&[u8]> {
521        self.ensure_index(index)?;
522        let start = self.vector_start(index)?;
523        let end = start
524            .checked_add(byte_count)
525            .ok_or_else(|| crate::index_corrupted(&self.path, "partial vector end overflow"))?;
526        if end > self.data.len() {
527            return Err(crate::index_corrupted(
528                &self.path,
529                "partial vector extends past file end",
530            ));
531        }
532        Ok(&self.data[start..end])
533    }
534
535    fn resolve_mrl_hits(&self, entries: &[MrlHeapEntry]) -> SearchResult<Vec<VectorHit>> {
536        let mut hits = Vec::with_capacity(entries.len());
537        for entry in entries {
538            if is_wal_index(entry.index) {
539                let wal_idx = from_wal_index(entry.index);
540                let wal_entry = &self.wal_entries[wal_idx];
541                let virtual_index = self.record_count().saturating_add(wal_idx);
542                let index_u32 =
543                    u32::try_from(virtual_index).map_err(|_| SearchError::InvalidConfig {
544                        field: "index".into(),
545                        value: virtual_index.to_string(),
546                        reason: "WAL entry index exceeds u32 range".into(),
547                    })?;
548                hits.push(VectorHit {
549                    index: index_u32,
550                    score: entry.score,
551                    doc_id: wal_entry.doc_id.clone(),
552                });
553            } else {
554                if self.is_deleted(entry.index) {
555                    continue;
556                }
557                let index_u32 =
558                    u32::try_from(entry.index).map_err(|_| SearchError::InvalidConfig {
559                        field: "index".into(),
560                        value: entry.index.to_string(),
561                        reason: "index exceeds u32 range".into(),
562                    })?;
563                let doc_id = self.doc_id_at(entry.index)?.to_owned();
564                hits.push(VectorHit {
565                    index: index_u32,
566                    score: entry.score,
567                    doc_id,
568                });
569            }
570        }
571        Ok(hits)
572    }
573}
574
575// ---------------------------------------------------------------------------
576// Tests
577// ---------------------------------------------------------------------------
578
579#[cfg(test)]
580mod tests {
581    use std::path::PathBuf;
582    use std::time::{SystemTime, UNIX_EPOCH};
583
584    use frankensearch_core::PredicateFilter;
585
586    use super::*;
587    use crate::{Quantization, VectorIndex};
588
/// Build a unique temp-dir path for a test index file, combining the test
/// `name`, the process id and the current time in nanoseconds.
fn temp_index_path(name: &str) -> PathBuf {
    // A clock before the Unix epoch degrades to a zero timestamp.
    let now = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .map_or(0, |elapsed| elapsed.as_nanos());
    let file_name = format!(
        "frankensearch-index-mrl-{name}-{}-{now}.fsvi",
        std::process::id()
    );
    let mut path = std::env::temp_dir();
    path.push(file_name);
    path
}
599
600    fn write_index(path: &std::path::Path, rows: &[(&str, Vec<f32>)]) -> SearchResult<()> {
601        let dimension =
602            rows.first()
603                .map(|(_, vec)| vec.len())
604                .ok_or_else(|| SearchError::InvalidConfig {
605                    field: "rows".into(),
606                    value: "[]".into(),
607                    reason: "rows must not be empty".into(),
608                })?;
609        let mut writer = VectorIndex::create_with_revision(
610            path,
611            "test",
612            "mrl-test",
613            dimension,
614            Quantization::F16,
615        )?;
616        for (doc_id, vector) in rows {
617            writer.write_record(doc_id, vector)?;
618        }
619        writer.finish()
620    }
621
/// Scale `v` to unit L2 length; vectors whose norm is below `f32::EPSILON`
/// are returned unchanged to avoid dividing by (almost) zero.
fn normalize(v: &[f32]) -> Vec<f32> {
    let squared_sum: f32 = v.iter().map(|component| component * component).sum();
    let magnitude = squared_sum.sqrt();
    if magnitude < f32::EPSILON {
        v.to_vec()
    } else {
        v.iter().map(|component| component / magnitude).collect()
    }
}
629
630    /// Build a vector with a strong signal in the first `signal_dims` dimensions.
631    fn signal_vector(dim: usize, signal_dims: usize, signal: f32) -> Vec<f32> {
632        let mut v = vec![0.01; dim];
633        for d in v.iter_mut().take(signal_dims) {
634            *d = signal;
635        }
636        normalize(&v)
637    }
638
639    // ── Basic MRL search ─────────────────────────────────────────────
640
#[test]
fn mrl_search_returns_correct_top_1() {
    let dim = 16;
    let path = temp_index_path("basic-top1");

    // Constant-valued, un-normalized vectors: their magnitudes over the
    // first 8 dims differ clearly and survive f16 quantization
    // (truncated dot products against an all-ones query: 8.0, 4.0, 0.8).
    let rows = [("doc-a", 1.0), ("doc-b", 0.5), ("doc-c", 0.1)]
        .map(|(doc_id, value)| (doc_id, vec![value; dim]));
    write_index(&path, &rows).expect("write index");

    let index = VectorIndex::open(&path).expect("open");
    let query = vec![1.0; dim];
    let config = MrlConfig {
        search_dims: 8,
        ..MrlConfig::default()
    };

    let (hits, stats) = index
        .mrl_search_with_stats(&query, 1, &config, None)
        .expect("mrl search");

    let top_ids: Vec<&str> = hits.iter().map(|hit| hit.doc_id.as_str()).collect();
    assert_eq!(top_ids, ["doc-a"]);
    assert_eq!(stats.scan_dims, 8);
    assert!(!stats.fell_back_to_full);
    assert!(stats.candidates_rescored > 0);

    let _ = std::fs::remove_file(&path);
}
676
677    // ── Fallback to full search when search_dims >= dimension ────────
678
#[test]
fn mrl_search_falls_back_when_search_dims_equals_dimension() {
    let dim = 8;
    let path = temp_index_path("fallback-full");

    let rows = [("doc-a", 1.0), ("doc-b", 0.5)]
        .map(|(doc_id, signal)| (doc_id, signal_vector(dim, 4, signal)));
    write_index(&path, &rows).expect("write index");

    let index = VectorIndex::open(&path).expect("open");
    let query = signal_vector(dim, 4, 1.0);

    // `search_dims` equal to the index dimension leaves nothing to truncate.
    let config = MrlConfig {
        search_dims: dim,
        rescore_dims: 0,
        rescore_top_k: 0,
    };

    let outcome = index.mrl_search_with_stats(&query, 2, &config, None);
    let (hits, stats) = outcome.expect("mrl search");

    assert_eq!(hits.len(), 2);
    assert!(stats.fell_back_to_full);
    assert_eq!(stats.scan_dims, dim);

    let _ = std::fs::remove_file(&path);
}
708
#[test]
fn mrl_search_falls_back_when_search_dims_exceeds_dimension() {
    let dim = 8;
    let path = temp_index_path("fallback-exceed");

    let only_row = ("doc-a", signal_vector(dim, 4, 1.0));
    write_index(&path, &[only_row]).expect("write index");

    let index = VectorIndex::open(&path).expect("open");
    let query = signal_vector(dim, 4, 1.0);

    // 100 is far beyond the 8-dimensional index.
    let config = MrlConfig {
        search_dims: 100,
        rescore_dims: 0,
        rescore_top_k: 0,
    };

    let outcome = index.mrl_search_with_stats(&query, 1, &config, None);
    let (hits, stats) = outcome.expect("mrl search");

    assert_eq!(hits.len(), 1);
    assert!(stats.fell_back_to_full);

    let _ = std::fs::remove_file(&path);
}
734
735    // ── search_dims = 0 → error ──────────────────────────────────────
736
#[test]
fn mrl_search_rejects_zero_search_dims() {
    let dim = 8;
    let path = temp_index_path("zero-dims");

    let only_row = ("doc-a", signal_vector(dim, 4, 1.0));
    write_index(&path, &[only_row]).expect("write index");

    let index = VectorIndex::open(&path).expect("open");
    let query = signal_vector(dim, 4, 1.0);

    let config = MrlConfig {
        search_dims: 0,
        rescore_dims: 0,
        rescore_top_k: 0,
    };

    let outcome = index.mrl_search(&query, 1, &config, None);
    let err = outcome.expect_err("should reject search_dims=0");
    assert!(matches!(err, SearchError::InvalidConfig { .. }));

    let _ = std::fs::remove_file(&path);
}
760
761    // ── Empty index ──────────────────────────────────────────────────
762
#[test]
fn mrl_search_empty_index() {
    let dim = 8;
    let path = temp_index_path("empty-index");

    // Create and immediately finish a writer so the index has no records.
    VectorIndex::create_with_revision(&path, "test", "mrl-test", dim, Quantization::F16)
        .expect("writer")
        .finish()
        .expect("finish");

    let index = VectorIndex::open(&path).expect("open");
    let query = signal_vector(dim, 4, 1.0);

    let config = MrlConfig {
        search_dims: 4,
        rescore_dims: 0,
        rescore_top_k: 0,
    };

    let outcome = index.mrl_search(&query, 10, &config, None);
    let hits = outcome.expect("mrl search");
    assert!(hits.is_empty());

    let _ = std::fs::remove_file(&path);
}
788
789    // ── Single vector ────────────────────────────────────────────────
790
#[test]
fn mrl_search_single_vector() {
    let dim = 16;
    let path = temp_index_path("single-vector");

    let only_row = ("sole-doc", signal_vector(dim, 8, 1.0));
    write_index(&path, &[only_row]).expect("write index");

    let index = VectorIndex::open(&path).expect("open");
    let query = signal_vector(dim, 8, 1.0);

    let config = MrlConfig {
        search_dims: 8,
        rescore_dims: 0,
        rescore_top_k: 0,
    };

    // Asking for 5 hits from a one-record index yields just that record.
    let outcome = index.mrl_search(&query, 5, &config, None);
    let hits = outcome.expect("mrl search");
    let ids: Vec<&str> = hits.iter().map(|hit| hit.doc_id.as_str()).collect();
    assert_eq!(ids, ["sole-doc"]);

    let _ = std::fs::remove_file(&path);
}
815
816    // ── Dimension mismatch ───────────────────────────────────────────
817
#[test]
fn mrl_search_dimension_mismatch() {
    let dim = 8;
    let path = temp_index_path("dim-mismatch");

    let only_row = ("doc-a", signal_vector(dim, 4, 1.0));
    write_index(&path, &[only_row]).expect("write index");

    let index = VectorIndex::open(&path).expect("open");

    // A 4-element query against the 8-dimensional index.
    let bad_query = vec![1.0; 4];

    let config = MrlConfig {
        search_dims: 4,
        rescore_dims: 0,
        rescore_top_k: 0,
    };

    let outcome = index.mrl_search(&bad_query, 1, &config, None);
    let err = outcome.expect_err("should reject wrong dimension");
    assert!(matches!(
        err,
        SearchError::DimensionMismatch {
            expected: 8,
            found: 4
        }
    ));

    let _ = std::fs::remove_file(&path);
}
847
848    // ── MRL search matches standard search on same top-1 ────────────
849
#[test]
fn mrl_search_agrees_with_standard_on_top_1() {
    let dim = 16;
    let path = temp_index_path("agrees-standard");

    // Three vectors whose signal strength in the first 8 dims is clearly
    // distinguishable.
    let rows = [("doc-best", 1.0), ("doc-mid", 0.6), ("doc-weak", 0.2)]
        .map(|(doc_id, signal)| (doc_id, signal_vector(dim, 8, signal)));
    write_index(&path, &rows).expect("write index");

    let index = VectorIndex::open(&path).expect("open");
    let query = signal_vector(dim, 8, 1.0);

    let standard = index
        .search_top_k(&query, 1, None)
        .expect("standard search");

    let config = MrlConfig {
        search_dims: 8,
        ..MrlConfig::default()
    };
    let mrl = index
        .mrl_search(&query, 1, &config, None)
        .expect("mrl search");

    // Both strategies must pick the same best document.
    assert_eq!(standard[0].doc_id, mrl[0].doc_id);

    let _ = std::fs::remove_file(&path);
}
883
884    // ── SIMD-aligned dims (multiple of 8) ────────────────────────────
885
/// Runs an MRL search with `search_dims = 8` over a 64-dimensional index of
/// two constant-valued vectors and expects both back, `doc-a` first.
#[test]
fn mrl_search_simd_aligned_dims() {
    let dim = 64;
    let index_path = temp_index_path("simd-aligned");

    let docs = [("doc-a", vec![1.0; dim]), ("doc-b", vec![0.5; dim])];
    write_index(&index_path, &docs).expect("write index");
    let index = VectorIndex::open(&index_path).expect("open");

    // Eight scan dimensions: a multiple of 8, so no remainder tail is needed.
    let mrl_config = MrlConfig {
        search_dims: 8,
        ..MrlConfig::default()
    };
    let probe = vec![1.0; dim];

    let results = index
        .mrl_search(&probe, 2, &mrl_config, None)
        .expect("mrl search");
    assert_eq!(results.len(), 2);
    assert_eq!(results[0].doc_id, "doc-a");

    std::fs::remove_file(&index_path).ok();
}
911
912    // ── Non-aligned search_dims (remainder handling) ─────────────────
913
/// Runs an MRL search with `search_dims = 5` (not a multiple of 8) and
/// expects both documents back, with `doc-a` — built from the same
/// `signal_vector` arguments as the query — ranked first.
#[test]
fn mrl_search_non_aligned_dims() {
    let dim = 16;
    let index_path = temp_index_path("non-aligned");

    let docs = [
        ("doc-a", signal_vector(dim, 5, 1.0)),
        ("doc-b", signal_vector(dim, 5, 0.5)),
    ];
    write_index(&index_path, &docs).expect("write index");
    let index = VectorIndex::open(&index_path).expect("open");

    // Five scan dimensions: deliberately off the multiple-of-8 grid.
    let mrl_config = MrlConfig {
        search_dims: 5,
        ..MrlConfig::default()
    };
    let probe = signal_vector(dim, 5, 1.0);

    let results = index
        .mrl_search(&probe, 2, &mrl_config, None)
        .expect("mrl search");
    assert_eq!(results.len(), 2);
    assert_eq!(results[0].doc_id, "doc-a");

    std::fs::remove_file(&index_path).ok();
}
942
943    // ── Filter integration ───────────────────────────────────────────
944
/// A predicate filter rejecting `doc-a` must keep it out of the results, so
/// the two hits returned start with `doc-b`.
#[test]
fn mrl_search_with_filter() {
    let dim = 16;
    let index_path = temp_index_path("filter");

    let docs = [
        ("doc-a", vec![1.0; dim]),
        ("doc-b", vec![0.8; dim]),
        ("doc-c", vec![0.5; dim]),
    ];
    write_index(&index_path, &docs).expect("write index");
    let index = VectorIndex::open(&index_path).expect("open");

    let probe = vec![1.0; dim];
    let mrl_config = MrlConfig {
        search_dims: 8,
        ..MrlConfig::default()
    };
    let exclude_a = PredicateFilter::new("no-a", |id| id != "doc-a");

    let results = index
        .mrl_search(&probe, 2, &mrl_config, Some(&exclude_a))
        .expect("mrl search");

    assert_eq!(results.len(), 2);
    // The filtered-out document must not appear anywhere in the results.
    assert!(!results.iter().any(|hit| hit.doc_id == "doc-a"));
    assert_eq!(results[0].doc_id, "doc-b");

    std::fs::remove_file(&index_path).ok();
}
976
/// Overwrites the byte at `doc-b`'s doc_id offset with `0xFF`, then searches
/// with a `BitsetFilter` that only admits `doc-a`. The search must succeed
/// and return only `doc-a`, i.e. it must ignore the corrupted doc_id of the
/// filtered-out record.
#[test]
fn mrl_bitset_filter_skips_doc_id_decode_for_non_matching_records() {
    let dim = 16;
    let index_path = temp_index_path("bitset-hash-fast-path");

    let docs = [("doc-a", vec![1.0; dim]), ("doc-b", vec![0.2; dim])];
    write_index(&index_path, &docs).expect("write index");

    // Step 1: find where doc-b's doc_id lives in the file, then release the
    // handle before rewriting the file on disk.
    let probe_index = VectorIndex::open(&index_path).expect("open");
    let doc_b_slot = probe_index
        .find_index_by_doc_hash(super::super::fnv1a_hash(b"doc-b"))
        .expect("doc-b index");
    let doc_b_record = probe_index.record_at(doc_b_slot).expect("record");
    let corrupt_at = probe_index.strings_offset
        + usize::try_from(doc_b_record.doc_id_offset).expect("offset");
    drop(probe_index);

    // Step 2: corrupt that single byte (0xFF never occurs in valid UTF-8).
    let mut raw = std::fs::read(&index_path).expect("read bytes");
    raw[corrupt_at] = 0xFF;
    std::fs::write(&index_path, raw).expect("write corrupt bytes");

    // Step 3: reopen and search with a filter that admits only doc-a.
    let index = VectorIndex::open(&index_path).expect("open");
    let mrl_config = MrlConfig {
        search_dims: 8,
        ..MrlConfig::default()
    };
    let only_a = frankensearch_core::BitsetFilter::from_doc_ids(["doc-a"]);
    let probe = vec![1.0; dim];
    let results = index
        .mrl_search(&probe, 10, &mrl_config, Some(&only_a))
        .expect("mrl search should ignore corrupted filtered-out doc_id");

    assert_eq!(results.len(), 1);
    assert_eq!(results[0].doc_id, "doc-a");

    std::fs::remove_file(&index_path).ok();
}
1013
1014    // ── Tombstoned records excluded ──────────────────────────────────
1015
/// After `soft_delete("doc-a")`, an MRL search must return only `doc-b`.
#[test]
fn mrl_search_excludes_tombstoned() {
    let dim = 16;
    let index_path = temp_index_path("tombstone");

    let docs = [
        ("doc-a", signal_vector(dim, 8, 1.0)),
        ("doc-b", signal_vector(dim, 8, 0.8)),
    ];
    write_index(&index_path, &docs).expect("write index");

    let mut index = VectorIndex::open(&index_path).expect("open");
    index.soft_delete("doc-a").expect("delete doc-a");

    let mrl_config = MrlConfig {
        search_dims: 8,
        ..MrlConfig::default()
    };
    let probe = signal_vector(dim, 8, 1.0);

    // Limit (10) exceeds the record count, so only deletion can hide doc-a.
    let results = index
        .mrl_search(&probe, 10, &mrl_config, None)
        .expect("mrl search");

    assert_eq!(results.len(), 1);
    assert_eq!(results[0].doc_id, "doc-b");

    std::fs::remove_file(&index_path).ok();
}
1045
1046    // ── WAL entries participate in MRL search ────────────────────────
1047
/// A document added through `append` must show up in MRL results alongside
/// the document written by `write_index`; here the appended one ranks first.
#[test]
fn mrl_search_includes_wal_entries() {
    let dim = 16;
    let index_path = temp_index_path("wal");

    let docs = [("doc-main", vec![0.5; dim])];
    write_index(&index_path, &docs).expect("write index");

    let mut index = VectorIndex::open(&index_path).expect("open");
    let appended = vec![1.0; dim];
    index.append("doc-wal", &appended).expect("append");

    let mrl_config = MrlConfig {
        search_dims: 8,
        ..MrlConfig::default()
    };
    let probe = vec![1.0; dim];

    let results = index
        .mrl_search(&probe, 2, &mrl_config, None)
        .expect("mrl search");

    assert_eq!(results.len(), 2);
    assert_eq!(results[0].doc_id, "doc-wal");

    // Remove both the index file and its WAL sidecar.
    std::fs::remove_file(&index_path).ok();
    std::fs::remove_file(crate::wal::wal_path_for(&index_path)).ok();
}
1075
1076    // ── rescore_top_k = 0 → defaults to 3x limit ────────────────────
1077
/// With `rescore_top_k = 0`, `effective_rescore_top_k(limit)` must return
/// three times the limit.
#[test]
fn mrl_rescore_top_k_defaults_to_3x() {
    let mrl_config = MrlConfig {
        search_dims: 8,
        rescore_dims: 0,
        rescore_top_k: 0,
    };
    // (limit, expected effective rescore_top_k)
    for (limit, expected) in [(5, 15), (0, 0), (10, 30)] {
        assert_eq!(mrl_config.effective_rescore_top_k(limit), expected);
    }
}
1089
1090    // ── rescore_dims = 0 → full dimension ────────────────────────────
1091
/// With `rescore_dims = 0`, `effective_rescore_dims(index_dim)` must return
/// the index dimension unchanged.
#[test]
fn mrl_rescore_dims_defaults_to_full() {
    let mrl_config = MrlConfig {
        search_dims: 8,
        rescore_dims: 0,
        rescore_top_k: 0,
    };
    for index_dim in [384, 256] {
        assert_eq!(mrl_config.effective_rescore_dims(index_dim), index_dim);
    }
}
1102
/// A configured `rescore_dims` larger than the index dimension must be
/// clamped down to the index dimension.
#[test]
fn mrl_rescore_dims_clamped_to_index_dim() {
    let oversized = MrlConfig {
        search_dims: 8,
        rescore_dims: 1000,
        rescore_top_k: 0,
    };
    // 1000 requested, 384 available: the index dimension wins.
    let effective = oversized.effective_rescore_dims(384);
    assert_eq!(effective, 384);
}
1113
1114    // ── Config serde roundtrip ───────────────────────────────────────
1115
/// Serializing an `MrlConfig` to JSON and parsing it back must preserve all
/// three fields.
#[test]
fn mrl_config_serde_roundtrip() {
    let original = MrlConfig {
        search_dims: 128,
        rescore_dims: 256,
        rescore_top_k: 50,
    };
    let encoded = serde_json::to_string(&original).unwrap();
    let restored: MrlConfig = serde_json::from_str(&encoded).unwrap();

    assert_eq!(restored.search_dims, 128);
    assert_eq!(restored.rescore_dims, 256);
    assert_eq!(restored.rescore_top_k, 50);
}
1129
1130    // ── Limit zero returns empty ─────────────────────────────────────
1131
/// An MRL search with `limit = 0` must succeed and return no hits.
#[test]
fn mrl_search_limit_zero() {
    let dim = 8;
    let index_path = temp_index_path("limit-zero");

    let docs = [("doc-a", signal_vector(dim, 4, 1.0))];
    write_index(&index_path, &docs).expect("write index");
    let index = VectorIndex::open(&index_path).expect("open");

    let mrl_config = MrlConfig {
        search_dims: 4,
        ..MrlConfig::default()
    };
    let probe = signal_vector(dim, 4, 1.0);

    let results = index
        .mrl_search(&probe, 0, &mrl_config, None)
        .expect("mrl search");
    assert!(results.is_empty());

    std::fs::remove_file(&index_path).ok();
}
1155
1156    // ── Verify truncated scan uses only search_dims ──────────────────
1157
/// Checks that the reported scan width equals `search_dims` and that no
/// fallback to a full scan happened.
///
/// `doc-a` concentrates its weight in dims 0-3, `doc-b` in dims 4-15; the
/// query is `doc-a`'s own vector, so `doc-a` must rank first.
#[test]
fn truncated_scan_uses_only_search_dims() {
    let dim = 16;
    let index_path = temp_index_path("truncated-only");

    // doc-a: 1.0 in the first four dims, 0.01 elsewhere.
    let mut a = vec![0.01; dim];
    a[..4].fill(1.0);
    let a = normalize(&a);

    // doc-b: 0.01 in the first four dims, 1.0 in the remaining twelve.
    let mut b = vec![0.01; dim];
    b[4..].fill(1.0);
    let b = normalize(&b);

    let docs = [("doc-a", a.clone()), ("doc-b", b)];
    write_index(&index_path, &docs).expect("write index");
    let index = VectorIndex::open(&index_path).expect("open");

    let mrl_config = MrlConfig {
        search_dims: 4,
        rescore_dims: 0,
        rescore_top_k: 10,
    };

    let (results, search_stats) = index
        .mrl_search_with_stats(&a, 2, &mrl_config, None)
        .expect("mrl search");

    assert_eq!(results.len(), 2);
    assert_eq!(results[0].doc_id, "doc-a");
    assert_eq!(search_stats.scan_dims, 4);
    assert!(!search_stats.fell_back_to_full);

    std::fs::remove_file(&index_path).ok();
}
1202
1203    // ── Multiple results ordered by rescore ───────────────────────────
1204
/// The hits returned by `mrl_search` must be sorted by non-increasing score.
#[test]
fn mrl_results_ordered_by_rescore() {
    let dim = 16;
    let index_path = temp_index_path("rescore-order");

    let docs = [
        ("doc-a", signal_vector(dim, 8, 1.0)),
        ("doc-b", signal_vector(dim, 8, 0.7)),
        ("doc-c", signal_vector(dim, 8, 0.3)),
    ];
    write_index(&index_path, &docs).expect("write index");
    let index = VectorIndex::open(&index_path).expect("open");

    let mrl_config = MrlConfig {
        search_dims: 4,
        ..MrlConfig::default()
    };
    let probe = signal_vector(dim, 8, 1.0);

    let results = index
        .mrl_search(&probe, 3, &mrl_config, None)
        .expect("mrl search");
    assert_eq!(results.len(), 3);

    // Compare every hit with its successor.
    for (earlier, later) in results.iter().zip(results.iter().skip(1)) {
        assert!(
            earlier.score >= later.score,
            "results should be descending: {} >= {}",
            earlier.score,
            later.score
        );
    }

    std::fs::remove_file(&index_path).ok();
}
1242
1243    // ─── bd-2c7e tests begin ───
1244
/// `MrlConfig::default()` must use 64 search dims and leave both rescore
/// settings at 0.
#[test]
fn mrl_config_default_values() {
    let MrlConfig {
        search_dims,
        rescore_dims,
        rescore_top_k,
    } = MrlConfig::default();

    assert_eq!(search_dims, 64);
    assert_eq!(rescore_dims, 0);
    assert_eq!(rescore_top_k, 0);
}
1252
/// `MrlConfig` must clone field-for-field, and its `Debug` output must
/// contain the type name and the `search_dims` value.
#[test]
fn mrl_config_debug_clone() {
    let original = MrlConfig {
        search_dims: 32,
        rescore_dims: 128,
        rescore_top_k: 20,
    };

    let copy = original.clone();
    assert_eq!(copy.search_dims, 32);
    assert_eq!(copy.rescore_dims, 128);
    assert_eq!(copy.rescore_top_k, 20);

    let rendered = format!("{original:?}");
    assert!(rendered.contains("MrlConfig"));
    assert!(rendered.contains("32"));
}
1268
/// `MrlSearchStats::default()` must zero every counter and leave the
/// fallback flag unset.
#[test]
fn mrl_search_stats_default_values() {
    let MrlSearchStats {
        scan_dims,
        rescore_dims,
        candidates_rescored,
        records_scanned,
        fell_back_to_full,
    } = MrlSearchStats::default();

    assert_eq!(scan_dims, 0);
    assert_eq!(rescore_dims, 0);
    assert_eq!(candidates_rescored, 0);
    assert_eq!(records_scanned, 0);
    assert!(!fell_back_to_full);
}
1278
/// `MrlSearchStats` must clone field-for-field, and its `Debug` output must
/// contain the type name.
#[test]
fn mrl_search_stats_debug_clone() {
    let original = MrlSearchStats {
        scan_dims: 64,
        rescore_dims: 256,
        candidates_rescored: 30,
        records_scanned: 1000,
        fell_back_to_full: true,
    };

    let copy = original.clone();
    assert_eq!(copy.scan_dims, 64);
    assert_eq!(copy.rescore_dims, 256);
    assert_eq!(copy.candidates_rescored, 30);
    assert_eq!(copy.records_scanned, 1000);
    assert!(copy.fell_back_to_full);

    let rendered = format!("{original:?}");
    assert!(rendered.contains("MrlSearchStats"));
}
1297
/// `nan_safe` must pass finite values and both infinities through unchanged
/// and map NaN to negative infinity.
#[test]
fn nan_safe_replaces_nan_with_neg_infinity() {
    // Finite inputs come back (approximately) unchanged.
    assert!((nan_safe(1.0) - 1.0).abs() < f32::EPSILON);
    assert!(nan_safe(0.0).abs() < f32::EPSILON);
    assert!((nan_safe(-1.0) + 1.0).abs() < f32::EPSILON);

    // Infinities keep their sign.
    let from_neg_inf = nan_safe(f32::NEG_INFINITY);
    assert!(from_neg_inf.is_infinite() && from_neg_inf.is_sign_negative());
    let from_pos_inf = nan_safe(f32::INFINITY);
    assert!(from_pos_inf.is_infinite() && from_pos_inf.is_sign_positive());

    // NaN is replaced by negative infinity.
    let from_nan = nan_safe(f32::NAN);
    assert!(from_nan.is_infinite() && from_nan.is_sign_negative());
}
1310
/// Two heap entries with the same index and score must compare equal.
#[test]
fn mrl_heap_entry_eq_same_values() {
    let entry = |index, score| MrlHeapEntry { index, score };

    let left = entry(5, 0.9);
    let right = entry(5, 0.9);
    assert_eq!(left, right);
}
1323
/// Heap entries with the same score but different indices must not be equal.
#[test]
fn mrl_heap_entry_ne_different_index() {
    let entry = |index, score| MrlHeapEntry { index, score };

    let left = entry(5, 0.9);
    let right = entry(6, 0.9);
    assert_ne!(left, right);
}
1336
/// Heap entries with the same index but different scores must not be equal.
#[test]
fn mrl_heap_entry_ne_different_score() {
    let entry = |index, score| MrlHeapEntry { index, score };

    let left = entry(5, 0.9);
    let right = entry(5, 0.8);
    assert_ne!(left, right);
}
1349
/// `MrlHeapEntry`'s `Ord` is reversed on score: the lower-scoring entry
/// compares as `Greater`.
#[test]
fn mrl_heap_entry_ordering_min_heap() {
    let entry = |index, score| MrlHeapEntry { index, score };
    let worse = entry(0, 0.1);
    let better = entry(1, 0.9);

    // std's BinaryHeap is a max-heap and pops its "greatest" element first,
    // so the reversed ordering puts the lowest-scoring entry at the top.
    assert_eq!(worse.cmp(&better), Ordering::Greater);
    assert_eq!(better.cmp(&worse), Ordering::Less);
}
1366
/// An entry whose score is NaN must order as the worst candidate, i.e.
/// compare as `Greater` against an entry with an ordinary score.
#[test]
fn mrl_heap_entry_ordering_nan_treated_as_worst() {
    let entry = |index, score| MrlHeapEntry { index, score };
    let with_nan = entry(0, f32::NAN);
    let ordinary = entry(1, 0.5);

    // NaN is ranked like NEG_INFINITY: the worst possible score, hence
    // "Greater" under the reversed ordering.
    assert_eq!(with_nan.cmp(&ordinary), Ordering::Greater);
}
1380
/// When scores are equal, ordering falls back to the index: the entry with
/// the lower index compares as `Less`.
#[test]
fn mrl_heap_entry_ordering_tie_breaks_on_index() {
    let entry = |index, score| MrlHeapEntry { index, score };
    let lower_index = entry(3, 0.5);
    let higher_index = entry(7, 0.5);

    // Equal scores: index 3 sorts before index 7.
    assert_eq!(lower_index.cmp(&higher_index), Ordering::Less);
    assert_eq!(higher_index.cmp(&lower_index), Ordering::Greater);
}
1395
/// `partial_cmp` must agree with `cmp` for `MrlHeapEntry`.
#[test]
fn mrl_heap_entry_partial_ord_consistent() {
    let entry = |index, score| MrlHeapEntry { index, score };
    let left = entry(0, 0.3);
    let right = entry(1, 0.7);

    let total = left.cmp(&right);
    assert_eq!(left.partial_cmp(&right), Some(total));
}
1408
/// With a limit of 0, `insert_mrl_candidate` must leave the heap empty.
#[test]
fn insert_mrl_candidate_limit_zero_noop() {
    let mut candidates = BinaryHeap::new();
    let entry = MrlHeapEntry {
        index: 0,
        score: 1.0,
    };

    insert_mrl_candidate(&mut candidates, entry, 0);

    assert!(candidates.is_empty());
}
1422
/// Inserting three candidates with a limit of 3 must keep all three.
#[test]
fn insert_mrl_candidate_fills_heap() {
    let mut candidates = BinaryHeap::new();

    // (index, score) pairs, inserted in this order.
    for (index, score) in [(0, 0.5), (1, 0.8), (2, 0.3)] {
        insert_mrl_candidate(&mut candidates, MrlHeapEntry { index, score }, 3);
    }

    assert_eq!(candidates.len(), 3);
}
1452
/// Once the heap is at its limit, a better-scoring candidate must evict the
/// current worst entry.
#[test]
fn insert_mrl_candidate_replaces_worst_when_better() {
    let mut candidates = BinaryHeap::new();

    // The first two entries fill the heap (limit 2); the third scores higher
    // than both and should push out index 0 (score 0.1).
    for (index, score) in [(0, 0.1), (1, 0.2), (2, 0.9)] {
        insert_mrl_candidate(&mut candidates, MrlHeapEntry { index, score }, 2);
    }

    assert_eq!(candidates.len(), 2);
    let survivors: Vec<MrlHeapEntry> = candidates.into_vec();
    assert!(!survivors.iter().any(|entry| entry.index == 0));
}
1485
/// A candidate scoring below the current worst entry of a full heap must be
/// discarded.
#[test]
fn insert_mrl_candidate_keeps_worst_when_candidate_worse() {
    let mut candidates = BinaryHeap::new();
    let incumbent = MrlHeapEntry {
        index: 0,
        score: 0.5,
    };
    let challenger = MrlHeapEntry {
        index: 1,
        score: 0.1,
    };

    insert_mrl_candidate(&mut candidates, incumbent, 1);
    // Lower score than the incumbent: the heap must stay as it is.
    insert_mrl_candidate(&mut candidates, challenger, 1);

    assert_eq!(candidates.len(), 1);
    assert_eq!(candidates.peek().unwrap().index, 0);
}
1509
/// On a score tie against a full heap, the candidate with the lower index
/// must replace the existing entry.
#[test]
fn insert_mrl_candidate_tie_prefers_lower_index() {
    let mut candidates = BinaryHeap::new();
    let incumbent = MrlHeapEntry {
        index: 5,
        score: 0.5,
    };
    let challenger = MrlHeapEntry {
        index: 2,
        score: 0.5,
    };

    insert_mrl_candidate(&mut candidates, incumbent, 1);
    // Identical score, smaller index: the challenger takes the slot.
    insert_mrl_candidate(&mut candidates, challenger, 1);

    assert_eq!(candidates.peek().unwrap().index, 2);
}
1532
/// A non-zero `rescore_dims` that fits within the index dimension must be
/// returned as-is.
#[test]
fn effective_rescore_dims_uses_explicit_value() {
    let explicit = MrlConfig {
        search_dims: 8,
        rescore_dims: 128,
        rescore_top_k: 0,
    };
    let effective = explicit.effective_rescore_dims(384);
    assert_eq!(effective, 128);
}
1542
/// A non-zero `rescore_top_k` must be returned regardless of the limit.
#[test]
fn effective_rescore_top_k_uses_explicit_value() {
    let explicit = MrlConfig {
        search_dims: 8,
        rescore_dims: 0,
        rescore_top_k: 42,
    };
    for limit in [10, 1] {
        assert_eq!(explicit.effective_rescore_top_k(limit), 42);
    }
}
1553
/// Builds an index with `Quantization::F32` through the writer API and
/// checks that MRL search ranks `doc-a` first, scans 8 dims, and does not
/// fall back to a full scan.
#[test]
fn mrl_search_f32_quantization() {
    let dim = 16;
    let index_path = temp_index_path("f32-quant");

    // Write the two records by hand so the quantization can be chosen.
    let mut writer =
        VectorIndex::create_with_revision(&index_path, "test", "mrl-test", dim, Quantization::F32)
            .expect("writer");
    let ones = vec![1.0_f32; dim];
    let halves = vec![0.5_f32; dim];
    writer.write_record("doc-a", &ones).expect("write a");
    writer.write_record("doc-b", &halves).expect("write b");
    writer.finish().expect("finish");

    let index = VectorIndex::open(&index_path).expect("open");

    let mrl_config = MrlConfig {
        search_dims: 8,
        rescore_dims: 0,
        rescore_top_k: 0,
    };
    let probe = vec![1.0; dim];

    let (results, search_stats) = index
        .mrl_search_with_stats(&probe, 2, &mrl_config, None)
        .expect("mrl search");

    assert_eq!(results.len(), 2);
    assert_eq!(results[0].doc_id, "doc-a");
    assert!(!search_stats.fell_back_to_full);
    assert_eq!(search_stats.scan_dims, 8);

    std::fs::remove_file(&index_path).ok();
}
1594
/// The search filter must also apply to documents added through `append`:
/// `doc-wal-skip` is rejected by the predicate, `doc-wal-keep` is returned.
#[test]
fn mrl_search_wal_entries_with_filter() {
    let dim = 16;
    let index_path = temp_index_path("wal-filter");

    let docs = [("doc-main", vec![0.3; dim])];
    write_index(&index_path, &docs).expect("write index");

    let mut index = VectorIndex::open(&index_path).expect("open");
    let keep_vector = vec![1.0; dim];
    index
        .append("doc-wal-keep", &keep_vector)
        .expect("append keep");
    let skip_vector = vec![0.9; dim];
    index
        .append("doc-wal-skip", &skip_vector)
        .expect("append skip");

    let mrl_config = MrlConfig {
        search_dims: 8,
        ..MrlConfig::default()
    };
    let probe = vec![1.0; dim];
    let keep_only = PredicateFilter::new("keep-only", |id| id != "doc-wal-skip");

    let results = index
        .mrl_search(&probe, 10, &mrl_config, Some(&keep_only))
        .expect("mrl search");

    assert!(!results.iter().any(|hit| hit.doc_id == "doc-wal-skip"));
    assert!(results.iter().any(|hit| hit.doc_id == "doc-wal-keep"));

    // Remove both the index file and its WAL sidecar.
    std::fs::remove_file(&index_path).ok();
    std::fs::remove_file(crate::wal::wal_path_for(&index_path)).ok();
}
1628
/// With `rescore_top_k = 2`, the stats must report at most two rescored
/// candidates and no fallback to a full scan.
#[test]
fn mrl_search_explicit_rescore_top_k() {
    let dim = 16;
    let index_path = temp_index_path("explicit-rescore-k");

    let docs = [
        ("doc-a", vec![1.0; dim]),
        ("doc-b", vec![0.8; dim]),
        ("doc-c", vec![0.5; dim]),
        ("doc-d", vec![0.3; dim]),
    ];
    write_index(&index_path, &docs).expect("write index");
    let index = VectorIndex::open(&index_path).expect("open");

    // Cap the rescore phase at two candidates out of the four indexed docs.
    let mrl_config = MrlConfig {
        search_dims: 8,
        rescore_dims: 0,
        rescore_top_k: 2,
    };
    let probe = vec![1.0; dim];

    let (results, search_stats) = index
        .mrl_search_with_stats(&probe, 2, &mrl_config, None)
        .expect("mrl search");

    assert_eq!(results.len(), 2);
    assert!(search_stats.candidates_rescored <= 2);
    assert!(!search_stats.fell_back_to_full);

    std::fs::remove_file(&index_path).ok();
}
1661
/// With `search_dims = 4` and `rescore_dims = 12` on a 16-dimensional index,
/// the stats must report exactly those two widths.
#[test]
fn mrl_search_explicit_rescore_dims() {
    let dim = 16;
    let index_path = temp_index_path("explicit-rescore-dims");

    let docs = [("doc-a", vec![1.0; dim]), ("doc-b", vec![0.5; dim])];
    write_index(&index_path, &docs).expect("write index");
    let index = VectorIndex::open(&index_path).expect("open");

    // Rescore on 12 dims rather than the full 16.
    let mrl_config = MrlConfig {
        search_dims: 4,
        rescore_dims: 12,
        rescore_top_k: 0,
    };
    let probe = vec![1.0; dim];

    let (results, search_stats) = index
        .mrl_search_with_stats(&probe, 2, &mrl_config, None)
        .expect("mrl search");

    assert_eq!(results.len(), 2);
    assert_eq!(search_stats.rescore_dims, 12);
    assert_eq!(search_stats.scan_dims, 4);

    std::fs::remove_file(&index_path).ok();
}
1689
/// The `Debug` output of `MrlHeapEntry` must contain the type name and the
/// index value.
#[test]
fn mrl_heap_entry_debug() {
    let sample = MrlHeapEntry {
        index: 42,
        score: 0.75,
    };
    let rendered = format!("{sample:?}");

    assert!(rendered.contains("MrlHeapEntry"));
    assert!(rendered.contains("42"));
}
1700
/// `MrlHeapEntry` is `Copy`: the source stays usable after assignment and
/// the duplicate carries bit-identical fields.
#[test]
fn mrl_heap_entry_copy() {
    let source = MrlHeapEntry {
        index: 1,
        score: 0.5,
    };
    let duplicate = source; // copies, does not move

    assert_eq!(source.index, duplicate.index);
    assert_eq!(source.score.to_bits(), duplicate.score.to_bits());
}
1711
1712    // ─── bd-2c7e tests end ───
1713}