cqs 1.26.0

Code intelligence and RAG for AI agents. Semantic search, call graphs, impact analysis, type dependencies, and smart context assembly — in single tool calls. 54 languages + L5X/L5K PLC exports, 91.2% Recall@1 (BGE-large), 0.951 MRR (296 queries). Local ML, GPU-accelerated.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
//! In-memory inverted index for SPLADE sparse vectors.
//!
//! Loaded from SQLite at startup, queried during search.
//! Supports filtered search with the same chunk_type/language predicate
//! used by HNSW traversal-time filtering.
//!
//! ## Persistence
//!
//! The index also has an on-disk format mirroring the HNSW persistence
//! pattern. Build-from-SQLite is slow (7.58M postings for SPLADE-Code 0.6B
//! = ~45s per CLI invocation), so we serialize the built index alongside
//! the HNSW files and load it in a single read on subsequent invocations.
//! Invalidation is driven by a `splade_generation` counter in the `metadata`
//! table, bumped on every write to `sparse_vectors`; the generation is
//! embedded in the file header so loads from a stale file are detected
//! and fall back to rebuild-from-SQLite.

use std::collections::HashMap;
use std::io::{Read, Write};
use std::path::Path;

use crate::index::IndexResult;

use super::SparseVector;

/// File magic for the SPLADE persisted index.
///
/// Written as the first four bytes of the header by `SpladeIndex::save`
/// and compared by `SpladeIndex::load`; a mismatch is reported as
/// `SpladeIndexPersistError::BadMagic`.
const SPLADE_INDEX_MAGIC: &[u8; 4] = b"SPDX";

/// Format version. Bump when the on-disk layout changes.
///
/// `SpladeIndex::load` rejects any other value with
/// `SpladeIndexPersistError::UnsupportedVersion`.
const SPLADE_INDEX_VERSION: u32 = 1;

/// Canonical filename for the persisted SPLADE index inside the project's
/// `.cqs/` directory. Lives alongside the HNSW files so the whole index
/// dir moves as a unit.
pub const SPLADE_INDEX_FILENAME: &str = "splade.index.bin";

/// Fixed header size in bytes: magic(4) + version(4) + generation(8)
/// + chunk_count(8) + token_count(8) + checksum(32) = 64 bytes.
///
/// The 32-byte checksum is a blake3 hash over header bytes `[0..32]`
/// followed by the body (not over the body alone), so corruption of the
/// header fields is detected as well.
const SPLADE_INDEX_HEADER_LEN: usize = 64;

/// Default cap on `splade.index.bin` file size read at load time.
/// Audit RB-2: without an upper bound, `read_to_end` could perform an
/// unbounded allocation when given a corrupted or maliciously-grown file.
/// 2 GB leaves ~20× headroom over SPLADE-Code 0.6B on a cqs-sized project
/// (~100 MB). Env override: `CQS_SPLADE_MAX_INDEX_BYTES`.
const DEFAULT_SPLADE_MAX_INDEX_BYTES: u64 = 2 * 1024 * 1024 * 1024;

/// Resolve the maximum accepted size (in bytes) of the persisted index.
///
/// Reads `CQS_SPLADE_MAX_INDEX_BYTES` once per process and memoizes the
/// result in a `OnceLock`, so later calls never touch the environment
/// again. An unset variable silently yields the default; a value that does
/// not parse as a positive `u64` yields the default and logs a warning.
fn splade_max_index_bytes() -> u64 {
    static LIMIT: std::sync::OnceLock<u64> = std::sync::OnceLock::new();
    *LIMIT.get_or_init(|| {
        // Unset (or non-unicode) variable: nothing to report, use the default.
        let Ok(val) = std::env::var("CQS_SPLADE_MAX_INDEX_BYTES") else {
            return DEFAULT_SPLADE_MAX_INDEX_BYTES;
        };

        match val.parse::<u64>() {
            Ok(n) if n > 0 => {
                tracing::info!(max_bytes = n, "CQS_SPLADE_MAX_INDEX_BYTES override");
                n
            }
            // Zero, negative, or garbage: reject loudly and fall back.
            _ => {
                tracing::warn!(
                    value = %val,
                    "Invalid CQS_SPLADE_MAX_INDEX_BYTES, using default 2GB"
                );
                DEFAULT_SPLADE_MAX_INDEX_BYTES
            }
        }
    })
}

/// Errors specific to SpladeIndex persistence.
///
/// Audit EH-4 / API-8 / API-9: prior to v1.22.0 audit, several distinct
/// structural corruption conditions (chunk id > u32::MAX, posting list >
/// u32::MAX, chunk_idx > u32::MAX, chunk_count overflow, invalid utf-8 in
/// chunk id, out-of-bounds posting chunk_idx) were all wrapped as
/// `Io(io::Error::new(InvalidData, ...))`. That made the enum less
/// expressive than the dedicated variants already in place and produced
/// nonsense Display output ("io: chunk id exceeds u32::MAX bytes: …").
/// They now route through [`SpladeIndexPersistError::CorruptData`], which
/// is structurally distinct from actual I/O failures. The
/// [`SpladeIndexPersistError::ChecksumMismatch`] variant gained `path`,
/// `expected`, `actual` fields to match `HnswError::ChecksumMismatch`.
/// [`SpladeIndexPersistError::FileTooLarge`] is new and covers audit RB-2
/// (unbounded allocation from an oversized on-disk file).
#[derive(thiserror::Error, Debug)]
pub enum SpladeIndexPersistError {
    /// An underlying filesystem operation failed. Also used by `save` when
    /// the target path has no parent directory (`InvalidInput`).
    #[error("io: {0}")]
    Io(#[from] std::io::Error),
    /// The first four header bytes are not the expected file magic.
    #[error("SPLADE index file has wrong magic (not a SPLADE index)")]
    BadMagic,
    /// The header's format version differs from the one this build writes.
    /// Fields are `(version found on disk, version expected)`.
    #[error("SPLADE index file version {0} not supported by this build (expected {1})")]
    UnsupportedVersion(u32, u32),
    /// The generation stored in the header (`disk`) differs from the
    /// generation the caller passed to `load` (`store`).
    #[error(
        "SPLADE index generation {disk} does not match store generation {store} — \
         sparse_vectors have been modified since the index was persisted"
    )]
    GenerationMismatch { disk: u64, store: u64 },
    /// The recomputed blake3 hash does not match the one in the header.
    /// `expected` is the hex of the stored hash, `actual` the hex of the
    /// hash computed over the bytes that were read.
    #[error(
        "SPLADE index body checksum mismatch — file {path} is corrupt \
         (expected {expected}, got {actual})"
    )]
    ChecksumMismatch {
        path: String,
        expected: String,
        actual: String,
    },
    /// The file ended before all expected data was read. The payload is
    /// the file size when the file is shorter than the header, `0` when
    /// the header read itself hits EOF, and the body-relative parse
    /// position when the body runs out mid-record.
    #[error("SPLADE index file truncated — expected more data at offset {0}")]
    Truncated(u64),
    /// A structural invariant of the payload is violated (value does not
    /// fit its on-disk width, invalid utf-8, out-of-range index, infeasible
    /// count). The string describes the specific condition.
    #[error("SPLADE index payload corrupt: {0}")]
    CorruptData(String),
    /// The file on disk is larger than the configured size cap; `load`
    /// refuses to read it.
    #[error(
        "SPLADE index file {path} is {size} bytes, exceeds maximum {limit} bytes. \
         Set CQS_SPLADE_MAX_INDEX_BYTES to override."
    )]
    FileTooLarge { path: String, size: u64, limit: u64 },
}

/// In-memory inverted index for sparse vector search.
///
/// Structure: `token_id → [(chunk_index, weight)]`. For each vocabulary
/// token, stores which chunks contain it and how important it is.
///
/// A `chunk_index` is a position in `id_map`; `build` assigns indices in
/// the order chunks are supplied, so resolving a posting back to its chunk
/// id is a plain vector lookup.
pub struct SpladeIndex {
    /// Inverted postings: token_id → [(chunk_index, weight)]
    postings: HashMap<u32, Vec<(usize, f32)>>,
    /// Sequential chunk ID map (chunk_index → chunk_id string)
    id_map: Vec<String>,
}

impl SpladeIndex {
    /// Construct the inverted index from `(chunk_id, sparse_vector)` pairs.
    ///
    /// Each chunk receives the next sequential index (its position in the
    /// input), and every `(token_id, weight)` entry of its sparse vector is
    /// appended to that token's posting list under that index.
    pub fn build(chunks: Vec<(String, SparseVector)>) -> Self {
        let _span = tracing::info_span!("splade_index_build", chunks = chunks.len()).entered();

        let mut id_map: Vec<String> = Vec::with_capacity(chunks.len());
        let mut postings: HashMap<u32, Vec<(usize, f32)>> = HashMap::new();

        for (chunk_id, sparse) in chunks {
            // The index of this chunk is the slot it is about to occupy.
            let chunk_index = id_map.len();
            for &(token_id, weight) in &sparse {
                postings
                    .entry(token_id)
                    .or_default()
                    .push((chunk_index, weight));
            }
            id_map.push(chunk_id);
        }

        tracing::info!(
            unique_tokens = postings.len(),
            chunks = id_map.len(),
            "SPLADE index built"
        );

        Self { postings, id_map }
    }

    /// Return the top-`k` chunks for `query` with no chunk filtering.
    ///
    /// Thin wrapper over [`Self::search_with_filter`] using a predicate
    /// that accepts every chunk id.
    pub fn search(&self, query: &SparseVector, k: usize) -> Vec<IndexResult> {
        let accept_all = |_: &str| true;
        self.search_with_filter(query, k, &accept_all)
    }

    /// Return the top-`k` chunks for `query`, considering only chunks whose
    /// id satisfies `filter`.
    ///
    /// The score of a chunk is the dot product of the query's sparse vector
    /// with the chunk's sparse vector, accumulated by walking the posting
    /// list of each query token. Chunks rejected by `filter` never receive
    /// a score entry. An empty query or an empty index yields no results.
    pub fn search_with_filter(
        &self,
        query: &SparseVector,
        k: usize,
        filter: &dyn Fn(&str) -> bool,
    ) -> Vec<IndexResult> {
        let _span = tracing::debug_span!(
            "splade_index_search",
            k,
            query_terms = query.len(),
            index_size = self.id_map.len()
        )
        .entered();

        if self.id_map.is_empty() || query.is_empty() {
            return Vec::new();
        }

        // Per-chunk running dot product, keyed by chunk index.
        let mut scores: HashMap<usize, f32> = HashMap::new();
        for &(token_id, query_weight) in query {
            let Some(posting_list) = self.postings.get(&token_id) else {
                continue;
            };
            for &(chunk_idx, doc_weight) in posting_list {
                // A posting only contributes when its index resolves to a
                // chunk id and that id passes the caller's predicate.
                match self.id_map.get(chunk_idx) {
                    Some(chunk_id) if filter(chunk_id.as_str()) => {
                        *scores.entry(chunk_idx).or_insert(0.0) += query_weight * doc_weight;
                    }
                    _ => {}
                }
            }
        }

        // PF-V1.25-3: a bounded heap keeps the top-k in O(n log k) rather
        // than sorting everything. `BoundedScoreHeap::into_sorted_vec`
        // applies the id tie-breaker, so equal scores come out in a stable
        // order even though the HashMap above iterates in random order.
        let mut top_k = crate::search::scoring::BoundedScoreHeap::new(k);
        for (chunk_idx, score) in scores {
            let Some(chunk_id) = self.id_map.get(chunk_idx) else {
                continue;
            };
            top_k.push(chunk_id.clone(), score);
        }

        let ranked = top_k.into_sorted_vec();
        let results: Vec<IndexResult> = ranked
            .into_iter()
            .map(|(id, score)| IndexResult { id, score })
            .collect();

        tracing::debug!(results = results.len(), "SPLADE search complete");
        results
    }

    /// Number of chunks in the index.
    ///
    /// This is the length of the chunk id map, so a chunk whose sparse
    /// vector was empty (and therefore has no postings) is still counted.
    pub fn len(&self) -> usize {
        self.id_map.len()
    }

    /// Whether the index is empty, i.e. contains no chunks.
    ///
    /// Based on the chunk id map only; the postings map is not consulted.
    pub fn is_empty(&self) -> bool {
        self.id_map.is_empty()
    }

    /// Number of unique tokens in the index.
    ///
    /// Equal to the number of posting lists (one per distinct `token_id`).
    pub fn unique_tokens(&self) -> usize {
        self.postings.len()
    }

    /// Serialize the index to `path` with the given generation counter.
    ///
    /// Writes atomically via a temp file + rename so a crash mid-save leaves
    /// the old file untouched. If creating, writing, or fsyncing the temp
    /// file fails, the partial temp file is removed (best-effort) before the
    /// error is returned. The file layout is:
    ///
    /// ```text
    /// Header (64 bytes):
    ///   [0..4]   magic "SPDX"
    ///   [4..8]   format version (u32 LE)
    ///   [8..16]  generation (u64 LE)
    ///   [16..24] chunk count (u64 LE)
    ///   [24..32] unique token count (u64 LE)
    ///   [32..64] blake3-256 of header[0..32] followed by the body
    ///
    /// Body:
    ///   id_map section:
    ///     for each chunk in insertion order:
    ///       u32 LE  id length (bytes)
    ///       N bytes id (utf-8, not null-terminated)
    ///   postings section:
    ///     for each unique token (HashMap iteration order — non-deterministic
    ///     across builds; the checksum still matches because we hash
    ///     what we actually wrote):
    ///       u32 LE  token_id
    ///       u32 LE  posting count
    ///       for each posting (count times):
    ///         u32 LE  chunk_idx
    ///         f32 LE  weight
    /// ```
    ///
    /// The body is built in memory (~60-100MB for SPLADE-Code 0.6B on a
    /// cqs-sized project) so we can hash and write in one pass. That's the
    /// same memory footprint we already hold for the in-memory index itself,
    /// so no new budget is introduced.
    ///
    /// # Errors
    ///
    /// - [`SpladeIndexPersistError::CorruptData`] if a chunk id length, a
    ///   posting-list length, or a chunk index does not fit in `u32`.
    /// - [`SpladeIndexPersistError::Io`] if `path` has no parent, or if
    ///   creating the parent directory, writing/fsyncing the temp file, or
    ///   the final atomic replace fails.
    pub fn save(&self, path: &Path, generation: u64) -> Result<(), SpladeIndexPersistError> {
        let _span = tracing::info_span!(
            "splade_index_save",
            path = %path.display(),
            generation,
            chunks = self.id_map.len(),
            tokens = self.postings.len(),
        )
        .entered();

        // Build the body into a Vec<u8> so we can hash it in one pass and
        // write it without an extra seek-back step on the real file.
        let mut body: Vec<u8> = Vec::with_capacity(Self::estimate_body_size(
            self.id_map.len(),
            self.postings.values().map(|v| v.len()).sum::<usize>(),
        ));

        // id_map
        for id in &self.id_map {
            let len_u32: u32 = id.len().try_into().map_err(|_| {
                // Audit EH-4: these are structural invariants, not I/O errors.
                SpladeIndexPersistError::CorruptData(format!(
                    "chunk id exceeds u32::MAX bytes: {}",
                    id.len()
                ))
            })?;
            body.extend_from_slice(&len_u32.to_le_bytes());
            body.extend_from_slice(id.as_bytes());
        }

        // postings
        for (&token_id, posting_list) in &self.postings {
            body.extend_from_slice(&token_id.to_le_bytes());
            let count_u32: u32 = posting_list.len().try_into().map_err(|_| {
                SpladeIndexPersistError::CorruptData(format!(
                    "posting list for token {} exceeds u32::MAX entries: {}",
                    token_id,
                    posting_list.len()
                ))
            })?;
            body.extend_from_slice(&count_u32.to_le_bytes());
            for &(chunk_idx, weight) in posting_list {
                let idx_u32: u32 = chunk_idx.try_into().map_err(|_| {
                    SpladeIndexPersistError::CorruptData(format!(
                        "chunk_idx exceeds u32::MAX: {}",
                        chunk_idx
                    ))
                })?;
                body.extend_from_slice(&idx_u32.to_le_bytes());
                body.extend_from_slice(&weight.to_le_bytes());
            }
        }

        // Build the header FIRST (without the checksum), so we can include it
        // in the hash — audit RB-1: previously only the body was hashed, which
        // meant a single bit flip in the unhashed header `chunk_count` could
        // pass integrity checks and cause `Vec::with_capacity(usize::MAX)` to
        // panic inside `load()`. Now the hash covers bytes [0..32] of the
        // header AND the body, so any header corruption is detected at load
        // time. The hash field itself (bytes [32..64]) can't cover itself.
        let mut header = [0u8; SPLADE_INDEX_HEADER_LEN];
        header[0..4].copy_from_slice(SPLADE_INDEX_MAGIC);
        header[4..8].copy_from_slice(&SPLADE_INDEX_VERSION.to_le_bytes());
        header[8..16].copy_from_slice(&generation.to_le_bytes());
        header[16..24].copy_from_slice(&(self.id_map.len() as u64).to_le_bytes());
        header[24..32].copy_from_slice(&(self.postings.len() as u64).to_le_bytes());

        // Hash header[0..32] || body in one go.
        let mut hasher = blake3::Hasher::new();
        hasher.update(&header[0..32]);
        hasher.update(&body);
        let combined_hash = hasher.finalize();
        header[32..64].copy_from_slice(combined_hash.as_bytes());

        // Atomic write: write to a same-directory temp file, fsync, rename.
        let parent = path.parent().ok_or_else(|| {
            SpladeIndexPersistError::Io(std::io::Error::new(
                std::io::ErrorKind::InvalidInput,
                format!("SPLADE index path has no parent: {}", path.display()),
            ))
        })?;
        std::fs::create_dir_all(parent)?;

        // Audit PB-NEW-9: use `to_string_lossy()` instead of
        // `to_str().unwrap_or(...)` so non-UTF-8 path components produce a
        // unique-ish temp name rather than collapsing to a shared fallback
        // that could collide across concurrent saves.
        let file_name = path
            .file_name()
            .map(|s| s.to_string_lossy())
            .unwrap_or_else(|| "splade.index".into());
        // Randomized suffix so two concurrent saves don't clobber each other's
        // temp file. Same pattern as the HNSW save path.
        let suffix = crate::temp_suffix();
        let tmp_path = parent.join(format!(".{}.{:016x}.tmp", file_name, suffix));

        // Write header + body to the temp file and fsync it. Wrapped in a
        // closure so every failure funnels through one place: previously a
        // failed `write_all`/`flush`/`sync_all` (e.g. disk full) returned via
        // `?` and left the partial temp file behind until some later `load()`
        // happened to sweep it. The file handle is dropped when the closure
        // returns, so the cleanup below never races our own open descriptor.
        let write_tmp = || -> std::io::Result<()> {
            let file = {
                #[cfg(unix)]
                {
                    use std::os::unix::fs::OpenOptionsExt;
                    std::fs::OpenOptions::new()
                        .write(true)
                        .create(true)
                        .truncate(true)
                        .mode(0o600)
                        .open(&tmp_path)?
                }
                #[cfg(not(unix))]
                {
                    std::fs::File::create(&tmp_path)?
                }
            };
            let mut writer = std::io::BufWriter::new(file);
            writer.write_all(&header)?;
            writer.write_all(&body)?;
            writer.flush()?;
            writer.get_ref().sync_all()?;
            Ok(())
        };
        if let Err(e) = write_tmp() {
            // Best-effort: if the open itself failed there is nothing to
            // remove and this is a harmless no-op error.
            let _ = std::fs::remove_file(&tmp_path);
            return Err(SpladeIndexPersistError::Io(e));
        }

        // Audit PB-NEW-3 / PB-NEW-4 / PB-NEW-5: atomic rename with
        // cross-device fallback and parent-dir fsync. The full sequence
        // (fsync tmp -> rename -> EXDEV fallback with fsync -> parent-dir
        // fsync) now lives in `cqs::fs::atomic_replace`; the previous
        // inline implementation was the fourth copy of this pattern and
        // two siblings had shipped without one of the fsync calls
        // (DS-V1.25-1, DS-V1.25-4).
        //
        // The BufWriter above already flushed and sync_all'd the tmp
        // file, but atomic_replace re-fsyncs the path it reopens — this
        // is effectively free compared to the write itself and matches
        // the invariant the helper advertises.
        crate::fs::atomic_replace(&tmp_path, path).map_err(|e| {
            // Best-effort cleanup of our own tmp on unexpected error
            // before returning.
            let _ = std::fs::remove_file(&tmp_path);
            SpladeIndexPersistError::Io(e)
        })?;

        tracing::info!(
            path = %path.display(),
            bytes = SPLADE_INDEX_HEADER_LEN + body.len(),
            "SPLADE index persisted"
        );
        Ok(())
    }

    /// Attempt to load a persisted index from `path`.
    ///
    /// If the file is missing the function returns `Ok(None)`. If the file
    /// exists but is unreadable, corrupt, or stale relative to
    /// `expected_generation`, returns an `Err` describing the reason; the
    /// caller is expected to fall back to rebuild-from-SQLite and re-persist.
    ///
    /// Safety guards (audit cluster):
    /// - RB-2: file size capped at `CQS_SPLADE_MAX_INDEX_BYTES` (default 2 GB)
    ///   before `read_to_end`, and the read itself is bounded by the same
    ///   cap, so neither a corrupt file nor one that grows between the size
    ///   check and the read can trigger an unbounded allocation
    /// - RB-1: blake3 hash covers header[0..32] + body, so any header bit
    ///   flip (not just body) is detected before `Vec::with_capacity` is
    ///   called on chunk_count / token_count
    /// - RM-4: orphan temp files from previous crashed saves are cleaned up
    ///   at the top of `load()`, mirroring the HNSW pattern
    /// - EH-4 / API-8: corrupt-data conditions route through the dedicated
    ///   `CorruptData` variant, and `ChecksumMismatch` carries `path` /
    ///   `expected` / `actual` hex fields instead of a unit variant
    ///
    /// # Errors
    ///
    /// - [`SpladeIndexPersistError::Io`] for filesystem failures other than
    ///   "not found".
    /// - [`SpladeIndexPersistError::FileTooLarge`] when the file exceeds the
    ///   size cap.
    /// - [`SpladeIndexPersistError::Truncated`] when the file is shorter than
    ///   the header or the body ends mid-record.
    /// - [`SpladeIndexPersistError::BadMagic`],
    ///   [`SpladeIndexPersistError::UnsupportedVersion`],
    ///   [`SpladeIndexPersistError::GenerationMismatch`] and
    ///   [`SpladeIndexPersistError::ChecksumMismatch`] for the corresponding
    ///   header checks, performed in that order.
    /// - [`SpladeIndexPersistError::CorruptData`] for infeasible counts,
    ///   invalid utf-8 in a chunk id, or an out-of-range posting index.
    pub fn load(
        path: &Path,
        expected_generation: u64,
    ) -> Result<Option<Self>, SpladeIndexPersistError> {
        let _span = tracing::info_span!(
            "splade_index_load",
            path = %path.display(),
            expected_generation,
        )
        .entered();

        // Audit RM-4: clean up orphan `.splade.index.bin.*.tmp` temp files
        // left by previous crashed saves, mirroring HNSW's cleanup loop.
        // Best-effort: errors are logged but don't fail the load.
        Self::cleanup_orphan_temp_files(path);

        // Audit RB-2: cap file size BEFORE read_to_end. Env override
        // `CQS_SPLADE_MAX_INDEX_BYTES` for cases where a genuine 2+ GB
        // index is expected (huge corpus with SPLADE-Code 0.6B).
        let metadata = match std::fs::metadata(path) {
            Ok(m) => m,
            Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
                tracing::debug!("SPLADE index file absent, will rebuild");
                return Ok(None);
            }
            Err(e) => return Err(e.into()),
        };
        let file_size = metadata.len();
        // Clamp the cap to what this target can address. On 64-bit targets
        // this is a no-op; on 32-bit targets it keeps an oversized env
        // override from letting a >4 GB size through to the `as usize`
        // conversion below, where it would silently wrap.
        let size_limit = splade_max_index_bytes().min(usize::MAX as u64);
        if file_size > size_limit {
            return Err(SpladeIndexPersistError::FileTooLarge {
                path: path.display().to_string(),
                size: file_size,
                limit: size_limit,
            });
        }
        // Compare in u64 (widening the constant is lossless) rather than
        // narrowing the file size.
        if file_size < SPLADE_INDEX_HEADER_LEN as u64 {
            return Err(SpladeIndexPersistError::Truncated(file_size));
        }

        let file = std::fs::File::open(path)?;
        let mut reader = std::io::BufReader::new(file);

        // Header.
        let mut header = [0u8; SPLADE_INDEX_HEADER_LEN];
        reader.read_exact(&mut header).map_err(|e| {
            if e.kind() == std::io::ErrorKind::UnexpectedEof {
                SpladeIndexPersistError::Truncated(0)
            } else {
                SpladeIndexPersistError::Io(e)
            }
        })?;

        if &header[0..4] != SPLADE_INDEX_MAGIC {
            return Err(SpladeIndexPersistError::BadMagic);
        }
        let version = u32::from_le_bytes(header[4..8].try_into().unwrap());
        if version != SPLADE_INDEX_VERSION {
            return Err(SpladeIndexPersistError::UnsupportedVersion(
                version,
                SPLADE_INDEX_VERSION,
            ));
        }
        let disk_generation = u64::from_le_bytes(header[8..16].try_into().unwrap());
        if disk_generation != expected_generation {
            return Err(SpladeIndexPersistError::GenerationMismatch {
                disk: disk_generation,
                store: expected_generation,
            });
        }
        let chunk_count = u64::from_le_bytes(header[16..24].try_into().unwrap());
        let token_count = u64::from_le_bytes(header[24..32].try_into().unwrap());
        let stored_hash: [u8; 32] = header[32..64].try_into().unwrap();

        // Audit PF-4: pre-allocate the body Vec from known file size. The
        // previous `Vec::new()` caused ~log₂(59MB) reallocations on a typical
        // SPLADE-Code 0.6B index, ~100ms of wasted memcpy per warm query.
        // `file_size <= size_limit <= usize::MAX`, so the cast is lossless.
        let body_len = (file_size as usize).saturating_sub(SPLADE_INDEX_HEADER_LEN);
        let mut body = Vec::with_capacity(body_len);
        // Audit RB-2 (hardening): the size check above is only a snapshot.
        // Bound the read itself so a file that grows after `metadata()` was
        // taken still cannot allocate past the cap. A file cut short by
        // this bound fails the checksum below and is rejected.
        let max_body_bytes = size_limit.saturating_sub(SPLADE_INDEX_HEADER_LEN as u64);
        reader.take(max_body_bytes).read_to_end(&mut body)?;

        // Audit RB-1: hash covers header[0..32] + body. Previously only the
        // body was hashed, so flipping a bit in `chunk_count` (header bytes
        // [16..24]) passed the integrity check and reached
        // `Vec::with_capacity(usize::MAX)` → process panic.
        let mut hasher = blake3::Hasher::new();
        hasher.update(&header[0..32]);
        hasher.update(&body);
        let actual_hash = hasher.finalize();
        if actual_hash.as_bytes() != &stored_hash {
            // Use blake3::Hash::to_hex for the expected hex encoding so we
            // don't pull in the `hex` crate just for this one call.
            let expected_hex = blake3::Hash::from_bytes(stored_hash).to_hex().to_string();
            return Err(SpladeIndexPersistError::ChecksumMismatch {
                path: path.display().to_string(),
                expected: expected_hex,
                actual: actual_hash.to_hex().to_string(),
            });
        }

        // Parse body. After the combined-header-hash check above, both
        // chunk_count and token_count are known to be authentic from the
        // author's perspective — but we still apply a loose sanity bound
        // to defend against pre-v1.22.0 files that were written under the
        // old unhashed-header scheme and may have been corrupted in that
        // window. Every chunk consumes >= 4 bytes for its length prefix
        // and every token entry consumes >= 8 bytes, so these are hard
        // upper bounds on feasible counts given the body length.
        let chunk_count_usize: usize = chunk_count.try_into().map_err(|_| {
            SpladeIndexPersistError::CorruptData(format!(
                "chunk_count {} does not fit in usize",
                chunk_count
            ))
        })?;
        if chunk_count_usize > body.len() / 4 {
            return Err(SpladeIndexPersistError::CorruptData(format!(
                "chunk_count {} exceeds feasible bound from body length {}",
                chunk_count_usize,
                body.len()
            )));
        }
        let mut id_map: Vec<String> = Vec::with_capacity(chunk_count_usize);
        let mut cursor: usize = 0;

        // Bounds guard: fails with `Truncated(cursor)` unless `n` more bytes
        // are available at `cursor`. Saturating add keeps a huge `n` from
        // wrapping around and passing the check.
        fn need(body: &[u8], cursor: usize, n: usize) -> Result<(), SpladeIndexPersistError> {
            if cursor.saturating_add(n) > body.len() {
                Err(SpladeIndexPersistError::Truncated(cursor as u64))
            } else {
                Ok(())
            }
        }

        for _ in 0..chunk_count_usize {
            need(&body, cursor, 4)?;
            let len = u32::from_le_bytes(body[cursor..cursor + 4].try_into().unwrap()) as usize;
            cursor += 4;
            need(&body, cursor, len)?;
            // Audit PF-5: `.to_string()` here allocates an owned String from
            // a `&str` borrow into `body`. This is inherent — `id_map` owns
            // its strings and the source is a transient byte-slice reference.
            // `String::from` would be equivalent; there is no zero-copy path
            // because `body` is dropped after parsing completes.
            let id = std::str::from_utf8(&body[cursor..cursor + len])
                .map_err(|e| {
                    SpladeIndexPersistError::CorruptData(format!(
                        "chunk id is not valid utf-8: {}",
                        e
                    ))
                })?
                .to_string();
            cursor += len;
            id_map.push(id);
        }

        let token_count_usize: usize = token_count.try_into().map_err(|_| {
            SpladeIndexPersistError::CorruptData(format!(
                "token_count {} does not fit in usize",
                token_count
            ))
        })?;
        if token_count_usize > body.len() / 8 {
            return Err(SpladeIndexPersistError::CorruptData(format!(
                "token_count {} exceeds feasible bound from body length {}",
                token_count_usize,
                body.len()
            )));
        }
        let mut postings: HashMap<u32, Vec<(usize, f32)>> =
            HashMap::with_capacity(token_count_usize);

        for _ in 0..token_count_usize {
            // token_id (4) + posting count (4)
            need(&body, cursor, 8)?;
            let token_id = u32::from_le_bytes(body[cursor..cursor + 4].try_into().unwrap());
            cursor += 4;
            let posting_count =
                u32::from_le_bytes(body[cursor..cursor + 4].try_into().unwrap()) as usize;
            cursor += 4;
            // Verify the whole posting list is present before allocating
            // for it, so `posting_count` can't drive an oversized reserve.
            need(&body, cursor, posting_count.saturating_mul(8))?;
            let mut postings_for_token: Vec<(usize, f32)> = Vec::with_capacity(posting_count);
            for _ in 0..posting_count {
                let chunk_idx =
                    u32::from_le_bytes(body[cursor..cursor + 4].try_into().unwrap()) as usize;
                cursor += 4;
                let weight = f32::from_le_bytes(body[cursor..cursor + 4].try_into().unwrap());
                cursor += 4;
                if chunk_idx >= id_map.len() {
                    return Err(SpladeIndexPersistError::CorruptData(format!(
                        "posting chunk_idx {} out of bounds for id_map len {}",
                        chunk_idx,
                        id_map.len()
                    )));
                }
                postings_for_token.push((chunk_idx, weight));
            }
            postings.insert(token_id, postings_for_token);
        }

        if cursor != body.len() {
            tracing::warn!(
                parsed = cursor,
                body_len = body.len(),
                "SPLADE index body has trailing bytes after parse — tolerating but format may be wrong"
            );
        }

        tracing::info!(
            chunks = id_map.len(),
            tokens = postings.len(),
            "SPLADE index loaded from disk"
        );
        Ok(Some(Self { postings, id_map }))
    }

    /// Audit RM-4: sweep away `.<target>.<hex>.tmp` files that a crashed
    /// save left next to `path`. Mirrors the HNSW cleanup at
    /// `hnsw/persist.rs:498-510`.
    ///
    /// Strictly best-effort: nothing is returned and every failure is only
    /// logged at debug level, since a stray temp file is a nuisance rather
    /// than an error. Does nothing when `path` has no parent or its file
    /// name is not valid UTF-8.
    fn cleanup_orphan_temp_files(path: &Path) {
        let Some(dir) = path.parent() else {
            return;
        };
        let Some(target_name) = path.file_name().and_then(|s| s.to_str()) else {
            return;
        };

        // Temp files are named `.<target>.<hex>.tmp`.
        let prefix = format!(".{}.", target_name);

        let listing = match std::fs::read_dir(dir) {
            Ok(listing) => listing,
            Err(e) => {
                tracing::debug!(
                    error = %e,
                    parent = %dir.display(),
                    "read_dir for orphan cleanup failed, skipping"
                );
                return;
            }
        };

        // `flatten` drops directory entries that failed to read.
        for entry in listing.flatten() {
            // Non-utf8 filenames cannot match our prefix; leave them alone.
            let Ok(name) = entry.file_name().into_string() else {
                continue;
            };
            if !name.starts_with(&prefix) || !name.ends_with(".tmp") {
                continue;
            }

            let removal = std::fs::remove_file(entry.path());
            match removal {
                Err(e) => tracing::debug!(
                    error = %e,
                    orphan = %name,
                    "Failed to remove orphan SPLADE temp file"
                ),
                Ok(_) => tracing::debug!(
                    orphan = %name,
                    "Removed orphan SPLADE temp file"
                ),
            }
        }
    }

    /// Return a usable index, preferring the persisted copy at `path`.
    ///
    /// If `load` yields an index for `expected_generation`, that index is
    /// returned and `rows` is never invoked. Otherwise (file absent, or
    /// `load` reported an error) `rows` is called once, the index is built
    /// from its output, and a non-empty result is written back to `path`.
    ///
    /// The second tuple element is `true` when the index was rebuilt and
    /// `false` when it came from disk.
    ///
    /// The caller is responsible for reading `expected_generation` from the
    /// store and passing the path next to the rest of the index files.
    pub fn load_or_build(
        path: &Path,
        expected_generation: u64,
        rows: impl FnOnce() -> Vec<(String, SparseVector)>,
    ) -> (Self, bool) {
        // Collapse the three load outcomes into "have an index" / "don't",
        // logging why we are about to rebuild in the latter case.
        let cached = match Self::load(path, expected_generation) {
            Ok(found) => {
                if found.is_none() {
                    tracing::debug!("SPLADE index not on disk, building from store");
                }
                found
            }
            Err(e) => {
                tracing::warn!(
                    error = %e,
                    "SPLADE index on-disk load failed, rebuilding from store"
                );
                None
            }
        };
        if let Some(reused) = cached {
            return (reused, false);
        }

        let fresh = Self::build(rows());

        // An empty index is deliberately not persisted: the stub file would
        // reload as "no vectors" next time, which is correct but only adds
        // clutter to the directory.
        if fresh.is_empty() {
            return (fresh, true);
        }

        // Persisting is best-effort. On failure the freshly built in-memory
        // index is still returned so search can proceed.
        if let Err(e) = fresh.save(path, expected_generation) {
            tracing::warn!(
                path = %path.display(),
                error = %e,
                "SPLADE index persist failed, continuing with in-memory index only"
            );
        }
        (fresh, true)
    }

    /// Capacity hint for the serialized body so `save()` can allocate once.
    ///
    /// This is a heuristic, not an exact size. Each chunk is budgeted a
    /// 4-byte header plus 64 bytes of id text, each posting 8 bytes, and
    /// token headers 8 bytes apiece. No token count is passed in, so the
    /// token-header term is scaled by `n_chunks` instead.
    fn estimate_body_size(n_chunks: usize, n_postings: usize) -> usize {
        const CHUNK_HEADER_BYTES: usize = 4;
        const ID_BYTES_GUESS: usize = 64;
        const POSTING_BYTES: usize = 8;
        const TOKEN_HEADER_BYTES: usize = 8;

        // Everything that scales with the chunk count, folded into one factor.
        let per_chunk = CHUNK_HEADER_BYTES + ID_BYTES_GUESS + TOKEN_HEADER_BYTES;
        n_chunks * per_chunk + n_postings * POSTING_BYTES
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Three-chunk fixture shared by the search and persistence tests.
    ///
    /// Token weights per chunk:
    /// - `chunk_a`: 1 -> 0.5, 2 -> 0.3, 3 -> 0.8
    /// - `chunk_b`: 1 -> 0.7, 4 -> 0.6
    /// - `chunk_c`: 2 -> 0.9, 3 -> 0.1, 5 -> 0.4
    fn make_test_index() -> SpladeIndex {
        SpladeIndex::build(vec![
            ("chunk_a".to_string(), vec![(1, 0.5), (2, 0.3), (3, 0.8)]),
            ("chunk_b".to_string(), vec![(1, 0.7), (4, 0.6)]),
            ("chunk_c".to_string(), vec![(2, 0.9), (3, 0.1), (5, 0.4)]),
        ])
    }

    /// Building from no rows yields an empty index with no tokens.
    #[test]
    fn test_build_empty() {
        let index = SpladeIndex::build(vec![]);
        assert!(index.is_empty());
        assert_eq!(index.unique_tokens(), 0);
    }

    /// A single-token query returns the chunks containing that token,
    /// ordered by descending weight.
    #[test]
    fn test_build_and_search() {
        let index = make_test_index();
        assert_eq!(index.len(), 3);

        // Query that matches token 1 (in chunk_a and chunk_b)
        let results = index.search(&vec![(1, 1.0)], 10);
        // Check the length before indexing so a regression fails with a
        // readable message rather than an index-out-of-bounds panic.
        assert_eq!(
            results.len(),
            2,
            "token 1 appears only in chunk_a and chunk_b"
        );
        // chunk_b has weight 0.7 for token 1, chunk_a has 0.5
        assert_eq!(results[0].id, "chunk_b");
        assert_eq!(results[1].id, "chunk_a");
    }

    /// Scores are the dot product of query weights and chunk weights.
    #[test]
    fn test_dot_product_correct() {
        let index = make_test_index();
        // Query: token 1 (w=1.0) + token 2 (w=1.0)
        // chunk_a: 1*0.5 + 1*0.3 = 0.8
        // chunk_b: 1*0.7 + 0 = 0.7
        // chunk_c: 0 + 1*0.9 = 0.9
        let results = index.search(&vec![(1, 1.0), (2, 1.0)], 10);
        // All three chunks share at least one token with the query; assert
        // that before indexing into the result list.
        assert_eq!(
            results.len(),
            3,
            "every chunk shares a token with the query"
        );
        assert_eq!(results[0].id, "chunk_c"); // 0.9
        assert!((results[0].score - 0.9).abs() < 1e-5);
        assert_eq!(results[1].id, "chunk_a"); // 0.8
        assert!((results[1].score - 0.8).abs() < 1e-5);
        assert_eq!(results[2].id, "chunk_b"); // 0.7
        assert!((results[2].score - 0.7).abs() < 1e-5);
    }

    /// The id filter removes chunks that would otherwise match.
    #[test]
    fn test_search_filter() {
        let index = make_test_index();
        // Filter: only chunk_a
        let results = index.search_with_filter(&vec![(1, 1.0)], 10, &|id: &str| id == "chunk_a");
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].id, "chunk_a");
    }

    /// A query whose only token is absent from the index matches nothing.
    #[test]
    fn test_search_no_match() {
        let index = make_test_index();
        // Query with token not in index
        let results = index.search(&vec![(999, 1.0)], 10);
        assert!(results.is_empty());
    }

    /// An empty query matches nothing.
    #[test]
    fn test_search_empty_query() {
        let index = make_test_index();
        let results = index.search(&vec![], 10);
        assert!(results.is_empty());
    }

    /// The result list is truncated to `k` entries.
    #[test]
    fn test_search_respects_k() {
        let index = make_test_index();
        let results = index.search(&vec![(1, 1.0), (2, 1.0), (3, 1.0)], 2);
        assert_eq!(results.len(), 2);
    }

    /// Save followed by load reproduces both the structure and the search
    /// behaviour of the original index.
    #[test]
    fn test_persist_roundtrip() {
        let dir = tempfile::TempDir::new().unwrap();
        let path = dir.path().join("splade.index.bin");
        let original = make_test_index();

        original.save(&path, 42).unwrap();
        let loaded = SpladeIndex::load(&path, 42).unwrap().unwrap();

        // Structural equivalence: id_map order + postings content.
        assert_eq!(loaded.id_map, original.id_map);
        assert_eq!(loaded.postings.len(), original.postings.len());
        for (token_id, postings) in &original.postings {
            let loaded_postings = loaded.postings.get(token_id).unwrap();
            assert_eq!(loaded_postings.len(), postings.len());
            // Each posting list is order-preserved within save/load so we can
            // compare element-wise.
            for (a, b) in loaded_postings.iter().zip(postings.iter()) {
                assert_eq!(a.0, b.0);
                assert!((a.1 - b.1).abs() < f32::EPSILON);
            }
        }

        // Query parity: running the same search on loaded vs original yields
        // identical results in both order and score.
        let q = vec![(1u32, 1.0f32), (2, 0.5)];
        let r_orig = original.search(&q, 10);
        let r_load = loaded.search(&q, 10);
        assert_eq!(r_orig.len(), r_load.len());
        for (a, b) in r_orig.iter().zip(r_load.iter()) {
            assert_eq!(a.id, b.id);
            assert!((a.score - b.score).abs() < f32::EPSILON);
        }
    }

    /// Loading with a generation other than the one saved is rejected with
    /// `GenerationMismatch`; the matching generation still loads.
    #[test]
    fn test_persist_generation_mismatch_rejected() {
        let dir = tempfile::TempDir::new().unwrap();
        let path = dir.path().join("splade.index.bin");
        let original = make_test_index();
        original.save(&path, 7).unwrap();

        match SpladeIndex::load(&path, 8) {
            Err(SpladeIndexPersistError::GenerationMismatch { disk, store }) => {
                assert_eq!(disk, 7);
                assert_eq!(store, 8);
            }
            Ok(_) => panic!("expected GenerationMismatch, got Ok"),
            Err(e) => panic!("expected GenerationMismatch, got {}", e),
        }

        // And a matching generation still loads.
        let reloaded = SpladeIndex::load(&path, 7).unwrap();
        assert!(reloaded.is_some());
    }

    /// A file of zero bytes (header-sized plus padding) fails the magic check.
    #[test]
    fn test_persist_bad_magic_rejected() {
        let dir = tempfile::TempDir::new().unwrap();
        let path = dir.path().join("splade.index.bin");
        std::fs::write(&path, vec![0u8; SPLADE_INDEX_HEADER_LEN + 16]).unwrap();

        match SpladeIndex::load(&path, 0) {
            Err(SpladeIndexPersistError::BadMagic) => {}
            Ok(_) => panic!("expected BadMagic, got Ok"),
            Err(e) => panic!("expected BadMagic, got {}", e),
        }
    }

    /// Flipping a body byte after saving is caught by the checksum.
    #[test]
    fn test_persist_corrupt_body_rejected() {
        let dir = tempfile::TempDir::new().unwrap();
        let path = dir.path().join("splade.index.bin");
        let original = make_test_index();
        original.save(&path, 1).unwrap();

        // Flip a byte in the body (past the header).
        let mut bytes = std::fs::read(&path).unwrap();
        let target = SPLADE_INDEX_HEADER_LEN + 4;
        bytes[target] ^= 0xFF;
        std::fs::write(&path, &bytes).unwrap();

        match SpladeIndex::load(&path, 1) {
            Err(SpladeIndexPersistError::ChecksumMismatch { .. }) => {}
            Ok(_) => panic!("expected ChecksumMismatch, got Ok"),
            Err(e) => panic!("expected ChecksumMismatch, got {}", e),
        }
    }

    /// A missing file is reported as `Ok(None)`, not as an error.
    #[test]
    fn test_persist_missing_file_returns_none() {
        let dir = tempfile::TempDir::new().unwrap();
        let path = dir.path().join("does-not-exist.bin");
        let result = SpladeIndex::load(&path, 0).unwrap();
        assert!(result.is_none());
    }

    /// `load_or_build` builds and persists when no file exists, reuses the
    /// file while the generation matches, and rebuilds when it changes.
    #[test]
    fn test_load_or_build_persists_on_first_call() {
        let dir = tempfile::TempDir::new().unwrap();
        let path = dir.path().join("splade.index.bin");

        // First call: no file exists, should build and persist.
        let (_idx1, rebuilt1) = SpladeIndex::load_or_build(&path, 5, || {
            vec![
                ("chunk_a".to_string(), vec![(1u32, 0.5f32)]),
                ("chunk_b".to_string(), vec![(1, 0.3), (2, 0.9)]),
            ]
        });
        assert!(rebuilt1, "first call should rebuild");
        assert!(path.exists(), "first call should persist the file");

        // Second call: file exists with matching generation, should load.
        let (idx2, rebuilt2) = SpladeIndex::load_or_build(&path, 5, || {
            panic!("closure should not run when the file is reusable")
        });
        assert!(!rebuilt2, "second call should load from disk");
        assert_eq!(idx2.len(), 2);

        // Third call with bumped generation: should rebuild from the closure.
        let rebuilt_called = std::sync::atomic::AtomicBool::new(false);
        let (idx3, rebuilt3) = SpladeIndex::load_or_build(&path, 6, || {
            rebuilt_called.store(true, std::sync::atomic::Ordering::SeqCst);
            vec![("chunk_c".to_string(), vec![(3u32, 0.7f32)])]
        });
        assert!(rebuilt3);
        assert!(rebuilt_called.load(std::sync::atomic::Ordering::SeqCst));
        // The rebuilt index must reflect the new rows, not the stale file.
        assert_eq!(idx3.len(), 1, "rebuilt index should hold only the new row");
        // The stale generation-5 file must have been replaced, so a direct
        // load at the new generation succeeds.
        let reloaded = SpladeIndex::load(&path, 6).unwrap();
        assert!(
            reloaded.is_some(),
            "rebuild should overwrite the stale on-disk index"
        );
    }

    /// An empty rebuilt index is returned but not written to disk.
    #[test]
    fn test_load_or_build_skips_persist_for_empty_index() {
        let dir = tempfile::TempDir::new().unwrap();
        let path = dir.path().join("splade.index.bin");

        let (idx, rebuilt) = SpladeIndex::load_or_build(&path, 1, Vec::new);
        assert!(rebuilt, "missing file should trigger a rebuild");
        assert!(idx.is_empty());
        assert!(!path.exists(), "empty index should not be persisted");
    }
}