rgx/
index.rs

1//! The in-memory candidate index: trigram -> set of file IDs, plus a file table.
2//!
3//! This is the "narrow the candidate set" half of rgx. Building, querying, incremental update, and
4//! persistence are kept separable from the walk and from ripgrep so each can be tested in isolation.
5//!
6//! Incremental updates are deliberately simple and lean on the confirm step for correctness: when a
7//! file changes we add its *current* trigrams (so no new-trigram query can miss it); trigrams that
8//! the file no longer contains linger in their posting lists, which only ever produces an extra
9//! candidate that ripgrep then filters out. Deleted files are tombstoned (`live = false`) so they
10//! stop being candidates. See `docs/index-and-storage.md` sections 3.1 and 5.
11
12use std::io::{Read, Write};
13use std::path::{Path, PathBuf};
14use std::sync::Mutex;
15use std::sync::atomic::{AtomicUsize, Ordering};
16use std::time::UNIX_EPOCH;
17
18use anyhow::{Context, Result, bail};
19use ignore::{WalkBuilder, WalkState};
20use rayon::prelude::*;
21use roaring::RoaringBitmap;
22use rustc_hash::{FxHashMap, FxHashSet};
23
24use crate::query::Query;
25use crate::trigram::{self, Trigram};
26
27/// Snapshot magic + format version. Bump to invalidate (load returns an error -> caller rebuilds).
28const SNAPSHOT_MAGIC: &[u8; 8] = b"RGXIDX01";
29
30/// Bytes from the start of a file scanned for a NUL to decide "binary from the start". Conservative:
31/// if a NUL is in here, ripgrep also treats the file as binary and prints nothing for it in a
32/// recursive search, so giving it no postings is sound. Deeper NULs are handled by the confirm
33/// step. See `docs/index-and-storage.md` section 3.1.
34const BINARY_SNIFF_BYTES: usize = 1024;
35
36/// A file known to the index. `live` is false once the file is deleted (tombstoned).
37struct FileEntry {
38    path: PathBuf,
39    size: u64,
40    mtime_ns: u64,
41    live: bool,
42}
43
44#[derive(Default)]
45pub struct Index {
46    entries: Vec<FileEntry>,
47    path_to_id: FxHashMap<PathBuf, u32>,
48    postings: FxHashMap<u32, RoaringBitmap>,
49}
50
51impl Index {
52    /// Number of live (non-deleted) files known to the index.
53    pub fn file_count(&self) -> usize {
54        self.entries.iter().filter(|e| e.live).count()
55    }
56
57    pub fn trigram_count(&self) -> usize {
58        self.postings.len()
59    }
60
61    /// Build an index over `root`, honoring the same ignore rules ripgrep uses by default.
62    pub fn build(root: impl AsRef<Path>) -> Index {
63        let paths = walk_files(root.as_ref());
64        Self::from_paths(&paths, &AtomicUsize::new(0))
65    }
66
67    /// Build an index from an already-walked path set, bumping `progress` once per file processed so
68    /// a watcher can show a climbing count during a cold build.
69    pub fn from_paths(paths: &[PathBuf], progress: &AtomicUsize) -> Index {
70        let (metas, postings) = index_files(paths, progress);
71        let entries: Vec<FileEntry> = paths
72            .iter()
73            .cloned()
74            .zip(metas)
75            .map(|(path, (size, mtime_ns))| FileEntry {
76                path,
77                size,
78                mtime_ns,
79                live: true,
80            })
81            .collect();
82        let path_to_id = entries
83            .iter()
84            .enumerate()
85            .map(|(id, e)| (e.path.clone(), id as u32))
86            .collect();
87        Index {
88            entries,
89            path_to_id,
90            postings,
91        }
92    }
93
94    /// Apply a set of changed and removed paths to the resident index (incremental update).
95    pub fn apply_changes(&mut self, changed: &[PathBuf], removed: &[PathBuf]) {
96        for path in removed {
97            if let Some(&id) = self.path_to_id.get(path) {
98                self.entries[id as usize].live = false;
99            }
100        }
101        let mut seen = FxHashSet::default();
102        for path in changed {
103            // Stat BEFORE read: if the file is rewritten between our read and stat, we must record an
104            // mtime no newer than the bytes we indexed, so the next reconcile still sees a change and
105            // re-indexes. Stat-after-read could store the new mtime over old trigrams and miss it.
106            let (size, mtime_ns) = stat(path);
107            let Ok(bytes) = std::fs::read(path) else {
108                // Unreadable now (perhaps deleted between event and handling): tombstone if known.
109                if let Some(&id) = self.path_to_id.get(path) {
110                    self.entries[id as usize].live = false;
111                }
112                continue;
113            };
114            let id = self.intern(path, size, mtime_ns);
115            if collect_trigrams(&bytes, &mut seen) {
116                for &t in &seen {
117                    self.postings
118                        .entry(trigram::pack(t))
119                        .or_default()
120                        .insert(id);
121                }
122            }
123        }
124    }
125
126    /// Reconcile the index against the current tree by `(size, mtime)`: index new/changed files and
127    /// tombstone vanished ones. Used at daemon startup to catch changes made while it was down.
128    /// Returns the number of files re-indexed plus removed.
129    pub fn reconcile(&mut self, root: impl AsRef<Path>) -> usize {
130        let walked = walk_files(root.as_ref());
131        let walked_set: rustc_hash::FxHashSet<&Path> = walked.iter().map(|p| p.as_path()).collect();
132        let mut changed = Vec::new();
133        for p in &walked {
134            match self.path_to_id.get(p) {
135                None => changed.push(p.clone()),
136                Some(&id) => {
137                    let e = &self.entries[id as usize];
138                    let (size, mtime_ns) = stat(p);
139                    if !e.live || e.size != size || e.mtime_ns != mtime_ns {
140                        changed.push(p.clone());
141                    }
142                }
143            }
144        }
145        let removed: Vec<PathBuf> = self
146            .entries
147            .iter()
148            .filter(|e| e.live && !walked_set.contains(e.path.as_path()))
149            .map(|e| e.path.clone())
150            .collect();
151        let n = changed.len() + removed.len();
152        self.apply_changes(&changed, &removed);
153        n
154    }
155
156    /// Get the id for `path`, creating a live entry if new; refresh metadata and revive if known.
157    fn intern(&mut self, path: &Path, size: u64, mtime_ns: u64) -> u32 {
158        if let Some(&id) = self.path_to_id.get(path) {
159            let e = &mut self.entries[id as usize];
160            e.size = size;
161            e.mtime_ns = mtime_ns;
162            e.live = true;
163            return id;
164        }
165        let id = self.entries.len() as u32;
166        self.entries.push(FileEntry {
167            path: path.to_path_buf(),
168            size,
169            mtime_ns,
170            live: true,
171        });
172        self.path_to_id.insert(path.to_path_buf(), id);
173        id
174    }
175
176    /// Resolve a trigram query to candidate files (a sound superset of the real matches). A fallback
177    /// query (no usable trigram) simply makes *every* live file a candidate — there is no separate
178    /// "scan the tree" path; ripgrep then confirms over whatever this returns.
179    pub fn candidates(&self, query: &Query) -> Vec<&Path> {
180        let bitmap = self.eval(query);
181        bitmap
182            .iter()
183            .filter(|&id| self.entries[id as usize].live)
184            .map(|id| self.entries[id as usize].path.as_path())
185            .collect()
186    }
187
188    /// File/dir name lookup (fd/find-style): live paths whose string contains `needle`. Returns the
189    /// keyset page (paths whose string is strictly greater than `after`, capped at `limit`), the total
190    /// number of matches, and `start` (matches skipped before this page) so callers can report an
191    /// honest "X-Y of total". Sorted by path string — the same order callers keyset on — so paging is
192    /// deterministic and never skips or repeats.
193    pub fn find(
194        &self,
195        needle: &str,
196        after: Option<&str>,
197        limit: usize,
198    ) -> (Vec<&Path>, usize, usize) {
199        let mut hits: Vec<&Path> = self
200            .entries
201            .iter()
202            .filter(|e| e.live && e.path.to_string_lossy().contains(needle))
203            .map(|e| e.path.as_path())
204            .collect();
205        // Sort by the path string (the order callers keyset on). `sort_by_cached_key` decodes each
206        // path to a String once (O(n)), versus a comparator that would re-decode both sides on every
207        // one of the O(n log n) comparisons.
208        hits.sort_by_cached_key(|p| p.to_string_lossy().into_owned());
209        let total = hits.len();
210        let start = after.map_or(0, |a| {
211            hits.partition_point(|p| p.to_string_lossy().as_ref() <= a)
212        });
213        hits.drain(..start);
214        hits.truncate(limit);
215        (hits, total, start)
216    }
217
218    /// Approximate resident size of the posting lists (serialized roaring bytes), for status.
219    pub fn memory_bytes(&self) -> u64 {
220        self.postings
221            .values()
222            .map(|b| b.serialized_size() as u64)
223            .sum()
224    }
225
226    /// Evaluate a query to the set of matching file IDs via roaring set algebra. A fallback
227    /// (`Query::All`) yields every file id.
228    fn eval(&self, query: &Query) -> RoaringBitmap {
229        match query {
230            Query::All => self.all_ids(),
231            Query::Tri(t) => self
232                .postings
233                .get(&trigram::pack(*t))
234                .cloned()
235                .unwrap_or_default(),
236            // An empty AND is the identity "match all", not "match nothing" — guard the soundness
237            // invariant even though `Query::for_pattern` never builds an empty And/Or today.
238            Query::And(qs) => qs
239                .iter()
240                .map(|q| self.eval(q))
241                .reduce(|a, b| a & b)
242                .unwrap_or_else(|| self.all_ids()),
243            Query::Or(qs) => qs
244                .iter()
245                .map(|q| self.eval(q))
246                .reduce(|a, b| a | b)
247                .unwrap_or_default(),
248        }
249    }
250
251    fn all_ids(&self) -> RoaringBitmap {
252        (0..self.entries.len() as u32).collect()
253    }
254
255    /// Serialize the index to `path` (atomic via temp file + rename).
256    pub fn save(&self, path: impl AsRef<Path>) -> Result<()> {
257        let path = path.as_ref();
258        if let Some(dir) = path.parent() {
259            std::fs::create_dir_all(dir).ok();
260        }
261        let tmp = path.with_extension("tmp");
262        let mut w = std::io::BufWriter::new(std::fs::File::create(&tmp)?);
263        w.write_all(SNAPSHOT_MAGIC)?;
264        write_u64(&mut w, self.entries.len() as u64)?;
265        for e in &self.entries {
266            w.write_all(&[e.live as u8])?;
267            write_u64(&mut w, e.size)?;
268            write_u64(&mut w, e.mtime_ns)?;
269            let pb = path_to_bytes(&e.path);
270            write_u32(&mut w, pb.len() as u32)?;
271            w.write_all(&pb)?;
272        }
273        write_u64(&mut w, self.postings.len() as u64)?;
274        let mut buf = Vec::new();
275        for (&key, bm) in &self.postings {
276            write_u32(&mut w, key)?;
277            buf.clear();
278            bm.serialize_into(&mut buf)?;
279            write_u32(&mut w, buf.len() as u32)?;
280            w.write_all(&buf)?;
281        }
282        w.flush()?;
283        drop(w);
284        std::fs::rename(&tmp, path).context("rename snapshot into place")?;
285        Ok(())
286    }
287
288    /// Load an index snapshot from `path`. Errors (incl. version mismatch) signal "rebuild".
289    pub fn load(path: impl AsRef<Path>) -> Result<Index> {
290        let mut r = std::io::BufReader::new(std::fs::File::open(path)?);
291        let mut magic = [0u8; 8];
292        r.read_exact(&mut magic)?;
293        if &magic != SNAPSHOT_MAGIC {
294            bail!("snapshot version mismatch");
295        }
296        let n = read_u64(&mut r)? as usize;
297        let mut entries = Vec::with_capacity(n);
298        let mut path_to_id = FxHashMap::default();
299        for id in 0..n {
300            let mut live = [0u8; 1];
301            r.read_exact(&mut live)?;
302            let size = read_u64(&mut r)?;
303            let mtime_ns = read_u64(&mut r)?;
304            let plen = read_u32(&mut r)? as usize;
305            let mut pb = vec![0u8; plen];
306            r.read_exact(&mut pb)?;
307            let path = path_from_bytes(&pb);
308            path_to_id.insert(path.clone(), id as u32);
309            entries.push(FileEntry {
310                path,
311                size,
312                mtime_ns,
313                live: live[0] != 0,
314            });
315        }
316        let np = read_u64(&mut r)? as usize;
317        let n_entries = entries.len() as u32;
318        let mut postings = FxHashMap::default();
319        for _ in 0..np {
320            let key = read_u32(&mut r)?;
321            let blen = read_u32(&mut r)? as usize;
322            let mut bb = vec![0u8; blen];
323            r.read_exact(&mut bb)?;
324            let bm = RoaringBitmap::deserialize_from(&bb[..])?;
325            // A posting referencing a file id past the entries table means a corrupt or foreign
326            // snapshot; reject it so a query can't panic indexing `entries[id]` (caller rebuilds).
327            if bm.max().is_some_and(|m| m >= n_entries) {
328                bail!("snapshot posting references out-of-range file id");
329            }
330            postings.insert(key, bm);
331        }
332        Ok(Index {
333            entries,
334            path_to_id,
335            postings,
336        })
337    }
338}
339
340/// Encode a path for the snapshot. The snapshot is a per-machine rebuildable cache, so each platform
341/// only needs to round-trip its own paths: Unix uses the raw OS bytes (zero-copy, unchanged); other
342/// platforms use UTF-8 (lossy for the rare non-Unicode path, which the next rebuild repairs).
343#[cfg(unix)]
344fn path_to_bytes(p: &Path) -> std::borrow::Cow<'_, [u8]> {
345    use std::os::unix::ffi::OsStrExt;
346    std::borrow::Cow::Borrowed(p.as_os_str().as_bytes())
347}
348#[cfg(not(unix))]
349fn path_to_bytes(p: &Path) -> std::borrow::Cow<'_, [u8]> {
350    std::borrow::Cow::Owned(p.to_string_lossy().into_owned().into_bytes())
351}
352
353#[cfg(unix)]
354fn path_from_bytes(b: &[u8]) -> PathBuf {
355    use std::os::unix::ffi::OsStrExt;
356    PathBuf::from(std::ffi::OsStr::from_bytes(b))
357}
358#[cfg(not(unix))]
359fn path_from_bytes(b: &[u8]) -> PathBuf {
360    PathBuf::from(String::from_utf8_lossy(b).into_owned())
361}
362
363fn write_u32(w: &mut impl Write, v: u32) -> std::io::Result<()> {
364    w.write_all(&v.to_le_bytes())
365}
366fn write_u64(w: &mut impl Write, v: u64) -> std::io::Result<()> {
367    w.write_all(&v.to_le_bytes())
368}
369fn read_u32(r: &mut impl Read) -> std::io::Result<u32> {
370    let mut b = [0u8; 4];
371    r.read_exact(&mut b)?;
372    Ok(u32::from_le_bytes(b))
373}
374fn read_u64(r: &mut impl Read) -> std::io::Result<u64> {
375    let mut b = [0u8; 8];
376    r.read_exact(&mut b)?;
377    Ok(u64::from_le_bytes(b))
378}
379
380/// A `WalkBuilder` that enumerates exactly the files `rg` would under `root`. The crate defaults
381/// already match ripgrep (skip hidden; honor `.gitignore`/`.ignore`/`.git/info/exclude` and the
382/// global gitignore; read parent ignores; don't follow symlinks); the one thing the `rg` binary adds
383/// on top of the crate is the `.rgignore` custom ignore name. Both the index walk and the `full_scan`
384/// fallback go through here so they can't drift from `rg` — or from each other. See
385/// `docs/indexing.md` (What the walk includes).
386pub fn walk_builder(root: &Path) -> WalkBuilder {
387    let mut b = WalkBuilder::new(root);
388    b.add_custom_ignore_filename(".rgignore");
389    b
390}
391
392/// Collect the files ripgrep would search under `root`, sorted so file IDs are deterministic.
393pub fn walk_files(root: &Path) -> Vec<PathBuf> {
394    let found = Mutex::new(Vec::<PathBuf>::new());
395    walk_builder(root).build_parallel().run(|| {
396        let found = &found;
397        Box::new(move |res| {
398            if let Ok(entry) = res
399                && entry.file_type().is_some_and(|t| t.is_file())
400            {
401                found.lock().unwrap().push(entry.into_path());
402            }
403            WalkState::Continue
404        })
405    });
406    let mut paths = found.into_inner().unwrap();
407    paths.sort();
408    paths
409}
410
411/// Read each file once: collect (size, mtime) per file (ordered) and accumulate distinct trigrams
412/// into sharded posting lists in parallel. `progress` is bumped once per file (for watchers).
413fn index_files(
414    paths: &[PathBuf],
415    progress: &AtomicUsize,
416) -> (Vec<(u64, u64)>, FxHashMap<u32, RoaringBitmap>) {
417    const SHARDS: usize = 256;
418    const TRIGRAM_WORDS: usize = (1 << 24) / 64; // one bit per possible 24-bit trigram
419    // Shared posting maps, sharded so workers rarely contend a lock. This keeps peak memory near the
420    // final index size (~one compressed copy), unlike per-worker maps which would multiply it by the
421    // worker count.
422    let shards: Vec<Mutex<FxHashMap<u32, RoaringBitmap>>> = (0..SHARDS)
423        .map(|_| Mutex::new(FxHashMap::default()))
424        .collect();
425
426    // Per-worker scratch reused across files: a sparse bitset that dedups a file's trigrams by
427    // bit-test (24-bit keys are dense, so this beats hashing), the distinct keys, and the per-shard
428    // grouping buffers. The bitset is cleared via the distinct list, so clearing is O(set bits).
429    let init = || {
430        (
431            vec![0u64; TRIGRAM_WORDS],
432            Vec::<u32>::new(),
433            vec![Vec::<u32>::new(); SHARDS],
434        )
435    };
436    let metas: Vec<(u64, u64)> = paths
437        .par_iter()
438        .enumerate()
439        .map_init(init, |(bits, distinct, by_shard), (id, path)| {
440            progress.fetch_add(1, Ordering::Relaxed);
441            let id = id as u32;
442            let (size, mtime_ns) = stat(path);
443            if let Ok(bytes) = std::fs::read(path)
444                && !is_binary_from_start(&bytes)
445            {
446                distinct.clear();
447                trigram::for_each(&bytes, |t| {
448                    let key = trigram::pack(t);
449                    let (w, b) = ((key >> 6) as usize, key & 63);
450                    if bits[w] & (1u64 << b) == 0 {
451                        bits[w] |= 1u64 << b;
452                        distinct.push(key);
453                    }
454                });
455                by_shard.iter_mut().for_each(Vec::clear);
456                for &key in distinct.iter() {
457                    by_shard[(key as usize) & (SHARDS - 1)].push(key);
458                }
459                for (s, keys) in by_shard.iter().enumerate() {
460                    if keys.is_empty() {
461                        continue;
462                    }
463                    let mut g = shards[s].lock().unwrap();
464                    for &key in keys {
465                        g.entry(key).or_default().insert(id);
466                    }
467                }
468                for &key in distinct.iter() {
469                    bits[(key >> 6) as usize] &= !(1u64 << (key & 63));
470                }
471            }
472            (size, mtime_ns)
473        })
474        .collect();
475
476    let mut merged = FxHashMap::default();
477    for shard in shards {
478        merged.extend(shard.into_inner().unwrap());
479    }
480    (metas, merged)
481}
482
483fn stat(path: &Path) -> (u64, u64) {
484    match std::fs::metadata(path) {
485        Ok(m) => {
486            let mtime = m
487                .modified()
488                .ok()
489                .and_then(|t| t.duration_since(UNIX_EPOCH).ok())
490                .map(|d| d.as_nanos() as u64)
491                .unwrap_or(0);
492            (m.len(), mtime)
493        }
494        Err(_) => (0, 0),
495    }
496}
497
498/// True if a NUL appears within the conservative sniff window — ripgrep suppresses such files
499/// entirely in a recursive search, so giving them no postings is sound (see [`BINARY_SNIFF_BYTES`]).
500fn is_binary_from_start(bytes: &[u8]) -> bool {
501    memchr::memchr(0, &bytes[..bytes.len().min(BINARY_SNIFF_BYTES)]).is_some()
502}
503
504/// Fill `seen` with the distinct trigrams of `bytes`, returning whether the file should be indexed.
505/// Binary-from-start files get no postings (the one place this decision is made, shared by the cold
506/// build and incremental updates so they can't drift).
507fn collect_trigrams(bytes: &[u8], seen: &mut FxHashSet<Trigram>) -> bool {
508    if is_binary_from_start(bytes) {
509        return false;
510    }
511    seen.clear();
512    trigram::for_each(bytes, |t| {
513        seen.insert(t);
514    });
515    true
516}
517
518#[cfg(test)]
519mod tests {
520    use super::*;
521    use crate::query::Options;
522
523    fn write(dir: &Path, name: &str, content: &[u8]) {
524        std::fs::write(dir.join(name), content).unwrap();
525    }
526
527    fn names(c: Vec<&Path>) -> Vec<String> {
528        let mut v: Vec<String> = c
529            .iter()
530            .map(|p| p.file_name().unwrap().to_string_lossy().into_owned())
531            .collect();
532        v.sort();
533        v
534    }
535
536    #[test]
537    fn build_candidates_and_binary_skip() {
538        let tmp = std::env::temp_dir().join(format!("rgx_idx_{}", std::process::id()));
539        let _ = std::fs::remove_dir_all(&tmp);
540        std::fs::create_dir_all(&tmp).unwrap();
541        write(&tmp, "a.txt", b"the quick brown fox\nNEEDLE here\n");
542        write(&tmp, "b.txt", b"no match in this file at all\n");
543        write(&tmp, "c.txt", b"another NEEDLE appears\n");
544        write(&tmp, "bin.dat", b"\x00\x00NEEDLE inside binary\x00");
545
546        let idx = Index::build(&tmp);
547        assert_eq!(idx.file_count(), 4);
548        let q = Query::for_pattern("NEEDLE", Options::default());
549        let n = names(idx.candidates(&q));
550        assert!(n.contains(&"a.txt".to_string()) && n.contains(&"c.txt".to_string()));
551        assert!(!n.contains(&"b.txt".to_string()) && !n.contains(&"bin.dat".to_string()));
552
553        // A fallback query (no usable trigram) makes every live file a candidate, incl. bin.dat.
554        let fb = names(idx.candidates(&Query::for_pattern("a.", Options::default())));
555        assert_eq!(fb, vec!["a.txt", "b.txt", "bin.dat", "c.txt"]);
556        let _ = std::fs::remove_dir_all(&tmp);
557    }
558
559    #[test]
560    fn incremental_add_change_remove() {
561        let tmp = std::env::temp_dir().join(format!("rgx_inc_{}", std::process::id()));
562        let _ = std::fs::remove_dir_all(&tmp);
563        std::fs::create_dir_all(&tmp).unwrap();
564        write(&tmp, "a.txt", b"WIDGET alpha\n");
565        let mut idx = Index::build(&tmp);
566        let q = Query::for_pattern("WIDGET", Options::default());
567        assert_eq!(names(idx.candidates(&q)), vec!["a.txt"]);
568
569        // Add a new file containing WIDGET.
570        write(&tmp, "b.txt", b"WIDGET beta\n");
571        idx.apply_changes(&[tmp.join("b.txt")], &[]);
572        let mut got = names(idx.candidates(&q));
573        got.sort();
574        assert_eq!(got, vec!["a.txt", "b.txt"]);
575
576        // Remove a.txt -> tombstoned, no longer a candidate.
577        std::fs::remove_file(tmp.join("a.txt")).unwrap();
578        idx.apply_changes(&[], &[tmp.join("a.txt")]);
579        assert_eq!(names(idx.candidates(&q)), vec!["b.txt"]);
580        let _ = std::fs::remove_dir_all(&tmp);
581    }
582
583    #[test]
584    fn find_returns_total_and_keyset_page() {
585        let tmp = std::env::temp_dir().join(format!("rgx_find_{}", std::process::id()));
586        let _ = std::fs::remove_dir_all(&tmp);
587        std::fs::create_dir_all(&tmp).unwrap();
588        for n in ["conf_a.txt", "conf_b.txt", "conf_c.txt", "other.txt"] {
589            write(&tmp, n, b"x\n");
590        }
591        let idx = Index::build(&tmp);
592
593        let (page, total, start) = idx.find("conf", None, 2);
594        assert_eq!(total, 3);
595        assert_eq!(start, 0);
596        assert_eq!(names(page.clone()), vec!["conf_a.txt", "conf_b.txt"]);
597
598        // Resume after the last path of the first page; total stays the full match count.
599        let after = page.last().unwrap().to_string_lossy().into_owned();
600        let (page2, total2, start2) = idx.find("conf", Some(&after), 2);
601        assert_eq!(total2, 3);
602        assert_eq!(start2, 2);
603        assert_eq!(names(page2), vec!["conf_c.txt"]);
604        let _ = std::fs::remove_dir_all(&tmp);
605    }
606
607    #[test]
608    fn snapshot_roundtrip() {
609        let tmp = std::env::temp_dir().join(format!("rgx_snap_{}", std::process::id()));
610        let _ = std::fs::remove_dir_all(&tmp);
611        std::fs::create_dir_all(&tmp).unwrap();
612        write(&tmp, "a.txt", b"SNAPSHOT token here\n");
613        write(&tmp, "b.txt", b"other content\n");
614        let idx = Index::build(&tmp);
615        let snap = tmp.join("index.bin");
616        idx.save(&snap).unwrap();
617        let loaded = Index::load(&snap).unwrap();
618        assert_eq!(loaded.file_count(), idx.file_count());
619        let q = Query::for_pattern("SNAPSHOT", Options::default());
620        assert_eq!(names(loaded.candidates(&q)), vec!["a.txt"]);
621        let _ = std::fs::remove_dir_all(&tmp);
622    }
623}
rgx/index.rs

rgx/
index.rs