rgx/lib.rs
1//! `rgx` — a candidate-file index in front of ripgrep.
2//!
3//! The crate is split so each piece is testable in isolation (see `CLAUDE.md`):
4//! - [`trigram`] — the atomic index unit and extraction helpers.
5//! - [`query`] — turning a regex into a sound boolean trigram query.
6//! - [`index`] — the trigram inverted index: build, candidate selection, incremental update, snapshot.
7//! - [`confirm`] — ripgrep's own engine over a candidate file set (the matching authority).
8//! - [`proto`]/[`server`]/[`client`]/[`paths`] — the per-project daemon and its wire protocol.
9//!
10//! [`search`] ties the search path together: pattern → trigram query → candidate files → ripgrep
11//! confirm, transparently falling back to a full scan when the query carries no usable constraint.
12
13use std::path::Path;
14
15use anyhow::Result;
16
17pub mod client;
18pub mod compact;
19pub mod config;
20pub mod confirm;
21pub mod cursor;
22pub mod index;
23pub mod mcp;
24pub mod pagination;
25pub mod paths;
26pub mod proto;
27pub mod query;
28pub mod server;
29pub mod skill;
30pub mod status;
31pub mod transport;
32pub mod trigram;
33
34use confirm::SearchOptions;
35use index::Index;
36use query::{Options as QueryOptions, Query};
37
38/// The pattern actually handed to the regex engine: escaped when `-F` (fixed strings) is set.
39pub fn effective_pattern(pattern: &str, opts: SearchOptions) -> String {
40 if opts.fixed_strings {
41 regex_syntax::escape(pattern)
42 } else {
43 pattern.to_string()
44 }
45}
46
47fn query_options(opts: SearchOptions) -> QueryOptions {
48 QueryOptions {
49 case_insensitive: opts.case_insensitive,
50 multi_line: opts.multi_line,
51 dot_matches_new_line: opts.dot_matches_new_line,
52 }
53}
54
55/// Whether `pattern` has no usable trigram constraint (so every file is a candidate). The CLI uses
56/// this to scan such queries in-process — one process streamed straight to stdout, like ripgrep —
57/// instead of paying the daemon round-trip to ship a potentially huge result set back.
58pub fn is_fallback(pattern: &str, opts: SearchOptions) -> bool {
59 Query::for_pattern(&effective_pattern(pattern, opts), query_options(opts)).is_fallback()
60}
61
62/// Resolve the candidate files for `pattern` as owned paths, so a caller holding the index lock can
63/// release it before the (potentially long) ripgrep confirm + output streaming — never hold the
64/// index lock across blocking I/O. A fallback pattern yields every live file.
65pub fn candidate_paths(
66 index: &Index,
67 pattern: &str,
68 opts: SearchOptions,
69) -> Vec<std::path::PathBuf> {
70 let effective = effective_pattern(pattern, opts);
71 let query = Query::for_pattern(&effective, query_options(opts));
72 index
73 .candidates(&query)
74 .into_iter()
75 .map(Path::to_path_buf)
76 .collect()
77}
78
79/// Stream a content search against a (ready) index, emitting `path:line:text` chunks via `emit`.
80///
81/// One path for everything: the index turns the pattern into a candidate file set (a precise subset
82/// for trigram-accelerable patterns, or *every* file for a fallback pattern with no usable trigram),
83/// and ripgrep confirms over exactly that set. There is no separate "scan the tree" branch.
84pub fn stream_search(
85 index: &Index,
86 root: &Path,
87 pattern: &str,
88 opts: SearchOptions,
89 emit: impl FnMut(&[u8]) -> Result<()>,
90) -> Result<()> {
91 let effective = effective_pattern(pattern, opts);
92 let query = Query::for_pattern(&effective, query_options(opts));
93 let paths = index.candidates(&query);
94 confirm::search_streaming(&effective, &paths, root, opts, emit)
95}
96
97/// Pipelined full-tree walk+search (matching ripgrep's model), streaming through `sink`. Used by the
98/// CLI for fallback queries (no usable trigram) and by the daemon's cold start before the first build
99/// finishes — both fully in-process. Once the index is ready, [`stream_search`] handles
100/// trigram-accelerable patterns.
101pub fn stream_full_scan(
102 root: impl AsRef<Path>,
103 pattern: &str,
104 opts: SearchOptions,
105 sink: impl Fn(&[u8]) + Sync,
106) -> Result<()> {
107 let effective = effective_pattern(pattern, opts);
108 confirm::full_scan(root.as_ref(), &effective, opts, sink)
109}
110
111/// Run a content search and buffer the whole `path:line:text` output, for callers that need the
112/// entire result at once (the compact/paged view) rather than a stream. Trigram-accelerable patterns
113/// go through the daemon (emitted in index file-id order, NOT path order); fallback patterns scan
114/// in-process in nondeterministic order. Neither is guaranteed sorted, so the compact view sorts the
115/// matches itself (see `compact::format`); the fallback block-sort here is a cheap extra that keeps
116/// even the raw buffered bytes deterministic across runs.
117pub fn collect_search(root: &Path, pattern: &str, opts: SearchOptions) -> Result<Vec<u8>> {
118 if is_fallback(pattern, opts) {
119 let chunks = std::sync::Mutex::new(Vec::<Vec<u8>>::new());
120 stream_full_scan(root, pattern, opts, |c| {
121 if let Ok(mut v) = chunks.lock() {
122 v.push(c.to_vec());
123 }
124 })?;
125 let mut chunks = chunks.into_inner().unwrap_or_default();
126 chunks.sort_unstable(); // each chunk is one file's block, so this orders by path
127 Ok(chunks.concat())
128 } else {
129 client::request(
130 root,
131 &proto::Request::Search {
132 opts,
133 pattern: pattern.to_string(),
134 },
135 )
136 }
137}
138
139/// Collecting convenience over [`stream_search`] (used in tests).
140pub fn search(index: &Index, root: &Path, pattern: &str, opts: SearchOptions) -> Result<Vec<u8>> {
141 let mut out = Vec::new();
142 stream_search(index, root, pattern, opts, |c| {
143 out.extend_from_slice(c);
144 Ok(())
145 })?;
146 Ok(out)
147}