1mod index;
6mod planner;
7mod search;
8pub mod storage;
9mod verify;
10
11pub use index::{CorpusKind, Index, IndexBuilder, QueryPlan};
12pub use storage::{lexicon, postings};
13pub use verify::{compile_pattern, compile_search_pattern};
14
15pub use planner::TrigramPlan;
16pub use search::{
17 walk_file_paths, CandidateInfo, CaseMode, CompiledSearch, FilenameMode, GlobConfig, HiddenMode,
18 IgnoreConfig, IgnoreSources, Match, OutputEmission, SearchFilter, SearchFilterConfig,
19 SearchMatchFlags, SearchMode, SearchOptions, SearchOutput, VisibilityConfig,
20};
21
22pub use ignore::{Walk, WalkBuilder};
23
24pub use index::trigram::extract_trigrams;
25
26use std::path::PathBuf;
27
28use thiserror::Error;
29
/// Directory name `.sift`. The tests in this file place the index directory at
/// `<root>/.sift`. NOTE(review): presumably the default index location — confirm against callers.
pub const SIFT_DIR: &str = ".sift";

/// Subdirectory name `.index`. NOTE(review): its use is not visible in this file — confirm.
pub const INDEX_SUBDIR: &str = ".index";

/// File name of the index metadata file stored inside the index directory.
///
/// The tests in this file show that `Index::open` fails with `Error::MissingMeta` when the
/// directory lacks this file and with `Error::InvalidMeta` when the file is empty.
pub const META_FILENAME: &str = "sift.meta";

/// File name `files.bin`. NOTE(review): presumably the table of indexed files — confirm.
pub const FILES_BIN: &str = "files.bin";

/// File name `lexicon.bin`. NOTE(review): presumably the on-disk lexicon — confirm in `storage`.
pub const LEXICON_BIN: &str = "lexicon.bin";

/// File name `postings.bin`. NOTE(review): presumably the on-disk postings — confirm in `storage`.
pub const POSTINGS_BIN: &str = "postings.bin";
36
37#[derive(Debug, Error)]
38pub enum Error {
39 #[error("IO error: {0}")]
40 Io(#[from] std::io::Error),
41
42 #[error("ignore walk error: {0}")]
43 Ignore(#[from] ignore::Error),
44
45 #[error("regex error: {0}")]
46 Regex(#[from] Box<regex_automata::meta::BuildError>),
47
48 #[error("regex build error: {0}")]
49 RegexBuild(String),
50
51 #[error("search patterns must not be empty")]
52 EmptyPatterns,
53
54 #[error("invalid max-count: 0 matches requested")]
55 InvalidMaxCount,
56
57 #[error("invalid index metadata: {0}")]
58 InvalidMeta(PathBuf),
59
60 #[error("index not initialized (missing {0})")]
61 MissingMeta(PathBuf),
62
63 #[error("index component missing: {0}")]
64 MissingComponent(PathBuf),
65}
66
67pub type Result<T> = std::result::Result<T, Error>;
68
69#[cfg(test)]
70mod tests {
71 use super::*;
72 use std::fs;
73
74 fn normalized_path(p: &std::path::Path) -> std::path::PathBuf {
75 let s = p.display().to_string();
76 #[cfg(windows)]
77 let s = s.strip_prefix("\\\\?\\").unwrap_or(&s).to_string();
78 #[cfg(target_os = "macos")]
79 let s = s.replace("/private", "");
80 std::path::PathBuf::from(s)
81 }
82
83 #[test]
84 fn build_open_search_finds_line() {
85 let tmp = std::env::temp_dir().join(format!("sift-core-test-{}", std::process::id()));
86 let _ = fs::remove_dir_all(&tmp);
87 fs::create_dir_all(tmp.join("src")).unwrap();
88 fs::write(tmp.join("src/lib.rs"), "fn hello() {\n let x = 1;\n}\n").unwrap();
89
90 let idx = tmp.join(".sift");
91 let _ = IndexBuilder::new(&tmp).with_dir(&idx).build().unwrap();
92
93 let index = Index::open(&idx).unwrap();
94 assert!(index.file_count() > 0);
95 let pat = vec![r"let\s+x".to_string()];
96 let q = CompiledSearch::new(&pat, SearchOptions::default()).unwrap();
97 let hits = q.collect_index_matches(&index).unwrap();
98 assert_eq!(hits.len(), 1);
99 assert!(hits[0].file.ends_with("src/lib.rs"));
100 assert_eq!(hits[0].line, 2);
101 }
102
103 #[test]
104 fn open_missing_meta_errors() {
105 let tmp = std::env::temp_dir().join(format!("sift-missing-meta-{}", std::process::id()));
106 let _ = fs::remove_dir_all(&tmp);
107 fs::create_dir_all(&tmp).unwrap();
108 assert!(matches!(Index::open(&tmp), Err(Error::MissingMeta(_))));
109 }
110
111 #[test]
112 fn open_missing_table_errors() {
113 let tmp = std::env::temp_dir().join(format!("sift-missing-table-{}", std::process::id()));
114 let _ = fs::remove_dir_all(&tmp);
115 fs::create_dir_all(&tmp).unwrap();
116 let root_path = std::env::temp_dir().join("sift-test-root");
117 let meta = crate::index::IndexMeta {
118 root: root_path,
119 kind: crate::index::CorpusKind::Directory,
120 };
121 fs::write(
122 tmp.join(META_FILENAME),
123 serde_json::to_string_pretty(&meta).unwrap(),
124 )
125 .unwrap();
126 assert!(matches!(Index::open(&tmp), Err(Error::MissingComponent(_))));
127 }
128
129 #[test]
130 fn open_empty_meta_errors() {
131 let tmp = std::env::temp_dir().join(format!("sift-empty-meta-{}", std::process::id()));
132 let _ = fs::remove_dir_all(&tmp);
133 fs::create_dir_all(&tmp).unwrap();
134 fs::write(tmp.join(META_FILENAME), "").unwrap();
135 assert!(matches!(Index::open(&tmp), Err(Error::InvalidMeta(_))));
136 }
137
138 #[test]
139 fn explain_returns_indexed_plan_for_literal_prefix() {
140 let tmp = std::env::temp_dir().join(format!("sift-explain-indexed-{}", std::process::id()));
141 let _ = fs::remove_dir_all(&tmp);
142 fs::create_dir_all(&tmp).unwrap();
143 fs::write(tmp.join("a.txt"), "alpha beta\ngamma delta\n").unwrap();
144 let idx = tmp.join(".sift");
145 let _ = IndexBuilder::new(&tmp).with_dir(&idx).build().unwrap();
146 let index = Index::open(&idx).unwrap();
147 let plan = index.explain("foo.*");
148 assert_eq!(plan.pattern, "foo.*");
149 assert_eq!(plan.mode, "indexed_candidates");
150 }
151
152 #[test]
153 fn explain_returns_full_scan_for_true_no_literal() {
154 let tmp =
155 std::env::temp_dir().join(format!("sift-explain-fullscan-{}", std::process::id()));
156 let _ = fs::remove_dir_all(&tmp);
157 fs::create_dir_all(&tmp).unwrap();
158 fs::write(tmp.join("a.txt"), "alpha beta\ngamma delta\n").unwrap();
159 let idx = tmp.join(".sift");
160 let _ = IndexBuilder::new(&tmp).with_dir(&idx).build().unwrap();
161 let index = Index::open(&idx).unwrap();
162 let plan = index.explain(r"\w{5}\s+\w{5}");
163 assert_eq!(plan.pattern, r"\w{5}\s+\w{5}");
164 assert_eq!(plan.mode, "full_scan");
165 }
166
167 #[test]
168 fn indexed_search_matches_naive_for_literal() {
169 let tmp = std::env::temp_dir().join(format!("sift-idx-parity-{}", std::process::id()));
170 let _ = fs::remove_dir_all(&tmp);
171 fs::create_dir_all(tmp.join("a")).unwrap();
172 fs::create_dir_all(tmp.join("b")).unwrap();
173 fs::write(tmp.join("a/x.txt"), "alpha beta\n").unwrap();
174 fs::write(tmp.join("b/y.txt"), "gamma delta\n").unwrap();
175
176 let idx = tmp.join(".sift");
177 let _ = IndexBuilder::new(&tmp).with_dir(&idx).build().unwrap();
178 let index = Index::open(&idx).unwrap();
179
180 let pat = vec!["beta".to_string()];
181 let opts = SearchOptions::default();
182 let q = CompiledSearch::new(&pat, opts).unwrap();
183 let naive = q.collect_walk_matches(&tmp).unwrap();
184 let indexed = q.collect_index_matches(&index).unwrap();
185 assert_eq!(indexed, naive);
186 }
187
188 #[test]
189 fn full_scan_parallel_candidate_path_finds_all_files() {
190 let tmp = std::env::temp_dir().join(format!("sift-parallel-fs-{}", std::process::id()));
191 let _ = fs::remove_dir_all(&tmp);
192 fs::create_dir_all(tmp.join("d")).unwrap();
193
194 let min_parallel = crate::search::parallel_candidate_min_files();
195 let n_files = if min_parallel == usize::MAX {
196 3
197 } else {
198 min_parallel.clamp(2, 64)
199 };
200 for i in 0..n_files {
201 fs::write(
202 tmp.join("d").join(format!("f{i}.txt")),
203 format!("line {i} needle\n"),
204 )
205 .unwrap();
206 }
207 let idx = tmp.join(".sift");
208 let _ = IndexBuilder::new(&tmp).with_dir(&idx).build().unwrap();
209 let index = Index::open(&idx).unwrap();
210 assert_eq!(index.file_count(), n_files);
211
212 let pat = vec!["needle".to_string()];
213 let opts = SearchOptions::default();
214 let q = CompiledSearch::new(&pat, opts).unwrap();
215 let hits = q.collect_index_matches(&index).unwrap();
216 assert_eq!(hits.len(), n_files);
217 }
218
219 #[test]
220 fn full_scan_uses_files_bin_same_hits_as_fresh_walk() {
221 let tmp = std::env::temp_dir().join(format!("sift-fullscan-parity-{}", std::process::id()));
222 let _ = fs::remove_dir_all(&tmp);
223 fs::create_dir_all(tmp.join("keep")).unwrap();
224 fs::write(tmp.join("keep/a.txt"), "one\ntwo beta\n").unwrap();
225 fs::write(tmp.join("keep/b.txt"), "three\n").unwrap();
226 fs::write(tmp.join(".ignore"), "ignored\n").unwrap();
227 fs::create_dir_all(tmp.join("ignored")).unwrap();
228 fs::write(tmp.join("ignored/hidden.txt"), "beta skip\n").unwrap();
229
230 let idx = tmp.join(".sift");
231 let _ = IndexBuilder::new(&tmp).with_dir(&idx).build().unwrap();
232 let index = Index::open(&idx).unwrap();
233
234 let pat = vec![".*".to_string()];
235 let opts = SearchOptions::default();
236 let q = CompiledSearch::new(&pat, opts).unwrap();
237 let mut from_index = q.collect_index_matches(&index).unwrap();
238 let mut from_walk = q.collect_walk_matches(&tmp).unwrap();
239 from_index.sort_by(|a, b| (&a.file, a.line, &a.text).cmp(&(&b.file, b.line, &b.text)));
240 from_walk.sort_by(|a, b| (&a.file, a.line, &a.text).cmp(&(&b.file, b.line, &b.text)));
241 assert_eq!(from_index, from_walk);
242 }
243
244 #[test]
245 fn build_open_single_file_search_finds_line() {
246 let tmp = std::env::temp_dir().join(format!("sift-single-file-{}", std::process::id()));
247 let _ = fs::remove_dir_all(&tmp);
248 fs::create_dir_all(&tmp).unwrap();
249 let file = tmp.join("one.txt");
250 fs::write(&file, "alpha\nbeta needle\n").unwrap();
251
252 let idx = tmp.join(".sift");
253 let _ = IndexBuilder::new(&file).with_dir(&idx).build().unwrap();
254 let index = Index::open(&idx).unwrap();
255
256 let expected_root = file.canonicalize().unwrap().parent().unwrap().to_path_buf();
257 assert_eq!(
258 normalized_path(&index.root),
259 normalized_path(&expected_root)
260 );
261 assert!(matches!(index.corpus_kind, index::CorpusKind::File { .. }));
262 assert_eq!(index.file_count(), 1);
263 assert_eq!(index.file_path(0).unwrap(), std::path::Path::new("one.txt"));
264
265 let pat = vec!["needle".to_string()];
266 let q = CompiledSearch::new(&pat, SearchOptions::default()).unwrap();
267 let hits = q.collect_index_matches(&index).unwrap();
268 assert_eq!(hits.len(), 1);
269 assert_eq!(
270 normalized_path(&hits[0].file),
271 normalized_path(&file.canonicalize().unwrap())
272 );
273 assert_eq!(hits[0].line, 2);
274 }
275
276 #[test]
277 fn single_file_meta_is_json_with_explicit_kind() {
278 let tmp =
279 std::env::temp_dir().join(format!("sift-single-file-meta-{}", std::process::id()));
280 let _ = fs::remove_dir_all(&tmp);
281 fs::create_dir_all(&tmp).unwrap();
282 let file = tmp.join("one.txt");
283 fs::write(&file, "alpha\n").unwrap();
284
285 let idx = tmp.join(".sift");
286 let _ = IndexBuilder::new(&file).with_dir(&idx).build().unwrap();
287 let meta = fs::read_to_string(idx.join(META_FILENAME)).unwrap();
288
289 assert!(
290 meta.contains("\"kind\": \"file\""),
291 "unexpected meta: {meta}"
292 );
293 assert!(meta.contains("\"entries\""), "unexpected meta: {meta}");
294 assert!(meta.contains("\"one.txt\""), "unexpected meta: {meta}");
295 }
296}