1mod index;
6mod planner;
7mod search;
8pub mod storage;
9mod verify;
10
11pub use index::{Index, IndexBuilder, QueryPlan};
12pub use storage::{lexicon, postings};
13pub use verify::{compile_pattern, compile_search_pattern};
14
15pub use planner::TrigramPlan;
16pub use search::{
17 walk_file_paths, CompiledSearch, Match, SearchMatchFlags, SearchMode, SearchOptions,
18 SearchOutput,
19};
20
21pub use ignore::{Walk, WalkBuilder};
22
23pub use index::trigram::extract_trigrams;
24
25use std::path::PathBuf;
26
27use thiserror::Error;
28
/// Name of the sift directory (`.sift`).
// NOTE(review): presumably created under the indexed root — confirm against callers.
pub const SIFT_DIR: &str = ".sift";
/// Name of the index sub-directory (`.index`).
// NOTE(review): its exact location relative to `SIFT_DIR` is not visible here — confirm.
pub const INDEX_SUBDIR: &str = ".index";
/// File name of the index metadata file, looked up directly inside an index directory.
pub const META_FILENAME: &str = "sift.meta";
/// File name of the binary file table.
// NOTE(review): presumably the list of indexed files — confirm against `storage`.
pub const FILES_BIN: &str = "files.bin";
/// File name of the binary lexicon.
// NOTE(review): presumably maps trigrams to posting lists — confirm against `storage::lexicon`.
pub const LEXICON_BIN: &str = "lexicon.bin";
/// File name of the binary postings data.
// NOTE(review): presumably the posting lists themselves — confirm against `storage::postings`.
pub const POSTINGS_BIN: &str = "postings.bin";
35
36#[derive(Debug, Error)]
37pub enum Error {
38 #[error("IO error: {0}")]
39 Io(#[from] std::io::Error),
40
41 #[error("ignore walk error: {0}")]
42 Ignore(#[from] ignore::Error),
43
44 #[error("regex error: {0}")]
45 Regex(#[from] Box<regex_automata::meta::BuildError>),
46
47 #[error("regex build error: {0}")]
48 RegexBuild(String),
49
50 #[error("search patterns must not be empty")]
51 EmptyPatterns,
52
53 #[error("invalid index metadata: {0}")]
54 InvalidMeta(PathBuf),
55
56 #[error("index not initialized (missing {0})")]
57 MissingMeta(PathBuf),
58
59 #[error("index component missing: {0}")]
60 MissingComponent(PathBuf),
61}
62
63pub type Result<T> = std::result::Result<T, Error>;
64
65#[cfg(test)]
66mod tests {
67 use super::*;
68 use std::fs;
69
/// Normalizes a path so that two spellings of the same location compare equal in tests.
///
/// * On Windows a leading `\\?\` prefix is removed.
/// * On macOS a leading `/private` component is removed.
/// * On every other platform the path is returned unchanged.
///
/// The path goes through `Path::display`, so non-UTF-8 components are rendered lossily.
fn normalized_path(p: &std::path::Path) -> std::path::PathBuf {
    let shown = p.display().to_string();
    let mut s = shown.as_str();
    // `cfg!` (rather than `#[cfg]`) keeps both branches type-checked on every platform.
    if cfg!(windows) {
        s = s.strip_prefix("\\\\?\\").unwrap_or(s);
    }
    if cfg!(target_os = "macos") {
        s = strip_private_root(s);
    }
    std::path::PathBuf::from(s)
}

/// Removes a leading `/private` path component, leaving any other text untouched.
///
/// Only a whole first component is stripped: `/privateer/x` and `/a/private/b` are
/// returned unchanged, as is the bare string `/private`.
fn strip_private_root(s: &str) -> &str {
    match s.strip_prefix("/private") {
        Some(rest) if rest.starts_with('/') => rest,
        _ => s,
    }
}
78
/// Builds an index over a one-file directory tree, reopens it, and checks that a regex
/// query yields exactly one hit, on line 2 of `src/lib.rs`.
#[test]
fn build_open_search_finds_line() {
    let tmp = std::env::temp_dir().join(format!("sift-core-test-{}", std::process::id()));
    // Best-effort: clear leftovers from an earlier run that happened to reuse this PID.
    let _ = fs::remove_dir_all(&tmp);
    fs::create_dir_all(tmp.join("src")).unwrap();
    fs::write(tmp.join("src/lib.rs"), "fn hello() {\n let x = 1;\n}\n").unwrap();

    let idx = tmp.join(".sift");
    let _ = IndexBuilder::new(&tmp).with_dir(&idx).build().unwrap();

    let index = Index::open(&idx).unwrap();
    assert!(index.file_count() > 0);
    let pat = vec![r"let\s+x".to_string()];
    let q = CompiledSearch::new(&pat, SearchOptions::default()).unwrap();
    let hits = q.collect_index_matches(&index).unwrap();
    assert_eq!(hits.len(), 1);
    assert!(hits[0].file.ends_with("src/lib.rs"));
    assert_eq!(hits[0].line, 2);

    // Clean up on success only, so a failing run leaves its files behind for inspection.
    // The index is released first in case it still holds files under `tmp` open.
    drop(index);
    let _ = fs::remove_dir_all(&tmp);
}
98
/// Opening an empty directory as an index must fail with `Error::MissingMeta`.
#[test]
fn open_missing_meta_errors() {
    let tmp = std::env::temp_dir().join(format!("sift-missing-meta-{}", std::process::id()));
    // Best-effort: clear leftovers from an earlier run that happened to reuse this PID.
    let _ = fs::remove_dir_all(&tmp);
    fs::create_dir_all(&tmp).unwrap();
    assert!(matches!(Index::open(&tmp), Err(Error::MissingMeta(_))));

    // Clean up on success only, so a failing run leaves its files behind for inspection.
    let _ = fs::remove_dir_all(&tmp);
}
106
/// A directory holding only a valid metadata file (and none of the other index files)
/// must fail to open with `Error::MissingComponent`.
#[test]
fn open_missing_table_errors() {
    let tmp = std::env::temp_dir().join(format!("sift-missing-table-{}", std::process::id()));
    // Best-effort: clear leftovers from an earlier run that happened to reuse this PID.
    let _ = fs::remove_dir_all(&tmp);
    fs::create_dir_all(&tmp).unwrap();

    // The corpus root is never created on disk; only its path is recorded in the metadata.
    let root_path = std::env::temp_dir().join("sift-test-root");
    let meta = crate::index::IndexMeta {
        root: root_path,
        kind: crate::index::CorpusKind::Directory,
    };
    fs::write(
        tmp.join(META_FILENAME),
        serde_json::to_string_pretty(&meta).unwrap(),
    )
    .unwrap();
    assert!(matches!(Index::open(&tmp), Err(Error::MissingComponent(_))));

    // Clean up on success only, so a failing run leaves its files behind for inspection.
    let _ = fs::remove_dir_all(&tmp);
}
124
/// A zero-length metadata file must be rejected with `Error::InvalidMeta`.
#[test]
fn open_empty_meta_errors() {
    let tmp = std::env::temp_dir().join(format!("sift-empty-meta-{}", std::process::id()));
    // Best-effort: clear leftovers from an earlier run that happened to reuse this PID.
    let _ = fs::remove_dir_all(&tmp);
    fs::create_dir_all(&tmp).unwrap();
    fs::write(tmp.join(META_FILENAME), "").unwrap();
    assert!(matches!(Index::open(&tmp), Err(Error::InvalidMeta(_))));

    // Clean up on success only, so a failing run leaves its files behind for inspection.
    let _ = fs::remove_dir_all(&tmp);
}
133
/// `Index::explain` on a pattern with a literal prefix (`foo.*`) must echo the pattern
/// and report the `indexed_candidates` mode.
#[test]
fn explain_returns_indexed_plan_for_literal_prefix() {
    let tmp = std::env::temp_dir().join(format!("sift-explain-indexed-{}", std::process::id()));
    // Best-effort: clear leftovers from an earlier run that happened to reuse this PID.
    let _ = fs::remove_dir_all(&tmp);
    fs::create_dir_all(&tmp).unwrap();
    fs::write(tmp.join("a.txt"), "alpha beta\ngamma delta\n").unwrap();

    let idx = tmp.join(".sift");
    let _ = IndexBuilder::new(&tmp).with_dir(&idx).build().unwrap();
    let index = Index::open(&idx).unwrap();

    let plan = index.explain("foo.*");
    assert_eq!(plan.pattern, "foo.*");
    assert_eq!(plan.mode, "indexed_candidates");

    // Clean up on success only, so a failing run leaves its files behind for inspection.
    // The index is released first in case it still holds files under `tmp` open.
    drop(index);
    let _ = fs::remove_dir_all(&tmp);
}
147
/// `Index::explain` on a pattern made only of character classes (no literal text) must
/// echo the pattern and report the `full_scan` mode.
#[test]
fn explain_returns_full_scan_for_true_no_literal() {
    let tmp =
        std::env::temp_dir().join(format!("sift-explain-fullscan-{}", std::process::id()));
    // Best-effort: clear leftovers from an earlier run that happened to reuse this PID.
    let _ = fs::remove_dir_all(&tmp);
    fs::create_dir_all(&tmp).unwrap();
    fs::write(tmp.join("a.txt"), "alpha beta\ngamma delta\n").unwrap();

    let idx = tmp.join(".sift");
    let _ = IndexBuilder::new(&tmp).with_dir(&idx).build().unwrap();
    let index = Index::open(&idx).unwrap();

    let plan = index.explain(r"\w{5}\s+\w{5}");
    assert_eq!(plan.pattern, r"\w{5}\s+\w{5}");
    assert_eq!(plan.mode, "full_scan");

    // Clean up on success only, so a failing run leaves its files behind for inspection.
    // The index is released first in case it still holds files under `tmp` open.
    drop(index);
    let _ = fs::remove_dir_all(&tmp);
}
162
/// For the literal pattern `beta`, searching through the index must return the same
/// matches as walking the directory tree directly.
#[test]
fn indexed_search_matches_naive_for_literal() {
    let tmp = std::env::temp_dir().join(format!("sift-idx-parity-{}", std::process::id()));
    // Best-effort: clear leftovers from an earlier run that happened to reuse this PID.
    let _ = fs::remove_dir_all(&tmp);
    fs::create_dir_all(tmp.join("a")).unwrap();
    fs::create_dir_all(tmp.join("b")).unwrap();
    fs::write(tmp.join("a/x.txt"), "alpha beta\n").unwrap();
    fs::write(tmp.join("b/y.txt"), "gamma delta\n").unwrap();

    let idx = tmp.join(".sift");
    let _ = IndexBuilder::new(&tmp).with_dir(&idx).build().unwrap();
    let index = Index::open(&idx).unwrap();

    let pat = vec!["beta".to_string()];
    let opts = SearchOptions::default();
    let q = CompiledSearch::new(&pat, opts).unwrap();
    let naive = q.collect_walk_matches(&tmp).unwrap();
    let indexed = q.collect_index_matches(&index).unwrap();
    assert_eq!(indexed, naive);

    // Clean up on success only, so a failing run leaves its files behind for inspection.
    // The index is released first in case it still holds files under `tmp` open.
    drop(index);
    let _ = fs::remove_dir_all(&tmp);
}
183
/// Indexes several files that each contain `needle` and checks that an index search
/// reports one hit per file.
///
/// The file count is derived from `parallel_candidate_min_files()`: 3 when it returns
/// `usize::MAX`, otherwise that value clamped to `2..=64`.
// NOTE(review): `usize::MAX` presumably means the parallel path is disabled — confirm.
#[test]
fn full_scan_parallel_candidate_path_finds_all_files() {
    let tmp = std::env::temp_dir().join(format!("sift-parallel-fs-{}", std::process::id()));
    // Best-effort: clear leftovers from an earlier run that happened to reuse this PID.
    let _ = fs::remove_dir_all(&tmp);
    fs::create_dir_all(tmp.join("d")).unwrap();

    let min_parallel = crate::search::parallel_candidate_min_files();
    let n_files = if min_parallel == usize::MAX {
        3
    } else {
        min_parallel.clamp(2, 64)
    };
    for i in 0..n_files {
        fs::write(
            tmp.join("d").join(format!("f{i}.txt")),
            format!("line {i} needle\n"),
        )
        .unwrap();
    }

    let idx = tmp.join(".sift");
    let _ = IndexBuilder::new(&tmp).with_dir(&idx).build().unwrap();
    let index = Index::open(&idx).unwrap();
    assert_eq!(index.file_count(), n_files);

    let pat = vec!["needle".to_string()];
    let opts = SearchOptions::default();
    let q = CompiledSearch::new(&pat, opts).unwrap();
    let hits = q.collect_index_matches(&index).unwrap();
    assert_eq!(hits.len(), n_files);

    // Clean up on success only, so a failing run leaves its files behind for inspection.
    // The index is released first in case it still holds files under `tmp` open.
    drop(index);
    let _ = fs::remove_dir_all(&tmp);
}
214
/// For the match-everything pattern `.*`, an index search and a fresh directory walk
/// must produce the same set of matches (compared after sorting by file, line, text).
///
/// The tree contains a `.ignore` file naming `ignored`, plus a file inside `ignored/`.
// NOTE(review): presumably both code paths are expected to skip `ignored/` — confirm.
#[test]
fn full_scan_uses_files_bin_same_hits_as_fresh_walk() {
    let tmp = std::env::temp_dir().join(format!("sift-fullscan-parity-{}", std::process::id()));
    // Best-effort: clear leftovers from an earlier run that happened to reuse this PID.
    let _ = fs::remove_dir_all(&tmp);
    fs::create_dir_all(tmp.join("keep")).unwrap();
    fs::write(tmp.join("keep/a.txt"), "one\ntwo beta\n").unwrap();
    fs::write(tmp.join("keep/b.txt"), "three\n").unwrap();
    fs::write(tmp.join(".ignore"), "ignored\n").unwrap();
    fs::create_dir_all(tmp.join("ignored")).unwrap();
    fs::write(tmp.join("ignored/hidden.txt"), "beta skip\n").unwrap();

    let idx = tmp.join(".sift");
    let _ = IndexBuilder::new(&tmp).with_dir(&idx).build().unwrap();
    let index = Index::open(&idx).unwrap();

    let pat = vec![".*".to_string()];
    let opts = SearchOptions::default();
    let q = CompiledSearch::new(&pat, opts).unwrap();
    let mut from_index = q.collect_index_matches(&index).unwrap();
    let mut from_walk = q.collect_walk_matches(&tmp).unwrap();
    // The two paths may report matches in different orders, so compare sorted lists.
    from_index.sort_by(|a, b| (&a.file, a.line, &a.text).cmp(&(&b.file, b.line, &b.text)));
    from_walk.sort_by(|a, b| (&a.file, a.line, &a.text).cmp(&(&b.file, b.line, &b.text)));
    assert_eq!(from_index, from_walk);

    // Clean up on success only, so a failing run leaves its files behind for inspection.
    // The index is released first in case it still holds files under `tmp` open.
    drop(index);
    let _ = fs::remove_dir_all(&tmp);
}
239
/// Builds an index whose corpus is a single file rather than a directory and checks:
/// the recorded root is the file's (canonical) parent directory, the corpus kind is
/// `File`, the only indexed path is `one.txt`, and a search for `needle` hits line 2
/// of that file.
#[test]
fn build_open_single_file_search_finds_line() {
    let tmp = std::env::temp_dir().join(format!("sift-single-file-{}", std::process::id()));
    // Best-effort: clear leftovers from an earlier run that happened to reuse this PID.
    let _ = fs::remove_dir_all(&tmp);
    fs::create_dir_all(&tmp).unwrap();
    let file = tmp.join("one.txt");
    fs::write(&file, "alpha\nbeta needle\n").unwrap();

    let idx = tmp.join(".sift");
    let _ = IndexBuilder::new(&file).with_dir(&idx).build().unwrap();
    let index = Index::open(&idx).unwrap();

    // Canonicalize once; both the root check and the hit check compare against it.
    let canonical = file.canonicalize().unwrap();
    let expected_root = canonical.parent().unwrap().to_path_buf();
    assert_eq!(
        normalized_path(&index.root),
        normalized_path(&expected_root)
    );
    // Fully qualified so the module path cannot be confused with the local `index`.
    assert!(matches!(
        index.corpus_kind,
        crate::index::CorpusKind::File { .. }
    ));
    assert_eq!(index.file_count(), 1);
    assert_eq!(index.file_path(0).unwrap(), std::path::Path::new("one.txt"));

    let pat = vec!["needle".to_string()];
    let q = CompiledSearch::new(&pat, SearchOptions::default()).unwrap();
    let hits = q.collect_index_matches(&index).unwrap();
    assert_eq!(hits.len(), 1);
    assert_eq!(normalized_path(&hits[0].file), normalized_path(&canonical));
    assert_eq!(hits[0].line, 2);

    // Clean up on success only, so a failing run leaves its files behind for inspection.
    // The index is released first in case it still holds files under `tmp` open.
    drop(index);
    let _ = fs::remove_dir_all(&tmp);
}
271
/// After indexing a single file, the metadata file must be text containing
/// `"kind": "file"`, an `"entries"` key, and the file name `"one.txt"`.
#[test]
fn single_file_meta_is_json_with_explicit_kind() {
    let tmp =
        std::env::temp_dir().join(format!("sift-single-file-meta-{}", std::process::id()));
    // Best-effort: clear leftovers from an earlier run that happened to reuse this PID.
    let _ = fs::remove_dir_all(&tmp);
    fs::create_dir_all(&tmp).unwrap();
    let file = tmp.join("one.txt");
    fs::write(&file, "alpha\n").unwrap();

    let idx = tmp.join(".sift");
    let _ = IndexBuilder::new(&file).with_dir(&idx).build().unwrap();
    let meta = fs::read_to_string(idx.join(META_FILENAME)).unwrap();

    // Substring checks only: the exact layout of the metadata is not pinned down here.
    assert!(
        meta.contains("\"kind\": \"file\""),
        "unexpected meta: {meta}"
    );
    assert!(meta.contains("\"entries\""), "unexpected meta: {meta}");
    assert!(meta.contains("\"one.txt\""), "unexpected meta: {meta}");

    // Clean up on success only, so a failing run leaves its files behind for inspection.
    let _ = fs::remove_dir_all(&tmp);
}
292}