1mod index;
6mod planner;
7mod search;
8mod storage;
9mod verify;
10
11pub use index::{Index, IndexBuilder, QueryPlan};
12pub use storage::{lexicon, postings};
13pub use verify::{compile_pattern, compile_search_pattern};
14
15pub use planner::TrigramPlan;
16pub use search::{walk_file_paths, CompiledSearch, Match, SearchMatchFlags, SearchOptions};
17
18pub use ignore::{Walk, WalkBuilder};
19
20pub use index::trigram::extract_trigrams;
21
22use std::path::PathBuf;
23
24use thiserror::Error;
25
/// File name of the index metadata file inside an index directory.
pub const META_FILENAME: &str = "sift.meta";
/// File name of the `files` table inside an index directory.
// NOTE(review): presumably the list of indexed file paths — confirm against `storage`.
pub const FILES_BIN: &str = "files.bin";
/// File name of the lexicon table inside an index directory.
pub const LEXICON_BIN: &str = "lexicon.bin";
/// File name of the postings table inside an index directory.
pub const POSTINGS_BIN: &str = "postings.bin";
30
31#[derive(Debug, Error)]
32pub enum Error {
33 #[error("IO error: {0}")]
34 Io(#[from] std::io::Error),
35
36 #[error("ignore walk error: {0}")]
37 Ignore(#[from] ignore::Error),
38
39 #[error("regex error: {0}")]
40 Regex(#[from] Box<regex_automata::meta::BuildError>),
41
42 #[error("search patterns must not be empty")]
43 EmptyPatterns,
44
45 #[error("invalid index metadata: {0}")]
46 InvalidMeta(PathBuf),
47
48 #[error("index not initialized (missing {0})")]
49 MissingMeta(PathBuf),
50
51 #[error("index component missing: {0}")]
52 MissingComponent(PathBuf),
53}
54
55pub type Result<T> = std::result::Result<T, Error>;
56
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;

    /// Per-test scratch directory under the system temp dir, removed again on drop.
    ///
    /// The directory is named `{prefix}-{pid}`, so every test must pass its own prefix to
    /// stay clear of the other tests running in the same process.
    struct ScratchDir(std::path::PathBuf);

    impl ScratchDir {
        /// Creates a fresh, empty directory, wiping any leftover from an earlier run.
        fn new(prefix: &str) -> Self {
            let root = std::env::temp_dir().join(format!("{prefix}-{}", std::process::id()));
            // Best effort: the directory normally does not exist yet.
            let _ = fs::remove_dir_all(&root);
            fs::create_dir_all(&root).unwrap();
            Self(root)
        }

        /// Returns an owned copy of the directory path.
        fn path(&self) -> std::path::PathBuf {
            self.0.clone()
        }
    }

    impl Drop for ScratchDir {
        fn drop(&mut self) {
            // Best effort: a failed cleanup must not mask the test's own outcome.
            let _ = fs::remove_dir_all(&self.0);
        }
    }

    /// Builds an index over a one-file tree, reopens it and finds a regex match by line.
    #[test]
    fn build_open_search_finds_line() {
        // Declared first so it is dropped last, after the opened index.
        let scratch = ScratchDir::new("sift-core-test");
        let tmp = scratch.path();
        fs::create_dir_all(tmp.join("src")).unwrap();
        fs::write(tmp.join("src/lib.rs"), "fn hello() {\n let x = 1;\n}\n").unwrap();

        let idx = tmp.join(".index");
        let _ = IndexBuilder::new(&tmp).with_dir(&idx).build().unwrap();

        let index = Index::open(&idx).unwrap();
        assert!(!index.lexicon.is_empty());
        let pat = vec![r"let\s+x".to_string()];
        let q = CompiledSearch::new(&pat, SearchOptions::default()).unwrap();
        let hits = q.search_index(&index).unwrap();
        assert_eq!(hits.len(), 1);
        assert!(hits[0].file.ends_with("src/lib.rs"));
        assert_eq!(hits[0].line, 2);
    }

    /// Opening a directory that holds no metadata file reports `MissingMeta`.
    #[test]
    fn open_missing_meta_errors() {
        let scratch = ScratchDir::new("sift-missing-meta");
        let tmp = scratch.path();
        assert!(matches!(Index::open(&tmp), Err(Error::MissingMeta(_))));
    }

    /// Metadata alone, without the other index files, reports `MissingComponent`.
    #[test]
    fn open_missing_table_errors() {
        let scratch = ScratchDir::new("sift-missing-table");
        let tmp = scratch.path();
        fs::write(tmp.join(META_FILENAME), "/tmp/foo\n").unwrap();
        assert!(matches!(Index::open(&tmp), Err(Error::MissingComponent(_))));
    }

    /// An empty metadata file reports `InvalidMeta`.
    #[test]
    fn open_empty_meta_errors() {
        let scratch = ScratchDir::new("sift-empty-meta");
        let tmp = scratch.path();
        fs::write(tmp.join(META_FILENAME), "").unwrap();
        assert!(matches!(Index::open(&tmp), Err(Error::InvalidMeta(_))));
    }

    /// `explain` echoes the pattern and reports the `full_scan` mode for `foo.*`.
    #[test]
    fn explain_returns_naive_plan() {
        let scratch = ScratchDir::new("sift-explain");
        let tmp = scratch.path();
        let idx = tmp.join(".index");
        let _ = IndexBuilder::new(&tmp).with_dir(&idx).build().unwrap();
        let index = Index::open(&idx).unwrap();
        let plan = index.explain("foo.*");
        assert_eq!(plan.pattern, "foo.*");
        assert_eq!(plan.mode, "full_scan");
    }

    /// A literal search through the index returns the same hits as a directory walk.
    #[test]
    fn indexed_search_matches_naive_for_literal() {
        let scratch = ScratchDir::new("sift-idx-parity");
        let tmp = scratch.path();
        fs::create_dir_all(tmp.join("a")).unwrap();
        fs::create_dir_all(tmp.join("b")).unwrap();
        fs::write(tmp.join("a/x.txt"), "alpha beta\n").unwrap();
        fs::write(tmp.join("b/y.txt"), "gamma delta\n").unwrap();

        let idx = tmp.join(".index");
        let _ = IndexBuilder::new(&tmp).with_dir(&idx).build().unwrap();
        let index = Index::open(&idx).unwrap();

        let pat = vec!["beta".to_string()];
        let opts = SearchOptions::default();
        let q = CompiledSearch::new(&pat, opts).unwrap();
        let naive = q.search_walk(&tmp, None).unwrap();
        let indexed = q.search_index(&index).unwrap();
        assert_eq!(indexed, naive);
    }

    /// Every file in a multi-file tree yields its hit when searched through the index.
    #[test]
    fn full_scan_parallel_candidate_path_finds_all_files() {
        let scratch = ScratchDir::new("sift-parallel-fs");
        let tmp = scratch.path();
        fs::create_dir_all(tmp.join("d")).unwrap();

        let min_parallel = crate::search::parallel_candidate_min_files();
        // NOTE(review): `usize::MAX` presumably means the parallel path is disabled —
        // confirm against `search`. Otherwise the fixture size is capped to 2..=64 files.
        let n_files = if min_parallel == usize::MAX {
            3
        } else {
            min_parallel.clamp(2, 64)
        };
        for i in 0..n_files {
            fs::write(
                tmp.join("d").join(format!("f{i}.txt")),
                format!("line {i} needle\n"),
            )
            .unwrap();
        }
        let idx = tmp.join(".index");
        let _ = IndexBuilder::new(&tmp).with_dir(&idx).build().unwrap();
        let index = Index::open(&idx).unwrap();
        assert_eq!(index.files.len(), n_files);

        let pat = vec!["needle".to_string()];
        let opts = SearchOptions::default();
        let q = CompiledSearch::new(&pat, opts).unwrap();
        let hits = q.search_index(&index).unwrap();
        assert_eq!(hits.len(), n_files);
    }

    /// With a match-everything pattern, index search and a fresh walk agree on the hits
    /// for a tree that also contains an `.ignore` file and a directory it names.
    #[test]
    fn full_scan_uses_files_bin_same_hits_as_fresh_walk() {
        let scratch = ScratchDir::new("sift-fullscan-parity");
        let tmp = scratch.path();
        fs::create_dir_all(tmp.join("keep")).unwrap();
        fs::write(tmp.join("keep/a.txt"), "one\ntwo beta\n").unwrap();
        fs::write(tmp.join("keep/b.txt"), "three\n").unwrap();
        fs::write(tmp.join(".ignore"), "ignored\n").unwrap();
        fs::create_dir_all(tmp.join("ignored")).unwrap();
        fs::write(tmp.join("ignored/hidden.txt"), "beta skip\n").unwrap();

        let idx = tmp.join(".index");
        let _ = IndexBuilder::new(&tmp).with_dir(&idx).build().unwrap();
        let index = Index::open(&idx).unwrap();

        let pat = vec![".*".to_string()];
        let opts = SearchOptions::default();
        let q = CompiledSearch::new(&pat, opts).unwrap();
        let mut from_index = q.search_index(&index).unwrap();
        let mut from_walk = q.search_walk(&tmp, None).unwrap();
        // Sort both sides the same way so the comparison ignores result order.
        from_index.sort_by(|a, b| (&a.file, a.line, &a.text).cmp(&(&b.file, b.line, &b.text)));
        from_walk.sort_by(|a, b| (&a.file, a.line, &a.text).cmp(&(&b.file, b.line, &b.text)));
        assert_eq!(from_index, from_walk);
    }
}