1use crate::bloom::BloomFilter;
6use crate::error::{Error, Result};
7use crate::format::*;
8use crate::posting::PostingList;
9use crate::string_pool::StringPoolReader;
10use crate::trigram::Trigram;
11use memmap2::Mmap;
12use std::fs::File;
13use std::path::{Path, PathBuf};
14use std::time::UNIX_EPOCH;
15
16#[cfg(unix)]
17use std::os::unix::fs::MetadataExt;
18
/// Summary counters describing one index shard.
///
/// Produced by `Reader::metadata`, which copies each value straight out of the parsed header.
/// The type is a small bundle of integers, so it is `Copy` and comparable with `==`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct ShardMetadata {
    /// Value of the header's `created_at` field.
    /// NOTE(review): the time unit is not visible in this file — confirm against the writer.
    pub shard_timestamp: u64,
    /// Number of entries in the shard's file table.
    pub file_count: u32,
    /// Number of entries in the shard's trigram table.
    pub trigram_count: u32,
}
25
26pub struct Reader {
27 mmap: Mmap,
28 pub header: Header,
29 string_pool: StringPoolReader<'static>,
30 inode: Option<u64>,
31}
32
/// Location and statistics of one trigram's posting list, as decoded from a trigram-table entry.
///
/// The type only holds integers, so it is `Copy` and comparable with `==`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct TrigramInfo {
    /// Byte offset of the posting list within the mapped file. `Reader::get_trigram` decodes it
    /// from 6 little-endian bytes, so values it produces fit in 48 bits.
    pub posting_offset: u64,
    /// Length of the encoded posting list in bytes.
    pub posting_length: u32,
    /// Value stored in the last four bytes read from the table entry.
    /// NOTE(review): presumably the number of files containing the trigram — confirm.
    pub doc_frequency: u32,
}
39
/// One decoded file-table entry, as returned by `Reader::get_file`.
#[derive(Debug)]
pub struct FileInfo {
    /// The id that was passed to `Reader::get_file`.
    pub file_id: u32,
    /// Path resolved from the string pool using the entry's path offset (entry bytes 4..8).
    pub path: PathBuf,
    /// Status decoded from entry byte 10 via `FileStatus::from_u8`.
    pub status: FileStatus,
    /// Little-endian `u64` at entry bytes 12..20.
    /// NOTE(review): the name suggests nanoseconds since the Unix epoch — confirm with the writer.
    pub mtime_ns: u64,
    /// Little-endian `u64` at entry bytes 20..28.
    pub size_bytes: u64,
    /// Little-endian `u64` at entry bytes 28..36; the hash algorithm is not visible in this file.
    pub content_hash: u64,
}
49
50impl Reader {
51 pub fn open(path: &Path) -> Result<Self> {
52 let file = File::open(path)?;
53
54 let mmap = unsafe { Mmap::map(&file)? };
58
59 if mmap.len() < HEADER_SIZE {
60 return Err(Error::IndexTooSmall);
61 }
62
63 let header = Header::parse(&mmap[0..HEADER_SIZE])?;
64 header.validate_bounds(mmap.len() as u64)?;
65
66 #[cfg(unix)]
67 let inode = Some(file.metadata()?.ino());
68
69 #[cfg(not(unix))]
70 let inode = None;
71
72 let string_pool_data: &'static [u8] = unsafe {
79 let start = header.string_pool_offset as usize;
80 let end = (header.string_pool_offset + header.string_pool_size) as usize;
81 std::mem::transmute::<&[u8], &'static [u8]>(&mmap[start..end])
82 };
83 let string_pool = StringPoolReader::new(string_pool_data)?;
84
85 Ok(Self {
86 mmap,
87 header,
88 string_pool,
89 inode,
90 })
91 }
92
93 pub fn get_last_modified(root: &Path) -> Result<u64> {
94 let mut last_modified = 0u64;
95 let walker = ignore::WalkBuilder::new(root)
96 .hidden(false)
97 .git_ignore(true)
98 .require_git(false)
99 .add_custom_ignore_filename(".ixignore")
100 .filter_entry(move |entry| {
101 let path = entry.path();
102 let name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
103
104 if entry.file_type().map(|t| t.is_dir()).unwrap_or(false)
105 && matches!(
106 name,
107 "lost+found"
108 | ".git"
109 | "node_modules"
110 | "target"
111 | "__pycache__"
112 | ".tox"
113 | ".venv"
114 | "venv"
115 | ".ix"
116 )
117 {
118 return false;
119 }
120
121 if entry.file_type().map(|t| t.is_file()).unwrap_or(false) {
122 let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
123 if matches!(
124 ext,
125 "so" | "o"
126 | "dylib"
127 | "a"
128 | "dll"
129 | "exe"
130 | "pyc"
131 | "jpg"
132 | "png"
133 | "gif"
134 | "mp4"
135 | "mp3"
136 | "pdf"
137 | "zip"
138 | "7z"
139 | "rar"
140 | "sqlite"
141 | "db"
142 | "bin"
143 ) || name.ends_with(".tar.gz")
144 {
145 return false;
146 }
147 }
148 true
149 })
150 .build();
151
152 for result in walker {
153 match result {
154 Ok(entry) => {
155 if entry.file_type().map(|t| t.is_file()).unwrap_or(false) {
156 let metadata =
157 entry.metadata().map_err(|e| Error::Config(e.to_string()))?;
158 let mtime = metadata
159 .modified()
160 .and_then(|t| {
161 t.duration_since(UNIX_EPOCH)
162 .map_err(|_| std::io::Error::other("time went backwards"))
163 })
164 .map(|d| d.as_micros() as u64)
165 .unwrap_or(0);
166 if mtime > last_modified {
167 last_modified = mtime;
168 }
169 }
170 }
171 Err(e) => {
172 eprintln!("ix: warning: stale check skipping path: {}", e);
173 }
174 }
175 }
176 Ok(last_modified)
177 }
178
179 pub fn get_trigram(&self, trigram: Trigram) -> Option<TrigramInfo> {
180 let count = self.header.trigram_count as usize;
181 let table_start = self.header.trigram_table_offset as usize;
182
183 let mut low = 0;
184 let mut high = count;
185
186 while low < high {
187 let mid = low + (high - low) / 2;
188 let entry_off = table_start + mid * TRIGRAM_ENTRY_SIZE;
189
190 let key_bytes = self.mmap.get(entry_off..entry_off + 4)?;
191 let key = u32::from_le_bytes(key_bytes.try_into().ok()?);
192
193 if key == trigram {
194 let entry = self.mmap.get(entry_off..entry_off + TRIGRAM_ENTRY_SIZE)?;
195
196 let mut off_bytes = [0u8; 8];
197 off_bytes[..6].copy_from_slice(&entry[4..10]);
198 let posting_offset = u64::from_le_bytes(off_bytes);
199
200 let posting_length = entry
201 .get(10..14)
202 .and_then(|s| s.try_into().ok())
203 .map(u32::from_le_bytes)?;
204
205 let doc_frequency = entry
206 .get(14..18)
207 .and_then(|s| s.try_into().ok())
208 .map(u32::from_le_bytes)?;
209
210 return Some(TrigramInfo {
211 posting_offset,
212 posting_length,
213 doc_frequency,
214 });
215 } else if key < trigram {
216 low = mid + 1;
217 } else {
218 high = mid;
219 }
220 }
221
222 None
223 }
224
225 pub fn decode_postings(&self, info: &TrigramInfo) -> Result<PostingList> {
226 let start = info.posting_offset as usize;
227 let end = start + info.posting_length as usize;
228 if end > self.mmap.len() {
229 return Err(Error::PostingOutOfBounds);
230 }
231 PostingList::decode(&self.mmap[start..end])
232 }
233
234 pub fn get_file(&self, file_id: u32) -> Result<FileInfo> {
235 if file_id >= self.header.file_count {
236 return Err(Error::FileIdOutOfBounds(file_id));
237 }
238
239 let entry_off = self.header.file_table_offset as usize + file_id as usize * FILE_ENTRY_SIZE;
240 let entry = self
241 .mmap
242 .get(entry_off..entry_off + FILE_ENTRY_SIZE)
243 .ok_or(Error::SectionOutOfBounds {
244 section: "file_entry",
245 offset: entry_off as u64,
246 size: FILE_ENTRY_SIZE as u64,
247 file_len: self.mmap.len() as u64,
248 })?;
249
250 let path_off = u32::from_le_bytes(
251 entry[4..8]
252 .try_into()
253 .map_err(|_| Error::Config("invalid path offset".into()))?,
254 );
255 let status = FileStatus::from_u8(entry[10]);
256 let mtime_ns = u64::from_le_bytes(
257 entry[12..20]
258 .try_into()
259 .map_err(|_| Error::Config("invalid mtime".into()))?,
260 );
261 let size_bytes = u64::from_le_bytes(
262 entry[20..28]
263 .try_into()
264 .map_err(|_| Error::Config("invalid size".into()))?,
265 );
266 let content_hash = u64::from_le_bytes(
267 entry[28..36]
268 .try_into()
269 .map_err(|_| Error::Config("invalid hash".into()))?,
270 );
271
272 let path = self.string_pool.resolve(path_off)?;
273
274 Ok(FileInfo {
275 file_id,
276 path: PathBuf::from(path),
277 status,
278 mtime_ns,
279 size_bytes,
280 content_hash,
281 })
282 }
283
284 pub fn bloom_may_contain(&self, file_id: u32, trigram: Trigram) -> bool {
285 if !self.header.has_bloom() {
286 return true;
287 }
288
289 let entry_off = self.header.file_table_offset as usize + file_id as usize * FILE_ENTRY_SIZE;
290 let Some(bloom_bytes) = self.mmap.get(entry_off + 40..entry_off + 44) else {
291 return true;
292 };
293
294 let bloom_rel_off = u32::from_le_bytes(
295 bloom_bytes
296 .try_into()
297 .expect("bloom_bytes is exactly 4 bytes"),
298 );
299 let bloom_abs_off = self.header.bloom_offset as usize + bloom_rel_off as usize;
300
301 let Some(size_bytes) = self.mmap.get(bloom_abs_off..bloom_abs_off + 2) else {
302 return true;
303 };
304 let size = u16::from_le_bytes(
305 size_bytes
306 .try_into()
307 .expect("size_bytes is exactly 2 bytes"),
308 ) as usize;
309
310 let num_hashes = self.mmap.get(bloom_abs_off + 2).copied().unwrap_or(0);
311 let Some(bits) = self.mmap.get(bloom_abs_off + 4..bloom_abs_off + 4 + size) else {
312 return true;
313 };
314
315 BloomFilter::slice_contains(bits, num_hashes, trigram)
316 }
317
318 pub fn metadata(&self) -> ShardMetadata {
319 ShardMetadata {
320 shard_timestamp: self.header.created_at,
321 file_count: self.header.file_count,
322 trigram_count: self.header.trigram_count,
323 }
324 }
325
326 pub fn is_stale(&self, path: &Path) -> bool {
335 let current = match std::fs::metadata(path) {
336 Ok(m) => m,
337 Err(_) => return true,
338 };
339
340 if current.len() as usize != self.mmap.len() {
341 return true;
342 }
343
344 #[cfg(unix)]
345 {
346 if let Some(stored_inode) = self.inode
347 && current.ino() != stored_inode
348 {
349 return true;
350 }
351 }
352
353 false
354 }
355}