1use crate::bloom::BloomFilter;
6use crate::error::{Error, Result};
7use crate::format::*;
8use crate::posting::PostingList;
9use crate::string_pool::StringPoolReader;
10use crate::trigram::Trigram;
11use memmap2::Mmap;
12use std::fs::File;
13use std::path::{Path, PathBuf};
14use std::time::UNIX_EPOCH;
15
16pub struct Reader {
17 mmap: Mmap,
18 pub header: Header,
19 string_pool: StringPoolReader<'static>,
20}
21
/// Location and statistics of one trigram's posting list, as decoded from a
/// trigram table entry by `Reader::get_trigram`.
///
/// The type is a small bundle of integers, so it is `Copy` and comparable.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct TrigramInfo {
    /// Byte offset of the posting list inside the mapped index file
    /// (`Reader::decode_postings` uses it as the start of the slice).
    pub posting_offset: u64,
    /// Length of the encoded posting list in bytes.
    pub posting_length: u32,
    /// Document-frequency field of the table entry
    /// (presumably the number of files containing the trigram — confirm with the writer).
    pub doc_frequency: u32,
}
28
29#[derive(Debug)]
30pub struct FileInfo {
31 pub file_id: u32,
32 pub path: PathBuf,
33 pub status: FileStatus,
34 pub mtime_ns: u64,
35 pub size_bytes: u64,
36 pub content_hash: u64,
37}
38
39impl Reader {
40 pub fn open(path: &Path) -> Result<Self> {
41 let file = File::open(path)?;
42 let mmap = unsafe { Mmap::map(&file)? };
43
44 if mmap.len() < HEADER_SIZE {
45 return Err(Error::IndexTooSmall);
46 }
47
48 let header = Header::parse(&mmap[0..HEADER_SIZE])?;
49 header.validate_bounds(mmap.len() as u64)?;
50
51 let string_pool_data: &'static [u8] = unsafe {
54 let slice = &mmap[header.string_pool_offset as usize
55 ..(header.string_pool_offset + header.string_pool_size) as usize];
56 std::mem::transmute(slice)
57 };
58 let string_pool = StringPoolReader::new(string_pool_data)?;
59
60 Ok(Self {
61 mmap,
62 header,
63 string_pool,
64 })
65 }
66
67 pub fn get_last_modified(root: &Path) -> Result<u64> {
68 let mut last_modified = 0u64;
69 let walker = ignore::WalkBuilder::new(root)
70 .hidden(false)
71 .git_ignore(true)
72 .require_git(false)
73 .add_custom_ignore_filename(".ixignore")
74 .filter_entry(move |entry| {
75 let path = entry.path();
76 if entry.file_type().map(|t| t.is_dir()).unwrap_or(false) {
77 let name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
78 if name == "target" || name == ".git" || name == "node_modules" || name == ".ix"
79 {
80 return false;
81 }
82 }
83 true
84 })
85 .build();
86
87 for result in walker {
88 let entry = result.map_err(|e| Error::Config(e.to_string()))?;
89 if entry.file_type().map(|t| t.is_file()).unwrap_or(false) {
90 let metadata = entry.metadata().map_err(|e| Error::Config(e.to_string()))?;
91 let mtime = metadata
92 .modified()
93 .and_then(|t| {
94 t.duration_since(UNIX_EPOCH)
95 .map_err(|_| std::io::Error::other("time went backwards"))
96 })
97 .map(|d| d.as_micros() as u64)
98 .unwrap_or(0);
99 if mtime > last_modified {
100 last_modified = mtime;
101 }
102 }
103 }
104 Ok(last_modified)
105 }
106
107 pub fn get_trigram(&self, trigram: Trigram) -> Option<TrigramInfo> {
108 let count = self.header.trigram_count as usize;
109 let table_start = self.header.trigram_table_offset as usize;
110
111 let mut low = 0;
112 let mut high = count;
113
114 while low < high {
115 let mid = low + (high - low) / 2;
116 let entry_off = table_start + mid * TRIGRAM_ENTRY_SIZE;
117
118 let key_bytes = self.mmap.get(entry_off..entry_off + 4)?;
120 let key = u32::from_le_bytes(key_bytes.try_into().ok()?);
121
122 if key == trigram {
123 let entry = self.mmap.get(entry_off..entry_off + TRIGRAM_ENTRY_SIZE)?;
124
125 let mut off_bytes = [0u8; 8];
127 off_bytes[..6].copy_from_slice(&entry[4..10]);
128 let posting_offset = u64::from_le_bytes(off_bytes);
129
130 let posting_length = entry
131 .get(10..14)
132 .and_then(|s| s.try_into().ok())
133 .map(u32::from_le_bytes)
134 .unwrap_or(0);
135 let doc_frequency = entry
136 .get(14..18)
137 .and_then(|s| s.try_into().ok())
138 .map(u32::from_le_bytes)
139 .unwrap_or(0);
140
141 return Some(TrigramInfo {
142 posting_offset,
143 posting_length,
144 doc_frequency,
145 });
146 } else if key < trigram {
147 low = mid + 1;
148 } else {
149 high = mid;
150 }
151 }
152
153 None
154 }
155
156 pub fn decode_postings(&self, info: &TrigramInfo) -> Result<PostingList> {
157 let start = info.posting_offset as usize;
158 let end = start + info.posting_length as usize;
159 if end > self.mmap.len() {
160 return Err(Error::PostingOutOfBounds);
161 }
162 PostingList::decode(&self.mmap[start..end])
163 }
164
165 pub fn get_file(&self, file_id: u32) -> Result<FileInfo> {
166 if file_id >= self.header.file_count {
167 return Err(Error::FileIdOutOfBounds(file_id));
168 }
169
170 let entry_off = self.header.file_table_offset as usize + file_id as usize * FILE_ENTRY_SIZE;
171 let entry = self
172 .mmap
173 .get(entry_off..entry_off + FILE_ENTRY_SIZE)
174 .ok_or(Error::SectionOutOfBounds {
175 section: "file_entry",
176 offset: entry_off as u64,
177 size: FILE_ENTRY_SIZE as u64,
178 file_len: self.mmap.len() as u64,
179 })?;
180
181 let path_off = u32::from_le_bytes(
182 entry[4..8]
183 .try_into()
184 .map_err(|_| Error::Config("invalid path offset".into()))?,
185 );
186 let status = FileStatus::from_u8(entry[10]);
187 let mtime_ns = u64::from_le_bytes(
188 entry[12..20]
189 .try_into()
190 .map_err(|_| Error::Config("invalid mtime".into()))?,
191 );
192 let size_bytes = u64::from_le_bytes(
193 entry[20..28]
194 .try_into()
195 .map_err(|_| Error::Config("invalid size".into()))?,
196 );
197 let content_hash = u64::from_le_bytes(
198 entry[28..36]
199 .try_into()
200 .map_err(|_| Error::Config("invalid hash".into()))?,
201 );
202
203 let path = self.string_pool.resolve(path_off)?;
204
205 Ok(FileInfo {
206 file_id,
207 path: PathBuf::from(path),
208 status,
209 mtime_ns,
210 size_bytes,
211 content_hash,
212 })
213 }
214
215 pub fn bloom_may_contain(&self, file_id: u32, trigram: Trigram) -> bool {
216 if !self.header.has_bloom() {
217 return true;
218 }
219
220 let entry_off = self.header.file_table_offset as usize + file_id as usize * FILE_ENTRY_SIZE;
221 let bloom_rel_off_bytes = self.mmap.get(entry_off + 40..entry_off + 44);
222 if bloom_rel_off_bytes.is_none() {
223 return true;
224 }
225
226 let bloom_rel_off = bloom_rel_off_bytes
227 .and_then(|b| b.try_into().ok())
228 .map(u32::from_le_bytes)
229 .unwrap_or(0);
230
231 let bloom_abs_off = self.header.bloom_offset as usize + bloom_rel_off as usize;
232 if bloom_abs_off + 4 > self.mmap.len() {
233 return true;
234 }
235
236 let size = u16::from_le_bytes(
237 self.mmap[bloom_abs_off..bloom_abs_off + 2]
238 .try_into()
239 .unwrap_or([0u8; 2]),
240 ) as usize;
241 let num_hashes = self.mmap[bloom_abs_off + 2];
242 let bits = match self.mmap.get(bloom_abs_off + 4..bloom_abs_off + 4 + size) {
243 Some(b) => b,
244 None => return true,
245 };
246
247 BloomFilter::slice_contains(bits, num_hashes, trigram)
248 }
249}