/// Magic bytes that must open an index file: ASCII `IX01`.
///
/// `Header::parse` compares the first four input bytes against this value.
pub const MAGIC: [u8; 4] = *b"IX01";

/// Major format version; `Header::parse` requires an exact match.
pub const VERSION_MAJOR: u16 = 1;
/// Lowest minor format version that `Header::parse` accepts.
pub const VERSION_MINOR: u16 = 2;

/// Length in bytes of the fixed header; shorter inputs are rejected by
/// `Header::parse`.
pub const HEADER_SIZE: usize = 0x100;

/// Size in bytes of one trigram-table entry (going by the name; the entry
/// layout itself is not defined next to this constant).
pub const TRIGRAM_ENTRY_SIZE: usize = 20;

/// Size in bytes of one file-table entry (going by the name; the entry
/// layout itself is not defined next to this constant).
pub const FILE_ENTRY_SIZE: usize = 48;
21
/// Bit masks for the 64-bit `flags` word stored in the header.
pub mod flags {
    /// Bit 0 — tested by `Header::has_bloom`.
    pub const HAS_BLOOM_FILTERS: u64 = 0b0001;
    /// Bit 1 — by its name, marks that content hashes are stored.
    pub const HAS_CONTENT_HASHES: u64 = 0b0010;
    /// Bit 2 — by its name, marks that posting lists are compressed.
    pub const POSTING_LISTS_COMPRESSED: u64 = 0b0100;
    /// Bit 3 — by its name, marks that posting lists carry checksums.
    pub const POSTING_LISTS_CHECKSUMMED: u64 = 0b1000;
}
29
/// One-byte status code with three known values.
#[repr(u8)]
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum FileStatus {
    /// Encoded as `0`.
    Fresh = 0,
    /// Encoded as `1`; also the decoding fallback for unknown bytes.
    Stale = 1,
    /// Encoded as `2`.
    Deleted = 2,
}

impl FileStatus {
    /// Decodes a raw status byte.
    ///
    /// This never fails: any byte that is not a known discriminant decodes
    /// to [`FileStatus::Stale`].
    pub fn from_u8(v: u8) -> Self {
        if v == Self::Fresh as u8 {
            Self::Fresh
        } else if v == Self::Deleted as u8 {
            Self::Deleted
        } else {
            // Covers the real `Stale` code as well as every unknown value.
            Self::Stale
        }
    }
}
49
/// Decoded fixed-size index header.
///
/// Every field is read little-endian by `Header::parse`; the offset noted on
/// each field is the byte position inside the header it is read from.
#[derive(Clone, Debug)]
pub struct Header {
    /// Format major version (offset `0x04`).
    pub version_major: u16,
    /// Format minor version (offset `0x06`).
    pub version_minor: u16,
    /// Feature bit set (offset `0x08`).
    pub flags: u64,
    /// Creation timestamp as stored in the file (offset `0x10`).
    pub created_at: u64,
    /// Total source bytes as stored in the file (offset `0x18`).
    pub source_bytes_total: u64,
    /// Number of files (offset `0x20`).
    pub file_count: u32,
    /// Number of trigrams (offset `0x24`).
    pub trigram_count: u32,
    /// Byte offset of the file table (offset `0x28`).
    pub file_table_offset: u64,
    /// Byte size of the file table (offset `0x30`).
    pub file_table_size: u64,
    /// Byte offset of the trigram table (offset `0x38`).
    pub trigram_table_offset: u64,
    /// Byte size of the trigram table (offset `0x40`).
    pub trigram_table_size: u64,
    /// Byte offset of the posting data (offset `0x48`).
    pub posting_data_offset: u64,
    /// Byte size of the posting data (offset `0x50`).
    pub posting_data_size: u64,
    /// Byte offset of the bloom section (offset `0x58`).
    pub bloom_offset: u64,
    /// Byte size of the bloom section; `0` skips its bounds check (offset `0x60`).
    pub bloom_size: u64,
    /// Byte offset of the string pool (offset `0x68`).
    pub string_pool_offset: u64,
    /// Byte size of the string pool (offset `0x70`).
    pub string_pool_size: u64,
    /// Byte offset of the name index (offset `0x78`).
    pub name_index_offset: u64,
    /// Byte size of the name index; `0` skips its bounds check (offset `0x80`).
    pub name_index_size: u64,
}
73
74impl Header {
75 pub fn parse(data: &[u8]) -> crate::error::Result<Self> {
77 if data.len() < HEADER_SIZE {
78 return Err(crate::error::Error::IndexTooSmall);
79 }
80 if data[0..4] != MAGIC {
81 return Err(crate::error::Error::BadMagic);
82 }
83
84 let r = |off: usize| -> u64 {
85 data.get(off..off + 8)
86 .and_then(|s| s.try_into().ok())
87 .map(u64::from_le_bytes)
88 .unwrap_or(0)
89 };
90 let r16 = |off: usize| -> u16 {
91 data.get(off..off + 2)
92 .and_then(|s| s.try_into().ok())
93 .map(u16::from_le_bytes)
94 .unwrap_or(0)
95 };
96 let r32 = |off: usize| -> u32 {
97 data.get(off..off + 4)
98 .and_then(|s| s.try_into().ok())
99 .map(u32::from_le_bytes)
100 .unwrap_or(0)
101 };
102
103 let major = r16(0x04);
104 let minor = r16(0x06);
105 if major != VERSION_MAJOR || minor < VERSION_MINOR {
106 return Err(crate::error::Error::UnsupportedVersion { major, minor });
107 }
108
109 let expected_crc = r32(0xF8);
111 let actual_crc = crc32c::crc32c(&data[0..0xF8]);
112 if expected_crc != actual_crc {
113 return Err(crate::error::Error::HeaderCorrupted {
114 expected: expected_crc,
115 actual: actual_crc,
116 });
117 }
118
119 Ok(Header {
120 version_major: major,
121 version_minor: minor,
122 flags: r(0x08),
123 created_at: r(0x10),
124 source_bytes_total: r(0x18),
125 file_count: r32(0x20),
126 trigram_count: r32(0x24),
127 file_table_offset: r(0x28),
128 file_table_size: r(0x30),
129 trigram_table_offset: r(0x38),
130 trigram_table_size: r(0x40),
131 posting_data_offset: r(0x48),
132 posting_data_size: r(0x50),
133 bloom_offset: r(0x58),
134 bloom_size: r(0x60),
135 string_pool_offset: r(0x68),
136 string_pool_size: r(0x70),
137 name_index_offset: r(0x78),
138 name_index_size: r(0x80),
139 })
140 }
141
142 pub fn validate_bounds(&self, file_len: u64) -> crate::error::Result<()> {
144 let check = |name: &'static str, off: u64, sz: u64| -> crate::error::Result<()> {
145 if off + sz > file_len {
146 Err(crate::error::Error::SectionOutOfBounds {
147 section: name,
148 offset: off,
149 size: sz,
150 file_len,
151 })
152 } else {
153 Ok(())
154 }
155 };
156 check("file_table", self.file_table_offset, self.file_table_size)?;
157 check(
158 "trigram_table",
159 self.trigram_table_offset,
160 self.trigram_table_size,
161 )?;
162 check(
163 "posting_data",
164 self.posting_data_offset,
165 self.posting_data_size,
166 )?;
167 if self.bloom_size > 0 {
168 check("bloom", self.bloom_offset, self.bloom_size)?;
169 }
170 check(
171 "string_pool",
172 self.string_pool_offset,
173 self.string_pool_size,
174 )?;
175 if self.name_index_size > 0 {
176 check("name_index", self.name_index_offset, self.name_index_size)?;
177 }
178 Ok(())
179 }
180
181 pub fn has_bloom(&self) -> bool {
182 self.flags & flags::HAS_BLOOM_FILTERS != 0
183 }
184}
185
186use serde::{Deserialize, Serialize};
187use std::path::{Path, PathBuf};
188use std::time::{SystemTime, UNIX_EPOCH};
189
190#[derive(Debug, Serialize, Deserialize, Clone)]
191pub struct Beacon {
192 pub pid: i32,
193 pub root: PathBuf,
194 pub start_time: u64,
195 pub status: String,
196 pub last_event_at: u64,
197}
198
199impl Beacon {
200 pub fn new(root: &Path) -> Self {
201 let pid = std::process::id() as i32;
202 let now = SystemTime::now()
203 .duration_since(UNIX_EPOCH)
204 .unwrap_or_default()
205 .as_secs();
206
207 Self {
208 pid,
209 root: root.to_path_buf(),
210 start_time: now,
211 status: "idle".to_string(),
212 last_event_at: now,
213 }
214 }
215
216 pub fn is_live(&self) -> bool {
217 use nix::sys::signal::kill;
218 use nix::unistd::Pid;
219
220 if kill(Pid::from_raw(self.pid), None).is_err() {
222 return false;
223 }
224
225 self.root.exists()
227 }
228
229 pub fn write_to(&self, folder: &Path) -> crate::error::Result<()> {
230 let path = folder.join("beacon.json");
231 let f = std::fs::File::create(path)?;
232 serde_json::to_writer_pretty(f, self).map_err(std::io::Error::other)?;
233 Ok(())
234 }
235
236 pub fn read_from(folder: &Path) -> crate::error::Result<Self> {
237 let path = folder.join("beacon.json");
238 let f = std::fs::File::open(path)?;
239 let beacon = serde_json::from_reader(f).map_err(std::io::Error::other)?;
240 Ok(beacon)
241 }
242}
243
/// Heuristically decides whether `data` looks like binary content.
///
/// Only the first 512 bytes are inspected. A byte counts as "non-text" when
/// it is an ASCII control character other than tab, line feed or carriage
/// return (this includes NUL and DEL), or when it is a byte `>= 0x80` that is
/// not part of a complete, well-formed UTF-8 sequence. Well-formed multi-byte
/// UTF-8 characters count as text, so non-English text is not mistaken for
/// binary data.
///
/// Returns `true` when more than 30% of the inspected bytes are non-text.
/// Empty input is never binary.
pub fn is_binary(data: &[u8]) -> bool {
    const SAMPLE_LEN: usize = 512;
    const MAX_NON_TEXT_RATIO: f32 = 0.3;

    if data.is_empty() {
        return false;
    }
    let sample = &data[..data.len().min(SAMPLE_LEN)];
    let non_text = count_non_text_bytes(sample);

    (non_text as f32 / sample.len() as f32) > MAX_NON_TEXT_RATIO
}

/// Counts the bytes of `sample` that do not look like text: ASCII control
/// bytes (except tab/LF/CR) plus every byte of a malformed or truncated
/// UTF-8 sequence.
fn count_non_text_bytes(sample: &[u8]) -> usize {
    // Control bytes inside a run already known to be valid UTF-8. Bytes
    // >= 0x80 in such a run belong to well-formed characters and are text.
    fn control_bytes(valid: &[u8]) -> usize {
        valid
            .iter()
            .filter(|&&b| b.is_ascii() && !matches!(b, 0x09 | 0x0A | 0x0D | 0x20..=0x7E))
            .count()
    }

    let mut rest = sample;
    let mut total = 0;
    loop {
        match std::str::from_utf8(rest) {
            Ok(_) => return total + control_bytes(rest),
            Err(err) => {
                let (valid, tail) = rest.split_at(err.valid_up_to());
                total += control_bytes(valid);
                match err.error_len() {
                    // Malformed sequence: every byte of it is non-text;
                    // resume scanning right after it.
                    Some(bad) => {
                        total += bad;
                        rest = &tail[bad..];
                    }
                    // Sequence cut off by the end of the sample (at most
                    // three bytes): counted as non-text to stay conservative.
                    None => return total + tail.len(),
                }
            }
        }
    }
}