Skip to main content

ix/
format.rs

//! Index file format constants and header parsing.
//!
//! All integers are little-endian. All offsets are absolute from the start of the file.
//! Sections are aligned to 8-byte boundaries.
5
/// File signature found in the first four bytes of every ix index: ASCII `IX01`.
pub const MAGIC: [u8; 4] = *b"IX01";

/// Major format version. A header is accepted only when its major version
/// equals this value.
pub const VERSION_MAJOR: u16 = 1;

/// Lowest minor format version that is accepted; headers with a smaller
/// minor version are rejected as unsupported.
pub const VERSION_MINOR: u16 = 2;

/// Length in bytes of the fixed header that opens every index file (256).
pub const HEADER_SIZE: usize = 0x100;

/// Bytes occupied on disk by a single trigram-table entry:
/// a 4-byte `u32` key followed by a 16-byte payload.
pub const TRIGRAM_ENTRY_SIZE: usize = 4 + 16;

/// Bytes occupied on disk by a single file-table entry.
pub const FILE_ENTRY_SIZE: usize = 48;
23
/// Feature bits that may be set in [`Header::flags`].
///
/// Each constant is a single-bit mask; test a bit with `header.flags & MASK != 0`.
pub mod flags {
    /// Bit 0: per-trigram bloom filters are present in the index.
    pub const HAS_BLOOM_FILTERS: u64 = 0b0001;
    /// Bit 1: the file table stores a content hash for each file.
    pub const HAS_CONTENT_HASHES: u64 = 0b0010;
    /// Bit 2: posting-list data is ZSTD-compressed.
    pub const POSTING_LISTS_COMPRESSED: u64 = 0b0100;
    /// Bit 3: every posting-list chunk carries an `XXHash64` checksum.
    pub const POSTING_LISTS_CHECKSUMMED: u64 = 0b1000;
}
35
/// Sync state of a file tracked by the index.
///
/// The discriminants are the values stored on disk as a single `u8`.
#[repr(u8)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FileStatus {
    /// The file is present on disk and matches what was indexed.
    Fresh = 0x00,
    /// The file changed after it was last indexed.
    Stale = 0x01,
    /// The file no longer exists on the file system.
    Deleted = 0x02,
}

impl FileStatus {
    /// Convert an on-disk status byte into a [`FileStatus`].
    ///
    /// `0x00` maps to [`FileStatus::Fresh`] and `0x02` to
    /// [`FileStatus::Deleted`]. Every other value — including bytes this
    /// version does not know about — maps to [`FileStatus::Stale`].
    #[must_use]
    pub const fn from_u8(v: u8) -> Self {
        if v == 0x00 {
            return Self::Fresh;
        }
        if v == 0x02 {
            return Self::Deleted;
        }
        // 0x01 and anything unrecognised: treat the file as stale.
        Self::Stale
    }
}
61
/// In-memory form of the fixed 256-byte header at the start of an index file.
///
/// On disk every integer is little-endian, and every `*_offset` field is an
/// absolute byte position measured from the beginning of the file.
#[derive(Debug, Clone)]
pub struct Header {
    /// Major format version; a valid header has this equal to [`VERSION_MAJOR`].
    pub version_major: u16,
    /// Minor format version; a valid header has this at least [`VERSION_MINOR`].
    pub version_minor: u16,
    /// Feature-flag bit set; the individual bits are defined in [`flags`].
    pub flags: u64,
    /// Creation time of the index as a Unix timestamp in seconds.
    pub created_at: u64,
    /// Total size in bytes of all source files at indexing time.
    pub source_bytes_total: u64,
    /// How many entries the file table holds.
    pub file_count: u32,
    /// How many entries the trigram table holds.
    pub trigram_count: u32,
    /// Where the file table section begins.
    pub file_table_offset: u64,
    /// Length of the file table section in bytes.
    pub file_table_size: u64,
    /// Where the trigram lookup table begins.
    pub trigram_table_offset: u64,
    /// Length of the trigram lookup table in bytes.
    pub trigram_table_size: u64,
    /// Where the posting-list data blob begins.
    pub posting_data_offset: u64,
    /// Length of the posting-list data blob in bytes.
    pub posting_data_size: u64,
    /// Where the bloom-filter section begins (0 when the section is absent).
    pub bloom_offset: u64,
    /// Length of the bloom-filter section in bytes (0 when the section is absent).
    pub bloom_size: u64,
    /// Where the string pool section begins.
    pub string_pool_offset: u64,
    /// Length of the string pool section in bytes.
    pub string_pool_size: u64,
    /// Where the file-name index section begins (0 when the section is absent).
    pub name_index_offset: u64,
    /// Length of the file-name index section in bytes (0 when the section is absent).
    pub name_index_size: u64,
}
107
108impl Header {
109    /// Parse header from the first 256 bytes of an index file.
110    ///
111    /// # Errors
112    ///
113    /// Returns an error if the data is too small, has a bad magic number,
114    /// unsupported version, or corrupted CRC.
115    pub fn parse(data: &[u8]) -> crate::error::Result<Self> {
116        if data.len() < HEADER_SIZE {
117            return Err(crate::error::Error::IndexTooSmall);
118        }
119        if data.get(0..4).ok_or(crate::error::Error::IndexTooSmall)? != MAGIC {
120            return Err(crate::error::Error::BadMagic);
121        }
122
123        let r = |off: usize| -> u64 {
124            data.get(off..off + 8)
125                .and_then(|s| s.try_into().ok())
126                .map_or(0, u64::from_le_bytes)
127        };
128        let r16 = |off: usize| -> u16 {
129            data.get(off..off + 2)
130                .and_then(|s| s.try_into().ok())
131                .map_or(0, u16::from_le_bytes)
132        };
133        let r32 = |off: usize| -> u32 {
134            data.get(off..off + 4)
135                .and_then(|s| s.try_into().ok())
136                .map_or(0, u32::from_le_bytes)
137        };
138
139        let major = r16(0x04);
140        let minor = r16(0x06);
141        if major != VERSION_MAJOR || minor < VERSION_MINOR {
142            return Err(crate::error::Error::UnsupportedVersion { major, minor });
143        }
144
145        // Validate CRC32C of header (bytes 0x00..0xF8)
146        let expected_crc = r32(0xF8);
147        let actual_crc = crc32c::crc32c(data.get(0..0xF8).ok_or(crate::error::Error::IndexTooSmall)?);
148        if expected_crc != actual_crc {
149            return Err(crate::error::Error::HeaderCorrupted {
150                expected: expected_crc,
151                actual: actual_crc,
152            });
153        }
154
155        Ok(Self {
156            version_major: major,
157            version_minor: minor,
158            flags: r(0x08),
159            created_at: r(0x10),
160            source_bytes_total: r(0x18),
161            file_count: r32(0x20),
162            trigram_count: r32(0x24),
163            file_table_offset: r(0x28),
164            file_table_size: r(0x30),
165            trigram_table_offset: r(0x38),
166            trigram_table_size: r(0x40),
167            posting_data_offset: r(0x48),
168            posting_data_size: r(0x50),
169            bloom_offset: r(0x58),
170            bloom_size: r(0x60),
171            string_pool_offset: r(0x68),
172            string_pool_size: r(0x70),
173            name_index_offset: r(0x78),
174            name_index_size: r(0x80),
175        })
176    }
177
178    /// Validate all section offsets fit within the file.
179    ///
180    /// # Errors
181    ///
182    /// Returns an error if any section extends beyond the file length.
183    pub fn validate_bounds(&self, file_len: u64) -> crate::error::Result<()> {
184        let check = |name: &'static str, off: u64, sz: u64| -> crate::error::Result<()> {
185            if off + sz > file_len {
186                Err(crate::error::Error::SectionOutOfBounds {
187                    section: name,
188                    offset: off,
189                    size: sz,
190                    file_len,
191                })
192            } else {
193                Ok(())
194            }
195        };
196        check("file_table", self.file_table_offset, self.file_table_size)?;
197        check(
198            "trigram_table",
199            self.trigram_table_offset,
200            self.trigram_table_size,
201        )?;
202        check(
203            "posting_data",
204            self.posting_data_offset,
205            self.posting_data_size,
206        )?;
207        if self.bloom_size > 0 {
208            check("bloom", self.bloom_offset, self.bloom_size)?;
209        }
210        check(
211            "string_pool",
212            self.string_pool_offset,
213            self.string_pool_size,
214        )?;
215        if self.name_index_size > 0 {
216            check("name_index", self.name_index_offset, self.name_index_size)?;
217        }
218        Ok(())
219    }
220
221    /// Returns `true` when the index includes per-trigram bloom filters.
222    #[must_use] 
223    pub const fn has_bloom(&self) -> bool {
224        self.flags & flags::HAS_BLOOM_FILTERS != 0
225    }
226}
227
228use serde::{Deserialize, Serialize};
229use std::path::{Path, PathBuf};
230use std::time::{SystemTime, UNIX_EPOCH};
231
232/// A heartbeat file written by the `ixd` daemon so other processes can
233/// detect a running watcher and query its status.
234#[derive(Debug, Serialize, Deserialize, Clone)]
235pub struct Beacon {
236    /// PID of the `ixd` daemon process.
237    pub pid: i32,
238    /// Canonical root directory being watched.
239    pub root: PathBuf,
240    /// Unix timestamp (seconds) when the daemon started.
241    pub start_time: u64,
242    /// Human-readable status (e.g. `"idle"`, `"indexing"`).
243    pub status: String,
244    /// Unix timestamp (seconds) of the last filesystem event.
245    pub last_event_at: u64,
246}
247
248impl Beacon {
249    /// Create a new beacon for the current process, anchored at the given root.
250    #[must_use] 
251    pub fn new(root: &Path) -> Self {
252        let pid = i32::try_from(std::process::id()).unwrap_or(0);
253        let now = SystemTime::now()
254            .duration_since(UNIX_EPOCH)
255            .unwrap_or_default()
256            .as_secs();
257
258        Self {
259            pid,
260            root: root.to_path_buf(),
261            start_time: now,
262            status: "idle".to_string(),
263            last_event_at: now,
264        }
265    }
266
267    /// Check whether the daemon described by this beacon is still running.
268    ///
269    /// Verifies the recorded PID still exists, belongs to an `ixd` binary,
270    /// and the watched root directory is still accessible.
271    #[must_use] 
272    pub fn is_live(&self) -> bool {
273        use nix::sys::signal::kill;
274        use nix::unistd::Pid;
275
276        if kill(Pid::from_raw(self.pid), None).is_err() {
277            return false;
278        }
279
280        let comm_path = format!("/proc/{}/comm", self.pid);
281        if let Ok(comm) = std::fs::read_to_string(&comm_path) {
282            let comm = comm.trim();
283            if comm != "ixd" {
284                return false;
285            }
286        } else {
287            return false;
288        }
289
290        self.root.exists()
291    }
292
293    /// Write the beacon to `beacon.json` in the given folder.
294    ///
295    /// # Errors
296    ///
297    /// Returns an error if the file cannot be created or serialization fails.
298    pub fn write_to(&self, folder: &Path) -> crate::error::Result<()> {
299        let path = folder.join("beacon.json");
300        let f = std::fs::File::create(path)?;
301        serde_json::to_writer_pretty(f, self).map_err(std::io::Error::other)?;
302        Ok(())
303    }
304
305    /// Read a beacon from `beacon.json` in the given folder.
306    ///
307    /// # Errors
308    ///
309    /// Returns an error if the file cannot be opened or deserialization fails.
310    pub fn read_from(folder: &Path) -> crate::error::Result<Self> {
311        let path = folder.join("beacon.json");
312        let f = std::fs::File::open(path)?;
313        let beacon = serde_json::from_reader(f).map_err(std::io::Error::other)?;
314        Ok(beacon)
315    }
316}
317
/// Heuristic binary-content detector shared across the crate.
///
/// Looks at no more than the first 512 bytes of `data` and reports `true`
/// when strictly more than 30% of them are non-printable. Tab, line feed,
/// carriage return and the ASCII range `0x20..=0x7E` count as printable;
/// every other byte — including all bytes `>= 0x80` — counts as
/// non-printable. Empty input is never binary.
#[must_use]
pub fn is_binary(data: &[u8]) -> bool {
    const WINDOW: usize = 512;

    // First WINDOW bytes, or the whole slice when it is shorter than that.
    let sample = data.get(..WINDOW).unwrap_or(data);
    if sample.is_empty() {
        return false;
    }

    let mut suspicious: usize = 0;
    for &byte in sample {
        let printable = matches!(byte, 0x09 | 0x0A | 0x0D | 0x20..=0x7E);
        if !printable {
            suspicious = suspicious.saturating_add(1);
        }
    }

    // suspicious / len > 0.3, evaluated exactly as 10 * suspicious > 3 * len.
    suspicious.saturating_mul(10) > sample.len().saturating_mul(3)
}