Skip to main content

ix/
format.rs

1//! Index file format constants and header parsing.
2//!
3//! All integers little-endian. All offsets absolute from file start.
4//! Sections aligned to 8-byte boundaries.
5
/// File signature: the four ASCII bytes `IX01`.
pub const MAGIC: [u8; 4] = *b"IX01";

/// Major component of the current format version.
pub const VERSION_MAJOR: u16 = 1;
/// Minor component of the current format version.
pub const VERSION_MINOR: u16 = 1;

/// Size in bytes of the fixed-length header (256).
pub const HEADER_SIZE: usize = 0x100;

/// Size in bytes of one trigram entry: a `u32` key plus the 16-byte existing entry.
pub const TRIGRAM_ENTRY_SIZE: usize = std::mem::size_of::<u32>() + 16;

/// Size in bytes of one file entry (48).
pub const FILE_ENTRY_SIZE: usize = 0x30;
21
/// Bit masks for the header's 64-bit `flags` field.
///
/// Each constant occupies a single distinct bit, so masks can be OR-ed together.
pub mod flags {
    /// Bit 0.
    pub const HAS_BLOOM_FILTERS: u64 = 0b0001;
    /// Bit 1.
    pub const HAS_CONTENT_HASHES: u64 = 0b0010;
    /// Bit 2.
    pub const POSTING_LISTS_COMPRESSED: u64 = 0b0100;
    /// Bit 3.
    pub const POSTING_LISTS_CHECKSUMMED: u64 = 0b1000;
}
29
/// Status byte of an entry in the file table.
///
/// The discriminants are the on-disk byte values.
#[repr(u8)]
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum FileStatus {
    Fresh = 0x00,
    Stale = 0x01,
    Deleted = 0x02,
}

impl FileStatus {
    /// Decode a status byte.
    ///
    /// `0x00` and `0x02` map to [`FileStatus::Fresh`] and [`FileStatus::Deleted`];
    /// every other value, including bytes this version does not recognise, is
    /// reported as [`FileStatus::Stale`].
    pub fn from_u8(v: u8) -> Self {
        if v == Self::Fresh as u8 {
            Self::Fresh
        } else if v == Self::Deleted as u8 {
            Self::Deleted
        } else {
            // Covers 0x01 itself and any unknown byte: unknown = treat as stale.
            Self::Stale
        }
    }
}
49
/// Decoded form of the fixed-size index header.
///
/// Every `*_offset` / `*_size` pair describes one section of the index file in
/// bytes; per the module documentation, offsets are absolute from the start of
/// the file.
#[derive(Clone, Debug)]
pub struct Header {
    /// Major format version read from the file.
    pub version_major: u16,
    /// Minor format version read from the file.
    pub version_minor: u16,
    /// Bit set built from the masks in the `flags` module.
    pub flags: u64,
    /// Creation timestamp as stored in the file.
    pub created_at: u64,
    /// Total source bytes, as recorded in the file.
    pub source_bytes_total: u64,
    /// Number of files, as recorded in the file.
    pub file_count: u32,
    /// Number of trigrams, as recorded in the file.
    pub trigram_count: u32,
    /// File table section: start offset.
    pub file_table_offset: u64,
    /// File table section: length in bytes.
    pub file_table_size: u64,
    /// Trigram table section: start offset.
    pub trigram_table_offset: u64,
    /// Trigram table section: length in bytes.
    pub trigram_table_size: u64,
    /// Posting data section: start offset.
    pub posting_data_offset: u64,
    /// Posting data section: length in bytes.
    pub posting_data_size: u64,
    /// Bloom section: start offset.
    pub bloom_offset: u64,
    /// Bloom section: length in bytes.
    pub bloom_size: u64,
    /// String pool section: start offset.
    pub string_pool_offset: u64,
    /// String pool section: length in bytes.
    pub string_pool_size: u64,
    /// Name index section: start offset.
    pub name_index_offset: u64,
    /// Name index section: length in bytes.
    pub name_index_size: u64,
}
73
74impl Header {
75    /// Parse header from the first 256 bytes of an index file.
76    pub fn parse(data: &[u8]) -> crate::error::Result<Self> {
77        if data.len() < HEADER_SIZE {
78            return Err(crate::error::Error::IndexTooSmall);
79        }
80        if data[0..4] != MAGIC {
81            return Err(crate::error::Error::BadMagic);
82        }
83
84        let r = |off: usize| -> u64 {
85            data.get(off..off + 8)
86                .and_then(|s| s.try_into().ok())
87                .map(u64::from_le_bytes)
88                .unwrap_or(0)
89        };
90        let r16 = |off: usize| -> u16 {
91            data.get(off..off + 2)
92                .and_then(|s| s.try_into().ok())
93                .map(u16::from_le_bytes)
94                .unwrap_or(0)
95        };
96        let r32 = |off: usize| -> u32 {
97            data.get(off..off + 4)
98                .and_then(|s| s.try_into().ok())
99                .map(u32::from_le_bytes)
100                .unwrap_or(0)
101        };
102
103        let major = r16(0x04);
104        let minor = r16(0x06);
105        if major != VERSION_MAJOR || minor < VERSION_MINOR {
106            return Err(crate::error::Error::UnsupportedVersion { major, minor });
107        }
108
109        // Validate CRC32C of header (bytes 0x00..0xF8)
110        let expected_crc = r32(0xF8);
111        let actual_crc = crc32c::crc32c(&data[0..0xF8]);
112        if expected_crc != actual_crc {
113            return Err(crate::error::Error::HeaderCorrupted {
114                expected: expected_crc,
115                actual: actual_crc,
116            });
117        }
118
119        Ok(Header {
120            version_major: major,
121            version_minor: minor,
122            flags: r(0x08),
123            created_at: r(0x10),
124            source_bytes_total: r(0x18),
125            file_count: r32(0x20),
126            trigram_count: r32(0x24),
127            file_table_offset: r(0x28),
128            file_table_size: r(0x30),
129            trigram_table_offset: r(0x38),
130            trigram_table_size: r(0x40),
131            posting_data_offset: r(0x48),
132            posting_data_size: r(0x50),
133            bloom_offset: r(0x58),
134            bloom_size: r(0x60),
135            string_pool_offset: r(0x68),
136            string_pool_size: r(0x70),
137            name_index_offset: r(0x78),
138            name_index_size: r(0x80),
139        })
140    }
141
142    /// Validate all section offsets fit within the file.
143    pub fn validate_bounds(&self, file_len: u64) -> crate::error::Result<()> {
144        let check = |name: &'static str, off: u64, sz: u64| -> crate::error::Result<()> {
145            if off + sz > file_len {
146                Err(crate::error::Error::SectionOutOfBounds {
147                    section: name,
148                    offset: off,
149                    size: sz,
150                    file_len,
151                })
152            } else {
153                Ok(())
154            }
155        };
156        check("file_table", self.file_table_offset, self.file_table_size)?;
157        check(
158            "trigram_table",
159            self.trigram_table_offset,
160            self.trigram_table_size,
161        )?;
162        check(
163            "posting_data",
164            self.posting_data_offset,
165            self.posting_data_size,
166        )?;
167        if self.bloom_size > 0 {
168            check("bloom", self.bloom_offset, self.bloom_size)?;
169        }
170        check(
171            "string_pool",
172            self.string_pool_offset,
173            self.string_pool_size,
174        )?;
175        if self.name_index_size > 0 {
176            check("name_index", self.name_index_offset, self.name_index_size)?;
177        }
178        Ok(())
179    }
180
181    pub fn has_bloom(&self) -> bool {
182        self.flags & flags::HAS_BLOOM_FILTERS != 0
183    }
184}
185
186use serde::{Deserialize, Serialize};
187use std::path::{Path, PathBuf};
188use std::time::{SystemTime, UNIX_EPOCH};
189
190#[derive(Debug, Serialize, Deserialize, Clone)]
191pub struct Beacon {
192    pub pid: i32,
193    pub root: PathBuf,
194    pub start_time: u64,
195    pub status: String,
196    pub last_event_at: u64,
197}
198
199impl Beacon {
200    pub fn new(root: &Path) -> Self {
201        let pid = std::process::id() as i32;
202        let now = SystemTime::now()
203            .duration_since(UNIX_EPOCH)
204            .unwrap_or_default()
205            .as_secs();
206
207        Self {
208            pid,
209            root: root.to_path_buf(),
210            start_time: now,
211            status: "idle".to_string(),
212            last_event_at: now,
213        }
214    }
215
216    pub fn is_live(&self) -> bool {
217        use nix::sys::signal::kill;
218        use nix::unistd::Pid;
219
220        // Check if process exists
221        if kill(Pid::from_raw(self.pid), None).is_err() {
222            return false;
223        }
224
225        // Existence + matching root is a strong heuristic
226        self.root.exists()
227    }
228
229    pub fn write_to(&self, folder: &Path) -> crate::error::Result<()> {
230        let path = folder.join("beacon.json");
231        let f = std::fs::File::create(path)?;
232        serde_json::to_writer_pretty(f, self).map_err(std::io::Error::other)?;
233        Ok(())
234    }
235
236    pub fn read_from(folder: &Path) -> crate::error::Result<Self> {
237        let path = folder.join("beacon.json");
238        let f = std::fs::File::open(path)?;
239        let beacon = serde_json::from_reader(f).map_err(std::io::Error::other)?;
240        Ok(beacon)
241    }
242}
243
/// Centralized binary file detection.
///
/// Looks at no more than the first 512 bytes of `data` and reports `true` when
/// more than 30% of them are neither tab, line feed, carriage return, nor
/// printable ASCII (space through `~`). Empty input is never binary.
pub fn is_binary(data: &[u8]) -> bool {
    let sample = &data[..data.len().min(512)];
    if sample.is_empty() {
        return false;
    }

    let suspicious = sample
        .iter()
        .filter(|byte| !matches!(**byte, b'\t' | b'\n' | b'\r' | b' '..=b'~'))
        .count();

    // suspicious / len > 0.3, written in integers. For samples of at most
    // 512 bytes this gives the same answer as the floating-point ratio.
    suspicious * 10 > sample.len() * 3
}