Skip to main content

nxs/
decoder.rs

//! Minimal .nxb decoder — reads the preamble (and embedded schema, if any) and
//! walks the root object, returning the header fields together with the root
//! object's (key name, decoded value) pairs for inspection.
3use crate::error::{NxsError, Result};
4
/// File magic, compared against the first four bytes of the buffer read as a
/// little-endian `u32`. Written big-endian the value spells `NXSB`.
const MAGIC_FILE: u32 = 0x4E58_5342;
/// Object magic, expected at the start of every object record (`NXSO`).
const MAGIC_OBJ: u32 = 0x4E58_534F;
/// List magic, expected at the start of every list value (`NXSL`).
const MAGIC_LIST: u32 = 0x4E58_534C;
/// Footer magic, compared against the last four bytes of the buffer read as a
/// little-endian `u32`; the on-disk byte sequence is `NXS!`.
const MAGIC_FOOTER: u32 = 0x2153_584E;
9
// Sigil bytes — one per key in the schema's type manifest; each selects how
// the value stored for that key is decoded.
const SIGIL_INT: u8 = 0x3D; // '=' — 8-byte little-endian signed integer
const SIGIL_FLOAT: u8 = 0x7E; // '~' — 8-byte little-endian IEEE-754 bit pattern
const SIGIL_BOOL: u8 = 0x3F; // '?' — single byte, non-zero means true
const SIGIL_STR: u8 = 0x22; // '"' — u32 length prefix followed by the text bytes
const SIGIL_TIME: u8 = 0x40; // '@' — 8-byte little-endian signed integer
const SIGIL_BINARY: u8 = 0x3C; // '<' — u32 length prefix followed by raw bytes
const SIGIL_LINK: u8 = 0x26; // '&' — no dedicated decode branch in this file
const SIGIL_NULL: u8 = 0x5E; // '^' — no payload is read
19
20pub struct DecodedFile {
21    pub version: u16,
22    pub flags: u16,
23    pub dict_hash: u64,
24    pub tail_ptr: u64,
25    pub keys: Vec<String>,
26    pub key_sigils: Vec<u8>,
27    pub root_fields: Vec<(String, DecodedValue)>,
28    pub record_count: usize,
29    pub tail_start: usize,
30    pub data_sector_start: usize,
31}
32
/// A single decoded value as produced by this module's decoder.
#[derive(Debug, Clone, PartialEq)]
pub enum DecodedValue {
    /// 64-bit signed integer.
    Int(i64),
    /// 64-bit float reconstructed from its stored bit pattern.
    Float(f64),
    /// Boolean; any non-zero stored byte decodes as `true`.
    Bool(bool),
    /// Text, decoded lossily as UTF-8.
    Str(String),
    /// Timestamp carried as a raw 64-bit signed integer.
    Time(i64),
    /// Length-prefixed byte payload.
    Binary(Vec<u8>),
    /// Explicit null.
    Null,
    /// Ordered sequence of values.
    List(Vec<DecodedValue>),
    /// Nested (key name, value) pairs.
    Object(Vec<(String, DecodedValue)>),
    /// Bytes the decoder did not interpret.
    Raw(Vec<u8>),
}
46
/// 64-bit hash used to validate the embedded schema against `dict_hash`.
///
/// The input is consumed in 8-byte little-endian lanes (a short final lane is
/// zero-extended); each lane is multiplied, xor-shifted and folded into the
/// running state, then the input length is mixed in and the state finalised.
///
/// NOTE(review): despite the name this looks like a custom mix rather than a
/// reference MurmurHash3 variant — confirm before replacing it with a library.
fn murmur3_64(data: &[u8]) -> u64 {
    const SEED: u64 = 0x9368_1D62_5531_3A99;
    const MUL_A: u64 = 0xFF51_AFD7_ED55_8CCD;
    const MUL_B: u64 = 0xC4CE_B9FE_1A85_EC53;

    let state = data.chunks(8).fold(SEED, |acc, block| {
        // Zero-pad the (possibly short) block so it can be read as one LE word.
        let mut lane = [0u8; 8];
        lane[..block.len()].copy_from_slice(block);

        let scrambled = u64::from_le_bytes(lane).wrapping_mul(MUL_A);
        let scrambled = scrambled ^ (scrambled >> 33);

        let next = (acc ^ scrambled).wrapping_mul(MUL_B);
        next ^ (next >> 33)
    });

    // Finalisation: fold in the length, then one more xor-shift/multiply round.
    let with_len = state ^ data.len() as u64;
    let avalanched = (with_len ^ (with_len >> 33)).wrapping_mul(MUL_A);
    avalanched ^ (avalanched >> 33)
}
66
67pub fn decode(data: &[u8]) -> Result<DecodedFile> {
68    if data.len() < 32 {
69        return Err(NxsError::OutOfBounds);
70    }
71
72    let magic = u32::from_le_bytes(data[0..4].try_into().map_err(|_| NxsError::OutOfBounds)?);
73    if magic != MAGIC_FILE {
74        return Err(NxsError::BadMagic);
75    }
76
77    let footer_magic = u32::from_le_bytes(
78        data[data.len() - 4..]
79            .try_into()
80            .map_err(|_| NxsError::OutOfBounds)?,
81    );
82    if footer_magic != MAGIC_FOOTER {
83        return Err(NxsError::BadMagic);
84    }
85
86    let version = u16::from_le_bytes(data[4..6].try_into().map_err(|_| NxsError::OutOfBounds)?);
87    let flags = u16::from_le_bytes(data[6..8].try_into().map_err(|_| NxsError::OutOfBounds)?);
88    let dict_hash = u64::from_le_bytes(data[8..16].try_into().map_err(|_| NxsError::OutOfBounds)?);
89    let tail_ptr = u64::from_le_bytes(data[16..24].try_into().map_err(|_| NxsError::OutOfBounds)?);
90
91    let schema_embedded = flags & 0x0002 != 0;
92    let mut pos = 32usize;
93    let mut keys: Vec<String> = Vec::new();
94    let mut key_sigils: Vec<u8> = Vec::new();
95
96    if schema_embedded && pos + 2 <= data.len() {
97        let schema_start = pos;
98        let key_count = u16::from_le_bytes(
99            data[pos..pos + 2]
100                .try_into()
101                .map_err(|_| NxsError::OutOfBounds)?,
102        ) as usize;
103        if key_count > 256 {
104            return Err(NxsError::OutOfBounds); // spec max is 256 keys
105        }
106        pos += 2;
107        // TypeManifest
108        let end = pos.checked_add(key_count).ok_or(NxsError::OutOfBounds)?;
109        if end > data.len() {
110            return Err(NxsError::OutOfBounds);
111        }
112        key_sigils = data[pos..end].to_vec();
113        pos = end;
114        // StringPool — cap key name length to prevent OOM
115        for _ in 0..key_count {
116            let start = pos;
117            while pos < data.len() && data[pos] != 0 {
118                if pos - start > 256 {
119                    return Err(NxsError::OutOfBounds);
120                }
121                pos += 1;
122            }
123            let name = String::from_utf8_lossy(&data[start..pos]).to_string();
124            keys.push(name);
125            if pos >= data.len() {
126                return Err(NxsError::OutOfBounds);
127            }
128            pos += 1; // skip null terminator
129        }
130        // align to 8 — guard against pos already past end
131        if pos > data.len() {
132            return Err(NxsError::OutOfBounds);
133        }
134        while pos % 8 != 0 {
135            pos += 1;
136        }
137        if pos > data.len() {
138            return Err(NxsError::OutOfBounds);
139        }
140        let schema_end = pos;
141
142        // Validate DictHash
143        let computed = murmur3_64(&data[schema_start..schema_end]);
144        if computed != dict_hash {
145            return Err(NxsError::DictMismatch);
146        }
147    }
148
149    let data_sector_start = pos;
150
151    // Decode root object (first record)
152    let root_fields = if pos < data.len() {
153        decode_object(data, pos, &keys, &key_sigils).unwrap_or_default()
154    } else {
155        Vec::new()
156    };
157
158    // Read tail-index for record count — guard against overflow from large tail_ptr values.
159    let tail_offset = if tail_ptr as usize as u64 == tail_ptr {
160        tail_ptr as usize
161    } else {
162        return Err(NxsError::OutOfBounds);
163    };
164    let record_count = if tail_offset.saturating_add(4) <= data.len() {
165        u32::from_le_bytes(
166            data[tail_offset..tail_offset + 4]
167                .try_into()
168                .map_err(|_| NxsError::OutOfBounds)?,
169        ) as usize
170    } else {
171        0
172    };
173    let tail_start = tail_offset.saturating_add(4);
174
175    Ok(DecodedFile {
176        version,
177        flags,
178        dict_hash,
179        tail_ptr,
180        keys,
181        key_sigils,
182        root_fields,
183        record_count,
184        tail_start,
185        data_sector_start,
186    })
187}
188
189/// Decode a single record at the given absolute offset.
190pub fn decode_record_at(
191    data: &[u8],
192    offset: usize,
193    keys: &[String],
194    sigils: &[u8],
195) -> Result<Vec<(String, DecodedValue)>> {
196    decode_object(data, offset, keys, sigils)
197}
198
199fn decode_object(
200    data: &[u8],
201    offset: usize,
202    keys: &[String],
203    sigils: &[u8],
204) -> Result<Vec<(String, DecodedValue)>> {
205    let mut pos = offset;
206
207    if pos + 8 > data.len() {
208        return Err(NxsError::OutOfBounds);
209    }
210    let magic = u32::from_le_bytes(
211        data[pos..pos + 4]
212            .try_into()
213            .map_err(|_| NxsError::OutOfBounds)?,
214    );
215    if magic != MAGIC_OBJ {
216        return Err(NxsError::BadMagic);
217    }
218    pos += 4;
219
220    let _obj_len = u32::from_le_bytes(
221        data[pos..pos + 4]
222            .try_into()
223            .map_err(|_| NxsError::OutOfBounds)?,
224    ) as usize;
225    pos += 4;
226
227    // Read LEB128 bitmask — cap at 512 bits (74 bytes) to prevent OOM
228    let mut present_bits: Vec<bool> = Vec::new();
229    loop {
230        if pos >= data.len() {
231            return Err(NxsError::OutOfBounds);
232        }
233        if present_bits.len() >= 512 {
234            return Err(NxsError::OutOfBounds);
235        }
236        let byte = data[pos];
237        pos += 1;
238        for bit in 0..7 {
239            present_bits.push((byte >> bit) & 1 == 1);
240        }
241        if byte & 0x80 == 0 {
242            break;
243        }
244    }
245
246    // Count present fields — cap to prevent OOM from malformed inputs
247    let present_count = present_bits.iter().filter(|&&b| b).count();
248    if present_count > 512 {
249        return Err(NxsError::OutOfBounds);
250    }
251
252    // Read offset table (u16 each)
253    let mut offsets: Vec<usize> = Vec::new();
254    for _ in 0..present_count {
255        if pos + 2 > data.len() {
256            return Err(NxsError::OutOfBounds);
257        }
258        let off = u16::from_le_bytes(
259            data[pos..pos + 2]
260                .try_into()
261                .map_err(|_| NxsError::OutOfBounds)?,
262        ) as usize;
263        offsets.push(offset + off);
264        pos += 2;
265    }
266
267    // Map each present bit to its key and decode its value using sigil type info
268    let mut fields = Vec::new();
269    let mut offset_idx = 0;
270    for (bit_idx, &present) in present_bits.iter().enumerate() {
271        if !present {
272            continue;
273        }
274        let key_name = keys
275            .get(bit_idx)
276            .cloned()
277            .unwrap_or_else(|| format!("key_{bit_idx}"));
278        let sigil = sigils.get(bit_idx).copied().unwrap_or(0);
279        let val_offset = offsets[offset_idx];
280        offset_idx += 1;
281
282        let value = decode_value_at(data, val_offset, sigil, keys, sigils)?;
283        fields.push((key_name, value));
284    }
285
286    Ok(fields)
287}
288
289fn decode_value_at(
290    data: &[u8],
291    offset: usize,
292    sigil: u8,
293    keys: &[String],
294    sigils: &[u8],
295) -> Result<DecodedValue> {
296    let _ = (keys, sigils); // used by recursive calls on nested objects
297    if offset >= data.len() {
298        return Err(NxsError::OutOfBounds);
299    }
300
301    // Check for nested object or list magic first
302    if offset + 4 <= data.len() {
303        let maybe_magic = u32::from_le_bytes(
304            data[offset..offset + 4]
305                .try_into()
306                .map_err(|_| NxsError::OutOfBounds)?,
307        );
308        if maybe_magic == MAGIC_OBJ {
309            // Nested objects in the compiler path use a locally-scoped key schema,
310            // not the global one. Return Raw to avoid crashing with the wrong schema.
311            return Ok(DecodedValue::Raw(
312                data[offset..offset + 8.min(data.len() - offset)].to_vec(),
313            ));
314        }
315        if maybe_magic == MAGIC_LIST {
316            return decode_list(data, offset);
317        }
318    }
319
320    // Null sigil
321    if sigil == SIGIL_NULL {
322        return Ok(DecodedValue::Null);
323    }
324
325    // Use sigil to decode the correct type
326    match sigil {
327        SIGIL_INT => {
328            if offset + 8 > data.len() {
329                return Err(NxsError::OutOfBounds);
330            }
331            let v = i64::from_le_bytes(
332                data[offset..offset + 8]
333                    .try_into()
334                    .map_err(|_| NxsError::OutOfBounds)?,
335            );
336            Ok(DecodedValue::Int(v))
337        }
338        SIGIL_FLOAT => {
339            if offset + 8 > data.len() {
340                return Err(NxsError::OutOfBounds);
341            }
342            let bits = u64::from_le_bytes(
343                data[offset..offset + 8]
344                    .try_into()
345                    .map_err(|_| NxsError::OutOfBounds)?,
346            );
347            Ok(DecodedValue::Float(f64::from_bits(bits)))
348        }
349        SIGIL_BOOL => Ok(DecodedValue::Bool(data[offset] != 0)),
350        SIGIL_STR => {
351            if offset + 4 > data.len() {
352                return Err(NxsError::OutOfBounds);
353            }
354            let len = u32::from_le_bytes(
355                data[offset..offset + 4]
356                    .try_into()
357                    .map_err(|_| NxsError::OutOfBounds)?,
358            ) as usize;
359            // Guard against garbage lengths (compiler uses SIGIL_STR generically)
360            if len > 1024 * 1024 || offset + 4 + len > data.len() {
361                // Treat as raw i64 — the field is not a string despite the sigil
362                if offset + 8 <= data.len() {
363                    let v = i64::from_le_bytes(
364                        data[offset..offset + 8]
365                            .try_into()
366                            .map_err(|_| NxsError::OutOfBounds)?,
367                    );
368                    return Ok(DecodedValue::Int(v));
369                }
370                return Ok(DecodedValue::Raw(data[offset..].to_vec()));
371            }
372            let s = String::from_utf8_lossy(&data[offset + 4..offset + 4 + len]).to_string();
373            Ok(DecodedValue::Str(s))
374        }
375        SIGIL_TIME => {
376            if offset + 8 > data.len() {
377                return Err(NxsError::OutOfBounds);
378            }
379            let v = i64::from_le_bytes(
380                data[offset..offset + 8]
381                    .try_into()
382                    .map_err(|_| NxsError::OutOfBounds)?,
383            );
384            Ok(DecodedValue::Time(v))
385        }
386        SIGIL_BINARY => {
387            if offset + 4 > data.len() {
388                return Err(NxsError::OutOfBounds);
389            }
390            let len = u32::from_le_bytes(
391                data[offset..offset + 4]
392                    .try_into()
393                    .map_err(|_| NxsError::OutOfBounds)?,
394            ) as usize;
395            if offset + 4 + len > data.len() {
396                return Err(NxsError::OutOfBounds);
397            }
398            Ok(DecodedValue::Binary(
399                data[offset + 4..offset + 4 + len].to_vec(),
400            ))
401        }
402        _ => {
403            // Unknown sigil — return raw i64 as best-effort
404            if offset + 8 <= data.len() {
405                let v = i64::from_le_bytes(
406                    data[offset..offset + 8]
407                        .try_into()
408                        .map_err(|_| NxsError::OutOfBounds)?,
409                );
410                Ok(DecodedValue::Int(v))
411            } else {
412                Ok(DecodedValue::Raw(data[offset..].to_vec()))
413            }
414        }
415    }
416}
417
418fn decode_list(data: &[u8], offset: usize) -> Result<DecodedValue> {
419    if offset + 16 > data.len() {
420        return Err(NxsError::OutOfBounds);
421    }
422    let magic = u32::from_le_bytes(
423        data[offset..offset + 4]
424            .try_into()
425            .map_err(|_| NxsError::OutOfBounds)?,
426    );
427    if magic != MAGIC_LIST {
428        return Err(NxsError::BadMagic);
429    }
430    let elem_sigil = data[offset + 8];
431    let elem_count = u32::from_le_bytes(
432        data[offset + 9..offset + 13]
433            .try_into()
434            .map_err(|_| NxsError::OutOfBounds)?,
435    ) as usize;
436    let data_start = offset + 16;
437    // Reject impossible counts before allocating — elem slots are 8 bytes each.
438    let max_elems = (data.len().saturating_sub(data_start)) / 8;
439    if elem_count > max_elems {
440        return Err(NxsError::OutOfBounds);
441    }
442    let mut items = Vec::with_capacity(elem_count);
443    for i in 0..elem_count {
444        let elem_off = data_start + i * 8;
445        if elem_off + 8 > data.len() {
446            return Err(NxsError::OutOfBounds);
447        }
448        let v = match elem_sigil {
449            SIGIL_INT => {
450                let v = i64::from_le_bytes(
451                    data[elem_off..elem_off + 8]
452                        .try_into()
453                        .map_err(|_| NxsError::OutOfBounds)?,
454                );
455                DecodedValue::Int(v)
456            }
457            SIGIL_FLOAT => {
458                let bits = u64::from_le_bytes(
459                    data[elem_off..elem_off + 8]
460                        .try_into()
461                        .map_err(|_| NxsError::OutOfBounds)?,
462                );
463                DecodedValue::Float(f64::from_bits(bits))
464            }
465            _ => DecodedValue::Raw(data[elem_off..elem_off + 8].to_vec()),
466        };
467        items.push(v);
468    }
469    Ok(DecodedValue::List(items))
470}