1use crate::consts::{
2 FLAG_SCHEMA_EMBEDDED, MAGIC_FILE, MAGIC_FOOTER, MAGIC_LIST, MAGIC_OBJ, SIGIL_BINARY,
3 SIGIL_BOOL, SIGIL_FLOAT, SIGIL_INT, SIGIL_KEYWORD, SIGIL_LINK, SIGIL_NULL, SIGIL_STR,
4 SIGIL_TIME, VERSION,
5};
6use crate::error::{NxsError, Result};
7use crate::parser::{Field, Value};
8use std::collections::HashMap;
9
/// Accumulates the key dictionary used while encoding a document.
pub struct Compiler {
    /// Interned key names; a key's position in this vector is its slot index.
    dict: Vec<String>,
    /// Reverse lookup from key name to its slot index in `dict`.
    key_map: HashMap<String, usize>,
    /// One sigil byte per slot; `0` means no value has been recorded for the slot yet.
    slot_sigils: Vec<u8>,
}
16
17impl Compiler {
18 pub fn new() -> Self {
19 Compiler {
20 dict: Vec::new(),
21 key_map: HashMap::new(),
22 slot_sigils: Vec::new(),
23 }
24 }
25
26 pub fn collect_keys(&mut self, fields: &[Field]) {
28 for field in fields {
29 self.intern_key(&field.key);
30 self.collect_keys_from_value(&field.value);
31 }
32 }
33
34 fn collect_keys_from_value(&mut self, value: &Value) {
35 match value {
36 Value::Object(fields) => {
37 for field in fields {
38 self.intern_key(&field.key);
39 self.collect_keys_from_value(&field.value);
40 }
41 }
42 Value::List(elems) => {
43 for e in elems {
44 self.collect_keys_from_value(e);
45 }
46 }
47 _ => {}
48 }
49 }
50
51 fn intern_key(&mut self, key: &str) -> usize {
52 if let Some(&idx) = self.key_map.get(key) {
53 return idx;
54 }
55 let idx = self.dict.len();
56 self.dict.push(key.to_string());
57 self.slot_sigils.push(0);
58 self.key_map.insert(key.to_string(), idx);
59 idx
60 }
61
62 fn mark_slot_sigil(&mut self, slot: usize, sigil: u8) {
63 if slot >= self.slot_sigils.len() {
64 return;
65 }
66 let cur = self.slot_sigils[slot];
67 if cur == 0 || (cur == SIGIL_NULL && sigil != SIGIL_NULL) {
68 self.slot_sigils[slot] = sigil;
69 }
70 }
71
72 pub fn compile(&mut self, fields: &[Field]) -> Result<Vec<u8>> {
73 self.collect_keys(fields);
74
75 let mut data_sector: Vec<u8> = Vec::new();
76 let root_bytes = self.encode_object(fields)?;
78 data_sector.extend_from_slice(&root_bytes);
79
80 let schema_bytes = self.encode_schema();
81 let tail_ptr: u64 = 32 + schema_bytes.len() as u64 + data_sector.len() as u64;
82 let tail_index = self.encode_tail_index(32 + schema_bytes.len() as u64, tail_ptr);
83 let dict_hash = murmur3_64(&schema_bytes);
84
85 let preamble = self.encode_preamble(dict_hash, FLAG_SCHEMA_EMBEDDED);
86
87 let mut out = Vec::new();
88 out.extend_from_slice(&preamble);
89 out.extend_from_slice(&schema_bytes);
90 out.extend_from_slice(&data_sector);
91 out.extend_from_slice(&tail_index);
92 Ok(out)
93 }
94
95 fn encode_preamble(&self, dict_hash: u64, flags: u16) -> Vec<u8> {
96 let mut b = Vec::with_capacity(32);
97 b.extend_from_slice(&MAGIC_FILE.to_le_bytes()); b.extend_from_slice(&VERSION.to_le_bytes()); b.extend_from_slice(&flags.to_le_bytes()); b.extend_from_slice(&dict_hash.to_le_bytes()); b.extend_from_slice(&0u64.to_le_bytes()); b.extend_from_slice(&0u64.to_le_bytes()); b
106 }
107
108 fn encode_schema(&self) -> Vec<u8> {
109 let mut b = Vec::new();
110 let key_count = self.dict.len() as u16;
111 b.extend_from_slice(&key_count.to_le_bytes());
112
113 for (i, _) in self.dict.iter().enumerate() {
114 let s = self.slot_sigils.get(i).copied().unwrap_or(0);
115 b.push(if s == 0 { SIGIL_STR } else { s });
116 }
117
118 for key in &self.dict {
120 b.extend_from_slice(key.as_bytes());
121 b.push(0x00);
122 }
123
124 while b.len() % 8 != 0 {
126 b.push(0x00);
127 }
128 b
129 }
130
131 fn encode_object(&mut self, fields: &[Field]) -> Result<Vec<u8>> {
132 let resolved: Vec<(usize, Value)> = fields
134 .iter()
135 .map(|f| {
136 let idx = *self
137 .key_map
138 .get(&f.key)
139 .ok_or_else(|| NxsError::ParseError(format!("key not in dict: {}", f.key)))?;
140 let v = resolve_macro(&f.value, fields)?;
141 Ok((idx, v))
142 })
143 .collect::<Result<Vec<_>>>()?;
144
145 let mask = build_bitmask(
147 &resolved.iter().map(|(i, _)| *i).collect::<Vec<_>>(),
148 self.dict.len(),
149 );
150
151 let mut value_bufs: Vec<Vec<u8>> = Vec::new();
153 for (slot, v) in &resolved {
154 self.mark_slot_sigil(*slot, value_sigil_byte(v));
155 value_bufs.push(encode_value(v)?);
156 }
157
158 let header_size = 4 + 4; let bitmask_size = mask.len();
162 let offset_table_size = resolved.len() * 2; let data_start = header_size + bitmask_size + offset_table_size;
164
165 let data_start_aligned = align8(data_start);
167 let align_padding = data_start_aligned - data_start;
168
169 let mut offsets: Vec<u16> = Vec::new();
170 let mut cursor = data_start_aligned;
171 for buf in &value_bufs {
172 offsets.push(cursor as u16);
173 cursor += buf.len();
174 }
175
176 let total_len = cursor;
177
178 let mut obj = Vec::with_capacity(total_len);
179 obj.extend_from_slice(&MAGIC_OBJ.to_le_bytes());
180 obj.extend_from_slice(&(total_len as u32).to_le_bytes());
181 obj.extend_from_slice(&mask);
182 for off in &offsets {
183 obj.extend_from_slice(&off.to_le_bytes());
184 }
185 for _ in 0..align_padding {
186 obj.push(0x00);
187 }
188 for buf in &value_bufs {
189 obj.extend_from_slice(buf);
190 }
191 Ok(obj)
192 }
193
194 fn encode_tail_index(&self, data_sector_start: u64, tail_ptr: u64) -> Vec<u8> {
195 let mut b = Vec::new();
197 let entry_count: u32 = 1;
198 b.extend_from_slice(&entry_count.to_le_bytes());
199 b.extend_from_slice(&0u16.to_le_bytes());
201 b.extend_from_slice(&data_sector_start.to_le_bytes());
202 b.extend_from_slice(&tail_ptr.to_le_bytes());
203 b.extend_from_slice(&MAGIC_FOOTER.to_le_bytes());
204 b
205 }
206}
207
208fn encode_value(v: &Value) -> Result<Vec<u8>> {
211 match v {
212 Value::Int(n) => {
213 let mut b = Vec::with_capacity(8);
214 b.extend_from_slice(&n.to_le_bytes());
215 Ok(b)
216 }
217 Value::Float(f) => {
218 let mut b = Vec::with_capacity(8);
219 b.extend_from_slice(&f.to_le_bytes());
220 Ok(b)
221 }
222 Value::Bool(bl) => {
223 let mut b = vec![if *bl { 0x01u8 } else { 0x00u8 }];
224 b.extend_from_slice(&[0u8; 7]);
226 Ok(b)
227 }
228 Value::Keyword(_) => Err(NxsError::UnsupportedFieldType),
229 Value::Str(s) => {
230 let bytes = s.as_bytes();
231 let len = bytes.len() as u32;
232 let mut b = Vec::new();
233 b.extend_from_slice(&len.to_le_bytes());
234 b.extend_from_slice(bytes);
235 pad_to_8(&mut b);
236 Ok(b)
237 }
238 Value::Time(ns) => {
239 let mut b = Vec::with_capacity(8);
240 b.extend_from_slice(&ns.to_le_bytes());
241 Ok(b)
242 }
243 Value::Binary(raw) => {
244 let len = raw.len() as u32;
245 let mut b = Vec::new();
246 b.extend_from_slice(&len.to_le_bytes());
247 b.extend_from_slice(raw);
248 pad_to_8(&mut b);
249 Ok(b)
250 }
251 Value::Link(off) => {
252 let mut b = Vec::with_capacity(8);
253 b.extend_from_slice(&off.to_le_bytes());
254 b.extend_from_slice(&[0u8; 4]); Ok(b)
256 }
257 Value::Null => {
258 Ok(vec![])
263 }
264 Value::Object(fields) => {
265 let mut inner = Compiler::new();
268 inner.collect_keys(fields);
269 inner.dict = fields.iter().map(|f| f.key.clone()).collect();
271 inner.key_map = inner
272 .dict
273 .iter()
274 .cloned()
275 .enumerate()
276 .map(|(i, k)| (k, i))
277 .collect();
278 inner.encode_object(fields)
279 }
280 Value::List(elems) => encode_list(elems),
281 Value::Macro(_) => Err(NxsError::MacroUnresolved(
282 "unresolved macro in encode".into(),
283 )),
284 }
285}
286
287fn encode_list(elems: &[Value]) -> Result<Vec<u8>> {
288 if elems.is_empty() {
289 let mut b = Vec::new();
290 b.extend_from_slice(&MAGIC_LIST.to_le_bytes()); b.extend_from_slice(&16u32.to_le_bytes()); b.push(0x00); b.extend_from_slice(&0u32.to_le_bytes()); b.extend_from_slice(&[0u8; 3]); return Ok(b);
296 }
297
298 let sigil_byte = value_sigil_byte(elems.first().unwrap());
299
300 let mut elem_bufs: Vec<Vec<u8>> = elems
301 .iter()
302 .map(|e| {
303 if value_sigil_byte(e) != sigil_byte {
304 return Err(NxsError::ListTypeMismatch);
305 }
306 encode_value(e)
307 })
308 .collect::<Result<Vec<_>>>()?;
309
310 let data_len: usize = elem_bufs.iter().map(|b| b.len()).sum();
312 let total_len = 16 + data_len;
313
314 let mut b = Vec::with_capacity(total_len);
315 b.extend_from_slice(&MAGIC_LIST.to_le_bytes());
316 b.extend_from_slice(&(total_len as u32).to_le_bytes());
317 b.push(sigil_byte);
318 b.extend_from_slice(&(elems.len() as u32).to_le_bytes());
319 b.extend_from_slice(&[0u8; 3]); for buf in &mut elem_bufs {
321 b.append(buf);
322 }
323 Ok(b)
324}
325
326fn value_sigil_byte(v: &Value) -> u8 {
327 match v {
328 Value::Int(_) => SIGIL_INT,
329 Value::Float(_) => SIGIL_FLOAT,
330 Value::Bool(_) => SIGIL_BOOL,
331 Value::Keyword(_) => SIGIL_KEYWORD,
332 Value::Str(_) => SIGIL_STR,
333 Value::Time(_) => SIGIL_TIME,
334 Value::Binary(_) => SIGIL_BINARY,
335 Value::Link(_) => SIGIL_LINK,
336 Value::Null => SIGIL_NULL,
337 Value::Object(_) => b'O',
338 Value::List(_) => b'L',
339 Value::Macro(_) => b'!',
340 }
341}
342
/// Appends zero bytes to `b` until its length is a multiple of 8.
/// Buffers that are already aligned (including empty ones) are left untouched.
fn pad_to_8(b: &mut Vec<u8>) {
    let overshoot = b.len() % 8;
    if overshoot != 0 {
        let padded_len = b.len() + (8 - overshoot);
        b.resize(padded_len, 0x00);
    }
}
348
/// Rounds `n` up to the nearest multiple of 8.
fn align8(n: usize) -> usize {
    let bumped = n + 7;
    bumped - bumped % 8
}
352
/// Builds the presence bitmask for an object.
///
/// Each output byte carries seven presence bits (bit 0 = lowest slot of the
/// group); bit 7 is set on every byte except the last to signal that another
/// byte follows. Indices at or beyond `total_keys` are ignored. With no keys
/// at all the mask is a single zero byte.
fn build_bitmask(present_indices: &[usize], total_keys: usize) -> Vec<u8> {
    if total_keys == 0 {
        return vec![0x00];
    }

    // Seven slots per byte, rounded up.
    let byte_count = (total_keys + 6) / 7;
    let mut mask = vec![0u8; byte_count];

    for &slot in present_indices {
        if slot < total_keys {
            mask[slot / 7] |= 1u8 << (slot % 7);
        }
    }

    // Continuation flag on every byte but the final one.
    let last = byte_count - 1;
    for byte in &mut mask[..last] {
        *byte |= 0x80;
    }
    mask
}
384
385fn resolve_macro(value: &Value, scope: &[Field]) -> Result<Value> {
387 match value {
388 Value::Macro(expr) => eval_macro(expr, scope),
389 other => Ok(other.clone()),
390 }
391}
392
393fn eval_macro(expr: &str, scope: &[Field]) -> Result<Value> {
394 let expr = expr.trim();
395
396 if let Some(key) = expr.strip_prefix('@') {
398 return scope
399 .iter()
400 .find(|f| f.key == key)
401 .map(|f| f.value.clone())
402 .ok_or_else(|| NxsError::MacroUnresolved(format!("@{key} not found in scope")));
403 }
404
405 if expr == "now()" {
407 return Ok(Value::Time(0));
409 }
410
411 if expr.starts_with('"') && expr.ends_with('"') {
413 let inner = &expr[1..expr.len() - 1];
414 return Ok(Value::Str(inner.to_string()));
415 }
416 if let Ok(n) = expr.parse::<i64>() {
417 return Ok(Value::Int(n));
418 }
419 if let Ok(f) = expr.parse::<f64>() {
420 return Ok(Value::Float(f));
421 }
422
423 if expr.contains(" + ") {
425 let parts: Vec<&str> = expr.splitn(2, " + ").collect();
426 let left = eval_macro(parts[0].trim(), scope)?;
427 let right = eval_macro(parts[1].trim(), scope)?;
428 return match (left, right) {
429 (Value::Str(a), Value::Str(b)) => Ok(Value::Str(a + &b)),
430 (Value::Int(a), Value::Int(b)) => {
431 a.checked_add(b).map(Value::Int).ok_or(NxsError::Overflow)
432 }
433 (Value::Float(a), Value::Float(b)) => Ok(Value::Float(a + b)),
434 _ => Err(NxsError::MacroUnresolved(format!(
435 "incompatible types in +: {expr}"
436 ))),
437 };
438 }
439
440 Err(NxsError::MacroUnresolved(format!(
441 "cannot evaluate: {expr}"
442 )))
443}
444
/// Hashes `data` to 64 bits.
///
/// The input is consumed in 8-byte little-endian words (a short final word is
/// zero-extended); each word is multiplied, xor-shifted and folded into the
/// running state, and the input length is mixed in before a final scramble.
///
/// NOTE(review): despite the name, this does not look like the reference
/// MurmurHash3 algorithm — confirm before comparing against other
/// implementations.
fn murmur3_64(data: &[u8]) -> u64 {
    const SEED: u64 = 0x9368_1D62_5531_3A99;
    const MUL_A: u64 = 0xFF51AFD7ED558CCD;
    const MUL_B: u64 = 0xC4CEB9FE1A85EC53;

    let state = data.chunks(8).fold(SEED, |acc, chunk| {
        // Zero-extend a short trailing chunk to a full word.
        let mut word = [0u8; 8];
        word[..chunk.len()].copy_from_slice(chunk);

        let mut k = u64::from_le_bytes(word).wrapping_mul(MUL_A);
        k ^= k >> 33;

        let mut next = (acc ^ k).wrapping_mul(MUL_B);
        next ^= next >> 33;
        next
    });

    let mut h = state ^ (data.len() as u64);
    h ^= h >> 33;
    h = h.wrapping_mul(MUL_A);
    h ^ (h >> 33)
}