Skip to main content

synx_core/
parser.rs

1//! SYNX Parser — converts raw .synx text into a structured value tree
2//! with metadata for engine resolution.
3
4use std::collections::HashMap;
5use memchr::memchr_iter;
6use crate::value::*;
7use crate::rng;
8
9/// Parse a SYNX text string into a value tree with metadata.
10pub fn parse(text: &str) -> ParseResult {
11    let bytes = text.as_bytes();
12
13    // SIMD-accelerated line splitting via memchr
14    let mut line_starts: Vec<usize> = Vec::with_capacity(64);
15    line_starts.push(0);
16    for pos in memchr_iter(b'\n', bytes) {
17        line_starts.push(pos + 1);
18    }
19    let line_count = line_starts.len();
20
21    let mut root = HashMap::new();
22    let mut stack: Vec<(i32, StackEntry)> = vec![(-1, StackEntry::Root)];
23    let mut mode = Mode::Static;
24    let mut locked = false;
25    let mut metadata: HashMap<String, MetaMap> = HashMap::new();
26    let mut includes: Vec<IncludeDirective> = Vec::new();
27
28    let mut block: Option<BlockState> = None;
29    let mut list: Option<ListState> = None;
30    let mut in_block_comment = false;
31
32    let mut i = 0;
33    while i < line_count {
34        // Extract line without allocating
35        let start = line_starts[i];
36        let end = if i + 1 < line_count { line_starts[i + 1] - 1 } else { bytes.len() };
37        // Handle \r\n
38        let end = if end > start && end > 0 && bytes.get(end - 1) == Some(&b'\r') { end - 1 } else { end };
39        let raw = &text[start..end];
40
41        let trimmed = raw.trim();
42
43        // Mode declaration
44        if trimmed == "!active" {
45            mode = Mode::Active;
46            i += 1;
47            continue;
48        }
49        if trimmed == "!lock" {
50            locked = true;
51            i += 1;
52            continue;
53        }
54        if trimmed.starts_with("!include ") {
55            let rest = trimmed[9..].trim();
56            let mut parts = rest.splitn(2, char::is_whitespace);
57            let path = parts.next().unwrap_or("").to_string();
58            let alias = parts.next().map(|s| s.trim().to_string()).unwrap_or_else(|| {
59                // Auto-derive alias from filename
60                let name = path.rsplit(&['/', '\\'][..]).next().unwrap_or(&path);
61                name.strip_suffix(".synx").or_else(|| name.strip_suffix(".SYNX")).unwrap_or(name).to_string()
62            });
63            includes.push(IncludeDirective { path, alias });
64            i += 1;
65            continue;
66        }
67        if trimmed.starts_with("#!mode:") {
68            let declared = trimmed.splitn(2, ':').nth(1).unwrap_or("static").trim();
69            mode = if declared == "active" { Mode::Active } else { Mode::Static };
70            i += 1;
71            continue;
72        }
73
74        // Block comment toggle: ###
75        if trimmed == "###" {
76            in_block_comment = !in_block_comment;
77            i += 1;
78            continue;
79        }
80        if in_block_comment {
81            i += 1;
82            continue;
83        }
84
85        // Skip empty / comments
86        if trimmed.is_empty() || trimmed.starts_with('#') || trimmed.starts_with("//") {
87            i += 1;
88            continue;
89        }
90
91        let indent = (raw.len() - raw.trim_start().len()) as i32;
92
93        // Continue multiline block
94        if let Some(ref mut blk) = block {
95            if indent > blk.indent {
96                if !blk.content.is_empty() {
97                    blk.content.push('\n');
98                }
99                blk.content.push_str(trimmed);
100                i += 1;
101                continue;
102            } else {
103                let content = std::mem::take(&mut blk.content);
104                let blk_key = blk.key.clone();
105                let blk_stack_idx = blk.stack_idx;
106                block = None;
107                insert_value(&mut root, &stack, blk_stack_idx, &blk_key, Value::String(content));
108            }
109        }
110
111        // Continue list items
112        if trimmed.starts_with("- ") {
113            if let Some(ref mut lst) = list {
114                if indent > lst.indent {
115                    let val_str = strip_comment(trimmed[2..].trim());
116                    lst.items.push(cast(&val_str));
117                    i += 1;
118                    continue;
119                }
120            }
121        } else if let Some(ref lst) = list {
122            if indent <= lst.indent {
123                let items = list.take().unwrap();
124                let arr = Value::Array(items.items);
125                insert_value(&mut root, &stack, items.stack_idx, &items.key, arr);
126            }
127        }
128
129        // Parse key line
130        if let Some(parsed) = parse_line(trimmed) {
131            // Pop stack to correct parent
132            while stack.len() > 1 && stack.last().unwrap().0 >= indent {
133                stack.pop();
134            }
135
136            let parent_idx = stack.len() - 1;
137
138            // Save metadata if in active mode
139            if mode == Mode::Active
140                && (!parsed.markers.is_empty()
141                    || parsed.constraints.is_some()
142                    || parsed.type_hint.is_some())
143            {
144                let path = build_path(&stack);
145                let meta_map = metadata.entry(path).or_default();
146                meta_map.insert(
147                    parsed.key.clone(),
148                    Meta {
149                        markers: parsed.markers.clone(),
150                        args: parsed.marker_args.clone(),
151                        type_hint: parsed.type_hint.clone(),
152                        constraints: parsed.constraints.clone(),
153                    },
154                );
155            }
156
157            let is_block = parsed.value == "|";
158            let is_list_marker = parsed.markers.iter().any(|m| {
159                matches!(m.as_str(), "random" | "unique" | "geo" | "join")
160            });
161
162            if is_block {
163                insert_value(
164                    &mut root,
165                    &stack,
166                    parent_idx,
167                    &parsed.key,
168                    Value::String(String::new()),
169                );
170                block = Some(BlockState {
171                    indent,
172                    key: parsed.key,
173                    content: String::new(),
174                    stack_idx: parent_idx,
175                });
176            } else if is_list_marker && parsed.value.is_empty() {
177                list = Some(ListState {
178                    indent,
179                    key: parsed.key,
180                    items: Vec::new(),
181                    stack_idx: parent_idx,
182                });
183            } else if parsed.value.is_empty() {
184                // Peek ahead for list
185                let mut peek = i + 1;
186                while peek < line_count {
187                    let ps = line_starts[peek];
188                    let pe = if peek + 1 < line_count {
189                        line_starts[peek + 1] - 1
190                    } else {
191                        bytes.len()
192                    };
193                    let pe = if pe > ps && bytes.get(pe - 1) == Some(&b'\r') { pe - 1 } else { pe };
194                    let pt = text[ps..pe].trim();
195                    if !pt.is_empty() {
196                        break;
197                    }
198                    peek += 1;
199                }
200
201                if peek < line_count {
202                    let ps = line_starts[peek];
203                    let pe = if peek + 1 < line_count {
204                        line_starts[peek + 1] - 1
205                    } else {
206                        bytes.len()
207                    };
208                    let pe = if pe > ps && bytes.get(pe - 1) == Some(&b'\r') { pe - 1 } else { pe };
209                    let pt = text[ps..pe].trim();
210                    if pt.starts_with("- ") {
211                        list = Some(ListState {
212                            indent,
213                            key: parsed.key,
214                            items: Vec::new(),
215                            stack_idx: parent_idx,
216                        });
217                        i += 1;
218                        continue;
219                    }
220                }
221
222                insert_value(
223                    &mut root,
224                    &stack,
225                    parent_idx,
226                    &parsed.key,
227                    Value::Object(HashMap::new()),
228                );
229                stack.push((indent, StackEntry::Key(parsed.key)));
230            } else {
231                let value = if let Some(ref hint) = parsed.type_hint {
232                    cast_typed(&parsed.value, hint)
233                } else {
234                    cast(&parsed.value)
235                };
236                insert_value(&mut root, &stack, parent_idx, &parsed.key, value);
237            }
238        }
239
240        i += 1;
241    }
242
243    // Flush pending block
244    if let Some(blk) = block {
245        insert_value(
246            &mut root,
247            &stack,
248            blk.stack_idx,
249            &blk.key,
250            Value::String(blk.content),
251        );
252    }
253
254    // Flush pending list
255    if let Some(lst) = list {
256        let arr = Value::Array(lst.items);
257        insert_value(&mut root, &stack, lst.stack_idx, &lst.key, arr);
258    }
259
260    ParseResult {
261        root: Value::Object(root),
262        mode,
263        locked,
264        metadata,
265        includes,
266    }
267}
268
269// ─── Internal types ──────────────────────────────────────
270
/// One level of the nesting stack maintained by `parse` while it walks the
/// input lines.
#[derive(Debug)]
enum StackEntry {
    /// Sentinel for the document root; `parse` places it at the bottom of
    /// the stack and never pops it.
    Root,
    /// A currently open object, identified by its key name.
    Key(String),
}
276
/// Bookkeeping for a multiline text block (`key |`) that is still being
/// collected.
struct BlockState {
    /// Indent of the line that opened the block; only deeper lines belong to it.
    indent: i32,
    /// Key under which the collected text is stored.
    key: String,
    /// Text gathered so far, lines joined with `\n`.
    content: String,
    /// Index into the nesting stack of the object that receives the value.
    stack_idx: usize,
}
283
284struct ListState {
285    indent: i32,
286    key: String,
287    items: Vec<Value>,
288    stack_idx: usize,
289}
290
291struct ParsedLine {
292    key: String,
293    type_hint: Option<String>,
294    value: String,
295    markers: Vec<String>,
296    marker_args: Vec<String>,
297    constraints: Option<Constraints>,
298}
299
300// ─── Line parser ─────────────────────────────────────────
301
302fn parse_line(trimmed: &str) -> Option<ParsedLine> {
303    if trimmed.is_empty()
304        || trimmed.starts_with('#')
305        || trimmed.starts_with("//")
306        || trimmed.starts_with("- ")
307    {
308        return None;
309    }
310
311    let bytes = trimmed.as_bytes();
312    let len = bytes.len();
313
314    let first = bytes[0];
315    if first == b'[' || first == b':' || first == b'-' || first == b'#' || first == b'/' || first == b'(' {
316        return None;
317    }
318
319    // Extract key
320    let mut pos = 0;
321    while pos < len {
322        let ch = bytes[pos];
323        if ch == b' ' || ch == b'\t' || ch == b'[' || ch == b':' || ch == b'(' {
324            break;
325        }
326        pos += 1;
327    }
328    let key = trimmed[..pos].to_string();
329
330    // Optional (type)
331    let mut type_hint = None;
332    if pos < len && bytes[pos] == b'(' {
333        let start = pos + 1;
334        if let Some(c) = trimmed[start..].find(')') {
335            type_hint = Some(trimmed[start..start + c].to_string());
336            pos = start + c + 1;
337        }
338    }
339
340    // Optional [constraints]
341    let mut constraints = None;
342    if pos < len && bytes[pos] == b'[' {
343        if let Some(close) = trimmed[pos..].find(']') {
344            let constraint_str = &trimmed[pos + 1..pos + close];
345            constraints = Some(parse_constraints(constraint_str));
346            pos += close + 1;
347        }
348    }
349
350    // Optional :markers
351    let mut markers = Vec::new();
352    let mut marker_args = Vec::new();
353    if pos < len && bytes[pos] == b':' {
354        let marker_start = pos + 1;
355        let mut marker_end = marker_start;
356        while marker_end < len && bytes[marker_end] != b' ' && bytes[marker_end] != b'\t' {
357            marker_end += 1;
358        }
359        let chain = &trimmed[marker_start..marker_end];
360        markers = chain.split(':').map(|s| s.to_string()).collect();
361        pos = marker_end;
362    }
363
364    // Skip whitespace
365    while pos < len && (bytes[pos] == b' ' || bytes[pos] == b'\t') {
366        pos += 1;
367    }
368
369    // Value
370    let mut raw_value = if pos < len {
371        strip_comment(&trimmed[pos..])
372    } else {
373        String::new()
374    };
375
376    // For :random — parse weight percentages from value
377    if markers.contains(&"random".to_string()) && !raw_value.is_empty() {
378        let parts: Vec<&str> = raw_value.split_whitespace().collect();
379        let nums: Vec<String> = parts
380            .iter()
381            .filter(|s| s.parse::<f64>().is_ok())
382            .map(|s| s.to_string())
383            .collect();
384        if !nums.is_empty() {
385            marker_args = nums;
386            raw_value.clear();
387        }
388    }
389
390    Some(ParsedLine {
391        key,
392        type_hint,
393        value: raw_value,
394        markers,
395        marker_args,
396        constraints,
397    })
398}
399
400// ─── Constraints parser ──────────────────────────────────
401
402fn parse_constraints(raw: &str) -> Constraints {
403    let mut c = Constraints::default();
404    for part in raw.split(',').map(|s| s.trim()).filter(|s| !s.is_empty()) {
405        if part == "required" {
406            c.required = true;
407        } else if part == "readonly" {
408            c.readonly = true;
409        } else if let Some(colon) = part.find(':') {
410            let key = part[..colon].trim();
411            let val = part[colon + 1..].trim();
412            match key {
413                "min" => c.min = val.parse().ok(),
414                "max" => c.max = val.parse().ok(),
415                "type" => c.type_name = Some(val.to_string()),
416                "pattern" => c.pattern = Some(val.to_string()),
417                "enum" => c.enum_values = Some(val.split('|').map(|s| s.to_string()).collect()),
418                _ => {}
419            }
420        }
421    }
422    c
423}
424
425// ─── Value casting ───────────────────────────────────────
426
427fn cast(val: &str) -> Value {
428    // Quoted strings preserve literal value (bypass auto-casting)
429    // "null" → String("null"), "true" → String("true"), "123" → String("123")
430    if val.len() >= 2 {
431        let bytes = val.as_bytes();
432        if (bytes[0] == b'"' && bytes[bytes.len() - 1] == b'"')
433            || (bytes[0] == b'\'' && bytes[bytes.len() - 1] == b'\'')
434        {
435            return Value::String(val[1..val.len() - 1].to_string());
436        }
437    }
438
439    match val {
440        "true" => Value::Bool(true),
441        "false" => Value::Bool(false),
442        "null" => Value::Null,
443        _ => {
444            let bytes = val.as_bytes();
445            let len = bytes.len();
446            if len == 0 {
447                return Value::String(String::new());
448            }
449
450            let mut start = 0;
451            if bytes[0] == b'-' {
452                if len == 1 {
453                    return Value::String(val.to_string());
454                }
455                start = 1;
456            }
457
458            if bytes[start] >= b'0' && bytes[start] <= b'9' {
459                let mut dot_pos = None;
460                let mut all_numeric = true;
461                for j in start..len {
462                    if bytes[j] == b'.' {
463                        if dot_pos.is_some() {
464                            all_numeric = false;
465                            break;
466                        }
467                        dot_pos = Some(j);
468                    } else if bytes[j] < b'0' || bytes[j] > b'9' {
469                        all_numeric = false;
470                        break;
471                    }
472                }
473                if all_numeric {
474                    if let Some(dp) = dot_pos {
475                        if dp > start && dp < len - 1 {
476                            if let Ok(f) = val.parse::<f64>() {
477                                return Value::Float(f);
478                            }
479                        }
480                    } else if let Ok(n) = val.parse::<i64>() {
481                        return Value::Int(n);
482                    }
483                }
484            }
485
486            Value::String(val.to_string())
487        }
488    }
489}
490
491fn cast_typed(val: &str, hint: &str) -> Value {
492    match hint {
493        "int" => Value::Int(val.parse().unwrap_or(0)),
494        "float" => Value::Float(val.parse().unwrap_or(0.0)),
495        "bool" => Value::Bool(val.trim() == "true"),
496        "string" => Value::String(val.to_string()),
497        "random" | "random:int" => Value::Int(rng::random_i64()),
498        "random:float" => Value::Float(rng::random_f64_01()),
499        "random:bool" => Value::Bool(rng::random_bool()),
500        _ => cast(val),
501    }
502}
503
/// Remove a trailing inline comment from a value.
///
/// The value is cut at the earliest occurrence of ` //` or ` #` (the marker
/// must be preceded by a space) and trailing whitespace is trimmed from
/// what remains.
fn strip_comment(val: &str) -> String {
    let cut = [" //", " #"]
        .iter()
        .filter_map(|marker| val.find(*marker))
        .min()
        .unwrap_or(val.len());
    val[..cut].trim_end().to_string()
}
514
515// ─── Tree helpers ────────────────────────────────────────
516
517fn build_path(stack: &[(i32, StackEntry)]) -> String {
518    let mut parts = Vec::new();
519    for (_, entry) in stack.iter().skip(1) {
520        if let StackEntry::Key(ref k) = entry {
521            parts.push(k.as_str());
522        }
523    }
524    parts.join(".")
525}
526
527fn insert_value(
528    root: &mut HashMap<String, Value>,
529    stack: &[(i32, StackEntry)],
530    parent_idx: usize,
531    key: &str,
532    value: Value,
533) {
534    if let Some(target) = navigate_to_parent(root, stack, parent_idx) {
535        target.insert(key.to_string(), value);
536    }
537    // If the path is broken the line is silently skipped — this should not
538    // happen under well-formed input; malformed input simply loses the entry
539    // rather than inserting it at the wrong nesting level.
540}
541
542fn navigate_to_parent<'a>(
543    root: &'a mut HashMap<String, Value>,
544    stack: &[(i32, StackEntry)],
545    target_idx: usize,
546) -> Option<&'a mut HashMap<String, Value>> {
547    if target_idx == 0 {
548        return Some(root);
549    }
550
551    let path: Vec<&str> = stack
552        .iter()
553        .skip(1)
554        .take(target_idx)
555        .filter_map(|(_, entry)| match entry {
556            StackEntry::Key(k) => Some(k.as_str()),
557            _ => None,
558        })
559        .collect();
560
561    // SAFETY: We navigate a tree of nested HashMaps using a raw pointer to
562    // work around the borrow-checker's inability to track that successive
563    // `get_mut` calls target disjoint subtrees.  The invariants that make
564    // this sound are:
565    //   1. `root` is a valid, exclusively-owned mutable reference for 'a.
566    //   2. We descend strictly downward and never alias: at each step we
567    //      replace `current` with a pointer to a child map, discarding the
568    //      parent pointer.
569    //   3. The returned reference re-borrows from `root`'s lifetime 'a and
570    //      is the only mutable reference handed out by this function.
571    let mut current = root as *mut HashMap<String, Value>;
572    for key in path {
573        let child = unsafe { (*current).get_mut(key) };
574        match child {
575            Some(Value::Object(map)) => current = map as *mut HashMap<String, Value>,
576            _ => return None, // Path segment missing or not an Object
577        }
578    }
579    Some(unsafe { &mut *current })
580}
581
#[cfg(test)]
mod tests {
    use super::*;

    /// Scalars are auto-cast and the default mode is static.
    #[test]
    fn test_simple_key_value() {
        let result = parse("name Wario\nage 30\nactive true\nscore 99.5\nempty null");
        let top = result.root.as_object().unwrap();

        assert_eq!(top["name"], Value::String("Wario".to_string()));
        assert_eq!(top["age"], Value::Int(30));
        assert_eq!(top["active"], Value::Bool(true));
        assert_eq!(top["score"], Value::Float(99.5));
        assert_eq!(top["empty"], Value::Null);
        assert_eq!(result.mode, Mode::Static);
    }

    /// Indentation creates nested objects.
    #[test]
    fn test_nested_objects() {
        let source = "server\n  host 0.0.0.0\n  port 8080\n  ssl\n    enabled true";
        let result = parse(source);
        let top = result.root.as_object().unwrap();

        let server = top["server"].as_object().unwrap();
        assert_eq!(server["host"], Value::String("0.0.0.0".to_string()));
        assert_eq!(server["port"], Value::Int(8080));

        let ssl = server["ssl"].as_object().unwrap();
        assert_eq!(ssl["enabled"], Value::Bool(true));
    }

    /// `- item` lines under a key form an array.
    #[test]
    fn test_lists() {
        let result = parse("inventory\n  - Sword\n  - Shield\n  - Potion");
        let top = result.root.as_object().unwrap();

        let inventory = top["inventory"].as_array().unwrap();
        assert_eq!(inventory.len(), 3);
        assert_eq!(inventory[0], Value::String("Sword".to_string()));
    }

    /// `key |` collects the indented lines that follow into one string.
    #[test]
    fn test_multiline_block() {
        let result = parse("rules |\n  Rule one.\n  Rule two.\n  Rule three.");
        let top = result.root.as_object().unwrap();

        let expected = Value::String("Rule one.\nRule two.\nRule three.".to_string());
        assert_eq!(top["rules"], expected);
    }

    /// Full-line and inline comments are ignored.
    #[test]
    fn test_comments() {
        let result = parse("# comment\nname Wario # inline\nage 30 // inline");
        let top = result.root.as_object().unwrap();

        assert_eq!(top["name"], Value::String("Wario".to_string()));
        assert_eq!(top["age"], Value::Int(30));
    }

    /// `!active` switches the mode and records marker metadata.
    #[test]
    fn test_active_mode() {
        let result = parse("!active\nprice 100\ntax:calc price * 0.2");
        assert_eq!(result.mode, Mode::Active);

        let top = result.root.as_object().unwrap();
        assert_eq!(top["price"], Value::Int(100));
        // Before engine resolution, :calc value is a string
        assert_eq!(top["tax"], Value::String("price * 0.2".to_string()));

        // Metadata should be saved
        let top_meta = result.metadata.get("").unwrap();
        assert!(top_meta.contains_key("tax"));
        assert_eq!(top_meta["tax"].markers, vec!["calc"]);
    }

    /// A marker chain is split on every `:`.
    #[test]
    fn test_markers_env_default() {
        let result = parse("!active\nport:env:default:3000 PORT");
        let top_meta = result.metadata.get("").unwrap();
        assert_eq!(top_meta["port"].markers, vec!["env", "default", "3000"]);
    }

    /// A `(string)` hint prevents numeric auto-casting.
    #[test]
    fn test_type_hint() {
        let result = parse("zip(string) 90210");
        let top = result.root.as_object().unwrap();
        assert_eq!(top["zip"], Value::String("90210".to_string()));
    }

    /// `[...]` constraints are parsed into metadata in active mode.
    #[test]
    fn test_constraints() {
        let result = parse("!active\nname[min:3, max:30, required] Wario");
        let top_meta = result.metadata.get("").unwrap();

        let constraints = top_meta["name"].constraints.as_ref().unwrap();
        assert_eq!(constraints.min, Some(3.0));
        assert_eq!(constraints.max, Some(30.0));
        assert!(constraints.required);
    }

    /// Numbers after a `:random` key are stored as marker arguments.
    #[test]
    fn test_random_weights() {
        let result = parse("!active\ntier:random 90 5 5");
        let top_meta = result.metadata.get("").unwrap();

        assert_eq!(top_meta["tier"].markers, vec!["random"]);
        assert_eq!(top_meta["tier"].args, vec!["90", "5", "5"]);
    }
}
681}