Skip to main content

synx_core/
parser.rs

1//! SYNX Parser — converts raw .synx text into a structured value tree
2//! with metadata for engine resolution.
3
4use std::collections::HashMap;
5use memchr::memchr;
6use crate::value::*;
7use crate::rng;
8
// ─── Resource limits (fuzz / hostile input) ─────────────────
// All caps are documented here so callers know parsing is bounded.
// Exceeding a cap never raises an error: the excess input is truncated or ignored.

/// Maximum UTF-8 bytes accepted per `parse()`. Longer input is truncated to the longest
/// prefix that fits and ends on a valid UTF-8 boundary (see `clamp_synx_text`).
pub(crate) const MAX_SYNX_INPUT_BYTES: usize = 16 * 1024 * 1024;

/// Maximum indexed line starts (1 + number of `\n` kept after truncation).
/// Bounds the `line_starts` vector, which holds one `usize` per line (8 bytes each on 64-bit).
const MAX_LINE_STARTS: usize = 2_000_000;

/// Maximum length of the parser's parent stack, root entry included. The parser is
/// iterative; once the cap is reached, further empty-valued keys are still inserted as
/// objects but are no longer pushed, so parent chains stop growing.
const MAX_PARSE_NESTING_DEPTH: usize = 128;

/// Multiline `key |` block body: max accumulated UTF-8 bytes. Text beyond the cap is dropped.
const MAX_MULTILINE_BLOCK_BYTES: usize = 1024 * 1024;

/// `- list item` entries per single list. Items beyond the cap are dropped.
const MAX_LIST_ITEMS: usize = 1_048_576;

/// `!include` lines per file. Directives beyond the cap are ignored.
const MAX_INCLUDE_DIRECTIVES: usize = 4096;

/// Max `|`-separated alternatives kept from an `enum:` entry inside `[constraints]`.
const MAX_CONSTRAINT_ENUM_PARTS: usize = 4096;

/// Max `:a:b:c` marker segments kept from one key line.
const MAX_MARKER_CHAIN_SEGMENTS: usize = 512;
35
36/// Truncate `text` to a UTF-8-safe prefix (used by `parse` and canonical `format`).
37pub(crate) fn clamp_synx_text(text: &str) -> &str {
38    if text.len() <= MAX_SYNX_INPUT_BYTES {
39        return text;
40    }
41    let slice = &text.as_bytes()[..MAX_SYNX_INPUT_BYTES];
42    let end = core::str::from_utf8(slice)
43        .map(|s| s.len())
44        .unwrap_or_else(|e| e.valid_up_to());
45    &text[..end]
46}
47
48/// Byte length to parse: full slice, or truncate before the newline that would exceed
49/// `MAX_LINE_STARTS` lines (at most `MAX_LINE_STARTS.saturating_sub(1)` `\n` bytes kept).
50fn find_parse_end_bytes(bytes: &[u8]) -> usize {
51    let max_newlines = MAX_LINE_STARTS.saturating_sub(1);
52    let mut seen_newlines = 0usize;
53    let mut scan = 0usize;
54    while scan < bytes.len() {
55        if let Some(rel) = memchr(b'\n', &bytes[scan..]) {
56            if seen_newlines >= max_newlines {
57                return scan + rel;
58            }
59            seen_newlines += 1;
60            scan += rel + 1;
61        } else {
62            break;
63        }
64    }
65    bytes.len()
66}
67
68/// Parse a SYNX text string into a value tree with metadata.
69pub fn parse(text: &str) -> ParseResult {
70    let text = clamp_synx_text(text);
71    let parse_end = find_parse_end_bytes(text.as_bytes());
72    let text = &text[..parse_end];
73    let bytes = text.as_bytes();
74
75    let mut line_starts: Vec<usize> = Vec::new();
76    line_starts.push(0);
77    let mut scan = 0usize;
78    while scan < bytes.len() {
79        if let Some(rel) = memchr(b'\n', &bytes[scan..]) {
80            let pos = scan + rel;
81            line_starts.push(pos + 1);
82            scan = pos + 1;
83        } else {
84            break;
85        }
86    }
87    let line_count = line_starts.len();
88
89    let mut root = HashMap::new();
90    let mut stack: Vec<(i32, StackEntry)> = vec![(-1, StackEntry::Root)];
91    let mut mode = Mode::Static;
92    let mut locked = false;
93    let mut tool = false;
94    let mut schema = false;
95    let mut llm = false;
96    let mut metadata: HashMap<String, MetaMap> = HashMap::new();
97    let mut includes: Vec<IncludeDirective> = Vec::new();
98    let mut uses: Vec<UseDirective> = Vec::new();
99
100    let mut block: Option<BlockState> = None;
101    let mut list: Option<ListState> = None;
102    let mut in_block_comment = false;
103
104    let mut i = 0;
105    while i < line_count {
106        // Extract line without allocating
107        let start = line_starts[i];
108        let end = if i + 1 < line_count { line_starts[i + 1] - 1 } else { bytes.len() };
109        // Handle \r\n
110        let end = if end > start && end > 0 && bytes.get(end - 1) == Some(&b'\r') { end - 1 } else { end };
111        let raw = &text[start..end];
112
113        let trimmed = raw.trim();
114
115        // Mode declaration
116        if trimmed == "!active" {
117            mode = Mode::Active;
118            i += 1;
119            continue;
120        }
121        if trimmed == "!lock" {
122            locked = true;
123            i += 1;
124            continue;
125        }
126        if trimmed == "!tool" {
127            tool = true;
128            i += 1;
129            continue;
130        }
131        if trimmed == "!schema" {
132            schema = true;
133            i += 1;
134            continue;
135        }
136        if trimmed == "!llm" {
137            llm = true;
138            i += 1;
139            continue;
140        }
141        if trimmed.starts_with("!include ") {
142            if includes.len() < MAX_INCLUDE_DIRECTIVES {
143                let rest = trimmed[9..].trim();
144                let mut parts = rest.splitn(2, char::is_whitespace);
145                let path = parts.next().unwrap_or("").to_string();
146                let alias = parts.next().map(|s| s.trim().to_string()).unwrap_or_else(|| {
147                    // Auto-derive alias from filename
148                    let name = path.rsplit(&['/', '\\'][..]).next().unwrap_or(&path);
149                    name.strip_suffix(".synx").or_else(|| name.strip_suffix(".SYNX")).unwrap_or(name).to_string()
150                });
151                includes.push(IncludeDirective { path, alias });
152            }
153            i += 1;
154            continue;
155        }
156        if trimmed.starts_with("!use ") {
157            let rest = trimmed[5..].trim();
158            if rest.starts_with('@') {
159                // Parse: !use @scope/name [as alias]
160                let mut parts = rest.splitn(2, " as ");
161                let package = parts.next().unwrap_or("").trim().to_string();
162                let alias = parts.next().map(|s| s.trim().to_string()).unwrap_or_else(|| {
163                    // Auto-derive alias from last segment: @scope/name → name
164                    package.rsplit('/').next().unwrap_or(&package).to_string()
165                });
166                if !package.is_empty() {
167                    uses.push(UseDirective { package, alias });
168                }
169            }
170            i += 1;
171            continue;
172        }
173        if trimmed.starts_with("#!mode:") {
174            let declared = trimmed.splitn(2, ':').nth(1).unwrap_or("static").trim();
175            mode = if declared == "active" { Mode::Active } else { Mode::Static };
176            i += 1;
177            continue;
178        }
179
180        // Block comment toggle: ###
181        if trimmed == "###" {
182            in_block_comment = !in_block_comment;
183            i += 1;
184            continue;
185        }
186        if in_block_comment {
187            i += 1;
188            continue;
189        }
190
191        // Skip empty / comments
192        if trimmed.is_empty() || trimmed.starts_with('#') || trimmed.starts_with("//") {
193            i += 1;
194            continue;
195        }
196
197        let indent = (raw.len() - raw.trim_start().len()) as i32;
198
199        // Continue multiline block
200        if let Some(ref mut blk) = block {
201            if indent > blk.indent {
202                if blk.content.len() < MAX_MULTILINE_BLOCK_BYTES {
203                    if !blk.content.is_empty() {
204                        blk.content.push('\n');
205                    }
206                    let room = MAX_MULTILINE_BLOCK_BYTES.saturating_sub(blk.content.len());
207                    if room > 0 {
208                        let n = trimmed.len().min(room);
209                        blk.content.push_str(&trimmed[..n]);
210                    }
211                }
212                i += 1;
213                continue;
214            } else {
215                let content = std::mem::take(&mut blk.content);
216                let blk_key = blk.key.clone();
217                let blk_stack_idx = blk.stack_idx;
218                block = None;
219                insert_value(&mut root, &stack, blk_stack_idx, &blk_key, Value::String(content));
220            }
221        }
222
223        // Continue list items
224        if trimmed.starts_with("- ") {
225            if let Some(ref mut lst) = list {
226                if indent > lst.indent {
227                    if lst.items.len() < MAX_LIST_ITEMS {
228                        let val_str = strip_comment(trimmed[2..].trim());
229                        lst.items.push(cast(&val_str));
230                    }
231                    i += 1;
232                    continue;
233                }
234            }
235        } else if let Some(ref lst) = list {
236            if indent <= lst.indent {
237                let items = list.take().unwrap();
238                let arr = Value::Array(items.items);
239                insert_value(&mut root, &stack, items.stack_idx, &items.key, arr);
240            }
241        }
242
243        // Parse key line
244        if let Some(parsed) = parse_line(trimmed) {
245            // Pop stack to correct parent
246            while stack.len() > 1 && stack.last().unwrap().0 >= indent {
247                stack.pop();
248            }
249
250            let parent_idx = stack.len() - 1;
251
252            // Save metadata if in active mode
253            if mode == Mode::Active
254                && (!parsed.markers.is_empty()
255                    || parsed.constraints.is_some()
256                    || parsed.type_hint.is_some())
257            {
258                let path = build_path(&stack);
259                let meta_map = metadata.entry(path).or_default();
260                meta_map.insert(
261                    parsed.key.clone(),
262                    Meta {
263                        markers: parsed.markers.clone(),
264                        args: parsed.marker_args.clone(),
265                        type_hint: parsed.type_hint.clone(),
266                        constraints: parsed.constraints.clone(),
267                    },
268                );
269            }
270
271            let is_block = parsed.value == "|";
272            let is_list_marker = parsed.markers.iter().any(|m| {
273                matches!(m.as_str(), "random" | "unique" | "geo" | "join")
274            });
275
276            if is_block {
277                insert_value(
278                    &mut root,
279                    &stack,
280                    parent_idx,
281                    &parsed.key,
282                    Value::String(String::new()),
283                );
284                block = Some(BlockState {
285                    indent,
286                    key: parsed.key,
287                    content: String::new(),
288                    stack_idx: parent_idx,
289                });
290            } else if is_list_marker && parsed.value.is_empty() {
291                list = Some(ListState {
292                    indent,
293                    key: parsed.key,
294                    items: Vec::new(),
295                    stack_idx: parent_idx,
296                });
297            } else if parsed.value.is_empty() {
298                // Peek ahead for list
299                let mut peek = i + 1;
300                while peek < line_count {
301                    let ps = line_starts[peek];
302                    let pe = if peek + 1 < line_count {
303                        line_starts[peek + 1] - 1
304                    } else {
305                        bytes.len()
306                    };
307                    let pe = if pe > ps && bytes.get(pe - 1) == Some(&b'\r') { pe - 1 } else { pe };
308                    let pt = text[ps..pe].trim();
309                    if !pt.is_empty() {
310                        break;
311                    }
312                    peek += 1;
313                }
314
315                if peek < line_count {
316                    let ps = line_starts[peek];
317                    let pe = if peek + 1 < line_count {
318                        line_starts[peek + 1] - 1
319                    } else {
320                        bytes.len()
321                    };
322                    let pe = if pe > ps && bytes.get(pe - 1) == Some(&b'\r') { pe - 1 } else { pe };
323                    let pt = text[ps..pe].trim();
324                    if pt.starts_with("- ") {
325                        list = Some(ListState {
326                            indent,
327                            key: parsed.key,
328                            items: Vec::new(),
329                            stack_idx: parent_idx,
330                        });
331                        i += 1;
332                        continue;
333                    }
334                }
335
336                insert_value(
337                    &mut root,
338                    &stack,
339                    parent_idx,
340                    &parsed.key,
341                    Value::Object(HashMap::new()),
342                );
343                // Guard against pathological inputs that create extremely deep nesting,
344                // which can lead to large allocations (metadata path building, parent navigation, etc).
345                // If the cap is hit, we still insert the object but stop increasing nesting.
346                if stack.len() < MAX_PARSE_NESTING_DEPTH {
347                    stack.push((indent, StackEntry::Key(parsed.key)));
348                }
349            } else {
350                let value = if let Some(ref hint) = parsed.type_hint {
351                    cast_typed(&parsed.value, hint)
352                } else {
353                    cast(&parsed.value)
354                };
355                insert_value(&mut root, &stack, parent_idx, &parsed.key, value);
356            }
357        }
358
359        i += 1;
360    }
361
362    // Flush pending block
363    if let Some(blk) = block {
364        insert_value(
365            &mut root,
366            &stack,
367            blk.stack_idx,
368            &blk.key,
369            Value::String(blk.content),
370        );
371    }
372
373    // Flush pending list
374    if let Some(lst) = list {
375        let arr = Value::Array(lst.items);
376        insert_value(&mut root, &stack, lst.stack_idx, &lst.key, arr);
377    }
378
379    let parsed_root = Value::Object(root);
380
381    // !tool reshaping is deferred — done after engine resolution for !active compatibility.
382    // Non-active !tool files are reshaped via Synx::parse_tool() or resolve_tool_output().
383
384    ParseResult {
385        root: parsed_root,
386        mode,
387        locked,
388        tool,
389        schema,
390        llm,
391        metadata,
392        includes,
393        uses,
394    }
395}
396
397// ─── !tool output reshaping ──────────────────────────────
398
399/// Reshape parsed tree for `!tool` mode.
400///
401/// **Call mode** (`!tool` without `!schema`):
402///   First top-level key = tool name, its children = params.
403///   Output: `{ tool: "name", params: { ... } }`
404///
405/// **Schema mode** (`!tool` + `!schema`):
406///   Each top-level key = tool name, children = param type definitions.
407///   Output: `{ tools: [ { name: "tool1", params: { key: "type", ... } }, ... ] }`
408pub fn reshape_tool_output(root: &Value, schema: bool) -> Value {
409    let map = match root {
410        Value::Object(m) => m,
411        _ => return root.clone(),
412    };
413
414    if schema {
415        // Schema mode: list of tool definitions
416        let mut tools = Vec::new();
417        // Sort for deterministic output
418        let mut keys: Vec<&String> = map.keys().collect();
419        keys.sort();
420        for key in keys {
421            let val = &map[key];
422            let mut def = HashMap::new();
423            def.insert("name".to_string(), Value::String(key.clone()));
424            def.insert("params".to_string(), val.clone());
425            tools.push(Value::Object(def));
426        }
427        let mut out = HashMap::new();
428        out.insert("tools".to_string(), Value::Array(tools));
429        Value::Object(out)
430    } else {
431        // Call mode: first key = tool name, children = params
432        if map.is_empty() {
433            let mut out = HashMap::new();
434            out.insert("tool".to_string(), Value::Null);
435            out.insert("params".to_string(), Value::Object(HashMap::new()));
436            return Value::Object(out);
437        }
438
439        // Deterministic: pick the first key in source order.
440        // Since HashMap doesn't preserve order, sort and take first.
441        let mut keys: Vec<&String> = map.keys().collect();
442        keys.sort();
443        let tool_key = keys[0];
444        let tool_value = &map[tool_key];
445
446        let params = match tool_value {
447            Value::Object(m) => Value::Object(m.clone()),
448            // If tool has a single value (no nested params), wrap it
449            _ => Value::Object(HashMap::new()),
450        };
451
452        let mut out = HashMap::new();
453        out.insert("tool".to_string(), Value::String(tool_key.clone()));
454        out.insert("params".to_string(), params);
455        Value::Object(out)
456    }
457}
458
459// ─── Internal types ──────────────────────────────────────
460
/// One frame of the parser's parent stack. Frames are stored as `(indent, StackEntry)`
/// pairs, where `indent` is the indentation of the line that created the frame.
#[derive(Debug)]
enum StackEntry {
    /// Sentinel at index 0 standing for the document root; `parse` seeds the stack with
    /// it at indent `-1`.
    Root,
    /// An object-valued key; holds the key name used to descend into that object.
    Key(String),
}
466
/// State of a multiline `key |` block whose body lines are still being collected.
struct BlockState {
    /// Indentation of the `key |` header line; only lines indented deeper extend the block.
    indent: i32,
    /// Key the finished text is stored under.
    key: String,
    /// Body collected so far: the trimmed body lines joined with `\n`.
    content: String,
    /// Index into the parent stack of the object that receives `key`.
    stack_idx: usize,
}
473
/// State of a list whose `- item` lines are still being collected.
struct ListState {
    /// Indentation of the list's key line; only `- ` lines indented deeper are items.
    indent: i32,
    /// Key the finished array is stored under.
    key: String,
    /// Items collected so far, already cast to values.
    items: Vec<Value>,
    /// Index into the parent stack of the object that receives `key`.
    stack_idx: usize,
}
480
/// The parts of one key line of the form `key(type)[constraints]:markers value`,
/// as produced by `parse_line`.
struct ParsedLine {
    /// Key name: the text before the first space, tab, `(`, `[` or `:`.
    key: String,
    /// Contents of the optional `(type)` suffix.
    type_hint: Option<String>,
    /// Value text with inline comments stripped. Empty when the line has no value, or
    /// when the value was consumed as `:random` weights.
    value: String,
    /// Segments of the optional `:a:b:c` marker chain, in order.
    markers: Vec<String>,
    /// Numeric weights taken from the value of a `:random` key; empty otherwise.
    marker_args: Vec<String>,
    /// Parsed contents of the optional `[constraints]` suffix.
    constraints: Option<Constraints>,
}
489
490// ─── Line parser ─────────────────────────────────────────
491
492fn parse_line(trimmed: &str) -> Option<ParsedLine> {
493    if trimmed.is_empty()
494        || trimmed.starts_with('#')
495        || trimmed.starts_with("//")
496        || trimmed.starts_with("- ")
497    {
498        return None;
499    }
500
501    let bytes = trimmed.as_bytes();
502    let len = bytes.len();
503
504    let first = bytes[0];
505    if first == b'[' || first == b':' || first == b'-' || first == b'#' || first == b'/' || first == b'(' {
506        return None;
507    }
508
509    // Extract key
510    let mut pos = 0;
511    while pos < len {
512        let ch = bytes[pos];
513        if ch == b' ' || ch == b'\t' || ch == b'[' || ch == b':' || ch == b'(' {
514            break;
515        }
516        pos += 1;
517    }
518    let key = trimmed[..pos].to_string();
519
520    // Optional (type)
521    let mut type_hint = None;
522    if pos < len && bytes[pos] == b'(' {
523        let start = pos + 1;
524        if let Some(c) = trimmed[start..].find(')') {
525            type_hint = Some(trimmed[start..start + c].to_string());
526            pos = start + c + 1;
527        } else {
528            pos += 1;
529        }
530    }
531
532    // Optional [constraints]
533    let mut constraints = None;
534    if pos < len && bytes[pos] == b'[' {
535        if let Some(close) = trimmed[pos..].find(']') {
536            let constraint_str = &trimmed[pos + 1..pos + close];
537            constraints = Some(parse_constraints(constraint_str));
538            pos += close + 1;
539        } else {
540            pos += 1;
541        }
542    }
543
544    // Optional :markers
545    let mut markers = Vec::new();
546    let mut marker_args = Vec::new();
547    if pos < len && bytes[pos] == b':' {
548        let marker_start = pos + 1;
549        let mut marker_end = marker_start;
550        while marker_end < len && bytes[marker_end] != b' ' && bytes[marker_end] != b'\t' {
551            marker_end += 1;
552        }
553        let chain = &trimmed[marker_start..marker_end];
554        markers = chain
555            .split(':')
556            .take(MAX_MARKER_CHAIN_SEGMENTS)
557            .map(|s| s.to_string())
558            .collect();
559        pos = marker_end;
560    }
561
562    // Skip whitespace
563    while pos < len && (bytes[pos] == b' ' || bytes[pos] == b'\t') {
564        pos += 1;
565    }
566
567    // Value
568    let mut raw_value = if pos < len {
569        strip_comment(&trimmed[pos..])
570    } else {
571        String::new()
572    };
573
574    // For :random — parse weight percentages from value
575    if markers.contains(&"random".to_string()) && !raw_value.is_empty() {
576        let parts: Vec<&str> = raw_value.split_whitespace().collect();
577        let nums: Vec<String> = parts
578            .iter()
579            .filter(|s| s.parse::<f64>().is_ok())
580            .map(|s| s.to_string())
581            .collect();
582        if !nums.is_empty() {
583            marker_args = nums;
584            raw_value.clear();
585        }
586    }
587
588    Some(ParsedLine {
589        key,
590        type_hint,
591        value: raw_value,
592        markers,
593        marker_args,
594        constraints,
595    })
596}
597
598// ─── Constraints parser ──────────────────────────────────
599
600fn parse_constraints(raw: &str) -> Constraints {
601    let mut c = Constraints::default();
602    for part in raw.split(',').map(|s| s.trim()).filter(|s| !s.is_empty()) {
603        if part == "required" {
604            c.required = true;
605        } else if part == "readonly" {
606            c.readonly = true;
607        } else if let Some(colon) = part.find(':') {
608            let key = part[..colon].trim();
609            let val = part[colon + 1..].trim();
610            match key {
611                "min" => c.min = val.parse().ok(),
612                "max" => c.max = val.parse().ok(),
613                "type" => c.type_name = Some(val.to_string()),
614                "pattern" => c.pattern = Some(val.to_string()),
615                "enum" => {
616                    c.enum_values = Some(
617                        val.split('|')
618                            .take(MAX_CONSTRAINT_ENUM_PARTS)
619                            .map(|s| s.to_string())
620                            .collect(),
621                    );
622                }
623                _ => {}
624            }
625        }
626    }
627    c
628}
629
630// ─── Value casting ───────────────────────────────────────
631
632fn cast(val: &str) -> Value {
633    // Quoted strings preserve literal value (bypass auto-casting)
634    // "null" → String("null"), "true" → String("true"), "123" → String("123")
635    if val.len() >= 2 {
636        let bytes = val.as_bytes();
637        if (bytes[0] == b'"' && bytes[bytes.len() - 1] == b'"')
638            || (bytes[0] == b'\'' && bytes[bytes.len() - 1] == b'\'')
639        {
640            return Value::String(val[1..val.len() - 1].to_string());
641        }
642    }
643
644    match val {
645        "true" => Value::Bool(true),
646        "false" => Value::Bool(false),
647        "null" => Value::Null,
648        _ => {
649            let bytes = val.as_bytes();
650            let len = bytes.len();
651            if len == 0 {
652                return Value::String(String::new());
653            }
654
655            let mut start = 0;
656            if bytes[0] == b'-' {
657                if len == 1 {
658                    return Value::String(val.to_string());
659                }
660                start = 1;
661            }
662
663            if bytes[start] >= b'0' && bytes[start] <= b'9' {
664                let mut dot_pos = None;
665                let mut all_numeric = true;
666                for j in start..len {
667                    if bytes[j] == b'.' {
668                        if dot_pos.is_some() {
669                            all_numeric = false;
670                            break;
671                        }
672                        dot_pos = Some(j);
673                    } else if bytes[j] < b'0' || bytes[j] > b'9' {
674                        all_numeric = false;
675                        break;
676                    }
677                }
678                if all_numeric {
679                    if let Some(dp) = dot_pos {
680                        if dp > start && dp < len - 1 {
681                            if let Ok(f) = val.parse::<f64>() {
682                                return Value::Float(f);
683                            }
684                        }
685                    } else if let Ok(n) = val.parse::<i64>() {
686                        return Value::Int(n);
687                    }
688                }
689            }
690
691            Value::String(val.to_string())
692        }
693    }
694}
695
696fn cast_typed(val: &str, hint: &str) -> Value {
697    match hint {
698        "int" => Value::Int(val.parse().unwrap_or(0)),
699        "float" => Value::Float(val.parse().unwrap_or(0.0)),
700        "bool" => Value::Bool(val.trim() == "true"),
701        "string" => Value::String(val.to_string()),
702        "random" | "random:int" => Value::Int(rng::random_i64()),
703        "random:float" => Value::Float(rng::random_f64_01()),
704        "random:bool" => Value::Bool(rng::random_bool()),
705        _ => cast(val),
706    }
707}
708
/// Removes a trailing inline comment from value text.
///
/// The text is cut at the first ` //` or ` #` (each must be preceded by a space),
/// whichever comes first, and trailing whitespace is trimmed. Leading whitespace is
/// kept. No attempt is made to recognise quotes, so a comment marker inside a quoted
/// value also cuts the text.
fn strip_comment(val: &str) -> String {
    let before_slashes = val.find(" //").map_or(val, |at| &val[..at]);
    let before_hash = before_slashes
        .find(" #")
        .map_or(before_slashes, |at| &before_slashes[..at]);
    before_hash.trim_end().to_owned()
}
719
720// ─── Tree helpers ────────────────────────────────────────
721
722fn build_path(stack: &[(i32, StackEntry)]) -> String {
723    let mut parts = Vec::new();
724    for (_, entry) in stack.iter().skip(1) {
725        if let StackEntry::Key(ref k) = entry {
726            parts.push(k.as_str());
727        }
728    }
729    parts.join(".")
730}
731
732fn insert_value(
733    root: &mut HashMap<String, Value>,
734    stack: &[(i32, StackEntry)],
735    parent_idx: usize,
736    key: &str,
737    value: Value,
738) {
739    if let Some(target) = navigate_to_parent(root, stack, parent_idx) {
740        target.insert(key.to_string(), value);
741    }
742    // If the path is broken the line is silently skipped — this should not
743    // happen under well-formed input; malformed input simply loses the entry
744    // rather than inserting it at the wrong nesting level.
745}
746
747fn navigate_to_parent<'a>(
748    root: &'a mut HashMap<String, Value>,
749    stack: &[(i32, StackEntry)],
750    target_idx: usize,
751) -> Option<&'a mut HashMap<String, Value>> {
752    if target_idx == 0 {
753        return Some(root);
754    }
755
756    let path: Vec<&str> = stack
757        .iter()
758        .skip(1)
759        .take(target_idx)
760        .filter_map(|(_, entry)| match entry {
761            StackEntry::Key(k) => Some(k.as_str()),
762            _ => None,
763        })
764        .collect();
765
766    // SAFETY: We navigate a tree of nested HashMaps using a raw pointer to
767    // work around the borrow-checker's inability to track that successive
768    // `get_mut` calls target disjoint subtrees.  The invariants that make
769    // this sound are:
770    //   1. `root` is a valid, exclusively-owned mutable reference for 'a.
771    //   2. We descend strictly downward and never alias: at each step we
772    //      replace `current` with a pointer to a child map, discarding the
773    //      parent pointer.
774    //   3. The returned reference re-borrows from `root`'s lifetime 'a and
775    //      is the only mutable reference handed out by this function.
776    let mut current = root as *mut HashMap<String, Value>;
777    for key in path {
778        let child = unsafe { (*current).get_mut(key) };
779        match child {
780            Some(Value::Object(map)) => current = map as *mut HashMap<String, Value>,
781            _ => return None, // Path segment missing or not an Object
782        }
783    }
784    Some(unsafe { &mut *current })
785}
786
787#[cfg(test)]
788mod tests {
789    use super::*;
790
791    #[test]
792    fn test_simple_key_value() {
793        let data = parse("name Wario\nage 30\nactive true\nscore 99.5\nempty null");
794        let root = data.root.as_object().unwrap();
795        assert_eq!(root["name"], Value::String("Wario".into()));
796        assert_eq!(root["age"], Value::Int(30));
797        assert_eq!(root["active"], Value::Bool(true));
798        assert_eq!(root["score"], Value::Float(99.5));
799        assert_eq!(root["empty"], Value::Null);
800        assert_eq!(data.mode, Mode::Static);
801    }
802
803    #[test]
804    fn test_nested_objects() {
805        let data = parse("server\n  host 0.0.0.0\n  port 8080\n  ssl\n    enabled true");
806        let root = data.root.as_object().unwrap();
807        let server = root["server"].as_object().unwrap();
808        assert_eq!(server["host"], Value::String("0.0.0.0".into()));
809        assert_eq!(server["port"], Value::Int(8080));
810        let ssl = server["ssl"].as_object().unwrap();
811        assert_eq!(ssl["enabled"], Value::Bool(true));
812    }
813
814    #[test]
815    fn test_lists() {
816        let data = parse("inventory\n  - Sword\n  - Shield\n  - Potion");
817        let root = data.root.as_object().unwrap();
818        let inv = root["inventory"].as_array().unwrap();
819        assert_eq!(inv.len(), 3);
820        assert_eq!(inv[0], Value::String("Sword".into()));
821    }
822
823    #[test]
824    fn test_multiline_block() {
825        let data = parse("rules |\n  Rule one.\n  Rule two.\n  Rule three.");
826        let root = data.root.as_object().unwrap();
827        assert_eq!(
828            root["rules"],
829            Value::String("Rule one.\nRule two.\nRule three.".into())
830        );
831    }
832
833    #[test]
834    fn test_comments() {
835        let data = parse("# comment\nname Wario # inline\nage 30 // inline");
836        let root = data.root.as_object().unwrap();
837        assert_eq!(root["name"], Value::String("Wario".into()));
838        assert_eq!(root["age"], Value::Int(30));
839    }
840
841    #[test]
842    fn test_active_mode() {
843        let data = parse("!active\nprice 100\ntax:calc price * 0.2");
844        assert_eq!(data.mode, Mode::Active);
845        let root = data.root.as_object().unwrap();
846        assert_eq!(root["price"], Value::Int(100));
847        // Before engine resolution, :calc value is a string
848        assert_eq!(root["tax"], Value::String("price * 0.2".into()));
849        // Metadata should be saved
850        let meta = data.metadata.get("").unwrap();
851        assert!(meta.contains_key("tax"));
852        assert_eq!(meta["tax"].markers, vec!["calc"]);
853    }
854
855    #[test]
856    fn test_markers_env_default() {
857        let data = parse("!active\nport:env:default:3000 PORT");
858        let meta = data.metadata.get("").unwrap();
859        assert_eq!(meta["port"].markers, vec!["env", "default", "3000"]);
860    }
861
862    #[test]
863    fn test_type_hint() {
864        let data = parse("zip(string) 90210");
865        let root = data.root.as_object().unwrap();
866        assert_eq!(root["zip"], Value::String("90210".into()));
867    }
868
869    #[test]
870    fn test_constraints() {
871        let data = parse("!active\nname[min:3, max:30, required] Wario");
872        let meta = data.metadata.get("").unwrap();
873        let c = meta["name"].constraints.as_ref().unwrap();
874        assert_eq!(c.min, Some(3.0));
875        assert_eq!(c.max, Some(30.0));
876        assert!(c.required);
877    }
878
879    #[test]
880    fn test_random_weights() {
881        let data = parse("!active\ntier:random 90 5 5");
882        let meta = data.metadata.get("").unwrap();
883        assert_eq!(meta["tier"].markers, vec!["random"]);
884        assert_eq!(meta["tier"].args, vec!["90", "5", "5"]);
885    }
886
887    #[test]
888    fn test_tool_directive_flags() {
889        let data = parse("!tool\nweb_search\n  query test\n  lang ru\n");
890        assert!(data.tool);
891        assert!(!data.schema);
892        assert_eq!(data.mode, Mode::Static);
893        // Raw parse keeps original tree structure
894        let root = data.root.as_object().unwrap();
895        let ws = root["web_search"].as_object().unwrap();
896        assert_eq!(ws["query"], Value::String("test".into()));
897        assert_eq!(ws["lang"], Value::String("ru".into()));
898    }
899
900    #[test]
901    fn test_tool_schema_flags() {
902        let data = parse("!tool\n!schema\nweb_search\n  query string\n");
903        assert!(data.tool);
904        assert!(data.schema);
905    }
906
907    #[test]
908    fn test_llm_directive() {
909        let data = parse("!llm\ncontext\n  user_profile demo\ntask summarize\n");
910        assert!(data.llm);
911        assert!(!data.tool);
912        let root = data.root.as_object().unwrap();
913        assert_eq!(root["task"], Value::String("summarize".into()));
914        let ctx = root["context"].as_object().unwrap();
915        assert_eq!(ctx["user_profile"], Value::String("demo".into()));
916    }
917
918    #[test]
919    fn test_parse_caps_nesting_depth() {
920        // Pathological input: one key per line, increasing indentation each time,
921        // with empty values so every line would normally create a new nested object.
922        let mut s = String::new();
923        for i in 0..(MAX_PARSE_NESTING_DEPTH as usize + 64) {
924            s.push_str(&" ".repeat(i));
925            s.push_str(&format!("k{i}\n"));
926        }
927
928        let data = parse(&s);
929        let mut cur = data.root.as_object().unwrap();
930        let mut depth = 0usize;
931        // Follow the single-child chain while it stays nested.
932        loop {
933            if cur.len() != 1 {
934                break;
935            }
936            let (_, v) = cur.iter().next().unwrap();
937            match v {
938                Value::Object(next) => {
939                    depth += 1;
940                    cur = next;
941                }
942                _ => break,
943            }
944        }
945
946        assert!(depth <= MAX_PARSE_NESTING_DEPTH);
947    }
948
949    #[test]
950    fn test_tool_call_reshape() {
951        let data = parse("!tool\nweb_search\n  query test\n  lang ru\n");
952        let shaped = reshape_tool_output(&data.root, false);
953        let m = shaped.as_object().unwrap();
954        assert_eq!(m["tool"], Value::String("web_search".into()));
955        let params = m["params"].as_object().unwrap();
956        assert_eq!(params["query"], Value::String("test".into()));
957        assert_eq!(params["lang"], Value::String("ru".into()));
958    }
959
960    #[test]
961    fn test_tool_schema_reshape() {
962        let data = parse("!tool\n!schema\nweb_search\n  query string\n  lang string\nmemory_write\n  path string\n  value string\n");
963        let shaped = reshape_tool_output(&data.root, true);
964        let m = shaped.as_object().unwrap();
965        let tools = m["tools"].as_array().unwrap();
966        assert_eq!(tools.len(), 2);
967        // Sorted: memory_write before web_search
968        let t0 = tools[0].as_object().unwrap();
969        assert_eq!(t0["name"], Value::String("memory_write".into()));
970        let p0 = t0["params"].as_object().unwrap();
971        assert_eq!(p0["path"], Value::String("string".into()));
972        let t1 = tools[1].as_object().unwrap();
973        assert_eq!(t1["name"], Value::String("web_search".into()));
974    }
975
976    #[test]
977    fn test_tool_empty() {
978        let data = parse("!tool\n");
979        assert!(data.tool);
980        let shaped = reshape_tool_output(&data.root, false);
981        let m = shaped.as_object().unwrap();
982        assert_eq!(m["tool"], Value::Null);
983    }
984
985    #[test]
986    fn test_tool_with_active() {
987        let data = parse("!tool\n!active\nweb_search\n  port:env:default:8080 PORT\n");
988        assert!(data.tool);
989        assert_eq!(data.mode, Mode::Active);
990        // Metadata should be captured for :env:default
991        let meta = data.metadata.get("web_search").unwrap();
992        assert_eq!(meta["port"].markers, vec!["env", "default", "8080"]);
993    }
994}