Skip to main content

mimir_core/
parse.rs

1//! Lisp S-expression parser for the Mimir write surface.
2//!
//! Implements `docs/concepts/ir-write-surface.md` § 5 — the ten top-level
//! forms (`sem`, `epi`, `pro`, `inf`, `alias`, `rename`, `retire`,
//! `correct`, `promote`, `query`) with positional required fields and
//! order-insensitive keyword arguments — plus the `episode` boundary
//! directive and the flag forms `pin`, `unpin`, `authoritative_set`,
//! and `authoritative_clear`.
7//!
8//! The parser produces [`UnboundForm`] ASTs where every symbol is still
9//! a [`RawSymbolName`] — binding into a workspace-scoped `SymbolId` is
10//! the next pipeline stage (see `librarian-pipeline.md` § 3.4).
11
12use std::collections::BTreeMap;
13
14use thiserror::Error;
15
16use crate::clock::ClockTime;
17use crate::lex::{LexError, Position, Spanned, Token};
18
/// Symbol name exactly as it appeared in the source text, minus the
/// leading `@`, together with any `:Kind` annotation that followed it.
///
/// Symbol tables are workspace-scoped, so the parser does not resolve a
/// name to a `SymbolId`; binding happens in a later pipeline stage. An
/// annotation written as `@name:Kind` is retained in [`Self::kind`] so
/// the binder can override the position-default kind.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub struct RawSymbolName {
    /// Name text with the `@` sigil removed.
    pub name: String,
    /// `:Kind` annotation, if one was written — handed to the binder
    /// for kind override or validation.
    pub kind: Option<String>,
}

impl RawSymbolName {
    /// Build an unannotated symbol name; `kind` is left as `None`.
    #[must_use]
    pub fn new(name: impl Into<String>) -> Self {
        let name = name.into();
        Self { name, kind: None }
    }

    /// Build a symbol name that carries a `:Kind` annotation.
    #[must_use]
    pub fn with_kind(name: impl Into<String>, kind: impl Into<String>) -> Self {
        let (name, kind) = (name.into(), Some(kind.into()));
        Self { name, kind }
    }

    /// Borrow the name text (no `@`, no `:Kind`).
    #[must_use]
    pub fn as_str(&self) -> &str {
        self.name.as_str()
    }
}
61
62/// A raw value in a memory slot — pre-binding analogue of [`crate::Value`].
63///
64/// Differs from `Value` in two ways:
65///
66/// - `RawSymbol` carries a [`RawSymbolName`] rather than a resolved
67///   `SymbolId`.
68/// - Extra variants that only exist in surface syntax: `TypedSymbol`
69///   (the `@name:Kind` annotation), `Bareword` (predicate or string
70///   literal depending on slot), `List` (parenthesized lists like
71///   `participants` / `derived_from`), and `Nil`.
72#[derive(Clone, Debug, PartialEq)]
73pub enum RawValue {
74    /// `@name`.
75    RawSymbol(RawSymbolName),
76    /// `@name:Kind`.
77    TypedSymbol {
78        /// Symbol name without the `@`.
79        name: RawSymbolName,
80        /// Kind annotation without the leading `:`.
81        kind: String,
82    },
83    /// A bareword. In predicate slots resolves to a `Predicate`-kind
84    /// symbol; elsewhere resolves to a string literal (`Value::String`).
85    Bareword(String),
86    /// A quoted UTF-8 string literal.
87    String(String),
88    /// A signed integer.
89    Integer(i64),
90    /// An IEEE 754 binary64 float.
91    Float(f64),
92    /// A boolean.
93    Boolean(bool),
94    /// `nil` — represents `Option::None` in nullable positions.
95    Nil,
96    /// A parenthesized `(...)` list — used for `participants`,
97    /// `derived_from`, and similar multi-value slots. Each element is
98    /// itself a [`RawValue`].
99    List(Vec<RawValue>),
100    /// A timestamp token — stored as milliseconds. Pre-validated by the
101    /// lexer to look like an ISO-8601 timestamp.
102    Timestamp(ClockTime),
103    /// A raw timestamp text that the lexer could not yet convert to a
104    /// `ClockTime` (e.g. because the parser is collecting the value
105    /// for a slot where the binder does the conversion). Escape hatch
106    /// used by the bind stage; the parser itself always produces
107    /// `Timestamp(ClockTime)` where possible.
108    TimestampRaw(String),
109}
110
111/// Order-insensitive keyword arguments for a form.
112///
113/// Per `ir-write-surface.md` § 5.0, keyword pairs are collected into a
114/// dictionary; the form's production specifies the expected key set.
115pub type KeywordArgs = BTreeMap<String, RawValue>;
116
117/// A selector in a `query` form — currently parsed as a `RawValue` for
118/// forward compatibility; the read-protocol milestone refines this
119/// into a richer query DSL.
120pub type QuerySelector = RawValue;
121
122/// An unbound AST form — the parser's output.
123///
124/// Binding (resolving `RawSymbolName` into `SymbolId`, validating kind
125/// annotations against `SymbolKind`, materialising `RawValue` into
126/// `Value`) happens in a later stage — see `librarian-pipeline.md` § 3.4.
127#[derive(Clone, Debug, PartialEq)]
128pub enum UnboundForm {
129    /// Semantic memory write — `(sem s p o :src SRC :c CONF :v V)`.
130    Sem {
131        /// Subject.
132        s: RawSymbolName,
133        /// Predicate.
134        p: RawSymbolName,
135        /// Object.
136        o: RawValue,
137        /// Keyword arguments — must include `src`, `c`, `v`; may include
138        /// `projected`.
139        keywords: KeywordArgs,
140    },
141    /// Episodic memory write — `(epi EVENT_ID KIND (PAR*) LOC …)`.
142    Epi {
143        /// Stable memory ID for this event.
144        event_id: RawSymbolName,
145        /// Event-type symbol.
146        kind: RawSymbolName,
147        /// List of participant symbols.
148        participants: Vec<RawSymbolName>,
149        /// Location symbol.
150        location: RawSymbolName,
151        /// Expected keys: `at`, `obs`, `src`, `c`.
152        keywords: KeywordArgs,
153    },
154    /// Procedural memory write — `(pro RULE_ID TRIGGER ACTION …)`.
155    Pro {
156        /// Stable memory ID for this rule.
157        rule_id: RawSymbolName,
158        /// Trigger — typically a string literal.
159        trigger: RawValue,
160        /// Action — typically a string literal.
161        action: RawValue,
162        /// Expected keys: `scp`, `src`, `c`; optional `pre`.
163        keywords: KeywordArgs,
164    },
165    /// Inferential memory write — `(inf s p o (DERIVED*) METHOD …)`.
166    Inf {
167        /// Subject.
168        s: RawSymbolName,
169        /// Predicate.
170        p: RawSymbolName,
171        /// Object.
172        o: RawValue,
173        /// Parent memory symbols (must be non-empty).
174        derived_from: Vec<RawSymbolName>,
175        /// Registered inference method symbol.
176        method: RawSymbolName,
177        /// Expected keys: `c`, `v`; optional `projected`.
178        keywords: KeywordArgs,
179    },
180    /// `(alias @a @b)` — declare two names as aliases.
181    Alias {
182        /// First symbol.
183        a: RawSymbolName,
184        /// Second symbol.
185        b: RawSymbolName,
186    },
187    /// `(rename @old @new)` — rename a symbol.
188    Rename {
189        /// The old canonical name.
190        old: RawSymbolName,
191        /// The new canonical name.
192        new: RawSymbolName,
193    },
194    /// `(retire @name [:reason STRING])` — soft-retire a symbol.
195    Retire {
196        /// Target symbol.
197        name: RawSymbolName,
198        /// Optional `:reason` keyword.
199        keywords: KeywordArgs,
200    },
201    /// `(correct @target_episode … epi body …)` — correct a prior
202    /// Episodic memory. The corrected body is itself a parenthesised
203    /// Episodic form.
204    Correct {
205        /// The Episode being corrected.
206        target_episode: RawSymbolName,
207        /// The corrected Episodic memory (must be an `Epi` form).
208        corrected: Box<UnboundForm>,
209    },
210    /// `(promote @name)` — promote an ephemeral memory to canonical.
211    Promote {
212        /// The ephemeral memory symbol.
213        name: RawSymbolName,
214    },
215    /// `(query … keyword args …)` — read-path query.
216    ///
217    /// v1 parser treats the body as a keyword-arg bag; selector is an
218    /// optional single positional. Detailed query DSL validation is in
219    /// `read-protocol.md` and will land with the read-protocol
220    /// milestone.
221    Query {
222        /// Optional positional selector — a symbol or list.
223        selector: Option<QuerySelector>,
224        /// Remaining keyword arguments.
225        keywords: KeywordArgs,
226    },
227    /// `(episode :start [:label S] [:parent_episode @E] [:retracts (@E1 …)])`
228    /// or `(episode :close)` — explicit Episode-boundary directive.
229    ///
230    /// `:close` is a no-op under the single-`compile_batch`-per-Episode
231    /// model (the batch closes the Episode implicitly); the form is
232    /// still accepted so agents can emit it spec-compliantly.
233    ///
234    /// Note on `:retracts`: the spec text uses `[ … ]` brackets
235    /// (§ 9.1), but Mimir's write surface doesn't tokenize brackets.
236    /// The implementation accepts parenthesised symbol lists —
237    /// `:retracts (@E1 @E2)` — matching the existing list convention
238    /// used by Epi's participants and Inf's `derived_from`.
239    Episode {
240        /// Whether this form opens or closes an Episode.
241        action: EpisodeAction,
242        /// Optional human-readable label (spec § 4.3 — capped at 256
243        /// bytes; the semantic stage enforces).
244        label: Option<String>,
245        /// Optional parent Episode symbol (spec § 5.1).
246        parent_episode: Option<RawSymbolName>,
247        /// Zero or more Episodes this Episode retracts (spec § 5.2).
248        retracts: Vec<RawSymbolName>,
249    },
250    /// Pin / unpin / authoritative flag write — one of the four
251    /// `(pin @mem :actor @A)` / `(unpin @mem :actor @A)` /
252    /// `(authoritative-set @mem :actor @A)` /
253    /// `(authoritative-clear @mem :actor @A)` forms per
254    /// `confidence-decay.md` §§ 7 / 8 and `ir-canonical-form.md`
255    /// opcodes `0x35`–`0x38`.
256    Flag {
257        /// Which flag operation this form carries.
258        action: FlagAction,
259        /// The memory the flag applies to.
260        memory: RawSymbolName,
261        /// The agent or user invoking the flag change — required
262        /// for audit. Must resolve to an `Agent`-kind symbol at
263        /// bind time.
264        actor: RawSymbolName,
265    },
266}
267
/// The boundary action named by an `(episode …)` form.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum EpisodeAction {
    /// `(episode :start …)` — begins an Episode; metadata is optional.
    Start,
    /// `(episode :close)` — ends the current Episode. Has no effect
    /// under the single-batch-per-Episode model and is accepted only
    /// for spec parity.
    Close,
}
277
/// The flag operation named by a `(pin …)` / `(unpin …)` /
/// `(authoritative-set …)` / `(authoritative-clear …)` form. At the
/// canonical layer these become `FlagEventRecord`s, per
/// `ir-canonical-form.md` opcodes `0x35`–`0x38`.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum FlagAction {
    /// `(pin @mem :actor @agent)` — decay is suspended (`confidence-decay.md` § 7).
    Pin,
    /// `(unpin @mem :actor @agent)` — decay resumes.
    Unpin,
    /// `(authoritative-set @mem :actor @operator)` — turns the operator-authoritative flag on.
    AuthoritativeSet,
    /// `(authoritative-clear @mem :actor @operator)` — turns the operator-authoritative flag off.
    AuthoritativeClear,
}
293
294/// Errors produced by [`parse`].
295///
296/// Per `ir-write-surface.md` § 8 — fail-fast on first violation, no
297/// partial recovery.
298#[derive(Debug, Error, PartialEq)]
299pub enum ParseError {
300    /// The lexer failed before the parser could start.
301    #[error("lex error: {0}")]
302    Lex(#[from] LexError),
303
304    /// Got a token that isn't allowed here.
305    #[error("unexpected token {found:?} at {pos:?}; expected {expected}")]
306    UnexpectedToken {
307        /// The token we saw.
308        found: Token,
309        /// Human-readable description of what was expected.
310        expected: &'static str,
311        /// Position of the token.
312        pos: Position,
313    },
314
315    /// Input ended before the parser could complete a form.
316    #[error("unexpected end of input; expected {expected}")]
317    UnexpectedEof {
318        /// Human-readable description of what was expected.
319        expected: &'static str,
320    },
321
322    /// The opcode at the head of a form isn't one of the registered ten.
323    #[error("unknown opcode {found:?} at {pos:?}")]
324    UnknownOpcode {
325        /// The offending opcode text.
326        found: String,
327        /// Position of the opcode bareword.
328        pos: Position,
329    },
330
331    /// A form received an unexpected keyword.
332    #[error("unexpected keyword {keyword:?} for form {form:?} at {pos:?}")]
333    BadKeyword {
334        /// The offending keyword.
335        keyword: String,
336        /// The form being parsed.
337        form: &'static str,
338        /// Position of the keyword.
339        pos: Position,
340    },
341
342    /// A keyword appears twice in the same form.
343    #[error("duplicate keyword {keyword:?} at {pos:?}")]
344    DuplicateKeyword {
345        /// The offending keyword.
346        keyword: String,
347        /// Position of the second occurrence.
348        pos: Position,
349    },
350
351    /// A form is missing a required keyword.
352    #[error("missing required keyword {missing:?} for form {form:?}")]
353    MissingRequiredKeyword {
354        /// The missing keyword.
355        missing: &'static str,
356        /// The form being parsed.
357        form: &'static str,
358    },
359
360    /// A form has the wrong number of positional arguments.
361    #[error("arity mismatch for {form:?}: expected {expected}, found {found} at {pos:?}")]
362    ArityMismatch {
363        /// The form being parsed.
364        form: &'static str,
365        /// Expected positional arity.
366        expected: usize,
367        /// Actual positional arity.
368        found: usize,
369        /// Position of the form's opening paren.
370        pos: Position,
371    },
372
373    /// A `RawValue` was expected but a non-value token was found.
374    #[error("expected value at {pos:?}, got {found:?}")]
375    ExpectedValue {
376        /// The offending token.
377        found: Token,
378        /// Position.
379        pos: Position,
380    },
381
382    /// A list parse saw unbalanced parens.
383    #[error("unbalanced list at {pos:?}")]
384    UnbalancedList {
385        /// Start position of the list.
386        pos: Position,
387    },
388
389    /// A list position required symbols but saw a non-symbol value.
390    #[error("expected symbol list element at {pos:?}, got {found:?}")]
391    ExpectedSymbolInList {
392        /// The offending value.
393        found: RawValue,
394        /// Position of the list.
395        pos: Position,
396    },
397
398    /// An ISO timestamp value could not be normalised to a [`ClockTime`].
399    #[error("invalid timestamp {text:?} at {pos:?}")]
400    InvalidTimestamp {
401        /// The raw timestamp text.
402        text: String,
403        /// Position of the timestamp.
404        pos: Position,
405    },
406
407    /// Parser nesting exceeded [`MAX_NESTING_DEPTH`]. Surfaced before
408    /// the recursive descent blows the host stack — closes Security
409    /// F3 (P2) from the v1.1 fresh assessment. Triggered by inputs
410    /// like `(((…)))` of pathological depth, whether through nested
411    /// list values (`parse_value` → `parse_value_list_body`) or
412    /// through nested `correct` forms (`parse_correct` →
413    /// `parse_form`).
414    #[error("nesting too deep at {pos:?}: limit is {max}")]
415    NestingTooDeep {
416        /// Position where the over-limit nesting was attempted.
417        pos: Position,
418        /// The hard limit (currently [`MAX_NESTING_DEPTH`]).
419        max: usize,
420    },
421}
422
/// Upper bound on how deep the recursive-descent parser may recurse.
///
/// Nothing in Mimir's grammar legitimately nests more than a handful
/// of levels (say, a form holding a list value that holds another list
/// value), so 256 is generous by orders of magnitude. On Linux a flat
/// ~8 MiB main-thread stack gives out at roughly 5–10k of these
/// frames; a cap of 256 keeps worst-case stack use well under 1 MiB.
pub const MAX_NESTING_DEPTH: usize = 256;
431
432/// Parse a UTF-8 input into a sequence of [`UnboundForm`]s.
433///
434/// # Errors
435///
436/// Returns the first [`ParseError`] encountered. No partial recovery.
437///
438/// # Examples
439///
440/// ```
441/// # #![allow(clippy::unwrap_used)]
442/// use mimir_core::parse::parse;
443///
444/// let forms = parse("(promote @ephemeral_42)").unwrap();
445/// assert_eq!(forms.len(), 1);
446/// ```
447pub fn parse(input: &str) -> Result<Vec<UnboundForm>, ParseError> {
448    let tokens = crate::lex::tokenize(input)?;
449    let mut parser = Parser::new(tokens);
450    let mut out = Vec::new();
451    while parser.peek().is_some() {
452        out.push(parser.parse_form()?);
453    }
454    Ok(out)
455}
456
457struct Parser {
458    tokens: Vec<Spanned>,
459    idx: usize,
460    /// Current recursive-descent depth. Bounded by [`MAX_NESTING_DEPTH`]
461    /// — see [`Parser::parse_value`]'s `LParen` branch and
462    /// [`Parser::parse_correct`] for the two recursion sites that
463    /// increment/decrement this.
464    depth: usize,
465}
466
467impl Parser {
468    fn new(tokens: Vec<Spanned>) -> Self {
469        Self {
470            tokens,
471            idx: 0,
472            depth: 0,
473        }
474    }
475
476    fn peek(&self) -> Option<&Spanned> {
477        self.tokens.get(self.idx)
478    }
479
480    fn bump(&mut self) -> Option<Spanned> {
481        let t = self.tokens.get(self.idx).cloned()?;
482        self.idx += 1;
483        Some(t)
484    }
485
486    fn expect_lparen(&mut self, expected: &'static str) -> Result<Position, ParseError> {
487        let Some(spanned) = self.bump() else {
488            return Err(ParseError::UnexpectedEof { expected });
489        };
490        if spanned.token == Token::LParen {
491            Ok(spanned.position)
492        } else {
493            Err(ParseError::UnexpectedToken {
494                found: spanned.token,
495                expected,
496                pos: spanned.position,
497            })
498        }
499    }
500
501    fn expect_rparen(&mut self, expected: &'static str) -> Result<(), ParseError> {
502        let Some(spanned) = self.bump() else {
503            return Err(ParseError::UnexpectedEof { expected });
504        };
505        if spanned.token == Token::RParen {
506            Ok(())
507        } else {
508            Err(ParseError::UnexpectedToken {
509                found: spanned.token,
510                expected,
511                pos: spanned.position,
512            })
513        }
514    }
515
516    fn expect_symbol(&mut self, expected: &'static str) -> Result<RawSymbolName, ParseError> {
517        let Some(spanned) = self.bump() else {
518            return Err(ParseError::UnexpectedEof { expected });
519        };
520        match spanned.token {
521            Token::Symbol(name) => Ok(RawSymbolName::new(name)),
522            Token::TypedSymbol { name, kind } => Ok(RawSymbolName::with_kind(name, kind)),
523            other => Err(ParseError::UnexpectedToken {
524                found: other,
525                expected,
526                pos: spanned.position,
527            }),
528        }
529    }
530
531    /// Accept a symbol OR a bareword in predicate slots. Per
532    /// `ir-write-surface.md` § 10, predicates may omit the `@`; the
533    /// binder normalises both to a `Predicate`-kind symbol.
534    fn expect_predicate(&mut self, expected: &'static str) -> Result<RawSymbolName, ParseError> {
535        let Some(spanned) = self.bump() else {
536            return Err(ParseError::UnexpectedEof { expected });
537        };
538        match spanned.token {
539            Token::Symbol(name) | Token::Bareword(name) => Ok(RawSymbolName::new(name)),
540            Token::TypedSymbol { name, kind } => Ok(RawSymbolName::with_kind(name, kind)),
541            other => Err(ParseError::UnexpectedToken {
542                found: other,
543                expected,
544                pos: spanned.position,
545            }),
546        }
547    }
548
549    fn parse_form(&mut self) -> Result<UnboundForm, ParseError> {
550        let open = self.expect_lparen("top-level `(`")?;
551        let Some(head) = self.bump() else {
552            return Err(ParseError::UnexpectedEof {
553                expected: "opcode after `(`",
554            });
555        };
556        let opcode = match head.token {
557            Token::Bareword(ref b) => b.clone(),
558            other => {
559                return Err(ParseError::UnexpectedToken {
560                    found: other,
561                    expected: "opcode bareword at form head",
562                    pos: head.position,
563                });
564            }
565        };
566        match opcode.as_str() {
567            "sem" => self.parse_sem(open),
568            "epi" => self.parse_epi(open),
569            "pro" => self.parse_pro(open),
570            "inf" => self.parse_inf(open),
571            "alias" => self.parse_alias(),
572            "rename" => self.parse_rename(),
573            "retire" => self.parse_retire(),
574            "correct" => self.parse_correct(open),
575            "promote" => self.parse_promote(),
576            "query" => self.parse_query(),
577            "episode" => self.parse_episode(),
578            "pin" => self.parse_flag(FlagAction::Pin, "pin"),
579            "unpin" => self.parse_flag(FlagAction::Unpin, "unpin"),
580            // Spec text uses `(authoritative-set @mem)` but the
581            // bareword grammar is `[a-z_][a-z0-9_]*` (no hyphens),
582            // so the surface accepts underscores.
583            "authoritative_set" => {
584                self.parse_flag(FlagAction::AuthoritativeSet, "authoritative_set")
585            }
586            "authoritative_clear" => {
587                self.parse_flag(FlagAction::AuthoritativeClear, "authoritative_clear")
588            }
589            _ => Err(ParseError::UnknownOpcode {
590                found: opcode,
591                pos: head.position,
592            }),
593        }
594    }
595
596    // ---- individual form productions ----
597
598    fn parse_sem(&mut self, _open: Position) -> Result<UnboundForm, ParseError> {
599        let s = self.expect_symbol("sem subject")?;
600        let p = self.expect_predicate("sem predicate")?;
601        let o = self.parse_value("sem object")?;
602        let keywords = self.parse_keywords("sem", &["src", "c", "v", "projected"])?;
603        Self::require_keywords("sem", &keywords, &["src", "c", "v"])?;
604        self.expect_rparen("closing `)` for sem")?;
605        Ok(UnboundForm::Sem { s, p, o, keywords })
606    }
607
608    fn parse_epi(&mut self, open: Position) -> Result<UnboundForm, ParseError> {
609        let event_id = self.expect_symbol("epi event_id")?;
610        let kind = self.expect_symbol("epi kind")?;
611        let participants = self.parse_symbol_list(open, "epi participants")?;
612        let location = self.expect_symbol("epi location")?;
613        let keywords = self.parse_keywords("epi", &["at", "obs", "src", "c"])?;
614        Self::require_keywords("epi", &keywords, &["at", "obs", "src", "c"])?;
615        self.expect_rparen("closing `)` for epi")?;
616        Ok(UnboundForm::Epi {
617            event_id,
618            kind,
619            participants,
620            location,
621            keywords,
622        })
623    }
624
625    fn parse_pro(&mut self, _open: Position) -> Result<UnboundForm, ParseError> {
626        let rule_id = self.expect_symbol("pro rule_id")?;
627        let trigger = self.parse_value("pro trigger")?;
628        let action = self.parse_value("pro action")?;
629        let keywords = self.parse_keywords("pro", &["scp", "src", "c", "pre"])?;
630        Self::require_keywords("pro", &keywords, &["scp", "src", "c"])?;
631        self.expect_rparen("closing `)` for pro")?;
632        Ok(UnboundForm::Pro {
633            rule_id,
634            trigger,
635            action,
636            keywords,
637        })
638    }
639
640    fn parse_inf(&mut self, open: Position) -> Result<UnboundForm, ParseError> {
641        let s = self.expect_symbol("inf subject")?;
642        let p = self.expect_predicate("inf predicate")?;
643        let o = self.parse_value("inf object")?;
644        let derived_from = self.parse_symbol_list(open, "inf derived_from")?;
645        let method = self.expect_symbol("inf method")?;
646        let keywords = self.parse_keywords("inf", &["c", "v", "projected"])?;
647        Self::require_keywords("inf", &keywords, &["c", "v"])?;
648        self.expect_rparen("closing `)` for inf")?;
649        Ok(UnboundForm::Inf {
650            s,
651            p,
652            o,
653            derived_from,
654            method,
655            keywords,
656        })
657    }
658
659    fn parse_alias(&mut self) -> Result<UnboundForm, ParseError> {
660        let a = self.expect_symbol("alias first arg")?;
661        let b = self.expect_symbol("alias second arg")?;
662        self.expect_rparen("closing `)` for alias")?;
663        Ok(UnboundForm::Alias { a, b })
664    }
665
666    fn parse_rename(&mut self) -> Result<UnboundForm, ParseError> {
667        let old = self.expect_symbol("rename old name")?;
668        let new = self.expect_symbol("rename new name")?;
669        self.expect_rparen("closing `)` for rename")?;
670        Ok(UnboundForm::Rename { old, new })
671    }
672
673    fn parse_retire(&mut self) -> Result<UnboundForm, ParseError> {
674        let name = self.expect_symbol("retire target")?;
675        let keywords = self.parse_keywords("retire", &["reason"])?;
676        self.expect_rparen("closing `)` for retire")?;
677        Ok(UnboundForm::Retire { name, keywords })
678    }
679
680    fn parse_correct(&mut self, open: Position) -> Result<UnboundForm, ParseError> {
681        let target_episode = self.expect_symbol("correct target_episode")?;
682        // The corrected body must be a parenthesised Epi form. Bound
683        // recursion depth: nested `(correct (correct (correct …)))`
684        // would otherwise blow the stack via parse_form (Security F3).
685        if self.depth >= MAX_NESTING_DEPTH {
686            return Err(ParseError::NestingTooDeep {
687                pos: open,
688                max: MAX_NESTING_DEPTH,
689            });
690        }
691        self.depth += 1;
692        let inner = self.parse_form();
693        self.depth -= 1;
694        let corrected = Box::new(inner?);
695        if !matches!(&*corrected, UnboundForm::Epi { .. }) {
696            return Err(ParseError::UnexpectedToken {
697                found: Token::LParen,
698                expected: "corrected body must be an `epi` form",
699                pos: Position::start(),
700            });
701        }
702        self.expect_rparen("closing `)` for correct")?;
703        Ok(UnboundForm::Correct {
704            target_episode,
705            corrected,
706        })
707    }
708
709    fn parse_promote(&mut self) -> Result<UnboundForm, ParseError> {
710        let name = self.expect_symbol("promote target")?;
711        self.expect_rparen("closing `)` for promote")?;
712        Ok(UnboundForm::Promote { name })
713    }
714
715    #[allow(clippy::too_many_lines)]
716    fn parse_episode(&mut self) -> Result<UnboundForm, ParseError> {
717        // First keyword must be either `:start` or `:close` — no
718        // value, just a flag token. Custom-parsed because the normal
719        // `parse_keywords` helper expects `:key value` pairs.
720        let head = self.bump().ok_or(ParseError::UnexpectedEof {
721            expected: "`:start` or `:close`",
722        })?;
723        let action_name = match head.token {
724            Token::Keyword(name) => name,
725            other => {
726                return Err(ParseError::UnexpectedToken {
727                    found: other,
728                    expected: "`:start` or `:close`",
729                    pos: head.position,
730                });
731            }
732        };
733        let action = match action_name.as_str() {
734            "start" => EpisodeAction::Start,
735            "close" => EpisodeAction::Close,
736            other => {
737                return Err(ParseError::BadKeyword {
738                    keyword: other.to_string(),
739                    form: "episode",
740                    pos: head.position,
741                });
742            }
743        };
744
745        if matches!(action, EpisodeAction::Close) {
746            // `(episode :close)` accepts no further keywords.
747            self.expect_rparen("closing `)` for episode :close")?;
748            return Ok(UnboundForm::Episode {
749                action,
750                label: None,
751                parent_episode: None,
752                retracts: Vec::new(),
753            });
754        }
755
756        // `:start` — parse optional `:label`, `:parent_episode`,
757        // `:retracts` in any order. Duplicates reject.
758        let mut label: Option<String> = None;
759        let mut parent_episode: Option<RawSymbolName> = None;
760        let mut retracts: Option<Vec<RawSymbolName>> = None;
761
762        while let Some(spanned) = self.peek() {
763            match &spanned.token {
764                Token::RParen => break,
765                Token::Keyword(k) => {
766                    let key = k.clone();
767                    let pos = spanned.position;
768                    self.bump();
769                    match key.as_str() {
770                        "label" => {
771                            if label.is_some() {
772                                return Err(ParseError::DuplicateKeyword { keyword: key, pos });
773                            }
774                            let Some(v) = self.bump() else {
775                                return Err(ParseError::UnexpectedEof {
776                                    expected: "`:label` string value",
777                                });
778                            };
779                            match v.token {
780                                Token::String(s) => label = Some(s),
781                                other => {
782                                    return Err(ParseError::UnexpectedToken {
783                                        found: other,
784                                        expected: "string literal for `:label`",
785                                        pos: v.position,
786                                    });
787                                }
788                            }
789                        }
790                        "parent_episode" => {
791                            if parent_episode.is_some() {
792                                return Err(ParseError::DuplicateKeyword { keyword: key, pos });
793                            }
794                            parent_episode = Some(self.expect_symbol("`:parent_episode` symbol")?);
795                        }
796                        "retracts" => {
797                            if retracts.is_some() {
798                                return Err(ParseError::DuplicateKeyword { keyword: key, pos });
799                            }
800                            // Parenthesised symbol list — matches the
801                            // convention from Epi's participants and
802                            // Inf's derived_from.
803                            let list_open = self.expect_lparen("`:retracts (`")?;
804                            retracts = Some(self.parse_retracts_list(list_open)?);
805                        }
806                        _ => {
807                            return Err(ParseError::BadKeyword {
808                                form: "episode :start",
809                                keyword: key,
810                                pos,
811                            });
812                        }
813                    }
814                }
815                _ => {
816                    let t = spanned.token.clone();
817                    let pos = spanned.position;
818                    return Err(ParseError::UnexpectedToken {
819                        found: t,
820                        expected: "keyword argument in `episode :start`",
821                        pos,
822                    });
823                }
824            }
825        }
826
827        self.expect_rparen("closing `)` for episode :start")?;
828        Ok(UnboundForm::Episode {
829            action,
830            label,
831            parent_episode,
832            retracts: retracts.unwrap_or_default(),
833        })
834    }
835
836    fn parse_flag(
837        &mut self,
838        action: FlagAction,
839        form: &'static str,
840    ) -> Result<UnboundForm, ParseError> {
841        // `(<opcode> @memory :actor @agent)` — memory positional,
842        // `:actor` required per confidence-decay.md § 7 / § 8
843        // audit-trail contract.
844        let memory = self.expect_symbol(match action {
845            FlagAction::Pin => "pin target",
846            FlagAction::Unpin => "unpin target",
847            FlagAction::AuthoritativeSet => "authoritative_set target",
848            FlagAction::AuthoritativeClear => "authoritative_clear target",
849        })?;
850        let keywords = self.parse_keywords(form, &["actor"])?;
851        Self::require_keywords(form, &keywords, &["actor"])?;
852        self.expect_rparen("closing `)` for flag form")?;
853        let actor = match keywords.get("actor") {
854            Some(RawValue::RawSymbol(s) | RawValue::TypedSymbol { name: s, .. }) => s.clone(),
855            _ => {
856                return Err(ParseError::BadKeyword {
857                    keyword: "actor".into(),
858                    form,
859                    pos: Position::start(),
860                });
861            }
862        };
863        Ok(UnboundForm::Flag {
864            action,
865            memory,
866            actor,
867        })
868    }
869
870    fn parse_retracts_list(&mut self, open: Position) -> Result<Vec<RawSymbolName>, ParseError> {
871        let raw = self.parse_value_list_body(open)?;
872        raw.into_iter()
873            .map(|v| match v {
874                RawValue::RawSymbol(name) | RawValue::TypedSymbol { name, .. } => Ok(name),
875                other => Err(ParseError::ExpectedSymbolInList {
876                    found: other,
877                    pos: open,
878                }),
879            })
880            .collect()
881    }
882
883    fn parse_query(&mut self) -> Result<UnboundForm, ParseError> {
884        // Optional positional selector: a value (symbol or list) that
885        // is NOT a keyword. If the next token is `:`, skip selector.
886        let selector = if matches!(self.peek().map(|s| &s.token), Some(Token::Keyword(_)))
887            || matches!(self.peek().map(|s| &s.token), Some(Token::RParen))
888        {
889            None
890        } else {
891            Some(self.parse_value("query selector")?)
892        };
893        let keywords = self.parse_keywords(
894            "query",
895            &[
896                "kind",
897                "s",
898                "p",
899                "o",
900                "in_episode",
901                "after_episode",
902                "before_episode",
903                "episode_chain",
904                "as_of",
905                "as_committed",
906                "include_retired",
907                "include_projected",
908                "confidence_threshold",
909                "limit",
910                "explain_filtered",
911                "show_framing",
912                "debug_mode",
913                "read_after",
914                "timeout_ms",
915            ],
916        )?;
917        self.expect_rparen("closing `)` for query")?;
918        Ok(UnboundForm::Query { selector, keywords })
919    }
920
921    // ---- shared helpers ----
922
923    fn parse_value(&mut self, expected: &'static str) -> Result<RawValue, ParseError> {
924        let Some(spanned) = self.bump() else {
925            return Err(ParseError::UnexpectedEof { expected });
926        };
927        match spanned.token {
928            Token::Symbol(name) => Ok(RawValue::RawSymbol(RawSymbolName::new(name))),
929            Token::TypedSymbol { name, kind } => Ok(RawValue::TypedSymbol {
930                name: RawSymbolName::new(name),
931                kind,
932            }),
933            Token::Bareword(b) => Ok(RawValue::Bareword(b)),
934            Token::String(s) => Ok(RawValue::String(s)),
935            Token::Integer(i) => Ok(RawValue::Integer(i)),
936            Token::Float(f) => Ok(RawValue::Float(f)),
937            Token::Boolean(b) => Ok(RawValue::Boolean(b)),
938            Token::Nil => Ok(RawValue::Nil),
939            Token::Timestamp(text) => parse_timestamp(&text, spanned.position)
940                .map(RawValue::Timestamp)
941                .or(Ok(RawValue::TimestampRaw(text))),
942            Token::LParen => {
943                // Bound stack consumption: each nested LParen recurses
944                // through parse_value_list_body → parse_value → here.
945                // Without this guard, `(((…)))` of pathological depth
946                // exhausts the host stack uncatchably (Security F3).
947                if self.depth >= MAX_NESTING_DEPTH {
948                    return Err(ParseError::NestingTooDeep {
949                        pos: spanned.position,
950                        max: MAX_NESTING_DEPTH,
951                    });
952                }
953                self.depth += 1;
954                let result = self.parse_value_list_body(spanned.position);
955                self.depth -= 1;
956                let inner = result?;
957                Ok(RawValue::List(inner))
958            }
959            other @ (Token::RParen | Token::Keyword(_)) => Err(ParseError::ExpectedValue {
960                found: other,
961                pos: spanned.position,
962            }),
963        }
964    }
965
966    fn parse_value_list_body(&mut self, open: Position) -> Result<Vec<RawValue>, ParseError> {
967        let mut out = Vec::new();
968        loop {
969            match self.peek().map(|s| &s.token) {
970                None => return Err(ParseError::UnbalancedList { pos: open }),
971                Some(Token::RParen) => {
972                    self.bump();
973                    return Ok(out);
974                }
975                _ => {
976                    out.push(self.parse_value("list element")?);
977                }
978            }
979        }
980    }
981
982    fn parse_symbol_list(
983        &mut self,
984        _open: Position,
985        expected: &'static str,
986    ) -> Result<Vec<RawSymbolName>, ParseError> {
987        let list_open = self.expect_lparen(expected)?;
988        let raw = self.parse_value_list_body(list_open)?;
989        raw.into_iter()
990            .map(|v| match v {
991                RawValue::RawSymbol(name) | RawValue::TypedSymbol { name, .. } => Ok(name),
992                other => Err(ParseError::ExpectedSymbolInList {
993                    found: other,
994                    pos: list_open,
995                }),
996            })
997            .collect()
998    }
999
1000    fn parse_keywords(
1001        &mut self,
1002        form: &'static str,
1003        allowed: &[&str],
1004    ) -> Result<KeywordArgs, ParseError> {
1005        let mut out = BTreeMap::new();
1006        while let Some(spanned) = self.peek() {
1007            match &spanned.token {
1008                Token::RParen => break,
1009                Token::Keyword(k) => {
1010                    let key = k.clone();
1011                    let pos = spanned.position;
1012                    if !allowed.iter().any(|allowed| *allowed == key) {
1013                        return Err(ParseError::BadKeyword {
1014                            keyword: key,
1015                            form,
1016                            pos,
1017                        });
1018                    }
1019                    self.bump(); // consume the keyword token
1020                    let value = self.parse_value("keyword value")?;
1021                    if out.insert(key.clone(), value).is_some() {
1022                        return Err(ParseError::DuplicateKeyword { keyword: key, pos });
1023                    }
1024                }
1025                other => {
1026                    return Err(ParseError::UnexpectedToken {
1027                        found: other.clone(),
1028                        expected: "`:keyword value` pair or closing `)`",
1029                        pos: spanned.position,
1030                    });
1031                }
1032            }
1033        }
1034        Ok(out)
1035    }
1036
1037    fn require_keywords(
1038        form: &'static str,
1039        keywords: &KeywordArgs,
1040        required: &[&'static str],
1041    ) -> Result<(), ParseError> {
1042        for k in required {
1043            if !keywords.contains_key(*k) {
1044                return Err(ParseError::MissingRequiredKeyword { missing: k, form });
1045            }
1046        }
1047        Ok(())
1048    }
1049}
1050
1051fn parse_timestamp(text: &str, pos: Position) -> Result<ClockTime, ParseError> {
1052    // Accept YYYY-MM-DD (midnight UTC) and YYYY-MM-DDTHH:MM:SS[Z|.frac Z].
1053    // Returns ms since Unix epoch.
1054    let bad = || ParseError::InvalidTimestamp {
1055        text: text.to_string(),
1056        pos,
1057    };
1058    if text.len() == 10 {
1059        // Date-only: YYYY-MM-DD → midnight UTC.
1060        let millis = date_to_millis(text).ok_or_else(bad)?;
1061        return ClockTime::try_from_millis(millis).map_err(|_| bad());
1062    }
1063    // Full date-time. Expect 'T' at offset 10.
1064    if text.len() < 20 || !text.is_char_boundary(10) || &text[10..11] != "T" {
1065        return Err(bad());
1066    }
1067    let (date_part, rest) = text.split_at(10);
1068    // `rest` = "THH:MM:SS[.frac]Z" or similar.
1069    let rest = rest
1070        .strip_prefix('T')
1071        .ok_or_else(bad)?
1072        .trim_end_matches('Z');
1073    let (hms_part, frac_millis) = if let Some(dot) = rest.find('.') {
1074        let (hms, frac) = rest.split_at(dot);
1075        let frac = &frac[1..];
1076        if frac.is_empty() || !frac.chars().all(|c| c.is_ascii_digit()) {
1077            return Err(bad());
1078        }
1079        let millis_str = if frac.len() >= 3 { &frac[..3] } else { frac };
1080        let mut millis: u64 = millis_str.parse().map_err(|_| bad())?;
1081        // Pad to 3 digits: e.g. "5" → 500ms, "50" → 500ms, "500" → 500ms.
1082        for _ in millis_str.len()..3 {
1083            millis *= 10;
1084        }
1085        (hms, millis)
1086    } else {
1087        (rest, 0_u64)
1088    };
1089    let parts: Vec<&str> = hms_part.split(':').collect();
1090    if parts.len() != 3 {
1091        return Err(bad());
1092    }
1093    let hours: u64 = parts[0].parse().map_err(|_| bad())?;
1094    let minutes: u64 = parts[1].parse().map_err(|_| bad())?;
1095    let seconds: u64 = parts[2].parse().map_err(|_| bad())?;
1096    let date_millis = date_to_millis(date_part).ok_or_else(bad)?;
1097    let total = date_millis + hours * 3_600_000 + minutes * 60_000 + seconds * 1_000 + frac_millis;
1098    ClockTime::try_from_millis(total).map_err(|_| bad())
1099}
1100
/// Converts a strict `YYYY-MM-DD` date into milliseconds since the Unix
/// epoch (the instant the day begins).
///
/// Returns `None` when:
///
/// - the text is not exactly ten bytes shaped `dddd-dd-dd` with ASCII
///   digits only (signs and non-ASCII digits are rejected),
/// - the year is before 1970,
/// - the month is outside `1..=12`, or
/// - the day does not exist in that month (leap years are honoured, so
///   `2024-02-29` is valid while `2023-02-29` and `2024-02-31` are not).
fn date_to_millis(date: &str) -> Option<u64> {
    // Proleptic-Gregorian conversion for 1970-01-01 through 9999-12-31.
    // Keeps the dependency surface minimal — no chrono in foundations.
    let b = date.as_bytes();
    if b.len() != 10 || b[4] != b'-' || b[7] != b'-' {
        return None;
    }

    // Digit-only field parser: unlike `str::parse`, this refuses a
    // leading `+`/`-`.
    let digits = |field: &[u8]| -> Option<u32> {
        field.iter().try_fold(0_u32, |acc, &c| {
            c.is_ascii_digit().then(|| acc * 10 + u32::from(c - b'0'))
        })
    };
    let year = i64::from(digits(&b[..4])?);
    let month = digits(&b[5..7])?;
    let day = digits(&b[8..10])?;

    if year < 1970 {
        return None;
    }
    let is_leap_year = year % 4 == 0 && (year % 100 != 0 || year % 400 == 0);
    let days_in_month = match month {
        1 | 3 | 5 | 7 | 8 | 10 | 12 => 31,
        4 | 6 | 9 | 11 => 30,
        2 if is_leap_year => 29,
        2 => 28,
        _ => return None,
    };
    if !(1..=days_in_month).contains(&day) {
        return None;
    }

    // Days since 1970-01-01 via Howard Hinnant's `days_from_civil`: the
    // year is shifted to start in March so the leap day falls at the end.
    let year_adjusted = if month <= 2 { year - 1 } else { year };
    // `year_adjusted` is at least 1969 here, so plain division floors.
    let era = year_adjusted / 400;
    let year_of_era = u32::try_from(year_adjusted - era * 400).ok()?;
    let month_from_march = if month > 2 { month - 3 } else { month + 9 };
    let day_of_year = (153 * month_from_march + 2) / 5 + day - 1;
    let day_of_era = year_of_era * 365 + year_of_era / 4 - year_of_era / 100 + day_of_year;
    let days = era * 146_097 + i64::from(day_of_era) - 719_468;

    u64::try_from(days).ok()?.checked_mul(86_400_000)
}
1135
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn promote_form_is_single_symbol() {
        let parsed = parse("(promote @scratch_42)").unwrap();
        let expected = UnboundForm::Promote {
            name: RawSymbolName::new("scratch_42"),
        };
        assert_eq!(parsed[0], expected);
    }

    #[test]
    fn alias_and_rename() {
        let alias_forms = parse("(alias @a @b)").unwrap();
        let expected_alias = UnboundForm::Alias {
            a: RawSymbolName::new("a"),
            b: RawSymbolName::new("b"),
        };
        assert_eq!(alias_forms[0], expected_alias);

        let rename_forms = parse("(rename @old @new)").unwrap();
        let expected_rename = UnboundForm::Rename {
            old: RawSymbolName::new("old"),
            new: RawSymbolName::new("new"),
        };
        assert_eq!(rename_forms[0], expected_rename);
    }

    #[test]
    fn sem_form_with_all_required_keywords() {
        let input = r#"(sem @alice email "alice@example.com" :src @profile :c 0.95 :v 2024-01-15)"#;
        let parsed = parse(input).unwrap();
        let UnboundForm::Sem { s, p, o, keywords } = &parsed[0] else {
            panic!("expected sem form");
        };
        assert_eq!(s, &RawSymbolName::new("alice"));
        assert_eq!(p, &RawSymbolName::new("email"));
        assert_eq!(o, &RawValue::String("alice@example.com".into()));
        // Every required keyword must have been captured.
        assert!(keywords.contains_key("src"));
        assert!(keywords.contains_key("c"));
        assert!(keywords.contains_key("v"));
        assert!(matches!(keywords.get("v"), Some(RawValue::Timestamp(_))));
    }

    #[test]
    fn sem_missing_required_keyword_errors() {
        // `:v` is deliberately omitted.
        let input = r#"(sem @alice email "a" :src @profile :c 0.95)"#;
        let failure = parse(input).unwrap_err();
        assert!(matches!(
            failure,
            ParseError::MissingRequiredKeyword {
                missing: "v",
                form: "sem"
            }
        ));
    }

    #[test]
    fn unknown_opcode_errors() {
        let failure = parse("(xyz @a @b)").unwrap_err();
        assert!(matches!(failure, ParseError::UnknownOpcode { .. }));
    }

    #[test]
    fn unknown_keyword_errors() {
        let input = r#"(sem @a b "x" :src @y :c 0.5 :v 2024-01-15 :bogus 1)"#;
        let failure = parse(input).unwrap_err();
        assert!(matches!(failure, ParseError::BadKeyword { .. }));
    }

    #[test]
    fn duplicate_keyword_errors() {
        let input = r#"(sem @a b "x" :src @y :src @y :c 0.5 :v 2024-01-15)"#;
        let failure = parse(input).unwrap_err();
        assert!(matches!(failure, ParseError::DuplicateKeyword { .. }));
    }

    #[test]
    fn epi_parses_participants_list() {
        let input = r"(epi @ep_001 @rename (@old @new) @github
            :at 2026-04-17T10:00:00Z :obs 2026-04-17T10:00:00Z
            :src @alice :c 1.0)";
        let parsed = parse(input).unwrap();
        let UnboundForm::Epi {
            event_id,
            kind,
            participants,
            location,
            ..
        } = &parsed[0]
        else {
            panic!("expected epi form");
        };
        assert_eq!(event_id, &RawSymbolName::new("ep_001"));
        assert_eq!(kind, &RawSymbolName::new("rename"));
        assert_eq!(participants.len(), 2);
        assert_eq!(participants[0], RawSymbolName::new("old"));
        assert_eq!(location, &RawSymbolName::new("github"));
    }

    #[test]
    fn pro_with_optional_precondition() {
        let input = r#"(pro @rule_1 "agent about to write" "route via librarian"
            :pre nil :scp @mimir :src @agents_md :c 1.0)"#;
        let parsed = parse(input).unwrap();
        let UnboundForm::Pro {
            rule_id, keywords, ..
        } = &parsed[0]
        else {
            panic!("expected pro form");
        };
        assert_eq!(rule_id, &RawSymbolName::new("rule_1"));
        assert_eq!(keywords.get("pre"), Some(&RawValue::Nil));
    }

    #[test]
    fn inf_requires_method_and_derived_from() {
        let input = r"(inf @a p @b (@m1 @m2) @pattern_summarize :c 0.7 :v 2024-03-15)";
        let parsed = parse(input).unwrap();
        let UnboundForm::Inf {
            derived_from,
            method,
            ..
        } = &parsed[0]
        else {
            panic!("expected inf form");
        };
        assert_eq!(derived_from.len(), 2);
        assert_eq!(method, &RawSymbolName::new("pattern_summarize"));
    }

    #[test]
    fn query_with_keywords_only() {
        let parsed = parse("(query :s @alice :p email :debug_mode true)").unwrap();
        let UnboundForm::Query {
            selector, keywords, ..
        } = &parsed[0]
        else {
            panic!("expected query form");
        };
        assert!(selector.is_none());
        assert_eq!(keywords.get("debug_mode"), Some(&RawValue::Boolean(true)));
    }

    #[test]
    fn query_with_positional_selector() {
        let parsed = parse("(query @mem_x)").unwrap();
        let UnboundForm::Query {
            selector,
            keywords: _,
        } = &parsed[0]
        else {
            panic!("expected query form");
        };
        let expected = RawValue::RawSymbol(RawSymbolName::new("mem_x"));
        assert_eq!(selector.as_ref(), Some(&expected));
    }

    #[test]
    fn timestamp_converts_to_clocktime() {
        let input = r#"(sem @a b "x" :src @y :c 0.5 :v 2024-01-15)"#;
        let parsed = parse(input).unwrap();
        let UnboundForm::Sem { keywords, .. } = &parsed[0] else {
            panic!();
        };
        let valid_at = keywords.get("v");
        let Some(RawValue::Timestamp(clock)) = valid_at else {
            panic!("expected Timestamp, got {valid_at:?}");
        };
        // 2024-01-15 = 1705276800 seconds since epoch = 1705276800000 ms.
        assert_eq!(clock.as_millis(), 1_705_276_800_000);
    }

    #[test]
    fn multiple_forms_in_one_input() {
        let input = r"
            (alias @a @b)
            (rename @old @new)
            (promote @tmp)
        ";
        assert_eq!(parse(input).unwrap().len(), 3);
    }

    // ---- Security F3 (P2) regression: the parser must not overflow the
    // stack on adversarially deep nesting. Before the fix, `parse_value`
    // and `parse_correct` recursed without bound; N nested parens took
    // roughly N stack frames, exhausting the default 8 MiB main-thread
    // stack at a few thousand levels — uncatchably (no `Result::Err`,
    // no `ParseError` variant).

    /// Builds a Sem form whose object-position value is a list nested
    /// `depth` levels deep: `(sem @s @p ((((...0)))) :src @observation
    /// :c 0.5 :v 2024-01-15)`. The outermost `(` opens the form and is
    /// not counted as value nesting; every following `(` is one parser
    /// nesting level.
    fn nested_value_input(depth: usize) -> String {
        let open = "(".repeat(depth);
        let close = ")".repeat(depth);
        format!("(sem @s @p {open}0{close} :src @observation :c 0.5 :v 2024-01-15)")
    }

    #[test]
    fn parser_accepts_value_nesting_at_limit() {
        // Exactly MAX_NESTING_DEPTH levels — the deepest permitted input.
        let input = nested_value_input(MAX_NESTING_DEPTH);
        let parsed = parse(&input).expect("must accept depth at the limit");
        assert_eq!(parsed.len(), 1);
    }

    #[test]
    fn parser_rejects_value_nesting_one_over_limit() {
        // A single level beyond the limit.
        let input = nested_value_input(MAX_NESTING_DEPTH + 1);
        let failure = parse(&input).expect_err("must reject depth over the limit");
        let ParseError::NestingTooDeep { max, .. } = &failure else {
            panic!("expected NestingTooDeep, got {failure:?}");
        };
        assert_eq!(*max, MAX_NESTING_DEPTH);
    }

    #[test]
    fn parser_rejects_pathologically_deep_value_nesting_without_stack_overflow() {
        // Ten times the limit: previously this blew the stack
        // uncatchably; now it yields a typed error. The process not
        // aborting is the load-bearing assertion — the variant check is
        // a bonus.
        let input = nested_value_input(MAX_NESTING_DEPTH * 10);
        let failure = parse(&input).expect_err("must reject pathological nesting");
        assert!(
            matches!(failure, ParseError::NestingTooDeep { .. }),
            "expected NestingTooDeep, got {failure:?}"
        );
    }

    #[test]
    fn parser_rejects_nested_correct_forms_past_limit() {
        use std::fmt::Write as _;

        // The second recursion site: parse_correct → parse_form →
        // parse_correct … Build N nested `correct` forms:
        // (correct @e1 (correct @e2 (correct @e3 ... (epi ...))))
        let depth = MAX_NESTING_DEPTH + 1;
        let mut input = String::new();
        for level in 0..depth {
            write!(input, "(correct @e{level} ").expect("write to String never fails");
        }
        input.push_str("(epi @ev @kind () @loc :at 2024-01-15 :obs 2024-01-15 :src @y :c 0.5)");
        input.push_str(&")".repeat(depth));

        let failure = parse(&input).expect_err("must reject deep `correct` nesting");
        assert!(
            matches!(failure, ParseError::NestingTooDeep { .. }),
            "expected NestingTooDeep, got {failure:?}"
        );
    }
}