Skip to main content

mimir_core/
semantic.rs

1//! Semantic stage — validates [`crate::bind::BoundForm`]s against the
2//! grounding model and produces [`ValidatedForm`] ASTs with typed
3//! fields extracted from keyword arguments.
4//!
5//! Implements the rules in `docs/concepts/grounding-model.md` §§ 3–4
6//! and the clock / projection rules in `docs/concepts/temporal-model.md`
7//! §§ 5, 9–10.
8
9use thiserror::Error;
10
11use crate::bind::{BoundForm, BoundKeywords, SymbolTable};
12use crate::clock::ClockTime;
13use crate::confidence::{Confidence, ConfidenceError};
14use crate::memory_kind::MemoryKindTag;
15use crate::source_kind::SourceKind;
16use crate::symbol::SymbolId;
17use crate::value::Value;
18
/// An AST form after the semantic stage: typed fields are extracted
/// from keyword bags; grounding + confidence + clock invariants are
/// enforced.
///
/// Produced by [`validate`]. The memory-write variants (`Sem`, `Epi`,
/// `Pro`, `Inf`) carry fields pulled out of the bound form's keyword
/// bag; the remaining variants are copied field-for-field from the
/// corresponding bound form (`Correct` additionally validates its
/// nested body).
///
/// Consumed by the canonical-form emitter.
#[derive(Clone, Debug, PartialEq)]
#[allow(clippy::module_name_repetitions)]
pub enum ValidatedForm {
    /// Semantic memory write.
    Sem {
        /// Subject symbol.
        s: SymbolId,
        /// Predicate symbol.
        p: SymbolId,
        /// Object value.
        o: Value,
        /// Source symbol (the grounding anchor), taken from `:src`.
        source: SymbolId,
        /// Derived grounding kind from the source symbol's canonical name.
        source_kind: SourceKind,
        /// Stored confidence, taken from `:c`. Validation rejects (it
        /// does not clamp) a value above the source kind's bound.
        confidence: Confidence,
        /// Valid-time, taken from `:v`. May be later than the `now`
        /// passed to validation only when `projected` is `true`.
        valid_at: ClockTime,
        /// Whether this memory is a projection about future state.
        /// `true` only when the form carried `:projected true`.
        projected: bool,
    },
    /// Episodic memory write.
    Epi {
        /// Stable event ID.
        event_id: SymbolId,
        /// Event-type symbol.
        kind: SymbolId,
        /// Participant symbols (may be empty).
        participants: Vec<SymbolId>,
        /// Location symbol.
        location: SymbolId,
        /// Event time, taken from `:at`.
        at_time: ClockTime,
        /// Observation time, taken from `:obs` — must be `>= at_time`.
        observed_at: ClockTime,
        /// Source symbol (the witness), taken from `:src`.
        source: SymbolId,
        /// Derived grounding kind.
        source_kind: SourceKind,
        /// Confidence, taken from `:c`; rejected if above the source
        /// kind's bound.
        confidence: Confidence,
    },
    /// Procedural memory write.
    Pro {
        /// Stable rule ID.
        rule_id: SymbolId,
        /// Trigger value.
        trigger: Value,
        /// Action value.
        action: Value,
        /// Optional precondition, taken from `:pre` without type
        /// checking at this stage.
        precondition: Option<Value>,
        /// Scope symbol, taken from `:scp`.
        scope: SymbolId,
        /// Source symbol, taken from `:src`.
        source: SymbolId,
        /// Derived grounding kind.
        source_kind: SourceKind,
        /// Confidence, taken from `:c`; rejected if above the source
        /// kind's bound.
        confidence: Confidence,
    },
    /// Inferential memory write. Carries no source symbol, so no
    /// source-kind admission or confidence-bound check applies.
    Inf {
        /// Subject.
        s: SymbolId,
        /// Predicate.
        p: SymbolId,
        /// Object.
        o: Value,
        /// Non-empty list of parent memories (emptiness is rejected
        /// by the semantic stage).
        derived_from: Vec<SymbolId>,
        /// Inference method symbol (validated at bind time).
        method: SymbolId,
        /// Confidence, taken from `:c`.
        confidence: Confidence,
        /// Valid-time, taken from `:v`. May be later than the `now`
        /// passed to validation only when `projected` is `true`.
        valid_at: ClockTime,
        /// Projection flag. `true` only when the form carried
        /// `:projected true`.
        projected: bool,
    },
    /// Alias declaration.
    Alias {
        /// First symbol.
        a: SymbolId,
        /// Second symbol.
        b: SymbolId,
    },
    /// Rename — old name becomes alias of new.
    Rename {
        /// Previous canonical.
        old: SymbolId,
        /// New canonical.
        new: SymbolId,
    },
    /// Retire a symbol.
    Retire {
        /// Target.
        name: SymbolId,
        /// Optional reason.
        reason: Option<String>,
    },
    /// Correct a prior Episodic memory.
    Correct {
        /// Target episode.
        target_episode: SymbolId,
        /// Corrected Episodic body. The semantic stage guarantees this
        /// is always a [`ValidatedForm::Epi`].
        corrected: Box<ValidatedForm>,
    },
    /// Promote an ephemeral memory.
    Promote {
        /// Target symbol.
        name: SymbolId,
    },
    /// Read-path query.
    Query {
        /// Optional positional selector.
        selector: Option<Value>,
        /// Remaining keyword arguments (uninterpreted at this stage).
        keywords: BoundKeywords,
    },
    /// Explicit Episode-boundary directive — pass-through from bind.
    /// The semantic stage enforces "at most one Episode directive per
    /// batch" so the emitter can trust the batch carries a single
    /// deterministic intent.
    Episode {
        /// Open or close.
        action: crate::parse::EpisodeAction,
        /// Optional label.
        label: Option<String>,
        /// Optional parent Episode.
        parent_episode: Option<SymbolId>,
        /// Retracted Episodes.
        retracts: Vec<SymbolId>,
    },
    /// Pin / unpin / authoritative flag write — pass-through from
    /// bind. See `confidence-decay.md` §§ 7 / 8.
    Flag {
        /// Which flag operation.
        action: crate::parse::FlagAction,
        /// Target memory.
        memory: SymbolId,
        /// Invoking agent / operator.
        actor: SymbolId,
    },
}
170
/// Errors produced by [`validate`].
///
/// Validation stops at the first failing form, so a batch yields at
/// most one of these.
#[derive(Debug, Error, PartialEq)]
pub enum SemanticError {
    /// A confidence value exceeded the bound for its source kind.
    /// Raised for `sem`, `epi`, and `pro` forms; the value is rejected,
    /// not clamped.
    #[error("confidence {requested} exceeds {source_kind:?} bound {bound}")]
    ConfidenceExceedsSourceBound {
        /// The requested confidence.
        requested: Confidence,
        /// The source kind's bound.
        bound: Confidence,
        /// The source kind derived from the source symbol.
        source_kind: SourceKind,
    },

    /// The source kind does not admit the memory kind.
    #[error("source kind {source_kind:?} does not admit memory kind {memory_kind:?}")]
    SourceKindNotAdmitted {
        /// The derived source kind.
        source_kind: SourceKind,
        /// The memory kind being written.
        memory_kind: MemoryKindTag,
    },

    /// Agent supplied a future `valid_at` without the `:projected true` flag.
    /// "Future" means strictly later than the `now` passed to
    /// [`validate`]; raised for `sem` and `inf` forms.
    #[error("valid_at {valid_at:?} is in the future; require :projected true")]
    FutureValidity {
        /// The offending timestamp.
        valid_at: ClockTime,
    },

    /// Episodic `observed_at` predates `at_time`. Equal timestamps are
    /// accepted.
    #[error("Episodic observed_at {observed_at:?} < at_time {at_time:?}")]
    InvalidClockOrder {
        /// The event time.
        at_time: ClockTime,
        /// The observation time.
        observed_at: ClockTime,
    },

    /// Inferential `derived_from` is empty.
    #[error("Inferential derived_from must be non-empty")]
    EmptyDerivedFrom,

    /// A required keyword was missing post-bind. (Normally caught in
    /// parse, but present here as a safety net for future form changes.)
    #[error("semantic stage missing required keyword {keyword:?} for form {form:?}")]
    MissingKeyword {
        /// Missing keyword (without the leading `:`), e.g. `"src"`.
        keyword: &'static str,
        /// Form being validated, e.g. `"sem"`.
        form: &'static str,
    },

    /// A keyword had the wrong type for the semantic stage. The
    /// keyword was present but held a different `Value` variant than
    /// the one the form requires.
    #[error("keyword {keyword:?} has wrong type for {form:?}: expected {expected}")]
    BadKeywordType {
        /// Keyword.
        keyword: &'static str,
        /// Form.
        form: &'static str,
        /// Description of expected type.
        expected: &'static str,
    },

    /// A confidence value was malformed (e.g. NaN). Wraps the error
    /// returned when constructing a [`Confidence`] from the `:c` float.
    #[error("confidence malformed: {0}")]
    ConfidenceMalformed(#[from] ConfidenceError),

    /// A `correct` form's corrected body is not an `Epi` form. Checked
    /// after the body itself has been validated, so an invalid body
    /// reports its own error first.
    #[error("correct body must be an Epi form")]
    CorrectsNonEpisodic,

    /// A batch contains more than one `(episode …)` directive. Per
    /// `episode-semantics.md` § 3 / § 11, each batch corresponds to
    /// at most one Episode, so a single directive per batch is the
    /// only coherent shape.
    #[error("batch contains {count} episode directives; at most 1 allowed")]
    MultipleEpisodeDirectives {
        /// Number of episode forms in the batch.
        count: usize,
    },
}
253
254/// Validate a sequence of bound forms against the grounding + clock
255/// invariants, producing typed `ValidatedForm` ASTs.
256///
257/// # Errors
258///
259/// Returns the first [`SemanticError`] encountered.
260pub fn validate(
261    forms: Vec<BoundForm>,
262    table: &SymbolTable,
263    now: ClockTime,
264) -> Result<Vec<ValidatedForm>, SemanticError> {
265    let validated = forms
266        .into_iter()
267        .map(|form| validate_form(form, table, now))
268        .collect::<Result<Vec<_>, _>>()?;
269    // Spec invariant: a batch can carry at most one `(episode …)`
270    // form. Two :start directives in one batch would need to
271    // contradict each other about the Episode's metadata; two
272    // :close forms is a simple client bug. Reject at validation
273    // so emit can trust a singleton.
274    let episode_count = validated
275        .iter()
276        .filter(|f| matches!(f, ValidatedForm::Episode { .. }))
277        .count();
278    if episode_count > 1 {
279        return Err(SemanticError::MultipleEpisodeDirectives {
280            count: episode_count,
281        });
282    }
283    Ok(validated)
284}
285
286#[allow(clippy::too_many_lines)]
287fn validate_form(
288    form: BoundForm,
289    table: &SymbolTable,
290    now: ClockTime,
291) -> Result<ValidatedForm, SemanticError> {
292    match form {
293        BoundForm::Sem {
294            s,
295            p,
296            o,
297            mut keywords,
298        } => {
299            let source = take_symbol(&mut keywords, "src", "sem")?;
300            let confidence = take_confidence(&mut keywords, "sem")?;
301            let valid_at = take_timestamp(&mut keywords, "v", "sem")?;
302            let projected = take_projected(&mut keywords);
303            let source_kind = source_kind_for(source, table);
304            check_admits(source_kind, MemoryKindTag::Semantic)?;
305            check_confidence_bound(source_kind, confidence)?;
306            check_future_validity(valid_at, now, projected)?;
307            Ok(ValidatedForm::Sem {
308                s,
309                p,
310                o,
311                source,
312                source_kind,
313                confidence,
314                valid_at,
315                projected,
316            })
317        }
318        BoundForm::Epi {
319            event_id,
320            kind,
321            participants,
322            location,
323            mut keywords,
324        } => {
325            let source = take_symbol(&mut keywords, "src", "epi")?;
326            let confidence = take_confidence(&mut keywords, "epi")?;
327            let at_time = take_timestamp(&mut keywords, "at", "epi")?;
328            let observed_at = take_timestamp(&mut keywords, "obs", "epi")?;
329            let source_kind = source_kind_for(source, table);
330            check_admits(source_kind, MemoryKindTag::Episodic)?;
331            check_confidence_bound(source_kind, confidence)?;
332            if observed_at < at_time {
333                return Err(SemanticError::InvalidClockOrder {
334                    at_time,
335                    observed_at,
336                });
337            }
338            Ok(ValidatedForm::Epi {
339                event_id,
340                kind,
341                participants,
342                location,
343                at_time,
344                observed_at,
345                source,
346                source_kind,
347                confidence,
348            })
349        }
350        BoundForm::Pro {
351            rule_id,
352            trigger,
353            action,
354            mut keywords,
355        } => {
356            let source = take_symbol(&mut keywords, "src", "pro")?;
357            let confidence = take_confidence(&mut keywords, "pro")?;
358            let scope = take_symbol(&mut keywords, "scp", "pro")?;
359            let precondition = keywords.remove("pre");
360            let source_kind = source_kind_for(source, table);
361            check_admits(source_kind, MemoryKindTag::Procedural)?;
362            check_confidence_bound(source_kind, confidence)?;
363            Ok(ValidatedForm::Pro {
364                rule_id,
365                trigger,
366                action,
367                precondition,
368                scope,
369                source,
370                source_kind,
371                confidence,
372            })
373        }
374        BoundForm::Inf {
375            s,
376            p,
377            o,
378            derived_from,
379            method,
380            mut keywords,
381        } => {
382            if derived_from.is_empty() {
383                return Err(SemanticError::EmptyDerivedFrom);
384            }
385            let confidence = take_confidence(&mut keywords, "inf")?;
386            let valid_at = take_timestamp(&mut keywords, "v", "inf")?;
387            let projected = take_projected(&mut keywords);
388            check_future_validity(valid_at, now, projected)?;
389            Ok(ValidatedForm::Inf {
390                s,
391                p,
392                o,
393                derived_from,
394                method,
395                confidence,
396                valid_at,
397                projected,
398            })
399        }
400        BoundForm::Alias { a, b } => Ok(ValidatedForm::Alias { a, b }),
401        BoundForm::Rename { old, new } => Ok(ValidatedForm::Rename { old, new }),
402        BoundForm::Retire { name, reason } => Ok(ValidatedForm::Retire { name, reason }),
403        BoundForm::Correct {
404            target_episode,
405            corrected,
406        } => {
407            let bound = validate_form(*corrected, table, now)?;
408            if !matches!(&bound, ValidatedForm::Epi { .. }) {
409                return Err(SemanticError::CorrectsNonEpisodic);
410            }
411            Ok(ValidatedForm::Correct {
412                target_episode,
413                corrected: Box::new(bound),
414            })
415        }
416        BoundForm::Promote { name } => Ok(ValidatedForm::Promote { name }),
417        BoundForm::Query { selector, keywords } => Ok(ValidatedForm::Query { selector, keywords }),
418        BoundForm::Episode {
419            action,
420            label,
421            parent_episode,
422            retracts,
423        } => Ok(ValidatedForm::Episode {
424            action,
425            label,
426            parent_episode,
427            retracts,
428        }),
429        BoundForm::Flag {
430            action,
431            memory,
432            actor,
433        } => Ok(ValidatedForm::Flag {
434            action,
435            memory,
436            actor,
437        }),
438    }
439}
440
441fn take_symbol(
442    keywords: &mut BoundKeywords,
443    key: &'static str,
444    form: &'static str,
445) -> Result<SymbolId, SemanticError> {
446    match keywords.remove(key) {
447        Some(Value::Symbol(id)) => Ok(id),
448        Some(_) => Err(SemanticError::BadKeywordType {
449            keyword: key,
450            form,
451            expected: "symbol",
452        }),
453        None => Err(SemanticError::MissingKeyword { keyword: key, form }),
454    }
455}
456
457fn take_timestamp(
458    keywords: &mut BoundKeywords,
459    key: &'static str,
460    form: &'static str,
461) -> Result<ClockTime, SemanticError> {
462    match keywords.remove(key) {
463        Some(Value::Timestamp(t)) => Ok(t),
464        Some(_) => Err(SemanticError::BadKeywordType {
465            keyword: key,
466            form,
467            expected: "timestamp",
468        }),
469        None => Err(SemanticError::MissingKeyword { keyword: key, form }),
470    }
471}
472
473#[allow(clippy::cast_possible_truncation)]
474fn take_confidence(
475    keywords: &mut BoundKeywords,
476    form: &'static str,
477) -> Result<Confidence, SemanticError> {
478    let raw = match keywords.remove("c") {
479        Some(Value::Float(f)) => f as f32,
480        Some(_) => {
481            return Err(SemanticError::BadKeywordType {
482                keyword: "c",
483                form,
484                expected: "float confidence in [0.0, 1.0]",
485            });
486        }
487        None => {
488            return Err(SemanticError::MissingKeyword { keyword: "c", form });
489        }
490    };
491    Ok(Confidence::try_from_f32(raw)?)
492}
493
494fn take_projected(keywords: &mut BoundKeywords) -> bool {
495    matches!(keywords.remove("projected"), Some(Value::Boolean(true)))
496}
497
498fn check_admits(source_kind: SourceKind, memory_kind: MemoryKindTag) -> Result<(), SemanticError> {
499    if source_kind.admits(memory_kind) {
500        Ok(())
501    } else {
502        Err(SemanticError::SourceKindNotAdmitted {
503            source_kind,
504            memory_kind,
505        })
506    }
507}
508
509fn check_confidence_bound(
510    source_kind: SourceKind,
511    confidence: Confidence,
512) -> Result<(), SemanticError> {
513    let bound = source_kind.confidence_bound();
514    if confidence <= bound {
515        Ok(())
516    } else {
517        Err(SemanticError::ConfidenceExceedsSourceBound {
518            requested: confidence,
519            bound,
520            source_kind,
521        })
522    }
523}
524
525fn check_future_validity(
526    valid_at: ClockTime,
527    now: ClockTime,
528    projected: bool,
529) -> Result<(), SemanticError> {
530    if valid_at > now && !projected {
531        Err(SemanticError::FutureValidity { valid_at })
532    } else {
533        Ok(())
534    }
535}
536
537/// Derive [`SourceKind`] from a source symbol's canonical name.
538///
539/// The 12 reserved grounding-kind names from `grounding-model.md` § 3.1
540/// map to their specific kinds. Any other source symbol defaults to
541/// [`SourceKind::Observation`] — the most permissive kind (admits
542/// Semantic + Episodic, bound 1.0). This lets agents use specific
543/// witness symbols (e.g. `@mira`) as sources for Episodic memories
544/// without needing a reserved name.
545#[must_use]
546pub fn source_kind_for(source: SymbolId, table: &SymbolTable) -> SourceKind {
547    let Some(entry) = table.entry(source) else {
548        return SourceKind::Observation;
549    };
550    source_kind_from_name(&entry.canonical_name)
551}
552
553/// Map a source symbol's canonical name to a [`SourceKind`].
554#[must_use]
555pub fn source_kind_from_name(name: &str) -> SourceKind {
556    match name {
557        "profile" => SourceKind::Profile,
558        "self_report" => SourceKind::SelfReport,
559        "participant_report" => SourceKind::ParticipantReport,
560        "document" => SourceKind::Document,
561        "registry" => SourceKind::Registry,
562        "policy" => SourceKind::Policy,
563        "agent_instruction" => SourceKind::AgentInstruction,
564        "external_authority" => SourceKind::ExternalAuthority,
565        "pending_verification" => SourceKind::PendingVerification,
566        "librarian_assignment" => SourceKind::LibrarianAssignment,
567        // "observation" matches the default branch below; included in
568        // the 11 reserved names via the wildcard.
569        _ => SourceKind::Observation,
570    }
571}
572
#[cfg(test)]
mod tests {
    //! End-to-end checks that drive source text through parse → bind →
    //! validate, plus a direct check of the name → kind mapping.

    use super::*;
    use crate::bind::{bind, SymbolTable};
    use crate::parse::parse;

    /// Fixed reference instant for every test (~2033), so timestamps
    /// in 2024 count as past and 2099 counts as future.
    fn now() -> ClockTime {
        ClockTime::try_from_millis(2_000_000_000_000).expect("non-sentinel") // ~2033 — always 'now' for tests
    }

    /// Parse and bind `src` against a fresh symbol table, then run the
    /// semantic stage. Parse and bind failures panic: these tests only
    /// exercise semantic errors.
    fn bind_and_validate(src: &str) -> Result<Vec<ValidatedForm>, SemanticError> {
        let forms = parse(src).expect("test source must parse");
        let mut table = SymbolTable::new();
        let (bound, _journal) = bind(forms, &mut table).expect("test source must bind");
        validate(bound, &table, now())
    }

    #[test]
    fn sem_profile_passes() {
        let r = bind_and_validate(r#"(sem @alice email "x" :src @profile :c 0.95 :v 2024-01-15)"#);
        assert!(r.is_ok(), "got {r:?}");
    }

    #[test]
    fn sem_profile_over_bound_fails() {
        let err =
            bind_and_validate(r#"(sem @alice email "x" :src @profile :c 0.99 :v 2024-01-15)"#)
                .unwrap_err();
        assert!(matches!(
            err,
            SemanticError::ConfidenceExceedsSourceBound { .. }
        ));
    }

    #[test]
    fn pro_observation_source_not_admitted() {
        // Procedural requires Policy or AgentInstruction; Observation
        // does not admit Procedural.
        let src = r#"(pro @rule "trigger" "action" :scp @mimir :src @observation :c 0.9)"#;
        let err = bind_and_validate(src).unwrap_err();
        assert!(matches!(err, SemanticError::SourceKindNotAdmitted { .. }));
    }

    #[test]
    fn pro_policy_admitted() {
        let src = r#"(pro @rule "trigger" "action" :scp @mimir :src @policy :c 1.0)"#;
        let r = bind_and_validate(src);
        assert!(r.is_ok(), "got {r:?}");
    }

    #[test]
    fn epi_observed_before_at_time_errors() {
        let err = bind_and_validate(
            r"(epi @ev @k (@p1) @loc :at 2024-01-15T10:00:00Z :obs 2024-01-15T09:00:00Z :src @alice :c 0.9)",
        )
        .unwrap_err();
        assert!(matches!(err, SemanticError::InvalidClockOrder { .. }));
    }

    #[test]
    fn epi_observed_equal_at_time_passes() {
        let r = bind_and_validate(
            r"(epi @ev @k (@p1) @loc :at 2024-01-15T10:00:00Z :obs 2024-01-15T10:00:00Z :src @alice :c 0.9)",
        );
        assert!(r.is_ok(), "got {r:?}");
    }

    #[test]
    fn future_validity_without_projected_errors() {
        // 2099-01-01 is far past `now` (~2033).
        let err = bind_and_validate(
            r"(sem @alice status @future :src @agent_instruction :c 0.9 :v 2099-01-01)",
        )
        .unwrap_err();
        assert!(matches!(err, SemanticError::FutureValidity { .. }));
    }

    #[test]
    fn future_validity_with_projected_passes() {
        let r = bind_and_validate(
            r"(sem @alice status @future :src @agent_instruction :c 0.9 :v 2099-01-01 :projected true)",
        );
        assert!(r.is_ok(), "got {r:?}");
    }

    #[test]
    fn inf_empty_derived_from_rejected_by_semantic_stage() {
        // An empty derived_from list is written as `()`. The parser
        // accepts it syntactically, so the semantic stage is the layer
        // that must reject it.
        let err = bind_and_validate("(inf @a p @b () @pattern_summarize :c 0.7 :v 2024-01-15)")
            .unwrap_err();
        assert!(matches!(err, SemanticError::EmptyDerivedFrom));
    }

    #[test]
    fn sem_unknown_source_defaults_to_observation() {
        // `@mira` isn't a reserved grounding-kind name; defaults to
        // Observation (bound 1.0, admits Semantic). Use distinct symbols
        // for subject and source so kind-locking doesn't collide.
        let r = bind_and_validate(r#"(sem @mimir founder "mira" :src @mira :c 1.0 :v 2024-01-15)"#);
        assert!(r.is_ok(), "got {r:?}");
    }

    #[test]
    fn correct_with_episodic_body_passes() {
        // The parser's correct-form already requires the body to be
        // epi, so a non-epi body cannot be written as source text and
        // the semantic stage's `CorrectsNonEpisodic` check is purely
        // defensive. This test covers the accepted path: an epi body
        // validates and is wrapped.
        let r = bind_and_validate(
            r"(correct @target_ep (epi @ev @k (@p) @loc :at 2024-01-15T10:00:00Z :obs 2024-01-15T10:00:00Z :src @alice :c 0.9))",
        );
        assert!(r.is_ok(), "got {r:?}");
    }

    #[test]
    fn source_kind_from_name_mapping() {
        // Every reserved name with a dedicated kind.
        assert_eq!(source_kind_from_name("profile"), SourceKind::Profile);
        assert_eq!(source_kind_from_name("self_report"), SourceKind::SelfReport);
        assert_eq!(
            source_kind_from_name("participant_report"),
            SourceKind::ParticipantReport
        );
        assert_eq!(source_kind_from_name("document"), SourceKind::Document);
        assert_eq!(source_kind_from_name("registry"), SourceKind::Registry);
        assert_eq!(source_kind_from_name("policy"), SourceKind::Policy);
        assert_eq!(
            source_kind_from_name("agent_instruction"),
            SourceKind::AgentInstruction
        );
        assert_eq!(
            source_kind_from_name("external_authority"),
            SourceKind::ExternalAuthority
        );
        assert_eq!(
            source_kind_from_name("pending_verification"),
            SourceKind::PendingVerification
        );
        assert_eq!(
            source_kind_from_name("librarian_assignment"),
            SourceKind::LibrarianAssignment
        );
        // The reserved name that shares the fallback.
        assert_eq!(
            source_kind_from_name("observation"),
            SourceKind::Observation
        );
        // Unreserved → Observation default.
        assert_eq!(source_kind_from_name("mira"), SourceKind::Observation);
    }
}