Skip to main content

mimir_core/
bind.rs

1//! Symbol binder — resolves [`crate::parse::RawSymbolName`] into
2//! [`crate::SymbolId`] against a per-workspace [`SymbolTable`] and
3//! produces a fully-typed [`BoundForm`].
4//!
5//! Implements the semantics specified in
6//! `docs/concepts/symbol-identity-semantics.md` §§ 3–9.
7
8use std::collections::{BTreeMap, HashMap, HashSet};
9
10use thiserror::Error;
11
12use crate::confidence::ConfidenceError;
13use crate::parse::{KeywordArgs, RawSymbolName, RawValue, UnboundForm};
14use crate::symbol::{ScopedSymbolId, SymbolId, SymbolKind};
15use crate::value::Value;
16
/// Maximum length of an alias chain before the binder rejects further
/// extensions with [`BindError::AliasChainLengthExceeded`]. Matches
/// `symbol-identity-semantics.md` § 7.3.
///
/// In this module the "chain length" is the number of names stored in
/// [`SymbolEntry::aliases`] for a single symbol: an attachment is
/// rejected once that list already holds this many names.
pub const ALIAS_CHAIN_LIMIT: usize = 16;
21
22// Inference-method registration is owned by
23// [`crate::inference_methods::InferenceMethod`]; bind validates against
24// that enum via `InferenceMethod::from_symbol_name`.
25
/// One symbol-table mutation performed by the binder while processing a
/// batch. The emit stage serializes the journal into `SYMBOL_*`
/// canonical records (opcodes 0x30–0x33 per `ir-canonical-form.md`
/// § 6.6) so that replay from the log can reconstitute the workspace's
/// symbol table. Per `librarian-pipeline.md` § 3.4 the journal is part
/// of bind's output alongside the bound AST.
///
/// [`bind`] threads a single journal through every form of the batch,
/// so entries appear in the order the mutations were applied.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum SymbolMutation {
    /// A first-use allocation — `id`, `name` (canonical), `kind`.
    Allocate {
        /// Allocated symbol ID.
        id: SymbolId,
        /// Canonical name at allocation time.
        name: String,
        /// Locked kind.
        kind: SymbolKind,
    },
    /// Rename — old canonical becomes an alias; new canonical attaches
    /// to the same `id`. Replay reconstructs the canonical/alias state.
    /// Only the new name is recorded; the previous canonical is
    /// whatever the entry held when the record is applied.
    Rename {
        /// Subject symbol.
        id: SymbolId,
        /// New canonical name.
        new_canonical: String,
        /// Locked kind at the time of the rename.
        kind: SymbolKind,
    },
    /// Alias — an additional name resolves to the same `id`.
    Alias {
        /// Subject symbol.
        id: SymbolId,
        /// The alias attached.
        alias: String,
        /// Locked kind.
        kind: SymbolKind,
    },
    /// Retire — sets the symbol's retired flag.
    Retire {
        /// Subject symbol.
        id: SymbolId,
        /// Canonical name at retire time (for the log record). This is
        /// the canonical name even when the retire form named the
        /// symbol through an alias.
        name: String,
        /// Locked kind.
        kind: SymbolKind,
    },
}
72
/// A single entry in the symbol table: everything the table records
/// about one [`SymbolId`].
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct SymbolEntry {
    /// The canonical name as currently recorded.
    pub canonical_name: String,
    /// Alternate names (aliases) that resolve to the same symbol, in
    /// the order they were attached. A rename pushes the previous
    /// canonical name onto this list.
    pub aliases: Vec<String>,
    /// Locked kind for this symbol.
    pub kind: SymbolKind,
    /// Whether the symbol is currently retired.
    pub retired: bool,
}
85
/// A per-workspace symbol table.
///
/// Symbol IDs are allocated monotonically as a `u64`; canonical names
/// and alias names are stored in a flat name→id lookup for O(1)
/// resolution.
///
/// See `symbol-identity-semantics.md` § 3 for the design.
///
/// # Examples
///
/// ```
/// # #![allow(clippy::unwrap_used)]
/// use mimir_core::bind::SymbolTable;
/// use mimir_core::SymbolKind;
///
/// let mut table = SymbolTable::new();
/// let id = table
///     .allocate("alice".into(), SymbolKind::Agent)
///     .unwrap();
/// assert_eq!(table.lookup("alice"), Some(id));
/// ```
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub struct SymbolTable {
    /// Raw value given to the next [`SymbolId`] created by
    /// [`Self::allocate`]. Replayed allocations bump it past the IDs
    /// they restore.
    next_id: u64,
    /// Per-symbol record, keyed by ID.
    entries: HashMap<SymbolId, SymbolEntry>,
    /// Flat lookup covering canonical names and aliases alike.
    names_to_id: HashMap<String, SymbolId>,
    /// Fast membership test for the `retired` flag; mirrors
    /// `entries[id].retired`.
    retired: HashSet<SymbolId>,
}
116
117impl SymbolTable {
118    /// Construct an empty symbol table.
119    #[must_use]
120    pub fn new() -> Self {
121        Self::default()
122    }
123
124    /// Allocate a new symbol with the given canonical name and kind.
125    ///
126    /// # Errors
127    ///
128    /// - [`BindError::SymbolRenameConflict`] if `name` is already in
129    ///   the table (its existing binding must be used or renamed).
130    pub fn allocate(&mut self, name: String, kind: SymbolKind) -> Result<SymbolId, BindError> {
131        if self.names_to_id.contains_key(&name) {
132            return Err(BindError::SymbolRenameConflict { name });
133        }
134        let id = SymbolId::new(self.next_id);
135        self.next_id += 1;
136        self.entries.insert(
137            id,
138            SymbolEntry {
139                canonical_name: name.clone(),
140                aliases: Vec::new(),
141                kind,
142                retired: false,
143            },
144        );
145        self.names_to_id.insert(name, id);
146        Ok(id)
147    }
148
149    /// Resolve a name (canonical or alias) to a [`SymbolId`].
150    #[must_use]
151    pub fn lookup(&self, name: &str) -> Option<SymbolId> {
152        self.names_to_id.get(name).copied()
153    }
154
155    /// Return the [`SymbolKind`] for an already-allocated symbol.
156    #[must_use]
157    pub fn kind_of(&self, id: SymbolId) -> Option<SymbolKind> {
158        self.entries.get(&id).map(|e| e.kind)
159    }
160
    /// Return the entry for an already-allocated symbol, or `None` when
    /// the table holds no entry for `id`.
    #[must_use]
    pub fn entry(&self, id: SymbolId) -> Option<&SymbolEntry> {
        self.entries.get(&id)
    }
166
167    /// Iterate all entries in the table, yielding `(SymbolId, &SymbolEntry)`
168    /// pairs. Iteration order is undefined; callers that need stable
169    /// output should sort on the consuming side.
170    pub fn iter_entries(&self) -> impl Iterator<Item = (SymbolId, &SymbolEntry)> + '_ {
171        self.entries.iter().map(|(id, entry)| (*id, entry))
172    }
173
174    /// Declare an alias — both names resolve to the same symbol.
175    ///
176    /// Both symbols must already be allocated; they must resolve to
177    /// the same [`SymbolId`] already, OR one must not be an alias of
178    /// the other yet. See `symbol-identity-semantics.md` § 7.
179    ///
180    /// # Errors
181    ///
182    /// - [`BindError::AliasChainLengthExceeded`] if adding this alias
183    ///   would push the chain past [`ALIAS_CHAIN_LIMIT`].
184    /// - [`BindError::SymbolRenameConflict`] if `b_name` resolves to a
185    ///   different symbol than `a_name` (merging two distinct symbols
186    ///   is not an alias operation; it must go through rename).
187    pub fn add_alias(&mut self, a_name: &str, b_name: &str) -> Result<(), BindError> {
188        let a_id = self.names_to_id.get(a_name).copied();
189        let b_id = self.names_to_id.get(b_name).copied();
190        match (a_id, b_id) {
191            (Some(id_a), Some(id_b)) if id_a == id_b => Ok(()),
192            (Some(_), Some(_)) => Err(BindError::SymbolRenameConflict {
193                name: b_name.to_string(),
194            }),
195            (Some(id), None) => self.attach_alias(id, b_name.to_string()),
196            (None, Some(id)) => self.attach_alias(id, a_name.to_string()),
197            (None, None) => Err(BindError::UnknownSymbol {
198                name: a_name.to_string(),
199            }),
200        }
201    }
202
203    /// Rename a symbol. Old name becomes an alias; new name becomes the
204    /// canonical.
205    ///
206    /// # Errors
207    ///
208    /// - [`BindError::UnknownSymbol`] if `old_name` does not resolve.
209    /// - [`BindError::SymbolRenameConflict`] if `new_name` is already
210    ///   bound to a different symbol.
211    /// - [`BindError::AliasChainLengthExceeded`] if adding the old name
212    ///   as an alias would push the chain past the cap.
213    pub fn rename(&mut self, old_name: &str, new_name: &str) -> Result<SymbolId, BindError> {
214        let id =
215            self.names_to_id
216                .get(old_name)
217                .copied()
218                .ok_or_else(|| BindError::UnknownSymbol {
219                    name: old_name.to_string(),
220                })?;
221        if let Some(existing) = self.names_to_id.get(new_name).copied() {
222            if existing != id {
223                return Err(BindError::SymbolRenameConflict {
224                    name: new_name.to_string(),
225                });
226            }
227            // already aliased to the same symbol; promote the new_name to canonical.
228        }
229        // Rotate the canonical name; push old canonical into aliases.
230        let entry = self
231            .entries
232            .get_mut(&id)
233            .ok_or_else(|| BindError::UnknownSymbol {
234                name: old_name.to_string(),
235            })?;
236        let previous_canonical = std::mem::replace(&mut entry.canonical_name, new_name.to_string());
237        if entry.aliases.len() >= ALIAS_CHAIN_LIMIT {
238            return Err(BindError::AliasChainLengthExceeded {
239                name: new_name.to_string(),
240                limit: ALIAS_CHAIN_LIMIT,
241            });
242        }
243        if previous_canonical != new_name {
244            entry.aliases.push(previous_canonical);
245        }
246        self.names_to_id.insert(new_name.to_string(), id);
247        Ok(id)
248    }
249
250    /// Mark a symbol retired. Existing references still resolve; new
251    /// references through the agent API trigger `stale_symbol`
252    /// warnings on read.
253    ///
254    /// # Errors
255    ///
256    /// - [`BindError::UnknownSymbol`] if `name` does not resolve.
257    pub fn retire(&mut self, name: &str) -> Result<SymbolId, BindError> {
258        let id = self
259            .names_to_id
260            .get(name)
261            .copied()
262            .ok_or_else(|| BindError::UnknownSymbol {
263                name: name.to_string(),
264            })?;
265        if let Some(entry) = self.entries.get_mut(&id) {
266            entry.retired = true;
267        }
268        self.retired.insert(id);
269        Ok(id)
270    }
271
272    /// Clear a retirement flag. Symmetric with [`Self::retire`].
273    ///
274    /// # Errors
275    ///
276    /// - [`BindError::UnknownSymbol`] if `name` does not resolve.
277    pub fn unretire(&mut self, name: &str) -> Result<SymbolId, BindError> {
278        let id = self
279            .names_to_id
280            .get(name)
281            .copied()
282            .ok_or_else(|| BindError::UnknownSymbol {
283                name: name.to_string(),
284            })?;
285        if let Some(entry) = self.entries.get_mut(&id) {
286            entry.retired = false;
287        }
288        self.retired.remove(&id);
289        Ok(id)
290    }
291
    /// Whether the symbol is currently retired. Answered from the
    /// table's retired-ID set; an ID the table has never seen reports
    /// `false`.
    #[must_use]
    pub fn is_retired(&self, id: SymbolId) -> bool {
        self.retired.contains(&id)
    }
297
298    /// Replay an `SYMBOL_ALLOC` canonical record into this table.
299    /// Unlike [`Self::allocate`] the caller supplies the original
300    /// `SymbolId`; the `next_id` monotonic counter is advanced past the
301    /// replayed ID so future agent allocations stay unique.
302    ///
303    /// Used by [`crate::store::Store::open`] to rebuild the table from
304    /// a durable log.
305    ///
306    /// # Errors
307    ///
308    /// - [`BindError::SymbolRenameConflict`] if `id` or `name` is
309    ///   already bound (log corruption; replay must be strictly
310    ///   monotonic).
311    pub fn replay_allocate(
312        &mut self,
313        id: SymbolId,
314        name: String,
315        kind: SymbolKind,
316    ) -> Result<(), BindError> {
317        if self.entries.contains_key(&id) || self.names_to_id.contains_key(&name) {
318            return Err(BindError::SymbolRenameConflict { name });
319        }
320        self.entries.insert(
321            id,
322            SymbolEntry {
323                canonical_name: name.clone(),
324                aliases: Vec::new(),
325                kind,
326                retired: false,
327            },
328        );
329        self.names_to_id.insert(name, id);
330        let next_after = id.as_u64().saturating_add(1);
331        if next_after > self.next_id {
332            self.next_id = next_after;
333        }
334        Ok(())
335    }
336
    /// Replay a `SYMBOL_ALIAS` canonical record. Attaches `alias` as an
    /// additional name resolving to `id`.
    ///
    /// # Errors
    ///
    /// - [`BindError::UnknownSymbol`] if `id` has never been allocated;
    ///   the error carries `alias`, the only name the record supplies.
    /// - [`BindError::AliasChainLengthExceeded`] if adding the alias
    ///   would exceed [`ALIAS_CHAIN_LIMIT`].
    pub fn replay_alias(&mut self, id: SymbolId, alias: String) -> Result<(), BindError> {
        self.attach_alias(id, alias)
    }
348
349    /// Replay an `SYMBOL_RENAME` canonical record. The previous canonical
350    /// name is rotated into aliases.
351    ///
352    /// # Errors
353    ///
354    /// - [`BindError::UnknownSymbol`] if `id` has never been allocated.
355    pub fn replay_rename(&mut self, id: SymbolId, new_canonical: String) -> Result<(), BindError> {
356        let entry = self
357            .entries
358            .get_mut(&id)
359            .ok_or_else(|| BindError::UnknownSymbol {
360                name: new_canonical.clone(),
361            })?;
362        let previous_canonical =
363            std::mem::replace(&mut entry.canonical_name, new_canonical.clone());
364        if previous_canonical != new_canonical {
365            entry.aliases.push(previous_canonical);
366        }
367        self.names_to_id.insert(new_canonical, id);
368        Ok(())
369    }
370
371    /// Replay an `SYMBOL_RETIRE` canonical record. Marks the symbol
372    /// retired. `name` is the symbol's canonical name at retire time;
373    /// propagated into the error for diagnosability.
374    ///
375    /// # Errors
376    ///
377    /// - [`BindError::UnknownSymbol`] if `id` has never been allocated.
378    pub fn replay_retire(&mut self, id: SymbolId, name: String) -> Result<(), BindError> {
379        let entry = self
380            .entries
381            .get_mut(&id)
382            .ok_or(BindError::UnknownSymbol { name })?;
383        entry.retired = true;
384        self.retired.insert(id);
385        Ok(())
386    }
387
388    fn attach_alias(&mut self, id: SymbolId, alias: String) -> Result<(), BindError> {
389        let entry = self
390            .entries
391            .get_mut(&id)
392            .ok_or_else(|| BindError::UnknownSymbol {
393                name: alias.clone(),
394            })?;
395        if entry.aliases.len() >= ALIAS_CHAIN_LIMIT {
396            return Err(BindError::AliasChainLengthExceeded {
397                name: alias,
398                limit: ALIAS_CHAIN_LIMIT,
399            });
400        }
401        entry.aliases.push(alias.clone());
402        self.names_to_id.insert(alias, id);
403        Ok(())
404    }
405}
406
/// Errors produced by the binder.
///
/// Typed per `PRINCIPLES.md` § 2. Agents route recovery on the error
/// variant, never by matching message text.
#[derive(Debug, Error, PartialEq)]
pub enum BindError {
    /// A symbol was used in a slot expecting a different kind than the
    /// one locked at first allocation.
    #[error("symbol kind mismatch for {name:?}: expected {expected:?}, locked as {existing:?}")]
    SymbolKindMismatch {
        /// The symbol name.
        name: String,
        /// The slot's expected kind.
        expected: SymbolKind,
        /// The kind this symbol was allocated with.
        existing: SymbolKind,
    },

    /// A name (or replayed ID) is already bound where that is not
    /// allowed: two distinct symbols cannot share a name. Returned by
    /// [`SymbolTable::allocate`], [`SymbolTable::rename`],
    /// [`SymbolTable::replay_allocate`], and by
    /// [`SymbolTable::add_alias`] when its two names resolve to
    /// different symbols.
    #[error("rename conflict: {name:?} already bound")]
    SymbolRenameConflict {
        /// The conflicting name.
        name: String,
    },

    /// Alias chain length exceeded per [`ALIAS_CHAIN_LIMIT`].
    #[error("alias chain for {name:?} exceeded length limit {limit}")]
    AliasChainLengthExceeded {
        /// The offending alias.
        name: String,
        /// The limit.
        limit: usize,
    },

    /// An operation referenced a symbol the table does not know:
    /// rename, retire/unretire, an alias between two unknown names, or
    /// a replayed record whose ID was never allocated.
    #[error("unknown symbol {name:?}")]
    UnknownSymbol {
        /// The offending name. On replay paths this is the name carried
        /// by the record, since the lookup that failed was by ID.
        name: String,
    },

    /// A `@name:Kind` annotation used a Kind that isn't in the
    /// [`SymbolKind`] taxonomy.
    #[error("unknown SymbolKind annotation {found:?}")]
    BadKind {
        /// The annotation text.
        found: String,
    },

    /// An Inferential memory used a `method` symbol whose name is not
    /// in the registered inference-method set
    /// ([`crate::inference_methods::InferenceMethod`]).
    #[error("unregistered inference method {found:?}")]
    UnregisteredInferenceMethod {
        /// The offending method name.
        found: String,
    },

    /// A keyword argument's value did not have the expected shape.
    #[error("invalid keyword value for {keyword:?}: {reason}")]
    InvalidKeywordValue {
        /// The keyword.
        keyword: String,
        /// A short diagnostic.
        reason: &'static str,
    },

    /// A confidence value violated its range at bind time. Converts
    /// from [`ConfidenceError`] via `?`.
    #[error("confidence out of range: {0}")]
    ConfidenceOutOfRange(#[from] ConfidenceError),

    /// A value position received a [`RawValue::List`] where a scalar
    /// was required.
    #[error("unexpected list value at {slot:?}")]
    UnexpectedList {
        /// The slot name.
        slot: &'static str,
    },

    /// A timestamp keyword value was missing or not a timestamp.
    #[error("missing or malformed timestamp for keyword {keyword:?}")]
    InvalidTimestampKeyword {
        /// The keyword.
        keyword: String,
    },

    /// A cross-workspace symbol was referenced but this binder is
    /// scoped to a single workspace and cannot locally allocate.
    #[error("cross-workspace symbol reference not allowed locally: {scoped:?}")]
    ForeignSymbolForbidden {
        /// The offending scoped reference.
        scoped: ScopedSymbolId,
    },

    /// A `(episode :start :label …)` label exceeds the
    /// `episode-semantics.md` § 4.3 256-byte cap.
    #[error("episode label length {len} exceeds {cap}-byte cap")]
    LabelTooLong {
        /// Actual byte length of the offending label.
        len: usize,
        /// Configured cap (spec § 4.3 — 256).
        cap: usize,
    },
}
511
/// Bound keyword arguments — keys preserved as strings, values typed.
/// Backed by a `BTreeMap`, so iteration visits keywords in sorted key
/// order.
pub type BoundKeywords = BTreeMap<String, Value>;
514
/// An AST form with all `RawSymbolName`s resolved to `SymbolId`s and
/// all `RawValue`s converted to typed [`Value`]s.
///
/// Produced by [`bind`]. Consumed by the Semantic pipeline stage.
#[derive(Clone, Debug, PartialEq)]
// The `Form` suffix is kept for symmetry with `UnboundForm`.
#[allow(clippy::module_name_repetitions)]
pub enum BoundForm {
    /// Semantic memory write.
    Sem {
        /// Subject (bound in the `Agent` kind slot).
        s: SymbolId,
        /// Predicate (bound in the `Predicate` kind slot).
        p: SymbolId,
        /// Object.
        o: Value,
        /// Keyword arguments: `src`, `c`, `v`, optionally `projected`.
        keywords: BoundKeywords,
    },
    /// Episodic memory write.
    Epi {
        /// Stable event ID.
        event_id: SymbolId,
        /// Event-type symbol.
        kind: SymbolId,
        /// Participant symbols (may be empty), in source order.
        participants: Vec<SymbolId>,
        /// Location symbol.
        location: SymbolId,
        /// Keyword arguments: `at`, `obs`, `src`, `c`.
        keywords: BoundKeywords,
    },
    /// Procedural memory write.
    Pro {
        /// Stable rule ID.
        rule_id: SymbolId,
        /// Trigger value.
        trigger: Value,
        /// Action value.
        action: Value,
        /// Keyword arguments: `scp`, `src`, `c`, optional `pre`.
        keywords: BoundKeywords,
    },
    /// Inferential memory write.
    Inf {
        /// Subject.
        s: SymbolId,
        /// Predicate.
        p: SymbolId,
        /// Object.
        o: Value,
        /// Parent memory symbols, in source order.
        derived_from: Vec<SymbolId>,
        /// Registered inference-method symbol; its name has been
        /// checked against the registered method set during bind.
        method: SymbolId,
        /// Keyword arguments: `c`, `v`, optional `projected`.
        keywords: BoundKeywords,
    },
    /// Alias declaration.
    Alias {
        /// First symbol.
        a: SymbolId,
        /// Second symbol.
        b: SymbolId,
    },
    /// Rename — old name becomes alias of new (already canonical).
    ///
    /// A rename keeps the symbol's ID, so the binder fills both fields
    /// with the same `SymbolId`.
    Rename {
        /// The previous canonical.
        old: SymbolId,
        /// The new canonical.
        new: SymbolId,
    },
    /// Retire a symbol.
    Retire {
        /// Target.
        name: SymbolId,
        /// Optional reason; present only when the `reason` keyword was
        /// given as a string.
        reason: Option<String>,
    },
    /// Correct a prior Episodic memory.
    Correct {
        /// The Episode being corrected.
        target_episode: SymbolId,
        /// The corrected Episodic body.
        corrected: Box<BoundForm>,
    },
    /// Promote an ephemeral memory to canonical.
    Promote {
        /// The ephemeral memory symbol.
        name: SymbolId,
    },
    /// Read-path query.
    Query {
        /// Optional positional selector.
        selector: Option<Value>,
        /// Remaining keyword arguments.
        keywords: BoundKeywords,
    },
    /// Explicit Episode-boundary directive (`episode-semantics.md`
    /// § 3.2). `:close` variants carry no metadata; `:start` variants
    /// may carry any combination of label / parent / retracts.
    Episode {
        /// Whether this form opens or closes an Episode.
        action: crate::parse::EpisodeAction,
        /// Optional human-readable label — already checked for
        /// length during bind so the semantic stage can trust it.
        label: Option<String>,
        /// Resolved parent Episode `SymbolId`, if set.
        parent_episode: Option<SymbolId>,
        /// Resolved retracted-Episode `SymbolId`s.
        retracts: Vec<SymbolId>,
    },
    /// Pin / unpin / authoritative flag write.
    Flag {
        /// Which flag operation.
        action: crate::parse::FlagAction,
        /// The memory being pinned / unpinned / (un)marked.
        memory: SymbolId,
        /// The invoking agent / operator (required per
        /// `confidence-decay.md` §§ 7 / 8 audit trail).
        actor: SymbolId,
    },
}
637
638/// Bind a sequence of unbound forms against the given symbol table.
639///
640/// Mutations to `table` (symbol allocations, renames, alias attachments,
641/// retirement flag flips) are applied as each form binds. Pipeline
642/// callers that need transactional semantics should snapshot the table
643/// before calling [`bind`] and roll back on error.
644///
645/// # Errors
646///
647/// Returns the first [`BindError`] encountered.
648pub fn bind(
649    forms: Vec<UnboundForm>,
650    table: &mut SymbolTable,
651) -> Result<(Vec<BoundForm>, Vec<SymbolMutation>), BindError> {
652    let mut journal = Vec::new();
653    let bound = forms
654        .into_iter()
655        .map(|form| bind_form(form, table, &mut journal))
656        .collect::<Result<Vec<_>, _>>()?;
657    Ok((bound, journal))
658}
659
/// Bind a single [`UnboundForm`] into its [`BoundForm`] counterpart.
///
/// Symbol names are resolved through `table`, and every symbol-table
/// mutation made here is appended to `journal`. Symbols are processed
/// in the order they appear in the form, which fixes the order of
/// journal entries. `Correct` recurses into its nested form with the
/// same table and journal.
///
/// The helpers `resolve_or_allocate`, `ensure_allocated`, `bind_value`,
/// `bind_keywords` and the `*_keyword_kinds` tables are defined
/// elsewhere in this file.
///
/// # Errors
///
/// Returns the first [`BindError`] raised by a helper or by the table.
/// Mutations already applied to `table` and `journal` are not undone.
// One arm per `UnboundForm` variant keeps the whole dispatch in one
// place, hence the length.
#[allow(clippy::too_many_lines)]
fn bind_form(
    form: UnboundForm,
    table: &mut SymbolTable,
    journal: &mut Vec<SymbolMutation>,
) -> Result<BoundForm, BindError> {
    match form {
        UnboundForm::Sem { s, p, o, keywords } => {
            let s = resolve_or_allocate(table, journal, &s, SymbolKind::Agent)?;
            let p = resolve_or_allocate(table, journal, &p, SymbolKind::Predicate)?;
            let o = bind_value(o, table, journal, "sem.o", SymbolKind::Literal)?;
            let keywords = bind_keywords(keywords, table, journal, sem_keyword_kinds())?;
            Ok(BoundForm::Sem { s, p, o, keywords })
        }
        UnboundForm::Epi {
            event_id,
            kind,
            participants,
            location,
            keywords,
        } => {
            let event_id = resolve_or_allocate(table, journal, &event_id, SymbolKind::Memory)?;
            let kind = resolve_or_allocate(table, journal, &kind, SymbolKind::EventType)?;
            let participants: Vec<SymbolId> = participants
                .iter()
                .map(|name| resolve_or_allocate(table, journal, name, SymbolKind::Agent))
                .collect::<Result<_, _>>()?;
            let location = resolve_or_allocate(table, journal, &location, SymbolKind::Literal)?;
            let keywords = bind_keywords(keywords, table, journal, epi_keyword_kinds())?;
            Ok(BoundForm::Epi {
                event_id,
                kind,
                participants,
                location,
                keywords,
            })
        }
        UnboundForm::Pro {
            rule_id,
            trigger,
            action,
            keywords,
        } => {
            let rule_id = resolve_or_allocate(table, journal, &rule_id, SymbolKind::Memory)?;
            let trigger = bind_value(trigger, table, journal, "pro.trigger", SymbolKind::Literal)?;
            let action = bind_value(action, table, journal, "pro.action", SymbolKind::Literal)?;
            let keywords = bind_keywords(keywords, table, journal, pro_keyword_kinds())?;
            Ok(BoundForm::Pro {
                rule_id,
                trigger,
                action,
                keywords,
            })
        }
        UnboundForm::Inf {
            s,
            p,
            o,
            derived_from,
            method,
            keywords,
        } => {
            let s = resolve_or_allocate(table, journal, &s, SymbolKind::Agent)?;
            let p = resolve_or_allocate(table, journal, &p, SymbolKind::Predicate)?;
            let o = bind_value(o, table, journal, "inf.o", SymbolKind::Literal)?;
            let derived_from: Vec<SymbolId> = derived_from
                .iter()
                .map(|name| resolve_or_allocate(table, journal, name, SymbolKind::Memory))
                .collect::<Result<_, _>>()?;
            let method = resolve_or_allocate(table, journal, &method, SymbolKind::InferenceMethod)?;
            // The method symbol is resolved first and its name checked
            // against the registered set afterwards.
            // NOTE(review): if `resolve_or_allocate` allocates unknown
            // names, an unregistered method is already in the table and
            // journal when this error returns — confirm that is intended.
            let method_name = method_name_for(method, table);
            if crate::inference_methods::InferenceMethod::from_symbol_name(&method_name).is_none() {
                return Err(BindError::UnregisteredInferenceMethod { found: method_name });
            }
            let keywords = bind_keywords(keywords, table, journal, inf_keyword_kinds())?;
            Ok(BoundForm::Inf {
                s,
                p,
                o,
                derived_from,
                method,
                keywords,
            })
        }
        UnboundForm::Alias { a, b } => {
            let a_id = ensure_allocated(table, journal, &a, SymbolKind::Literal)?;
            let b_id = ensure_allocated(table, journal, &b, SymbolKind::Literal)?;
            // Pre-check: if both names already resolve to the same
            // symbol, add_alias is a no-op and we emit no journal entry.
            let already_aliased = a_id == b_id
                && table.entry(a_id).is_some_and(|e| {
                    e.canonical_name == b.as_str() || e.aliases.iter().any(|n| n == b.as_str())
                });
            table.add_alias(a.as_str(), b.as_str())?;
            if !already_aliased {
                // One of the two names has just been attached as an
                // alias of the shared symbol. Record the newly-attached
                // name; the kind is whichever side held the existing
                // allocation.
                let (attached_to, new_alias) = if let Some(entry) = table.entry(a_id) {
                    if entry.aliases.iter().any(|n| n == b.as_str()) {
                        (a_id, b.as_str().to_string())
                    } else {
                        (b_id, a.as_str().to_string())
                    }
                } else {
                    (a_id, b.as_str().to_string())
                };
                let kind = table.kind_of(attached_to).unwrap_or(SymbolKind::Literal);
                journal.push(SymbolMutation::Alias {
                    id: attached_to,
                    alias: new_alias,
                    kind,
                });
            }
            Ok(BoundForm::Alias { a: a_id, b: b_id })
        }
        UnboundForm::Rename { old, new } => {
            let id = table.rename(old.as_str(), new.as_str())?;
            let kind = table.kind_of(id).unwrap_or(SymbolKind::Literal);
            journal.push(SymbolMutation::Rename {
                id,
                new_canonical: new.as_str().to_string(),
                kind,
            });
            // A rename keeps the symbol's ID, so both sides carry it.
            Ok(BoundForm::Rename { old: id, new: id })
        }
        UnboundForm::Retire { name, keywords } => {
            let id = table.retire(name.as_str())?;
            let kind = table.kind_of(id).unwrap_or(SymbolKind::Literal);
            // Record the symbol's current canonical name (which may
            // differ from `name` if the agent retired by an alias) so
            // the log entry and any replay error reference the
            // canonical identifier.
            let canonical = table
                .entry(id)
                .map_or_else(|| name.as_str().to_string(), |e| e.canonical_name.clone());
            journal.push(SymbolMutation::Retire {
                id,
                name: canonical,
                kind,
            });
            // Only a string `reason` is kept; any other value shape is
            // ignored rather than reported as an error.
            let reason = keywords.get("reason").and_then(|v| match v {
                RawValue::String(s) => Some(s.clone()),
                _ => None,
            });
            Ok(BoundForm::Retire { name: id, reason })
        }
        UnboundForm::Correct {
            target_episode,
            corrected,
        } => {
            let target = resolve_or_allocate(table, journal, &target_episode, SymbolKind::Memory)?;
            // The nested form shares this table and journal.
            let bound = bind_form(*corrected, table, journal)?;
            Ok(BoundForm::Correct {
                target_episode: target,
                corrected: Box::new(bound),
            })
        }
        UnboundForm::Promote { name } => {
            let id = resolve_or_allocate(table, journal, &name, SymbolKind::Memory)?;
            Ok(BoundForm::Promote { name: id })
        }
        UnboundForm::Query { selector, keywords } => {
            let selector = selector
                .map(|v| bind_value(v, table, journal, "query.selector", SymbolKind::Literal))
                .transpose()?;
            // Query keyword types are heterogeneous; accept any typed value.
            let keywords = bind_keywords(keywords, table, journal, &BTreeMap::new())?;
            Ok(BoundForm::Query { selector, keywords })
        }
        UnboundForm::Flag {
            action,
            memory,
            actor,
        } => {
            let memory = resolve_or_allocate(table, journal, &memory, SymbolKind::Memory)?;
            let actor = resolve_or_allocate(table, journal, &actor, SymbolKind::Agent)?;
            Ok(BoundForm::Flag {
                action,
                memory,
                actor,
            })
        }
        UnboundForm::Episode {
            action,
            label,
            parent_episode,
            retracts,
        } => {
            // Label length cap per episode-semantics.md § 4.3.
            // Checked before any symbol is resolved, so an over-long
            // label is rejected without touching the table.
            if let Some(ref l) = label {
                if l.len() > MAX_EPISODE_LABEL_BYTES {
                    return Err(BindError::LabelTooLong {
                        len: l.len(),
                        cap: MAX_EPISODE_LABEL_BYTES,
                    });
                }
            }
            let parent_episode = parent_episode
                .map(|raw| resolve_or_allocate(table, journal, &raw, SymbolKind::Memory))
                .transpose()?;
            let retracts = retracts
                .into_iter()
                .map(|raw| resolve_or_allocate(table, journal, &raw, SymbolKind::Memory))
                .collect::<Result<Vec<_>, _>>()?;
            Ok(BoundForm::Episode {
                action,
                label,
                parent_episode,
                retracts,
            })
        }
    }
}
875
/// `episode-semantics.md` § 4.3 label cap.
///
/// Compared against the label's `len()` (i.e. its UTF-8 byte length, not its
/// character count) when an `Episode` form is bound; a longer label is
/// rejected with [`BindError::LabelTooLong`].
const MAX_EPISODE_LABEL_BYTES: usize = 256;
878
879fn method_name_for(method: SymbolId, table: &SymbolTable) -> String {
880    table
881        .entry(method)
882        .map_or_else(String::new, |e| e.canonical_name.clone())
883}
884
885fn resolve_or_allocate(
886    table: &mut SymbolTable,
887    journal: &mut Vec<SymbolMutation>,
888    name: &RawSymbolName,
889    default_kind: SymbolKind,
890) -> Result<SymbolId, BindError> {
891    // If the source carried an explicit `:Kind` annotation, it overrides
892    // the position default and must be used both for first allocation
893    // and for consistency validation on reuse.
894    let effective_kind = if let Some(annotation) = &name.kind {
895        parse_symbol_kind(annotation)?
896    } else {
897        default_kind
898    };
899    if let Some(id) = table.lookup(name.as_str()) {
900        let existing = table.kind_of(id).ok_or_else(|| BindError::UnknownSymbol {
901            name: name.name.clone(),
902        })?;
903        if existing != effective_kind {
904            return Err(BindError::SymbolKindMismatch {
905                name: name.name.clone(),
906                expected: effective_kind,
907                existing,
908            });
909        }
910        return Ok(id);
911    }
912    let id = table.allocate(name.name.clone(), effective_kind)?;
913    journal.push(SymbolMutation::Allocate {
914        id,
915        name: name.name.clone(),
916        kind: effective_kind,
917    });
918    Ok(id)
919}
920
921fn ensure_allocated(
922    table: &mut SymbolTable,
923    journal: &mut Vec<SymbolMutation>,
924    name: &RawSymbolName,
925    default_kind: SymbolKind,
926) -> Result<SymbolId, BindError> {
927    if let Some(id) = table.lookup(name.as_str()) {
928        return Ok(id);
929    }
930    let id = table.allocate(name.name.clone(), default_kind)?;
931    journal.push(SymbolMutation::Allocate {
932        id,
933        name: name.name.clone(),
934        kind: default_kind,
935    });
936    Ok(id)
937}
938
939fn bind_value(
940    raw: RawValue,
941    table: &mut SymbolTable,
942    journal: &mut Vec<SymbolMutation>,
943    slot: &'static str,
944    default_kind_for_symbols: SymbolKind,
945) -> Result<Value, BindError> {
946    match raw {
947        RawValue::RawSymbol(name) => {
948            let id = resolve_or_allocate(table, journal, &name, default_kind_for_symbols)?;
949            Ok(Value::Symbol(id))
950        }
951        RawValue::TypedSymbol { name, kind } => {
952            let parsed_kind = parse_symbol_kind(&kind)?;
953            let id = resolve_or_allocate(table, journal, &name, parsed_kind)?;
954            Ok(Value::Symbol(id))
955        }
956        RawValue::Bareword(s) | RawValue::String(s) => Ok(Value::String(s)),
957        RawValue::Integer(i) => Ok(Value::Integer(i)),
958        RawValue::Float(f) => Ok(Value::Float(f)),
959        RawValue::Boolean(b) => Ok(Value::Boolean(b)),
960        RawValue::Timestamp(ct) => Ok(Value::Timestamp(ct)),
961        RawValue::TimestampRaw(text) => Err(BindError::InvalidTimestampKeyword { keyword: text }),
962        RawValue::Nil | RawValue::List(_) => Err(BindError::UnexpectedList { slot }),
963    }
964}
965
966fn bind_keywords(
967    raw: KeywordArgs,
968    table: &mut SymbolTable,
969    journal: &mut Vec<SymbolMutation>,
970    kind_hints: &BTreeMap<&'static str, SymbolKind>,
971) -> Result<BoundKeywords, BindError> {
972    let mut out = BoundKeywords::new();
973    for (key, value) in raw {
974        let fallback_kind = kind_hints
975            .get(key.as_str())
976            .copied()
977            .unwrap_or(SymbolKind::Literal);
978        // Confidence keyword `c` is preserved as Value::Float here; the
979        // semantic stage converts to Confidence and enforces source-bound
980        // + range (per grounding-model.md § 4).
981        let bound = if key == "c" {
982            #[allow(clippy::cast_precision_loss)]
983            let f = match value {
984                RawValue::Float(f) => f,
985                RawValue::Integer(i) => i as f64,
986                _ => {
987                    return Err(BindError::InvalidKeywordValue {
988                        keyword: key,
989                        reason: "expected numeric confidence in [0.0, 1.0]",
990                    });
991                }
992            };
993            Value::Float(f)
994        } else if key == "projected"
995            || key == "include_retired"
996            || key == "include_projected"
997            || key == "show_framing"
998            || key == "explain_filtered"
999            || key == "debug_mode"
1000        {
1001            let RawValue::Boolean(b) = value else {
1002                return Err(BindError::InvalidKeywordValue {
1003                    keyword: key,
1004                    reason: "expected boolean",
1005                });
1006            };
1007            Value::Boolean(b)
1008        } else {
1009            bind_value_with_fallback(value, table, journal, fallback_kind)?
1010        };
1011        out.insert(key, bound);
1012    }
1013    Ok(out)
1014}
1015
1016fn bind_value_with_fallback(
1017    raw: RawValue,
1018    table: &mut SymbolTable,
1019    journal: &mut Vec<SymbolMutation>,
1020    fallback_kind: SymbolKind,
1021) -> Result<Value, BindError> {
1022    // Shared-slot entry point — delegates to bind_value with the fallback kind.
1023    bind_value(raw, table, journal, "keyword value", fallback_kind)
1024}
1025
1026/// Map a `@name:Kind` annotation's kind portion to [`SymbolKind`].
1027///
1028/// # Errors
1029///
1030/// Returns [`BindError::BadKind`] if `text` is not one of the twelve
1031/// [`SymbolKind`] variant names per `symbol-identity-semantics.md` § 4.
1032pub fn parse_symbol_kind(text: &str) -> Result<SymbolKind, BindError> {
1033    let kind = match text {
1034        "Agent" => SymbolKind::Agent,
1035        "Document" => SymbolKind::Document,
1036        "Registry" => SymbolKind::Registry,
1037        "Service" => SymbolKind::Service,
1038        "Policy" => SymbolKind::Policy,
1039        "Memory" => SymbolKind::Memory,
1040        "InferenceMethod" => SymbolKind::InferenceMethod,
1041        "Scope" => SymbolKind::Scope,
1042        "Predicate" => SymbolKind::Predicate,
1043        "EventType" => SymbolKind::EventType,
1044        "Workspace" => SymbolKind::Workspace,
1045        "Literal" => SymbolKind::Literal,
1046        _ => {
1047            return Err(BindError::BadKind {
1048                found: text.to_string(),
1049            });
1050        }
1051    };
1052    Ok(kind)
1053}
1054
1055fn sem_keyword_kinds() -> &'static BTreeMap<&'static str, SymbolKind> {
1056    static KINDS: std::sync::OnceLock<BTreeMap<&'static str, SymbolKind>> =
1057        std::sync::OnceLock::new();
1058    KINDS.get_or_init(|| {
1059        let mut m = BTreeMap::new();
1060        m.insert("src", SymbolKind::Agent);
1061        m
1062    })
1063}
1064
1065fn epi_keyword_kinds() -> &'static BTreeMap<&'static str, SymbolKind> {
1066    static KINDS: std::sync::OnceLock<BTreeMap<&'static str, SymbolKind>> =
1067        std::sync::OnceLock::new();
1068    KINDS.get_or_init(|| {
1069        let mut m = BTreeMap::new();
1070        m.insert("src", SymbolKind::Agent);
1071        m
1072    })
1073}
1074
1075fn pro_keyword_kinds() -> &'static BTreeMap<&'static str, SymbolKind> {
1076    static KINDS: std::sync::OnceLock<BTreeMap<&'static str, SymbolKind>> =
1077        std::sync::OnceLock::new();
1078    KINDS.get_or_init(|| {
1079        let mut m = BTreeMap::new();
1080        m.insert("src", SymbolKind::Agent);
1081        m.insert("scp", SymbolKind::Scope);
1082        m
1083    })
1084}
1085
1086fn inf_keyword_kinds() -> &'static BTreeMap<&'static str, SymbolKind> {
1087    static KINDS: std::sync::OnceLock<BTreeMap<&'static str, SymbolKind>> =
1088        std::sync::OnceLock::new();
1089    KINDS.get_or_init(BTreeMap::new)
1090}
1091
// Unit tests for the symbol table operations and for `bind` driven from
// parsed source text.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::parse::parse;

    // Every test starts from an empty symbol table.
    fn fresh_table() -> SymbolTable {
        SymbolTable::new()
    }

    // A freshly allocated name resolves to its ID and reports the kind it
    // was allocated with.
    #[test]
    fn allocate_and_lookup() {
        let mut table = fresh_table();
        let id = table.allocate("alice".into(), SymbolKind::Agent).unwrap();
        assert_eq!(table.lookup("alice"), Some(id));
        assert_eq!(table.kind_of(id), Some(SymbolKind::Agent));
    }

    // Successive allocations yield strictly increasing IDs.
    #[test]
    fn monotonic_allocation() {
        let mut table = fresh_table();
        let a = table.allocate("a".into(), SymbolKind::Agent).unwrap();
        let b = table.allocate("b".into(), SymbolKind::Agent).unwrap();
        let c = table.allocate("c".into(), SymbolKind::Agent).unwrap();
        assert!(a.as_u64() < b.as_u64());
        assert!(b.as_u64() < c.as_u64());
    }

    // After a rename the ID is unchanged, both names resolve to it, the new
    // name is canonical, and the old name is kept as an alias.
    #[test]
    fn rename_preserves_id_and_swaps_canonical() {
        let mut table = fresh_table();
        let id = table.allocate("old".into(), SymbolKind::Agent).unwrap();
        let after = table.rename("old", "new").unwrap();
        assert_eq!(id, after);
        assert_eq!(table.lookup("new"), Some(id));
        assert_eq!(table.lookup("old"), Some(id));
        let entry = table.entry(id).unwrap();
        assert_eq!(entry.canonical_name, "new");
        assert!(entry.aliases.contains(&"old".to_string()));
    }

    // NOTE(review): despite its name, this test covers the conflict path —
    // aliasing between two separately allocated symbols is rejected and the
    // original mapping for `a` is left intact.
    #[test]
    fn alias_collapses_to_same_id() {
        let mut table = fresh_table();
        let a = table.allocate("a".into(), SymbolKind::Agent).unwrap();
        table.allocate("b".into(), SymbolKind::Agent).unwrap();
        // `a` and `b` are distinct allocations — alias should refuse to
        // merge them.
        assert!(matches!(
            table.add_alias("a", "b"),
            Err(BindError::SymbolRenameConflict { .. })
        ));
        assert_eq!(table.lookup("a"), Some(a));
    }

    // Retiring sets the retired flag; unretiring clears it again.
    #[test]
    fn retire_and_unretire_round_trip() {
        let mut table = fresh_table();
        let id = table.allocate("tmp".into(), SymbolKind::Agent).unwrap();
        assert!(!table.is_retired(id));
        table.retire("tmp").unwrap();
        assert!(table.is_retired(id));
        table.unretire("tmp").unwrap();
        assert!(!table.is_retired(id));
    }

    // Binding a `sem` form: the subject is allocated as Agent, the predicate
    // as Predicate, a string object stays a string, and the `src`, `c` and
    // `v` keywords are all present in the bound output.
    #[test]
    fn bind_sem_form_produces_bound_ids() {
        let mut table = fresh_table();
        let forms =
            parse(r#"(sem @alice email "alice@example.com" :src @profile :c 0.95 :v 2024-01-15)"#)
                .unwrap();
        let (bound, _journal) = bind(forms, &mut table).unwrap();
        assert_eq!(bound.len(), 1);
        let BoundForm::Sem { s, p, o, keywords } = &bound[0] else {
            panic!("expected Sem");
        };
        assert_eq!(table.kind_of(*s), Some(SymbolKind::Agent));
        assert_eq!(table.kind_of(*p), Some(SymbolKind::Predicate));
        assert_eq!(o, &Value::String("alice@example.com".into()));
        assert!(keywords.contains_key("src"));
        assert!(keywords.contains_key("c"));
        assert!(keywords.contains_key("v"));
    }

    // Reusing a symbol in a position that implies a different kind than the
    // one it was allocated with must fail with `SymbolKindMismatch`.
    #[test]
    fn kind_mismatch_on_reuse_is_reported() {
        let mut table = fresh_table();
        // First allocation locks `@x` as Agent.
        let _ = table.allocate("x".into(), SymbolKind::Agent).unwrap();
        // The parser in a sem form uses `@x` as the predicate — which
        // is locked to Predicate kind. That conflicts.
        let forms = parse(r#"(sem @alice @x "v" :src @profile :c 0.5 :v 2024-01-15)"#).unwrap();
        let err = bind(forms, &mut table).unwrap_err();
        assert!(matches!(err, BindError::SymbolKindMismatch { .. }));
    }

    // An `inf` form naming `@bogus_method` is rejected with
    // `UnregisteredInferenceMethod`.
    #[test]
    fn unregistered_inference_method_errors() {
        let mut table = fresh_table();
        let forms = parse("(inf @a p @b (@m1) @bogus_method :c 0.5 :v 2024-01-15)").unwrap();
        let err = bind(forms, &mut table).unwrap_err();
        assert!(matches!(err, BindError::UnregisteredInferenceMethod { .. }));
    }

    // An `inf` form naming `@pattern_summarize` binds without error and
    // yields exactly one bound form.
    #[test]
    fn registered_method_binds_cleanly() {
        let mut table = fresh_table();
        let forms = parse("(inf @a p @b (@m1) @pattern_summarize :c 0.7 :v 2024-03-15)").unwrap();
        let (bound, _journal) = bind(forms, &mut table).unwrap();
        assert_eq!(bound.len(), 1);
    }

    // Two forms in one batch: the rename makes `new` canonical, then the
    // retire (addressed by the new name) marks that same ID retired.
    #[test]
    fn rename_and_retire_forms_apply_to_table() {
        let mut table = fresh_table();
        let id = table.allocate("old".into(), SymbolKind::Agent).unwrap();
        let forms = parse("(rename @old @new) (retire @new)").unwrap();
        let (_bound, _journal) = bind(forms, &mut table).unwrap();
        let entry = table.entry(id).unwrap();
        assert_eq!(entry.canonical_name, "new");
        assert!(table.is_retired(id));
    }

    // Explicit `:Kind` annotations determine the kind recorded at first
    // allocation (`@book:Document` is stored as Document).
    #[test]
    fn typed_symbol_annotation_locks_kind() {
        let mut table = fresh_table();
        let forms =
            parse(r"(sem @alice:Agent email @book:Document :src @profile :c 0.5 :v 2024-01-15)")
                .unwrap();
        let (_bound, _journal) = bind(forms, &mut table).unwrap();
        let alice = table.lookup("alice").unwrap();
        let book = table.lookup("book").unwrap();
        assert_eq!(table.kind_of(alice), Some(SymbolKind::Agent));
        assert_eq!(table.kind_of(book), Some(SymbolKind::Document));
    }

    // An annotation that is not a known kind name (`:Bogus`) fails binding
    // with `BadKind`.
    #[test]
    fn bad_kind_annotation_errors() {
        let mut table = fresh_table();
        let forms =
            parse(r#"(sem @alice:Bogus email "v" :src @profile :c 0.5 :v 2024-01-15)"#).unwrap();
        let err = bind(forms, &mut table).unwrap_err();
        assert!(matches!(err, BindError::BadKind { .. }));
    }
}