Skip to main content

texform_core/
knowledge.rs

1//! Knowledge base: the backing store behind [`ParseContext`](crate::parse::ParseContext).
2//!
3//! A [`KnowledgeBase`] holds indexed command, environment, character, and
4//! delimiter-control metadata loaded from `texform-knowledge` package definitions.
5//! It is the single source of truth the parser consults when recognizing
6//! control sequences and environments.
7//!
8//! # Architecture
9//!
10//! The KB separates *raw storage* from the *parser-facing active view*:
11//!
12//! - **Explicit commands** are definitions with concrete argument specs
13//!   (`\frac`, `\text`, etc.).
14//! - **Character entries** are zero-arg symbols (`\alpha`, `\div`, etc.)
15//!   that the KB projects into synthetic command views so the parser can
16//!   recognize them uniformly.
17//! - **Active index** maps each name to whichever source (explicit or
18//!   character) is currently authoritative. Explicit commands always win.
19//!
20//! # Package import order
21//!
22//! Managed packages (base, ams, physics, …) are always imported in a
23//! fixed canonical order regardless of the caller-supplied order. This
24//! keeps merge results and `from_packages` arrays stable.
25//!
26//! For rapid prototyping, configuration errors fail fast (panic).
27
28use crate::ast::Node;
29use std::collections::{HashMap, HashSet};
30use texform_argspec::parse_arg_specs;
31use texform_interface::syntax_node::ContentMode;
32use texform_knowledge::builtin::{BuiltinPackage, PackageName};
33
34use crate::parse::{CommandItem, ContextItem, DelimiterControlItem, EnvironmentItem};
35
36pub use texform_argspec::{
37    ArgForm, ArgSpec, ArgSpecParseError, DelimiterToken, ParsedArgSpec, ValueKind,
38};
39use texform_knowledge::specs::CharacterAttributes;
40pub use texform_knowledge::specs::{
41    ActiveCharacterRecord, ActiveCommandRecord, ActiveDelimiterRecord, ActiveEnvironmentRecord,
42    AllowedMode, BuiltinCharacterRecord, BuiltinCommandRecord, BuiltinDelimiterRecord,
43    BuiltinEnvironmentRecord, CommandKind,
44};
45#[cfg(test)]
46use texform_knowledge::specs::{
47    CharacterSpec, CommandSpec, DelimiterSpec, EnvironmentSpec, PackageSpecs,
48};
49
/// Package label recorded on items inserted through the runtime API
/// (`insert_command`, `insert_environment`, `insert_delimiter_control`).
const RUNTIME_PACKAGE_NAME: &str = "runtime";
/// Package label used by test-only helpers that insert or import items
/// without naming a source package.
#[cfg(test)]
const UNKNOWN_PACKAGE_NAME: &str = "unknown";
// Runtime defaults intentionally differ from the full builtin registry:
// `braket` stays opt-in to avoid conflicting default semantics with `physics`.
const DEFAULT_PACKAGE_NAMES: [&str; 6] = [
    "base",
    "ams",
    "physics",
    "textmacros",
    "bboldx",
    "boldsymbol",
];
/// Command names that are never merged when `physics` is one of the
/// contributing packages; see `should_merge_command`.
const PHYSICS_COMMAND_MERGE_DENYLIST: [&str; 3] = ["Pr", "det", "exp"];
64
65pub fn default_package_names() -> &'static [&'static str] {
66    &DEFAULT_PACKAGE_NAMES
67}
68
/// Failure reported when a package cannot be loaded by name.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum PackageLoadError {
    /// No package called `name` exists in the `texform-knowledge` registry.
    UnknownPackage { name: String },
}

impl std::fmt::Display for PackageLoadError {
    /// Writes the human-readable message for this error.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::UnknownPackage { name } => {
                f.write_fmt(format_args!("unknown package: {name}"))
            }
        }
    }
}

impl std::error::Error for PackageLoadError {}
87
/// The knowledge base separates raw storage from the parser-facing view:
///
/// - `commands` / `command_idx_by_name`: raw explicit command store.
/// - `characters` / `character_idx_by_name`: raw character store.
/// - `character_command_views`: zero-arg Prefix commands projected from characters,
///   so the parser can still recognize character control sequences as commands.
/// - `active_command_idx_by_name`: the single parser-facing index that tells
///   `lookup_command()` whether the active entry for a name comes from an
///   explicit command or a character-derived view.
/// - `suppressed_command_names`: names removed via `remove_item(Command)`.
///   Prevents a deleted name from "reviving" through a character fallback.
///
/// The `Vec` stores are append-only in this file: replacing or removing an
/// entry only rewrites or drops the corresponding index entry, so older
/// records stay in the vector but become unreachable by name.
#[derive(Debug, Clone)]
pub struct KnowledgeBase {
    /// Explicit command records, in insertion order.
    commands: Vec<ActiveCommandRecord>,
    /// Command name -> index of its current record in `commands`.
    command_idx_by_name: HashMap<&'static str, usize>,
    /// Raw character records, in insertion order.
    characters: Vec<ActiveCharacterRecord>,
    /// Character name -> index of its current record in `characters`.
    character_idx_by_name: HashMap<String, usize>,
    /// Delimiter records, in insertion order.
    delimiters: Vec<ActiveDelimiterRecord>,
    /// `(name, is_control_sequence)` -> index into `delimiters`.
    delimiter_idx_by_key: HashMap<(String, bool), usize>,
    /// Command views synthesized from characters.
    character_command_views: Vec<ActiveCommandRecord>,
    /// Name -> which store (and index) `lookup_command()` should read.
    active_command_idx_by_name: HashMap<String, ActiveCommandSource>,
    /// Names that `lookup_command()` must report as absent.
    suppressed_command_names: HashSet<String>,
    /// Environment records, in insertion order.
    envs: Vec<ActiveEnvironmentRecord>,
    /// Environment name -> index of its current record in `envs`.
    env_idx_by_name: HashMap<&'static str, usize>,
}
113
/// Tracks whether the parser-facing active command for a name points to
/// an explicit command or a character-derived command view.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ActiveCommandSource {
    /// Index into `KnowledgeBase::commands`.
    Explicit(usize),
    /// Index into `KnowledgeBase::character_command_views`.
    Character(usize),
}
121
122impl KnowledgeBase {
123    fn new() -> Self {
124        Self {
125            commands: Vec::new(),
126            command_idx_by_name: HashMap::new(),
127            characters: Vec::new(),
128            character_idx_by_name: HashMap::new(),
129            delimiters: Vec::new(),
130            delimiter_idx_by_key: HashMap::new(),
131            character_command_views: Vec::new(),
132            active_command_idx_by_name: HashMap::new(),
133            suppressed_command_names: HashSet::new(),
134            envs: Vec::new(),
135            env_idx_by_name: HashMap::new(),
136        }
137    }
138
    /// Create a knowledge base with no commands, characters, delimiters,
    /// or environments registered.
    pub fn empty() -> Self {
        Self::new()
    }
146
147    pub fn build_from_packages(packages: &[&str]) -> Self {
148        Self::try_build_from_packages(packages).unwrap_or_else(|error| panic!("{error}"))
149    }
150
151    pub fn try_build_from_packages(packages: &[&str]) -> Result<Self, PackageLoadError> {
152        let mut kb = KnowledgeBase::new();
153        let to_load = canonical_package_import_order(packages);
154        import_package_names(&mut kb, to_load.as_slice())?;
155        Ok(kb)
156    }
157
158    pub fn try_build_from_packages_for_mode(
159        packages: &[&str],
160        target_mode: ContentMode,
161    ) -> Result<Self, PackageLoadError> {
162        let mut kb = KnowledgeBase::new();
163        let to_load = canonical_package_import_order(packages);
164        import_package_names_for_mode(&mut kb, to_load.as_slice(), target_mode)?;
165        Ok(kb)
166    }
167
168    pub fn lookup_command(&self, name: &str) -> Option<&ActiveCommandRecord> {
169        if self.suppressed_command_names.contains(name) {
170            return None;
171        }
172
173        match self.active_command_idx_by_name.get(name).copied()? {
174            ActiveCommandSource::Explicit(idx) => Some(&self.commands[idx]),
175            ActiveCommandSource::Character(idx) => Some(&self.character_command_views[idx]),
176        }
177    }
178
179    /// Look up only the explicit (non-character-derived) command for `name`.
180    pub fn lookup_explicit_command(&self, name: &str) -> Option<&ActiveCommandRecord> {
181        self.command_idx_by_name
182            .get(name)
183            .copied()
184            .map(|idx| &self.commands[idx])
185    }
186
187    /// Look up raw character metadata by control-sequence name.
188    pub fn lookup_character(&self, name: &str) -> Option<&ActiveCharacterRecord> {
189        self.character_idx_by_name
190            .get(name)
191            .copied()
192            .map(|idx| &self.characters[idx])
193    }
194
195    /// Look up environment metadata by name.
196    pub fn lookup_env(&self, name: &str) -> Option<&ActiveEnvironmentRecord> {
197        self.env_idx_by_name
198            .get(name)
199            .copied()
200            .map(|idx| &self.envs[idx])
201    }
202
203    pub fn lookup_delimiter(
204        &self,
205        name: &str,
206        is_control_sequence: bool,
207    ) -> Option<&ActiveDelimiterRecord> {
208        self.delimiter_idx_by_key
209            .get(&(name.to_string(), is_control_sequence))
210            .copied()
211            .map(|idx| &self.delimiters[idx])
212    }
213
214    /// Check whether `name` is registered as a delimiter control sequence.
215    pub fn is_delimiter_control(&self, name: &str) -> bool {
216        self.lookup_delimiter(name, true).is_some()
217    }
218
219    /// Look up a delimiter control, returning the interned `&'static str` name.
220    pub fn lookup_delimiter_control(&self, name: &str) -> Option<&'static str> {
221        self.lookup_delimiter(name, true).map(|record| record.name)
222    }
223
224    /// Insert a context item, dispatching to the appropriate typed inserter.
225    pub fn insert_item(&mut self, item: impl Into<ContextItem>) -> Result<(), ArgSpecParseError> {
226        match item.into() {
227            ContextItem::Command(item) => self.insert_command(item),
228            ContextItem::Environment(item) => self.insert_environment(item),
229            ContextItem::DelimiterControl(item) => {
230                self.insert_delimiter_control(item);
231                Ok(())
232            }
233        }
234    }
235
236    /// Runtime insertion: clears any prior suppression so a previously
237    /// removed name can be re-activated by explicit command injection.
238    pub(crate) fn insert_command(&mut self, item: CommandItem) -> Result<(), ArgSpecParseError> {
239        let meta = command_item_into_meta(item, vec![RUNTIME_PACKAGE_NAME.to_string()])?;
240        let name = meta.name;
241        let idx = self.append_command_meta(meta);
242        self.suppressed_command_names.remove(name);
243        self.set_active_command_source(name, ActiveCommandSource::Explicit(idx));
244        Ok(())
245    }
246
247    /// Remove a previously inserted item. Returns `true` if found.
248    pub fn remove_item(&mut self, item: impl Into<ContextItem>) -> bool {
249        match item.into() {
250            ContextItem::Command(item) => self.remove_command_by_name(item.name.as_str()),
251            ContextItem::Environment(item) => self.remove_environment_by_name(item.name.as_str()),
252            ContextItem::DelimiterControl(item) => {
253                self.remove_delimiter_by_key(item.name.as_str(), true)
254            }
255        }
256    }
257
258    pub fn insert_environment(&mut self, item: EnvironmentItem) -> Result<(), ArgSpecParseError> {
259        let meta = environment_item_into_meta(item, vec![RUNTIME_PACKAGE_NAME.to_string()])?;
260        self.append_env_meta(meta);
261        Ok(())
262    }
263
264    pub fn insert_delimiter_control(&mut self, item: DelimiterControlItem) {
265        if self.lookup_delimiter(item.name.as_str(), true).is_some() {
266            return;
267        }
268
269        let name: &'static str = Box::leak(item.name.into_boxed_str());
270        self.upsert_delimiter_meta(ActiveDelimiterRecord {
271            name,
272            is_control_sequence: true,
273            allowed_mode: AllowedMode::Both,
274            unicode_value: String::new(),
275            attributes: CharacterAttributes::default(),
276            package: RUNTIME_PACKAGE_NAME.to_string(),
277        });
278    }
279
280    /// Removes a command name from both raw and active indices, then adds it
281    /// to the suppression set. This prevents `lookup_command()` from falling
282    /// back to a character-derived view after the name is explicitly removed.
283    pub(crate) fn remove_command_by_name(&mut self, name: &str) -> bool {
284        let explicit_removed = self.command_idx_by_name.remove(name).is_some();
285        let active_removed = self.active_command_idx_by_name.remove(name).is_some();
286
287        if explicit_removed || active_removed {
288            self.suppressed_command_names.insert(name.to_string());
289            return true;
290        }
291
292        false
293    }
294
295    pub(crate) fn remove_environment_by_name(&mut self, name: &str) -> bool {
296        self.env_idx_by_name.remove(name).is_some()
297    }
298
299    fn remove_delimiter_by_key(&mut self, name: &str, is_control_sequence: bool) -> bool {
300        self.delimiter_idx_by_key
301            .remove(&(name.to_string(), is_control_sequence))
302            .is_some()
303    }
304
305    fn set_active_command_source(&mut self, name: impl Into<String>, source: ActiveCommandSource) {
306        self.active_command_idx_by_name.insert(name.into(), source);
307    }
308
309    fn append_command_meta(&mut self, meta: ActiveCommandRecord) -> usize {
310        let idx = self.commands.len();
311        let name = meta.name;
312        self.commands.push(meta);
313        self.command_idx_by_name.insert(name, idx);
314        idx
315    }
316
317    fn append_env_meta(&mut self, meta: ActiveEnvironmentRecord) {
318        let idx = self.envs.len();
319        let name = meta.name;
320        self.envs.push(meta);
321        self.env_idx_by_name.insert(name, idx);
322    }
323
324    fn upsert_character_meta(&mut self, meta: ActiveCharacterRecord) -> usize {
325        let idx = self.characters.len();
326        let name = meta.name.clone();
327        self.characters.push(meta);
328        self.character_idx_by_name.insert(name, idx);
329        idx
330    }
331
332    fn upsert_character_command_view(&mut self, meta: ActiveCommandRecord) -> usize {
333        let idx = self.character_command_views.len();
334        self.character_command_views.push(meta);
335        idx
336    }
337
338    fn upsert_delimiter_meta(&mut self, meta: ActiveDelimiterRecord) -> usize {
339        let idx = self.delimiters.len();
340        let key = (meta.name.to_string(), meta.is_control_sequence);
341        self.delimiters.push(meta);
342        self.delimiter_idx_by_key.insert(key, idx);
343        idx
344    }
345
346    /// Writes raw character metadata and creates a zero-arg Prefix command view
347    /// so the parser can recognize the character as a command head. Does NOT
348    /// write into the explicit command raw store.
349    #[cfg(test)]
350    fn insert_character_with_package(&mut self, character: CharacterSpec, package: &str) {
351        let CharacterSpec {
352            name,
353            allowed_mode,
354            unicode_value,
355            attributes,
356        } = character;
357
358        self.upsert_character_meta(ActiveCharacterRecord {
359            name: name.clone(),
360            allowed_mode,
361            unicode_value,
362            attributes,
363            package: package.to_string(),
364        });
365
366        let view_idx = self.upsert_character_command_view(make_command_meta(
367            name.clone(),
368            CommandKind::Prefix,
369            allowed_mode,
370            vec![],
371            vec![],
372            String::new(),
373            vec![package.to_string()],
374        ));
375        self.set_active_command_source(name, ActiveCommandSource::Character(view_idx));
376    }
377
378    fn insert_builtin_character_with_package(
379        &mut self,
380        character: &'static BuiltinCharacterRecord,
381        package: &str,
382    ) {
383        self.upsert_character_meta(ActiveCharacterRecord {
384            name: character.name.to_string(),
385            allowed_mode: character.allowed_mode,
386            unicode_value: character.unicode_value.to_string(),
387            attributes: character.attributes.into(),
388            package: package.to_string(),
389        });
390
391        let view_idx = self.upsert_character_command_view(ActiveCommandRecord {
392            name: character.name,
393            kind: CommandKind::Prefix,
394            allowed_mode: character.allowed_mode,
395            argspec: texform_knowledge::argspec!(""),
396            tags: &[],
397            from_packages: leak_string_array(vec![package.to_string()]),
398        });
399        self.set_active_command_source(character.name, ActiveCommandSource::Character(view_idx));
400    }
401
402    fn insert_builtin_delimiter_with_package(
403        &mut self,
404        delimiter: &'static BuiltinDelimiterRecord,
405        package: &str,
406    ) {
407        self.upsert_delimiter_meta(ActiveDelimiterRecord {
408            name: delimiter.name,
409            is_control_sequence: delimiter.is_control_sequence,
410            allowed_mode: delimiter.allowed_mode,
411            unicode_value: delimiter.unicode_value.to_string(),
412            attributes: delimiter.attributes.into(),
413            package: package.to_string(),
414        });
415    }
416
417    #[cfg(test)]
418    fn insert_delimiter_with_package(&mut self, delimiter: DelimiterSpec, package: &str) {
419        let name = leak_string(delimiter.name);
420        self.upsert_delimiter_meta(ActiveDelimiterRecord {
421            name,
422            is_control_sequence: delimiter.is_control_sequence,
423            allowed_mode: delimiter.allowed_mode,
424            unicode_value: delimiter.unicode_value,
425            attributes: delimiter.attributes,
426            package: package.to_string(),
427        });
428    }
429
430    #[cfg(test)]
431    pub(crate) fn insert_or_override_command(&mut self, spec: CommandSpec) {
432        self.insert_or_override_command_with_package(spec, UNKNOWN_PACKAGE_NAME);
433    }
434
435    #[cfg(test)]
436    fn insert_or_override_command_with_package(&mut self, spec: CommandSpec, package: &str) {
437        let meta = command_spec_into_meta(spec, vec![package.to_string()]);
438        let idx = self.append_command_meta(meta);
439        let name = self.commands[idx].name;
440        self.set_active_command_source(name, ActiveCommandSource::Explicit(idx));
441    }
442
443    /// Package import path: merges the incoming command with an existing one
444    /// if they share the same name/kind/spec and both come from managed packages;
445    /// otherwise falls back to override (last-writer-wins).
446    #[cfg(test)]
447    fn import_or_merge_command_with_package(&mut self, spec: CommandSpec, package: &str) {
448        let incoming = command_spec_into_meta(spec, vec![package.to_string()]);
449        if let Some(existing_idx) = self.command_idx_by_name.get(incoming.name).copied() {
450            let existing = &self.commands[existing_idx];
451            if should_merge_command(existing, &incoming) {
452                let merged = merge_command_meta(existing, &incoming);
453                let idx = self.append_command_meta(merged);
454                let name = self.commands[idx].name;
455                self.set_active_command_source(name, ActiveCommandSource::Explicit(idx));
456                return;
457            }
458        }
459
460        let idx = self.append_command_meta(incoming);
461        let name = self.commands[idx].name;
462        self.set_active_command_source(name, ActiveCommandSource::Explicit(idx));
463    }
464
465    fn import_or_merge_builtin_command_with_package(
466        &mut self,
467        record: &'static BuiltinCommandRecord,
468        package: &str,
469    ) {
470        let incoming = builtin_command_into_meta(record, vec![package.to_string()]);
471        if let Some(existing_idx) = self.command_idx_by_name.get(incoming.name).copied() {
472            let existing = &self.commands[existing_idx];
473            if should_merge_command(existing, &incoming) {
474                let merged = merge_command_meta(existing, &incoming);
475                let idx = self.append_command_meta(merged);
476                let name = self.commands[idx].name;
477                self.set_active_command_source(name, ActiveCommandSource::Explicit(idx));
478                return;
479            }
480        }
481
482        let idx = self.append_command_meta(incoming);
483        let name = self.commands[idx].name;
484        self.set_active_command_source(name, ActiveCommandSource::Explicit(idx));
485    }
486
487    #[cfg(test)]
488    fn insert_or_override_environment(&mut self, spec: EnvironmentSpec) {
489        self.insert_or_override_environment_with_package(spec, UNKNOWN_PACKAGE_NAME);
490    }
491
492    #[cfg(test)]
493    fn insert_or_override_environment_with_package(
494        &mut self,
495        spec: EnvironmentSpec,
496        package: &str,
497    ) {
498        let meta = environment_spec_into_meta(spec, vec![package.to_string()]);
499        self.append_env_meta(meta);
500    }
501
502    /// Same merge-or-override logic as commands, but for environments.
503    /// Merge requires matching name, argspec source, and body_mode.
504    #[cfg(test)]
505    fn import_or_merge_environment_with_package(&mut self, spec: EnvironmentSpec, package: &str) {
506        let incoming = environment_spec_into_meta(spec, vec![package.to_string()]);
507        if let Some(existing_idx) = self.env_idx_by_name.get(incoming.name).copied() {
508            let existing = &self.envs[existing_idx];
509            if should_merge_environment(existing, &incoming) {
510                self.append_env_meta(merge_environment_meta(existing, &incoming));
511                return;
512            }
513        }
514
515        self.append_env_meta(incoming);
516    }
517
518    fn import_or_merge_builtin_environment_with_package(
519        &mut self,
520        record: &'static BuiltinEnvironmentRecord,
521        package: &str,
522    ) {
523        let incoming = builtin_environment_into_meta(record, vec![package.to_string()]);
524        if let Some(existing_idx) = self.env_idx_by_name.get(incoming.name).copied() {
525            let existing = &self.envs[existing_idx];
526            if should_merge_environment(existing, &incoming) {
527                self.append_env_meta(merge_environment_meta(existing, &incoming));
528                return;
529            }
530        }
531
532        self.append_env_meta(incoming);
533    }
534
535    #[cfg(test)]
536    pub(crate) fn import_package(&mut self, specs: PackageSpecs) {
537        self.import_package_with_name(UNKNOWN_PACKAGE_NAME, specs);
538    }
539
540    #[cfg(test)]
541    fn import_package_with_name(&mut self, package: &str, specs: PackageSpecs) {
542        for character in specs.characters {
543            self.insert_character_with_package(character, package);
544        }
545        for delimiter in specs.delimiters {
546            self.insert_delimiter_with_package(delimiter, package);
547        }
548        for cmd in specs.commands {
549            self.import_or_merge_command_with_package(cmd, package);
550        }
551        for env in specs.environments {
552            self.import_or_merge_environment_with_package(env, package);
553        }
554    }
555
556    fn import_builtin_package(&mut self, package: &'static BuiltinPackage) {
557        for character in package.characters {
558            self.insert_builtin_character_with_package(character, package.name);
559        }
560        for delimiter in package.delimiters {
561            self.insert_builtin_delimiter_with_package(delimiter, package.name);
562        }
563        for command in package.commands {
564            self.import_or_merge_builtin_command_with_package(command, package.name);
565        }
566        for environment in package.environments {
567            self.import_or_merge_builtin_environment_with_package(environment, package.name);
568        }
569    }
570
571    fn import_builtin_package_for_mode(
572        &mut self,
573        package: &'static BuiltinPackage,
574        target_mode: ContentMode,
575    ) {
576        for character in package.characters {
577            if character.allowed_mode.allows(target_mode) {
578                self.insert_builtin_character_with_package(character, package.name);
579            }
580        }
581        for delimiter in package.delimiters {
582            if delimiter.allowed_mode.allows(target_mode) {
583                self.insert_builtin_delimiter_with_package(delimiter, package.name);
584            }
585        }
586        for command in package.commands {
587            if command.allowed_mode.allows(target_mode) {
588                self.import_or_merge_builtin_command_with_package(command, package.name);
589            }
590        }
591        for environment in package.environments {
592            if environment.allowed_mode.allows(target_mode) {
593                self.import_or_merge_builtin_environment_with_package(environment, package.name);
594            }
595        }
596    }
597}
598
599fn make_command_meta(
600    name: String,
601    kind: CommandKind,
602    allowed_mode: AllowedMode,
603    args: Vec<ArgSpec>,
604    tags: Vec<String>,
605    source: String,
606    from_packages: Vec<String>,
607) -> ActiveCommandRecord {
608    ActiveCommandRecord {
609        name: leak_string(name),
610        kind,
611        allowed_mode,
612        argspec: ParsedArgSpec {
613            args: leak_arg_specs(args),
614            source: leak_string(source),
615        },
616        tags: leak_tags(tags),
617        from_packages: leak_string_array(from_packages),
618    }
619}
620
621fn command_item_into_meta(
622    item: CommandItem,
623    from_packages: Vec<String>,
624) -> Result<ActiveCommandRecord, ArgSpecParseError> {
625    let context = format!("command {}", item.name);
626    let args = parse_arg_specs(item.spec.as_str(), context.as_str())?;
627    Ok(make_command_meta(
628        item.name,
629        item.kind,
630        item.allowed_mode,
631        args,
632        item.tags,
633        item.spec,
634        from_packages,
635    ))
636}
637
638fn make_env_meta(
639    name: String,
640    allowed_mode: AllowedMode,
641    args: Vec<ArgSpec>,
642    body_mode: ContentMode,
643    tags: Vec<String>,
644    source: String,
645    from_packages: Vec<String>,
646) -> ActiveEnvironmentRecord {
647    ActiveEnvironmentRecord {
648        name: leak_string(name),
649        allowed_mode,
650        argspec: ParsedArgSpec {
651            args: leak_arg_specs(args),
652            source: leak_string(source),
653        },
654        body_mode,
655        tags: leak_tags(tags),
656        from_packages: leak_string_array(from_packages),
657    }
658}
659
660fn environment_item_into_meta(
661    item: EnvironmentItem,
662    from_packages: Vec<String>,
663) -> Result<ActiveEnvironmentRecord, ArgSpecParseError> {
664    let context = format!("environment {}", item.name);
665    let args = parse_arg_specs(item.spec.as_str(), context.as_str())?;
666    Ok(make_env_meta(
667        item.name,
668        item.allowed_mode,
669        args,
670        item.body_mode,
671        item.tags,
672        item.spec,
673        from_packages,
674    ))
675}
676
/// Test helper: converts an already-parsed `CommandSpec` into an
/// `ActiveCommandRecord` attributed to `from_packages`.
#[cfg(test)]
fn command_spec_into_meta(spec: CommandSpec, from_packages: Vec<String>) -> ActiveCommandRecord {
    let argspec = spec.argspec;
    make_command_meta(
        spec.name,
        spec.kind,
        spec.allowed_mode,
        argspec.args,
        spec.tags,
        argspec.source,
        from_packages,
    )
}
689
690fn builtin_command_into_meta(
691    record: &'static BuiltinCommandRecord,
692    from_packages: Vec<String>,
693) -> ActiveCommandRecord {
694    ActiveCommandRecord {
695        name: record.name,
696        kind: record.kind,
697        allowed_mode: record.allowed_mode,
698        argspec: record.argspec,
699        tags: record.tags,
700        from_packages: leak_string_array(from_packages),
701    }
702}
703
/// Test helper: converts an already-parsed `EnvironmentSpec` into an
/// `ActiveEnvironmentRecord` attributed to `from_packages`.
#[cfg(test)]
fn environment_spec_into_meta(
    spec: EnvironmentSpec,
    from_packages: Vec<String>,
) -> ActiveEnvironmentRecord {
    let argspec = spec.argspec;
    make_env_meta(
        spec.name,
        spec.allowed_mode,
        argspec.args,
        spec.body_mode,
        spec.tags,
        argspec.source,
        from_packages,
    )
}
719
720fn builtin_environment_into_meta(
721    record: &'static BuiltinEnvironmentRecord,
722    from_packages: Vec<String>,
723) -> ActiveEnvironmentRecord {
724    ActiveEnvironmentRecord {
725        name: record.name,
726        allowed_mode: record.allowed_mode,
727        argspec: record.argspec,
728        body_mode: record.body_mode,
729        tags: record.tags,
730        from_packages: leak_string_array(from_packages),
731    }
732}
733
/// Converts `value` into a `&'static str` by leaking its heap allocation.
///
/// Active record structs hold `&'static` references so they can be shared
/// cheaply. The leaked memory is never freed, which is acceptable for a
/// knowledge base that is built once per process.
fn leak_string(value: impl Into<String>) -> &'static str {
    let owned: String = value.into();
    let boxed: Box<str> = owned.into_boxed_str();
    Box::leak(boxed)
}
742
743fn leak_arg_specs(args: Vec<ArgSpec>) -> &'static [ArgSpec] {
744    Box::leak(args.into_boxed_slice())
745}
746
/// Leaks every tag and the slice holding them, preserving order.
fn leak_tags(tags: Vec<String>) -> &'static [&'static str] {
    let mut leaked: Vec<&'static str> = Vec::with_capacity(tags.len());
    for tag in tags {
        leaked.push(Box::leak(tag.into_boxed_str()));
    }
    Box::leak(leaked.into_boxed_slice())
}
754
755fn leak_string_array(values: Vec<String>) -> &'static [&'static str] {
756    let leaked: Vec<&'static str> = values.into_iter().map(leak_string).collect();
757    Box::leak(leaked.into_boxed_slice())
758}
759
/// Returns `requested` with repeated names removed, keeping the first
/// occurrence of each name and the original relative order.
fn dedup_names_in_request_order<'a>(requested: &[&'a str]) -> Vec<&'a str> {
    let mut seen: HashSet<&'a str> = HashSet::with_capacity(requested.len());
    requested
        .iter()
        .copied()
        // `insert` returns `false` for a name that was already seen.
        .filter(|name| seen.insert(*name))
        .collect()
}
769
770fn managed_package_names() -> impl Iterator<Item = &'static str> {
771    texform_knowledge::builtin::MANAGED_PACKAGE_IMPORT_ORDER
772        .iter()
773        .map(|package| package.as_str())
774}
775
776fn is_managed_package(name: &str) -> bool {
777    PackageName::from_str(name).is_some()
778}
779
780/// Reorders requested package names so that managed packages always appear in
781/// the fixed order defined by `MANAGED_PACKAGE_IMPORT_ORDER`, followed by any
782/// unmanaged packages in their original request order. This ensures merge
783/// results and `from_packages` arrays are stable regardless of caller order.
784fn canonical_package_import_order<'a>(requested: &[&'a str]) -> Vec<&'a str> {
785    let unique = dedup_names_in_request_order(requested);
786    let mut normalized = Vec::new();
787
788    // Managed packages first, in the fixed canonical order.
789    for managed in managed_package_names() {
790        if let Some(&name) = unique.iter().find(|&&candidate| candidate == managed) {
791            normalized.push(name);
792        }
793    }
794
795    // Non-managed packages follow, preserving the caller's request order.
796    for &name in &unique {
797        if !is_managed_package(name) {
798            normalized.push(name);
799        }
800    }
801
802    normalized
803}
804
805/// Merge is only attempted between records that both originate from managed
806/// packages. This prevents merge rules from accidentally affecting runtime-
807/// injected commands or unknown-source test fixtures.
808fn from_packages_are_managed(packages: &[&str]) -> bool {
809    !packages.is_empty() && packages.iter().all(|package| is_managed_package(package))
810}
811
812fn is_physics_denylisted_command(name: &str) -> bool {
813    PHYSICS_COMMAND_MERGE_DENYLIST.contains(&name)
814}
815
/// Returns the union of both tag lists as owned strings, sorted and without
/// duplicates.
fn merge_tags(existing: &[&str], incoming: &[&str]) -> Vec<String> {
    let mut merged: Vec<String> = existing
        .iter()
        .chain(incoming)
        .map(|tag| (*tag).to_string())
        .collect();
    // Sorting first makes equal tags adjacent, so `dedup` removes them all.
    merged.sort();
    merged.dedup();
    merged
}
826
827fn merge_from_packages(existing: &[&str], incoming: &[&str]) -> Vec<String> {
828    let combined: Vec<&str> = existing
829        .iter()
830        .copied()
831        .chain(incoming.iter().copied())
832        .collect();
833    canonical_package_import_order(combined.as_slice())
834        .into_iter()
835        .map(ToString::to_string)
836        .collect()
837}
838
839/// Two commands are mergeable iff they share name, kind, and argspec source,
840/// both come from managed packages, and neither side is a physics-denylisted
841/// command (Pr, det, exp — these intentionally override base definitions).
842fn should_merge_command(existing: &ActiveCommandRecord, incoming: &ActiveCommandRecord) -> bool {
843    existing.name == incoming.name
844        && existing.kind == incoming.kind
845        && existing.argspec.source == incoming.argspec.source
846        && from_packages_are_managed(existing.from_packages)
847        && from_packages_are_managed(incoming.from_packages)
848        && !(is_physics_denylisted_command(existing.name)
849            && (existing.from_packages.contains(&"physics")
850                || incoming.from_packages.contains(&"physics")))
851}
852
853fn should_merge_environment(
854    existing: &ActiveEnvironmentRecord,
855    incoming: &ActiveEnvironmentRecord,
856) -> bool {
857    existing.name == incoming.name
858        && existing.argspec.source == incoming.argspec.source
859        && existing.body_mode == incoming.body_mode
860        && from_packages_are_managed(existing.from_packages)
861        && from_packages_are_managed(incoming.from_packages)
862}
863
864/// Produces a merged command: allowed_mode and tags are unioned,
865/// from_packages collects both sources in canonical order.
866fn merge_command_meta(
867    existing: &ActiveCommandRecord,
868    incoming: &ActiveCommandRecord,
869) -> ActiveCommandRecord {
870    debug_assert!(should_merge_command(existing, incoming));
871    debug_assert_eq!(existing.argspec.args, incoming.argspec.args);
872
873    make_command_meta(
874        existing.name.to_string(),
875        existing.kind,
876        existing.allowed_mode.union(incoming.allowed_mode),
877        existing.argspec.args.to_vec(),
878        merge_tags(existing.tags, incoming.tags),
879        existing.argspec.source.to_string(),
880        merge_from_packages(existing.from_packages, incoming.from_packages),
881    )
882}
883
884fn merge_environment_meta(
885    existing: &ActiveEnvironmentRecord,
886    incoming: &ActiveEnvironmentRecord,
887) -> ActiveEnvironmentRecord {
888    debug_assert!(should_merge_environment(existing, incoming));
889    debug_assert_eq!(existing.argspec.args, incoming.argspec.args);
890
891    make_env_meta(
892        existing.name.to_string(),
893        existing.allowed_mode.union(incoming.allowed_mode),
894        existing.argspec.args.to_vec(),
895        existing.body_mode,
896        merge_tags(existing.tags, incoming.tags),
897        existing.argspec.source.to_string(),
898        merge_from_packages(existing.from_packages, incoming.from_packages),
899    )
900}
901
902pub fn lookup_command_node_name(node: &Node) -> Option<&str> {
903    match node {
904        Node::Command { name, .. } | Node::Infix { name, .. } | Node::Declarative { name, .. } => {
905            Some(name.as_str())
906        }
907        _ => None,
908    }
909}
910
911pub fn lookup_environment_node_name(node: &Node) -> Option<&str> {
912    match node {
913        Node::Environment { name, .. } => Some(name.as_str()),
914        _ => None,
915    }
916}
917
918fn import_package_names(
919    kb: &mut KnowledgeBase,
920    requested: &[&str],
921) -> Result<(), PackageLoadError> {
922    for &name in requested {
923        let pkg = texform_knowledge::builtin::lookup_package(name).ok_or_else(|| {
924            PackageLoadError::UnknownPackage {
925                name: name.to_string(),
926            }
927        })?;
928        kb.import_builtin_package(pkg);
929    }
930    Ok(())
931}
932
933fn import_package_names_for_mode(
934    kb: &mut KnowledgeBase,
935    requested: &[&str],
936    target_mode: ContentMode,
937) -> Result<(), PackageLoadError> {
938    for &name in requested {
939        let pkg = texform_knowledge::builtin::lookup_package(name).ok_or_else(|| {
940            PackageLoadError::UnknownPackage {
941                name: name.to_string(),
942            }
943        })?;
944        kb.import_builtin_package_for_mode(pkg, target_mode);
945    }
946    Ok(())
947}
948
/// Variant of [`try_build_kb_from_packages`] that imports packages in exactly
/// the order the caller supplies, with no canonicalization. Intended for
/// tests that need to verify order-dependent behavior.
///
/// # Errors
///
/// Propagates the `PackageLoadError` from `import_package_names` when a
/// requested name is unknown.
#[cfg(test)]
pub(crate) fn try_build_kb_from_exact_packages(
    requested: &[&str],
) -> Result<KnowledgeBase, PackageLoadError> {
    let mut kb = KnowledgeBase::new();
    import_package_names(&mut kb, requested).map(|()| kb)
}
960
/// Test helper: builds a knowledge base from `packages` when a list is
/// given, or from every name returned by `all_package_names` otherwise.
#[cfg(test)]
fn build_default_kb(packages: Option<&[&str]>) -> KnowledgeBase {
    if let Some(list) = packages {
        return KnowledgeBase::build_from_packages(list);
    }

    // No explicit selection: fall back to the full builtin package list.
    let package_names = texform_knowledge::builtin::all_package_names();
    KnowledgeBase::build_from_packages(package_names.as_slice())
}
971
972#[cfg(test)]
973#[path = "knowledge/tests.rs"]
974mod tests;