Skip to main content

texform_core/parse/
context.rs

1//! Parse context that owns a per-instance immutable knowledge base.
2//!
3//! [`ParseContext`] is the primary public API surface for freezing a knowledge
4//! base and parsing LaTeX formulas with a stable package-backed view.
5//!
6//! The module also defines the shared output types ([`ParseResult`],
7//! [`ParseDiagnostic`]) used by every parse entry point.
8
9use std::collections::HashSet;
10use std::sync::{Arc, OnceLock};
11
12use chumsky::prelude::*;
13use logos::Logos;
14use serde::Serialize;
15pub use texform_argspec::ArgSpecParseError;
16pub use texform_interface::syntax_node::ContentMode;
17use texform_knowledge::builtin::PackageName;
18pub use texform_knowledge::specs::{
19    ActiveCharacterRecord, ActiveCommandRecord, ActiveDelimiterRecord, ActiveEnvironmentRecord,
20    AllowedMode, CommandKind,
21};
22
23use crate::document::Document;
24pub use crate::knowledge::KnowledgeBase;
25pub use crate::knowledge::PackageLoadError;
26use crate::knowledge::default_package_names;
27use crate::lexer::Token;
28use crate::parse::grammar::{self, TokenStream, TrackedNode, build_token_stream};
29use crate::parse::{ParseConfig, ParserState};
30
31type LexedSource = Vec<(Token, std::ops::Range<usize>)>;
32
// Diagnostic kind is propagated through two independent channels because chumsky
// may discard context labels during error merging/deduplication, while Custom
// message strings survive intact.  The context-label channel is the primary one
// (cheaper to attach), and the message-prefix channel acts as a fallback.

/// Prefix placed in front of the kind tag by `ParseDiagnosticKind::context_label`.
const DIAGNOSTIC_KIND_CONTEXT_PREFIX: &str = "__texform_diagnostic_kind:";
/// Prefix placed in front of the kind tag by `ParseDiagnosticKind::wrap_message`.
/// It begins with `\x1e` (the ASCII record-separator control character).
const DIAGNOSTIC_KIND_MESSAGE_PREFIX: &str = "\x1etexform-kind:";
/// Character that terminates the kind tag and precedes the public message text.
const DIAGNOSTIC_KIND_MESSAGE_SEPARATOR: char = '\x1e';
40
/// Machine-readable category attached to a parse diagnostic.
///
/// Serialized with kebab-case variant names (see the `serde` attribute below);
/// `ParseDiagnosticKind::as_str` spells out the same kebab-case identifiers.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
#[cfg_attr(feature = "tsify", derive(tsify_next::Tsify))]
#[serde(rename_all = "kebab-case")]
pub enum ParseDiagnosticKind {
    AmbiguousInfix,
    ArgumentValidation,
    CommandModeError,
    CommentTruncatedArgument,
    EnvironmentModeError,
    EnvironmentNameMismatch,
    LeftRightDelimiter,
    MaxGroupDepthExceeded,
    RawExpectedFound,
    TextScriptError,
    UnclosedInlineMath,
    UnexpectedMathShift,
    UnknownCommand,
    UnknownEnvironment,
}
60
61impl ParseDiagnosticKind {
62    pub const fn as_str(self) -> &'static str {
63        match self {
64            ParseDiagnosticKind::AmbiguousInfix => "ambiguous-infix",
65            ParseDiagnosticKind::ArgumentValidation => "argument-validation",
66            ParseDiagnosticKind::CommandModeError => "command-mode-error",
67            ParseDiagnosticKind::CommentTruncatedArgument => "comment-truncated-argument",
68            ParseDiagnosticKind::EnvironmentModeError => "environment-mode-error",
69            ParseDiagnosticKind::EnvironmentNameMismatch => "environment-name-mismatch",
70            ParseDiagnosticKind::LeftRightDelimiter => "left-right-delimiter",
71            ParseDiagnosticKind::MaxGroupDepthExceeded => "max-group-depth-exceeded",
72            ParseDiagnosticKind::RawExpectedFound => "raw-expected-found",
73            ParseDiagnosticKind::TextScriptError => "text-script-error",
74            ParseDiagnosticKind::UnclosedInlineMath => "unclosed-inline-math",
75            ParseDiagnosticKind::UnexpectedMathShift => "unexpected-math-shift",
76            ParseDiagnosticKind::UnknownCommand => "unknown-command",
77            ParseDiagnosticKind::UnknownEnvironment => "unknown-environment",
78        }
79    }
80
81    pub(crate) fn from_str(s: &str) -> Option<Self> {
82        match s {
83            "ambiguous-infix" => Some(Self::AmbiguousInfix),
84            "argument-validation" => Some(Self::ArgumentValidation),
85            "command-mode-error" => Some(Self::CommandModeError),
86            "comment-truncated-argument" => Some(Self::CommentTruncatedArgument),
87            "environment-mode-error" => Some(Self::EnvironmentModeError),
88            "environment-name-mismatch" => Some(Self::EnvironmentNameMismatch),
89            "left-right-delimiter" => Some(Self::LeftRightDelimiter),
90            "max-group-depth-exceeded" => Some(Self::MaxGroupDepthExceeded),
91            "raw-expected-found" => Some(Self::RawExpectedFound),
92            "text-script-error" => Some(Self::TextScriptError),
93            "unclosed-inline-math" => Some(Self::UnclosedInlineMath),
94            "unexpected-math-shift" => Some(Self::UnexpectedMathShift),
95            "unknown-command" => Some(Self::UnknownCommand),
96            "unknown-environment" => Some(Self::UnknownEnvironment),
97            _ => None,
98        }
99    }
100
101    pub(crate) fn context_label(self) -> String {
102        format!("{DIAGNOSTIC_KIND_CONTEXT_PREFIX}{}", self.as_str())
103    }
104
105    pub(crate) fn from_context_label(label: &str) -> Option<Self> {
106        Self::from_str(label.strip_prefix(DIAGNOSTIC_KIND_CONTEXT_PREFIX)?)
107    }
108
109    pub(crate) fn wrap_message(self, message: impl AsRef<str>) -> String {
110        format!(
111            "{DIAGNOSTIC_KIND_MESSAGE_PREFIX}{}{DIAGNOSTIC_KIND_MESSAGE_SEPARATOR}{}",
112            self.as_str(),
113            message.as_ref()
114        )
115    }
116
117    pub(crate) fn split_message(message: &str) -> (Option<Self>, &str) {
118        let Some(rest) = message.strip_prefix(DIAGNOSTIC_KIND_MESSAGE_PREFIX) else {
119            return (None, message);
120        };
121        let Some((kind, public_message)) = rest.split_once(DIAGNOSTIC_KIND_MESSAGE_SEPARATOR)
122        else {
123            return (None, message);
124        };
125        (Self::from_str(kind), public_message)
126    }
127}
128
129fn lex_source(src: &str) -> LexedSource {
130    Token::lexer(src)
131        .spanned()
132        .map(|(token, span)| {
133            let token = token.unwrap_or_else(|()| {
134                panic!("Lexer error at byte offset {}..{}", span.start, span.end)
135            });
136            (token, span)
137        })
138        .collect()
139}
140
/// A runtime-injectable definition that augments the knowledge base.
///
/// Context items let callers add temporary commands, environments, or
/// delimiter controls without modifying the underlying package specs.
/// Each payload type converts into `ContextItem` through a `From` impl in this
/// module.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ContextItem {
    /// A command definition (prefix, infix, or declarative)
    Command(CommandItem),
    /// An environment definition
    Environment(EnvironmentItem),
    /// A delimiter control sequence (e.g. `langle`, `rangle`)
    DelimiterControl(DelimiterControlItem),
}
154
155impl ContextItem {
156    /// Return the name of the underlying item (command name, env name, etc.)
157    pub fn name(&self) -> &str {
158        match self {
159            ContextItem::Command(item) => item.name.as_str(),
160            ContextItem::Environment(item) => item.name.as_str(),
161            ContextItem::DelimiterControl(item) => item.name.as_str(),
162        }
163    }
164
165    /// Human-readable tag for error messages (`"command"`, `"environment"`, etc.)
166    pub const fn target_tag(&self) -> &'static str {
167        match self {
168            ContextItem::Command(_) => "command",
169            ContextItem::Environment(_) => "environment",
170            ContextItem::DelimiterControl(_) => "delimiter control",
171        }
172    }
173}
174
/// Runtime command definition to be injected into a [`ParseContext`].
///
/// The `spec` field uses the xparse-style argument specification string
/// (e.g. `"m m"` for two mandatory args, `"s o m"` for star + optional + mandatory).
/// The string is stored as given by the constructor; an invalid spec surfaces
/// as `ParseContextBuildError::InvalidContextItem` from `ParseContextBuilder::build`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CommandItem {
    /// Command name without leading backslash
    pub name: String,
    /// Prefix, infix, or declarative
    pub kind: CommandKind,
    /// Which content modes this command may appear in
    pub allowed_mode: AllowedMode,
    /// xparse-style argument specification string
    pub spec: String,
    /// Metadata tags for transform-stage filtering
    pub tags: Vec<String>,
}
192
193impl CommandItem {
194    /// Create a command item with no tags.
195    pub fn new(
196        name: impl Into<String>,
197        kind: CommandKind,
198        allowed_mode: AllowedMode,
199        spec: impl Into<String>,
200    ) -> Self {
201        Self {
202            name: name.into(),
203            kind,
204            allowed_mode,
205            spec: spec.into(),
206            tags: Vec::new(),
207        }
208    }
209
210    /// Builder method to attach metadata tags.
211    pub fn with_tags<I, T>(mut self, tags: I) -> Self
212    where
213        I: IntoIterator<Item = T>,
214        T: Into<String>,
215    {
216        self.tags = tags.into_iter().map(Into::into).collect();
217        self
218    }
219}
220
/// Runtime environment definition to be injected into a [`ParseContext`].
///
/// `allowed_mode` governs where the environment may appear, while `body_mode`
/// is the content mode used for the environment's own body.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct EnvironmentItem {
    /// Environment name (e.g. `"matrix"`, `"align"`)
    pub name: String,
    /// Which content modes this environment may appear in
    pub allowed_mode: AllowedMode,
    /// Content mode used to parse the environment body
    pub body_mode: ContentMode,
    /// xparse-style argument specification string
    pub spec: String,
    /// Metadata tags for transform-stage filtering
    pub tags: Vec<String>,
}
235
236impl EnvironmentItem {
237    /// Create an environment item with no tags.
238    pub fn new(
239        name: impl Into<String>,
240        allowed_mode: AllowedMode,
241        body_mode: ContentMode,
242        spec: impl Into<String>,
243    ) -> Self {
244        Self {
245            name: name.into(),
246            allowed_mode,
247            body_mode,
248            spec: spec.into(),
249            tags: Vec::new(),
250        }
251    }
252
253    /// Builder method to attach metadata tags.
254    pub fn with_tags<I, T>(mut self, tags: I) -> Self
255    where
256        I: IntoIterator<Item = T>,
257        T: Into<String>,
258    {
259        self.tags = tags.into_iter().map(Into::into).collect();
260        self
261    }
262}
263
/// Runtime delimiter control sequence to be registered in the knowledge base.
///
/// Delimiter controls are names (without backslash) that may appear after
/// `\left` / `\right` or in delimiter-typed argument slots (e.g. `langle`,
/// `rangle`, `|`).
///
/// The item carries only a name; it has no mode restriction, spec, or tags.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DelimiterControlItem {
    /// Delimiter name without leading backslash
    pub name: String,
}
274
275impl DelimiterControlItem {
276    /// Create a delimiter control item.
277    pub fn new(name: impl Into<String>) -> Self {
278        Self { name: name.into() }
279    }
280}
281
282impl From<CommandItem> for ContextItem {
283    fn from(item: CommandItem) -> Self {
284        ContextItem::Command(item)
285    }
286}
287
288impl From<EnvironmentItem> for ContextItem {
289    fn from(item: EnvironmentItem) -> Self {
290        ContextItem::Environment(item)
291    }
292}
293
294impl From<DelimiterControlItem> for ContextItem {
295    fn from(item: DelimiterControlItem) -> Self {
296        ContextItem::DelimiterControl(item)
297    }
298}
299
/// Names of commands and environments that builder operations touched.
///
/// `ParseContextBuilder::build` records a name here for every inserted or
/// removed command/environment; delimiter-control operations are not tracked.
#[derive(Debug, Default, Clone, PartialEq, Eq)]
pub struct MutationSummary {
    /// Command names that were inserted or removed
    pub touched_commands: HashSet<String>,
    /// Environment names that were inserted or removed
    pub touched_environments: HashSet<String>,
}
305
/// A deferred knowledge-base mutation queued on a `ParseContextBuilder`.
///
/// Operations are applied in queue order by `ParseContextBuilder::build`.
enum BuilderOp {
    /// Insert a command, environment, or delimiter control
    Insert(ContextItem),
    /// Remove the command with this name
    RemoveCommand(String),
    /// Remove the environment with this name
    RemoveEnvironment(String),
    /// Remove the delimiter control with this name
    RemoveDelimiterControl(String),
}
312
313fn record_insert(summary: &mut MutationSummary, item: &ContextItem) {
314    match item {
315        ContextItem::Command(command) => {
316            summary.touched_commands.insert(command.name.clone());
317        }
318        ContextItem::Environment(environment) => {
319            summary
320                .touched_environments
321                .insert(environment.name.clone());
322        }
323        ContextItem::DelimiterControl(_) => {}
324    }
325}
326
/// Failure reported by `ParseContextBuilder::build`.
#[derive(Debug)]
pub enum ParseContextBuildError {
    /// Loading the requested packages failed (for example an unknown package name)
    PackageLoad(PackageLoadError),
    /// Inserting a context item failed while parsing an argument specification
    InvalidContextItem {
        /// Name of the rejected item
        name: String,
        /// Underlying argument-spec parse error
        source: ArgSpecParseError,
    },
}
335
/// Which packages a `ParseContextBuilder` loads before applying its operations.
enum KnowledgeBaseMode {
    /// Load the packages returned by `default_package_names()`
    DefaultPackages,
    /// Load exactly the listed package names
    Packages(Vec<String>),
    /// Load nothing; both knowledge bases start empty
    Empty,
}
341
/// Builder that selects the packages to load and queues knowledge-base
/// mutations, producing a `ParseContext` from `build`.
pub struct ParseContextBuilder {
    /// Package selection applied first by `build`
    mode: KnowledgeBaseMode,
    /// Insert/remove operations, applied in order after the packages are loaded
    ops: Vec<BuilderOp>,
}
346
347impl ParseContextBuilder {
348    pub fn empty() -> Self {
349        Self {
350            mode: KnowledgeBaseMode::Empty,
351            ops: Vec::new(),
352        }
353    }
354
355    pub fn empty_knowledge(self) -> Self {
356        Self::empty()
357    }
358
359    pub fn packages(mut self, packages: &[&str]) -> Self {
360        self.mode =
361            KnowledgeBaseMode::Packages(packages.iter().map(|name| (*name).to_string()).collect());
362        self
363    }
364
365    pub fn insert_item(mut self, item: impl Into<ContextItem>) -> Self {
366        self.ops.push(BuilderOp::Insert(item.into()));
367        self
368    }
369
370    pub fn remove_command(mut self, name: impl Into<String>) -> Self {
371        self.ops.push(BuilderOp::RemoveCommand(name.into()));
372        self
373    }
374
375    pub fn remove_environment(mut self, name: impl Into<String>) -> Self {
376        self.ops.push(BuilderOp::RemoveEnvironment(name.into()));
377        self
378    }
379
380    pub fn remove_delimiter_control(mut self, name: impl Into<String>) -> Self {
381        self.ops
382            .push(BuilderOp::RemoveDelimiterControl(name.into()));
383        self
384    }
385
386    pub fn build(self) -> Result<ParseContext, ParseContextBuildError> {
387        let (mut math_kb, mut text_kb, enabled_packages) = match self.mode {
388            KnowledgeBaseMode::Empty => {
389                (KnowledgeBase::empty(), KnowledgeBase::empty(), Vec::new())
390            }
391            KnowledgeBaseMode::DefaultPackages => {
392                let refs = default_package_names().to_vec();
393                let enabled_packages = canonical_enabled_package_names(refs.as_slice())?;
394                let math_kb = KnowledgeBase::try_build_from_packages_for_mode(
395                    refs.as_slice(),
396                    ContentMode::Math,
397                )
398                .map_err(ParseContextBuildError::PackageLoad)?;
399                let text_kb = KnowledgeBase::try_build_from_packages_for_mode(
400                    refs.as_slice(),
401                    ContentMode::Text,
402                )
403                .map_err(ParseContextBuildError::PackageLoad)?;
404
405                (math_kb, text_kb, enabled_packages)
406            }
407            KnowledgeBaseMode::Packages(packages) => {
408                let refs = packages.iter().map(String::as_str).collect::<Vec<_>>();
409                let enabled_packages = canonical_enabled_package_names(refs.as_slice())?;
410                (
411                    KnowledgeBase::try_build_from_packages_for_mode(
412                        refs.as_slice(),
413                        ContentMode::Math,
414                    )
415                    .map_err(ParseContextBuildError::PackageLoad)?,
416                    KnowledgeBase::try_build_from_packages_for_mode(
417                        refs.as_slice(),
418                        ContentMode::Text,
419                    )
420                    .map_err(ParseContextBuildError::PackageLoad)?,
421                    enabled_packages,
422                )
423            }
424        };
425
426        let mut mutation_summary = MutationSummary::default();
427
428        for op in self.ops {
429            match op {
430                BuilderOp::Insert(item) => {
431                    record_insert(&mut mutation_summary, &item);
432                    insert_item_into_lane(&mut math_kb, &item, ContentMode::Math).map_err(
433                        |source| ParseContextBuildError::InvalidContextItem {
434                            name: item.name().to_string(),
435                            source,
436                        },
437                    )?;
438                    insert_item_into_lane(&mut text_kb, &item, ContentMode::Text).map_err(
439                        |source| ParseContextBuildError::InvalidContextItem {
440                            name: item.name().to_string(),
441                            source,
442                        },
443                    )?;
444                }
445                BuilderOp::RemoveCommand(name) => {
446                    mutation_summary.touched_commands.insert(name.clone());
447                    math_kb.remove_command_by_name(name.as_str());
448                    text_kb.remove_command_by_name(name.as_str());
449                }
450                BuilderOp::RemoveEnvironment(name) => {
451                    mutation_summary.touched_environments.insert(name.clone());
452                    math_kb.remove_environment_by_name(name.as_str());
453                    text_kb.remove_environment_by_name(name.as_str());
454                }
455                BuilderOp::RemoveDelimiterControl(name) => {
456                    let item = DelimiterControlItem::new(name);
457                    math_kb.remove_item(item.clone());
458                    text_kb.remove_item(item);
459                }
460            }
461        }
462
463        Ok(ParseContext::from_parts(
464            math_kb,
465            text_kb,
466            mutation_summary,
467            enabled_packages,
468        ))
469    }
470}
471
472fn canonical_enabled_package_names(
473    requested: &[&str],
474) -> Result<Vec<PackageName>, ParseContextBuildError> {
475    let mut packages = Vec::new();
476    for package in texform_knowledge::builtin::MANAGED_PACKAGE_IMPORT_ORDER {
477        if requested.contains(&package.as_str()) {
478            packages.push(*package);
479        }
480    }
481
482    for requested_name in requested {
483        if PackageName::from_str(requested_name).is_none() {
484            return Err(ParseContextBuildError::PackageLoad(
485                PackageLoadError::UnknownPackage {
486                    name: (*requested_name).to_string(),
487                },
488            ));
489        }
490    }
491
492    Ok(packages)
493}
494
495fn insert_item_into_lane(
496    kb: &mut KnowledgeBase,
497    item: &ContextItem,
498    mode: ContentMode,
499) -> Result<(), ArgSpecParseError> {
500    match item {
501        ContextItem::Command(command) => {
502            if command.allowed_mode.allows(mode) {
503                kb.insert_item(command.clone())?;
504            }
505            Ok(())
506        }
507        ContextItem::Environment(environment) => {
508            if environment.allowed_mode.allows(mode) {
509                kb.insert_item(environment.clone())?;
510            }
511            Ok(())
512        }
513        ContextItem::DelimiterControl(item) => kb.insert_item(item.clone()),
514    }
515}
516
517impl Default for ParseContextBuilder {
518    fn default() -> Self {
519        Self {
520            mode: KnowledgeBaseMode::DefaultPackages,
521            ops: Vec::new(),
522        }
523    }
524}
525
/// Byte-offset span within the original source string.
///
/// The range is half-open: `start` is included, `end` is not.
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
#[cfg_attr(feature = "tsify", derive(tsify_next::Tsify))]
pub struct Span {
    /// Inclusive start byte offset
    pub start: usize,
    /// Exclusive end byte offset
    pub end: usize,
}
535
/// Additional source span attached to a diagnostic.
///
/// A `ParseDiagnostic` carries zero or more of these in its `contexts` field.
#[derive(Debug, Clone, Serialize)]
#[cfg_attr(feature = "tsify", derive(tsify_next::Tsify))]
pub struct ParseDiagnosticContext {
    /// Human-readable label for this related span
    pub label: String,
    /// Source location referenced by the label
    pub span: Span,
}
545
/// Unified parse result carrying an optional document and zero or more diagnostics.
///
/// The design mirrors chumsky's `output + errors` semantics: a partial document
/// may coexist with diagnostics, so consumers always receive as much
/// information as the parser could extract.
///
/// Use `try_into_document` to obtain the document only when it contains no
/// error nodes.
#[derive(Debug, Clone)]
pub struct ParseResult {
    /// Parsed document, present even when diagnostics exist for recovered input.
    pub document: Option<Document>,
    /// Zero or more diagnostics; empty on full success.
    pub diagnostics: Vec<ParseDiagnostic>,
}
558
559impl ParseResult {
560    /// Borrow the parsed document, if one was produced.
561    pub fn document(&self) -> Option<&Document> {
562        self.document.as_ref()
563    }
564
565    /// Borrow parse diagnostics.
566    pub fn diagnostics(&self) -> &[ParseDiagnostic] {
567        self.diagnostics.as_slice()
568    }
569
570    /// Consume the result and return only diagnostics.
571    pub fn into_diagnostics(self) -> Vec<ParseDiagnostic> {
572        self.diagnostics
573    }
574
575    /// `true` when a recovered document contains one or more `Error` nodes.
576    pub fn has_errors(&self) -> bool {
577        self.document.as_ref().is_some_and(Document::has_errors)
578    }
579
580    /// Return the document and diagnostics when the document is editable.
581    pub fn try_into_document(self) -> Result<(Document, Vec<ParseDiagnostic>), ParseError> {
582        match (self.document, self.diagnostics) {
583            (Some(document), diagnostics) if !document.has_errors() => Ok((document, diagnostics)),
584            (document, diagnostics) => Err(ParseError {
585                diagnostics,
586                document: document.map(Box::new),
587            }),
588        }
589    }
590
591    /// Consume the result into its two public parts.
592    pub fn into_parts(self) -> (Option<Document>, Vec<ParseDiagnostic>) {
593        (self.document, self.diagnostics)
594    }
595}
596
/// A single diagnostic produced during parsing.
///
/// Diagnostics carry both a human-readable message and structured
/// expected/found information for richer error reporting.
///
/// The struct is `#[non_exhaustive]`, so code outside this crate cannot build
/// it with a struct literal; `ParseDiagnostic::new` is the constructor.
#[derive(Debug, Clone, Serialize)]
#[cfg_attr(feature = "tsify", derive(tsify_next::Tsify))]
#[non_exhaustive]
pub struct ParseDiagnostic {
    /// Stable machine-readable diagnostic kind, when available
    pub kind: Option<ParseDiagnosticKind>,
    /// Human-readable error description
    pub message: String,
    /// Source location of the error
    pub span: Span,
    /// Tokens or patterns the parser expected at this point
    pub expected: Vec<String>,
    /// Token actually found, if any
    pub found: Option<String>,
    /// Additional related source ranges for richer diagnostics
    pub contexts: Vec<ParseDiagnosticContext>,
}
618
619impl ParseDiagnostic {
620    pub fn new(
621        message: impl Into<String>,
622        span: Span,
623        expected: Vec<String>,
624        found: Option<String>,
625        contexts: Vec<ParseDiagnosticContext>,
626    ) -> Self {
627        Self {
628            kind: None,
629            message: message.into(),
630            span,
631            expected,
632            found,
633            contexts,
634        }
635    }
636}
637
/// Error returned by `ParseResult::try_into_document` when no editable
/// document is available.
#[derive(Debug, Clone)]
pub struct ParseError {
    /// Diagnostics collected by the parse
    pub diagnostics: Vec<ParseDiagnostic>,
    /// The rejected document, if the parser produced one (boxed)
    pub document: Option<Box<Document>>,
}
643
644impl ParseError {
645    pub fn diagnostics(&self) -> &[ParseDiagnostic] {
646        self.diagnostics.as_slice()
647    }
648
649    pub fn document(&self) -> Option<&Document> {
650        self.document.as_deref()
651    }
652
653    pub fn into_diagnostics(self) -> Vec<ParseDiagnostic> {
654        self.diagnostics
655    }
656
657    pub fn into_parts(self) -> (Option<Document>, Vec<ParseDiagnostic>) {
658        (self.document.map(|document| *document), self.diagnostics)
659    }
660}
661
662impl std::fmt::Display for ParseError {
663    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
664        if self.document.is_some() {
665            f.write_str("parse produced an incomplete document")
666        } else {
667            f.write_str("parse produced no document")
668        }
669    }
670}
671
672impl std::error::Error for ParseError {}
673
/// Immutable parse context owning an isolated knowledge base.
///
/// A `ParseContext` is the main integration surface for callers that need to
/// freeze a fully-built knowledge base, query metadata, and parse LaTeX
/// formulas repeatedly.
///
/// # Construction
///
/// | Constructor | Loaded knowledge |
/// |---|---|
/// | [`empty()`](Self::empty) | Nothing |
/// | [`from_packages()`](Self::from_packages) | Named packages only |
/// | `Default::default()` | Default runtime packages |
/// | [`shared()`](Self::shared) | Same as above, lazily cached `&'static` ref |
///
/// Cloning copies the `Arc` handles to the two knowledge bases, not the
/// knowledge bases themselves.
#[derive(Clone)]
pub struct ParseContext {
    /// Knowledge base used for `ContentMode::Math`
    math_kb: Arc<KnowledgeBase>,
    /// Knowledge base used for `ContentMode::Text`
    text_kb: Arc<KnowledgeBase>,
    /// Command/environment names touched by builder operations
    mutation_summary: MutationSummary,
    /// Packages enabled for this context
    enabled_packages: Vec<PackageName>,
}
696
697impl std::fmt::Debug for ParseContext {
698    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
699        f.debug_struct("ParseContext")
700            .field("math_kb", &self.math_kb)
701            .field("text_kb", &self.text_kb)
702            .field("enabled_packages", &self.enabled_packages)
703            .finish_non_exhaustive()
704    }
705}
706
707impl Default for ParseContext {
708    fn default() -> Self {
709        ParseContextBuilder::default()
710            .build()
711            .expect("default parse context should build")
712    }
713}
714
715impl ParseContext {
716    pub fn builder() -> ParseContextBuilder {
717        ParseContextBuilder::default()
718    }
719
720    pub(crate) fn from_parts(
721        math_kb: KnowledgeBase,
722        text_kb: KnowledgeBase,
723        mutation_summary: MutationSummary,
724        enabled_packages: Vec<PackageName>,
725    ) -> Self {
726        ParseContext {
727            math_kb: Arc::new(math_kb),
728            text_kb: Arc::new(text_kb),
729            mutation_summary,
730            enabled_packages,
731        }
732    }
733
734    pub fn mutation_summary(&self) -> &MutationSummary {
735        &self.mutation_summary
736    }
737
738    pub fn enabled_packages(&self) -> &[PackageName] {
739        self.enabled_packages.as_slice()
740    }
741
742    pub fn has_enabled_package(&self, package: PackageName) -> bool {
743        self.enabled_packages.contains(&package)
744    }
745
746    /// Build an empty context with no package specs loaded.
747    ///
748    /// Useful as a blank slate when every definition will be injected manually.
749    pub fn empty() -> Self {
750        ParseContextBuilder::empty()
751            .build()
752            .expect("empty parse context should build")
753    }
754
755    /// Build a context from an explicit list of package names.
756    /// The listed packages are imported in canonical order.
757    ///
758    /// # Panics
759    ///
760    /// Panics if any package name is unrecognized. Use [`try_from_packages`](Self::try_from_packages)
761    /// for fallible loading.
762    pub fn from_packages(packages: &[&str]) -> Self {
763        ParseContextBuilder::empty()
764            .packages(packages)
765            .build()
766            .expect("package parse context should build")
767    }
768
769    /// Fallible variant of [`from_packages`](Self::from_packages).
770    ///
771    /// Returns [`PackageLoadError`] instead of panicking when a package name
772    /// is unrecognized.
773    pub fn try_from_packages(packages: &[&str]) -> Result<Self, PackageLoadError> {
774        ParseContextBuilder::empty()
775            .packages(packages)
776            .build()
777            .map_err(|error| match error {
778                ParseContextBuildError::PackageLoad(error) => error,
779                ParseContextBuildError::InvalidContextItem { .. } => {
780                    panic!("try_from_packages should not hit invalid context item")
781                }
782            })
783    }
784
785    /// Borrow the lazily-initialized default-package context.
786    ///
787    /// This is the cheapest way to parse with the default knowledge base: the
788    /// context is built once on first call and shared for the process lifetime.
789    pub fn shared() -> &'static ParseContext {
790        shared_parser()
791    }
792
793    /// Check whether `name` is a registered delimiter control sequence.
794    pub fn is_delimiter_control(&self, name: &str) -> bool {
795        self.math_kb.is_delimiter_control(name) || self.text_kb.is_delimiter_control(name)
796    }
797
798    /// Look up a delimiter control by name, returning the interned name.
799    pub fn lookup_delimiter_control(&self, name: &str) -> Option<&'static str> {
800        self.math_kb
801            .lookup_delimiter_control(name)
802            .or_else(|| self.text_kb.lookup_delimiter_control(name))
803    }
804
805    pub fn lookup_delimiter(
806        &self,
807        name: &str,
808        is_control_sequence: bool,
809        mode: ContentMode,
810    ) -> Option<&ActiveDelimiterRecord> {
811        self.kb_for(mode)
812            .lookup_delimiter(name, is_control_sequence)
813    }
814
815    /// Parse a LaTeX formula and return a unified output.
816    ///
817    /// Uses chumsky's output+errors semantics so that a partial syntax tree
818    /// can coexist with diagnostics.
819    pub fn parse(&self, src: &str, config: &ParseConfig) -> ParseResult {
820        parse_with_context(self, src, config)
821    }
822
823    /// Look up the active command metadata for `name`.
824    ///
825    /// The active entry may come from an explicit command definition or a
826    /// character-derived zero-arg view. Returns `None` if the name is unknown
827    /// or has been suppressed.
828    pub fn kb_for(&self, mode: ContentMode) -> &KnowledgeBase {
829        match mode {
830            ContentMode::Math => self.math_kb.as_ref(),
831            ContentMode::Text => self.text_kb.as_ref(),
832        }
833    }
834
835    pub fn math_kb(&self) -> &KnowledgeBase {
836        self.math_kb.as_ref()
837    }
838
839    pub fn text_kb(&self) -> &KnowledgeBase {
840        self.text_kb.as_ref()
841    }
842
843    /// Look up the active command metadata for `name` in the selected lane.
844    pub fn lookup_command(&self, name: &str, mode: ContentMode) -> Option<&ActiveCommandRecord> {
845        self.kb_for(mode).lookup_command(name)
846    }
847
848    /// Look up only the explicit (non-character-derived) command for `name`.
849    pub fn lookup_explicit_command(
850        &self,
851        name: &str,
852        mode: ContentMode,
853    ) -> Option<&ActiveCommandRecord> {
854        self.kb_for(mode).lookup_explicit_command(name)
855    }
856
857    /// Look up character metadata for a control sequence name.
858    pub fn lookup_character(
859        &self,
860        name: &str,
861        mode: ContentMode,
862    ) -> Option<&ActiveCharacterRecord> {
863        self.kb_for(mode).lookup_character(name)
864    }
865
866    /// Look up environment metadata by name.
867    pub fn lookup_env(&self, name: &str, mode: ContentMode) -> Option<&ActiveEnvironmentRecord> {
868        self.kb_for(mode).lookup_env(name)
869    }
870
871    pub fn knows_command_name(&self, name: &str) -> bool {
872        self.knows_command_name_in(name, ContentMode::Math)
873            || self.knows_command_name_in(name, ContentMode::Text)
874    }
875
876    pub fn knows_env_name(&self, name: &str) -> bool {
877        self.knows_env_name_in(name, ContentMode::Math)
878            || self.knows_env_name_in(name, ContentMode::Text)
879    }
880
881    pub fn knows_character_name(&self, name: &str) -> bool {
882        self.knows_character_name_in(name, ContentMode::Math)
883            || self.knows_character_name_in(name, ContentMode::Text)
884    }
885
886    fn knows_command_name_in(&self, name: &str, mode: ContentMode) -> bool {
887        self.lookup_command(name, mode).is_some()
888    }
889
890    fn knows_env_name_in(&self, name: &str, mode: ContentMode) -> bool {
891        self.lookup_env(name, mode).is_some()
892    }
893
894    fn knows_character_name_in(&self, name: &str, mode: ContentMode) -> bool {
895        self.lookup_character(name, mode).is_some()
896    }
897}
898
899fn shared_parser() -> &'static ParseContext {
900    static DEFAULT: OnceLock<ParseContext> = OnceLock::new();
901    DEFAULT.get_or_init(ParseContext::default)
902}
903
904pub(crate) fn parse_with_context(
905    ctx: &ParseContext,
906    src: &str,
907    config: &ParseConfig,
908) -> ParseResult {
909    let token_stream = build_token_stream(src);
910    let (output, mut errors) = parse_raw(ctx, src, token_stream, config);
911
912    let document = output.map(|tracked| {
913        let (node, _span, records, diagnostics) = tracked.finish_root();
914        errors.extend(diagnostics);
915        let path_spans: Vec<_> = records
916            .into_iter()
917            .map(|entry| {
918                (
919                    entry.path,
920                    Span {
921                        start: entry.span.start,
922                        end: entry.span.end,
923                    },
924                )
925            })
926            .collect();
927        Document::from_syntax_with_spans(&node, &path_spans)
928            .expect("parser must produce a syntax root accepted by Document")
929    });
930
931    let mut diagnostics: Vec<_> = errors
932        .into_iter()
933        .map(|err| convert_diagnostic(ctx, src, err))
934        .collect();
935    diagnostics.sort_by_key(parse_diagnostic_priority);
936
937    ParseResult {
938        document,
939        diagnostics,
940    }
941}
942
943fn parse_raw(
944    ctx: &ParseContext,
945    src: &str,
946    token_stream: TokenStream<'_>,
947    config: &ParseConfig,
948) -> (Option<TrackedNode>, Vec<Rich<'static, Token>>) {
949    let state = ParserState::new(ctx, config, src);
950    let (output, errors) = grammar::math_block_parser_with_source(&state, src)
951        .then_ignore(end())
952        .parse(token_stream)
953        .into_output_errors();
954
955    // Convert borrowed errors to owned so they outlive the token stream.
956    let mut collected_errors = state.take_recovery_diagnostics();
957    collected_errors.extend(errors.into_iter().map(|e| e.into_owned()));
958    (output, collected_errors)
959}
960
961fn convert_diagnostic(ctx: &ParseContext, src: &str, err: Rich<'static, Token>) -> ParseDiagnostic {
962    let span = {
963        let s = err.span();
964        Span {
965            start: s.start,
966            end: s.end,
967        }
968    };
969
970    let reason = err.reason();
971    let mut kind = None;
972    let contexts = err
973        .contexts()
974        .filter_map(|(label, span)| {
975            let label = format!("{label}");
976            if let Some(context_kind) = ParseDiagnosticKind::from_context_label(label.as_str()) {
977                kind.get_or_insert(context_kind);
978                return None;
979            }
980
981            Some(ParseDiagnosticContext {
982                label,
983                span: Span {
984                    start: span.start,
985                    end: span.end,
986                },
987            })
988        })
989        .collect();
990
991    let (message, expected, found) = match reason {
992        chumsky::error::RichReason::ExpectedFound {
993            expected: exp,
994            found: f,
995        } => {
996            let expected: Vec<String> = exp.iter().map(|p| format!("{p}")).collect();
997            let found = f.as_ref().map(|t| format!("{}", &**t));
998
999            let msg = format!("{reason}");
1000            (msg, expected, found)
1001        }
1002        chumsky::error::RichReason::Custom(msg) => {
1003            let (message_kind, public_message) = ParseDiagnosticKind::split_message(msg.as_str());
1004            if let Some(message_kind) = message_kind {
1005                kind.get_or_insert(message_kind);
1006            }
1007            (public_message.to_string(), Vec::new(), None)
1008        }
1009    };
1010
1011    let mut kind =
1012        kind.or_else(|| infer_raw_diagnostic_kind(expected.as_slice(), found.as_deref()));
1013
1014    let mut diagnostic = ParseDiagnostic {
1015        kind,
1016        message,
1017        span,
1018        expected,
1019        found,
1020        contexts,
1021    };
1022
1023    supplement_comment_truncated_argument(src, &mut kind, &mut diagnostic);
1024    supplement_diagnostic_contexts(ctx, src, kind, &mut diagnostic);
1025    diagnostic
1026}
1027
1028fn parse_diagnostic_priority(diagnostic: &ParseDiagnostic) -> u8 {
1029    match diagnostic.kind {
1030        Some(
1031            ParseDiagnosticKind::UnknownCommand
1032            | ParseDiagnosticKind::UnknownEnvironment
1033            | ParseDiagnosticKind::CommentTruncatedArgument
1034            | ParseDiagnosticKind::UnexpectedMathShift
1035            | ParseDiagnosticKind::LeftRightDelimiter
1036            | ParseDiagnosticKind::AmbiguousInfix,
1037        ) => 1,
1038        Some(ParseDiagnosticKind::ArgumentValidation) => 2,
1039        Some(ParseDiagnosticKind::EnvironmentNameMismatch) => 2,
1040        Some(ParseDiagnosticKind::RawExpectedFound)
1041            if diagnostic
1042                .message
1043                .starts_with("found end of input expected ") =>
1044        {
1045            3
1046        }
1047        Some(ParseDiagnosticKind::RawExpectedFound) => 4,
1048        Some(_) | None => 2,
1049    }
1050}
1051
1052/// Best-effort fallback for chumsky-generated `ExpectedFound` errors that carry
1053/// no explicit `ParseDiagnosticKind`.  The heuristics here match the token
1054/// patterns that chumsky emits for known parser structures (e.g. `}` from an
1055/// environment-name mismatch, `\begin` from an unknown environment).
1056fn infer_raw_diagnostic_kind(
1057    expected: &[String],
1058    found: Option<&str>,
1059) -> Option<ParseDiagnosticKind> {
1060    if expected.iter().any(|pattern| pattern == "'$'")
1061        && matches!(found, None | Some("$") | Some("\\text"))
1062    {
1063        return Some(ParseDiagnosticKind::UnclosedInlineMath);
1064    }
1065
1066    match found {
1067        Some("$") => Some(ParseDiagnosticKind::UnexpectedMathShift),
1068        Some("}") => Some(ParseDiagnosticKind::EnvironmentNameMismatch),
1069        Some("\\begin") => Some(ParseDiagnosticKind::UnknownEnvironment),
1070        Some(_) if !expected.is_empty() => Some(ParseDiagnosticKind::RawExpectedFound),
1071        None if !expected.is_empty() => Some(ParseDiagnosticKind::RawExpectedFound),
1072        Some(_) | None => None,
1073    }
1074}
1075
1076fn supplement_diagnostic_contexts(
1077    ctx: &ParseContext,
1078    src: &str,
1079    kind: Option<ParseDiagnosticKind>,
1080    diagnostic: &mut ParseDiagnostic,
1081) {
1082    let mut lexed = None;
1083
1084    supplement_unclosed_inline_math_message(kind, src, diagnostic);
1085    supplement_unexpected_math_shift_message(kind, src, diagnostic);
1086    supplement_generic_unclosed_message(kind, src, diagnostic);
1087    supplement_environment_mode_error_message(kind, ctx, src, &mut lexed, diagnostic);
1088    supplement_environment_mismatch_message(kind, src, &mut lexed, diagnostic);
1089    supplement_unknown_environment_message(kind, ctx, src, &mut lexed, diagnostic);
1090    supplement_inner_content_error_span(kind, src, &mut lexed, diagnostic);
1091    supplement_argument_validation_span(kind, src, &mut lexed, diagnostic);
1092
1093    let needs_left_context = kind == Some(ParseDiagnosticKind::LeftRightDelimiter);
1094    if !needs_left_context {
1095        return;
1096    }
1097
1098    let Some((left_span, env_span)) =
1099        find_invalid_left_context(ctx, lexed.get_or_insert_with(|| lex_source(src)))
1100    else {
1101        return;
1102    };
1103
1104    if !diagnostic
1105        .contexts
1106        .iter()
1107        .any(|context| context.label == "left-delimited group")
1108    {
1109        diagnostic.contexts.push(ParseDiagnosticContext {
1110            label: "left-delimited group".to_string(),
1111            span: left_span,
1112        });
1113    }
1114
1115    if let Some(env_span) = env_span
1116        && !diagnostic
1117            .contexts
1118            .iter()
1119            .any(|context| context.label == "environment body")
1120    {
1121        diagnostic.contexts.push(ParseDiagnosticContext {
1122            label: "environment body".to_string(),
1123            span: env_span,
1124        });
1125    }
1126}
1127
1128/// Normalize the lone inline-math opener message so recoverable content
1129/// subparses report the same generic tail error shape as the top-level parser.
1130fn supplement_unclosed_inline_math_message(
1131    kind: Option<ParseDiagnosticKind>,
1132    src: &str,
1133    diagnostic: &mut ParseDiagnostic,
1134) {
1135    if kind != Some(ParseDiagnosticKind::UnclosedInlineMath) {
1136        return;
1137    }
1138
1139    diagnostic.message = "found '$' expected something else, or end of input".to_string();
1140    if diagnostic.expected.iter().any(|value| value == "'$'") {
1141        diagnostic.expected = vec!["something else".to_string(), "end of input".to_string()];
1142    }
1143    if diagnostic.found.as_deref() == Some("\\text")
1144        && let Some(span) = find_inline_math_shift_after_command(src, diagnostic.span.clone())
1145    {
1146        diagnostic.span = span;
1147        diagnostic.found = Some("$".to_string());
1148    }
1149}
1150
1151fn supplement_comment_truncated_argument(
1152    src: &str,
1153    kind: &mut Option<ParseDiagnosticKind>,
1154    diagnostic: &mut ParseDiagnostic,
1155) {
1156    if !matches!(
1157        *kind,
1158        Some(ParseDiagnosticKind::ArgumentValidation | ParseDiagnosticKind::RawExpectedFound)
1159            | None
1160    ) {
1161        return;
1162    }
1163
1164    if !matches!(
1165        diagnostic.message.as_str(),
1166        "unclosed brace argument" | "unclosed bracket argument" | "unclosed delimited argument"
1167    ) && !diagnostic
1168        .message
1169        .starts_with("found end of input expected ")
1170    {
1171        return;
1172    }
1173
1174    let tail_span = Span {
1175        start: diagnostic.span.start,
1176        end: src.len(),
1177    };
1178    let candidate_spans = std::iter::once(diagnostic.span.clone())
1179        .chain(std::iter::once(tail_span))
1180        .chain(
1181            diagnostic
1182                .contexts
1183                .iter()
1184                .filter(|context| context.label.contains("argument"))
1185                .map(|context| context.span.clone()),
1186        );
1187
1188    if !candidate_spans
1189        .filter_map(|span| src.get(span.start..span.end))
1190        .any(has_unescaped_percent)
1191    {
1192        return;
1193    }
1194
1195    *kind = Some(ParseDiagnosticKind::CommentTruncatedArgument);
1196    diagnostic.kind = *kind;
1197    diagnostic.message = "Unescaped % starts a comment inside this argument".to_string();
1198    diagnostic.expected.clear();
1199    diagnostic.found = None;
1200}
1201
/// Report whether `slice` contains a `%` that is not preceded by an escaping
/// backslash.
///
/// A backslash escapes exactly the character after it, so `\%` does not count
/// while the `%` in `\\%` does.
fn has_unescaped_percent(slice: &str) -> bool {
    let mut chars = slice.chars();
    while let Some(ch) = chars.next() {
        match ch {
            // Consume the escaped character without inspecting it.
            '\\' => {
                chars.next();
            }
            '%' => return true,
            _ => {}
        }
    }
    false
}
1219
1220fn supplement_unexpected_math_shift_message(
1221    kind: Option<ParseDiagnosticKind>,
1222    src: &str,
1223    diagnostic: &mut ParseDiagnostic,
1224) {
1225    if kind != Some(ParseDiagnosticKind::UnexpectedMathShift) {
1226        return;
1227    }
1228
1229    diagnostic.message = if src
1230        .as_bytes()
1231        .get(diagnostic.span.end)
1232        .is_some_and(u8::is_ascii_digit)
1233    {
1234        "Unexpected $ inside a math formula; it looks like a currency marker".to_string()
1235    } else {
1236        "Unexpected $ inside a math formula".to_string()
1237    };
1238    diagnostic.expected.clear();
1239    diagnostic.found = Some("$".to_string());
1240}
1241
1242fn supplement_generic_unclosed_message(
1243    kind: Option<ParseDiagnosticKind>,
1244    src: &str,
1245    diagnostic: &mut ParseDiagnostic,
1246) {
1247    if kind != Some(ParseDiagnosticKind::RawExpectedFound)
1248        || !diagnostic
1249            .message
1250            .starts_with("found end of input expected ")
1251    {
1252        return;
1253    }
1254
1255    if let Some(argument_context) = diagnostic
1256        .contexts
1257        .iter()
1258        .find(|context| context.label.contains("argument"))
1259        && let Some(command_name) = command_name_before(src, argument_context.span.start)
1260    {
1261        diagnostic.message = format!("Command \\{} has an unclosed argument", command_name);
1262        return;
1263    }
1264
1265    if let Some(env_name) = last_unclosed_environment_name(src) {
1266        diagnostic.message = format!(
1267            "Environment {} missing closing \\end{{{}}}",
1268            env_name, env_name
1269        );
1270        return;
1271    }
1272
1273    if diagnostic
1274        .span
1275        .start
1276        .checked_sub(1)
1277        .and_then(|index| src.as_bytes().get(index))
1278        == Some(&b'{')
1279    {
1280        diagnostic.message = "Unclosed { ... } group".to_string();
1281    }
1282}
1283
/// Return the ASCII-alphabetic command name that follows the last backslash
/// before byte `offset` in `src`.
///
/// Returns `None` when `offset` is not a valid slice boundary of `src`, when
/// no backslash precedes it, or when the last backslash is not directly
/// followed by an ASCII letter.
fn command_name_before(src: &str, offset: usize) -> Option<&str> {
    let prefix = src.get(..offset)?;
    let (_, after_backslash) = prefix.rsplit_once('\\')?;

    // ASCII letters are single bytes, so the byte count is also a valid
    // char boundary into `after_backslash`.
    let name_len = after_backslash
        .bytes()
        .take_while(u8::is_ascii_alphabetic)
        .count();

    if name_len == 0 {
        None
    } else {
        Some(&after_backslash[..name_len])
    }
}
1294
1295fn last_unclosed_environment_name(src: &str) -> Option<String> {
1296    let lexed = lex_source(src);
1297    let mut stack = Vec::new();
1298    let mut index = 0;
1299
1300    while index < lexed.len() {
1301        let Token::ControlSeq(head) = &lexed[index].0 else {
1302            index += 1;
1303            continue;
1304        };
1305        if !matches!(head.as_str(), "begin" | "end") {
1306            index += 1;
1307            continue;
1308        }
1309
1310        let mut next = index + 1;
1311        while matches!(lexed.get(next), Some((Token::Whitespaces, _))) {
1312            next += 1;
1313        }
1314        if !matches!(lexed.get(next), Some((Token::LBrace, _))) {
1315            index += 1;
1316            continue;
1317        }
1318        next += 1;
1319
1320        let mut env_name = String::new();
1321        while let Some((token, _)) = lexed.get(next) {
1322            match token {
1323                Token::Char(ch) => env_name.push(*ch),
1324                Token::Star => env_name.push('*'),
1325                Token::RBrace => break,
1326                _ => {
1327                    env_name.clear();
1328                    break;
1329                }
1330            }
1331            next += 1;
1332        }
1333
1334        if env_name.is_empty() {
1335            index += 1;
1336            continue;
1337        }
1338
1339        if head == "begin" {
1340            stack.push(env_name);
1341        } else if let Some(pos) = stack.iter().rposition(|open| open == &env_name) {
1342            stack.truncate(pos);
1343        }
1344        index += 1;
1345    }
1346
1347    stack.pop()
1348}
1349
1350/// Locate the `$` that immediately starts a braced inline-math argument after a command span.
1351fn find_inline_math_shift_after_command(src: &str, command_span: Span) -> Option<Span> {
1352    let mut offset = command_span.end;
1353    while matches!(src.as_bytes().get(offset), Some(b' ' | b'\t' | b'\n')) {
1354        offset += 1;
1355    }
1356    if src.as_bytes().get(offset) != Some(&b'{') || src.as_bytes().get(offset + 1) != Some(&b'$') {
1357        return None;
1358    }
1359
1360    Some(Span {
1361        start: offset + 1,
1362        end: offset + 2,
1363    })
1364}
1365
1366fn supplement_environment_mode_error_message(
1367    kind: Option<ParseDiagnosticKind>,
1368    ctx: &ParseContext,
1369    src: &str,
1370    lexed: &mut Option<LexedSource>,
1371    diagnostic: &mut ParseDiagnostic,
1372) {
1373    // Fallback: raw ExpectedFound errors come from chumsky before
1374    // TeXForm has a parser-private diagnostic kind to attach.
1375    if !matches!(
1376        kind,
1377        Some(ParseDiagnosticKind::RawExpectedFound | ParseDiagnosticKind::EnvironmentNameMismatch)
1378    ) {
1379        return;
1380    }
1381
1382    let Some((name, disallowed_mode, span)) = find_environment_mode_error_at_span(
1383        ctx,
1384        lexed.get_or_insert_with(|| lex_source(src)),
1385        diagnostic.span.clone(),
1386    )
1387    .or_else(|| {
1388        if diagnostic.span.start == 0 {
1389            find_first_known_but_disallowed_environment(
1390                ctx,
1391                lexed.get_or_insert_with(|| lex_source(src)),
1392            )
1393        } else {
1394            None
1395        }
1396    }) else {
1397        return;
1398    };
1399
1400    diagnostic.message = format!(
1401        "Environment {} is not allowed in {} mode",
1402        name, disallowed_mode
1403    );
1404    diagnostic.span = span;
1405    diagnostic.expected.clear();
1406    diagnostic.found = None;
1407}
1408
1409fn supplement_environment_mismatch_message(
1410    kind: Option<ParseDiagnosticKind>,
1411    src: &str,
1412    lexed: &mut Option<LexedSource>,
1413    diagnostic: &mut ParseDiagnostic,
1414) {
1415    if kind != Some(ParseDiagnosticKind::EnvironmentNameMismatch) {
1416        return;
1417    }
1418
1419    let Some((expected, found, span)) = find_environment_name_mismatch(
1420        lexed.get_or_insert_with(|| lex_source(src)),
1421        diagnostic.span.clone(),
1422    ) else {
1423        return;
1424    };
1425
1426    diagnostic.message = format!(
1427        "Environment name mismatch: expected \\end{{{}}}, found \\end{{{}}}",
1428        expected, found
1429    );
1430    diagnostic.span = span;
1431    diagnostic.expected = vec![format!("\\end{{{}}}", expected)];
1432    diagnostic.found = Some(format!("\\end{{{}}}", found));
1433}
1434
1435fn supplement_unknown_environment_message(
1436    kind: Option<ParseDiagnosticKind>,
1437    ctx: &ParseContext,
1438    src: &str,
1439    lexed: &mut Option<LexedSource>,
1440    diagnostic: &mut ParseDiagnostic,
1441) {
1442    if kind != Some(ParseDiagnosticKind::UnknownEnvironment) {
1443        return;
1444    }
1445
1446    let Some((name, span)) = find_unknown_environment_at_span(
1447        ctx,
1448        lexed.get_or_insert_with(|| lex_source(src)),
1449        diagnostic.span.clone(),
1450    ) else {
1451        return;
1452    };
1453
1454    diagnostic.message = format!("Unknown environment: {}", name);
1455    diagnostic.span = span;
1456    diagnostic.expected.clear();
1457    diagnostic.found = None;
1458}
1459
1460fn supplement_argument_validation_span(
1461    kind: Option<ParseDiagnosticKind>,
1462    src: &str,
1463    lexed: &mut Option<LexedSource>,
1464    diagnostic: &mut ParseDiagnostic,
1465) {
1466    if kind != Some(ParseDiagnosticKind::ArgumentValidation) {
1467        return;
1468    }
1469
1470    let Some(span_text) = src.get(diagnostic.span.start..diagnostic.span.end) else {
1471        return;
1472    };
1473    if !span_text.starts_with('\\') {
1474        return;
1475    }
1476
1477    let Some(argument_span) = find_argument_surface_span(
1478        lexed.get_or_insert_with(|| lex_source(src)),
1479        diagnostic.span.end,
1480    ) else {
1481        return;
1482    };
1483    diagnostic.span = argument_span;
1484}
1485
1486fn supplement_inner_content_error_span(
1487    kind: Option<ParseDiagnosticKind>,
1488    src: &str,
1489    lexed: &mut Option<LexedSource>,
1490    diagnostic: &mut ParseDiagnostic,
1491) {
1492    if !matches!(
1493        kind,
1494        Some(ParseDiagnosticKind::CommandModeError | ParseDiagnosticKind::TextScriptError)
1495    ) {
1496        return;
1497    }
1498
1499    let Some(span_text) = src.get(diagnostic.span.start..diagnostic.span.end) else {
1500        return;
1501    };
1502    if !span_text.starts_with('\\') {
1503        return;
1504    }
1505
1506    let Some(argument_span) = find_argument_surface_span(
1507        lexed.get_or_insert_with(|| lex_source(src)),
1508        diagnostic.span.end,
1509    ) else {
1510        return;
1511    };
1512
1513    if kind == Some(ParseDiagnosticKind::TextScriptError)
1514        && let Some(span) = find_first_script_marker_in_span(src, argument_span.clone())
1515    {
1516        diagnostic.span = span;
1517        return;
1518    }
1519
1520    let Some(command_name) = diagnostic
1521        .message
1522        .strip_prefix("Command ")
1523        .and_then(|rest| rest.split(" is not allowed in ").next())
1524    else {
1525        return;
1526    };
1527
1528    if span_text == command_name {
1529        return;
1530    }
1531
1532    if let Some(span) = find_command_name_in_span(src, argument_span, command_name) {
1533        diagnostic.span = span;
1534    }
1535}
1536
1537fn find_first_script_marker_in_span(src: &str, span: Span) -> Option<Span> {
1538    let slice = src.get(span.start..span.end)?;
1539    let offset = slice.find(['^', '_'])?;
1540    Some(Span {
1541        start: span.start + offset,
1542        end: span.start + offset + 1,
1543    })
1544}
1545
1546fn find_command_name_in_span(src: &str, span: Span, command_name: &str) -> Option<Span> {
1547    let slice = src.get(span.start..span.end)?;
1548    let offset = slice.find(command_name)?;
1549    Some(Span {
1550        start: span.start + offset,
1551        end: span.start + offset + command_name.len(),
1552    })
1553}
1554
1555fn find_argument_surface_span(tokens: &LexedSource, after: usize) -> Option<Span> {
1556    let mut index = 0;
1557    while index < tokens.len() && tokens[index].1.end <= after {
1558        index += 1;
1559    }
1560    while matches!(tokens.get(index), Some((Token::Whitespaces, _))) {
1561        index += 1;
1562    }
1563
1564    let (token, span) = tokens.get(index)?;
1565
1566    match token {
1567        Token::LBracket => {
1568            let mut brace_depth = 0usize;
1569            let mut bracket_depth = 0usize;
1570            let start = span.start;
1571            for (token, span) in tokens.iter().skip(index + 1) {
1572                match token {
1573                    Token::LBracket if brace_depth == 0 => bracket_depth += 1,
1574                    Token::RBracket if brace_depth == 0 => {
1575                        if bracket_depth == 0 {
1576                            return Some(Span {
1577                                start,
1578                                end: span.end,
1579                            });
1580                        }
1581                        bracket_depth -= 1;
1582                    }
1583                    Token::LBrace => brace_depth += 1,
1584                    Token::RBrace if brace_depth > 0 => brace_depth -= 1,
1585                    _ => {}
1586                }
1587            }
1588            None
1589        }
1590        Token::LBrace => {
1591            let mut depth = 0usize;
1592            let start = span.start;
1593            for (token, span) in tokens.iter().skip(index + 1) {
1594                match token {
1595                    Token::LBrace => depth += 1,
1596                    Token::RBrace => {
1597                        if depth == 0 {
1598                            return Some(Span {
1599                                start,
1600                                end: span.end,
1601                            });
1602                        }
1603                        depth -= 1;
1604                    }
1605                    _ => {}
1606                }
1607            }
1608            None
1609        }
1610        _ => None,
1611    }
1612}
1613
1614fn find_invalid_left_context(
1615    ctx: &ParseContext,
1616    tokens: &LexedSource,
1617) -> Option<(Span, Option<Span>)> {
1618    let mut environment_stack = Vec::new();
1619    let mut index = 0;
1620
1621    while index < tokens.len() {
1622        match &tokens[index].0 {
1623            Token::ControlSeq(name) if name == "begin" => {
1624                environment_stack.push(environment_body_start(tokens, index));
1625            }
1626            Token::ControlSeq(name) if name == "end" => {
1627                environment_stack.pop();
1628            }
1629            Token::ControlSeq(name) if name == "left" => {
1630                let mut next = index + 1;
1631                while matches!(tokens.get(next), Some((Token::Whitespaces, _))) {
1632                    next += 1;
1633                }
1634
1635                let Some((token, token_span)) = tokens.get(next) else {
1636                    let left_span = Span {
1637                        start: tokens[index].1.start,
1638                        end: tokens[index].1.end,
1639                    };
1640                    let env_span = environment_stack.last().map(|start| Span {
1641                        start: *start,
1642                        end: left_span.end,
1643                    });
1644                    return Some((left_span, env_span));
1645                };
1646
1647                let is_valid_delimiter = match token {
1648                    Token::Char(c) => ctx
1649                        .lookup_delimiter(c.to_string().as_str(), false, ContentMode::Math)
1650                        .is_some(),
1651                    Token::LBracket => ctx
1652                        .lookup_delimiter("[", false, ContentMode::Math)
1653                        .is_some(),
1654                    Token::RBracket => ctx
1655                        .lookup_delimiter("]", false, ContentMode::Math)
1656                        .is_some(),
1657                    Token::ControlSeq(name) => ctx
1658                        .lookup_delimiter(name.as_str(), true, ContentMode::Math)
1659                        .is_some(),
1660                    _ => false,
1661                };
1662
1663                if !is_valid_delimiter {
1664                    let left_span = Span {
1665                        start: tokens[index].1.start,
1666                        end: token_span.end,
1667                    };
1668                    let env_span = environment_stack.last().map(|start| Span {
1669                        start: *start,
1670                        end: token_span.end,
1671                    });
1672                    return Some((left_span, env_span));
1673                }
1674            }
1675            _ => {}
1676        }
1677
1678        index += 1;
1679    }
1680
1681    None
1682}
1683
1684fn find_environment_name_mismatch(
1685    tokens: &LexedSource,
1686    target_span: Span,
1687) -> Option<(String, String, Span)> {
1688    let mut stack = Vec::new();
1689    let mut index = 0;
1690
1691    while index < tokens.len() {
1692        let Some((Token::ControlSeq(head), _)) = tokens.get(index) else {
1693            index += 1;
1694            continue;
1695        };
1696
1697        if !matches!(head.as_str(), "begin" | "end") {
1698            index += 1;
1699            continue;
1700        }
1701
1702        let mut next = index + 1;
1703        while matches!(tokens.get(next), Some((Token::Whitespaces, _))) {
1704            next += 1;
1705        }
1706        if !matches!(tokens.get(next), Some((Token::LBrace, _))) {
1707            index += 1;
1708            continue;
1709        }
1710        next += 1;
1711
1712        let mut env_name = String::new();
1713        while let Some((token, _)) = tokens.get(next) {
1714            match token {
1715                Token::Char(c) => env_name.push(*c),
1716                Token::Star => env_name.push('*'),
1717                Token::RBrace => break,
1718                _ => {
1719                    env_name.clear();
1720                    break;
1721                }
1722            }
1723            next += 1;
1724        }
1725
1726        if env_name.is_empty() {
1727            index += 1;
1728            continue;
1729        }
1730
1731        if head == "begin" {
1732            stack.push(env_name);
1733        } else if let Some(expected) = stack.last() {
1734            if expected == &env_name {
1735                stack.pop();
1736            } else {
1737                let mismatch_closer_span = Span {
1738                    start: tokens[next].1.start,
1739                    end: tokens[next].1.end,
1740                };
1741                if mismatch_closer_span.start != target_span.start
1742                    || mismatch_closer_span.end != target_span.end
1743                {
1744                    index += 1;
1745                    continue;
1746                }
1747
1748                return Some((
1749                    expected.clone(),
1750                    env_name,
1751                    Span {
1752                        start: tokens[index].1.start,
1753                        end: tokens[next].1.end,
1754                    },
1755                ));
1756            }
1757        }
1758
1759        index += 1;
1760    }
1761
1762    None
1763}
1764
1765fn find_unknown_environment_at_span(
1766    ctx: &ParseContext,
1767    tokens: &LexedSource,
1768    target_span: Span,
1769) -> Option<(String, Span)> {
1770    let mut index = 0;
1771    while index < tokens.len() {
1772        let Some((Token::ControlSeq(name), begin_span)) = tokens.get(index) else {
1773            index += 1;
1774            continue;
1775        };
1776
1777        if name != "begin"
1778            || begin_span.start != target_span.start
1779            || begin_span.end != target_span.end
1780        {
1781            index += 1;
1782            continue;
1783        }
1784
1785        index += 1;
1786        while matches!(tokens.get(index), Some((Token::Whitespaces, _))) {
1787            index += 1;
1788        }
1789
1790        let Some((Token::LBrace, _)) = tokens.get(index) else {
1791            return None;
1792        };
1793        index += 1;
1794
1795        let name_start = tokens.get(index)?.1.start;
1796        let mut parsed_name = String::new();
1797        let mut name_end = name_start;
1798        while let Some((token, span)) = tokens.get(index) {
1799            match token {
1800                Token::Char(ch) => {
1801                    parsed_name.push(*ch);
1802                    name_end = span.end;
1803                    index += 1;
1804                }
1805                Token::Star => {
1806                    parsed_name.push('*');
1807                    name_end = span.end;
1808                    index += 1;
1809                }
1810                Token::RBrace => break,
1811                _ => return None,
1812            }
1813        }
1814
1815        if parsed_name.is_empty() || ctx.knows_env_name(parsed_name.as_str()) {
1816            return None;
1817        }
1818
1819        return Some((
1820            parsed_name,
1821            Span {
1822                start: name_start,
1823                end: name_end,
1824            },
1825        ));
1826    }
1827
1828    None
1829}
1830
1831fn find_first_known_but_disallowed_environment(
1832    ctx: &ParseContext,
1833    tokens: &LexedSource,
1834) -> Option<(String, ContentMode, Span)> {
1835    let mut index = 0;
1836    while index < tokens.len() {
1837        let Some((Token::ControlSeq(name), head_span)) = tokens.get(index) else {
1838            index += 1;
1839            continue;
1840        };
1841        if name != "begin" {
1842            index += 1;
1843            continue;
1844        }
1845
1846        let begin_start = head_span.start;
1847        index += 1;
1848        while matches!(tokens.get(index), Some((Token::Whitespaces, _))) {
1849            index += 1;
1850        }
1851        if !matches!(tokens.get(index), Some((Token::LBrace, _))) {
1852            continue;
1853        }
1854        index += 1;
1855
1856        let mut parsed_name = String::new();
1857        while let Some((token, _)) = tokens.get(index) {
1858            match token {
1859                Token::Char(ch) => {
1860                    parsed_name.push(*ch);
1861                    index += 1;
1862                }
1863                Token::Star => {
1864                    parsed_name.push('*');
1865                    index += 1;
1866                }
1867                Token::RBrace => break,
1868                _ => return None,
1869            }
1870        }
1871
1872        let Some((Token::RBrace, close_span)) = tokens.get(index) else {
1873            return None;
1874        };
1875        if parsed_name.is_empty() {
1876            index += 1;
1877            continue;
1878        }
1879
1880        let math_known = ctx
1881            .lookup_env(parsed_name.as_str(), ContentMode::Math)
1882            .is_some();
1883        let text_known = ctx
1884            .lookup_env(parsed_name.as_str(), ContentMode::Text)
1885            .is_some();
1886        let disallowed_mode = match (math_known, text_known) {
1887            (false, true) => ContentMode::Math,
1888            (true, false) => ContentMode::Text,
1889            _ => {
1890                index += 1;
1891                continue;
1892            }
1893        };
1894
1895        return Some((
1896            parsed_name,
1897            disallowed_mode,
1898            Span {
1899                start: begin_start,
1900                end: close_span.end,
1901            },
1902        ));
1903    }
1904
1905    None
1906}
1907
1908fn find_environment_mode_error_at_span(
1909    ctx: &ParseContext,
1910    tokens: &LexedSource,
1911    target_span: Span,
1912) -> Option<(String, ContentMode, Span)> {
1913    let mut index = 0;
1914    while index < tokens.len() {
1915        let Some((Token::ControlSeq(name), _)) = tokens.get(index) else {
1916            index += 1;
1917            continue;
1918        };
1919        if name != "begin" {
1920            index += 1;
1921            continue;
1922        }
1923
1924        let begin_start = tokens[index].1.start;
1925        index += 1;
1926        while matches!(tokens.get(index), Some((Token::Whitespaces, _))) {
1927            index += 1;
1928        }
1929        if !matches!(tokens.get(index), Some((Token::LBrace, _))) {
1930            continue;
1931        }
1932        index += 1;
1933
1934        let mut parsed_name = String::new();
1935        while let Some((token, _)) = tokens.get(index) {
1936            match token {
1937                Token::Char(ch) => {
1938                    parsed_name.push(*ch);
1939                    index += 1;
1940                }
1941                Token::Star => {
1942                    parsed_name.push('*');
1943                    index += 1;
1944                }
1945                Token::RBrace => break,
1946                _ => return None,
1947            }
1948        }
1949
1950        let Some((Token::RBrace, close_span)) = tokens.get(index) else {
1951            return None;
1952        };
1953
1954        let matches_target =
1955            close_span.start == target_span.start || close_span.end == target_span.end;
1956        if !matches_target || parsed_name.is_empty() {
1957            index += 1;
1958            continue;
1959        }
1960
1961        let math_known = ctx
1962            .lookup_env(parsed_name.as_str(), ContentMode::Math)
1963            .is_some();
1964        let text_known = ctx
1965            .lookup_env(parsed_name.as_str(), ContentMode::Text)
1966            .is_some();
1967        let disallowed_mode = match (math_known, text_known) {
1968            (false, true) => ContentMode::Math,
1969            (true, false) => ContentMode::Text,
1970            _ => return None,
1971        };
1972
1973        return Some((
1974            parsed_name,
1975            disallowed_mode,
1976            Span {
1977                start: begin_start,
1978                end: close_span.end,
1979            },
1980        ));
1981    }
1982
1983    None
1984}
1985
1986fn environment_body_start(tokens: &[(Token, std::ops::Range<usize>)], begin_index: usize) -> usize {
1987    let mut index = begin_index + 1;
1988    while matches!(tokens.get(index), Some((Token::Whitespaces, _))) {
1989        index += 1;
1990    }
1991
1992    if !matches!(tokens.get(index), Some((Token::LBrace, _))) {
1993        return tokens[begin_index].1.start;
1994    }
1995    index += 1;
1996
1997    while let Some((token, span)) = tokens.get(index) {
1998        if matches!(token, Token::RBrace) {
1999            return span.end;
2000        }
2001        index += 1;
2002    }
2003
2004    tokens[begin_index].1.start
2005}
2006
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a diagnostic of `kind` with nothing found and no contexts attached.
    fn diagnostic_of(
        kind: ParseDiagnosticKind,
        message: &str,
        span: Span,
        expected: &[&str],
    ) -> ParseDiagnostic {
        ParseDiagnostic {
            kind: Some(kind),
            message: message.to_string(),
            span,
            expected: expected.iter().map(|&item| item.to_string()).collect(),
            found: None,
            contexts: Vec::new(),
        }
    }

    /// An unclosed `$` reported at end of input must come back with `'$'` as the
    /// found token in the message and `end of input` in the expected list.
    #[test]
    fn eof_unclosed_inline_math_is_normalized() {
        let kind = ParseDiagnosticKind::UnclosedInlineMath;
        let mut diagnostic = diagnostic_of(
            kind,
            "found end of input expected something else, or '$'",
            Span { start: 0, end: 2 },
            &["something else", "'$'"],
        );

        supplement_diagnostic_contexts(&ParseContext::empty(), "$x", Some(kind), &mut diagnostic);

        assert_eq!(
            diagnostic.message,
            "found '$' expected something else, or end of input"
        );
        assert_eq!(diagnostic.expected, ["something else", "end of input"]);
        assert!(diagnostic.found.is_none());
    }

    /// The span of an argument-validation diagnostic must be adjusted even when
    /// the message text carries no recognisable marker, only the kind.
    #[test]
    fn argument_validation_span_uses_kind_not_message() {
        let kind = ParseDiagnosticKind::ArgumentValidation;
        let mut diagnostic = diagnostic_of(
            kind,
            "argument value was rejected",
            Span { start: 0, end: 7 },
            &[],
        );

        supplement_diagnostic_contexts(
            &ParseContext::empty(),
            "\\hspace{bad}",
            Some(kind),
            &mut diagnostic,
        );

        assert_eq!(diagnostic.span, Span { start: 7, end: 12 });
    }
}
2054        );
2055
2056        assert_eq!(diagnostic.span, Span { start: 7, end: 12 });
2057    }
2058}