Skip to main content

mdwright_latex/
registry.rs

//! MathJax-style command registry and Unicode vocabulary.
//!
//! The registry is data, not parser code. It classifies commands by
//! category, support status, argument shape, and Unicode spelling so
//! parsing, rendering, linting, and source translation can ask narrow
//! questions without owning parallel tables.
7
/// MathJax-style command category.
///
/// Every registry entry carries exactly one category; the examples on the
/// variants below are commands that the registry table files under them.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum CommandCategory {
    /// Direct symbol such as `\infty`.
    Symbol,
    /// Greek letter or variant, such as `\alpha` or `\vartheta`.
    Greek,
    /// Binary operator, such as `\times`.
    BinaryOperator,
    /// Relation symbol, such as `\leq`.
    Relation,
    /// Arrow symbol, such as `\to`.
    Arrow,
    /// Delimiter command, such as `\langle`.
    Delimiter,
    /// Large operator, such as `\sum`.
    LargeOperator,
    /// Accent command that owns one argument, such as `\hat`.
    Accent,
    /// Spacing command, such as `\quad`.
    Spacing,
    /// Function/operator name, such as `\sin`.
    Function,
    /// Font or style command, such as `\mathbb`.
    Font,
    /// Environment name, such as `matrix`.
    Environment,
    /// Structural TeX command parsed specially, such as `\frac`.
    Structural,
    /// Known `MathJax` command outside mdwright's Unicode subset,
    /// such as `\newcommand`.
    Unsupported,
}
40
/// Argument shape for a command or environment.
///
/// The examples on the variants are entries that the registry table records
/// with that shape.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum ArgumentShape {
    /// No arguments.
    None,
    /// One required argument (the table uses this for `hat`, among others).
    OneRequired,
    /// Two required arguments (the table uses this for `frac`).
    TwoRequired,
    /// Optional argument followed by one required argument (the table uses
    /// this for `sqrt`).
    OptionalThenRequired,
    /// Environment body with rows and cells.
    EnvironmentBody,
    /// Macro-like or otherwise variable argument shape (the table uses this
    /// for `newcommand`).
    Variable,
}
57
/// Whether mdwright can currently interpret a known command.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum SupportStatus {
    /// Command maps directly to Unicode text.
    DirectUnicode,
    /// Parser has a typed construct for this command.
    ParsedConstruct,
    /// Command is recognised but intentionally produces no visible text.
    RecognisedNoOutput,
    /// Command is known from MathJax-style input but unsupported here.
    Unsupported,
}
70
71/// Public, copyable view of one command registry entry.
72#[derive(Clone, Copy, Debug, PartialEq, Eq)]
73pub struct CommandInfo {
74    name: &'static str,
75    category: CommandCategory,
76    arguments: ArgumentShape,
77    unicode: Option<&'static str>,
78    preferred: &'static str,
79    support: SupportStatus,
80    package: &'static str,
81}
82
83impl CommandInfo {
84    /// Command name without the leading backslash.
85    #[must_use]
86    pub const fn name(self) -> &'static str {
87        self.name
88    }
89
90    /// Registry category.
91    #[must_use]
92    pub const fn category(self) -> CommandCategory {
93        self.category
94    }
95
96    /// Argument shape.
97    #[must_use]
98    pub const fn arguments(self) -> ArgumentShape {
99        self.arguments
100    }
101
102    /// Direct Unicode output, when the command has one.
103    #[must_use]
104    pub const fn unicode(self) -> Option<&'static str> {
105        self.unicode
106    }
107
108    /// Preferred LaTeX spelling for reverse translation.
109    #[must_use]
110    pub const fn preferred(self) -> &'static str {
111        self.preferred
112    }
113
114    /// Current mdwright support status.
115    #[must_use]
116    pub const fn support(self) -> SupportStatus {
117        self.support
118    }
119
120    /// `MathJax` package/source classification used as a coverage note.
121    #[must_use]
122    pub const fn package(self) -> &'static str {
123        self.package
124    }
125}
126
/// A piece of LaTeX source carried as a static string.
///
/// NOTE(review): presumably `Command` holds a command name and `Raw` holds
/// literal source text — confirm against the code that constructs and
/// renders these values.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) enum LatexSourceFragment {
    /// Fragment tagged as a command.
    Command(&'static str),
    /// Fragment tagged as raw source.
    Raw(&'static str),
}
132
/// How an operator word is classified.
///
/// NOTE(review): presumably distinguishes words that have their own built-in
/// command from words that must be written through `\operatorname` — confirm
/// against the callers.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) enum OperatorWordKind {
    /// The word is classified as a built-in command.
    BuiltInCommand,
    /// The word is classified as an operator name.
    OperatorName,
}
138
139#[derive(Clone, Copy, Debug, PartialEq, Eq)]
140pub(crate) struct OperatorWordInfo {
141    pub(crate) source: LatexSourceFragment,
142    pub(crate) kind: OperatorWordKind,
143}
144
/// Styled math alphabet that a character can belong to.
///
/// NOTE(review): the variant names appear to follow the Unicode mathematical
/// alphanumeric styles — confirm against the alphabet tables that use them.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) enum MathAlphabetStyle {
    Bold,
    Italic,
    BoldItalic,
    Script,
    BoldScript,
    Fraktur,
    DoubleStruck,
    BoldFraktur,
    Sans,
    SansBold,
    SansItalic,
    SansBoldItalic,
    Monospace,
}
161
162#[derive(Clone, Copy, Debug, PartialEq, Eq)]
163pub(crate) struct MathAlphabetChar {
164    pub style: MathAlphabetStyle,
165    pub base: char,
166}
167
168#[derive(Clone, Copy)]
169struct CommandEntry {
170    name: &'static str,
171    category: CommandCategory,
172    arguments: ArgumentShape,
173    unicode: Option<&'static str>,
174    preferred: &'static str,
175    support: SupportStatus,
176    package: &'static str,
177}
178
179impl CommandEntry {
180    const fn direct(
181        name: &'static str,
182        category: CommandCategory,
183        unicode: &'static str,
184        preferred: &'static str,
185        package: &'static str,
186    ) -> Self {
187        Self {
188            name,
189            category,
190            arguments: ArgumentShape::None,
191            unicode: Some(unicode),
192            preferred,
193            support: SupportStatus::DirectUnicode,
194            package,
195        }
196    }
197
198    const fn parsed(
199        name: &'static str,
200        category: CommandCategory,
201        arguments: ArgumentShape,
202        package: &'static str,
203    ) -> Self {
204        Self {
205            name,
206            category,
207            arguments,
208            unicode: None,
209            preferred: name,
210            support: SupportStatus::ParsedConstruct,
211            package,
212        }
213    }
214
215    const fn no_output(name: &'static str, category: CommandCategory, package: &'static str) -> Self {
216        Self {
217            name,
218            category,
219            arguments: ArgumentShape::None,
220            unicode: None,
221            preferred: name,
222            support: SupportStatus::RecognisedNoOutput,
223            package,
224        }
225    }
226
227    const fn unsupported(
228        name: &'static str,
229        category: CommandCategory,
230        arguments: ArgumentShape,
231        package: &'static str,
232    ) -> Self {
233        Self {
234            name,
235            category,
236            arguments,
237            unicode: None,
238            preferred: name,
239            support: SupportStatus::Unsupported,
240            package,
241        }
242    }
243
244    const fn info(self) -> CommandInfo {
245        CommandInfo {
246            name: self.name,
247            category: self.category,
248            arguments: self.arguments,
249            unicode: self.unicode,
250            preferred: self.preferred,
251            support: self.support,
252            package: self.package,
253        }
254    }
255}
256
// Package labels stored in the `package` field of registry entries.

/// Package label `"base"`.
const BASE: &str = "base";
/// Package label `"ams"`.
const AMS: &str = "ams";
/// Package label `"mathtools"`.
const MATHTOOLS: &str = "mathtools";
/// Package label `"text-base"`.
const TEXT_BASE: &str = "text-base";
261
262const COMMANDS: &[CommandEntry] = &[
263    // Structural commands parsed by the recursive-descent parser.
264    CommandEntry::parsed("frac", CommandCategory::Structural, ArgumentShape::TwoRequired, BASE),
265    CommandEntry::parsed("dfrac", CommandCategory::Structural, ArgumentShape::TwoRequired, AMS),
266    CommandEntry::parsed("tfrac", CommandCategory::Structural, ArgumentShape::TwoRequired, AMS),
267    CommandEntry::parsed(
268        "sqrt",
269        CommandCategory::Structural,
270        ArgumentShape::OptionalThenRequired,
271        BASE,
272    ),
273    CommandEntry::parsed("left", CommandCategory::Structural, ArgumentShape::OneRequired, BASE),
274    CommandEntry::parsed("right", CommandCategory::Structural, ArgumentShape::OneRequired, BASE),
275    CommandEntry::parsed("begin", CommandCategory::Structural, ArgumentShape::OneRequired, BASE),
276    CommandEntry::parsed("end", CommandCategory::Structural, ArgumentShape::OneRequired, BASE),
277    // Accents.
278    CommandEntry::parsed("hat", CommandCategory::Accent, ArgumentShape::OneRequired, BASE),
279    CommandEntry::parsed("widehat", CommandCategory::Accent, ArgumentShape::OneRequired, BASE),
280    CommandEntry::parsed("bar", CommandCategory::Accent, ArgumentShape::OneRequired, BASE),
281    CommandEntry::parsed("overline", CommandCategory::Accent, ArgumentShape::OneRequired, BASE),
282    CommandEntry::parsed("tilde", CommandCategory::Accent, ArgumentShape::OneRequired, BASE),
283    CommandEntry::parsed("widetilde", CommandCategory::Accent, ArgumentShape::OneRequired, BASE),
284    CommandEntry::parsed("vec", CommandCategory::Accent, ArgumentShape::OneRequired, BASE),
285    CommandEntry::parsed("dot", CommandCategory::Accent, ArgumentShape::OneRequired, BASE),
286    CommandEntry::parsed("ddot", CommandCategory::Accent, ArgumentShape::OneRequired, BASE),
287    CommandEntry::parsed("acute", CommandCategory::Accent, ArgumentShape::OneRequired, BASE),
288    CommandEntry::parsed("grave", CommandCategory::Accent, ArgumentShape::OneRequired, BASE),
289    CommandEntry::parsed("breve", CommandCategory::Accent, ArgumentShape::OneRequired, BASE),
290    CommandEntry::parsed("check", CommandCategory::Accent, ArgumentShape::OneRequired, BASE),
291    // Greek letters and variants.
292    CommandEntry::direct("alpha", CommandCategory::Greek, "α", "alpha", BASE),
293    CommandEntry::direct("beta", CommandCategory::Greek, "β", "beta", BASE),
294    CommandEntry::direct("gamma", CommandCategory::Greek, "γ", "gamma", BASE),
295    CommandEntry::direct("delta", CommandCategory::Greek, "δ", "delta", BASE),
296    CommandEntry::direct("epsilon", CommandCategory::Greek, "ε", "epsilon", BASE),
297    CommandEntry::direct("varepsilon", CommandCategory::Greek, "ϵ", "varepsilon", BASE),
298    CommandEntry::direct("zeta", CommandCategory::Greek, "ζ", "zeta", BASE),
299    CommandEntry::direct("eta", CommandCategory::Greek, "η", "eta", BASE),
300    CommandEntry::direct("theta", CommandCategory::Greek, "θ", "theta", BASE),
301    CommandEntry::direct("vartheta", CommandCategory::Greek, "ϑ", "vartheta", BASE),
302    CommandEntry::direct("iota", CommandCategory::Greek, "ι", "iota", BASE),
303    CommandEntry::direct("kappa", CommandCategory::Greek, "κ", "kappa", BASE),
304    CommandEntry::direct("varkappa", CommandCategory::Greek, "ϰ", "varkappa", AMS),
305    CommandEntry::direct("lambda", CommandCategory::Greek, "λ", "lambda", BASE),
306    CommandEntry::direct("mu", CommandCategory::Greek, "μ", "mu", BASE),
307    CommandEntry::direct("nu", CommandCategory::Greek, "ν", "nu", BASE),
308    CommandEntry::direct("xi", CommandCategory::Greek, "ξ", "xi", BASE),
309    CommandEntry::direct("pi", CommandCategory::Greek, "π", "pi", BASE),
310    CommandEntry::direct("varpi", CommandCategory::Greek, "ϖ", "varpi", BASE),
311    CommandEntry::direct("rho", CommandCategory::Greek, "ρ", "rho", BASE),
312    CommandEntry::direct("varrho", CommandCategory::Greek, "ϱ", "varrho", BASE),
313    CommandEntry::direct("sigma", CommandCategory::Greek, "σ", "sigma", BASE),
314    CommandEntry::direct("varsigma", CommandCategory::Greek, "ς", "varsigma", BASE),
315    CommandEntry::direct("tau", CommandCategory::Greek, "τ", "tau", BASE),
316    CommandEntry::direct("upsilon", CommandCategory::Greek, "υ", "upsilon", BASE),
317    CommandEntry::direct("phi", CommandCategory::Greek, "φ", "phi", BASE),
318    CommandEntry::direct("varphi", CommandCategory::Greek, "ϕ", "varphi", BASE),
319    CommandEntry::direct("chi", CommandCategory::Greek, "χ", "chi", BASE),
320    CommandEntry::direct("psi", CommandCategory::Greek, "ψ", "psi", BASE),
321    CommandEntry::direct("omega", CommandCategory::Greek, "ω", "omega", BASE),
322    CommandEntry::direct("Gamma", CommandCategory::Greek, "Γ", "Gamma", BASE),
323    CommandEntry::direct("Delta", CommandCategory::Greek, "Δ", "Delta", BASE),
324    CommandEntry::direct("Theta", CommandCategory::Greek, "Θ", "Theta", BASE),
325    CommandEntry::direct("Lambda", CommandCategory::Greek, "Λ", "Lambda", BASE),
326    CommandEntry::direct("Xi", CommandCategory::Greek, "Ξ", "Xi", BASE),
327    CommandEntry::direct("Pi", CommandCategory::Greek, "Π", "Pi", BASE),
328    CommandEntry::direct("Sigma", CommandCategory::Greek, "Σ", "Sigma", BASE),
329    CommandEntry::direct("Upsilon", CommandCategory::Greek, "Υ", "Upsilon", BASE),
330    CommandEntry::direct("Phi", CommandCategory::Greek, "Φ", "Phi", BASE),
331    CommandEntry::direct("Psi", CommandCategory::Greek, "Ψ", "Psi", BASE),
332    CommandEntry::direct("Omega", CommandCategory::Greek, "Ω", "Omega", BASE),
333    // Binary operators.
334    CommandEntry::direct("pm", CommandCategory::BinaryOperator, "±", "pm", BASE),
335    CommandEntry::direct("mp", CommandCategory::BinaryOperator, "∓", "mp", BASE),
336    CommandEntry::direct("times", CommandCategory::BinaryOperator, "×", "times", BASE),
337    CommandEntry::direct("div", CommandCategory::BinaryOperator, "÷", "div", BASE),
338    CommandEntry::direct("cdot", CommandCategory::BinaryOperator, "⋅", "cdot", BASE),
339    CommandEntry::direct("circ", CommandCategory::BinaryOperator, "∘", "circ", BASE),
340    CommandEntry::direct("bullet", CommandCategory::BinaryOperator, "•", "bullet", BASE),
341    CommandEntry::direct("ast", CommandCategory::BinaryOperator, "∗", "ast", BASE),
342    CommandEntry::direct("star", CommandCategory::BinaryOperator, "⋆", "star", BASE),
343    CommandEntry::direct("wedge", CommandCategory::BinaryOperator, "∧", "wedge", BASE),
344    CommandEntry::direct("land", CommandCategory::BinaryOperator, "∧", "wedge", BASE),
345    CommandEntry::direct("vee", CommandCategory::BinaryOperator, "∨", "vee", BASE),
346    CommandEntry::direct("lor", CommandCategory::BinaryOperator, "∨", "vee", BASE),
347    CommandEntry::direct("cap", CommandCategory::BinaryOperator, "∩", "cap", BASE),
348    CommandEntry::direct("cup", CommandCategory::BinaryOperator, "∪", "cup", BASE),
349    CommandEntry::direct("sqcup", CommandCategory::BinaryOperator, "⊔", "sqcup", AMS),
350    CommandEntry::direct("setminus", CommandCategory::BinaryOperator, "∖", "setminus", BASE),
351    CommandEntry::direct("oplus", CommandCategory::BinaryOperator, "⊕", "oplus", BASE),
352    CommandEntry::direct("bigoplus", CommandCategory::LargeOperator, "⨁", "bigoplus", BASE),
353    CommandEntry::direct("otimes", CommandCategory::BinaryOperator, "⊗", "otimes", BASE),
354    CommandEntry::direct("bigotimes", CommandCategory::LargeOperator, "⨂", "bigotimes", BASE),
355    CommandEntry::direct("boxtimes", CommandCategory::BinaryOperator, "⊠", "boxtimes", AMS),
356    CommandEntry::direct("ominus", CommandCategory::BinaryOperator, "⊖", "ominus", BASE),
357    CommandEntry::direct("oslash", CommandCategory::BinaryOperator, "⊘", "oslash", BASE),
358    CommandEntry::direct("odot", CommandCategory::BinaryOperator, "⊙", "odot", BASE),
359    CommandEntry::direct("amalg", CommandCategory::BinaryOperator, "⨿", "amalg", BASE),
360    // Relations.
361    CommandEntry::direct("leq", CommandCategory::Relation, "≤", "leq", BASE),
362    CommandEntry::direct("le", CommandCategory::Relation, "≤", "leq", BASE),
363    CommandEntry::direct("leqslant", CommandCategory::Relation, "⩽", "leqslant", AMS),
364    CommandEntry::direct(
365        "nleqslant",
366        CommandCategory::Relation,
367        "\u{2A7D}\u{0338}",
368        "nleqslant",
369        AMS,
370    ),
371    CommandEntry::direct("geq", CommandCategory::Relation, "≥", "geq", BASE),
372    CommandEntry::direct("ge", CommandCategory::Relation, "≥", "geq", BASE),
373    CommandEntry::direct("geqslant", CommandCategory::Relation, "⩾", "geqslant", AMS),
374    CommandEntry::direct(
375        "ngeqslant",
376        CommandCategory::Relation,
377        "\u{2A7E}\u{0338}",
378        "ngeqslant",
379        AMS,
380    ),
381    CommandEntry::direct("neq", CommandCategory::Relation, "≠", "neq", BASE),
382    CommandEntry::direct("ne", CommandCategory::Relation, "≠", "neq", BASE),
383    CommandEntry::direct("equiv", CommandCategory::Relation, "≡", "equiv", BASE),
384    CommandEntry::direct("sim", CommandCategory::Relation, "∼", "sim", BASE),
385    CommandEntry::direct("simeq", CommandCategory::Relation, "≃", "simeq", BASE),
386    CommandEntry::direct("approx", CommandCategory::Relation, "≈", "approx", BASE),
387    CommandEntry::direct("cong", CommandCategory::Relation, "≅", "cong", BASE),
388    CommandEntry::direct("propto", CommandCategory::Relation, "∝", "propto", BASE),
389    CommandEntry::direct("in", CommandCategory::Relation, "∈", "in", BASE),
390    CommandEntry::direct("ni", CommandCategory::Relation, "∋", "ni", BASE),
391    CommandEntry::direct("notin", CommandCategory::Relation, "∉", "notin", BASE),
392    CommandEntry::direct("subset", CommandCategory::Relation, "⊂", "subset", BASE),
393    CommandEntry::direct("supset", CommandCategory::Relation, "⊃", "supset", BASE),
394    CommandEntry::direct("nsubset", CommandCategory::Relation, "⊄", "nsubset", AMS),
395    CommandEntry::direct("nsupset", CommandCategory::Relation, "⊅", "nsupset", AMS),
396    CommandEntry::direct("nsupseteq", CommandCategory::Relation, "⊉", "nsupseteq", AMS),
397    CommandEntry::direct("subsetneq", CommandCategory::Relation, "⊊", "subsetneq", AMS),
398    CommandEntry::direct("supsetneq", CommandCategory::Relation, "⊋", "supsetneq", AMS),
399    CommandEntry::direct("subseteq", CommandCategory::Relation, "⊆", "subseteq", BASE),
400    CommandEntry::direct("supseteq", CommandCategory::Relation, "⊇", "supseteq", BASE),
401    CommandEntry::direct("models", CommandCategory::Relation, "⊨", "models", AMS),
402    CommandEntry::direct("vdash", CommandCategory::Relation, "⊢", "vdash", BASE),
403    CommandEntry::direct("dashv", CommandCategory::Relation, "⊣", "dashv", BASE),
404    CommandEntry::direct("perp", CommandCategory::Relation, "⊥", "perp", BASE),
405    CommandEntry::direct("parallel", CommandCategory::Relation, "∥", "parallel", BASE),
406    CommandEntry::direct("mid", CommandCategory::Relation, "∣", "mid", BASE),
407    CommandEntry::direct("asymp", CommandCategory::Relation, "≍", "asymp", BASE),
408    CommandEntry::direct("prec", CommandCategory::Relation, "≺", "prec", AMS),
409    CommandEntry::direct("nprec", CommandCategory::Relation, "⊀", "nprec", AMS),
410    CommandEntry::direct("preceq", CommandCategory::Relation, "≼", "preceq", AMS),
411    CommandEntry::direct("succeq", CommandCategory::Relation, "≽", "succeq", AMS),
412    CommandEntry::direct("gg", CommandCategory::Relation, "≫", "gg", BASE),
413    // Arrows.
414    CommandEntry::direct("to", CommandCategory::Arrow, "→", "to", BASE),
415    CommandEntry::direct("rightarrow", CommandCategory::Arrow, "→", "to", BASE),
416    CommandEntry::direct("gets", CommandCategory::Arrow, "←", "leftarrow", BASE),
417    CommandEntry::direct("leftarrow", CommandCategory::Arrow, "←", "leftarrow", BASE),
418    CommandEntry::direct("mapsto", CommandCategory::Arrow, "↦", "mapsto", BASE),
419    CommandEntry::direct("leftrightarrow", CommandCategory::Arrow, "↔", "leftrightarrow", BASE),
420    CommandEntry::direct(
421        "twoheadrightarrow",
422        CommandCategory::Arrow,
423        "↠",
424        "twoheadrightarrow",
425        AMS,
426    ),
427    CommandEntry::direct("Rightarrow", CommandCategory::Arrow, "⇒", "Rightarrow", BASE),
428    CommandEntry::direct("Leftarrow", CommandCategory::Arrow, "⇐", "Leftarrow", BASE),
429    CommandEntry::direct("Leftrightarrow", CommandCategory::Arrow, "⇔", "Leftrightarrow", BASE),
430    CommandEntry::direct("longrightarrow", CommandCategory::Arrow, "⟶", "longrightarrow", BASE),
431    CommandEntry::direct("longleftarrow", CommandCategory::Arrow, "⟵", "longleftarrow", BASE),
432    CommandEntry::direct(
433        "longleftrightarrow",
434        CommandCategory::Arrow,
435        "⟷",
436        "longleftrightarrow",
437        BASE,
438    ),
439    CommandEntry::direct("Longrightarrow", CommandCategory::Arrow, "⟹", "Longrightarrow", BASE),
440    CommandEntry::direct("Longleftarrow", CommandCategory::Arrow, "⟸", "Longleftarrow", BASE),
441    CommandEntry::direct(
442        "Longleftrightarrow",
443        CommandCategory::Arrow,
444        "⟺",
445        "Longleftrightarrow",
446        BASE,
447    ),
448    CommandEntry::direct("hookrightarrow", CommandCategory::Arrow, "↪", "hookrightarrow", BASE),
449    CommandEntry::direct("hookleftarrow", CommandCategory::Arrow, "↩", "hookleftarrow", BASE),
450    CommandEntry::direct("uparrow", CommandCategory::Arrow, "↑", "uparrow", BASE),
451    CommandEntry::direct("downarrow", CommandCategory::Arrow, "↓", "downarrow", BASE),
452    CommandEntry::direct("updownarrow", CommandCategory::Arrow, "↕", "updownarrow", BASE),
453    CommandEntry::direct("dashrightarrow", CommandCategory::Arrow, "⇢", "dashrightarrow", AMS),
454    CommandEntry::direct("curvearrowright", CommandCategory::Arrow, "↷", "curvearrowright", AMS),
455    CommandEntry::direct("rightsquigarrow", CommandCategory::Arrow, "↝", "rightsquigarrow", AMS),
456    // Delimiters and set symbols.
457    CommandEntry::direct("langle", CommandCategory::Delimiter, "⟨", "langle", BASE),
458    CommandEntry::direct("rangle", CommandCategory::Delimiter, "⟩", "rangle", BASE),
459    CommandEntry::direct("lbrace", CommandCategory::Delimiter, "{", "lbrace", BASE),
460    CommandEntry::direct("rbrace", CommandCategory::Delimiter, "}", "rbrace", BASE),
461    CommandEntry::direct("lvert", CommandCategory::Delimiter, "|", "lvert", BASE),
462    CommandEntry::direct("rvert", CommandCategory::Delimiter, "|", "rvert", BASE),
463    CommandEntry::direct("Vert", CommandCategory::Delimiter, "‖", "Vert", BASE),
464    CommandEntry::direct("lVert", CommandCategory::Delimiter, "‖", "Vert", BASE),
465    CommandEntry::direct("rVert", CommandCategory::Delimiter, "‖", "Vert", BASE),
466    CommandEntry::direct("backslash", CommandCategory::Delimiter, "\\", "backslash", BASE),
467    CommandEntry::direct("emptyset", CommandCategory::Symbol, "∅", "emptyset", BASE),
468    CommandEntry::direct("varnothing", CommandCategory::Symbol, "∅", "emptyset", AMS),
469    // Large operators and miscellaneous symbols.
470    CommandEntry::direct("sum", CommandCategory::LargeOperator, "∑", "sum", BASE),
471    CommandEntry::direct("prod", CommandCategory::LargeOperator, "∏", "prod", BASE),
472    CommandEntry::direct("coprod", CommandCategory::LargeOperator, "∐", "coprod", BASE),
473    CommandEntry::direct("int", CommandCategory::LargeOperator, "∫", "int", BASE),
474    CommandEntry::direct("iint", CommandCategory::LargeOperator, "∬", "iint", AMS),
475    CommandEntry::direct("iiint", CommandCategory::LargeOperator, "∭", "iiint", AMS),
476    CommandEntry::direct("oint", CommandCategory::LargeOperator, "∮", "oint", BASE),
477    CommandEntry::direct("bigcup", CommandCategory::LargeOperator, "⋃", "bigcup", BASE),
478    CommandEntry::direct("bigcap", CommandCategory::LargeOperator, "⋂", "bigcap", BASE),
479    CommandEntry::direct("bigsqcup", CommandCategory::LargeOperator, "⨆", "bigsqcup", BASE),
480    CommandEntry::direct("bigvee", CommandCategory::LargeOperator, "⋁", "bigvee", BASE),
481    CommandEntry::direct("bigwedge", CommandCategory::LargeOperator, "⋀", "bigwedge", BASE),
482    CommandEntry::direct("partial", CommandCategory::Symbol, "∂", "partial", BASE),
483    CommandEntry::direct("nabla", CommandCategory::Symbol, "∇", "nabla", BASE),
484    CommandEntry::direct("infty", CommandCategory::Symbol, "∞", "infty", BASE),
485    CommandEntry::direct("prime", CommandCategory::Symbol, "′", "prime", BASE),
486    CommandEntry::direct("forall", CommandCategory::Symbol, "∀", "forall", BASE),
487    CommandEntry::direct("exists", CommandCategory::Symbol, "∃", "exists", BASE),
488    CommandEntry::direct("neg", CommandCategory::Symbol, "¬", "neg", BASE),
489    CommandEntry::direct("lnot", CommandCategory::Symbol, "¬", "neg", BASE),
490    CommandEntry::direct("angle", CommandCategory::Symbol, "∠", "angle", BASE),
491    CommandEntry::direct("aleph", CommandCategory::Symbol, "ℵ", "aleph", BASE),
492    CommandEntry::direct("beth", CommandCategory::Symbol, "ℶ", "beth", AMS),
493    CommandEntry::direct("ell", CommandCategory::Symbol, "ℓ", "ell", BASE),
494    CommandEntry::direct("hbar", CommandCategory::Symbol, "ℏ", "hbar", BASE),
495    CommandEntry::direct("imath", CommandCategory::Symbol, "ı", "imath", BASE),
496    CommandEntry::direct("jmath", CommandCategory::Symbol, "ȷ", "jmath", BASE),
497    CommandEntry::direct("Re", CommandCategory::Symbol, "ℜ", "Re", BASE),
498    CommandEntry::direct("Im", CommandCategory::Symbol, "ℑ", "Im", BASE),
499    CommandEntry::direct("wp", CommandCategory::Symbol, "℘", "wp", BASE),
500    CommandEntry::direct("cdots", CommandCategory::Symbol, "⋯", "cdots", BASE),
501    CommandEntry::direct("dots", CommandCategory::Symbol, "…", "cdots", BASE),
502    CommandEntry::direct("square", CommandCategory::Symbol, "□", "square", AMS),
503    CommandEntry::direct("complement", CommandCategory::Symbol, "∁", "complement", AMS),
504    CommandEntry::direct("sharp", CommandCategory::Symbol, "♯", "sharp", BASE),
505    CommandEntry::direct("flat", CommandCategory::Symbol, "♭", "flat", BASE),
506    CommandEntry::direct("natural", CommandCategory::Symbol, "♮", "natural", BASE),
507    CommandEntry::direct("wr", CommandCategory::BinaryOperator, "≀", "wr", BASE),
508    // Function names.
509    CommandEntry::direct("sin", CommandCategory::Function, "sin", "sin", BASE),
510    CommandEntry::direct("cos", CommandCategory::Function, "cos", "cos", BASE),
511    CommandEntry::direct("tan", CommandCategory::Function, "tan", "tan", BASE),
512    CommandEntry::direct("cot", CommandCategory::Function, "cot", "cot", BASE),
513    CommandEntry::direct("sec", CommandCategory::Function, "sec", "sec", BASE),
514    CommandEntry::direct("csc", CommandCategory::Function, "csc", "csc", BASE),
515    CommandEntry::direct("arcsin", CommandCategory::Function, "arcsin", "arcsin", BASE),
516    CommandEntry::direct("arccos", CommandCategory::Function, "arccos", "arccos", BASE),
517    CommandEntry::direct("arctan", CommandCategory::Function, "arctan", "arctan", BASE),
518    CommandEntry::direct("exp", CommandCategory::Function, "exp", "exp", BASE),
519    CommandEntry::direct("log", CommandCategory::Function, "log", "log", BASE),
520    CommandEntry::direct("ln", CommandCategory::Function, "ln", "ln", BASE),
521    CommandEntry::direct("lim", CommandCategory::Function, "lim", "lim", BASE),
522    CommandEntry::direct("arg", CommandCategory::Function, "arg", "arg", BASE),
523    CommandEntry::direct("det", CommandCategory::Function, "det", "det", BASE),
524    CommandEntry::direct("dim", CommandCategory::Function, "dim", "dim", BASE),
525    CommandEntry::direct("ker", CommandCategory::Function, "ker", "ker", BASE),
526    CommandEntry::direct("im", CommandCategory::Function, "im", "im", BASE),
527    CommandEntry::direct("coker", CommandCategory::Function, "coker", "coker", AMS),
528    CommandEntry::direct("hom", CommandCategory::Function, "hom", "hom", BASE),
529    CommandEntry::direct("min", CommandCategory::Function, "min", "min", BASE),
530    CommandEntry::direct("max", CommandCategory::Function, "max", "max", BASE),
531    CommandEntry::direct("sup", CommandCategory::Function, "sup", "sup", BASE),
532    CommandEntry::direct("inf", CommandCategory::Function, "inf", "inf", BASE),
533    // Spacing and invisible commands.
534    CommandEntry::no_output(",", CommandCategory::Spacing, BASE),
535    CommandEntry::no_output(":", CommandCategory::Spacing, BASE),
536    CommandEntry::no_output(";", CommandCategory::Spacing, BASE),
537    CommandEntry::no_output("!", CommandCategory::Spacing, BASE),
538    CommandEntry::no_output(" ", CommandCategory::Spacing, BASE),
539    CommandEntry::no_output("quad", CommandCategory::Spacing, BASE),
540    CommandEntry::no_output("qquad", CommandCategory::Spacing, BASE),
541    // Font/style commands. Actual alphabet mapping is handled by a later pass.
542    CommandEntry::parsed("mathbb", CommandCategory::Font, ArgumentShape::OneRequired, AMS),
543    CommandEntry::parsed("mathcal", CommandCategory::Font, ArgumentShape::OneRequired, BASE),
544    CommandEntry::parsed("mathfrak", CommandCategory::Font, ArgumentShape::OneRequired, AMS),
545    CommandEntry::parsed("mathrm", CommandCategory::Font, ArgumentShape::OneRequired, BASE),
546    CommandEntry::parsed("mathbf", CommandCategory::Font, ArgumentShape::OneRequired, BASE),
547    CommandEntry::parsed("mathit", CommandCategory::Font, ArgumentShape::OneRequired, BASE),
548    CommandEntry::parsed("mathsf", CommandCategory::Font, ArgumentShape::OneRequired, BASE),
549    CommandEntry::parsed("mathtt", CommandCategory::Font, ArgumentShape::OneRequired, BASE),
550    CommandEntry::parsed(
551        "operatorname",
552        CommandCategory::Function,
553        ArgumentShape::OneRequired,
554        BASE,
555    ),
556    // Environments used by the parser.
557    CommandEntry::parsed(
558        "matrix",
559        CommandCategory::Environment,
560        ArgumentShape::EnvironmentBody,
561        BASE,
562    ),
563    CommandEntry::parsed(
564        "pmatrix",
565        CommandCategory::Environment,
566        ArgumentShape::EnvironmentBody,
567        AMS,
568    ),
569    CommandEntry::parsed(
570        "bmatrix",
571        CommandCategory::Environment,
572        ArgumentShape::EnvironmentBody,
573        AMS,
574    ),
575    CommandEntry::parsed(
576        "Bmatrix",
577        CommandCategory::Environment,
578        ArgumentShape::EnvironmentBody,
579        AMS,
580    ),
581    CommandEntry::parsed(
582        "vmatrix",
583        CommandCategory::Environment,
584        ArgumentShape::EnvironmentBody,
585        AMS,
586    ),
587    CommandEntry::parsed(
588        "Vmatrix",
589        CommandCategory::Environment,
590        ArgumentShape::EnvironmentBody,
591        AMS,
592    ),
593    CommandEntry::parsed(
594        "array",
595        CommandCategory::Environment,
596        ArgumentShape::EnvironmentBody,
597        BASE,
598    ),
599    CommandEntry::parsed(
600        "cases",
601        CommandCategory::Environment,
602        ArgumentShape::EnvironmentBody,
603        AMS,
604    ),
605    CommandEntry::parsed(
606        "aligned",
607        CommandCategory::Environment,
608        ArgumentShape::EnvironmentBody,
609        AMS,
610    ),
611    CommandEntry::parsed(
612        "split",
613        CommandCategory::Environment,
614        ArgumentShape::EnvironmentBody,
615        AMS,
616    ),
617    // Known MathJax commands outside this Unicode subset.
618    CommandEntry::unsupported(
619        "newcommand",
620        CommandCategory::Unsupported,
621        ArgumentShape::Variable,
622        BASE,
623    ),
624    CommandEntry::unsupported(
625        "renewcommand",
626        CommandCategory::Unsupported,
627        ArgumentShape::Variable,
628        BASE,
629    ),
630    CommandEntry::unsupported("def", CommandCategory::Unsupported, ArgumentShape::Variable, BASE),
631    CommandEntry::unsupported("let", CommandCategory::Unsupported, ArgumentShape::Variable, BASE),
632    CommandEntry::unsupported(
633        "require",
634        CommandCategory::Unsupported,
635        ArgumentShape::OneRequired,
636        BASE,
637    ),
638    CommandEntry::unsupported("color", CommandCategory::Unsupported, ArgumentShape::Variable, BASE),
639    CommandEntry::unsupported("href", CommandCategory::Unsupported, ArgumentShape::TwoRequired, BASE),
640    CommandEntry::unsupported("class", CommandCategory::Unsupported, ArgumentShape::TwoRequired, BASE),
641    CommandEntry::unsupported("style", CommandCategory::Unsupported, ArgumentShape::TwoRequired, BASE),
642    CommandEntry::unsupported("text", CommandCategory::Unsupported, ArgumentShape::OneRequired, BASE),
643    CommandEntry::unsupported("mbox", CommandCategory::Unsupported, ArgumentShape::OneRequired, BASE),
644    CommandEntry::unsupported(
645        "cancel",
646        CommandCategory::Unsupported,
647        ArgumentShape::OneRequired,
648        "cancel",
649    ),
650    CommandEntry::unsupported(
651        "bcancel",
652        CommandCategory::Unsupported,
653        ArgumentShape::OneRequired,
654        "cancel",
655    ),
656    CommandEntry::unsupported(
657        "xcancel",
658        CommandCategory::Unsupported,
659        ArgumentShape::OneRequired,
660        "cancel",
661    ),
662    CommandEntry::unsupported(
663        "enclose",
664        CommandCategory::Unsupported,
665        ArgumentShape::Variable,
666        "enclose",
667    ),
668    CommandEntry::unsupported(
669        "Aboxed",
670        CommandCategory::Unsupported,
671        ArgumentShape::OneRequired,
672        MATHTOOLS,
673    ),
674    CommandEntry::unsupported("bbox", CommandCategory::Unsupported, ArgumentShape::Variable, "bbox"),
675    CommandEntry::unsupported(
676        "unicode",
677        CommandCategory::Unsupported,
678        ArgumentShape::OneRequired,
679        TEXT_BASE,
680    ),
681];
682
/// Superscript vocabulary as `(ASCII source, Unicode superscript, LaTeX source)`.
///
/// Forward lookups match on the first field and reverse lookups on the
/// second. Every key occurs exactly once, so the grouping below is purely for
/// readability.
const SUPERSCRIPTS: &[(char, char, &str)] = &[
    // Digits.
    ('0', '⁰', "0"),
    ('1', '¹', "1"),
    ('2', '²', "2"),
    ('3', '³', "3"),
    ('4', '⁴', "4"),
    ('5', '⁵', "5"),
    ('6', '⁶', "6"),
    ('7', '⁷', "7"),
    ('8', '⁸', "8"),
    ('9', '⁹', "9"),
    // Signs and parentheses.
    ('+', '⁺', "+"),
    ('-', '⁻', "-"),
    ('=', '⁼', "="),
    ('(', '⁽', "("),
    (')', '⁾', ")"),
    // Lowercase letters (the table has no entry for `q`).
    ('a', 'ᵃ', "a"),
    ('b', 'ᵇ', "b"),
    ('c', 'ᶜ', "c"),
    ('d', 'ᵈ', "d"),
    ('e', 'ᵉ', "e"),
    ('f', 'ᶠ', "f"),
    ('g', 'ᵍ', "g"),
    ('h', 'ʰ', "h"),
    ('i', 'ⁱ', "i"),
    ('j', 'ʲ', "j"),
    ('k', 'ᵏ', "k"),
    ('l', 'ˡ', "l"),
    ('m', 'ᵐ', "m"),
    ('n', 'ⁿ', "n"),
    ('o', 'ᵒ', "o"),
    ('p', 'ᵖ', "p"),
    ('r', 'ʳ', "r"),
    ('s', 'ˢ', "s"),
    ('t', 'ᵗ', "t"),
    ('u', 'ᵘ', "u"),
    ('v', 'ᵛ', "v"),
    ('w', 'ʷ', "w"),
    ('x', 'ˣ', "x"),
    ('y', 'ʸ', "y"),
    ('z', 'ᶻ', "z"),
    // Uppercase letters that have an entry (no `C`, `F`, `Q`, `S`, `X`, `Y`, `Z`).
    ('A', 'ᴬ', "A"),
    ('B', 'ᴮ', "B"),
    ('D', 'ᴰ', "D"),
    ('E', 'ᴱ', "E"),
    ('G', 'ᴳ', "G"),
    ('H', 'ᴴ', "H"),
    ('I', 'ᴵ', "I"),
    ('J', 'ᴶ', "J"),
    ('K', 'ᴷ', "K"),
    ('L', 'ᴸ', "L"),
    ('M', 'ᴹ', "M"),
    ('N', 'ᴺ', "N"),
    ('O', 'ᴼ', "O"),
    ('P', 'ᴾ', "P"),
    ('R', 'ᴿ', "R"),
    ('T', 'ᵀ', "T"),
    ('U', 'ᵁ', "U"),
    ('V', 'ⱽ', "V"),
    ('W', 'ᵂ', "W"),
];
744
/// Subscript vocabulary as `(ASCII source, Unicode subscript, LaTeX source)`.
///
/// Forward lookups match on the first field and reverse lookups on the
/// second. Every key occurs exactly once, so the grouping below is purely for
/// readability.
const SUBSCRIPTS: &[(char, char, &str)] = &[
    // Digits.
    ('0', '₀', "0"),
    ('1', '₁', "1"),
    ('2', '₂', "2"),
    ('3', '₃', "3"),
    ('4', '₄', "4"),
    ('5', '₅', "5"),
    ('6', '₆', "6"),
    ('7', '₇', "7"),
    ('8', '₈', "8"),
    ('9', '₉', "9"),
    // Signs and parentheses.
    ('+', '₊', "+"),
    ('-', '₋', "-"),
    ('=', '₌', "="),
    ('(', '₍', "("),
    (')', '₎', ")"),
    // Lowercase letters that have an entry.
    ('a', 'ₐ', "a"),
    ('e', 'ₑ', "e"),
    ('h', 'ₕ', "h"),
    ('i', 'ᵢ', "i"),
    ('j', 'ⱼ', "j"),
    ('k', 'ₖ', "k"),
    ('l', 'ₗ', "l"),
    ('m', 'ₘ', "m"),
    ('n', 'ₙ', "n"),
    ('o', 'ₒ', "o"),
    ('p', 'ₚ', "p"),
    ('r', 'ᵣ', "r"),
    ('s', 'ₛ', "s"),
    ('t', 'ₜ', "t"),
    ('u', 'ᵤ', "u"),
    ('v', 'ᵥ', "v"),
    ('x', 'ₓ', "x"),
];
779
780/// Look up a MathJax-style command by name without a leading backslash.
781#[must_use]
782pub fn lookup_command(name: &str) -> Option<CommandInfo> {
783    COMMANDS
784        .iter()
785        .find_map(|entry| (entry.name == name).then(|| entry.info()))
786}
787
788/// Return whether a command is known but outside mdwright's Unicode subset.
789#[must_use]
790pub fn is_known_unsupported_command(name: &str) -> bool {
791    lookup_command(name).is_some_and(|info| info.support() == SupportStatus::Unsupported)
792}
793
794/// Return the Unicode symbol for a direct LaTeX command.
795#[must_use]
796pub fn latex_symbol(name: &str) -> Option<&'static str> {
797    lookup_command(name).and_then(|info| {
798        (info.support() == SupportStatus::DirectUnicode)
799            .then(|| info.unicode())
800            .flatten()
801    })
802}
803
804/// Return one preferred LaTeX command name for a Unicode symbol.
805#[must_use]
806pub fn unicode_symbol_latex(symbol: &str) -> Option<&'static str> {
807    COMMANDS
808        .iter()
809        .find_map(|entry| (entry.unicode == Some(symbol) && entry.preferred == entry.name).then_some(entry.preferred))
810}
811
812pub(crate) fn unicode_symbol_latex_source(symbol: &str) -> Option<LatexSourceFragment> {
813    match symbol {
814        "−" => Some(LatexSourceFragment::Raw("-")),
815        "·" | "⋅" => Some(LatexSourceFragment::Command("cdot")),
816        "…" => Some(LatexSourceFragment::Command("cdots")),
817        "ℎ" => Some(LatexSourceFragment::Raw("h")),
818        "ℴ" => Some(LatexSourceFragment::Raw("o")),
819        "\u{227A}\u{0338}" => Some(LatexSourceFragment::Command("nprec")),
820        "\u{2A7D}\u{0338}" => Some(LatexSourceFragment::Command("nleqslant")),
821        "⥲" => Some(LatexSourceFragment::Raw(r"\xrightarrow{\sim}")),
822        "⤏" => Some(LatexSourceFragment::Command("dashrightarrow")),
823        _ => unicode_symbol_latex(symbol).map(LatexSourceFragment::Command),
824    }
825}
826
827pub(crate) fn unicode_math_alphabet_char(ch: char) -> Option<MathAlphabetChar> {
828    legacy_math_alphabet_char(ch)
829        .or_else(|| latin_math_alphabet_char(ch))
830        .or_else(|| digit_math_alphabet_char(ch))
831        .or_else(|| greek_math_alphabet_char(ch))
832}
833
834pub(crate) fn math_alphabet_latex_command(style: MathAlphabetStyle) -> Option<&'static str> {
835    match style {
836        MathAlphabetStyle::Bold => Some("mathbf"),
837        MathAlphabetStyle::Italic | MathAlphabetStyle::BoldItalic => Some("mathit"),
838        MathAlphabetStyle::Script | MathAlphabetStyle::BoldScript => Some("mathcal"),
839        MathAlphabetStyle::Fraktur | MathAlphabetStyle::BoldFraktur => Some("mathfrak"),
840        MathAlphabetStyle::DoubleStruck => Some("mathbb"),
841        MathAlphabetStyle::Sans
842        | MathAlphabetStyle::SansBold
843        | MathAlphabetStyle::SansItalic
844        | MathAlphabetStyle::SansBoldItalic => Some("mathsf"),
845        MathAlphabetStyle::Monospace => Some("mathtt"),
846    }
847}
848
// Word normalization lives in the registry as data; it is neither xtask
// migration policy nor a set of parser branches. The parser owns source
// shape, while this table owns the narrower vocabulary decision that a word
// is a mathematical operator.
//
// Each pair is `(word, command name without backslash)`. Words are unique,
// so the alphabetical order is only for readability.
const BUILT_IN_OPERATOR_WORDS: &[(&str, &str)] = &[
    ("coker", "coker"),
    ("cos", "cos"),
    ("dim", "dim"),
    ("exp", "exp"),
    ("im", "im"),
    ("inf", "inf"),
    ("ker", "ker"),
    ("lim", "lim"),
    ("log", "log"),
    ("max", "max"),
    ("min", "min"),
    ("sin", "sin"),
    ("sup", "sup"),
    ("tan", "tan"),
];
868
// Operator words spelled through `\operatorname{...}`.
//
// Each pair is `(word, raw LaTeX source)`. Words are unique, so the
// alphabetical order is only for readability.
const NAMED_OPERATOR_WORDS: &[(&str, &str)] = &[
    ("Aut", r"\operatorname{Aut}"),
    ("Der", r"\operatorname{Der}"),
    ("Div", r"\operatorname{Div}"),
    ("End", r"\operatorname{End}"),
    ("Frob", r"\operatorname{Frob}"),
    ("Gal", r"\operatorname{Gal}"),
    ("Hom", r"\operatorname{Hom}"),
    ("Idem", r"\operatorname{Idem}"),
    ("LabCusp", r"\operatorname{LabCusp}"),
    ("Pic", r"\operatorname{Pic}"),
    ("Proj", r"\operatorname{Proj}"),
    ("Spec", r"\operatorname{Spec}"),
];
883
884pub(crate) fn operator_word_latex_source(word: &str) -> Option<OperatorWordInfo> {
885    if let Some((_word, command)) = BUILT_IN_OPERATOR_WORDS
886        .iter()
887        .find(|(candidate, _command)| *candidate == word)
888    {
889        return Some(OperatorWordInfo {
890            source: LatexSourceFragment::Command(command),
891            kind: OperatorWordKind::BuiltInCommand,
892        });
893    }
894    NAMED_OPERATOR_WORDS
895        .iter()
896        .find(|(candidate, _source)| *candidate == word)
897        .map(|(_word, source)| OperatorWordInfo {
898            source: LatexSourceFragment::Raw(source),
899            kind: OperatorWordKind::OperatorName,
900        })
901}
902
903pub(crate) fn styled_operator_word_latex_source(style: MathAlphabetStyle, base: &str) -> Option<OperatorWordInfo> {
904    match style {
905        MathAlphabetStyle::Script | MathAlphabetStyle::BoldScript => operator_word_latex_source(base),
906        MathAlphabetStyle::Bold
907        | MathAlphabetStyle::Italic
908        | MathAlphabetStyle::BoldItalic
909        | MathAlphabetStyle::Fraktur
910        | MathAlphabetStyle::DoubleStruck
911        | MathAlphabetStyle::BoldFraktur
912        | MathAlphabetStyle::Sans
913        | MathAlphabetStyle::SansBold
914        | MathAlphabetStyle::SansItalic
915        | MathAlphabetStyle::SansBoldItalic
916        | MathAlphabetStyle::Monospace => None,
917    }
918}
919
920fn legacy_math_alphabet_char(ch: char) -> Option<MathAlphabetChar> {
921    let (style, base) = match ch {
922        'ℂ' => (MathAlphabetStyle::DoubleStruck, 'C'),
923        'ℍ' => (MathAlphabetStyle::DoubleStruck, 'H'),
924        'ℕ' => (MathAlphabetStyle::DoubleStruck, 'N'),
925        'ℙ' => (MathAlphabetStyle::DoubleStruck, 'P'),
926        'ℚ' => (MathAlphabetStyle::DoubleStruck, 'Q'),
927        'ℝ' => (MathAlphabetStyle::DoubleStruck, 'R'),
928        'ℤ' => (MathAlphabetStyle::DoubleStruck, 'Z'),
929        'ℬ' => (MathAlphabetStyle::Script, 'B'),
930        'ℰ' => (MathAlphabetStyle::Script, 'E'),
931        'ℱ' => (MathAlphabetStyle::Script, 'F'),
932        'ℋ' => (MathAlphabetStyle::Script, 'H'),
933        'ℐ' => (MathAlphabetStyle::Script, 'I'),
934        'ℒ' => (MathAlphabetStyle::Script, 'L'),
935        'ℳ' => (MathAlphabetStyle::Script, 'M'),
936        'ℛ' => (MathAlphabetStyle::Script, 'R'),
937        'ℯ' => (MathAlphabetStyle::Script, 'e'),
938        'ℭ' => (MathAlphabetStyle::Fraktur, 'C'),
939        'ℌ' => (MathAlphabetStyle::Fraktur, 'H'),
940        'ℑ' => (MathAlphabetStyle::Fraktur, 'I'),
941        'ℜ' => (MathAlphabetStyle::Fraktur, 'R'),
942        'ℨ' => (MathAlphabetStyle::Fraktur, 'Z'),
943        _ => return None,
944    };
945    Some(MathAlphabetChar { style, base })
946}
947
948fn latin_math_alphabet_char(ch: char) -> Option<MathAlphabetChar> {
949    const LATIN_RANGES: &[(u32, MathAlphabetStyle, char, usize)] = &[
950        (0x1d400, MathAlphabetStyle::Bold, 'A', 26),
951        (0x1d41a, MathAlphabetStyle::Bold, 'a', 26),
952        (0x1d434, MathAlphabetStyle::Italic, 'A', 26),
953        (0x1d44e, MathAlphabetStyle::Italic, 'a', 26),
954        (0x1d468, MathAlphabetStyle::BoldItalic, 'A', 26),
955        (0x1d482, MathAlphabetStyle::BoldItalic, 'a', 26),
956        (0x1d49c, MathAlphabetStyle::Script, 'A', 26),
957        (0x1d4b6, MathAlphabetStyle::Script, 'a', 26),
958        (0x1d4d0, MathAlphabetStyle::BoldScript, 'A', 26),
959        (0x1d4ea, MathAlphabetStyle::BoldScript, 'a', 26),
960        (0x1d504, MathAlphabetStyle::Fraktur, 'A', 26),
961        (0x1d51e, MathAlphabetStyle::Fraktur, 'a', 26),
962        (0x1d538, MathAlphabetStyle::DoubleStruck, 'A', 26),
963        (0x1d552, MathAlphabetStyle::DoubleStruck, 'a', 26),
964        (0x1d56c, MathAlphabetStyle::BoldFraktur, 'A', 26),
965        (0x1d586, MathAlphabetStyle::BoldFraktur, 'a', 26),
966        (0x1d5a0, MathAlphabetStyle::Sans, 'A', 26),
967        (0x1d5ba, MathAlphabetStyle::Sans, 'a', 26),
968        (0x1d5d4, MathAlphabetStyle::SansBold, 'A', 26),
969        (0x1d5ee, MathAlphabetStyle::SansBold, 'a', 26),
970        (0x1d608, MathAlphabetStyle::SansItalic, 'A', 26),
971        (0x1d622, MathAlphabetStyle::SansItalic, 'a', 26),
972        (0x1d63c, MathAlphabetStyle::SansBoldItalic, 'A', 26),
973        (0x1d656, MathAlphabetStyle::SansBoldItalic, 'a', 26),
974        (0x1d670, MathAlphabetStyle::Monospace, 'A', 26),
975        (0x1d68a, MathAlphabetStyle::Monospace, 'a', 26),
976    ];
977    for &(start, style, base, len) in LATIN_RANGES {
978        if let Some(mapped) = offset_char(ch, start, base, len) {
979            return Some(MathAlphabetChar { style, base: mapped });
980        }
981    }
982    None
983}
984
985fn digit_math_alphabet_char(ch: char) -> Option<MathAlphabetChar> {
986    const DIGIT_RANGES: &[(u32, MathAlphabetStyle)] = &[
987        (0x1d7ce, MathAlphabetStyle::Bold),
988        (0x1d7d8, MathAlphabetStyle::DoubleStruck),
989        (0x1d7e2, MathAlphabetStyle::Sans),
990        (0x1d7ec, MathAlphabetStyle::SansBold),
991        (0x1d7f6, MathAlphabetStyle::Monospace),
992    ];
993    for &(start, style) in DIGIT_RANGES {
994        if let Some(base) = offset_char(ch, start, '0', 10) {
995            return Some(MathAlphabetChar { style, base });
996        }
997    }
998    None
999}
1000
1001fn greek_math_alphabet_char(ch: char) -> Option<MathAlphabetChar> {
1002    const GREEK_BOLD_UPPER: &[char] = &[
1003        'Α', 'Β', 'Γ', 'Δ', 'Ε', 'Ζ', 'Η', 'Θ', 'Ι', 'Κ', 'Λ', 'Μ', 'Ν', 'Ξ', 'Ο', 'Π', 'Ρ', 'ϴ', 'Σ', 'Τ', 'Υ', 'Φ',
1004        'Χ', 'Ψ', 'Ω',
1005    ];
1006    const GREEK_BOLD_LOWER: &[char] = &[
1007        '∇', 'α', 'β', 'γ', 'δ', 'ε', 'ζ', 'η', 'θ', 'ι', 'κ', 'λ', 'μ', 'ν', 'ξ', 'ο', 'π', 'ρ', 'ς', 'σ', 'τ', 'υ',
1008        'φ', 'χ', 'ψ', 'ω',
1009    ];
1010    greek_range(ch, 0x1d6a8, MathAlphabetStyle::Bold, GREEK_BOLD_UPPER)
1011        .or_else(|| greek_range(ch, 0x1d6c2, MathAlphabetStyle::Bold, GREEK_BOLD_LOWER))
1012        .or_else(|| greek_range(ch, 0x1d6e2, MathAlphabetStyle::Italic, GREEK_BOLD_UPPER))
1013        .or_else(|| greek_range(ch, 0x1d6fc, MathAlphabetStyle::Italic, GREEK_BOLD_LOWER))
1014        .or_else(|| greek_range(ch, 0x1d71c, MathAlphabetStyle::BoldItalic, GREEK_BOLD_UPPER))
1015        .or_else(|| greek_range(ch, 0x1d736, MathAlphabetStyle::BoldItalic, GREEK_BOLD_LOWER))
1016        .or_else(|| greek_range(ch, 0x1d756, MathAlphabetStyle::SansBold, GREEK_BOLD_UPPER))
1017        .or_else(|| greek_range(ch, 0x1d770, MathAlphabetStyle::SansBold, GREEK_BOLD_LOWER))
1018        .or_else(|| greek_range(ch, 0x1d790, MathAlphabetStyle::SansBoldItalic, GREEK_BOLD_UPPER))
1019        .or_else(|| greek_range(ch, 0x1d7aa, MathAlphabetStyle::SansBoldItalic, GREEK_BOLD_LOWER))
1020}
1021
1022fn greek_range(ch: char, start: u32, style: MathAlphabetStyle, bases: &'static [char]) -> Option<MathAlphabetChar> {
1023    let value = u32::from(ch);
1024    let offset = value.checked_sub(start)?;
1025    let index = usize::try_from(offset).ok()?;
1026    let base = bases.get(index).copied()?;
1027    Some(MathAlphabetChar { style, base })
1028}
1029
/// Map `ch` from a contiguous styled run onto the matching plain character.
///
/// The run covers `len` code points starting at `start`; the character at
/// offset `i` maps to the character `i` code points after `base`. Returns
/// `None` when `ch` lies outside the run or the mapped value is not a valid
/// `char`.
fn offset_char(ch: char, start: u32, base: char, len: usize) -> Option<char> {
    let offset = u32::from(ch).checked_sub(start)?;
    let inside_run = usize::try_from(offset).ok()? < len;
    if inside_run {
        u32::from(base).checked_add(offset).and_then(char::from_u32)
    } else {
        None
    }
}
1041
1042/// Unicode superscript for a single ASCII character.
1043#[must_use]
1044pub fn unicode_super(c: char) -> Option<char> {
1045    SUPERSCRIPTS
1046        .iter()
1047        .find_map(|(source, rendered, _latex)| (*source == c).then_some(*rendered))
1048}
1049
1050/// Unicode subscript for a single ASCII character.
1051#[must_use]
1052pub fn unicode_sub(c: char) -> Option<char> {
1053    SUBSCRIPTS
1054        .iter()
1055        .find_map(|(source, rendered, _latex)| (*source == c).then_some(*rendered))
1056}
1057
1058/// Render a whole script string as Unicode superscript.
1059#[must_use]
1060pub fn unicode_super_str(s: &str) -> Option<String> {
1061    s.chars().map(unicode_super).collect()
1062}
1063
1064/// Render a whole script string as Unicode subscript.
1065#[must_use]
1066pub fn unicode_sub_str(s: &str) -> Option<String> {
1067    s.chars().map(unicode_sub).collect()
1068}
1069
1070/// Preferred ASCII source for one Unicode superscript character.
1071#[must_use]
1072pub fn unicode_super_latex(c: char) -> Option<&'static str> {
1073    if c == '°' {
1074        return Some(r"\circ");
1075    }
1076    SUPERSCRIPTS
1077        .iter()
1078        .find_map(|(_source, rendered, latex)| (*rendered == c).then_some(*latex))
1079}
1080
1081/// Preferred ASCII source for one Unicode subscript character.
1082#[must_use]
1083pub fn unicode_sub_latex(c: char) -> Option<&'static str> {
1084    SUBSCRIPTS
1085        .iter()
1086        .find_map(|(_source, rendered, latex)| (*rendered == c).then_some(*latex))
1087}
1088
#[cfg(test)]
mod tests {
    #![allow(clippy::panic, reason = "registry tests fail with direct invariant context")]

    use super::*;

    /// Fetch a registry record, failing the test with the missing name.
    fn command(name: &str) -> CommandInfo {
        let Some(info) = lookup_command(name) else {
            panic!("missing registry command {name}");
        };
        info
    }

    #[test]
    fn direct_symbol_lookup_covers_mathjax_style_categories() {
        // One representative per category: Greek, binary operator, relation,
        // arrow, large operator, delimiter.
        let expected = [
            ("alpha", "α"),
            ("otimes", "⊗"),
            ("le", "≤"),
            ("rightarrow", "→"),
            ("sum", "∑"),
            ("langle", "⟨"),
        ];
        for (name, symbol) in expected {
            assert_eq!(latex_symbol(name), Some(symbol), "symbol for {name}");
        }
    }

    #[test]
    fn alias_lookup_keeps_preferred_reverse_spelling() {
        let le = command("le");
        assert_eq!(le.unicode(), Some("≤"));
        assert_eq!(le.preferred(), "leq");

        assert_eq!(unicode_symbol_latex("≤"), Some("leq"));
        assert_eq!(unicode_symbol_latex("∅"), Some("emptyset"));

        let fragments = [
            ("≤", LatexSourceFragment::Command("leq")),
            ("−", LatexSourceFragment::Raw("-")),
            ("⥲", LatexSourceFragment::Raw(r"\xrightarrow{\sim}")),
            ("\u{227A}\u{0338}", LatexSourceFragment::Command("nprec")),
            ("\u{2A7D}\u{0338}", LatexSourceFragment::Command("nleqslant")),
            ("⤏", LatexSourceFragment::Command("dashrightarrow")),
        ];
        for (symbol, fragment) in fragments {
            assert_eq!(unicode_symbol_latex_source(symbol), Some(fragment), "source for {symbol}");
        }
    }

    #[test]
    fn operator_word_lookup_keeps_semantic_vocabulary_in_registry() {
        let Some(log) = operator_word_latex_source("log") else {
            panic!("missing log operator entry");
        };
        assert_eq!(log.source, LatexSourceFragment::Command("log"));
        assert_eq!(log.kind, OperatorWordKind::BuiltInCommand);

        let Some(spec) = operator_word_latex_source("Spec") else {
            panic!("missing Spec operator entry");
        };
        assert_eq!(spec.source, LatexSourceFragment::Raw(r"\operatorname{Spec}"));
        assert_eq!(spec.kind, OperatorWordKind::OperatorName);

        let Some(styled_hom) = styled_operator_word_latex_source(MathAlphabetStyle::Script, "Hom") else {
            panic!("missing script Hom semantic entry");
        };
        assert_eq!(styled_hom.source, LatexSourceFragment::Raw(r"\operatorname{Hom}"));

        // Unknown words and non-script styles have no operator meaning.
        assert!(operator_word_latex_source("Thing").is_none());
        assert!(styled_operator_word_latex_source(MathAlphabetStyle::Fraktur, "Spec").is_none());
    }

    #[test]
    fn registry_distinguishes_parsed_and_unsupported_commands() {
        let frac = command("frac");
        assert_eq!(frac.support(), SupportStatus::ParsedConstruct);
        assert_eq!(frac.arguments(), ArgumentShape::TwoRequired);

        let color = command("color");
        assert_eq!(color.support(), SupportStatus::Unsupported);
        assert!(is_known_unsupported_command("color"));
    }

    #[test]
    fn environments_are_registry_entries_not_parser_strings_only() {
        let matrix = command("matrix");
        assert_eq!(matrix.category(), CommandCategory::Environment);
        assert_eq!(matrix.arguments(), ArgumentShape::EnvironmentBody);
    }

    #[test]
    fn script_maps_support_forward_and_reverse_lookup() {
        // Forward: ASCII source to Unicode script text.
        assert_eq!(unicode_super_str("-1").as_deref(), Some("⁻¹"));
        assert_eq!(unicode_super_str("n").as_deref(), Some("ⁿ"));
        assert_eq!(unicode_sub_str("i").as_deref(), Some("ᵢ"));
        assert_eq!(unicode_sub_str("x").as_deref(), Some("ₓ"));

        // Reverse: Unicode script character back to its source text.
        assert_eq!(unicode_super_latex('⁻'), Some("-"));
        assert_eq!(unicode_sub_latex('ᵢ'), Some("i"));
    }
}