mathhook_core/parser/lexer/
rules.rs

1//! Token classification rules for implicit multiplication
2//!
3//! This module contains the static data and rules that determine when
4//! implicit multiplication should be inserted between tokens.
5
6use std::collections::HashSet;
7use std::sync::LazyLock;
8
/// Coarse classification of a token, used when deciding whether an implicit
/// multiplication belongs between two adjacent tokens.
///
/// Ordered pairs of these categories are what `IMPLICIT_MUL_RULES` stores. The
/// enum is `Copy` and `Hash`, so values can be used directly as set/map keys.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum TokenCategory {
    /// Numeric token. In the rules table it only appears on the left of a pair.
    Number,
    /// Identifier token. It is the only left-hand category with no rule for a
    /// following `LeftParen`.
    Identifier,
    /// Constant token (presumably one whose name is listed in `CONSTANTS`;
    /// the classifier itself is not part of this module section).
    Constant,
    /// Greek-symbol token (presumably one whose name is listed in
    /// `GREEK_SYMBOLS`; confirm against the classifier).
    GreekSymbol,
    /// Function token. No entry of `IMPLICIT_MUL_RULES` mentions this category.
    Function,
    /// Opening parenthesis. Only ever the right-hand member of a rule.
    LeftParen,
    /// Closing parenthesis. Only ever the left-hand member of a rule.
    RightParen,
    /// Operator token. No entry of `IMPLICIT_MUL_RULES` mentions this category.
    Operator,
    /// Anything else. No entry of `IMPLICIT_MUL_RULES` mentions this category.
    Other,
}
22
/// Token names that are treated as mathematical constants when deciding on
/// implicit multiplication.
///
/// The entries are token-variant names, not source text. Alongside the plain
/// constants the set also holds `LATEX_GAMMA` and `WOLFRAM_GAMMA`: both denote
/// the Gamma function but are grouped here because they act like constants for
/// this purpose.
pub static CONSTANTS: LazyLock<HashSet<&'static str>> = LazyLock::new(|| {
    // Basic constants (as token variants).
    const BASIC: &[&str] = &[
        "PI",
        "E_CONST",
        "I_CONST",
        "INFINITY",
        "PHI",
        "GOLDEN_RATIO",
        "EULER_GAMMA",
        "GAMMA_CONST",
        "UNDEFINED",
    ];
    // LaTeX spellings; `LATEX_GAMMA` is the Gamma function.
    const LATEX: &[&str] = &[
        "LATEX_PI",
        "LATEX_PHI",
        "LATEX_VARPHI",
        "LATEX_INFTY",
        "LATEX_EULER_GAMMA",
        "LATEX_GAMMA",
    ];
    // Wolfram constants/functions that act like constants (Gamma function).
    const WOLFRAM: &[&str] = &["WOLFRAM_GAMMA"];

    BASIC
        .iter()
        .chain(LATEX)
        .chain(WOLFRAM)
        .copied()
        .collect::<HashSet<_>>()
});
47
/// Token names of Greek-letter symbols that take part in implicit
/// multiplication.
///
/// The LaTeX and Wolfram lists cover the same 21 letters. Gamma, pi and phi are
/// absent from both: `LATEX_GAMMA`, `WOLFRAM_GAMMA`, `LATEX_PI` and `LATEX_PHI`
/// are registered in `CONSTANTS` instead (the gamma tokens stand for the Gamma
/// function).
pub static GREEK_SYMBOLS: LazyLock<HashSet<&'static str>> = LazyLock::new(|| {
    // LaTeX Greek symbols.
    const LATEX: &[&str] = &[
        "LATEX_ALPHA",
        "LATEX_BETA",
        "LATEX_DELTA",
        "LATEX_EPSILON",
        "LATEX_ZETA",
        "LATEX_ETA",
        "LATEX_THETA",
        "LATEX_IOTA",
        "LATEX_KAPPA",
        "LATEX_LAMBDA",
        "LATEX_MU",
        "LATEX_NU",
        "LATEX_XI",
        "LATEX_OMICRON",
        "LATEX_RHO",
        "LATEX_SIGMA",
        "LATEX_TAU",
        "LATEX_UPSILON",
        "LATEX_CHI",
        "LATEX_PSI",
        "LATEX_OMEGA",
    ];
    // Wolfram Greek symbols, letter for letter the same as the LaTeX list.
    const WOLFRAM: &[&str] = &[
        "WOLFRAM_ALPHA",
        "WOLFRAM_BETA",
        "WOLFRAM_DELTA",
        "WOLFRAM_EPSILON",
        "WOLFRAM_ZETA",
        "WOLFRAM_ETA",
        "WOLFRAM_THETA",
        "WOLFRAM_IOTA",
        "WOLFRAM_KAPPA",
        "WOLFRAM_LAMBDA",
        "WOLFRAM_MU",
        "WOLFRAM_NU",
        "WOLFRAM_XI",
        "WOLFRAM_OMICRON",
        "WOLFRAM_RHO",
        "WOLFRAM_SIGMA",
        "WOLFRAM_TAU",
        "WOLFRAM_UPSILON",
        "WOLFRAM_CHI",
        "WOLFRAM_PSI",
        "WOLFRAM_OMEGA",
    ];

    LATEX.iter().chain(WOLFRAM).copied().collect::<HashSet<_>>()
});
98
/// Names of mathematical functions that must NOT trigger implicit
/// multiplication when they are followed by parentheses.
///
/// Lookups are case-sensitive: the lowercase entries are ordinary function
/// names, while the single uppercase letters stand for families of special
/// functions.
pub static FUNCTIONS: LazyLock<HashSet<&'static str>> = LazyLock::new(|| {
    const TRIGONOMETRIC: &[&str] = &["sin", "cos", "tan", "sec", "csc", "cot"];
    const HYPERBOLIC: &[&str] = &["sinh", "cosh", "tanh", "sech", "csch", "coth"];
    // Both the `arc…` and the short `a…` spellings are accepted.
    const INVERSE_TRIGONOMETRIC: &[&str] = &[
        "arcsin", "arccos", "arctan", "arcsec", "arccsc", "arccot", "asin", "acos", "atan",
        "asec", "acsc", "acot",
    ];
    const ELEMENTARY: &[&str] = &[
        "log", "ln", "exp", "sqrt", "abs", "floor", "ceil", "round", "sign", "max", "min", "gcd",
        "lcm",
    ];
    const SPECIAL: &[&str] = &["gamma", "beta", "zeta", "erf", "erfc"];
    const SINGLE_LETTER: &[&str] = &[
        "J", "Y", "I", "K", // Bessel functions
        "P", "Q", "L", "H", // Legendre, Hermite functions
        "F", "G", "U", "M", "W", // Hypergeometric, Whittaker functions
    ];
    const GROUPS: &[&[&str]] = &[
        TRIGONOMETRIC,
        HYPERBOLIC,
        INVERSE_TRIGONOMETRIC,
        ELEMENTARY,
        SPECIAL,
        SINGLE_LETTER,
    ];

    GROUPS
        .iter()
        .flat_map(|group| group.iter().copied())
        .collect::<HashSet<_>>()
});
111
112/// Rules for when implicit multiplication should be inserted
113pub static IMPLICIT_MUL_RULES: LazyLock<HashSet<(TokenCategory, TokenCategory)>> =
114    LazyLock::new(|| {
115        HashSet::from([
116            // Number followed by anything multiplicative
117            (TokenCategory::Number, TokenCategory::Identifier),
118            (TokenCategory::Number, TokenCategory::Constant),
119            (TokenCategory::Number, TokenCategory::GreekSymbol),
120            (TokenCategory::Number, TokenCategory::LeftParen),
121            // Identifier followed by anything multiplicative
122            (TokenCategory::Identifier, TokenCategory::Identifier),
123            (TokenCategory::Identifier, TokenCategory::Constant),
124            (TokenCategory::Identifier, TokenCategory::GreekSymbol),
125            // Constants followed by anything multiplicative
126            (TokenCategory::Constant, TokenCategory::Identifier),
127            (TokenCategory::Constant, TokenCategory::Constant),
128            (TokenCategory::Constant, TokenCategory::GreekSymbol),
129            (TokenCategory::Constant, TokenCategory::LeftParen),
130            // Greek symbols followed by anything multiplicative
131            (TokenCategory::GreekSymbol, TokenCategory::Identifier),
132            (TokenCategory::GreekSymbol, TokenCategory::Constant),
133            (TokenCategory::GreekSymbol, TokenCategory::GreekSymbol),
134            (TokenCategory::GreekSymbol, TokenCategory::LeftParen),
135            // Right parentheses followed by anything multiplicative
136            (TokenCategory::RightParen, TokenCategory::Identifier),
137            (TokenCategory::RightParen, TokenCategory::Constant),
138            (TokenCategory::RightParen, TokenCategory::GreekSymbol),
139            (TokenCategory::RightParen, TokenCategory::LeftParen),
140        ])
141    });