panmath/
symbols.rs

1//! This module provides an ergonomic way of defining new symbols and building up a library of
2//! mathematical symbols to recognize and parse without custom specifications. Specifically, the
3//! approach this module takes is to split symbols into several distinct types that share a common
4//! structure, and then to implement generic Symbol conversions for those more specific types. This
5//! gives us the flexibility of Symbol when we need it, but allows us to save a lot of boilerplate
6//! when defining, for instance, every single trig function or letter.
7
8use crate::ast::Symbol;
9
10use std::collections::{BTreeMap, HashMap};
11use std::string::ToString;
12use strum::{EnumProperty, IntoEnumIterator};
13use strum_macros::{Display, EnumIter, EnumProperty, EnumString};
14
/// A "special" function, identified by its bare name (for example `sin` or `log`).
///
/// Special functions have a few things in common: LaTeX provides a command that typesets them
/// in roman type rather than the italic used for ordinary functions, their names are plain
/// words with no other mathematical notation in them, and they are frequently written without
/// parentheses. Dropping the parentheses is not implemented currently, but might be in the
/// future.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct SpecialFunction(String);
21
22impl SpecialFunction {
23    /// Produces a symbol for the square of the function.
24    fn square(&self) -> Symbol {
25        Symbol {
26            unicode_repr: format!("{}²", self.0),
27            ascii_repr: format!("{}^2", self.0),
28            latex_repr: format!("\\{}^2", self.0),
29            other_reprs: vec![],
30        }
31    }
32
33    /// Produces a symbol for the inverse of the function.
34    fn inv(&self) -> Symbol {
35        Symbol {
36            unicode_repr: format!("{}⁻¹", self.0),
37            ascii_repr: format!("{}^-1", self.0),
38            latex_repr: format!("\\{}^{{-1}}", self.0),
39            other_reprs: vec![],
40        }
41    }
42}
43
44impl From<SpecialFunction> for Symbol {
45    fn from(func: SpecialFunction) -> Self {
46        Symbol {
47            unicode_repr: func.0.clone(),
48            ascii_repr: func.0.clone(),
49            latex_repr: format!("\\{}", func.0),
50            other_reprs: vec![],
51        }
52    }
53}
54
55/// A Greek letter with both uppercase and lowercase representations.
56#[derive(Copy, Clone, Debug, Hash, PartialEq, Eq, EnumString, EnumProperty, Display, EnumIter)]
57pub enum GreekLetter {
58    #[strum(props(Lower = "α", Upper = "Α"))]
59    Alpha,
60    #[strum(props(Lower = "β", Upper = "Β"))]
61    Beta,
62    #[strum(props(Lower = "γ", Upper = "Γ"))]
63    Gamma,
64    #[strum(props(Lower = "δ", Upper = "Δ"))]
65    Delta,
66    #[strum(props(Lower = "ε", Upper = "Ε"))]
67    Epsilon,
68    #[strum(props(Lower = "ζ", Upper = "Ζ"))]
69    Zeta,
70    #[strum(props(Lower = "η", Upper = "Η"))]
71    Eta,
72    #[strum(props(Lower = "θ", Upper = "Θ"))]
73    Theta,
74    #[strum(props(Lower = "ι", Upper = "Ι"))]
75    Iota,
76    #[strum(props(Lower = "κ", Upper = "Κ"))]
77    Kappa,
78    #[strum(props(Lower = "λ", Upper = "Λ"))]
79    Lambda,
80    #[strum(props(Lower = "μ", Upper = "Μ"))]
81    Mu,
82    #[strum(props(Lower = "ν", Upper = "Ν"))]
83    Nu,
84    #[strum(props(Lower = "ξ", Upper = "Ξ"))]
85    Xi,
86    #[strum(props(Lower = "ο", Upper = "Ο"))]
87    Omicron,
88    #[strum(props(Lower = "π", Upper = "Π"))]
89    Pi,
90    #[strum(props(Lower = "ρ", Upper = "Ρ"))]
91    Rho,
92    #[strum(props(Lower = "σ", Upper = "Σ"))]
93    Sigma,
94    #[strum(props(Lower = "τ", Upper = "Τ"))]
95    Tau,
96    #[strum(props(Lower = "υ", Upper = "Υ"))]
97    Upsilon,
98    #[strum(props(Lower = "φ", Upper = "Φ"))]
99    Phi,
100    #[strum(props(Lower = "χ", Upper = "Χ"))]
101    Chi,
102    #[strum(props(Lower = "ψ", Upper = "Ψ"))]
103    Psi,
104    #[strum(props(Lower = "ω", Upper = "Ω"))]
105    Omega,
106}
107
108/// The case of a greek letter: uppercase or lowercase.
109#[derive(Copy, Clone, Debug, Hash, PartialEq, Eq, EnumIter)]
110pub enum Case {
111    /// Uppercase.
112    Uppercase,
113    /// Lowercase.
114    Lowercase,
115}
116
117/// A Greek letter with a specified case.
118pub struct CasedGreekLetter {
119    /// The letter.
120    pub letter: GreekLetter,
121
122    /// The letter's case.
123    pub case: Case,
124}
125
126impl From<CasedGreekLetter> for Symbol {
127    fn from(cased: CasedGreekLetter) -> Self {
128        let unicode = match cased.case {
129            Case::Uppercase => cased.letter.get_str("Upper").unwrap(),
130            Case::Lowercase => cased.letter.get_str("Lower").unwrap(),
131        };
132
133        let letter = cased.letter.to_string();
134        let (ascii_start, ascii_rest) = letter.split_at(1);
135        let ascii_name = match cased.case {
136            Case::Uppercase => format!("{}{}", ascii_start.to_uppercase(), ascii_rest),
137            Case::Lowercase => format!("{}{}", ascii_start.to_lowercase(), ascii_rest),
138        };
139
140        Symbol {
141            unicode_repr: unicode.to_string(),
142            ascii_repr: ascii_name.clone(),
143            latex_repr: format!("\\{}", ascii_name),
144            other_reprs: vec![],
145        }
146    }
147}
148
149// General implementation of Symbol for any identifier. Outputs might break if you put in special
150// characters: this is intended to make it easy to get a symbol for x, not to encode some crazy
151// LaTeX thing.
152impl From<String> for Symbol {
153    fn from(sym: String) -> Self {
154        Symbol {
155            unicode_repr: sym.clone(),
156            ascii_repr: sym.clone(),
157            latex_repr: sym.clone(),
158            other_reprs: vec![],
159        }
160    }
161}
162
163/// See From<String>.
164impl From<&str> for Symbol {
165    fn from(sym: &str) -> Self {
166        Symbol {
167            unicode_repr: sym.to_string(),
168            ascii_repr: sym.to_string(),
169            latex_repr: sym.to_string(),
170            other_reprs: vec![],
171        }
172    }
173}
174
175lazy_static! {
176    /// All of the Greek letters, as Symbols that intelligently parse and display. They are keyed by
177    /// their ASCII representation, which is capitalized if the letter is uppercase and lowercase
178    /// otherwise. `Pi` maps to Π, and `pi` maps to π.
179    pub static ref GREEK_SYMBOLS: HashMap<String, Symbol> = {
180        let mut syms: HashMap<String, Symbol> = HashMap::new();
181        for letter in GreekLetter::iter() {
182            for case in Case::iter() {
183                let sym: Symbol = CasedGreekLetter { letter, case }.into();
184                syms.insert(sym.ascii_repr.clone(), sym);
185            }
186        }
187        syms
188    };
189
190    /// All of the Latin symbols that come pre-defined. They're indexed by their ASCII
191    /// representation, which is the only one they have: pretty straightforward.
192    pub static ref LATIN_SYMBOLS: HashMap<String, Symbol> = {
193        let mut syms: HashMap<String, Symbol> = HashMap::new();
194        let alphabet = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ".chars();
195        for letter in alphabet {
196            syms.insert(letter.to_string(), letter.to_string().into());
197        }
198        syms
199    };
200
201    /// The special functions that come predefined. These are indexed by their normal name. The
202    /// current special functions are:
203    ///  - `exp`, `log`, `ln`, `lg`
204    ///  - The standard trig functions `sin`, `cos`, `tan`, `sec`, `csc`, `cot`
205    ///  - The inverse functions defined in `amsmath`: `arcsin`, `arccos`, `arctan`
206    ///  - The hyperbolic trig functions defined in `amsmath`: `sinh`, `cosh`, `tanh`, `coth`. Don't
207    ///  ask me why they have four defined, not three or six!
208    ///  - `max`, `min`
209    ///  - `Pr`
210    ///  - `gcd`
211    ///  - `det`, `dim`, `ker`
212    ///  - `inf`, `sup`
213    /// `amsmath` is very inconsistent, as you can see. I've only included the operators that
214    /// might be used in plaintext: limits, for example, aren't parseable using standard function
215    /// syntax.
216    pub static ref SPECIAL_FUNCS: BTreeMap<String, Symbol> = {
217        let mut syms: BTreeMap<String, Symbol> = BTreeMap::new();
218        let names = vec![
219            "exp", "log", "ln", "lg",
220            "sin", "cos", "tan", "sec", "csc", "cot",
221            "arcsin", "arccos", "arctan",
222            "sinh", "cosh", "tanh", "coth",
223            "max", "min",
224            "Pr",
225            "gcd",
226            "det", "dim", "ker",
227            "inf", "sup"
228        ];
229        for name in names {
230            let spf = SpecialFunction(name.to_string());
231            syms.insert(format!("{}^2", name), spf.square());
232            syms.insert(format!("{}^-1", name), spf.inv());
233            syms.insert(name.to_string(), spf.into());
234        }
235        syms
236    };
237
    // Unfortunately, the rest of the symbols are a lot more idiosyncratic, without the clearer
    // patterns that saved so much boilerplate above. The goal of these miscellaneous symbols is
    // to cover the most common abbreviations and ASCII versions of common symbols, not to be
    // complete or perfect. The list will be updated over time to reflect usage. Because of
    // that, each symbol is provided as its own variable: using a symbol that doesn't exist is a
    // compile error, and you get tab completion.
    //
    // (This is deliberately a plain comment rather than a doc comment, so that it is not
    // rendered as part of the documentation of the first symbol below.)
247
248    /// The ≤ (less than or equal to) symbol.
249    pub static ref LE: Symbol = Symbol::new("≤", "<=", r"\le", vec![" le"]);
250    /// The ≥ (greater than or equal to) symbol.
251    pub static ref GE: Symbol = Symbol::new("≥", ">=", r"\ge", vec![" ge"]);
252    /// The ≠ (not equal to) symbol.
253    pub static ref NEQ: Symbol = Symbol::new("≠", "!=", r"\neq", vec!["=/=", "/=", "neq"]);
254    /// The + symbol.
255    pub static ref PLUS: Symbol = Symbol::new("+", "+", "+", vec!["plus"]);
256    /// The - symbol.
257    pub static ref MINUS: Symbol = Symbol::new("-", "−", "-", vec!["minus"]);
258    /// The ± (plus or minus) symbol.
259    pub static ref PM: Symbol = Symbol::new("±", "+/-", r"\pm", vec!["+-", "pm"]);
260    /// The exponentiation symbol. This is not the binary XOR function, and is
261    /// also not used generically: exponentiation is special-cased.
262    pub static ref POWER: Symbol = Symbol::new("^", "^", r"\^{}", vec![]);
263
264    /// The division symbol. This is not the set difference or quotient group,
265    /// and generally using fractions is preferred.
266    pub static ref DIV: Symbol = Symbol::new("/", "/", r"/", vec![]);
267
268    // The ∞ (infinity) symbol.
269    pub static ref INF: Symbol = Symbol::new("∞", " inf", r"\infty", vec!["infinity", "oo"]);
270    /// The ∈ (element of) symbol.
271    // the question is whether to add E here so a E A becomes a ∈ A. I think it's about 50/50 in the
272    // server on whether people do this or not, so I've left it out.
273    pub static ref ELEM: Symbol = Symbol::new("∈", " in", r"\in", vec![" elem"]);
274    /// The ∼ (distributed as) symbol.
275    pub static ref SYM: Symbol = Symbol::new("∼", "~", r"\sym", vec![]);
276    /// The ≅ (approximately equal to) symbol.
277    pub static ref APPROX: Symbol = Symbol::new("≅", "~=", r"\approx", vec![]);
278    /// The multiplication symbol, using a dot instead of the times operator.
279    pub static ref MULT: Symbol = Symbol::new("·", "*", r"\cdot", vec![" times", "\times", "×"]);
280    /// The ° (degrees) symbol.
281    pub static ref DEGREE: Symbol = Symbol::new("°", "o", r"^{\circ}", vec!["deg", "degrees"]);
282    /// The left parenthesis `(``.
283    pub static ref LEFT_PAR: Symbol = Symbol::new("(", "(", r"\left(", vec![]);
284    /// The right parenthesis `)``.
285    pub static ref RIGHT_PAR: Symbol = Symbol::new(")", ")", r"\right)", vec![]);
286    /// The left bracket `[``.
287    pub static ref LEFT_BRACKET: Symbol = Symbol::new("[", "[", r"\left[", vec![]);
288    /// The right bracket `]``.
289    pub static ref RIGHT_BRACKET: Symbol = Symbol::new("]", "]", r"\right]", vec![]);
290
291    // The comma symbol, needed for variadic functions.
292    pub static ref COMMA: Symbol = Symbol::from(",");
293
294    // TODO add more
295
296    /// The delimiters.
297    pub static ref DELIMS: Vec<Symbol> = {
298        vec![
299            LEFT_PAR.clone(),
300            RIGHT_PAR.clone()
301        ]
302    };
303
304
305    /// The miscellaneous symbols.
306    pub static ref MISC: Vec<Symbol> = {
307        vec![
308            LE.clone(),
309            GE.clone(),
310            NEQ.clone(),
311            PM.clone(),
312            INF.clone(),
313            ELEM.clone(),
314            SYM.clone(),
315            APPROX.clone(),
316            MULT.clone(),
317            DEGREE.clone(),
318        ]
319    };
320
321
322    /// All of the symbols that come pre-defined. This order controls the preference for parsing: if
323    /// multiple symbols share a representation, the one that comes first matches.
324    pub static ref ALL_SYMBOLS: Vec<Symbol> = {
325        let mut symbols = vec![];
326        for (_k, sym) in GREEK_SYMBOLS.clone().drain() {
327            symbols.push(sym);
328        }
329        for (_k, sym) in LATIN_SYMBOLS.clone().drain() {
330            symbols.push(sym);
331        }
332        for (_k, sym) in SPECIAL_FUNCS.clone().into_iter() {
333            symbols.push(sym);
334        }
335        symbols.extend_from_slice(&*MISC.as_slice());
336        symbols
337    };
338}
339
#[cfg(test)]
mod tests {
    use super::*;

    /// Converts `letter` in the given `case` into its `Symbol`.
    fn symbol_for(letter: GreekLetter, case: Case) -> Symbol {
        CasedGreekLetter { letter, case }.into()
    }

    /// 24 letters in two cases each.
    #[test]
    fn test_all_greek_letters() {
        let count = GREEK_SYMBOLS.len();
        assert_eq!(count, 48);
    }

    /// 26 lowercase plus 26 uppercase letters.
    #[test]
    fn test_all_latin_letters() {
        let count = LATIN_SYMBOLS.len();
        assert_eq!(count, 52);
    }

    /// Spot-checks one lowercase and one uppercase conversion.
    #[test]
    fn test_greek_letters() {
        let lower_phi = symbol_for(GreekLetter::Phi, Case::Lowercase);
        assert_eq!(lower_phi.unicode_repr, "φ");
        assert_eq!(lower_phi.ascii_repr, "phi");
        assert_eq!(lower_phi.latex_repr, r"\phi");

        let upper_sigma = symbol_for(GreekLetter::Sigma, Case::Uppercase);
        assert_eq!(upper_sigma.unicode_repr, "Σ");
        assert_eq!(upper_sigma.ascii_repr, "Sigma");
        assert_eq!(upper_sigma.latex_repr, r"\Sigma");
    }
}