1use crate::{Plugin, ProcessResult};
16use regex::Regex;
17use std::collections::HashMap;
18use std::sync::LazyLock;
19use streamdown_config::ComputedStyle;
20use streamdown_core::state::ParseState;
21
/// Streaming plugin that rewrites LaTeX math into plain Unicode text.
///
/// The plugin is line-oriented: display math delimited by `$$` may span
/// several lines, so the partially read expression is kept here between
/// calls.
#[derive(Debug)]
pub struct LatexPlugin {
    /// `true` while an opening `$$` has been seen without its closing `$$`.
    in_block: bool,
    /// Text of the display-math block collected so far.
    buffer: String,
}
29
30impl LatexPlugin {
31 pub fn new() -> Self {
33 Self {
34 in_block: false,
35 buffer: String::new(),
36 }
37 }
38}
39
40impl Default for LatexPlugin {
41 fn default() -> Self {
42 Self::new()
43 }
44}
45
46impl Plugin for LatexPlugin {
47 fn name(&self) -> &str {
48 "latex"
49 }
50
51 fn process_line(
52 &mut self,
53 line: &str,
54 _state: &ParseState,
55 _style: &ComputedStyle,
56 ) -> Option<ProcessResult> {
57 if !self.in_block && line.contains('$') && !line.contains("$$") {
59 let converted = convert_inline_math(line);
61 if converted != line {
62 return Some(ProcessResult::Lines(vec![converted]));
63 }
64 }
65
66 if !self.in_block {
68 if let Some(idx) = line.find("$$") {
69 self.in_block = true;
70 self.buffer.clear();
71
72 let after = &line[idx + 2..];
74
75 if let Some(end_idx) = after.find("$$") {
77 self.in_block = false;
79 let expr = &after[..end_idx];
80 let converted = latex_to_unicode(expr);
81 return Some(ProcessResult::Lines(vec![converted]));
82 }
83
84 self.buffer.push_str(after);
86 return Some(ProcessResult::Continue);
87 }
88 return None;
89 }
90
91 if let Some(idx) = line.find("$$") {
93 self.in_block = false;
95 self.buffer.push_str(&line[..idx]);
96
97 let converted = latex_to_unicode(&self.buffer);
98 self.buffer.clear();
99
100 return Some(ProcessResult::Lines(vec![converted]));
101 }
102
103 if !self.buffer.is_empty() {
105 self.buffer.push(' ');
106 }
107 self.buffer.push_str(line);
108 Some(ProcessResult::Continue)
109 }
110
111 fn flush(&mut self) -> Option<Vec<String>> {
112 if self.buffer.is_empty() {
113 return None;
114 }
115
116 let result = std::mem::take(&mut self.buffer);
118 self.in_block = false;
119 Some(vec![format!("$$ {} (incomplete)", result)])
120 }
121
122 fn reset(&mut self) {
123 self.in_block = false;
124 self.buffer.clear();
125 }
126
127 fn is_active(&self) -> bool {
128 self.in_block
129 }
130
131 fn priority(&self) -> i32 {
132 10 }
134}
135
136fn convert_inline_math(line: &str) -> String {
138 static INLINE_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\$([^$]+)\$").unwrap());
139
140 INLINE_RE
141 .replace_all(line, |caps: ®ex::Captures| latex_to_unicode(&caps[1]))
142 .to_string()
143}
144
145pub fn latex_to_unicode(latex: &str) -> String {
147 let mut result = latex.to_string();
148
149 result = convert_commands(&result);
151 result = convert_fractions(&result);
152 result = convert_subscripts(&result);
153 result = convert_superscripts(&result);
154 result = cleanup(&result);
155
156 result
157}
158
/// Greek letter command names (without the leading backslash) mapped to the
/// Unicode character they are rendered as.
static GREEK_LETTERS: LazyLock<HashMap<&'static str, &'static str>> = LazyLock::new(|| {
    HashMap::from([
        // Lowercase letters and their `var*` variants.
        ("alpha", "α"),
        ("beta", "β"),
        ("gamma", "γ"),
        ("delta", "δ"),
        ("epsilon", "ε"),
        ("varepsilon", "ε"),
        ("zeta", "ζ"),
        ("eta", "η"),
        ("theta", "θ"),
        ("vartheta", "ϑ"),
        ("iota", "ι"),
        ("kappa", "κ"),
        ("lambda", "λ"),
        ("mu", "μ"),
        ("nu", "ν"),
        ("xi", "ξ"),
        ("omicron", "ο"),
        ("pi", "π"),
        ("varpi", "ϖ"),
        ("rho", "ρ"),
        ("varrho", "ϱ"),
        ("sigma", "σ"),
        ("varsigma", "ς"),
        ("tau", "τ"),
        ("upsilon", "υ"),
        ("phi", "φ"),
        ("varphi", "ϕ"),
        ("chi", "χ"),
        ("psi", "ψ"),
        ("omega", "ω"),
        // Uppercase letters.
        ("Gamma", "Γ"),
        ("Delta", "Δ"),
        ("Theta", "Θ"),
        ("Lambda", "Λ"),
        ("Xi", "Ξ"),
        ("Pi", "Π"),
        ("Sigma", "Σ"),
        ("Upsilon", "Υ"),
        ("Phi", "Φ"),
        ("Psi", "Ψ"),
        ("Omega", "Ω"),
    ])
});
207
/// Operator command names (without the leading backslash) mapped to the
/// Unicode character they are rendered as.
static OPERATORS: LazyLock<HashMap<&'static str, &'static str>> = LazyLock::new(|| {
    HashMap::from([
        // Large operators, integrals and differential symbols.
        ("sum", "Σ"),
        ("prod", "Π"),
        ("int", "∫"),
        ("iint", "∬"),
        ("iiint", "∭"),
        ("oint", "∮"),
        ("partial", "∂"),
        ("nabla", "∇"),
        ("sqrt", "√"),
        ("cbrt", "∛"),
        // Binary operators.
        ("times", "×"),
        ("div", "÷"),
        ("cdot", "·"),
        ("ast", "∗"),
        ("star", "⋆"),
        ("circ", "∘"),
        ("bullet", "•"),
        ("oplus", "⊕"),
        ("ominus", "⊖"),
        ("otimes", "⊗"),
        ("oslash", "⊘"),
        ("odot", "⊙"),
    ])
});
235
/// Relation and quantifier command names (without the leading backslash)
/// mapped to the Unicode character they are rendered as.
static RELATIONS: LazyLock<HashMap<&'static str, &'static str>> = LazyLock::new(|| {
    HashMap::from([
        // Order and equivalence relations.
        ("leq", "≤"),
        ("le", "≤"),
        ("geq", "≥"),
        ("ge", "≥"),
        ("neq", "≠"),
        ("ne", "≠"),
        ("approx", "≈"),
        ("equiv", "≡"),
        ("sim", "∼"),
        ("simeq", "≃"),
        ("cong", "≅"),
        ("propto", "∝"),
        ("ll", "≪"),
        ("gg", "≫"),
        // Set relations.
        ("subset", "⊂"),
        ("supset", "⊃"),
        ("subseteq", "⊆"),
        ("supseteq", "⊇"),
        ("in", "∈"),
        ("notin", "∉"),
        ("ni", "∋"),
        // Quantifiers.
        ("forall", "∀"),
        ("exists", "∃"),
        ("nexists", "∄"),
    ])
});
265
/// Miscellaneous symbol command names (without the leading backslash)
/// mapped to the Unicode character they are rendered as.
static SYMBOLS: LazyLock<HashMap<&'static str, &'static str>> = LazyLock::new(|| {
    HashMap::from([
        ("infty", "∞"),
        ("pm", "±"),
        ("mp", "∓"),
        // Arrows.
        ("to", "→"),
        ("rightarrow", "→"),
        ("leftarrow", "←"),
        ("leftrightarrow", "↔"),
        ("Rightarrow", "⇒"),
        ("Leftarrow", "⇐"),
        ("Leftrightarrow", "⇔"),
        ("uparrow", "↑"),
        ("downarrow", "↓"),
        ("mapsto", "↦"),
        // Dots.
        ("ldots", "…"),
        ("cdots", "⋯"),
        ("vdots", "⋮"),
        ("ddots", "⋱"),
        ("therefore", "∴"),
        ("because", "∵"),
        // Geometry.
        ("angle", "∠"),
        ("perp", "⊥"),
        ("parallel", "∥"),
        ("triangle", "△"),
        ("square", "□"),
        ("diamond", "◇"),
        // Sets and logic.
        ("emptyset", "∅"),
        ("varnothing", "∅"),
        ("neg", "¬"),
        ("lnot", "¬"),
        ("land", "∧"),
        ("wedge", "∧"),
        ("lor", "∨"),
        ("vee", "∨"),
        ("cap", "∩"),
        ("cup", "∪"),
        ("setminus", "∖"),
        // Letter-like symbols and marks.
        ("aleph", "ℵ"),
        ("hbar", "ℏ"),
        ("ell", "ℓ"),
        ("Re", "ℜ"),
        ("Im", "ℑ"),
        ("wp", "℘"),
        ("prime", "′"),
        ("degree", "°"),
    ])
});
315
/// Characters that have a Unicode subscript form, mapped to that form.
///
/// Despite the name, the table covers digits, a few punctuation marks and
/// the subset of lowercase letters listed below.
static SUBSCRIPT_DIGITS: LazyLock<HashMap<char, char>> = LazyLock::new(|| {
    [
        // Digits.
        ('0', '₀'),
        ('1', '₁'),
        ('2', '₂'),
        ('3', '₃'),
        ('4', '₄'),
        ('5', '₅'),
        ('6', '₆'),
        ('7', '₇'),
        ('8', '₈'),
        ('9', '₉'),
        // Signs and parentheses.
        ('+', '₊'),
        ('-', '₋'),
        ('=', '₌'),
        ('(', '₍'),
        (')', '₎'),
        // Lowercase letters.
        ('a', 'ₐ'),
        ('e', 'ₑ'),
        ('h', 'ₕ'),
        ('i', 'ᵢ'),
        ('j', 'ⱼ'),
        ('k', 'ₖ'),
        ('l', 'ₗ'),
        ('m', 'ₘ'),
        ('n', 'ₙ'),
        ('o', 'ₒ'),
        ('p', 'ₚ'),
        ('r', 'ᵣ'),
        ('s', 'ₛ'),
        ('t', 'ₜ'),
        ('u', 'ᵤ'),
        ('v', 'ᵥ'),
        ('x', 'ₓ'),
    ]
    .into_iter()
    .collect()
});
353
/// Characters that have a Unicode superscript form, mapped to that form.
///
/// Covers digits, a few punctuation marks and every lowercase letter
/// except `q`.
static SUPERSCRIPT_CHARS: LazyLock<HashMap<char, char>> = LazyLock::new(|| {
    [
        // Digits.
        ('0', '⁰'),
        ('1', '¹'),
        ('2', '²'),
        ('3', '³'),
        ('4', '⁴'),
        ('5', '⁵'),
        ('6', '⁶'),
        ('7', '⁷'),
        ('8', '⁸'),
        ('9', '⁹'),
        // Signs and parentheses.
        ('+', '⁺'),
        ('-', '⁻'),
        ('=', '⁼'),
        ('(', '⁽'),
        (')', '⁾'),
        // Lowercase letters.
        ('a', 'ᵃ'),
        ('b', 'ᵇ'),
        ('c', 'ᶜ'),
        ('d', 'ᵈ'),
        ('e', 'ᵉ'),
        ('f', 'ᶠ'),
        ('g', 'ᵍ'),
        ('h', 'ʰ'),
        ('i', 'ⁱ'),
        ('j', 'ʲ'),
        ('k', 'ᵏ'),
        ('l', 'ˡ'),
        ('m', 'ᵐ'),
        ('n', 'ⁿ'),
        ('o', 'ᵒ'),
        ('p', 'ᵖ'),
        ('r', 'ʳ'),
        ('s', 'ˢ'),
        ('t', 'ᵗ'),
        ('u', 'ᵘ'),
        ('v', 'ᵛ'),
        ('w', 'ʷ'),
        ('x', 'ˣ'),
        ('y', 'ʸ'),
        ('z', 'ᶻ'),
    ]
    .into_iter()
    .collect()
});
399
400fn convert_commands(input: &str) -> String {
402 static CMD_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\\([a-zA-Z]+)").unwrap());
403
404 CMD_RE
405 .replace_all(input, |caps: ®ex::Captures| {
406 let cmd = &caps[1];
407
408 if let Some(s) = GREEK_LETTERS.get(cmd) {
410 return (*s).to_string();
411 }
412 if let Some(s) = OPERATORS.get(cmd) {
413 return (*s).to_string();
414 }
415 if let Some(s) = RELATIONS.get(cmd) {
416 return (*s).to_string();
417 }
418 if let Some(s) = SYMBOLS.get(cmd) {
419 return (*s).to_string();
420 }
421
422 format!("\\{}", cmd)
424 })
425 .to_string()
426}
427
428fn convert_fractions(input: &str) -> String {
430 static FRAC_RE: LazyLock<Regex> =
431 LazyLock::new(|| Regex::new(r"\\frac\{([^}]*)\}\{([^}]*)\}").unwrap());
432
433 FRAC_RE
434 .replace_all(input, |caps: ®ex::Captures| {
435 let num = &caps[1];
436 let den = &caps[2];
437 format!("({}/{})", num, den)
438 })
439 .to_string()
440}
441
442fn convert_subscripts(input: &str) -> String {
444 static BRACED_SUB_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"_\{([^}]+)\}").unwrap());
446
447 let result = BRACED_SUB_RE
448 .replace_all(input, |caps: ®ex::Captures| {
449 let content = &caps[1];
450 to_subscript(content)
451 })
452 .to_string();
453
454 static SINGLE_SUB_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"_([0-9a-z])").unwrap());
456
457 SINGLE_SUB_RE
458 .replace_all(&result, |caps: ®ex::Captures| {
459 let c = caps[1].chars().next().unwrap();
460 SUBSCRIPT_DIGITS
461 .get(&c)
462 .map(|&s| s.to_string())
463 .unwrap_or_else(|| format!("_{}", c))
464 })
465 .to_string()
466}
467
468fn convert_superscripts(input: &str) -> String {
470 static BRACED_SUP_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\^\{([^}]+)\}").unwrap());
472
473 let result = BRACED_SUP_RE
474 .replace_all(input, |caps: ®ex::Captures| {
475 let content = &caps[1];
476 to_superscript(content)
477 })
478 .to_string();
479
480 static SINGLE_SUP_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\^([0-9a-z])").unwrap());
482
483 SINGLE_SUP_RE
484 .replace_all(&result, |caps: ®ex::Captures| {
485 let c = caps[1].chars().next().unwrap();
486 SUPERSCRIPT_CHARS
487 .get(&c)
488 .map(|&s| s.to_string())
489 .unwrap_or_else(|| format!("^{}", c))
490 })
491 .to_string()
492}
493
494fn to_subscript(s: &str) -> String {
496 s.chars()
497 .map(|c| SUBSCRIPT_DIGITS.get(&c).copied().unwrap_or(c))
498 .collect()
499}
500
501fn to_superscript(s: &str) -> String {
503 s.chars()
504 .map(|c| SUPERSCRIPT_CHARS.get(&c).copied().unwrap_or(c))
505 .collect()
506}
507
/// Final tidy-up pass over a converted expression.
///
/// Removes, in this order, every occurrence of `"{ "`, `" }"` and `"{}"`,
/// then trims leading and trailing whitespace. Braces that are not part of
/// one of those three sequences are kept.
fn cleanup(input: &str) -> String {
    let mut text = input.to_owned();
    for leftover in ["{ ", " }", "{}"] {
        text = text.replace(leftover, "");
    }
    text.trim().to_owned()
}
518
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that each `(input, expected)` pair converts exactly.
    fn assert_conversions(cases: &[(&str, &str)]) {
        for &(input, expected) in cases {
            assert_eq!(latex_to_unicode(input), expected);
        }
    }

    /// Builds a fresh plugin together with the state and style it is fed.
    fn fixture() -> (LatexPlugin, ParseState, ComputedStyle) {
        (
            LatexPlugin::new(),
            ParseState::new(),
            ComputedStyle::default(),
        )
    }

    #[test]
    fn test_greek_letters() {
        assert_conversions(&[
            (r"\alpha + \beta", "α + β"),
            (r"\Gamma\Delta", "ΓΔ"),
            (r"\pi r^2", "π r²"),
        ]);
    }

    #[test]
    fn test_operators() {
        assert_conversions(&[(r"\sum x", "Σ x"), (r"\int f(x) dx", "∫ f(x) dx")]);

        let product = latex_to_unicode(r"\prod_{i=1}");
        for needle in ["Π", "₁"] {
            assert!(product.contains(needle));
        }
    }

    #[test]
    fn test_relations() {
        assert_conversions(&[
            (r"x \leq y", "x ≤ y"),
            (r"a \neq b", "a ≠ b"),
            (r"A \subset B", "A ⊂ B"),
        ]);
    }

    #[test]
    fn test_symbols() {
        assert_conversions(&[(r"\infty", "∞"), (r"\pm 1", "± 1"), (r"x \to y", "x → y")]);
    }

    #[test]
    fn test_subscripts() {
        assert_conversions(&[("x_1", "x₁"), ("x_{12}", "x₁₂"), ("a_n", "aₙ")]);
    }

    #[test]
    fn test_superscripts() {
        assert_conversions(&[("x^2", "x²"), ("x^{10}", "x¹⁰"), ("e^x", "eˣ")]);
    }

    #[test]
    fn test_fractions() {
        assert_conversions(&[(r"\frac{a}{b}", "(a/b)"), (r"\frac{1}{2}", "(1/2)")]);
    }

    #[test]
    fn test_complex_expression() {
        assert_conversions(&[(r"E = mc^2", "E = mc²")]);

        let summation = latex_to_unicode(r"\sum_{i=1}^n x_i");
        assert!(summation.contains("Σ"));
        assert!(summation.contains("ᵢ") || summation.contains("i"));
    }

    #[test]
    fn test_inline_math() {
        let cases = [
            ("The value $x^2$ is", "The value x² is"),
            (r"We have $\alpha$ and $\beta$", "We have α and β"),
        ];
        for (input, expected) in cases {
            assert_eq!(convert_inline_math(input), expected);
        }
    }

    #[test]
    fn test_latex_plugin_single_line() {
        let (mut plugin, state, style) = fixture();

        let Some(ProcessResult::Lines(lines)) = plugin.process_line("$$E = mc^2$$", &state, &style)
        else {
            panic!("single-line display math should yield converted lines");
        };
        assert_eq!(lines.len(), 1);
        assert!(lines[0].contains("E = mc²"));
    }

    #[test]
    fn test_latex_plugin_multiline() {
        let (mut plugin, state, style) = fixture();

        // The opening line only starts the block.
        let opened = plugin.process_line("$$\\sum_{i=1}^n", &state, &style);
        assert!(matches!(opened, Some(ProcessResult::Continue)));

        // The closing line emits the converted expression.
        let Some(ProcessResult::Lines(lines)) = plugin.process_line("x_i$$", &state, &style) else {
            panic!("closing line should yield converted lines");
        };
        assert!(lines[0].contains("Σ"));
    }

    #[test]
    fn test_latex_plugin_inline() {
        let (mut plugin, state, style) = fixture();

        let Some(ProcessResult::Lines(lines)) =
            plugin.process_line("The value $x^2$ is important", &state, &style)
        else {
            panic!("inline math should yield converted lines");
        };
        assert!(lines[0].contains("x²"));
    }

    #[test]
    fn test_latex_plugin_no_match() {
        let (mut plugin, state, style) = fixture();

        let untouched = plugin.process_line("Normal text without math", &state, &style);
        assert!(untouched.is_none());
    }

    #[test]
    fn test_latex_plugin_flush() {
        let (mut plugin, state, style) = fixture();

        // Open a block and never close it.
        plugin.process_line("$$x^2 + y^2", &state, &style);

        let flushed = plugin.flush();
        assert!(flushed.is_some());
    }

    #[test]
    fn test_latex_plugin_reset() {
        let (mut plugin, state, style) = fixture();

        plugin.process_line("$$x^2", &state, &style);
        assert!(plugin.is_active());

        plugin.reset();
        assert!(!plugin.is_active());
    }
}