1use scraper::{ElementRef, Selector};
12
13#[must_use]
17pub fn is_formula_image(element: &ElementRef) -> bool {
18 let value = element.value();
19 if value.name() != "img" {
20 return false;
21 }
22 let classes = value.attr("class").unwrap_or("");
23 classes.contains("formula") || value.attr("source").is_some()
24}
25
26#[must_use]
28pub fn is_math_element(element: &ElementRef) -> bool {
29 let value = element.value();
30 let tag = value.name();
31 let classes = value.attr("class").unwrap_or("");
32 classes.contains("katex")
33 || classes.contains("math")
34 || classes.contains("MathJax")
35 || tag == "mjx-container"
36}
37
38#[must_use]
43pub fn extract_habr_formula(element: &ElementRef) -> Option<String> {
44 let value = element.value();
45 if let Some(source) = value.attr("source") {
46 let trimmed = source.trim();
47 if !trimmed.is_empty() {
48 return Some(trimmed.to_string());
49 }
50 }
51 if let Some(alt) = value.attr("alt") {
52 let trimmed = alt.trim();
53 if !trimmed.is_empty() {
54 return Some(trimmed.to_string());
55 }
56 }
57 None
58}
59
60#[must_use]
64pub fn extract_katex_formula(element: &ElementRef) -> Option<String> {
65 if let Ok(sel) = Selector::parse(r#"annotation[encoding="application/x-tex"]"#) {
67 if let Some(annotation) = element.select(&sel).next() {
68 let text: String = annotation.text().collect();
69 let trimmed = text.trim();
70 if !trimmed.is_empty() {
71 return Some(trimmed.to_string());
72 }
73 }
74 }
75 let value = element.value();
77 if let Some(tex) = value.attr("data-tex").or_else(|| value.attr("data-latex")) {
78 let trimmed = tex.trim();
79 if !trimmed.is_empty() {
80 return Some(trimmed.to_string());
81 }
82 }
83 None
84}
85
86#[must_use]
90pub fn extract_mathjax_formula(element: &ElementRef) -> Option<String> {
91 let value = element.value();
92 if let Some(tex) = value.attr("data-tex").or_else(|| value.attr("data-latex")) {
94 let trimmed = tex.trim();
95 if !trimmed.is_empty() {
96 return Some(trimmed.to_string());
97 }
98 }
99 if let Ok(sel) = Selector::parse(r#"annotation[encoding="application/x-tex"]"#) {
101 if let Some(annotation) = element.select(&sel).next() {
102 let text: String = annotation.text().collect();
103 let trimmed = text.trim();
104 if !trimmed.is_empty() {
105 return Some(trimmed.to_string());
106 }
107 }
108 }
109 None
110}
111
112#[must_use]
114pub fn extract_formula(element: &ElementRef) -> Option<String> {
115 if is_formula_image(element) {
116 return extract_habr_formula(element);
117 }
118 let tag = element.value().name();
119 if tag == "mjx-container" {
120 return extract_mathjax_formula(element);
121 }
122 if is_math_element(element) {
123 return extract_katex_formula(element);
124 }
125 None
126}