rumdl_lib/utils/
kramdown_utils.rs1use regex::Regex;
7use std::sync::LazyLock;
8
9use super::is_definition_list_item;
10
11static SPAN_IAL_PATTERN: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\{[:\.#][^}]*\}$").unwrap());
13
14static EXTENSION_OPEN_PATTERN: LazyLock<Regex> =
16 LazyLock::new(|| Regex::new(r"^\s*\{::([a-z]+)(?:\s+[^}]*)?\}\s*$").unwrap());
17
18static EXTENSION_CLOSE_PATTERN: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^\s*\{:/([a-z]+)?\}\s*$").unwrap());
20
21static OPTIONS_PATTERN: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^\s*\{::options\s+[^}]+/\}\s*$").unwrap());
23
24static FOOTNOTE_REF_PATTERN: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\[\^[a-zA-Z0-9_\-]+\]").unwrap());
26
27static FOOTNOTE_DEF_PATTERN: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^\[\^[a-zA-Z0-9_\-]+\]:").unwrap());
29
30static ABBREVIATION_PATTERN: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^\*\[[^\]]+\]:").unwrap());
32
33static MATH_BLOCK_PATTERN: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^\$\$").unwrap());
35static MATH_INLINE_PATTERN: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\$[^$]+\$").unwrap());
36
/// Returns `true` when `line`, ignoring surrounding whitespace, looks like a Kramdown
/// block attribute list such as `{:.class}`, `{:#id}` or `{:key="value"}`.
///
/// The trimmed line must be wrapped in `{` … `}` and the character directly after the
/// opening brace must be `:`, `#` or `.`. The rest of the content is not validated.
pub fn is_kramdown_block_attribute(line: &str) -> bool {
    // Peel off the surrounding braces; anything not wrapped in `{…}` yields `None`.
    let inner = line
        .trim()
        .strip_prefix('{')
        .and_then(|rest| rest.strip_suffix('}'));

    // An empty body (`{}`) has no first character and is therefore rejected as well.
    matches!(
        inner.and_then(|body| body.chars().next()),
        Some(':' | '#' | '.')
    )
}
72
73pub fn has_span_ial(text: &str) -> bool {
84 SPAN_IAL_PATTERN.is_match(text.trim())
85}
86
87pub fn remove_span_ial(text: &str) -> &str {
89 if let Some(captures) = SPAN_IAL_PATTERN.find(text) {
90 &text[..captures.start()]
91 } else {
92 text
93 }
94}
95
96pub fn is_kramdown_extension_open(line: &str) -> bool {
100 EXTENSION_OPEN_PATTERN.is_match(line)
101}
102
103pub fn is_kramdown_extension_close(line: &str) -> bool {
105 EXTENSION_CLOSE_PATTERN.is_match(line)
106}
107
108pub fn is_kramdown_options(line: &str) -> bool {
110 OPTIONS_PATTERN.is_match(line)
111}
112
113pub fn is_kramdown_extension(line: &str) -> bool {
115 is_kramdown_extension_open(line) || is_kramdown_extension_close(line) || is_kramdown_options(line)
116}
117
/// Returns `true` when the line is an end-of-block marker: a lone `^`, optionally
/// surrounded by whitespace.
pub fn is_eob_marker(line: &str) -> bool {
    matches!(line.trim(), "^")
}
124
125pub fn has_footnote_reference(text: &str) -> bool {
127 FOOTNOTE_REF_PATTERN.is_match(text)
128}
129
130pub fn is_footnote_definition(line: &str) -> bool {
132 FOOTNOTE_DEF_PATTERN.is_match(line.trim_start())
133}
134
135pub fn is_abbreviation_definition(line: &str) -> bool {
137 ABBREVIATION_PATTERN.is_match(line.trim_start())
138}
139
140pub fn is_math_block_delimiter(line: &str) -> bool {
142 let trimmed = line.trim();
143 trimmed == "$$" || MATH_BLOCK_PATTERN.is_match(trimmed)
144}
145
146pub fn has_inline_math(text: &str) -> bool {
148 MATH_INLINE_PATTERN.is_match(text)
149}
150
151pub fn has_kramdown_syntax(line: &str) -> bool {
153 is_kramdown_block_attribute(line)
154 || has_span_ial(line)
155 || is_kramdown_extension(line)
156 || is_eob_marker(line)
157 || is_footnote_definition(line)
158 || is_abbreviation_definition(line)
159 || is_math_block_delimiter(line)
160 || is_definition_list_item(line)
161 || has_footnote_reference(line)
162 || has_inline_math(line)
163}
164
/// Converts heading text into the anchor fragment used for Kramdown-style heading links.
///
/// The conversion runs in four steps:
///
/// 1. Surrounding whitespace is trimmed and every character other than an ASCII letter,
///    ASCII digit, space or hyphen is dropped (underscores, punctuation and non-ASCII
///    characters disappear without a replacement).
/// 2. Each maximal run of hyphens in the filtered text is collapsed according to
///    [`collapse_hyphen_run`].
/// 3. Everything in front of the first ASCII letter is discarded.
/// 4. Letters are lowercased, digits are kept, and each remaining space or hyphen
///    becomes `-`.
///
/// Returns `"section"` when the heading contains no ASCII letter at all (which includes
/// empty and whitespace-only headings).
///
/// Symbols such as `&` and `>` are already removed in step 1, so `"A & B"` becomes
/// `"a--b"` purely through its two remaining spaces.
pub fn heading_to_fragment(heading: &str) -> String {
    const FALLBACK: &str = "section";

    let kept = heading
        .trim()
        .chars()
        .filter(|&c| c.is_ascii_alphanumeric() || c == ' ' || c == '-');

    // Copy the surviving characters, replacing each maximal hyphen run as soon as the
    // run ends (either at the next non-hyphen character or at the end of the text).
    let mut collapsed = String::with_capacity(heading.len());
    let mut pending_hyphens = 0usize;
    for c in kept {
        if c == '-' {
            pending_hyphens += 1;
        } else {
            collapsed.push_str(collapse_hyphen_run(pending_hyphens));
            pending_hyphens = 0;
            collapsed.push(c);
        }
    }
    collapsed.push_str(collapse_hyphen_run(pending_hyphens));

    // The fragment starts at the first ASCII letter; leading digits, spaces and hyphens
    // are discarded. Without any letter there is nothing to build a fragment from.
    let Some(start) = collapsed.find(|c: char| c.is_ascii_alphabetic()) else {
        return FALLBACK.to_string();
    };

    // `collapsed` only holds ASCII letters, digits, spaces and hyphens at this point, so
    // anything that is not alphanumeric is a separator. Lowercasing a digit is a no-op.
    collapsed[start..]
        .chars()
        .map(|c| {
            if c.is_ascii_alphanumeric() {
                c.to_ascii_lowercase()
            } else {
                '-'
            }
        })
        .collect()
}

/// Returns the replacement for a maximal run of `run_len` consecutive hyphens.
///
/// Runs of exactly 1 or 4 hyphens become `-`, even runs of 6 or more become `--`, and
/// every other length (including 0, 2, 3, 5 and all odd lengths above 5) is removed.
fn collapse_hyphen_run(run_len: usize) -> &'static str {
    match run_len {
        1 | 4 => "-",
        n if n >= 6 && n % 2 == 0 => "--",
        _ => "",
    }
}
289
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that every sample is recognised as a Kramdown block attribute.
    fn assert_all_accepted(samples: &[&str]) {
        for sample in samples {
            assert!(
                is_kramdown_block_attribute(sample),
                "expected a block attribute: {sample:?}"
            );
        }
    }

    /// Asserts that no sample is recognised as a Kramdown block attribute.
    fn assert_all_rejected(samples: &[&str]) {
        for sample in samples {
            assert!(
                !is_kramdown_block_attribute(sample),
                "expected no block attribute: {sample:?}"
            );
        }
    }

    #[test]
    fn test_kramdown_class_attributes() {
        assert_all_accepted(&["{:.wrap}", "{:.class-name}", "{:.multiple .classes}"]);
    }

    #[test]
    fn test_kramdown_id_attributes() {
        assert_all_accepted(&["{:#my-id}", "{:#section-1}"]);
    }

    #[test]
    fn test_kramdown_generic_attributes() {
        assert_all_accepted(&["{:style=\"color: red\"}", "{:data-value=\"123\"}"]);
    }

    #[test]
    fn test_kramdown_combined_attributes() {
        assert_all_accepted(&[
            "{:.class #id}",
            "{:#id .class style=\"color: blue\"}",
            "{:.wrap #my-code .highlight}",
        ]);
    }

    #[test]
    fn test_non_kramdown_braces() {
        assert_all_rejected(&["{just some text}", "{not kramdown}", "{ spaces }"]);
    }

    #[test]
    fn test_edge_cases() {
        assert_all_rejected(&["{}", "{", "}", "", "not braces"]);
    }

    #[test]
    fn test_whitespace_handling() {
        assert_all_accepted(&[" {:.wrap} ", "\t{:#id}\t", " {:.class #id} "]);
    }
}