rumdl_lib/utils/
kramdown_utils.rs1use lazy_static::lazy_static;
7use regex::Regex;
8
lazy_static! {
    /// Span IAL at the very end of the text: `{`, then one of `:`, `.` or `#`,
    /// then any run of non-`}` characters, then the closing `}`.
    static ref SPAN_IAL_PATTERN: Regex = Regex::new(r"\{[:\.#][^}]*\}$").unwrap();

    /// A whole line of the form `{::name}` or `{::name args}`; the lowercase
    /// ASCII name is captured in group 1.
    static ref EXTENSION_OPEN_PATTERN: Regex = Regex::new(r"^\s*\{::([a-z]+)(?:\s+[^}]*)?\}\s*$").unwrap();

    /// A whole line of the form `{:/name}` or the anonymous `{:/}`; the name,
    /// when present, is captured in group 1.
    static ref EXTENSION_CLOSE_PATTERN: Regex = Regex::new(r"^\s*\{:/([a-z]+)?\}\s*$").unwrap();

    /// A whole line of the form `{::options ... /}` with a non-empty argument part.
    static ref OPTIONS_PATTERN: Regex = Regex::new(r"^\s*\{::options\s+[^}]+/\}\s*$").unwrap();

    /// A footnote reference `[^label]` anywhere in the text; the label is made
    /// of ASCII letters, digits, `_` and `-`.
    static ref FOOTNOTE_REF_PATTERN: Regex = Regex::new(r"\[\^[a-zA-Z0-9_\-]+\]").unwrap();

    /// A footnote definition `[^label]:` at the start of the text.
    static ref FOOTNOTE_DEF_PATTERN: Regex = Regex::new(r"^\[\^[a-zA-Z0-9_\-]+\]:").unwrap();

    /// An abbreviation definition `*[ABBR]:` at the start of the text.
    static ref ABBREVIATION_PATTERN: Regex = Regex::new(r"^\*\[[^\]]+\]:").unwrap();

    /// Text that starts with `$$`.
    static ref MATH_BLOCK_PATTERN: Regex = Regex::new(r"^\$\$").unwrap();
    /// A `$...$` span with at least one non-`$` character between the delimiters.
    static ref MATH_INLINE_PATTERN: Regex = Regex::new(r"\$[^$]+\$").unwrap();
}
35
/// Reports whether `line` looks like a Kramdown block attribute list (IAL).
///
/// After trimming surrounding whitespace the line must be at least three bytes
/// long, be wrapped in `{` ... `}`, and have `:`, `#` or `.` as the character
/// right after the opening brace (e.g. `{:.class}`, `{#id}`, `{.class}`).
pub fn is_kramdown_block_attribute(line: &str) -> bool {
    let candidate = line.trim();

    let is_braced =
        candidate.len() >= 3 && candidate.starts_with('{') && candidate.ends_with('}');

    // The marker directly after `{` decides whether this is an attribute list.
    is_braced && matches!(candidate.chars().nth(1), Some(':' | '#' | '.'))
}
71
72pub fn has_span_ial(text: &str) -> bool {
83 SPAN_IAL_PATTERN.is_match(text.trim())
84}
85
86pub fn remove_span_ial(text: &str) -> &str {
88 if let Some(captures) = SPAN_IAL_PATTERN.find(text) {
89 &text[..captures.start()]
90 } else {
91 text
92 }
93}
94
95pub fn is_kramdown_extension_open(line: &str) -> bool {
99 EXTENSION_OPEN_PATTERN.is_match(line)
100}
101
102pub fn is_kramdown_extension_close(line: &str) -> bool {
104 EXTENSION_CLOSE_PATTERN.is_match(line)
105}
106
107pub fn is_kramdown_options(line: &str) -> bool {
109 OPTIONS_PATTERN.is_match(line)
110}
111
112pub fn is_kramdown_extension(line: &str) -> bool {
114 is_kramdown_extension_open(line) || is_kramdown_extension_close(line) || is_kramdown_options(line)
115}
116
/// Reports whether `line` is an end-of-block marker: a lone `^`, optionally
/// surrounded by whitespace.
pub fn is_eob_marker(line: &str) -> bool {
    matches!(line.trim(), "^")
}
123
124pub fn has_footnote_reference(text: &str) -> bool {
126 FOOTNOTE_REF_PATTERN.is_match(text)
127}
128
129pub fn is_footnote_definition(line: &str) -> bool {
131 FOOTNOTE_DEF_PATTERN.is_match(line.trim_start())
132}
133
134pub fn is_abbreviation_definition(line: &str) -> bool {
136 ABBREVIATION_PATTERN.is_match(line.trim_start())
137}
138
139pub fn is_math_block_delimiter(line: &str) -> bool {
141 let trimmed = line.trim();
142 trimmed == "$$" || MATH_BLOCK_PATTERN.is_match(trimmed)
143}
144
145pub fn has_inline_math(text: &str) -> bool {
147 MATH_INLINE_PATTERN.is_match(text)
148}
149
/// Reports whether `line` is a definition-list item: after optional leading
/// whitespace it starts with `:` immediately followed by a whitespace
/// character.
pub fn is_definition_list_item(line: &str) -> bool {
    let mut leading = line.trim_start().chars();

    // First character must be the colon, second must exist and be whitespace.
    leading.next() == Some(':') && leading.next().is_some_and(char::is_whitespace)
}
162
163pub fn has_kramdown_syntax(line: &str) -> bool {
165 is_kramdown_block_attribute(line)
166 || has_span_ial(line)
167 || is_kramdown_extension(line)
168 || is_eob_marker(line)
169 || is_footnote_definition(line)
170 || is_abbreviation_definition(line)
171 || is_math_block_delimiter(line)
172 || is_definition_list_item(line)
173 || has_footnote_reference(line)
174 || has_inline_math(line)
175}
176
/// Converts heading text into a URL fragment identifier.
///
/// The steps, in order:
/// 1. Trim surrounding whitespace.
/// 2. Drop every character that is not an ASCII letter, ASCII digit, space or
///    hyphen (underscores, punctuation and non-ASCII letters all disappear).
/// 3. Discard everything before the first ASCII letter.
/// 4. Collapse each run of literal hyphens according to its length (see the
///    table in the body), turn each space into `-`, and lowercase letters.
///
/// Returns `"section"` when no ASCII letter survives (empty input, digits or
/// punctuation only, ...). The result always starts with a lowercase letter
/// but may end with `-`, e.g. `"Hello !"` yields `"hello-"`.
pub fn heading_to_fragment(heading: &str) -> String {
    const FALLBACK: &str = "section";

    let kept: String = heading
        .trim()
        .chars()
        .filter(|&c| c.is_ascii_alphanumeric() || c == ' ' || c == '-')
        .collect();

    // Leading digits, spaces and hyphens never make it into the fragment.
    let Some(first_letter) = kept.find(|c: char| c.is_ascii_alphabetic()) else {
        return FALLBACK.to_string();
    };
    let body = &kept[first_letter..];

    let mut fragment = String::with_capacity(body.len());
    let mut chars = body.chars().peekable();
    while let Some(c) = chars.next() {
        match c {
            '-' => {
                // Measure the whole run of consecutive literal hyphens.
                let mut run = 1usize;
                while chars.next_if_eq(&'-').is_some() {
                    run += 1;
                }
                // Run-length table carried over unchanged:
                //   1 or 4            -> "-"
                //   6, 8, 10, ...     -> "--"
                //   2, 3, 5, 7, 9, .. -> dropped
                // NOTE(review): these rules are reproduced as-is; confirm them
                // against the reference renderer before changing any entry.
                fragment.push_str(match run {
                    1 | 4 => "-",
                    n if n >= 6 && n % 2 == 0 => "--",
                    _ => "",
                });
            }
            // Hyphens produced from spaces are emitted one-for-one and are
            // deliberately not subject to the run collapsing above.
            ' ' => fragment.push('-'),
            _ => fragment.push(c.to_ascii_lowercase()),
        }
    }

    fragment
}
301
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that every line is recognised as a block attribute list.
    #[track_caller]
    fn assert_all_accepted(lines: &[&str]) {
        for line in lines {
            assert!(
                is_kramdown_block_attribute(line),
                "expected a block attribute: {line:?}"
            );
        }
    }

    /// Asserts that no line is recognised as a block attribute list.
    #[track_caller]
    fn assert_all_rejected(lines: &[&str]) {
        for line in lines {
            assert!(
                !is_kramdown_block_attribute(line),
                "expected no block attribute: {line:?}"
            );
        }
    }

    #[test]
    fn test_kramdown_class_attributes() {
        assert_all_accepted(&["{:.wrap}", "{:.class-name}", "{:.multiple .classes}"]);
    }

    #[test]
    fn test_kramdown_id_attributes() {
        assert_all_accepted(&["{:#my-id}", "{:#section-1}"]);
    }

    #[test]
    fn test_kramdown_generic_attributes() {
        assert_all_accepted(&["{:style=\"color: red\"}", "{:data-value=\"123\"}"]);
    }

    #[test]
    fn test_kramdown_combined_attributes() {
        assert_all_accepted(&[
            "{:.class #id}",
            "{:#id .class style=\"color: blue\"}",
            "{:.wrap #my-code .highlight}",
        ]);
    }

    #[test]
    fn test_non_kramdown_braces() {
        assert_all_rejected(&["{just some text}", "{not kramdown}", "{ spaces }"]);
    }

    #[test]
    fn test_edge_cases() {
        assert_all_rejected(&["{}", "{", "}", "", "not braces"]);
    }

    #[test]
    fn test_whitespace_handling() {
        assert_all_accepted(&["  {:.wrap}  ", "\t{:#id}\t", " {:.class #id} "]);
    }
}