rumdl_lib/utils/
header_id_utils.rs1use lazy_static::lazy_static;
28use regex::Regex;
29
30lazy_static! {
31 static ref HEADER_ID_PATTERN: Regex = Regex::new(r"\s*\{\s*:?\s*([^}]*?#[^}]*?)\s*\}\s*$").unwrap();
36
37 static ref ID_EXTRACT_PATTERN: Regex = Regex::new(r"#([a-zA-Z0-9_\-:]+)(?:\s|$|[^a-zA-Z0-9_\-:])").unwrap();
40
41 static ref ID_VALIDATE_PATTERN: Regex = Regex::new(r"^[a-zA-Z0-9_\-:]+$").unwrap();
43
44 static ref STANDALONE_ATTR_LIST_PATTERN: Regex = Regex::new(r"^\s*\{\s*:?\s*([^}]*#[a-zA-Z0-9_\-:]+[^}]*)\s*\}\s*$").unwrap();
47}
48
49pub fn extract_header_id(line: &str) -> (String, Option<String>) {
70 if let Some(captures) = HEADER_ID_PATTERN.captures(line)
71 && let Some(full_match) = captures.get(0)
72 && let Some(attr_content) = captures.get(1)
73 {
74 let attr_str = attr_content.as_str().trim();
75
76 if let Some(hash_pos) = attr_str.find('#') {
78 let after_hash = &attr_str[hash_pos + 1..];
80
81 let is_simple_format = !attr_str.contains(' ') && !attr_str.contains('=') && attr_str.starts_with('#');
86
87 if is_simple_format {
88 let potential_id = after_hash;
90 if ID_VALIDATE_PATTERN.is_match(potential_id) && !potential_id.is_empty() {
91 let clean_text = line[..full_match.start()].trim_end().to_string();
92 return (clean_text, Some(potential_id.to_string()));
93 }
94 } else {
96 if let Some(delimiter_pos) = after_hash.find(|c: char| c.is_whitespace() || c == '.' || c == '=') {
98 let potential_id = &after_hash[..delimiter_pos];
99 if ID_VALIDATE_PATTERN.is_match(potential_id) && !potential_id.is_empty() {
100 let clean_text = line[..full_match.start()].trim_end().to_string();
101 return (clean_text, Some(potential_id.to_string()));
102 }
103 } else {
104 let potential_id = after_hash;
106 if ID_VALIDATE_PATTERN.is_match(potential_id) && !potential_id.is_empty() {
107 let clean_text = line[..full_match.start()].trim_end().to_string();
108 return (clean_text, Some(potential_id.to_string()));
109 }
110 }
111 }
112 }
113 }
114 (line.to_string(), None)
115}
116
117pub fn is_standalone_attr_list(line: &str) -> bool {
132 STANDALONE_ATTR_LIST_PATTERN.is_match(line)
133}
134
135pub fn extract_standalone_attr_list_id(line: &str) -> Option<String> {
148 if let Some(captures) = STANDALONE_ATTR_LIST_PATTERN.captures(line)
149 && let Some(attr_content) = captures.get(1)
150 {
151 let attr_str = attr_content.as_str().trim();
152
153 if let Some(hash_pos) = attr_str.find('#') {
155 let after_hash = &attr_str[hash_pos + 1..];
156
157 let is_simple_format = !attr_str.contains(' ') && !attr_str.contains('=') && attr_str.starts_with('#');
159
160 if is_simple_format {
161 let potential_id = after_hash;
163 if ID_VALIDATE_PATTERN.is_match(potential_id) && !potential_id.is_empty() {
164 return Some(potential_id.to_string());
165 }
166 } else {
167 if let Some(delimiter_pos) = after_hash.find(|c: char| c.is_whitespace() || c == '.' || c == '=') {
169 let potential_id = &after_hash[..delimiter_pos];
170 if ID_VALIDATE_PATTERN.is_match(potential_id) && !potential_id.is_empty() {
171 return Some(potential_id.to_string());
172 }
173 } else {
174 let potential_id = after_hash;
176 if ID_VALIDATE_PATTERN.is_match(potential_id) && !potential_id.is_empty() {
177 return Some(potential_id.to_string());
178 }
179 }
180 }
181 }
182 }
183 None
184}
185
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs `extract_header_id` on `line` and checks both halves of the result.
    #[track_caller]
    fn expect_header(line: &str, want_text: &str, want_id: Option<&str>) {
        let (text, id) = extract_header_id(line);
        assert_eq!(text, want_text);
        assert_eq!(id, want_id.map(String::from));
    }

    /// Checks that `line` comes back unchanged and without an ID.
    #[track_caller]
    fn expect_untouched(line: &str) {
        expect_header(line, line, None);
    }

    #[test]
    fn test_kramdown_format_extraction() {
        expect_header("# Header {#simple}", "# Header", Some("simple"));
        expect_header("## Section {#section-id}", "## Section", Some("section-id"));
    }

    #[test]
    fn test_python_markdown_attr_list_extraction() {
        expect_header("# Header {:#colon-id}", "# Header", Some("colon-id"));
        expect_header("# Header {: #spaced-id }", "# Header", Some("spaced-id"));
    }

    #[test]
    fn test_extended_attr_list_extraction() {
        // ID followed by one class.
        expect_header(
            "# Header {: #with-class .highlight }",
            "# Header",
            Some("with-class"),
        );

        // ID followed by several classes.
        expect_header(
            "## Section {: #multi .class1 .class2 }",
            "## Section",
            Some("multi"),
        );

        // ID followed by key="value" attributes.
        expect_header(
            "### Subsection {: #with-attrs data-test=\"value\" style=\"color: red\" }",
            "### Subsection",
            Some("with-attrs"),
        );

        // ID, class and key="value" attributes combined.
        expect_header(
            "#### Complex {: #complex .highlight data-role=\"button\" title=\"Test\" }",
            "#### Complex",
            Some("complex"),
        );

        // Attribute value containing escaped quotes.
        expect_header(
            "##### Quotes {: #quotes title=\"Has \\\"nested\\\" quotes\" }",
            "##### Quotes",
            Some("quotes"),
        );
    }

    #[test]
    fn test_attr_list_detection_edge_cases() {
        // Attribute list without any ID.
        expect_untouched("# Header {: .class-only }");

        // Braces without a hash.
        expect_untouched("# Header { no-hash }");

        // Hash with nothing after it.
        expect_untouched("# Header {: # }");

        // Attribute list that is not at the end of the line.
        expect_untouched("# Header {: #middle } with more text");
    }

    #[test]
    fn test_standalone_attr_list_detection() {
        // Plain ID forms.
        assert!(is_standalone_attr_list("{#custom-id}"));
        assert!(is_standalone_attr_list("{ #spaced-id }"));
        assert!(is_standalone_attr_list("{:#colon-id}"));
        assert!(is_standalone_attr_list("{: #full-format }"));

        // ID accompanied by classes and attributes.
        assert!(is_standalone_attr_list("{: #with-class .highlight }"));
        assert!(is_standalone_attr_list("{: #multi .class1 .class2 }"));
        assert!(is_standalone_attr_list("{: #complex .highlight data-test=\"value\" }"));

        // Lines that are not standalone attribute lists.
        assert!(!is_standalone_attr_list("Some text {#not-standalone}"));
        assert!(!is_standalone_attr_list("Text before {#id}"));
        assert!(!is_standalone_attr_list("{#id} text after"));
        assert!(!is_standalone_attr_list(""));
        assert!(!is_standalone_attr_list(" "));
        assert!(!is_standalone_attr_list("{: .class-only }"));
    }

    #[test]
    fn test_standalone_attr_list_id_extraction() {
        // Plain ID forms.
        assert_eq!(
            extract_standalone_attr_list_id("{#simple}").as_deref(),
            Some("simple")
        );
        assert_eq!(
            extract_standalone_attr_list_id("{ #spaced }").as_deref(),
            Some("spaced")
        );
        assert_eq!(
            extract_standalone_attr_list_id("{:#colon}").as_deref(),
            Some("colon")
        );
        assert_eq!(
            extract_standalone_attr_list_id("{: #full }").as_deref(),
            Some("full")
        );

        // ID accompanied by classes and attributes.
        assert_eq!(
            extract_standalone_attr_list_id("{: #with-class .highlight }").as_deref(),
            Some("with-class")
        );
        assert_eq!(
            extract_standalone_attr_list_id("{: #complex .class1 .class2 data=\"value\" }")
                .as_deref(),
            Some("complex")
        );

        // Inputs that yield no ID.
        assert_eq!(extract_standalone_attr_list_id("Not an attr-list"), None);
        assert_eq!(extract_standalone_attr_list_id("Text {#not-standalone}"), None);
        assert_eq!(extract_standalone_attr_list_id("{: .class-only }"), None);
        assert_eq!(extract_standalone_attr_list_id(""), None);
    }

    #[test]
    fn test_backward_compatibility() {
        let test_cases = [
            ("# Header {#a}", "# Header", "a"),
            ("# Header {#simple-id}", "# Header", "simple-id"),
            ("## Heading {#heading-2}", "## Heading", "heading-2"),
            ("### With-Hyphens {#with-hyphens}", "### With-Hyphens", "with-hyphens"),
        ];

        for (input, expected_text, expected_id) in test_cases {
            let (text, id) = extract_header_id(input);
            assert_eq!(text, expected_text, "Text mismatch for input: {input}");
            assert_eq!(id, Some(expected_id.to_string()), "ID mismatch for input: {input}");
        }
    }

    #[test]
    fn test_invalid_id_with_dots() {
        // Dots are not valid ID characters, so the line must stay untouched.
        expect_untouched("## Another. {#id.with.dots}");
        expect_untouched("## Another. {#id.more.dots}");
    }
}