rumdl_lib/utils/
header_id_utils.rs1use regex::Regex;
28use std::sync::LazyLock;
29
30static HEADER_ID_PATTERN: LazyLock<Regex> =
35 LazyLock::new(|| Regex::new(r"\s*\{\s*:?\s*([^}]*?#[^}]*?)\s*\}\s*$").unwrap());
36
37static ID_VALIDATE_PATTERN: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^[a-zA-Z0-9_\-:]+$").unwrap());
39
40static STANDALONE_ATTR_LIST_PATTERN: LazyLock<Regex> =
43 LazyLock::new(|| Regex::new(r"^\s*\{\s*:?\s*([^}]*#[a-zA-Z0-9_\-:]+[^}]*)\s*\}\s*$").unwrap());
44
45pub fn extract_header_id(line: &str) -> (String, Option<String>) {
66 if let Some(captures) = HEADER_ID_PATTERN.captures(line)
67 && let Some(full_match) = captures.get(0)
68 && let Some(attr_content) = captures.get(1)
69 {
70 let attr_str = attr_content.as_str().trim();
71
72 if let Some(hash_pos) = attr_str.find('#') {
74 let after_hash = &attr_str[hash_pos + 1..];
76
77 let is_simple_format = !attr_str.contains(' ') && !attr_str.contains('=') && attr_str.starts_with('#');
82
83 if is_simple_format {
84 let potential_id = after_hash;
86 if ID_VALIDATE_PATTERN.is_match(potential_id) && !potential_id.is_empty() {
87 let clean_text = line[..full_match.start()].trim_end().to_string();
88 return (clean_text, Some(potential_id.to_string()));
89 }
90 } else {
92 if let Some(delimiter_pos) = after_hash.find(|c: char| c.is_whitespace() || c == '.' || c == '=') {
94 let potential_id = &after_hash[..delimiter_pos];
95 if ID_VALIDATE_PATTERN.is_match(potential_id) && !potential_id.is_empty() {
96 let clean_text = line[..full_match.start()].trim_end().to_string();
97 return (clean_text, Some(potential_id.to_string()));
98 }
99 } else {
100 let potential_id = after_hash;
102 if ID_VALIDATE_PATTERN.is_match(potential_id) && !potential_id.is_empty() {
103 let clean_text = line[..full_match.start()].trim_end().to_string();
104 return (clean_text, Some(potential_id.to_string()));
105 }
106 }
107 }
108 }
109 }
110 (line.to_string(), None)
111}
112
113pub fn is_standalone_attr_list(line: &str) -> bool {
128 STANDALONE_ATTR_LIST_PATTERN.is_match(line)
129}
130
131pub fn extract_standalone_attr_list_id(line: &str) -> Option<String> {
144 if let Some(captures) = STANDALONE_ATTR_LIST_PATTERN.captures(line)
145 && let Some(attr_content) = captures.get(1)
146 {
147 let attr_str = attr_content.as_str().trim();
148
149 if let Some(hash_pos) = attr_str.find('#') {
151 let after_hash = &attr_str[hash_pos + 1..];
152
153 let is_simple_format = !attr_str.contains(' ') && !attr_str.contains('=') && attr_str.starts_with('#');
155
156 if is_simple_format {
157 let potential_id = after_hash;
159 if ID_VALIDATE_PATTERN.is_match(potential_id) && !potential_id.is_empty() {
160 return Some(potential_id.to_string());
161 }
162 } else {
163 if let Some(delimiter_pos) = after_hash.find(|c: char| c.is_whitespace() || c == '.' || c == '=') {
165 let potential_id = &after_hash[..delimiter_pos];
166 if ID_VALIDATE_PATTERN.is_match(potential_id) && !potential_id.is_empty() {
167 return Some(potential_id.to_string());
168 }
169 } else {
170 let potential_id = after_hash;
172 if ID_VALIDATE_PATTERN.is_match(potential_id) && !potential_id.is_empty() {
173 return Some(potential_id.to_string());
174 }
175 }
176 }
177 }
178 }
179 None
180}
181
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that `extract_header_id(input)` returns exactly `(text, id)`.
    #[track_caller]
    fn check_header(input: &str, text: &str, id: Option<&str>) {
        let (actual_text, actual_id) = extract_header_id(input);
        assert_eq!(actual_text, text);
        assert_eq!(actual_id, id.map(str::to_string));
    }

    /// Asserts that `extract_header_id` leaves `input` unchanged and finds no ID.
    #[track_caller]
    fn check_header_untouched(input: &str) {
        check_header(input, input, None);
    }

    /// Asserts the result of `extract_standalone_attr_list_id(input)`.
    #[track_caller]
    fn check_standalone_id(input: &str, id: Option<&str>) {
        assert_eq!(extract_standalone_attr_list_id(input), id.map(str::to_string));
    }

    #[test]
    fn test_kramdown_format_extraction() {
        // Bare `{#id}` form.
        check_header("# Header {#simple}", "# Header", Some("simple"));
        check_header("## Section {#section-id}", "## Section", Some("section-id"));
    }

    #[test]
    fn test_python_markdown_attr_list_extraction() {
        // Colon-prefixed form, with and without inner spacing.
        check_header("# Header {:#colon-id}", "# Header", Some("colon-id"));
        check_header("# Header {: #spaced-id }", "# Header", Some("spaced-id"));
    }

    #[test]
    fn test_extended_attr_list_extraction() {
        // ID followed by classes.
        check_header("# Header {: #with-class .highlight }", "# Header", Some("with-class"));
        check_header("## Section {: #multi .class1 .class2 }", "## Section", Some("multi"));

        // ID followed by key="value" attributes.
        check_header(
            "### Subsection {: #with-attrs data-test=\"value\" style=\"color: red\" }",
            "### Subsection",
            Some("with-attrs"),
        );

        // ID followed by a mix of classes and attributes.
        check_header(
            "#### Complex {: #complex .highlight data-role=\"button\" title=\"Test\" }",
            "#### Complex",
            Some("complex"),
        );

        // Attribute value containing escaped quotes.
        check_header(
            "##### Quotes {: #quotes title=\"Has \\\"nested\\\" quotes\" }",
            "##### Quotes",
            Some("quotes"),
        );
    }

    #[test]
    fn test_attr_list_detection_edge_cases() {
        // No ID in the block, an empty ID, or a block that is not at the end of
        // the line: the input must come back untouched.
        for input in [
            "# Header {: .class-only }",
            "# Header { no-hash }",
            "# Header {: # }",
            "# Header {: #middle } with more text",
        ] {
            check_header_untouched(input);
        }
    }

    #[test]
    fn test_standalone_attr_list_detection() {
        let standalone = [
            "{#custom-id}",
            "{ #spaced-id }",
            "{:#colon-id}",
            "{: #full-format }",
            "{: #with-class .highlight }",
            "{: #multi .class1 .class2 }",
            "{: #complex .highlight data-test=\"value\" }",
        ];
        for line in standalone {
            assert!(is_standalone_attr_list(line), "expected standalone attr-list: {line:?}");
        }

        let not_standalone = [
            "Some text {#not-standalone}",
            "Text before {#id}",
            "{#id} text after",
            "",
            " ",
            "{: .class-only }",
        ];
        for line in not_standalone {
            assert!(!is_standalone_attr_list(line), "expected non-standalone line: {line:?}");
        }
    }

    #[test]
    fn test_standalone_attr_list_id_extraction() {
        // Plain ID-only blocks.
        check_standalone_id("{#simple}", Some("simple"));
        check_standalone_id("{ #spaced }", Some("spaced"));
        check_standalone_id("{:#colon}", Some("colon"));
        check_standalone_id("{: #full }", Some("full"));

        // Blocks with extra classes and attributes.
        check_standalone_id("{: #with-class .highlight }", Some("with-class"));
        check_standalone_id("{: #complex .class1 .class2 data=\"value\" }", Some("complex"));

        // Lines that are not standalone ID-bearing blocks.
        check_standalone_id("Not an attr-list", None);
        check_standalone_id("Text {#not-standalone}", None);
        check_standalone_id("{: .class-only }", None);
        check_standalone_id("", None);
    }

    #[test]
    fn test_backward_compatibility() {
        // (input, expected text, expected ID)
        let test_cases = [
            ("# Header {#a}", "# Header", Some("a")),
            ("# Header {#simple-id}", "# Header", Some("simple-id")),
            ("## Heading {#heading-2}", "## Heading", Some("heading-2")),
            ("### With-Hyphens {#with-hyphens}", "### With-Hyphens", Some("with-hyphens")),
        ];

        for (input, expected_text, expected_id) in test_cases {
            let (text, id) = extract_header_id(input);
            assert_eq!(text, expected_text, "Text mismatch for input: {input}");
            assert_eq!(id.as_deref(), expected_id, "ID mismatch for input: {input}");
        }
    }

    #[test]
    fn test_invalid_id_with_dots() {
        // A bare `{#...}` block whose ID contains dots is not a valid ID, so
        // the heading is returned unchanged.
        check_header_untouched("## Another. {#id.with.dots}");
        check_header_untouched("## Another. {#id.more.dots}");
    }
}