1use regex::Regex;
28use std::sync::LazyLock;
29
30static HTML_ANCHOR_ELEMENT: LazyLock<Regex> =
34 LazyLock::new(|| Regex::new(r#"<a\s+(?:name|id)="[^"]*"(?:\s+(?:name|id)="[^"]*")?>\s*</a>\s*"#).unwrap());
35
36static HEADER_ID_PATTERN: LazyLock<Regex> =
41 LazyLock::new(|| Regex::new(r"\s*\{\s*:?\s*([^}]*?#[^}]*?)\s*\}\s*$").unwrap());
42
43static ID_VALIDATE_PATTERN: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^[a-zA-Z0-9_\-:]+$").unwrap());
45
46static STANDALONE_ATTR_LIST_PATTERN: LazyLock<Regex> =
49 LazyLock::new(|| Regex::new(r"^\s*\{\s*:?\s*([^}]*#[a-zA-Z0-9_\-:]+[^}]*)\s*\}\s*$").unwrap());
50
51pub fn extract_header_id(line: &str) -> (String, Option<String>) {
72 let line = HTML_ANCHOR_ELEMENT.replace_all(line, "");
75 let line = line.as_ref();
76
77 if let Some(captures) = HEADER_ID_PATTERN.captures(line)
78 && let Some(full_match) = captures.get(0)
79 && let Some(attr_content) = captures.get(1)
80 {
81 let attr_str = attr_content.as_str().trim();
82
83 if let Some(hash_pos) = attr_str.find('#') {
85 let after_hash = &attr_str[hash_pos + 1..];
87
88 let is_simple_format = !attr_str.contains(' ') && !attr_str.contains('=') && attr_str.starts_with('#');
93
94 if is_simple_format {
95 let potential_id = after_hash;
97 if ID_VALIDATE_PATTERN.is_match(potential_id) && !potential_id.is_empty() {
98 let clean_text = line[..full_match.start()].trim_end().to_string();
99 return (clean_text, Some(potential_id.to_string()));
100 }
101 } else {
103 if let Some(delimiter_pos) = after_hash.find(|c: char| c.is_whitespace() || c == '.' || c == '=') {
105 let potential_id = &after_hash[..delimiter_pos];
106 if ID_VALIDATE_PATTERN.is_match(potential_id) && !potential_id.is_empty() {
107 let clean_text = line[..full_match.start()].trim_end().to_string();
108 return (clean_text, Some(potential_id.to_string()));
109 }
110 } else {
111 let potential_id = after_hash;
113 if ID_VALIDATE_PATTERN.is_match(potential_id) && !potential_id.is_empty() {
114 let clean_text = line[..full_match.start()].trim_end().to_string();
115 return (clean_text, Some(potential_id.to_string()));
116 }
117 }
118 }
119 }
120 }
121 (line.to_string(), None)
122}
123
124pub fn is_standalone_attr_list(line: &str) -> bool {
139 STANDALONE_ATTR_LIST_PATTERN.is_match(line)
140}
141
142pub fn extract_standalone_attr_list_id(line: &str) -> Option<String> {
155 if let Some(captures) = STANDALONE_ATTR_LIST_PATTERN.captures(line)
156 && let Some(attr_content) = captures.get(1)
157 {
158 let attr_str = attr_content.as_str().trim();
159
160 if let Some(hash_pos) = attr_str.find('#') {
162 let after_hash = &attr_str[hash_pos + 1..];
163
164 let is_simple_format = !attr_str.contains(' ') && !attr_str.contains('=') && attr_str.starts_with('#');
166
167 if is_simple_format {
168 let potential_id = after_hash;
170 if ID_VALIDATE_PATTERN.is_match(potential_id) && !potential_id.is_empty() {
171 return Some(potential_id.to_string());
172 }
173 } else {
174 if let Some(delimiter_pos) = after_hash.find(|c: char| c.is_whitespace() || c == '.' || c == '=') {
176 let potential_id = &after_hash[..delimiter_pos];
177 if ID_VALIDATE_PATTERN.is_match(potential_id) && !potential_id.is_empty() {
178 return Some(potential_id.to_string());
179 }
180 } else {
181 let potential_id = after_hash;
183 if ID_VALIDATE_PATTERN.is_match(potential_id) && !potential_id.is_empty() {
184 return Some(potential_id.to_string());
185 }
186 }
187 }
188 }
189 }
190 None
191}
192
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks that `input` is split into `expected_text` and `expected_id`.
    #[track_caller]
    fn check_header(input: &str, expected_text: &str, expected_id: Option<&str>) {
        let (text, id) = extract_header_id(input);
        assert_eq!(text, expected_text);
        assert_eq!(id, expected_id.map(str::to_string));
    }

    /// Checks that `input` comes back untouched and without an ID.
    #[track_caller]
    fn check_header_unchanged(input: &str) {
        check_header(input, input, None);
    }

    #[test]
    fn test_kramdown_format_extraction() {
        check_header("# Header {#simple}", "# Header", Some("simple"));
        check_header("## Section {#section-id}", "## Section", Some("section-id"));
    }

    #[test]
    fn test_python_markdown_attr_list_extraction() {
        check_header("# Header {:#colon-id}", "# Header", Some("colon-id"));
        check_header("# Header {: #spaced-id }", "# Header", Some("spaced-id"));
    }

    #[test]
    fn test_extended_attr_list_extraction() {
        // ID followed by one or more classes.
        check_header("# Header {: #with-class .highlight }", "# Header", Some("with-class"));
        check_header("## Section {: #multi .class1 .class2 }", "## Section", Some("multi"));

        // ID followed by key="value" attributes, with and without classes.
        check_header(
            "### Subsection {: #with-attrs data-test=\"value\" style=\"color: red\" }",
            "### Subsection",
            Some("with-attrs"),
        );
        check_header(
            "#### Complex {: #complex .highlight data-role=\"button\" title=\"Test\" }",
            "#### Complex",
            Some("complex"),
        );

        // Attribute value containing escaped quotes.
        check_header(
            "##### Quotes {: #quotes title=\"Has \\\"nested\\\" quotes\" }",
            "##### Quotes",
            Some("quotes"),
        );
    }

    #[test]
    fn test_attr_list_detection_edge_cases() {
        // Class only, no ID.
        check_header_unchanged("# Header {: .class-only }");
        // Braces without any `#`.
        check_header_unchanged("# Header { no-hash }");
        // `#` with an empty ID.
        check_header_unchanged("# Header {: # }");
        // Attribute block that is not at the end of the line.
        check_header_unchanged("# Header {: #middle } with more text");
    }

    #[test]
    fn test_standalone_attr_list_detection() {
        let accepted = [
            "{#custom-id}",
            "{ #spaced-id }",
            "{:#colon-id}",
            "{: #full-format }",
            "{: #with-class .highlight }",
            "{: #multi .class1 .class2 }",
            "{: #complex .highlight data-test=\"value\" }",
        ];
        for line in accepted {
            assert!(is_standalone_attr_list(line), "expected standalone attr-list: {line:?}");
        }

        let rejected = [
            "Some text {#not-standalone}",
            "Text before {#id}",
            "{#id} text after",
            "",
            " ",
            "{: .class-only }",
        ];
        for line in rejected {
            assert!(!is_standalone_attr_list(line), "expected non-standalone line: {line:?}");
        }
    }

    #[test]
    fn test_standalone_attr_list_id_extraction() {
        let cases: [(&str, Option<&str>); 10] = [
            ("{#simple}", Some("simple")),
            ("{ #spaced }", Some("spaced")),
            ("{:#colon}", Some("colon")),
            ("{: #full }", Some("full")),
            ("{: #with-class .highlight }", Some("with-class")),
            ("{: #complex .class1 .class2 data=\"value\" }", Some("complex")),
            ("Not an attr-list", None),
            ("Text {#not-standalone}", None),
            ("{: .class-only }", None),
            ("", None),
        ];

        for (line, expected) in cases {
            assert_eq!(
                extract_standalone_attr_list_id(line),
                expected.map(str::to_string),
                "unexpected ID for line: {line:?}"
            );
        }
    }

    #[test]
    fn test_backward_compatibility() {
        let test_cases = [
            ("# Header {#a}", "# Header", "a"),
            ("# Header {#simple-id}", "# Header", "simple-id"),
            ("## Heading {#heading-2}", "## Heading", "heading-2"),
            ("### With-Hyphens {#with-hyphens}", "### With-Hyphens", "with-hyphens"),
        ];

        for (input, expected_text, expected_id) in test_cases {
            let (text, id) = extract_header_id(input);
            assert_eq!(text, expected_text, "Text mismatch for input: {input}");
            assert_eq!(id, Some(expected_id.to_string()), "ID mismatch for input: {input}");
        }
    }

    #[test]
    fn test_invalid_id_with_dots() {
        // Dots are not valid ID characters, so a bare `{#a.b}` block is left in place.
        check_header_unchanged("## Another. {#id.with.dots}");
        check_header_unchanged("## Another. {#id.more.dots}");
    }

    #[test]
    fn test_html_anchor_stripping() {
        check_header("<a name=\"cheatsheets\"></a>Cheat Sheets", "Cheat Sheets", None);
        check_header(
            "<a id=\"tools\"></a>Tools and session management",
            "Tools and session management",
            None,
        );
        // Whitespace right after the anchor is removed together with it.
        check_header("<a name=\"foo\"></a> Heading with space", "Heading with space", None);
        // An anchor and a custom-ID block can appear on the same line.
        check_header(
            "<a name=\"old\"></a>My Section {#my-custom-id}",
            "My Section",
            Some("my-custom-id"),
        );
    }
}