1use crate::model::{ParsedSection, SectionType};
2use anyhow::Result;
3use htmd::HtmlToMarkdown;
4#[cfg(test)]
5use scraper::{Html, Selector};
6
7fn extract_heading_content(
10 heading: &scraper::ElementRef,
11 current_depth: u8,
12 converter: &HtmlToMarkdown,
13) -> Option<String> {
14 use super::markdown;
15
16 let mut content_html = String::new();
17 let mut current = heading.next_sibling();
18
19 while let Some(node) = current {
20 if let Some(sibling_elem) = scraper::ElementRef::wrap(node) {
21 let tag_name = sibling_elem.value().name();
22
23 if let Some(sibling_depth) = heading_depth(tag_name) {
25 if sibling_depth <= current_depth {
26 break;
27 }
28 }
29
30 if tag_name == "dfn" && sibling_elem.value().attr("id").is_some() {
32 break;
33 }
34
35 content_html.push_str(&sibling_elem.html());
37 }
38
39 current = node.next_sibling();
40 }
41
42 if content_html.trim().is_empty() {
43 return None;
44 }
45
46 let markdown = markdown::element_to_markdown_from_html(&content_html, converter);
47 let trimmed = markdown.trim();
48
49 if trimmed.is_empty() {
50 None
51 } else {
52 Some(trimmed.to_string())
53 }
54}
55
56fn extract_heading_title(element: &scraper::ElementRef) -> Option<String> {
58 let mut text_parts = Vec::new();
60
61 for node in element.children() {
62 if let Some(elem) = scraper::ElementRef::wrap(node) {
63 let classes = elem.value().classes().collect::<Vec<_>>();
66 if classes.contains(&"secno")
67 || classes.contains(&"secnum")
68 || classes.contains(&"self-link")
69 {
70 continue;
71 }
72 text_parts.push(elem.text().collect::<String>());
74 } else if let Some(text) = node.value().as_text() {
75 text_parts.push(text.to_string());
76 }
77 }
78
79 let result = text_parts.join("").trim().to_string();
80 if result.is_empty() {
81 None
82 } else {
83 Some(result)
84 }
85}
86
/// Maps a heading tag name to its numeric depth.
///
/// Only the exact lowercase names `h2` through `h6` are recognised; every
/// other input (including `h1`) yields `None`.
fn heading_depth(tag: &str) -> Option<u8> {
    const HEADING_TAGS: [(&str, u8); 5] =
        [("h2", 2), ("h3", 3), ("h4", 4), ("h5", 5), ("h6", 6)];

    HEADING_TAGS
        .iter()
        .find(|entry| entry.0 == tag)
        .map(|entry| entry.1)
}
98
99pub fn parse_heading_element(
101 element: &scraper::ElementRef,
102 converter: &HtmlToMarkdown,
103) -> Result<Option<ParsedSection>> {
104 let anchor = match element.value().attr("id") {
105 Some(id) => id.to_string(),
106 None => return Ok(None), };
108
109 let title = extract_heading_title(element);
110 let depth = heading_depth(element.value().name())
111 .ok_or_else(|| anyhow::anyhow!("Invalid heading tag: {}", element.value().name()))?;
112
113 let content_text = extract_heading_content(element, depth, converter);
115
116 Ok(Some(ParsedSection {
117 anchor,
118 title,
119 content_text,
120 section_type: SectionType::Heading,
121 parent_anchor: None,
122 prev_anchor: None,
123 next_anchor: None,
124 depth: Some(depth),
125 }))
126}
127
128pub fn parse_dfn_element(
131 element: &scraper::ElementRef,
132 converter: &HtmlToMarkdown,
133) -> Result<Option<ParsedSection>> {
134 let anchor = match element.value().attr("id") {
135 Some(id) => id.to_string(),
136 None => return Ok(None), };
138
139 if is_inside_algorithm_content(element) {
142 return Ok(None);
143 }
144
145 let has_dfn_for = element.value().attr("data-dfn-for").is_some();
153 let has_dfn_type = element.value().attr("data-dfn-type").is_some();
154 let has_direct_var_child = element
155 .children()
156 .filter_map(scraper::ElementRef::wrap)
157 .any(|c| c.value().name() == "var");
158
159 if (has_dfn_for && !has_dfn_type) || has_direct_var_child {
161 return Ok(None);
162 }
163
164 if element.value().attr("data-dfn-type") == Some("argument") {
167 return Ok(None);
168 }
169
170 let title = element.text().collect::<String>().trim().to_string();
172 let title = if title.is_empty() { None } else { Some(title) };
173
174 let section_type = if is_inside_algorithm_div(element) {
177 SectionType::Algorithm
178 } else if is_idl_type(element) {
179 SectionType::Idl
180 } else {
181 SectionType::Definition
182 };
183
184 let content_text = match section_type {
186 SectionType::Definition => extract_definition_content(element, converter),
187 SectionType::Algorithm => extract_algorithm_content(element, converter),
188 SectionType::Idl => extract_idl_content(element),
189 _ => None,
190 };
191
192 Ok(Some(ParsedSection {
193 anchor,
194 title,
195 content_text,
196 section_type,
197 parent_anchor: None,
198 prev_anchor: None,
199 next_anchor: None,
200 depth: None,
201 }))
202}
203
204fn extract_definition_content(
207 element: &scraper::ElementRef,
208 converter: &HtmlToMarkdown,
209) -> Option<String> {
210 use super::markdown;
211
212 let mut current = element.parent();
214 while let Some(node) = current {
215 if let Some(parent_elem) = scraper::ElementRef::wrap(node) {
216 let tag_name = parent_elem.value().name();
217 if matches!(tag_name, "p" | "div" | "dd" | "dt" | "li" | "section") {
219 return Some(markdown::element_to_markdown(&parent_elem, converter));
220 }
221 }
222 current = node.parent();
223 }
224
225 Some(element.text().collect::<String>().trim().to_string())
227}
228
229fn extract_algorithm_content(
232 element: &scraper::ElementRef,
233 converter: &HtmlToMarkdown,
234) -> Option<String> {
235 use super::{algorithms, markdown};
236
237 let mut current = element.parent();
238 while let Some(node) = current {
239 if let Some(parent_elem) = scraper::ElementRef::wrap(node) {
240 if parent_elem.value().name() == "div" {
242 let classes: Vec<_> = parent_elem.value().classes().collect();
243 let is_algo_div = classes.contains(&"algorithm")
244 || parent_elem.value().attr("data-algorithm").is_some();
245 if is_algo_div {
246 return extract_from_algorithm_div(&parent_elem, converter);
247 }
248 }
249
250 if matches!(parent_elem.value().name(), "p" | "dd" | "li") {
252 let intro = markdown::element_to_markdown(&parent_elem, converter);
253
254 let mut sibling = node.next_sibling();
255 while let Some(sib_node) = sibling {
256 if let Some(sib_elem) = scraper::ElementRef::wrap(sib_node) {
257 if sib_elem.value().name() == "ol" {
258 let steps = algorithms::render_algorithm_ol(&sib_elem, converter);
259 return Some(format!("{}\n\n{}", intro.trim(), steps));
260 }
261 if matches!(
262 sib_elem.value().name(),
263 "p" | "div" | "h2" | "h3" | "h4" | "h5" | "h6"
264 ) {
265 break;
266 }
267 }
268 sibling = sib_node.next_sibling();
269 }
270 }
271 }
272 current = node.parent();
273 }
274
275 None
276}
277
278fn extract_from_algorithm_div(
281 div: &scraper::ElementRef,
282 converter: &HtmlToMarkdown,
283) -> Option<String> {
284 use super::algorithms;
285
286 let ol_selector = scraper::Selector::parse("ol").ok()?;
287 let ol_elem = div.select(&ol_selector).next()?;
288
289 let mut intro_html = String::new();
291 for child in div.children() {
292 if let Some(child_elem) = scraper::ElementRef::wrap(child) {
293 if child_elem.value().name() == "ol" {
294 break;
295 }
296 intro_html.push_str(&child_elem.html());
297 } else if let Some(text) = child.value().as_text() {
298 intro_html.push_str(text);
299 }
300 }
301
302 let intro = converter
303 .convert(&intro_html)
304 .unwrap_or_default()
305 .trim()
306 .to_string();
307 let steps = algorithms::render_algorithm_ol(&ol_elem, converter);
308 Some(format!("{}\n\n{}", intro, steps))
309}
310
311fn extract_idl_content(element: &scraper::ElementRef) -> Option<String> {
314 use super::idl;
315
316 let mut current = element.parent();
318 while let Some(node) = current {
319 if let Some(parent_elem) = scraper::ElementRef::wrap(node) {
320 if parent_elem.value().name() == "pre" {
321 let idl_text = idl::extract_idl_text(&parent_elem);
322 return Some(idl_text);
323 }
324 }
325 current = node.parent();
326 }
327
328 None
329}
330
331pub fn parse_anchor_element(
334 element: &scraper::ElementRef,
335 converter: &HtmlToMarkdown,
336) -> Result<Option<ParsedSection>> {
337 use super::markdown;
338
339 let anchor = match element.value().attr("id") {
340 Some(id) => id.to_string(),
341 None => return Ok(None),
342 };
343
344 let title_text = element.text().collect::<String>();
345 let title_text = title_text.trim();
346 let title = if title_text.is_empty() {
347 None
348 } else {
349 let truncated = if title_text.len() > 120 {
350 let boundary = title_text
351 .char_indices()
352 .map(|(i, _)| i)
353 .take_while(|&i| i <= 120)
354 .last()
355 .unwrap_or(0);
356 format!("{}…", &title_text[..boundary])
357 } else {
358 title_text.to_string()
359 };
360 Some(truncated)
361 };
362
363 let content_text = {
364 let html = element.html();
365 let md = markdown::element_to_markdown_from_html(&html, converter);
366 let trimmed = md.trim().to_string();
367 if trimmed.is_empty() {
368 None
369 } else {
370 Some(trimmed)
371 }
372 };
373
374 Ok(Some(ParsedSection {
375 anchor,
376 title,
377 content_text,
378 section_type: crate::model::SectionType::Definition,
379 parent_anchor: None,
380 prev_anchor: None,
381 next_anchor: None,
382 depth: None,
383 }))
384}
385
386pub fn parse_emu_clause_element(
390 element: &scraper::ElementRef,
391 converter: &HtmlToMarkdown,
392) -> Result<Option<ParsedSection>> {
393 let anchor = match element.value().attr("id") {
394 Some(id) => id.to_string(),
395 None => return Ok(None),
396 };
397
398 let h1 = element
400 .children()
401 .filter_map(scraper::ElementRef::wrap)
402 .find(|c| c.value().name() == "h1");
403
404 let (title, depth) = match h1 {
405 Some(h1_elem) => {
406 let title = extract_heading_title(&h1_elem);
407 let depth = extract_secnum_depth(&h1_elem);
408 (title, depth)
409 }
410 None => (None, None),
411 };
412
413 let section_type = if element.value().attr("type").is_some() {
415 SectionType::Algorithm
416 } else {
417 SectionType::Heading
418 };
419
420 let content_text = extract_emu_clause_content(element, converter);
421
422 Ok(Some(ParsedSection {
423 anchor,
424 title,
425 content_text,
426 section_type,
427 parent_anchor: None,
428 prev_anchor: None,
429 next_anchor: None,
430 depth,
431 }))
432}
433
434fn extract_secnum_depth(heading: &scraper::ElementRef) -> Option<u8> {
438 for child in heading.children() {
439 if let Some(elem) = scraper::ElementRef::wrap(child) {
440 let classes: Vec<_> = elem.value().classes().collect();
441 if classes.contains(&"secnum") {
442 let text = elem.text().collect::<String>();
443 let text = text.trim();
444 if text.is_empty() {
445 return None;
446 }
447 let parts = text.split('.').count();
449 return Some((parts + 1).min(255) as u8);
451 }
452 }
453 }
454 None
455}
456
457fn extract_emu_clause_content(
462 element: &scraper::ElementRef,
463 converter: &HtmlToMarkdown,
464) -> Option<String> {
465 use super::{algorithms, markdown};
466
467 let mut intro_html = String::new();
468 let mut algo_steps: Option<String> = None;
469
470 for child in element.children() {
471 if let Some(child_elem) = scraper::ElementRef::wrap(child) {
472 let tag = child_elem.value().name();
473
474 if tag == "h1" || tag == "emu-clause" || tag == "emu-annex" || tag == "emu-import" {
476 continue;
477 }
478
479 if tag == "emu-alg" {
481 if let Some(ol) = child_elem
482 .children()
483 .filter_map(scraper::ElementRef::wrap)
484 .find(|c| c.value().name() == "ol")
485 {
486 algo_steps = Some(algorithms::render_algorithm_ol(&ol, converter));
487 }
488 continue;
489 }
490
491 if tag == "span" && child_elem.value().attr("id").is_some() {
493 let text = child_elem.text().collect::<String>();
494 if text.trim().is_empty() {
495 continue;
496 }
497 }
498
499 intro_html.push_str(&child_elem.html());
500 }
501 }
502
503 let intro = markdown::element_to_markdown_from_html(&intro_html, converter);
504 let intro = intro.trim();
505
506 match (intro.is_empty(), algo_steps) {
507 (true, None) => None,
508 (true, Some(steps)) => Some(steps),
509 (false, None) => Some(intro.to_string()),
510 (false, Some(steps)) => Some(format!("{}\n\n{}", intro, steps)),
511 }
512}
513
/// Test helper: parses `html` and returns every `h2`–`h6` element that has an
/// `id` as a heading section, with `content_text` cleared.
#[cfg(test)]
pub fn collect_headings(html: &str) -> Result<Vec<ParsedSection>> {
    let document = Html::parse_document(html);
    let converter = crate::parse::markdown::build_converter("https://test.example.com");

    let selector = Selector::parse("h2[id], h3[id], h4[id], h5[id], h6[id]")
        .map_err(|e| anyhow::anyhow!("Invalid selector: {:?}", e))?;

    document
        .select(&selector)
        .filter_map(|element| parse_heading_element(&element, &converter).transpose())
        .map(|parsed| -> Result<ParsedSection> {
            let section = parsed?;
            Ok(ParsedSection {
                content_text: None,
                ..section
            })
        })
        .collect()
}
538
539fn is_inside_algorithm_content(element: &scraper::ElementRef) -> bool {
542 let mut current = element.parent();
544 while let Some(node) = current {
545 if let Some(parent_elem) = scraper::ElementRef::wrap(node) {
546 if parent_elem.value().name() == "ol" {
547 let mut ol_ancestor = parent_elem.parent();
554 while let Some(anc_node) = ol_ancestor {
555 if let Some(anc_elem) = scraper::ElementRef::wrap(anc_node) {
556 if anc_elem.value().name() == "div" {
557 let classes: Vec<_> = anc_elem.value().classes().collect();
558 if classes.contains(&"algorithm")
559 || anc_elem.value().attr("data-algorithm").is_some()
560 {
561 return true; }
563 }
564 }
565 ol_ancestor = anc_node.parent();
566 }
567
568 let mut prev_sibling = node.prev_sibling();
570 while let Some(prev_node) = prev_sibling {
571 if let Some(prev_elem) = scraper::ElementRef::wrap(prev_node) {
572 if matches!(prev_elem.value().name(), "p" | "dd" | "li") {
573 if let Ok(dfn_selector) = scraper::Selector::parse("dfn[id]") {
575 if prev_elem.select(&dfn_selector).next().is_some() {
576 return true; }
578 }
579 }
580 if matches!(
582 prev_elem.value().name(),
583 "p" | "div" | "h2" | "h3" | "h4" | "h5" | "h6"
584 ) {
585 break;
586 }
587 }
588 prev_sibling = prev_node.prev_sibling();
589 }
590
591 return false;
593 }
594 }
595 current = node.parent();
596 }
597 false
598}
599
600fn is_inside_algorithm_div(element: &scraper::ElementRef) -> bool {
603 let mut current = element.parent();
605 while let Some(node) = current {
606 if let Some(parent_elem) = scraper::ElementRef::wrap(node) {
607 if parent_elem.value().name() == "div" {
608 let classes: Vec<_> = parent_elem.value().classes().collect();
609 if classes.contains(&"algorithm") {
610 return true;
611 }
612 }
613
614 if matches!(parent_elem.value().name(), "p" | "div" | "dd" | "li") {
617 let mut sibling = node.next_sibling();
619 while let Some(sib_node) = sibling {
620 if let Some(sib_elem) = scraper::ElementRef::wrap(sib_node) {
621 if sib_elem.value().name() == "ol" {
622 return true;
623 }
624 if matches!(
626 sib_elem.value().name(),
627 "p" | "div" | "h2" | "h3" | "h4" | "h5" | "h6"
628 ) {
629 break;
630 }
631 }
632 sibling = sib_node.next_sibling();
633 }
634 }
635 }
636 current = node.parent();
637 }
638 false
639}
640
641fn is_idl_type(element: &scraper::ElementRef) -> bool {
643 if let Some(dfn_type) = element.value().attr("data-dfn-type") {
644 matches!(
645 dfn_type,
646 "interface" | "dictionary" | "enum" | "callback" | "callback interface" | "typedef"
647 )
648 } else {
649 false
650 }
651}
652
/// Test helper: returns an `Idl` section (anchor and title only) for every
/// `dfn[id][data-dfn-type]` element in `html` that `is_idl_type` accepts.
#[cfg(test)]
pub fn collect_idl(html: &str) -> Result<Vec<ParsedSection>> {
    let document = Html::parse_document(html);

    let selector = Selector::parse("dfn[id][data-dfn-type]")
        .map_err(|e| anyhow::anyhow!("Invalid selector: {:?}", e))?;

    document
        .select(&selector)
        .filter(|dfn| is_idl_type(dfn))
        .map(|dfn| -> Result<ParsedSection> {
            let anchor = dfn
                .value()
                .attr("id")
                .ok_or_else(|| anyhow::anyhow!("IDL type missing id"))?
                .to_string();

            let text: String = dfn.text().collect();
            let title = match text.trim() {
                "" => None,
                trimmed => Some(trimmed.to_string()),
            };

            Ok(ParsedSection {
                anchor,
                title,
                content_text: None,
                section_type: SectionType::Idl,
                parent_anchor: None,
                prev_anchor: None,
                next_anchor: None,
                depth: None,
            })
        })
        .collect()
}
693
/// Test helper: returns an `Algorithm` section (anchor and title only) for
/// every element matched by `div.algorithm dfn[id]` in `html`.
#[cfg(test)]
pub fn collect_algorithms(html: &str) -> Result<Vec<ParsedSection>> {
    let document = Html::parse_document(html);

    let selector = Selector::parse("div.algorithm dfn[id]")
        .map_err(|e| anyhow::anyhow!("Invalid selector: {:?}", e))?;

    document
        .select(&selector)
        .map(|dfn| -> Result<ParsedSection> {
            let anchor = dfn
                .value()
                .attr("id")
                .ok_or_else(|| anyhow::anyhow!("Algorithm missing id"))?
                .to_string();

            let text: String = dfn.text().collect();
            let title = match text.trim() {
                "" => None,
                trimmed => Some(trimmed.to_string()),
            };

            Ok(ParsedSection {
                anchor,
                title,
                content_text: None,
                section_type: SectionType::Algorithm,
                parent_anchor: None,
                prev_anchor: None,
                next_anchor: None,
                depth: None,
            })
        })
        .collect()
}
729
/// Test helper: returns a `Definition` section (anchor and title only) for
/// every `dfn[id]` element in `html` that is neither accepted by
/// `is_inside_algorithm_div` nor by `is_idl_type`.
#[cfg(test)]
pub fn collect_definitions(html: &str) -> Result<Vec<ParsedSection>> {
    let document = Html::parse_document(html);

    let selector =
        Selector::parse("dfn[id]").map_err(|e| anyhow::anyhow!("Invalid selector: {:?}", e))?;

    document
        .select(&selector)
        .filter(|dfn| !is_inside_algorithm_div(dfn) && !is_idl_type(dfn))
        .map(|dfn| -> Result<ParsedSection> {
            let anchor = dfn
                .value()
                .attr("id")
                .ok_or_else(|| anyhow::anyhow!("Definition missing id"))?
                .to_string();

            let text: String = dfn.text().collect();
            let title = match text.trim() {
                "" => None,
                trimmed => Some(trimmed.to_string()),
            };

            Ok(ParsedSection {
                anchor,
                title,
                content_text: None,
                section_type: SectionType::Definition,
                parent_anchor: None,
                prev_anchor: None,
                next_anchor: None,
                depth: None,
            })
        })
        .collect()
}
775
776pub fn build_section_tree(mut sections: Vec<ParsedSection>) -> Vec<ParsedSection> {
778 for i in 0..sections.len() {
780 if let Some(current_depth) = sections[i].depth {
781 for j in (0..i).rev() {
783 if let Some(parent_depth) = sections[j].depth {
784 if parent_depth < current_depth {
785 sections[i].parent_anchor = Some(sections[j].anchor.clone());
786 break;
787 }
788 }
789 }
790 } else {
791 for j in (0..i).rev() {
794 if sections[j].depth.is_some() {
795 sections[i].parent_anchor = Some(sections[j].anchor.clone());
796 break;
797 }
798 }
799 }
800 }
801
802 for i in 0..sections.len() {
804 let current_depth = sections[i].depth;
805 let current_parent = sections[i].parent_anchor.clone();
806
807 for j in (0..i).rev() {
809 if sections[j].depth == current_depth && sections[j].parent_anchor == current_parent {
810 sections[i].prev_anchor = Some(sections[j].anchor.clone());
811 break;
812 }
813 }
814
815 for j in (i + 1)..sections.len() {
817 if sections[j].depth == current_depth && sections[j].parent_anchor == current_parent {
818 sections[i].next_anchor = Some(sections[j].anchor.clone());
819 break;
820 }
821 }
822 }
823
824 sections
825}
826
827#[cfg(test)]
828mod tests {
829 use super::*;
830
831 #[test]
832 fn test_bikeshed_heading_parsing() {
833 let html = include_str!("../../tests/fixtures/headings/bikeshed_heading.html");
834 let sections = collect_headings(html).unwrap();
835
836 assert_eq!(sections.len(), 1);
837 let section = §ions[0];
838
839 assert_eq!(section.anchor, "trees");
840 assert_eq!(section.title, Some("Trees".to_string()));
841 assert_eq!(section.section_type, SectionType::Heading);
842 assert_eq!(section.depth, Some(3));
843 }
844
845 #[test]
846 fn test_wattsi_heading_parsing() {
847 let html = include_str!("../../tests/fixtures/headings/wattsi_heading.html");
848 let sections = collect_headings(html).unwrap();
849
850 assert_eq!(sections.len(), 1);
851 let section = §ions[0];
852
853 assert_eq!(section.anchor, "abstract");
854 assert_eq!(
855 section.title,
856 Some("Where does this specification fit?".to_string())
857 );
858 assert_eq!(section.section_type, SectionType::Heading);
859 assert_eq!(section.depth, Some(3));
860 }
861
862 #[test]
863 fn test_multiple_heading_levels() {
864 let html = r#"
865 <h2 id="section-1">Section 1</h2>
866 <h3 id="section-1-1">Section 1.1</h3>
867 <h4 id="section-1-1-1">Section 1.1.1</h4>
868 <h2 id="section-2">Section 2</h2>
869 "#;
870
871 let sections = collect_headings(html).unwrap();
872 assert_eq!(sections.len(), 4);
873
874 assert_eq!(sections[0].anchor, "section-1");
875 assert_eq!(sections[0].depth, Some(2));
876
877 assert_eq!(sections[1].anchor, "section-1-1");
878 assert_eq!(sections[1].depth, Some(3));
879
880 assert_eq!(sections[2].anchor, "section-1-1-1");
881 assert_eq!(sections[2].depth, Some(4));
882
883 assert_eq!(sections[3].anchor, "section-2");
884 assert_eq!(sections[3].depth, Some(2));
885 }
886
887 #[test]
888 fn test_heading_without_id_ignored() {
889 let html = r#"
890 <h2 id="has-id">With ID</h2>
891 <h2>Without ID</h2>
892 "#;
893
894 let sections = collect_headings(html).unwrap();
895 assert_eq!(sections.len(), 1);
896 assert_eq!(sections[0].anchor, "has-id");
897 }
898
899 #[test]
900 fn test_build_section_tree_simple_nesting() {
901 let html = r#"
902 <h2 id="s1">Section 1</h2>
903 <h3 id="s1-1">Section 1.1</h3>
904 <h3 id="s1-2">Section 1.2</h3>
905 <h4 id="s1-2-1">Section 1.2.1</h4>
906 <h2 id="s2">Section 2</h2>
907 "#;
908
909 let sections = collect_headings(html).unwrap();
910 let tree = build_section_tree(sections);
911
912 assert_eq!(tree[0].parent_anchor, None);
914 assert_eq!(tree[0].prev_anchor, None);
915 assert_eq!(tree[0].next_anchor, Some("s2".to_string()));
916
917 assert_eq!(tree[1].parent_anchor, Some("s1".to_string()));
919 assert_eq!(tree[1].prev_anchor, None);
920 assert_eq!(tree[1].next_anchor, Some("s1-2".to_string()));
921
922 assert_eq!(tree[2].parent_anchor, Some("s1".to_string()));
924 assert_eq!(tree[2].prev_anchor, Some("s1-1".to_string()));
925 assert_eq!(tree[2].next_anchor, None);
926
927 assert_eq!(tree[3].parent_anchor, Some("s1-2".to_string()));
929 assert_eq!(tree[3].prev_anchor, None);
930 assert_eq!(tree[3].next_anchor, None);
931
932 assert_eq!(tree[4].parent_anchor, None);
934 assert_eq!(tree[4].prev_anchor, Some("s1".to_string()));
935 assert_eq!(tree[4].next_anchor, None);
936 }
937
938 #[test]
939 fn test_build_section_tree_flat_structure() {
940 let html = r#"
941 <h2 id="a">A</h2>
942 <h2 id="b">B</h2>
943 <h2 id="c">C</h2>
944 "#;
945
946 let sections = collect_headings(html).unwrap();
947 let tree = build_section_tree(sections);
948
949 assert_eq!(tree[0].parent_anchor, None);
951 assert_eq!(tree[0].prev_anchor, None);
952 assert_eq!(tree[0].next_anchor, Some("b".to_string()));
953
954 assert_eq!(tree[1].parent_anchor, None);
956 assert_eq!(tree[1].prev_anchor, Some("a".to_string()));
957 assert_eq!(tree[1].next_anchor, Some("c".to_string()));
958
959 assert_eq!(tree[2].parent_anchor, None);
961 assert_eq!(tree[2].prev_anchor, Some("b".to_string()));
962 assert_eq!(tree[2].next_anchor, None);
963 }
964
965 #[test]
966 fn test_build_section_tree_single_heading() {
967 let html = r#"<h2 id="only">Only Section</h2>"#;
968
969 let sections = collect_headings(html).unwrap();
970 let tree = build_section_tree(sections);
971
972 assert_eq!(tree.len(), 1);
973 assert_eq!(tree[0].parent_anchor, None);
974 assert_eq!(tree[0].prev_anchor, None);
975 assert_eq!(tree[0].next_anchor, None);
976 }
977
978 #[test]
979 fn test_build_section_tree_skip_levels() {
980 let html = r#"
982 <h2 id="top">Top</h2>
983 <h4 id="nested">Nested (skipped h3)</h4>
984 <h2 id="next">Next Top</h2>
985 "#;
986
987 let sections = collect_headings(html).unwrap();
988 let tree = build_section_tree(sections);
989
990 assert_eq!(tree[1].parent_anchor, Some("top".to_string()));
992 assert_eq!(tree[1].prev_anchor, None); assert_eq!(tree[1].next_anchor, None);
994 }
995
996 #[test]
997 fn test_bikeshed_definition_parsing() {
998 let html = include_str!("../../tests/fixtures/definitions/bikeshed_definition.html");
999 let sections = collect_definitions(html).unwrap();
1000
1001 assert_eq!(sections.len(), 1);
1002 let section = §ions[0];
1003
1004 assert_eq!(section.anchor, "concept-tree");
1005 assert_eq!(section.title, Some("tree".to_string()));
1006 assert_eq!(section.section_type, SectionType::Definition);
1007 assert_eq!(section.depth, None);
1008 }
1009
1010 #[test]
1011 fn test_wattsi_definition_parsing() {
1012 let html = include_str!("../../tests/fixtures/definitions/wattsi_definition.html");
1013 let sections = collect_definitions(html).unwrap();
1014
1015 assert_eq!(sections.len(), 1);
1016 let section = §ions[0];
1017
1018 assert_eq!(section.anchor, "in-parallel");
1019 assert_eq!(section.title, Some("in parallel".to_string()));
1020 assert_eq!(section.section_type, SectionType::Definition);
1021 assert_eq!(section.depth, None);
1022 }
1023
1024 #[test]
1025 fn test_definition_with_code() {
1026 let html = include_str!("../../tests/fixtures/definitions/definition_with_code.html");
1027 let sections = collect_definitions(html).unwrap();
1028
1029 assert_eq!(sections.len(), 1);
1030 let section = §ions[0];
1031
1032 assert_eq!(section.anchor, "x-that");
1033 assert_eq!(section.title, Some("createElement".to_string()));
1034 assert_eq!(section.section_type, SectionType::Definition);
1035 }
1036
1037 #[test]
1038 fn test_definition_without_id_ignored() {
1039 let html = r#"
1040 <dfn id="has-id">With ID</dfn>
1041 <dfn>Without ID</dfn>
1042 "#;
1043
1044 let sections = collect_definitions(html).unwrap();
1045 assert_eq!(sections.len(), 1);
1046 assert_eq!(sections[0].anchor, "has-id");
1047 }
1048
1049 #[test]
1050 fn test_multiple_definitions() {
1051 let html = r#"
1052 <p>A <dfn id="def-1">first term</dfn> and a <dfn id="def-2">second term</dfn>.</p>
1053 <p>Also a <dfn id="def-3">third term</dfn>.</p>
1054 "#;
1055
1056 let sections = collect_definitions(html).unwrap();
1057 assert_eq!(sections.len(), 3);
1058 assert_eq!(sections[0].anchor, "def-1");
1059 assert_eq!(sections[1].anchor, "def-2");
1060 assert_eq!(sections[2].anchor, "def-3");
1061 }
1062
1063 #[test]
1064 fn test_bikeshed_algorithm_parsing() {
1065 let html = include_str!("../../tests/fixtures/algorithms/bikeshed_algorithm.html");
1066 let sections = collect_algorithms(html).unwrap();
1067
1068 assert_eq!(sections.len(), 1);
1069 let section = §ions[0];
1070
1071 assert_eq!(section.anchor, "concept-ordered-set-parser");
1072 assert_eq!(section.title, Some("ordered set parser".to_string()));
1073 assert_eq!(section.section_type, SectionType::Algorithm);
1074 assert_eq!(section.depth, None);
1075 }
1076
1077 #[test]
1078 fn test_algorithm_vs_definition_distinction() {
1079 let html =
1080 include_str!("../../tests/fixtures/algorithms/mixed_definitions_algorithms.html");
1081
1082 let algorithms = collect_algorithms(html).unwrap();
1084 assert_eq!(algorithms.len(), 1);
1085 assert_eq!(algorithms[0].anchor, "algorithm-def");
1086 assert_eq!(algorithms[0].section_type, SectionType::Algorithm);
1087
1088 let definitions = collect_definitions(html).unwrap();
1090 assert_eq!(definitions.len(), 2);
1091 assert_eq!(definitions[0].anchor, "standalone-def");
1092 assert_eq!(definitions[0].section_type, SectionType::Definition);
1093 assert_eq!(definitions[1].anchor, "another-standalone");
1094 assert_eq!(definitions[1].section_type, SectionType::Definition);
1095
1096 let def_anchors: Vec<_> = definitions.iter().map(|d| &d.anchor).collect();
1098 assert!(!def_anchors.contains(&&"algorithm-def".to_string()));
1099 }
1100
1101 #[test]
1102 fn test_algorithm_without_dfn() {
1103 let html = r#"
1105 <div class="algorithm" data-algorithm="no dfn">
1106 <p>This algorithm has no dfn element.</p>
1107 <ol><li>Step 1</li></ol>
1108 </div>
1109 "#;
1110
1111 let sections = collect_algorithms(html).unwrap();
1112 assert_eq!(sections.len(), 0); }
1114
1115 #[test]
1116 fn test_idl_interface_parsing() {
1117 let html = include_str!("../../tests/fixtures/idl/interface.html");
1118 let sections = collect_idl(html).unwrap();
1119
1120 assert_eq!(sections.len(), 1);
1121 let section = §ions[0];
1122
1123 assert_eq!(section.anchor, "event");
1124 assert_eq!(section.title, Some("Event".to_string()));
1125 assert_eq!(section.section_type, SectionType::Idl);
1126 assert_eq!(section.depth, None);
1127 }
1128
1129 #[test]
1130 fn test_idl_dictionary_parsing() {
1131 let html = include_str!("../../tests/fixtures/idl/dictionary.html");
1132 let sections = collect_idl(html).unwrap();
1133
1134 assert_eq!(sections.len(), 1);
1135 let section = §ions[0];
1136
1137 assert_eq!(section.anchor, "eventinit");
1138 assert_eq!(section.title, Some("EventInit".to_string()));
1139 assert_eq!(section.section_type, SectionType::Idl);
1140 assert_eq!(section.depth, None);
1141 }
1142
1143 #[test]
1144 fn test_idl_vs_definition_distinction() {
1145 let html = include_str!("../../tests/fixtures/idl/mixed_idl_definitions.html");
1146
1147 let idl = collect_idl(html).unwrap();
1149 assert_eq!(idl.len(), 2);
1150 assert_eq!(idl[0].anchor, "myinterface");
1151 assert_eq!(idl[0].section_type, SectionType::Idl);
1152 assert_eq!(idl[1].anchor, "mydict");
1153 assert_eq!(idl[1].section_type, SectionType::Idl);
1154
1155 let definitions = collect_definitions(html).unwrap();
1157 assert_eq!(definitions.len(), 2);
1158 assert_eq!(definitions[0].anchor, "regular-term");
1159 assert_eq!(definitions[0].section_type, SectionType::Definition);
1160 assert_eq!(definitions[1].anchor, "another-term");
1161 assert_eq!(definitions[1].section_type, SectionType::Definition);
1162
1163 let def_anchors: Vec<_> = definitions.iter().map(|d| &d.anchor).collect();
1165 assert!(!def_anchors.contains(&&"myinterface".to_string()));
1166 assert!(!def_anchors.contains(&&"mydict".to_string()));
1167 }
1168
1169 #[test]
1170 fn test_idl_without_data_dfn_type_ignored() {
1171 let html = r#"
1172 <pre class="idl">
1173 <dfn id="has-type" data-dfn-type="interface">WithType</dfn>
1174 <dfn id="no-type">WithoutType</dfn>
1175 </pre>
1176 "#;
1177
1178 let sections = collect_idl(html).unwrap();
1179 assert_eq!(sections.len(), 1);
1180 assert_eq!(sections[0].anchor, "has-type");
1181 }
1182
1183 #[test]
1184 fn test_wattsi_algorithm_pattern() {
1185 let html = include_str!("../../tests/fixtures/algorithms/wattsi_navigate.html");
1188 let converter = crate::parse::markdown::build_converter("https://html.spec.whatwg.org");
1189
1190 let document = Html::parse_document(html);
1191 let selector = Selector::parse("dfn[id]").unwrap();
1192
1193 let mut algorithms = Vec::new();
1194 for element in document.select(&selector) {
1195 if let Some(section) = parse_dfn_element(&element, &converter).unwrap() {
1196 algorithms.push(section);
1197 }
1198 }
1199
1200 assert_eq!(algorithms.len(), 1, "Should detect one algorithm");
1201 let algo = &algorithms[0];
1202
1203 assert_eq!(algo.anchor, "navigate");
1204 assert_eq!(algo.title, Some("navigate".to_string()));
1205 assert_eq!(
1206 algo.section_type,
1207 SectionType::Algorithm,
1208 "Should be classified as Algorithm, not Definition"
1209 );
1210
1211 let content = algo.content_text.as_ref().unwrap();
1213 assert!(content.contains("navigate"), "Should include intro text");
1214 assert!(content.contains("1. "), "Should include first step");
1215 assert!(content.contains("2. "), "Should include second step");
1216 assert!(
1218 content.contains(" 1. "),
1219 "Should include nested step with indentation"
1220 );
1221 }
1222
// A dfn nested in an algorithm's `<ol>` steps must not become its own section;
// the dfn that introduces the algorithm and a later standalone dfn still do.
#[test]
fn test_dfn_inside_algorithm_content_skipped() {
    let markup = r#"
        <h2 id="algorithms">Algorithms</h2>
        <p>To <dfn id="do-something">do something</dfn> with <var>input</var>:</p>
        <ol>
        <li><p>Let <var>result</var> be the result of calling <dfn id="helper">helper</dfn>.</p></li>
        <li><p>Return <var>result</var>.</p></li>
        </ol>
        <p>The <dfn id="outside-def">outside definition</dfn> is separate.</p>
    "#;

    let converter = crate::parse::markdown::build_converter("https://test.example.com");
    let dfn_selector = Selector::parse("dfn[id]").unwrap();
    let document = Html::parse_document(markup);

    let sections: Vec<_> = document
        .select(&dfn_selector)
        .filter_map(|dfn| parse_dfn_element(&dfn, &converter).unwrap())
        .collect();

    assert_eq!(
        sections.len(),
        2,
        "Should collect 2 sections (algorithm + outside def), not the helper inside <ol>"
    );

    // Membership check over the anchors of the collected sections.
    let has_anchor = |id: &str| sections.iter().any(|s| s.anchor.as_str() == id);

    assert!(
        has_anchor("do-something"),
        "Should include the algorithm-defining dfn"
    );
    assert!(
        has_anchor("outside-def"),
        "Should include the outside definition"
    );
    assert!(
        !has_anchor("helper"),
        "Should NOT include dfn inside algorithm <ol>"
    );
}
1270
// Same scenario as the plain-markup variant, but with the algorithm wrapped in
// `<div class="algorithm">`: the dfn inside the step list must be skipped.
#[test]
fn test_dfn_inside_bikeshed_algorithm_content_skipped() {
    let markup = r#"
        <h2 id="algorithms">Algorithms</h2>
        <div class="algorithm">
        <p>To <dfn id="process">process</dfn> the <var>data</var>:</p>
        <ol>
        <li><p>Let <var>x</var> be a new <dfn id="internal-thing">internal thing</dfn>.</p></li>
        <li><p>Return <var>x</var>.</p></li>
        </ol>
        </div>
        <p>A <dfn id="external-term">external term</dfn> here.</p>
    "#;

    let converter = crate::parse::markdown::build_converter("https://test.example.com");
    let dfn_selector = Selector::parse("dfn[id]").unwrap();
    let document = Html::parse_document(markup);

    let mut sections = Vec::new();
    sections.extend(
        document
            .select(&dfn_selector)
            .filter_map(|dfn| parse_dfn_element(&dfn, &converter).unwrap()),
    );

    assert_eq!(
        sections.len(),
        2,
        "Should collect 2 sections, not the internal-thing inside <ol>"
    );

    let found: Vec<&str> = sections.iter().map(|s| s.anchor.as_str()).collect();
    assert!(found.iter().any(|&a| a == "process"));
    assert!(found.iter().any(|&a| a == "external-term"));
    assert!(
        found.iter().all(|&a| a != "internal-thing"),
        "Should NOT include dfn inside algorithm <ol>"
    );
}
1312
// Parameter dfns in an algorithm's intro sentence (one marked with
// `data-dfn-for`, one wrapping a `<var>`) must not be emitted as sections.
#[test]
fn test_parameter_dfns_skipped() {
    let markup = r#"
        <h2 id="algorithms">Algorithms</h2>
        <p>To <dfn id="navigate">navigate</dfn> with <dfn data-dfn-for="navigate" id="param1"><var>url</var></dfn>
        and <dfn id="param2"><var>options</var></dfn>:</p>
        <ol>
        <li><p>Do something.</p></li>
        </ol>
        <p>A standalone <dfn id="regular-def">definition</dfn>.</p>
    "#;

    let converter = crate::parse::markdown::build_converter("https://test.example.com");
    let dfn_selector = Selector::parse("dfn[id]").unwrap();
    let document = Html::parse_document(markup);

    let sections: Vec<_> = document
        .select(&dfn_selector)
        .filter_map(|dfn| parse_dfn_element(&dfn, &converter).unwrap())
        .collect();

    assert_eq!(
        sections.len(),
        2,
        "Should collect 2 sections (algorithm + regular def)"
    );

    let has_anchor = |id: &str| sections.iter().any(|s| s.anchor.as_str() == id);

    // Kept: the algorithm itself and the unrelated standalone definition.
    assert!(has_anchor("navigate"), "Should include the algorithm");
    assert!(
        has_anchor("regular-def"),
        "Should include standalone definition"
    );

    // Skipped: both parameter dfns.
    assert!(
        !has_anchor("param1"),
        "Should NOT include parameter dfn with data-dfn-for"
    );
    assert!(
        !has_anchor("param2"),
        "Should NOT include parameter dfn containing <var>"
    );
}
1364
// Dfns carrying both `data-dfn-for` and `data-dfn-type="dfn"` are expected to
// be kept as sections, alongside a dfn that has no `data-dfn-for` at all.
#[test]
fn test_property_dfns_with_dfn_for_and_dfn_type_kept() {
    let markup = r#"
        <h2 id="trees">Trees</h2>
        <p>An object that <dfn class="dfn-paneled" data-dfn-type="dfn" data-export id="concept-tree">participates</dfn>
        in a tree has a <dfn class="dfn-paneled" data-dfn-for="tree" data-dfn-type="dfn" data-export id="concept-tree-parent">parent</dfn>,
        which is either null or an object, and has
        <dfn class="dfn-paneled" data-dfn-for="tree" data-dfn-type="dfn" data-export id="concept-tree-child">children</dfn>,
        which is an ordered set of objects.</p>
    "#;

    let converter = crate::parse::markdown::build_converter("https://test.example.com");
    let dfn_selector = Selector::parse("dfn[id]").unwrap();
    let document = Html::parse_document(markup);

    let sections: Vec<_> = document
        .select(&dfn_selector)
        .filter_map(|dfn| parse_dfn_element(&dfn, &converter).unwrap())
        .collect();

    let has_anchor = |id: &str| sections.iter().any(|s| s.anchor.as_str() == id);

    assert!(
        has_anchor("concept-tree"),
        "Should include dfn without data-dfn-for"
    );
    assert!(
        has_anchor("concept-tree-parent"),
        "Should include property dfn with data-dfn-for + data-dfn-type"
    );
    assert!(
        has_anchor("concept-tree-child"),
        "Should include property dfn with data-dfn-for + data-dfn-type"
    );
}
1404
// In an IDL block, dfns typed `argument` must be skipped while the interface,
// constructor, method and attribute dfns are kept.
#[test]
fn test_argument_dfns_skipped() {
    let markup = r#"
        <h2 id="api">API</h2>
        <pre class="idl">
        <dfn data-dfn-type="interface" id="audiodecoder"><code>AudioDecoder</code></dfn>
        <dfn data-dfn-for="AudioDecoder" data-dfn-type="constructor" id="dom-audiodecoder-ctor"><code>AudioDecoder(init)</code></dfn>
        <dfn data-dfn-for="AudioDecoder/AudioDecoder(init)" data-dfn-type="argument" id="dom-audiodecoder-ctor-init"><code>init</code></dfn>
        <dfn data-dfn-for="AudioDecoder" data-dfn-type="method" id="dom-audiodecoder-configure"><code>configure(config)</code></dfn>
        <dfn data-dfn-for="AudioDecoder/configure(config)" data-dfn-type="argument" id="dom-audiodecoder-configure-config"><code>config</code></dfn>
        <dfn data-dfn-for="AudioDecoder" data-dfn-type="attribute" id="dom-audiodecoder-state"><code>state</code></dfn>
        </pre>
    "#;

    let converter = crate::parse::markdown::build_converter("https://test.example.com");
    let dfn_selector = Selector::parse("dfn[id]").unwrap();
    let document = Html::parse_document(markup);

    let sections: Vec<_> = document
        .select(&dfn_selector)
        .filter_map(|dfn| parse_dfn_element(&dfn, &converter).unwrap())
        .collect();

    let has_anchor = |id: &str| sections.iter().any(|s| s.anchor.as_str() == id);

    // Members that should survive as sections.
    assert!(has_anchor("audiodecoder"), "Interface should be kept");
    assert!(
        has_anchor("dom-audiodecoder-ctor"),
        "Constructor should be kept"
    );
    assert!(
        has_anchor("dom-audiodecoder-configure"),
        "Method should be kept"
    );
    assert!(
        has_anchor("dom-audiodecoder-state"),
        "Attribute should be kept"
    );

    // Argument dfns should be dropped.
    assert!(
        !has_anchor("dom-audiodecoder-ctor-init"),
        "Argument should be skipped"
    );
    assert!(
        !has_anchor("dom-audiodecoder-configure-config"),
        "Argument should be skipped"
    );
}
1462
// A prose-only `<emu-clause>` is expected to parse as a Heading section with
// the secnum removed from the title and the paragraph kept as content.
#[test]
fn test_emu_clause_prose_section() {
    let markup = r#"
        <emu-clause id="sec-overview">
        <h1><span class="secnum">4</span> Overview</h1>
        <p>This section contains a non-normative overview of the ECMAScript language.</p>
        </emu-clause>
    "#;

    let converter = crate::parse::markdown::build_converter("https://tc39.es/ecma262");
    let clause_selector = Selector::parse("emu-clause[id]").unwrap();
    let document = Html::parse_document(markup);
    let clause = document.select(&clause_selector).next().unwrap();

    let parsed = parse_emu_clause_element(&clause, &converter)
        .unwrap()
        .unwrap();

    assert_eq!(parsed.anchor, "sec-overview");
    assert_eq!(parsed.title, Some("Overview".to_string()));
    // The secnum "4" has a single component; the test expects depth 2 for it.
    assert_eq!(parsed.depth, Some(2));
    assert_eq!(parsed.section_type, SectionType::Heading);

    assert!(parsed.content_text.is_some());
    let body = parsed.content_text.as_ref().unwrap();
    assert!(body.contains("non-normative overview"));
}
1494
// An `<emu-clause>` containing an `<emu-alg>` step list is expected to parse as
// an Algorithm section whose content holds both the intro prose and the steps.
#[test]
fn test_emu_clause_algorithm_section() {
    let markup = r#"
        <emu-clause id="sec-tostring" type="abstract operation" aoid="ToString">
        <h1><span class="secnum">7.1.17</span> ToString ( <var>argument</var> )</h1>
        <p>The abstract operation ToString converts argument to a String.</p>
        <emu-alg>
        <ol>
        <li>If <var>argument</var> is a String, return <var>argument</var>.</li>
        <li>If <var>argument</var> is <emu-val>undefined</emu-val>, return "undefined".</li>
        <li>If <var>argument</var> is <emu-val>null</emu-val>, return "null".</li>
        </ol>
        </emu-alg>
        </emu-clause>
    "#;

    let converter = crate::parse::markdown::build_converter("https://tc39.es/ecma262");
    let clause_selector = Selector::parse("emu-clause[id]").unwrap();
    let document = Html::parse_document(markup);
    let clause = document.select(&clause_selector).next().unwrap();

    let parsed = parse_emu_clause_element(&clause, &converter)
        .unwrap()
        .unwrap();

    assert_eq!(parsed.anchor, "sec-tostring");
    assert_eq!(parsed.title, Some("ToString ( argument )".to_string()));
    // The secnum "7.1.17" has three components; the test expects depth 4 for it.
    assert_eq!(parsed.depth, Some(4));
    assert_eq!(parsed.section_type, SectionType::Algorithm);

    let body = parsed.content_text.unwrap();
    assert!(
        body.contains("converts argument"),
        "Should have intro prose"
    );
    assert!(body.contains("1."), "Should have algorithm steps");
}
1532
// A parent clause and its nested child clause each become a section, and the
// child's text must not leak into the parent's content.
#[test]
fn test_emu_clause_nested_sections_excluded_from_content() {
    let markup = r#"
        <emu-clause id="sec-parent">
        <h1><span class="secnum">23</span> Parent Section</h1>
        <p>Intro text for the parent.</p>
        <emu-clause id="sec-child">
        <h1><span class="secnum">23.1</span> Child Section</h1>
        <p>This should NOT appear in parent content.</p>
        </emu-clause>
        </emu-clause>
    "#;

    let converter = crate::parse::markdown::build_converter("https://tc39.es/ecma262");
    let clause_selector = Selector::parse("emu-clause[id]").unwrap();
    let document = Html::parse_document(markup);

    // Sections are collected in the order the selector yields the clauses.
    let sections: Vec<_> = document
        .select(&clause_selector)
        .filter_map(|clause| parse_emu_clause_element(&clause, &converter).unwrap())
        .collect();

    assert_eq!(sections.len(), 2);
    assert_eq!(sections[0].anchor, "sec-parent");
    assert_eq!(sections[1].anchor, "sec-child");

    let parent_body = sections[0].content_text.as_ref().unwrap();
    assert!(parent_body.contains("Intro text"));
    assert!(!parent_body.contains("should NOT appear"));
}
1566
// Checks the depth `extract_secnum_depth` reports for numeric and annex-style
// (letter-prefixed) section numbers of increasing length.
#[test]
fn test_secnum_depth_derivation() {
    // Wraps `secnum` in a minimal `<h1>` and runs the extractor on that heading.
    let depth_for = |secnum: &str| -> Option<u8> {
        let markup = format!(r#"<h1><span class="secnum">{}</span> Title</h1>"#, secnum);
        let document = Html::parse_document(&markup);
        let h1_selector = Selector::parse("h1").unwrap();
        let heading = document.select(&h1_selector).next().unwrap();
        extract_secnum_depth(&heading)
    };

    // Numeric section numbers: one to four dot-separated components.
    assert_eq!(depth_for("4"), Some(2));
    assert_eq!(depth_for("4.3"), Some(3));
    assert_eq!(depth_for("7.1.17"), Some(4));
    assert_eq!(depth_for("23.1.3.30"), Some(5));

    // Letter-prefixed section numbers follow the same progression.
    assert_eq!(depth_for("A"), Some(2));
    assert_eq!(depth_for("A.1"), Some(3));
    assert_eq!(depth_for("A.1.2"), Some(4));
}
1586
// The section number inside `<span class="secnum">` must not show up in the
// parsed title, while the rest of the heading text is kept.
#[test]
fn test_emu_clause_secnum_stripped_from_title() {
    let markup = r#"
        <emu-clause id="sec-test">
        <h1><span class="secnum">7.1.17</span> ToString ( <var>argument</var> )</h1>
        </emu-clause>
    "#;

    let converter = crate::parse::markdown::build_converter("https://tc39.es/ecma262");
    let clause_selector = Selector::parse("emu-clause[id]").unwrap();
    let document = Html::parse_document(markup);
    let clause = document.select(&clause_selector).next().unwrap();

    let parsed = parse_emu_clause_element(&clause, &converter)
        .unwrap()
        .unwrap();
    let heading_text = parsed.title.unwrap();

    assert!(
        !heading_text.contains("7.1.17"),
        "secnum should be stripped: {}",
        heading_text
    );
    assert!(
        heading_text.contains("ToString"),
        "Title should have function name: {}",
        heading_text
    );
}
1617
// `<emu-annex>` elements go through the same parser as `<emu-clause>` and are
// expected to yield anchor, secnum-free title and depth in the same way.
#[test]
fn test_emu_annex_parsed() {
    let markup = r#"
        <emu-annex id="sec-additional-built-in-properties">
        <h1><span class="secnum">B</span> Additional Built-in Properties</h1>
        <p>Annex content here.</p>
        </emu-annex>
    "#;

    let converter = crate::parse::markdown::build_converter("https://tc39.es/ecma262");
    let annex_selector = Selector::parse("emu-annex[id]").unwrap();
    let document = Html::parse_document(markup);
    let annex = document.select(&annex_selector).next().unwrap();

    let parsed = parse_emu_clause_element(&annex, &converter)
        .unwrap()
        .unwrap();

    assert_eq!(parsed.anchor, "sec-additional-built-in-properties");
    assert_eq!(
        parsed.title,
        Some("Additional Built-in Properties".to_string())
    );
    // The single-letter secnum "B" is expected to map to depth 2.
    assert_eq!(parsed.depth, Some(2));
}
1643
// End-to-end check of the ecmarkup `ToString` fixture: section metadata, inline
// rendering of vars, links and emu-val, and every numbered step on its own line.
#[test]
fn test_ecmarkup_fixture_tostring_algorithm() {
    let html = include_str!("../../tests/fixtures/ecmarkup/tostring.html");
    let converter = crate::parse::markdown::build_converter("https://tc39.es/ecma262");
    let document = Html::parse_document(html);
    let selector = Selector::parse("emu-clause[id]").unwrap();
    let element = document.select(&selector).next().unwrap();

    let section = parse_emu_clause_element(&element, &converter)
        .unwrap()
        .unwrap();

    assert_eq!(section.anchor, "sec-tostring");
    assert_eq!(section.title, Some("ToString ( argument )".to_string()));
    assert_eq!(section.depth, Some(4));
    assert_eq!(section.section_type, SectionType::Algorithm);

    let content = section.content_text.as_ref().unwrap();

    // The preview in the failure message is built from chars. A byte slice such
    // as `&content[..200]` would itself panic when the content is shorter than
    // 200 bytes or byte 200 is not a char boundary, hiding the real failure.
    assert!(
        content.contains("The abstract operation ToString takes argument *argument*"),
        "Intro should have italic var: {}",
        content.chars().take(200).collect::<String>()
    );
    assert!(
        content.contains("[ECMAScript language value](https://tc39.es/ecma262#sec-ecmascript-language-types)"),
        "emu-xref links should be inline markdown links"
    );

    assert!(
        content.contains("1. If *argument* [is a String]("),
        "Step 1 should be on a single line with inline link"
    );
    assert!(
        content.contains("2. If *argument* [is a Symbol]("),
        "Step 2 should follow immediately"
    );
    assert!(
        content.contains("3. If *argument* is undefined, return \"undefined\"."),
        "Step 3: emu-val should render inline"
    );
    assert!(
        content.contains("10. Let *primValue* be ?"),
        "Step 10 should have var and link inline"
    );
    assert!(
        content.contains("10. Let *primValue*") && content.contains("[ToPrimitive]("),
        "Step 10 should have ToPrimitive link"
    );
    assert!(
        content.contains("12. Return ?") && content.contains("[ToString]("),
        "Step 12 should have recursive call"
    );

    for i in 1..=12 {
        // Both prefixes are built once per step rather than once per line.
        let spaced = format!("{}. ", i);
        let bare = format!("{}.", i);

        // Two-digit steps are also accepted without a space after the dot.
        let on_own_line = content
            .lines()
            .map(str::trim_start)
            .any(|line| line.starts_with(&spaced) || (i >= 10 && line.starts_with(&bare)));

        assert!(on_own_line, "Step {} should appear on its own line", i);
    }
}
1720
// Parses the ecmarkup "Undefined Type" fixture and checks that its short prose
// stays compact, with `emu-val` values rendered inline as plain text.
#[test]
fn test_ecmarkup_fixture_undefined_type_prose() {
    let fixture = include_str!("../../tests/fixtures/ecmarkup/undefined_type.html");
    let converter = crate::parse::markdown::build_converter("https://tc39.es/ecma262");
    let clause_selector = Selector::parse("emu-clause[id]").unwrap();
    let document = Html::parse_document(fixture);
    let clause = document.select(&clause_selector).next().unwrap();

    let parsed = parse_emu_clause_element(&clause, &converter)
        .unwrap()
        .unwrap();

    assert_eq!(
        parsed.anchor,
        "sec-ecmascript-language-types-undefined-type"
    );
    assert_eq!(parsed.title, Some("The Undefined Type".to_string()));
    assert_eq!(parsed.depth, Some(4));
    assert_eq!(parsed.section_type, SectionType::Heading);

    let body = parsed.content_text.as_ref().unwrap();

    assert!(
        body.contains("The Undefined type has exactly one value, called undefined."),
        "emu-val should render inline as plain text: {}",
        body
    );
    assert!(
        body.contains("the value undefined."),
        "Second emu-val should also be inline"
    );

    // The prose is expected to fit on at most two lines.
    let rendered_lines = body.lines().count();
    assert!(
        rendered_lines <= 2,
        "Simple prose should be 1-2 lines, got {}: {}",
        rendered_lines,
        body
    );
}
1762}