Skip to main content

webspec_index/analyze/
scanner.rs

1//! Document scanning for spec URLs and step comments.
2
3use regex::Regex;
4
/// A spec URL found in a document.
///
/// Produced by `scan_document`, one value per regex match. Equality compares
/// every field, which makes scan results directly comparable in tests.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct UrlMatch {
    /// Zero-based index of the line containing the URL.
    pub line: usize,
    /// Byte offset within the line where the matched URL starts.
    pub col_start: usize,
    /// Byte offset within the line just past the end of the matched URL.
    pub col_end: usize,
    /// Number of leading whitespace bytes on the line.
    pub indent: usize,
    /// Spec name looked up from the matched base URL; empty when the base URL
    /// is not present in the lookup table.
    pub spec: String,
    /// Fragment identifier following `#` (without the `#`).
    pub anchor: String,
    /// Full matched URL text.
    pub url: String,
}
16
/// A step comment found in source code.
///
/// Produced by `scan_steps`. Equality compares every field, which makes scan
/// results directly comparable in tests.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct StepComment {
    /// Zero-based index of the line on which the step comment starts.
    pub line: usize,
    /// Byte offset on `line` where the match (the comment marker) starts.
    pub col_start: usize,
    /// Byte offset just past the end of the match. For a multi-line comment
    /// this is an offset on the last continuation line, not on `line`.
    pub col_end: usize,
    /// Number of leading whitespace bytes on `line`.
    pub indent: usize,
    /// Step number split on `.`, e.g. `5.1.2` becomes `[5, 1, 2]`.
    pub number: Vec<u32>,
    /// Text following the step number; continuation lines are appended,
    /// separated by a single space.
    pub text: String,
    /// Last line for multi-line comments (None = same as `line`)
    pub end_line: Option<usize>,
}
29
30/// Build a regex from known spec base URLs.
31///
32/// Matches both single-page URLs (base/#anchor) and multipage URLs
33/// (base/multipage/page.html#anchor).
34pub fn build_url_pattern(spec_urls: &[SpecUrl]) -> Regex {
35    let bases: Vec<String> = spec_urls
36        .iter()
37        .map(|s| regex::escape(&s.base_url))
38        .collect();
39    let pattern = format!(r"({})/(?:[^\s#]*)?#([\w:._%{{}}\(\)-]+)", bases.join("|"));
40    Regex::new(&pattern).expect("invalid URL pattern")
41}
42
/// Spec name + base URL pair.
///
/// Equality compares both fields.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SpecUrl {
    /// Short spec name; this is the value `build_spec_lookup` maps a base URL to.
    pub spec: String,
    /// Base URL of the spec. `build_url_pattern` expects it to be followed
    /// by `/` in documents, so it is written without a trailing slash in this
    /// file's tests (e.g. `https://html.spec.whatwg.org`).
    pub base_url: String,
}
49
50/// Build base_url -> spec name lookup.
51pub fn build_spec_lookup(spec_urls: &[SpecUrl]) -> std::collections::HashMap<String, String> {
52    spec_urls
53        .iter()
54        .map(|s| (s.base_url.clone(), s.spec.clone()))
55        .collect()
56}
57
/// Measure the leading whitespace of a line, in bytes.
///
/// The result is the byte offset of the first non-whitespace character, or
/// the full byte length when the line is empty or entirely whitespace. For
/// ASCII indentation (spaces and tabs) this equals the character count.
fn leading_indent(line: &str) -> usize {
    line.char_indices()
        .find(|&(_, ch)| !ch.is_whitespace())
        .map_or(line.len(), |(offset, _)| offset)
}
62
63/// Scan document text for spec URLs.
64///
65/// Returns list of `UrlMatch` sorted by (line, col_start).
66pub fn scan_document(
67    text: &str,
68    pattern: &Regex,
69    spec_lookup: &std::collections::HashMap<String, String>,
70) -> Vec<UrlMatch> {
71    let mut matches = Vec::new();
72    for (line_num, line) in text.lines().enumerate() {
73        let indent = leading_indent(line);
74        for m in pattern.find_iter(line) {
75            // Re-run with captures to get groups
76            if let Some(caps) = pattern.captures(&line[m.start()..]) {
77                let base_url = caps.get(1).map_or("", |m| m.as_str());
78                let anchor = caps.get(2).map_or("", |m| m.as_str());
79                let spec = spec_lookup.get(base_url).cloned().unwrap_or_default();
80                matches.push(UrlMatch {
81                    line: line_num,
82                    col_start: m.start(),
83                    col_end: m.end(),
84                    indent,
85                    spec,
86                    anchor: anchor.to_string(),
87                    url: m.as_str().to_string(),
88                });
89            }
90        }
91    }
92    matches
93}
94
95/// Step comment pattern matching various comment styles.
96///
97/// Requires at least one of: "Step" prefix, multi-part number (5.1), trailing dot.
98fn step_pattern() -> &'static Regex {
99    use std::sync::OnceLock;
100    static RE: OnceLock<Regex> = OnceLock::new();
101    RE.get_or_init(|| {
102        Regex::new(
103            r"(?://|#|;+|/\*+|\*)\s*([Ss]tep\s+)?(\d{1,3}(?:\.\d{1,3})*)(\.)?(?:\s*(.*?))\s*(?:\*/)?$",
104        )
105        .expect("invalid step pattern")
106    })
107}
108
109/// Continuation line pattern.
110fn continuation_pattern() -> &'static Regex {
111    use std::sync::OnceLock;
112    static RE: OnceLock<Regex> = OnceLock::new();
113    RE.get_or_init(|| {
114        Regex::new(r"^\s*(?://|#|;+|\*)\s*(.*?)\s*(?:\*/)?$").expect("invalid continuation pattern")
115    })
116}
117
118/// Scan document text for step comments.
119///
120/// Supports multi-line comments: continuation lines immediately following
121/// a step comment are appended to its text.
122pub fn scan_steps(text: &str) -> Vec<StepComment> {
123    let step_re = step_pattern();
124    let cont_re = continuation_pattern();
125    let lines: Vec<&str> = text.lines().collect();
126    let mut results = Vec::new();
127    let mut i = 0;
128
129    while i < lines.len() {
130        if let Some(caps) = step_re.captures(lines[i]) {
131            let has_step_prefix = caps.get(1).is_some();
132            let number_str = caps.get(2).map_or("", |m| m.as_str());
133            let has_trailing_dot = caps.get(3).is_some();
134            let mut step_text = caps.get(4).map_or("", |m| m.as_str()).to_string();
135            let is_multi_part = number_str.contains('.');
136
137            // Require at least one signal that this is a step reference
138            if !has_step_prefix && !has_trailing_dot && !is_multi_part {
139                i += 1;
140                continue;
141            }
142
143            let indent = leading_indent(lines[i]);
144            let col_start = caps.get(0).map_or(0, |m| m.start());
145            let mut col_end = caps.get(0).map_or(0, |m| m.end());
146
147            // Collect continuation lines
148            let mut j = i + 1;
149            while j < lines.len() {
150                // Stop if the next line is itself a step
151                if step_re.is_match(lines[j]) {
152                    break;
153                }
154                if let Some(cont_caps) = cont_re.captures(lines[j]) {
155                    let cont_text = cont_caps.get(1).map_or("", |m| m.as_str());
156                    if !cont_text.is_empty() {
157                        step_text.push(' ');
158                        step_text.push_str(cont_text);
159                        col_end = cont_caps.get(0).map_or(col_end, |m| m.end());
160                        j += 1;
161                    } else {
162                        break;
163                    }
164                } else {
165                    break;
166                }
167            }
168
169            let end_line = if j > i + 1 { Some(j - 1) } else { None };
170            let number: Vec<u32> = number_str
171                .split('.')
172                .filter_map(|p| p.parse().ok())
173                .collect();
174
175            results.push(StepComment {
176                line: i,
177                col_start,
178                col_end,
179                indent,
180                number,
181                text: step_text,
182                end_line,
183            });
184            i = j;
185        } else {
186            i += 1;
187        }
188    }
189    results
190}
191
192/// Find a URL match at the given cursor position.
193pub fn find_url_at_position(matches: &[UrlMatch], line: usize, col: usize) -> Option<&UrlMatch> {
194    matches
195        .iter()
196        .find(|m| m.line == line && m.col_start <= col && col <= m.col_end)
197}
198
199/// Associate step comments with spec URLs using indentation-based scoping.
200///
201/// Scoping rules:
202/// - A spec URL comment at indent level N opens a scope.
203/// - A URL at the same indent as the top of the scope stack replaces it;
204///   a URL at deeper indent stacks on top (nested scope).
205/// - Step comments are assigned to the innermost (top-of-stack) scope.
206/// - Scopes close when a non-blank line at indent L satisfies:
207///   - `L < N` (left the block entirely), OR
208///   - `L == N` and the scope saw deeper content (`max_seen > N`) —
209///     this catches closing braces returning to the scope's indent level.
210///
211/// This correctly handles:
212/// - Comments above a function (scope survives the function signature, closes at `}`)
213/// - Comments inside a function body (scope closes at `}` which is at lower indent)
214/// - Nested spec URLs (inner algorithm inside an outer one)
215pub fn build_scopes(
216    text: &str,
217    url_matches: &[UrlMatch],
218    step_comments: &[StepComment],
219) -> Vec<(UrlMatch, Vec<StepComment>)> {
220    if url_matches.is_empty() {
221        return Vec::new();
222    }
223
224    // Index url_matches and step_comments by line number for O(1) lookup.
225    let mut url_by_line: std::collections::HashMap<usize, Vec<&UrlMatch>> =
226        std::collections::HashMap::new();
227    for u in url_matches {
228        url_by_line.entry(u.line).or_default().push(u);
229    }
230
231    let mut step_by_line: std::collections::HashMap<usize, Vec<&StepComment>> =
232        std::collections::HashMap::new();
233    for s in step_comments {
234        step_by_line.entry(s.line).or_default().push(s);
235    }
236
237    // Scope stack: each entry tracks the URL, its indent, the maximum indent seen
238    // since it was pushed, and the collected step comments.
239    struct Scope {
240        url: UrlMatch,
241        indent: usize,
242        max_seen: usize,
243        steps: Vec<StepComment>,
244    }
245
246    let mut stack: Vec<Scope> = Vec::new();
247    let mut finished: Vec<(UrlMatch, Vec<StepComment>)> = Vec::new();
248
249    let lines: Vec<&str> = text.lines().collect();
250
251    for (line_num, line_text) in lines.iter().enumerate() {
252        let indent = leading_indent(line_text);
253        let is_blank = line_text.trim().is_empty();
254
255        // Blank lines don't affect scoping.
256        if is_blank {
257            continue;
258        }
259
260        // Check if this line has URL matches — handle scope push/replace.
261        if let Some(urls) = url_by_line.get(&line_num) {
262            for url in urls {
263                let url_indent = url.indent;
264
265                // Pop scopes at the same or higher indent before pushing: a
266                // same-indent URL replaces the sibling algorithm, a deeper one
267                // nests. (Higher indent than the top simply nests, popping none.)
268                while let Some(popped) = stack.pop_if(|top| top.indent >= url_indent) {
269                    finished.push((popped.url, popped.steps));
270                }
271
272                stack.push(Scope {
273                    url: (*url).clone(),
274                    indent: url_indent,
275                    max_seen: url_indent,
276                    steps: Vec::new(),
277                });
278            }
279            continue;
280        }
281
282        // Check if this line has step comments — assign to top of stack.
283        if let Some(steps) = step_by_line.get(&line_num) {
284            if let Some(top) = stack.last_mut() {
285                if indent > top.max_seen {
286                    top.max_seen = indent;
287                }
288                for step in steps {
289                    top.steps.push((*step).clone());
290                }
291            }
292            // Step comment lines don't close scopes (they're comments).
293            continue;
294        }
295
296        // Regular line: update max_seen and check scope closing.
297        // Close scopes from top of stack where the closing condition is met.
298        while let Some(top) = stack.last() {
299            let should_close =
300                indent < top.indent || (indent == top.indent && top.max_seen > top.indent);
301            if should_close {
302                let popped = stack.pop().unwrap();
303                finished.push((popped.url, popped.steps));
304            } else {
305                break;
306            }
307        }
308
309        // Update max_seen for remaining top scope.
310        if let Some(top) = stack.last_mut() {
311            if indent > top.max_seen {
312                top.max_seen = indent;
313            }
314        }
315    }
316
317    // Flush remaining scopes (EOF closes everything).
318    while let Some(scope) = stack.pop() {
319        finished.push((scope.url, scope.steps));
320    }
321
322    // Sort by the URL's line number so output order is deterministic.
323    finished.sort_by_key(|(url, _)| url.line);
324    finished
325}
326
#[cfg(test)]
mod tests {
    use super::*;

    /// Three WHATWG specs used as the known-spec table for every test.
    fn test_spec_urls() -> Vec<SpecUrl> {
        vec![
            SpecUrl {
                spec: "HTML".into(),
                base_url: "https://html.spec.whatwg.org".into(),
            },
            SpecUrl {
                spec: "DOM".into(),
                base_url: "https://dom.spec.whatwg.org".into(),
            },
            SpecUrl {
                spec: "URL".into(),
                base_url: "https://url.spec.whatwg.org".into(),
            },
        ]
    }

    /// URL regex built from `test_spec_urls`.
    fn pattern() -> Regex {
        build_url_pattern(&test_spec_urls())
    }

    /// Base URL -> spec name map built from `test_spec_urls`.
    fn lookup() -> std::collections::HashMap<String, String> {
        build_spec_lookup(&test_spec_urls())
    }

    /// Helper: scan text and build scopes in one call.
    fn scopes_for(text: &str) -> Vec<(UrlMatch, Vec<StepComment>)> {
        let urls = scan_document(text, &pattern(), &lookup());
        let steps = scan_steps(text);
        build_scopes(text, &urls, &steps)
    }

    // ── URL pattern tests ──

    #[test]
    fn matches_html_url() {
        let p = pattern();
        let caps = p
            .captures("https://html.spec.whatwg.org/#navigate")
            .unwrap();
        assert_eq!(
            caps.get(1).unwrap().as_str(),
            "https://html.spec.whatwg.org"
        );
        assert_eq!(caps.get(2).unwrap().as_str(), "navigate");
    }

    #[test]
    fn matches_dom_url() {
        let p = pattern();
        let caps = p
            .captures("https://dom.spec.whatwg.org/#concept-tree")
            .unwrap();
        assert_eq!(caps.get(2).unwrap().as_str(), "concept-tree");
    }

    #[test]
    fn no_match_unknown_spec() {
        let p = pattern();
        assert!(p.captures("https://example.com/#foo").is_none());
    }

    #[test]
    fn no_match_without_fragment() {
        let p = pattern();
        assert!(p.captures("https://html.spec.whatwg.org/").is_none());
    }

    #[test]
    fn anchor_with_dots() {
        // The anchor must actually contain dots for this test to exercise
        // the `.` in the anchor character class.
        let p = pattern();
        let caps = p
            .captures("https://html.spec.whatwg.org/#sec-array.prototype.map")
            .unwrap();
        assert_eq!(caps.get(2).unwrap().as_str(), "sec-array.prototype.map");
    }

    #[test]
    fn anchor_with_colons() {
        let p = pattern();
        let caps = p
            .captures("https://html.spec.whatwg.org/#concept-url-parser:percent-encoded-bytes")
            .unwrap();
        assert_eq!(
            caps.get(2).unwrap().as_str(),
            "concept-url-parser:percent-encoded-bytes"
        );
    }

    #[test]
    fn multipage_url() {
        let p = pattern();
        let caps = p
            .captures("https://html.spec.whatwg.org/multipage/browsing-the-web.html#navigate")
            .unwrap();
        assert_eq!(
            caps.get(1).unwrap().as_str(),
            "https://html.spec.whatwg.org"
        );
        assert_eq!(caps.get(2).unwrap().as_str(), "navigate");
    }

    // ── Scan document tests ──

    #[test]
    fn single_url_in_comment() {
        let text = "// https://html.spec.whatwg.org/#navigate";
        let matches = scan_document(text, &pattern(), &lookup());
        assert_eq!(matches.len(), 1);
        assert_eq!(matches[0].spec, "HTML");
        assert_eq!(matches[0].anchor, "navigate");
        assert_eq!(matches[0].line, 0);
        assert_eq!(matches[0].indent, 0);
    }

    #[test]
    fn indented_url() {
        let text = "    // https://html.spec.whatwg.org/#navigate";
        let matches = scan_document(text, &pattern(), &lookup());
        assert_eq!(matches.len(), 1);
        assert_eq!(matches[0].indent, 4);
    }

    #[test]
    fn multiple_urls() {
        let text = "// https://html.spec.whatwg.org/#navigate\ncode();\n// https://dom.spec.whatwg.org/#concept-tree\n";
        let matches = scan_document(text, &pattern(), &lookup());
        assert_eq!(matches.len(), 2);
        assert_eq!(matches[0].spec, "HTML");
        assert_eq!(matches[0].line, 0);
        assert_eq!(matches[1].spec, "DOM");
        assert_eq!(matches[1].line, 2);
    }

    #[test]
    fn no_urls() {
        let text = "just some code\nwith no spec urls\n";
        let matches = scan_document(text, &pattern(), &lookup());
        assert!(matches.is_empty());
    }

    // ── Scan steps tests ──

    #[test]
    fn cpp_step_comment() {
        let text = "// Step 5.1. Assert: userInvolvement is browser UI";
        let steps = scan_steps(text);
        assert_eq!(steps.len(), 1);
        assert_eq!(steps[0].number, vec![5, 1]);
        assert!(steps[0].text.contains("Assert"));
        assert_eq!(steps[0].indent, 0);
    }

    #[test]
    fn indented_step() {
        let text = "      // Step 1. Do something";
        let steps = scan_steps(text);
        assert_eq!(steps.len(), 1);
        assert_eq!(steps[0].indent, 6);
    }

    #[test]
    fn step_without_prefix() {
        let text = "// 5.1. Let x be something";
        let steps = scan_steps(text);
        assert_eq!(steps.len(), 1);
        assert_eq!(steps[0].number, vec![5, 1]);
    }

    #[test]
    fn step_no_trailing_dot() {
        let text = "// Step 5.1 Assert: foo";
        let steps = scan_steps(text);
        assert_eq!(steps.len(), 1);
        assert_eq!(steps[0].number, vec![5, 1]);
    }

    #[test]
    fn step_number_only() {
        let text = "// Step 5.";
        let steps = scan_steps(text);
        assert_eq!(steps.len(), 1);
        assert_eq!(steps[0].number, vec![5]);
        assert_eq!(steps[0].text, "");
        // A single-line comment has no separate end line.
        assert_eq!(steps[0].end_line, None);
    }

    #[test]
    fn python_step_comment() {
        let text = "# Step 3. Do something";
        let steps = scan_steps(text);
        assert_eq!(steps.len(), 1);
        assert_eq!(steps[0].number, vec![3]);
    }

    #[test]
    fn css_step_comment() {
        let text = "/* Step 1. Init */";
        let steps = scan_steps(text);
        assert_eq!(steps.len(), 1);
        assert_eq!(steps[0].number, vec![1]);
        assert_eq!(steps[0].text, "Init");
    }

    #[test]
    fn no_step_comment() {
        let text = "// This is just a regular comment";
        let steps = scan_steps(text);
        assert!(steps.is_empty());
    }

    #[test]
    fn multiple_steps() {
        let text = "// Step 1. First\n// Step 2. Second\n// Step 3. Third";
        let steps = scan_steps(text);
        assert_eq!(steps.len(), 3);
        assert_eq!(steps[0].line, 0);
        assert_eq!(steps[1].line, 1);
        assert_eq!(steps[2].line, 2);
    }

    #[test]
    fn deeply_nested_number() {
        let text = "// Step 5.1.2 Deeply nested step";
        let steps = scan_steps(text);
        assert_eq!(steps.len(), 1);
        assert_eq!(steps[0].number, vec![5, 1, 2]);
    }

    #[test]
    fn asm_comment() {
        let text = "; Step 1. Assembly step";
        let steps = scan_steps(text);
        assert_eq!(steps.len(), 1);
        assert_eq!(steps[0].number, vec![1]);
    }

    #[test]
    fn bare_number_not_matched() {
        let text = "// 42 is the answer to life";
        let steps = scan_steps(text);
        assert!(steps.is_empty());
    }

    #[test]
    fn bare_number_with_port() {
        let text = "// Use port 8080";
        let steps = scan_steps(text);
        assert!(steps.is_empty());
    }

    #[test]
    fn single_number_with_trailing_dot() {
        let text = "// 5. Let x be something";
        let steps = scan_steps(text);
        assert_eq!(steps.len(), 1);
        assert_eq!(steps[0].number, vec![5]);
    }

    #[test]
    fn multi_part_without_prefix_or_dot() {
        let text = "// 5.1 Let x be something";
        let steps = scan_steps(text);
        assert_eq!(steps.len(), 1);
        assert_eq!(steps[0].number, vec![5, 1]);
    }

    #[test]
    fn multiline_continuation() {
        let text = "// Step 2.1 Foo Bar baz\n//       continues here";
        let steps = scan_steps(text);
        assert_eq!(steps.len(), 1);
        assert_eq!(steps[0].number, vec![2, 1]);
        assert_eq!(steps[0].text, "Foo Bar baz continues here");
        assert_eq!(steps[0].line, 0);
        assert_eq!(steps[0].end_line, Some(1));
    }

    #[test]
    fn multiline_stops_at_next_step() {
        let text = "// Step 1. First\n//   more first\n// Step 2. Second";
        let steps = scan_steps(text);
        assert_eq!(steps.len(), 2);
        assert_eq!(steps[0].text, "First more first");
        assert_eq!(steps[0].end_line, Some(1));
        assert_eq!(steps[1].text, "Second");
        assert_eq!(steps[1].line, 2);
        assert_eq!(steps[1].end_line, None);
    }

    #[test]
    fn multiline_stops_at_non_comment() {
        let text = "// Step 1. First\ncode();\n// Step 2. Second";
        let steps = scan_steps(text);
        assert_eq!(steps.len(), 2);
        assert_eq!(steps[0].text, "First");
        assert_eq!(steps[1].text, "Second");
    }

    // ── find_url_at_position tests ──

    #[test]
    fn cursor_on_url() {
        let text = "// https://html.spec.whatwg.org/#navigate";
        let matches = scan_document(text, &pattern(), &lookup());
        assert!(find_url_at_position(&matches, 0, 10).is_some());
    }

    #[test]
    fn cursor_before_url() {
        let text = "// https://html.spec.whatwg.org/#navigate";
        let matches = scan_document(text, &pattern(), &lookup());
        assert!(find_url_at_position(&matches, 0, 0).is_none());
    }

    #[test]
    fn cursor_wrong_line() {
        let text = "// https://html.spec.whatwg.org/#navigate\nfoo";
        let matches = scan_document(text, &pattern(), &lookup());
        assert!(find_url_at_position(&matches, 1, 0).is_none());
    }

    // ── build_scopes tests (indentation-based) ──

    #[test]
    fn scope_simple_flat() {
        // All at indent 0: URL + steps, no closing brace.
        let text = "\
// https://html.spec.whatwg.org/#navigate
// Step 1. First
// Step 2. Second
";
        let scopes = scopes_for(text);
        assert_eq!(scopes.len(), 1);
        assert_eq!(scopes[0].0.anchor, "navigate");
        assert_eq!(scopes[0].1.len(), 2);
    }

    #[test]
    fn scope_comment_above_function() {
        // URL at indent 0, function body at indent 4, closing } at indent 0.
        let text = "\
// https://html.spec.whatwg.org/#navigate
void DoNavigate() {
    // Step 1. First
    code();
    // Step 2. Second
    more_code();
}
";
        let scopes = scopes_for(text);
        assert_eq!(scopes.len(), 1);
        assert_eq!(scopes[0].0.anchor, "navigate");
        assert_eq!(scopes[0].1.len(), 2);
    }

    #[test]
    fn scope_comment_inside_function() {
        // URL at indent 4 (inside function body), } at indent 0 closes it.
        let text = "\
void DoNavigate() {
    // https://html.spec.whatwg.org/#navigate
    // Step 1. First
    code();
    // Step 2. Second
}
";
        let scopes = scopes_for(text);
        assert_eq!(scopes.len(), 1);
        assert_eq!(scopes[0].0.anchor, "navigate");
        assert_eq!(scopes[0].1.len(), 2);
    }

    #[test]
    fn scope_class_member_closes_at_brace() {
        // URL at indent 2 (class member), function body at indent 4, } at indent 2 closes scope.
        let text = "\
class Foo {
  // https://html.spec.whatwg.org/#navigate
  void foo() {
    // Step 1. Do this
    do_this();
    // Step 2. Do that
    do_that();
  }

  void bar() {
    // Step 3. Should not be in navigate scope
    other();
  }
}
";
        let scopes = scopes_for(text);
        assert_eq!(scopes.len(), 1);
        assert_eq!(scopes[0].0.anchor, "navigate");
        assert_eq!(scopes[0].1.len(), 2);
        assert_eq!(scopes[0].1[0].number, vec![1]);
        assert_eq!(scopes[0].1[1].number, vec![2]);
    }

    #[test]
    fn scope_two_separate_functions() {
        // Two functions, each with its own spec URL.
        let text = "\
class Foo {
  // https://html.spec.whatwg.org/#navigate
  void navigate() {
    // Step 1. Nav step
    nav();
  }

  // https://dom.spec.whatwg.org/#concept-tree
  void tree() {
    // Step 1. Tree step
    tree_op();
  }
}
";
        let scopes = scopes_for(text);
        assert_eq!(scopes.len(), 2);
        assert_eq!(scopes[0].0.anchor, "navigate");
        assert_eq!(scopes[0].1.len(), 1);
        assert_eq!(scopes[0].1[0].number, vec![1]);
        assert_eq!(scopes[1].0.anchor, "concept-tree");
        assert_eq!(scopes[1].1.len(), 1);
        assert_eq!(scopes[1].1[0].number, vec![1]);
    }

    #[test]
    fn scope_nested_stacked() {
        // Outer algorithm with an inner algorithm nested inside.
        let text = "\
void Navigate() {
    // https://html.spec.whatwg.org/#navigate
    // Step 1. Outer step one
    code();
    if (cond) {
        // https://dom.spec.whatwg.org/#concept-tree
        // Step 1. Inner step one
        inner_code();
    }
    // Step 2. Outer step two
    more_code();
}
";
        let scopes = scopes_for(text);
        assert_eq!(scopes.len(), 2);
        // Inner scope
        assert_eq!(scopes[1].0.anchor, "concept-tree");
        assert_eq!(scopes[1].1.len(), 1);
        assert_eq!(scopes[1].1[0].number, vec![1]);
        // Outer scope
        assert_eq!(scopes[0].0.anchor, "navigate");
        assert_eq!(scopes[0].1.len(), 2);
        assert_eq!(scopes[0].1[0].number, vec![1]);
        assert_eq!(scopes[0].1[1].number, vec![2]);
    }

    #[test]
    fn scope_same_indent_replaces() {
        // Two URLs at the same indent level replace each other.
        let text = "\
void foo() {
    // https://html.spec.whatwg.org/#navigate
    // Step 1. Navigate step
    code();

    // https://dom.spec.whatwg.org/#concept-tree
    // Step 1. Tree step
    more_code();
}
";
        let scopes = scopes_for(text);
        assert_eq!(scopes.len(), 2);
        assert_eq!(scopes[0].0.anchor, "navigate");
        assert_eq!(scopes[0].1.len(), 1);
        assert_eq!(scopes[0].1[0].text, "Navigate step");
        assert_eq!(scopes[1].0.anchor, "concept-tree");
        assert_eq!(scopes[1].1.len(), 1);
        assert_eq!(scopes[1].1[0].text, "Tree step");
    }

    #[test]
    fn scope_orphan_steps_ignored() {
        // Steps before any URL are not assigned to any scope.
        let text = "\
// Step 1. Orphan step
// https://html.spec.whatwg.org/#navigate
// Step 2. Assigned step
";
        let scopes = scopes_for(text);
        assert_eq!(scopes.len(), 1);
        assert_eq!(scopes[0].1.len(), 1);
        assert_eq!(scopes[0].1[0].number, vec![2]);
    }

    #[test]
    fn scope_no_urls_empty() {
        let text = "// Step 1. Orphan";
        let scopes = scopes_for(text);
        assert!(scopes.is_empty());
    }

    #[test]
    fn scope_deeply_nested_stack() {
        // Three levels of nesting.
        let text = "\
class Outer {
  // https://html.spec.whatwg.org/#navigate
  void foo() {
    // Step 1. Outer step
    if (a) {
      // https://dom.spec.whatwg.org/#concept-tree
      // Step 1. Middle step
      if (b) {
        // https://url.spec.whatwg.org/#url-parsing
        // Step 1. Inner step
        parse();
      }
      // Step 2. Middle step two
      tree();
    }
    // Step 2. Outer step two
    done();
  }
}
";
        let scopes = scopes_for(text);
        assert_eq!(scopes.len(), 3);

        let nav = scopes.iter().find(|(u, _)| u.anchor == "navigate").unwrap();
        assert_eq!(nav.1.len(), 2);
        assert_eq!(nav.1[0].number, vec![1]);
        assert_eq!(nav.1[1].number, vec![2]);

        let tree = scopes
            .iter()
            .find(|(u, _)| u.anchor == "concept-tree")
            .unwrap();
        assert_eq!(tree.1.len(), 2);
        assert_eq!(tree.1[0].number, vec![1]);
        assert_eq!(tree.1[1].number, vec![2]);

        let url = scopes
            .iter()
            .find(|(u, _)| u.anchor == "url-parsing")
            .unwrap();
        assert_eq!(url.1.len(), 1);
        assert_eq!(url.1[0].number, vec![1]);
    }

    #[test]
    fn scope_existing_fixture_compat() {
        // Matches the existing test fixture: input.cpp
        let text = "\
// https://html.spec.whatwg.org/#navigate
void DoNavigate(bool userInvolvement) {
  // Step 1. Let cspNavigationType be form-submission
  auto cspNavigationType = GetCSPNavType();

  // Step 2. Let sourceSnapshotParams be the result of snapshotting
  auto params = SnapshotParams();

  // Step 3. If url is about:blank, then return
  if (IsAboutBlank(url)) {
    return;
  }

  // Step 99. Nonexistent step
  DoSomething();
}
";
        let scopes = scopes_for(text);
        assert_eq!(scopes.len(), 1);
        assert_eq!(scopes[0].0.anchor, "navigate");
        assert_eq!(scopes[0].1.len(), 4);
        assert_eq!(scopes[0].1[0].number, vec![1]);
        assert_eq!(scopes[0].1[1].number, vec![2]);
        assert_eq!(scopes[0].1[2].number, vec![3]);
        assert_eq!(scopes[0].1[3].number, vec![99]);
    }
}