Skip to main content

webspec_index/analyze/
steps.rs

1//! Algorithm step parsing from spec markdown content.
2
3use regex::Regex;
4
5/// A single step in a spec algorithm.
6#[derive(Debug, Clone)]
7pub struct AlgorithmStep {
8    pub number: Vec<u32>,
9    pub text: String,
10    pub children: Vec<AlgorithmStep>,
11}
12
13/// Strip markdown inline formatting, keeping the text content.
14pub fn strip_markdown(text: &str) -> String {
15    use std::sync::OnceLock;
16    static LINK_RE: OnceLock<Regex> = OnceLock::new();
17    static BOLD_RE: OnceLock<Regex> = OnceLock::new();
18    static ITALIC_RE: OnceLock<Regex> = OnceLock::new();
19    static CODE_RE: OnceLock<Regex> = OnceLock::new();
20
21    let link_re = LINK_RE.get_or_init(|| Regex::new(r"\[([^\]]*)\]\([^)]*\)").unwrap());
22    let bold_re = BOLD_RE.get_or_init(|| Regex::new(r"\*\*([^*]*)\*\*").unwrap());
23    let italic_re = ITALIC_RE.get_or_init(|| Regex::new(r"\*([^*]*)\*").unwrap());
24    let code_re = CODE_RE.get_or_init(|| Regex::new(r"`([^`]*)`").unwrap());
25
26    let text = link_re.replace_all(text, "$1");
27    let text = bold_re.replace_all(&text, "$1");
28    let text = italic_re.replace_all(&text, "$1");
29    let text = code_re.replace_all(&text, "$1");
30    text.to_string()
31}
32
33/// Matches a numbered list item: optional indentation, then "N. text"
34fn step_line_re() -> &'static Regex {
35    use std::sync::OnceLock;
36    static RE: OnceLock<Regex> = OnceLock::new();
37    RE.get_or_init(|| Regex::new(r"^( *)\d+\.\s").unwrap())
38}
39
40/// Parse a numbered list line.
41///
42/// Returns (indent_level, step_num, text) or None if not a step line.
43fn parse_step_line(line: &str) -> Option<(usize, u32, String)> {
44    let re = step_line_re();
45    let m = re.find(line)?;
46    let spaces = line.len() - line.trim_start().len();
47    let indent = spaces / 4;
48
49    let rest = line.trim_start();
50    let dot_pos = rest.find('.')?;
51    let num: u32 = rest[..dot_pos].parse().ok()?;
52    let text = rest[dot_pos + 1..].trim().to_string();
53    let _ = m; // used for match detection
54    Some((indent, num, text))
55}
56
57/// Parse algorithm steps from markdown content.
58///
59/// Expects the content field from query results, which contains numbered lists
60/// at various indentation levels representing algorithm steps.
61pub fn parse_steps(content: &str) -> Vec<AlgorithmStep> {
62    let lines: Vec<&str> = content.split('\n').collect();
63    let mut raw_steps: Vec<(usize, u32, String)> = Vec::new(); // (indent, num, text)
64
65    let mut i = 0;
66    while i < lines.len() {
67        if let Some((indent, num, mut text)) = parse_step_line(lines[i]) {
68            // Accumulate continuation lines
69            let mut j = i + 1;
70            while j < lines.len() {
71                let next_line = lines[j];
72                if next_line.trim().is_empty() {
73                    j += 1;
74                    continue;
75                }
76                if parse_step_line(next_line).is_some() {
77                    break;
78                }
79                let stripped = next_line.trim_start();
80                let next_indent = next_line.len() - stripped.len();
81                let step_indent = indent * 4;
82                if next_indent > step_indent
83                    && !stripped.starts_with('>')
84                    && !stripped.starts_with('*')
85                {
86                    text.push(' ');
87                    text.push_str(stripped);
88                } else {
89                    break;
90                }
91                j += 1;
92            }
93            raw_steps.push((indent, num, text));
94            i = j;
95        } else {
96            i += 1;
97        }
98    }
99
100    // Build hierarchical step tree
101    let mut steps: Vec<AlgorithmStep> = Vec::new();
102    // Stack of (indent_level, &mut children_vec)
103    // We use indices to avoid borrow issues
104    struct StackEntry {
105        indent: isize,
106        // Index path to the children vec in the tree
107        path: Vec<usize>,
108    }
109    let mut stack: Vec<StackEntry> = vec![StackEntry {
110        indent: -1,
111        path: vec![],
112    }];
113
114    for (indent, _num, text) in &raw_steps {
115        let plain_text = strip_markdown(text);
116        let indent = *indent as isize;
117
118        // Pop stack until we find the parent level
119        while stack.len() > 1 && stack.last().unwrap().indent >= indent {
120            stack.pop();
121        }
122
123        let parent_path = stack.last().unwrap().path.clone();
124
125        // Navigate to the parent's children list and add the new step
126        let step = AlgorithmStep {
127            number: vec![], // assigned later
128            text: plain_text,
129            children: vec![],
130        };
131
132        if parent_path.is_empty() {
133            steps.push(step);
134            let new_idx = steps.len() - 1;
135            let mut child_path = parent_path;
136            child_path.push(new_idx);
137            stack.push(StackEntry {
138                indent,
139                path: child_path,
140            });
141        } else {
142            // Navigate to parent step using path
143            let children = get_children_mut(&mut steps, &parent_path);
144            children.push(step);
145            let new_idx = children.len() - 1;
146            let mut child_path = parent_path;
147            child_path.push(new_idx);
148            stack.push(StackEntry {
149                indent,
150                path: child_path,
151            });
152        }
153    }
154
155    assign_numbers(&mut steps, &[]);
156    steps
157}
158
159/// Navigate to children of the step at the given path.
160fn get_children_mut<'a>(
161    root: &'a mut [AlgorithmStep],
162    path: &[usize],
163) -> &'a mut Vec<AlgorithmStep> {
164    if path.is_empty() {
165        panic!("empty path in get_children_mut");
166    }
167    let mut current = &mut root[path[0]];
168    for &idx in &path[1..] {
169        current = &mut current.children[idx];
170    }
171    &mut current.children
172}
173
174/// Assign hierarchical step numbers based on tree position.
175fn assign_numbers(steps: &mut [AlgorithmStep], prefix: &[u32]) {
176    for (i, step) in steps.iter_mut().enumerate() {
177        let mut num = prefix.to_vec();
178        num.push((i + 1) as u32);
179        step.number = num.clone();
180        assign_numbers(&mut step.children, &num);
181    }
182}
183
184/// Find a step by its hierarchical number path.
185pub fn find_step<'a>(steps: &'a [AlgorithmStep], number: &[u32]) -> Option<&'a AlgorithmStep> {
186    if number.is_empty() {
187        return None;
188    }
189    let mut current = steps;
190    let mut target = None;
191    for &n in number {
192        if n < 1 || n as usize > current.len() {
193            return None;
194        }
195        target = Some(&current[(n - 1) as usize]);
196        current = &target.unwrap().children;
197    }
198    target
199}
200
201/// Flatten a step tree into a list (depth-first).
202pub fn flatten_steps(steps: &[AlgorithmStep]) -> Vec<&AlgorithmStep> {
203    let mut result = Vec::new();
204    for step in steps {
205        result.push(step);
206        result.extend(flatten_steps(&step.children));
207    }
208    result
209}
210
211#[cfg(test)]
212mod tests {
213    use super::*;
214
215    #[test]
216    fn strip_bold() {
217        assert_eq!(strip_markdown("**bold**"), "bold");
218    }
219
220    #[test]
221    fn strip_italic() {
222        assert_eq!(strip_markdown("*italic*"), "italic");
223    }
224
225    #[test]
226    fn strip_code() {
227        assert_eq!(strip_markdown("`code`"), "code");
228    }
229
230    #[test]
231    fn strip_link() {
232        assert_eq!(strip_markdown("[text](https://example.com)"), "text");
233    }
234
235    #[test]
236    fn strip_mixed() {
237        let result = strip_markdown("Let *x* be the result of [foo](https://bar.com)");
238        assert_eq!(result, "Let x be the result of foo");
239    }
240
241    #[test]
242    fn strip_nested_bold_link() {
243        let result = strip_markdown("[**bold link**](url)");
244        assert_eq!(result, "bold link");
245    }
246
247    #[test]
248    fn simple_flat() {
249        let content = "1. First step.\n2. Second step.\n3. Third step.";
250        let steps = parse_steps(content);
251        assert_eq!(steps.len(), 3);
252        assert_eq!(steps[0].number, vec![1]);
253        assert_eq!(steps[1].number, vec![2]);
254        assert_eq!(steps[2].number, vec![3]);
255        assert!(steps[0].text.contains("First step"));
256        assert!(steps[1].text.contains("Second step"));
257    }
258
259    #[test]
260    fn nested_steps() {
261        let content = "1. Parent step.\n\n    1. Child one.\n    2. Child two.\n2. Next parent.\n";
262        let steps = parse_steps(content);
263        assert_eq!(steps.len(), 2);
264        assert_eq!(steps[0].number, vec![1]);
265        assert_eq!(steps[1].number, vec![2]);
266        assert_eq!(steps[0].children.len(), 2);
267        assert_eq!(steps[0].children[0].number, vec![1, 1]);
268        assert_eq!(steps[0].children[1].number, vec![1, 2]);
269    }
270
271    #[test]
272    fn deeply_nested() {
273        let content = "1. Top level.\n\n    1. Second level.\n\n        1. Third level.\n        2. Third level b.\n    2. Second level b.\n2. Top level b.\n";
274        let steps = parse_steps(content);
275        assert_eq!(steps.len(), 2);
276        let deep = &steps[0].children[0].children[0];
277        assert_eq!(deep.number, vec![1, 1, 1]);
278        assert_eq!(steps[0].children[0].children[1].number, vec![1, 1, 2]);
279    }
280
281    #[test]
282    fn preamble_ignored() {
283        let content = "To **navigate** a navigable:\n\n1. First actual step.\n2. Second step.\n";
284        let steps = parse_steps(content);
285        assert_eq!(steps.len(), 2);
286        assert_eq!(steps[0].number, vec![1]);
287    }
288
289    #[test]
290    fn markdown_stripped_from_text() {
291        let content = "1. Let *cspNavigationType* be \"`form-submission`\".";
292        let steps = parse_steps(content);
293        assert_eq!(steps.len(), 1);
294        assert!(steps[0].text.contains("cspNavigationType"));
295        assert!(!steps[0].text.contains('*'));
296    }
297
298    #[test]
299    fn empty_content() {
300        assert!(parse_steps("").is_empty());
301    }
302
303    #[test]
304    fn no_steps() {
305        assert!(parse_steps("Just a paragraph with no numbered list.").is_empty());
306    }
307
308    #[test]
309    fn find_top_level() {
310        let steps = parse_steps("1. A.\n2. B.\n3. C.");
311        assert_eq!(find_step(&steps, &[2]).unwrap().text, "B.");
312    }
313
314    #[test]
315    fn find_nested() {
316        let content = "1. Parent.\n\n    1. Child.\n    2. Child b.\n2. Other.";
317        let steps = parse_steps(content);
318        let step = find_step(&steps, &[1, 2]);
319        assert!(step.is_some());
320        assert!(step.unwrap().text.contains("Child b"));
321    }
322
323    #[test]
324    fn find_not_found() {
325        let steps = parse_steps("1. A.\n2. B.");
326        assert!(find_step(&steps, &[99]).is_none());
327    }
328
329    #[test]
330    fn find_empty_number() {
331        let steps = parse_steps("1. A.");
332        assert!(find_step(&steps, &[]).is_none());
333    }
334
335    #[test]
336    fn flatten_flat() {
337        let steps = parse_steps("1. A.\n2. B.\n3. C.");
338        let flat = flatten_steps(&steps);
339        assert_eq!(flat.len(), 3);
340        assert_eq!(flat[0].number, vec![1]);
341        assert_eq!(flat[1].number, vec![2]);
342        assert_eq!(flat[2].number, vec![3]);
343    }
344
345    #[test]
346    fn flatten_nested() {
347        let content = "1. Parent.\n\n    1. Child.\n    2. Child b.\n2. Other.";
348        let steps = parse_steps(content);
349        let flat = flatten_steps(&steps);
350        assert_eq!(flat.len(), 4);
351        assert_eq!(flat[0].number, vec![1]);
352        assert_eq!(flat[1].number, vec![1, 1]);
353        assert_eq!(flat[2].number, vec![1, 2]);
354        assert_eq!(flat[3].number, vec![2]);
355    }
356}