webspec_index/analyze/
steps.rs1use regex::Regex;
4
5#[derive(Debug, Clone)]
7pub struct AlgorithmStep {
8 pub number: Vec<u32>,
9 pub text: String,
10 pub children: Vec<AlgorithmStep>,
11}
12
13pub fn strip_markdown(text: &str) -> String {
15 use std::sync::OnceLock;
16 static LINK_RE: OnceLock<Regex> = OnceLock::new();
17 static BOLD_RE: OnceLock<Regex> = OnceLock::new();
18 static ITALIC_RE: OnceLock<Regex> = OnceLock::new();
19 static CODE_RE: OnceLock<Regex> = OnceLock::new();
20
21 let link_re = LINK_RE.get_or_init(|| Regex::new(r"\[([^\]]*)\]\([^)]*\)").unwrap());
22 let bold_re = BOLD_RE.get_or_init(|| Regex::new(r"\*\*([^*]*)\*\*").unwrap());
23 let italic_re = ITALIC_RE.get_or_init(|| Regex::new(r"\*([^*]*)\*").unwrap());
24 let code_re = CODE_RE.get_or_init(|| Regex::new(r"`([^`]*)`").unwrap());
25
26 let text = link_re.replace_all(text, "$1");
27 let text = bold_re.replace_all(&text, "$1");
28 let text = italic_re.replace_all(&text, "$1");
29 let text = code_re.replace_all(&text, "$1");
30 text.to_string()
31}
32
33fn step_line_re() -> &'static Regex {
35 use std::sync::OnceLock;
36 static RE: OnceLock<Regex> = OnceLock::new();
37 RE.get_or_init(|| Regex::new(r"^( *)\d+\.\s").unwrap())
38}
39
40fn parse_step_line(line: &str) -> Option<(usize, u32, String)> {
44 let re = step_line_re();
45 let m = re.find(line)?;
46 let spaces = line.len() - line.trim_start().len();
47 let indent = spaces / 4;
48
49 let rest = line.trim_start();
50 let dot_pos = rest.find('.')?;
51 let num: u32 = rest[..dot_pos].parse().ok()?;
52 let text = rest[dot_pos + 1..].trim().to_string();
53 let _ = m; Some((indent, num, text))
55}
56
57pub fn parse_steps(content: &str) -> Vec<AlgorithmStep> {
62 let lines: Vec<&str> = content.split('\n').collect();
63 let mut raw_steps: Vec<(usize, u32, String)> = Vec::new(); let mut i = 0;
66 while i < lines.len() {
67 if let Some((indent, num, mut text)) = parse_step_line(lines[i]) {
68 let mut j = i + 1;
70 while j < lines.len() {
71 let next_line = lines[j];
72 if next_line.trim().is_empty() {
73 j += 1;
74 continue;
75 }
76 if parse_step_line(next_line).is_some() {
77 break;
78 }
79 let stripped = next_line.trim_start();
80 let next_indent = next_line.len() - stripped.len();
81 let step_indent = indent * 4;
82 if next_indent > step_indent
83 && !stripped.starts_with('>')
84 && !stripped.starts_with('*')
85 {
86 text.push(' ');
87 text.push_str(stripped);
88 } else {
89 break;
90 }
91 j += 1;
92 }
93 raw_steps.push((indent, num, text));
94 i = j;
95 } else {
96 i += 1;
97 }
98 }
99
100 let mut steps: Vec<AlgorithmStep> = Vec::new();
102 struct StackEntry {
105 indent: isize,
106 path: Vec<usize>,
108 }
109 let mut stack: Vec<StackEntry> = vec![StackEntry {
110 indent: -1,
111 path: vec![],
112 }];
113
114 for (indent, _num, text) in &raw_steps {
115 let plain_text = strip_markdown(text);
116 let indent = *indent as isize;
117
118 while stack.len() > 1 && stack.last().unwrap().indent >= indent {
120 stack.pop();
121 }
122
123 let parent_path = stack.last().unwrap().path.clone();
124
125 let step = AlgorithmStep {
127 number: vec![], text: plain_text,
129 children: vec![],
130 };
131
132 if parent_path.is_empty() {
133 steps.push(step);
134 let new_idx = steps.len() - 1;
135 let mut child_path = parent_path;
136 child_path.push(new_idx);
137 stack.push(StackEntry {
138 indent,
139 path: child_path,
140 });
141 } else {
142 let children = get_children_mut(&mut steps, &parent_path);
144 children.push(step);
145 let new_idx = children.len() - 1;
146 let mut child_path = parent_path;
147 child_path.push(new_idx);
148 stack.push(StackEntry {
149 indent,
150 path: child_path,
151 });
152 }
153 }
154
155 assign_numbers(&mut steps, &[]);
156 steps
157}
158
159fn get_children_mut<'a>(
161 root: &'a mut [AlgorithmStep],
162 path: &[usize],
163) -> &'a mut Vec<AlgorithmStep> {
164 if path.is_empty() {
165 panic!("empty path in get_children_mut");
166 }
167 let mut current = &mut root[path[0]];
168 for &idx in &path[1..] {
169 current = &mut current.children[idx];
170 }
171 &mut current.children
172}
173
174fn assign_numbers(steps: &mut [AlgorithmStep], prefix: &[u32]) {
176 for (i, step) in steps.iter_mut().enumerate() {
177 let mut num = prefix.to_vec();
178 num.push((i + 1) as u32);
179 step.number = num.clone();
180 assign_numbers(&mut step.children, &num);
181 }
182}
183
184pub fn find_step<'a>(steps: &'a [AlgorithmStep], number: &[u32]) -> Option<&'a AlgorithmStep> {
186 if number.is_empty() {
187 return None;
188 }
189 let mut current = steps;
190 let mut target = None;
191 for &n in number {
192 if n < 1 || n as usize > current.len() {
193 return None;
194 }
195 target = Some(¤t[(n - 1) as usize]);
196 current = &target.unwrap().children;
197 }
198 target
199}
200
201pub fn flatten_steps(steps: &[AlgorithmStep]) -> Vec<&AlgorithmStep> {
203 let mut result = Vec::new();
204 for step in steps {
205 result.push(step);
206 result.extend(flatten_steps(&step.children));
207 }
208 result
209}
210
211#[cfg(test)]
212mod tests {
213 use super::*;
214
215 #[test]
216 fn strip_bold() {
217 assert_eq!(strip_markdown("**bold**"), "bold");
218 }
219
220 #[test]
221 fn strip_italic() {
222 assert_eq!(strip_markdown("*italic*"), "italic");
223 }
224
225 #[test]
226 fn strip_code() {
227 assert_eq!(strip_markdown("`code`"), "code");
228 }
229
230 #[test]
231 fn strip_link() {
232 assert_eq!(strip_markdown("[text](https://example.com)"), "text");
233 }
234
235 #[test]
236 fn strip_mixed() {
237 let result = strip_markdown("Let *x* be the result of [foo](https://bar.com)");
238 assert_eq!(result, "Let x be the result of foo");
239 }
240
241 #[test]
242 fn strip_nested_bold_link() {
243 let result = strip_markdown("[**bold link**](url)");
244 assert_eq!(result, "bold link");
245 }
246
247 #[test]
248 fn simple_flat() {
249 let content = "1. First step.\n2. Second step.\n3. Third step.";
250 let steps = parse_steps(content);
251 assert_eq!(steps.len(), 3);
252 assert_eq!(steps[0].number, vec![1]);
253 assert_eq!(steps[1].number, vec![2]);
254 assert_eq!(steps[2].number, vec![3]);
255 assert!(steps[0].text.contains("First step"));
256 assert!(steps[1].text.contains("Second step"));
257 }
258
259 #[test]
260 fn nested_steps() {
261 let content = "1. Parent step.\n\n 1. Child one.\n 2. Child two.\n2. Next parent.\n";
262 let steps = parse_steps(content);
263 assert_eq!(steps.len(), 2);
264 assert_eq!(steps[0].number, vec![1]);
265 assert_eq!(steps[1].number, vec![2]);
266 assert_eq!(steps[0].children.len(), 2);
267 assert_eq!(steps[0].children[0].number, vec![1, 1]);
268 assert_eq!(steps[0].children[1].number, vec![1, 2]);
269 }
270
271 #[test]
272 fn deeply_nested() {
273 let content = "1. Top level.\n\n 1. Second level.\n\n 1. Third level.\n 2. Third level b.\n 2. Second level b.\n2. Top level b.\n";
274 let steps = parse_steps(content);
275 assert_eq!(steps.len(), 2);
276 let deep = &steps[0].children[0].children[0];
277 assert_eq!(deep.number, vec![1, 1, 1]);
278 assert_eq!(steps[0].children[0].children[1].number, vec![1, 1, 2]);
279 }
280
281 #[test]
282 fn preamble_ignored() {
283 let content = "To **navigate** a navigable:\n\n1. First actual step.\n2. Second step.\n";
284 let steps = parse_steps(content);
285 assert_eq!(steps.len(), 2);
286 assert_eq!(steps[0].number, vec![1]);
287 }
288
289 #[test]
290 fn markdown_stripped_from_text() {
291 let content = "1. Let *cspNavigationType* be \"`form-submission`\".";
292 let steps = parse_steps(content);
293 assert_eq!(steps.len(), 1);
294 assert!(steps[0].text.contains("cspNavigationType"));
295 assert!(!steps[0].text.contains('*'));
296 }
297
298 #[test]
299 fn empty_content() {
300 assert!(parse_steps("").is_empty());
301 }
302
303 #[test]
304 fn no_steps() {
305 assert!(parse_steps("Just a paragraph with no numbered list.").is_empty());
306 }
307
308 #[test]
309 fn find_top_level() {
310 let steps = parse_steps("1. A.\n2. B.\n3. C.");
311 assert_eq!(find_step(&steps, &[2]).unwrap().text, "B.");
312 }
313
314 #[test]
315 fn find_nested() {
316 let content = "1. Parent.\n\n 1. Child.\n 2. Child b.\n2. Other.";
317 let steps = parse_steps(content);
318 let step = find_step(&steps, &[1, 2]);
319 assert!(step.is_some());
320 assert!(step.unwrap().text.contains("Child b"));
321 }
322
323 #[test]
324 fn find_not_found() {
325 let steps = parse_steps("1. A.\n2. B.");
326 assert!(find_step(&steps, &[99]).is_none());
327 }
328
329 #[test]
330 fn find_empty_number() {
331 let steps = parse_steps("1. A.");
332 assert!(find_step(&steps, &[]).is_none());
333 }
334
335 #[test]
336 fn flatten_flat() {
337 let steps = parse_steps("1. A.\n2. B.\n3. C.");
338 let flat = flatten_steps(&steps);
339 assert_eq!(flat.len(), 3);
340 assert_eq!(flat[0].number, vec![1]);
341 assert_eq!(flat[1].number, vec![2]);
342 assert_eq!(flat[2].number, vec![3]);
343 }
344
345 #[test]
346 fn flatten_nested() {
347 let content = "1. Parent.\n\n 1. Child.\n 2. Child b.\n2. Other.";
348 let steps = parse_steps(content);
349 let flat = flatten_steps(&steps);
350 assert_eq!(flat.len(), 4);
351 assert_eq!(flat[0].number, vec![1]);
352 assert_eq!(flat[1].number, vec![1, 1]);
353 assert_eq!(flat[2].number, vec![1, 2]);
354 assert_eq!(flat[3].number, vec![2]);
355 }
356}