// hyalo_cli/commands/section_scanner.rs

#![allow(clippy::missing_errors_doc)]
use hyalo_core::heading::parse_atx_heading;
use hyalo_core::links;
use hyalo_core::scanner::{FileVisitor, ScanAction};
use hyalo_core::types::{OutlineSection, TaskCount};

// ---------------------------------------------------------------------------
// SectionScanner visitor
// ---------------------------------------------------------------------------

11/// State accumulated for the current section being built.
12struct SectionBuilder {
13    level: u8,
14    heading: Option<String>,
15    /// 1-based line number where this section starts (heading line, or 1 for pre-heading)
16    line: usize,
17    links: Vec<String>,
18    task_total: usize,
19    task_done: usize,
20    code_blocks: Vec<String>,
21}
22
23impl SectionBuilder {
24    fn new(level: u8, heading: Option<String>, line: usize) -> Self {
25        Self {
26            level,
27            heading,
28            line,
29            links: Vec::new(),
30            task_total: 0,
31            task_done: 0,
32            code_blocks: Vec::new(),
33        }
34    }
35
36    fn finish(self) -> OutlineSection {
37        let tasks = if self.task_total > 0 {
38            Some(TaskCount {
39                total: self.task_total,
40                done: self.task_done,
41            })
42        } else {
43            None
44        };
45        OutlineSection {
46            level: self.level,
47            heading: self.heading,
48            line: self.line,
49            links: self.links,
50            tasks,
51            code_blocks: self.code_blocks,
52        }
53    }
54}
55
56/// Visitor that builds outline sections from body events.
57/// Tracks headings, links, tasks, and code blocks per section.
58pub struct SectionScanner {
59    current: SectionBuilder,
60    sections: Vec<OutlineSection>,
61}
62
63impl Default for SectionScanner {
64    fn default() -> Self {
65        Self::new()
66    }
67}
68
69impl SectionScanner {
70    #[must_use]
71    pub fn new() -> Self {
72        Self {
73            current: SectionBuilder::new(0, None, 1),
74            sections: Vec::new(),
75        }
76    }
77
78    /// Consume and return all collected sections.
79    #[must_use]
80    pub fn into_sections(mut self) -> Vec<OutlineSection> {
81        // Flush the last section
82        let last = std::mem::replace(&mut self.current, SectionBuilder::new(0, None, 0));
83        let finished = last.finish();
84        let should_emit = finished.level > 0
85            || !finished.links.is_empty()
86            || finished.tasks.is_some()
87            || !finished.code_blocks.is_empty();
88        if should_emit {
89            self.sections.push(finished);
90        }
91        self.sections
92    }
93}
94
95impl FileVisitor for SectionScanner {
96    fn on_body_line(&mut self, raw: &str, cleaned: &str, line_num: usize) -> ScanAction {
97        // Use raw for ATX heading detection to preserve code spans in heading text
98        // (e.g. `## The \`versions\` field` → heading text is `The \`versions\` field`).
99        if let Some((level, heading_text)) = parse_atx_heading(raw) {
100            let finished = std::mem::replace(
101                &mut self.current,
102                SectionBuilder::new(level, Some(heading_text.to_owned()), line_num),
103            );
104
105            let should_emit = finished.level > 0
106                || !finished.links.is_empty()
107                || finished.task_total > 0
108                || !finished.code_blocks.is_empty();
109
110            if should_emit {
111                self.sections.push(finished.finish());
112            }
113
114            return ScanAction::Continue;
115        }
116
117        // Normal text line — use cleaned (inline code spans stripped) so that
118        // [[links]] inside backtick spans are not extracted as real links.
119        let mut line_links: Vec<links::Link> = Vec::new();
120        links::extract_links_from_text(cleaned, &mut line_links);
121
122        for link in line_links {
123            let formatted = format_link_string(&link);
124            self.current.links.push(formatted);
125        }
126
127        if let Some((_status, done)) = hyalo_core::tasks::detect_task_checkbox(raw) {
128            self.current.task_total += 1;
129            if done {
130                self.current.task_done += 1;
131            }
132        }
133
134        ScanAction::Continue
135    }
136
137    fn on_code_fence_open(&mut self, _raw: &str, language: &str, _line_num: usize) -> ScanAction {
138        if !language.is_empty() {
139            self.current.code_blocks.push(language.to_owned());
140        }
141        ScanAction::Continue
142    }
143}
144
// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------

149/// Format a `Link` into a human-readable string for storage in the outline.
150fn format_link_string(link: &links::Link) -> String {
151    match link.kind {
152        links::LinkKind::Wikilink => match &link.label {
153            Some(label) if !label.is_empty() => format!("[[{}|{}]]", link.target, label),
154            _ => format!("[[{}]]", link.target),
155        },
156        links::LinkKind::Markdown => match &link.label {
157            Some(label) if !label.is_empty() => format!("[{}]({})", label, link.target),
158            _ => format!("[]({})", link.target),
159        },
160    }
161}
162
// ---------------------------------------------------------------------------
// Unit tests
// ---------------------------------------------------------------------------

#[cfg(test)]
mod tests {
    use super::*;
    use hyalo_core::scanner;
    use std::fs;

    /// Strip one leading newline from a fixture literal so the Markdown can
    /// start on the line after the opening quote.
    macro_rules! md {
        ($s:expr) => {
            $s.strip_prefix('\n').unwrap_or($s)
        };
    }

    /// Helper: scan a file and return its sections using the new visitor.
    fn scan_sections(path: &std::path::Path) -> Vec<OutlineSection> {
        let mut ss = SectionScanner::new();
        scanner::scan_file_multi(path, &mut [&mut ss]).unwrap();
        ss.into_sections()
    }

    /// Helper: write `contents` to a Markdown file in a fresh temporary
    /// directory, scan it, and return the resulting sections.
    fn scan_markdown(contents: &str) -> Vec<OutlineSection> {
        let tmp = tempfile::tempdir().unwrap();
        let path = tmp.path().join("note.md");
        fs::write(&path, contents).unwrap();
        scan_sections(&path)
    }

    // parse_atx_heading tests are in src/heading.rs

    // --- extract_fence_language ---

    #[test]
    fn fence_language_rust() {
        assert_eq!(scanner::extract_fence_language("```rust", '`', 3), "rust");
    }

    #[test]
    fn fence_no_language() {
        assert_eq!(scanner::extract_fence_language("```", '`', 3), "");
    }

    #[test]
    fn fence_language_with_spaces() {
        assert_eq!(scanner::extract_fence_language("```  sh  ", '`', 3), "sh");
    }

    // --- format_link_string ---

    #[test]
    fn format_wikilink_no_label() {
        let link = links::Link {
            target: "my-note".to_owned(),
            label: None,
            kind: links::LinkKind::Wikilink,
        };
        assert_eq!(format_link_string(&link), "[[my-note]]");
    }

    #[test]
    fn format_wikilink_with_label() {
        let link = links::Link {
            target: "my-note".to_owned(),
            label: Some("My Note".to_owned()),
            kind: links::LinkKind::Wikilink,
        };
        assert_eq!(format_link_string(&link), "[[my-note|My Note]]");
    }

    #[test]
    fn format_wikilink_empty_label_is_omitted() {
        let link = links::Link {
            target: "my-note".to_owned(),
            label: Some(String::new()),
            kind: links::LinkKind::Wikilink,
        };
        assert_eq!(format_link_string(&link), "[[my-note]]");
    }

    #[test]
    fn format_markdown_link_with_label() {
        let link = links::Link {
            target: "https://example.com".to_owned(),
            label: Some("Example".to_owned()),
            kind: links::LinkKind::Markdown,
        };
        assert_eq!(format_link_string(&link), "[Example](https://example.com)");
    }

    #[test]
    fn format_markdown_link_no_label() {
        let link = links::Link {
            target: "https://example.com".to_owned(),
            label: None,
            kind: links::LinkKind::Markdown,
        };
        assert_eq!(format_link_string(&link), "[](https://example.com)");
    }

    #[test]
    fn format_file_path_link_with_label() {
        let link = links::Link {
            target: "docs/some-note.md".to_owned(),
            label: Some("Some Note".to_owned()),
            kind: links::LinkKind::Markdown,
        };
        assert_eq!(format_link_string(&link), "[Some Note](docs/some-note.md)");
    }

    // --- scan_sections ---

    #[test]
    fn empty_file_produces_no_sections() {
        let sections = scan_markdown("");
        assert!(sections.is_empty());
    }

    #[test]
    fn file_with_only_frontmatter_produces_no_sections() {
        let sections = scan_markdown(md!(r"
---
title: Test
---
"));
        assert!(sections.is_empty());
    }

    #[test]
    fn single_heading_produces_one_section() {
        let sections = scan_markdown(md!(r"
# Hello

Some text.
"));
        assert_eq!(sections.len(), 1);
        assert_eq!(sections[0].level, 1);
        assert_eq!(sections[0].heading.as_deref(), Some("Hello"));
        assert_eq!(sections[0].line, 1);
    }

    #[test]
    fn multiple_headings_produce_multiple_sections() {
        let sections = scan_markdown(md!(r"
# First

Text A.

## Sub

Text B.
"));
        assert_eq!(sections.len(), 2);
        assert_eq!(sections[0].level, 1);
        assert_eq!(sections[0].heading.as_deref(), Some("First"));
        assert_eq!(sections[1].level, 2);
        assert_eq!(sections[1].heading.as_deref(), Some("Sub"));
    }

    #[test]
    fn pre_heading_section_emitted_when_has_links() {
        let sections = scan_markdown(md!(r"
See [[some-note]] for details.

# Heading
"));
        // pre-heading section (level 0) + heading section
        assert_eq!(sections.len(), 2);
        assert_eq!(sections[0].level, 0);
        assert_eq!(sections[0].heading, None);
        assert_eq!(sections[0].links.len(), 1);
        assert_eq!(sections[0].links[0], "[[some-note]]");
    }

    #[test]
    fn pre_heading_section_not_emitted_when_empty() {
        let sections = scan_markdown(md!(r"
# Heading

Text here.
"));
        assert_eq!(sections.len(), 1);
        assert_eq!(sections[0].level, 1);
    }

    #[test]
    fn links_extracted_per_section() {
        let sections = scan_markdown(md!(r"
# Section A

See [[note-a]] and [[note-b]].

# Section B

See [[note-c]].
"));
        assert_eq!(sections.len(), 2);
        assert_eq!(sections[0].links.len(), 2);
        assert!(sections[0].links.contains(&"[[note-a]]".to_owned()));
        assert!(sections[0].links.contains(&"[[note-b]]".to_owned()));
        assert_eq!(sections[1].links.len(), 1);
        assert_eq!(sections[1].links[0], "[[note-c]]");
    }

    #[test]
    fn tasks_counted_per_section() {
        let sections = scan_markdown(md!(r"
# Tasks

- [ ] Open task
- [x] Done task
- [X] Also done
- Regular bullet
"));
        assert_eq!(sections.len(), 1);
        let tasks = sections[0].tasks.as_ref().unwrap();
        assert_eq!(tasks.total, 3);
        assert_eq!(tasks.done, 2);
    }

    #[test]
    fn no_tasks_field_when_no_tasks() {
        let sections = scan_markdown(md!(r"
# Section

Just text, no tasks.
"));
        assert!(sections[0].tasks.is_none());
    }

    #[test]
    fn code_blocks_tracked_per_section() {
        let sections = scan_markdown(md!(r"
# Code Section

```rust
let x = 1;
```

~~~python
print('hello')
~~~
"));
        assert_eq!(sections.len(), 1);
        assert_eq!(sections[0].code_blocks.len(), 2);
        assert!(sections[0].code_blocks.contains(&"rust".to_owned()));
        assert!(sections[0].code_blocks.contains(&"python".to_owned()));
    }

    #[test]
    fn links_inside_code_blocks_not_extracted() {
        let sections = scan_markdown(md!(r"
# Section

```
[[not-a-link]]
```

[[real-link]]
"));
        assert_eq!(sections[0].links.len(), 1);
        assert_eq!(sections[0].links[0], "[[real-link]]");
    }

    #[test]
    fn links_inside_inline_code_not_extracted() {
        let sections = scan_markdown(md!(r"
# Section

Use `[[not-a-link]]` and [[real-link]].
"));
        assert_eq!(sections[0].links.len(), 1);
        assert_eq!(sections[0].links[0], "[[real-link]]");
    }

    #[test]
    fn line_numbers_correct_for_headings() {
        let sections = scan_markdown(md!(r"
---
title: Test
---
# First Heading

## Second Heading
"));
        assert_eq!(sections.len(), 2);
        // "---", "title: Test", "---" = 3 lines of frontmatter. First heading is line 4.
        assert_eq!(sections[0].line, 4);
        // Blank line at 5, second heading at 6
        assert_eq!(sections[1].line, 6);
    }

    #[test]
    fn heading_with_inline_code_span_preserved() {
        // Regression test: heading text must include code spans verbatim.
        // A heading like `## The \`versions\` field` must NOT have its backtick
        // content replaced with spaces in the section heading field.
        let sections = scan_markdown(md!(r"
## The `versions` field

Some text.
"));
        assert_eq!(sections.len(), 1);
        assert_eq!(sections[0].heading.as_deref(), Some("The `versions` field"));
    }

    #[test]
    fn links_inside_inline_code_in_heading_not_extracted() {
        // A heading like `## See \`[[not-a-link]]\`` must not emit the wikilink as
        // a real outbound link. The code span sits on the heading line itself,
        // and on_body_line returns early on headings before link extraction, so
        // only the real body link `[[real-link]]` should be recorded.
        let sections = scan_markdown(md!(r"
## See `[[not-a-link]]`

Real link: [[real-link]].
"));
        assert_eq!(sections.len(), 1);
        assert_eq!(sections[0].links.len(), 1);
        assert_eq!(sections[0].links[0], "[[real-link]]");
    }
}