hyalo_core/scanner/visitor.rs
1use indexmap::IndexMap;
2use serde_json::Value;
3
4use super::ScanAction;
5
/// Streams a markdown file from disk through a closure-based visitor.
///
/// Opens `path`, wraps the handle in a buffered reader, and hands it to
/// [`scan_reader`]. Frontmatter, fenced code blocks, and inline code spans are
/// skipped; `visitor` is invoked for each text segment together with its
/// 1-based line number.
///
/// # Errors
///
/// Returns an error carrying the context `failed to open <path>` when the file
/// cannot be opened, and forwards any error produced by [`scan_reader`].
#[cfg(test)]
pub(crate) fn scan_file<F>(path: &std::path::Path, visitor: F) -> anyhow::Result<()>
where
    F: FnMut(&str, usize) -> ScanAction,
{
    use anyhow::Context as _;

    // Open and buffer in one chain so the context attaches to the open failure.
    let buffered = std::fs::File::open(path)
        .map(std::io::BufReader::new)
        .with_context(|| format!("failed to open {}", path.display()))?;

    scan_reader(buffered, visitor)
}
21
/// Runs the closure-based scan over any buffered reader.
///
/// This is the I/O-free entry point: tests can feed in-memory input instead of
/// touching the filesystem. The closure is adapted into a [`FileVisitor`] via
/// [`ClosureVisitor`] and passed to `scan_reader_multi` as the sole visitor.
#[cfg(test)]
pub(crate) fn scan_reader<R, F>(reader: R, visitor: F) -> anyhow::Result<()>
where
    R: std::io::BufRead,
    F: FnMut(&str, usize) -> ScanAction,
{
    let mut adapter = ClosureVisitor { visitor };
    super::scan_reader_multi(reader, &mut [&mut adapter])
}
31
/// Wraps a closure as a [`FileVisitor`].
///
/// [`dispatch_body_line`] strips both inline code spans and inline comments
/// before calling visitors, so this wrapper is a trivial passthrough.
///
/// The struct itself places no bound on `F`; the `FnMut(&str, usize) -> ScanAction`
/// requirement lives on the `FileVisitor` impl, which is the only place that
/// actually invokes the closure.
#[cfg(test)]
struct ClosureVisitor<F> {
    /// The wrapped callback, invoked once per body line by the `FileVisitor` impl.
    visitor: F,
}
40
#[cfg(test)]
impl<F> FileVisitor for ClosureVisitor<F>
where
    F: FnMut(&str, usize) -> ScanAction,
{
    /// Forwards the cleaned line and its line number to the wrapped closure.
    ///
    /// The raw text is deliberately ignored: the legacy closure-based API has
    /// always received cleaned text, and that is kept for backward compatibility.
    fn on_body_line(&mut self, _raw: &str, cleaned: &str, line_num: usize) -> ScanAction {
        let callback = &mut self.visitor;
        callback(cleaned, line_num)
    }
}
48
49/// Trait for visitors that receive events from a single-pass file scan.
50///
51/// All methods have default no-op implementations, so visitors only need
52/// to override the events they care about.
53pub trait FileVisitor {
54 /// Called with parsed frontmatter properties (empty `IndexMap` if none).
55 ///
56 /// The scanner passes ownership of the map to avoid a clone in the common
57 /// single-visitor case. When multiple visitors are present, the scanner
58 /// clones for all but the last, so only N-1 allocations occur for N visitors.
59 ///
60 /// Return `ScanAction::Stop` to skip the body scan for this visitor.
61 fn on_frontmatter(&mut self, _props: IndexMap<String, Value>) -> ScanAction {
62 ScanAction::Continue
63 }
64
65 /// Called for each body line outside fenced code blocks and comment blocks.
66 ///
67 /// `raw` is the original line text (code spans and comments intact).
68 /// `cleaned` has inline code spans and `%%comment%%` spans replaced with spaces
69 /// so that `[[links]]` inside backticks or comments are not extracted.
70 ///
71 /// Use `raw` for heading text extraction (to preserve code span content).
72 /// Use `cleaned` for link and task extraction (to skip backtick-escaped markup).
73 fn on_body_line(&mut self, _raw: &str, _cleaned: &str, _line_num: usize) -> ScanAction {
74 ScanAction::Continue
75 }
76
77 /// Called when a fenced code block opens (e.g. `` ```rust ``).
78 fn on_code_fence_open(&mut self, _raw: &str, _language: &str, _line_num: usize) -> ScanAction {
79 ScanAction::Continue
80 }
81
82 /// Called when a fenced code block closes.
83 fn on_code_fence_close(&mut self, _line_num: usize) -> ScanAction {
84 ScanAction::Continue
85 }
86
87 /// Called for each line inside a fenced code block (between open/close fences).
88 /// Default: no-op. Override this to receive code block content.
89 fn on_code_block_line(&mut self, _raw: &str, _line_num: usize) -> ScanAction {
90 ScanAction::Continue
91 }
92
93 /// Whether this visitor needs body events (`on_body_line`, `on_code_block_line`,
94 /// `on_code_fence_*`). If `false`, the visitor only receives `on_frontmatter`
95 /// and is then stopped. Default: `true`.
96 fn needs_body(&self) -> bool {
97 true
98 }
99
100 /// Whether this visitor needs parsed frontmatter properties.
101 /// If **no** visitor needs frontmatter, the scanner skips YAML accumulation
102 /// and `serde_saphyr` parsing (but still reads past the `---` delimiters).
103 /// Default: `true`.
104 fn needs_frontmatter(&self) -> bool {
105 true
106 }
107}