Skip to main content

hyalo_core/scanner/
visitor.rs

1use indexmap::IndexMap;
2use serde_json::Value;
3
4use super::ScanAction;
5
/// Streams a markdown file from disk through a callback.
///
/// Frontmatter, fenced code blocks, and inline code spans are skipped; the
/// `visitor` closure is invoked for each remaining text segment together with
/// its 1-based line number and answers with a [`ScanAction`].
///
/// This is a thin file-opening front end: the actual scan is delegated to
/// [`scan_reader`] over a buffered reader.
///
/// # Errors
///
/// Returns an error with the context `failed to open <path>` when the file
/// cannot be opened, and forwards whatever [`scan_reader`] returns.
#[cfg(test)]
pub(crate) fn scan_file<F>(path: &std::path::Path, visitor: F) -> anyhow::Result<()>
where
    F: FnMut(&str, usize) -> ScanAction,
{
    use anyhow::Context as _;

    let handle = std::fs::File::open(path)
        .with_context(|| format!("failed to open {}", path.display()))?;
    scan_reader(std::io::BufReader::new(handle), visitor)
}
21
/// Runs the closure-based scan over any buffered reader.
///
/// Handy in tests, where the input can be an in-memory buffer instead of a
/// file. The closure is wrapped in a [`ClosureVisitor`] and handed to
/// `super::scan_reader_multi` as the only visitor; that call's result is
/// returned unchanged.
#[cfg(test)]
pub(crate) fn scan_reader<R, F>(reader: R, visitor: F) -> anyhow::Result<()>
where
    R: std::io::BufRead,
    F: FnMut(&str, usize) -> ScanAction,
{
    let mut adapter = ClosureVisitor { visitor };
    super::scan_reader_multi(reader, &mut [&mut adapter])
}
31
/// Adapter that lets a plain closure act as a [`FileVisitor`].
///
/// The adapter does no text processing of its own: [`dispatch_body_line`]
/// already strips inline code spans and inline comments before visitors are
/// called, so the closure is simply forwarded to.
#[cfg(test)]
struct ClosureVisitor<F>
where
    F: FnMut(&str, usize) -> ScanAction,
{
    /// The wrapped callback, invoked once per body line.
    visitor: F,
}
40
#[cfg(test)]
impl<F> FileVisitor for ClosureVisitor<F>
where
    F: FnMut(&str, usize) -> ScanAction,
{
    /// Forwards the cleaned line and its line number to the wrapped closure.
    ///
    /// The raw text is deliberately ignored: the legacy closure-based API
    /// receives cleaned text, and passing it here keeps that API backward
    /// compatible.
    fn on_body_line(&mut self, _raw: &str, cleaned: &str, line_num: usize) -> ScanAction {
        let callback = &mut self.visitor;
        callback(cleaned, line_num)
    }
}
48
49/// Trait for visitors that receive events from a single-pass file scan.
50///
51/// All methods have default no-op implementations, so visitors only need
52/// to override the events they care about.
53pub trait FileVisitor {
54    /// Called with parsed frontmatter properties (empty `IndexMap` if none).
55    ///
56    /// The scanner passes ownership of the map to avoid a clone in the common
57    /// single-visitor case. When multiple visitors are present, the scanner
58    /// clones for all but the last, so only N-1 allocations occur for N visitors.
59    ///
60    /// Return `ScanAction::Stop` to skip the body scan for this visitor.
61    fn on_frontmatter(&mut self, _props: IndexMap<String, Value>) -> ScanAction {
62        ScanAction::Continue
63    }
64
65    /// Called for each body line outside fenced code blocks and comment blocks.
66    ///
67    /// `raw` is the original line text (code spans and comments intact).
68    /// `cleaned` has inline code spans and `%%comment%%` spans replaced with spaces
69    /// so that `[[links]]` inside backticks or comments are not extracted.
70    ///
71    /// Use `raw` for heading text extraction (to preserve code span content).
72    /// Use `cleaned` for link and task extraction (to skip backtick-escaped markup).
73    fn on_body_line(&mut self, _raw: &str, _cleaned: &str, _line_num: usize) -> ScanAction {
74        ScanAction::Continue
75    }
76
77    /// Called when a fenced code block opens (e.g. `` ```rust ``).
78    fn on_code_fence_open(&mut self, _raw: &str, _language: &str, _line_num: usize) -> ScanAction {
79        ScanAction::Continue
80    }
81
82    /// Called when a fenced code block closes.
83    fn on_code_fence_close(&mut self, _line_num: usize) -> ScanAction {
84        ScanAction::Continue
85    }
86
87    /// Called for each line inside a fenced code block (between open/close fences).
88    /// Default: no-op. Override this to receive code block content.
89    fn on_code_block_line(&mut self, _raw: &str, _line_num: usize) -> ScanAction {
90        ScanAction::Continue
91    }
92
93    /// Whether this visitor needs body events (`on_body_line`, `on_code_block_line`,
94    /// `on_code_fence_*`). If `false`, the visitor only receives `on_frontmatter`
95    /// and is then stopped. Default: `true`.
96    fn needs_body(&self) -> bool {
97        true
98    }
99
100    /// Whether this visitor needs parsed frontmatter properties.
101    /// If **no** visitor needs frontmatter, the scanner skips YAML accumulation
102    /// and `serde_saphyr` parsing (but still reads past the `---` delimiters).
103    /// Default: `true`.
104    fn needs_frontmatter(&self) -> bool {
105        true
106    }
107}