Skip to main content

skill_veil_core/
ports.rs

1//! Port traits for dependency inversion. The domain layer depends only
2//! on these traits; infrastructure implementations live in [`adapters`]
3//! and are wired in at construction time.
4//!
5//! Traits:
6//!
7//! - [`MarkdownParser`] — parse markdown into sections
8//! - [`PatternMatcher`] — regex pattern matching
9//! - [`FileSystemProvider`] — filesystem operations
10//!
11//! Default implementations:
12//!
13//! - [`PulldownMarkdownParser`] (pulldown-cmark)
14//! - [`RegexPatternMatcher`] (regex crate)
15//! - [`StdFileSystemProvider`] (`std::fs`)
16//!
17//! [`adapters`]: crate::adapters
18//! [`PulldownMarkdownParser`]: crate::adapters::PulldownMarkdownParser
19//! [`RegexPatternMatcher`]: crate::adapters::RegexPatternMatcher
20//! [`StdFileSystemProvider`]: crate::adapters::StdFileSystemProvider
21
22use serde::{Deserialize, Serialize};
23use std::path::{Path, PathBuf};
24
25/// A section parsed from a markdown document.
26///
27/// This is the output contract of the [`MarkdownParser`] port.
28#[derive(Debug, Clone, Serialize, Deserialize)]
29pub struct Section {
30    pub name: String,
31    pub level: u8,
32    pub content: String,
33    pub code_blocks: Vec<CodeBlock>,
34    /// 1-based line number of the section header within the full document.
35    /// Used to convert section-relative offsets into document-relative
36    /// line numbers so that inline suppressions (which operate on
37    /// document-level line numbers) can match findings produced by
38    /// `SectionRegex` rules.
39    #[serde(default)]
40    pub start_line: usize,
41}
42
43/// A fenced code block within a [`Section`].
44#[derive(Debug, Clone, Serialize, Deserialize)]
45pub struct CodeBlock {
46    pub language: Option<String>,
47    pub code: String,
48}
49
50/// Error type for parser operations
51///
52/// Returned by [`MarkdownParser::parse_sections`] when parsing fails.
53#[derive(Debug, thiserror::Error)]
54pub enum ParserError {
55    /// Failed to parse the content
56    #[error("Failed to parse content: {0}")]
57    ParseError(String),
58}
59
60/// Error type for file system operations
61///
62/// Returned by [`FileSystemProvider`] methods when operations fail.
63#[derive(Debug, thiserror::Error)]
64pub enum FileSystemError {
65    /// I/O error from the underlying file system
66    #[error("IO error: {0}")]
67    IoError(#[from] std::io::Error),
68    /// The specified path was not found
69    #[error("Path not found: {0}")]
70    PathNotFound(PathBuf),
71}
72
73/// Trait for markdown parsing - allows swapping pulldown-cmark for other parsers
74///
75/// Implement this trait to provide custom markdown parsing logic.
76/// The default implementation is [`PulldownMarkdownParser`].
77///
78/// [`PulldownMarkdownParser`]: crate::adapters::PulldownMarkdownParser
79pub trait MarkdownParser: Send + Sync {
80    /// Parse markdown content into sections
81    ///
82    /// Extracts heading-based sections from markdown content, including
83    /// any code blocks within each section.
84    fn parse_sections(&self, content: &str) -> Result<Vec<Section>, ParserError>;
85}
86
87/// Trait for pattern matching - allows swapping regex for other matchers
88///
89/// Implement this trait to provide custom pattern matching logic.
90/// The default implementation is [`RegexPatternMatcher`].
91///
92/// [`RegexPatternMatcher`]: crate::adapters::RegexPatternMatcher
93pub trait PatternMatcher: Send + Sync {
94    /// Find all matches of a pattern in the given text
95    ///
96    /// Returns a vector of [`PatternMatch`] for each occurrence found.
97    fn find_matches(&self, pattern: &str, text: &str) -> Vec<PatternMatch>;
98
99    /// Compile a pattern for efficient reuse
100    ///
101    /// Use this when the same pattern will be matched against multiple texts.
102    fn compile(&self, pattern: &str) -> Result<CompiledPattern, PatternError>;
103
104    /// Check whether a pattern occurs in the given text.
105    ///
106    /// Implementations can override this for performance; the default
107    /// derives the answer from [`PatternMatcher::find_matches`].
108    fn is_match(&self, pattern: &str, text: &str) -> bool {
109        !self.find_matches(pattern, text).is_empty()
110    }
111
112    /// Iterate captures (full match plus capture groups) over a pattern.
113    ///
114    /// Each [`Captures`] entry corresponds to one match in `text`. Group
115    /// `0` is the full match; subsequent groups follow the pattern's
116    /// declaration order. Groups that did not participate in a particular
117    /// match are returned as `None`.
118    fn captures_iter(&self, pattern: &str, text: &str) -> Vec<Captures>;
119}
120
/// A single occurrence reported by the pattern matcher.
///
/// Records where the occurrence sits in the searched text and what text
/// it covered.
#[derive(Debug, Clone)]
pub struct PatternMatch {
    /// Byte offset (0-based) in the original text where the match begins.
    pub start: usize,
    /// Byte offset in the original text where the match stops; exclusive.
    pub end: usize,
    /// The text covered by the match.
    pub matched_text: String,
}
133
134/// Capture groups produced by [`PatternMatcher::captures_iter`].
135///
136/// Group `0` is the full match. Groups that did not participate in a
137/// particular match are stored as `None`. Use [`Captures::get`] for a
138/// nullable lookup that mirrors the regex crate's `.get(idx)` ergonomics
139/// without leaking the concrete `Match` type.
140#[derive(Debug, Clone)]
141pub struct Captures {
142    groups: Vec<Option<PatternMatch>>,
143}
144
145impl Captures {
146    /// Build captures from a vector of optional groups.
147    #[must_use]
148    pub fn new(groups: Vec<Option<PatternMatch>>) -> Self {
149        Self { groups }
150    }
151
152    /// Return the capture group at `idx`, if present.
153    #[must_use]
154    pub fn get(&self, idx: usize) -> Option<&PatternMatch> {
155        self.groups.get(idx).and_then(Option::as_ref)
156    }
157
158    /// Total number of capture slots (including non-participating groups).
159    #[must_use]
160    pub fn len(&self) -> usize {
161        self.groups.len()
162    }
163
164    /// Whether the captures collection holds no groups.
165    #[must_use]
166    pub fn is_empty(&self) -> bool {
167        self.groups.is_empty()
168    }
169}
170
171/// Closure stored inside [`CompiledPattern`] for finding matches.
172type FindFn = Box<dyn Fn(&str) -> Vec<PatternMatch> + Send + Sync>;
173/// Closure stored inside [`CompiledPattern`] for membership tests.
174type IsMatchFn = Box<dyn Fn(&str) -> bool + Send + Sync>;
175/// Closure stored inside [`CompiledPattern`] for capture iteration.
176type CapturesFn = Box<dyn Fn(&str) -> Vec<Captures> + Send + Sync>;
177
178/// A compiled pattern for efficient reuse
179///
180/// Created by [`PatternMatcher::compile`] for patterns that will be
181/// matched against multiple texts. The three operation closures share
182/// the underlying compiled state in the adapter so that a single
183/// pattern compilation services all three operations.
184pub struct CompiledPattern {
185    find: FindFn,
186    is_match: IsMatchFn,
187    captures: CapturesFn,
188}
189
190impl CompiledPattern {
191    /// Build a compiled pattern from its three operation closures.
192    ///
193    /// Adapters are expected to share their compiled state (e.g. an
194    /// `Arc<Regex>`) across the three closures so that `find_matches`,
195    /// `is_match`, and `captures_iter` all reuse the same compilation.
196    #[must_use]
197    pub fn new(find: FindFn, is_match: IsMatchFn, captures: CapturesFn) -> Self {
198        Self {
199            find,
200            is_match,
201            captures,
202        }
203    }
204
205    /// Find every occurrence of the pattern in `text`.
206    pub fn find_matches(&self, text: &str) -> Vec<PatternMatch> {
207        (self.find)(text)
208    }
209
210    /// Whether the pattern occurs at least once in `text`.
211    pub fn is_match(&self, text: &str) -> bool {
212        (self.is_match)(text)
213    }
214
215    /// Iterate captures (full match plus groups) for every occurrence.
216    pub fn captures_iter(&self, text: &str) -> Vec<Captures> {
217        (self.captures)(text)
218    }
219}
220
221/// Error type for pattern operations
222///
223/// Returned by [`PatternMatcher::compile`] when pattern compilation fails.
224#[derive(Debug, thiserror::Error)]
225pub enum PatternError {
226    /// The pattern syntax is invalid
227    #[error("Invalid pattern: {0}")]
228    InvalidPattern(String),
229}
230
231/// Raw file content returned by the filesystem port.
232///
233/// The core can decide how to decode these bytes depending on context.
234#[derive(Debug, Clone)]
235pub struct FileContent {
236    bytes: Vec<u8>,
237}
238
239impl FileContent {
240    #[must_use]
241    pub fn new(bytes: Vec<u8>) -> Self {
242        Self { bytes }
243    }
244
245    #[must_use]
246    pub fn as_bytes(&self) -> &[u8] {
247        &self.bytes
248    }
249
250    #[must_use]
251    pub fn decode_utf8_lossy(&self) -> DecodedText {
252        let decode_warning = std::str::from_utf8(&self.bytes).is_err();
253        DecodedText {
254            text: String::from_utf8_lossy(&self.bytes).into_owned(),
255            decode_warning,
256        }
257    }
258}
259
/// Result of decoding file bytes to text, together with a flag recording
/// whether the decoding had to be lossy.
#[derive(Debug, Clone)]
pub struct DecodedText {
    /// The decoded text.
    pub text: String,
    /// `true` when lossy decoding was required to produce `text`.
    pub decode_warning: bool,
}
266
/// The slice of `std::fs::Metadata` that the [`FileSystemProvider`] port
/// exposes.
///
/// The surface is deliberately tiny so that test adapters can fabricate
/// values without needing real OS metadata.
#[derive(Debug, Clone, Copy)]
pub struct FileMeta {
    /// File size in bytes.
    pub len: u64,
}
275
276/// Trait for file system operations - allows mocking in tests
277///
278/// Implement this trait to provide custom file system access.
279/// The default implementation is [`StdFileSystemProvider`].
280///
281/// [`StdFileSystemProvider`]: crate::adapters::StdFileSystemProvider
282pub trait FileSystemProvider: Send + Sync {
283    /// Read raw file contents
284    ///
285    /// # Errors
286    /// Returns [`FileSystemError::PathNotFound`] if the file does not exist,
287    /// or [`FileSystemError::IoError`] for other I/O errors.
288    fn read_file_bytes(&self, path: &Path) -> Result<FileContent, FileSystemError>;
289
290    /// List files in a directory matching a glob pattern
291    ///
292    /// # Arguments
293    /// * `path` - The directory to search
294    /// * `pattern` - A glob pattern (e.g., "*.md")
295    /// * `recursive` - Whether to search subdirectories
296    fn list_files(
297        &self,
298        path: &Path,
299        pattern: &str,
300        recursive: bool,
301    ) -> Result<Vec<PathBuf>, FileSystemError>;
302
303    /// Check if a path exists
304    fn exists(&self, path: &Path) -> bool;
305
306    /// Look up the size (and other minimal metadata) for a path.
307    ///
308    /// Adapters with direct filesystem access (the std adapter, mocks with
309    /// explicit metadata) MUST override this method to avoid the default
310    /// implementation, which reads the entire file via `read_file_bytes`
311    /// just to obtain the length. The `StdFileSystemProvider` override uses
312    /// `std::fs::metadata` (a single stat syscall) instead.
313    ///
314    /// # Errors
315    /// Returns [`FileSystemError::PathNotFound`] when the path does not
316    /// exist, or [`FileSystemError::IoError`] for other I/O failures.
317    fn metadata(&self, path: &Path) -> Result<FileMeta, FileSystemError> {
318        let bytes = self.read_file_bytes(path)?;
319        Ok(FileMeta {
320            len: bytes.as_bytes().len() as u64,
321        })
322    }
323
324    /// Whether `path` resolves to a regular file.
325    ///
326    /// Used by the scanner entrypoints to decide between single-file and
327    /// package scans. Routing this through the port (instead of calling
328    /// `Path::is_file` directly) keeps test doubles consistent with
329    /// production behaviour and preserves the hexagonal contract.
330    ///
331    /// The default implementation derives the answer from
332    /// `read_file_bytes`: a path whose bytes can be read is treated as
333    /// a file. This is correct for the std adapter but slow; adapters
334    /// with cheaper file-type access SHOULD override.
335    fn is_file(&self, path: &Path) -> bool {
336        self.read_file_bytes(path).is_ok()
337    }
338
339    /// Whether `path` resolves to a directory.
340    ///
341    /// Counterpart of [`FileSystemProvider::is_file`]. The default
342    /// implementation treats an existing path that is not a file as a
343    /// directory. Adapters MUST override this when they need to model
344    /// special files (devices, sockets, FIFOs) explicitly.
345    fn is_dir(&self, path: &Path) -> bool {
346        self.exists(path) && !self.is_file(path)
347    }
348
349    /// Walk regular files under `path`, returning their absolute paths.
350    ///
351    /// `max_depth` caps descent depth (`0` means unlimited). `skip_dirs`
352    /// names directories whose subtrees MUST be skipped — used to keep
353    /// the walker out of vendored / generated trees on adversarial
354    /// inputs. Implementations MUST NOT follow symlinks.
355    ///
356    /// The default implementation delegates to `list_files(path, "*",
357    /// recursive=true)` and ignores `max_depth` / `skip_dirs`. This is
358    /// correct (just less efficient) and lets test mocks pick up the
359    /// new method without bespoke walk logic. The std adapter overrides
360    /// to honour both knobs.
361    ///
362    /// # Errors
363    /// Returns [`FileSystemError::PathNotFound`] when the root does not
364    /// exist, or [`FileSystemError::IoError`] for other I/O failures
365    /// on the root path. Errors on individual children are logged and
366    /// the walk continues, mirroring [`FileSystemProvider::list_files`].
367    fn walk_files(
368        &self,
369        path: &Path,
370        _max_depth: usize,
371        _skip_dirs: &[&str],
372    ) -> Result<Vec<PathBuf>, FileSystemError> {
373        self.list_files(path, "*", true)
374    }
375}