skill_veil_core/ports.rs
1//! Port traits for dependency inversion. The domain layer depends only
2//! on these traits; infrastructure implementations live in [`adapters`]
3//! and are wired in at construction time.
4//!
5//! Traits:
6//!
7//! - [`MarkdownParser`] — parse markdown into sections
8//! - [`PatternMatcher`] — regex pattern matching
9//! - [`FileSystemProvider`] — filesystem operations
10//!
11//! Default implementations:
12//!
13//! - [`PulldownMarkdownParser`] (pulldown-cmark)
14//! - [`RegexPatternMatcher`] (regex crate)
15//! - [`StdFileSystemProvider`] (`std::fs`)
16//!
17//! [`adapters`]: crate::adapters
18//! [`PulldownMarkdownParser`]: crate::adapters::PulldownMarkdownParser
19//! [`RegexPatternMatcher`]: crate::adapters::RegexPatternMatcher
20//! [`StdFileSystemProvider`]: crate::adapters::StdFileSystemProvider
21
22use serde::{Deserialize, Serialize};
23use std::path::{Path, PathBuf};
24
25/// A section parsed from a markdown document.
26///
27/// This is the output contract of the [`MarkdownParser`] port.
28#[derive(Debug, Clone, Serialize, Deserialize)]
29pub struct Section {
30 pub name: String,
31 pub level: u8,
32 pub content: String,
33 pub code_blocks: Vec<CodeBlock>,
34 /// 1-based line number of the section header within the full document.
35 /// Used to convert section-relative offsets into document-relative
36 /// line numbers so that inline suppressions (which operate on
37 /// document-level line numbers) can match findings produced by
38 /// `SectionRegex` rules.
39 #[serde(default)]
40 pub start_line: usize,
41}
42
43/// A fenced code block within a [`Section`].
44#[derive(Debug, Clone, Serialize, Deserialize)]
45pub struct CodeBlock {
46 pub language: Option<String>,
47 pub code: String,
48}
49
50/// Error type for parser operations
51///
52/// Returned by [`MarkdownParser::parse_sections`] when parsing fails.
53#[derive(Debug, thiserror::Error)]
54pub enum ParserError {
55 /// Failed to parse the content
56 #[error("Failed to parse content: {0}")]
57 ParseError(String),
58}
59
60/// Error type for file system operations
61///
62/// Returned by [`FileSystemProvider`] methods when operations fail.
63#[derive(Debug, thiserror::Error)]
64pub enum FileSystemError {
65 /// I/O error from the underlying file system
66 #[error("IO error: {0}")]
67 IoError(#[from] std::io::Error),
68 /// The specified path was not found
69 #[error("Path not found: {0}")]
70 PathNotFound(PathBuf),
71}
72
73/// Trait for markdown parsing - allows swapping pulldown-cmark for other parsers
74///
75/// Implement this trait to provide custom markdown parsing logic.
76/// The default implementation is [`PulldownMarkdownParser`].
77///
78/// [`PulldownMarkdownParser`]: crate::adapters::PulldownMarkdownParser
79pub trait MarkdownParser: Send + Sync {
80 /// Parse markdown content into sections
81 ///
82 /// Extracts heading-based sections from markdown content, including
83 /// any code blocks within each section.
84 fn parse_sections(&self, content: &str) -> Result<Vec<Section>, ParserError>;
85}
86
87/// Trait for pattern matching - allows swapping regex for other matchers
88///
89/// Implement this trait to provide custom pattern matching logic.
90/// The default implementation is [`RegexPatternMatcher`].
91///
92/// [`RegexPatternMatcher`]: crate::adapters::RegexPatternMatcher
93pub trait PatternMatcher: Send + Sync {
94 /// Find all matches of a pattern in the given text
95 ///
96 /// Returns a vector of [`PatternMatch`] for each occurrence found.
97 fn find_matches(&self, pattern: &str, text: &str) -> Vec<PatternMatch>;
98
99 /// Compile a pattern for efficient reuse
100 ///
101 /// Use this when the same pattern will be matched against multiple texts.
102 fn compile(&self, pattern: &str) -> Result<CompiledPattern, PatternError>;
103
104 /// Check whether a pattern occurs in the given text.
105 ///
106 /// Implementations can override this for performance; the default
107 /// derives the answer from [`PatternMatcher::find_matches`].
108 fn is_match(&self, pattern: &str, text: &str) -> bool {
109 !self.find_matches(pattern, text).is_empty()
110 }
111
112 /// Iterate captures (full match plus capture groups) over a pattern.
113 ///
114 /// Each [`Captures`] entry corresponds to one match in `text`. Group
115 /// `0` is the full match; subsequent groups follow the pattern's
116 /// declaration order. Groups that did not participate in a particular
117 /// match are returned as `None`.
118 fn captures_iter(&self, pattern: &str, text: &str) -> Vec<Captures>;
119}
120
/// A single hit reported by the pattern matcher.
///
/// Records where the hit lies in the searched text and what text it
/// covered.
#[derive(Debug, Clone)]
pub struct PatternMatch {
    /// Byte offset (0-based) in the original text where the match begins.
    pub start: usize,
    /// Byte offset in the original text where the match stops (exclusive).
    pub end: usize,
    /// The text that was matched.
    pub matched_text: String,
}
133
134/// Capture groups produced by [`PatternMatcher::captures_iter`].
135///
136/// Group `0` is the full match. Groups that did not participate in a
137/// particular match are stored as `None`. Use [`Captures::get`] for a
138/// nullable lookup that mirrors the regex crate's `.get(idx)` ergonomics
139/// without leaking the concrete `Match` type.
140#[derive(Debug, Clone)]
141pub struct Captures {
142 groups: Vec<Option<PatternMatch>>,
143}
144
145impl Captures {
146 /// Build captures from a vector of optional groups.
147 #[must_use]
148 pub fn new(groups: Vec<Option<PatternMatch>>) -> Self {
149 Self { groups }
150 }
151
152 /// Return the capture group at `idx`, if present.
153 #[must_use]
154 pub fn get(&self, idx: usize) -> Option<&PatternMatch> {
155 self.groups.get(idx).and_then(Option::as_ref)
156 }
157
158 /// Total number of capture slots (including non-participating groups).
159 #[must_use]
160 pub fn len(&self) -> usize {
161 self.groups.len()
162 }
163
164 /// Whether the captures collection holds no groups.
165 #[must_use]
166 pub fn is_empty(&self) -> bool {
167 self.groups.is_empty()
168 }
169}
170
171/// Closure stored inside [`CompiledPattern`] for finding matches.
172type FindFn = Box<dyn Fn(&str) -> Vec<PatternMatch> + Send + Sync>;
173/// Closure stored inside [`CompiledPattern`] for membership tests.
174type IsMatchFn = Box<dyn Fn(&str) -> bool + Send + Sync>;
175/// Closure stored inside [`CompiledPattern`] for capture iteration.
176type CapturesFn = Box<dyn Fn(&str) -> Vec<Captures> + Send + Sync>;
177
178/// A compiled pattern for efficient reuse
179///
180/// Created by [`PatternMatcher::compile`] for patterns that will be
181/// matched against multiple texts. The three operation closures share
182/// the underlying compiled state in the adapter so that a single
183/// pattern compilation services all three operations.
184pub struct CompiledPattern {
185 find: FindFn,
186 is_match: IsMatchFn,
187 captures: CapturesFn,
188}
189
190impl CompiledPattern {
191 /// Build a compiled pattern from its three operation closures.
192 ///
193 /// Adapters are expected to share their compiled state (e.g. an
194 /// `Arc<Regex>`) across the three closures so that `find_matches`,
195 /// `is_match`, and `captures_iter` all reuse the same compilation.
196 #[must_use]
197 pub fn new(find: FindFn, is_match: IsMatchFn, captures: CapturesFn) -> Self {
198 Self {
199 find,
200 is_match,
201 captures,
202 }
203 }
204
205 /// Find every occurrence of the pattern in `text`.
206 pub fn find_matches(&self, text: &str) -> Vec<PatternMatch> {
207 (self.find)(text)
208 }
209
210 /// Whether the pattern occurs at least once in `text`.
211 pub fn is_match(&self, text: &str) -> bool {
212 (self.is_match)(text)
213 }
214
215 /// Iterate captures (full match plus groups) for every occurrence.
216 pub fn captures_iter(&self, text: &str) -> Vec<Captures> {
217 (self.captures)(text)
218 }
219}
220
221/// Error type for pattern operations
222///
223/// Returned by [`PatternMatcher::compile`] when pattern compilation fails.
224#[derive(Debug, thiserror::Error)]
225pub enum PatternError {
226 /// The pattern syntax is invalid
227 #[error("Invalid pattern: {0}")]
228 InvalidPattern(String),
229}
230
231/// Raw file content returned by the filesystem port.
232///
233/// The core can decide how to decode these bytes depending on context.
234#[derive(Debug, Clone)]
235pub struct FileContent {
236 bytes: Vec<u8>,
237}
238
239impl FileContent {
240 #[must_use]
241 pub fn new(bytes: Vec<u8>) -> Self {
242 Self { bytes }
243 }
244
245 #[must_use]
246 pub fn as_bytes(&self) -> &[u8] {
247 &self.bytes
248 }
249
250 #[must_use]
251 pub fn decode_utf8_lossy(&self) -> DecodedText {
252 let decode_warning = std::str::from_utf8(&self.bytes).is_err();
253 DecodedText {
254 text: String::from_utf8_lossy(&self.bytes).into_owned(),
255 decode_warning,
256 }
257 }
258}
259
/// Text obtained by decoding file bytes, together with a flag recording
/// whether the decoding had to be lossy.
#[derive(Debug, Clone)]
pub struct DecodedText {
    /// The decoded text.
    pub text: String,
    /// `true` when lossy decoding was required to produce `text`.
    pub decode_warning: bool,
}
266
/// The slice of `std::fs::Metadata` that the [`FileSystemProvider`] port
/// exposes.
///
/// The surface is kept deliberately small so that test adapters can
/// synthesize values without having to produce real OS metadata.
#[derive(Debug, Clone, Copy)]
pub struct FileMeta {
    /// Size of the file, in bytes.
    pub len: u64,
}
275
276/// Trait for file system operations - allows mocking in tests
277///
278/// Implement this trait to provide custom file system access.
279/// The default implementation is [`StdFileSystemProvider`].
280///
281/// [`StdFileSystemProvider`]: crate::adapters::StdFileSystemProvider
282pub trait FileSystemProvider: Send + Sync {
283 /// Read raw file contents
284 ///
285 /// # Errors
286 /// Returns [`FileSystemError::PathNotFound`] if the file does not exist,
287 /// or [`FileSystemError::IoError`] for other I/O errors.
288 fn read_file_bytes(&self, path: &Path) -> Result<FileContent, FileSystemError>;
289
290 /// List files in a directory matching a glob pattern
291 ///
292 /// # Arguments
293 /// * `path` - The directory to search
294 /// * `pattern` - A glob pattern (e.g., "*.md")
295 /// * `recursive` - Whether to search subdirectories
296 fn list_files(
297 &self,
298 path: &Path,
299 pattern: &str,
300 recursive: bool,
301 ) -> Result<Vec<PathBuf>, FileSystemError>;
302
303 /// Check if a path exists
304 fn exists(&self, path: &Path) -> bool;
305
306 /// Look up the size (and other minimal metadata) for a path.
307 ///
308 /// Adapters with direct filesystem access (the std adapter, mocks with
309 /// explicit metadata) MUST override this method to avoid the default
310 /// implementation, which reads the entire file via `read_file_bytes`
311 /// just to obtain the length. The `StdFileSystemProvider` override uses
312 /// `std::fs::metadata` (a single stat syscall) instead.
313 ///
314 /// # Errors
315 /// Returns [`FileSystemError::PathNotFound`] when the path does not
316 /// exist, or [`FileSystemError::IoError`] for other I/O failures.
317 fn metadata(&self, path: &Path) -> Result<FileMeta, FileSystemError> {
318 let bytes = self.read_file_bytes(path)?;
319 Ok(FileMeta {
320 len: bytes.as_bytes().len() as u64,
321 })
322 }
323
324 /// Whether `path` resolves to a regular file.
325 ///
326 /// Used by the scanner entrypoints to decide between single-file and
327 /// package scans. Routing this through the port (instead of calling
328 /// `Path::is_file` directly) keeps test doubles consistent with
329 /// production behaviour and preserves the hexagonal contract.
330 ///
331 /// The default implementation derives the answer from
332 /// `read_file_bytes`: a path whose bytes can be read is treated as
333 /// a file. This is correct for the std adapter but slow; adapters
334 /// with cheaper file-type access SHOULD override.
335 fn is_file(&self, path: &Path) -> bool {
336 self.read_file_bytes(path).is_ok()
337 }
338
339 /// Whether `path` resolves to a directory.
340 ///
341 /// Counterpart of [`FileSystemProvider::is_file`]. The default
342 /// implementation treats an existing path that is not a file as a
343 /// directory. Adapters MUST override this when they need to model
344 /// special files (devices, sockets, FIFOs) explicitly.
345 fn is_dir(&self, path: &Path) -> bool {
346 self.exists(path) && !self.is_file(path)
347 }
348
349 /// Walk regular files under `path`, returning their absolute paths.
350 ///
351 /// `max_depth` caps descent depth (`0` means unlimited). `skip_dirs`
352 /// names directories whose subtrees MUST be skipped — used to keep
353 /// the walker out of vendored / generated trees on adversarial
354 /// inputs. Implementations MUST NOT follow symlinks.
355 ///
356 /// The default implementation delegates to `list_files(path, "*",
357 /// recursive=true)` and ignores `max_depth` / `skip_dirs`. This is
358 /// correct (just less efficient) and lets test mocks pick up the
359 /// new method without bespoke walk logic. The std adapter overrides
360 /// to honour both knobs.
361 ///
362 /// # Errors
363 /// Returns [`FileSystemError::PathNotFound`] when the root does not
364 /// exist, or [`FileSystemError::IoError`] for other I/O failures
365 /// on the root path. Errors on individual children are logged and
366 /// the walk continues, mirroring [`FileSystemProvider::list_files`].
367 fn walk_files(
368 &self,
369 path: &Path,
370 _max_depth: usize,
371 _skip_dirs: &[&str],
372 ) -> Result<Vec<PathBuf>, FileSystemError> {
373 self.list_files(path, "*", true)
374 }
375}