lychee_lib/types/input/resolver.rs
1//! Input source resolution.
2//!
3//! Provides the `InputResolver` which handles resolution of various input sources
4//! into concrete, processable sources by expanding glob patterns and applying filters.
5
6use super::input::Input;
7use super::source::{InputSource, ResolvedInputSource};
8use crate::Result;
9use crate::filter::PathExcludes;
10use crate::types::file::FileExtensions;
11use async_stream::try_stream;
12use futures::stream::Stream;
13use futures::stream::once;
14use glob::glob_with;
15use ignore::{Walk, WalkBuilder};
16use shellexpand::tilde;
17use std::path::Path;
18use std::pin::Pin;
19
/// Stateless entry point for turning inputs into concrete sources.
///
/// The type carries no data. All functionality lives in associated functions
/// that expand glob patterns, walk directories and apply path exclusions.
#[derive(Clone, Copy, Debug)]
pub struct InputResolver;
25
26impl InputResolver {
27 /// Resolve an input into a stream of concrete input sources.
28 ///
29 /// This returns a stream of resolved input sources for the given input,
30 /// taking into account the matching file extensions and respecting
31 /// exclusions. Glob patterns are expanded into individual file paths.
32 ///
33 /// # Returns
34 ///
35 /// Returns a stream of `Result<ResolvedInputSource>` for all matching input
36 /// sources. Glob patterns are expanded, so `FsGlob` never appears in the
37 /// output.
38 ///
39 /// # Errors
40 ///
41 /// Returns an error (within the stream) if:
42 /// - The glob pattern is invalid or expansion encounters I/O errors
43 /// - Directory traversal fails, including:
44 /// - Permission denied when accessing directories or files
45 /// - I/O errors while reading directory contents
46 /// - Filesystem errors (disk errors, network filesystem issues, etc.)
47 /// - Invalid file paths or symbolic link resolution failures
48 /// - Errors when reading or evaluating `.gitignore` or `.ignore` files
49 /// - Errors occur during file extension or path exclusion evaluation
50 ///
51 /// Once an error is returned, resolution of that input source halts
52 /// and no further `Ok(ResolvedInputSource)` will be produced.
53 #[must_use]
54 pub fn resolve<'a>(
55 input: &Input,
56 file_extensions: FileExtensions,
57 skip_hidden: bool,
58 skip_ignored: bool,
59 excluded_paths: &'a PathExcludes,
60 ) -> Pin<Box<dyn Stream<Item = Result<ResolvedInputSource>> + Send + 'a>> {
61 Self::resolve_input(
62 input,
63 file_extensions,
64 skip_hidden,
65 skip_ignored,
66 excluded_paths,
67 )
68 }
69
70 /// Create a [`Walk`] iterator for directory traversal
71 ///
72 /// # Errors
73 ///
74 /// Fails if [`FileExtensions`] cannot be converted
75 pub(crate) fn walk(
76 path: &Path,
77 file_extensions: FileExtensions,
78 skip_hidden: bool,
79 skip_ignored: bool,
80 ) -> Result<Walk> {
81 Ok(WalkBuilder::new(path)
82 // Skip over files which are ignored by git or `.ignore` if necessary
83 .git_ignore(skip_ignored)
84 .git_global(skip_ignored)
85 .git_exclude(skip_ignored)
86 .ignore(skip_ignored)
87 .parents(skip_ignored)
88 // Ignore hidden files if necessary
89 .hidden(skip_hidden)
90 // Configure the file types filter to only include files with matching extensions
91 .types(file_extensions.build(skip_hidden)?)
92 .build())
93 }
94
95 /// Internal method for resolving input sources.
96 ///
97 /// Takes an Input and returns a stream of `ResolvedInputSource` items,
98 /// expanding glob patterns and applying filtering based on the provided
99 /// configuration.
100 fn resolve_input<'a>(
101 input: &Input,
102 file_extensions: FileExtensions,
103 skip_hidden: bool,
104 skip_ignored: bool,
105 excluded_paths: &'a PathExcludes,
106 ) -> Pin<Box<dyn Stream<Item = Result<ResolvedInputSource>> + Send + 'a>> {
107 match &input.source {
108 InputSource::RemoteUrl(url) => {
109 let url = url.clone();
110 Box::pin(once(async move { Ok(ResolvedInputSource::RemoteUrl(url)) }))
111 }
112 InputSource::FsGlob {
113 pattern,
114 ignore_case,
115 } => {
116 // NOTE: we convert the glob::Pattern back to str because
117 // `glob_with` only takes string arguments.
118 let glob_expanded = tilde(pattern.as_str()).to_string();
119 let mut match_opts = glob::MatchOptions::new();
120 match_opts.case_sensitive = !ignore_case;
121
122 Box::pin(try_stream! {
123 // For glob patterns, we expand the pattern and yield
124 // matching paths as ResolvedInputSource::FsPath items.
125 for entry in glob_with(&glob_expanded, match_opts)? {
126 match entry {
127 Ok(path) => {
128 // Skip directories or files that don't match
129 // extensions
130 if path.is_dir() {
131 continue;
132 }
133 if Self::is_excluded_path(&path, excluded_paths) {
134 continue;
135 }
136
137 // We do not filter by extensions here.
138 //
139 // Instead, we always check files captured by
140 // the glob pattern, as the user explicitly
141 // specified them.
142 yield ResolvedInputSource::FsPath(path);
143 }
144 Err(e) => {
145 eprintln!("Error in glob pattern: {e:?}");
146 }
147 }
148 }
149 })
150 }
151 InputSource::FsPath(path) => {
152 if path.is_dir() {
153 let walk = match Self::walk(path, file_extensions, skip_hidden, skip_ignored) {
154 Ok(x) => x,
155 Err(e) => {
156 return Box::pin(once(async move { Err(e) }));
157 }
158 };
159
160 Box::pin(try_stream! {
161 for entry in walk {
162 let entry = entry?;
163 if Self::is_excluded_path(entry.path(), excluded_paths)
164 {
165 continue;
166 }
167
168 match entry.file_type() {
169 None => continue,
170 Some(file_type) => {
171 if !file_type.is_file() {
172 continue;
173 }
174 }
175 }
176
177 yield ResolvedInputSource::FsPath(
178 entry.path().to_path_buf()
179 );
180 }
181 })
182 } else {
183 // For individual files, yield if not excluded.
184 //
185 // We do not filter by extension here, as individual
186 // files should always be checked, no matter if their
187 // extension matches or not.
188 //
189 // This follows the principle of least surprise because
190 // the user explicitly specified the file, so they
191 // expect it to be checked.
192 if Self::is_excluded_path(path, excluded_paths) {
193 Box::pin(futures::stream::empty())
194 } else {
195 let path = path.clone();
196 Box::pin(once(async move { Ok(ResolvedInputSource::FsPath(path)) }))
197 }
198 }
199 }
200 InputSource::Stdin => Box::pin(once(async move { Ok(ResolvedInputSource::Stdin) })),
201 InputSource::String(s) => {
202 let s = s.clone();
203 Box::pin(once(async move { Ok(ResolvedInputSource::String(s)) }))
204 }
205 }
206 }
207
208 /// Check if the given path was excluded from link checking
209 fn is_excluded_path(path: &Path, excluded_paths: &PathExcludes) -> bool {
210 excluded_paths.is_match(&path.to_string_lossy())
211 }
212}