lychee_lib/types/input/resolver.rs
1//! Input source resolution.
2//!
3//! Provides the `InputResolver` which handles resolution of various input sources
4//! into concrete, processable sources by expanding glob patterns and applying filters.
5
6use super::input::Input;
7use super::source::{InputSource, ResolvedInputSource};
8use crate::Result;
9use crate::filter::PathExcludes;
10use crate::types::file::FileExtensions;
11use async_stream::try_stream;
12use futures::stream::Stream;
13use futures::stream::once;
14use glob::glob_with;
15use ignore::{Walk, WalkBuilder};
16use shellexpand::tilde;
17use std::path::Path;
18use std::pin::Pin;
19
/// Resolves input sources into concrete, processable sources.
///
/// Handles expansion of glob patterns and filtering based on exclusion rules.
///
/// This is a field-less unit type: it carries no state and only serves as a
/// namespace for the associated resolution functions.
#[derive(Copy, Clone, Debug)]
pub struct InputResolver;
25
26impl InputResolver {
/// Resolve an input into a stream of concrete input sources.
///
/// This is the public entry point; it forwards all arguments unchanged to
/// `resolve_input`.
///
/// The returned stream contains the resolved input sources for the given
/// input. Glob patterns are expanded into individual file paths and
/// directories are walked, so only concrete sources are produced.
///
/// # Arguments
///
/// - `input`: the input whose `source` is resolved.
/// - `file_extensions`: used to build the file-type filter for directory
///   walks. It is *not* applied to glob matches or to explicitly given
///   files.
/// - `skip_hidden`: skip hidden entries during directory walks; for glob
///   patterns, wildcards then require a literal leading dot to match.
/// - `skip_ignored`: enable the `.gitignore` / `.ignore` handling of the
///   directory walker.
/// - `excluded_paths`: paths matching these exclusions are dropped from
///   the output.
///
/// # Returns
///
/// Returns a stream of `Result<ResolvedInputSource>` for all matching input
/// sources. Glob patterns are expanded, so `FsGlob` never appears in the
/// output.
///
/// # Errors
///
/// Returns an error (within the stream) if:
/// - The glob pattern is invalid
/// - The file-type filter cannot be built from `file_extensions`
///   (directory inputs only)
/// - The directory walker reports an error for an entry (for example
///   permission or I/O errors while reading directory contents)
///
/// Errors for individual paths encountered while expanding a glob pattern
/// are *not* returned: they are printed to standard error and the
/// affected entry is skipped.
///
/// Once an error is returned, resolution of that input source halts
/// and no further `Ok(ResolvedInputSource)` will be produced.
#[must_use]
pub fn resolve<'a>(
    input: &Input,
    file_extensions: FileExtensions,
    skip_hidden: bool,
    skip_ignored: bool,
    excluded_paths: &'a PathExcludes,
) -> Pin<Box<dyn Stream<Item = Result<ResolvedInputSource>> + Send + 'a>> {
    Self::resolve_input(
        input,
        file_extensions,
        skip_hidden,
        skip_ignored,
        excluded_paths,
    )
}
69
70 /// Create a [`Walk`] iterator for directory traversal
71 ///
72 /// # Errors
73 ///
74 /// Fails if [`FileExtensions`] cannot be converted
75 pub(crate) fn walk(
76 path: &Path,
77 file_extensions: FileExtensions,
78 skip_hidden: bool,
79 skip_ignored: bool,
80 ) -> Result<Walk> {
81 Ok(WalkBuilder::new(path)
82 // Skip over files which are ignored by git or `.ignore` if necessary
83 .git_ignore(skip_ignored)
84 .git_global(skip_ignored)
85 .git_exclude(skip_ignored)
86 .ignore(skip_ignored)
87 .parents(skip_ignored)
88 // Ignore hidden files if necessary
89 .hidden(skip_hidden)
90 // Configure the file types filter to only include files with matching extensions
91 .types(file_extensions.build(skip_hidden)?)
92 .build())
93 }
94
95 /// Internal method for resolving input sources.
96 ///
97 /// Takes an Input and returns a stream of `ResolvedInputSource` items,
98 /// expanding glob patterns and applying filtering based on the provided
99 /// configuration.
100 fn resolve_input<'a>(
101 input: &Input,
102 file_extensions: FileExtensions,
103 skip_hidden: bool,
104 skip_ignored: bool,
105 excluded_paths: &'a PathExcludes,
106 ) -> Pin<Box<dyn Stream<Item = Result<ResolvedInputSource>> + Send + 'a>> {
107 match &input.source {
108 InputSource::RemoteUrl(url) => {
109 let url = url.clone();
110 Box::pin(once(async move { Ok(ResolvedInputSource::RemoteUrl(url)) }))
111 }
112 InputSource::FsGlob {
113 pattern,
114 ignore_case,
115 } => {
116 // NOTE: we convert the glob::Pattern back to str because
117 // `glob_with` only takes string arguments.
118 let glob_expanded = tilde(pattern.as_str()).to_string();
119 let mut match_opts = glob::MatchOptions::new();
120 match_opts.case_sensitive = !ignore_case;
121 // Match shell behavior: wildcards like `*` and `**` should not
122 // match hidden files/dirs by default.
123 match_opts.require_literal_leading_dot = skip_hidden;
124
125 Box::pin(try_stream! {
126 // For glob patterns, we expand the pattern and yield
127 // matching paths as ResolvedInputSource::FsPath items.
128 for entry in glob_with(&glob_expanded, match_opts)? {
129 match entry {
130 Ok(path) => {
131 // Skip directories or files that don't match
132 // extensions
133 if path.is_dir() {
134 continue;
135 }
136 if Self::is_excluded_path(&path, excluded_paths) {
137 continue;
138 }
139
140 // We do not filter by extensions here.
141 //
142 // Instead, we always check files captured by
143 // the glob pattern, as the user explicitly
144 // specified them.
145 yield ResolvedInputSource::FsPath(path);
146 }
147 Err(e) => {
148 eprintln!("Error in glob pattern: {e:?}");
149 }
150 }
151 }
152 })
153 }
154 InputSource::FsPath(path) => {
155 if path.is_dir() {
156 let walk = match Self::walk(path, file_extensions, skip_hidden, skip_ignored) {
157 Ok(x) => x,
158 Err(e) => {
159 return Box::pin(once(async move { Err(e) }));
160 }
161 };
162
163 Box::pin(try_stream! {
164 for entry in walk {
165 let entry = entry?;
166 if Self::is_excluded_path(entry.path(), excluded_paths)
167 {
168 continue;
169 }
170
171 match entry.file_type() {
172 None => continue,
173 Some(file_type) => {
174 if !file_type.is_file() {
175 continue;
176 }
177 }
178 }
179
180 yield ResolvedInputSource::FsPath(
181 entry.path().to_path_buf()
182 );
183 }
184 })
185 } else {
186 // For individual files, yield if not excluded.
187 //
188 // We do not filter by extension here, as individual
189 // files should always be checked, no matter if their
190 // extension matches or not.
191 //
192 // This follows the principle of least surprise because
193 // the user explicitly specified the file, so they
194 // expect it to be checked.
195 if Self::is_excluded_path(path, excluded_paths) {
196 Box::pin(futures::stream::empty())
197 } else {
198 let path = path.clone();
199 Box::pin(once(async move { Ok(ResolvedInputSource::FsPath(path)) }))
200 }
201 }
202 }
203 InputSource::Stdin => Box::pin(once(async move { Ok(ResolvedInputSource::Stdin) })),
204 InputSource::String(s) => {
205 let s = s.clone();
206 Box::pin(once(async move { Ok(ResolvedInputSource::String(s)) }))
207 }
208 }
209 }
210
211 /// Check if the given path was excluded from link checking
212 fn is_excluded_path(path: &Path, excluded_paths: &PathExcludes) -> bool {
213 excluded_paths.is_match(&path.to_string_lossy())
214 }
215}