Skip to main content

lychee_lib/types/input/
resolver.rs

//! Input source resolution.
//!
//! Provides the `InputResolver` which handles resolution of various input sources
//! into concrete, processable sources by expanding glob patterns and applying filters.
5
6use super::input::Input;
7use super::source::{InputSource, ResolvedInputSource};
8use crate::Result;
9use crate::filter::PathExcludes;
10use crate::types::file::FileExtensions;
11use async_stream::try_stream;
12use futures::stream::Stream;
13use futures::stream::once;
14use glob::glob_with;
15use ignore::{Walk, WalkBuilder};
16use shellexpand::tilde;
17use std::path::Path;
18use std::pin::Pin;
19
/// Stateless entry point for turning inputs into concrete, processable sources.
///
/// The type carries no data; all functionality lives in associated functions.
/// Resolution expands glob patterns and drops paths matched by exclusion rules.
#[derive(Debug, Clone, Copy)]
pub struct InputResolver;
25
26impl InputResolver {
27    /// Resolve an input into a stream of concrete input sources.
28    ///
29    /// This returns a stream of resolved input sources for the given input,
30    /// taking into account the matching file extensions and respecting
31    /// exclusions. Glob patterns are expanded into individual file paths.
32    ///
33    /// # Returns
34    ///
35    /// Returns a stream of `Result<ResolvedInputSource>` for all matching input
36    /// sources. Glob patterns are expanded, so `FsGlob` never appears in the
37    /// output.
38    ///
39    /// # Errors
40    ///
41    /// Returns an error (within the stream) if:
42    /// - The glob pattern is invalid or expansion encounters I/O errors
43    /// - Directory traversal fails, including:
44    ///   - Permission denied when accessing directories or files
45    ///   - I/O errors while reading directory contents
46    ///   - Filesystem errors (disk errors, network filesystem issues, etc.)
47    ///   - Invalid file paths or symbolic link resolution failures
48    /// - Errors when reading or evaluating `.gitignore` or `.ignore` files
49    /// - Errors occur during file extension or path exclusion evaluation
50    ///
51    /// Once an error is returned, resolution of that input source halts
52    /// and no further `Ok(ResolvedInputSource)` will be produced.
53    #[must_use]
54    pub fn resolve<'a>(
55        input: &Input,
56        file_extensions: FileExtensions,
57        skip_hidden: bool,
58        skip_ignored: bool,
59        excluded_paths: &'a PathExcludes,
60    ) -> Pin<Box<dyn Stream<Item = Result<ResolvedInputSource>> + Send + 'a>> {
61        Self::resolve_input(
62            input,
63            file_extensions,
64            skip_hidden,
65            skip_ignored,
66            excluded_paths,
67        )
68    }
69
70    /// Create a [`Walk`] iterator for directory traversal
71    ///
72    /// # Errors
73    ///
74    /// Fails if [`FileExtensions`] cannot be converted
75    pub(crate) fn walk(
76        path: &Path,
77        file_extensions: FileExtensions,
78        skip_hidden: bool,
79        skip_ignored: bool,
80    ) -> Result<Walk> {
81        Ok(WalkBuilder::new(path)
82            // Skip over files which are ignored by git or `.ignore` if necessary
83            .git_ignore(skip_ignored)
84            .git_global(skip_ignored)
85            .git_exclude(skip_ignored)
86            .ignore(skip_ignored)
87            .parents(skip_ignored)
88            // Ignore hidden files if necessary
89            .hidden(skip_hidden)
90            // Configure the file types filter to only include files with matching extensions
91            .types(file_extensions.build(skip_hidden)?)
92            .build())
93    }
94
95    /// Internal method for resolving input sources.
96    ///
97    /// Takes an Input and returns a stream of `ResolvedInputSource` items,
98    /// expanding glob patterns and applying filtering based on the provided
99    /// configuration.
100    fn resolve_input<'a>(
101        input: &Input,
102        file_extensions: FileExtensions,
103        skip_hidden: bool,
104        skip_ignored: bool,
105        excluded_paths: &'a PathExcludes,
106    ) -> Pin<Box<dyn Stream<Item = Result<ResolvedInputSource>> + Send + 'a>> {
107        match &input.source {
108            InputSource::RemoteUrl(url) => {
109                let url = url.clone();
110                Box::pin(once(async move { Ok(ResolvedInputSource::RemoteUrl(url)) }))
111            }
112            InputSource::FsGlob {
113                pattern,
114                ignore_case,
115            } => {
116                // NOTE: we convert the glob::Pattern back to str because
117                // `glob_with` only takes string arguments.
118                let glob_expanded = tilde(pattern.as_str()).to_string();
119                let mut match_opts = glob::MatchOptions::new();
120                match_opts.case_sensitive = !ignore_case;
121                // Match shell behavior: wildcards like `*` and `**` should not
122                // match hidden files/dirs by default.
123                match_opts.require_literal_leading_dot = skip_hidden;
124
125                Box::pin(try_stream! {
126                    // For glob patterns, we expand the pattern and yield
127                    // matching paths as ResolvedInputSource::FsPath items.
128                    for entry in glob_with(&glob_expanded, match_opts)? {
129                        match entry {
130                            Ok(path) => {
131                                // Skip directories or files that don't match
132                                // extensions
133                                if path.is_dir() {
134                                    continue;
135                                }
136                                if Self::is_excluded_path(&path, excluded_paths) {
137                                    continue;
138                                }
139
140                                // We do not filter by extensions here.
141                                //
142                                // Instead, we always check files captured by
143                                // the glob pattern, as the user explicitly
144                                // specified them.
145                                yield ResolvedInputSource::FsPath(path);
146                            }
147                            Err(e) => {
148                                eprintln!("Error in glob pattern: {e:?}");
149                            }
150                        }
151                    }
152                })
153            }
154            InputSource::FsPath(path) => {
155                if path.is_dir() {
156                    let walk = match Self::walk(path, file_extensions, skip_hidden, skip_ignored) {
157                        Ok(x) => x,
158                        Err(e) => {
159                            return Box::pin(once(async move { Err(e) }));
160                        }
161                    };
162
163                    Box::pin(try_stream! {
164                        for entry in walk {
165                            let entry = entry?;
166                            if Self::is_excluded_path(entry.path(), excluded_paths)
167                            {
168                                continue;
169                            }
170
171                            match entry.file_type() {
172                                None => continue,
173                                Some(file_type) => {
174                                    if !file_type.is_file() {
175                                        continue;
176                                    }
177                                }
178                            }
179
180                            yield ResolvedInputSource::FsPath(
181                                entry.path().to_path_buf()
182                            );
183                        }
184                    })
185                } else {
186                    // For individual files, yield if not excluded.
187                    //
188                    // We do not filter by extension here, as individual
189                    // files should always be checked, no matter if their
190                    // extension matches or not.
191                    //
192                    // This follows the principle of least surprise because
193                    // the user explicitly specified the file, so they
194                    // expect it to be checked.
195                    if Self::is_excluded_path(path, excluded_paths) {
196                        Box::pin(futures::stream::empty())
197                    } else {
198                        let path = path.clone();
199                        Box::pin(once(async move { Ok(ResolvedInputSource::FsPath(path)) }))
200                    }
201                }
202            }
203            InputSource::Stdin => Box::pin(once(async move { Ok(ResolvedInputSource::Stdin) })),
204            InputSource::String(s) => {
205                let s = s.clone();
206                Box::pin(once(async move { Ok(ResolvedInputSource::String(s)) }))
207            }
208        }
209    }
210
211    /// Check if the given path was excluded from link checking
212    fn is_excluded_path(path: &Path, excluded_paths: &PathExcludes) -> bool {
213        excluded_paths.is_match(&path.to_string_lossy())
214    }
215}