lychee_lib/types/input/
resolver.rs

1//! Input source resolution.
2//!
3//! Provides the `InputResolver` which handles resolution of various input sources
4//! into concrete, processable sources by expanding glob patterns and applying filters.
5
6use super::input::Input;
7use super::source::{InputSource, ResolvedInputSource};
8use crate::Result;
9use crate::filter::PathExcludes;
10use crate::types::file::FileExtensions;
11use async_stream::try_stream;
12use futures::stream::Stream;
13use futures::stream::once;
14use glob::glob_with;
15use ignore::{Walk, WalkBuilder};
16use shellexpand::tilde;
17use std::path::Path;
18use std::pin::Pin;
19
/// Stateless entry point for turning an input into concrete sources.
///
/// The type carries no data; all functionality is exposed through associated
/// functions that expand glob patterns, walk directories and apply the
/// configured path exclusions.
#[derive(Debug, Clone, Copy)]
pub struct InputResolver;
25
26impl InputResolver {
27    /// Resolve an input into a stream of concrete input sources.
28    ///
29    /// This returns a stream of resolved input sources for the given input,
30    /// taking into account the matching file extensions and respecting
31    /// exclusions. Glob patterns are expanded into individual file paths.
32    ///
33    /// # Returns
34    ///
35    /// Returns a stream of `Result<ResolvedInputSource>` for all matching input
36    /// sources. Glob patterns are expanded, so `FsGlob` never appears in the
37    /// output.
38    ///
39    /// # Errors
40    ///
41    /// Returns an error (within the stream) if:
42    /// - The glob pattern is invalid or expansion encounters I/O errors
43    /// - Directory traversal fails, including:
44    ///   - Permission denied when accessing directories or files
45    ///   - I/O errors while reading directory contents
46    ///   - Filesystem errors (disk errors, network filesystem issues, etc.)
47    ///   - Invalid file paths or symbolic link resolution failures
48    /// - Errors when reading or evaluating `.gitignore` or `.ignore` files
49    /// - Errors occur during file extension or path exclusion evaluation
50    ///
51    /// Once an error is returned, resolution of that input source halts
52    /// and no further `Ok(ResolvedInputSource)` will be produced.
53    #[must_use]
54    pub fn resolve<'a>(
55        input: &Input,
56        file_extensions: FileExtensions,
57        skip_hidden: bool,
58        skip_ignored: bool,
59        excluded_paths: &'a PathExcludes,
60    ) -> Pin<Box<dyn Stream<Item = Result<ResolvedInputSource>> + Send + 'a>> {
61        Self::resolve_input(
62            input,
63            file_extensions,
64            skip_hidden,
65            skip_ignored,
66            excluded_paths,
67        )
68    }
69
70    /// Create a [`Walk`] iterator for directory traversal
71    ///
72    /// # Errors
73    ///
74    /// Fails if [`FileExtensions`] cannot be converted
75    pub(crate) fn walk(
76        path: &Path,
77        file_extensions: FileExtensions,
78        skip_hidden: bool,
79        skip_ignored: bool,
80    ) -> Result<Walk> {
81        Ok(WalkBuilder::new(path)
82            // Skip over files which are ignored by git or `.ignore` if necessary
83            .git_ignore(skip_ignored)
84            .git_global(skip_ignored)
85            .git_exclude(skip_ignored)
86            .ignore(skip_ignored)
87            .parents(skip_ignored)
88            // Ignore hidden files if necessary
89            .hidden(skip_hidden)
90            // Configure the file types filter to only include files with matching extensions
91            .types(file_extensions.build(skip_hidden)?)
92            .build())
93    }
94
95    /// Internal method for resolving input sources.
96    ///
97    /// Takes an Input and returns a stream of `ResolvedInputSource` items,
98    /// expanding glob patterns and applying filtering based on the provided
99    /// configuration.
100    fn resolve_input<'a>(
101        input: &Input,
102        file_extensions: FileExtensions,
103        skip_hidden: bool,
104        skip_ignored: bool,
105        excluded_paths: &'a PathExcludes,
106    ) -> Pin<Box<dyn Stream<Item = Result<ResolvedInputSource>> + Send + 'a>> {
107        match &input.source {
108            InputSource::RemoteUrl(url) => {
109                let url = url.clone();
110                Box::pin(once(async move { Ok(ResolvedInputSource::RemoteUrl(url)) }))
111            }
112            InputSource::FsGlob {
113                pattern,
114                ignore_case,
115            } => {
116                // NOTE: we convert the glob::Pattern back to str because
117                // `glob_with` only takes string arguments.
118                let glob_expanded = tilde(pattern.as_str()).to_string();
119                let mut match_opts = glob::MatchOptions::new();
120                match_opts.case_sensitive = !ignore_case;
121
122                Box::pin(try_stream! {
123                    // For glob patterns, we expand the pattern and yield
124                    // matching paths as ResolvedInputSource::FsPath items.
125                    for entry in glob_with(&glob_expanded, match_opts)? {
126                        match entry {
127                            Ok(path) => {
128                                // Skip directories or files that don't match
129                                // extensions
130                                if path.is_dir() {
131                                    continue;
132                                }
133                                if Self::is_excluded_path(&path, excluded_paths) {
134                                    continue;
135                                }
136
137                                // We do not filter by extensions here.
138                                //
139                                // Instead, we always check files captured by
140                                // the glob pattern, as the user explicitly
141                                // specified them.
142                                yield ResolvedInputSource::FsPath(path);
143                            }
144                            Err(e) => {
145                                eprintln!("Error in glob pattern: {e:?}");
146                            }
147                        }
148                    }
149                })
150            }
151            InputSource::FsPath(path) => {
152                if path.is_dir() {
153                    let walk = match Self::walk(path, file_extensions, skip_hidden, skip_ignored) {
154                        Ok(x) => x,
155                        Err(e) => {
156                            return Box::pin(once(async move { Err(e) }));
157                        }
158                    };
159
160                    Box::pin(try_stream! {
161                        for entry in walk {
162                            let entry = entry?;
163                            if Self::is_excluded_path(entry.path(), excluded_paths)
164                            {
165                                continue;
166                            }
167
168                            match entry.file_type() {
169                                None => continue,
170                                Some(file_type) => {
171                                    if !file_type.is_file() {
172                                        continue;
173                                    }
174                                }
175                            }
176
177                            yield ResolvedInputSource::FsPath(
178                                entry.path().to_path_buf()
179                            );
180                        }
181                    })
182                } else {
183                    // For individual files, yield if not excluded.
184                    //
185                    // We do not filter by extension here, as individual
186                    // files should always be checked, no matter if their
187                    // extension matches or not.
188                    //
189                    // This follows the principle of least surprise because
190                    // the user explicitly specified the file, so they
191                    // expect it to be checked.
192                    if Self::is_excluded_path(path, excluded_paths) {
193                        Box::pin(futures::stream::empty())
194                    } else {
195                        let path = path.clone();
196                        Box::pin(once(async move { Ok(ResolvedInputSource::FsPath(path)) }))
197                    }
198                }
199            }
200            InputSource::Stdin => Box::pin(once(async move { Ok(ResolvedInputSource::Stdin) })),
201            InputSource::String(s) => {
202                let s = s.clone();
203                Box::pin(once(async move { Ok(ResolvedInputSource::String(s)) }))
204            }
205        }
206    }
207
208    /// Check if the given path was excluded from link checking
209    fn is_excluded_path(path: &Path, excluded_paths: &PathExcludes) -> bool {
210        excluded_paths.is_match(&path.to_string_lossy())
211    }
212}