1use super::InputResolver;
7use super::content::InputContent;
8use super::source::{InputSource, ResolvedInputSource};
9use crate::Preprocessor;
10use crate::filter::PathExcludes;
11use crate::types::{FileType, RequestError, file::FileExtensions, resolver::UrlContentResolver};
12use crate::{ErrorKind, LycheeResult};
13use async_stream::try_stream;
14use futures::stream::{Stream, StreamExt};
15use log::debug;
16use std::path::{Path, PathBuf};
17use tokio::io::{AsyncReadExt, stdin};
18
19#[derive(Clone, Debug, PartialEq, Eq, Hash)]
21pub struct Input {
22 pub source: InputSource,
24
25 pub file_type_hint: Option<FileType>,
30}
31
32impl Input {
33 pub fn new(
43 input: &str,
44 file_type_hint: Option<FileType>,
45 glob_ignore_case: bool,
46 ) -> LycheeResult<Self> {
47 let source = InputSource::new(input, glob_ignore_case)?;
48 Ok(Self {
49 source,
50 file_type_hint,
51 })
52 }
53
54 pub fn from_value(value: &str) -> LycheeResult<Self> {
62 Self::new(value, None, false)
63 }
64
65 #[must_use]
69 pub const fn from_input_source(source: InputSource) -> Self {
70 Self {
71 source,
72 file_type_hint: None,
73 }
74 }
75
76 #[allow(
87 clippy::too_many_arguments,
88 reason = "https://github.com/lycheeverse/lychee/issues/1898"
89 )]
90 pub fn get_contents(
91 self,
92 skip_missing: bool,
93 skip_hidden: bool,
94 skip_ignored: bool,
95 file_extensions: FileExtensions,
96 resolver: UrlContentResolver,
97 excluded_paths: PathExcludes,
98 preprocessor: Option<Preprocessor>,
99 ) -> impl Stream<Item = Result<InputContent, RequestError>> {
100 try_stream! {
101 let source = self.source.clone();
102
103 let user_input_error =
104 move |e: ErrorKind| RequestError::UserInputContent(source.clone(), e);
105 let discovered_input_error =
106 |e: ErrorKind| RequestError::GetInputContent(self.source.clone(), e);
107
108 match self.source {
114 InputSource::RemoteUrl(url) => {
115 match resolver.url_contents(*url).await {
116 Err(_) if skip_missing => (),
117 Err(e) => Err(user_input_error(e))?,
118 Ok(content) => yield content,
119 }
120 return;
121 }
122 InputSource::FsPath(ref path) => {
123 let is_readable = if path.is_dir() {
124 path.read_dir()
125 .map(|_| ())
126 .map_err(|e| ErrorKind::DirTraversal(ignore::Error::Io(e)))
127 } else {
128 path.metadata()
132 .map(|_| ())
133 .map_err(|e| ErrorKind::ReadFileInput(e, path.clone()))
134 };
135
136 is_readable.map_err(user_input_error)?;
137 }
138 InputSource::Stdin => {
139 yield Self::stdin_content(self.file_type_hint)
140 .await
141 .map_err(user_input_error)?;
142 return;
143 }
144 InputSource::String(ref s) => {
145 yield Self::string_content(s, self.file_type_hint);
146 return;
147 }
148 _ => {}
149 }
150
151 let mut sources_stream = InputResolver::resolve(
153 &self,
154 file_extensions,
155 skip_hidden,
156 skip_ignored,
157 &excluded_paths,
158 );
159
160 let mut sources_empty = true;
161
162 while let Some(source_result) = sources_stream.next().await {
163 match source_result {
164 Ok(source) => {
165 let content_result = match source {
166 ResolvedInputSource::FsPath(path) => {
167 Self::path_content(&path, preprocessor.as_ref()).await
168 },
169 ResolvedInputSource::RemoteUrl(url) => {
170 resolver.url_contents(*url).await
171 }
172 ResolvedInputSource::Stdin => {
173 Self::stdin_content(self.file_type_hint).await
174 }
175 ResolvedInputSource::String(s) => {
176 Ok(Self::string_content(&s, self.file_type_hint))
177 }
178 };
179
180 match content_result {
181 Err(_) if skip_missing => (),
182 Err(e) if matches!(&e, ErrorKind::ReadFileInput(io_err, _) if io_err.kind() == std::io::ErrorKind::InvalidData) =>
183 {
184 if let ErrorKind::ReadFileInput(_, path) = &e {
186 log::warn!(
187 "Skipping file with invalid UTF-8 content: {}",
188 path.display()
189 );
190 }
191 }
192 Err(e) => Err(discovered_input_error(e))?,
193 Ok(content) => {
194 sources_empty = false;
195 yield content
196 }
197 }
198 }
199 Err(e) => Err(discovered_input_error(e))?,
200 }
201 }
202
203 if sources_empty {
204 log::warn!("{}: No files found for this input source", self.source);
205 }
206 }
207 }
208
209 pub fn get_sources(
222 self,
223 file_extensions: FileExtensions,
224 skip_hidden: bool,
225 skip_ignored: bool,
226 excluded_paths: &PathExcludes,
227 ) -> impl Stream<Item = LycheeResult<String>> {
228 InputResolver::resolve(
229 &self,
230 file_extensions,
231 skip_hidden,
232 skip_ignored,
233 excluded_paths,
234 )
235 .map(|res| {
236 res.map(|src| match src {
237 ResolvedInputSource::FsPath(path) => path.to_string_lossy().to_string(),
238 ResolvedInputSource::RemoteUrl(url) => url.to_string(),
239 ResolvedInputSource::Stdin => "<stdin>".to_string(),
240 ResolvedInputSource::String(_) => "<raw string>".to_string(),
241 })
242 })
243 }
244
245 pub async fn path_content<P: Into<PathBuf> + AsRef<Path> + Clone>(
252 path: P,
253 preprocessor: Option<&Preprocessor>,
254 ) -> LycheeResult<InputContent> {
255 let path = path.into();
256 let content = Self::get_content(&path, preprocessor).await?;
257
258 Ok(InputContent {
259 file_type: FileType::from(&path),
260 source: ResolvedInputSource::FsPath(path),
261 content,
262 })
263 }
264
265 pub async fn stdin_content(file_type_hint: Option<FileType>) -> LycheeResult<InputContent> {
271 let mut content = String::new();
272 let mut stdin = stdin();
273
274 debug!("Reading content from stdin"); stdin.read_to_string(&mut content).await?;
276
277 let input_content = InputContent {
278 source: ResolvedInputSource::Stdin,
279 file_type: file_type_hint.unwrap_or_default(),
280 content,
281 };
282
283 Ok(input_content)
284 }
285
286 #[must_use]
288 pub fn string_content(s: &str, file_type_hint: Option<FileType>) -> InputContent {
289 InputContent::from_string(s, file_type_hint.unwrap_or_default())
290 }
291
292 async fn get_content(
295 path: &PathBuf,
296 preprocessor: Option<&Preprocessor>,
297 ) -> LycheeResult<String> {
298 if let Some(pre) = preprocessor {
299 pre.process(path)
300 } else {
301 Ok(tokio::fs::read_to_string(path)
302 .await
303 .map_err(|e| ErrorKind::ReadFileInput(e, path.clone()))?)
304 }
305 }
306}
307
308impl TryFrom<&str> for Input {
309 type Error = crate::ErrorKind;
310
311 fn try_from(value: &str) -> Result<Self, Self::Error> {
312 Self::from_value(value)
313 }
314}
315
#[cfg(test)]
mod tests {
    use super::*;
    use crate::filter::PathExcludes;

    /// Test helper: reports whether `path` (in its lossy string form) is
    /// matched by `excluded_paths`.
    pub fn is_excluded_path(excluded_paths: &PathExcludes, path: &Path) -> bool {
        let lossy = path.to_string_lossy();
        excluded_paths.is_match(&lossy)
    }

    /// An existing relative path is accepted as a filesystem path input.
    #[test]
    fn test_input_handles_real_relative_paths() {
        let relative = "./Cargo.toml";
        let as_path = Path::new(relative);

        assert!(as_path.exists());
        assert!(as_path.is_relative());

        let parsed = Input::new(relative, None, false);
        assert!(parsed.is_ok());
        assert!(matches!(
            parsed,
            Ok(Input {
                source: InputSource::FsPath(PathBuf { .. }),
                file_type_hint: None,
            })
        ));
    }

    /// A relative path that does not exist is rejected as an invalid file.
    #[test]
    fn test_input_handles_nonexistent_relative_paths() {
        let relative = "./nonexistent/relative/path";
        let as_path = Path::new(relative);

        assert!(!as_path.exists());
        assert!(as_path.is_relative());

        let parsed = Input::from_value(relative);
        assert!(parsed.is_err());
        assert!(matches!(
            parsed,
            Err(ErrorKind::InvalidFile(PathBuf { .. }))
        ));
    }

    /// With no exclusions configured, nothing is excluded.
    #[test]
    fn test_no_exclusions() {
        let tmp = tempfile::tempdir().unwrap();
        let no_excludes = PathExcludes::empty();
        assert!(!is_excluded_path(&no_excludes, tmp.path()));
    }

    /// A path listed in the exclusions is excluded.
    #[test]
    fn test_excluded() {
        let tmp = tempfile::tempdir().unwrap();
        let target = tmp.path();
        let excludes = PathExcludes::new([target.to_string_lossy()]).unwrap();
        assert!(is_excluded_path(&excludes, target));
    }

    /// Excluding a directory also excludes directories below it.
    #[test]
    fn test_excluded_subdir() {
        let outer_dir = tempfile::tempdir().unwrap();
        let outer = outer_dir.path();
        let inner_dir = tempfile::tempdir_in(outer).unwrap();
        let inner = inner_dir.path();

        let excludes = PathExcludes::new([outer.to_string_lossy()]).unwrap();
        assert!(is_excluded_path(&excludes, inner));
    }

    /// A bare host name is interpreted as an `http` URL.
    #[test]
    fn test_url_without_scheme() {
        let rendered = Input::from_value("example.com")
            .unwrap()
            .source
            .to_string();
        assert_eq!(rendered, "http://example.com/");
    }

    /// A Windows-style path that does not exist is rejected as an invalid file.
    #[cfg(windows)]
    #[test]
    fn test_windows_style_filepath_not_existing() {
        let parsed = Input::from_value("C:\\example\\project\\here");
        assert!(parsed.is_err());

        assert!(
            matches!(parsed.unwrap_err(), ErrorKind::InvalidFile(_)),
            "Should have received InvalidFile error"
        );
    }

    /// A Windows-style path to an existing file is accepted as a filesystem path.
    #[cfg(windows)]
    #[test]
    fn test_windows_style_filepath_existing() {
        use std::env::temp_dir;
        use tempfile::NamedTempFile;

        let file = NamedTempFile::new_in(temp_dir()).unwrap();
        let input = Input::from_value(file.path().to_str().unwrap()).unwrap();

        assert!(
            matches!(input.source, InputSource::FsPath(_)),
            "Input source should be FsPath but was not"
        );
    }

    /// `http` and `https` URLs are accepted as remote URL inputs.
    #[test]
    fn test_url_scheme_check_succeeding() {
        let accepted = [
            "http://example.com",
            "https://example.com",
            "http://subdomain.example.com/path?query=value",
            "https://example.com:8080",
        ];

        for url in accepted {
            assert!(
                matches!(
                    Input::from_value(url),
                    Ok(Input {
                        source: InputSource::RemoteUrl(_),
                        ..
                    })
                ),
                "expected a remote URL input for {}",
                url
            );
        }
    }

    /// Inputs with any other scheme are rejected as invalid files.
    #[test]
    fn test_url_scheme_check_failing() {
        let rejected = [
            "ftp://example.com",
            "httpx://example.com",
            "file:///path/to/file",
            "mailto:user@example.com",
        ];

        for value in rejected {
            assert!(
                matches!(
                    Input::from_value(value),
                    Err(ErrorKind::InvalidFile(_))
                ),
                "expected an InvalidFile error for {}",
                value
            );
        }
    }

    /// Non-URL inputs: a missing path, a glob pattern and the current directory.
    #[test]
    fn test_non_url_inputs() {
        // A path that does not exist is an error.
        let missing = Input::from_value("./local/path");
        assert!(matches!(missing, Err(ErrorKind::InvalidFile(_))));

        // A glob pattern is accepted as such.
        let glob = Input::from_value("*.md");
        assert!(matches!(
            glob,
            Ok(Input {
                source: InputSource::FsGlob { .. },
                ..
            })
        ));

        // The current directory exists, so it is a filesystem path.
        let current_dir = Input::from_value(".");
        assert!(matches!(
            current_dir,
            Ok(Input {
                source: InputSource::FsPath(_),
                ..
            })
        ));
    }
}