1use super::InputResolver;
7use super::content::InputContent;
8use super::source::{InputSource, ResolvedInputSource};
9use crate::Preprocessor;
10use crate::filter::PathExcludes;
11use crate::types::{FileType, RequestError, file::FileExtensions, resolver::UrlContentResolver};
12use crate::{ErrorKind, LycheeResult};
13use async_stream::try_stream;
14use futures::stream::{Stream, StreamExt};
15use log::debug;
16use std::io::IsTerminal;
17use std::path::{Path, PathBuf};
18use tokio::io::{AsyncReadExt, stdin};
19
20#[derive(Clone, Debug, PartialEq, Eq, Hash)]
22pub struct Input {
23 pub source: InputSource,
25
26 pub file_type_hint: Option<FileType>,
31}
32
33impl Input {
34 pub fn new(
44 input: &str,
45 file_type_hint: Option<FileType>,
46 glob_ignore_case: bool,
47 ) -> LycheeResult<Self> {
48 let source = InputSource::new(input, glob_ignore_case)?;
49 Ok(Self {
50 source,
51 file_type_hint,
52 })
53 }
54
55 pub fn from_value(value: &str) -> LycheeResult<Self> {
63 Self::new(value, None, false)
64 }
65
66 #[must_use]
70 pub const fn from_input_source(source: InputSource) -> Self {
71 Self {
72 source,
73 file_type_hint: None,
74 }
75 }
76
    /// Stream the contents of this input.
    ///
    /// Consumes the input and yields one [`InputContent`] per source that
    /// could be read:
    ///
    /// - A remote URL is fetched through `resolver` and yielded directly.
    /// - Stdin and raw strings are read and yielded directly, using
    ///   `file_type_hint` for the file type.
    /// - A filesystem path is first checked for readability (directory
    ///   listing for directories, metadata lookup otherwise). It and every
    ///   remaining source kind are then expanded by `InputResolver::resolve`
    ///   and each resolved source is read in turn.
    ///
    /// # Arguments
    ///
    /// - `skip_missing`: when `true`, a failed remote fetch or a failed read
    ///   of a resolved source is silently dropped instead of being reported.
    ///   It does not suppress the up-front readability check of a filesystem
    ///   path, a read error on a direct stdin input, or errors produced by
    ///   the resolver itself.
    /// - `skip_hidden`, `skip_ignored`, `file_extensions`, `excluded_paths`:
    ///   forwarded unchanged to `InputResolver::resolve`.
    /// - `resolver`: fetches the content behind remote URLs.
    /// - `preprocessor`: passed on to [`Input::path_content`] for every
    ///   resolved filesystem path.
    ///
    /// # Errors
    ///
    /// The stream yields `RequestError::UserInputContent` when the initial
    /// handling of the input itself fails (remote fetch, path readability
    /// check, stdin read) and `RequestError::GetInputContent` when resolving
    /// or reading a discovered source fails. Resolved files whose read fails
    /// with `std::io::ErrorKind::InvalidData` are skipped with a warning
    /// rather than reported.
    #[allow(
        clippy::too_many_arguments,
        reason = "https://github.com/lycheeverse/lychee/issues/1898"
    )]
    pub fn get_contents(
        self,
        skip_missing: bool,
        skip_hidden: bool,
        skip_ignored: bool,
        file_extensions: FileExtensions,
        resolver: UrlContentResolver,
        excluded_paths: PathExcludes,
        preprocessor: Option<Preprocessor>,
    ) -> impl Stream<Item = Result<InputContent, RequestError>> {
        try_stream! {
            // Owned copy for the `move` closure below, so that `self.source`
            // itself stays available for the `match` and the second closure.
            let source = self.source.clone();

            // Error for failures of the input exactly as it was handed to us.
            let user_input_error =
                move |e: ErrorKind| RequestError::UserInputContent(source.clone(), e.into());
            // Error for failures while resolving or reading discovered sources.
            let discovered_input_error =
                |e: ErrorKind| RequestError::GetInputContent(self.source.clone(), e.into());

            // Handle the sources that need no resolution right away; the
            // remaining ones fall through to the resolver below.
            match self.source {
                InputSource::RemoteUrl(url) => {
                    match resolver.url_contents(*url).await {
                        // Fetch failures are dropped silently when requested.
                        Err(_) if skip_missing => (),
                        Err(e) => Err(user_input_error(e))?,
                        Ok(content) => yield content,
                    }
                    return;
                }
                InputSource::FsPath(ref path) => {
                    // Readability check only: nothing is yielded here, the
                    // path is expanded and read by the resolver loop below.
                    let is_readable = if path.is_dir() {
                        path.read_dir()
                            .map(|_| ())
                            .map_err(|e| ErrorKind::DirTraversal(ignore::Error::Io(e)))
                    } else {
                        // Metadata lookup only; the file is not opened or
                        // read at this point.
                        path.metadata()
                            .map(|_| ())
                            .map_err(|e| ErrorKind::ReadFileInput(e, path.clone()))
                    };
                    // Not guarded by `skip_missing`: an unreadable path given
                    // as input is always reported.
                    is_readable.map_err(user_input_error)?;
                }
                InputSource::Stdin => {
                    yield Self::stdin_content(self.file_type_hint)
                        .await
                        .map_err(user_input_error)?;
                    return;
                }
                InputSource::String(ref s) => {
                    yield Self::string_content(s, self.file_type_hint);
                    return;
                }
                // Every other source kind is handled purely by the resolver.
                _ => {}
            }

            let mut sources_stream = InputResolver::resolve(
                &self,
                file_extensions,
                skip_hidden,
                skip_ignored,
                &excluded_paths,
            );

            // Stays `true` until at least one content item has been yielded;
            // sources that were skipped do not count.
            let mut sources_empty = true;

            while let Some(source_result) = sources_stream.next().await {
                match source_result {
                    Ok(source) => {
                        let content_result = match source {
                            ResolvedInputSource::FsPath(path) => {
                                Self::path_content(&path, preprocessor.as_ref()).await
                            },
                            ResolvedInputSource::RemoteUrl(url) => {
                                resolver.url_contents(*url).await
                            }
                            ResolvedInputSource::Stdin => {
                                Self::stdin_content(self.file_type_hint).await
                            }
                            ResolvedInputSource::String(s) => {
                                Ok(Self::string_content(&s, self.file_type_hint))
                            }
                        };

                        match content_result {
                            // Arm order matters: with `skip_missing` set, every
                            // read error is dropped here, so the invalid-data
                            // warning below is not logged either.
                            Err(_) if skip_missing => (),
                            Err(e) if matches!(&e, ErrorKind::ReadFileInput(io_err, _) if io_err.kind() == std::io::ErrorKind::InvalidData) =>
                            {
                                // Skip the file with a warning instead of
                                // turning it into a stream error.
                                if let ErrorKind::ReadFileInput(_, path) = &e {
                                    log::warn!(
                                        "Skipping file with invalid UTF-8 content: {}",
                                        path.display()
                                    );
                                }
                            }
                            Err(e) => Err(discovered_input_error(e))?,
                            Ok(content) => {
                                sources_empty = false;
                                yield content
                            }
                        }
                    }
                    // The resolver itself failed; never suppressed by
                    // `skip_missing`.
                    Err(e) => Err(discovered_input_error(e))?,
                }
            }

            if sources_empty {
                log::warn!("{}: No files found for this input source", self.source);
            }
        }
    }
209
210 pub fn get_sources(
223 self,
224 file_extensions: FileExtensions,
225 skip_hidden: bool,
226 skip_ignored: bool,
227 excluded_paths: &PathExcludes,
228 ) -> impl Stream<Item = LycheeResult<String>> {
229 InputResolver::resolve(
230 &self,
231 file_extensions,
232 skip_hidden,
233 skip_ignored,
234 excluded_paths,
235 )
236 .map(|res| {
237 res.map(|src| match src {
238 ResolvedInputSource::FsPath(path) => path.to_string_lossy().to_string(),
239 ResolvedInputSource::RemoteUrl(url) => url.to_string(),
240 ResolvedInputSource::Stdin => "<stdin>".to_string(),
241 ResolvedInputSource::String(_) => "<raw string>".to_string(),
242 })
243 })
244 }
245
246 pub async fn path_content<P: Into<PathBuf> + AsRef<Path> + Clone>(
253 path: P,
254 preprocessor: Option<&Preprocessor>,
255 ) -> LycheeResult<InputContent> {
256 let path = path.into();
257 let content = Self::get_content(&path, preprocessor).await?;
258
259 Ok(InputContent {
260 file_type: FileType::from(&path),
261 source: ResolvedInputSource::FsPath(path),
262 content,
263 })
264 }
265
266 pub async fn stdin_content(file_type_hint: Option<FileType>) -> LycheeResult<InputContent> {
272 let mut content = String::new();
273 let mut stdin = stdin();
274
275 if std::io::stdin().is_terminal() {
276 debug!("Reading content from stdin");
278 }
279 stdin.read_to_string(&mut content).await?;
280
281 let input_content = InputContent {
282 source: ResolvedInputSource::Stdin,
283 file_type: file_type_hint.unwrap_or_default(),
284 content,
285 };
286
287 Ok(input_content)
288 }
289
290 #[must_use]
292 pub fn string_content(s: &str, file_type_hint: Option<FileType>) -> InputContent {
293 InputContent::from_string(s, file_type_hint.unwrap_or_default())
294 }
295
296 async fn get_content(
299 path: &PathBuf,
300 preprocessor: Option<&Preprocessor>,
301 ) -> LycheeResult<String> {
302 if let Some(pre) = preprocessor {
303 pre.process(path)
304 } else {
305 Ok(tokio::fs::read_to_string(path)
306 .await
307 .map_err(|e| ErrorKind::ReadFileInput(e, path.clone()))?)
308 }
309 }
310}
311
312impl TryFrom<&str> for Input {
313 type Error = crate::ErrorKind;
314
315 fn try_from(value: &str) -> Result<Self, Self::Error> {
316 Self::from_value(value)
317 }
318}
319
#[cfg(test)]
mod tests {
    use super::*;
    use crate::filter::PathExcludes;

    /// Whether `path` is matched by the exclusion patterns in `excluded_paths`.
    pub fn is_excluded_path(excluded_paths: &PathExcludes, path: &Path) -> bool {
        let lossy = path.to_string_lossy();
        excluded_paths.is_match(&lossy)
    }

    /// Whether `value` parses into an input backed by a remote URL.
    fn is_remote_url_input(value: &str) -> bool {
        matches!(
            Input::from_value(value),
            Ok(Input {
                source: InputSource::RemoteUrl(_),
                ..
            })
        )
    }

    /// Whether `value` is rejected with `ErrorKind::InvalidInput`.
    fn is_invalid_input(value: &str) -> bool {
        matches!(Input::from_value(value), Err(ErrorKind::InvalidInput(_)))
    }

    #[test]
    fn test_input_handles_real_relative_paths() {
        let test_file = "./Cargo.toml";

        // Preconditions: the file exists and the path is relative.
        let relative = Path::new(test_file);
        assert!(relative.exists());
        assert!(relative.is_relative());

        let parsed = Input::new(test_file, None, false);
        assert!(parsed.is_ok());
        assert!(matches!(
            parsed,
            Ok(Input {
                source: InputSource::FsPath(PathBuf { .. }),
                file_type_hint: None,
            })
        ));
    }

    #[test]
    fn test_input_handles_nonexistent_relative_paths() {
        let test_file = "./nonexistent/relative/path";

        // Preconditions: nothing exists there and the path is relative.
        let relative = Path::new(test_file);
        assert!(!relative.exists());
        assert!(relative.is_relative());

        let parsed = Input::from_value(test_file);
        assert!(parsed.is_err());
        assert!(matches!(parsed, Err(ErrorKind::InvalidInput(_))));
    }

    #[test]
    fn test_no_exclusions() {
        let dir = tempfile::tempdir().unwrap();
        let no_excludes = PathExcludes::empty();
        assert!(!is_excluded_path(&no_excludes, dir.path()));
    }

    #[test]
    fn test_excluded() {
        let dir = tempfile::tempdir().unwrap();
        let excluded_dir = dir.path();
        let excludes = PathExcludes::new([excluded_dir.to_string_lossy()]).unwrap();
        assert!(is_excluded_path(&excludes, excluded_dir));
    }

    #[test]
    fn test_excluded_subdir() {
        let parent_dir = tempfile::tempdir().unwrap();
        let child_dir = tempfile::tempdir_in(parent_dir.path()).unwrap();

        // Excluding the parent must also exclude the directory inside it.
        let excludes = PathExcludes::new([parent_dir.path().to_string_lossy()]).unwrap();
        assert!(is_excluded_path(&excludes, child_dir.path()));
    }

    #[test]
    fn test_url_without_scheme() {
        let parsed = Input::from_value("example.com");
        assert!(matches!(parsed, Err(ErrorKind::InvalidInput(_))));

        // The error message should point the user towards a full URL.
        let Err(error) = parsed else { return };
        assert!(error.to_string().contains("Use full URL"));
    }

    #[cfg(windows)]
    #[test]
    fn test_windows_style_filepath_not_existing() {
        let parsed = Input::from_value("C:\\example\\project\\here");
        assert!(parsed.is_err());

        let input = parsed.unwrap_err();
        assert!(
            matches!(input, ErrorKind::InvalidInput(_)),
            "Should have received InvalidInput error, got: {input:?}"
        );
    }

    #[cfg(windows)]
    #[test]
    fn test_windows_style_filepath_existing() {
        use std::env::temp_dir;
        use tempfile::NamedTempFile;

        let file = NamedTempFile::new_in(temp_dir()).unwrap();
        let path_str = file.path().to_str().unwrap();
        let input = Input::from_value(path_str).unwrap();

        assert!(
            matches!(input.source, InputSource::FsPath(_)),
            "Input source should be FsPath but was not"
        );
    }

    #[test]
    fn test_url_scheme_check_succeeding() {
        // Plain http(s) URLs, with and without path, query or port.
        assert!(is_remote_url_input("http://example.com"));
        assert!(is_remote_url_input("https://example.com"));
        assert!(is_remote_url_input(
            "http://subdomain.example.com/path?query=value"
        ));
        assert!(is_remote_url_input("https://example.com:8080"));
    }

    #[test]
    fn test_url_scheme_check_passing() {
        // Other schemes are accepted as remote URL inputs as well.
        assert!(is_remote_url_input("ftp://example.com"));
        assert!(is_remote_url_input("httpx://example.com"));
        assert!(is_remote_url_input("file:///path/to/file"));
        assert!(is_remote_url_input("mailto:user@example.com"));
    }

    #[test]
    fn test_non_url_inputs() {
        // A relative path that does not exist is rejected.
        assert!(is_invalid_input("./local/path"));

        // A glob pattern becomes a glob source.
        assert!(matches!(
            Input::from_value("*.md"),
            Ok(Input {
                source: InputSource::FsGlob { .. },
                ..
            })
        ));

        // The current directory is an existing filesystem path.
        assert!(matches!(
            Input::from_value("."),
            Ok(Input {
                source: InputSource::FsPath(_),
                ..
            })
        ));
    }
}