1use super::InputResolver;
7use super::content::InputContent;
8use super::source::InputSource;
9use super::source::ResolvedInputSource;
10use crate::filter::PathExcludes;
11use crate::types::FileType;
12use crate::types::file::FileExtensions;
13use crate::types::resolver::UrlContentResolver;
14use crate::{ErrorKind, Result};
15use async_stream::try_stream;
16use futures::stream::{Stream, StreamExt};
17use glob::glob_with;
18use ignore::WalkBuilder;
19use reqwest::Url;
20use shellexpand::tilde;
21use std::path::{Path, PathBuf};
22use tokio::io::{AsyncReadExt, stdin};
23
/// Input value that selects standard input instead of a file, glob or URL.
const STDIN: &str = "-";
25
26#[derive(Clone, Debug, PartialEq, Eq, Hash)]
28pub struct Input {
29 pub source: InputSource,
31
32 pub file_type_hint: Option<FileType>,
37}
38
39impl Input {
40 pub fn new(
50 input: &str,
51 file_type_hint: Option<FileType>,
52 glob_ignore_case: bool,
53 ) -> Result<Self> {
54 let source = if input == STDIN {
55 InputSource::Stdin
56 } else {
57 match Url::parse(input) {
59 Ok(url) if url.scheme() == "http" || url.scheme() == "https" => {
63 InputSource::RemoteUrl(Box::new(url))
64 }
65 Ok(_) => {
66 return Err(ErrorKind::InvalidFile(PathBuf::from(input)));
68 }
69 _ => {
70 let is_glob = glob::Pattern::escape(input) != input;
72
73 if is_glob {
74 InputSource::FsGlob {
75 pattern: input.to_owned(),
76 ignore_case: glob_ignore_case,
77 }
78 } else {
79 let path = PathBuf::from(input);
81
82 #[cfg(windows)]
86 if path.exists() {
87 InputSource::FsPath(path)
89 } else {
90 return Err(ErrorKind::InvalidFile(path));
93 }
94
95 #[cfg(unix)]
96 if path.exists() {
97 InputSource::FsPath(path)
98 } else if input.starts_with('~') || input.starts_with('.') {
99 return Err(ErrorKind::InvalidFile(path));
108 } else {
109 let url = Url::parse(&format!("http://{input}")).map_err(|e| {
119 ErrorKind::ParseUrl(e, "Input is not a valid URL".to_string())
120 })?;
121 InputSource::RemoteUrl(Box::new(url))
122 }
123 }
124 }
125 }
126 };
127 Ok(Self {
128 source,
129 file_type_hint,
130 })
131 }
132
133 pub fn from_value(value: &str) -> Result<Self> {
141 Self::new(value, None, false)
142 }
143
144 #[must_use]
148 pub const fn from_input_source(source: InputSource) -> Self {
149 Self {
150 source,
151 file_type_hint: None,
152 }
153 }
154
155 pub fn get_contents(
166 self,
167 skip_missing: bool,
168 skip_hidden: bool,
169 skip_gitignored: bool,
170 file_extensions: FileExtensions,
171 resolver: UrlContentResolver,
172 excluded_paths: PathExcludes,
173 ) -> impl Stream<Item = Result<InputContent>> {
174 try_stream! {
175 match self.source {
177 InputSource::RemoteUrl(url) => {
178 match resolver.url_contents(*url).await {
179 Err(_) if skip_missing => (),
180 Err(e) => Err(e)?,
181 Ok(content) => yield content,
182 }
183 return;
184 }
185 InputSource::Stdin => {
186 yield Self::stdin_content(self.file_type_hint).await?;
187 return;
188 }
189 InputSource::String(ref s) => {
190 yield Self::string_content(s, self.file_type_hint);
191 return;
192 }
193 _ => {}
194 }
195
196 let mut sources_stream = Box::pin(InputResolver::resolve(
198 &self,
199 file_extensions,
200 skip_hidden,
201 skip_gitignored,
202 &excluded_paths,
203 ));
204
205 let mut sources_empty = true;
206
207 while let Some(source_result) = sources_stream.next().await {
208 match source_result {
209 Ok(source) => {
210 let content_result = match source {
211 ResolvedInputSource::FsPath(path) => {
212 Self::path_content(&path).await
213 },
214 ResolvedInputSource::RemoteUrl(url) => {
215 resolver.url_contents(*url).await
216 },
217 ResolvedInputSource::Stdin => {
218 Self::stdin_content(self.file_type_hint).await
219 },
220 ResolvedInputSource::String(s) => {
221 Ok(Self::string_content(&s, self.file_type_hint))
222 },
223 };
224
225 match content_result {
226 Err(_) if skip_missing => (),
227 Err(e) if matches!(&e, ErrorKind::ReadFileInput(io_err, _) if io_err.kind() == std::io::ErrorKind::InvalidData) => {
228 if let ErrorKind::ReadFileInput(_, path) = &e {
230 log::warn!("Skipping file with invalid UTF-8 content: {}", path.display());
231 }
232 },
233 Err(e) => Err(e)?,
234 Ok(content) => {
235 sources_empty = false;
236 yield content
237 }
238 }
239 },
240 Err(e) => Err(e)?,
241 }
242 }
243
244 if sources_empty {
245 log::warn!("{}: No files found for this input source", self.source);
246 }
247 }
248 }
249
250 fn walk_entries(
252 path: &Path,
253 file_extensions: FileExtensions,
254 skip_hidden: bool,
255 skip_gitignored: bool,
256 ) -> Result<ignore::Walk> {
257 Ok(WalkBuilder::new(path)
258 .standard_filters(skip_gitignored)
261 .hidden(skip_hidden)
263 .types(file_extensions.try_into()?)
265 .build())
266 }
267
268 pub fn get_sources(
291 self,
292 file_extensions: FileExtensions,
293 skip_hidden: bool,
294 skip_gitignored: bool,
295 excluded_paths: &PathExcludes,
296 ) -> impl Stream<Item = Result<String>> {
297 try_stream! {
298 match self.source {
299 InputSource::RemoteUrl(url) => yield url.to_string(),
300 InputSource::FsGlob {
301 ref pattern,
302 ignore_case,
303 } => {
304 let glob_expanded = tilde(&pattern).to_string();
305 let mut match_opts = glob::MatchOptions::new();
306 match_opts.case_sensitive = !ignore_case;
307 for entry in glob_with(&glob_expanded, match_opts)? {
308 match entry {
309 Ok(path) => {
310 if !Self::is_excluded_path(&path, excluded_paths) {
311 yield path.to_string_lossy().to_string();
312 }
313 },
314 Err(e) => eprintln!("{e:?}"),
315 }
316 }
317 }
318 InputSource::FsPath(ref path) => {
319 if path.is_dir() {
320 for entry in Input::walk_entries(
321 path,
322 file_extensions,
323 skip_hidden,
324 skip_gitignored,
325 )? {
326 let entry = entry?;
327 if !Self::is_excluded_path(entry.path(), excluded_paths) {
328 if entry.file_type().is_some_and(|ft| ft.is_file()) {
330 yield entry.path().to_string_lossy().to_string();
331 }
332 }
333 }
334 } else if !Self::is_excluded_path(path, excluded_paths) {
335 yield path.to_string_lossy().to_string();
336 }
337 }
338 InputSource::Stdin => yield "<stdin>".into(),
339 InputSource::String(_) => yield "<raw string>".into(),
340 }
341 }
342 }
343
344 fn is_excluded_path(path: &Path, excluded_paths: &PathExcludes) -> bool {
346 excluded_paths.is_match(&path.to_string_lossy())
347 }
348
349 pub async fn path_content<P: Into<PathBuf> + AsRef<Path> + Clone>(
355 path: P,
356 ) -> Result<InputContent> {
357 let path = path.into();
358
359 let content = tokio::fs::read_to_string(&path)
360 .await
361 .map_err(|e| ErrorKind::ReadFileInput(e, path.clone()))?;
362
363 let input_content = InputContent {
364 file_type: FileType::from(&path),
365 source: ResolvedInputSource::FsPath(path),
366 content,
367 };
368
369 Ok(input_content)
370 }
371
372 pub async fn stdin_content(file_type_hint: Option<FileType>) -> Result<InputContent> {
378 let mut content = String::new();
379 let mut stdin = stdin();
380 stdin.read_to_string(&mut content).await?;
381
382 let input_content = InputContent {
383 source: ResolvedInputSource::Stdin,
384 file_type: file_type_hint.unwrap_or_default(),
385 content,
386 };
387
388 Ok(input_content)
389 }
390
391 #[must_use]
393 pub fn string_content(s: &str, file_type_hint: Option<FileType>) -> InputContent {
394 InputContent::from_string(s, file_type_hint.unwrap_or_default())
395 }
396}
397
398impl TryFrom<&str> for Input {
399 type Error = crate::ErrorKind;
400
401 fn try_from(value: &str) -> std::result::Result<Self, Self::Error> {
402 Self::from_value(value)
403 }
404}
405
#[cfg(test)]
mod tests {
    use super::*;
    use crate::filter::PathExcludes;

    /// Thin wrapper that exercises the production matcher
    /// (`Input::is_excluded_path`) rather than a test-local copy, so the
    /// exclusion tests below fail if the real implementation regresses.
    fn is_excluded_path(excluded_paths: &PathExcludes, path: &Path) -> bool {
        Input::is_excluded_path(path, excluded_paths)
    }

    #[test]
    fn test_input_handles_real_relative_paths() {
        let test_file = "./Cargo.toml";
        let path = Path::new(test_file);

        assert!(path.exists());
        assert!(path.is_relative());

        let input = Input::new(test_file, None, false);
        assert!(input.is_ok());
        assert!(matches!(
            input,
            Ok(Input {
                source: InputSource::FsPath(PathBuf { .. }),
                file_type_hint: None,
            })
        ));
    }

    #[test]
    fn test_input_handles_nonexistent_relative_paths() {
        let test_file = "./nonexistent/relative/path";
        let path = Path::new(test_file);

        assert!(!path.exists());
        assert!(path.is_relative());

        let input = Input::from_value(test_file);
        assert!(input.is_err());
        assert!(matches!(input, Err(ErrorKind::InvalidFile(PathBuf { .. }))));
    }

    #[test]
    fn test_no_exclusions() {
        let dir = tempfile::tempdir().unwrap();
        assert!(!is_excluded_path(&PathExcludes::empty(), dir.path()));
    }

    #[test]
    fn test_excluded() {
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path();
        let excludes = PathExcludes::new([path.to_string_lossy()]).unwrap();
        assert!(is_excluded_path(&excludes, path));
    }

    #[test]
    fn test_excluded_subdir() {
        let parent_dir = tempfile::tempdir().unwrap();
        let parent = parent_dir.path();
        let child_dir = tempfile::tempdir_in(parent).unwrap();
        let child = child_dir.path();

        let excludes = PathExcludes::new([parent.to_string_lossy()]).unwrap();
        assert!(is_excluded_path(&excludes, child));
    }

    // On Windows a non-existent path is always reported as `InvalidFile`
    // and never retried as a URL, so the scheme-less fallback only exists
    // on other platforms.
    #[cfg(not(windows))]
    #[test]
    fn test_url_without_scheme() {
        let input = Input::from_value("example.com");
        assert_eq!(
            input.unwrap().source.to_string(),
            String::from("http://example.com/")
        );
    }

    // A Windows file path must not be mistaken for a URL.
    #[cfg(windows)]
    #[test]
    fn test_windows_style_filepath_not_existing() {
        let input = Input::from_value("C:\\example\\project\\here");
        assert!(input.is_err());
        let input = input.unwrap_err();

        match input {
            ErrorKind::InvalidFile(_) => (),
            _ => panic!("Should have received InvalidFile error"),
        }
    }

    // An existing Windows file path must resolve to a filesystem source.
    #[cfg(windows)]
    #[test]
    fn test_windows_style_filepath_existing() {
        use std::env::temp_dir;
        use tempfile::NamedTempFile;

        let dir = temp_dir();
        let file = NamedTempFile::new_in(dir).unwrap();
        let path = file.path();
        let input = Input::from_value(path.to_str().unwrap()).unwrap();

        match input.source {
            InputSource::FsPath(_) => (),
            _ => panic!("Input source should be FsPath but was not"),
        }
    }

    #[test]
    fn test_url_scheme_check_succeeding() {
        // Valid HTTP and HTTPS URLs are accepted as remote sources.
        assert!(matches!(
            Input::from_value("http://example.com"),
            Ok(Input {
                source: InputSource::RemoteUrl(_),
                ..
            })
        ));
        assert!(matches!(
            Input::from_value("https://example.com"),
            Ok(Input {
                source: InputSource::RemoteUrl(_),
                ..
            })
        ));
        assert!(matches!(
            Input::from_value("http://subdomain.example.com/path?query=value",),
            Ok(Input {
                source: InputSource::RemoteUrl(_),
                ..
            })
        ));
        assert!(matches!(
            Input::from_value("https://example.com:8080"),
            Ok(Input {
                source: InputSource::RemoteUrl(_),
                ..
            })
        ));
    }

    #[test]
    fn test_url_scheme_check_failing() {
        // URLs with any scheme other than HTTP(S) are rejected.
        assert!(matches!(
            Input::from_value("ftp://example.com"),
            Err(ErrorKind::InvalidFile(_))
        ));
        assert!(matches!(
            Input::from_value("httpx://example.com"),
            Err(ErrorKind::InvalidFile(_))
        ));
        assert!(matches!(
            Input::from_value("file:///path/to/file"),
            Err(ErrorKind::InvalidFile(_))
        ));
        assert!(matches!(
            Input::from_value("mailto:user@example.com"),
            Err(ErrorKind::InvalidFile(_))
        ));
    }

    #[test]
    fn test_non_url_inputs() {
        // A non-existent relative path is an invalid file, not a URL.
        assert!(matches!(
            Input::from_value("./local/path"),
            Err(ErrorKind::InvalidFile(_))
        ));
        // Glob metacharacters select a glob source.
        assert!(matches!(
            Input::from_value("*.md"),
            Ok(Input {
                source: InputSource::FsGlob { .. },
                ..
            })
        ));
        // The current directory always exists.
        assert!(matches!(
            Input::from_value("."),
            Ok(Input {
                source: InputSource::FsPath(_),
                ..
            })
        ));
    }
}