lychee_lib/types/input/source.rs
1//! Input source type definitions.
2//!
3//! lychee can handle different kinds of input sources:
4//! - URLs (of HTTP/HTTPS scheme)
5//! - File system paths (to files or directories)
6//! - Unix shell-style glob patterns (e.g. `./docs/**/*.md`)
7//! - Standard input (`stdin`)
8//! - Raw strings (UTF-8 only for now)
9//!
10//! Each input source is handled differently:
11//! - File paths are walked (if they are directories) and filtered by
12//! extension
13//! - Glob patterns are expanded to matching file paths, which are then walked
14//! and filtered by extension
15//! - URLs, raw strings, and standard input (`stdin`) are read directly
16
17use reqwest::Url;
18use serde::{Deserialize, Serialize};
19use std::borrow::Cow;
20use std::fmt::Display;
21use std::path::PathBuf;
22
23/// Input types which lychee supports
24#[derive(Debug, Clone, PartialEq, Eq, Hash, Deserialize)]
25#[non_exhaustive]
26pub enum InputSource {
27 /// URL (of HTTP/HTTPS scheme).
28 RemoteUrl(Box<Url>),
29 /// Unix shell-style glob pattern.
30 FsGlob {
31 /// The glob pattern matching all input files
32 pattern: String,
33 /// Don't be case sensitive when matching files against a glob pattern
34 ignore_case: bool,
35 },
36 /// File path.
37 FsPath(PathBuf),
38 /// Standard Input.
39 Stdin,
40 /// Raw string input.
41 String(Cow<'static, str>),
42}
43
44/// Resolved input sources that can be processed for content.
45///
46/// This represents input sources after glob pattern expansion.
47/// It is identical to `InputSource`, except that glob patterns
48/// have been resolved to concrete file paths.
49///
50/// We use a separate type to avoid handling the (no longer applicable)
51/// glob case in downstream processing.
52#[derive(Debug, Clone, PartialEq, Eq, Hash)]
53pub enum ResolvedInputSource {
54 /// URL (of HTTP/HTTPS scheme).
55 RemoteUrl(Box<Url>),
56 /// File path.
57 FsPath(PathBuf),
58 /// Standard Input.
59 Stdin,
60 /// Raw string input.
61 String(Cow<'static, str>),
62}
63
64impl From<ResolvedInputSource> for InputSource {
65 fn from(resolved: ResolvedInputSource) -> Self {
66 match resolved {
67 ResolvedInputSource::RemoteUrl(url) => InputSource::RemoteUrl(url),
68 ResolvedInputSource::FsPath(path) => InputSource::FsPath(path),
69 ResolvedInputSource::Stdin => InputSource::Stdin,
70 ResolvedInputSource::String(s) => InputSource::String(s),
71 }
72 }
73}
74
75impl Display for ResolvedInputSource {
76 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
77 f.write_str(match self {
78 Self::RemoteUrl(url) => url.as_str(),
79 Self::FsPath(path) => path.to_str().unwrap_or_default(),
80 Self::Stdin => "stdin",
81 Self::String(s) => s.as_ref(),
82 })
83 }
84}
85
86/// Custom serialization for the `InputSource` enum.
87///
88/// This implementation serializes all variants as strings to ensure
89/// compatibility with JSON serialization, which requires string keys for enums.
90///
91/// Without this custom implementation, attempting to serialize `InputSource` to
92/// JSON would result in a "key must be a string" error.
93///
94/// See: <https://github.com/serde-rs/json/issues/45>
95impl Serialize for InputSource {
96 fn serialize<S>(&self, serializer: S) -> std::result::Result<S::Ok, S::Error>
97 where
98 S: serde::Serializer,
99 {
100 serializer.collect_str(self)
101 }
102}
103
104impl Display for InputSource {
105 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
106 f.write_str(match self {
107 Self::RemoteUrl(url) => url.as_str(),
108 Self::FsGlob { pattern, .. } => pattern,
109 Self::FsPath(path) => path.to_str().unwrap_or_default(),
110 Self::Stdin => "stdin",
111 Self::String(s) => s.as_ref(),
112 })
113 }
114}