Skip to main content

lychee_lib/types/
file.rs

1use ignore::types::{Types, TypesBuilder};
2use serde::{Deserialize, Serialize};
3use std::path::Path;
4use url::Url;
5
6/// Represents an ordered list of file extensions.
7///
8/// This holds the actual extension strings (e.g. `md`, `html`, etc.) and is
9/// used to build a [`Types`] object which can be used to match file types.
10///
11/// In a sense, it is more "low-level" than [`FileType`] as it is closer to the
12/// actual representation of file extensions, while [`FileType`] is a higher-level
13/// abstraction that represents the "category" of a file (e.g. Markdown, HTML).
14///
15/// The order is significant as extensions at the beginning of the vector will
16/// be treated with higher priority (e.g. when deciding which file to pick out
17/// of a set of options)
18#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Eq)]
19pub struct FileExtensions(Vec<String>);
20
21impl Default for FileExtensions {
22    fn default() -> Self {
23        FileType::default_extensions()
24    }
25}
26
27impl std::fmt::Display for FileExtensions {
28    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
29        write!(f, "{}", self.0.join(","))
30    }
31}
32
33impl FileExtensions {
34    /// Create an empty list of file extensions
35    #[must_use]
36    pub const fn empty() -> Self {
37        Self(vec![])
38    }
39
40    /// Extend the list of existing extensions by the values from the iterator
41    pub fn extend<I: IntoIterator<Item = String>>(&mut self, iter: I) {
42        self.0.extend(iter);
43    }
44
45    /// Check if the list of file extensions contains the given file extension
46    pub fn contains<T: Into<String>>(&self, file_extension: T) -> bool {
47        self.0.contains(&file_extension.into())
48    }
49
50    /// Build the current list of file extensions into a file type matcher.
51    ///
52    /// # Errors
53    ///
54    /// Fails if an extension is `all` or otherwise contains any character that
55    /// is not a Unicode letter or number.
56    pub fn build(self, skip_hidden: bool) -> super::Result<Types> {
57        let mut types_builder = TypesBuilder::new();
58        let prefix = if skip_hidden { "[!.]" } else { "" };
59
60        for ext in self.0 {
61            types_builder.add(&ext, &format!("{prefix}*.{ext}"))?;
62        }
63        Ok(types_builder.select("all").build()?)
64    }
65}
66
67impl From<FileExtensions> for Vec<String> {
68    fn from(value: FileExtensions) -> Self {
69        value.0
70    }
71}
72
73impl From<Vec<String>> for FileExtensions {
74    fn from(value: Vec<String>) -> Self {
75        Self(value)
76    }
77}
78
79impl From<FileType> for FileExtensions {
80    fn from(file_type: FileType) -> Self {
81        match file_type {
82            FileType::Html => FileType::html_extensions(),
83            FileType::Markdown => FileType::markdown_extensions(),
84            FileType::Css => FileType::css_extensions(),
85            FileType::Plaintext => FileType::plaintext_extensions(),
86        }
87    }
88}
89
90impl FromIterator<String> for FileExtensions {
91    fn from_iter<T: IntoIterator<Item = String>>(iter: T) -> Self {
92        Self(iter.into_iter().collect())
93    }
94}
95
96impl Iterator for FileExtensions {
97    type Item = String;
98
99    fn next(&mut self) -> Option<Self::Item> {
100        self.0.pop()
101    }
102}
103
104impl std::str::FromStr for FileExtensions {
105    type Err = std::convert::Infallible; // Cannot fail parsing
106
107    fn from_str(s: &str) -> Result<Self, Self::Err> {
108        Ok(Self(s.split(',').map(String::from).collect()))
109    }
110}
111
112/// `FileType` defines which file types lychee can handle
113#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash, Deserialize, Serialize, Default)]
114pub enum FileType {
115    /// File in HTML format
116    Html,
117    /// File in Markdown format
118    Markdown,
119    /// File in CSS format
120    Css,
121    /// Generic text file without syntax-specific parsing
122    #[default]
123    Plaintext,
124}
125
126impl std::fmt::Display for FileType {
127    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
128        match self {
129            FileType::Html => write!(f, "HTML"),
130            FileType::Markdown => write!(f, "Markdown"),
131            FileType::Css => write!(f, "CSS"),
132            FileType::Plaintext => write!(f, "plaintext"),
133        }
134    }
135}
136
137impl FileType {
138    /// All known Markdown extensions
139    const MARKDOWN_EXTENSIONS: &'static [&'static str] = &[
140        "markdown", "mkdown", "mkdn", "mdwn", "mdown", "mdx", "mkd", "md",
141    ];
142
143    /// All known HTML extensions
144    const HTML_EXTENSIONS: &'static [&'static str] = &["htm", "html"];
145
146    /// All known CSS extensions
147    const CSS_EXTENSIONS: &'static [&'static str] = &["css"];
148
149    /// All known plaintext extensions
150    const PLAINTEXT_EXTENSIONS: &'static [&'static str] = &["txt"];
151
152    /// Default extensions which are checked by lychee
153    #[must_use]
154    pub fn default_extensions() -> FileExtensions {
155        let mut extensions = FileExtensions::empty();
156        extensions.extend(Self::markdown_extensions());
157        extensions.extend(Self::html_extensions());
158        extensions.extend(Self::css_extensions());
159        extensions.extend(Self::plaintext_extensions());
160        extensions
161    }
162
163    /// All known Markdown extensions
164    #[must_use]
165    pub fn markdown_extensions() -> FileExtensions {
166        Self::MARKDOWN_EXTENSIONS
167            .iter()
168            .map(|&s| s.to_string())
169            .collect()
170    }
171
172    /// All known HTML extensions
173    #[must_use]
174    pub fn html_extensions() -> FileExtensions {
175        Self::HTML_EXTENSIONS
176            .iter()
177            .map(|&s| s.to_string())
178            .collect()
179    }
180
181    /// All known CSS extensions
182    #[must_use]
183    pub fn css_extensions() -> FileExtensions {
184        Self::CSS_EXTENSIONS
185            .iter()
186            .map(|&s| s.to_string())
187            .collect()
188    }
189
190    /// All known plaintext extensions
191    #[must_use]
192    pub fn plaintext_extensions() -> FileExtensions {
193        Self::PLAINTEXT_EXTENSIONS
194            .iter()
195            .map(|&s| s.to_string())
196            .collect()
197    }
198
199    /// Get the [`FileType`] from an extension string
200    #[must_use]
201    pub fn from_extension(extension: &str) -> Option<Self> {
202        let ext = extension.to_lowercase();
203        if Self::MARKDOWN_EXTENSIONS.contains(&ext.as_str()) {
204            Some(Self::Markdown)
205        } else if Self::HTML_EXTENSIONS.contains(&ext.as_str()) {
206            Some(Self::Html)
207        } else if Self::CSS_EXTENSIONS.contains(&ext.as_str()) {
208            Some(Self::Css)
209        } else if Self::PLAINTEXT_EXTENSIONS.contains(&ext.as_str()) {
210            Some(Self::Plaintext)
211        } else {
212            None
213        }
214    }
215}
216
217impl<P: AsRef<Path>> From<P> for FileType {
218    fn from(p: P) -> FileType {
219        let path = p.as_ref();
220        match path
221            .extension()
222            .and_then(std::ffi::OsStr::to_str)
223            .map(str::to_lowercase)
224            .as_deref()
225            .and_then(FileType::from_extension)
226        {
227            Some(file_type) => file_type,
228            None if is_url(path) => FileType::Html,
229            _ => FileType::default(),
230        }
231    }
232}
233
234/// Helper function to check if a path is likely a URL.
235fn is_url(path: &Path) -> bool {
236    path.to_str()
237        .and_then(|s| Url::parse(s).ok())
238        .is_some_and(|url| url.scheme() == "http" || url.scheme() == "https")
239}
240
#[cfg(test)]
mod tests {
    use super::*;

    /// File type detection from paths and URLs via `FileType::from`
    #[test]
    fn test_extension() {
        assert_eq!(FileType::from("foo.md"), FileType::Markdown);
        assert_eq!(FileType::from("foo.MD"), FileType::Markdown);
        assert_eq!(FileType::from("foo.mdx"), FileType::Markdown);

        // Test that a file without an extension is considered plaintext
        assert_eq!(FileType::from("README"), FileType::Plaintext);
        assert_eq!(FileType::from("test"), FileType::Plaintext);

        assert_eq!(FileType::from("test.unknown"), FileType::Plaintext);
        assert_eq!(FileType::from("test.txt"), FileType::Plaintext);
        assert_eq!(FileType::from("README.TXT"), FileType::Plaintext);

        assert_eq!(FileType::from("test.htm"), FileType::Html);
        assert_eq!(FileType::from("index.html"), FileType::Html);
        assert_eq!(FileType::from("http://foo.com/index.html"), FileType::Html);

        // CSS files are detected as well, regardless of case
        assert_eq!(FileType::from("style.css"), FileType::Css);
        assert_eq!(FileType::from("STYLE.CSS"), FileType::Css);
    }

    /// The default extension list covers every known extension group
    #[test]
    fn test_default_extensions() {
        let extensions = FileType::default_extensions();
        // Test some known extensions
        assert!(extensions.contains("md"));
        assert!(extensions.contains("html"));
        assert!(extensions.contains("markdown"));
        assert!(extensions.contains("htm"));
        assert!(extensions.contains("css"));
        assert!(extensions.contains("txt"));
        // Test that the count matches our static arrays
        let all_extensions: Vec<_> = extensions.into();
        assert_eq!(
            all_extensions.len(),
            FileType::MARKDOWN_EXTENSIONS.len()
                + FileType::HTML_EXTENSIONS.len()
                + FileType::CSS_EXTENSIONS.len()
                + FileType::PLAINTEXT_EXTENSIONS.len()
        );
    }

    /// Only `http` and `https` URLs count as URLs
    #[test]
    fn test_is_url() {
        // Valid URLs
        assert!(is_url(Path::new("http://foo.com")));
        assert!(is_url(Path::new("https://foo.com")));
        assert!(is_url(Path::new("http://www.foo.com")));
        assert!(is_url(Path::new("https://www.foo.com")));
        assert!(is_url(Path::new("http://foo.com/bar")));
        assert!(is_url(Path::new("https://foo.com/bar")));
        assert!(is_url(Path::new("http://foo.com:8080")));
        assert!(is_url(Path::new("https://foo.com:8080")));
        assert!(is_url(Path::new("http://foo.com/bar?q=hello")));
        assert!(is_url(Path::new("https://foo.com/bar?q=hello")));

        // Invalid URLs
        assert!(!is_url(Path::new("foo.com")));
        assert!(!is_url(Path::new("www.foo.com")));
        assert!(!is_url(Path::new("foo")));
        assert!(!is_url(Path::new("foo/bar")));
        assert!(!is_url(Path::new("foo/bar/baz")));
        assert!(!is_url(Path::new("file:///foo/bar.txt")));
        assert!(!is_url(Path::new("ftp://foo.com")));
    }

    /// Extension strings map to file types case-insensitively
    #[test]
    fn test_from_extension() {
        // Valid extensions
        assert_eq!(FileType::from_extension("html"), Some(FileType::Html));
        assert_eq!(FileType::from_extension("HTML"), Some(FileType::Html));
        assert_eq!(FileType::from_extension("htm"), Some(FileType::Html));
        assert_eq!(
            FileType::from_extension("markdown"),
            Some(FileType::Markdown)
        );
        assert_eq!(FileType::from_extension("md"), Some(FileType::Markdown));
        assert_eq!(FileType::from_extension("MD"), Some(FileType::Markdown));
        assert_eq!(FileType::from_extension("css"), Some(FileType::Css));
        assert_eq!(FileType::from_extension("CSS"), Some(FileType::Css));
        assert_eq!(FileType::from_extension("txt"), Some(FileType::Plaintext));
        assert_eq!(FileType::from_extension("TXT"), Some(FileType::Plaintext));

        // Unknown extension
        assert_eq!(FileType::from_extension("unknown"), None);
        assert_eq!(FileType::from_extension("xyz"), None);
    }
}