// lychee_lib/types/file.rs

1use ignore::types::{Types, TypesBuilder};
2use serde::{Deserialize, Serialize};
3use std::path::Path;
4use url::Url;
5
6/// Represents an ordered list of file extensions.
7///
8/// This holds the actual extension strings (e.g. `md`, `html`, etc.) and is
9/// used to build a [`Types`] object which can be used to match file types.
10///
11/// In a sense, it is more "low-level" than [`FileType`] as it is closer to the
12/// actual representation of file extensions, while [`FileType`] is a higher-level
13/// abstraction that represents the "category" of a file (e.g. Markdown, HTML).
14///
15/// The order is significant as extensions at the beginning of the vector will
16/// be treated with higher priority (e.g. when deciding which file to pick out
17/// of a set of options)
18#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Eq)]
19pub struct FileExtensions(Vec<String>);
20
21impl Default for FileExtensions {
22    fn default() -> Self {
23        FileType::default_extensions()
24    }
25}
26
27impl std::fmt::Display for FileExtensions {
28    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
29        write!(f, "{}", self.0.join(","))
30    }
31}
32
33impl FileExtensions {
34    /// Create an empty list of file extensions
35    #[must_use]
36    pub const fn empty() -> Self {
37        Self(vec![])
38    }
39
40    /// Extend the list of existing extensions by the values from the iterator
41    pub fn extend<I: IntoIterator<Item = String>>(&mut self, iter: I) {
42        self.0.extend(iter);
43    }
44
45    /// Check if the list of file extensions contains the given file extension
46    pub fn contains<T: Into<String>>(&self, file_extension: T) -> bool {
47        self.0.contains(&file_extension.into())
48    }
49
50    /// Build the current list of file extensions into a file type matcher.
51    ///
52    /// # Errors
53    ///
54    /// Fails if an extension is `all` or otherwise contains any character that
55    /// is not a Unicode letter or number.
56    pub fn build(self, skip_hidden: bool) -> super::Result<Types> {
57        let mut types_builder = TypesBuilder::new();
58        let prefix = if skip_hidden { "[!.]" } else { "" };
59
60        for ext in self.0 {
61            types_builder.add(&ext, &format!("{prefix}*.{ext}"))?;
62        }
63        Ok(types_builder.select("all").build()?)
64    }
65}
66
67impl From<FileExtensions> for Vec<String> {
68    fn from(value: FileExtensions) -> Self {
69        value.0
70    }
71}
72
73impl From<Vec<String>> for FileExtensions {
74    fn from(value: Vec<String>) -> Self {
75        Self(value)
76    }
77}
78
79impl From<FileType> for FileExtensions {
80    fn from(file_type: FileType) -> Self {
81        match file_type {
82            FileType::Html => FileType::html_extensions(),
83            FileType::Markdown => FileType::markdown_extensions(),
84            FileType::Plaintext => FileType::plaintext_extensions(),
85        }
86    }
87}
88
89impl FromIterator<String> for FileExtensions {
90    fn from_iter<T: IntoIterator<Item = String>>(iter: T) -> Self {
91        Self(iter.into_iter().collect())
92    }
93}
94
95impl Iterator for FileExtensions {
96    type Item = String;
97
98    fn next(&mut self) -> Option<Self::Item> {
99        self.0.pop()
100    }
101}
102
103impl std::str::FromStr for FileExtensions {
104    type Err = std::convert::Infallible; // Cannot fail parsing
105
106    fn from_str(s: &str) -> Result<Self, Self::Err> {
107        Ok(Self(s.split(',').map(String::from).collect()))
108    }
109}
110
111/// `FileType` defines which file types lychee can handle
112#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash, Deserialize, Serialize, Default)]
113pub enum FileType {
114    /// File in HTML format
115    Html,
116    /// File in Markdown format
117    Markdown,
118    /// Generic text file without syntax-specific parsing
119    #[default]
120    Plaintext,
121}
122
123impl FileType {
124    /// All known Markdown extensions
125    const MARKDOWN_EXTENSIONS: &'static [&'static str] = &[
126        "markdown", "mkdown", "mkdn", "mdwn", "mdown", "mdx", "mkd", "md",
127    ];
128
129    /// All known HTML extensions
130    const HTML_EXTENSIONS: &'static [&'static str] = &["htm", "html"];
131
132    /// All known plaintext extensions
133    const PLAINTEXT_EXTENSIONS: &'static [&'static str] = &["txt"];
134
135    /// Default extensions which are checked by lychee
136    #[must_use]
137    pub fn default_extensions() -> FileExtensions {
138        let mut extensions = FileExtensions::empty();
139        extensions.extend(Self::markdown_extensions());
140        extensions.extend(Self::html_extensions());
141        extensions.extend(Self::plaintext_extensions());
142        extensions
143    }
144
145    /// All known Markdown extensions
146    #[must_use]
147    pub fn markdown_extensions() -> FileExtensions {
148        Self::MARKDOWN_EXTENSIONS
149            .iter()
150            .map(|&s| s.to_string())
151            .collect()
152    }
153
154    /// All known HTML extensions
155    #[must_use]
156    pub fn html_extensions() -> FileExtensions {
157        Self::HTML_EXTENSIONS
158            .iter()
159            .map(|&s| s.to_string())
160            .collect()
161    }
162
163    /// All known plaintext extensions
164    #[must_use]
165    pub fn plaintext_extensions() -> FileExtensions {
166        Self::PLAINTEXT_EXTENSIONS
167            .iter()
168            .map(|&s| s.to_string())
169            .collect()
170    }
171
172    /// Get the [`FileType`] from an extension string
173    #[must_use]
174    pub fn from_extension(extension: &str) -> Option<Self> {
175        let ext = extension.to_lowercase();
176        if Self::MARKDOWN_EXTENSIONS.contains(&ext.as_str()) {
177            Some(Self::Markdown)
178        } else if Self::HTML_EXTENSIONS.contains(&ext.as_str()) {
179            Some(Self::Html)
180        } else if Self::PLAINTEXT_EXTENSIONS.contains(&ext.as_str()) {
181            Some(Self::Plaintext)
182        } else {
183            None
184        }
185    }
186}
187
188impl<P: AsRef<Path>> From<P> for FileType {
189    fn from(p: P) -> FileType {
190        let path = p.as_ref();
191        match path
192            .extension()
193            .and_then(std::ffi::OsStr::to_str)
194            .map(str::to_lowercase)
195            .as_deref()
196            .and_then(FileType::from_extension)
197        {
198            Some(file_type) => file_type,
199            None if is_url(path) => FileType::Html,
200            _ => FileType::default(),
201        }
202    }
203}
204
205/// Helper function to check if a path is likely a URL.
206fn is_url(path: &Path) -> bool {
207    path.to_str()
208        .and_then(|s| Url::parse(s).ok())
209        .is_some_and(|url| url.scheme() == "http" || url.scheme() == "https")
210}
211
#[cfg(test)]
mod tests {
    use super::*;

    /// Paths and URLs are mapped to the expected [`FileType`].
    #[test]
    fn test_extension() {
        let cases = [
            // Markdown, regardless of letter case
            ("foo.md", FileType::Markdown),
            ("foo.MD", FileType::Markdown),
            ("foo.mdx", FileType::Markdown),
            // A file without an extension is considered plaintext
            ("README", FileType::Plaintext),
            ("test", FileType::Plaintext),
            // Unknown and explicit plaintext extensions
            ("test.unknown", FileType::Plaintext),
            ("test.txt", FileType::Plaintext),
            ("README.TXT", FileType::Plaintext),
            // HTML files and web URLs
            ("test.htm", FileType::Html),
            ("index.html", FileType::Html),
            ("http://foo.com/index.html", FileType::Html),
        ];

        for (input, expected) in cases {
            assert_eq!(FileType::from(input), expected, "input: {input}");
        }
    }

    /// The default list contains well-known extensions and has exactly as
    /// many entries as the static arrays combined.
    #[test]
    fn test_default_extensions() {
        let extensions = FileType::default_extensions();

        for known in ["md", "html", "markdown", "htm"] {
            assert!(
                extensions.contains(known),
                "missing default extension: {known}"
            );
        }

        let expected_len = FileType::MARKDOWN_EXTENSIONS.len()
            + FileType::HTML_EXTENSIONS.len()
            + FileType::PLAINTEXT_EXTENSIONS.len();
        let all_extensions: Vec<String> = extensions.into();
        assert_eq!(all_extensions.len(), expected_len);
    }

    /// Only `http` and `https` URLs are recognised by `is_url`.
    #[test]
    fn test_is_url() {
        let valid = [
            "http://foo.com",
            "https://foo.com",
            "http://www.foo.com",
            "https://www.foo.com",
            "http://foo.com/bar",
            "https://foo.com/bar",
            "http://foo.com:8080",
            "https://foo.com:8080",
            "http://foo.com/bar?q=hello",
            "https://foo.com/bar?q=hello",
        ];
        for candidate in valid {
            assert!(is_url(Path::new(candidate)), "expected URL: {candidate}");
        }

        let invalid = [
            "foo.com",
            "www.foo.com",
            "foo",
            "foo/bar",
            "foo/bar/baz",
            "file:///foo/bar.txt",
            "ftp://foo.com",
        ];
        for candidate in invalid {
            assert!(
                !is_url(Path::new(candidate)),
                "expected non-URL: {candidate}"
            );
        }
    }

    /// Extension lookup is case-insensitive and yields `None` for unknowns.
    #[test]
    fn test_from_extension() {
        let cases = [
            // Valid extensions
            ("html", Some(FileType::Html)),
            ("HTML", Some(FileType::Html)),
            ("htm", Some(FileType::Html)),
            ("markdown", Some(FileType::Markdown)),
            ("md", Some(FileType::Markdown)),
            ("MD", Some(FileType::Markdown)),
            ("txt", Some(FileType::Plaintext)),
            ("TXT", Some(FileType::Plaintext)),
            // Unknown extensions
            ("unknown", None),
            ("xyz", None),
        ];

        for (extension, expected) in cases {
            assert_eq!(
                FileType::from_extension(extension),
                expected,
                "extension: {extension}"
            );
        }
    }
}