Skip to main content

lychee_lib/types/
file.rs

1use ignore::types::{Types, TypesBuilder};
2use serde::{Deserialize, Serialize};
3use std::path::Path;
4use url::Url;
5
/// Represents an ordered list of file extensions.
///
/// This holds the actual extension strings (e.g. `md`, `html`, etc.) and is
/// used to build a [`Types`] object which can be used to match file types.
///
/// In a sense, it is more "low-level" than [`FileType`] as it is closer to the
/// actual representation of file extensions, while [`FileType`] is a higher-level
/// abstraction that represents the "category" of a file (e.g. Markdown, HTML).
///
/// The order is significant as extensions at the beginning of the vector will
/// be treated with higher priority (e.g. when deciding which file to pick out
/// of a set of options).
///
/// Entries are expected without a leading dot, because
/// [`FileExtensions::build`] composes each glob as `*.<extension>` itself.
/// Membership checks via [`FileExtensions::contains`] are exact string
/// comparisons and therefore case-sensitive.
///
/// Note that the [`Iterator`] implementation of this type removes entries from
/// the back of the list, so iterating yields the extensions in reverse of the
/// stored order.
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Eq)]
pub struct FileExtensions(Vec<String>);
20
impl Default for FileExtensions {
    /// Returns the extensions which lychee checks by default, exactly as
    /// assembled by [`FileType::default_extensions`].
    fn default() -> Self {
        FileType::default_extensions()
    }
}
26
27impl std::fmt::Display for FileExtensions {
28    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
29        write!(f, "{}", self.0.join(","))
30    }
31}
32
33impl FileExtensions {
34    /// Create an empty list of file extensions
35    #[must_use]
36    pub const fn empty() -> Self {
37        Self(vec![])
38    }
39
40    /// Extend the list of existing extensions by the values from the iterator
41    pub fn extend<I: IntoIterator<Item = String>>(&mut self, iter: I) {
42        self.0.extend(iter);
43    }
44
45    /// Check if the list of file extensions contains the given file extension
46    pub fn contains<T: Into<String>>(&self, file_extension: T) -> bool {
47        self.0.contains(&file_extension.into())
48    }
49
50    /// Build the current list of file extensions into a file type matcher.
51    ///
52    /// # Errors
53    ///
54    /// Fails if an extension is `all` or otherwise contains any character that
55    /// is not a Unicode letter or number.
56    pub fn build(self, skip_hidden: bool) -> super::Result<Types> {
57        let mut types_builder = TypesBuilder::new();
58        let prefix = if skip_hidden { "[!.]" } else { "" };
59
60        for ext in self.0 {
61            types_builder.add(&ext, &format!("{prefix}*.{ext}"))?;
62        }
63        Ok(types_builder.select("all").build()?)
64    }
65}
66
67impl From<FileExtensions> for Vec<String> {
68    fn from(value: FileExtensions) -> Self {
69        value.0
70    }
71}
72
73impl From<Vec<String>> for FileExtensions {
74    fn from(value: Vec<String>) -> Self {
75        Self(value)
76    }
77}
78
79impl From<FileType> for FileExtensions {
80    fn from(file_type: FileType) -> Self {
81        match file_type {
82            FileType::Html => FileType::html_extensions(),
83            FileType::Markdown => FileType::markdown_extensions(),
84            FileType::Css => FileType::css_extensions(),
85            FileType::Plaintext => FileType::plaintext_extensions(),
86            FileType::Xml => FileType::xml_extensions(),
87        }
88    }
89}
90
91impl FromIterator<String> for FileExtensions {
92    fn from_iter<T: IntoIterator<Item = String>>(iter: T) -> Self {
93        Self(iter.into_iter().collect())
94    }
95}
96
97impl Iterator for FileExtensions {
98    type Item = String;
99
100    fn next(&mut self) -> Option<Self::Item> {
101        self.0.pop()
102    }
103}
104
105impl std::str::FromStr for FileExtensions {
106    type Err = std::convert::Infallible; // Cannot fail parsing
107
108    fn from_str(s: &str) -> Result<Self, Self::Err> {
109        Ok(Self(s.split(',').map(String::from).collect()))
110    }
111}
112
/// `FileType` defines which file types lychee can handle.
///
/// A value can be derived from an extension string with
/// [`FileType::from_extension`] or from anything path-like through the
/// `From<P: AsRef<Path>>` implementation.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash, Deserialize, Serialize, Default)]
pub enum FileType {
    /// File in HTML format
    Html,
    /// File in Markdown format
    Markdown,
    /// File in CSS format
    Css,
    /// File in XML format (used for sitemaps)
    Xml,
    /// Generic text file without syntax-specific parsing.
    ///
    /// This is the default variant.
    #[default]
    Plaintext,
}
128
129impl std::fmt::Display for FileType {
130    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
131        match self {
132            FileType::Html => write!(f, "HTML"),
133            FileType::Markdown => write!(f, "Markdown"),
134            FileType::Css => write!(f, "CSS"),
135            FileType::Plaintext => write!(f, "plaintext"),
136            FileType::Xml => write!(f, "XML"),
137        }
138    }
139}
140
141impl FileType {
142    /// All known Markdown extensions
143    const MARKDOWN_EXTENSIONS: &'static [&'static str] = &[
144        "markdown", "mkdown", "mkdn", "mdwn", "mdown", "mdx", "mkd", "md",
145    ];
146
147    /// All known HTML extensions
148    const HTML_EXTENSIONS: &'static [&'static str] = &["htm", "html"];
149
150    /// All known CSS extensions
151    const CSS_EXTENSIONS: &'static [&'static str] = &["css"];
152
153    /// All known plaintext extensions
154    const PLAINTEXT_EXTENSIONS: &'static [&'static str] = &["txt"];
155
156    /// All known XML extensions
157    const XML_EXTENSIONS: &'static [&'static str] = &["xml"];
158
159    /// Default extensions which are checked by lychee
160    #[must_use]
161    pub fn default_extensions() -> FileExtensions {
162        let mut extensions = FileExtensions::empty();
163        extensions.extend(Self::markdown_extensions());
164        extensions.extend(Self::html_extensions());
165        extensions.extend(Self::css_extensions());
166        extensions.extend(Self::plaintext_extensions());
167        extensions.extend(Self::xml_extensions());
168        extensions
169    }
170
171    /// All known Markdown extensions
172    #[must_use]
173    pub fn markdown_extensions() -> FileExtensions {
174        Self::MARKDOWN_EXTENSIONS
175            .iter()
176            .map(|&s| s.to_string())
177            .collect()
178    }
179
180    /// All known HTML extensions
181    #[must_use]
182    pub fn html_extensions() -> FileExtensions {
183        Self::HTML_EXTENSIONS
184            .iter()
185            .map(|&s| s.to_string())
186            .collect()
187    }
188
189    /// All known CSS extensions
190    #[must_use]
191    pub fn css_extensions() -> FileExtensions {
192        Self::CSS_EXTENSIONS
193            .iter()
194            .map(|&s| s.to_string())
195            .collect()
196    }
197
198    /// All known plaintext extensions
199    #[must_use]
200    pub fn plaintext_extensions() -> FileExtensions {
201        Self::PLAINTEXT_EXTENSIONS
202            .iter()
203            .map(|&s| s.to_string())
204            .collect()
205    }
206
207    /// All known XML extensions
208    #[must_use]
209    pub fn xml_extensions() -> FileExtensions {
210        Self::XML_EXTENSIONS
211            .iter()
212            .map(|&s| s.to_string())
213            .collect()
214    }
215
216    /// Get the [`FileType`] from an extension string
217    #[must_use]
218    pub fn from_extension(extension: &str) -> Option<Self> {
219        let ext = extension.to_lowercase();
220        if Self::MARKDOWN_EXTENSIONS.contains(&ext.as_str()) {
221            Some(Self::Markdown)
222        } else if Self::HTML_EXTENSIONS.contains(&ext.as_str()) {
223            Some(Self::Html)
224        } else if Self::CSS_EXTENSIONS.contains(&ext.as_str()) {
225            Some(Self::Css)
226        } else if Self::PLAINTEXT_EXTENSIONS.contains(&ext.as_str()) {
227            Some(Self::Plaintext)
228        } else if Self::XML_EXTENSIONS.contains(&ext.as_str()) {
229            Some(Self::Xml)
230        } else {
231            None
232        }
233    }
234}
235
236impl<P: AsRef<Path>> From<P> for FileType {
237    fn from(p: P) -> FileType {
238        let path = p.as_ref();
239        match path
240            .extension()
241            .and_then(std::ffi::OsStr::to_str)
242            .map(str::to_lowercase)
243            .as_deref()
244            .and_then(FileType::from_extension)
245        {
246            Some(file_type) => file_type,
247            None if is_url(path) => FileType::Html,
248            _ => FileType::default(),
249        }
250    }
251}
252
253/// Helper function to check if a path is likely a URL.
254fn is_url(path: &Path) -> bool {
255    path.to_str()
256        .and_then(|s| Url::parse(s).ok())
257        .is_some_and(|url| url.scheme() == "http" || url.scheme() == "https")
258}
259
#[cfg(test)]
mod tests {
    use super::*;

    /// File type detection from path-like values, covering every variant.
    #[test]
    fn test_extension() {
        assert_eq!(FileType::from("foo.md"), FileType::Markdown);
        assert_eq!(FileType::from("foo.MD"), FileType::Markdown);
        assert_eq!(FileType::from("foo.mdx"), FileType::Markdown);

        // Test that a file without an extension is considered plaintext
        assert_eq!(FileType::from("README"), FileType::Plaintext);
        assert_eq!(FileType::from("test"), FileType::Plaintext);

        assert_eq!(FileType::from("test.unknown"), FileType::Plaintext);
        assert_eq!(FileType::from("test.txt"), FileType::Plaintext);
        assert_eq!(FileType::from("README.TXT"), FileType::Plaintext);

        assert_eq!(FileType::from("test.htm"), FileType::Html);
        assert_eq!(FileType::from("index.html"), FileType::Html);
        assert_eq!(FileType::from("http://foo.com/index.html"), FileType::Html);

        // CSS and XML are detected by their extension as well
        assert_eq!(FileType::from("style.css"), FileType::Css);
        assert_eq!(FileType::from("STYLE.CSS"), FileType::Css);
        assert_eq!(FileType::from("sitemap.xml"), FileType::Xml);

        // A URL without a known extension is treated as HTML
        assert_eq!(FileType::from("https://foo.com/page"), FileType::Html);
    }

    /// The default list contains entries of every extension table.
    #[test]
    fn test_default_extensions() {
        let extensions = FileType::default_extensions();
        // Test some known extensions
        assert!(extensions.contains("md"));
        assert!(extensions.contains("html"));
        assert!(extensions.contains("markdown"));
        assert!(extensions.contains("htm"));
        assert!(extensions.contains("css"));
        assert!(extensions.contains("txt"));
        assert!(extensions.contains("xml"));
        // Unknown extensions are not part of the defaults
        assert!(!extensions.contains("unknown"));
        // Test that the count matches our static arrays
        let all_extensions: Vec<_> = extensions.into();
        assert_eq!(
            all_extensions.len(),
            FileType::MARKDOWN_EXTENSIONS.len()
                + FileType::HTML_EXTENSIONS.len()
                + FileType::CSS_EXTENSIONS.len()
                + FileType::PLAINTEXT_EXTENSIONS.len()
                + FileType::XML_EXTENSIONS.len()
        );
    }

    /// Only `http` and `https` URLs count as URLs.
    #[test]
    fn test_is_url() {
        // Valid URLs
        assert!(is_url(Path::new("http://foo.com")));
        assert!(is_url(Path::new("https://foo.com")));
        assert!(is_url(Path::new("http://www.foo.com")));
        assert!(is_url(Path::new("https://www.foo.com")));
        assert!(is_url(Path::new("http://foo.com/bar")));
        assert!(is_url(Path::new("https://foo.com/bar")));
        assert!(is_url(Path::new("http://foo.com:8080")));
        assert!(is_url(Path::new("https://foo.com:8080")));
        assert!(is_url(Path::new("http://foo.com/bar?q=hello")));
        assert!(is_url(Path::new("https://foo.com/bar?q=hello")));

        // Invalid URLs
        assert!(!is_url(Path::new("foo.com")));
        assert!(!is_url(Path::new("www.foo.com")));
        assert!(!is_url(Path::new("foo")));
        assert!(!is_url(Path::new("foo/bar")));
        assert!(!is_url(Path::new("foo/bar/baz")));
        assert!(!is_url(Path::new("file:///foo/bar.txt")));
        assert!(!is_url(Path::new("ftp://foo.com")));
    }

    /// Case-insensitive lookup of file types by extension string.
    #[test]
    fn test_from_extension() {
        // Valid extensions
        assert_eq!(FileType::from_extension("html"), Some(FileType::Html));
        assert_eq!(FileType::from_extension("HTML"), Some(FileType::Html));
        assert_eq!(FileType::from_extension("htm"), Some(FileType::Html));
        assert_eq!(
            FileType::from_extension("markdown"),
            Some(FileType::Markdown)
        );
        assert_eq!(FileType::from_extension("md"), Some(FileType::Markdown));
        assert_eq!(FileType::from_extension("MD"), Some(FileType::Markdown));
        assert_eq!(FileType::from_extension("txt"), Some(FileType::Plaintext));
        assert_eq!(FileType::from_extension("TXT"), Some(FileType::Plaintext));
        assert_eq!(FileType::from_extension("css"), Some(FileType::Css));
        assert_eq!(FileType::from_extension("CSS"), Some(FileType::Css));
        assert_eq!(FileType::from_extension("xml"), Some(FileType::Xml));
        assert_eq!(FileType::from_extension("XML"), Some(FileType::Xml));

        // Unknown extension
        assert_eq!(FileType::from_extension("unknown"), None);
        assert_eq!(FileType::from_extension("xyz"), None);
        assert_eq!(FileType::from_extension(""), None);
    }
}