lychee_lib/types/
file.rs

1use ignore::types::{Types, TypesBuilder};
2use serde::{Deserialize, Serialize};
3use std::path::Path;
4use url::Url;
5
6/// Represents an ordered list of file extensions.
7///
8/// This holds the actual extension strings (e.g. `md`, `html`, etc.) and is
9/// used to build a [`Types`] object which can be used to match file types.
10///
11/// In a sense, it is more "low-level" than [`FileType`] as it is closer to the
12/// actual representation of file extensions, while [`FileType`] is a higher-level
13/// abstraction that represents the "category" of a file (e.g. Markdown, HTML).
14///
15/// The order is significant as extensions at the beginning of the vector will
16/// be treated with higher priority (e.g. when deciding which file to pick out
17/// of a set of options)
18#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Eq)]
19pub struct FileExtensions(Vec<String>);
20
21impl Default for FileExtensions {
22    fn default() -> Self {
23        FileType::default_extensions()
24    }
25}
26
27impl std::fmt::Display for FileExtensions {
28    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
29        write!(f, "{}", self.0.join(","))
30    }
31}
32
33impl FileExtensions {
34    /// Create an empty list of file extensions
35    #[must_use]
36    pub const fn empty() -> Self {
37        Self(vec![])
38    }
39
40    /// Extend the list of existing extensions by the values from the iterator
41    pub fn extend<I: IntoIterator<Item = String>>(&mut self, iter: I) {
42        self.0.extend(iter);
43    }
44
45    /// Check if the list of file extensions contains the given file extension
46    pub fn contains<T: Into<String>>(&self, file_extension: T) -> bool {
47        self.0.contains(&file_extension.into())
48    }
49}
50
51impl TryFrom<FileExtensions> for Types {
52    type Error = super::ErrorKind;
53
54    /// Build the current list of file extensions into a file type matcher.
55    ///
56    /// # Errors
57    ///
58    /// Fails if an extension is `all` or otherwise contains any character that
59    /// is not a Unicode letter or number.
60    fn try_from(value: FileExtensions) -> super::Result<Self> {
61        let mut types_builder = TypesBuilder::new();
62        for ext in value.0.clone() {
63            types_builder.add(&ext, &format!("*.{ext}"))?;
64        }
65        Ok(types_builder.select("all").build()?)
66    }
67}
68
69impl From<FileExtensions> for Vec<String> {
70    fn from(value: FileExtensions) -> Self {
71        value.0
72    }
73}
74
75impl From<Vec<String>> for FileExtensions {
76    fn from(value: Vec<String>) -> Self {
77        Self(value)
78    }
79}
80
81impl From<FileType> for FileExtensions {
82    fn from(file_type: FileType) -> Self {
83        match file_type {
84            FileType::Html => FileType::html_extensions(),
85            FileType::Markdown => FileType::markdown_extensions(),
86            FileType::Plaintext => FileType::plaintext_extensions(),
87        }
88    }
89}
90
91impl FromIterator<String> for FileExtensions {
92    fn from_iter<T: IntoIterator<Item = String>>(iter: T) -> Self {
93        Self(iter.into_iter().collect())
94    }
95}
96
97impl Iterator for FileExtensions {
98    type Item = String;
99
100    fn next(&mut self) -> Option<Self::Item> {
101        self.0.pop()
102    }
103}
104
105impl std::str::FromStr for FileExtensions {
106    type Err = std::convert::Infallible; // Cannot fail parsing
107
108    fn from_str(s: &str) -> Result<Self, Self::Err> {
109        Ok(Self(s.split(',').map(String::from).collect()))
110    }
111}
112
113/// `FileType` defines which file types lychee can handle
114#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash, Deserialize, Serialize)]
115pub enum FileType {
116    /// File in HTML format
117    Html,
118    /// File in Markdown format
119    Markdown,
120    /// Generic text file without syntax-specific parsing
121    Plaintext,
122}
123
124impl FileType {
125    /// All known Markdown extensions
126    const MARKDOWN_EXTENSIONS: &'static [&'static str] = &[
127        "markdown", "mkdown", "mkdn", "mdwn", "mdown", "mdx", "mkd", "md",
128    ];
129
130    /// All known HTML extensions
131    const HTML_EXTENSIONS: &'static [&'static str] = &["htm", "html"];
132
133    /// All known plaintext extensions
134    const PLAINTEXT_EXTENSIONS: &'static [&'static str] = &["txt"];
135
136    /// Default extensions which are checked by lychee
137    #[must_use]
138    pub fn default_extensions() -> FileExtensions {
139        let mut extensions = FileExtensions::empty();
140        extensions.extend(Self::markdown_extensions());
141        extensions.extend(Self::html_extensions());
142        extensions.extend(Self::plaintext_extensions());
143        extensions
144    }
145
146    /// All known Markdown extensions
147    #[must_use]
148    pub fn markdown_extensions() -> FileExtensions {
149        Self::MARKDOWN_EXTENSIONS
150            .iter()
151            .map(|&s| s.to_string())
152            .collect()
153    }
154
155    /// All known HTML extensions
156    #[must_use]
157    pub fn html_extensions() -> FileExtensions {
158        Self::HTML_EXTENSIONS
159            .iter()
160            .map(|&s| s.to_string())
161            .collect()
162    }
163
164    /// All known plaintext extensions
165    #[must_use]
166    pub fn plaintext_extensions() -> FileExtensions {
167        Self::PLAINTEXT_EXTENSIONS
168            .iter()
169            .map(|&s| s.to_string())
170            .collect()
171    }
172
173    /// Get the [`FileType`] from an extension string
174    #[must_use]
175    pub fn from_extension(extension: &str) -> Option<Self> {
176        let ext = extension.to_lowercase();
177        if Self::MARKDOWN_EXTENSIONS.contains(&ext.as_str()) {
178            Some(Self::Markdown)
179        } else if Self::HTML_EXTENSIONS.contains(&ext.as_str()) {
180            Some(Self::Html)
181        } else if Self::PLAINTEXT_EXTENSIONS.contains(&ext.as_str()) {
182            Some(Self::Plaintext)
183        } else {
184            None
185        }
186    }
187}
188
189impl Default for FileType {
190    fn default() -> Self {
191        // This is the default file type when no other type can be determined.
192        // It represents a generic text file with no specific syntax.
193        Self::Plaintext
194    }
195}
196
197impl<P: AsRef<Path>> From<P> for FileType {
198    fn from(p: P) -> FileType {
199        let path = p.as_ref();
200        match path
201            .extension()
202            .and_then(std::ffi::OsStr::to_str)
203            .map(str::to_lowercase)
204            .as_deref()
205            .and_then(FileType::from_extension)
206        {
207            Some(file_type) => file_type,
208            None if is_url(path) => FileType::Html,
209            _ => FileType::default(),
210        }
211    }
212}
213
214/// Helper function to check if a path is likely a URL.
215fn is_url(path: &Path) -> bool {
216    path.to_str()
217        .and_then(|s| Url::parse(s).ok())
218        .is_some_and(|url| url.scheme() == "http" || url.scheme() == "https")
219}
220
#[cfg(test)]
mod tests {
    use super::*;

    /// Path-based detection: known extensions (in any case), missing or
    /// unknown extensions, and URLs.
    #[test]
    fn test_extension() {
        let cases = [
            ("foo.md", FileType::Markdown),
            ("foo.MD", FileType::Markdown),
            ("foo.mdx", FileType::Markdown),
            // A file without an extension is considered plaintext
            ("README", FileType::Plaintext),
            ("test", FileType::Plaintext),
            ("test.unknown", FileType::Plaintext),
            ("test.txt", FileType::Plaintext),
            ("README.TXT", FileType::Plaintext),
            ("test.htm", FileType::Html),
            ("index.html", FileType::Html),
            ("http://foo.com/index.html", FileType::Html),
        ];

        for (input, expected) in cases {
            assert_eq!(FileType::from(input), expected, "input: {input}");
        }
    }

    /// The default list contains well-known extensions and has exactly as
    /// many entries as the three static arrays combined.
    #[test]
    fn test_default_extensions() {
        let extensions = FileType::default_extensions();

        // Some known extensions
        for known in ["md", "html", "markdown", "htm"] {
            assert!(extensions.contains(known), "missing extension: {known}");
        }

        // The count matches our static arrays
        let expected_len = FileType::MARKDOWN_EXTENSIONS.len()
            + FileType::HTML_EXTENSIONS.len()
            + FileType::PLAINTEXT_EXTENSIONS.len();
        let all_extensions: Vec<String> = extensions.into();
        assert_eq!(all_extensions.len(), expected_len);
    }

    /// Only `http`/`https` URLs count as URLs.
    #[test]
    fn test_is_url() {
        let valid = [
            "http://foo.com",
            "https://foo.com",
            "http://www.foo.com",
            "https://www.foo.com",
            "http://foo.com/bar",
            "https://foo.com/bar",
            "http://foo.com:8080",
            "https://foo.com:8080",
            "http://foo.com/bar?q=hello",
            "https://foo.com/bar?q=hello",
        ];
        for input in valid {
            assert!(is_url(Path::new(input)), "expected a URL: {input}");
        }

        let invalid = [
            "foo.com",
            "www.foo.com",
            "foo",
            "foo/bar",
            "foo/bar/baz",
            "file:///foo/bar.txt",
            "ftp://foo.com",
        ];
        for input in invalid {
            assert!(!is_url(Path::new(input)), "expected no URL: {input}");
        }
    }

    /// Extension lookup is case-insensitive and rejects unknown extensions.
    #[test]
    fn test_from_extension() {
        let cases = [
            // Valid extensions
            ("html", Some(FileType::Html)),
            ("HTML", Some(FileType::Html)),
            ("htm", Some(FileType::Html)),
            ("markdown", Some(FileType::Markdown)),
            ("md", Some(FileType::Markdown)),
            ("MD", Some(FileType::Markdown)),
            ("txt", Some(FileType::Plaintext)),
            ("TXT", Some(FileType::Plaintext)),
            // Unknown extensions
            ("unknown", None),
            ("xyz", None),
        ];

        for (input, expected) in cases {
            assert_eq!(
                FileType::from_extension(input),
                expected,
                "input: {input}"
            );
        }
    }
}