detect_lang/
lib.rs

1//! Utility for identifying names of programming languages (and related files) from paths and file extensions.
2//!
3//! # Paths and Extensions
4//!
5//! Languages can be identified from paths using [`from_path`]
6//! or directly from extensions using [`from_extension`].
7//!
8//! [`from_path`]: fn.from_path.html
9//! [`from_extension`]: fn.from_extension.html
10//!
11//! ```
12//! use detect_lang::from_path;
13//! assert_eq!(from_path("foo.rs").unwrap().name(), "Rust");
14//! assert_eq!(from_path("foo.md").unwrap().name(), "Markdown");
15//!
16//! use detect_lang::from_extension;
17//! assert_eq!(from_extension("rs").unwrap().name(), "Rust");
18//! assert_eq!(from_extension("md").unwrap().name(), "Markdown");
19//!
20//! // The case is ignored
21//! assert_eq!(from_path("foo.jSoN").unwrap().name(), "JSON");
22//! assert_eq!(from_extension("jSoN").unwrap().name(), "JSON");
23//! ```
24//!
25//! # Language ID
26//!
27//! In short, the language [`id`](struct.Language.html#method.id)
28//! is a lowercase version of [`name`](struct.Language.html#method.name).
29//! However, it also replaces symbols making it usable as a [URL slug].
30//!
31//! For instance `foo.hpp` is identified as language name `C++` and
32//! language ID `cpp`.
33//!
34//! [URL slug]: https://en.wikipedia.org/wiki/Clean_URL#Slug
35//!
36//! ```
37//! use detect_lang::from_path;
38//! assert_eq!(from_path("foo.rs").unwrap().id(), "rust");
39//! assert_eq!(from_path("foo.cpp").unwrap().id(), "cpp");
40//! assert_eq!(from_path("foo.hpp").unwrap().id(), "cpp");
41//!
42//! use detect_lang::from_extension;
43//! assert_eq!(from_extension("rs").unwrap().id(), "rust");
44//! assert_eq!(from_extension("cpp").unwrap().id(), "cpp");
45//! assert_eq!(from_extension("hpp").unwrap().id(), "cpp");
46//!
47//! // The case is ignored
48//! assert_eq!(from_path("foo.jSoN").unwrap().id(), "json");
49//! assert_eq!(from_extension("jSoN").unwrap().id(), "json");
50//! ```
51//!
52//! # Always Lowercase
53//!
54//! If the extension is guaranteed to always be lowercase,
55//! then consider using [`from_lowercase_extension`] to avoid
56//! allocation and conversion to lowercase.
57//!
58//! [`from_lowercase_extension`]: fn.from_lowercase_extension.html
59//!
60//! ```
61//! # use detect_lang::{from_extension};
62//! use detect_lang::{from_lowercase_extension, Language};
63//!
64//! assert_eq!(from_lowercase_extension("json"), Some(Language("JSON", "json")));
65//! assert_eq!(from_lowercase_extension("jSoN"), None);
66//!
67//! assert_eq!(from_extension("json"), Some(Language("JSON", "json")));
68//! assert_eq!(from_extension("jSoN"), Some(Language("JSON", "json")));
69//! ```
70//!
71//! # Match Example
72//!
73//! ```
74//! use std::path::Path;
75//! use detect_lang::{from_path, Language};
76//!
77//! let path = Path::new("foo.rs");
78//! match from_path(path) {
79//!     //   Language(name, id)
80//!     Some(Language(_, "rust")) => println!("This is Rust"),
81//!     Some(Language(..))        => println!("Well it's not Rust"),
82//!     None                      => println!("Ehh, what?"),
83//! }
84//! ```
85
86#![forbid(unsafe_code)]
87#![deny(missing_docs)]
88// #![deny(missing_doc_code_examples)]
89#![deny(missing_debug_implementations)]
90#![warn(clippy::all)]
91
92use std::ffi::OsStr;
93use std::ops::Deref;
94use std::path::Path;
95
96mod languages;
97
98use languages::LANGUAGES;
99
/// A detected language, stored as the tuple `Language(name, id)`.
///
/// The first field is the display name (e.g. `"C++"`) and the second
/// field is the ID (e.g. `"cpp"`). Both fields are public, so a
/// `Language` can be constructed and pattern-matched directly.
///
/// # Example
///
/// ```
/// use detect_lang::{from_extension, Language};
///
/// let Language(name, id) = from_extension("hpp").unwrap();
/// assert_eq!(name, "C++");
/// assert_eq!(id, "cpp");
/// ```
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Language<'a>(pub &'a str, pub &'a str);
103
104impl<'a> Language<'a> {
105    /// Returns the name of the language.
106    ///
107    /// # Example
108    ///
109    /// ```
110    /// # use detect_lang::{from_path, from_extension};
111    /// assert_eq!(from_path("foo.rs").unwrap().name(), "Rust");
112    /// assert_eq!(from_path("foo.md").unwrap().name(), "Markdown");
113    ///
114    /// assert_eq!(from_extension("rs").unwrap().name(), "Rust");
115    /// assert_eq!(from_extension("md").unwrap().name(), "Markdown");
116    ///
117    /// // The case is ignored
118    /// assert_eq!(from_path("foo.jSoN").unwrap().name(), "JSON");
119    /// assert_eq!(from_extension("jSoN").unwrap().name(), "JSON");
120    /// ```
121    #[inline]
122    pub fn name(&self) -> &'a str {
123        self.0
124    }
125
126    /// Returns the ID of the language.
127    /// In most cases the language ID is just a lowercase version of the [`name`](#method.name).
128    ///
129    /// The ID is also usable as a [URL slug].
130    ///
131    /// [URL slug]: https://en.wikipedia.org/wiki/Clean_URL#Slug
132    ///
133    /// # Example
134    ///
135    /// ```
136    /// # use detect_lang::{from_path, from_extension};
137    /// assert_eq!(from_path("foo.rs").unwrap().id(), "rust");
138    /// assert_eq!(from_path("foo.md").unwrap().id(), "markdown");
139    ///
140    /// assert_eq!(from_extension("rs").unwrap().id(), "rust");
141    /// assert_eq!(from_extension("md").unwrap().id(), "markdown");
142    ///
143    /// // The case is ignored
144    /// assert_eq!(from_path("foo.jSoN").unwrap().id(), "json");
145    /// assert_eq!(from_extension("jSoN").unwrap().id(), "json");
146    /// ```
147    #[inline]
148    pub fn id(&self) -> &'a str {
149        self.1
150    }
151}
152
153impl<'a> Deref for Language<'a> {
154    type Target = str;
155
156    #[inline]
157    fn deref(&self) -> &Self::Target {
158        self.id()
159    }
160}
161
162/// Identifies a language from a [path extension].
163/// The casing of the extension does not affect the result.
164/// Returns `None` if the language was not identified.
165///
166/// Note that `from_path` does not check if the path exists,
167/// nor does it attempt to load the file.
168///
169/// *[See also `from_extension`][from_extension].*
170///
171/// [from_extension]: fn.from_extension.html
172/// [path extension]: https://doc.rust-lang.org/stable/std/path/struct.Path.html#method.extension
173///
174/// # Example
175///
176/// ```
177/// # use detect_lang::{from_path, Language};
178/// assert_eq!(from_path("foo.rs"), Some(Language("Rust", "rust")));
179/// assert_eq!(from_path("foo.md"), Some(Language("Markdown", "markdown")));
180/// assert_eq!(from_path("foo.cpp"), Some(Language("C++", "cpp")));
181/// assert_eq!(from_path("foo.unknown"), None);
182///
183/// // The case is ignored
184/// assert_eq!(from_path("foo.jSoN"), Some(Language("JSON", "json")));
185/// ```
186///
187/// # Unsupported Language
188///
189/// If a language is not supported, then feel free to submit an issue
190/// on the [issue tracker], or add it to [languages.rs] and submit
191/// a [pull request].
192///
193/// [issue tracker]: https://github.com/vallentin/detect-lang/issues
194/// [pull request]: https://github.com/vallentin/detect-lang/pulls
195/// [languages.rs]: https://github.com/vallentin/detect-lang/blob/master/src/languages.rs
196#[inline]
197pub fn from_path<P: AsRef<Path>>(path: P) -> Option<Language<'static>> {
198    if let Some(Some(ext)) = path.as_ref().extension().map(OsStr::to_str) {
199        from_extension(ext)
200    } else {
201        None
202    }
203}
204
205/// Identifies a language from a file extension.
206/// The casing of the extension does not affect the result.
207/// Returns `None` if the language was not identified.
208///
209/// *[See also `from_path`][from_path].*
210///
211/// If the extension is guaranteed to always be lowercase,
212/// then consider using [`from_lowercase_extension`] to avoid
213/// allocation and conversion to lowercase.
214///
215/// [from_path]: fn.from_path.html
216/// [`from_lowercase_extension`]: fn.from_lowercase_extension.html
217///
218/// # Example
219///
220/// ```
221/// # use detect_lang::{from_extension, Language};
222/// assert_eq!(from_extension("rs"), Some(Language("Rust", "rust")));
223/// assert_eq!(from_extension("md"), Some(Language("Markdown", "markdown")));
224/// assert_eq!(from_extension("cpp"), Some(Language("C++", "cpp")));
225/// assert_eq!(from_extension("unknown"), None);
226///
227/// // The case is ignored
228/// assert_eq!(from_extension("jSoN"), Some(Language("JSON", "json")));
229/// ```
230///
231/// # Unsupported Language
232///
233/// If a language is not supported, then feel free to submit an issue
234/// on the [issue tracker], or add it to [languages.rs] and submit
235/// a [pull request].
236///
237/// [issue tracker]: https://github.com/vallentin/detect-lang/issues
238/// [pull request]: https://github.com/vallentin/detect-lang/pulls
239/// [languages.rs]: https://github.com/vallentin/detect-lang/blob/master/src/languages.rs
240#[inline]
241pub fn from_extension<S: AsRef<str>>(extension: S) -> Option<Language<'static>> {
242    let ext = extension.as_ref().to_ascii_lowercase();
243    from_lowercase_extension(ext)
244}
245
246/// Identifies a language from a lowercase file extension.
247/// Returns `None` if the language was not identified.
248///
249/// If the extension is not guaranteed to always be lowercase,
250/// then use [`from_extension`] instead.
251///
252/// *[See also `from_path`][from_path].*
253///
254/// [from_path]: fn.from_path.html
255/// [`from_extension`]: fn.from_extension.html
256///
257/// # Example
258///
259/// ```
260/// # use detect_lang::{from_extension, from_lowercase_extension, Language};
261/// assert_eq!(from_lowercase_extension("rs"), Some(Language("Rust", "rust")));
262/// assert_eq!(from_lowercase_extension("md"), Some(Language("Markdown", "markdown")));
263/// assert_eq!(from_lowercase_extension("cpp"), Some(Language("C++", "cpp")));
264/// assert_eq!(from_lowercase_extension("unknown"), None);
265///
266/// // Use `from_extension` if casing should be ignored
267/// assert_eq!(from_lowercase_extension("jSoN"), None);
268/// assert_eq!(from_extension("jSoN"), Some(Language("JSON", "json")));
269/// ```
270///
271/// # Unsupported Language
272///
273/// If a language is not supported, then feel free to submit an issue
274/// on the [issue tracker], or add it to [languages.rs] and submit
275/// a [pull request].
276///
277/// [issue tracker]: https://github.com/vallentin/detect-lang/issues
278/// [pull request]: https://github.com/vallentin/detect-lang/pulls
279/// [languages.rs]: https://github.com/vallentin/detect-lang/blob/master/src/languages.rs
280#[inline]
281pub fn from_lowercase_extension<S: AsRef<str>>(extension: S) -> Option<Language<'static>> {
282    LANGUAGES
283        .binary_search_by_key(&extension.as_ref(), |&(ext, _)| ext)
284        .ok()
285        .map(|i| LANGUAGES[i].1)
286}