detect_lang/lib.rs
1//! Utility for identifying names of programming languages (and related files) from paths and file extensions.
2//!
3//! # Paths and Extensions
4//!
5//! Languages can be identified from paths using [`from_path`]
6//! or directly from extensions using [`from_extension`].
7//!
8//! [`from_path`]: fn.from_path.html
9//! [`from_extension`]: fn.from_extension.html
10//!
11//! ```
12//! use detect_lang::from_path;
13//! assert_eq!(from_path("foo.rs").unwrap().name(), "Rust");
14//! assert_eq!(from_path("foo.md").unwrap().name(), "Markdown");
15//!
16//! use detect_lang::from_extension;
17//! assert_eq!(from_extension("rs").unwrap().name(), "Rust");
18//! assert_eq!(from_extension("md").unwrap().name(), "Markdown");
19//!
20//! // The case is ignored
21//! assert_eq!(from_path("foo.jSoN").unwrap().name(), "JSON");
22//! assert_eq!(from_extension("jSoN").unwrap().name(), "JSON");
23//! ```
24//!
25//! # Language ID
26//!
27//! In short, the language [`id`](struct.Language.html#method.id)
28//! is a lowercase version of [`name`](struct.Language.html#method.name).
29//! However, it also replaces symbols making it usable as a [URL slug].
30//!
31//! For instance `foo.hpp` is identified as language name `C++` and
32//! language ID `cpp`.
33//!
34//! [URL slug]: https://en.wikipedia.org/wiki/Clean_URL#Slug
35//!
36//! ```
37//! use detect_lang::from_path;
38//! assert_eq!(from_path("foo.rs").unwrap().id(), "rust");
39//! assert_eq!(from_path("foo.cpp").unwrap().id(), "cpp");
40//! assert_eq!(from_path("foo.hpp").unwrap().id(), "cpp");
41//!
42//! use detect_lang::from_extension;
43//! assert_eq!(from_extension("rs").unwrap().id(), "rust");
44//! assert_eq!(from_extension("cpp").unwrap().id(), "cpp");
45//! assert_eq!(from_extension("hpp").unwrap().id(), "cpp");
46//!
47//! // The case is ignored
48//! assert_eq!(from_path("foo.jSoN").unwrap().id(), "json");
49//! assert_eq!(from_extension("jSoN").unwrap().id(), "json");
50//! ```
51//!
52//! # Always Lowercase
53//!
54//! If the extension is guaranteed to always be lowercase,
55//! then consider using [`from_lowercase_extension`] to avoid
56//! allocation and conversion to lowercase.
57//!
58//! [`from_lowercase_extension`]: fn.from_lowercase_extension.html
59//!
60//! ```
61//! # use detect_lang::{from_extension};
62//! use detect_lang::{from_lowercase_extension, Language};
63//!
64//! assert_eq!(from_lowercase_extension("json"), Some(Language("JSON", "json")));
65//! assert_eq!(from_lowercase_extension("jSoN"), None);
66//!
67//! assert_eq!(from_extension("json"), Some(Language("JSON", "json")));
68//! assert_eq!(from_extension("jSoN"), Some(Language("JSON", "json")));
69//! ```
70//!
71//! # Match Example
72//!
73//! ```
74//! use std::path::Path;
75//! use detect_lang::{from_path, Language};
76//!
77//! let path = Path::new("foo.rs");
78//! match from_path(path) {
79//! // Language(name, id)
80//! Some(Language(_, "rust")) => println!("This is Rust"),
81//! Some(Language(..)) => println!("Well it's not Rust"),
82//! None => println!("Ehh, what?"),
83//! }
84//! ```
85
86#![forbid(unsafe_code)]
87#![deny(missing_docs)]
88// #![deny(missing_doc_code_examples)]
89#![deny(missing_debug_implementations)]
90#![warn(clippy::all)]
91
92use std::ffi::OsStr;
93use std::ops::Deref;
94use std::path::Path;
95
96mod languages;
97
98use languages::LANGUAGES;
99
/// A detected language, stored as a `(name, id)` pair: `Language(name, id)`.
///
/// The first field is the display name (for instance `"C++"`) and the second
/// field is the ID (for instance `"cpp"`). Both fields are public, so a
/// `Language` can be constructed and destructured directly in patterns.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Language<'a>(
    /// Name of the language.
    pub &'a str,
    /// ID of the language.
    pub &'a str,
);
103
104impl<'a> Language<'a> {
105 /// Returns the name of the language.
106 ///
107 /// # Example
108 ///
109 /// ```
110 /// # use detect_lang::{from_path, from_extension};
111 /// assert_eq!(from_path("foo.rs").unwrap().name(), "Rust");
112 /// assert_eq!(from_path("foo.md").unwrap().name(), "Markdown");
113 ///
114 /// assert_eq!(from_extension("rs").unwrap().name(), "Rust");
115 /// assert_eq!(from_extension("md").unwrap().name(), "Markdown");
116 ///
117 /// // The case is ignored
118 /// assert_eq!(from_path("foo.jSoN").unwrap().name(), "JSON");
119 /// assert_eq!(from_extension("jSoN").unwrap().name(), "JSON");
120 /// ```
121 #[inline]
122 pub fn name(&self) -> &'a str {
123 self.0
124 }
125
126 /// Returns the ID of the language.
127 /// In most cases the language ID is just a lowercase version of the [`name`](#method.name).
128 ///
129 /// The ID is also usable as a [URL slug].
130 ///
131 /// [URL slug]: https://en.wikipedia.org/wiki/Clean_URL#Slug
132 ///
133 /// # Example
134 ///
135 /// ```
136 /// # use detect_lang::{from_path, from_extension};
137 /// assert_eq!(from_path("foo.rs").unwrap().id(), "rust");
138 /// assert_eq!(from_path("foo.md").unwrap().id(), "markdown");
139 ///
140 /// assert_eq!(from_extension("rs").unwrap().id(), "rust");
141 /// assert_eq!(from_extension("md").unwrap().id(), "markdown");
142 ///
143 /// // The case is ignored
144 /// assert_eq!(from_path("foo.jSoN").unwrap().id(), "json");
145 /// assert_eq!(from_extension("jSoN").unwrap().id(), "json");
146 /// ```
147 #[inline]
148 pub fn id(&self) -> &'a str {
149 self.1
150 }
151}
152
153impl<'a> Deref for Language<'a> {
154 type Target = str;
155
156 #[inline]
157 fn deref(&self) -> &Self::Target {
158 self.id()
159 }
160}
161
162/// Identifies a language from a [path extension].
163/// The casing of the extension does not affect the result.
164/// Returns `None` if the language was not identified.
165///
166/// Note that `from_path` does not check if the path exists,
167/// nor does it attempt to load the file.
168///
169/// *[See also `from_extension`][from_extension].*
170///
171/// [from_extension]: fn.from_extension.html
172/// [path extension]: https://doc.rust-lang.org/stable/std/path/struct.Path.html#method.extension
173///
174/// # Example
175///
176/// ```
177/// # use detect_lang::{from_path, Language};
178/// assert_eq!(from_path("foo.rs"), Some(Language("Rust", "rust")));
179/// assert_eq!(from_path("foo.md"), Some(Language("Markdown", "markdown")));
180/// assert_eq!(from_path("foo.cpp"), Some(Language("C++", "cpp")));
181/// assert_eq!(from_path("foo.unknown"), None);
182///
183/// // The case is ignored
184/// assert_eq!(from_path("foo.jSoN"), Some(Language("JSON", "json")));
185/// ```
186///
187/// # Unsupported Language
188///
189/// If a language is not supported, then feel free to submit an issue
190/// on the [issue tracker], or add it to [languages.rs] and submit
191/// a [pull request].
192///
193/// [issue tracker]: https://github.com/vallentin/detect-lang/issues
194/// [pull request]: https://github.com/vallentin/detect-lang/pulls
195/// [languages.rs]: https://github.com/vallentin/detect-lang/blob/master/src/languages.rs
196#[inline]
197pub fn from_path<P: AsRef<Path>>(path: P) -> Option<Language<'static>> {
198 if let Some(Some(ext)) = path.as_ref().extension().map(OsStr::to_str) {
199 from_extension(ext)
200 } else {
201 None
202 }
203}
204
205/// Identifies a language from a file extension.
206/// The casing of the extension does not affect the result.
207/// Returns `None` if the language was not identified.
208///
209/// *[See also `from_path`][from_path].*
210///
211/// If the extension is guaranteed to always be lowercase,
212/// then consider using [`from_lowercase_extension`] to avoid
213/// allocation and conversion to lowercase.
214///
215/// [from_path]: fn.from_path.html
216/// [`from_lowercase_extension`]: fn.from_lowercase_extension.html
217///
218/// # Example
219///
220/// ```
221/// # use detect_lang::{from_extension, Language};
222/// assert_eq!(from_extension("rs"), Some(Language("Rust", "rust")));
223/// assert_eq!(from_extension("md"), Some(Language("Markdown", "markdown")));
224/// assert_eq!(from_extension("cpp"), Some(Language("C++", "cpp")));
225/// assert_eq!(from_extension("unknown"), None);
226///
227/// // The case is ignored
228/// assert_eq!(from_extension("jSoN"), Some(Language("JSON", "json")));
229/// ```
230///
231/// # Unsupported Language
232///
233/// If a language is not supported, then feel free to submit an issue
234/// on the [issue tracker], or add it to [languages.rs] and submit
235/// a [pull request].
236///
237/// [issue tracker]: https://github.com/vallentin/detect-lang/issues
238/// [pull request]: https://github.com/vallentin/detect-lang/pulls
239/// [languages.rs]: https://github.com/vallentin/detect-lang/blob/master/src/languages.rs
240#[inline]
241pub fn from_extension<S: AsRef<str>>(extension: S) -> Option<Language<'static>> {
242 let ext = extension.as_ref().to_ascii_lowercase();
243 from_lowercase_extension(ext)
244}
245
246/// Identifies a language from a lowercase file extension.
247/// Returns `None` if the language was not identified.
248///
249/// If the extension is not guaranteed to always be lowercase,
250/// then use [`from_extension`] instead.
251///
252/// *[See also `from_path`][from_path].*
253///
254/// [from_path]: fn.from_path.html
255/// [`from_extension`]: fn.from_extension.html
256///
257/// # Example
258///
259/// ```
260/// # use detect_lang::{from_extension, from_lowercase_extension, Language};
261/// assert_eq!(from_lowercase_extension("rs"), Some(Language("Rust", "rust")));
262/// assert_eq!(from_lowercase_extension("md"), Some(Language("Markdown", "markdown")));
263/// assert_eq!(from_lowercase_extension("cpp"), Some(Language("C++", "cpp")));
264/// assert_eq!(from_lowercase_extension("unknown"), None);
265///
266/// // Use `from_extension` if casing should be ignored
267/// assert_eq!(from_lowercase_extension("jSoN"), None);
268/// assert_eq!(from_extension("jSoN"), Some(Language("JSON", "json")));
269/// ```
270///
271/// # Unsupported Language
272///
273/// If a language is not supported, then feel free to submit an issue
274/// on the [issue tracker], or add it to [languages.rs] and submit
275/// a [pull request].
276///
277/// [issue tracker]: https://github.com/vallentin/detect-lang/issues
278/// [pull request]: https://github.com/vallentin/detect-lang/pulls
279/// [languages.rs]: https://github.com/vallentin/detect-lang/blob/master/src/languages.rs
280#[inline]
281pub fn from_lowercase_extension<S: AsRef<str>>(extension: S) -> Option<Language<'static>> {
282 LANGUAGES
283 .binary_search_by_key(&extension.as_ref(), |&(ext, _)| ext)
284 .ok()
285 .map(|i| LANGUAGES[i].1)
286}