comment_parser/languages.rs
1use std::path::Path;
2
3use crate::syntax::SyntaxRule;
4
5use SyntaxRule::*;
6
7const C: [SyntaxRule; 3] = [
8 LineComment(b"//"),
9 BlockComment(b"/*", b"*/"),
10 String(b"\""),
11];
12
13const PYTHON: [SyntaxRule; 4] = [
14 LineComment(b"#"),
15 String(b"\"\"\""),
16 String(b"\""),
17 String(b"'"),
18];
19
20const RUST: [SyntaxRule; 5] = [
21 LineComment(b"//!"),
22 LineComment(b"///"),
23 LineComment(b"//"),
24 BlockComment(b"/*", b"*/"),
25 String(b"\""),
26];
27
28#[rustfmt::skip]
29const SHELL: [SyntaxRule; 3] = [
30 LineComment(b"#"),
31 String(b"\""),
32 String(b"'"),
33];
34
35// The array is sorted by the language name
36const SYNTAXES: [(&str, &[SyntaxRule]); 15] = [
37 ("c", &C),
38 ("cpp", &C),
39 ("css", &C),
40 ("glsl", &C),
41 ("java", &C),
42 ("javascript", &C),
43 ("json", &C),
44 ("jsonc", &C),
45 ("python", &PYTHON),
46 ("rust", &RUST),
47 ("scss", &C),
48 ("shell", &SHELL),
49 ("toml", &C),
50 ("typescript", &C),
51 ("yaml", &C),
52];
53
54/// Given a language name, get [syntax rules] for a predefined
55/// language included in the crate.
56/// Returns `None` if the language is not supported.
57///
58/// In the case of `None`, check the following:
59/// - The language `name` must be written in all lower case.
60/// - The language `name` must not use special symbols e.g. use `"cpp"` not `"c++"`.
61///
62/// If [syntax rules] for a language does not exist, then consider
63/// trying another language, which has similar syntax rules when
64/// it comes to comments and strings. For instance `c` vs `cpp` or
65/// `css` vs `scss`.
66///
67/// Click [here][languages] to see all predefined languages.
68///
69/// [languages]: ../src/comment_parser/languages.rs.html
70///
71/// # Example
72///
73/// ```
74/// use comment_parser::get_syntax;
75///
76/// assert!(get_syntax("rust").is_some());
77/// assert!(get_syntax("c").is_some());
78/// assert!(get_syntax("cpp").is_some());
79/// assert!(get_syntax("python").is_some());
80/// ```
81///
82/// # Custom Syntax Rules
83///
84/// Go to [`SyntaxRule`][syntax rules] for an example on defining
85/// custom syntax rules.
86///
87/// [syntax rules]: enum.SyntaxRule.html
88#[inline]
89pub fn get_syntax<S: AsRef<str>>(name: S) -> Option<&'static [SyntaxRule<'static>]> {
90 SYNTAXES
91 .binary_search_by_key(&name.as_ref(), |&(name, _)| name)
92 .ok()
93 .map(|i| SYNTAXES[i].1)
94}
95
96/// Given a [`Path`], get [syntax rules] for a predefined
97/// language included in the crate.
98/// The language is identified from the [path extension], and
99/// the casing of the extension does not affect the result.
100///
101/// [`Path`]: https://doc.rust-lang.org/stable/std/path/struct.Path.html
102/// [path extension]: https://doc.rust-lang.org/stable/std/path/struct.Path.html#method.extension
103///
104/// Note that `get_syntax_from_path` does not check if the path exists,
105/// nor does it attempt to load the file.
106///
107/// *[See also `get_syntax_from_extension`][get_syntax_from_extension].*
108///
109/// [get_syntax_from_extension]: fn.get_syntax_from_extension.html
110///
111/// # Supported Languages
112///
113/// If [syntax rules] for a language does not exist, then consider
114/// trying another language, which has similar syntax rules when
115/// it comes to comments and strings. For instance `c` vs `cpp` or
116/// `css` vs `scss`.
117///
118/// Click [here][crate-languages.rs] to see all predefined languages.
119///
120/// Go to [`SyntaxRule`][syntax rules] for an example on defining
121/// custom syntax rules.
122///
123/// # Example
124///
125/// ```
126/// # use comment_parser::get_syntax_from_path;
127/// assert!(get_syntax_from_path("foo.rs").is_ok());
128///
129/// assert!(get_syntax_from_path("foo.c").is_ok());
130/// assert!(get_syntax_from_path("foo.h").is_ok());
131///
132/// assert!(get_syntax_from_path("foo.cpp").is_ok());
133/// assert!(get_syntax_from_path("foo.hpp").is_ok());
134///
135/// assert!(get_syntax_from_path("foo.py").is_ok());
136/// ```
137///
138/// # Unsupported Syntax Rules
139///
140/// If you get [`UnsupportedLanguage`] that means
141/// the language was identified by [detect-lang], but [syntax rules] are not
142/// included and predefined in [comment-parser] for the language.
143///
144/// If [syntax rules] for a language does not exist then feel free to submit an issue
145/// on the [issue tracker][comment-parser-issues], or add it to [languages.rs][comment-parser-languages.rs]
146/// and submit a [pull request][comment-parser-pulls].
147///
148/// # Unknown Language
149///
150/// If you get [`UnknownLanguage`] that means the language is not supported,
151/// by the sister crate [detect-lang].
152/// Feel free to submit an issue on the [issue tracker][detect-lang-issues], or add it
153/// to [languages.rs][detect-lang-languages.rs] and submit a [pull request][detect-lang-pulls].
154///
155/// [syntax rules]: enum.SyntaxRule.html
156/// [`UnknownLanguage`]: enum.LanguageError.html#variant.UnknownLanguage
157/// [`UnsupportedLanguage`]: enum.LanguageError.html#variant.UnsupportedLanguage
158///
159/// [crate-languages.rs]: ../src/comment_parser/languages.rs.html
160///
161/// [detect-lang]: https://crates.io/crates/detect-lang
162/// [detect-lang-issues]: https://github.com/vallentin/detect-lang/issues
163/// [detect-lang-pulls]: https://github.com/vallentin/detect-lang/pulls
164/// [detect-lang-languages.rs]: https://github.com/vallentin/detect-lang/blob/master/src/languages.rs
165///
166/// [comment-parser]: https://crates.io/crates/comment-parser
167/// [comment-parser-issues]: https://github.com/vallentin/comment-parser/issues
168/// [comment-parser-pulls]: https://github.com/vallentin/comment-parser/pulls
169/// [comment-parser-languages.rs]: https://github.com/vallentin/comment-parser/blob/master/src/languages.rs
170#[inline]
171pub fn get_syntax_from_path<P: AsRef<Path>>(
172 path: P,
173) -> Result<&'static [SyntaxRule<'static>], LanguageError> {
174 if let Some(language) = detect_lang::from_path(path) {
175 get_syntax(language.id()).ok_or(LanguageError::UnsupportedLanguage)
176 } else {
177 Err(LanguageError::UnknownLanguage)
178 }
179}
180
181/// Given a file `extension`, get [syntax rules] for a predefined
182/// language included in the crate.
183/// The casing of the `extension` does not affect the result.
184///
185/// [`Path`]: https://doc.rust-lang.org/stable/std/path/struct.Path.html
186/// [path extension]: https://doc.rust-lang.org/stable/std/path/struct.Path.html#method.extension
187///
188/// *[See also `get_syntax_from_path`][get_syntax_from_path].*
189///
190/// [get_syntax_from_path]: fn.get_syntax_from_path.html
191///
192/// # Supported Languages
193///
194/// If [syntax rules] for a language does not exist, then consider
195/// trying another language, which has similar syntax rules when
196/// it comes to comments and strings. For instance `c` vs `cpp` or
197/// `css` vs `scss`.
198///
199/// Click [here][crate-languages.rs] to see all predefined languages.
200///
201/// Go to [`SyntaxRule`][syntax rules] for an example on defining
202/// custom syntax rules.
203///
204/// # Example
205///
206/// ```
207/// # use comment_parser::get_syntax_from_extension;
208/// assert!(get_syntax_from_extension("rs").is_ok());
209///
210/// assert!(get_syntax_from_extension("c").is_ok());
211/// assert!(get_syntax_from_extension("h").is_ok());
212///
213/// assert!(get_syntax_from_extension("cpp").is_ok());
214/// assert!(get_syntax_from_extension("hpp").is_ok());
215///
216/// assert!(get_syntax_from_extension("py").is_ok());
217/// ```
218///
219/// # Unsupported Syntax Rules
220///
221/// If you get [`UnsupportedLanguage`] that means
222/// the language was identified by [detect-lang], but [syntax rules] are not
223/// included and predefined in [comment-parser] for the language.
224///
225/// If [syntax rules] for a language does not exist then feel free to submit an issue
226/// on the [issue tracker][comment-parser-issues], or add it to [languages.rs][comment-parser-languages.rs]
227/// and submit a [pull request][comment-parser-pulls].
228///
229/// # Unknown Language
230///
231/// If you get [`UnknownLanguage`] that means the language is not supported,
232/// by the sister crate [detect-lang].
233/// Feel free to submit an issue on the [issue tracker][detect-lang-issues], or add it
234/// to [languages.rs][detect-lang-languages.rs] and submit a [pull request][detect-lang-pulls].
235///
236/// [syntax rules]: enum.SyntaxRule.html
237/// [`UnknownLanguage`]: enum.LanguageError.html#variant.UnknownLanguage
238/// [`UnsupportedLanguage`]: enum.LanguageError.html#variant.UnsupportedLanguage
239///
240/// [crate-languages.rs]: ../src/comment_parser/languages.rs.html
241///
242/// [detect-lang]: https://crates.io/crates/detect-lang
243/// [detect-lang-issues]: https://github.com/vallentin/detect-lang/issues
244/// [detect-lang-pulls]: https://github.com/vallentin/detect-lang/pulls
245/// [detect-lang-languages.rs]: https://github.com/vallentin/detect-lang/blob/master/src/languages.rs
246///
247/// [comment-parser]: https://crates.io/crates/comment-parser
248/// [comment-parser-issues]: https://github.com/vallentin/comment-parser/issues
249/// [comment-parser-pulls]: https://github.com/vallentin/comment-parser/pulls
250/// [comment-parser-languages.rs]: https://github.com/vallentin/comment-parser/blob/master/src/languages.rs
251#[inline]
252pub fn get_syntax_from_extension<S: AsRef<str>>(
253 extension: S,
254) -> Result<&'static [SyntaxRule<'static>], LanguageError> {
255 if let Some(language) = detect_lang::from_extension(extension) {
256 get_syntax(language.id()).ok_or(LanguageError::UnsupportedLanguage)
257 } else {
258 Err(LanguageError::UnknownLanguage)
259 }
260}
261
/// `LanguageError` is an error that can be returned by
/// [`get_syntax_from_path`] and [`get_syntax_from_extension`].
///
/// It implements `std::fmt::Display` and `std::error::Error`, so it can be
/// shown to users and propagated with `?` into `Box<dyn std::error::Error>`.
///
/// [`get_syntax_from_path`]: fn.get_syntax_from_path.html
/// [`get_syntax_from_extension`]: fn.get_syntax_from_extension.html
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LanguageError {
    /// The language could not be identified.
    ///
    /// -----
    ///
    /// If you get `UnknownLanguage` that means the language is not supported,
    /// by the sister crate [detect-lang].
    /// Feel free to submit an issue on the [issue tracker][detect-lang-issues], or add it
    /// to [languages.rs][detect-lang-languages.rs] and submit a [pull request][detect-lang-pulls].
    ///
    /// [detect-lang]: https://crates.io/crates/detect-lang
    /// [detect-lang-issues]: https://github.com/vallentin/detect-lang/issues
    /// [detect-lang-pulls]: https://github.com/vallentin/detect-lang/pulls
    /// [detect-lang-languages.rs]: https://github.com/vallentin/detect-lang/blob/master/src/languages.rs
    UnknownLanguage,

    /// The language was identified by [detect-lang], but [syntax rules] are not
    /// included and predefined in [comment-parser] for the language.
    ///
    /// ### Supported Languages
    ///
    /// If [syntax rules] for a language do not exist, then consider
    /// trying another language, which has similar syntax rules when
    /// it comes to comments and strings. For instance `c` vs `cpp` or
    /// `css` vs `scss`.
    ///
    /// Click [here][crate-languages.rs] to see all predefined languages.
    ///
    /// [crate-languages.rs]: ../src/comment_parser/languages.rs.html
    ///
    /// ### Custom Syntax Rules
    ///
    /// Go to [`SyntaxRule`][syntax rules] for an example on defining
    /// custom syntax rules.
    ///
    /// [syntax rules]: enum.SyntaxRule.html
    ///
    /// -----
    ///
    /// If you implement syntax rules for an unsupported language, then feel free to submit
    /// your `rules` on the [issue tracker][comment-parser-issues], or add it to
    /// [languages.rs][comment-parser-languages.rs] and submit a [pull request][comment-parser-pulls].
    ///
    /// [detect-lang]: https://crates.io/crates/detect-lang
    /// [comment-parser]: https://crates.io/crates/comment-parser
    /// [comment-parser-issues]: https://github.com/vallentin/comment-parser/issues
    /// [comment-parser-pulls]: https://github.com/vallentin/comment-parser/pulls
    /// [comment-parser-languages.rs]: https://github.com/vallentin/comment-parser/blob/master/src/languages.rs
    UnsupportedLanguage,
}

impl std::fmt::Display for LanguageError {
    /// Writes a short, human-readable description of the error.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Exhaustive on purpose: adding a variant must force a message here.
        let message = match self {
            LanguageError::UnknownLanguage => "the language could not be identified",
            LanguageError::UnsupportedLanguage => {
                "no predefined syntax rules exist for the identified language"
            }
        };
        f.write_str(message)
    }
}

impl std::error::Error for LanguageError {}
318
// `get_syntax` binary-searches `SYNTAXES`, so every entry must have a
// name strictly smaller than the name of the entry that follows it.
#[test]
fn check_order() {
    for pair in SYNTAXES.windows(2) {
        let (earlier, later) = (&pair[0], &pair[1]);
        assert!(
            earlier.0 < later.0,
            "Syntaxes out of order - {:?} should come after {:?}",
            earlier,
            later,
        );
    }
}