1use std::borrow::Cow;
12use std::path::Path;
13
/// Identifier naming the language of a document, for example `"rust"` or `"python"`.
///
/// The text is held in a `Cow<'static, str>`: an identifier created from a string
/// literal borrows it, while one created from a `String` owns its buffer.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct LanguageId(pub Cow<'static, str>);

impl LanguageId {
    /// Creates an identifier from any value convertible into `Cow<'static, str>`,
    /// such as a `&'static str` or a `String`.
    pub fn new(s: impl Into<Cow<'static, str>>) -> Self {
        Self(s.into())
    }

    /// Returns the identifier text as a string slice.
    pub fn as_str(&self) -> &str {
        &self.0
    }
}

impl std::fmt::Display for LanguageId {
    /// Writes the identifier text, honouring the width, fill, alignment and precision
    /// of the format specification in the same way `str` does.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // `pad` applies the formatter's flags; `write_str` would drop them, so
        // `format!("{:>8}", id)` would come out unpadded.
        f.pad(self.as_str())
    }
}

impl AsRef<str> for LanguageId {
    /// Borrows the identifier text; equivalent to [`LanguageId::as_str`].
    fn as_ref(&self) -> &str {
        self.as_str()
    }
}
45
/// Looks up the language identifier associated with a file extension.
///
/// `ext` is given without the leading dot and is compared exactly (case-sensitively)
/// against the known extensions. Returns `None` for anything not in the table.
fn lang_from_ext(ext: &str) -> Option<&'static str> {
    let language = match ext {
        // Systems and general-purpose languages.
        "rs" => "rust",
        "py" | "pyw" | "pyi" => "python",
        "ts" => "typescript",
        "tsx" => "typescriptreact",
        "js" | "mjs" | "cjs" => "javascript",
        "jsx" => "javascriptreact",
        "go" => "go",
        "c" | "h" => "c",
        "cpp" | "cc" | "cxx" | "hpp" | "hxx" | "h++" => "cpp",
        "java" => "java",
        "cs" => "csharp",
        "rb" => "ruby",
        "lua" => "lua",
        "zig" => "zig",

        // Configuration, data and documentation formats.
        "toml" => "toml",
        "yaml" | "yml" => "yaml",
        "json" | "jsonc" => "json",
        "md" | "markdown" => "markdown",

        // Shells.
        "sh" | "bash" | "zsh" => "shellscript",
        "fish" => "fish",

        // Web, markup and query languages.
        "html" | "htm" => "html",
        "css" => "css",
        "scss" => "scss",
        "sql" => "sql",
        "xml" => "xml",
        "svelte" => "svelte",
        "vue" => "vue",

        // Further programming languages.
        "kt" | "kts" => "kotlin",
        "swift" => "swift",
        "r" => "r",
        "dart" => "dart",
        "ex" | "exs" => "elixir",
        "hs" => "haskell",
        "ml" | "mli" => "ocaml",
        "clj" | "cljs" => "clojure",
        "erl" | "hrl" => "erlang",
        "nim" => "nim",

        // Infrastructure.
        "tf" | "tfvars" => "terraform",
        "dockerfile" => "dockerfile",

        _ => return None,
    };
    Some(language)
}
95
/// Looks up the language identifier associated with a MIME type.
///
/// Anything after the first `;` (parameters such as `charset=utf-8`) is ignored, as
/// is surrounding whitespace. The type and subtype are compared case-insensitively.
/// Returns `None` when the MIME type is not in the table.
fn lang_from_mime(mime: &str) -> Option<&'static str> {
    // Keep only the "type/subtype" essence; parameters follow the first ';'.
    let essence = mime.split(';').next().unwrap_or(mime).trim();
    // Media type and subtype names are case-insensitive, so normalise before matching.
    let essence = essence.to_ascii_lowercase();

    let language = match essence.as_str() {
        "text/rust" | "text/x-rust" => "rust",
        "text/x-python" | "text/x-python3" | "application/x-python-code" => "python",
        "application/json" | "text/json" => "json",
        "text/html" => "html",
        "text/css" => "css",
        "text/javascript" | "application/javascript" | "application/x-javascript" => "javascript",
        "text/typescript" | "application/typescript" => "typescript",
        "text/markdown" | "text/x-markdown" => "markdown",
        "text/x-yaml" | "application/yaml" | "application/x-yaml" => "yaml",
        "application/toml" => "toml",
        "text/xml" | "application/xml" => "xml",
        "text/x-sh" | "application/x-sh" => "shellscript",
        "text/x-sql" | "application/sql" => "sql",
        _ => return None,
    };
    Some(language)
}
125
126pub fn detect_language(path: &Path) -> Option<LanguageId> {
146 if let Some(id) = path
148 .extension()
149 .and_then(|e| e.to_str())
150 .map(|e| e.to_ascii_lowercase())
151 .as_deref()
152 .and_then(lang_from_ext)
153 {
154 return Some(LanguageId(Cow::Borrowed(id)));
155 }
156
157 if let Some(name) = path.file_name().and_then(|n| n.to_str()) {
159 let lower = name.to_ascii_lowercase();
160 if let Some(id) = lang_from_ext(&lower) {
161 return Some(LanguageId(Cow::Borrowed(id)));
162 }
163 }
164
165 let mime = mime_guess::from_path(path).first_raw()?;
167 let id = lang_from_mime(mime)?;
168 Some(LanguageId(Cow::Borrowed(id)))
169}
170
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::Path;

    /// Runs detection on `path` and returns the identifier as an owned string.
    fn id(path: &str) -> Option<String> {
        let detected = detect_language(Path::new(path))?;
        Some(detected.as_str().to_owned())
    }

    /// Asserts that every entry of `paths` is detected as `expected`.
    fn assert_all(paths: &[&str], expected: Option<&str>) {
        for path in paths {
            assert_eq!(id(path).as_deref(), expected, "path: {}", path);
        }
    }

    #[test]
    fn rust_extension() {
        assert_all(&["main.rs"], Some("rust"));
    }

    #[test]
    fn cpp_extensions() {
        assert_all(&["foo.cpp", "foo.hpp", "foo.cc"], Some("cpp"));
    }

    #[test]
    fn python_extension() {
        assert_all(&["script.py", "types.pyi"], Some("python"));
    }

    #[test]
    fn toml_extension() {
        assert_all(&["Cargo.toml"], Some("toml"));
    }

    #[test]
    fn yaml_extensions() {
        assert_all(&["config.yaml", "config.yml"], Some("yaml"));
    }

    #[test]
    fn unknown_extension_returns_none() {
        assert_all(
            &["archive.tar", "image.png", "binary.exe", "no_extension"],
            None,
        );
    }

    #[test]
    fn case_insensitive_extension() {
        assert_all(&["main.RS"], Some("rust"));
        assert_all(&["main.Py"], Some("python"));
    }

    #[test]
    fn dockerfile_bare_name() {
        assert_all(&["Dockerfile"], Some("dockerfile"));
    }

    #[test]
    fn language_id_display() {
        let rust = LanguageId::new("rust");
        let rendered = rust.to_string();
        assert_eq!(rendered, "rust");
        assert_eq!(rust.as_str(), "rust");
    }

    #[test]
    fn language_id_owned() {
        let owned_name = String::from("my-lang");
        let custom = LanguageId::new(owned_name);
        assert_eq!(custom.as_str(), "my-lang");
    }

    #[test]
    fn language_id_equality() {
        let rust = LanguageId::new("rust");
        assert_eq!(rust, LanguageId::new("rust"));
        assert_ne!(rust, LanguageId::new("python"));
    }
}