use lazy_static::lazy_static;
use regex::Regex;
use std::{borrow::Borrow, ffi::OsStr, path::Path};
/// Languages that [`guess`] can report for a file.
///
/// Variants are plain unit values; the derives make them cheap to copy and
/// directly comparable with `==` (as the tests in this file do).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Language {
Bash,
C,
Clojure,
CommonLisp,
CPlusPlus,
CSharp,
Css,
Elixir,
EmacsLisp,
Go,
Haskell,
Java,
JavaScript,
Json,
/// JavaScript with JSX syntax (`.jsx` files, Emacs `rjsx` mode).
Jsx,
/// OCaml implementation files (`.ml`).
OCaml,
/// OCaml interface files (`.mli`).
OCamlInterface,
Php,
Python,
Ruby,
Rust,
Scala,
/// TypeScript with JSX syntax (`.tsx` files).
Tsx,
TypeScript,
}
use Language::*;
/// Guess the language of the file at `path` whose contents are `src`.
///
/// Detectors are consulted in priority order and the first one that yields a
/// language wins:
///
/// 1. an Emacs `-*- ... -*-` mode header in `src`,
/// 2. a `#!` interpreter line in `src`,
/// 3. the complete file name of `path`,
/// 4. the extension of `path`.
///
/// Returns `None` when no detector recognises the file.
pub fn guess(path: &Path, src: &str) -> Option<Language> {
    from_emacs_mode_header(src)
        .or_else(|| from_shebang(src))
        .or_else(|| from_name(path))
        .or_else(|| {
            let extension = path.extension()?;
            from_extension(extension, src)
        })
}
/// Guess the language from an Emacs file-variable header such as
/// `-*- mode: Lisp; eval: (auto-fill-mode 1); -*-` or the shorthand
/// `-*- tuareg -*-`.
///
/// Only the first two lines of `src` are inspected, so a header on the line
/// after a shebang is still found. Mode names are compared case-insensitively
/// after trimming surrounding whitespace.
///
/// Returns `None` when neither line carries a header naming a known mode.
fn from_emacs_mode_header(src: &str) -> Option<Language> {
    lazy_static! {
        // `key: value` form. The `mode` key must open a `;`-separated entry,
        // is matched case-insensitively (`Mode: Lisp` is common), and its value
        // may be ended either by `;` or directly by the closing `-*-`. Earlier
        // entries are skipped lazily so that the first `mode:` entry wins.
        static ref MODE_RE: Regex =
            Regex::new(r"(?i)-\*-(?:[^;]*;)*?\s*mode:([^;]+?)(?:;.*)?-\*-").unwrap();
        // Shorthand form: the whole header body is the mode name.
        static ref SHORTHAND_RE: Regex = Regex::new(r"-\*-(.+)-\*-").unwrap();
    }

    src.lines().take(2).find_map(|line| {
        // Prefer the explicit `mode:` entry; fall back to the shorthand form.
        let cap = MODE_RE
            .captures(line)
            .or_else(|| SHORTHAND_RE.captures(line))?;

        match cap[1].trim().to_ascii_lowercase().as_str() {
            "c" => Some(C),
            "clojure" => Some(Clojure),
            "csharp" => Some(CSharp),
            "css" => Some(Css),
            "c++" => Some(CPlusPlus),
            "elixir" => Some(Elixir),
            "emacs-lisp" => Some(EmacsLisp),
            "go" => Some(Go),
            "haskell" => Some(Haskell),
            "java" => Some(Java),
            "js" | "js2" => Some(JavaScript),
            "lisp" => Some(CommonLisp),
            "python" => Some(Python),
            "rjsx" => Some(Jsx),
            "ruby" => Some(Ruby),
            "rust" => Some(Rust),
            "scala" => Some(Scala),
            "sh" => Some(Bash),
            "tuareg" => Some(OCaml),
            "typescript" => Some(TypeScript),
            // Unknown mode on this line: keep looking at the next one.
            _ => None,
        }
    })
}
/// Guess the language from a `#!` interpreter line at the start of `src`.
///
/// The shebang must open the first line (an optional leading byte-order mark
/// is tolerated). Whitespace after `#!` and between words is flexible. When
/// the interpreter is `env`, wherever it is installed, its option flags
/// (`-S`, `-i`, ...) and `NAME=VALUE` assignments are skipped and the next
/// word is taken as the real interpreter.
///
/// The interpreter's file name is looked up exactly first; if that fails, a
/// trailing version suffix is dropped (`python3.11` -> `python`) and the
/// lookup is retried.
///
/// Returns `None` if there is no shebang or the interpreter is not known.
fn from_shebang(src: &str) -> Option<Language> {
    let first_line = src.lines().next()?;
    let first_line = first_line.strip_prefix('\u{feff}').unwrap_or(first_line);
    let command = first_line.strip_prefix("#!")?;

    let mut words = command.split_whitespace();
    let mut interpreter = shebang_file_name(words.next()?)?;
    if interpreter == "env" {
        let program = words.find(|word| !word.starts_with('-') && !word.contains('='))?;
        interpreter = shebang_file_name(program)?;
    }

    language_for_interpreter(interpreter).or_else(|| {
        // Exact lookup goes first so names that legitimately end in a digit
        // (`d8`, `v8`) are not mangled by the version stripping.
        let unversioned = interpreter.trim_end_matches(|c: char| c.is_ascii_digit() || c == '.');
        language_for_interpreter(unversioned)
    })
}

/// Final path component of one shebang word, e.g. `/usr/bin/python` -> `python`.
fn shebang_file_name(word: &str) -> Option<&str> {
    Path::new(word).file_name().and_then(|name| name.to_str())
}

/// Map an interpreter executable name to the language it runs.
fn language_for_interpreter(name: &str) -> Option<Language> {
    let lang = match name {
        "ash" | "bash" | "dash" | "ksh" | "mksh" | "pdksh" | "rc" | "sh" | "zsh" => Bash,
        "tcc" => C,
        // "sbc" was a misspelling of "sbcl"; it is kept so nothing that used
        // to match stops matching.
        "lisp" | "sbcl" | "sbc" | "ccl" | "clisp" | "ecl" => CommonLisp,
        "elixir" => Elixir,
        "runghc" | "runhaskell" | "runhugs" => Haskell,
        "chakra" | "d8" | "gjs" | "js" | "node" | "nodejs" | "qjs" | "rhino" | "v8"
        | "v8-shell" => JavaScript,
        "ocaml" | "ocamlrun" | "ocamlscript" => OCaml,
        "python" | "python2" | "python3" => Python,
        "ruby" | "macruby" | "rake" | "jruby" | "rbx" => Ruby,
        "deno" | "ts-node" => TypeScript,
        _ => return None,
    };
    Some(lang)
}
fn from_name(path: &Path) -> Option<Language> {
match path.file_name() {
Some(name) => match name.to_string_lossy().borrow() {
".bash_aliases" | ".bash_history" | ".bash_logout" | ".bash_profile" | ".bashrc"
| ".cshrc" | ".env" | ".env.example" | ".flaskenv" | ".kshrc" | ".login"
| ".profile" | ".zlogin" | ".zlogout" | ".zprofile" | ".zshenv" | ".zshrc" | "9fs"
| "PKGBUILD" | "bash_aliases" | "bash_logout" | "bash_profile" | "bashrc" | "cshrc"
| "gradlew" | "kshrc" | "login" | "man" | "profile" | "zlogin" | "zlogout"
| "zprofile" | "zshenv" | "zshrc" => Some(Bash),
".emacs" | "_emacs" | "Cask" => Some(EmacsLisp),
"TARGETS" | "BUCK" | "DEPS" => Some(Python),
"Gemfile" | "Rakefile" => Some(Ruby),
_ => None,
},
None => None,
}
}
/// Guess the language from a file `extension` (given without the leading dot).
///
/// `src` is consulted only for `php`: content beginning with `<?hh` is not
/// reported as PHP, so such files yield `None`.
///
/// Matching is case-sensitive; unknown extensions yield `None`.
fn from_extension(extension: &OsStr, src: &str) -> Option<Language> {
    let lossy = extension.to_string_lossy();
    let ext: &str = lossy.borrow();

    let lang = match ext {
        // NOTE(review): `Path::extension` only yields the text after the last
        // dot, so "sh.in" looks reachable only when a caller passes it in
        // directly. Confirm before removing.
        "sh" | "bash" | "bats" | "cgi" | "command" | "env" | "fcgi" | "ksh" | "sh.in" | "tmux"
        | "tool" | "zsh" => Bash,
        "c" => C,
        "cc" | "cpp" | "h" | "hh" | "hpp" | "cxx" => CPlusPlus,
        "bb" | "boot" | "clj" | "cljc" | "clje" | "cljs" | "cljx" | "edn" | "joke" | "joker" => {
            Clojure
        }
        "lisp" | "lsp" | "asd" => CommonLisp,
        "cs" => CSharp,
        "css" => Css,
        "el" => EmacsLisp,
        "ex" | "exs" => Elixir,
        "go" => Go,
        "hs" => Haskell,
        "java" => Java,
        "cjs" | "js" | "mjs" => JavaScript,
        "jsx" => Jsx,
        "json" => Json,
        "ml" => OCaml,
        "mli" => OCamlInterface,
        "php" if !src.starts_with("<?hh") => Php,
        "py" | "py3" | "pyi" | "bzl" => Python,
        "rb" | "builder" | "spec" | "rake" => Ruby,
        "rs" => Rust,
        "scala" | "sbt" | "sc" => Scala,
        "ts" => TypeScript,
        "tsx" => Tsx,
        _ => return None,
    };
    Some(lang)
}
#[cfg(test)]
mod tests {
    //! End-to-end checks of `guess`, one per detection strategy.

    use super::*;
    use pretty_assertions::assert_eq;

    #[test]
    fn test_guess_by_extension() {
        let path = Path::new("foo.el");
        assert_eq!(guess(path, ""), Some(EmacsLisp));
    }

    #[test]
    fn test_guess_by_extension_php() {
        let path = Path::new("foo.php");
        assert_eq!(guess(path, "<?php\necho 1;"), Some(Php));
    }

    #[test]
    fn test_guess_by_extension_hack_is_not_php() {
        // Content starting with `<?hh` must not be reported as PHP.
        let path = Path::new("foo.php");
        assert_eq!(guess(path, "<?hh\necho 1;"), None);
    }

    #[test]
    fn test_guess_by_whole_name() {
        let path = Path::new("foo/.bashrc");
        assert_eq!(guess(path, ""), Some(Bash));
    }

    #[test]
    fn test_guess_by_shebang() {
        let path = Path::new("foo");
        assert_eq!(guess(path, "#!/bin/bash"), Some(Bash));
    }

    #[test]
    fn test_guess_by_env_shebang() {
        let path = Path::new("foo");
        assert_eq!(guess(path, "#!/usr/bin/env python"), Some(Python));
    }

    #[test]
    fn test_guess_by_emacs_mode() {
        let path = Path::new("foo");
        assert_eq!(
            guess(path, "; -*- mode: Lisp; eval: (auto-fill-mode 1); -*-"),
            Some(CommonLisp)
        );
    }

    #[test]
    fn test_guess_by_emacs_mode_second_line() {
        // The first line is a real shebang (the fixture previously had the
        // characters transposed as `!#`). The mode header on the second line
        // must still win because it is consulted before the shebang.
        let path = Path::new("foo");
        assert_eq!(
            guess(path, "#!/bin/bash\n; -*- mode: Lisp; -*-"),
            Some(CommonLisp)
        );
    }

    #[test]
    fn test_guess_by_emacs_mode_shorthand() {
        let path = Path::new("foo");
        assert_eq!(guess(path, "(* -*- tuareg -*- *)"), Some(OCaml));
    }

    #[test]
    fn test_guess_by_emacs_mode_shorthand_no_spaces() {
        let path = Path::new("foo");
        assert_eq!(guess(path, "# -*-python-*-"), Some(Python));
    }

    #[test]
    fn test_guess_unknown() {
        let path = Path::new("jfkdlsjfkdsljfkdsljf");
        assert_eq!(guess(path, ""), None);
    }
}