use fresh_languages::Language;
/// Language identifier: a static string such as `"fish"` or `"perl"`.
type LanguageId = &'static str;
// Ids written out as literals in this file. `interpreter_table` returns these
// constants directly, whereas its other arms obtain the id via `Language::…id()`.
// NOTE(review): presumably these languages have no variant in
// `fresh_languages::Language` — confirm against that crate.
const FISH: LanguageId = "fish";
const PERL: LanguageId = "perl";
const POWERSHELL: LanguageId = "powershell";
const TCL: LanguageId = "tcl";
const GROOVY: LanguageId = "groovy";
const ELIXIR: LanguageId = "elixir";
const R: LanguageId = "r";
const JULIA: LanguageId = "julia";
const NUSHELL: LanguageId = "nushell";
const DART: LanguageId = "dart";
/// Maps the first line of a file to a language id based on its `#!` shebang.
///
/// The line must start with `#!`. The interpreter is the first
/// whitespace-separated token after it, reduced to its basename. When that
/// interpreter is `env`, the real interpreter is the first following token
/// that is not an option (leading `-`), not the separate value of an option
/// such as `-u NAME`, and not a `NAME=value` assignment.
///
/// Returns `None` when the line is not a shebang, when no interpreter token
/// is present, or when the interpreter is not recognised.
pub(super) fn language_for_shebang(first_line: &str) -> Option<LanguageId> {
    let rest = first_line.strip_prefix("#!")?;
    let mut tokens = rest.split_whitespace();
    let mut base = interpreter_basename(tokens.next()?);
    if base == "env" {
        base = loop {
            let tok = tokens.next()?;
            // These `env` options take their value as the *next* token
            // (`-u NAME`, `-C DIR`, `-P PATH`); skip that value so it is not
            // mistaken for the interpreter. Attached forms (`-uNAME`,
            // `--unset=NAME`) are single tokens and fall through to the
            // generic option check below.
            if matches!(tok, "-u" | "--unset" | "-C" | "--chdir" | "-P") {
                tokens.next()?;
                continue;
            }
            // Remaining options (including `-S`, whose "value" is the command
            // itself) and environment assignments are simply stepped over.
            if tok.starts_with('-') || tok.contains('=') {
                continue;
            }
            break interpreter_basename(tok);
        };
    }
    language_for_interpreter(base)
}
/// Returns the part of `token` after its last `/` or `\` separator, or the
/// whole token when it contains neither.
fn interpreter_basename(token: &str) -> &str {
    match token.rsplit_once(['/', '\\']) {
        Some((_dir, file_name)) => file_name,
        None => token,
    }
}
/// Resolves an interpreter basename to a language id, ignoring ASCII case.
///
/// The lowercased name is looked up as-is first. If that fails, trailing
/// ASCII digits and dots (a version suffix such as `3.11`) are stripped and
/// the remainder is looked up instead. The second lookup only happens when
/// something was stripped and a non-empty name is left.
fn language_for_interpreter(base: &str) -> Option<LanguageId> {
    let normalized = base.to_ascii_lowercase();
    interpreter_table(&normalized).or_else(|| {
        let unversioned =
            normalized.trim_end_matches(|c: char| c == '.' || c.is_ascii_digit());
        let had_suffix = unversioned.len() != normalized.len();
        if had_suffix && !unversioned.is_empty() {
            interpreter_table(unversioned)
        } else {
            None
        }
    })
}
/// Exact-match lookup from an interpreter name to its language id.
///
/// Matching is case-sensitive; `language_for_interpreter` lowercases the
/// name before calling this. Returns `None` for names not listed.
fn interpreter_table(name: &str) -> Option<LanguageId> {
    let id: LanguageId = match name {
        // Ids supplied by `fresh_languages::Language`.
        "sh" | "bash" | "dash" | "ash" | "ksh" | "mksh" | "pdksh" | "zsh" => Language::Bash.id(),
        "python" | "pypy" => Language::Python.id(),
        "ruby" | "jruby" => Language::Ruby.id(),
        "php" => Language::Php.id(),
        "lua" | "luajit" => Language::Lua.id(),
        "node" | "nodejs" => Language::JavaScript.id(),
        "deno" | "bun" | "ts-node" | "tsx" => Language::TypeScript.id(),

        // Ids defined as constants in this file.
        "fish" => FISH,
        "nu" => NUSHELL,
        "pwsh" | "powershell" => POWERSHELL,
        "perl" => PERL,
        "tcl" | "tclsh" | "wish" => TCL,
        "groovy" => GROOVY,
        "elixir" => ELIXIR,
        "julia" => JULIA,
        "r" | "rscript" => R,
        "dart" => DART,

        _ => return None,
    };
    Some(id)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Shebang lines paired with the language id they must resolve to.
    const CASES: &[(&str, Option<&str>)] = &[
        // Direct interpreter paths, with and without a space after `#!`.
        ("#!/bin/sh\n", Some("bash")),
        ("#! /bin/sh\n", Some("bash")),
        ("#!/usr/bin/fish\n", Some("fish")),
        ("#!/usr/bin/lua\n", Some("lua")),
        ("#!/usr/bin/pwsh\n", Some("powershell")),
        // `env` indirection, including options and variable assignments.
        ("#!/usr/bin/env python3\n", Some("python")),
        ("#!/usr/bin/env -S deno run\n", Some("typescript")),
        ("#!/usr/bin/env FOO=bar ruby\n", Some("ruby")),
        // Version suffix on a direct path.
        ("#!/usr/bin/python3.11\n", Some("python")),
        // Mixed-case and other interpreters via `env`.
        ("#!/usr/bin/env Rscript\n", Some("r")),
        ("#!/usr/bin/env node\n", Some("javascript")),
        ("#!/usr/bin/env elixir\n", Some("elixir")),
        // Lines that must not resolve to any language.
        ("not a shebang\n", None),
        ("#!/usr/bin/awk -f\n", None),
        ("#!/usr/bin/env\n", None),
    ];

    #[test]
    fn parses_interpreters_and_indirection() {
        for &(line, expected) in CASES.iter() {
            assert_eq!(
                language_for_shebang(line),
                expected,
                "shebang line: {:?}",
                line
            );
        }
    }
}