use crate::core_ir::UnifiedModule;
use crate::in_lang_parse;
use clap::ValueEnum;
use std::fmt;
use std::io::{BufRead, BufReader};
use std::path::Path;
/// Identifies one source-language front end known to the parser registry.
///
/// Each variant has a stable lowercase identifier ([`ParserId::as_str`]) and a
/// human-readable family label ([`ParserId::family_label`]).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ParserId {
    /// The `.in` language (family label: "inauguration .in").
    In,
    /// JSON Core IR (`.icore`).
    Icore,
    C,
    Cpp,
    ObjC,
    ObjCpp,
    Java,
    Kotlin,
    Scala,
    CSharp,
    FSharp,
    VbNet,
    Python,
    Ruby,
    Php,
    Perl,
    JavaScript,
    TypeScript,
    Go,
    V,
    Rust,
    Swift,
    Zig,
    Dart,
    Lua,
    Clojure,
    Groovy,
    Elixir,
    Erlang,
    Haskell,
    OCaml,
    Julia,
    R,
    Nim,
    D,
    Crystal,
    Odin,
    Hare,
}

impl ParserId {
    /// Returns the lowercase identifier of this parser, e.g. `"objc++"` for
    /// [`ParserId::ObjCpp`] or `"vb"` for [`ParserId::VbNet`].
    #[must_use]
    pub const fn as_str(self) -> &'static str {
        match self {
            Self::In => "in",
            Self::Icore => "icore",
            Self::C => "c",
            Self::Cpp => "cpp",
            Self::ObjC => "objc",
            Self::ObjCpp => "objc++",
            Self::Java => "java",
            Self::Kotlin => "kotlin",
            Self::Scala => "scala",
            Self::CSharp => "csharp",
            Self::FSharp => "fsharp",
            Self::VbNet => "vb",
            Self::Python => "python",
            Self::Ruby => "ruby",
            Self::Php => "php",
            Self::Perl => "perl",
            Self::JavaScript => "javascript",
            Self::TypeScript => "typescript",
            Self::Go => "go",
            Self::V => "v",
            Self::Rust => "rust",
            Self::Swift => "swift",
            Self::Zig => "zig",
            Self::Dart => "dart",
            Self::Lua => "lua",
            Self::Clojure => "clojure",
            Self::Groovy => "groovy",
            Self::Elixir => "elixir",
            Self::Erlang => "erlang",
            Self::Haskell => "haskell",
            Self::OCaml => "ocaml",
            Self::Julia => "julia",
            Self::R => "r",
            Self::Nim => "nim",
            Self::D => "d",
            Self::Crystal => "crystal",
            Self::Odin => "odin",
            Self::Hare => "hare",
        }
    }

    /// Returns a human-readable label for the language family of this parser.
    ///
    /// Several parsers share one label (for example all C-like languages).
    #[must_use]
    pub const fn family_label(self) -> &'static str {
        match self {
            // Native inputs.
            Self::In => "inauguration .in",
            Self::Icore => "JSON Core IR (icore)",
            // Compiled, brace-delimited families.
            Self::C | Self::Cpp | Self::ObjC | Self::ObjCpp => "C-like",
            Self::Go | Self::V | Self::Rust | Self::Swift | Self::Zig => "systems / curly-brace",
            Self::Odin | Self::Hare => "systems / native",
            Self::Nim | Self::D | Self::Crystal => "ALGOL-descended",
            // Managed runtimes.
            Self::Java | Self::Kotlin | Self::Scala => "JVM / class-based",
            Self::Groovy => "JVM scripting",
            Self::CSharp | Self::FSharp | Self::VbNet => ".NET",
            // Scripting and embeddable languages.
            Self::Python | Self::Ruby | Self::Php | Self::Perl => "dynamic OO / scripting",
            Self::JavaScript | Self::TypeScript => "ECMAScript-shaped",
            Self::Dart | Self::Lua => "OO / embeddable",
            // Functional and numeric languages.
            Self::Clojure | Self::Elixir | Self::Erlang | Self::Haskell | Self::OCaml => {
                "functional"
            }
            Self::Julia | Self::R => "numeric / scientific",
        }
    }
}
/// Maps a file extension (without the leading dot) to the parser that handles it.
///
/// The comparison is exact and case-sensitive; the callers in this file
/// lowercase the extension before calling. Returns `None` when the extension
/// is not in the table.
#[must_use]
pub fn parser_id_from_extension(ext: &str) -> Option<ParserId> {
    let id = match ext {
        "swift" => ParserId::Swift,
        "in" => ParserId::In,
        "icore" => ParserId::Icore,
        "c" | "h" => ParserId::C,
        "cc" | "cpp" | "cxx" | "hpp" | "hxx" | "hh" | "h++" | "ipp" => ParserId::Cpp,
        "m" => ParserId::ObjC,
        "mm" => ParserId::ObjCpp,
        "java" => ParserId::Java,
        "kt" | "kts" => ParserId::Kotlin,
        "scala" | "sc" => ParserId::Scala,
        "cs" => ParserId::CSharp,
        "fs" | "fsx" | "fsi" => ParserId::FSharp,
        "vb" => ParserId::VbNet,
        "py" | "pyi" | "pyw" => ParserId::Python,
        "rb" | "rake" | "gemspec" => ParserId::Ruby,
        "php" | "phtml" => ParserId::Php,
        "pl" | "pm" => ParserId::Perl,
        "js" | "mjs" | "cjs" | "jsx" => ParserId::JavaScript,
        "ts" | "tsx" | "mts" | "cts" => ParserId::TypeScript,
        "go" => ParserId::Go,
        "v" => ParserId::V,
        "rs" => ParserId::Rust,
        "zig" => ParserId::Zig,
        "dart" => ParserId::Dart,
        "lua" => ParserId::Lua,
        "clj" | "cljs" | "cljc" => ParserId::Clojure,
        "groovy" => ParserId::Groovy,
        "ex" | "exs" => ParserId::Elixir,
        "erl" | "hrl" => ParserId::Erlang,
        "hs" | "lhs" => ParserId::Haskell,
        "ml" | "mli" => ParserId::OCaml,
        "jl" => ParserId::Julia,
        "r" => ParserId::R,
        "nim" => ParserId::Nim,
        "d" => ParserId::D,
        "cr" => ParserId::Crystal,
        "odin" => ParserId::Odin,
        "ha" => ParserId::Hare,
        // Anything else has no registered front end.
        _ => return None,
    };
    Some(id)
}
/// Resolves the parser named by a magic-line token such as `java`, `C++` or `python`.
///
/// The token is trimmed and ASCII-lowercased, then looked up in two steps:
///
/// 1. as a file extension via [`parser_id_from_extension`] (so `rs`, `py`, `kt` work);
/// 2. as a language name or alias that is not itself an extension.
///
/// Step 2 covers every canonical name returned by [`ParserId::as_str`] that the
/// extension table does not already resolve, so `parser=<as_str()>` round-trips
/// for all parsers. Returns `None` for unrecognised tokens.
#[must_use]
pub fn parser_id_from_magic_token(token: &str) -> Option<ParserId> {
    // Normalise once; both lookups below share the same lowercase string.
    let name = token.trim().to_ascii_lowercase();
    parser_id_from_extension(&name).or_else(|| {
        let id = match name.as_str() {
            // Aliases and names that differ from any file extension.
            "objc" | "objective-c" => ParserId::ObjC,
            "objc++" | "objcpp" => ParserId::ObjCpp,
            "cplusplus" | "c++" => ParserId::Cpp,
            "vlang" => ParserId::V,
            "csharp" => ParserId::CSharp,
            "fsharp" => ParserId::FSharp,
            "kotlin" => ParserId::Kotlin,
            "typescript" => ParserId::TypeScript,
            "javascript" => ParserId::JavaScript,
            "ocaml" => ParserId::OCaml,
            // Also resolved by the extension table; kept so name lookup does not
            // silently depend on it.
            "icore" => ParserId::Icore,
            // Canonical `ParserId::as_str` names that previously fell through to `None`.
            "python" => ParserId::Python,
            "ruby" => ParserId::Ruby,
            "perl" => ParserId::Perl,
            "rust" => ParserId::Rust,
            "clojure" => ParserId::Clojure,
            "elixir" => ParserId::Elixir,
            "erlang" => ParserId::Erlang,
            "haskell" => ParserId::Haskell,
            "julia" => ParserId::Julia,
            "crystal" => ParserId::Crystal,
            "hare" => ParserId::Hare,
            _ => return None,
        };
        Some(id)
    })
}
// Parser selection as exposed on the command line (derives `clap::ValueEnum`).
// Plain `//` comments are used here on purpose: clap's derive reads `///` doc
// comments as help text, so adding them would change CLI output.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, ValueEnum)]
pub enum ParserCli {
// Default. `resolve_parser_id` then consults the magic first line, the
// `IN_PARSER` environment variable and the file extension, in that order.
#[default]
Auto,
// Always resolve to `ParserId::In`.
In,
// Always resolve to `ParserId::Icore`.
Icore,
}
/// Outcome of parser resolution for one build input.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ResolvedBuildParser {
/// Parse the file with the front end identified by the contained `ParserId`.
CoreIr(ParserId),
/// `parse_with_resolved` returns `Ok(None)` for this variant. Note that
/// `resolve_parser_id` in this file never produces it: a `.swift` extension
/// resolves to `CoreIr(ParserId::Swift)`.
Swift,
}
/// Reads the `IN_PARSER` environment variable as a parser override.
///
/// Only `in` and `icore` (ASCII case-insensitive) are honoured. An unset
/// variable, a non-Unicode value, or any other value yields `None`.
fn env_core_ir_parser_override() -> Option<ParserId> {
    let raw = std::env::var("IN_PARSER").ok()?;
    match raw.to_ascii_lowercase().as_str() {
        "in" => Some(ParserId::In),
        "icore" => Some(ParserId::Icore),
        _ => None,
    }
}
/// Prefix a file's first line must start with to carry a parser directive
/// (note the trailing space).
const MAGIC_SHEBANG_PREFIX: &str = "#!in ";
/// Key that must immediately follow the prefix; the remainder of the line is
/// the parser value.
const MAGIC_PARSER_KEY: &str = "parser=";
/// Parser directive parsed from a file's first line (`#!in parser=<value>`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum MagicParserDirective {
/// The value was exactly `in`.
ForceIn,
/// The value was exactly `auto`: `resolve_parser_id` carries on with the
/// `IN_PARSER` environment variable and the file extension.
DeferAuto,
/// The value named a parser recognised by `parser_id_from_magic_token`.
UseParser(ParserId),
}
/// Reads the first line of `path` and parses it as a magic parser directive.
///
/// Returns `None` when `path` is not a regular file, cannot be opened, its
/// first line cannot be read as UTF-8, or the line carries no recognised
/// directive.
fn read_magic_parser_directive(path: &Path) -> Option<MagicParserDirective> {
    if path.is_file() {
        let mut first_line = String::new();
        // Only the first line is inspected; the rest of the file is never read here.
        BufReader::new(std::fs::File::open(path).ok()?)
            .read_line(&mut first_line)
            .ok()?;
        parse_magic_parser_first_line(&first_line)
    } else {
        None
    }
}
/// Parses a `#!in parser=<value>` first line into a directive.
///
/// Trailing `\r`/`\n` characters are ignored. The values `in` and `auto` are
/// matched exactly; any other value is handed to
/// `parser_id_from_magic_token`. Returns `None` when the prefix or key is
/// missing or the value names no known parser.
fn parse_magic_parser_first_line(line: &str) -> Option<MagicParserDirective> {
    let value = line
        .trim_end_matches(['\r', '\n'])
        .strip_prefix(MAGIC_SHEBANG_PREFIX)?
        .strip_prefix(MAGIC_PARSER_KEY)?;
    match value {
        "in" => Some(MagicParserDirective::ForceIn),
        "auto" => Some(MagicParserDirective::DeferAuto),
        other => parser_id_from_magic_token(other).map(MagicParserDirective::UseParser),
    }
}
pub fn resolve_parser_id(path: &Path, cli: ParserCli) -> ResolvedBuildParser {
match cli {
ParserCli::In => ResolvedBuildParser::CoreIr(ParserId::In),
ParserCli::Icore => ResolvedBuildParser::CoreIr(ParserId::Icore),
ParserCli::Auto => {
if let Some(m) = read_magic_parser_directive(path) {
match m {
MagicParserDirective::ForceIn => {
return ResolvedBuildParser::CoreIr(ParserId::In);
}
MagicParserDirective::UseParser(ParserId::In) => {
return ResolvedBuildParser::CoreIr(ParserId::In);
}
MagicParserDirective::UseParser(id) => {
return ResolvedBuildParser::CoreIr(id);
}
MagicParserDirective::DeferAuto => {}
}
}
if let Some(id) = env_core_ir_parser_override() {
return ResolvedBuildParser::CoreIr(id);
}
if let Some(ext) = path.extension().and_then(|s| s.to_str()) {
let el = ext.to_ascii_lowercase();
if let Some(id) = parser_id_from_extension(&el) {
return ResolvedBuildParser::CoreIr(id);
}
}
ResolvedBuildParser::CoreIr(ParserId::Icore)
}
}
}
/// Error returned by the parser registry.
#[derive(Debug)]
pub enum ParserRegistryError {
    /// Free-form message describing the failure.
    Msg(String),
}

impl fmt::Display for ParserRegistryError {
    /// Writes the contained message verbatim, without applying width or padding flags.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self::Msg(message) = self;
        f.write_str(message)
    }
}

impl std::error::Error for ParserRegistryError {}
/// A front end that turns one source file into a `UnifiedModule`.
pub trait SourceParser {
/// Parses the file at `path`, returning the resulting module or a
/// `ParserRegistryError` describing why parsing failed.
fn parse_to_core(&self, path: &Path) -> Result<UnifiedModule, ParserRegistryError>;
}
/// Stateless `SourceParser` for the `.in` language.
#[derive(Debug, Default, Clone, Copy)]
pub struct InLangParser;

impl SourceParser for InLangParser {
    /// Delegates to `in_lang_parse::parse_in_file`, wrapping its error message
    /// in `ParserRegistryError::Msg`.
    fn parse_to_core(&self, path: &Path) -> Result<UnifiedModule, ParserRegistryError> {
        let module = in_lang_parse::parse_in_file(path).map_err(ParserRegistryError::Msg)?;
        Ok(module)
    }
}
pub fn parse_with_resolved(
resolved: ResolvedBuildParser,
path: &Path,
) -> Result<Option<UnifiedModule>, ParserRegistryError> {
match resolved {
ResolvedBuildParser::Swift => Ok(None),
ResolvedBuildParser::CoreIr(ParserId::In) => InLangParser.parse_to_core(path).map(Some),
ResolvedBuildParser::CoreIr(ParserId::Icore) => {
crate::compiler::icore::parse_icore_file(path)
.map_err(ParserRegistryError::Msg)
.map(Some)
}
ResolvedBuildParser::CoreIr(ParserId::Rust) => {
crate::compiler::rust_front::parse_rust_file(path)
.map_err(ParserRegistryError::Msg)
.map(Some)
}
ResolvedBuildParser::CoreIr(ParserId::Nim) => {
crate::compiler::nim_boundary::parse_nim_file(path)
.map_err(ParserRegistryError::Msg)
.map(Some)
}
ResolvedBuildParser::CoreIr(ParserId::Odin) => {
crate::compiler::odin_boundary::parse_odin_file(path)
.map_err(ParserRegistryError::Msg)
.map(Some)
}
ResolvedBuildParser::CoreIr(ParserId::Hare) => {
crate::compiler::hare_boundary::parse_hare_file(path)
.map_err(ParserRegistryError::Msg)
.map(Some)
}
ResolvedBuildParser::CoreIr(ParserId::D) => crate::compiler::d_boundary::parse_d_file(path)
.map_err(ParserRegistryError::Msg)
.map(Some),
ResolvedBuildParser::CoreIr(ParserId::Crystal) => {
crate::compiler::crystal_boundary::parse_crystal_file(path)
.map_err(ParserRegistryError::Msg)
.map(Some)
}
ResolvedBuildParser::CoreIr(ParserId::Clojure) => {
crate::compiler::clojure_boundary::parse_clojure_file(path)
.map_err(ParserRegistryError::Msg)
.map(Some)
}
ResolvedBuildParser::CoreIr(ParserId::VbNet) => {
crate::compiler::vb_boundary::parse_vb_file(path)
.map_err(ParserRegistryError::Msg)
.map(Some)
}
ResolvedBuildParser::CoreIr(id) => {
crate::compiler::tree_front::parse_polyglot_file(id, path)
.map_err(ParserRegistryError::Msg)
.map(Some)
}
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::ffi::OsString;
    use std::path::Path;
    use std::sync::{Mutex, MutexGuard};

    /// Serializes every test whose outcome depends on the process-wide
    /// `IN_PARSER` environment variable.
    static ENV_LOCK: Mutex<()> = Mutex::new(());

    /// Holds `ENV_LOCK` with `IN_PARSER` pinned to a known state and restores
    /// the previous value when dropped.
    ///
    /// `resolve_parser_id` consults `IN_PARSER` in `ParserCli::Auto` mode, so
    /// any test that reaches that step must pin the variable; otherwise it
    /// races with the test that sets it (the harness runs tests on several
    /// threads) and fails when a developer has `IN_PARSER` exported.
    struct InParserEnv {
        previous: Option<OsString>,
        // Declared last so the lock is released only after `Drop::drop` has
        // restored the variable.
        _lock: MutexGuard<'static, ()>,
    }

    impl InParserEnv {
        /// Takes the lock and sets `IN_PARSER` to `value` (`None` unsets it).
        fn pin(value: Option<&str>) -> Self {
            // A failed assertion in another test poisons the mutex; the `()`
            // payload cannot be left inconsistent, so recover the guard.
            let lock = ENV_LOCK
                .lock()
                .unwrap_or_else(|poisoned| poisoned.into_inner());
            let previous = std::env::var_os("IN_PARSER");
            // SAFETY: `ENV_LOCK` is held, and every test in this module that
            // reads or writes `IN_PARSER` goes through this guard, so they never
            // touch the variable concurrently.
            // NOTE(review): environment access from outside this module on
            // other test threads is not covered by this lock.
            unsafe {
                match value {
                    Some(v) => std::env::set_var("IN_PARSER", v),
                    None => std::env::remove_var("IN_PARSER"),
                }
            }
            Self {
                previous,
                _lock: lock,
            }
        }
    }

    impl Drop for InParserEnv {
        fn drop(&mut self) {
            // SAFETY: `_lock` is still held while `drop` runs; see `pin`.
            unsafe {
                match self.previous.take() {
                    Some(v) => std::env::set_var("IN_PARSER", v),
                    None => std::env::remove_var("IN_PARSER"),
                }
            }
        }
    }

    /// Resolves `path` in `ParserCli::Auto` mode with `IN_PARSER` unset.
    fn resolve_auto(path: &Path) -> ResolvedBuildParser {
        let _env = InParserEnv::pin(None);
        resolve_parser_id(path, ParserCli::Auto)
    }

    /// Builds a unique path in the system temp directory ending in `suffix`.
    fn temp_file_path(suffix: &str) -> std::path::PathBuf {
        std::env::temp_dir().join(format!(
            "inauguration-parser-registry-{}-{}-{suffix}",
            std::process::id(),
            std::time::SystemTime::now()
                .duration_since(std::time::UNIX_EPOCH)
                .unwrap()
                .as_nanos()
        ))
    }

    #[test]
    fn parse_magic_first_line_recognizes_in_and_auto() {
        assert_eq!(
            parse_magic_parser_first_line("#!in parser=in\n"),
            Some(MagicParserDirective::ForceIn)
        );
        assert_eq!(
            parse_magic_parser_first_line("#!in parser=auto\r\n"),
            Some(MagicParserDirective::DeferAuto)
        );
        assert_eq!(
            parse_magic_parser_first_line("#!in parser=java\n"),
            Some(MagicParserDirective::UseParser(ParserId::Java))
        );
        assert_eq!(
            parse_magic_parser_first_line("#!in parser=icore\n"),
            Some(MagicParserDirective::UseParser(ParserId::Icore))
        );
        assert_eq!(parse_magic_parser_first_line("#!in parser=nope\n"), None);
        assert_eq!(parse_magic_parser_first_line("#!/usr/bin/env in\n"), None);
    }

    #[test]
    fn auto_resolves_in_extension() {
        assert_eq!(
            resolve_auto(Path::new("hello.in")),
            ResolvedBuildParser::CoreIr(ParserId::In)
        );
    }

    #[test]
    fn auto_resolves_java_extension_to_core_ir() {
        assert_eq!(
            resolve_auto(Path::new("Foo.java")),
            ResolvedBuildParser::CoreIr(ParserId::Java)
        );
    }

    #[test]
    fn auto_resolves_v_extension_to_core_ir() {
        assert_eq!(
            resolve_auto(Path::new("main.v")),
            ResolvedBuildParser::CoreIr(ParserId::V)
        );
    }

    #[test]
    fn auto_resolves_go_extension_to_core_ir() {
        assert_eq!(
            resolve_auto(Path::new("main.go")),
            ResolvedBuildParser::CoreIr(ParserId::Go)
        );
    }

    #[test]
    fn auto_resolves_icore_extension() {
        assert_eq!(
            resolve_auto(Path::new("module.icore")),
            ResolvedBuildParser::CoreIr(ParserId::Icore)
        );
    }

    #[test]
    fn parser_cli_icore_forces_icore() {
        // An explicit CLI choice never consults the environment, so no pinning is needed.
        assert_eq!(
            resolve_parser_id(Path::new("anything.swift"), ParserCli::Icore),
            ResolvedBuildParser::CoreIr(ParserId::Icore)
        );
    }

    #[test]
    fn auto_swift_for_swift() {
        assert_eq!(
            resolve_auto(Path::new("App.swift")),
            ResolvedBuildParser::CoreIr(ParserId::Swift)
        );
    }

    #[test]
    fn auto_cpp_for_cc() {
        assert_eq!(
            resolve_auto(Path::new("lib.cc")),
            ResolvedBuildParser::CoreIr(ParserId::Cpp)
        );
    }

    #[test]
    fn auto_resolves_ocaml_odin_and_hare_extensions() {
        assert_eq!(
            resolve_auto(Path::new("main.ml")),
            ResolvedBuildParser::CoreIr(ParserId::OCaml)
        );
        assert_eq!(
            resolve_auto(Path::new("main.odin")),
            ResolvedBuildParser::CoreIr(ParserId::Odin)
        );
        assert_eq!(
            resolve_auto(Path::new("main.ha")),
            ResolvedBuildParser::CoreIr(ParserId::Hare)
        );
    }

    #[test]
    fn ocaml_front_parses_polyglot_sample_shape() {
        let path = temp_file_path("sample.ml");
        std::fs::write(
            &path,
            "let helper value = value\n\nlet main () = ignore (helper 1)\n",
        )
        .expect("write temp");
        let m = parse_with_resolved(ResolvedBuildParser::CoreIr(ParserId::OCaml), &path)
            .expect("parse")
            .expect("module");
        let _ = std::fs::remove_file(&path);
        assert!(
            m.decls.iter().any(
                |d| matches!(d, crate::core_ir::Decl::Function { name, .. } if name == "helper")
            )
        );
        assert!(
            m.decls.iter().any(
                |d| matches!(d, crate::core_ir::Decl::Function { name, .. } if name == "main")
            )
        );
    }

    #[test]
    fn magic_parser_in_overrides_non_in_extension() {
        let path = temp_file_path("magic.swift");
        std::fs::write(&path, "#!in parser=in\nfn main() -> void\n").expect("write temp");
        // The magic line is decisive before the environment is consulted.
        let resolved = resolve_parser_id(&path, ParserCli::Auto);
        // Clean up before asserting so a failure does not leak the temp file.
        let _ = std::fs::remove_file(&path);
        assert_eq!(resolved, ResolvedBuildParser::CoreIr(ParserId::In));
    }

    #[test]
    fn magic_parser_auto_defers_to_in_parser_env() {
        let path = temp_file_path("defer.swift");
        std::fs::write(&path, "#!in parser=auto\nfn main() -> void\n").expect("write temp");
        let resolved = {
            let _env = InParserEnv::pin(Some("in"));
            resolve_parser_id(&path, ParserCli::Auto)
        };
        let _ = std::fs::remove_file(&path);
        assert_eq!(resolved, ResolvedBuildParser::CoreIr(ParserId::In));
    }

    #[test]
    fn magic_parser_auto_defers_to_dot_in_extension() {
        let path = temp_file_path("defer.in");
        std::fs::write(&path, "#!in parser=auto\nfn main() -> void\n").expect("write temp");
        let resolved = resolve_auto(&path);
        let _ = std::fs::remove_file(&path);
        assert_eq!(resolved, ResolvedBuildParser::CoreIr(ParserId::In));
    }

    #[test]
    fn unknown_magic_parser_value_falls_through_to_extension() {
        let path = temp_file_path("unknown.xyz");
        std::fs::write(&path, "#!in parser=nope\n").expect("write temp");
        let resolved = resolve_auto(&path);
        let _ = std::fs::remove_file(&path);
        // `.xyz` is not a known extension, so the icore fallback applies.
        assert_eq!(resolved, ResolvedBuildParser::CoreIr(ParserId::Icore));
    }

    // Renamed from `..._errors`: the test asserts that parsing succeeds and
    // yields the class declaration.
    #[test]
    fn polyglot_java_class_without_methods_parses() {
        let path = temp_file_path("empty.java");
        std::fs::write(&path, "class X {}\n").expect("write temp");
        let parsed = parse_with_resolved(ResolvedBuildParser::CoreIr(ParserId::Java), &path);
        let _ = std::fs::remove_file(&path);
        let m = parsed.expect("parse").expect("module");
        assert!(
            m.decls
                .iter()
                .any(|d| matches!(d, crate::core_ir::Decl::Class { name, .. } if name == "X"))
        );
    }

    #[test]
    fn clojure_boundary_front_parses_polyglot_sample_shape() {
        let path = temp_file_path("sample.clj");
        std::fs::write(&path, "(defn answer [] 42)\n(defn main [] nil)\n").expect("write temp");
        let m = parse_with_resolved(ResolvedBuildParser::CoreIr(ParserId::Clojure), &path)
            .expect("parse")
            .expect("module");
        let _ = std::fs::remove_file(&path);
        assert!(
            m.decls.iter().any(
                |d| matches!(d, crate::core_ir::Decl::Function { name, .. } if name == "answer")
            )
        );
    }

    #[test]
    fn polyglot_java_static_void_main_ok() {
        let path = temp_file_path("entry.java");
        std::fs::write(
            &path,
            "class X { public static void main(String[] a) { } }\n",
        )
        .expect("write temp");
        let m = parse_with_resolved(ResolvedBuildParser::CoreIr(ParserId::Java), &path)
            .expect("parse")
            .expect("module");
        let _ = std::fs::remove_file(&path);
        assert!(
            m.decls
                .iter()
                .any(|d| matches!(d, crate::core_ir::Decl::Class { name, .. } if name == "X")),
            "expected class X, got {:?}",
            m.decls
        );
        let class_x = m
            .decls
            .iter()
            .find_map(|d| match d {
                crate::core_ir::Decl::Class { methods, .. } => Some(methods),
                _ => None,
            })
            .expect("class X methods");
        assert!(
            class_x.iter().any(
                |d| matches!(d, crate::core_ir::Decl::Function { name, .. } if name == "main")
            ),
            "expected main method in class"
        );
    }

    #[test]
    fn v_front_parses_main_function() {
        let path = temp_file_path("main.v");
        std::fs::write(
            &path,
            "module main\nfn main() {\n x := 1\n return\n}\n",
        )
        .expect("write temp");
        let m = parse_with_resolved(ResolvedBuildParser::CoreIr(ParserId::V), &path)
            .expect("parse")
            .expect("module");
        let _ = std::fs::remove_file(&path);
        assert!(
            m.decls.iter().any(
                |d| matches!(d, crate::core_ir::Decl::Function { name, .. } if name == "main")
            )
        );
    }

    #[test]
    fn nim_boundary_front_parses_polyglot_sample_shape() {
        let path = temp_file_path("sample.nim");
        std::fs::write(&path, "proc answer(): int = 42\n\nproc main() = discard\n")
            .expect("write temp");
        let m = parse_with_resolved(ResolvedBuildParser::CoreIr(ParserId::Nim), &path)
            .expect("parse")
            .expect("module");
        let _ = std::fs::remove_file(&path);
        assert!(
            m.decls.iter().any(
                |d| matches!(d, crate::core_ir::Decl::Function { name, .. } if name == "answer")
            )
        );
    }

    #[test]
    fn odin_boundary_front_parses_polyglot_sample_shape() {
        let path = temp_file_path("sample.odin");
        std::fs::write(
            &path,
            "package main\n\nanswer :: proc() -> int {\n\treturn 42\n}\n\nmain :: proc() {}\n",
        )
        .expect("write temp");
        let m = parse_with_resolved(ResolvedBuildParser::CoreIr(ParserId::Odin), &path)
            .expect("parse")
            .expect("module");
        let _ = std::fs::remove_file(&path);
        assert!(
            m.decls.iter().any(
                |d| matches!(d, crate::core_ir::Decl::Function { name, .. } if name == "answer")
            )
        );
    }

    #[test]
    fn go_front_parses_main_function() {
        let path = temp_file_path("main.go");
        std::fs::write(
            &path,
            "package main\nfunc main() {\n x := 1\n return\n}\n",
        )
        .expect("write temp");
        let m = parse_with_resolved(ResolvedBuildParser::CoreIr(ParserId::Go), &path)
            .expect("parse")
            .expect("module");
        let _ = std::fs::remove_file(&path);
        assert!(
            m.decls.iter().any(
                |d| matches!(d, crate::core_ir::Decl::Function { name, .. } if name == "main")
            )
        );
    }

    #[test]
    fn parse_with_resolved_reads_minimal_in_file() {
        let path = temp_file_path("hello.in");
        std::fs::write(&path, "fn main() -> void\n").expect("write temp .in");
        let m = parse_with_resolved(ResolvedBuildParser::CoreIr(ParserId::In), &path)
            .expect("parse")
            .expect("module");
        let _ = std::fs::remove_file(&path);
        assert!(!m.decls.is_empty());
    }
}