use std::fmt::Write;
use std::path::PathBuf;
use crate::codegen::rust;
use crate::codegen::EmittedFile;
use crate::lowering::StateTable;
/// Emits the files of a Python extension crate wrapping the grammar's parser.
///
/// The Rust backend is run first and the contents of the first file it
/// returns are embedded into the generated `src/lib.rs`. The crate/module
/// name is the grammar name, or `"parser"` when the grammar name is empty.
///
/// Returns three files, in this order: `src/lib.rs`, `Cargo.toml`,
/// `pyproject.toml`.
///
/// # Panics
///
/// Panics if the Rust backend returns no files.
pub fn emit(st: &StateTable) -> Vec<EmittedFile> {
    // Only the first file produced by the Rust backend is used.
    let parser_source = rust::emit(st).into_iter().next().unwrap().contents;

    let crate_name = match st.grammar_name.as_str() {
        "" => String::from("parser"),
        given => given.to_owned(),
    };

    let token_names: Vec<String> = st.tokens.iter().map(|tok| tok.name.clone()).collect();
    let rule_names: Vec<String> = st
        .entry_states
        .iter()
        .map(|(rule, _)| rule.clone())
        .collect();

    let lib_rs = build_lib_rs(
        &parser_source,
        &crate_name,
        &st.rule_kinds,
        &token_names,
        &rule_names,
    );
    let cargo_toml = build_cargo_toml(&crate_name);
    let pyproject_toml = build_pyproject(&crate_name);

    // Small constructor so each output reads as "path, contents".
    let file = |path: &str, contents: String| EmittedFile {
        path: PathBuf::from(path),
        contents,
    };

    vec![
        file("src/lib.rs", lib_rs),
        file("Cargo.toml", cargo_toml),
        file("pyproject.toml", pyproject_toml),
    ]
}
/// Builds the text of the generated crate's `src/lib.rs`.
///
/// The output is, in order:
/// 1. a crate-level `//!` doc header,
/// 2. `rust_file` line by line, skipping every line that starts with `#![`,
/// 3. the shared binding code in `PY_BINDINGS_CORE`,
/// 4. a `PyRuleKind` class with one class attribute per entry of `rule_kinds`
///    and a `name` lookup keyed by the entry's index,
/// 5. a `PyTokenKind` class with one class attribute per entry of `tokens`
///    plus `EOF` and `ERROR`,
/// 6. one `parse_<rule>` Python function per entry of `rules`,
/// 7. the `#[pyo3::pymodule]` init function named `name`, registering all of
///    the above.
fn build_lib_rs(
    rust_file: &str,
    name: &str,
    rule_kinds: &[String],
    tokens: &[String],
    rules: &[String],
) -> String {
    let mut out = String::new();

    out.push_str(concat!(
        "//! Generated by parsuna — do not edit by hand.\n",
        "//!\n",
        "//! This crate compiles to a Python extension module exposing the\n",
        "//! grammar's parser. From Python: `import <name>; p = <name>.parse_<rule>(src)`\n",
        "//! then iterate `p` to receive Event objects.\n",
        "\n",
    ));

    // Copy the parser source, dropping lines that begin with `#![`.
    for line in rust_file.lines().filter(|l| !l.starts_with("#![")) {
        out.push_str(line);
        out.push('\n');
    }
    out.push('\n');

    out.push_str(PY_BINDINGS_CORE);
    out.push('\n');

    push_rule_kind_class(&mut out, name, rule_kinds);
    out.push('\n');

    push_token_kind_class(&mut out, name, tokens);
    out.push('\n');

    push_parse_functions(&mut out, rules);
    out.push('\n');

    push_module_init(&mut out, name, rules);
    out
}

/// Appends the `#[pyclass]` attribute, unit struct and opening `impl` line
/// for the wrapper class exposed to Python as `py_name` (Rust type `Py<py_name>`).
fn push_class_header(out: &mut String, module: &str, py_name: &str) {
    write!(
        out,
        concat!(
            "#[pyo3::pyclass(frozen, module = \"{module}\", name = \"{class}\")]\n",
            "struct Py{class};\n",
            "#[pyo3::pymethods]\n",
            "impl Py{class} {{\n",
        ),
        module = module,
        class = py_name,
    )
    .unwrap();
}

/// Appends the `PyRuleKind` class: one class attribute per rule kind and a
/// static `name` function mapping each kind's index to its original name.
fn push_rule_kind_class(out: &mut String, module: &str, rule_kinds: &[String]) {
    push_class_header(out, module, "RuleKind");
    for kind in rule_kinds {
        let ident = crate::codegen::common::pascal(kind);
        writeln!(
            out,
            " #[classattr] pub const {id}: u16 = RuleKind::{id}.id();",
            id = ident
        )
        .unwrap();
    }
    out.push_str(concat!(
        " #[staticmethod]\n",
        " fn name(kind: u16) -> &'static str {\n",
        " match kind {\n",
    ));
    for (index, kind) in rule_kinds.iter().enumerate() {
        writeln!(out, " {} => \"{}\",", index, kind).unwrap();
    }
    out.push_str(concat!(" _ => \"?\",\n", " }\n", " }\n", "}\n"));
}

/// Appends the `PyTokenKind` class: one class attribute per token, followed
/// by the `EOF` and `ERROR` attributes.
fn push_token_kind_class(out: &mut String, module: &str, tokens: &[String]) {
    push_class_header(out, module, "TokenKind");
    for token in tokens {
        let ident = crate::codegen::common::pascal(token);
        writeln!(
            out,
            " #[classattr] pub const {id}: i16 = TokenKind::{id} as i16;",
            id = ident
        )
        .unwrap();
    }
    out.push_str(concat!(
        " #[classattr] pub const EOF: i16 = parsuna_rt::TOKEN_EOF;\n",
        " #[classattr] pub const ERROR: i16 = parsuna_rt::TOKEN_ERROR;\n",
        "}\n",
    ));
}

/// Appends one `parse_<rule>` Python-callable function per entry rule. The
/// entry constant referenced is `ENTRY_` followed by the upper-cased rule name.
fn push_parse_functions(out: &mut String, rules: &[String]) {
    for rule in rules {
        write!(
            out,
            concat!(
                "/// Parse the `{rule}` rule from a string and return a [`PyParser`].\n",
                "#[pyo3::pyfunction(name = \"parse_{rule}\")]\n",
                "fn parse_{rule}_py(src: &str) -> PyParser {{\n",
                " let lex = parsuna_rt::StreamingLexer::new(std::io::Cursor::new(src.as_bytes().to_vec()), &LEXER_CONFIG);\n",
                " PyParser {{ inner: Parser::new(lex, ENTRY_{entry}) }}\n",
                "}}\n",
            ),
            rule = rule,
            entry = rule.to_uppercase(),
        )
        .unwrap();
    }
}

/// Appends the `#[pyo3::pymodule]` init function, which registers every
/// wrapper class and every `parse_<rule>` function on the module.
fn push_module_init(out: &mut String, module: &str, rules: &[String]) {
    out.push_str("#[pyo3::pymodule]\n");
    writeln!(
        out,
        "fn {}(_py: pyo3::Python, m: &pyo3::types::PyModule) -> pyo3::PyResult<()> {{",
        module
    )
    .unwrap();
    for class in [
        "PyPos",
        "PySpan",
        "PyError",
        "PyEvent",
        "PyParser",
        "PyRuleKind",
        "PyTokenKind",
    ] {
        writeln!(out, " m.add_class::<{}>()?;", class).unwrap();
    }
    for rule in rules {
        writeln!(
            out,
            " m.add_function(pyo3::wrap_pyfunction!(parse_{}_py, m)?)?;",
            rule
        )
        .unwrap();
    }
    out.push_str(concat!(" Ok(())\n", "}\n"));
}
/// Renders the generated crate's `Cargo.toml`.
///
/// `name` is used both as the package name and as the `[lib]` name. The
/// library is built as a `cdylib`, depending on `parsuna-rt` by relative path
/// and on `pyo3` 0.20 with the `extension-module` feature.
fn build_cargo_toml(name: &str) -> String {
    let package = format!(
        "[package]\nname = \"{}\"\nversion = \"0.1.0\"\nedition = \"2021\"\n",
        name
    );
    let lib = format!("[lib]\nname = \"{}\"\ncrate-type = [\"cdylib\"]\n", name);
    // Static section: nothing here depends on the crate name.
    let dependencies = concat!(
        "[dependencies]\n",
        "# Point this `path` at your local parsuna-rt checkout, or replace with a\n",
        "# crates.io dependency once parsuna-rt is published.\n",
        "parsuna-rt = { path = \"../../runtimes/rust\" }\n",
        "pyo3 = { version = \"0.20\", features = [\"extension-module\"] }\n",
    );
    [package.as_str(), lib.as_str(), dependencies].concat()
}
/// Renders the generated crate's `pyproject.toml`.
///
/// The build backend is maturin. `name` is used as the project name and is
/// also interpolated into the project description.
fn build_pyproject(name: &str) -> String {
    let build_system = concat!(
        "[build-system]\n",
        "requires = [\"maturin>=1.0,<2.0\"]\n",
        "build-backend = \"maturin\"\n",
    );
    // The only section that depends on the project name.
    let project = format!(
        concat!(
            "[project]\n",
            "name = \"{pkg}\"\n",
            "version = \"0.1.0\"\n",
            "description = \"Generated parser for the `{pkg}` grammar (parsuna).\"\n",
            "requires-python = \">=3.8\"\n",
            "classifiers = [\n",
            "\"Programming Language :: Rust\",\n",
            "\"Programming Language :: Python :: Implementation :: CPython\",\n",
            "]\n",
        ),
        pkg = name
    );
    let maturin = concat!(
        "[tool.maturin]\n",
        "features = [\"pyo3/extension-module\"]\n",
    );
    [build_system, project.as_str(), maturin].concat()
}
/// Rust source text that `build_lib_rs` appends verbatim to the generated
/// `src/lib.rs`, after the parser source.
///
/// The text defines the PyO3 wrapper classes exposed to Python as `Pos`,
/// `Span`, `Error`, `Event` and `Parser`, plus the conversion helpers
/// `to_py_pos`, `to_py_span`, `to_py_diag` and `to_py_event` that turn
/// `parsuna_rt` values into those wrappers.
///
/// The `///` lines inside the literal are part of the emitted text (they
/// become doc comments in the generated crate), not documentation of this
/// constant. The literal begins with a newline.
///
/// NOTE(review): the text refers to `Event`, `Parser` and `TokenKind` without
/// defining or importing them — presumably they come from the generated parser
/// source that is emitted before this fragment; confirm against the Rust
/// backend's output.
const PY_BINDINGS_CORE: &str = r#"
use pyo3::prelude::*;
/// Source position: byte offset plus 1-based line/column.
#[pyclass(frozen, get_all, name = "Pos")]
#[derive(Clone, Copy, Debug)]
struct PyPos { offset: u32, line: u32, column: u32 }
#[pymethods]
impl PyPos {
fn __repr__(&self) -> String { format!("Pos({}, {}, {})", self.offset, self.line, self.column) }
}
/// Half-open span `[start, end)` over the source.
#[pyclass(frozen, get_all, name = "Span")]
#[derive(Clone, Copy, Debug)]
struct PySpan { start: PyPos, end: PyPos }
#[pymethods]
impl PySpan {
fn __repr__(&self) -> String {
format!("Span({}:{}-{}:{})", self.start.line, self.start.column, self.end.line, self.end.column)
}
}
/// A recoverable parse or lex error.
#[pyclass(frozen, get_all, name = "Error")]
#[derive(Clone, Debug)]
struct PyError { message: String }
#[pymethods]
impl PyError {
fn __repr__(&self) -> String {
format!("Error({:?})", self.message)
}
}
/// A single pull-parser event. `tag` is one of "enter", "exit", "token",
/// or "error"; the populated payload field depends on the tag.
#[pyclass(frozen, get_all, name = "Event")]
#[derive(Clone, Debug)]
struct PyEvent {
tag: String,
span: PySpan,
kind: Option<i32>,
text: Option<String>,
error: Option<PyError>,
}
#[pymethods]
impl PyEvent {
fn is_enter(&self) -> bool { self.tag == "enter" }
fn is_exit(&self) -> bool { self.tag == "exit" }
fn is_token(&self) -> bool { self.tag == "token" }
fn is_error(&self) -> bool { self.tag == "error" }
fn __repr__(&self) -> String {
match (self.tag.as_str(), self.text.as_deref(), self.error.as_ref()) {
("enter", _, _) => format!("Event(enter rule={})", self.kind.unwrap_or(0)),
("exit", _, _) => format!("Event(exit rule={})", self.kind.unwrap_or(0)),
("token", Some(t), _) => format!("Event(token kind={} text={:?})", self.kind.unwrap_or(0), t),
("token", None, _) => format!("Event(token kind={})", self.kind.unwrap_or(0)),
("error", _, Some(d)) => format!("Event(error {:?})", d.message),
_ => "Event(?)".to_string(),
}
}
}
fn to_py_pos(p: parsuna_rt::Pos) -> PyPos { PyPos { offset: p.offset, line: p.line, column: p.column } }
fn to_py_span(s: parsuna_rt::Span) -> PySpan { PySpan { start: to_py_pos(s.start), end: to_py_pos(s.end) } }
fn to_py_diag(d: parsuna_rt::Error) -> PyError {
PyError { message: d.message.into_owned() }
}
fn to_py_event(ev: Event) -> PyEvent {
match ev {
parsuna_rt::Event::Enter { rule, pos } => PyEvent {
tag: "enter".into(),
span: to_py_span(parsuna_rt::Span::point(pos)),
kind: Some(rule as i32),
text: None,
error: None,
},
parsuna_rt::Event::Exit { rule, pos } => PyEvent {
tag: "exit".into(),
span: to_py_span(parsuna_rt::Span::point(pos)),
kind: Some(rule as i32),
text: None,
error: None,
},
parsuna_rt::Event::Error(d) => {
let span = to_py_span(d.span);
PyEvent { tag: "error".into(), span, kind: None, text: None, error: Some(to_py_diag(d)) }
}
parsuna_rt::Event::Token(t) => {
let span = to_py_span(t.span);
PyEvent { tag: "token".into(), span, kind: Some(t.kind as i32), text: Some(t.text.into_owned()), error: None }
}
}
}
/// Pull-based parser. Iterate to walk the parse as a sequence of
/// [`PyEvent`] values, or call `next_event` manually.
#[pyclass(unsendable, name = "Parser")]
struct PyParser { inner: Parser<'static, parsuna_rt::StreamingLexer<std::io::Cursor<Vec<u8>>, TokenKind>> }
#[pymethods]
impl PyParser {
fn next_event(&mut self) -> Option<PyEvent> { self.inner.next_event().map(to_py_event) }
fn __iter__(slf: PyRef<Self>) -> PyRef<Self> { slf }
fn __next__(&mut self) -> Option<PyEvent> { self.next_event() }
}
"#;