use panproto_schema::{Protocol, Schema};
use crate::error::ParseError;
use crate::registry::AstParser;
use crate::theory_extract::{ExtractedTheoryMeta, extract_theory_from_node_types};
use crate::walker::{AstWalker, WalkerConfig};
/// A parser for one language, backed by a tree-sitter grammar.
///
/// Instances are built with `LanguageParser::from_language` and used through
/// the `AstParser` trait implementation in this file.
pub struct LanguageParser {
/// Protocol name reported by `protocol_name()` and attached to emit errors.
protocol_name: String,
/// File extensions reported by `supported_extensions()`.
extensions: Vec<&'static str>,
/// tree-sitter grammar installed on a fresh `tree_sitter::Parser` for every `parse` call.
language: tree_sitter::Language,
/// Theory metadata extracted from the grammar's node-types JSON at construction time.
theory_meta: ExtractedTheoryMeta,
/// Protocol definition built at construction time and handed to the AST walker.
protocol: Protocol,
/// Walker configuration; cloned into each `AstWalker` created by `parse`.
walker_config: WalkerConfig,
}
impl LanguageParser {
    /// Build a parser from a tree-sitter grammar and its node-types JSON.
    ///
    /// The theory name is derived from `protocol_name` as `Th<Name>FullAST`,
    /// where `<Name>` is `protocol_name` with its first character upper-cased.
    /// That name is used both to extract the theory metadata from
    /// `node_types_json` and to build the protocol definition.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by `extract_theory_from_node_types`.
    pub fn from_language(
        protocol_name: &str,
        extensions: Vec<&'static str>,
        language: tree_sitter::Language,
        node_types_json: &[u8],
        walker_config: WalkerConfig,
    ) -> Result<Self, ParseError> {
        let full_ast_theory = format!("Th{}FullAST", capitalize_first(protocol_name));

        // Fields are evaluated in the order written, so a failed extraction
        // returns before the protocol definition is built.
        let parser = Self {
            protocol_name: protocol_name.to_owned(),
            extensions,
            language,
            theory_meta: extract_theory_from_node_types(&full_ast_theory, node_types_json)?,
            protocol: build_full_ast_protocol(protocol_name, &full_ast_theory),
            walker_config,
        };
        Ok(parser)
    }
}
impl AstParser for LanguageParser {
    /// Name of the protocol this parser was constructed for.
    fn protocol_name(&self) -> &str {
        self.protocol_name.as_str()
    }

    /// Parse `source` with tree-sitter and walk the resulting tree into a [`Schema`].
    ///
    /// # Errors
    ///
    /// Returns `ParseError::TreeSitterParse` when the grammar cannot be
    /// installed on the parser or when tree-sitter yields no tree; otherwise
    /// returns whatever the AST walker returns.
    fn parse(&self, source: &[u8], file_path: &str) -> Result<Schema, ParseError> {
        let mut ts_parser = tree_sitter::Parser::new();

        if let Err(e) = ts_parser.set_language(&self.language) {
            return Err(ParseError::TreeSitterParse {
                path: format!("{file_path}: set_language failed: {e}"),
            });
        }

        let tree = match ts_parser.parse(source, None) {
            Some(tree) => tree,
            None => {
                return Err(ParseError::TreeSitterParse {
                    path: format!("{file_path}: parse returned None (timeout or cancellation)"),
                });
            }
        };

        // Each parse gets its own walker with its own copy of the configuration.
        let ast_walker = AstWalker::new(
            source,
            &self.theory_meta,
            &self.protocol,
            self.walker_config.clone(),
        );
        ast_walker.walk(&tree, file_path)
    }

    /// Reassemble source bytes from the text fragments recorded in `schema`.
    fn emit(&self, schema: &Schema) -> Result<Vec<u8>, ParseError> {
        emit_from_schema(schema, self.protocol_name.as_str())
    }

    /// File extensions this parser was registered with.
    fn supported_extensions(&self) -> &[&str] {
        self.extensions.as_slice()
    }

    /// Theory metadata extracted when the parser was constructed.
    fn theory_meta(&self) -> &ExtractedTheoryMeta {
        &self.theory_meta
    }
}
/// Reassemble source bytes from the positioned text fragments stored in `schema`.
///
/// For every vertex that has constraints, two kinds of fragments are collected:
///
/// * the vertex's `literal-value`, positioned at its `start-byte` (only when
///   both constraints exist and the offset parses as `usize`);
/// * every `interstitial-*` constraint that has a matching
///   `<sort>-start-byte` constraint with a parseable offset.
///
/// Fragments are ordered by offset (stable sort) and concatenated. A fragment
/// that starts before the end of the previously written fragment is skipped.
/// Nothing is inserted for gaps between fragments.
///
/// # Errors
///
/// Returns `ParseError::EmitFailed` when no fragment was collected at all.
fn emit_from_schema(schema: &Schema, protocol: &str) -> Result<Vec<u8>, ParseError> {
    let mut pieces: Vec<(usize, String)> = Vec::new();

    let constrained_vertices = schema
        .vertices
        .keys()
        .filter_map(|vertex_name| schema.constraints.get(vertex_name));

    for vertex_constraints in constrained_vertices {
        // The vertex's own literal text, if it carries both an offset and a value.
        let offset = vertex_constraints
            .iter()
            .find(|c| c.sort.as_ref() == "start-byte")
            .and_then(|c| c.value.parse::<usize>().ok());
        let text = vertex_constraints
            .iter()
            .find(|c| c.sort.as_ref() == "literal-value")
            .map(|c| c.value.clone());
        if let Some(piece) = offset.zip(text) {
            pieces.push(piece);
        }

        // Interstitial text: each `interstitial-*` constraint is paired with
        // the sibling constraint named `<sort>-start-byte` for its offset.
        let interstitial_pieces = vertex_constraints.iter().filter_map(|gap| {
            let gap_sort: &str = gap.sort.as_ref();
            if !gap_sort.starts_with("interstitial-") {
                return None;
            }
            let position_sort = format!("{gap_sort}-start-byte");
            let position = vertex_constraints
                .iter()
                .find(|candidate| candidate.sort.as_ref() == position_sort.as_str())
                .and_then(|candidate| candidate.value.parse::<usize>().ok())?;
            Some((position, gap.value.clone()))
        });
        pieces.extend(interstitial_pieces);
    }

    if pieces.is_empty() {
        return Err(ParseError::EmitFailed {
            protocol: protocol.to_owned(),
            reason: "schema has no text fragments".to_owned(),
        });
    }

    pieces.sort_by_key(|piece| piece.0);

    let mut bytes = Vec::new();
    // First byte offset not yet covered by an emitted fragment.
    let mut next_free = 0;
    for (offset, text) in &pieces {
        if *offset < next_free {
            // Overlaps what has already been written; skip it.
            continue;
        }
        bytes.extend_from_slice(text.as_bytes());
        next_free = offset + text.len();
    }
    Ok(bytes)
}
/// Build the protocol definition used for full-AST parsing of one language.
///
/// `theory_name` becomes the schema theory; the instance theory is the same
/// name with an `Instance` suffix. Object kinds and edge rules are left
/// empty, and only ordering and recursion are enabled among the feature flags.
fn build_full_ast_protocol(protocol_name: &str, theory_name: &str) -> Protocol {
    /// Constraint sorts declared by every full-AST protocol, in declaration order.
    const CONSTRAINT_SORTS: &[&str] = &[
        "literal-value",
        "literal-type",
        "operator",
        "visibility",
        "mutability",
        "async",
        "static",
        "generator",
        "comment",
        "indent",
        "trailing-comma",
        "semicolon",
        "blank-lines-before",
        "start-byte",
        "end-byte",
    ];

    Protocol {
        name: protocol_name.into(),
        schema_theory: theory_name.into(),
        instance_theory: format!("{theory_name}Instance"),
        schema_composition: None,
        instance_composition: None,
        obj_kinds: Vec::new(),
        edge_rules: Vec::new(),
        constraint_sorts: CONSTRAINT_SORTS
            .iter()
            .map(|sort| (*sort).into())
            .collect(),
        has_order: true,
        has_coproducts: false,
        has_recursion: true,
        has_causal: false,
        nominal_identity: false,
        has_defaults: false,
        has_coercions: false,
        has_mergers: false,
        has_policies: false,
    }
}
/// Return `s` with its first character upper-cased and the rest unchanged.
///
/// An empty input yields an empty string. The upper-case form of a single
/// character may expand to several characters.
fn capitalize_first(s: &str) -> String {
    let mut rest = s.chars();
    match rest.next() {
        None => String::new(),
        Some(first) => {
            let mut capitalized = String::with_capacity(s.len());
            capitalized.extend(first.to_uppercase());
            // `rest` has advanced past the first character only.
            capitalized.push_str(rest.as_str());
            capitalized
        }
    }
}