#![allow(
clippy::module_name_repetitions,
clippy::too_many_lines,
clippy::too_many_arguments,
clippy::map_unwrap_or,
clippy::option_if_let_else,
clippy::elidable_lifetime_names,
clippy::items_after_statements,
clippy::needless_pass_by_value,
clippy::single_match_else,
clippy::manual_let_else,
clippy::match_same_arms,
clippy::missing_const_for_fn,
clippy::single_char_pattern,
clippy::naive_bytecount,
clippy::expect_used,
clippy::redundant_pub_crate,
clippy::used_underscore_binding,
clippy::redundant_field_names,
clippy::struct_field_names,
clippy::redundant_else,
clippy::similar_names
)]
use std::collections::BTreeMap;
use panproto_schema::{Edge, Schema};
use serde::Deserialize;
use crate::error::ParseError;
/// One node of a grammar rule body as deserialized from `grammar.json`.
///
/// The variant is selected by the JSON object's `"type"` field (internally
/// tagged serde enum); each variant's `rename` gives the exact tag string.
/// NOTE(review): the tag names look like tree-sitter's `grammar.json` rule
/// types — confirm against the producer of the file.
#[derive(Debug, Clone, Deserialize)]
#[serde(tag = "type")]
#[non_exhaustive]
pub enum Production {
/// Ordered sequence of sub-productions.
#[serde(rename = "SEQ")]
Seq {
/// Members, in source order.
members: Vec<Self>,
},
/// Alternation between sub-productions.
#[serde(rename = "CHOICE")]
Choice {
/// The alternatives.
members: Vec<Self>,
},
/// Repetition of `content` (treated as nullable by the yield-set computation).
#[serde(rename = "REPEAT")]
Repeat {
/// The repeated production.
content: Box<Self>,
},
/// Repetition of `content` (not treated as nullable by the yield-set computation).
#[serde(rename = "REPEAT1")]
Repeat1 {
/// The repeated production.
content: Box<Self>,
},
/// Optional occurrence of `content`.
#[serde(rename = "OPTIONAL")]
Optional {
/// The optional production.
content: Box<Self>,
},
/// Reference to another rule (or to a symbol with no rule in `rules`) by name.
#[serde(rename = "SYMBOL")]
Symbol {
/// Name of the referenced symbol.
name: String,
},
/// Literal string token.
#[serde(rename = "STRING")]
String {
/// The literal text.
value: String,
},
/// Pattern token; the pattern text is stored verbatim.
#[serde(rename = "PATTERN")]
Pattern {
/// The pattern source text.
value: String,
},
/// The empty production.
#[serde(rename = "BLANK")]
Blank,
/// `content` labelled with a field name.
#[serde(rename = "FIELD")]
Field {
/// Field label.
name: String,
/// The labelled production.
content: Box<Self>,
},
/// `content` exposed under a different name.
#[serde(rename = "ALIAS")]
Alias {
/// The aliased production.
content: Box<Self>,
/// Whether the alias is a named node; defaults to `false` when absent.
#[serde(default)]
named: bool,
/// The alias name; defaults to the empty string when absent.
#[serde(default)]
value: String,
},
/// Wrapper marking `content` as a token.
#[serde(rename = "TOKEN")]
Token {
/// The wrapped production.
content: Box<Self>,
},
/// Wrapper marking `content` as an immediate token.
#[serde(rename = "IMMEDIATE_TOKEN")]
ImmediateToken {
/// The wrapped production.
content: Box<Self>,
},
/// Precedence wrapper; `value` is kept as raw JSON and is not interpreted here.
#[serde(rename = "PREC")]
Prec {
/// Raw precedence value.
#[allow(dead_code)]
value: serde_json::Value,
/// The wrapped production.
content: Box<Self>,
},
/// Left-associative precedence wrapper; `value` is kept as raw JSON.
#[serde(rename = "PREC_LEFT")]
PrecLeft {
/// Raw precedence value.
#[allow(dead_code)]
value: serde_json::Value,
/// The wrapped production.
content: Box<Self>,
},
/// Right-associative precedence wrapper; `value` is kept as raw JSON.
#[serde(rename = "PREC_RIGHT")]
PrecRight {
/// Raw precedence value.
#[allow(dead_code)]
value: serde_json::Value,
/// The wrapped production.
content: Box<Self>,
},
/// Dynamic precedence wrapper; `value` is kept as raw JSON.
#[serde(rename = "PREC_DYNAMIC")]
PrecDynamic {
/// Raw precedence value.
#[allow(dead_code)]
value: serde_json::Value,
/// The wrapped production.
content: Box<Self>,
},
/// Wrapper carrying a reserved-word context name around `content`.
#[serde(rename = "RESERVED")]
Reserved {
/// The wrapped production.
content: Box<Self>,
/// Context name; defaults to the empty string when absent.
#[allow(dead_code)]
#[serde(default)]
context_name: String,
},
}
/// Role assigned to a literal token within a rule by the sequence classifiers
/// in this file.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum TokenRole {
/// Opening delimiter: the first literal of a pair enclosing content, or a
/// non-operator literal that precedes the first content member.
BracketOpen,
/// Closing delimiter: the last literal of a pair enclosing content, or a
/// literal that follows the last content member.
BracketClose,
/// Leading string literal of a repeated sequence body.
Separator,
/// Word-like literal (letters, digits, `_`; not starting with a digit).
Keyword,
/// Fallback role for punctuation that matches no other rule.
Operator,
/// The only literal of a sequence with exactly two content members, one byte long.
Connector,
/// NOTE(review): not assigned by any code visible in this part of the file.
Terminal,
}
/// A grammar loaded from `grammar.json`, plus lookup tables derived from it.
///
/// Only `name`, `rules`, `supertypes` and `extras` are deserialized; every
/// `#[serde(skip)]` field starts out empty and is populated by
/// `Grammar::from_bytes_with_node_types`.
#[derive(Debug, Clone, Deserialize)]
#[non_exhaustive]
pub struct Grammar {
/// Grammar name as given in the JSON.
#[allow(dead_code)]
pub name: String,
/// Rule name -> rule body, ordered by name.
pub rules: BTreeMap<String, Production>,
/// Supertype names; entries may be plain strings or objects with a `"name"` string.
#[serde(default, deserialize_with = "deserialize_supertypes")]
pub supertypes: std::collections::HashSet<String>,
/// Names taken from `SYMBOL` extras and from named `ALIAS` extras.
#[serde(default, deserialize_with = "deserialize_extras")]
pub extras: std::collections::HashSet<String>,
/// Kind -> set of names (itself, hidden rules, supertypes, named aliases)
/// it is reachable from; see `compute_subtype_closure`.
#[serde(skip)]
pub subtypes: std::collections::HashMap<String, std::collections::HashSet<String>>,
/// Hidden-rule / supertype name -> set of kinds it can start with; the
/// empty string marks a nullable rule. See `compute_yield_sets`.
#[serde(skip)]
pub yield_sets: std::collections::HashMap<String, std::collections::HashSet<String>>,
/// Named node type -> all named child kinds (fields and plain children),
/// taken from the node-types JSON when supplied.
#[serde(skip)]
pub node_type_children: std::collections::HashMap<String, std::collections::HashSet<String>>,
/// Named node type -> field name -> named child kinds for that field.
#[serde(skip)]
pub node_type_field_children: std::collections::HashMap<
String,
std::collections::HashMap<String, std::collections::HashSet<String>>,
>,
/// Named node type -> named child kinds listed outside of any field.
#[serde(skip)]
pub node_type_nonfield_children:
std::collections::HashMap<String, std::collections::HashSet<String>>,
/// `_`-prefixed symbol without a rule -> the first anonymous alias text found for it.
#[serde(skip)]
pub external_alias_map: std::collections::HashMap<String, String>,
/// Rule name -> literal token text -> role within that rule.
#[serde(skip)]
pub token_roles:
std::collections::HashMap<String, std::collections::HashMap<String, TokenRole>>,
/// `(rule name, opening token)` pairs recorded by `classify_seq` for `{`
/// brackets whose enclosed content contains a repeat.
#[serde(skip)]
pub indent_triggers: std::collections::HashSet<(String, String)>,
/// Filled from `extract_line_comment_prefixes` (defined outside this view).
#[serde(skip)]
pub line_comment_prefixes: Vec<String>,
/// NOTE(review): presumably filled by `classify_external_layout_tokens`
/// (not visible here) — confirm.
#[serde(skip)]
pub external_indent_opens: std::collections::HashSet<String>,
/// NOTE(review): presumably filled by `classify_external_layout_tokens` — confirm.
#[serde(skip)]
pub external_indent_closes: std::collections::HashSet<String>,
/// NOTE(review): presumably filled by `classify_external_layout_tokens` — confirm.
#[serde(skip)]
pub external_newlines: std::collections::HashSet<String>,
/// NOTE(review): presumably filled by `classify_external_layout_tokens` — confirm.
#[serde(skip)]
pub external_semicolons: std::collections::HashSet<String>,
/// Named alias value -> name of the symbol it directly aliases (first occurrence wins).
#[serde(skip)]
pub named_alias_map: std::collections::HashMap<String, String>,
}
/// Deserializes the `supertypes` array into a set of names.
///
/// Each entry may be a bare string or an object carrying a string `"name"`;
/// entries of any other shape are ignored.
fn deserialize_supertypes<'de, D>(
    deserializer: D,
) -> Result<std::collections::HashSet<String>, D::Error>
where
    D: serde::Deserializer<'de>,
{
    let raw_entries = Vec::<serde_json::Value>::deserialize(deserializer)?;
    let names = raw_entries
        .into_iter()
        .filter_map(|entry| match entry {
            serde_json::Value::String(name) => Some(name),
            serde_json::Value::Object(fields) => fields
                .get("name")
                .and_then(serde_json::Value::as_str)
                .map(str::to_owned),
            _ => None,
        })
        .collect();
    Ok(names)
}
/// Deserializes the `extras` array into the set of node names it introduces.
///
/// A `SYMBOL` entry contributes its `"name"`; an `ALIAS` entry contributes its
/// `"value"` only when `"named"` is `true`. Everything else is skipped.
fn deserialize_extras<'de, D>(
    deserializer: D,
) -> Result<std::collections::HashSet<String>, D::Error>
where
    D: serde::Deserializer<'de>,
{
    let raw_entries = Vec::<serde_json::Value>::deserialize(deserializer)?;
    let mut names = std::collections::HashSet::new();
    for entry in &raw_entries {
        let Some(fields) = entry.as_object() else {
            continue;
        };
        // Pick which key holds the name this entry contributes, if any.
        let name_key = match fields.get("type").and_then(serde_json::Value::as_str) {
            Some("SYMBOL") => "name",
            Some("ALIAS") => {
                let is_named = fields
                    .get("named")
                    .and_then(serde_json::Value::as_bool)
                    .unwrap_or(false);
                if !is_named {
                    continue;
                }
                "value"
            }
            _ => continue,
        };
        if let Some(text) = fields.get(name_key).and_then(serde_json::Value::as_str) {
            names.insert(text.to_owned());
        }
    }
    Ok(names)
}
impl Grammar {
    /// Parses `grammar.json` bytes and builds all derived tables, without
    /// node-type information.
    ///
    /// # Errors
    ///
    /// Returns [`ParseError::EmitFailed`] when `bytes` is not a valid grammar.
    pub fn from_bytes(protocol: &str, bytes: &[u8]) -> Result<Self, ParseError> {
        Self::from_bytes_with_node_types(protocol, bytes, None)
    }

    /// Parses `grammar.json` bytes and builds all derived tables, optionally
    /// refining subtype information from node-types JSON.
    ///
    /// `protocol` is only used to label the error. When `node_types_bytes`
    /// fails to parse, the node-type tables are left empty rather than
    /// failing the whole load.
    ///
    /// # Errors
    ///
    /// Returns [`ParseError::EmitFailed`] when `grammar_bytes` cannot be
    /// deserialized.
    pub fn from_bytes_with_node_types(
        protocol: &str,
        grammar_bytes: &[u8],
        node_types_bytes: Option<&[u8]>,
    ) -> Result<Self, ParseError> {
        let mut grammar: Self =
            serde_json::from_slice(grammar_bytes).map_err(|e| ParseError::EmitFailed {
                protocol: protocol.to_owned(),
                reason: format!("grammar.json deserialization failed: {e}"),
            })?;
        grammar.subtypes = compute_subtype_closure(&grammar);
        grammar.named_alias_map = build_named_alias_map(&grammar);
        // Computed before the node-type pass because helpers called from it
        // receive the whole grammar and may consult the yield sets.
        grammar.yield_sets = compute_yield_sets(&grammar);
        if let Some(nt_bytes) = node_types_bytes {
            let (all_children, field_children, nonfield_children) =
                build_node_type_children(nt_bytes);
            grammar.node_type_children = all_children;
            grammar.node_type_field_children = field_children;
            grammar.node_type_nonfield_children = nonfield_children;
            augment_subtypes_from_node_types(&mut grammar);
        }
        // No recomputation of `yield_sets` here: it depends only on `rules`
        // and `supertypes`, and nothing above modifies either of them.
        grammar.external_alias_map = build_external_alias_map(&grammar);
        let (token_roles, indent_triggers) = compute_token_roles(&grammar);
        grammar.token_roles = token_roles;
        grammar.indent_triggers = indent_triggers;
        grammar.line_comment_prefixes = extract_line_comment_prefixes(&grammar);
        classify_external_layout_tokens(&mut grammar);
        // Refresh after the layout pass, which has mutable access to the grammar.
        grammar.yield_sets = compute_yield_sets(&grammar);
        Ok(grammar)
    }
}
/// Builds the `kind -> { names that kind is reachable from }` table.
///
/// Steps:
/// 1. every rule name maps to itself;
/// 2. for every "dispatch" rule (name starts with `_`, or is a supertype) each
///    kind reachable from its body gets the dispatch rule's name added;
/// 3. every named alias value maps to itself, and each kind reachable from the
///    aliased content gets the alias value added;
/// 4. the relation is closed transitively over dispatch names using strongly
///    connected components, so cycles among dispatch rules are handled.
fn compute_subtype_closure(
grammar: &Grammar,
) -> std::collections::HashMap<String, std::collections::HashSet<String>> {
use std::collections::{HashMap, HashSet};
let mut subtypes: HashMap<String, HashSet<String>> = HashMap::new();
// Step 1: reflexive entries for every rule.
for name in grammar.rules.keys() {
subtypes
.entry(name.clone())
.or_default()
.insert(name.clone());
}
// Collects into `out` every symbol name and named-alias value reachable
// from `production`. Dispatch symbols are expanded through their rule;
// `visited` stops re-expansion of a dispatch symbol already seen.
fn walk<'g>(
grammar: &'g Grammar,
production: &'g Production,
visited: &mut HashSet<&'g str>,
out: &mut HashSet<String>,
) {
match production {
Production::Symbol { name } => {
out.insert(name.clone());
let expand = name.starts_with('_') || grammar.supertypes.contains(name.as_str());
if expand && visited.insert(name.as_str()) {
if let Some(rule) = grammar.rules.get(name) {
walk(grammar, rule, visited, out);
}
}
}
Production::Choice { members } | Production::Seq { members } => {
for m in members {
walk(grammar, m, visited, out);
}
}
Production::Alias {
content,
named,
value,
} => {
// A named alias contributes its own name in addition to
// whatever its content reaches.
if *named && !value.is_empty() {
out.insert(value.clone());
}
walk(grammar, content, visited, out);
}
Production::Repeat { content }
| Production::Repeat1 { content }
| Production::Optional { content }
| Production::Field { content, .. }
| Production::Token { content }
| Production::ImmediateToken { content }
| Production::Prec { content, .. }
| Production::PrecLeft { content, .. }
| Production::PrecRight { content, .. }
| Production::PrecDynamic { content, .. }
| Production::Reserved { content, .. } => {
walk(grammar, content, visited, out);
}
_ => {}
}
}
// Step 2: direct reachability from each dispatch rule.
for (name, rule) in &grammar.rules {
let expand = name.starts_with('_') || grammar.supertypes.contains(name.as_str());
if !expand {
continue;
}
let mut visited: HashSet<&str> = HashSet::new();
// Seed with the rule itself so a self-reference is not expanded again.
visited.insert(name.as_str());
let mut reachable: HashSet<String> = HashSet::new();
walk(grammar, rule, &mut visited, &mut reachable);
for kind in &reachable {
subtypes
.entry(kind.clone())
.or_default()
.insert(name.clone());
}
}
// Gathers `(alias value, aliased content)` for every named, non-empty
// alias found anywhere inside `production` (including nested aliases).
fn collect_aliases<'g>(production: &'g Production, out: &mut Vec<(String, &'g Production)>) {
match production {
Production::Alias {
content,
named,
value,
} => {
if *named && !value.is_empty() {
out.push((value.clone(), content.as_ref()));
}
collect_aliases(content, out);
}
Production::Choice { members } | Production::Seq { members } => {
for m in members {
collect_aliases(m, out);
}
}
Production::Repeat { content }
| Production::Repeat1 { content }
| Production::Optional { content }
| Production::Field { content, .. }
| Production::Token { content }
| Production::ImmediateToken { content }
| Production::Prec { content, .. }
| Production::PrecLeft { content, .. }
| Production::PrecRight { content, .. }
| Production::PrecDynamic { content, .. }
| Production::Reserved { content, .. } => {
collect_aliases(content, out);
}
_ => {}
}
}
// Step 3: named aliases behave like extra dispatch names.
let mut aliases: Vec<(String, &Production)> = Vec::new();
for rule in grammar.rules.values() {
collect_aliases(rule, &mut aliases);
}
for (alias_value, content) in aliases {
let mut visited: HashSet<&str> = HashSet::new();
let mut reachable: HashSet<String> = HashSet::new();
walk(grammar, content, &mut visited, &mut reachable);
subtypes
.entry(alias_value.clone())
.or_default()
.insert(alias_value.clone());
for kind in reachable {
subtypes
.entry(kind)
.or_default()
.insert(alias_value.clone());
}
}
// Step 4: transitive closure restricted to dispatch names.
let is_dispatch = |s: &str| s.starts_with('_') || grammar.supertypes.contains(s);
// Graph nodes: every dispatch name appearing as a key or inside a value set.
let mut nodes: HashSet<String> = HashSet::new();
for (k, vs) in &subtypes {
if is_dispatch(k) {
nodes.insert(k.clone());
}
for v in vs {
if is_dispatch(v) {
nodes.insert(v.clone());
}
}
}
let nodes: Vec<String> = nodes.into_iter().collect();
let index_of: HashMap<&str, usize> = nodes
.iter()
.enumerate()
.map(|(i, n)| (n.as_str(), i))
.collect();
// Edge i -> j when nodes[j] is listed in subtypes[nodes[i]]; self-loops are dropped.
let mut edges: Vec<Vec<usize>> = vec![Vec::new(); nodes.len()];
for (i, name) in nodes.iter().enumerate() {
if let Some(targets) = subtypes.get(name) {
for t in targets {
if let Some(&j) = index_of.get(t.as_str()) {
if i != j {
edges[i].push(j);
}
}
}
}
}
// Tarjan-style strongly-connected-components search, written with an
// explicit work stack of `(node, next edge index)` instead of recursion.
// Returns the component id of every node; ids are handed out in the order
// components are completed.
fn tarjan(edges: &[Vec<usize>]) -> Vec<usize> {
let n = edges.len();
let mut comp = vec![usize::MAX; n];
// `usize::MAX` in `index_arr` means "not visited yet".
let mut index_arr = vec![usize::MAX; n];
let mut lowlink = vec![0usize; n];
let mut on_stack = vec![false; n];
let mut stack: Vec<usize> = Vec::new();
let mut next_index = 0usize;
let mut next_comp = 0usize;
let mut work: Vec<(usize, usize)> = Vec::new();
for start in 0..n {
if index_arr[start] != usize::MAX {
continue;
}
work.push((start, 0));
index_arr[start] = next_index;
lowlink[start] = next_index;
next_index += 1;
stack.push(start);
on_stack[start] = true;
while let Some(&(v, i)) = work.last() {
if i < edges[v].len() {
let w = edges[v][i];
// Advance this frame's edge cursor before descending.
if let Some(slot) = work.last_mut() {
slot.1 += 1;
}
if index_arr[w] == usize::MAX {
// Tree edge: discover `w` and descend into it.
index_arr[w] = next_index;
lowlink[w] = next_index;
next_index += 1;
stack.push(w);
on_stack[w] = true;
work.push((w, 0));
} else if on_stack[w] && index_arr[w] < lowlink[v] {
// Edge to a node still on the stack lowers `v`'s lowlink.
lowlink[v] = index_arr[w];
}
} else {
// All edges of `v` processed.
if lowlink[v] == index_arr[v] {
// `v` is a component root: pop its members off the stack.
while let Some(w) = stack.pop() {
on_stack[w] = false;
comp[w] = next_comp;
if w == v {
break;
}
}
next_comp += 1;
}
let lv = lowlink[v];
work.pop();
// Propagate the finished child's lowlink to its parent frame.
if let Some(&(parent, _)) = work.last() {
if lv < lowlink[parent] {
lowlink[parent] = lv;
}
}
}
}
}
comp
}
let comp = tarjan(&edges);
let num_comps = comp.iter().max().copied().map_or(0, |m| m + 1);
let mut scc_members: Vec<Vec<usize>> = vec![Vec::new(); num_comps];
for (v, &c) in comp.iter().enumerate() {
scc_members[c].push(v);
}
// Closure per component: its own members plus the closures of every
// component it has an edge into. Processing in increasing id order reads
// `scc_closure[wc]` for successor components, so this relies on successors
// having been numbered (and therefore filled in) earlier.
let mut scc_closure: Vec<HashSet<String>> = vec![HashSet::new(); num_comps];
for c in 0..num_comps {
let mut closure: HashSet<String> = HashSet::new();
for &v in &scc_members[c] {
closure.insert(nodes[v].clone());
}
for &v in &scc_members[c] {
for &w in &edges[v] {
let wc = comp[w];
if wc != c {
closure.extend(scc_closure[wc].iter().cloned());
}
}
}
scc_closure[c] = closure;
}
// Expand every entry: each dispatch name already in a set pulls in the
// whole closure of that name's component.
let keys: Vec<String> = subtypes.keys().cloned().collect();
for k in keys {
let existing = subtypes.remove(&k).unwrap_or_default();
let mut new_set: HashSet<String> = HashSet::new();
for s in &existing {
new_set.insert(s.clone());
if let Some(&i) = index_of.get(s.as_str()) {
new_set.extend(scc_closure[comp[i]].iter().cloned());
}
}
subtypes.insert(k, new_set);
}
subtypes
}
fn compute_yield_sets(
grammar: &Grammar,
) -> std::collections::HashMap<String, std::collections::HashSet<String>> {
let mut cache: std::collections::HashMap<String, std::collections::HashSet<String>> =
std::collections::HashMap::new();
for (name, rule) in &grammar.rules {
let expand = name.starts_with('_') || grammar.supertypes.contains(name.as_str());
if !expand {
continue;
}
if cache.contains_key(name) {
continue;
}
let mut visited = std::collections::HashSet::new();
let ys = yield_of_production(grammar, rule, &mut visited, &mut cache);
cache.insert(name.clone(), ys);
}
cache
}
/// Returns the set of visible kinds `production` can begin with.
///
/// The empty string in the result marks the production as nullable. String
/// and pattern literals contribute nothing (an empty set), and sequences skip
/// over members with an empty set. Dispatch symbols (hidden rules and
/// supertypes) are expanded through their rule, memoised in `cache`, with
/// `visited` cutting recursion on cycles.
fn yield_of_production(
    grammar: &Grammar,
    production: &Production,
    visited: &mut std::collections::HashSet<String>,
    cache: &mut std::collections::HashMap<String, std::collections::HashSet<String>>,
) -> std::collections::HashSet<String> {
    use std::collections::HashSet;

    match production {
        Production::String { .. } | Production::Pattern { .. } => HashSet::new(),
        Production::Blank => HashSet::from([String::new()]),
        Production::Symbol { name } => {
            let is_dispatch =
                name.starts_with('_') || grammar.supertypes.contains(name.as_str());
            if !is_dispatch {
                // A visible symbol yields exactly itself.
                return HashSet::from([name.clone()]);
            }
            if let Some(known) = cache.get(name) {
                return known.clone();
            }
            // Already being expanded further up the call chain: break the cycle.
            if !visited.insert(name.clone()) {
                return HashSet::new();
            }
            let computed = match grammar.rules.get(name) {
                Some(rule) => yield_of_production(grammar, rule, visited, cache),
                None => HashSet::new(),
            };
            visited.remove(name);
            cache.insert(name.clone(), computed.clone());
            computed
        }
        // A named alias hides its content behind the alias name.
        Production::Alias {
            named: true, value, ..
        } if !value.is_empty() => HashSet::from([value.clone()]),
        Production::Alias { content, .. } => yield_of_production(grammar, content, visited, cache),
        Production::Seq { members } if members.is_empty() => HashSet::from([String::new()]),
        Production::Seq { members } => {
            let mut leading = HashSet::new();
            for member in members {
                let member_yield = yield_of_production(grammar, member, visited, cache);
                if member_yield.is_empty() {
                    continue;
                }
                let nullable = member_yield.contains("");
                leading.extend(member_yield);
                // Later members only matter while everything before them can be empty.
                if !nullable {
                    break;
                }
            }
            leading
        }
        Production::Choice { members } => {
            let mut merged = HashSet::new();
            for alternative in members {
                merged.extend(yield_of_production(grammar, alternative, visited, cache));
            }
            merged
        }
        Production::Optional { content } | Production::Repeat { content } => {
            let mut with_epsilon = yield_of_production(grammar, content, visited, cache);
            with_epsilon.insert(String::new());
            with_epsilon
        }
        Production::Repeat1 { content }
        | Production::Field { content, .. }
        | Production::Token { content }
        | Production::ImmediateToken { content }
        | Production::Prec { content, .. }
        | Production::PrecLeft { content, .. }
        | Production::PrecRight { content, .. }
        | Production::PrecDynamic { content, .. }
        | Production::Reserved { content, .. } => {
            yield_of_production(grammar, content, visited, cache)
        }
    }
}
/// Return type of `build_node_type_children`, in order:
/// parent -> all named child kinds; parent -> field name -> named child
/// kinds; parent -> named child kinds listed outside of fields.
type NodeTypeResult = (
std::collections::HashMap<String, std::collections::HashSet<String>>,
std::collections::HashMap<
String,
std::collections::HashMap<String, std::collections::HashSet<String>>,
>,
std::collections::HashMap<String, std::collections::HashSet<String>>,
);
/// Extracts the named-child tables from node-types JSON.
///
/// Only named parent entries and named children are recorded. If `nt_bytes`
/// does not deserialize, three empty maps are returned.
fn build_node_type_children(nt_bytes: &[u8]) -> NodeTypeResult {
    use std::collections::{HashMap, HashSet};

    let Ok(node_types) =
        serde_json::from_slice::<Vec<crate::theory_extract::NodeType>>(nt_bytes)
    else {
        return (HashMap::new(), HashMap::new(), HashMap::new());
    };

    let mut children_by_parent: HashMap<String, HashSet<String>> = HashMap::new();
    let mut children_by_field: HashMap<String, HashMap<String, HashSet<String>>> = HashMap::new();
    let mut unfielded_children: HashMap<String, HashSet<String>> = HashMap::new();

    for entry in node_types.iter().filter(|entry| entry.named) {
        let parent = &entry.node_type;
        let mut seen_kinds = HashSet::new();

        // Children reachable through a named field.
        for (field_name, field_value) in &entry.fields {
            let Some(types) = field_value.get("types").and_then(|t| t.as_array()) else {
                continue;
            };
            for ty in types {
                let kind = ty.get("type").and_then(|n| n.as_str());
                let named = ty.get("named").and_then(serde_json::Value::as_bool);
                if let (Some(kind), Some(true)) = (kind, named) {
                    seen_kinds.insert(kind.to_owned());
                    children_by_field
                        .entry(parent.clone())
                        .or_default()
                        .entry(field_name.clone())
                        .or_default()
                        .insert(kind.to_owned());
                }
            }
        }

        // Children listed outside of any field.
        if let Some(children) = &entry.children {
            for child in &children.types {
                if !child.named {
                    continue;
                }
                seen_kinds.insert(child.node_type.clone());
                unfielded_children
                    .entry(parent.clone())
                    .or_default()
                    .insert(child.node_type.clone());
            }
        }

        if !seen_kinds.is_empty() {
            children_by_parent.insert(parent.clone(), seen_kinds);
        }
    }

    (children_by_parent, children_by_field, unfielded_children)
}
/// Adds subtype entries for child kinds that the node-types tables list but
/// that have no rule of their own in the grammar.
///
/// For every parent with a rule, each such child kind is paired with the
/// symbols of the matching field (or with the parent's non-field symbols) and,
/// where `kind_satisfies_symbol` reports no match yet, `symbol` is added to
/// `subtypes[child]`.
fn augment_subtypes_from_node_types(grammar: &mut Grammar) {
    use std::collections::HashMap;

    // Collected first, applied afterwards, so the grammar is only read while scanning.
    let mut missing: Vec<(String, String)> = Vec::new();

    let parents_with_rules = grammar
        .node_type_children
        .keys()
        .filter_map(|kind| grammar.rules.get(kind).map(|rule| (kind, rule)));
    for (parent, rule) in parents_with_rules {
        let mut symbols_by_field: HashMap<String, Vec<String>> = HashMap::new();
        let mut unfielded_symbols: Vec<String> = Vec::new();
        collect_field_symbols(rule, &mut symbols_by_field, &mut unfielded_symbols, false);

        let field_children = grammar.node_type_field_children.get(parent);
        for (field, kinds) in field_children.into_iter().flatten() {
            let Some(symbols) = symbols_by_field.get(field) else {
                continue;
            };
            for kind in kinds {
                if grammar.rules.contains_key(kind) {
                    continue;
                }
                for symbol in symbols {
                    if !kind_satisfies_symbol(grammar, Some(kind), symbol) {
                        missing.push((kind.clone(), symbol.clone()));
                    }
                }
            }
        }

        let plain_children = grammar.node_type_nonfield_children.get(parent);
        for kind in plain_children.into_iter().flatten() {
            if grammar.rules.contains_key(kind) {
                continue;
            }
            for symbol in &unfielded_symbols {
                if !kind_satisfies_symbol(grammar, Some(kind), symbol) {
                    missing.push((kind.clone(), symbol.clone()));
                }
            }
        }
    }

    for (kind, symbol) in missing {
        grammar.subtypes.entry(kind).or_default().insert(symbol);
    }
}
/// Splits the symbols referenced by `prod` into per-field lists and a list of
/// symbols that appear outside any field.
///
/// Everything below a `FIELD` node is attributed to that field. Bare symbols
/// are recorded in `non_field` only while `inside_field` is `false`.
fn collect_field_symbols(
    prod: &Production,
    field_map: &mut std::collections::HashMap<String, Vec<String>>,
    non_field: &mut Vec<String>,
    inside_field: bool,
) {
    match prod {
        Production::Field { name, content } => {
            // Append straight into the field's bucket, preserving traversal order.
            let bucket = field_map.entry(name.clone()).or_default();
            collect_symbols_flat(content, bucket);
        }
        Production::Symbol { name } => {
            if !inside_field {
                non_field.push(name.clone());
            }
        }
        Production::Choice { members } | Production::Seq { members } => {
            for member in members.iter() {
                collect_field_symbols(member, field_map, non_field, inside_field);
            }
        }
        Production::Repeat { content }
        | Production::Repeat1 { content }
        | Production::Optional { content }
        | Production::Alias { content, .. }
        | Production::Token { content }
        | Production::ImmediateToken { content }
        | Production::Prec { content, .. }
        | Production::PrecLeft { content, .. }
        | Production::PrecRight { content, .. }
        | Production::PrecDynamic { content, .. }
        | Production::Reserved { content, .. } => {
            collect_field_symbols(content, field_map, non_field, inside_field);
        }
        _ => {}
    }
}
/// Appends to `out` the name of every symbol referenced anywhere inside
/// `prod`, in left-to-right pre-order.
fn collect_symbols_flat(prod: &Production, out: &mut Vec<String>) {
    // Explicit stack; children are pushed reversed so they pop in source order.
    let mut pending: Vec<&Production> = vec![prod];
    while let Some(node) = pending.pop() {
        match node {
            Production::Symbol { name } => out.push(name.clone()),
            Production::Choice { members } | Production::Seq { members } => {
                pending.extend(members.iter().rev());
            }
            Production::Repeat { content }
            | Production::Repeat1 { content }
            | Production::Optional { content }
            | Production::Alias { content, .. }
            | Production::Field { content, .. }
            | Production::Token { content }
            | Production::ImmediateToken { content }
            | Production::Prec { content, .. }
            | Production::PrecLeft { content, .. }
            | Production::PrecRight { content, .. }
            | Production::PrecDynamic { content, .. }
            | Production::Reserved { content, .. } => pending.push(&**content),
            _ => {}
        }
    }
}
/// Maps `_`-prefixed symbols that have no rule in the grammar to the text of
/// the first anonymous (non-named) alias wrapped directly around them.
///
/// Rules are scanned in map order and each rule in pre-order; the first alias
/// found for a symbol wins.
fn build_external_alias_map(grammar: &Grammar) -> std::collections::HashMap<String, String> {
    let mut aliases: std::collections::HashMap<String, String> =
        std::collections::HashMap::new();
    for rule in grammar.rules.values() {
        // Explicit stack; children are pushed reversed so they pop in source order.
        let mut pending: Vec<&Production> = vec![rule];
        while let Some(node) = pending.pop() {
            match node {
                Production::Alias {
                    content,
                    named,
                    value,
                } => {
                    let anonymous = !*named && !value.is_empty();
                    if let (true, Production::Symbol { name }) = (anonymous, content.as_ref()) {
                        let ruleless_hidden =
                            name.starts_with('_') && !grammar.rules.contains_key(name);
                        if ruleless_hidden {
                            aliases.entry(name.clone()).or_insert_with(|| value.clone());
                        }
                    }
                    pending.push(&**content);
                }
                Production::Choice { members } | Production::Seq { members } => {
                    pending.extend(members.iter().rev());
                }
                Production::Repeat { content }
                | Production::Repeat1 { content }
                | Production::Optional { content }
                | Production::Field { content, .. }
                | Production::Token { content }
                | Production::ImmediateToken { content }
                | Production::Prec { content, .. }
                | Production::PrecLeft { content, .. }
                | Production::PrecRight { content, .. }
                | Production::PrecDynamic { content, .. }
                | Production::Reserved { content, .. } => pending.push(&**content),
                _ => {}
            }
        }
    }
    aliases
}
/// Maps each named alias value to the symbol it directly wraps.
///
/// Only aliases whose content is a bare symbol are recorded. Rules are scanned
/// in map order and each rule in pre-order; the first target found for an
/// alias value wins.
fn build_named_alias_map(grammar: &Grammar) -> std::collections::HashMap<String, String> {
    let mut targets: std::collections::HashMap<String, String> =
        std::collections::HashMap::new();
    for rule in grammar.rules.values() {
        // Explicit stack; children are pushed reversed so they pop in source order.
        let mut pending: Vec<&Production> = vec![rule];
        while let Some(node) = pending.pop() {
            match node {
                Production::Alias {
                    content,
                    named,
                    value,
                } => {
                    let is_named_alias = *named && !value.is_empty();
                    if let (true, Production::Symbol { name }) = (is_named_alias, content.as_ref())
                    {
                        targets.entry(value.clone()).or_insert_with(|| name.clone());
                    }
                    pending.push(&**content);
                }
                Production::Choice { members } | Production::Seq { members } => {
                    pending.extend(members.iter().rev());
                }
                Production::Repeat { content }
                | Production::Repeat1 { content }
                | Production::Optional { content }
                | Production::Field { content, .. }
                | Production::Token { content }
                | Production::ImmediateToken { content }
                | Production::Prec { content, .. }
                | Production::PrecLeft { content, .. }
                | Production::PrecRight { content, .. }
                | Production::PrecDynamic { content, .. }
                | Production::Reserved { content, .. } => pending.push(&**content),
                _ => {}
            }
        }
    }
    targets
}
/// Rule name -> literal token text -> role of that token within the rule.
type RoleMap = std::collections::HashMap<String, std::collections::HashMap<String, TokenRole>>;
/// Set of `(rule name, opening token text)` pairs.
type IndentSet = std::collections::HashSet<(String, String)>;
fn compute_token_roles(grammar: &Grammar) -> (RoleMap, IndentSet) {
use std::collections::{HashMap, HashSet};
let mut all_roles: HashMap<String, HashMap<String, TokenRole>> = HashMap::new();
let mut indent_triggers: HashSet<(String, String)> = HashSet::new();
for (rule_name, rule) in &grammar.rules {
let mut roles: HashMap<String, TokenRole> = HashMap::new();
classify_production(rule, &mut roles, &mut indent_triggers, rule_name);
if !roles.is_empty() {
all_roles.insert(rule_name.clone(), roles);
}
}
(all_roles, indent_triggers)
}
/// Walks `prod` and records token roles for every sequence found inside it.
///
/// Sequences that are direct alternatives of a `CHOICE` are classified with
/// the `in_choice` flag set; repeat bodies go through `classify_repeat_body`.
/// Leaves (symbols, literals, blank) contribute nothing on their own.
fn classify_production(
    prod: &Production,
    roles: &mut std::collections::HashMap<String, TokenRole>,
    indent_triggers: &mut std::collections::HashSet<(String, String)>,
    rule_name: &str,
) {
    match prod {
        Production::Seq { members } => {
            classify_seq(members, roles, indent_triggers, rule_name, false);
        }
        Production::Choice { members } => {
            for alternative in members {
                if let Production::Seq { members: inner } = alternative {
                    classify_seq(inner, roles, indent_triggers, rule_name, true);
                } else {
                    classify_production(alternative, roles, indent_triggers, rule_name);
                }
            }
        }
        Production::Repeat { content } | Production::Repeat1 { content } => {
            classify_repeat_body(content, roles, indent_triggers, rule_name);
        }
        // Transparent wrappers: classify whatever they contain.
        Production::Optional { content }
        | Production::Field { content, .. }
        | Production::Alias { content, .. }
        | Production::Token { content }
        | Production::ImmediateToken { content }
        | Production::Prec { content, .. }
        | Production::PrecLeft { content, .. }
        | Production::PrecRight { content, .. }
        | Production::PrecDynamic { content, .. }
        | Production::Reserved { content, .. } => {
            classify_production(content, roles, indent_triggers, rule_name);
        }
        _ => {}
    }
}
/// Assigns roles to the literal tokens of one sequence, then recurses into its
/// non-literal members.
///
/// 1. If the first and last literals differ, are both word-like or both
///    punctuation, and enclose at least one non-literal member, they become
///    `BracketOpen` / `BracketClose` (overwriting earlier roles). A `{` opener
///    whose enclosed members contain a repeat is recorded as an indent trigger.
/// 2. Every literal without a role yet is classified by shape and position.
fn classify_seq(
    members: &[Production],
    roles: &mut std::collections::HashMap<String, TokenRole>,
    indent_triggers: &mut std::collections::HashSet<(String, String)>,
    rule_name: &str,
    in_choice: bool,
) {
    // Literal members together with their index in the sequence.
    let literals: Vec<(usize, &str)> = members
        .iter()
        .enumerate()
        .filter_map(|(idx, member)| Some((idx, unwrap_to_string(member)?)))
        .collect();
    let content_count = members.len() - literals.len();

    if let [first, .., last] = literals.as_slice() {
        let (open_idx, open_text) = *first;
        let (close_idx, close_text) = *last;
        let between = &members[open_idx + 1..close_idx];
        let encloses_content = between.iter().any(|m| unwrap_to_string(m).is_none());
        // Both word-like or both punctuation.
        let same_class = is_word_like(open_text) == is_word_like(close_text);
        if encloses_content && open_text != close_text && same_class {
            roles.insert(open_text.to_owned(), TokenRole::BracketOpen);
            roles.insert(close_text.to_owned(), TokenRole::BracketClose);
            if open_text == "{" && has_repeat_recursive(between) {
                indent_triggers.insert((rule_name.to_owned(), open_text.to_owned()));
            }
        }
    }

    let first_content = members.iter().position(|m| unwrap_to_string(m).is_none());
    let last_content = members.iter().rposition(|m| unwrap_to_string(m).is_none());
    for &(idx, text) in &literals {
        // Roles already present (from the bracket pass or an earlier sequence) win.
        if roles.contains_key(text) {
            continue;
        }
        let role = if is_word_like(text) {
            TokenRole::Keyword
        } else if !in_choice && first_content.is_some_and(|fc| idx < fc) && is_prefix_sigil(text) {
            TokenRole::BracketOpen
        } else if last_content.is_some_and(|lc| idx > lc) {
            TokenRole::BracketClose
        } else if !in_choice && literals.len() == 1 && content_count == 2 && text.len() == 1 {
            TokenRole::Connector
        } else {
            TokenRole::Operator
        };
        roles.insert(text.to_owned(), role);
    }

    for member in members.iter().filter(|m| unwrap_to_string(m).is_none()) {
        classify_production(member, roles, indent_triggers, rule_name);
    }
}
/// Classifies the body of a repeat.
///
/// When the body is a sequence that starts with a plain string literal, that
/// literal is marked as a `Separator` before the sequence is classified as
/// usual. Any other body is handed to `classify_production`.
fn classify_repeat_body(
    content: &Production,
    roles: &mut std::collections::HashMap<String, TokenRole>,
    indent_triggers: &mut std::collections::HashSet<(String, String)>,
    rule_name: &str,
) {
    let Production::Seq { members } = content else {
        classify_production(content, roles, indent_triggers, rule_name);
        return;
    };
    if let Some(Production::String { value }) = members.first() {
        roles.insert(value.clone(), TokenRole::Separator);
    }
    classify_seq(members, roles, indent_triggers, rule_name, false);
}
/// Position-indexed variant of the sequence classifier: returns one entry per
/// member, `Some(role)` for literal tokens and `None` for everything else.
///
/// Differences from `classify_seq` that are visible in the code:
/// * roles are per position, so the same text may get different roles;
/// * a first/last literal pair with identical text still counts as a bracket
///   pair when either side is an immediate token;
/// * a punctuation literal before the first content member (outside a choice)
///   is `BracketOpen` if it is a prefix sigil and `Operator` otherwise — it is
///   never considered for `BracketClose` or `Connector`.
fn classify_seq_positions(members: &[Production], in_choice: bool) -> Vec<Option<TokenRole>> {
    let mut roles: Vec<Option<TokenRole>> = vec![None; members.len()];
    let string_positions: Vec<(usize, &str)> = members
        .iter()
        .enumerate()
        .filter_map(|(i, m)| unwrap_to_string(m).map(|s| (i, s)))
        .collect();
    let content_count = members.len() - string_positions.len();

    // Bracket pair formed by the outermost literals, if they qualify.
    if let [first, .., last] = string_positions.as_slice() {
        let (first_idx, first_val) = *first;
        let (last_idx, last_val) = *last;
        let has_content_between = members[first_idx + 1..last_idx]
            .iter()
            .any(|m| unwrap_to_string(m).is_none());
        // Both word-like or both punctuation.
        let same_class = is_word_like(first_val) == is_word_like(last_val);
        let either_immediate =
            is_immediate_token(&members[first_idx]) || is_immediate_token(&members[last_idx]);
        if has_content_between && same_class && (first_val != last_val || either_immediate) {
            roles[first_idx] = Some(TokenRole::BracketOpen);
            roles[last_idx] = Some(TokenRole::BracketClose);
        }
    }

    let first_content_idx = members.iter().position(|m| unwrap_to_string(m).is_none());
    let last_content_idx = members.iter().rposition(|m| unwrap_to_string(m).is_none());
    for &(i, value) in &string_positions {
        // Positions claimed by the bracket pass keep their role.
        if roles[i].is_some() {
            continue;
        }
        let role = if is_word_like(value) {
            TokenRole::Keyword
        } else if !in_choice && first_content_idx.is_some_and(|fc| i < fc) {
            if is_prefix_sigil(value) {
                TokenRole::BracketOpen
            } else {
                TokenRole::Operator
            }
        } else if last_content_idx.is_some_and(|lc| i > lc) {
            TokenRole::BracketClose
        } else if !in_choice
            && string_positions.len() == 1
            && content_count == 2
            && value.len() == 1
        {
            TokenRole::Connector
        } else {
            TokenRole::Operator
        };
        roles[i] = Some(role);
    }
    roles
}
/// Decides whether the bracket opened at `open_idx` in `members` should
/// indent its contents.
///
/// The closing token is taken to be the last literal of the sequence. Returns
/// `true` when both tokens are word-like, or when the opener is `{` and the
/// enclosed members contain a repeat — either directly, or through a choice
/// that has a blank alternative and a symbol whose rule contains a repeat.
#[allow(clippy::branches_sharing_code)]
fn seq_bracket_triggers_indent(
    members: &[Production],
    open_idx: usize,
    _grammar: &Grammar,
) -> bool {
    let literals: Vec<(usize, &str)> = members
        .iter()
        .enumerate()
        .filter_map(|(idx, member)| Some((idx, unwrap_to_string(member)?)))
        .collect();
    if literals.len() < 2 {
        return false;
    }
    let Some(&(_, open_text)) = literals.iter().find(|(idx, _)| *idx == open_idx) else {
        return false;
    };
    let Some(&(close_idx, close_text)) = literals.last() else {
        return false;
    };
    if open_idx >= close_idx {
        return false;
    }
    if is_word_like(open_text) && is_word_like(close_text) {
        return true;
    }
    // Everything below only applies to brace-delimited bodies.
    if open_text != "{" {
        return false;
    }
    let between = &members[open_idx + 1..close_idx];
    if has_repeat_recursive(between) {
        return true;
    }
    between.iter().any(|member| {
        let Production::Choice { members: alts } = member else {
            return false;
        };
        let has_blank = alts.iter().any(|alt| matches!(alt, Production::Blank));
        has_blank
            && alts.iter().any(|alt| match alt {
                Production::Symbol { name } => _grammar
                    .rules
                    .get(name)
                    .is_some_and(|rule| has_repeat_in(rule)),
                _ => false,
            })
    })
}
/// Reports whether `prod` begins with a punctuation (non-word-like) literal.
///
/// Symbols — and aliases directly wrapping a symbol — are resolved through
/// their rule's first string. A choice qualifies only if it has at least one
/// non-blank alternative and every non-blank alternative qualifies. Repeats
/// and all other shapes never qualify.
fn member_has_leading_bracket(prod: &Production, grammar: &Grammar) -> bool {
    // True when the named rule exists and its first literal is punctuation.
    let rule_opens_with_punct = |name: &str| match grammar.rules.get(name) {
        Some(rule) => matches!(first_string_of(rule), Some(text) if !is_word_like(text)),
        None => false,
    };
    match prod {
        Production::Symbol { name } => rule_opens_with_punct(name.as_str()),
        Production::Alias { content, .. } => match content.as_ref() {
            Production::Symbol { name } => rule_opens_with_punct(name.as_str()),
            _ => false,
        },
        Production::Choice { members } => {
            let mut alternatives = members
                .iter()
                .filter(|m| !matches!(m, Production::Blank))
                .peekable();
            alternatives.peek().is_some()
                && alternatives.all(|m| member_has_leading_bracket(m, grammar))
        }
        Production::Field { content, .. }
        | Production::Prec { content, .. }
        | Production::PrecLeft { content, .. }
        | Production::PrecRight { content, .. }
        | Production::PrecDynamic { content, .. }
        | Production::Optional { content } => member_has_leading_bracket(content, grammar),
        _ => false,
    }
}
/// Returns the literal text a production starts with, if that can be read
/// off structurally.
///
/// A `STRING` yields its value and a `SEQ` defers to its first member. The
/// transparent wrappers (`PREC*`, `TOKEN`, `IMMEDIATE_TOKEN`, `FIELD`,
/// `RESERVED`) defer to their content. Every other variant, including
/// `CHOICE`, `SYMBOL` and `ALIAS`, yields `None`.
///
/// `RESERVED` is unwrapped here so that this function agrees with
/// `unwrap_to_string`, `is_immediate_token` and `has_repeat_in`, which all
/// treat it as a transparent wrapper.
fn first_string_of(prod: &Production) -> Option<&str> {
    match prod {
        Production::String { value } => Some(value.as_str()),
        Production::Seq { members } => members.first().and_then(first_string_of),
        Production::Prec { content, .. }
        | Production::PrecLeft { content, .. }
        | Production::PrecRight { content, .. }
        | Production::PrecDynamic { content, .. }
        | Production::Token { content }
        | Production::ImmediateToken { content }
        | Production::Field { content, .. }
        | Production::Reserved { content, .. } => first_string_of(content),
        _ => None,
    }
}
fn has_repeat_recursive(members: &[Production]) -> bool {
members.iter().any(has_repeat_in)
}
/// Returns `true` when a `REPEAT` or `REPEAT1` node occurs anywhere inside
/// `prod`.
///
/// The search descends through `SEQ` / `CHOICE` members and through every
/// single-content wrapper listed below. `SYMBOL` references are not
/// followed, so repeats in referenced rules are not seen.
fn has_repeat_in(prod: &Production) -> bool {
    let inner = match prod {
        Production::Repeat { .. } | Production::Repeat1 { .. } => return true,
        Production::Seq { members } | Production::Choice { members } => {
            return members.iter().any(has_repeat_in);
        }
        Production::Alias { content, .. }
        | Production::Reserved { content, .. }
        | Production::ImmediateToken { content }
        | Production::Token { content }
        | Production::Field { content, .. }
        | Production::Optional { content }
        | Production::PrecDynamic { content, .. }
        | Production::PrecRight { content, .. }
        | Production::PrecLeft { content, .. }
        | Production::Prec { content, .. } => content,
        _ => return false,
    };
    has_repeat_in(inner)
}
/// Returns `true` when `s` looks like an identifier or keyword: non-empty,
/// starting with an alphabetic character or `_`, and consisting only of
/// alphanumeric characters and `_` (Unicode-aware).
fn is_word_like(s: &str) -> bool {
    let mut chars = s.chars();
    match chars.next() {
        // The first character has the stricter rule; the rest may include digits.
        Some(first) if first.is_alphabetic() || first == '_' => {
            chars.all(|c| c.is_alphanumeric() || c == '_')
        }
        _ => false,
    }
}
/// Returns `false` only when `s` is exactly one byte long and that byte is
/// one of the operator characters `= + - * / < > ! ? | & ^ % ~`.
///
/// Every other input, including the empty string and any string longer than
/// one byte, returns `true`.
fn is_prefix_sigil(s: &str) -> bool {
    const OPERATOR_BYTES: &[u8] = b"=+-*/<>!?|&^%~";
    match s.as_bytes() {
        [only] => !OPERATOR_BYTES.contains(only),
        _ => true,
    }
}
/// Returns `true` when `prod` is an `IMMEDIATE_TOKEN`, possibly nested inside
/// any chain of `PREC*`, `TOKEN`, `FIELD` or `RESERVED` wrappers.
fn is_immediate_token(prod: &Production) -> bool {
    let mut node = prod;
    loop {
        node = match node {
            Production::ImmediateToken { .. } => return true,
            Production::Reserved { content, .. }
            | Production::Field { content, .. }
            | Production::Token { content }
            | Production::PrecDynamic { content, .. }
            | Production::PrecRight { content, .. }
            | Production::PrecLeft { content, .. }
            | Production::Prec { content, .. } => &**content,
            _ => return false,
        };
    }
}
/// Peels transparent wrappers (`TOKEN`, `IMMEDIATE_TOKEN`, `PREC*`, `FIELD`,
/// `RESERVED`) off `prod` and returns the text of the `STRING` underneath.
/// Returns `None` if anything else is found.
fn unwrap_to_string(prod: &Production) -> Option<&str> {
    let mut node = prod;
    loop {
        node = match node {
            Production::String { value } => return Some(value.as_str()),
            Production::Reserved { content, .. }
            | Production::Field { content, .. }
            | Production::PrecDynamic { content, .. }
            | Production::PrecRight { content, .. }
            | Production::PrecLeft { content, .. }
            | Production::Prec { content, .. }
            | Production::ImmediateToken { content }
            | Production::Token { content } => &**content,
            _ => return None,
        };
    }
}
fn extract_line_comment_prefixes(grammar: &Grammar) -> Vec<String> {
let mut prefixes = Vec::new();
for extra_name in &grammar.extras {
if let Some(rule) = grammar.rules.get(extra_name) {
if let Some(prefix) = extract_line_comment_prefix(rule) {
prefixes.push(prefix);
}
}
}
prefixes
}
/// Recognises a rule shaped like a line comment and returns its opening text.
///
/// The accepted shape is a `SEQ` of at least two members whose first member
/// is a bare `STRING` and in which some later member is a `PATTERN`
/// containing `.*`, `[^\n]*` or `[^\r\n]*` (as regex source text).
/// `TOKEN` / `IMMEDIATE_TOKEN` wrappers are looked through, and a `CHOICE`
/// yields the first alternative that matches.
fn extract_line_comment_prefix(prod: &Production) -> Option<String> {
    match prod {
        Production::Token { content } | Production::ImmediateToken { content } => {
            extract_line_comment_prefix(content)
        }
        Production::Choice { members } => members.iter().find_map(extract_line_comment_prefix),
        Production::Seq { members } if members.len() >= 2 => {
            let Production::String { value: prefix } = &members[0] else {
                return None;
            };
            let consumes_rest_of_line = members[1..].iter().any(|m| match m {
                Production::Pattern { value: regex } => {
                    regex.contains(".*")
                        || regex.contains("[^\\n]*")
                        || regex.contains("[^\\r\\n]*")
                }
                _ => false,
            });
            consumes_rest_of_line.then(|| prefix.clone())
        }
        _ => None,
    }
}
/// Sorts hidden external symbols into the grammar's layout-token sets by
/// name.
///
/// A symbol is considered only when it is referenced from some rule, starts
/// with `_`, has no rule of its own, and is not in `external_alias_map`.
/// The first matching name test wins:
/// * ends with `_indent` → `external_indent_opens`
/// * ends with `_dedent` → `external_indent_closes`
/// * contains `line_ending` or `newline`, or ends with `_or_eof` →
///   `external_newlines`
/// * contains `semicolon` → `external_semicolons`
fn classify_external_layout_tokens(grammar: &mut Grammar) {
    for name in collect_all_symbol_refs(&grammar.rules) {
        let is_hidden_external = name.starts_with('_') && !grammar.rules.contains_key(&name);
        if !is_hidden_external || grammar.external_alias_map.contains_key(&name) {
            continue;
        }
        // `ends_with("_indent")` also covers the exact name "_indent"; the
        // same holds for "_dedent".
        if name.ends_with("_indent") {
            grammar.external_indent_opens.insert(name);
        } else if name.ends_with("_dedent") {
            grammar.external_indent_closes.insert(name);
        } else if name.contains("line_ending")
            || name.contains("newline")
            || name.ends_with("_or_eof")
        {
            grammar.external_newlines.insert(name);
        } else if name.contains("semicolon") {
            grammar.external_semicolons.insert(name);
        }
    }
}
/// Gathers the name of every `SYMBOL` node reachable inside the bodies of
/// `rules`.
///
/// The traversal descends through `SEQ` / `CHOICE` members and all
/// single-content wrappers. It does not follow a symbol into the rule it
/// names; each rule body is simply scanned once.
fn collect_all_symbol_refs(
    rules: &BTreeMap<String, Production>,
) -> std::collections::HashSet<String> {
    let mut found = std::collections::HashSet::new();
    // Explicit work stack instead of recursion; the result is a set, so the
    // visiting order does not matter.
    let mut pending: Vec<&Production> = rules.values().collect();
    while let Some(node) = pending.pop() {
        match node {
            Production::Symbol { name } => {
                found.insert(name.clone());
            }
            Production::Seq { members } | Production::Choice { members } => {
                pending.extend(members.iter());
            }
            Production::Alias { content, .. }
            | Production::Repeat { content }
            | Production::Repeat1 { content }
            | Production::Optional { content }
            | Production::Field { content, .. }
            | Production::Token { content }
            | Production::ImmediateToken { content }
            | Production::Prec { content, .. }
            | Production::PrecLeft { content, .. }
            | Production::PrecRight { content, .. }
            | Production::PrecDynamic { content, .. }
            | Production::Reserved { content, .. } => pending.push(&**content),
            _ => {}
        }
    }
    found
}
/// Layout settings handed to [`emit_pretty`], which forwards them to the
/// output builder (`Output::new`).
///
/// How each field is applied is decided by `Output`, which is outside this
/// part of the file; the per-field notes below are therefore hedged.
/// `FormatPolicy::default()` supplies a two-wide indent, a single-space
/// separator, `"\n"` line endings and brace/semicolon-driven breaks.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct FormatPolicy {
/// Indentation width (default `2`).
/// NOTE(review): presumably columns per nesting level — confirm in `Output`.
pub indent_width: usize,
/// Text placed between adjacent tokens (default `" "`).
/// NOTE(review): exact placement rules live in `Output` — confirm.
pub separator: String,
/// Line terminator text (default `"\n"`).
pub newline: String,
/// Token texts after which a line break is wanted (default `;`, `{`, `}`).
/// NOTE(review): inferred from the field name — confirm in `Output`.
pub line_break_after: Vec<String>,
/// Token texts that open an indented region (default `{`).
/// NOTE(review): inferred from the field name — confirm in `Output`.
pub indent_open: Vec<String>,
/// Token texts that close an indented region (default `}`).
/// NOTE(review): inferred from the field name — confirm in `Output`.
pub indent_close: Vec<String>,
}
impl Default for FormatPolicy {
    /// Builds the default policy: indent width 2, single-space separator,
    /// `"\n"` newline, line breaks after `;` `{` `}`, and `{` / `}` as the
    /// indent open/close tokens.
    fn default() -> Self {
        let owned = |tokens: &[&str]| -> Vec<String> {
            tokens.iter().map(|t| (*t).to_owned()).collect()
        };
        Self {
            indent_width: 2,
            separator: String::from(" "),
            newline: String::from("\n"),
            line_break_after: owned(&[";", "{", "}"]),
            indent_open: owned(&["{"]),
            indent_close: owned(&["}"]),
        }
    }
}
/// Renders `schema` back to source bytes by walking `grammar` from every
/// root vertex.
///
/// Roots come from [`collect_roots`]. Each root is emitted with
/// `emit_vertex`, and a newline is requested between consecutive roots.
/// `policy` and the optional `cassette` are passed to the output builder.
///
/// # Errors
///
/// Returns `ParseError::EmitFailed` when the schema has no root vertices,
/// and propagates any error raised while emitting a vertex.
pub fn emit_pretty(
    protocol: &str,
    schema: &Schema,
    grammar: &Grammar,
    policy: &FormatPolicy,
    cassette: Option<&dyn crate::languages::cassettes::GrammarCassette>,
) -> Result<Vec<u8>, ParseError> {
    let roots = collect_roots(schema);
    let Some((first_root, remaining_roots)) = roots.split_first() else {
        return Err(ParseError::EmitFailed {
            protocol: protocol.to_owned(),
            reason: "schema has no entry vertices".to_owned(),
        });
    };
    let mut out = Output::new(policy, grammar, cassette);
    emit_vertex(protocol, schema, grammar, first_root, &mut out)?;
    for root in remaining_roots {
        // Separate top-level items from one another.
        out.newline();
        emit_vertex(protocol, schema, grammar, root, &mut out)?;
    }
    Ok(out.finish())
}
/// Determines the vertices emission starts from.
///
/// When `schema.entries` is non-empty, the roots are those entries that
/// exist in `schema.vertices`, in entry order. This may be empty if none
/// exist. Otherwise the roots are all vertices that are not the target of
/// any edge, sorted.
fn collect_roots(schema: &Schema) -> Vec<&panproto_gat::Name> {
    if schema.entries.is_empty() {
        let edge_targets: std::collections::HashSet<&panproto_gat::Name> =
            schema.edges.keys().map(|edge| &edge.tgt).collect();
        let mut untargeted: Vec<&panproto_gat::Name> = schema
            .vertices
            .keys()
            .filter(|name| !edge_targets.contains(name))
            .collect();
        untargeted.sort();
        untargeted
    } else {
        schema
            .entries
            .iter()
            .filter(|name| schema.vertices.contains_key(*name))
            .collect()
    }
}
fn emit_vertex(
protocol: &str,
schema: &Schema,
grammar: &Grammar,
vertex_id: &panproto_gat::Name,
out: &mut Output<'_>,
) -> Result<(), ParseError> {
let vertex = schema
.vertices
.get(vertex_id)
.ok_or_else(|| ParseError::EmitFailed {
protocol: protocol.to_owned(),
reason: format!("vertex '{vertex_id}' not found"),
})?;
let kind_head = vertex.kind.as_ref();
if let Some(rule) = grammar.rules.get(kind_head) {
if is_immediate_token(rule) {
out.no_space();
}
}
if let Some(literal) = literal_value(schema, vertex_id) {
if children_for(schema, vertex_id).is_empty() {
let is_bracket_pair = literal.len() >= 2
&& matches!(
(literal.as_bytes().first(), literal.as_bytes().last()),
(Some(b'('), Some(b')')) | (Some(b'['), Some(b']')) | (Some(b'{'), Some(b'}'))
);
let vkind = vertex.kind.as_ref();
let has_alias_rule = grammar
.named_alias_map
.get(vkind)
.is_some_and(|src| grammar.rules.contains_key(src));
if !(is_bracket_pair && has_alias_rule) {
out.token_with_role(literal, Some(TokenRole::Terminal));
return Ok(());
}
}
}
let kind = vertex.kind.as_ref();
let edges = children_for(schema, vertex_id);
if let Some(rule) = grammar.rules.get(kind) {
let old_rule = out.current_rule.take();
out.current_rule = Some(kind.to_owned());
let mut cursor = ChildCursor::new(&edges);
emit_production(protocol, schema, grammar, vertex_id, rule, &mut cursor, out)?;
drain_extras(protocol, schema, grammar, &mut cursor, out)?;
out.current_rule = old_rule;
return Ok(());
}
if let Some(source_name) = grammar.named_alias_map.get(kind) {
if let Some(rule) = grammar.rules.get(source_name) {
let old_rule = out.current_rule.take();
out.current_rule = Some(source_name.to_owned());
let mut cursor = ChildCursor::new(&edges);
emit_production(protocol, schema, grammar, vertex_id, rule, &mut cursor, out)?;
drain_extras(protocol, schema, grammar, &mut cursor, out)?;
out.current_rule = old_rule;
return Ok(());
}
}
for edge in &edges {
emit_vertex(protocol, schema, grammar, &edge.tgt, out)?;
}
Ok(())
}
/// Tracks which child edges of a vertex have already been emitted while a
/// grammar rule is being walked.
struct ChildCursor<'a> {
// The child edges, in the order they were handed to `ChildCursor::new`.
edges: &'a [&'a Edge],
// `consumed[i]` is `true` once `edges[i]` has been taken. It has the same
// length as `edges`; callers snapshot and restore this vector to roll back.
consumed: Vec<bool>,
}
impl<'a> ChildCursor<'a> {
    /// Creates a cursor over `edges` with nothing consumed yet.
    fn new(edges: &'a [&'a Edge]) -> Self {
        let consumed = vec![false; edges.len()];
        Self { edges, consumed }
    }

    /// Consumes and returns the first unconsumed edge whose kind equals
    /// `field_name`, or `None` if there is none.
    fn take_field(&mut self, field_name: &str) -> Option<&'a Edge> {
        self.take_matching(|edge| edge.kind.as_ref() == field_name)
    }

    /// Reports whether any unconsumed edge satisfies `predicate`, without
    /// consuming it.
    #[cfg(test)]
    fn has_matching(&self, predicate: impl Fn(&Edge) -> bool) -> bool {
        (0..self.edges.len()).any(|i| !self.consumed[i] && predicate(self.edges[i]))
    }

    /// Consumes and returns the first unconsumed edge satisfying
    /// `predicate`, or `None` if there is none. The predicate is only run on
    /// edges that are still unconsumed.
    fn take_matching(&mut self, predicate: impl Fn(&Edge) -> bool) -> Option<&'a Edge> {
        let idx =
            (0..self.edges.len()).find(|&i| !self.consumed[i] && predicate(self.edges[i]))?;
        self.consumed[idx] = true;
        Some(self.edges[idx])
    }
}
// Per-thread emitter state shared by the recursive emit functions.
thread_local! {
// Nesting depth of `emit_production` calls on this thread; `emit_production`
// fails once the depth exceeds 500.
static EMIT_DEPTH: std::cell::Cell<usize> = const { std::cell::Cell::new(0) };
// `(vertex id, rule name)` pairs currently being walked by `walk_in_mu_frame`;
// a pair already in the set is not walked again.
static EMIT_MU_FRAMES: std::cell::RefCell<std::collections::HashSet<(String, String)>> =
std::cell::RefCell::new(std::collections::HashSet::new());
// Name of the enclosing FIELD, if any. Set by `push_field_context`, cleared by
// `clear_field_context`, read by `current_field_context`.
static EMIT_FIELD_CONTEXT: std::cell::RefCell<Option<String>> =
const { std::cell::RefCell::new(None) };
}
/// Scope guard holding the field context that was active before a change.
/// Dropping the guard writes that saved value back into
/// `EMIT_FIELD_CONTEXT`.
struct FieldContextGuard(Option<String>);

impl Drop for FieldContextGuard {
    fn drop(&mut self) {
        let saved = self.0.take();
        EMIT_FIELD_CONTEXT.with(|slot| {
            *slot.borrow_mut() = saved;
        });
    }
}
/// Makes `name` the current field context and returns a guard that restores
/// the previous context when dropped.
fn push_field_context(name: &str) -> FieldContextGuard {
    let replaced = EMIT_FIELD_CONTEXT.with(|slot| slot.replace(Some(name.to_owned())));
    FieldContextGuard(replaced)
}
/// Clears the current field context and returns a guard that restores the
/// previous context when dropped.
fn clear_field_context() -> FieldContextGuard {
    let removed = EMIT_FIELD_CONTEXT.with(|slot| slot.take());
    FieldContextGuard(removed)
}
/// Returns a copy of the field name currently in effect, if any.
fn current_field_context() -> Option<String> {
    EMIT_FIELD_CONTEXT.with(|slot| Option::clone(&slot.borrow()))
}
/// Walks `rule` for `vertex_id` unless the same `(vertex, rule_name)` pair is
/// already being walked further up the call stack on this thread.
///
/// On re-entry the call does nothing and returns `Ok(())`. Otherwise the pair
/// is registered in `EMIT_MU_FRAMES` for the duration of the walk and removed
/// afterwards, whether or not the walk succeeded.
///
/// # Errors
///
/// Propagates the result of `emit_production`.
fn walk_in_mu_frame(
    protocol: &str,
    schema: &Schema,
    grammar: &Grammar,
    vertex_id: &panproto_gat::Name,
    rule_name: &str,
    rule: &Production,
    cursor: &mut ChildCursor<'_>,
    out: &mut Output<'_>,
) -> Result<(), ParseError> {
    let frame = (vertex_id.to_string(), rule_name.to_owned());
    let newly_entered = EMIT_MU_FRAMES.with(|frames| frames.borrow_mut().insert(frame.clone()));
    if newly_entered {
        let outcome = emit_production(protocol, schema, grammar, vertex_id, rule, cursor, out);
        EMIT_MU_FRAMES.with(|frames| {
            frames.borrow_mut().remove(&frame);
        });
        outcome
    } else {
        // Already walking this rule for this vertex: cut the cycle.
        Ok(())
    }
}
/// Depth-guarded entry point for emitting one grammar production.
///
/// Increments the thread-local `EMIT_DEPTH`, emits any leading extras via
/// `drain_extras`, then delegates to `emit_production_inner`.
///
/// The depth counter is released by a drop guard, so it is decremented on
/// every exit path, including the early return taken when `drain_extras`
/// fails. A manual decrement would miss that path and leave the counter
/// permanently raised, shrinking the recursion budget for all later emits on
/// the same thread.
///
/// # Errors
///
/// Returns `ParseError::EmitFailed` when nesting exceeds 500 levels, and
/// propagates errors from `drain_extras` and `emit_production_inner`.
fn emit_production(
    protocol: &str,
    schema: &Schema,
    grammar: &Grammar,
    vertex_id: &panproto_gat::Name,
    production: &Production,
    cursor: &mut ChildCursor<'_>,
    out: &mut Output<'_>,
) -> Result<(), ParseError> {
    /// Undoes one `EMIT_DEPTH` increment when dropped.
    struct DepthGuard;
    impl Drop for DepthGuard {
        fn drop(&mut self) {
            EMIT_DEPTH.with(|d| d.set(d.get().saturating_sub(1)));
        }
    }

    let depth = EMIT_DEPTH.with(|d| {
        let v = d.get() + 1;
        d.set(v);
        v
    });
    let _depth_guard = DepthGuard;
    if depth > 500 {
        return Err(ParseError::EmitFailed {
            protocol: protocol.to_owned(),
            reason: format!(
                "emit_production recursion >500 (likely a cyclic grammar; \
                 vertex='{vertex_id}')"
            ),
        });
    }
    drain_extras(protocol, schema, grammar, cursor, out)?;
    emit_production_inner(
        protocol, schema, grammar, vertex_id, production, cursor, out,
    )
}
/// Emits the run of "extra" children at the front of the unconsumed edges.
///
/// Repeatedly looks at the first unconsumed edge only. While that edge's
/// target vertex exists and its kind is listed in `grammar.extras`, the edge
/// is marked consumed and its target emitted. The loop stops at the first
/// unconsumed edge that is not an extra, or when no edges remain. Nothing
/// happens when the grammar declares no extras.
///
/// # Errors
///
/// Propagates any error from `emit_vertex`.
fn drain_extras(
    protocol: &str,
    schema: &Schema,
    grammar: &Grammar,
    cursor: &mut ChildCursor<'_>,
    out: &mut Output<'_>,
) -> Result<(), ParseError> {
    if grammar.extras.is_empty() {
        return Ok(());
    }
    loop {
        let Some(idx) = (0..cursor.edges.len()).find(|&i| !cursor.consumed[i]) else {
            return Ok(());
        };
        let edge = cursor.edges[idx];
        let is_extra = schema.vertices.get(&edge.tgt).is_some_and(|v| {
            let kind = v.kind.as_ref();
            grammar.extras.contains(kind)
        });
        if !is_extra {
            return Ok(());
        }
        cursor.consumed[idx] = true;
        emit_vertex(protocol, schema, grammar, &edge.tgt, out)?;
    }
}
/// Emits the members of a `SEQ` in order, assigning a `TokenRole` to each
/// literal member and inserting indent, line-break and spacing tokens.
///
/// `in_choice` is forwarded to `classify_seq_positions`; callers pass `true`
/// when the sequence is the alternative picked from a `CHOICE`.
///
/// # Errors
///
/// Propagates any error from emitting a non-literal member.
fn emit_seq_with_roles(
protocol: &str,
schema: &Schema,
grammar: &Grammar,
vertex_id: &panproto_gat::Name,
members: &[Production],
cursor: &mut ChildCursor<'_>,
out: &mut Output<'_>,
in_choice: bool,
) -> Result<(), ParseError> {
// One optional role per member, indexed like `members`.
let positional_roles = classify_seq_positions(members, in_choice);
// Index of the first BracketOpen member that `seq_bracket_triggers_indent`
// accepts; only this member opens an indented region.
let indent_open_idx: Option<usize> = positional_roles.iter().enumerate().position(|(i, r)| {
*r == Some(TokenRole::BracketOpen) && seq_bracket_triggers_indent(members, i, grammar)
});
// Member indices that get a newline requested before they are emitted.
let mut line_break_positions: std::collections::HashSet<usize> =
std::collections::HashSet::new();
if let Some(oi) = indent_open_idx {
let open_text = unwrap_to_string(&members[oi]);
// Line breaks are only planned when the opener is a word-like token.
if open_text.is_some_and(is_word_like) {
let mut found_body = false;
for (j, member) in members.iter().enumerate().skip(oi + 1) {
if let Production::Choice { members: alts } = member {
// An optional body: a CHOICE with a BLANK alternative and a SYMBOL
// alternative whose rule contains a repeat.
let has_blank = alts.iter().any(|a| matches!(a, Production::Blank));
let has_block_symbol = alts.iter().any(|a| match a {
Production::Symbol { name } => {
grammar.rules.get(name).is_some_and(has_repeat_in)
}
_ => false,
});
if has_blank && has_block_symbol {
line_break_positions.insert(j);
found_body = true;
}
} else if found_body && matches!(member, Production::Field { .. }) {
// FIELD members after a body also start on a new line.
line_break_positions.insert(j);
}
}
}
}
// Whether the previously processed member pushed at least one `Token::Lit`.
let mut prev_member_emitted_content = false;
for (i, member) in members.iter().enumerate() {
let tokens_before_member = out.tokens.len();
if let Some(value) = unwrap_to_string(member) {
// Literal member: use the positional role, else Keyword for word-like
// text and Operator for anything else.
let role = positional_roles[i].unwrap_or_else(|| {
if is_word_like(value) {
TokenRole::Keyword
} else {
TokenRole::Operator
}
});
if indent_open_idx == Some(i) {
if is_word_like(value) {
// Word-like opener: push the literal and the indent marker directly.
out.tokens.push(Token::Lit(value.to_owned(), role));
out.tokens.push(Token::IndentOpen);
} else {
out.token_with_indent_open(value, role);
}
} else if role == TokenRole::BracketClose && indent_open_idx.is_some() {
// Any closing bracket in a sequence that opened an indent closes it
// before the bracket text is pushed.
out.tokens.push(Token::IndentClose);
out.tokens.push(Token::Lit(value.to_owned(), role));
} else {
out.token_with_role(value, Some(role));
}
} else {
// Non-literal member. Between two adjacent non-literal members, force a
// space when the previous one produced content, unless one of the four
// exemptions below applies.
if i > 0 && unwrap_to_string(&members[i - 1]).is_none() && prev_member_emitted_content {
let member_starts_with_bracket = member_has_leading_bracket(member, grammar);
// A hidden symbol with no rule of its own.
let is_zero_width_external = matches!(
member,
Production::Symbol { name }
if name.starts_with('_') && !grammar.rules.contains_key(name)
);
// A CHOICE made up solely of BLANK and literal alternatives.
let is_separator_choice = matches!(member, Production::Choice { members: alts }
if alts.iter().all(|a| matches!(a, Production::Blank) || unwrap_to_string(a).is_some()));
let is_repeat = matches!(
member,
Production::Repeat { .. } | Production::Repeat1 { .. }
);
if !member_starts_with_bracket
&& !is_zero_width_external
&& !is_separator_choice
&& !is_repeat
{
out.tokens.push(Token::ForceSpace);
}
}
if line_break_positions.contains(&i) {
out.newline();
}
emit_production(protocol, schema, grammar, vertex_id, member, cursor, out)?;
}
// Only `Token::Lit` counts as content; layout tokens do not.
prev_member_emitted_content = out.tokens[tokens_before_member..]
.iter()
.any(|t| matches!(t, Token::Lit(_, _)));
}
Ok(())
}
/// Emits output for one grammar `production` node on behalf of `vertex_id`,
/// dispatching on the production variant.
///
/// `cursor` records which of the vertex's child edges have been consumed;
/// `out` receives the tokens. This is reached through `emit_production`,
/// which does the depth accounting and drains leading extras first.
///
/// # Errors
///
/// Returns `ParseError::EmitFailed` when a `SYMBOL` equal to the vertex's own
/// kind has no rule. Other errors from nested emission are propagated, except
/// inside the `REPEAT` / `REPEAT1` iteration loop, where a failing iteration
/// is rolled back and simply ends the loop.
fn emit_production_inner(
protocol: &str,
schema: &Schema,
grammar: &Grammar,
vertex_id: &panproto_gat::Name,
production: &Production,
cursor: &mut ChildCursor<'_>,
out: &mut Output<'_>,
) -> Result<(), ParseError> {
match production {
Production::String { value } => {
out.token(value);
Ok(())
}
Production::Pattern { value } => {
// Prefer the vertex's recorded literal; otherwise derive output from the
// pattern itself (newline, nothing for whitespace-only, or a placeholder).
if let Some(literal) = literal_value(schema, vertex_id) {
out.token_with_role(literal, Some(TokenRole::Terminal));
} else if is_newline_like_pattern(value) {
out.newline();
} else if is_whitespace_only_pattern(value) {
// Deliberately emits nothing.
} else {
out.token_with_role(&placeholder_for_pattern(value), Some(TokenRole::Terminal));
}
Ok(())
}
Production::Blank => Ok(()),
Production::Symbol { name } => {
// Inside a FIELD, the symbol is resolved by field name: take the next
// edge of that field kind and emit its target in a fresh child context.
// A missing edge emits nothing.
if let Some(field) = current_field_context() {
if let Some(edge) = cursor.take_field(&field) {
return emit_in_child_context(
protocol, schema, grammar, &edge.tgt, production, out,
);
}
return Ok(());
}
if name.starts_with('_') {
if let Some(rule) = grammar.rules.get(name) {
// Hidden rule: walk its body against the same vertex and cursor,
// guarded against re-entry by `walk_in_mu_frame`.
let old_rule = out.current_rule.take();
out.current_rule = Some(name.to_owned());
let result = walk_in_mu_frame(
protocol, schema, grammar, vertex_id, name, rule, cursor, out,
);
out.current_rule = old_rule;
result
} else {
// Hidden symbol without a rule (an external token): map it to an
// alias text, a layout action, or a cassette-provided default.
if let Some(alias_value) = grammar.external_alias_map.get(name) {
out.token(alias_value);
return Ok(());
}
if grammar.external_indent_opens.contains(name) {
out.indent_open();
} else if grammar.external_indent_closes.contains(name) {
out.indent_close();
} else if grammar.external_newlines.contains(name) {
out.newline();
} else if grammar.external_semicolons.contains(name) {
out.token_with_role(";", Some(TokenRole::Separator));
} else if let Some(default) = out
.cassette
.and_then(|c| crate::languages::cassettes::resolve_external_token(c, name))
{
if !default.is_empty() {
out.token(default);
}
}
Ok(())
}
} else if let Some(edge) = { take_symbol_match(grammar, schema, cursor, name) } {
// Visible symbol matched by a child edge: emit that child.
emit_vertex(protocol, schema, grammar, &edge.tgt, out)
} else if vertex_id_kind(schema, vertex_id) == Some(name.as_str()) {
// The symbol names the current vertex's own kind: walk that rule in
// place instead of descending to a child.
let rule = grammar
.rules
.get(name)
.ok_or_else(|| ParseError::EmitFailed {
protocol: protocol.to_owned(),
reason: format!("no production for SYMBOL '{name}'"),
})?;
{
let old_rule = out.current_rule.take();
out.current_rule = Some(name.to_owned());
let result = walk_in_mu_frame(
protocol, schema, grammar, vertex_id, name, rule, cursor, out,
);
out.current_rule = old_rule;
result
}
} else {
// No matching child and not the vertex's own kind: emit nothing.
Ok(())
}
}
Production::Seq { members } => emit_seq_with_roles(
protocol, schema, grammar, vertex_id, members, cursor, out, false,
),
Production::Choice { members } => {
if let Some(matched) =
pick_choice_with_cursor(schema, grammar, vertex_id, cursor, members)
{
match matched {
// A SEQ alternative is emitted with `in_choice = true`.
Production::Seq {
members: seq_members,
} => emit_seq_with_roles(
protocol,
schema,
grammar,
vertex_id,
seq_members,
cursor,
out,
true,
),
// A bare STRING alternative is a keyword if word-like, else a separator.
Production::String { value } => {
let role = if is_word_like(value) {
TokenRole::Keyword
} else {
TokenRole::Separator
};
out.token_with_role(value, Some(role));
Ok(())
}
_ => {
emit_production(protocol, schema, grammar, vertex_id, matched, cursor, out)
}
}
} else {
Ok(())
}
}
Production::Repeat { content } | Production::Repeat1 { content } => {
// Text of a literal first member of a SEQ body, if the body has one.
let mandatory_sep_text: Option<&str> = match content.as_ref() {
Production::Seq { members } if members.len() >= 2 => unwrap_to_string(&members[0]),
_ => None,
};
// `Some(members)` when the SEQ body leads with a separator slot: a CHOICE
// containing BLANK, an OPTIONAL, or a literal the cassette treats as a
// line break. Such bodies are emitted member-by-member below.
let separator_leading_seq: Option<&[Production]> = match content.as_ref() {
Production::Seq { members } if members.len() >= 2 => {
let first = &members[0];
let is_mandatory_sep = unwrap_to_string(first).is_some();
let cassette_overrides = is_mandatory_sep
&& unwrap_to_string(first).is_some_and(|sep| {
out.cassette.is_some_and(|c| c.separator_is_line_break(sep))
});
let is_separator_slot = match first {
Production::Choice { members } => {
members.iter().any(|m| matches!(m, Production::Blank))
}
Production::Optional { .. } => true,
_ => cassette_overrides,
};
if is_separator_slot {
Some(members.as_slice())
} else {
None
}
}
_ => None,
};
let mut emitted_any = false;
loop {
// Snapshot cursor and output so an unproductive iteration can be undone.
let cursor_snap = cursor.consumed.clone();
let out_snap = out.snapshot();
let consumed_before = cursor.consumed.iter().filter(|&&c| c).count();
let result: Result<(), ParseError> =
if let Some(seq_members) = separator_leading_seq {
let cassette_replaces_sep = mandatory_sep_text.is_some_and(|sep| {
out.cassette.is_some_and(|c| c.separator_is_line_break(sep))
});
let pre_sep = out.snapshot();
// Emit the separator slot, or a newline in its place when the
// cassette says this separator is a line break.
let sep_result = if cassette_replaces_sep {
out.newline();
Ok(())
} else {
emit_production(
protocol,
schema,
grammar,
vertex_id,
&seq_members[0],
cursor,
out,
)
};
match sep_result {
Err(e) => Err(e),
Ok(()) => {
// If the separator slot produced no literal, suppress the space
// before the element that follows.
if !cassette_replaces_sep && !out.lit_emitted_since(pre_sep) {
out.no_space();
}
let mut rest_result = Ok(());
for member in &seq_members[1..] {
rest_result = emit_production(
protocol, schema, grammar, vertex_id, member, cursor, out,
);
if rest_result.is_err() {
break;
}
}
rest_result
}
}
} else {
emit_production(protocol, schema, grammar, vertex_id, content, cursor, out)
};
let consumed_after = cursor.consumed.iter().filter(|&&c| c).count();
// Stop on error or when the iteration consumed no child; either way the
// iteration's cursor and output changes are rolled back.
if result.is_err() || consumed_after == consumed_before {
cursor.consumed = cursor_snap;
out.restore(out_snap);
break;
}
emitted_any = true;
}
// REPEAT1 needs at least one occurrence: emit the body once more, this
// time without rollback and with errors propagated.
if matches!(production, Production::Repeat1 { .. }) && !emitted_any {
emit_production(protocol, schema, grammar, vertex_id, content, cursor, out)?;
}
Ok(())
}
Production::Optional { content } => {
let cursor_snap = cursor.consumed.clone();
let out_snap = out.snapshot();
let consumed_before = cursor.consumed.iter().filter(|&&c| c).count();
let result =
emit_production(protocol, schema, grammar, vertex_id, content, cursor, out);
if result.is_err() {
// Roll back, but still report the error to the caller.
cursor.consumed = cursor_snap;
out.restore(out_snap);
return result;
}
let consumed_after = cursor.consumed.iter().filter(|&&c| c).count();
// Keep output that consumed no child only when `has_relevant_constraint`
// approves it; otherwise drop it.
if consumed_after == consumed_before
&& !has_relevant_constraint(content, schema, vertex_id)
{
cursor.consumed = cursor_snap;
out.restore(out_snap);
}
Ok(())
}
Production::Field { name, content } => {
// The guard restores the previous field context when this arm returns.
let _guard = push_field_context(name);
emit_production(protocol, schema, grammar, vertex_id, content, cursor, out)
}
Production::Alias {
content,
named,
value,
} => {
// Named alias: look for a child whose vertex kind equals the alias value
// and emit it through the aliased content.
if *named && !value.is_empty() {
if let Some(edge) = cursor.take_matching(|edge| {
schema
.vertices
.get(&edge.tgt)
.map(|v| v.kind.as_ref() == value.as_str())
.unwrap_or(false)
}) {
return emit_aliased_child(protocol, schema, grammar, &edge.tgt, content, out);
}
}
// Anonymous alias over a hidden symbol with no rule: the alias value is
// the text to emit.
if !*named && !value.is_empty() {
if let Production::Symbol { name: sym } = content.as_ref() {
if sym.starts_with('_') && !grammar.rules.contains_key(sym) {
out.token(value);
return Ok(());
}
}
}
emit_production(protocol, schema, grammar, vertex_id, content, cursor, out)
}
Production::ImmediateToken { content } => {
// Immediate tokens attach to the preceding token without a space.
out.no_space();
emit_production(protocol, schema, grammar, vertex_id, content, cursor, out)
}
// Transparent wrappers: emit the content unchanged.
Production::Token { content }
| Production::Prec { content, .. }
| Production::PrecLeft { content, .. }
| Production::PrecRight { content, .. }
| Production::PrecDynamic { content, .. }
| Production::Reserved { content, .. } => {
emit_production(protocol, schema, grammar, vertex_id, content, cursor, out)
}
}
}
/// Consumes the child edge that best matches the grammar symbol `name`.
///
/// An edge matches when its target vertex's kind satisfies `name` according
/// to [`kind_satisfies_symbol`]. Edges of kind `"child_of"` are tried first;
/// if none of them match, the first matching edge of any kind is taken.
fn take_symbol_match<'a>(
    grammar: &Grammar,
    schema: &Schema,
    cursor: &mut ChildCursor<'a>,
    name: &str,
) -> Option<&'a Edge> {
    let target_satisfies = |edge: &Edge| {
        let target_kind = schema.vertices.get(&edge.tgt).map(|v| v.kind.as_ref());
        kind_satisfies_symbol(grammar, target_kind, name)
    };
    cursor
        .take_matching(|edge| edge.kind.as_ref() == "child_of" && target_satisfies(edge))
        .or_else(|| cursor.take_matching(|edge| target_satisfies(edge)))
}
/// Reports whether a vertex of kind `target_kind` can stand where the grammar
/// expects the symbol `name`.
///
/// This holds when the kind equals `name`, or when the set stored under the
/// kind in `grammar.subtypes` contains `name`. A missing kind (`None`) never
/// satisfies anything.
fn kind_satisfies_symbol(grammar: &Grammar, target_kind: Option<&str>, name: &str) -> bool {
    target_kind.is_some_and(|target| {
        target == name
            || grammar
                .subtypes
                .get(target)
                .is_some_and(|set| set.contains(name))
    })
}
/// Emits the child vertex `child_id` that was matched by a named `ALIAS`,
/// using the alias's `content` as the production to walk.
///
/// A childless vertex with a literal value is written verbatim as a terminal,
/// unless the literal is a bracket pair (`(..)`, `[..]`, `{..}`). Otherwise
/// the field context is cleared and `content` is walked over the child's own
/// edges. When `content` is a `SYMBOL` naming a rule, that rule's body is
/// walked instead, with `out.current_rule` set to the symbol name for the
/// duration and restored afterwards.
///
/// # Errors
///
/// Propagates any error from `emit_production`.
fn emit_aliased_child(
    protocol: &str,
    schema: &Schema,
    grammar: &Grammar,
    child_id: &panproto_gat::Name,
    content: &Production,
    out: &mut Output<'_>,
) -> Result<(), ParseError> {
    if let Some(literal) = literal_value(schema, child_id) {
        if children_for(schema, child_id).is_empty() {
            let ends = (literal.as_bytes().first(), literal.as_bytes().last());
            let is_bracket_pair = literal.len() >= 2
                && matches!(
                    ends,
                    (Some(b'('), Some(b')')) | (Some(b'['), Some(b']')) | (Some(b'{'), Some(b'}'))
                );
            if !is_bracket_pair {
                out.token_with_role(literal, Some(TokenRole::Terminal));
                return Ok(());
            }
        }
    }
    // The child is walked outside any enclosing FIELD context.
    let _guard = clear_field_context();
    let named_rule = match content {
        Production::Symbol { name } => grammar.rules.get(name).map(|rule| (name, rule)),
        _ => None,
    };
    let edges = children_for(schema, child_id);
    let mut cursor = ChildCursor::new(&edges);
    let Some((name, rule)) = named_rule else {
        return emit_production(
            protocol,
            schema,
            grammar,
            child_id,
            content,
            &mut cursor,
            out,
        );
    };
    let old_rule = out.current_rule.take();
    out.current_rule = Some(name.to_owned());
    let result = emit_production(protocol, schema, grammar, child_id, rule, &mut cursor, out);
    out.current_rule = old_rule;
    result
}
/// Emits the child vertex `child_id` that was selected for a `FIELD`, with
/// the field context cleared for the duration.
///
/// * If `production` is a `SYMBOL`, the child is emitted as a vertex.
/// * Otherwise, if any symbol referenced by `production` is satisfied by the
///   child's kind, the child is likewise emitted as a vertex.
/// * Otherwise `production` is walked directly over the child's own edges.
///
/// # Errors
///
/// Propagates any error from `emit_vertex` or `emit_production`.
fn emit_in_child_context(
    protocol: &str,
    schema: &Schema,
    grammar: &Grammar,
    child_id: &panproto_gat::Name,
    production: &Production,
    out: &mut Output<'_>,
) -> Result<(), ParseError> {
    let _guard = clear_field_context();
    if matches!(production, Production::Symbol { .. }) {
        return emit_vertex(protocol, schema, grammar, child_id, out);
    }
    let child_kind = schema.vertices.get(child_id).map(|v| v.kind.as_ref());
    let symbols = referenced_symbols(production);
    let names_child_kind = symbols
        .iter()
        .any(|s| kind_satisfies_symbol(grammar, child_kind, s) || child_kind == Some(s));
    if names_child_kind {
        return emit_vertex(protocol, schema, grammar, child_id, out);
    }
    let edges = children_for(schema, child_id);
    let mut cursor = ChildCursor::new(&edges);
    emit_production(
        protocol,
        schema,
        grammar,
        child_id,
        production,
        &mut cursor,
        out,
    )
}
/// Chooses which alternative of a `CHOICE` to emit for `vertex_id`, given the
/// child edges still unconsumed in `cursor`.
///
/// The heuristics run in a fixed order and the first one that decides wins:
/// 1. Nothing left to consume: `BLANK` if present; otherwise a newline-like
///    `PATTERN`, then a literal-only alternative, then an alternative whose
///    yield set contains the empty string.
/// 2. A `STRING` alternative equal to the kind or literal of any unconsumed
///    child.
/// 3. Scoring of alternatives' literal strings against the vertex's recorded
///    constraint text (interstitials, or the chosen-alternative fingerprint).
/// 4. Matching on the kind of the first unconsumed child: direct symbol or
///    named alias, then `grammar.subtypes`, then yield sets (highest
///    `prec_value` wins among several).
/// 5. An alternative containing a `FIELD` that matches an unconsumed edge kind.
/// 6. Fallbacks: newline-like pattern, `BLANK`-related choices, a
///    symbol-free alternative, and finally the first non-`BLANK` alternative.
///
/// Returns `None` only when `alternatives` is empty.
fn pick_choice_with_cursor<'a>(
schema: &Schema,
grammar: &Grammar,
vertex_id: &panproto_gat::Name,
cursor: &ChildCursor<'_>,
alternatives: &'a [Production],
) -> Option<&'a Production> {
let consumed_count = cursor.consumed.iter().filter(|&&c| c).count();
// Values of the vertex's `interstitial-<n>` constraints, ordered by <n>.
// Sorts ending in `-start-byte` and sorts whose suffix is not a number are
// ignored.
let positional_interstitials: Vec<&str> = schema
.constraints
.get(vertex_id)
.map(|cs| {
let mut indexed: Vec<(usize, &str)> = cs
.iter()
.filter_map(|c| {
let s = c.sort.as_ref();
if !s.starts_with("interstitial-") || s.ends_with("-start-byte") {
return None;
}
let idx: usize = s["interstitial-".len()..].parse().ok()?;
Some((idx, c.value.as_str()))
})
.collect();
indexed.sort_by_key(|&(i, _)| i);
indexed.into_iter().map(|(_, v)| v).collect()
})
.unwrap_or_default();
// The interstitials from position `consumed_count` onward, space-joined.
let positional_slice: String = if positional_interstitials.is_empty() {
String::new()
} else {
positional_interstitials
.iter()
.skip(consumed_count)
.copied()
.collect::<Vec<&str>>()
.join(" ")
};
let fingerprint_blob = schema
.constraints
.get(vertex_id)
.and_then(|cs| {
cs.iter()
.find(|c| c.sort.as_ref() == "chose-alt-fingerprint")
.map(|c| c.value.clone())
})
.unwrap_or_default();
// Text that alternatives' literals are scored against: the positional slice
// when it is non-empty, otherwise the fingerprint.
let constraint_blob: String = if positional_slice.is_empty() {
fingerprint_blob
} else {
positional_slice
};
// Whitespace-separated kinds recorded under `chose-alt-child-kinds`.
let child_kinds: Vec<&str> = schema
.constraints
.get(vertex_id)
.and_then(|cs| {
cs.iter()
.find(|c| c.sort.as_ref() == "chose-alt-child-kinds")
.map(|c| c.value.split_whitespace().collect())
})
.unwrap_or_default();
let any_unconsumed = cursor
.edges
.iter()
.enumerate()
.any(|(i, _)| !cursor.consumed[i]);
let blank_present = alternatives.iter().any(|a| matches!(a, Production::Blank));
// Edge kinds of the unconsumed edges, used for FIELD matching below.
let edge_kinds: Vec<&str> = cursor
.edges
.iter()
.enumerate()
.filter(|(i, _)| !cursor.consumed[*i])
.map(|(_, e)| e.kind.as_ref())
.collect();
// Heuristic 1a: nothing left and BLANK is allowed -> take BLANK.
if !any_unconsumed && blank_present {
return alternatives.iter().find(|a| matches!(a, Production::Blank));
}
// Heuristic 1b: nothing left and no BLANK -> pick something that needs no
// child: a newline pattern, a literal-only alternative, or one that can
// yield the empty string.
if !any_unconsumed && !blank_present {
for alt in alternatives {
if let Production::Pattern { value } = alt {
if is_newline_like_pattern(value) {
return Some(alt);
}
}
}
if let Some(pure_lit) = alternatives.iter().find(|alt| {
let syms = referenced_symbols(alt);
let strings = literal_strings(alt);
syms.is_empty() && !strings.is_empty()
}) {
return Some(pure_lit);
}
let mut visited = std::collections::HashSet::new();
let mut yield_cache = grammar.yield_sets.clone();
for alt in alternatives {
let ys = yield_of_production(grammar, alt, &mut visited, &mut yield_cache);
if ys.contains("") {
return Some(alt);
}
visited.clear();
}
}
// Heuristic 2: a STRING alternative that equals the kind or the literal of
// some unconsumed child (children are scanned in edge order).
for edge_idx in 0..cursor.edges.len() {
if cursor.consumed[edge_idx] {
continue;
}
let edge = &cursor.edges[edge_idx];
let tgt_kind = schema.vertices.get(&edge.tgt).map(|v| v.kind.as_ref());
let tgt_lit = literal_value(schema, &edge.tgt);
for alt in alternatives {
if let Production::String { value } = alt {
if Some(value.as_str()) == tgt_kind || tgt_lit == Some(value.as_str()) {
return Some(alt);
}
}
}
}
// Heuristic 3: score alternatives by how much of their literal text occurs
// in the constraint blob.
if !constraint_blob.is_empty() {
let first_uc_edge_pre = cursor
.edges
.iter()
.enumerate()
.find(|(i, _)| !cursor.consumed[*i])
.map(|(_, e)| e);
// Whether alternative `a` can start with the first unconsumed edge.
let alt_accepts = |a: &Production| -> bool {
let Some(edge) = first_uc_edge_pre else {
return false;
};
let edge_kind = edge.kind.as_ref();
let Some(tgt_kind) = schema.vertices.get(&edge.tgt).map(|v| v.kind.as_ref()) else {
return false;
};
accepts_first_edge(grammar, a, edge_kind, tgt_kind)
};
// When at least one alternative accepts the next child, alternatives that
// do not are excluded from scoring.
let any_consumes = any_unconsumed && alternatives.iter().any(alt_accepts);
let mut best_literal: usize = 0;
let mut best_symbols: usize = 0;
let mut best_total_chars: usize = usize::MAX;
let mut best_alt: Option<&Production> = None;
let mut tied = false;
for alt in alternatives {
let strings = literal_strings(alt);
if strings.is_empty() {
continue;
}
if any_consumes && !alt_accepts(alt) {
continue;
}
// Sum of the lengths of this alternative's literals found in the blob.
let literal_score = strings
.iter()
.filter(|s| constraint_blob.contains(s.as_str()))
.map(String::len)
.sum::<usize>();
if literal_score == 0 {
continue;
}
let total_chars: usize = strings.iter().map(String::len).sum();
// Tie-breaker: number of referenced symbols that appear in the recorded
// child kinds, directly or via `grammar.subtypes`. Only computed when
// the literal score can still compete.
let symbol_score = if literal_score >= best_literal && !child_kinds.is_empty() {
let symbols = referenced_symbols(alt);
symbols
.iter()
.filter(|sym| {
let sym_str: &str = sym;
if child_kinds.contains(&sym_str) {
return true;
}
grammar.subtypes.get(sym_str).is_some_and(|sub_set| {
sub_set
.iter()
.any(|sub| child_kinds.contains(&sub.as_str()))
})
})
.count()
} else {
0
};
// Ranking: higher literal score, then higher symbol score, then fewer
// total literal characters.
let better = literal_score > best_literal
|| (literal_score == best_literal && symbol_score > best_symbols)
|| (literal_score == best_literal
&& symbol_score == best_symbols
&& total_chars < best_total_chars);
let same = literal_score == best_literal
&& symbol_score == best_symbols
&& total_chars == best_total_chars;
if better {
best_literal = literal_score;
best_symbols = symbol_score;
best_total_chars = total_chars;
best_alt = Some(alt);
tied = false;
} else if same && best_alt.is_some() {
tied = true;
}
}
// Only an untied winner decides. With children pending, the winner must
// accept the next child, or there must be no BLANK, or the winner must
// reference no symbols; otherwise fall through to later heuristics.
if let Some(alt) = best_alt {
if !tied {
if any_unconsumed {
if alt_accepts(alt) {
return Some(alt);
}
if !blank_present || referenced_symbols(alt).is_empty() {
return Some(alt);
}
} else {
return Some(alt);
}
}
}
}
// Heuristic 4: match on the kind of the first unconsumed child.
let first_unconsumed_kind: Option<&str> = cursor
.edges
.iter()
.enumerate()
.find(|(i, _)| !cursor.consumed[*i])
.and_then(|(_, edge)| schema.vertices.get(&edge.tgt).map(|v| v.kind.as_ref()));
if let Some(target_kind) = first_unconsumed_kind {
let target_supers = grammar.subtypes.get(target_kind);
{
// 4a: if several alternatives can yield the kind, prefer the first one
// that references an external indent-open symbol.
let mut match_count = 0usize;
let mut indent_alt_idx: Option<usize> = None;
let mut visited = std::collections::HashSet::new();
let mut yield_cache = grammar.yield_sets.clone();
for (i, alt) in alternatives.iter().enumerate() {
let ys = yield_of_production(grammar, alt, &mut visited, &mut yield_cache);
if ys.contains(target_kind) {
match_count += 1;
if indent_alt_idx.is_none()
&& referenced_symbols(alt)
.iter()
.any(|s| grammar.external_indent_opens.contains(*s))
{
indent_alt_idx = Some(i);
}
}
visited.clear();
}
if match_count > 1 {
if let Some(idx) = indent_alt_idx {
return Some(&alternatives[idx]);
}
}
}
// 4b: a SYMBOL or named ALIAS that names the kind exactly.
for alt in alternatives {
if let Production::Symbol { name } = alt {
if name.as_str() == target_kind {
return Some(alt);
}
}
if let Production::Alias {
named: true, value, ..
} = alt
{
if value.as_str() == target_kind {
return Some(alt);
}
}
}
// 4c: a SYMBOL or named ALIAS found in the `grammar.subtypes` entry for
// the kind.
if let Some(supers) = target_supers {
for alt in alternatives {
if let Production::Symbol { name } = alt {
if supers.contains(name.as_str()) {
return Some(alt);
}
}
if let Production::Alias {
named: true, value, ..
} = alt
{
if supers.contains(value.as_str()) {
return Some(alt);
}
}
}
}
// 4d: alternatives whose yield set contains the kind, after discarding
// those whose FIELDs or recorded restrictions rule them out.
let mut visited = std::collections::HashSet::new();
let mut yield_cache = grammar.yield_sets.clone();
let mut matching_alts: Vec<&Production> = Vec::new();
for alt in alternatives {
if has_any_field(alt) && !has_field_in(alt, &edge_kinds) {
visited.clear();
continue;
}
if !alt_satisfies_field_token_restrictions(schema, cursor, alt) {
visited.clear();
continue;
}
if !alt_satisfies_pre_alias_constraints(schema, cursor, alt) {
visited.clear();
continue;
}
let ys = yield_of_production(grammar, alt, &mut visited, &mut yield_cache);
if ys.contains(target_kind) {
matching_alts.push(alt);
}
visited.clear();
}
if matching_alts.len() == 1 {
return Some(matching_alts[0]);
}
if matching_alts.len() > 1 {
// Highest `prec_value` first; the sort is stable, so equal values keep
// grammar order.
matching_alts.sort_by_key(|alt| std::cmp::Reverse(prec_value(alt)));
return Some(matching_alts[0]);
}
}
// Heuristic 5: an alternative with a FIELD matching an unconsumed edge kind.
for alt in alternatives {
if has_field_in(alt, &edge_kinds) {
return Some(alt);
}
}
// No-op: `schema` and `vertex_id` are already used above.
let _ = (schema, vertex_id);
// Heuristic 6a: a newline-like PATTERN alternative.
let has_newline_pattern = alternatives
.iter()
.any(|a| matches!(a, Production::Pattern { value } if is_newline_like_pattern(value)));
if has_newline_pattern {
for alt in alternatives {
if let Production::Pattern { value } = alt {
if is_newline_like_pattern(value) {
return Some(alt);
}
}
}
}
// Heuristic 6b: with BLANK available, prefer a hidden symbol whose rule
// contains a newline pattern; otherwise take BLANK.
if alternatives.iter().any(|a| matches!(a, Production::Blank)) {
for alt in alternatives {
if let Production::Symbol { name } = alt {
if name.starts_with('_') {
if let Some(rule) = grammar.rules.get(name) {
if contains_newline_pattern(rule) {
return Some(alt);
}
}
}
}
}
return alternatives.iter().find(|a| matches!(a, Production::Blank));
}
// Heuristic 6c: nothing left to consume -> first alternative that references
// no symbols.
if !any_unconsumed {
if let Some(pure_lit) = alternatives.iter().find(|alt| {
let syms = referenced_symbols(alt);
syms.is_empty() && !matches!(alt, Production::Blank)
}) {
return Some(pure_lit);
}
}
// Last resort: the first non-BLANK alternative.
alternatives
.iter()
.find(|alt| !matches!(alt, Production::Blank))
}
/// Collects every non-empty `STRING` literal found inside `production`.
///
/// The production tree is walked depth-first, left to right, so the returned
/// literals appear in grammar order. `SYMBOL` references are not followed;
/// only the structure of `production` itself is inspected.
fn literal_strings(production: &Production) -> Vec<String> {
    fn gather(node: &Production, acc: &mut Vec<String>) {
        match node {
            Production::String { value } => {
                // Empty literals carry no text and are skipped.
                if !value.is_empty() {
                    acc.push(value.to_owned());
                }
            }
            Production::Seq { members } | Production::Choice { members } => {
                members.iter().for_each(|member| gather(member, acc));
            }
            Production::Repeat { content }
            | Production::Repeat1 { content }
            | Production::Optional { content }
            | Production::Field { content, .. }
            | Production::Alias { content, .. }
            | Production::Token { content }
            | Production::ImmediateToken { content }
            | Production::Prec { content, .. }
            | Production::PrecLeft { content, .. }
            | Production::PrecRight { content, .. }
            | Production::PrecDynamic { content, .. }
            | Production::Reserved { content, .. } => gather(content, acc),
            _ => {}
        }
    }

    let mut literals = Vec::new();
    gather(production, &mut literals);
    literals
}
/// Lists the symbol names mentioned inside `production`, in traversal order.
///
/// A `SYMBOL` contributes its name. A named `ALIAS` with a non-empty value
/// contributes that value and is then searched as well, so the aliased
/// content's own symbols follow it. Duplicates are kept.
fn referenced_symbols(production: &Production) -> Vec<&str> {
    fn gather<'p>(node: &'p Production, acc: &mut Vec<&'p str>) {
        match node {
            Production::Symbol { name } => acc.push(name.as_str()),
            Production::Seq { members } | Production::Choice { members } => {
                members.iter().for_each(|member| gather(member, acc));
            }
            Production::Alias {
                content,
                named,
                value,
            } => {
                let exposes_named_kind = *named && !value.is_empty();
                if exposes_named_kind {
                    acc.push(value.as_str());
                }
                gather(content, acc);
            }
            Production::Repeat { content }
            | Production::Repeat1 { content }
            | Production::Optional { content }
            | Production::Field { content, .. }
            | Production::Token { content }
            | Production::ImmediateToken { content }
            | Production::Prec { content, .. }
            | Production::PrecLeft { content, .. }
            | Production::PrecRight { content, .. }
            | Production::PrecDynamic { content, .. }
            | Production::Reserved { content, .. } => gather(content, acc),
            _ => {}
        }
    }

    let mut symbols = Vec::new();
    gather(production, &mut symbols);
    symbols
}
/// Returns the name of the first `SYMBOL` reached by a depth-first,
/// left-to-right walk of `production`, or `None` when it contains no symbol.
///
/// Terminals (`STRING`, `PATTERN`, `BLANK`, ...) are skipped over.
#[cfg(test)]
fn first_symbol(production: &Production) -> Option<&str> {
    match production {
        Production::Symbol { name } => Some(name),
        Production::Seq { members } | Production::Choice { members } => {
            for member in members {
                if let Some(found) = first_symbol(member) {
                    return Some(found);
                }
            }
            None
        }
        Production::Repeat { content }
        | Production::Repeat1 { content }
        | Production::Optional { content }
        | Production::Field { content, .. }
        | Production::Alias { content, .. }
        | Production::Token { content }
        | Production::ImmediateToken { content }
        | Production::Prec { content, .. }
        | Production::PrecLeft { content, .. }
        | Production::PrecRight { content, .. }
        | Production::PrecDynamic { content, .. }
        | Production::Reserved { content, .. } => first_symbol(content),
        _ => None,
    }
}
/// Reads the integer precedence attached to the outermost `PREC*` wrapper.
///
/// Returns `0` when `prod` is not a precedence wrapper or when its value is
/// not representable as an `i64` (for example a named precedence).
fn prec_value(prod: &Production) -> i64 {
    let raw = match prod {
        Production::Prec { value, .. }
        | Production::PrecLeft { value, .. }
        | Production::PrecRight { value, .. }
        | Production::PrecDynamic { value, .. } => value,
        _ => return 0,
    };
    raw.as_i64().unwrap_or(0)
}
/// Reports whether `production` contains at least one `FIELD` node.
///
/// Only the production's own structure is searched; `SYMBOL` references are
/// not expanded.
fn has_any_field(production: &Production) -> bool {
    match production {
        Production::Field { .. } => true,
        Production::Seq { members } | Production::Choice { members } => {
            for member in members {
                if has_any_field(member) {
                    return true;
                }
            }
            false
        }
        Production::Repeat { content }
        | Production::Repeat1 { content }
        | Production::Optional { content }
        | Production::Alias { content, .. }
        | Production::Token { content }
        | Production::ImmediateToken { content }
        | Production::Prec { content, .. }
        | Production::PrecLeft { content, .. }
        | Production::PrecRight { content, .. }
        | Production::PrecDynamic { content, .. }
        | Production::Reserved { content, .. } => has_any_field(content),
        _ => false,
    }
}
/// Reports whether `production` contains a `FIELD` whose name is one of
/// `edge_kinds`.
///
/// The search stops at the outermost `FIELD` on each path: a field whose name
/// is not listed is not searched for nested fields.
fn has_field_in(production: &Production, edge_kinds: &[&str]) -> bool {
    match production {
        Production::Field { name, .. } => edge_kinds.iter().any(|kind| *kind == name.as_str()),
        Production::Seq { members } | Production::Choice { members } => {
            for member in members {
                if has_field_in(member, edge_kinds) {
                    return true;
                }
            }
            false
        }
        Production::Repeat { content }
        | Production::Repeat1 { content }
        | Production::Optional { content }
        | Production::Alias { content, .. }
        | Production::Token { content }
        | Production::ImmediateToken { content }
        | Production::Prec { content, .. }
        | Production::PrecLeft { content, .. }
        | Production::PrecRight { content, .. }
        | Production::PrecDynamic { content, .. }
        | Production::Reserved { content, .. } => has_field_in(content, edge_kinds),
        _ => false,
    }
}
/// Appends to `out` one `(field name, allowed literals)` entry for every
/// `FIELD` in `production` whose content is recognised by
/// `literal_choice_set`.
///
/// Fields are visited depth-first, and a field's content is searched for
/// nested fields whether or not the field itself produced an entry.
fn collect_field_token_restrictions<'a>(
    production: &'a Production,
    out: &mut Vec<(&'a str, Vec<&'a str>)>,
) {
    match production {
        Production::Field { name, content } => {
            // `Option` iterates over zero or one item, so this pushes only
            // when the field content is a literal set.
            out.extend(literal_choice_set(content).map(|allowed| (name.as_str(), allowed)));
            collect_field_token_restrictions(content, out);
        }
        Production::Seq { members } | Production::Choice { members } => {
            members
                .iter()
                .for_each(|member| collect_field_token_restrictions(member, out));
        }
        Production::Repeat { content }
        | Production::Repeat1 { content }
        | Production::Optional { content }
        | Production::Alias { content, .. }
        | Production::Token { content }
        | Production::ImmediateToken { content }
        | Production::Prec { content, .. }
        | Production::PrecLeft { content, .. }
        | Production::PrecRight { content, .. }
        | Production::PrecDynamic { content, .. }
        | Production::Reserved { content, .. } => {
            collect_field_token_restrictions(content, out);
        }
        _ => {}
    }
}
/// Extracts the literal strings admitted by an aliased literal production.
///
/// After stripping precedence/token/reserved wrappers, `p` must be an `ALIAS`.
/// Its (again unwrapped) content must be either a single `STRING` or a
/// `CHOICE` whose members are all `STRING`s once unwrapped. Any other shape
/// yields `None`. An empty `CHOICE` yields `Some` of an empty list.
fn literal_choice_set(p: &Production) -> Option<Vec<&str>> {
    /// Strips any stack of precedence, token and reserved wrappers.
    fn peel(mut node: &Production) -> &Production {
        loop {
            match node {
                Production::Prec { content, .. }
                | Production::PrecLeft { content, .. }
                | Production::PrecRight { content, .. }
                | Production::PrecDynamic { content, .. }
                | Production::Token { content }
                | Production::ImmediateToken { content }
                | Production::Reserved { content, .. } => node = &**content,
                _ => return node,
            }
        }
    }

    let aliased = match peel(p) {
        Production::Alias { content, .. } => peel(content),
        _ => return None,
    };
    match aliased {
        Production::String { value } => Some(vec![value.as_str()]),
        // Collecting into `Option<Vec<_>>` gives up at the first non-literal member.
        Production::Choice { members } => members
            .iter()
            .map(|member| match peel(member) {
                Production::String { value } => Some(value.as_str()),
                _ => None,
            })
            .collect(),
        _ => None,
    }
}
/// Decides whether `production` can start by consuming an edge whose kind is
/// `edge_field` and whose target vertex has kind `target_kind`.
///
/// Terminals never accept an edge. Symbols and named aliases only accept
/// `child_of` edges; `FIELD`s only accept an edge whose kind equals the field
/// name. In a `SEQ`, later members are tried only while every earlier member
/// can yield nothing.
fn accepts_first_edge(
grammar: &Grammar,
production: &Production,
edge_field: &str,
target_kind: &str,
) -> bool {
// True when the yield set of `prod` contains `kind`, or contains any entry
// of the set that `grammar.subtypes` stores under `kind`.
fn yield_contains(grammar: &Grammar, prod: &Production, kind: &str) -> bool {
let mut visited = std::collections::HashSet::new();
// Works on a private copy of the grammar's yield cache, so nothing
// computed here is written back to `grammar`.
let mut cache = grammar.yield_sets.clone();
let ys = yield_of_production(grammar, prod, &mut visited, &mut cache);
ys.contains(kind)
|| grammar
.subtypes
.get(kind)
.is_some_and(|subs| subs.iter().any(|s| ys.contains(s.as_str())))
}
// True when the yield set of `prod` contains the empty string or is empty;
// `SEQ` handling below treats such a member as skippable.
fn yield_has_epsilon(grammar: &Grammar, prod: &Production) -> bool {
let mut visited = std::collections::HashSet::new();
let mut cache = grammar.yield_sets.clone();
let ys = yield_of_production(grammar, prod, &mut visited, &mut cache);
ys.contains("") || ys.is_empty()
}
match production {
// Terminals consume no edge.
Production::String { .. } | Production::Pattern { .. } | Production::Blank => false,
Production::Symbol { name } => {
// A bare symbol is only reachable through an unnamed child edge.
if edge_field != "child_of" {
return false;
}
if name == target_kind {
return true;
}
// Also accept when `name` is in the set `grammar.subtypes` keeps for the target kind.
if grammar
.subtypes
.get(target_kind)
.is_some_and(|s| s.contains(name))
{
return true;
}
// Names starting with '_' and declared supertypes are looked through:
// the decision is delegated to the rule they name.
// NOTE(review): there is no visited set on this path, so a rule that reaches
// itself in first position would recurse without bound — confirm this cannot occur.
let is_expand = name.starts_with('_') || grammar.supertypes.contains(name.as_str());
if is_expand {
if let Some(rule) = grammar.rules.get(name) {
return accepts_first_edge(grammar, rule, edge_field, target_kind);
}
}
false
}
Production::Alias {
named,
value,
content,
} => {
// A named alias with a value is matched by that value alone; its content
// is not consulted. Any other alias defers to its content.
if *named && !value.is_empty() {
edge_field == "child_of" && value == target_kind
} else {
accepts_first_edge(grammar, content, edge_field, target_kind)
}
}
Production::Field { name, content } => {
edge_field == name.as_str() && yield_contains(grammar, content, target_kind)
}
Production::Seq { members } => {
for m in members {
if accepts_first_edge(grammar, m, edge_field, target_kind) {
return true;
}
// A member that cannot be skipped blocks everything after it.
if !yield_has_epsilon(grammar, m) {
return false;
}
}
false
}
Production::Choice { members } => members
.iter()
.any(|m| accepts_first_edge(grammar, m, edge_field, target_kind)),
// Transparent wrappers: the answer is that of the wrapped production.
Production::Optional { content }
| Production::Repeat { content }
| Production::Repeat1 { content }
| Production::Token { content }
| Production::ImmediateToken { content }
| Production::Prec { content, .. }
| Production::PrecLeft { content, .. }
| Production::PrecRight { content, .. }
| Production::PrecDynamic { content, .. }
| Production::Reserved { content, .. } => {
accepts_first_edge(grammar, content, edge_field, target_kind)
}
}
}
/// Returns the value of the first `pre-alias-symbol` constraint attached to
/// `vertex_id`, or `None` when the vertex has no constraints or none of that
/// sort.
fn pre_alias_symbol<'a>(schema: &'a Schema, vertex_id: &panproto_gat::Name) -> Option<&'a str> {
    let constraints = schema.constraints.get(vertex_id)?;
    constraints
        .iter()
        .find(|constraint| constraint.sort.as_ref() == "pre-alias-symbol")
        .map(|constraint| constraint.value.as_str())
}
/// Appends to `out` the source symbol of every named alias that sits directly
/// under a `FIELD` called `field_name` inside `production`.
///
/// A field contributes only when its content, after removing at most one
/// precedence/token/reserved wrapper, is a named `ALIAS` whose content is a
/// plain `SYMBOL`. A matching field is not searched any deeper; fields with a
/// different name are searched for nested fields.
fn field_alias_sources<'a>(production: &'a Production, field_name: &str, out: &mut Vec<&'a str>) {
    /// Returns the symbol wrapped by a named alias, looking through a single
    /// wrapper layer only.
    fn aliased_symbol(node: &Production) -> Option<&str> {
        let candidate = match node {
            Production::Prec { content, .. }
            | Production::PrecLeft { content, .. }
            | Production::PrecRight { content, .. }
            | Production::PrecDynamic { content, .. }
            | Production::Token { content }
            | Production::ImmediateToken { content }
            | Production::Reserved { content, .. } => &**content,
            other => other,
        };
        if let Production::Alias {
            content,
            named: true,
            ..
        } = candidate
        {
            if let Production::Symbol { name } = &**content {
                return Some(name.as_str());
            }
        }
        None
    }

    match production {
        Production::Field { name, content } => {
            if name.as_str() == field_name {
                out.extend(aliased_symbol(content));
            } else {
                field_alias_sources(content, field_name, out);
            }
        }
        Production::Repeat { content }
        | Production::Repeat1 { content }
        | Production::Optional { content }
        | Production::Alias { content, .. }
        | Production::Token { content }
        | Production::ImmediateToken { content }
        | Production::Prec { content, .. }
        | Production::PrecLeft { content, .. }
        | Production::PrecRight { content, .. }
        | Production::PrecDynamic { content, .. }
        | Production::Reserved { content, .. } => {
            field_alias_sources(content, field_name, out);
        }
        Production::Seq { members } | Production::Choice { members } => {
            members
                .iter()
                .for_each(|member| field_alias_sources(member, field_name, out));
        }
        _ => {}
    }
}
/// Checks `alt` against the `pre-alias-symbol` constraints of the cursor's
/// unconsumed field edges.
///
/// An edge is ignored when it is consumed, is a `child_of` edge, has no
/// `pre-alias-symbol` constraint on its target, or when `alt` declares no
/// alias source for that field. For every remaining edge the recorded symbol
/// must be one of the alias sources `alt` declares for the field; otherwise
/// the alternative is rejected.
fn alt_satisfies_pre_alias_constraints(
    schema: &Schema,
    cursor: &ChildCursor<'_>,
    alt: &Production,
) -> bool {
    cursor
        .edges
        .iter()
        .enumerate()
        .filter(|(idx, _)| !cursor.consumed[*idx])
        .all(|(_, edge)| {
            let field = edge.kind.as_ref();
            if field == "child_of" {
                return true;
            }
            let Some(recorded) = pre_alias_symbol(schema, &edge.tgt) else {
                return true;
            };
            let mut expected: Vec<&str> = Vec::new();
            field_alias_sources(alt, field, &mut expected);
            expected.is_empty() || expected.contains(&recorded)
        })
}
/// Checks `alt` against the literal values carried by the cursor's unconsumed
/// field edges.
///
/// For each literal-restricted field collected from `alt`: if the cursor has
/// at least one unconsumed edge of that field, then at least one of those
/// edges must target a vertex whose `literal-value` is in the field's allowed
/// set. Fields with no unconsumed edge impose no restriction.
fn alt_satisfies_field_token_restrictions(
    schema: &Schema,
    cursor: &ChildCursor<'_>,
    alt: &Production,
) -> bool {
    let mut restrictions: Vec<(&str, Vec<&str>)> = Vec::new();
    collect_field_token_restrictions(alt, &mut restrictions);

    for (field_name, allowed) in &restrictions {
        let mut pending = cursor
            .edges
            .iter()
            .enumerate()
            .filter(|(idx, edge)| !cursor.consumed[*idx] && edge.kind.as_ref() == *field_name)
            .map(|(_, edge)| edge)
            .peekable();
        // No unconsumed edge for this field: nothing to check.
        if pending.peek().is_none() {
            continue;
        }
        let admitted = pending.any(|edge| {
            literal_value(schema, &edge.tgt).is_some_and(|text| allowed.contains(&text))
        });
        if !admitted {
            return false;
        }
    }
    true
}
/// Reports whether any constraint on `vertex_id` is mentioned by `production`.
///
/// A `STRING` matches a constraint whose value or sort equals the literal; a
/// `FIELD` matches a constraint whose sort equals the field name, and is
/// otherwise searched through its content. Returns `false` when the vertex has
/// no constraints at all.
fn has_relevant_constraint(
    production: &Production,
    schema: &Schema,
    vertex_id: &panproto_gat::Name,
) -> bool {
    fn mentions(node: &Production, constraints: &[panproto_schema::Constraint]) -> bool {
        match node {
            Production::String { value } => {
                for c in constraints {
                    if c.value == *value || c.sort.as_ref() == value {
                        return true;
                    }
                }
                false
            }
            Production::Field { name, content } => {
                if constraints.iter().any(|c| c.sort.as_ref() == name) {
                    return true;
                }
                mentions(content, constraints)
            }
            Production::Seq { members } | Production::Choice { members } => {
                for member in members {
                    if mentions(member, constraints) {
                        return true;
                    }
                }
                false
            }
            Production::Repeat { content }
            | Production::Repeat1 { content }
            | Production::Optional { content }
            | Production::Alias { content, .. }
            | Production::Token { content }
            | Production::ImmediateToken { content }
            | Production::Prec { content, .. }
            | Production::PrecLeft { content, .. }
            | Production::PrecRight { content, .. }
            | Production::PrecDynamic { content, .. }
            | Production::Reserved { content, .. } => mentions(content, constraints),
            _ => false,
        }
    }

    let Some(constraints) = schema.constraints.get(vertex_id) else {
        return false;
    };
    mentions(production, constraints)
}
/// Returns the outgoing edges of `vertex_id` in emission order.
///
/// Each edge is replaced by the equal key stored in `schema.edges` when one
/// exists, then edges are ordered by their entry in `schema.orderings`. Edges
/// without an ordering sort last, and ties keep the order in which the edges
/// appear in `schema.outgoing`.
fn children_for<'a>(schema: &'a Schema, vertex_id: &panproto_gat::Name) -> Vec<&'a Edge> {
    let outgoing = match schema.outgoing.get(vertex_id) {
        Some(edges) => edges,
        None => return Vec::new(),
    };
    let mut positioned: Vec<(u32, &Edge)> = outgoing
        .iter()
        .map(|edge| {
            let canonical = match schema.edges.get_key_value(edge) {
                Some((stored, _)) => stored,
                None => edge,
            };
            let position = schema.orderings.get(canonical).copied().unwrap_or(u32::MAX);
            (position, canonical)
        })
        .collect();
    // `sort_by_key` is stable, so equal positions stay in their original order.
    positioned.sort_by_key(|(position, _)| *position);
    positioned.into_iter().map(|(_, edge)| edge).collect()
}
/// Looks up the kind of the vertex `vertex_id`, or `None` if the schema has no
/// such vertex.
fn vertex_id_kind<'a>(schema: &'a Schema, vertex_id: &panproto_gat::Name) -> Option<&'a str> {
    let vertex = schema.vertices.get(vertex_id)?;
    Some(vertex.kind.as_ref())
}
/// Returns the value of the first `literal-value` constraint attached to
/// `vertex_id`, or `None` when the vertex has no constraints or none of that
/// sort.
fn literal_value<'a>(schema: &'a Schema, vertex_id: &panproto_gat::Name) -> Option<&'a str> {
    schema.constraints.get(vertex_id).and_then(|constraints| {
        constraints
            .iter()
            .find(|constraint| constraint.sort.as_ref() == "literal-value")
            .map(|constraint| constraint.value.as_str())
    })
}
/// Reports whether `prod` contains a `PATTERN` that `is_newline_like_pattern`
/// accepts.
///
/// The search looks through sequences, choices, optionals, fields, aliases and
/// precedence/token/reserved wrappers. `REPEAT`/`REPEAT1` bodies and `SYMBOL`
/// references are not searched.
fn contains_newline_pattern(prod: &Production) -> bool {
    match prod {
        Production::Pattern { value } => is_newline_like_pattern(value),
        Production::Choice { members } | Production::Seq { members } => {
            for member in members {
                if contains_newline_pattern(member) {
                    return true;
                }
            }
            false
        }
        Production::Prec { content, .. }
        | Production::PrecLeft { content, .. }
        | Production::PrecRight { content, .. }
        | Production::PrecDynamic { content, .. }
        | Production::Token { content }
        | Production::ImmediateToken { content }
        | Production::Optional { content }
        | Production::Field { content, .. }
        | Production::Alias { content, .. }
        | Production::Reserved { content, .. } => contains_newline_pattern(content),
        _ => false,
    }
}
/// Reports whether `pattern` is built solely from newline atoms and the
/// quantifiers `?`, `*`, `+`, with at least one newline atom present.
///
/// A newline atom is a raw `\n` or `\r` character, or the two-character
/// escape `\n` / `\r`. Any other character, any other escape, or a trailing
/// lone backslash makes the pattern non-newline-like.
fn is_newline_like_pattern(pattern: &str) -> bool {
    let mut found_newline = false;
    let mut remaining = pattern.chars();
    while let Some(current) = remaining.next() {
        let is_newline_atom = match current {
            // Quantifiers are allowed anywhere but do not count as atoms.
            '?' | '*' | '+' => continue,
            '\n' | '\r' => true,
            '\\' => matches!(remaining.next(), Some('n' | 'r')),
            _ => false,
        };
        if !is_newline_atom {
            return false;
        }
        found_newline = true;
    }
    found_newline
}
/// Reports whether `pattern` matches nothing but whitespace.
///
/// Trailing quantifiers (`?`, `*`, `+`) are ignored. What remains must be
/// `\s`, a single space, `\t`, or a bracketed class whose members are all
/// drawn from space, tab, `\s`, `\t`, `\r` and `\n`, with at least one member.
fn is_whitespace_only_pattern(pattern: &str) -> bool {
    let core = pattern.trim_end_matches(|c: char| matches!(c, '?' | '*' | '+'));
    let class_body = match core {
        "" => return false,
        "\\s" | " " | "\\t" => return true,
        other => match other
            .strip_prefix('[')
            .and_then(|rest| rest.strip_suffix(']'))
        {
            Some(body) => body,
            None => return false,
        },
    };

    let mut members = class_body.chars();
    let mut member_count = 0usize;
    while let Some(ch) = members.next() {
        let is_blank = match ch {
            ' ' | '\t' => true,
            '\\' => matches!(members.next(), Some('s' | 't' | 'r' | 'n')),
            _ => false,
        };
        if !is_blank {
            return false;
        }
        member_count += 1;
    }
    // An empty class (`[]`) does not count as whitespace.
    member_count > 0
}
/// Produces stand-in text for a `PATTERN` terminal.
///
/// When `decode_simple_pattern_literal` can decode the pattern, that decoded
/// text is returned. Otherwise a placeholder is chosen from substrings of the
/// pattern: `0` for digit classes, `_x` for identifier-like classes, `""` when
/// the pattern mentions a quote character, and `_` as the last resort.
fn placeholder_for_pattern(pattern: &str) -> String {
    if let Some(decoded) = decode_simple_pattern_literal(pattern) {
        return decoded;
    }
    let looks_numeric = pattern.contains("[0-9]") || pattern.contains("\\d");
    let looks_wordy = pattern.contains("[a-zA-Z_]") || pattern.contains("\\w");
    let mentions_quote = pattern.contains('"') || pattern.contains('\'');
    let placeholder = if looks_numeric {
        "0"
    } else if looks_wordy {
        "_x"
    } else if mentions_quote {
        "\"\""
    } else {
        "_"
    };
    placeholder.to_owned()
}
/// Decodes a regex `pattern` that denotes exactly one fixed string.
///
/// Returns `None` when the pattern is not a plain literal, namely when it
/// contains any of `[ ] ( ) * + ? | { }`, ends with a dangling backslash, or
/// uses an escape that stands for a character class, an assertion or a
/// code-point form (`\d \D \w \W \s \S \b \B \p \P \x \u`).
///
/// The escapes `\n`, `\r` and `\t` decode to the corresponding control
/// character. Every other escaped character decodes to itself, so `\.` is `.`
/// and `\/` is `/`.
fn decode_simple_pattern_literal(pattern: &str) -> Option<String> {
    if pattern
        .chars()
        .any(|c| matches!(c, '[' | ']' | '(' | ')' | '*' | '+' | '?' | '|' | '{' | '}'))
    {
        return None;
    }
    let mut out = String::with_capacity(pattern.len());
    let mut chars = pattern.chars();
    while let Some(c) = chars.next() {
        if c != '\\' {
            out.push(c);
            continue;
        }
        // A trailing lone backslash is not a valid escape.
        match chars.next()? {
            'n' => out.push('\n'),
            'r' => out.push('\r'),
            't' => out.push('\t'),
            // These escapes match a set of characters (or a position), not the
            // letter itself; decoding `\d` as "d" would yield text the pattern
            // does not match, so the pattern is not a simple literal.
            'd' | 'D' | 'w' | 'W' | 's' | 'S' | 'b' | 'B' | 'p' | 'P' | 'x' | 'u' => {
                return None;
            }
            other => out.push(other),
        }
    }
    Some(out)
}
/// One element of the token stream buffered by `Output` and rendered by
/// `layout`.
#[derive(Clone)]
enum Token {
// Literal text together with the role that drives spacing around it.
Lit(String, TokenRole),
// Increases the indentation depth used for subsequent lines.
IndentOpen,
// Decreases the indentation depth and ends the current line if one is open.
IndentClose,
// Ends the current line if anything has been written on it.
LineBreak,
// Forces a separator before the next literal on the same line.
ForceSpace,
// Suppresses the role-based separator before the next literal.
NoSpace,
}
/// Token buffer for emitted text; `finish` renders it to bytes via `layout`.
struct Output<'a> {
// Tokens emitted so far, in order.
tokens: Vec<Token>,
// Formatting settings (indent-close tokens, line-break tokens, newline, separator, ...).
policy: &'a FormatPolicy,
// Grammar consulted for token roles, indent triggers and line-comment prefixes.
grammar: &'a Grammar,
// Name of the rule being emitted; keys the role and indent-trigger lookups. Starts as `None`.
current_rule: Option<String>,
// Optional grammar cassette supplied at construction.
cassette: Option<&'a dyn crate::languages::cassettes::GrammarCassette>,
}
/// Rollback point for an `Output`, taken with `snapshot` and applied with
/// `restore`.
#[derive(Clone)]
struct OutputSnapshot {
// Number of tokens in the buffer when the snapshot was taken.
tokens_len: usize,
}
impl<'a> Output<'a> {
/// Creates an empty output with no current rule.
fn new(
policy: &'a FormatPolicy,
grammar: &'a Grammar,
cassette: Option<&'a dyn crate::languages::cassettes::GrammarCassette>,
) -> Self {
Self {
tokens: Vec::new(),
policy,
grammar,
current_rule: None,
cassette,
}
}
/// Emits `value`, letting `token_with_role` determine its role.
fn token(&mut self, value: &str) {
self.token_with_role(value, None);
}
/// Emits `value` as a literal plus whatever layout tokens it implies.
///
/// Empty values are ignored and a bare line terminator becomes a
/// `LineBreak`. When `explicit_role` is `None` the role comes from
/// `lookup_role`, except for text with trailing line terminators, which
/// defaults to `TokenRole::Terminal`.
fn token_with_role(&mut self, value: &str, explicit_role: Option<TokenRole>) {
if value.is_empty() {
return;
}
if value == "\n" || value == "\r\n" || value == "\r" {
self.tokens.push(Token::LineBreak);
return;
}
// Text followed by line terminators: emit the text, then a line break.
let trimmed = value.trim_end_matches(['\n', '\r']);
let trailing_newlines = value.len() - trimmed.len();
if trailing_newlines > 0 && !trimmed.is_empty() {
let role = explicit_role.unwrap_or(TokenRole::Terminal);
if role == TokenRole::BracketClose
&& self.policy.indent_close.iter().any(|t| t == trimmed)
{
self.tokens.push(Token::IndentClose);
}
self.tokens.push(Token::Lit(trimmed.to_owned(), role));
if role == TokenRole::BracketOpen {
if let Some(ref rule) = self.current_rule {
if self
.grammar
.indent_triggers
.contains(&(rule.clone(), trimmed.to_owned()))
{
self.tokens.push(Token::IndentOpen);
}
}
}
self.tokens.push(Token::LineBreak);
return;
}
// General path. A value made only of several line terminators (e.g. "\n\n")
// also lands here and is emitted as a literal.
let role = explicit_role.unwrap_or_else(|| self.lookup_role(value));
// A closing bracket listed in the policy dedents before it is written.
if role == TokenRole::BracketClose && self.policy.indent_close.iter().any(|t| t == value) {
self.tokens.push(Token::IndentClose);
}
self.tokens.push(Token::Lit(value.to_owned(), role));
if role == TokenRole::BracketOpen {
// An opening bracket indents only when the grammar lists (current rule, token) as a trigger.
let grammar_indent = self.current_rule.as_ref().is_some_and(|rule| {
self.grammar
.indent_triggers
.contains(&(rule.clone(), value.to_owned()))
});
if grammar_indent {
self.tokens.push(Token::IndentOpen);
self.tokens.push(Token::LineBreak);
}
}
// Inside a rule, a bracket that is not an indent trigger never gets the
// policy's line break after it.
let is_non_indent_bracket = self.current_rule.is_some()
&& (role == TokenRole::BracketOpen || role == TokenRole::BracketClose)
&& !self.current_rule.as_ref().is_some_and(|rule| {
self.grammar
.indent_triggers
.contains(&(rule.clone(), value.to_owned()))
});
if !is_non_indent_bracket && self.policy.line_break_after.iter().any(|t| t == value) {
self.tokens.push(Token::LineBreak);
}
}
/// Resolves the role of `value`: the current rule's entry in
/// `grammar.token_roles` if present, otherwise `Keyword` for word-like
/// text and `Operator` for everything else.
fn lookup_role(&self, value: &str) -> TokenRole {
if let Some(ref rule) = self.current_rule {
if let Some(role_map) = self.grammar.token_roles.get(rule) {
if let Some(role) = role_map.get(value) {
return *role;
}
}
}
if is_word_like(value) {
TokenRole::Keyword
} else {
TokenRole::Operator
}
}
/// Emits `value` with `role`; a `BracketOpen` always opens an indent level
/// and breaks the line, without consulting the grammar's indent triggers
/// or the policy's `line_break_after` list.
fn token_with_indent_open(&mut self, value: &str, role: TokenRole) {
if value.is_empty() {
return;
}
if role == TokenRole::BracketClose && self.policy.indent_close.iter().any(|t| t == value) {
self.tokens.push(Token::IndentClose);
}
self.tokens.push(Token::Lit(value.to_owned(), role));
if role == TokenRole::BracketOpen {
self.tokens.push(Token::IndentOpen);
self.tokens.push(Token::LineBreak);
}
}
/// Emits a `LineBreak`.
fn newline(&mut self) {
self.tokens.push(Token::LineBreak);
}
/// Opens an indent level and breaks the line.
fn indent_open(&mut self) {
self.tokens.push(Token::IndentOpen);
self.tokens.push(Token::LineBreak);
}
/// Closes an indent level.
fn indent_close(&mut self) {
self.tokens.push(Token::IndentClose);
}
/// Records the current token count so later output can be rolled back.
fn snapshot(&self) -> OutputSnapshot {
OutputSnapshot {
tokens_len: self.tokens.len(),
}
}
/// Drops every token emitted after `snap` was taken.
fn restore(&mut self, snap: OutputSnapshot) {
self.tokens.truncate(snap.tokens_len);
}
/// Reports whether any literal was emitted after `snap` was taken.
/// Indexing panics if the buffer is shorter than the snapshot length.
fn lit_emitted_since(&self, snap: OutputSnapshot) -> bool {
self.tokens[snap.tokens_len..]
.iter()
.any(|t| matches!(t, Token::Lit(_, _)))
}
/// Suppresses the separator before the next literal.
fn no_space(&mut self) {
self.tokens.push(Token::NoSpace);
}
/// Consumes the output and renders the buffered tokens to bytes.
fn finish(self) -> Vec<u8> {
layout(
&self.tokens,
self.policy,
&self.grammar.line_comment_prefixes,
)
}
}
/// Renders a token stream to bytes, applying indentation, line breaks and
/// inter-token separators.
///
/// * `tokens` — the stream to render.
/// * `policy` — supplies the newline and separator strings and the indent
///   width (in spaces per level).
/// * `line_comment_prefixes` — a literal starting with any of these is
///   followed by a forced newline, so later tokens start on a fresh line.
///
/// A trailing newline is appended when the output does not already end at a
/// line start. Setting the `DBG_LAYOUT` environment variable traces every
/// token to stderr.
fn layout(tokens: &[Token], policy: &FormatPolicy, line_comment_prefixes: &[String]) -> Vec<u8> {
    let mut bytes = Vec::new();
    let mut indent: usize = 0;
    let mut at_line_start = true;
    let mut last_role: Option<TokenRole> = None;
    let mut last_text: String = String::new();
    let mut suppress_next_separator = false;
    let mut force_next_separator = false;
    let newline = policy.newline.as_bytes();
    let separator = policy.separator.as_bytes();
    // The environment lookup is loop-invariant; do it once rather than per token.
    let debug_layout = std::env::var("DBG_LAYOUT").is_ok();
    for (tok_idx, tok) in tokens.iter().enumerate() {
        if debug_layout {
            match tok {
                Token::Lit(v, r) => eprintln!(
                    " TOK: Lit({v:?}, {r:?}) at_line_start={at_line_start} last_role={last_role:?}"
                ),
                Token::IndentOpen => eprintln!(" TOK: IndentOpen"),
                Token::IndentClose => eprintln!(" TOK: IndentClose"),
                Token::LineBreak => eprintln!(" TOK: LineBreak"),
                Token::NoSpace => eprintln!(" TOK: NoSpace"),
                Token::ForceSpace => eprintln!(" TOK: ForceSpace"),
            }
        }
        match tok {
            Token::IndentOpen => indent += 1,
            Token::IndentClose => {
                // Unbalanced closes clamp at zero instead of underflowing.
                indent = indent.saturating_sub(1);
                if !at_line_start {
                    bytes.extend_from_slice(newline);
                    at_line_start = true;
                }
            }
            Token::LineBreak => {
                // Consecutive breaks collapse: nothing is written on an empty line.
                if !at_line_start {
                    bytes.extend_from_slice(newline);
                    at_line_start = true;
                }
            }
            Token::NoSpace => {
                suppress_next_separator = true;
            }
            Token::ForceSpace => {
                force_next_separator = true;
            }
            Token::Lit(value, role) => {
                // An opening bracket immediately followed by `IndentOpen` starts a block.
                let is_block_open = *role == TokenRole::BracketOpen
                    && tokens
                        .get(tok_idx + 1)
                        .is_some_and(|t| matches!(t, Token::IndentOpen));
                if at_line_start {
                    bytes.extend(std::iter::repeat_n(b' ', indent * policy.indent_width));
                } else if let Some(prev_role) = last_role {
                    // A forced space wins over suppression; block openers get a
                    // space after a terminal or closing bracket even when the
                    // role table says none is needed.
                    let want_space = force_next_separator
                        || (!suppress_next_separator
                            && needs_space_by_role(prev_role, &last_text, *role, value))
                        || (is_block_open
                            && !suppress_next_separator
                            && matches!(prev_role, TokenRole::Terminal | TokenRole::BracketClose));
                    if want_space {
                        bytes.extend_from_slice(separator);
                    }
                }
                // Spacing overrides apply to a single literal only.
                suppress_next_separator = false;
                force_next_separator = false;
                bytes.extend_from_slice(value.as_bytes());
                at_line_start = false;
                last_role = Some(*role);
                last_text.clear();
                last_text.push_str(value);
                // A line comment runs to end of line, so whatever follows must
                // start on a new one.
                if line_comment_prefixes
                    .iter()
                    .any(|p| value.starts_with(p.as_str()))
                {
                    bytes.extend_from_slice(newline);
                    at_line_start = true;
                    last_role = None;
                }
            }
        }
    }
    if !at_line_start {
        bytes.extend_from_slice(newline);
    }
    bytes
}
/// Returns the role to use for spacing decisions: a bracket whose text is
/// word-like is treated as a `Keyword`; every other role is returned as is.
fn effective_spacing_role(role: TokenRole, text: &str) -> TokenRole {
    let is_bracket = matches!(role, TokenRole::BracketOpen | TokenRole::BracketClose);
    if is_bracket && is_word_like(text) {
        TokenRole::Keyword
    } else {
        role
    }
}
/// Decides from token roles alone whether a separator belongs between two
/// adjacent literals on the same line.
///
/// Both roles are first normalised with `effective_spacing_role`. The arms are
/// order-sensitive: earlier rules take priority over later ones.
fn needs_space_by_role(last: TokenRole, last_text: &str, next: TokenRole, next_text: &str) -> bool {
    let prev = effective_spacing_role(last, last_text);
    let upcoming = effective_spacing_role(next, next_text);
    match (prev, upcoming) {
        // Nothing after an opener, before a closer, or before a separator.
        (TokenRole::BracketOpen, _) | (_, TokenRole::BracketClose) | (_, TokenRole::Separator) => {
            false
        }
        // Always a space after a separator.
        (TokenRole::Separator, _) => true,
        // Connectors bind tightly; openers attach to a preceding terminal or closer.
        (TokenRole::Connector, _)
        | (_, TokenRole::Connector)
        | (TokenRole::Terminal, TokenRole::BracketOpen)
        | (TokenRole::BracketClose, TokenRole::BracketOpen) => false,
        // Every remaining combination is separated.
        (TokenRole::Keyword, _)
        | (_, TokenRole::Keyword)
        | (TokenRole::Terminal, TokenRole::Terminal)
        | (TokenRole::Terminal, TokenRole::Operator)
        | (TokenRole::Operator, TokenRole::Terminal)
        | (TokenRole::Operator, TokenRole::Operator)
        | (TokenRole::BracketClose, _)
        | (TokenRole::Operator, TokenRole::BracketOpen) => true,
    }
}
#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
use super::*;
fn test_grammar() -> Grammar {
Grammar::from_bytes("test", b"{\"name\":\"test\",\"rules\":{}}").unwrap_or_else(|_| {
serde_json::from_str::<Grammar>(r#"{"name":"test","rules":{}}"#).unwrap()
})
}
#[test]
fn parses_simple_grammar_json() {
let bytes = br#"{
"name": "tiny",
"rules": {
"program": {
"type": "SEQ",
"members": [
{"type": "STRING", "value": "hello"},
{"type": "STRING", "value": ";"}
]
}
}
}"#;
let g = Grammar::from_bytes("tiny", bytes).expect("valid tiny grammar");
assert!(g.rules.contains_key("program"));
}
#[test]
fn output_emits_punctuation_without_leading_space() {
let policy = FormatPolicy::default();
let g = test_grammar();
let mut out = Output::new(&policy, &g, None);
out.token_with_role("foo", Some(TokenRole::Terminal));
out.token_with_role("(", Some(TokenRole::BracketOpen));
out.token_with_role(")", Some(TokenRole::BracketClose));
out.token_with_role(";", Some(TokenRole::Separator));
let bytes = out.finish();
let s = std::str::from_utf8(&bytes).expect("ascii output");
assert!(s.starts_with("foo();"), "got {s:?}");
}
#[test]
fn grammar_from_bytes_rejects_malformed_input() {
let result = Grammar::from_bytes("malformed", b"not json");
let err = result.expect_err("malformed bytes must yield Err");
let msg = err.to_string();
assert!(
msg.contains("malformed"),
"error message should name the protocol: {msg:?}"
);
}
#[test]
fn output_indents_after_open_brace() {
let policy = FormatPolicy::default();
let g = test_grammar();
let mut out = Output::new(&policy, &g, None);
out.token_with_role("fn", Some(TokenRole::Keyword));
out.token_with_role("foo", Some(TokenRole::Terminal));
out.token_with_role("(", Some(TokenRole::BracketOpen));
out.token_with_role(")", Some(TokenRole::BracketClose));
out.token_with_role("{", Some(TokenRole::BracketOpen));
out.token_with_role("body", Some(TokenRole::Terminal));
out.token_with_role("}", Some(TokenRole::BracketClose));
let bytes = out.finish();
let s = std::str::from_utf8(&bytes).expect("ascii output");
assert!(s.contains("{\n"), "newline after opening brace: {s:?}");
assert!(s.contains("body"), "body inside block: {s:?}");
assert!(s.ends_with("}\n"), "newline after closing brace: {s:?}");
}
#[test]
fn output_no_space_between_word_and_dot() {
let policy = FormatPolicy::default();
let g = test_grammar();
let mut out = Output::new(&policy, &g, None);
out.token_with_role("foo", Some(TokenRole::Terminal));
out.token_with_role(".", Some(TokenRole::Operator));
out.token_with_role("bar", Some(TokenRole::Terminal));
let bytes = out.finish();
let s = std::str::from_utf8(&bytes).expect("ascii output");
assert!(
s.contains("foo") && s.contains("bar"),
"both identifiers present: {s:?}"
);
}
#[test]
fn output_snapshot_restore_truncates_bytes() {
let policy = FormatPolicy::default();
let g = test_grammar();
let mut out = Output::new(&policy, &g, None);
out.token("keep");
let snap = out.snapshot();
out.token("drop");
out.token("more");
out.restore(snap);
out.token("after");
let bytes = out.finish();
let s = std::str::from_utf8(&bytes).expect("ascii output");
assert!(s.contains("keep"), "kept token survives: {s:?}");
assert!(s.contains("after"), "post-restore token visible: {s:?}");
assert!(!s.contains("drop"), "rolled-back token removed: {s:?}");
assert!(!s.contains("more"), "rolled-back token removed: {s:?}");
}
#[test]
fn child_cursor_take_field_consumes_once() {
let edges_owned: Vec<Edge> = vec![Edge {
src: panproto_gat::Name::from("p"),
tgt: panproto_gat::Name::from("c"),
kind: panproto_gat::Name::from("name"),
name: None,
}];
let edges: Vec<&Edge> = edges_owned.iter().collect();
let mut cursor = ChildCursor::new(&edges);
let first = cursor.take_field("name");
let second = cursor.take_field("name");
assert!(first.is_some(), "first take returns the edge");
assert!(
second.is_none(),
"second take returns None (already consumed)"
);
}
#[test]
fn child_cursor_take_matching_predicate() {
let edges_owned: Vec<Edge> = vec![
Edge {
src: "p".into(),
tgt: "c1".into(),
kind: "child_of".into(),
name: None,
},
Edge {
src: "p".into(),
tgt: "c2".into(),
kind: "key".into(),
name: None,
},
];
let edges: Vec<&Edge> = edges_owned.iter().collect();
let mut cursor = ChildCursor::new(&edges);
assert!(cursor.has_matching(|e| e.kind.as_ref() == "key"));
let taken = cursor.take_matching(|e| e.kind.as_ref() == "key");
assert!(taken.is_some());
assert!(
!cursor.has_matching(|e| e.kind.as_ref() == "key"),
"consumed edge no longer matches"
);
assert!(
cursor.has_matching(|e| e.kind.as_ref() == "child_of"),
"the other edge is still available"
);
}
#[test]
fn kind_satisfies_symbol_direct_match() {
let bytes = br#"{
"name": "tiny",
"rules": {
"x": {"type": "STRING", "value": "x"}
}
}"#;
let g = Grammar::from_bytes("tiny", bytes).expect("valid grammar");
assert!(kind_satisfies_symbol(&g, Some("x"), "x"));
assert!(!kind_satisfies_symbol(&g, Some("y"), "x"));
assert!(!kind_satisfies_symbol(&g, None, "x"));
}
#[test]
fn kind_satisfies_symbol_through_hidden_rule() {
let bytes = br#"{
"name": "tiny",
"rules": {
"_value": {
"type": "CHOICE",
"members": [
{"type": "SYMBOL", "name": "object"},
{"type": "SYMBOL", "name": "number"}
]
},
"object": {"type": "STRING", "value": "{}"},
"number": {"type": "PATTERN", "value": "[0-9]+"}
}
}"#;
let g = Grammar::from_bytes("tiny", bytes).expect("valid grammar");
assert!(
kind_satisfies_symbol(&g, Some("number"), "_value"),
"number is reachable from _value via CHOICE"
);
assert!(
kind_satisfies_symbol(&g, Some("object"), "_value"),
"object is reachable from _value via CHOICE"
);
assert!(
!kind_satisfies_symbol(&g, Some("string"), "_value"),
"string is NOT among the alternatives"
);
}
#[test]
fn first_symbol_skips_string_terminals() {
let prod: Production = serde_json::from_str(
r#"{
"type": "SEQ",
"members": [
{"type": "STRING", "value": "{"},
{"type": "SYMBOL", "name": "body"},
{"type": "STRING", "value": "}"}
]
}"#,
)
.expect("valid SEQ");
assert_eq!(first_symbol(&prod), Some("body"));
}
#[test]
fn placeholder_for_pattern_routes_by_regex_class() {
assert_eq!(placeholder_for_pattern("[0-9]+"), "0");
assert_eq!(placeholder_for_pattern("[a-zA-Z_]\\w*"), "_x");
assert_eq!(placeholder_for_pattern("\"[^\"]*\""), "\"\"");
assert_eq!(placeholder_for_pattern("\\d+\\.\\d+"), "0");
}
#[test]
fn format_policy_default_breaks_after_semicolon() {
let policy = FormatPolicy::default();
assert!(policy.line_break_after.iter().any(|t| t == ";"));
assert!(policy.indent_open.iter().any(|t| t == "{"));
assert!(policy.indent_close.iter().any(|t| t == "}"));
assert_eq!(policy.indent_width, 2);
}
#[test]
fn placeholder_decodes_literal_pattern_separators() {
assert_eq!(placeholder_for_pattern("\\n"), "\n");
assert_eq!(placeholder_for_pattern("\\r\\n"), "\r\n");
assert_eq!(placeholder_for_pattern(";"), ";");
assert_eq!(placeholder_for_pattern("[0-9]+"), "0");
assert_eq!(placeholder_for_pattern("a|b"), "_");
}
/// A grammar whose `supertypes` array holds plain strings loads, records
/// `expression` as a supertype, and accepts `identifier` (a CHOICE member)
/// but not `string` (absent from the CHOICE) for the `expression` symbol.
#[test]
fn supertypes_decode_from_grammar_json_strings() {
    let grammar_json = br#"{
"name": "tiny",
"supertypes": ["expression"],
"rules": {
"expression": {
"type": "CHOICE",
"members": [
{"type": "SYMBOL", "name": "binary_expression"},
{"type": "SYMBOL", "name": "identifier"}
]
},
"binary_expression": {"type": "STRING", "value": "x"},
"identifier": {"type": "PATTERN", "value": "[a-z]+"}
}
}"#;
    let grammar = Grammar::from_bytes("tiny", grammar_json).expect("parse");

    assert!(grammar.supertypes.contains("expression"));

    let satisfies_expression =
        |kind: &str| kind_satisfies_symbol(&grammar, Some(kind), "expression");
    assert!(satisfies_expression("identifier"));
    assert!(!satisfies_expression("string"));
}
/// A grammar whose `supertypes` array holds `{"type": "SYMBOL", ...}`
/// objects loads, records `stmt` as a supertype, and accepts `while_stmt`
/// (one of its CHOICE members) for the `stmt` symbol.
#[test]
fn supertypes_decode_from_grammar_json_objects() {
    let grammar_json = br#"{
"name": "tiny",
"supertypes": [{"type": "SYMBOL", "name": "stmt"}],
"rules": {
"stmt": {
"type": "CHOICE",
"members": [
{"type": "SYMBOL", "name": "while_stmt"},
{"type": "SYMBOL", "name": "if_stmt"}
]
},
"while_stmt": {"type": "STRING", "value": "while"},
"if_stmt": {"type": "STRING", "value": "if"}
}
}"#;
    let grammar = Grammar::from_bytes("tiny", grammar_json).expect("parse");

    let stmt_is_supertype = grammar.supertypes.contains("stmt");
    assert!(stmt_is_supertype);

    let while_satisfies_stmt = kind_satisfies_symbol(&grammar, Some("while_stmt"), "stmt");
    assert!(while_satisfies_stmt);
}
/// When the rule `_package_identifier` is a named ALIAS with value
/// `package_identifier`, the kind `package_identifier` satisfies the
/// `_package_identifier` symbol.
#[test]
fn alias_value_matches_kind() {
    let grammar_json = br#"{
"name": "tiny",
"rules": {
"_package_identifier": {
"type": "ALIAS",
"named": true,
"value": "package_identifier",
"content": {"type": "SYMBOL", "name": "identifier"}
},
"identifier": {"type": "PATTERN", "value": "[a-z]+"}
}
}"#;
    let grammar = Grammar::from_bytes("tiny", grammar_json).expect("parse");

    let alias_kind = Some("package_identifier");
    let matched = kind_satisfies_symbol(&grammar, alias_kind, "_package_identifier");

    assert!(matched);
}
/// `referenced_symbols` must report symbols nested inside CHOICE and
/// REPEAT/SEQ children of a top-level SEQ: here `attribute_item` (inside a
/// CHOICE) and `parameter` (direct member and inside a REPEAT).
#[test]
fn referenced_symbols_walks_nested_seq() {
    let nested_json = r#"{
"type": "SEQ",
"members": [
{"type": "CHOICE", "members": [
{"type": "SYMBOL", "name": "attribute_item"},
{"type": "BLANK"}
]},
{"type": "SYMBOL", "name": "parameter"},
{"type": "REPEAT", "content": {
"type": "SEQ",
"members": [
{"type": "STRING", "value": ","},
{"type": "SYMBOL", "name": "parameter"}
]
}}
]
}"#;
    let nested: Production = serde_json::from_str(nested_json).expect("seq");

    let found = referenced_symbols(&nested);

    for expected in ["attribute_item", "parameter"] {
        assert!(found.contains(&expected));
    }
}
/// `literal_strings` on a CHOICE of three STRING members returns their
/// values `+`, `-`, `*` in declaration order.
#[test]
fn literal_strings_collects_choice_members() {
    let operators_json = r#"{
"type": "CHOICE",
"members": [
{"type": "STRING", "value": "+"},
{"type": "STRING", "value": "-"},
{"type": "STRING", "value": "*"}
]
}"#;
    let operators: Production = serde_json::from_str(operators_json).expect("choice");

    let expected = vec!["+", "-", "*"];

    assert_eq!(literal_strings(&operators), expected);
}
/// A `RESERVED` JSON node deserialises into `Production::Reserved`, and its
/// `content` is the inner SYMBOL `_lowercase_identifier`.
#[test]
fn reserved_variant_deserialises() {
    let reserved_json = r#"{
"type": "RESERVED",
"content": {"type": "SYMBOL", "name": "_lowercase_identifier"},
"context_name": "attribute_id"
}"#;
    let prod: Production = serde_json::from_str(reserved_json).expect("RESERVED parses");

    // Guard on the outer variant first, then on the wrapped production.
    let Production::Reserved { content, .. } = &prod else {
        panic!("expected RESERVED, got {prod:?}");
    };
    let Production::Symbol { name } = &**content else {
        panic!("expected inner SYMBOL, got {content:?}");
    };

    assert_eq!(name, "_lowercase_identifier");
}
/// A whole grammar whose `program` rule is a `RESERVED` node loads through
/// `Grammar::from_bytes`, and the loaded rule table contains `program`.
#[test]
fn reserved_grammar_loads_end_to_end() {
    let grammar_json = br#"{
"name": "tiny_reserved",
"rules": {
"program": {
"type": "RESERVED",
"content": {"type": "SYMBOL", "name": "ident"},
"context_name": "keywords"
},
"ident": {"type": "PATTERN", "value": "[a-z]+"}
}
}"#;

    let grammar =
        Grammar::from_bytes("tiny_reserved", grammar_json).expect("RESERVED-using grammar loads");

    let has_program_rule = grammar.rules.contains_key("program");
    assert!(has_program_rule);
}
/// The walker helpers look through a `RESERVED` wrapper: with a FIELD `lhs`
/// around SYMBOL `expr` as its content, `first_symbol` finds `expr`,
/// `has_field_in` finds `lhs`, and `referenced_symbols` includes `expr`.
#[test]
fn reserved_walker_helpers_recurse_into_content() {
    let reserved_json = r#"{
"type": "RESERVED",
"content": {
"type": "FIELD",
"name": "lhs",
"content": {"type": "SYMBOL", "name": "expr"}
},
"context_name": "ctx"
}"#;
    let reserved: Production =
        serde_json::from_str(reserved_json).expect("nested RESERVED parses");

    let leading = first_symbol(&reserved);
    assert_eq!(leading, Some("expr"));

    let has_lhs = has_field_in(&reserved, &["lhs"]);
    assert!(has_lhs);

    assert!(referenced_symbols(&reserved).contains(&"expr"));
}
/// Test helper: runs `yield_of_production` for `prod` against `grammar` with
/// a fresh, empty visited set and a private copy of the grammar's
/// `yield_sets` as the cache, returning whatever that call produces.
fn yield_of(grammar: &Grammar, prod: &Production) -> std::collections::HashSet<String> {
    // Clone the cache: the callee wants `&mut`, but only `&Grammar` is held here.
    let mut memo = grammar.yield_sets.clone();
    let mut seen = std::collections::HashSet::new();
    yield_of_production(grammar, prod, &mut seen, &mut memo)
}
/// The yield set of a SEQ contains its first member (`identifier`) and does
/// not contain a later member (`target`).
#[test]
fn yield_set_seq_only_first_member() {
    let prod: Production = serde_json::from_str(
        r#"{
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "identifier"},
{"type": "STRING", "value": "as"},
{"type": "SYMBOL", "name": "target"}
]
}"#,
    )
    .expect("valid SEQ");

    // Deserialise the rule-less grammar directly so the fixture is always
    // built by one known path, matching the other yield-set tests. A
    // constructor-with-fallback would silently pick either of two grammars.
    let g = serde_json::from_str::<Grammar>(r#"{"name":"t","rules":{}}"#)
        .expect("empty grammar deserialises");

    let ys = yield_of(&g, &prod);
    assert!(ys.contains("identifier"), "SEQ yields first member");
    assert!(
        !ys.contains("target"),
        "SEQ must NOT yield non-first members"
    );
}
/// The yield set of a CHOICE over SYMBOLs `a` and `b` is exactly `{a, b}`.
#[test]
fn yield_set_choice_union() {
    let either_json = r#"{
"type": "CHOICE",
"members": [
{"type": "SYMBOL", "name": "a"},
{"type": "SYMBOL", "name": "b"}
]
}"#;
    let either: Production = serde_json::from_str(either_json).expect("valid CHOICE");
    let empty_grammar: Grammar = serde_json::from_str(r#"{"name":"t","rules":{}}"#).unwrap();

    let yielded = yield_of(&empty_grammar, &either);

    assert_eq!(yielded.len(), 2);
    for member in ["a", "b"] {
        assert!(yielded.contains(member));
    }
}
/// A SYMBOL referring to the rule `_value` (a CHOICE of `number` and
/// `object`) yields those two members and not the name `_value` itself.
#[test]
fn yield_set_hidden_expansion() {
    let grammar_json = r#"{"name":"t","rules":{
"_value": {
"type": "CHOICE",
"members": [
{"type": "SYMBOL", "name": "number"},
{"type": "SYMBOL", "name": "object"}
]
}
}}"#;
    let mut grammar: Grammar = serde_json::from_str(grammar_json).unwrap();
    // The derived tables are filled in by hand here, subtype closure first.
    grammar.subtypes = compute_subtype_closure(&grammar);
    grammar.yield_sets = compute_yield_sets(&grammar);

    let value_ref: Production =
        serde_json::from_str(r#"{"type": "SYMBOL", "name": "_value"}"#).unwrap();
    let yielded = yield_of(&grammar, &value_ref);

    assert!(
        yielded.contains("number"),
        "hidden rule expands into its CHOICE members"
    );
    assert!(yielded.contains("object"));
    assert!(
        !yielded.contains("_value"),
        "hidden rule name is not in yield set"
    );
}
/// The yield set of OPTIONAL(`x`) contains `x` and also the empty string,
/// which this test treats as the epsilon marker.
#[test]
fn yield_set_optional_includes_epsilon() {
    let optional_x: Production =
        serde_json::from_str(r#"{"type": "OPTIONAL", "content": {"type": "SYMBOL", "name": "x"}}"#)
            .unwrap();
    let empty_grammar: Grammar = serde_json::from_str(r#"{"name":"t","rules":{}}"#).unwrap();

    let yielded = yield_of(&empty_grammar, &optional_x);

    assert!(yielded.contains("x"));
    assert!(yielded.contains(""), "OPTIONAL includes epsilon");
}
/// A named ALIAS yields exactly one entry: its `value` (`alias_name`).
#[test]
fn yield_set_alias_uses_value() {
    let alias_json = r#"{"type": "ALIAS", "content": {"type": "SYMBOL", "name": "real"},
"named": true, "value": "alias_name"}"#;
    let alias: Production = serde_json::from_str(alias_json).unwrap();
    let empty_grammar: Grammar = serde_json::from_str(r#"{"name":"t","rules":{}}"#).unwrap();

    let yielded = yield_of(&empty_grammar, &alias);

    assert!(yielded.contains("alias_name"), "named ALIAS yields its value");
    assert_eq!(yielded.len(), 1);
}
}