#![allow(
clippy::module_name_repetitions,
clippy::too_many_lines,
clippy::too_many_arguments,
clippy::map_unwrap_or,
clippy::option_if_let_else,
clippy::elidable_lifetime_names,
clippy::items_after_statements,
clippy::needless_pass_by_value,
clippy::single_match_else,
clippy::manual_let_else,
clippy::match_same_arms,
clippy::missing_const_for_fn,
clippy::single_char_pattern,
clippy::naive_bytecount,
clippy::expect_used,
clippy::redundant_pub_crate,
clippy::used_underscore_binding,
clippy::redundant_field_names,
clippy::struct_field_names,
clippy::redundant_else,
clippy::similar_names
)]
use super::{
ChildCursor, EMIT_DEPTH, EMIT_MU_FRAMES, Edge, Grammar, Output, ParseError, Production, Schema,
Token, TokenRole, accepts_first_edge, alias_content_is_terminal_pattern,
aliased_source_literals, alt_satisfies_field_token_restrictions,
alt_satisfies_pre_alias_constraints, children_for, classify_seq_positions, clear_field_context,
collect_field_names, collect_inner_field_names_expanded, contains_newline_pattern,
current_field_context, first_unconsumed_target_fingerprint, has_field_in,
has_relevant_constraint, has_repeat_in, is_blank_line_rule, is_connector_punctuation,
is_immediate_token, is_newline_alt, is_newline_like_pattern, is_no_space_external,
is_whitespace_external, is_whitespace_only_pattern, is_word_like, leaf_terminal_role,
left_recursive_alts, literal_strings, literal_value, mandatory_field_names,
member_has_leading_bracket, pattern_absorbs_leading_space, placeholder_for_pattern,
pre_alias_symbol, prec_value, push_field_context, reconstruct_subtree_bytes,
reduces_to_immediate_token, referenced_symbols, repeat_body_is_whole_vertex_item,
repeat_has_bracket_keyed_member, seq_bracket_triggers_indent, seq_open_bracket_index,
unbounded_negated_class, unwrap_prec, unwrap_to_string, vertex_has_byte_span, vertex_id_kind,
yield_of_production,
};
/// Returns the vertices emission should start from.
///
/// When the schema declares explicit entries, those entries are returned in
/// their declared order, keeping only the ones that are present in
/// `schema.vertices`. Otherwise every vertex that is not the target of any
/// edge is returned, sorted.
pub(crate) fn collect_roots(schema: &Schema) -> Vec<&panproto_gat::Name> {
    if schema.entries.is_empty() {
        // No declared entry points: a root is any vertex no edge points at.
        let edge_targets: std::collections::HashSet<&panproto_gat::Name> =
            schema.edges.keys().map(|edge| &edge.tgt).collect();
        let mut unreferenced: Vec<&panproto_gat::Name> = Vec::new();
        for id in schema.vertices.keys() {
            if !edge_targets.contains(&id) {
                unreferenced.push(id);
            }
        }
        // Sorted so the result does not depend on map iteration order.
        unreferenced.sort();
        unreferenced
    } else {
        schema
            .entries
            .iter()
            .filter(|entry| schema.vertices.contains_key(*entry))
            .collect()
    }
}
/// Collects every symbol name reachable from `rule`.
///
/// Symbols whose name starts with `_`, and symbols listed in
/// `grammar.supertypes`, are additionally expanded through their own rule
/// body (each at most once). Named aliases with a non-empty value contribute
/// that value as well as whatever their content reaches.
fn rule_symbol_closure<'g>(
    grammar: &'g Grammar,
    rule: &'g Production,
) -> std::collections::HashSet<&'g str> {
    let mut reachable: std::collections::HashSet<&'g str> = std::collections::HashSet::new();
    // Names whose rule body has already been queued; prevents re-expansion on cycles.
    let mut expanded: std::collections::HashSet<&'g str> = std::collections::HashSet::new();
    // Explicit worklist in place of recursion.
    let mut pending: Vec<&'g Production> = vec![rule];
    while let Some(node) = pending.pop() {
        match node {
            Production::Seq { members } | Production::Choice { members } => {
                for member in members {
                    pending.push(member);
                }
            }
            Production::Symbol { name } => {
                reachable.insert(name.as_str());
                let transparent =
                    name.starts_with('_') || grammar.supertypes.contains(name.as_str());
                if transparent && expanded.insert(name.as_str()) {
                    if let Some(body) = grammar.rules.get(name) {
                        pending.push(body);
                    }
                }
            }
            Production::Alias {
                content,
                value,
                named,
            } => {
                if *named && !value.is_empty() {
                    reachable.insert(value.as_str());
                }
                pending.push(content);
            }
            Production::Repeat { content }
            | Production::Repeat1 { content }
            | Production::Optional { content }
            | Production::Field { content, .. }
            | Production::Token { content }
            | Production::ImmediateToken { content }
            | Production::Prec { content, .. }
            | Production::PrecLeft { content, .. }
            | Production::PrecRight { content, .. }
            | Production::PrecDynamic { content, .. }
            | Production::Reserved { content, .. } => pending.push(content),
            _ => {}
        }
    }
    reachable
}
/// Counts how many kinds in `demand` are satisfied by at least one symbol in
/// the symbol closure of `rule`. Each demand entry is counted independently,
/// so duplicates in `demand` are each counted.
fn rule_admits_count(grammar: &Grammar, rule: &Production, demand: &[&str]) -> usize {
    let reachable = rule_symbol_closure(grammar, rule);
    let mut admitted = 0usize;
    for &kind in demand {
        let satisfied = reachable
            .iter()
            .any(|&sym| kind_satisfies_symbol(grammar, Some(kind), sym));
        if satisfied {
            admitted += 1;
        }
    }
    admitted
}
/// Returns the largest value reported by `super::match_demand` for `rule`
/// against `demand`, or 0 when it reports nothing.
fn ordered_consumption(grammar: &Grammar, rule: &Production, demand: &[&str]) -> usize {
    let mut visited = Vec::new();
    let reached = super::match_demand(grammar, rule, demand, &[], 0, None, &mut visited);
    // Folding from 0 is the same as `max().unwrap_or(0)` for unsigned counts.
    reached.into_iter().fold(0, usize::max)
}
/// Computes a lower bound on the number of child vertices `rule` requires.
///
/// Visible symbols with a rule count as one child. `_`-prefixed symbols
/// contribute the bound recorded for them in a per-grammar table that is
/// built once (via `grammar.min_children`) by iterating until no entry
/// changes. Sequences add their members, choices take the smallest member,
/// and constructs not listed below (including plain repeats and optionals)
/// contribute zero.
fn rule_min_required_children(grammar: &Grammar, rule: &Production) -> usize {
    fn lower_bound(
        grammar: &Grammar,
        node: &Production,
        hidden: &std::collections::HashMap<String, usize>,
    ) -> usize {
        match node {
            Production::Symbol { name } if name.starts_with('_') => {
                hidden.get(name).copied().unwrap_or(0)
            }
            Production::Symbol { name } => usize::from(grammar.rules.contains_key(name)),
            Production::Seq { members } => members
                .iter()
                .map(|member| lower_bound(grammar, member, hidden))
                .sum(),
            Production::Choice { members } => members
                .iter()
                .map(|member| lower_bound(grammar, member, hidden))
                .min()
                .unwrap_or(0),
            // Pure wrappers: the bound is whatever the wrapped content needs.
            Production::Field { content, .. }
            | Production::Repeat1 { content }
            | Production::Token { content }
            | Production::ImmediateToken { content }
            | Production::Prec { content, .. }
            | Production::PrecLeft { content, .. }
            | Production::PrecRight { content, .. }
            | Production::PrecDynamic { content, .. }
            | Production::Reserved { content, .. }
            | Production::Alias { content, .. } => lower_bound(grammar, content, hidden),
            _ => 0,
        }
    }
    let table = grammar.min_children.get_or_init(|| {
        // Seed every `_`-prefixed rule at zero, then refine until stable.
        let mut table: std::collections::HashMap<String, usize> =
            std::collections::HashMap::new();
        for name in grammar.rules.keys() {
            if name.starts_with('_') {
                table.insert(name.clone(), 0usize);
            }
        }
        let mut dirty = true;
        while dirty {
            dirty = false;
            for (name, body) in &grammar.rules {
                if !name.starts_with('_') {
                    continue;
                }
                let bound = lower_bound(grammar, body, &table);
                if table.get(name) != Some(&bound) {
                    table.insert(name.clone(), bound);
                    dirty = true;
                }
            }
        }
        table
    });
    lower_bound(grammar, rule, table)
}
/// Decides whether the aliased `content` can account for the children of
/// `child_id`.
///
/// A bare symbol is always accepted, as is a child with no children whose
/// kind can be resolved. Otherwise every resolved child kind must be
/// admitted by `content` according to `rule_admits_count`.
fn aliased_content_admits_child(
    schema: &Schema,
    grammar: &Grammar,
    content: &Production,
    child_id: &panproto_gat::Name,
) -> bool {
    if matches!(content, Production::Symbol { .. }) {
        return true;
    }
    let child_edges = children_for(schema, child_id);
    let mut demand: Vec<&str> = Vec::new();
    for edge in child_edges.iter() {
        if let Some(kind) = vertex_id_kind(schema, &edge.tgt) {
            demand.push(kind);
        }
    }
    demand.is_empty() || rule_admits_count(grammar, content, &demand) == demand.len()
}
/// Chooses which rule to walk when emitting a vertex of kind `kind`.
///
/// Returns a `(rule name, rule body)` pair. The vertex's own rule is returned
/// unless `kind` has an entry in `grammar.named_alias_sources` and one of the
/// listed source rules fits the vertex's children better. "Demand" below is
/// the list of kinds of the child-edge targets that exist in `schema.vertices`.
///
/// Selection tiers, in order:
/// 1. No alias sources for `kind`, or empty demand: own rule.
/// 2. Own rule admits every demanded kind:
///    a. if its ordered consumption falls short of the demand, a source whose
///       ordered consumption covers the whole demand wins (the source equal to
///       `pre_alias_symbol(schema, vertex_id)` is preferred, otherwise the
///       first fitting one);
///    b. otherwise own rule if its minimum required child count does not
///       exceed the demand;
///    c. otherwise a source that admits every demanded kind and whose minimum
///       required child count fits (same preference), falling back to own rule.
/// 3. Own rule does not admit every demanded kind: the source with the
///    strictly highest admit count above the own rule's count, else own rule.
fn select_walk_rule<'g>(
schema: &Schema,
grammar: &'g Grammar,
edges: &[&Edge],
kind: &'g str,
own_rule: &'g Production,
vertex_id: &panproto_gat::Name,
) -> (&'g str, &'g Production) {
let Some(sources) = grammar.named_alias_sources.get(kind) else {
return (kind, own_rule);
};
// Kinds of the children this vertex actually has, in edge order.
let demand: Vec<&str> = edges
.iter()
.filter_map(|e| schema.vertices.get(&e.tgt).map(|v| v.kind.as_ref()))
.collect();
if demand.is_empty() {
return (kind, own_rule);
}
let own_admits = rule_admits_count(grammar, own_rule, &demand);
if own_admits == demand.len() {
// Tier 2a: every kind is admitted, but not necessarily in this order.
if ordered_consumption(grammar, own_rule, &demand) < demand.len() {
let recorded_src = pre_alias_symbol(schema, vertex_id);
let mut fit: Option<(&str, &Production)> = None;
for src in sources {
if src == kind {
continue;
}
let Some(src_rule) = grammar.rules.get(src) else {
continue;
};
if ordered_consumption(grammar, src_rule, &demand) == demand.len() {
// The recorded pre-alias symbol wins immediately.
if recorded_src == Some(src.as_str()) {
return (src.as_str(), src_rule);
}
// Otherwise remember only the first fitting source.
fit.get_or_insert((src.as_str(), src_rule));
}
}
if let Some(found) = fit {
return found;
}
}
// Tier 2b: own rule is usable if it does not need more children than exist.
let own_min = rule_min_required_children(grammar, own_rule);
if own_min <= demand.len() {
return (kind, own_rule);
}
// Tier 2c: own rule needs more children than available; try the sources.
let recorded_src = pre_alias_symbol(schema, vertex_id);
let mut fit: Option<(&str, &Production)> = None;
for src in sources {
if src == kind {
continue;
}
let Some(src_rule) = grammar.rules.get(src) else {
continue;
};
if rule_admits_count(grammar, src_rule, &demand) == demand.len()
&& rule_min_required_children(grammar, src_rule) <= demand.len()
{
if recorded_src == Some(src.as_str()) {
return (src.as_str(), src_rule);
}
fit.get_or_insert((src.as_str(), src_rule));
}
}
return fit.unwrap_or((kind, own_rule));
}
// Tier 3: own rule misses some kinds; take the source admitting the most,
// provided it strictly beats the own rule. Ties keep the earlier source.
let mut best: Option<(&str, &Production, usize)> = None;
for src in sources {
if src == kind {
continue;
}
let Some(src_rule) = grammar.rules.get(src) else {
continue;
};
let c = rule_admits_count(grammar, src_rule, &demand);
if c > own_admits && best.as_ref().is_none_or(|&(_, _, bc)| c > bc) {
best = Some((src.as_str(), src_rule, c));
}
}
match best {
Some((name, r, _)) => (name, r),
None => (kind, own_rule),
}
}
/// Reports whether `kind` is treated as tight content: either the cassette
/// (when present) says so, or the grammar lists it among its external-content
/// or string-content kinds.
fn is_tight_content_kind(
    grammar: &Grammar,
    cassette: Option<&dyn crate::languages::cassettes::GrammarCassette>,
    kind: &str,
) -> bool {
    if let Some(c) = cassette {
        if c.kind_is_tight_content(kind) {
            return true;
        }
    }
    grammar.external_content_kinds.contains(kind) || grammar.string_content_kinds.contains(kind)
}
/// Emits the vertex `vertex_id` into `out`.
///
/// Strategies are tried in this order:
/// 1. If the vertex has a byte span and its subtree bytes can be
///    reconstructed, those bytes are written verbatim.
/// 2. If the vertex carries a literal value and has no children, the literal
///    is emitted as a single token (with spacing adjustments driven by the
///    grammar and cassette). An empty bracket pair (`()`, `[]`, `{}`) whose
///    kind maps to an alias source that has a rule is excluded and falls
///    through to the rule walk instead.
/// 3. If the grammar has a rule for the vertex kind, the rule chosen by
///    `select_walk_rule` is walked against the vertex's children.
/// 4. If the kind is a named alias whose source has a rule, that rule is walked.
/// 5. Otherwise the children are emitted in edge order.
///
/// `out.current_rule` is set for the duration of a rule walk and is restored
/// before returning, on both the success and the error path. Callers that
/// swallow errors (repeat/optional handling) therefore never observe a stale
/// rule name.
///
/// # Errors
///
/// Returns `ParseError::EmitFailed` when `vertex_id` is not in the schema and
/// propagates any error raised while walking a rule or emitting children.
pub(crate) fn emit_vertex(
    protocol: &str,
    schema: &Schema,
    grammar: &Grammar,
    vertex_id: &panproto_gat::Name,
    out: &mut Output<'_>,
) -> Result<(), ParseError> {
    let vertex = schema
        .vertices
        .get(vertex_id)
        .ok_or_else(|| ParseError::EmitFailed {
            protocol: protocol.to_owned(),
            reason: format!("vertex '{vertex_id}' not found"),
        })?;
    // Strategy 1: replay the original bytes when they are available.
    if vertex_has_byte_span(schema, vertex_id) {
        if let Some(bytes) = reconstruct_subtree_bytes(schema, vertex_id) {
            out.verbatim(&bytes);
            return Ok(());
        }
    }
    let kind: &str = vertex.kind.as_ref();
    // An immediate-token rule must attach to whatever precedes it.
    if let Some(rule) = grammar.rules.get(kind) {
        if is_immediate_token(rule) {
            out.no_space();
        }
    }
    // Strategy 2: childless vertex with a recorded literal.
    if let Some(literal) = literal_value(schema, vertex_id) {
        if children_for(schema, vertex_id).is_empty() {
            let is_bracket_pair = literal.len() >= 2
                && matches!(
                    (literal.as_bytes().first(), literal.as_bytes().last()),
                    (Some(b'('), Some(b')')) | (Some(b'['), Some(b']')) | (Some(b'{'), Some(b'}'))
                );
            let is_empty_bracket_pair = is_bracket_pair && literal.len() == 2;
            let has_alias_rule = grammar
                .named_alias_map
                .get(kind)
                .is_some_and(|src| grammar.rules.contains_key(src));
            if !(is_empty_bracket_pair && has_alias_rule) {
                if is_tight_content_kind(grammar, out.cassette, kind) {
                    out.no_space();
                    out.tight_token(literal);
                    out.no_space();
                    return Ok(());
                }
                let role = if is_bracket_pair {
                    TokenRole::BracketClose
                } else {
                    leaf_terminal_role(grammar, kind)
                };
                // The literal already carries its own leading blank.
                if grammar.leading_space_terminals.contains(kind)
                    && literal.starts_with([' ', '\t'])
                {
                    out.no_space();
                }
                if grammar.immediate_token_alias_kinds.contains(kind) {
                    out.no_space();
                }
                out.token_with_role(literal, Some(role));
                // The literal already carries its own trailing blank.
                if grammar.leading_space_terminals.contains(kind) && literal.ends_with([' ', '\t'])
                {
                    out.no_space();
                }
                if let Some(negated) = grammar.rules.get(kind).and_then(|r| match r {
                    Production::Pattern { value } => unbounded_negated_class(value),
                    _ => None,
                }) {
                    out.tokens.push(Token::AbsorberGuard(negated.to_owned()));
                }
                if grammar.line_rest_kinds.contains(kind) && !literal.ends_with(['\n', '\r']) {
                    out.newline();
                }
                return Ok(());
            }
        }
    }
    let edges = children_for(schema, vertex_id);
    // Strategy 3: walk the grammar rule for this kind (or a better-fitting alias source).
    if let Some(rule) = grammar.rules.get(kind) {
        let (walk_name, walk_rule): (&str, &Production) =
            select_walk_rule(schema, grammar, &edges, kind, rule, vertex_id);
        let old_rule = out.current_rule.take();
        out.current_rule = Some(walk_name.to_owned());
        // NOTE(review): no matching `indent_close` is emitted in this function;
        // presumably the close comes from elsewhere in the walk — confirm.
        if grammar.synthetic_indent_rules.contains(walk_name) {
            out.indent_open();
        }
        let mut cursor = ChildCursor::new(&edges);
        let mut result = emit_production(
            protocol,
            schema,
            grammar,
            vertex_id,
            walk_rule,
            &mut cursor,
            out,
        );
        if result.is_ok() {
            result = drain_extras(protocol, schema, grammar, &mut cursor, out);
        }
        // Restore unconditionally so an error cannot leak this rule name.
        out.current_rule = old_rule;
        return result;
    }
    // Strategy 4: the kind only exists as a named alias; walk its source rule.
    if let Some(source_name) = grammar.named_alias_map.get(kind) {
        if let Some(rule) = grammar.rules.get(source_name) {
            let old_rule = out.current_rule.take();
            out.current_rule = Some(source_name.to_owned());
            let mut cursor = ChildCursor::new(&edges);
            let mut result =
                emit_production(protocol, schema, grammar, vertex_id, rule, &mut cursor, out);
            if result.is_ok() {
                result = drain_extras(protocol, schema, grammar, &mut cursor, out);
            }
            // Restore unconditionally so an error cannot leak this rule name.
            out.current_rule = old_rule;
            return result;
        }
    }
    // Strategy 5: no rule is known; emit the children in edge order.
    for edge in &edges {
        emit_vertex(protocol, schema, grammar, &edge.tgt, out)?;
    }
    Ok(())
}
/// Walks `rule` for `vertex_id` unless the same walk is already in progress.
///
/// A walk is identified by the vertex, the rule name, and the number of
/// children the cursor has consumed so far. If that identity is already
/// registered in `EMIT_MU_FRAMES`, nothing is emitted and `Ok(())` is
/// returned. Otherwise the identity is registered for the duration of the
/// walk and removed afterwards, whether the walk succeeded or failed.
pub(crate) fn walk_in_mu_frame(
    protocol: &str,
    schema: &Schema,
    grammar: &Grammar,
    vertex_id: &panproto_gat::Name,
    rule_name: &str,
    rule: &Production,
    cursor: &mut ChildCursor<'_>,
    out: &mut Output<'_>,
) -> Result<(), ParseError> {
    let progress = cursor.consumed.iter().filter(|&&done| done).count();
    let frame = (vertex_id.to_string(), rule_name.to_owned(), progress);
    let is_new_frame = EMIT_MU_FRAMES.with(|active| active.borrow_mut().insert(frame.clone()));
    if is_new_frame {
        let outcome = emit_production(protocol, schema, grammar, vertex_id, rule, cursor, out);
        EMIT_MU_FRAMES.with(|active| {
            active.borrow_mut().remove(&frame);
        });
        outcome
    } else {
        // Re-entered with no progress since the enclosing identical walk.
        Ok(())
    }
}
/// Emits `production` for `vertex_id`, with a recursion-depth guard.
///
/// The `EMIT_DEPTH` counter is raised on entry and is lowered again on every
/// exit path, including when draining leading extras fails. Callers that
/// swallow errors (repeat/optional handling) would otherwise leave the counter
/// permanently raised and eventually trip the depth limit spuriously.
///
/// Unconsumed leading extras are drained before the production is walked,
/// except for sequences in which `seq_open_bracket_index` finds an opening
/// bracket; for those, draining is left to the sequence walk.
///
/// # Errors
///
/// Returns `ParseError::EmitFailed` when the nesting depth exceeds 500, and
/// propagates any error from `drain_extras` or `emit_production_inner`.
pub(crate) fn emit_production(
    protocol: &str,
    schema: &Schema,
    grammar: &Grammar,
    vertex_id: &panproto_gat::Name,
    production: &Production,
    cursor: &mut ChildCursor<'_>,
    out: &mut Output<'_>,
) -> Result<(), ParseError> {
    let depth = EMIT_DEPTH.with(|d| {
        let v = d.get() + 1;
        d.set(v);
        v
    });
    if depth > 500 {
        EMIT_DEPTH.with(|d| d.set(d.get() - 1));
        return Err(ParseError::EmitFailed {
            protocol: protocol.to_owned(),
            reason: format!(
                "emit_production recursion >500 (likely a cyclic grammar; \
                 vertex='{vertex_id}')"
            ),
        });
    }
    let defer_leading_extras = matches!(
        production,
        Production::Seq { members } if seq_open_bracket_index(members).is_some()
    );
    // Deliberately no `?` here: the depth counter below must be lowered
    // regardless of whether draining or the inner walk fails.
    let drained = if defer_leading_extras {
        Ok(())
    } else {
        drain_extras(protocol, schema, grammar, cursor, out)
    };
    let result = match drained {
        Ok(()) => emit_production_inner(
            protocol, schema, grammar, vertex_id, production, cursor, out,
        ),
        Err(e) => Err(e),
    };
    EMIT_DEPTH.with(|d| d.set(d.get() - 1));
    result
}
/// Emits the run of extras at the front of the cursor's unconsumed children.
///
/// Repeatedly looks at the first unconsumed edge; while its target's kind is
/// listed in `grammar.extras`, the edge is marked consumed and its target is
/// emitted. Stops at the first unconsumed edge that is not an extra (or whose
/// target is missing from the schema), or when nothing is left.
///
/// # Errors
///
/// Propagates any error from emitting an extra vertex.
pub(crate) fn drain_extras(
    protocol: &str,
    schema: &Schema,
    grammar: &Grammar,
    cursor: &mut ChildCursor<'_>,
    out: &mut Output<'_>,
) -> Result<(), ParseError> {
    if grammar.extras.is_empty() {
        return Ok(());
    }
    loop {
        // Only the very first unconsumed edge is ever a candidate.
        let Some(front) = (0..cursor.edges.len()).find(|&i| !cursor.consumed[i]) else {
            return Ok(());
        };
        let front_is_extra = schema
            .vertices
            .get(&cursor.edges[front].tgt)
            .is_some_and(|v| grammar.extras.contains(v.kind.as_ref()));
        if !front_is_extra {
            return Ok(());
        }
        cursor.consumed[front] = true;
        emit_vertex(protocol, schema, grammar, &cursor.edges[front].tgt, out)?;
    }
}
/// Emits the members of a sequence in order, assigning token roles and
/// layout hints as it goes.
///
/// * Literal members are emitted directly, using the positional role from
///   `classify_seq_positions` when there is one, else keyword/operator
///   depending on `is_word_like`.
/// * The first opening bracket for which `seq_bracket_triggers_indent` holds
///   also opens an indent; any later closing-bracket literal in the same
///   sequence closes it.
/// * Members that reduce to an immediate token are glued to the previous
///   member when that member produced content.
/// * Other members may be preceded by a `Token::ForceSpace` and/or a newline,
///   per the conditions commented below.
///
/// `in_choice` is forwarded to `classify_seq_positions`.
///
/// # Errors
///
/// Propagates any error from emitting a member or draining extras.
pub(crate) fn emit_seq_with_roles(
protocol: &str,
schema: &Schema,
grammar: &Grammar,
vertex_id: &panproto_gat::Name,
members: &[Production],
cursor: &mut ChildCursor<'_>,
out: &mut Output<'_>,
in_choice: bool,
) -> Result<(), ParseError> {
let positional_roles = classify_seq_positions(members, in_choice);
// Index of the first opening bracket that should also open an indent.
let indent_open_idx: Option<usize> = positional_roles.iter().enumerate().position(|(i, r)| {
*r == Some(TokenRole::BracketOpen) && seq_bracket_triggers_indent(members, i, grammar)
});
// Member indices that get a newline emitted right before them.
let mut line_break_positions: std::collections::HashSet<usize> =
std::collections::HashSet::new();
if let Some(oi) = indent_open_idx {
let open_text = unwrap_to_string(&members[oi]);
// Only word-like openers get the extra line breaks computed here.
if open_text.is_some_and(is_word_like) {
let mut found_body = false;
for (j, member) in members.iter().enumerate().skip(oi + 1) {
if let Production::Choice { members: alts } = member {
// An optional body: a choice offering both blank and a symbol
// whose rule satisfies `has_repeat_in`.
let has_blank = alts.iter().any(|a| matches!(a, Production::Blank));
let has_block_symbol = alts.iter().any(|a| match a {
Production::Symbol { name } => {
grammar.rules.get(name).is_some_and(has_repeat_in)
}
_ => false,
});
if has_blank && has_block_symbol {
line_break_positions.insert(j);
found_body = true;
}
} else if found_body && matches!(member, Production::Field { .. }) {
// Fields following such a body also start on their own line.
line_break_positions.insert(j);
}
}
}
}
// Extras are drained right after the member at this index (see loop end).
let leading_extra_after = seq_open_bracket_index(members);
let mut prev_member_emitted_content = false;
for (i, member) in members.iter().enumerate() {
let tokens_before_member = out.tokens.len();
if let Some(value) = unwrap_to_string(member) {
let role = positional_roles[i].unwrap_or_else(|| {
if is_word_like(value) {
TokenRole::Keyword
} else {
TokenRole::Operator
}
});
if indent_open_idx == Some(i) {
if is_word_like(value) {
// Word-like opener: push the literal and the indent marker separately.
out.tokens.push(Token::Lit(value.to_owned(), role));
out.tokens.push(Token::IndentOpen);
} else {
out.token_with_indent_open(value, role);
}
} else if role == TokenRole::BracketClose && indent_open_idx.is_some() {
// Close the indent before writing the closing bracket itself.
out.tokens.push(Token::IndentClose);
out.tokens.push(Token::Lit(value.to_owned(), role));
} else {
out.token_with_role(value, Some(role));
}
} else if reduces_to_immediate_token(member) {
if prev_member_emitted_content {
out.no_space();
}
emit_production(protocol, schema, grammar, vertex_id, member, cursor, out)?;
} else {
// Two adjacent non-literal members where the previous one produced
// content: force a space unless one of the exemptions below applies.
if i > 0 && unwrap_to_string(&members[i - 1]).is_none() && prev_member_emitted_content {
let member_starts_with_bracket = member_has_leading_bracket(member, grammar);
// `_`-prefixed symbol with no rule in the grammar.
let is_zero_width_external = matches!(
member,
Production::Symbol { name }
if name.starts_with('_') && !grammar.rules.contains_key(name)
);
// A choice made up solely of blank and literal alternatives.
let is_separator_choice = matches!(member, Production::Choice { members: alts }
if alts.iter().all(|a| matches!(a, Production::Blank) || unwrap_to_string(a).is_some()));
let is_repeat = matches!(
member,
Production::Repeat { .. } | Production::Repeat1 { .. }
);
let prev_tight_right = matches!(
out.tokens.last(),
Some(Token::Lit(_, TokenRole::BracketOpen | TokenRole::Immediate))
);
// Requires both a current rule and a cassette; the cassette decides
// whether any literal of the previous member is a tight operator.
let prev_member_tight_operator = out
.current_rule
.as_ref()
.zip(out.cassette)
.is_some_and(|(rule, cassette)| {
literal_strings(&members[i - 1])
.iter()
.any(|lit| cassette.operator_is_tight(rule, lit))
});
if !member_starts_with_bracket
&& !is_zero_width_external
&& !is_separator_choice
&& !is_repeat
&& !prev_tight_right
&& !prev_member_tight_operator
{
out.tokens.push(Token::ForceSpace);
}
}
if line_break_positions.contains(&i) {
out.newline();
}
emit_production(protocol, schema, grammar, vertex_id, member, cursor, out)?;
}
// "Content" means this member pushed at least one literal or verbatim token.
prev_member_emitted_content = out.tokens[tokens_before_member..]
.iter()
.any(|t| matches!(t, Token::Lit(_, _) | Token::Verbatim(_)));
if leading_extra_after == Some(i) {
drain_extras(protocol, schema, grammar, cursor, out)?;
}
}
Ok(())
}
/// Counts the cursor's unconsumed edges whose target is not an extra.
///
/// An edge whose target is missing from `schema.vertices` is counted as a
/// non-extra.
fn unconsumed_non_extra(schema: &Schema, grammar: &Grammar, cursor: &ChildCursor<'_>) -> usize {
    let mut pending = 0usize;
    for (idx, edge) in cursor.edges.iter().enumerate() {
        if cursor.consumed[idx] {
            continue;
        }
        let is_extra = schema
            .vertices
            .get(&edge.tgt)
            .is_some_and(|v| grammar.extras.contains(v.kind.as_ref()));
        if !is_extra {
            pending += 1;
        }
    }
    pending
}
/// Emits one unrolled step of a left-recursive rule.
///
/// `rest` is the tail of a recursive alternative (everything after the
/// leading self-reference). Within it, a symbol naming `rule_name` is
/// emitted as `base_choice` instead of recursing, literals are emitted as
/// keyword or operator tokens, and anything else is emitted normally.
///
/// # Errors
///
/// Propagates any error from emitting a member.
#[allow(clippy::too_many_arguments)]
fn emit_lr_step(
    protocol: &str,
    schema: &Schema,
    grammar: &Grammar,
    vertex_id: &panproto_gat::Name,
    rule_name: &str,
    rest: &[Production],
    base_choice: &Production,
    cursor: &mut ChildCursor<'_>,
    out: &mut Output<'_>,
) -> Result<(), ParseError> {
    // Starts as `true`: whatever preceded this step counts as having produced tokens.
    let mut prior_produced = true;
    for member in rest {
        let mark = out.tokens.len();
        let recurses = match member {
            Production::Symbol { name } => name == rule_name,
            _ => false,
        };
        if recurses {
            emit_production(
                protocol,
                schema,
                grammar,
                vertex_id,
                base_choice,
                cursor,
                out,
            )?;
        } else {
            match unwrap_to_string(member) {
                Some(text) => {
                    let role = if is_word_like(text) {
                        TokenRole::Keyword
                    } else {
                        TokenRole::Operator
                    };
                    out.token_with_role(text, Some(role));
                }
                None => {
                    // NOTE(review): a space is forced only when the previous member
                    // produced NO tokens; `emit_seq_with_roles` forces one when the
                    // previous member DID produce content — confirm this is intended.
                    if !prior_produced {
                        out.force_space();
                    }
                    emit_production(protocol, schema, grammar, vertex_id, member, cursor, out)?;
                }
            }
        }
        prior_produced = mark < out.tokens.len();
    }
    Ok(())
}
/// Emits a left-recursive rule iteratively instead of recursing.
///
/// The non-recursive alternatives (`base_alts`) are emitted once as a choice.
/// Then, while non-extra children remain unconsumed, one recursive tail
/// (`seq[1..]` of an entry in `rec_seqs`) is emitted per iteration via
/// `emit_lr_step`.
///
/// The loop stops when no non-extra children remain, when an iteration
/// consumed no further non-extra child, when no recursive alternative matches
/// the next child's kind, or when the iteration guard is exceeded.
///
/// Each entry of `rec_seqs` is sliced with `[1..]`, so entries must be
/// non-empty; `rec_seqs[0]` is indexed when any tail references the rule
/// again.
///
/// # Errors
///
/// Propagates any error from emitting the base choice or a recursive step.
#[allow(clippy::too_many_arguments)]
fn emit_left_recursive_unrolled(
protocol: &str,
schema: &Schema,
grammar: &Grammar,
vertex_id: &panproto_gat::Name,
rule_name: &str,
rec_seqs: &[&[Production]],
base_alts: Vec<Production>,
cursor: &mut ChildCursor<'_>,
out: &mut Output<'_>,
) -> Result<(), ParseError> {
let base_choice = Production::Choice { members: base_alts };
// "Binary": some recursive tail mentions the rule again (e.g. `E op E`).
let binary = rec_seqs.iter().any(|seq| {
seq[1..]
.iter()
.any(|m| matches!(m, Production::Symbol { name } if name == rule_name))
});
// Leftmost operand.
emit_production(
protocol,
schema,
grammar,
vertex_id,
&base_choice,
cursor,
out,
)?;
// Hard cap on iterations: at most one more than the number of edges.
let mut guard = 0usize;
while unconsumed_non_extra(schema, grammar, cursor) > 0 {
guard += 1;
if guard > cursor.edges.len() + 1 {
break;
}
let before = unconsumed_non_extra(schema, grammar, cursor);
if binary {
// Binary shape: always the tail of the first recursive alternative.
emit_lr_step(
protocol,
schema,
grammar,
vertex_id,
rule_name,
&rec_seqs[0][1..],
&base_choice,
cursor,
out,
)?;
} else {
// Kind of the first unconsumed non-extra child, if it is in the schema.
let next_kind = cursor
.edges
.iter()
.enumerate()
.find(|(i, edge)| {
!cursor.consumed[*i]
&& schema
.vertices
.get(&edge.tgt)
.is_none_or(|v| !grammar.extras.contains(v.kind.as_ref()))
})
.and_then(|(_, edge)| schema.vertices.get(&edge.tgt).map(|v| v.kind.as_ref()));
// First recursive alternative whose tail has a symbol (bare or directly
// inside a field) that this kind satisfies.
let chosen = rec_seqs.iter().find(|seq| {
seq[1..].iter().any(|m| match m {
Production::Symbol { name } => kind_satisfies_symbol(grammar, next_kind, name),
Production::Field { content, .. } => matches!(
content.as_ref(),
Production::Symbol { name }
if kind_satisfies_symbol(grammar, next_kind, name)
),
_ => false,
})
});
let Some(seq) = chosen else {
break;
};
emit_lr_step(
protocol,
schema,
grammar,
vertex_id,
rule_name,
&seq[1..],
&base_choice,
cursor,
out,
)?;
}
// No progress in this iteration: stop rather than repeat the same output.
if unconsumed_non_extra(schema, grammar, cursor) >= before {
break;
}
}
Ok(())
}
pub(crate) fn emit_production_inner(
protocol: &str,
schema: &Schema,
grammar: &Grammar,
vertex_id: &panproto_gat::Name,
production: &Production,
cursor: &mut ChildCursor<'_>,
out: &mut Output<'_>,
) -> Result<(), ParseError> {
match production {
Production::String { value } => {
out.token(value);
Ok(())
}
Production::Pattern { value } => {
if let Some(literal) = literal_value(schema, vertex_id) {
if pattern_absorbs_leading_space(value) {
out.no_space();
}
out.token_with_role(literal, Some(TokenRole::Terminal));
if let Some(negated) = unbounded_negated_class(value) {
out.tokens.push(Token::AbsorberGuard(negated.to_owned()));
}
} else if is_newline_like_pattern(value) {
out.newline();
} else if is_whitespace_only_pattern(value) {
} else {
out.token_with_role(&placeholder_for_pattern(value), Some(TokenRole::Terminal));
}
Ok(())
}
Production::Blank => Ok(()),
Production::Symbol { name } => {
if let Some(field) = current_field_context() {
let field_sort = format!("field:{field}");
let has_field_literals = schema
.constraints
.get(vertex_id)
.is_some_and(|cs| cs.iter().any(|c| c.sort.as_ref() == field_sort));
if has_field_literals && name.starts_with('_') {
if let Some(rule) = grammar.rules.get(name) {
let old_rule = out.current_rule.take();
out.current_rule = Some(name.to_owned());
let result = walk_in_mu_frame(
protocol, schema, grammar, vertex_id, name, rule, cursor, out,
);
out.current_rule = old_rule;
return result;
}
}
if name.starts_with('_') && !cursor.has_field(&field) {
if let Some(rule) = grammar.rules.get(name) {
let mut inner_fields = std::collections::HashSet::new();
let mut seen = std::collections::HashSet::new();
collect_inner_field_names_expanded(
rule,
grammar,
&mut inner_fields,
&mut seen,
);
let parent_kind = vertex_id_kind(schema, vertex_id);
let nt_fields =
parent_kind.and_then(|pk| grammar.node_type_field_children.get(pk));
let rebound = inner_fields.iter().any(|f| {
if *f == field.as_str() {
return false;
}
let Some(child_kind) = cursor
.peek_field(f)
.and_then(|e| schema.vertices.get(&e.tgt))
.map(|v| v.kind.as_ref())
else {
return false;
};
nt_fields
.and_then(|m| m.get(*f))
.is_some_and(|ks| ks.contains(child_kind))
});
if rebound {
let _clear = clear_field_context();
let old_rule = out.current_rule.take();
out.current_rule = Some(name.to_owned());
let result = walk_in_mu_frame(
protocol, schema, grammar, vertex_id, name, rule, cursor, out,
);
out.current_rule = old_rule;
return result;
}
}
}
if let Some(edge) = cursor.take_field(&field) {
return emit_in_child_context(
protocol, schema, grammar, &edge.tgt, production, out,
);
}
let parent_kind = vertex_id_kind(schema, vertex_id);
let field_absent_in_node_types = parent_kind.is_some_and(|pk| {
grammar.node_type_nonfield_children.contains_key(pk)
&& !grammar
.node_type_field_children
.get(pk)
.is_some_and(|fields| fields.contains_key(field.as_str()))
});
if field_absent_in_node_types {
if let Some(edge) = take_symbol_match(grammar, schema, cursor, name) {
return emit_in_child_context(
protocol, schema, grammar, &edge.tgt, production, out,
);
}
}
let sort = format!("field:{field}");
if let Some(v) = schema.constraints.get(vertex_id).and_then(|cs| {
cs.iter()
.find(|c| c.sort.as_ref() == sort)
.map(|c| c.value.clone())
}) {
out.token(&v);
}
return Ok(());
}
if is_whitespace_external(name) {
out.force_space();
return Ok(());
}
let is_inlined = name.starts_with('_')
|| (grammar.inline_rules.contains(name) && !grammar.extras.contains(name));
if is_inlined {
if let Some(rule) = grammar.rules.get(name) {
let old_rule = out.current_rule.take();
out.current_rule = Some(name.to_owned());
let result = walk_in_mu_frame(
protocol, schema, grammar, vertex_id, name, rule, cursor, out,
);
out.current_rule = old_rule;
result
} else {
let bracket_role = if grammar.external_bracket_opens.contains(name) {
Some(TokenRole::BracketOpen)
} else if grammar.external_bracket_closes.contains(name) {
Some(TokenRole::BracketClose)
} else {
None
};
if let Some(alias_value) = grammar.external_alias_map.get(name) {
if out
.cassette
.is_some_and(|c| c.external_leads_no_space(name))
{
out.no_space();
}
match bracket_role {
Some(role) => out.token_with_role(alias_value, Some(role)),
None => out.token(alias_value),
}
return Ok(());
}
if let Some(close_lit) = grammar.external_close_text.get(name) {
out.no_space();
out.token_with_role(close_lit, Some(TokenRole::BracketClose));
} else if is_whitespace_external(name) {
out.force_space();
} else if is_no_space_external(name) {
out.no_space();
} else if grammar.external_indent_opens.contains(name) {
out.indent_open();
} else if grammar.external_indent_closes.contains(name) {
out.indent_close();
} else if grammar.external_newlines.contains(name)
|| out.cassette.is_some_and(|c| c.external_is_newline(name))
{
out.newline();
} else if grammar.external_semicolons.contains(name) {
out.token_with_role(";", Some(TokenRole::Separator));
} else if let Some(default) = out
.cassette
.and_then(|c| crate::languages::cassettes::resolve_external_token(c, name))
{
if !default.is_empty() {
match bracket_role {
Some(role) => out.token_with_role(default, Some(role)),
None if is_connector_punctuation(default) => {
out.no_space();
out.token_with_role(default, Some(TokenRole::Connector));
out.no_space();
}
None => out.token(default),
}
}
}
Ok(())
}
} else if let Some(edge) = { take_symbol_match(grammar, schema, cursor, name) } {
emit_vertex(protocol, schema, grammar, &edge.tgt, out)
} else if vertex_id_kind(schema, vertex_id) == Some(name.as_str()) {
let rule = grammar
.rules
.get(name)
.ok_or_else(|| ParseError::EmitFailed {
protocol: protocol.to_owned(),
reason: format!("no production for SYMBOL '{name}'"),
})?;
{
let old_rule = out.current_rule.take();
out.current_rule = Some(name.to_owned());
let result = walk_in_mu_frame(
protocol, schema, grammar, vertex_id, name, rule, cursor, out,
);
out.current_rule = old_rule;
result
}
} else {
Ok(())
}
}
Production::Seq { members } => emit_seq_with_roles(
protocol, schema, grammar, vertex_id, members, cursor, out, false,
),
Production::Choice { members } => {
if let Some(rule_name) = out.current_rule.clone() {
if rule_name.starts_with('_') {
if let Some(rec_seqs) = left_recursive_alts(members, &rule_name) {
if unconsumed_non_extra(schema, grammar, cursor) >= 2 {
let base_alts: Vec<Production> = members
.iter()
.filter(|m| {
!matches!(unwrap_prec(m), Production::Seq { members: s }
if matches!(s.first(), Some(Production::Symbol { name }) if name == &rule_name))
})
.cloned()
.collect();
if !base_alts.is_empty() {
return emit_left_recursive_unrolled(
protocol, schema, grammar, vertex_id, &rule_name, &rec_seqs,
base_alts, cursor, out,
);
}
}
}
}
}
if let Some(matched) = pick_choice_with_cursor(
schema,
grammar,
vertex_id,
cursor,
members,
out.current_rule.as_deref(),
) {
match matched {
Production::Seq {
members: seq_members,
} => emit_seq_with_roles(
protocol,
schema,
grammar,
vertex_id,
seq_members,
cursor,
out,
true,
),
Production::String { value } => {
let role = out.explicit_role(value).unwrap_or_else(|| {
if is_word_like(value) {
TokenRole::Keyword
} else {
TokenRole::Separator
}
});
out.token_with_role(value, Some(role));
Ok(())
}
_ => {
emit_production(protocol, schema, grammar, vertex_id, matched, cursor, out)
}
}
} else {
Ok(())
}
}
Production::Repeat { content } | Production::Repeat1 { content } => {
let mandatory_sep_text: Option<&str> = match content.as_ref() {
Production::Seq { members } if members.len() >= 2 => unwrap_to_string(&members[0]),
_ => None,
};
let separator_leading_seq: Option<&[Production]> = match content.as_ref() {
Production::Seq { members } if members.len() >= 2 => {
let first = &members[0];
let is_mandatory_sep = unwrap_to_string(first).is_some();
let cassette_overrides = is_mandatory_sep
&& unwrap_to_string(first).is_some_and(|sep| {
out.cassette.is_some_and(|c| c.separator_is_line_break(sep))
});
let is_separator_slot = match first {
Production::Choice { members } => {
members.iter().any(|m| matches!(m, Production::Blank))
}
Production::Optional { .. } => true,
_ => cassette_overrides,
};
if is_separator_slot {
Some(members.as_slice())
} else {
None
}
}
_ => None,
};
let item_per_iteration = matches!(content.as_ref(), Production::Field { .. })
|| matches!(content.as_ref(), Production::Symbol { name }
if grammar.rules.contains_key(name) && !name.starts_with('_'));
let item_needs_newline = !item_per_iteration
&& grammar.external_indent_opens.is_empty()
&& repeat_body_is_whole_vertex_item(content, grammar)
&& repeat_has_bracket_keyed_member(content, grammar);
let trailing_newline_separator = match content.as_ref() {
Production::Seq { members } if members.len() >= 2 => members
.last()
.is_some_and(|last| seq_trailing_newline_separator(grammar, last)),
_ => false,
};
let trailing_mandatory_sep: Option<&str> = match content.as_ref() {
Production::Seq { members }
if members.len() >= 2 && separator_leading_seq.is_none() =>
{
members.last().and_then(unwrap_to_string)
}
_ => None,
};
let sep_budget: Option<usize> = trailing_mandatory_sep.and_then(|sep| {
let cs = schema.constraints.get(vertex_id)?;
let has_ptrace = cs.iter().any(|c| c.sort.as_ref().starts_with("ptrace-"));
if !has_ptrace {
return None;
}
let count = cs
.iter()
.filter(|c| {
c.sort.as_ref().starts_with("ptrace-")
&& c.value.strip_prefix('T') == Some(sep)
})
.count();
Some(count)
});
let mut seps_emitted = 0usize;
let repeat_lead_symbol: Option<&str> = match content.as_ref() {
Production::Seq { members } if members.len() >= 2 => match &members[0] {
Production::Symbol { name }
if !name.starts_with('_')
&& grammar.rules.contains_key(name)
&& !grammar.named_alias_map.contains_key(name.as_str()) =>
{
Some(name.as_str())
}
_ => None,
},
_ => None,
};
let mut emitted_any = false;
loop {
let cursor_snap = cursor.consumed.clone();
let out_snap = out.snapshot();
let consumed_before = cursor.consumed.iter().filter(|&&c| c).count();
if let Some(lead) = repeat_lead_symbol {
let lead_available = cursor.edges.iter().enumerate().any(|(i, e)| {
!cursor.consumed[i]
&& kind_satisfies_symbol(
grammar,
schema.vertices.get(&e.tgt).map(|v| v.kind.as_ref()),
lead,
)
});
if !lead_available {
break;
}
}
if item_per_iteration && emitted_any {
out.force_space();
}
if item_needs_newline && emitted_any {
out.newline();
}
if trailing_newline_separator && emitted_any {
out.newline();
}
let result: Result<(), ParseError> =
if let Some(seq_members) = separator_leading_seq {
let cassette_replaces_sep = mandatory_sep_text.is_some_and(|sep| {
out.cassette.is_some_and(|c| c.separator_is_line_break(sep))
});
let pre_sep = out.snapshot();
let sep_result = if cassette_replaces_sep {
out.newline();
Ok(())
} else {
emit_production(
protocol,
schema,
grammar,
vertex_id,
&seq_members[0],
cursor,
out,
)
};
match sep_result {
Err(e) => Err(e),
Ok(()) => {
let sep_is_optional_statement_sep =
choice_offers_separator_literal(&seq_members[0]);
if !cassette_replaces_sep
&& !sep_is_optional_statement_sep
&& !out.lit_emitted_since(pre_sep)
{
out.no_space();
}
let mut rest_result = Ok(());
for member in &seq_members[1..] {
rest_result = emit_production(
protocol, schema, grammar, vertex_id, member, cursor, out,
);
if rest_result.is_err() {
break;
}
}
rest_result
}
}
} else if let Some(budget) = sep_budget {
let Production::Seq {
members: seq_members,
} = content.as_ref()
else {
unreachable!("trailing_mandatory_sep implies a SEQ body")
};
let (sep_member, lead_members) = seq_members.split_last().expect("len>=2");
let mut body_result = Ok(());
for member in lead_members {
body_result = emit_production(
protocol, schema, grammar, vertex_id, member, cursor, out,
);
if body_result.is_err() {
break;
}
}
if body_result.is_ok() && seps_emitted < budget {
body_result = emit_production(
protocol, schema, grammar, vertex_id, sep_member, cursor, out,
);
if body_result.is_ok() {
seps_emitted += 1;
}
}
body_result
} else {
emit_production(protocol, schema, grammar, vertex_id, content, cursor, out)
};
let consumed_after = cursor.consumed.iter().filter(|&&c| c).count();
if result.is_err() || consumed_after == consumed_before {
cursor.consumed = cursor_snap;
out.restore(out_snap);
break;
}
emitted_any = true;
}
if matches!(production, Production::Repeat1 { .. }) && !emitted_any {
emit_production(protocol, schema, grammar, vertex_id, content, cursor, out)?;
}
Ok(())
}
Production::Optional { content } => {
let cursor_snap = cursor.consumed.clone();
let out_snap = out.snapshot();
let consumed_before = cursor.consumed.iter().filter(|&&c| c).count();
let result =
emit_production(protocol, schema, grammar, vertex_id, content, cursor, out);
if result.is_err() {
cursor.consumed = cursor_snap;
out.restore(out_snap);
return result;
}
let consumed_after = cursor.consumed.iter().filter(|&&c| c).count();
if consumed_after == consumed_before
&& !has_relevant_constraint(content, schema, vertex_id)
{
cursor.consumed = cursor_snap;
out.restore(out_snap);
}
Ok(())
}
Production::Field { name, content } => {
let _guard = push_field_context(name);
emit_production(protocol, schema, grammar, vertex_id, content, cursor, out)
}
Production::Alias {
content,
named,
value,
} => {
if *named && !value.is_empty() {
if let Some(edge) = cursor.take_matching(|edge| {
schema
.vertices
.get(&edge.tgt)
.map(|v| v.kind.as_ref() == value.as_str())
.unwrap_or(false)
&& aliased_content_admits_child(schema, grammar, content, &edge.tgt)
}) {
return emit_aliased_child(protocol, schema, grammar, &edge.tgt, content, out);
}
}
if !*named && !value.is_empty() {
if let Production::Symbol { name: sym } = content.as_ref() {
if !grammar.rules.contains_key(sym) {
if out.cassette.is_some_and(|c| c.external_leads_no_space(sym)) {
out.no_space();
}
out.token(value);
return Ok(());
}
}
if alias_content_is_terminal_pattern(content)
&& literal_value(schema, vertex_id).is_none()
{
out.token(value);
return Ok(());
}
}
emit_production(protocol, schema, grammar, vertex_id, content, cursor, out)
}
Production::ImmediateToken { content } => {
out.no_space();
emit_production(protocol, schema, grammar, vertex_id, content, cursor, out)
}
Production::Token { content }
| Production::Prec { content, .. }
| Production::PrecLeft { content, .. }
| Production::PrecRight { content, .. }
| Production::PrecDynamic { content, .. }
| Production::Reserved { content, .. } => {
emit_production(protocol, schema, grammar, vertex_id, content, cursor, out)
}
}
}
/// Consume and return the first unconsumed edge whose target vertex kind
/// satisfies the grammar symbol `name`.
///
/// The cursor is searched twice: the first pass only accepts edges whose own
/// kind is `"child_of"`; if that finds nothing, the second pass accepts an
/// edge of any kind. Returns `None` when neither pass matches.
pub(crate) fn take_symbol_match<'a>(
    grammar: &Grammar,
    schema: &Schema,
    cursor: &mut ChildCursor<'a>,
    name: &str,
) -> Option<&'a Edge> {
    /// True when the vertex `edge` points at has a kind that satisfies `name`.
    fn target_satisfies(grammar: &Grammar, schema: &Schema, edge: &Edge, name: &str) -> bool {
        let target_kind = schema.vertices.get(&edge.tgt).map(|v| v.kind.as_ref());
        kind_satisfies_symbol(grammar, target_kind, name)
    }

    cursor
        .take_matching(|edge| {
            edge.kind.as_ref() == "child_of" && target_satisfies(grammar, schema, edge, name)
        })
        .or_else(|| cursor.take_matching(|edge| target_satisfies(grammar, schema, edge, name)))
}
/// Decide whether a vertex of kind `target_kind` can stand in for the grammar
/// symbol `name`.
///
/// A missing kind never matches. Otherwise the kind matches when it equals
/// `name`, or when the set stored under that kind in `grammar.subtypes`
/// contains `name`.
pub(crate) fn kind_satisfies_symbol(
    grammar: &Grammar,
    target_kind: Option<&str>,
    name: &str,
) -> bool {
    target_kind.is_some_and(|target| {
        target == name
            || grammar
                .subtypes
                .get(target)
                .is_some_and(|related| related.contains(name))
    })
}
/// Emit the child vertex `child_id` that was matched through an alias whose
/// inner production is `content`.
///
/// The strategies below are tried in order and the first that applies wins:
///
/// 1. If the vertex has a byte span and its subtree bytes can be
///    reconstructed, those bytes are written verbatim.
/// 2. If the vertex carries a literal value and has no children, the literal
///    is written as a single token (with spacing adjustments driven by the
///    grammar's terminal sets). A literal that is exactly an empty bracket
///    pair (`()`, `[]`, `{}`) is *not* handled here and falls through.
/// 3. If `content` is a `Symbol` naming a grammar rule, the child's edges are
///    walked under the rule picked by `select_walk_rule`.
/// 4. If `content` is a `Choice` and the vertex records a pre-alias symbol,
///    the member `Symbol` with that name is walked.
/// 5. Otherwise `content` itself is walked over the child's edges.
///
/// # Errors
///
/// Propagates any `ParseError` returned by the nested `emit_production` call.
pub(crate) fn emit_aliased_child(
protocol: &str,
schema: &Schema,
grammar: &Grammar,
child_id: &panproto_gat::Name,
content: &Production,
out: &mut Output<'_>,
) -> Result<(), ParseError> {
// Strategy 1: replay the recorded source bytes when they are available.
if vertex_has_byte_span(schema, child_id) {
if let Some(bytes) = reconstruct_subtree_bytes(schema, child_id) {
out.verbatim(&bytes);
return Ok(());
}
}
// Strategy 2: a childless vertex with a literal value is emitted as one token.
if let Some(literal) = literal_value(schema, child_id) {
if children_for(schema, child_id).is_empty() {
let kind = vertex_id_kind(schema, child_id).unwrap_or("");
// "Tight" kinds are emitted with spacing suppressed on both sides.
if is_tight_content_kind(grammar, out.cassette, kind) {
out.no_space();
out.tight_token(literal);
out.no_space();
return Ok(());
}
// A bracket pair is a literal of at least two bytes whose first and last
// bytes are a matching `()`, `[]` or `{}`.
let is_bracket_pair = literal.len() >= 2
&& matches!(
(literal.as_bytes().first(), literal.as_bytes().last()),
(Some(b'('), Some(b')')) | (Some(b'['), Some(b']')) | (Some(b'{'), Some(b'}'))
);
// Exactly two bytes means nothing sits between the brackets; such a literal
// is skipped here and handled by the structural walk further down.
let is_empty_bracket_pair = is_bracket_pair && literal.len() == 2;
if !is_empty_bracket_pair {
// The literal already starts with its own space/tab: do not add another.
if grammar.leading_space_terminals.contains(kind)
&& literal.starts_with([' ', '\t'])
{
out.no_space();
}
if grammar.immediate_token_alias_kinds.contains(kind) {
out.no_space();
}
// Bracketed literals are tagged as a closing bracket; everything else gets
// the role computed by `leaf_terminal_role`.
let role = if is_bracket_pair {
TokenRole::BracketClose
} else {
leaf_terminal_role(grammar, kind)
};
out.token_with_role(literal, Some(role));
// Same idea on the trailing side: the literal ends with its own space/tab.
if grammar.leading_space_terminals.contains(kind) && literal.ends_with([' ', '\t'])
{
out.no_space();
}
return Ok(());
}
}
}
// The guard is held until the function returns.
// NOTE(review): presumably this hides the caller's field context while the
// child is walked and restores it on drop; confirm in `clear_field_context`.
let _guard = clear_field_context();
// Strategy 3: the alias wraps a single symbol that names a grammar rule.
if let Production::Symbol { name } = content {
if let Some(rule) = grammar.rules.get(name) {
let edges = children_for(schema, child_id);
let child_kind = vertex_id_kind(schema, child_id).unwrap_or(name);
let (walk_name, walk_rule) =
select_walk_rule(schema, grammar, &edges, child_kind, rule, child_id);
let mut cursor = ChildCursor::new(&edges);
// Swap in the rule being walked and restore the previous one afterwards,
// whether the walk succeeded or failed.
let old_rule = out.current_rule.take();
out.current_rule = Some(walk_name.to_owned());
let result = emit_production(
protocol,
schema,
grammar,
child_id,
walk_rule,
&mut cursor,
out,
);
out.current_rule = old_rule;
return result;
}
}
// Strategy 4: the alias wraps a choice; use the member symbol the vertex
// recorded as its pre-alias source, if that member names a grammar rule.
if let Production::Choice { members } = content {
if let Some(src) = pre_alias_symbol(schema, child_id) {
let picked = members.iter().find_map(|m| match m {
Production::Symbol { name } if name.as_str() == src => grammar.rules.get(name),
_ => None,
});
if let Some(rule) = picked {
let edges = children_for(schema, child_id);
let mut cursor = ChildCursor::new(&edges);
let old_rule = out.current_rule.take();
out.current_rule = Some(src.to_owned());
let result =
emit_production(protocol, schema, grammar, child_id, rule, &mut cursor, out);
out.current_rule = old_rule;
return result;
}
}
}
// Strategy 5: walk the alias content directly over the child's own edges.
let edges = children_for(schema, child_id);
let mut cursor = ChildCursor::new(&edges);
emit_production(
protocol,
schema,
grammar,
child_id,
content,
&mut cursor,
out,
)
}
/// Emit `production` with `child_id` as the vertex being walked.
///
/// The child is emitted as a whole vertex (via `emit_vertex`) when
/// `production` is a bare `Symbol`, or when any symbol referenced by
/// `production` is satisfied by the child's kind. Otherwise `production` is
/// walked against a fresh cursor over the child's own edges.
///
/// # Errors
///
/// Propagates the `ParseError` of the nested emit call.
pub(crate) fn emit_in_child_context(
    protocol: &str,
    schema: &Schema,
    grammar: &Grammar,
    child_id: &panproto_gat::Name,
    production: &Production,
    out: &mut Output<'_>,
) -> Result<(), ParseError> {
    let _guard = clear_field_context();

    let mut walk_as_vertex = matches!(production, Production::Symbol { .. });
    if !walk_as_vertex {
        let child_kind = schema.vertices.get(child_id).map(|v| v.kind.as_ref());
        let symbols = referenced_symbols(production);
        walk_as_vertex = symbols
            .iter()
            .any(|s| kind_satisfies_symbol(grammar, child_kind, s) || child_kind == Some(s));
    }
    if walk_as_vertex {
        return emit_vertex(protocol, schema, grammar, child_id, out);
    }

    let edges = children_for(schema, child_id);
    let mut cursor = ChildCursor::new(&edges);
    emit_production(
        protocol,
        schema,
        grammar,
        child_id,
        production,
        &mut cursor,
        out,
    )
}
/// Last-resort selection among `alternatives` when no stronger signal picked
/// one.
///
/// Preference order:
/// 1. the first alternative containing a field that matches an unconsumed
///    edge kind;
/// 2. the first newline-like `Pattern` alternative;
/// 3. when a `Blank` alternative exists: a hidden (`_`-prefixed) symbol whose
///    rule contains a newline pattern, else the `Blank` itself;
/// 4. when nothing is left to consume, or no alternative accepts any remaining
///    edge: the unmarked base literal, else the first alternative that
///    references no symbols and is not `Blank`;
/// 5. the first non-`Blank` alternative.
fn default_choice<'a>(
    schema: &Schema,
    grammar: &Grammar,
    cursor: &ChildCursor<'_>,
    alternatives: &'a [Production],
) -> Option<&'a Production> {
    // Positions of the edges that have not been consumed yet, in cursor order.
    let pending: Vec<usize> = (0..cursor.edges.len())
        .filter(|&i| !cursor.consumed[i])
        .collect();
    let any_unconsumed = !pending.is_empty();
    let edge_kinds: Vec<&str> = pending
        .iter()
        .map(|&i| cursor.edges[i].kind.as_ref())
        .collect();
    // (edge kind, target vertex kind) for every pending edge whose target exists.
    let uc_edge_pairs: Vec<(&str, &str)> = pending
        .iter()
        .filter_map(|&i| {
            let edge = &cursor.edges[i];
            schema
                .vertices
                .get(&edge.tgt)
                .map(|v| (edge.kind.as_ref(), v.kind.as_ref()))
        })
        .collect();

    if let Some(with_field) = alternatives
        .iter()
        .find(|alt| has_field_in(*alt, &edge_kinds))
    {
        return Some(with_field);
    }

    for alt in alternatives {
        if let Production::Pattern { value } = alt {
            if is_newline_like_pattern(value) {
                return Some(alt);
            }
        }
    }

    let blank = alternatives
        .iter()
        .find(|alt| matches!(alt, Production::Blank));
    if blank.is_some() {
        let hidden_newline = alternatives.iter().find(|alt| match alt {
            Production::Symbol { name } if name.starts_with('_') => grammar
                .rules
                .get(name)
                .is_some_and(|rule| contains_newline_pattern(rule)),
            _ => false,
        });
        return hidden_newline.or(blank);
    }

    let some_alt_accepts = alternatives.iter().any(|alt| {
        uc_edge_pairs
            .iter()
            .any(|&(ek, tk)| accepts_first_edge(grammar, alt, ek, tk))
    });
    if !any_unconsumed || !some_alt_accepts {
        if let Some(base) = unmarked_base_literal(alternatives) {
            return Some(base);
        }
        for alt in alternatives {
            if referenced_symbols(alt).is_empty() && !matches!(alt, Production::Blank) {
                return Some(alt);
            }
        }
    }

    alternatives
        .iter()
        .find(|alt| !matches!(alt, Production::Blank))
}
fn unmarked_base_literal(alternatives: &[Production]) -> Option<&Production> {
let tails: Vec<(&Production, String)> = alternatives
.iter()
.map(|a| match a {
Production::String { value } => Some((a, value.clone())),
Production::Pattern { value } => pattern_trailing_literal(value).map(|tail| (a, tail)),
_ => None,
})
.collect::<Option<Vec<_>>>()?;
if tails.len() < 2 {
return None;
}
let mut base: Option<&Production> = None;
for &(prod, ref val) in &tails {
if !matches!(prod, Production::String { .. }) {
continue;
}
let is_base = tails
.iter()
.all(|(_, other)| other == val || (other.len() > val.len() && other.ends_with(val)));
if is_base {
if base.is_some() {
return None; }
base = Some(prod);
}
}
base
}
/// Extract the constant text that follows a leading character class in a
/// pattern such as `[bB]'`.
///
/// The pattern must start with `[`; everything up to the first `]` is
/// skipped, then at most one quantifier (`*`, `+` or `?`) is dropped. Returns
/// `None` when nothing remains, and otherwise whatever
/// `decode_simple_pattern_literal` makes of the remainder.
fn pattern_trailing_literal(value: &str) -> Option<String> {
    let (_class, tail) = value.strip_prefix('[')?.split_once(']')?;
    let tail = tail.strip_prefix(['*', '+', '?']).unwrap_or(tail);
    if tail.is_empty() {
        None
    } else {
        crate::emit_pretty::helpers::decode_simple_pattern_literal(tail)
    }
}
/// Report whether `prod` is a `Choice` with a newline alternative or an
/// `Optional` wrapping a newline production.
///
/// A production counts as a newline when, after looking through
/// token/precedence/reserved wrappers, it is a newline-like `Pattern`, a
/// symbol listed in `grammar.external_newlines`, or a `_`-prefixed symbol
/// with no grammar rule whose name contains `newline` or `line_ending`.
fn seq_trailing_newline_separator(grammar: &Grammar, prod: &Production) -> bool {
    fn yields_newline(grammar: &Grammar, node: &Production) -> bool {
        match node {
            Production::Pattern { value } => is_newline_like_pattern(value),
            Production::Symbol { name } => {
                if grammar.external_newlines.contains(name) {
                    return true;
                }
                let hidden_without_rule =
                    name.starts_with('_') && !grammar.rules.contains_key(name);
                hidden_without_rule && (name.contains("newline") || name.contains("line_ending"))
            }
            Production::Token { content }
            | Production::ImmediateToken { content }
            | Production::Prec { content, .. }
            | Production::PrecLeft { content, .. }
            | Production::PrecRight { content, .. }
            | Production::PrecDynamic { content, .. }
            | Production::Reserved { content, .. } => yields_newline(grammar, content),
            _ => false,
        }
    }

    if let Production::Choice { members } = prod {
        return members.iter().any(|member| yields_newline(grammar, member));
    }
    if let Production::Optional { content } = prod {
        return yields_newline(grammar, content);
    }
    false
}
/// Report whether `prod` is a `Choice` with a separator alternative or an
/// `Optional` wrapping a separator.
///
/// A separator is the string `";"` or `","`, or a newline-like pattern,
/// possibly nested inside token/precedence/reserved wrappers.
fn choice_offers_separator_literal(prod: &Production) -> bool {
    fn is_separator_alt(alt: &Production) -> bool {
        // Peel wrapper productions one layer at a time until a leaf is reached.
        let mut node = alt;
        loop {
            match node {
                Production::String { value } => return matches!(value.as_str(), ";" | ","),
                Production::Pattern { value } => return is_newline_like_pattern(value),
                Production::Token { content }
                | Production::ImmediateToken { content }
                | Production::Prec { content, .. }
                | Production::PrecLeft { content, .. }
                | Production::PrecRight { content, .. }
                | Production::PrecDynamic { content, .. }
                | Production::Reserved { content, .. } => node = content,
                _ => return false,
            }
        }
    }

    match prod {
        Production::Optional { content } => is_separator_alt(content),
        Production::Choice { members } => members.iter().any(is_separator_alt),
        _ => false,
    }
}
/// Return the members of the `Choice` found beneath any stack of
/// precedence/token/reserved wrappers around `prod`.
///
/// Yields an empty slice when the innermost production is not a `Choice`.
fn unwrap_choice_body(prod: &Production) -> &[Production] {
    let mut node = prod;
    loop {
        match node {
            Production::Choice { members } => return members,
            Production::Prec { content, .. }
            | Production::PrecLeft { content, .. }
            | Production::PrecRight { content, .. }
            | Production::PrecDynamic { content, .. }
            | Production::Token { content }
            | Production::ImmediateToken { content }
            | Production::Reserved { content, .. } => node = content,
            _ => return &[],
        }
    }
}
/// Pick which of `alternatives` to emit for `vertex_id`, given the edges that
/// are still unconsumed in `cursor`.
///
/// A sequence of heuristics is tried in order and the first one that yields
/// an alternative wins:
///
/// 1. an external-newline alternative instead of a `;` literal that is
///    attested neither in the trace tokens nor in the positional layout;
/// 2. `select_choice_with_trace`, fed with the vertex's recorded constraints;
/// 3. special cases for an exhausted cursor (`Blank`, newline pattern, pure
///    literal, alternative whose yield set contains the empty string);
/// 4. a `String` alternative equal to an unconsumed child's kind or literal;
/// 5. a newline alternative when no alternative can consume anything;
/// 6. literal/symbol scoring against the recorded constraint text;
/// 7. matching on the kind of the first unconsumed child (direct symbol,
///    named alias, supertype, yield set, highest precedence);
/// 8. `default_choice`.
///
/// `current_rule` is the name of the rule being walked, if any; it is only
/// forwarded when that rule's body is this very `alternatives` slice.
pub(crate) fn pick_choice_with_cursor<'a>(
schema: &Schema,
grammar: &Grammar,
vertex_id: &panproto_gat::Name,
cursor: &ChildCursor<'_>,
alternatives: &'a [Production],
current_rule: Option<&str>,
) -> Option<&'a Production> {
// `demand`: target vertex kinds, `labels`: edge kinds, for every unconsumed
// edge whose target exists in the schema (parallel vectors).
let (demand, labels): (Vec<&str>, Vec<&str>) = cursor
.edges
.iter()
.enumerate()
.filter(|(i, _)| !cursor.consumed[*i])
.filter_map(|(_, e)| {
schema
.vertices
.get(&e.tgt)
.map(|v| (v.kind.as_ref(), e.kind.as_ref()))
})
.unzip();
let field_ctx = current_field_context();
// Field names gathered from all alternatives by `collect_field_names`.
let mut alt_field_names: std::collections::HashSet<&str> = std::collections::HashSet::new();
for alt in alternatives {
collect_field_names(alt, &mut alt_field_names);
}
// Tokens recorded on the vertex: every `ptrace-*` constraint value with its
// leading `T` removed (values without that prefix are dropped), plus the
// value of each `field:<name>` constraint whose name occurs in the
// alternatives or equals the current field context.
let trace_tokens: Vec<String> = schema
.constraints
.get(vertex_id)
.map(|cs| {
cs.iter()
.filter_map(|c| {
let s = c.sort.as_ref();
if s.starts_with("ptrace-") {
c.value.strip_prefix('T').map(ToOwned::to_owned)
} else if let Some(field) = s.strip_prefix("field:") {
(alt_field_names.contains(field) || field_ctx.as_deref() == Some(field))
.then(|| c.value.clone())
} else {
None
}
})
.collect()
})
.unwrap_or_default();
// All `field:<name>` constraints as (name, value) pairs, unfiltered.
let field_constraints: Vec<(&str, &str)> = schema
.constraints
.get(vertex_id)
.map(|cs| {
cs.iter()
.filter_map(|c| {
c.sort
.as_ref()
.strip_prefix("field:")
.map(|name| (name, c.value.as_str()))
})
.collect()
})
.unwrap_or_default();
// Keep `current_rule` only when that rule's choice body is the very slice
// we were handed (pointer identity, not structural equality).
let self_rule = current_rule.filter(|r| {
grammar
.rules
.get(*r)
.is_some_and(|body| std::ptr::eq(unwrap_choice_body(body), alternatives))
});
let consumed_count = cursor.consumed.iter().filter(|&&c| c).count();
// Values of the `interstitial-<n>` constraints ordered by `n`; sorts ending
// in `-start-byte` or with a non-numeric suffix are ignored.
let positional_interstitials: Vec<&str> = schema
.constraints
.get(vertex_id)
.map(|cs| {
let mut indexed: Vec<(usize, &str)> = cs
.iter()
.filter_map(|c| {
let s = c.sort.as_ref();
if !s.starts_with("interstitial-") || s.ends_with("-start-byte") {
return None;
}
let idx: usize = s["interstitial-".len()..].parse().ok()?;
Some((idx, c.value.as_str()))
})
.collect();
indexed.sort_by_key(|&(i, _)| i);
indexed.into_iter().map(|(_, v)| v).collect()
})
.unwrap_or_default();
// Interstitials remaining after skipping one entry per consumed edge,
// joined with single spaces.
let positional_slice: String = if positional_interstitials.is_empty() {
String::new()
} else {
positional_interstitials
.iter()
.skip(consumed_count)
.copied()
.collect::<Vec<&str>>()
.join(" ")
};
let has_ptrace = schema
.constraints
.get(vertex_id)
.is_some_and(|cs| cs.iter().any(|c| c.sort.as_ref().starts_with("ptrace-")));
// Heuristic 1: the choice offers both an external-newline symbol and a `;`
// literal. With trace data present, pick the newline symbol when `;` shows
// up in neither the trace tokens nor the remaining layout text.
if has_ptrace {
let asi_alt = alternatives.iter().position(|a| {
matches!(a, Production::Symbol { name }
if grammar.external_newlines.contains(name))
});
if let Some(asi_idx) = asi_alt {
let literal_term: Option<&str> = alternatives
.iter()
.filter_map(unwrap_to_string)
.find(|s| *s == ";");
if let Some(term) = literal_term {
let in_trace = trace_tokens.iter().any(|t| t == term);
let in_layout = positional_slice.contains(term);
if !in_trace && !in_layout {
return Some(&alternatives[asi_idx]);
}
}
}
}
// Heuristic 2: delegate to the trace-driven selector.
if let Some(idx) = super::select_choice_with_trace(
grammar,
alternatives,
&demand,
&labels,
field_ctx.as_deref(),
&field_constraints,
&trace_tokens,
self_rule,
&positional_slice,
) {
return Some(&alternatives[idx]);
}
let fingerprint_blob = schema
.constraints
.get(vertex_id)
.and_then(|cs| {
cs.iter()
.find(|c| c.sort.as_ref() == "chose-alt-fingerprint")
.map(|c| c.value.clone())
})
.unwrap_or_default();
// Text used for literal scoring below: the positional slice when there is
// one, otherwise the recorded `chose-alt-fingerprint` value (may be empty).
let constraint_blob: String = if positional_slice.is_empty() {
fingerprint_blob
} else {
positional_slice
};
// Whitespace-separated kinds from the `chose-alt-child-kinds` constraint.
let child_kinds: Vec<&str> = schema
.constraints
.get(vertex_id)
.and_then(|cs| {
cs.iter()
.find(|c| c.sort.as_ref() == "chose-alt-child-kinds")
.map(|c| c.value.split_whitespace().collect())
})
.unwrap_or_default();
// True when child kinds were recorded and `sym` is a visible (no leading
// `_`), non-supertype rule containing at least one literal string, yet
// `sym` is not among the recorded child kinds.
let concrete_named_absent = |sym: &str| -> bool {
!child_kinds.is_empty()
&& !sym.starts_with('_')
&& !grammar.supertypes.contains(sym)
&& grammar
.rules
.get(sym)
.is_some_and(|r| !literal_strings(r).is_empty())
&& !child_kinds.contains(&sym)
};
// An alias is acceptable when its source has no literals, when no
// fingerprint is available for the first unconsumed target, or when that
// fingerprint contains at least one of the source literals.
let alias_source_ok = |content: &Production, value: &str| -> bool {
let src_lits = aliased_source_literals(grammar, content);
if src_lits.is_empty() {
return true;
}
match first_unconsumed_target_fingerprint(schema, cursor, value) {
None => true,
Some(b) => src_lits.iter().any(|l| b.contains(l.as_str())),
}
};
let any_unconsumed = cursor
.edges
.iter()
.enumerate()
.any(|(i, _)| !cursor.consumed[i]);
let blank_present = alternatives.iter().any(|a| matches!(a, Production::Blank));
let edge_kinds: Vec<&str> = cursor
.edges
.iter()
.enumerate()
.filter(|(i, _)| !cursor.consumed[*i])
.map(|(_, e)| e.kind.as_ref())
.collect();
// Heuristic 3a: nothing left to consume and `Blank` is allowed.
if !any_unconsumed && blank_present {
return alternatives.iter().find(|a| matches!(a, Production::Blank));
}
// Heuristic 3b: nothing left to consume and no `Blank` alternative.
if !any_unconsumed && !blank_present {
for alt in alternatives {
if let Production::Pattern { value } = alt {
if is_newline_like_pattern(value) {
return Some(alt);
}
}
}
// Two-way choice between a single-character immediate-token literal and a
// symbol whose rule is a blank-line rule: take the blank-line symbol unless
// the literal is attested in the trace tokens.
if alternatives.len() == 2 {
let lit_alt = alternatives.iter().find(|a| {
matches!(a, Production::ImmediateToken { .. })
&& referenced_symbols(a).is_empty()
&& matches!(
literal_strings(a).as_slice(),
[s] if s.chars().count() == 1
)
});
let blank_alt = alternatives.iter().find(|a| {
matches!(a, Production::Symbol { name }
if grammar.rules.get(name).is_some_and(is_blank_line_rule))
});
if let (Some(lit), Some(blank)) = (lit_alt, blank_alt) {
let lit_attested = literal_strings(lit)
.iter()
.any(|s| trace_tokens.iter().any(|t| t == s));
if !lit_attested {
return Some(blank);
}
}
}
// First alternative made of literals only (no symbol references).
if let Some(pure_lit) = alternatives.iter().find(|alt| {
let syms = referenced_symbols(alt);
let strings = literal_strings(alt);
syms.is_empty() && !strings.is_empty()
}) {
return Some(pure_lit);
}
// First alternative whose yield set contains the empty string.
// The yield cache is a local copy, so the grammar itself is not mutated.
let mut visited = std::collections::HashSet::new();
let mut yield_cache = grammar.yield_sets.clone();
for alt in alternatives {
let ys = yield_of_production(grammar, alt, &mut visited, &mut yield_cache);
if ys.contains("") {
return Some(alt);
}
visited.clear();
}
}
// Heuristic 4: a `String` alternative that equals the kind or the literal
// value of some unconsumed child; edges are scanned in cursor order.
for edge_idx in 0..cursor.edges.len() {
if cursor.consumed[edge_idx] {
continue;
}
let edge = &cursor.edges[edge_idx];
let tgt_kind = schema.vertices.get(&edge.tgt).map(|v| v.kind.as_ref());
let tgt_lit = literal_value(schema, &edge.tgt);
for alt in alternatives {
if let Production::String { value } = alt {
if Some(value.as_str()) == tgt_kind || tgt_lit == Some(value.as_str()) {
return Some(alt);
}
}
}
}
// Heuristic 5: children remain, but every alternative's yield set is empty
// or holds only the empty string; prefer a newline alternative if present.
if any_unconsumed {
let mut visited = std::collections::HashSet::new();
let mut yield_cache = grammar.yield_sets.clone();
let all_non_consuming = alternatives.iter().all(|alt| {
let ys = yield_of_production(grammar, alt, &mut visited, &mut yield_cache);
visited.clear();
ys.is_empty() || (ys.len() == 1 && ys.contains(""))
});
if all_non_consuming {
if let Some(nl) = alternatives.iter().find(|a| is_newline_alt(grammar, a)) {
return Some(nl);
}
}
}
// Heuristic 6: score alternatives by how much of their literal text occurs
// in the recorded constraint text.
if !constraint_blob.is_empty() {
let first_uc_edge_pre = cursor
.edges
.iter()
.enumerate()
.find(|(i, _)| !cursor.consumed[*i])
.map(|(_, e)| e);
// Whether alternative `a` accepts the first unconsumed edge; false when
// there is no such edge or its target vertex is missing.
let alt_accepts = |a: &Production| -> bool {
let Some(edge) = first_uc_edge_pre else {
return false;
};
let edge_kind = edge.kind.as_ref();
let Some(tgt_kind) = schema.vertices.get(&edge.tgt).map(|v| v.kind.as_ref()) else {
return false;
};
accepts_first_edge(grammar, a, edge_kind, tgt_kind)
};
let any_consumes = any_unconsumed && alternatives.iter().any(alt_accepts);
// Ranking key: larger matched-literal length, then more referenced symbols
// found among the recorded child kinds, then smaller total literal length.
let mut best_literal: usize = 0;
let mut best_symbols: usize = 0;
let mut best_total_chars: usize = usize::MAX;
let mut best_alt: Option<&Production> = None;
let mut tied = false;
for alt in alternatives {
let strings = literal_strings(alt);
if strings.is_empty() {
continue;
}
// When some alternative can consume the first pending edge, only those
// alternatives compete.
if any_consumes && !alt_accepts(alt) {
continue;
}
// Summed byte length of the alternative's literals found in the blob.
let literal_score = strings
.iter()
.filter(|s| constraint_blob.contains(s.as_str()))
.map(String::len)
.sum::<usize>();
if literal_score == 0 {
continue;
}
let total_chars: usize = strings.iter().map(String::len).sum();
// The symbol score is only computed when it could affect the ranking.
let symbol_score = if literal_score >= best_literal && !child_kinds.is_empty() {
let symbols = referenced_symbols(alt);
symbols
.iter()
.filter(|sym| {
let sym_str: &str = sym;
if child_kinds.contains(&sym_str) {
return true;
}
grammar.subtypes.get(sym_str).is_some_and(|sub_set| {
sub_set
.iter()
.any(|sub| child_kinds.contains(&sub.as_str()))
})
})
.count()
} else {
0
};
let better = literal_score > best_literal
|| (literal_score == best_literal && symbol_score > best_symbols)
|| (literal_score == best_literal
&& symbol_score == best_symbols
&& total_chars < best_total_chars);
let same = literal_score == best_literal
&& symbol_score == best_symbols
&& total_chars == best_total_chars;
if better {
best_literal = literal_score;
best_symbols = symbol_score;
best_total_chars = total_chars;
best_alt = Some(alt);
tied = false;
} else if same && best_alt.is_some() {
tied = true;
}
}
// A tie for the best score is treated as no decision.
if let Some(alt) = best_alt {
if !tied {
if any_unconsumed {
if alt_accepts(alt) {
return Some(alt);
}
// The winner cannot take the first pending edge: still use it unless
// `Blank` is available and the winner references symbols.
if !blank_present || referenced_symbols(alt).is_empty() {
return Some(alt);
}
} else {
return Some(alt);
}
}
}
}
// Heuristic 7: match on the kind of the first unconsumed child.
let first_unconsumed_kind: Option<&str> = cursor
.edges
.iter()
.enumerate()
.find(|(i, _)| !cursor.consumed[*i])
.and_then(|(_, edge)| schema.vertices.get(&edge.tgt).map(|v| v.kind.as_ref()));
if let Some(target_kind) = first_unconsumed_kind {
let target_supers = grammar.subtypes.get(target_kind);
// 7a: several alternatives can yield the target kind; prefer the first of
// them that references an external indent-open symbol.
{
let mut match_count = 0usize;
let mut indent_alt_idx: Option<usize> = None;
let mut visited = std::collections::HashSet::new();
let mut yield_cache = grammar.yield_sets.clone();
for (i, alt) in alternatives.iter().enumerate() {
let ys = yield_of_production(grammar, alt, &mut visited, &mut yield_cache);
if ys.contains(target_kind) {
match_count += 1;
if indent_alt_idx.is_none()
&& referenced_symbols(alt)
.iter()
.any(|s| grammar.external_indent_opens.contains(*s))
{
indent_alt_idx = Some(i);
}
}
visited.clear();
}
if match_count > 1 {
if let Some(idx) = indent_alt_idx {
return Some(&alternatives[idx]);
}
}
}
// 7b: a symbol, or a named alias, whose name is exactly the target kind.
for alt in alternatives {
if let Production::Symbol { name } = alt {
if name.as_str() == target_kind {
return Some(alt);
}
}
if let Production::Alias {
named: true,
value,
content,
} = alt
{
if value.as_str() == target_kind && alias_source_ok(content, value) {
return Some(alt);
}
}
}
// 7c: a symbol or named alias found in the set `grammar.subtypes` stores
// for the target kind, unless recorded child kinds rule it out.
if let Some(supers) = target_supers {
for alt in alternatives {
if let Production::Symbol { name } = alt {
if supers.contains(name.as_str()) && !concrete_named_absent(name) {
return Some(alt);
}
}
if let Production::Alias {
named: true,
value,
content,
} = alt
{
if supers.contains(value.as_str())
&& !concrete_named_absent(value)
&& alias_source_ok(content, value)
{
return Some(alt);
}
}
}
}
// 7d: collect every alternative that passes the field, token-restriction,
// pre-alias and child-kind filters and whose yield set contains the target.
let mut visited = std::collections::HashSet::new();
let mut yield_cache = grammar.yield_sets.clone();
let mut matching_alts: Vec<&Production> = Vec::new();
for alt in alternatives {
// Skip alternatives with mandatory fields none of which is present among
// the unconsumed edge kinds.
let mandatory_fields = mandatory_field_names(alt);
if !mandatory_fields.is_empty()
&& !mandatory_fields.iter().any(|f| edge_kinds.contains(f))
{
visited.clear();
continue;
}
if !alt_satisfies_field_token_restrictions(schema, cursor, alt) {
visited.clear();
continue;
}
if !alt_satisfies_pre_alias_constraints(schema, cursor, alt) {
visited.clear();
continue;
}
let concrete_absent = match alt {
Production::Symbol { name } => concrete_named_absent(name),
Production::Alias {
named: true,
value,
content,
} => concrete_named_absent(value) || !alias_source_ok(content, value),
_ => false,
};
if concrete_absent {
visited.clear();
continue;
}
let ys = yield_of_production(grammar, alt, &mut visited, &mut yield_cache);
if ys.contains(target_kind) {
matching_alts.push(alt);
}
visited.clear();
}
if matching_alts.len() == 1 {
return Some(matching_alts[0]);
}
// Several survivors: highest `prec_value` wins; the sort is stable, so
// equal precedences keep their grammar order.
if matching_alts.len() > 1 {
matching_alts.sort_by_key(|alt| std::cmp::Reverse(prec_value(alt)));
return Some(matching_alts[0]);
}
}
// Heuristic 8: nothing above decided.
default_choice(schema, grammar, cursor, alternatives)
}
#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod review_tests {
    use super::{pattern_trailing_literal, unmarked_base_literal};
    use crate::emit_pretty::Production;

    /// Build a `Production::String` alternative.
    fn lit(text: &str) -> Production {
        Production::String { value: text.into() }
    }

    /// Build a `Production::Pattern` alternative.
    fn pat(text: &str) -> Production {
        Production::Pattern { value: text.into() }
    }

    /// True when `prod` is a `Production::String` holding exactly `expected`.
    fn is_string(prod: &Production, expected: &str) -> bool {
        matches!(prod, Production::String { value } if value == expected)
    }

    #[test]
    fn pattern_trailing_literal_extracts_constant_tail() {
        let cases = [
            ("[bB]'", Some("'")),
            ("[xX]'", Some("'")),
            ("[bB]*'", Some("'")),
            ("[bB]", None),
            ("'", None),
            ("[a-z]+[0-9]", None),
        ];
        for (pattern, expected) in cases {
            assert_eq!(
                pattern_trailing_literal(pattern).as_deref(),
                expected,
                "pattern {pattern:?}"
            );
        }
    }

    #[test]
    fn unmarked_base_prefers_bare_string_over_prefix_pattern() {
        let alts = [pat("[bB]'"), lit("'")];
        let base = unmarked_base_literal(&alts).unwrap();
        assert!(is_string(base, "'"));
    }

    #[test]
    fn unmarked_base_all_string_suffix_still_works() {
        let alts = [lit("L\""), lit("u\""), lit("U\""), lit("u8\""), lit("\"")];
        let base = unmarked_base_literal(&alts).unwrap();
        assert!(is_string(base, "\""));
    }

    #[test]
    fn unmarked_base_declines_unrelated_choice() {
        let unrelated_strings = [lit("+"), lit("-")];
        assert!(unmarked_base_literal(&unrelated_strings).is_none());

        let pattern_without_tail = [pat("[a-z]+"), lit("'")];
        assert!(unmarked_base_literal(&pattern_without_tail).is_none());
    }
}