#![allow(
clippy::module_name_repetitions,
clippy::too_many_lines,
clippy::too_many_arguments,
clippy::map_unwrap_or,
clippy::option_if_let_else,
clippy::elidable_lifetime_names,
clippy::items_after_statements,
clippy::needless_pass_by_value,
clippy::single_match_else,
clippy::manual_let_else,
clippy::match_same_arms,
clippy::missing_const_for_fn,
clippy::single_char_pattern,
clippy::naive_bytecount,
clippy::expect_used,
clippy::redundant_pub_crate,
clippy::used_underscore_binding,
clippy::redundant_field_names,
clippy::struct_field_names,
clippy::redundant_else,
clippy::similar_names
)]
use super::{
BTreeMap, Deserialize, ParseError, collect_all_symbol_refs, external_symbol_name,
extract_line_comment_prefix, has_repeat_recursive, is_immediate_token, is_newline_like_pattern,
is_prefix_sigil, is_quote_delimiter, is_rest_of_line_pattern, is_whitespace_only_pattern,
is_word_like, kind_satisfies_symbol, leading_optional_sign, literal_strings,
matching_close_bracket, pattern_absorbs_leading_space, referenced_symbols, terminal_pattern_of,
unwrap_to_seq, unwrap_to_string,
};
/// One node of a grammar rule tree, deserialized from JSON.
///
/// The variant is selected by the JSON object's `"type"` member (serde internally tagged
/// representation); each variant's `rename` gives the tag string it answers to.
/// NOTE(review): the tag set looks like the tree-sitter `grammar.json` rule schema — confirm.
#[derive(Debug, Clone, Deserialize)]
#[serde(tag = "type")]
#[non_exhaustive]
pub enum Production {
/// Ordered sequence of member productions.
#[serde(rename = "SEQ")]
Seq {
members: Vec<Self>,
},
/// Alternation between member productions.
#[serde(rename = "CHOICE")]
Choice {
members: Vec<Self>,
},
/// Repetition wrapper (tag `REPEAT`); the yield analysis in this file treats it as possibly empty.
#[serde(rename = "REPEAT")]
Repeat {
content: Box<Self>,
},
/// Repetition wrapper (tag `REPEAT1`); the yield analysis in this file treats it like its content.
#[serde(rename = "REPEAT1")]
Repeat1 {
content: Box<Self>,
},
/// Optional wrapper; the yield analysis in this file treats it as possibly empty.
#[serde(rename = "OPTIONAL")]
Optional {
content: Box<Self>,
},
/// Reference to another rule (or other symbol) by name.
#[serde(rename = "SYMBOL")]
Symbol {
name: String,
},
/// Literal text token.
#[serde(rename = "STRING")]
String {
value: String,
},
/// Pattern token; `value` holds the pattern source text.
#[serde(rename = "PATTERN")]
Pattern {
value: String,
},
/// The empty production.
#[serde(rename = "BLANK")]
Blank,
/// Wraps `content` and attaches the field name `name` to it.
#[serde(rename = "FIELD")]
Field {
name: String,
content: Box<Self>,
},
/// Wraps `content` under the alias `value`.
#[serde(rename = "ALIAS")]
Alias {
content: Box<Self>,
/// Defaults to `false` when absent from the JSON.
#[serde(default)]
named: bool,
/// Defaults to the empty string when absent; the passes in this file ignore empty alias values.
#[serde(default)]
value: String,
},
/// Token wrapper around `content`.
#[serde(rename = "TOKEN")]
Token {
content: Box<Self>,
},
/// Immediate-token wrapper around `content`.
#[serde(rename = "IMMEDIATE_TOKEN")]
ImmediateToken {
content: Box<Self>,
},
/// Precedence wrapper; `value` is kept as raw JSON and is not read by the passes in this file.
#[serde(rename = "PREC")]
Prec {
#[allow(dead_code)]
value: serde_json::Value,
content: Box<Self>,
},
/// Precedence wrapper (tag `PREC_LEFT`); `value` is kept as raw JSON.
#[serde(rename = "PREC_LEFT")]
PrecLeft {
#[allow(dead_code)]
value: serde_json::Value,
content: Box<Self>,
},
/// Precedence wrapper (tag `PREC_RIGHT`); `value` is kept as raw JSON.
#[serde(rename = "PREC_RIGHT")]
PrecRight {
#[allow(dead_code)]
value: serde_json::Value,
content: Box<Self>,
},
/// Precedence wrapper (tag `PREC_DYNAMIC`); `value` is kept as raw JSON.
#[serde(rename = "PREC_DYNAMIC")]
PrecDynamic {
#[allow(dead_code)]
value: serde_json::Value,
content: Box<Self>,
},
/// Reserved-word wrapper around `content`.
#[serde(rename = "RESERVED")]
Reserved {
content: Box<Self>,
/// Defaults to the empty string when absent from the JSON.
#[allow(dead_code)]
#[serde(default)]
context_name: String,
},
}
/// Role assigned to a literal token within a rule; stored per rule in `Grammar::token_roles`.
///
/// The notes on individual variants describe only the assignments visible in `classify_seq`;
/// other passes may assign roles as well.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum TokenRole {
/// Opening delimiter: e.g. the first literal of a sequence whose first and last literals
/// differ and enclose non-literal content.
BracketOpen,
/// Closing delimiter: e.g. the last literal of such a sequence, or a non-word literal that
/// follows the last non-literal member.
BracketClose,
// NOTE(review): no assignment of this role is visible in this part of the file.
Separator,
/// Word-like literal (as decided by `is_word_like`).
Keyword,
/// Fallback role for a non-word literal that matched none of the other cases.
Operator,
/// Sole single-byte literal in a non-choice sequence with exactly two non-literal members.
Connector,
// NOTE(review): no assignment of this role is visible in this part of the file.
Terminal,
// NOTE(review): no assignment of this role is visible in this part of the file.
Immediate,
}
/// A deserialized grammar plus the tables derived from it.
///
/// Only `name`, `rules`, `supertypes`, `extras` and `inline` come from the JSON input. Every
/// `#[serde(skip)]` field starts at its `Default` value and is filled in afterwards by
/// `Grammar::from_bytes_with_node_types` (or, for `min_children`, lazily elsewhere).
#[derive(Debug, Clone, Deserialize)]
#[non_exhaustive]
pub struct Grammar {
/// Grammar name as given in the JSON.
#[allow(dead_code)]
pub name: String,
/// Set from `extract_start_symbol` on the raw grammar bytes.
#[serde(skip)]
pub start_symbol: String,
/// Lazily initialised table; it is not populated in this part of the file.
#[serde(skip)]
pub(crate) min_children: std::sync::OnceLock<std::collections::HashMap<String, usize>>,
/// Rule name -> rule body, ordered by name.
pub rules: BTreeMap<String, Production>,
/// Supertype names; entries may be plain strings or objects carrying a `"name"`.
#[serde(default, deserialize_with = "deserialize_supertypes")]
pub supertypes: std::collections::HashSet<String>,
/// Names taken from `SYMBOL` extras and from named `ALIAS` extras.
#[serde(default, deserialize_with = "deserialize_extras")]
pub extras: std::collections::HashSet<String>,
/// Contents of the JSON `inline` array.
#[serde(
rename = "inline",
default,
deserialize_with = "deserialize_supertypes"
)]
pub inline_rules: std::collections::HashSet<String>,
/// Kind -> names that kind may stand in for; built by `compute_subtype_closure` and extended
/// by `augment_subtypes_from_node_types`.
#[serde(skip)]
pub subtypes: std::collections::HashMap<String, std::collections::HashSet<String>>,
/// Hidden-rule / supertype name -> kinds it can yield; built by `compute_yield_sets`.
/// The empty string in a set marks "may yield nothing".
#[serde(skip)]
pub yield_sets: std::collections::HashMap<String, std::collections::HashSet<String>>,
/// Parent kind -> all named child kinds (fields and non-field children), from node types.
#[serde(skip)]
pub node_type_children: std::collections::HashMap<String, std::collections::HashSet<String>>,
/// Parent kind -> field name -> named child kinds, from node types.
#[serde(skip)]
pub node_type_field_children: std::collections::HashMap<
String,
std::collections::HashMap<String, std::collections::HashSet<String>>,
>,
/// Parent kind -> named child kinds listed outside any field, from node types.
#[serde(skip)]
pub node_type_nonfield_children:
std::collections::HashMap<String, std::collections::HashSet<String>>,
/// Underscore-prefixed symbol without a rule -> first anonymous alias value seen for it.
#[serde(skip)]
pub external_alias_map: std::collections::HashMap<String, String>,
/// Rule name -> literal text -> role; built by `compute_token_roles`.
#[serde(skip)]
pub token_roles:
std::collections::HashMap<String, std::collections::HashMap<String, TokenRole>>,
/// `(rule name, literal)` pairs recorded by `compute_token_roles`.
#[serde(skip)]
pub indent_triggers: std::collections::HashSet<(String, String)>,
/// Filled by `extract_line_comment_prefixes`.
#[serde(skip)]
pub line_comment_prefixes: Vec<String>,
/// First result of `classify_trailing_break_markers`.
#[serde(skip)]
pub trailing_break_markers: Vec<String>,
/// Second result of `classify_trailing_break_markers`.
#[serde(skip)]
pub trailing_break_on_whitespace: bool,
/// Result of `classify_top_level_text_admits_newline`.
#[serde(skip)]
pub top_level_text_admits_newline: bool,
// The external_* sets below are not assigned directly in this view.
// NOTE(review): presumably filled by `classify_external_layout_tokens` and
// `classify_external_bracket_delimiters` — confirm.
#[serde(skip)]
pub external_indent_opens: std::collections::HashSet<String>,
#[serde(skip)]
pub external_indent_closes: std::collections::HashSet<String>,
#[serde(skip)]
pub external_newlines: std::collections::HashSet<String>,
#[serde(skip)]
pub external_semicolons: std::collections::HashSet<String>,
#[serde(skip)]
pub external_bracket_opens: std::collections::HashSet<String>,
#[serde(skip)]
pub external_bracket_closes: std::collections::HashSet<String>,
#[serde(skip)]
pub external_content_kinds: std::collections::HashSet<String>,
// NOTE(review): presumably filled by `classify_string_content_kinds` — confirm.
#[serde(skip)]
pub string_content_kinds: std::collections::HashSet<String>,
// NOTE(review): presumably filled by `classify_synthetic_indent_rules` — confirm.
#[serde(skip)]
pub synthetic_indent_rules: std::collections::HashSet<String>,
/// Named alias value -> first aliased symbol name; built by `build_named_alias_map`.
#[serde(skip)]
pub named_alias_map: std::collections::HashMap<String, String>,
/// Named alias value -> every aliased symbol name, deduplicated, in discovery order.
#[serde(skip)]
pub named_alias_sources: std::collections::HashMap<String, Vec<String>>,
/// Result of `classify_leading_space_terminals`.
#[serde(skip)]
pub leading_space_terminals: std::collections::HashSet<String>,
/// Result of `classify_line_rest_kinds`.
#[serde(skip)]
pub line_rest_kinds: std::collections::HashSet<String>,
/// Result of `classify_immediate_token_alias_kinds`.
#[serde(skip)]
pub immediate_token_alias_kinds: std::collections::HashSet<String>,
// NOTE(review): presumably filled by `classify_external_close_text` — confirm.
#[serde(skip)]
pub external_close_text: std::collections::HashMap<String, String>,
}
pub(crate) fn deserialize_supertypes<'de, D>(
deserializer: D,
) -> Result<std::collections::HashSet<String>, D::Error>
where
D: serde::Deserializer<'de>,
{
let entries: Vec<serde_json::Value> = Vec::deserialize(deserializer)?;
let mut out = std::collections::HashSet::new();
for entry in entries {
match entry {
serde_json::Value::String(s) => {
out.insert(s);
}
serde_json::Value::Object(map) => {
if let Some(serde_json::Value::String(name)) = map.get("name") {
out.insert(name.clone());
}
}
_ => {}
}
}
Ok(out)
}
pub(crate) fn deserialize_extras<'de, D>(
deserializer: D,
) -> Result<std::collections::HashSet<String>, D::Error>
where
D: serde::Deserializer<'de>,
{
let entries: Vec<serde_json::Value> = Vec::deserialize(deserializer)?;
let mut out = std::collections::HashSet::new();
for entry in entries {
if let serde_json::Value::Object(map) = entry {
let ty = map.get("type").and_then(serde_json::Value::as_str);
match ty {
Some("SYMBOL") => {
if let Some(serde_json::Value::String(name)) = map.get("name") {
out.insert(name.clone());
}
}
Some("ALIAS") => {
let named = map
.get("named")
.and_then(serde_json::Value::as_bool)
.unwrap_or(false);
if named {
if let Some(serde_json::Value::String(value)) = map.get("value") {
out.insert(value.clone());
}
}
}
_ => {}
}
}
}
Ok(out)
}
impl Grammar {
/// Builds a [`Grammar`] from grammar JSON bytes alone, without node-type information.
///
/// Delegates to [`Grammar::from_bytes_with_node_types`] with `None`.
///
/// # Errors
/// Returns [`ParseError::EmitFailed`] carrying `protocol` when deserialization fails.
pub fn from_bytes(protocol: &str, bytes: &[u8]) -> Result<Self, ParseError> {
Self::from_bytes_with_node_types(protocol, bytes, None)
}
/// Builds a [`Grammar`] and populates every derived table.
///
/// `protocol` is used only to label the error. When `node_types_bytes` is given, the
/// node-type child tables are filled in, field names are reconciled against them and the
/// subtype table is augmented from them.
///
/// The derivation steps run in a fixed order because later steps read tables produced by
/// earlier ones; do not reorder them without checking each pass's inputs.
///
/// # Errors
/// Returns [`ParseError::EmitFailed`] when `grammar_bytes` does not deserialize.
pub fn from_bytes_with_node_types(
protocol: &str,
grammar_bytes: &[u8],
node_types_bytes: Option<&[u8]>,
) -> Result<Self, ParseError> {
let mut grammar: Self =
serde_json::from_slice(grammar_bytes).map_err(|e| ParseError::EmitFailed {
protocol: protocol.to_owned(),
reason: format!("grammar.json deserialization failed: {e}"),
})?;
grammar.start_symbol = extract_start_symbol(grammar_bytes);
grammar.subtypes = compute_subtype_closure(&grammar);
grammar.named_alias_map = build_named_alias_map(&grammar);
grammar.named_alias_sources = build_named_alias_sources(&grammar);
// First yield-set computation, done before the node-type passes below run.
grammar.yield_sets = compute_yield_sets(&grammar);
if let Some(nt_bytes) = node_types_bytes {
let (all_children, field_children, nonfield_children) =
build_node_type_children(nt_bytes);
grammar.node_type_children = all_children;
grammar.node_type_field_children = field_children;
grammar.node_type_nonfield_children = nonfield_children;
// Renames fields inside `rules`; must run before the subtype augmentation, which
// looks rule fields up by their node-type names.
reconcile_field_names(&mut grammar);
augment_subtypes_from_node_types(&mut grammar);
}
// NOTE(review): `yield_sets` is recomputed here and once more at the end. The visible
// `compute_yield_sets` reads only `rules` and `supertypes`, so these recomputations
// matter only if an intervening pass mutates those — confirm before removing either.
grammar.yield_sets = compute_yield_sets(&grammar);
grammar.external_alias_map = build_external_alias_map(&grammar);
let (token_roles, indent_triggers) = compute_token_roles(&grammar);
grammar.token_roles = token_roles;
grammar.indent_triggers = indent_triggers;
grammar.line_comment_prefixes = extract_line_comment_prefixes(&grammar);
let (tb_markers, tb_ws) = classify_trailing_break_markers(&grammar);
grammar.trailing_break_markers = tb_markers;
grammar.trailing_break_on_whitespace = tb_ws;
grammar.top_level_text_admits_newline = classify_top_level_text_admits_newline(&grammar);
// These passes take `&mut Grammar` and store their results on the grammar themselves.
classify_external_layout_tokens(&mut grammar);
classify_external_bracket_delimiters(&mut grammar);
classify_external_close_text(&mut grammar);
classify_string_content_kinds(&mut grammar);
classify_synthetic_indent_rules(&mut grammar);
grammar.leading_space_terminals = classify_leading_space_terminals(&grammar);
grammar.line_rest_kinds = classify_line_rest_kinds(&grammar);
grammar.immediate_token_alias_kinds = classify_immediate_token_alias_kinds(&grammar);
grammar.yield_sets = compute_yield_sets(&grammar);
Ok(grammar)
}
}
/// Builds the `kind -> names that kind may stand in for` table.
///
/// The table is assembled in four stages:
/// 1. every rule name maps to itself;
/// 2. for every dispatch rule (underscore-prefixed name or supertype), each kind reachable
///    from its body through choices and wrappers gets the dispatch rule's name added;
/// 3. for every named alias, the alias value maps to itself and each kind reachable from the
///    aliased content gets the alias value added;
/// 4. the relation is closed transitively over dispatch names: if `k` maps to a dispatch
///    name `s`, it also maps to every dispatch name reachable from `s`. Cycles among
///    dispatch names are collapsed with a strongly-connected-components pass.
///
/// Sequences are deliberately not descended into when collecting reachable kinds (stage 2
/// and the content walk of stage 3), so only single-node alternatives count.
pub(crate) fn compute_subtype_closure(
grammar: &Grammar,
) -> std::collections::HashMap<String, std::collections::HashSet<String>> {
use std::collections::{HashMap, HashSet};
let mut subtypes: HashMap<String, HashSet<String>> = HashMap::new();
// Stage 1: reflexive entries for every rule.
for name in grammar.rules.keys() {
subtypes
.entry(name.clone())
.or_default()
.insert(name.clone());
}
// Collects into `out` every symbol name and named alias value reachable from
// `production` without crossing a sequence. Dispatch symbols are recorded and then
// expanded; `visited` ensures each one is expanded at most once per walk.
fn walk<'g>(
grammar: &'g Grammar,
production: &'g Production,
visited: &mut HashSet<&'g str>,
out: &mut HashSet<String>,
) {
match production {
Production::Symbol { name } => {
out.insert(name.clone());
let expand = name.starts_with('_') || grammar.supertypes.contains(name.as_str());
if expand && visited.insert(name.as_str()) {
if let Some(rule) = grammar.rules.get(name) {
walk(grammar, rule, visited, out);
}
}
}
Production::Choice { members } => {
for m in members {
walk(grammar, m, visited, out);
}
}
Production::Alias {
content,
named,
value,
} => {
if *named && !value.is_empty() {
out.insert(value.clone());
}
walk(grammar, content, visited, out);
}
Production::Repeat { content }
| Production::Repeat1 { content }
| Production::Optional { content }
| Production::Field { content, .. }
| Production::Token { content }
| Production::ImmediateToken { content }
| Production::Prec { content, .. }
| Production::PrecLeft { content, .. }
| Production::PrecRight { content, .. }
| Production::PrecDynamic { content, .. }
| Production::Reserved { content, .. } => {
walk(grammar, content, visited, out);
}
_ => {}
}
}
// Stage 2: every kind reachable from a dispatch rule is a subtype of that rule.
for (name, rule) in &grammar.rules {
let expand = name.starts_with('_') || grammar.supertypes.contains(name.as_str());
if !expand {
continue;
}
let mut visited: HashSet<&str> = HashSet::new();
// Pre-mark the rule itself so a self-reference is not expanded again.
visited.insert(name.as_str());
let mut reachable: HashSet<String> = HashSet::new();
walk(grammar, rule, &mut visited, &mut reachable);
for kind in &reachable {
subtypes
.entry(kind.clone())
.or_default()
.insert(name.clone());
}
}
// Gathers `(alias value, aliased content)` for every named, non-empty alias anywhere in
// `production`; unlike `walk`, this does descend into sequences.
fn collect_aliases<'g>(production: &'g Production, out: &mut Vec<(String, &'g Production)>) {
match production {
Production::Alias {
content,
named,
value,
} => {
if *named && !value.is_empty() {
out.push((value.clone(), content.as_ref()));
}
collect_aliases(content, out);
}
Production::Choice { members } | Production::Seq { members } => {
for m in members {
collect_aliases(m, out);
}
}
Production::Repeat { content }
| Production::Repeat1 { content }
| Production::Optional { content }
| Production::Field { content, .. }
| Production::Token { content }
| Production::ImmediateToken { content }
| Production::Prec { content, .. }
| Production::PrecLeft { content, .. }
| Production::PrecRight { content, .. }
| Production::PrecDynamic { content, .. }
| Production::Reserved { content, .. } => {
collect_aliases(content, out);
}
_ => {}
}
}
let mut aliases: Vec<(String, &Production)> = Vec::new();
for rule in grammar.rules.values() {
collect_aliases(rule, &mut aliases);
}
// Stage 3: a named alias behaves like a dispatch name for whatever its content can be.
for (alias_value, content) in aliases {
let mut visited: HashSet<&str> = HashSet::new();
let mut reachable: HashSet<String> = HashSet::new();
walk(grammar, content, &mut visited, &mut reachable);
subtypes
.entry(alias_value.clone())
.or_default()
.insert(alias_value.clone());
for kind in reachable {
subtypes
.entry(kind)
.or_default()
.insert(alias_value.clone());
}
}
// Stage 4: transitive closure restricted to dispatch names.
let is_dispatch = |s: &str| s.starts_with('_') || grammar.supertypes.contains(s);
// Graph nodes: every dispatch name that occurs as a key or as a value in `subtypes`.
let mut nodes: HashSet<String> = HashSet::new();
for (k, vs) in &subtypes {
if is_dispatch(k) {
nodes.insert(k.clone());
}
for v in vs {
if is_dispatch(v) {
nodes.insert(v.clone());
}
}
}
let nodes: Vec<String> = nodes.into_iter().collect();
let index_of: HashMap<&str, usize> = nodes
.iter()
.enumerate()
.map(|(i, n)| (n.as_str(), i))
.collect();
// Edge i -> j: dispatch name i is a subtype of dispatch name j (self-loops dropped).
let mut edges: Vec<Vec<usize>> = vec![Vec::new(); nodes.len()];
for (i, name) in nodes.iter().enumerate() {
if let Some(targets) = subtypes.get(name) {
for t in targets {
if let Some(&j) = index_of.get(t.as_str()) {
if i != j {
edges[i].push(j);
}
}
}
}
}
// Iterative Tarjan strongly-connected-components pass; returns the component id of each
// node. `work` is the explicit DFS stack of `(node, next edge index)` frames. A component
// is numbered only once everything reachable from it has been numbered, so successor
// components always receive smaller ids.
fn tarjan(edges: &[Vec<usize>]) -> Vec<usize> {
let n = edges.len();
let mut comp = vec![usize::MAX; n];
// `usize::MAX` marks "not yet visited".
let mut index_arr = vec![usize::MAX; n];
let mut lowlink = vec![0usize; n];
let mut on_stack = vec![false; n];
let mut stack: Vec<usize> = Vec::new();
let mut next_index = 0usize;
let mut next_comp = 0usize;
let mut work: Vec<(usize, usize)> = Vec::new();
for start in 0..n {
if index_arr[start] != usize::MAX {
continue;
}
work.push((start, 0));
index_arr[start] = next_index;
lowlink[start] = next_index;
next_index += 1;
stack.push(start);
on_stack[start] = true;
while let Some(&(v, i)) = work.last() {
if i < edges[v].len() {
let w = edges[v][i];
// Advance this frame's edge cursor before possibly pushing a child frame.
if let Some(slot) = work.last_mut() {
slot.1 += 1;
}
if index_arr[w] == usize::MAX {
index_arr[w] = next_index;
lowlink[w] = next_index;
next_index += 1;
stack.push(w);
on_stack[w] = true;
work.push((w, 0));
} else if on_stack[w] && index_arr[w] < lowlink[v] {
lowlink[v] = index_arr[w];
}
} else {
// All edges of `v` handled: close its component if `v` is a root.
if lowlink[v] == index_arr[v] {
while let Some(w) = stack.pop() {
on_stack[w] = false;
comp[w] = next_comp;
if w == v {
break;
}
}
next_comp += 1;
}
let lv = lowlink[v];
work.pop();
// Propagate the finished child's lowlink to its DFS parent.
if let Some(&(parent, _)) = work.last() {
if lv < lowlink[parent] {
lowlink[parent] = lv;
}
}
}
}
}
comp
}
let comp = tarjan(&edges);
let num_comps = comp.iter().max().copied().map_or(0, |m| m + 1);
let mut scc_members: Vec<Vec<usize>> = vec![Vec::new(); num_comps];
for (v, &c) in comp.iter().enumerate() {
scc_members[c].push(v);
}
// Per-component closure: own members plus the closures of all successor components.
// Ascending id order is safe because successors were numbered first by `tarjan`.
let mut scc_closure: Vec<HashSet<String>> = vec![HashSet::new(); num_comps];
for c in 0..num_comps {
let mut closure: HashSet<String> = HashSet::new();
for &v in &scc_members[c] {
closure.insert(nodes[v].clone());
}
for &v in &scc_members[c] {
for &w in &edges[v] {
let wc = comp[w];
if wc != c {
closure.extend(scc_closure[wc].iter().cloned());
}
}
}
scc_closure[c] = closure;
}
// Rewrite every entry: keep what was there and add the closure of each dispatch name in it.
let keys: Vec<String> = subtypes.keys().cloned().collect();
for k in keys {
let existing = subtypes.remove(&k).unwrap_or_default();
let mut new_set: HashSet<String> = HashSet::new();
for s in &existing {
new_set.insert(s.clone());
if let Some(&i) = index_of.get(s.as_str()) {
new_set.extend(scc_closure[comp[i]].iter().cloned());
}
}
subtypes.insert(k, new_set);
}
subtypes
}
pub(crate) fn compute_yield_sets(
grammar: &Grammar,
) -> std::collections::HashMap<String, std::collections::HashSet<String>> {
let mut cache: std::collections::HashMap<String, std::collections::HashSet<String>> =
std::collections::HashMap::new();
for (name, rule) in &grammar.rules {
let expand = name.starts_with('_') || grammar.supertypes.contains(name.as_str());
if !expand {
continue;
}
if cache.contains_key(name) {
continue;
}
let mut visited = std::collections::HashSet::new();
let ys = yield_of_production(grammar, rule, &mut visited, &mut cache);
cache.insert(name.clone(), ys);
}
cache
}
/// Returns the set of node kinds `production` can yield as its first visible node.
///
/// * A non-dispatch symbol yields itself; a dispatch symbol (underscore-prefixed or a
///   supertype) yields whatever its rule yields.
/// * A named, non-empty alias yields its alias value; any other alias yields its content.
/// * The empty string in the result means "may yield nothing". Literals and patterns
///   contribute the empty *set*, which a sequence skips over while looking for its first
///   yielding member.
///
/// `visited` holds the dispatch symbols currently being expanded; a reference back to one
/// of them is cut off and contributes nothing. `cache` memoises finished dispatch symbols.
///
/// A result computed while such a cutoff was in effect is incomplete with respect to the
/// still-unfinished outer symbol, so it is **not** memoised; only results that do not
/// depend on an unfinished outer symbol are cached. (Caching those partial results
/// previously left members of a reference cycle with permanently truncated yield sets.)
/// The trade-off is that symbols inside a cycle may be expanded more than once.
pub(crate) fn yield_of_production(
    grammar: &Grammar,
    production: &Production,
    visited: &mut std::collections::HashSet<String>,
    cache: &mut std::collections::HashMap<String, std::collections::HashSet<String>>,
) -> std::collections::HashSet<String> {
    let mut cut_at = std::collections::HashSet::new();
    yield_of_production_tracked(grammar, production, visited, cache, &mut cut_at)
}

/// Worker for [`yield_of_production`]. `cut_at` accumulates the in-progress symbols at which
/// a cyclic reference was cut off during the computation of the current subtree.
fn yield_of_production_tracked(
    grammar: &Grammar,
    production: &Production,
    visited: &mut std::collections::HashSet<String>,
    cache: &mut std::collections::HashMap<String, std::collections::HashSet<String>>,
    cut_at: &mut std::collections::HashSet<String>,
) -> std::collections::HashSet<String> {
    use std::collections::HashSet;

    match production {
        Production::Symbol { name } => {
            let expand = name.starts_with('_') || grammar.supertypes.contains(name.as_str());
            if !expand {
                return HashSet::from([name.clone()]);
            }
            if let Some(cached) = cache.get(name) {
                return cached.clone();
            }
            if !visited.insert(name.clone()) {
                // Cyclic reference to a symbol still being expanded: contribute nothing and
                // remember that everything up to that symbol is now provisional.
                cut_at.insert(name.clone());
                return HashSet::new();
            }
            // Track cutoffs of this subtree separately from those of earlier siblings.
            let outer_cuts = std::mem::take(cut_at);
            let result = match grammar.rules.get(name) {
                Some(rule) => yield_of_production_tracked(grammar, rule, visited, cache, cut_at),
                None => HashSet::new(),
            };
            visited.remove(name);
            // A cutoff at `name` itself is resolved now that `name` is finished.
            cut_at.remove(name);
            if cut_at.is_empty() {
                cache.insert(name.clone(), result.clone());
            }
            cut_at.extend(outer_cuts);
            result
        }
        Production::Alias {
            content,
            named,
            value,
        } => {
            if *named && !value.is_empty() {
                HashSet::from([value.clone()])
            } else {
                yield_of_production_tracked(grammar, content, visited, cache, cut_at)
            }
        }
        Production::Seq { members } => {
            if members.is_empty() {
                return HashSet::from([String::new()]);
            }
            let mut combined = HashSet::new();
            for member in members {
                let ys = yield_of_production_tracked(grammar, member, visited, cache, cut_at);
                if ys.is_empty() {
                    // Literal/pattern (or cut-off reference): look at the next member.
                    continue;
                }
                let has_epsilon = ys.contains("");
                combined.extend(ys);
                if !has_epsilon {
                    // This member always produces a node, so later members cannot be first.
                    break;
                }
            }
            combined
        }
        Production::Choice { members } => {
            let mut union = HashSet::new();
            for member in members {
                union.extend(yield_of_production_tracked(
                    grammar, member, visited, cache, cut_at,
                ));
            }
            union
        }
        Production::Optional { content } | Production::Repeat { content } => {
            let mut set = yield_of_production_tracked(grammar, content, visited, cache, cut_at);
            set.insert(String::new());
            set
        }
        Production::Blank => HashSet::from([String::new()]),
        Production::String { .. } | Production::Pattern { .. } => HashSet::new(),
        Production::Repeat1 { content }
        | Production::Field { content, .. }
        | Production::Token { content }
        | Production::ImmediateToken { content }
        | Production::Prec { content, .. }
        | Production::PrecLeft { content, .. }
        | Production::PrecRight { content, .. }
        | Production::PrecDynamic { content, .. }
        | Production::Reserved { content, .. } => {
            yield_of_production_tracked(grammar, content, visited, cache, cut_at)
        }
    }
}
/// Return type of `build_node_type_children`, in this order:
/// 1. parent kind -> all named child kinds;
/// 2. parent kind -> field name -> named child kinds;
/// 3. parent kind -> named child kinds listed outside any field.
pub(crate) type NodeTypeResult = (
std::collections::HashMap<String, std::collections::HashSet<String>>,
std::collections::HashMap<
String,
std::collections::HashMap<String, std::collections::HashSet<String>>,
>,
std::collections::HashMap<String, std::collections::HashSet<String>>,
);
/// Extracts parent/child kind tables from node-types JSON bytes.
///
/// Only named parent entries and named children are recorded. The first table holds the
/// union of field and non-field children per parent (parents without any named child are
/// omitted); the second is keyed by field name; the third holds the non-field children.
///
/// If the bytes cannot be parsed, three empty tables are returned rather than an error.
pub(crate) fn build_node_type_children(nt_bytes: &[u8]) -> NodeTypeResult {
    use std::collections::{HashMap, HashSet};

    let mut every_child: HashMap<String, HashSet<String>> = HashMap::new();
    let mut by_field: HashMap<String, HashMap<String, HashSet<String>>> = HashMap::new();
    let mut outside_fields: HashMap<String, HashSet<String>> = HashMap::new();

    let parsed: Vec<crate::theory_extract::NodeType> =
        if let Ok(list) = crate::theory_extract::parse_node_types(nt_bytes) {
            list
        } else {
            return (every_child, by_field, outside_fields);
        };

    for node in &parsed {
        if !node.named {
            continue;
        }
        let mut seen = HashSet::new();

        for (field_name, spec) in &node.fields {
            let Some(candidates) = spec.get("types").and_then(|t| t.as_array()) else {
                continue;
            };
            for candidate in candidates {
                let kind = candidate.get("type").and_then(|n| n.as_str());
                let flagged_named = candidate.get("named").and_then(serde_json::Value::as_bool);
                if let (Some(kind), Some(true)) = (kind, flagged_named) {
                    seen.insert(kind.to_owned());
                    by_field
                        .entry(node.node_type.clone())
                        .or_default()
                        .entry(field_name.clone())
                        .or_default()
                        .insert(kind.to_owned());
                }
            }
        }

        if let Some(children) = &node.children {
            for child in &children.types {
                if !child.named {
                    continue;
                }
                seen.insert(child.node_type.clone());
                outside_fields
                    .entry(node.node_type.clone())
                    .or_default()
                    .insert(child.node_type.clone());
            }
        }

        if !seen.is_empty() {
            every_child.insert(node.node_type.clone(), seen);
        }
    }

    (every_child, by_field, outside_fields)
}
/// Adds subtype edges for child kinds that node types report but the grammar has no rule for.
///
/// For each parent kind that has both node-type children and a grammar rule, the symbols of
/// the rule are grouped by field (plus the symbols outside any field). A rule-less child
/// kind reported for a field (or as a non-field child) is then registered as a subtype of
/// every symbol in the matching group that is a dispatch target and that the kind does not
/// already satisfy.
///
/// All candidate pairs are collected against the unmodified grammar first and applied
/// afterwards, so one addition never influences the check for another.
pub(crate) fn augment_subtypes_from_node_types(grammar: &mut Grammar) {
    use std::collections::HashMap;

    // A symbol can absorb extra kinds when it is hidden, a supertype, has no rule at all,
    // or has a rule without literal strings.
    fn is_dispatch_target(grammar: &Grammar, symbol: &str) -> bool {
        if symbol.starts_with('_') || grammar.supertypes.contains(symbol) {
            return true;
        }
        match grammar.rules.get(symbol) {
            None => true,
            Some(rule) => literal_strings(rule).is_empty(),
        }
    }

    let snapshot: &Grammar = grammar;
    let mut additions: Vec<(String, String)> = Vec::new();

    for parent_kind in snapshot.node_type_children.keys() {
        let Some(rule) = snapshot.rules.get(parent_kind) else {
            continue;
        };
        let mut symbols_by_field: HashMap<String, Vec<String>> = HashMap::new();
        let mut loose_symbols: Vec<String> = Vec::new();
        collect_field_symbols(rule, &mut symbols_by_field, &mut loose_symbols, false);

        // Queues `(child_kind, symbol)` for every candidate symbol the child should satisfy.
        let mut propose = |child_kind: &String, candidates: &[String]| {
            if snapshot.rules.contains_key(child_kind) {
                return;
            }
            for symbol in candidates {
                if is_dispatch_target(snapshot, symbol)
                    && !kind_satisfies_symbol(snapshot, Some(child_kind), symbol)
                {
                    additions.push((child_kind.clone(), symbol.clone()));
                }
            }
        };

        if let Some(fields) = snapshot.node_type_field_children.get(parent_kind) {
            for (field_name, child_kinds) in fields {
                if let Some(candidates) = symbols_by_field.get(field_name) {
                    for child_kind in child_kinds {
                        propose(child_kind, candidates);
                    }
                }
            }
        }
        if let Some(child_kinds) = snapshot.node_type_nonfield_children.get(parent_kind) {
            for child_kind in child_kinds {
                propose(child_kind, &loose_symbols);
            }
        }
    }

    for (child_kind, symbol) in additions {
        grammar.subtypes.entry(child_kind).or_default().insert(symbol);
    }
}
/// Aligns field names used in grammar rules with the names reported by node types.
///
/// For each kind that has node-type fields and a rule: if exactly one field name occurs
/// only in the rule and exactly one occurs only in the node types, the rule's field is
/// renamed to the node-type name. Any other mismatch is left untouched.
pub(crate) fn reconcile_field_names(grammar: &mut Grammar) {
    use std::collections::HashSet;

    // Yields the item of a one-element iterator, `None` for zero or several items.
    fn sole<T>(mut items: impl Iterator<Item = T>) -> Option<T> {
        let first = items.next()?;
        if items.next().is_some() {
            None
        } else {
            Some(first)
        }
    }

    let mut pending: Vec<(String, String, String)> = Vec::new();
    for (kind, declared) in &grammar.node_type_field_children {
        let Some(rule) = grammar.rules.get(kind) else {
            continue;
        };
        let mut in_rule: HashSet<String> = HashSet::new();
        collect_grammar_field_names(rule, &mut in_rule);

        let rule_only = sole(
            in_rule
                .iter()
                .filter(|field| !declared.contains_key(field.as_str())),
        );
        let node_types_only = sole(
            declared
                .keys()
                .filter(|field| !in_rule.contains(field.as_str())),
        );
        if let (Some(old_name), Some(new_name)) = (rule_only, node_types_only) {
            pending.push((kind.clone(), old_name.clone(), new_name.clone()));
        }
    }

    for (kind, old_name, new_name) in pending {
        if let Some(rule) = grammar.rules.get_mut(&kind) {
            rename_field_in(rule, &old_name, &new_name);
        }
    }
}
/// Inserts into `out` the name of every `Field` found anywhere inside `prod`,
/// including fields nested inside other fields.
fn collect_grammar_field_names(prod: &Production, out: &mut std::collections::HashSet<String>) {
    // Explicit work stack instead of recursion; visiting order is irrelevant for a set.
    let mut pending: Vec<&Production> = vec![prod];
    while let Some(node) = pending.pop() {
        match node {
            Production::Field { name, content } => {
                out.insert(name.clone());
                pending.push(&**content);
            }
            Production::Choice { members } | Production::Seq { members } => {
                pending.extend(members);
            }
            Production::Repeat { content }
            | Production::Repeat1 { content }
            | Production::Optional { content }
            | Production::Alias { content, .. }
            | Production::Token { content }
            | Production::ImmediateToken { content }
            | Production::Prec { content, .. }
            | Production::PrecLeft { content, .. }
            | Production::PrecRight { content, .. }
            | Production::PrecDynamic { content, .. }
            | Production::Reserved { content, .. } => pending.push(&**content),
            _ => {}
        }
    }
}
/// Renames every `Field` called `from` to `to` anywhere inside `prod`, including fields
/// nested inside other fields. Fields with other names are left alone.
fn rename_field_in(prod: &mut Production, from: &str, to: &str) {
    // Explicit work stack instead of recursion; each node is visited exactly once.
    let mut pending: Vec<&mut Production> = vec![prod];
    while let Some(node) = pending.pop() {
        match node {
            Production::Field { name, content } => {
                if name.as_str() == from {
                    *name = to.to_owned();
                }
                pending.push(&mut **content);
            }
            Production::Choice { members } | Production::Seq { members } => {
                pending.extend(members.iter_mut());
            }
            Production::Repeat { content }
            | Production::Repeat1 { content }
            | Production::Optional { content }
            | Production::Alias { content, .. }
            | Production::Token { content }
            | Production::ImmediateToken { content }
            | Production::Prec { content, .. }
            | Production::PrecLeft { content, .. }
            | Production::PrecRight { content, .. }
            | Production::PrecDynamic { content, .. }
            | Production::Reserved { content, .. } => pending.push(&mut **content),
            _ => {}
        }
    }
}
/// Splits the symbols referenced by `prod` into per-field groups and a non-field list.
///
/// * Everything below a `Field` (all symbols, flattened) is appended to that field's entry
///   in `field_map`; an entry is created even when the field contains no symbol.
/// * A symbol met outside any field is appended to `non_field`, unless `inside_field` is
///   `true`, in which case it is dropped.
///
/// `inside_field` is passed through unchanged to every recursive call.
pub(crate) fn collect_field_symbols(
    prod: &Production,
    field_map: &mut std::collections::HashMap<String, Vec<String>>,
    non_field: &mut Vec<String>,
    inside_field: bool,
) {
    match prod {
        Production::Field { name, content } => {
            let bucket = field_map.entry(name.clone()).or_default();
            collect_symbols_flat(content, bucket);
        }
        Production::Symbol { name } => {
            if !inside_field {
                non_field.push(name.clone());
            }
        }
        Production::Choice { members } | Production::Seq { members } => {
            members
                .iter()
                .for_each(|member| collect_field_symbols(member, field_map, non_field, inside_field));
        }
        Production::Repeat { content }
        | Production::Repeat1 { content }
        | Production::Optional { content }
        | Production::Alias { content, .. }
        | Production::Token { content }
        | Production::ImmediateToken { content }
        | Production::Prec { content, .. }
        | Production::PrecLeft { content, .. }
        | Production::PrecRight { content, .. }
        | Production::PrecDynamic { content, .. }
        | Production::Reserved { content, .. } => {
            collect_field_symbols(content, field_map, non_field, inside_field);
        }
        _ => {}
    }
}
/// Appends to `out` the name of every `Symbol` inside `prod`, in left-to-right
/// (pre-order) order, keeping duplicates. Fields and aliases are looked through.
pub(crate) fn collect_symbols_flat(prod: &Production, out: &mut Vec<String>) {
    // Explicit work stack; members are pushed in reverse so they pop in source order.
    let mut pending: Vec<&Production> = vec![prod];
    while let Some(node) = pending.pop() {
        match node {
            Production::Symbol { name } => out.push(name.clone()),
            Production::Choice { members } | Production::Seq { members } => {
                pending.extend(members.iter().rev());
            }
            Production::Repeat { content }
            | Production::Repeat1 { content }
            | Production::Optional { content }
            | Production::Alias { content, .. }
            | Production::Field { content, .. }
            | Production::Token { content }
            | Production::ImmediateToken { content }
            | Production::Prec { content, .. }
            | Production::PrecLeft { content, .. }
            | Production::PrecRight { content, .. }
            | Production::PrecDynamic { content, .. }
            | Production::Reserved { content, .. } => pending.push(&**content),
            _ => {}
        }
    }
}
/// Maps underscore-prefixed symbols that have no rule to the anonymous alias they appear under.
///
/// An entry is recorded for an alias that is not named, has a non-empty value and directly
/// wraps a `Symbol` whose name starts with `_` and is absent from `grammar.rules`. When the
/// same symbol is aliased more than once, the first alias met wins (rules in map order,
/// each rule scanned left to right).
pub(crate) fn build_external_alias_map(
    grammar: &Grammar,
) -> std::collections::HashMap<String, String> {
    let mut aliases = std::collections::HashMap::new();
    for rule in grammar.rules.values() {
        // Explicit work stack; members are pushed in reverse so they pop in source order.
        let mut pending: Vec<&Production> = vec![rule];
        while let Some(node) = pending.pop() {
            match node {
                Production::Alias {
                    content,
                    named,
                    value,
                } => {
                    let target: &Production = content;
                    if let Production::Symbol { name } = target {
                        let anonymous_alias = !*named && !value.is_empty();
                        let rule_less_hidden =
                            name.starts_with('_') && !grammar.rules.contains_key(name);
                        if anonymous_alias && rule_less_hidden {
                            aliases.entry(name.clone()).or_insert_with(|| value.clone());
                        }
                    }
                    pending.push(target);
                }
                Production::Choice { members } | Production::Seq { members } => {
                    pending.extend(members.iter().rev());
                }
                Production::Repeat { content }
                | Production::Repeat1 { content }
                | Production::Optional { content }
                | Production::Field { content, .. }
                | Production::Token { content }
                | Production::ImmediateToken { content }
                | Production::Prec { content, .. }
                | Production::PrecLeft { content, .. }
                | Production::PrecRight { content, .. }
                | Production::PrecDynamic { content, .. }
                | Production::Reserved { content, .. } => pending.push(&**content),
                _ => {}
            }
        }
    }
    aliases
}
/// Maps each named alias value to the symbol it aliases.
///
/// Only aliases that are named, have a non-empty value and directly wrap a `Symbol` are
/// recorded. When one alias value is used for several symbols, the first one met wins
/// (rules in map order, each rule scanned left to right).
pub(crate) fn build_named_alias_map(
    grammar: &Grammar,
) -> std::collections::HashMap<String, String> {
    let mut first_source = std::collections::HashMap::new();
    for rule in grammar.rules.values() {
        // Explicit work stack; members are pushed in reverse so they pop in source order.
        let mut pending: Vec<&Production> = vec![rule];
        while let Some(node) = pending.pop() {
            match node {
                Production::Alias {
                    content,
                    named,
                    value,
                } => {
                    let target: &Production = content;
                    if let Production::Symbol { name } = target {
                        if *named && !value.is_empty() {
                            first_source
                                .entry(value.clone())
                                .or_insert_with(|| name.clone());
                        }
                    }
                    pending.push(target);
                }
                Production::Choice { members } | Production::Seq { members } => {
                    pending.extend(members.iter().rev());
                }
                Production::Repeat { content }
                | Production::Repeat1 { content }
                | Production::Optional { content }
                | Production::Field { content, .. }
                | Production::Token { content }
                | Production::ImmediateToken { content }
                | Production::Prec { content, .. }
                | Production::PrecLeft { content, .. }
                | Production::PrecRight { content, .. }
                | Production::PrecDynamic { content, .. }
                | Production::Reserved { content, .. } => pending.push(&**content),
                _ => {}
            }
        }
    }
    first_source
}
/// Maps each named alias value to every symbol it aliases.
///
/// Only aliases that are named, have a non-empty value and directly wrap a `Symbol` are
/// recorded. Sources are listed without duplicates in the order they are first met (rules
/// in map order, each rule scanned left to right).
pub(crate) fn build_named_alias_sources(
    grammar: &Grammar,
) -> std::collections::HashMap<String, Vec<String>> {
    let mut sources: std::collections::HashMap<String, Vec<String>> =
        std::collections::HashMap::new();
    for rule in grammar.rules.values() {
        // Explicit work stack; members are pushed in reverse so they pop in source order.
        let mut pending: Vec<&Production> = vec![rule];
        while let Some(node) = pending.pop() {
            match node {
                Production::Alias {
                    content,
                    named,
                    value,
                } => {
                    let target: &Production = content;
                    if let Production::Symbol { name } = target {
                        if *named && !value.is_empty() {
                            let known = sources.entry(value.clone()).or_default();
                            if known.iter().all(|existing| existing != name) {
                                known.push(name.clone());
                            }
                        }
                    }
                    pending.push(target);
                }
                Production::Choice { members } | Production::Seq { members } => {
                    pending.extend(members.iter().rev());
                }
                Production::Repeat { content }
                | Production::Repeat1 { content }
                | Production::Optional { content }
                | Production::Field { content, .. }
                | Production::Token { content }
                | Production::ImmediateToken { content }
                | Production::Prec { content, .. }
                | Production::PrecLeft { content, .. }
                | Production::PrecRight { content, .. }
                | Production::PrecDynamic { content, .. }
                | Production::Reserved { content, .. } => pending.push(&**content),
                _ => {}
            }
        }
    }
    sources
}
/// Rule name -> literal text -> role; first component of `compute_token_roles`' result.
pub(crate) type RoleMap =
std::collections::HashMap<String, std::collections::HashMap<String, TokenRole>>;
/// Set of `(rule name, literal)` pairs; second component of `compute_token_roles`' result.
pub(crate) type IndentSet = std::collections::HashSet<(String, String)>;
pub(crate) fn compute_token_roles(grammar: &Grammar) -> (RoleMap, IndentSet) {
use std::collections::{HashMap, HashSet};
let mut all_roles: HashMap<String, HashMap<String, TokenRole>> = HashMap::new();
let mut indent_triggers: HashSet<(String, String)> = HashSet::new();
for (rule_name, rule) in &grammar.rules {
let mut roles: HashMap<String, TokenRole> = HashMap::new();
classify_production(rule, &mut roles, &mut indent_triggers, rule_name);
if !roles.is_empty() {
all_roles.insert(rule_name.clone(), roles);
}
}
(all_roles, indent_triggers)
}
/// Walks `prod` and records token roles (and indent triggers) for the rule `rule_name`.
///
/// * A sequence is handed to `classify_seq` with `in_choice = false`.
/// * Each alternative of a choice that is itself a sequence is handed to `classify_seq`
///   with `in_choice = true`; other alternatives are classified recursively.
/// * The body of a repetition goes to `classify_repeat_body`.
/// * Every other wrapper (including aliases) is looked through.
/// * Leaves (symbols, strings, patterns, blank) contribute nothing on their own.
pub(crate) fn classify_production(
    prod: &Production,
    roles: &mut std::collections::HashMap<String, TokenRole>,
    indent_triggers: &mut std::collections::HashSet<(String, String)>,
    rule_name: &str,
) {
    match prod {
        Production::Seq { members } => {
            classify_seq(members, roles, indent_triggers, rule_name, false);
        }
        Production::Choice { members } => {
            for alternative in members {
                if let Production::Seq { members: inner } = alternative {
                    classify_seq(inner, roles, indent_triggers, rule_name, true);
                } else {
                    classify_production(alternative, roles, indent_triggers, rule_name);
                }
            }
        }
        Production::Repeat { content } | Production::Repeat1 { content } => {
            classify_repeat_body(content, roles, indent_triggers, rule_name);
        }
        Production::Optional { content }
        | Production::Field { content, .. }
        | Production::Alias { content, .. }
        | Production::Token { content }
        | Production::ImmediateToken { content }
        | Production::Prec { content, .. }
        | Production::PrecLeft { content, .. }
        | Production::PrecRight { content, .. }
        | Production::PrecDynamic { content, .. }
        | Production::Reserved { content, .. } => {
            classify_production(content, roles, indent_triggers, rule_name);
        }
        Production::Symbol { .. }
        | Production::String { .. }
        | Production::Pattern { .. }
        | Production::Blank => {}
    }
}
/// Assigns a [`TokenRole`] to each string literal found (via
/// `unwrap_to_string`) among `members`, then recurses into the remaining
/// members with [`classify_production`].
///
/// Steps, in order:
/// 1. With at least two string members, when the first and last differ, are
///    both word-like or both not word-like, and enclose at least one
///    non-string member, they are recorded as `BracketOpen` / `BracketClose`
///    (overwriting any earlier role). If the opener is `"{"` and the enclosed
///    members satisfy `has_repeat_recursive`, `(rule_name, "{")` is added to
///    `indent_triggers`.
/// 2. When some non-string member follows the first member, every text
///    returned by `leading_optional_sign` for the first member becomes
///    `BracketOpen` unless it already has a role.
/// 3. Each string that still has no role gets `Keyword`, `BracketOpen`,
///    `BracketClose`, `Connector` or `Operator` based on word-likeness and
///    position relative to the non-string members.
///
/// `in_choice` disables the prefix-sigil and connector heuristics of step 3.
pub(crate) fn classify_seq(
    members: &[Production],
    roles: &mut std::collections::HashMap<String, TokenRole>,
    indent_triggers: &mut std::collections::HashSet<(String, String)>,
    rule_name: &str,
    in_choice: bool,
) {
    /// A member counts as "content" when it is not a (wrapped) string literal.
    fn is_content(member: &Production) -> bool {
        unwrap_to_string(member).is_none()
    }

    let string_positions: Vec<(usize, &str)> = members
        .iter()
        .enumerate()
        .filter_map(|(i, m)| unwrap_to_string(m).map(|s| (i, s)))
        .collect();
    // Every member is either a string or content, so no second scan is needed.
    let content_count = members.len() - string_positions.len();

    // Step 1: outermost string pair acting as brackets.
    if let &[(first_idx, first_val), .., (last_idx, last_val)] = string_positions.as_slice() {
        let between = &members[first_idx + 1..last_idx];
        let has_content_between = between.iter().any(is_content);
        // Both word-like or both punctuation.
        let same_class = is_word_like(first_val) == is_word_like(last_val);
        if has_content_between && first_val != last_val && same_class {
            roles.insert(first_val.to_owned(), TokenRole::BracketOpen);
            roles.insert(last_val.to_owned(), TokenRole::BracketClose);
            if first_val == "{" && has_repeat_recursive(between) {
                indent_triggers.insert((rule_name.to_owned(), first_val.to_owned()));
            }
        }
    }

    // Step 2: optional leading sign in front of content.
    if let Some((first, rest)) = members.split_first() {
        if rest.iter().any(is_content) {
            for sign in leading_optional_sign(first) {
                roles.entry(sign).or_insert(TokenRole::BracketOpen);
            }
        }
    }

    // Step 3: positional fallback for strings that are still unclassified.
    let first_content_idx = members.iter().position(is_content);
    let last_content_idx = members.iter().rposition(is_content);
    for &(i, value) in &string_positions {
        // Borrowed lookup: no allocation for strings that already have a role.
        if roles.contains_key(value) {
            continue;
        }
        let role = if is_word_like(value) {
            TokenRole::Keyword
        } else if !in_choice
            && first_content_idx.is_some_and(|fc| i < fc)
            && is_prefix_sigil(value)
        {
            TokenRole::BracketOpen
        } else if last_content_idx.is_some_and(|lc| i > lc) {
            TokenRole::BracketClose
        } else if !in_choice
            && string_positions.len() == 1
            && content_count == 2
            && value.len() == 1
        {
            TokenRole::Connector
        } else {
            TokenRole::Operator
        };
        roles.insert(value.to_owned(), role);
    }

    // Finally descend into everything that is not a plain string.
    for member in members.iter().filter(|m| is_content(m)) {
        classify_production(member, roles, indent_triggers, rule_name);
    }
}
/// Classifies the body of a `REPEAT` / `REPEAT1` production.
///
/// When the body is a sequence whose first member is a bare string literal,
/// that literal is recorded as `TokenRole::Separator` (replacing any role it
/// already had) before the sequence is passed to [`classify_seq`]. Any other
/// body is delegated to [`classify_production`].
pub(crate) fn classify_repeat_body(
    content: &Production,
    roles: &mut std::collections::HashMap<String, TokenRole>,
    indent_triggers: &mut std::collections::HashSet<(String, String)>,
    rule_name: &str,
) {
    let Production::Seq { members } = content else {
        classify_production(content, roles, indent_triggers, rule_name);
        return;
    };
    if let Some(Production::String { value }) = members.first() {
        roles.insert(value.clone(), TokenRole::Separator);
    }
    classify_seq(members, roles, indent_triggers, rule_name, false);
}
/// Computes a role for every position of `members`.
///
/// The result has one slot per member: `Some(role)` for members that
/// `unwrap_to_string` resolves to a string literal, `None` for the rest.
///
/// Resolution order:
/// 1. The first string with a `matching_close_bracket` whose closing text
///    occurs later in the sequence (searching from the end), with at least
///    one non-string member in between, is marked `BracketOpen` /
///    `BracketClose`.
/// 2. If no such pair exists, the first and last strings are treated as a
///    bracket pair when they enclose non-string content, are both word-like or
///    both not word-like, and either differ in text or share the text while at
///    least one of them is an immediate token.
/// 3. Remaining strings get `Immediate`, `Keyword`, `BracketOpen`,
///    `BracketClose`, `Connector` or `Operator` based on their kind and
///    position relative to the non-string members.
///
/// `in_choice` disables the leading-position and connector heuristics of
/// step 3.
pub(crate) fn classify_seq_positions(
    members: &[Production],
    in_choice: bool,
) -> Vec<Option<TokenRole>> {
    /// A member counts as "content" when it is not a (wrapped) string literal.
    fn is_content(member: &Production) -> bool {
        unwrap_to_string(member).is_none()
    }

    let mut roles: Vec<Option<TokenRole>> = vec![None; members.len()];
    let string_positions: Vec<(usize, &str)> = members
        .iter()
        .enumerate()
        .filter_map(|(i, m)| unwrap_to_string(m).map(|s| (i, s)))
        .collect();
    let content_count = members.len() - string_positions.len();

    // Step 1: explicit bracket pair. Only whether one was found matters later.
    let mut bracket_found = false;
    for &(oi, ov) in &string_positions {
        let Some(close_text) = matching_close_bracket(ov) else {
            continue;
        };
        let Some(&(ci, _)) = string_positions
            .iter()
            .rev()
            .find(|(_, v)| *v == close_text)
        else {
            continue;
        };
        // `oi < ci` is checked first so the slice range below is valid.
        if oi < ci && members[oi + 1..ci].iter().any(is_content) {
            roles[oi] = Some(TokenRole::BracketOpen);
            roles[ci] = Some(TokenRole::BracketClose);
            bracket_found = true;
            break;
        }
    }

    // Step 2: fall back to the outermost string pair.
    if !bracket_found {
        if let &[(first_idx, first_val), .., (last_idx, last_val)] = string_positions.as_slice() {
            let has_content_between = members[first_idx + 1..last_idx].iter().any(is_content);
            let same_class = is_word_like(first_val) == is_word_like(last_val);
            let either_immediate =
                is_immediate_token(&members[first_idx]) || is_immediate_token(&members[last_idx]);
            let same_text_immediate = first_val == last_val && either_immediate;
            if has_content_between && same_class && (first_val != last_val || same_text_immediate)
            {
                roles[first_idx] = Some(TokenRole::BracketOpen);
                roles[last_idx] = Some(TokenRole::BracketClose);
            }
        }
    }

    // Step 3: positional fallback for the strings without a role.
    let first_content_idx = members.iter().position(is_content);
    let last_content_idx = members.iter().rposition(is_content);
    for &(i, value) in &string_positions {
        if roles[i].is_some() {
            continue;
        }
        let role = if is_immediate_token(&members[i]) {
            TokenRole::Immediate
        } else if is_word_like(value) {
            TokenRole::Keyword
        } else if !in_choice && first_content_idx.is_some_and(|fc| i < fc) {
            if is_prefix_sigil(value) {
                TokenRole::BracketOpen
            } else {
                TokenRole::Operator
            }
        } else if last_content_idx.is_some_and(|lc| i > lc) {
            TokenRole::BracketClose
        } else if !in_choice
            && string_positions.len() == 1
            && content_count == 2
            && value.len() == 1
        {
            TokenRole::Connector
        } else {
            TokenRole::Operator
        };
        roles[i] = Some(role);
    }
    roles
}
/// Collects the line-comment prefix of every entry in `grammar.extras` that
/// names a rule for which `extract_line_comment_prefix` yields a value.
///
/// Prefixes are returned in `extras` order; duplicates are not removed.
pub(crate) fn extract_line_comment_prefixes(grammar: &Grammar) -> Vec<String> {
    let mut prefixes = Vec::new();
    for extra_name in &grammar.extras {
        let prefix = grammar
            .rules
            .get(extra_name)
            .and_then(|rule| extract_line_comment_prefix(rule));
        // Extending with an `Option` pushes zero or one element.
        prefixes.extend(prefix);
    }
    prefixes
}
/// Reports whether the leading terminal of `prod` satisfies
/// `is_newline_like_pattern`.
///
/// The walk follows the first member of a sequence, the rule body of a
/// symbol, and the child of token / precedence / field / alias / reserved
/// wrappers. `seen` holds the symbol names already followed; reaching a name
/// a second time, an unknown symbol, an empty sequence, or any other variant
/// yields `false`.
fn production_is_newline_leading(
    grammar: &Grammar,
    prod: &Production,
    seen: &mut std::collections::HashSet<String>,
) -> bool {
    // Each step has exactly one successor, so the descent is a plain loop.
    let mut current = prod;
    loop {
        current = match current {
            Production::String { value } | Production::Pattern { value } => {
                return is_newline_like_pattern(value);
            }
            Production::Seq { members } => match members.first() {
                Some(head) => head,
                None => return false,
            },
            Production::Symbol { name } => {
                let first_visit = seen.insert(name.clone());
                if !first_visit {
                    return false;
                }
                match grammar.rules.get(name) {
                    Some(body) => body,
                    None => return false,
                }
            }
            Production::Token { content }
            | Production::ImmediateToken { content }
            | Production::Prec { content, .. }
            | Production::PrecLeft { content, .. }
            | Production::PrecRight { content, .. }
            | Production::PrecDynamic { content, .. }
            | Production::Field { content, .. }
            | Production::Alias { content, .. }
            | Production::Reserved { content, .. } => &**content,
            _ => return false,
        };
    }
}
/// Scans rules of the shape `SEQ(marker, …, newline-leading)` and summarises
/// what the first member can be.
///
/// Only rules whose body is directly a sequence of at least two members, and
/// whose last member passes [`production_is_newline_leading`], are inspected.
/// Returns:
/// * the sorted, de-duplicated single-character string literals reachable
///   from the first member that are neither alphanumeric nor whitespace, and
/// * a flag set when a reachable pattern consists only of branches accepted
///   by `is_whitespace_only_pattern`.
pub(crate) fn classify_trailing_break_markers(grammar: &Grammar) -> (Vec<String>, bool) {
    /// Gathers marker alternatives from `prod`, looking through choices,
    /// symbols (each name followed at most once via `seen`) and single-child
    /// wrappers.
    fn collect_marker_alts(
        grammar: &Grammar,
        prod: &Production,
        lits: &mut Vec<String>,
        ws: &mut bool,
        seen: &mut std::collections::HashSet<String>,
    ) {
        match prod {
            Production::Choice { members } => {
                for alternative in members {
                    collect_marker_alts(grammar, alternative, lits, ws, seen);
                }
            }
            Production::Symbol { name } => {
                // A name is only marked as seen when it resolves to a rule.
                if let Some(body) = grammar.rules.get(name) {
                    if seen.insert(name.clone()) {
                        collect_marker_alts(grammar, body, lits, ws, seen);
                    }
                }
            }
            Production::Token { content }
            | Production::ImmediateToken { content }
            | Production::Prec { content, .. }
            | Production::PrecLeft { content, .. }
            | Production::PrecRight { content, .. }
            | Production::PrecDynamic { content, .. }
            | Production::Field { content, .. }
            | Production::Alias { content, .. }
            | Production::Reserved { content, .. } => {
                collect_marker_alts(grammar, content, lits, ws, seen);
            }
            Production::String { value } => {
                let mut chars = value.chars();
                // Exactly one character: a first one and no second one.
                let sole_char = match (chars.next(), chars.next()) {
                    (Some(c), None) => Some(c),
                    _ => None,
                };
                if sole_char.is_some_and(|c| !(c.is_alphanumeric() || c.is_whitespace())) {
                    lits.push(value.clone());
                }
            }
            Production::Pattern { value } => {
                let ws_only = super::split_top_level_alternation(value)
                    .iter()
                    .all(|b| is_whitespace_only_pattern(b.trim()));
                if ws_only {
                    *ws = true;
                }
            }
            _ => {}
        }
    }

    let mut lits: Vec<String> = Vec::new();
    let mut ws = false;
    for rule in grammar.rules.values() {
        let Production::Seq { members } = rule else {
            continue;
        };
        // The slice pattern only matches sequences with two or more members.
        let [head, .., tail] = members.as_slice() else {
            continue;
        };
        let mut tail_seen = std::collections::HashSet::new();
        if !production_is_newline_leading(grammar, tail, &mut tail_seen) {
            continue;
        }
        let mut head_seen = std::collections::HashSet::new();
        collect_marker_alts(grammar, head, &mut lits, &mut ws, &mut head_seen);
    }
    lits.sort();
    lits.dedup();
    (lits, ws)
}
/// Extracts the first key of the `"rules"` object from raw grammar JSON.
///
/// This is a lightweight textual scan, not a JSON parse: it locates the first
/// occurrence of `"rules"`, the next `{` after it, and then reads the first
/// string key inside that object.
///
/// JSON escape sequences inside the key (`\"`, `\\`, `\/`, `\b`, `\f`, `\n`,
/// `\r`, `\t` and `\uXXXX` for non-surrogate code points) are decoded so the
/// result equals the key a JSON parser would produce.
///
/// Returns an empty string when the input is not UTF-8, when no `"rules"`
/// object with a leading string key is found, when the key is unterminated,
/// or when it contains an escape this scanner cannot decode (including
/// `\u` surrogate pairs).
fn extract_start_symbol(bytes: &[u8]) -> String {
    const RULES_KEY: &str = "\"rules\"";

    let Ok(text) = std::str::from_utf8(bytes) else {
        return String::new();
    };
    let Some((_, after_key)) = text.split_once(RULES_KEY) else {
        return String::new();
    };
    let Some((_, object_body)) = after_key.split_once('{') else {
        return String::new();
    };
    // Only whitespace may precede the opening quote of the first key.
    let Some(key_body) = object_body.trim_start().strip_prefix('"') else {
        return String::new();
    };

    let mut key = String::new();
    let mut chars = key_body.chars();
    while let Some(c) = chars.next() {
        let decoded = match c {
            '"' => return key,
            '\\' => match chars.next() {
                Some(literal @ ('"' | '\\' | '/')) => Some(literal),
                Some('b') => Some('\u{0008}'),
                Some('f') => Some('\u{000C}'),
                Some('n') => Some('\n'),
                Some('r') => Some('\r'),
                Some('t') => Some('\t'),
                Some('u') => {
                    let hex: String = chars.by_ref().take(4).collect();
                    if hex.len() == 4 && hex.bytes().all(|b| b.is_ascii_hexdigit()) {
                        // `char::from_u32` rejects surrogate halves.
                        u32::from_str_radix(&hex, 16).ok().and_then(char::from_u32)
                    } else {
                        None
                    }
                }
                _ => None,
            },
            other => Some(other),
        };
        let Some(decoded) = decoded else {
            return String::new();
        };
        key.push(decoded);
    }
    // Ran out of input before the closing quote.
    String::new()
}
/// Returns `true` when some top-level alternation branch of `value` begins
/// (after trimming) with a negated character class `[^…]` whose contents, up
/// to the first `]`, mention neither the two-character text `\n` nor `\r`.
///
/// Branches that do not start with `[^`, or that have no `]` after it, never
/// count.
fn pattern_admits_newline(value: &str) -> bool {
    super::split_top_level_alternation(value)
        .into_iter()
        .any(|branch| {
            branch
                .trim()
                .strip_prefix("[^")
                // Everything before the first `]` is the class body.
                .and_then(|rest| rest.split_once(']'))
                .is_some_and(|(inner, _)| !inner.contains("\\n") && !inner.contains("\\r"))
        })
}
/// Reports whether any visible rule reachable from the start rule has a body
/// for which [`pattern_admits_newline`] holds.
///
/// Reachability is computed from the body of `grammar.start_symbol`: symbols
/// whose name starts with `_` are expanded (each at most once), all other
/// symbols are collected by name. A collected rule qualifies when its body,
/// looked at through repeat / choice / token wrappers, bottoms out in a
/// pattern accepted by [`pattern_admits_newline`]. Returns `false` when the
/// start rule is missing.
pub(crate) fn classify_top_level_text_admits_newline(grammar: &Grammar) -> bool {
    /// Collects the names of non-underscore symbols referenced by `prod`,
    /// expanding underscore-prefixed symbols in place.
    fn collect_content_kinds(
        grammar: &Grammar,
        prod: &Production,
        out: &mut std::collections::HashSet<String>,
        seen: &mut std::collections::HashSet<String>,
    ) {
        match prod {
            Production::Seq { members } | Production::Choice { members } => {
                for member in members {
                    collect_content_kinds(grammar, member, out, seen);
                }
            }
            Production::Repeat { content }
            | Production::Repeat1 { content }
            | Production::Optional { content }
            | Production::Token { content }
            | Production::ImmediateToken { content }
            | Production::Prec { content, .. }
            | Production::PrecLeft { content, .. }
            | Production::PrecRight { content, .. }
            | Production::PrecDynamic { content, .. }
            | Production::Field { content, .. }
            | Production::Reserved { content, .. } => {
                collect_content_kinds(grammar, content, out, seen);
            }
            Production::Symbol { name } => {
                if !name.starts_with('_') {
                    out.insert(name.clone());
                    return;
                }
                // Underscore-prefixed: expand once, and only if it is a rule.
                if !seen.insert(name.clone()) {
                    return;
                }
                if let Some(body) = grammar.rules.get(name) {
                    collect_content_kinds(grammar, body, out, seen);
                }
            }
            _ => {}
        }
    }

    /// Checks a rule body for a newline-admitting pattern, looking through
    /// repeats, choices and token wrappers only.
    fn body_admits_newline_text(prod: &Production) -> bool {
        match prod {
            Production::Pattern { value } => pattern_admits_newline(value),
            Production::Choice { members } => members.iter().any(body_admits_newline_text),
            Production::Repeat { content }
            | Production::Repeat1 { content }
            | Production::Token { content }
            | Production::ImmediateToken { content } => body_admits_newline_text(content),
            _ => false,
        }
    }

    let Some(start_body) = grammar.rules.get(&grammar.start_symbol) else {
        return false;
    };
    let mut kinds = std::collections::HashSet::new();
    let mut expanded = std::collections::HashSet::new();
    collect_content_kinds(grammar, start_body, &mut kinds, &mut expanded);

    kinds
        .iter()
        .filter_map(|kind| grammar.rules.get(kind))
        .any(body_admits_newline_text)
}
/// Sorts underscore-prefixed symbols that have no rule definition into the
/// grammar's external layout sets, purely by name.
///
/// Names present in `grammar.external_alias_map` are skipped. For the rest,
/// the first matching test wins:
/// * ends with `_indent` → `external_indent_opens`
/// * ends with `_dedent` → `external_indent_closes`
/// * contains one of the newline-ish fragments or ends with `_or_eof`
///   → `external_newlines`
/// * contains `semicolon` → `external_semicolons`
///
/// Names matching none of these are left unclassified.
pub(crate) fn classify_external_layout_tokens(grammar: &mut Grammar) {
    const NEWLINE_FRAGMENTS: [&str; 5] = [
        "line_ending",
        "newline",
        "automatic_separator",
        "automatic_semicolon",
        "layout_semicolon",
    ];

    let all_hidden_refs = collect_all_symbol_refs(&grammar.rules);
    for name in &all_hidden_refs {
        let hidden_without_rule = name.starts_with('_') && !grammar.rules.contains_key(name);
        if !hidden_without_rule || grammar.external_alias_map.contains_key(name) {
            continue;
        }
        let newline_like = name.ends_with("_or_eof")
            || NEWLINE_FRAGMENTS
                .iter()
                .any(|fragment| name.contains(*fragment));
        if name == "_indent" || name.ends_with("_indent") {
            grammar.external_indent_opens.insert(name.clone());
        } else if name == "_dedent" || name.ends_with("_dedent") {
            grammar.external_indent_closes.insert(name.clone());
        } else if newline_like {
            grammar.external_newlines.insert(name.clone());
        } else if name.contains("semicolon") {
            grammar.external_semicolons.insert(name.clone());
        }
    }
}
/// Finds rules shaped like `SEQ(open, …, close)` where `open` and `close` are
/// distinct symbols without a rule definition, and records them.
///
/// For every such rule (sequence of at least three members, seen through
/// `unwrap_to_seq`), the opener and closer names are stored in
/// `external_bracket_opens` / `external_bracket_closes`, and the interior
/// members are scanned with [`collect_visible_external_content`] to fill
/// `external_content_kinds`. The three grammar fields are overwritten with
/// the freshly computed sets.
pub(crate) fn classify_external_bracket_delimiters(grammar: &mut Grammar) {
    let is_external = |name: &str| !grammar.rules.contains_key(name);
    let mut opens = std::collections::HashSet::new();
    let mut closes = std::collections::HashSet::new();
    let mut content_kinds = std::collections::HashSet::new();

    for rule in grammar.rules.values() {
        let Production::Seq { members } = unwrap_to_seq(rule) else {
            continue;
        };
        let [first, interior @ .., last] = members.as_slice() else {
            continue;
        };
        // Require at least one member between the delimiters.
        if interior.is_empty() {
            continue;
        }
        let Some(open) = delimiter_external_name(first) else {
            continue;
        };
        let Some(close) = delimiter_external_name(last) else {
            continue;
        };
        if open == close || !(is_external(open) && is_external(close)) {
            continue;
        }
        opens.insert(open.to_owned());
        closes.insert(close.to_owned());
        for member in interior {
            collect_visible_external_content(grammar, member, &mut content_kinds, &mut Vec::new());
        }
    }

    grammar.external_bracket_opens = opens;
    grammar.external_bracket_closes = closes;
    grammar.external_content_kinds = content_kinds;
}
/// Maps external closing symbols to the quote literal that opens their rule.
///
/// A rule contributes when, seen through `unwrap_to_seq`, it is a sequence of
/// at least three members whose first member is a string literal starting
/// with `"`, `'` or a backtick, and whose last member names a symbol that has
/// no rule definition. Closers already registered as indent/newline externals,
/// or whose name contains `indent`, `dedent`, `newline`, `line_ending` or ends
/// with `_or_eof`, are excluded. `grammar.external_close_text` is replaced
/// with the resulting map (closer name → opening literal).
pub(crate) fn classify_external_close_text(grammar: &mut Grammar) {
    const LAYOUT_FRAGMENTS: [&str; 4] = ["indent", "dedent", "newline", "line_ending"];

    let is_external = |name: &str| !grammar.rules.contains_key(name);
    let mut close_text = std::collections::HashMap::new();

    for rule in grammar.rules.values() {
        let Production::Seq { members } = unwrap_to_seq(rule) else {
            continue;
        };
        // Matches only sequences with three or more members.
        let [first, _, .., last] = members.as_slice() else {
            continue;
        };
        let Some(open) =
            string_literal_value(first).filter(|text| text.starts_with(['"', '\'', '`']))
        else {
            continue;
        };
        let Some(close) = delimiter_external_name(last) else {
            continue;
        };
        let is_layout_token = grammar.external_indent_closes.contains(close)
            || grammar.external_indent_opens.contains(close)
            || grammar.external_newlines.contains(close)
            || LAYOUT_FRAGMENTS
                .iter()
                .any(|fragment| close.contains(*fragment))
            || close.ends_with("_or_eof");
        if is_external(close) && !is_layout_token {
            close_text.insert(close.to_owned(), open.to_owned());
        }
    }

    grammar.external_close_text = close_text;
}
/// Returns the text of a string literal, looking through token, precedence
/// and reserved wrappers. Any other production yields `None`.
fn string_literal_value(prod: &Production) -> Option<&str> {
    let mut current = prod;
    loop {
        current = match current {
            Production::String { value } => return Some(value.as_str()),
            Production::Token { content }
            | Production::ImmediateToken { content }
            | Production::Prec { content, .. }
            | Production::PrecLeft { content, .. }
            | Production::PrecRight { content, .. }
            | Production::PrecDynamic { content, .. }
            | Production::Reserved { content, .. } => &**content,
            _ => return None,
        };
    }
}
/// Returns the symbol name a delimiter member refers to.
///
/// A bare symbol yields its own name; an alias with `named == false` yields
/// whatever `external_symbol_name` reports for its content. Everything else,
/// including named aliases, yields `None`.
fn delimiter_external_name(prod: &Production) -> Option<&str> {
    if let Production::Symbol { name } = prod {
        return Some(name.as_str());
    }
    if let Production::Alias {
        content,
        named: false,
        ..
    } = prod
    {
        return external_symbol_name(content);
    }
    None
}
/// Collects into `out` the names of symbols reachable from `prod` that have
/// no rule definition and do not start with `_`.
///
/// Underscore-prefixed symbols that do have a rule are expanded; `visiting`
/// is the stack of names currently being expanded and prevents re-entering a
/// rule that is already on the path. Symbols with a rule and no underscore
/// prefix are neither collected nor expanded.
fn collect_visible_external_content<'g>(
    grammar: &'g Grammar,
    prod: &'g Production,
    out: &mut std::collections::HashSet<String>,
    visiting: &mut Vec<&'g str>,
) {
    match prod {
        Production::Symbol { name } => match grammar.rules.get(name) {
            None => {
                if !name.starts_with('_') {
                    out.insert(name.clone());
                }
            }
            Some(rule) => {
                let on_path = visiting.contains(&name.as_str());
                if name.starts_with('_') && !on_path {
                    visiting.push(name.as_str());
                    collect_visible_external_content(grammar, rule, out, visiting);
                    visiting.pop();
                }
            }
        },
        Production::Choice { members } | Production::Seq { members } => {
            for member in members {
                collect_visible_external_content(grammar, member, out, visiting);
            }
        }
        Production::Repeat { content }
        | Production::Repeat1 { content }
        | Production::Optional { content }
        | Production::Token { content }
        | Production::ImmediateToken { content }
        | Production::Prec { content, .. }
        | Production::PrecLeft { content, .. }
        | Production::PrecRight { content, .. }
        | Production::PrecDynamic { content, .. }
        | Production::Reserved { content, .. }
        | Production::Field { content, .. }
        | Production::Alias { content, .. } => {
            collect_visible_external_content(grammar, content, out, visiting);
        }
        // Terminals carry no symbol references.
        Production::String { .. } | Production::Pattern { .. } | Production::Blank => {}
    }
}
/// Lists the alternatives of a rule body after `unwrap_to_seq`.
///
/// If the unwrapped body is a choice, each member is itself passed through
/// `unwrap_to_seq`; otherwise the unwrapped body is the single alternative.
fn seq_alternatives(rule: &Production) -> Vec<&Production> {
    let body = unwrap_to_seq(rule);
    if let Production::Choice { members } = body {
        members.iter().map(unwrap_to_seq).collect()
    } else {
        vec![body]
    }
}
/// Reports whether `prod` contains a repetition.
///
/// True for `REPEAT` / `REPEAT1` themselves, for a symbol whose rule body is
/// directly one of those, and for choices, sequences, optionals and fields
/// that contain such a member. Symbols are resolved one level only.
fn member_is_unbounded_body(
    prod: &Production,
    rules: &std::collections::BTreeMap<String, Production>,
) -> bool {
    /// Direct repetition test, without looking inside.
    fn is_repeat(p: &Production) -> bool {
        matches!(p, Production::Repeat { .. } | Production::Repeat1 { .. })
    }

    match prod {
        Production::Symbol { name } => rules.get(name).is_some_and(is_repeat),
        Production::Choice { members } | Production::Seq { members } => members
            .iter()
            .any(|member| member_is_unbounded_body(member, rules)),
        Production::Optional { content } | Production::Field { content, .. } => {
            member_is_unbounded_body(content, rules)
        }
        other => is_repeat(other),
    }
}
/// Computes `grammar.string_content_kinds`: the node kinds that appear between
/// matching quote delimiters in string-like rules.
///
/// For every alternative from [`seq_alternatives`] that is a sequence of at
/// least three members, starts with a bare string literal accepted by
/// `is_quote_delimiter`, and contains the same literal again at a later
/// index, the members strictly between the opener and its last occurrence
/// are scanned with [`collect_string_content_kinds`]. The kinds found are
/// kept only when one of those members satisfies
/// [`member_is_unbounded_body`]; otherwise they are discarded.
pub(crate) fn classify_string_content_kinds(grammar: &mut Grammar) {
    let mut accum = StringContentAccum::new();
    for rule in grammar.rules.values() {
        for seq in seq_alternatives(rule) {
            let Production::Seq { members } = seq else {
                continue;
            };
            // Three or more members, the first being a bare string literal.
            let [first @ Production::String { value: open }, _, _, ..] = members.as_slice() else {
                continue;
            };
            if !is_quote_delimiter(first) {
                continue;
            }
            let last_match = members
                .iter()
                .rposition(|m| unwrap_to_string(m) == Some(open.as_str()));
            let close_idx = match last_match {
                // Index 0 is the opener itself, which cannot also close.
                Some(idx) if idx != 0 => idx,
                _ => continue,
            };
            let interior = &members[1..close_idx];
            let has_repeat_body = interior
                .iter()
                .any(|member| member_is_unbounded_body(member, &grammar.rules));
            for member in interior {
                collect_string_content_kinds(member, &mut accum);
            }
            if has_repeat_body {
                accum.commit();
            } else {
                accum.clear_in_rule_guard();
            }
        }
    }
    grammar.string_content_kinds = accum.into_set();
}
/// Two-stage set of kind names: entries are staged in `pending` and only
/// become part of the result once committed.
struct StringContentAccum {
    /// Kinds that have been committed.
    confirmed: std::collections::HashSet<String>,
    /// Kinds staged since the last commit or clear.
    pending: std::collections::HashSet<String>,
}

impl StringContentAccum {
    /// Creates an accumulator with both stages empty.
    fn new() -> Self {
        let confirmed = std::collections::HashSet::new();
        let pending = std::collections::HashSet::new();
        Self { confirmed, pending }
    }

    /// Stages `kind`; it is not part of the result until [`Self::commit`].
    fn insert(&mut self, kind: String) {
        self.pending.insert(kind);
    }

    /// Moves every staged kind into the confirmed set.
    fn commit(&mut self) {
        self.confirmed.extend(self.pending.drain());
    }

    /// Drops every staged kind without confirming it.
    fn clear_in_rule_guard(&mut self) {
        self.pending.clear();
    }

    /// Consumes the accumulator, confirming whatever is still staged, and
    /// returns the confirmed set.
    fn into_set(self) -> std::collections::HashSet<String> {
        let Self {
            mut confirmed,
            pending,
        } = self;
        confirmed.extend(pending);
        confirmed
    }
}
/// Stages in `out` the kind names that `prod` can produce as string content.
///
/// A named alias contributes its alias `value` (its content is not visited);
/// a symbol contributes its name unless it starts with `_`. Choices,
/// sequences, repeats, optionals, fields and token / precedence / reserved
/// wrappers are searched recursively. Unnamed aliases, underscore-prefixed
/// symbols and terminals contribute nothing.
fn collect_string_content_kinds(prod: &Production, out: &mut StringContentAccum) {
    match prod {
        Production::Alias {
            value, named: true, ..
        } => out.insert(value.clone()),
        Production::Symbol { name } => {
            if !name.starts_with('_') {
                out.insert(name.clone());
            }
        }
        Production::Choice { members } | Production::Seq { members } => {
            for member in members {
                collect_string_content_kinds(member, out);
            }
        }
        Production::Repeat { content }
        | Production::Repeat1 { content }
        | Production::Optional { content }
        | Production::Token { content }
        | Production::ImmediateToken { content }
        | Production::Prec { content, .. }
        | Production::PrecLeft { content, .. }
        | Production::PrecRight { content, .. }
        | Production::PrecDynamic { content, .. }
        | Production::Reserved { content, .. }
        | Production::Field { content, .. } => collect_string_content_kinds(content, out),
        _ => {}
    }
}
/// Records in `grammar.synthetic_indent_rules` every rule that references an
/// external indent-close symbol without referencing any external indent-open
/// symbol (both judged from `referenced_symbols`).
///
/// Does nothing, leaving the field untouched, when the grammar has no
/// external indent-close symbols.
pub(crate) fn classify_synthetic_indent_rules(grammar: &mut Grammar) {
    if grammar.external_indent_closes.is_empty() {
        return;
    }
    let rules: std::collections::HashSet<String> = grammar
        .rules
        .iter()
        .filter_map(|(name, rule)| {
            let symbols = referenced_symbols(rule);
            let references_close = symbols
                .iter()
                .any(|s| grammar.external_indent_closes.contains(*s));
            let references_open = symbols
                .iter()
                .any(|s| grammar.external_indent_opens.contains(*s));
            (references_close && !references_open).then(|| name.clone())
        })
        .collect();
    grammar.synthetic_indent_rules = rules;
}
/// Returns the kind names whose terminal pattern satisfies
/// `pattern_absorbs_leading_space`.
///
/// Two sources feed the set: rule names whose body has such a
/// `terminal_pattern_of`, and the `value` of every named alias anywhere in
/// the grammar whose content has such a pattern.
pub(crate) fn classify_leading_space_terminals(
    grammar: &Grammar,
) -> std::collections::HashSet<String> {
    /// Visits every production under `prod`, recording qualifying named
    /// aliases.
    fn walk(prod: &Production, out: &mut std::collections::HashSet<String>) {
        if let Production::Alias {
            content,
            named: true,
            value,
        } = prod
        {
            let absorbs =
                terminal_pattern_of(content).is_some_and(|p| pattern_absorbs_leading_space(p));
            if absorbs {
                out.insert(value.clone());
            }
        }
        match prod {
            Production::Alias { content, .. }
            | Production::Repeat { content }
            | Production::Repeat1 { content }
            | Production::Optional { content }
            | Production::Field { content, .. }
            | Production::Token { content }
            | Production::ImmediateToken { content }
            | Production::Prec { content, .. }
            | Production::PrecLeft { content, .. }
            | Production::PrecRight { content, .. }
            | Production::PrecDynamic { content, .. }
            | Production::Reserved { content, .. } => walk(content, out),
            Production::Seq { members } | Production::Choice { members } => {
                for member in members {
                    walk(member, out);
                }
            }
            _ => {}
        }
    }

    // Seed with the rules that are themselves such terminals.
    let mut out: std::collections::HashSet<String> = grammar
        .rules
        .iter()
        .filter_map(|(name, rule)| {
            let pattern = terminal_pattern_of(rule)?;
            pattern_absorbs_leading_space(pattern).then(|| name.clone())
        })
        .collect();
    for rule in grammar.rules.values() {
        walk(rule, &mut out);
    }
    out
}
/// Returns the alias names of immediate-token terminals that occur between
/// quote delimiters.
///
/// Every sequence in the grammar with at least two members whose first and
/// last members both satisfy `is_quote_delimiter` has its interior members
/// searched for named aliases whose content is an immediate token with a
/// `terminal_pattern_of`; the alias `value` of each hit is collected.
pub(crate) fn classify_immediate_token_alias_kinds(
    grammar: &Grammar,
) -> std::collections::HashSet<String> {
    /// Records qualifying named aliases anywhere under `prod`.
    fn collect_aliases(prod: &Production, out: &mut std::collections::HashSet<String>) {
        if let Production::Alias {
            content,
            named: true,
            value,
        } = prod
        {
            if is_immediate_token(content) && terminal_pattern_of(content).is_some() {
                out.insert(value.clone());
            }
        }
        match prod {
            Production::Alias { content, .. }
            | Production::Repeat { content }
            | Production::Repeat1 { content }
            | Production::Optional { content }
            | Production::Field { content, .. }
            | Production::Token { content }
            | Production::ImmediateToken { content }
            | Production::Prec { content, .. }
            | Production::PrecLeft { content, .. }
            | Production::PrecRight { content, .. }
            | Production::PrecDynamic { content, .. }
            | Production::Reserved { content, .. } => collect_aliases(content, out),
            Production::Seq { members } | Production::Choice { members } => {
                for member in members {
                    collect_aliases(member, out);
                }
            }
            _ => {}
        }
    }

    /// Finds quote-delimited sequences under `prod` and harvests their
    /// interiors.
    fn walk(prod: &Production, out: &mut std::collections::HashSet<String>) {
        match prod {
            Production::Seq { members } => {
                if let [open, interior @ .., close] = members.as_slice() {
                    if is_quote_delimiter(open) && is_quote_delimiter(close) {
                        for member in interior {
                            collect_aliases(member, out);
                        }
                    }
                }
                // Nested sequences may be quote-delimited as well.
                for member in members {
                    walk(member, out);
                }
            }
            Production::Choice { members } => {
                for member in members {
                    walk(member, out);
                }
            }
            Production::Alias { content, .. }
            | Production::Repeat { content }
            | Production::Repeat1 { content }
            | Production::Optional { content }
            | Production::Field { content, .. }
            | Production::Token { content }
            | Production::ImmediateToken { content }
            | Production::Prec { content, .. }
            | Production::PrecLeft { content, .. }
            | Production::PrecRight { content, .. }
            | Production::PrecDynamic { content, .. }
            | Production::Reserved { content, .. } => walk(content, out),
            _ => {}
        }
    }

    let mut out = std::collections::HashSet::new();
    for rule in grammar.rules.values() {
        walk(rule, &mut out);
    }
    out
}
/// Returns the kind names whose terminal pattern satisfies
/// `is_rest_of_line_pattern`.
///
/// The set contains rule names whose body has such a `terminal_pattern_of`,
/// plus the `value` of every named alias in the grammar whose content has
/// such a pattern.
pub(crate) fn classify_line_rest_kinds(grammar: &Grammar) -> std::collections::HashSet<String> {
    /// Visits every production under `prod`, recording qualifying named
    /// aliases.
    fn walk(prod: &Production, out: &mut std::collections::HashSet<String>) {
        if let Production::Alias {
            content,
            named: true,
            value,
        } = prod
        {
            let rest_of_line =
                terminal_pattern_of(content).is_some_and(|p| is_rest_of_line_pattern(p));
            if rest_of_line {
                out.insert(value.clone());
            }
        }
        match prod {
            Production::Alias { content, .. }
            | Production::Repeat { content }
            | Production::Repeat1 { content }
            | Production::Optional { content }
            | Production::Field { content, .. }
            | Production::Token { content }
            | Production::ImmediateToken { content }
            | Production::Prec { content, .. }
            | Production::PrecLeft { content, .. }
            | Production::PrecRight { content, .. }
            | Production::PrecDynamic { content, .. }
            | Production::Reserved { content, .. } => walk(content, out),
            Production::Seq { members } | Production::Choice { members } => {
                for member in members {
                    walk(member, out);
                }
            }
            _ => {}
        }
    }

    // Seed with the rules that are themselves rest-of-line terminals.
    let mut out: std::collections::HashSet<String> = grammar
        .rules
        .iter()
        .filter_map(|(name, rule)| {
            let pattern = terminal_pattern_of(rule)?;
            is_rest_of_line_pattern(pattern).then(|| name.clone())
        })
        .collect();
    for rule in grammar.rules.values() {
        walk(rule, &mut out);
    }
    out
}