#![allow(
clippy::module_name_repetitions,
clippy::too_many_lines,
clippy::too_many_arguments,
clippy::map_unwrap_or,
clippy::option_if_let_else,
clippy::elidable_lifetime_names,
clippy::items_after_statements,
clippy::needless_pass_by_value,
clippy::single_match_else,
clippy::manual_let_else,
clippy::match_same_arms,
clippy::missing_const_for_fn,
clippy::single_char_pattern,
clippy::naive_bytecount,
clippy::expect_used,
clippy::redundant_pub_crate,
clippy::used_underscore_binding,
clippy::redundant_field_names,
clippy::struct_field_names,
clippy::redundant_else,
clippy::similar_names
)]
use super::{BTreeMap, Grammar, Production, TokenRole};
#[allow(clippy::branches_sharing_code)]
pub(crate) fn seq_bracket_triggers_indent(
members: &[Production],
open_idx: usize,
_grammar: &Grammar,
) -> bool {
let string_positions: Vec<(usize, &str)> = members
.iter()
.enumerate()
.filter_map(|(i, m)| unwrap_to_string(m).map(|s| (i, s)))
.collect();
if string_positions.len() < 2 {
return false;
}
let open_val = string_positions.iter().find(|(i, _)| *i == open_idx);
let close_val = string_positions.last();
if let (Some((_, open_text)), Some((close_idx, close_text))) = (open_val, close_val) {
if open_idx >= *close_idx {
return false;
}
if is_word_like(open_text) && is_word_like(close_text) {
return true;
}
let between = &members[open_idx + 1..*close_idx];
if *open_text == "{" && has_repeat_recursive(between) {
return true;
}
if *open_text == "{" {
for m in between {
if let Production::Choice { members: alts } = m {
let has_blank = alts.iter().any(|a| matches!(a, Production::Blank));
if has_blank {
for alt in alts {
if let Production::Symbol { name } = alt {
if let Some(rule) = _grammar.rules.get(name) {
if has_repeat_in(rule) {
return true;
}
}
}
}
}
}
}
}
false
} else {
false
}
}
/// Finds the index of an opening bracket literal that leads a sequence.
///
/// Candidates are literal members located before the first member that does
/// not resolve to a literal. A candidate is accepted when
/// `matching_close_bracket` knows its closer, it is not word-like, and the
/// last literal of the whole sequence sits after it and equals that closer.
/// Returns `None` when every member is a literal or no candidate qualifies.
pub(crate) fn seq_open_bracket_index(members: &[Production]) -> Option<usize> {
    let first_content_idx = members
        .iter()
        .position(|member| unwrap_to_string(member).is_none())?;

    let literals: Vec<(usize, &str)> = members
        .iter()
        .enumerate()
        .filter_map(|(pos, member)| Some((pos, unwrap_to_string(member)?)))
        .collect();
    // Positions are ascending, so the last literal is the only possible closer.
    let &(last_idx, last_text) = literals.last()?;

    literals
        .iter()
        .take_while(|&&(pos, _)| pos < first_content_idx)
        .find(|&&(pos, text)| {
            !is_word_like(text)
                && last_idx > pos
                && matching_close_bracket(text).is_some_and(|close| close == last_text)
        })
        .map(|&(pos, _)| pos)
}
/// Reports whether `prod` begins with a literal that is not word-like.
///
/// * `Symbol`, and `Alias` wrapping a `Symbol`, look the name up in
///   `grammar.rules` and test that rule's first literal.
/// * `Seq` tests its own first literal.
/// * `Choice` requires at least one non-`Blank` alternative and every
///   non-`Blank` alternative to qualify.
/// * `Field`, the precedence wrappers and `Optional` defer to their content.
/// * Everything else (including repeats) yields `false`.
pub(crate) fn member_has_leading_bracket(prod: &Production, grammar: &Grammar) -> bool {
    /// True when the first literal of `p` exists and is not word-like.
    fn opens_with_non_word(p: &Production) -> bool {
        first_string_of(p).is_some_and(|lit| !is_word_like(lit))
    }

    match prod {
        Production::Symbol { name } => grammar.rules.get(name).is_some_and(opens_with_non_word),
        Production::Alias { content, .. } => match content.as_ref() {
            Production::Symbol { name } => {
                grammar.rules.get(name).is_some_and(opens_with_non_word)
            }
            _ => false,
        },
        Production::Seq { .. } => opens_with_non_word(prod),
        Production::Choice { members } => {
            let mut alternatives = members
                .iter()
                .filter(|alt| !matches!(alt, Production::Blank))
                .peekable();
            alternatives.peek().is_some()
                && alternatives.all(|alt| member_has_leading_bracket(alt, grammar))
        }
        Production::Field { content, .. }
        | Production::Optional { content }
        | Production::Prec { content, .. }
        | Production::PrecLeft { content, .. }
        | Production::PrecRight { content, .. }
        | Production::PrecDynamic { content, .. } => member_has_leading_bracket(content, grammar),
        _ => false,
    }
}
/// Returns the literal that `prod` starts with, if any.
///
/// Descends through the first member of a `Seq` and through precedence,
/// token, immediate-token and field wrappers until a `String` is reached.
/// Any other production shape yields `None`.
pub(crate) fn first_string_of(prod: &Production) -> Option<&str> {
    let mut node = prod;
    loop {
        match node {
            Production::String { value } => return Some(value.as_str()),
            Production::Seq { members } => node = members.first()?,
            Production::Field { content, .. }
            | Production::Token { content }
            | Production::ImmediateToken { content }
            | Production::Prec { content, .. }
            | Production::PrecLeft { content, .. }
            | Production::PrecRight { content, .. }
            | Production::PrecDynamic { content, .. } => node = content,
            _ => return None,
        }
    }
}
pub(crate) fn has_repeat_recursive(members: &[Production]) -> bool {
members.iter().any(has_repeat_in)
}
/// Returns `true` if `prod` is, or transitively contains, a `Repeat` or
/// `Repeat1`.
///
/// The search looks inside `Choice`/`Seq` members and through precedence,
/// optional, field, token, immediate-token, reserved and alias wrappers.
/// `Symbol` references are not followed.
pub(crate) fn has_repeat_in(prod: &Production) -> bool {
    // Explicit work list; visiting order is irrelevant for an existence check.
    let mut pending: Vec<&Production> = vec![prod];
    while let Some(node) = pending.pop() {
        match node {
            Production::Repeat { .. } | Production::Repeat1 { .. } => return true,
            Production::Choice { members } | Production::Seq { members } => {
                pending.extend(members.iter());
            }
            Production::Alias { content, .. }
            | Production::Reserved { content, .. }
            | Production::ImmediateToken { content }
            | Production::Token { content }
            | Production::Field { content, .. }
            | Production::Optional { content }
            | Production::Prec { content, .. }
            | Production::PrecLeft { content, .. }
            | Production::PrecRight { content, .. }
            | Production::PrecDynamic { content, .. } => pending.push(content),
            _ => {}
        }
    }
    false
}
/// Returns `true` when `s` is exactly `"-"` or `"+"`.
pub(crate) fn is_unary_sign(s: &str) -> bool {
    s == "-" || s == "+"
}
/// Collects the unary sign literals (`-` / `+`) that `prod` optionally offers.
///
/// * A `Choice` that includes `Blank` contributes every alternative that
///   resolves to a sign literal, in declaration order.
/// * An `Optional` contributes its content when that resolves to a sign.
/// * Precedence and field wrappers defer to their content.
///
/// All other shapes (including a `Choice` without `Blank`) give an empty
/// vector.
pub(crate) fn leading_optional_sign(prod: &Production) -> Vec<String> {
    match prod {
        Production::Choice { members } => {
            if !members.iter().any(|alt| matches!(alt, Production::Blank)) {
                return Vec::new();
            }
            let mut signs = Vec::new();
            for alt in members {
                if let Some(text) = unwrap_to_string(alt) {
                    if is_unary_sign(text) {
                        signs.push(text.to_owned());
                    }
                }
            }
            signs
        }
        Production::Optional { content } => match unwrap_to_string(content) {
            Some(text) if is_unary_sign(text) => vec![text.to_owned()],
            _ => Vec::new(),
        },
        Production::Field { content, .. }
        | Production::Prec { content, .. }
        | Production::PrecLeft { content, .. }
        | Production::PrecRight { content, .. }
        | Production::PrecDynamic { content, .. } => leading_optional_sign(content),
        _ => Vec::new(),
    }
}
/// Maps an opening bracket literal (`(`, `[`, `{`) to its closing partner.
///
/// Any other input yields `None`.
pub(crate) fn matching_close_bracket(open: &str) -> Option<&'static str> {
    const PAIRS: [(&str, &str); 3] = [("(", ")"), ("[", "]"), ("{", "}")];
    PAIRS
        .iter()
        .find(|&&(opener, _)| opener == open)
        .map(|&(_, closer)| closer)
}
/// Returns `true` when `s` is shaped like an identifier: non-empty, starting
/// with an alphabetic character or `_`, and continuing with alphanumeric
/// characters or `_` only.
pub(crate) fn is_word_like(s: &str) -> bool {
    let mut chars = s.chars();
    match chars.next() {
        Some(first) if first.is_alphabetic() || first == '_' => {
            chars.all(|c| c.is_alphanumeric() || c == '_')
        }
        _ => false,
    }
}
/// Returns `false` only for a one-byte string that is one of the operator
/// characters `= + - * / < > ! ? | & ^ % ~`.
///
/// Every other input, including the empty string and any string longer than
/// one byte, returns `true`.
pub(crate) fn is_prefix_sigil(s: &str) -> bool {
    const OPERATOR_BYTES: &[u8] = b"=+-*/<>!?|&^%~";
    match s.as_bytes() {
        [single] => !OPERATOR_BYTES.contains(single),
        _ => true,
    }
}
/// Returns `true` when `s` is exactly one of `.`, `::` or `->`.
pub(crate) fn is_connector_punctuation(s: &str) -> bool {
    [".", "::", "->"].contains(&s)
}
/// Returns `true` when `prod` is an `ImmediateToken`, possibly wrapped in
/// precedence, token, field or reserved wrappers.
pub(crate) fn is_immediate_token(prod: &Production) -> bool {
    let mut node = prod;
    loop {
        match node {
            Production::ImmediateToken { .. } => return true,
            Production::Reserved { content, .. }
            | Production::Field { content, .. }
            | Production::Token { content }
            | Production::Prec { content, .. }
            | Production::PrecLeft { content, .. }
            | Production::PrecRight { content, .. }
            | Production::PrecDynamic { content, .. } => node = content,
            _ => return false,
        }
    }
}
/// Returns `true` when `prod` can only ever produce an immediate token.
///
/// Alias, precedence, token, field and reserved wrappers are looked through.
/// A `Choice` qualifies when it has at least one non-`Blank` alternative and
/// every non-`Blank` alternative qualifies; `Blank` alternatives are ignored.
pub(crate) fn reduces_to_immediate_token(prod: &Production) -> bool {
    match prod {
        Production::ImmediateToken { .. } => true,
        Production::Choice { members } => {
            let mut found_immediate = false;
            let alternatives = members
                .iter()
                .filter(|alt| !matches!(alt, Production::Blank));
            for alt in alternatives {
                if !reduces_to_immediate_token(alt) {
                    return false;
                }
                found_immediate = true;
            }
            found_immediate
        }
        Production::Reserved { content, .. }
        | Production::Field { content, .. }
        | Production::Token { content }
        | Production::Alias { content, .. }
        | Production::Prec { content, .. }
        | Production::PrecLeft { content, .. }
        | Production::PrecRight { content, .. }
        | Production::PrecDynamic { content, .. } => reduces_to_immediate_token(content),
        _ => false,
    }
}
/// Resolves `prod` to a literal by peeling token, immediate-token,
/// precedence, field and reserved wrappers.
///
/// Returns the `String` value when one is reached and `None` for any other
/// shape.
pub(crate) fn unwrap_to_string(prod: &Production) -> Option<&str> {
    let mut node = prod;
    loop {
        match node {
            Production::String { value } => return Some(value.as_str()),
            Production::Reserved { content, .. }
            | Production::Field { content, .. }
            | Production::Token { content }
            | Production::ImmediateToken { content }
            | Production::Prec { content, .. }
            | Production::PrecLeft { content, .. }
            | Production::PrecRight { content, .. }
            | Production::PrecDynamic { content, .. } => node = content,
            _ => return None,
        }
    }
}
/// Strips every leading precedence wrapper (`Prec`, `PrecLeft`, `PrecRight`,
/// `PrecDynamic`) and returns the first production that is not one.
pub(crate) fn unwrap_prec(prod: &Production) -> &Production {
    let mut node = prod;
    while let Production::Prec { content, .. }
    | Production::PrecLeft { content, .. }
    | Production::PrecRight { content, .. }
    | Production::PrecDynamic { content, .. } = node
    {
        node = content;
    }
    node
}
/// Selects the alternatives that are left-recursive on `rule_name`.
///
/// An alternative counts when, after `unwrap_prec`, it is a `Seq` whose first
/// member is a `Symbol` named `rule_name`. The member slices of those
/// sequences are returned in declaration order, or `None` when there are
/// none.
pub(crate) fn left_recursive_alts<'a>(
    members: &'a [Production],
    rule_name: &str,
) -> Option<Vec<&'a [Production]>> {
    let mut recursive: Vec<&'a [Production]> = Vec::new();
    for alt in members {
        let Production::Seq { members: seq } = unwrap_prec(alt) else {
            continue;
        };
        if let Some(Production::Symbol { name }) = seq.first() {
            if name == rule_name {
                recursive.push(seq.as_slice());
            }
        }
    }
    if recursive.is_empty() {
        None
    } else {
        Some(recursive)
    }
}
/// Returns `true` when `prod` is a literal ending in a quote character
/// (`'`, `"` or `` ` ``).
///
/// A `Choice` qualifies only if it is non-empty and every alternative
/// resolves (via `unwrap_to_string`) to such a literal.
pub(crate) fn is_quote_delimiter(prod: &Production) -> bool {
    /// True when `p` resolves to a literal whose final character is a quote.
    fn closes_with_quote(p: &Production) -> bool {
        unwrap_to_string(p)
            .and_then(|text| text.chars().next_back())
            .is_some_and(|last| matches!(last, '\'' | '"' | '`'))
    }

    if let Production::Choice { members } = prod {
        !members.is_empty() && members.iter().all(closes_with_quote)
    } else {
        closes_with_quote(prod)
    }
}
/// Extracts the fixed prefix of a production shaped like "prefix followed by
/// the rest of the line".
///
/// Token, immediate-token, precedence and reserved wrappers are looked
/// through. A `Seq` with at least two members matches when its first member
/// is a `String` (or a `Pattern` accepted by `fixed_literal_pattern`) and any
/// later member satisfies `seq_member_is_line_rest`. A `Choice` returns the
/// prefix of its first matching alternative.
pub(crate) fn extract_line_comment_prefix(prod: &Production) -> Option<String> {
    match prod {
        Production::Choice { members } => members.iter().find_map(extract_line_comment_prefix),
        Production::Seq { members } => {
            let (head, tail) = members.split_first()?;
            if tail.is_empty() {
                return None;
            }
            let prefix = match head {
                Production::String { value } => value.clone(),
                Production::Pattern { value } => fixed_literal_pattern(value)?,
                _ => return None,
            };
            tail.iter().any(seq_member_is_line_rest).then_some(prefix)
        }
        Production::Reserved { content, .. }
        | Production::Token { content }
        | Production::ImmediateToken { content }
        | Production::Prec { content, .. }
        | Production::PrecLeft { content, .. }
        | Production::PrecRight { content, .. }
        | Production::PrecDynamic { content, .. } => extract_line_comment_prefix(content),
        _ => None,
    }
}
/// Returns `value` as an owned string when it is a non-empty pattern free of
/// the metacharacters `. * + ? [ ] ( ) | { } ^ $ \`; otherwise `None`.
fn fixed_literal_pattern(value: &str) -> Option<String> {
    const META: &[u8] = b".*+?[]()|{}^$\\";
    if value.is_empty() || value.bytes().any(|byte| META.contains(&byte)) {
        None
    } else {
        Some(value.to_owned())
    }
}
/// Returns `true` if `prod` contains a `Pattern` that either includes `.*`
/// or has a negated class excluding newlines (per
/// `pattern_has_newline_excluding_class`).
///
/// The search descends through `Choice`/`Seq` members and through repeat,
/// token, immediate-token, precedence, optional and reserved wrappers.
fn seq_member_is_line_rest(prod: &Production) -> bool {
    // Explicit work list; visiting order is irrelevant for an existence check.
    let mut pending: Vec<&Production> = vec![prod];
    while let Some(node) = pending.pop() {
        match node {
            Production::Pattern { value } => {
                if value.contains(".*") || pattern_has_newline_excluding_class(value) {
                    return true;
                }
            }
            Production::Choice { members } | Production::Seq { members } => {
                pending.extend(members.iter());
            }
            Production::Reserved { content, .. }
            | Production::Optional { content }
            | Production::Token { content }
            | Production::ImmediateToken { content }
            | Production::Repeat { content }
            | Production::Repeat1 { content }
            | Production::Prec { content, .. }
            | Production::PrecLeft { content, .. }
            | Production::PrecRight { content, .. }
            | Production::PrecDynamic { content, .. } => pending.push(content),
            _ => {}
        }
    }
    false
}
/// Returns `true` if `value` contains a negated character class (`[^...]`)
/// whose text mentions a newline escape: `\n`, `\r`, `\x0a` or `\x0A`.
///
/// A `[` counts as opening a class only when it is preceded by an even number
/// of backslashes. Checking just the single preceding byte would misread
/// `\\[^\n]` (an escaped backslash followed by a real class) as an escaped
/// bracket; counting parity matches how `is_rest_of_line_pattern` treats
/// escapes.
///
/// Inside a class, a `]` directly after `[^` is taken literally and
/// backslash escapes are skipped as a pair. An unterminated class is scanned
/// to the end of the pattern.
pub(crate) fn pattern_has_newline_excluding_class(value: &str) -> bool {
    const NEWLINE_ESCAPES: [&str; 4] = ["\\n", "\\r", "\\x0a", "\\x0A"];

    let bytes = value.as_bytes();
    let mut i = 0;
    while i + 1 < bytes.len() {
        let opens_negated_class = bytes[i] == b'[' && bytes[i + 1] == b'^' && {
            // An odd run of backslashes means the bracket itself is escaped.
            let backslashes = bytes[..i]
                .iter()
                .rev()
                .take_while(|&&b| b == b'\\')
                .count();
            backslashes % 2 == 0
        };
        if !opens_negated_class {
            i += 1;
            continue;
        }

        let mut j = i + 2;
        // `[^]...]`: a leading `]` is a literal member, not the terminator.
        if j < bytes.len() && bytes[j] == b']' {
            j += 1;
        }
        while j < bytes.len() {
            if bytes[j] == b'\\' {
                j += 2;
                continue;
            }
            if bytes[j] == b']' {
                break;
            }
            j += 1;
        }
        // A trailing escape can push `j` one past the end; clamp before slicing.
        let class_end = j.min(bytes.len());
        let body = &value[i..class_end];
        if NEWLINE_ESCAPES.iter().any(|&esc| body.contains(esc)) {
            return true;
        }
        i = class_end + 1;
    }
    false
}
/// Returns the regex text of `prod` when it is a `Pattern`, possibly wrapped
/// in token, immediate-token, precedence or reserved wrappers; otherwise
/// `None`.
pub(crate) fn terminal_pattern_of(prod: &Production) -> Option<&str> {
    let mut node = prod;
    loop {
        match node {
            Production::Pattern { value } => return Some(value.as_str()),
            Production::Reserved { content, .. }
            | Production::Token { content }
            | Production::ImmediateToken { content }
            | Production::Prec { content, .. }
            | Production::PrecLeft { content, .. }
            | Production::PrecRight { content, .. }
            | Production::PrecDynamic { content, .. } => node = content,
            _ => return None,
        }
    }
}
/// Decides whether the regex `value` ends by consuming the remainder of a
/// line.
///
/// Two shapes are recognised, in this order:
///
/// 1. The last unescaped `.*` or `.+` in the pattern. If one exists, the
///    answer is whether the text after it passes
///    `rest_after_unbounded_tail_is_inert`; the second shape is then not
///    consulted.
/// 2. Otherwise, the last unescaped, terminated negated class `[^...]` that
///    is followed by `*` or `+` and whose body passes
///    `negated_class_excludes_only_newlines`; again the text after the
///    quantifier must be inert.
///
/// "Unescaped" means preceded by an even number of backslashes.
pub(crate) fn is_rest_of_line_pattern(value: &str) -> bool {
    /// True when an even number of backslashes directly precedes `idx`.
    fn unescaped(bytes: &[u8], idx: usize) -> bool {
        let run = bytes[..idx]
            .iter()
            .rev()
            .take_while(|&&b| b == b'\\')
            .count();
        run % 2 == 0
    }
    /// True for the unbounded quantifiers `*` and `+`.
    fn is_unbounded_quantifier(byte: u8) -> bool {
        byte == b'*' || byte == b'+'
    }

    let bytes = value.as_bytes();

    // Shape 1: the last unescaped `.*` / `.+`.
    let dot_tail = bytes
        .windows(2)
        .enumerate()
        .filter(|&(idx, pair)| {
            pair[0] == b'.' && is_unbounded_quantifier(pair[1]) && unescaped(bytes, idx)
        })
        .map(|(idx, _)| idx + 2)
        .last();
    if let Some(start) = dot_tail {
        return rest_after_unbounded_tail_is_inert(&value[start..]);
    }

    // Shape 2: the last qualifying `[^...]*` / `[^...]+`.
    let mut class_tail = None;
    let mut pos = 0;
    while pos + 1 < bytes.len() {
        let opens_class = bytes[pos] == b'[' && bytes[pos + 1] == b'^' && unescaped(bytes, pos);
        if !opens_class {
            pos += 1;
            continue;
        }

        let mut end = pos + 2;
        // `[^]...]`: a leading `]` is a literal member, not the terminator.
        if bytes.get(end) == Some(&b']') {
            end += 1;
        }
        while end < bytes.len() && bytes[end] != b']' {
            // Escapes are skipped as a pair so `\]` does not close the class.
            end += if bytes[end] == b'\\' { 2 } else { 1 };
        }
        if end >= bytes.len() {
            // Unterminated class: treat the `[` as ordinary and move on.
            pos += 1;
            continue;
        }

        let quant = end + 1;
        if bytes.get(quant).copied().is_some_and(is_unbounded_quantifier)
            && negated_class_excludes_only_newlines(&value[pos + 2..end])
        {
            class_tail = Some(quant + 1);
        }
        pos = end + 1;
    }

    class_tail.is_some_and(|start| rest_after_unbounded_tail_is_inert(&value[start..]))
}
/// Returns `true` when `inner` (the body of a negated class, without the
/// `[^` and `]`) is non-empty and consists solely of newline atoms.
///
/// Accepted atoms are the escapes `\n` and `\r`, the hex escapes `\x0a`,
/// `\x0A`, `\x0d`, `\x0D`, and raw LF / CR characters. Anything else,
/// including a lone trailing backslash, makes the result `false`.
fn negated_class_excludes_only_newlines(inner: &str) -> bool {
    let bytes = inner.as_bytes();
    let mut pos = 0;
    while pos < bytes.len() {
        let consumed = match &bytes[pos..] {
            [b'\\', b'n' | b'r', ..] => 2,
            [b'\\', b'x', b'0', b'a' | b'A' | b'd' | b'D', ..] => 4,
            [b'\n' | b'\r', ..] => 1,
            _ => return false,
        };
        pos += consumed;
    }
    // Reaching here means every atom was a newline; require at least one.
    !bytes.is_empty()
}
/// Returns `true` when `rest` consists only of the bytes `)`, `?`, `*`, `+`,
/// `$` and the escapes `\n`, `\r`, `\f`, `\t`, `\v`, `\z`, `\Z`.
///
/// The empty string qualifies. Any other byte, any other escape, or a lone
/// trailing backslash disqualifies.
fn rest_after_unbounded_tail_is_inert(rest: &str) -> bool {
    let mut bytes = rest.bytes();
    while let Some(byte) = bytes.next() {
        match byte {
            b')' | b'?' | b'*' | b'+' | b'$' => {}
            b'\\' => match bytes.next() {
                Some(b'n' | b'r' | b'f' | b't' | b'v' | b'z' | b'Z') => {}
                _ => return false,
            },
            _ => return false,
        }
    }
    true
}
/// Classifies the terminal `kind` by looking it up in the grammar's external
/// bracket sets.
///
/// Membership in `external_bracket_opens` wins over
/// `external_bracket_closes`; a kind found in neither is a plain
/// `TokenRole::Terminal`.
pub(crate) fn leaf_terminal_role(grammar: &Grammar, kind: &str) -> TokenRole {
    match kind {
        k if grammar.external_bracket_opens.contains(k) => TokenRole::BracketOpen,
        k if grammar.external_bracket_closes.contains(k) => TokenRole::BracketClose,
        _ => TokenRole::Terminal,
    }
}
/// Returns `true` when `prod` is a `Pattern`, possibly wrapped in token,
/// immediate-token, precedence or reserved wrappers.
pub(crate) fn alias_content_is_terminal_pattern(prod: &Production) -> bool {
    let mut node = prod;
    loop {
        match node {
            Production::Pattern { .. } => return true,
            Production::Reserved { content, .. }
            | Production::Token { content }
            | Production::ImmediateToken { content }
            | Production::Prec { content, .. }
            | Production::PrecLeft { content, .. }
            | Production::PrecRight { content, .. }
            | Production::PrecDynamic { content, .. } => node = content,
            _ => return false,
        }
    }
}
/// Strips leading precedence, `Token` and `Reserved` wrappers and returns the
/// first production that is none of those.
///
/// Despite the name, the result is not guaranteed to be a `Seq`; callers
/// must still match on it.
pub(crate) fn unwrap_to_seq(prod: &Production) -> &Production {
    let mut node = prod;
    while let Production::Reserved { content, .. }
    | Production::Token { content }
    | Production::Prec { content, .. }
    | Production::PrecLeft { content, .. }
    | Production::PrecRight { content, .. }
    | Production::PrecDynamic { content, .. } = node
    {
        node = content;
    }
    node
}
/// Returns the name of `prod` when it is a bare `Symbol`, and `None` for
/// every other production (wrappers are not looked through).
pub(crate) fn external_symbol_name(prod: &Production) -> Option<&str> {
    if let Production::Symbol { name } = prod {
        Some(name.as_str())
    } else {
        None
    }
}
/// Gathers the name of every `Symbol` referenced anywhere inside any rule
/// body in `rules`.
///
/// The walk covers `Seq`/`Choice` members and the content of alias, repeat,
/// optional, field, token, immediate-token, precedence and reserved
/// wrappers. Rule names themselves (the map keys) are not added unless some
/// rule body references them.
pub(crate) fn collect_all_symbol_refs(
    rules: &BTreeMap<String, Production>,
) -> std::collections::HashSet<String> {
    let mut refs = std::collections::HashSet::new();
    // Explicit work list seeded with every rule body; the result is a set, so
    // visiting order does not matter.
    let mut pending: Vec<&Production> = rules.values().collect();
    while let Some(node) = pending.pop() {
        match node {
            Production::Symbol { name } => {
                refs.insert(name.clone());
            }
            Production::Seq { members } | Production::Choice { members } => {
                pending.extend(members.iter());
            }
            Production::Reserved { content, .. }
            | Production::ImmediateToken { content }
            | Production::Token { content }
            | Production::Field { content, .. }
            | Production::Optional { content }
            | Production::Repeat { content }
            | Production::Repeat1 { content }
            | Production::Alias { content, .. }
            | Production::Prec { content, .. }
            | Production::PrecLeft { content, .. }
            | Production::PrecRight { content, .. }
            | Production::PrecDynamic { content, .. } => pending.push(content),
            _ => {}
        }
    }
    refs
}
/// Lists every non-empty `String` literal inside `production`, in
/// left-to-right (pre-order) order, duplicates included.
///
/// The walk covers `Choice`/`Seq` members and the content of repeat,
/// optional, field, alias, token, immediate-token, precedence and reserved
/// wrappers.
pub(crate) fn literal_strings(production: &Production) -> Vec<String> {
    let mut found = Vec::new();
    // Depth-first stack; children are pushed reversed so they pop in
    // declaration order and the output order matches a recursive walk.
    let mut pending: Vec<&Production> = vec![production];
    while let Some(node) = pending.pop() {
        match node {
            Production::String { value } => {
                if !value.is_empty() {
                    found.push(value.clone());
                }
            }
            Production::Choice { members } | Production::Seq { members } => {
                pending.extend(members.iter().rev());
            }
            Production::Reserved { content, .. }
            | Production::ImmediateToken { content }
            | Production::Token { content }
            | Production::Alias { content, .. }
            | Production::Field { content, .. }
            | Production::Optional { content }
            | Production::Repeat { content }
            | Production::Repeat1 { content }
            | Production::Prec { content, .. }
            | Production::PrecLeft { content, .. }
            | Production::PrecRight { content, .. }
            | Production::PrecDynamic { content, .. } => pending.push(content),
            _ => {}
        }
    }
    found
}
/// Lists the symbol names `production` refers to, in left-to-right
/// (pre-order) order, duplicates included.
///
/// Besides `Symbol` names, a *named* `Alias` with a non-empty `value`
/// contributes that value, emitted before anything found in the alias
/// content. The walk covers `Choice`/`Seq` members and the content of
/// repeat, optional, field, token, immediate-token, precedence and reserved
/// wrappers.
pub(crate) fn referenced_symbols(production: &Production) -> Vec<&str> {
    let mut names = Vec::new();
    // Depth-first stack; children are pushed reversed so they pop in
    // declaration order and the output order matches a recursive walk.
    let mut pending: Vec<&Production> = vec![production];
    while let Some(node) = pending.pop() {
        match node {
            Production::Symbol { name } => names.push(name.as_str()),
            Production::Choice { members } | Production::Seq { members } => {
                pending.extend(members.iter().rev());
            }
            Production::Alias {
                content,
                named,
                value,
            } => {
                if *named && !value.is_empty() {
                    names.push(value.as_str());
                }
                pending.push(content);
            }
            Production::Reserved { content, .. }
            | Production::ImmediateToken { content }
            | Production::Token { content }
            | Production::Field { content, .. }
            | Production::Optional { content }
            | Production::Repeat { content }
            | Production::Repeat1 { content }
            | Production::Prec { content, .. }
            | Production::PrecLeft { content, .. }
            | Production::PrecRight { content, .. }
            | Production::PrecDynamic { content, .. } => pending.push(content),
            _ => {}
        }
    }
    names
}
/// Returns the name of the first `Symbol` met in a left-to-right,
/// depth-first walk of `production`, or `None` if there is none.
///
/// The walk covers `Seq`/`Choice` members and the content of repeat,
/// optional, field, alias, token, immediate-token, precedence and reserved
/// wrappers. Compiled for tests only.
#[cfg(test)]
pub(crate) fn first_symbol(production: &Production) -> Option<&str> {
    // Depth-first stack; children are pushed reversed so the leftmost
    // subtree is explored first.
    let mut pending: Vec<&Production> = vec![production];
    while let Some(node) = pending.pop() {
        match node {
            Production::Symbol { name } => return Some(name.as_str()),
            Production::Seq { members } | Production::Choice { members } => {
                pending.extend(members.iter().rev());
            }
            Production::Reserved { content, .. }
            | Production::ImmediateToken { content }
            | Production::Token { content }
            | Production::Alias { content, .. }
            | Production::Field { content, .. }
            | Production::Optional { content }
            | Production::Repeat { content }
            | Production::Repeat1 { content }
            | Production::Prec { content, .. }
            | Production::PrecLeft { content, .. }
            | Production::PrecRight { content, .. }
            | Production::PrecDynamic { content, .. } => pending.push(content),
            _ => {}
        }
    }
    None
}
/// Returns the precedence value of the outermost precedence wrapper.
///
/// The result is `0` when `prod` is not a precedence wrapper, and also when
/// the wrapper's `value.as_i64()` yields `None`.
pub(crate) fn prec_value(prod: &Production) -> i64 {
    if let Production::Prec { value, .. }
    | Production::PrecLeft { value, .. }
    | Production::PrecRight { value, .. }
    | Production::PrecDynamic { value, .. } = prod
    {
        value.as_i64().unwrap_or(0)
    } else {
        0
    }
}
/// Lists the field names gathered by `collect_mandatory_fields` when the
/// walk starts at `production` in the mandatory state.
pub(crate) fn mandatory_field_names(production: &Production) -> Vec<&str> {
    let mut names = Vec::new();
    collect_mandatory_fields(production, true, &mut names);
    names
}
/// Appends to `out` the name of every `Field` reached while still in the
/// mandatory state, in left-to-right (pre-order) order.
///
/// The `mandatory` flag is propagated as follows:
///
/// * `Field` records its name if mandatory; its content is always walked as
///   non-mandatory.
/// * `Seq` passes the flag to every member.
/// * `Choice` passes the flag on unless one alternative is `Blank`, in which
///   case all alternatives become non-mandatory.
/// * `Optional` and `Repeat` make their content non-mandatory.
/// * `Repeat1`, token, immediate-token, alias, precedence and reserved
///   wrappers pass the flag through unchanged.
fn collect_mandatory_fields<'p>(
    production: &'p Production,
    mandatory: bool,
    out: &mut Vec<&'p str>,
) {
    // Depth-first stack of (node, still-mandatory); children are pushed
    // reversed so output order matches a recursive left-to-right walk.
    let mut pending: Vec<(&'p Production, bool)> = vec![(production, mandatory)];
    while let Some((node, required)) = pending.pop() {
        match node {
            Production::Field { name, content } => {
                if required {
                    out.push(name.as_str());
                }
                let inner: &'p Production = content;
                pending.push((inner, false));
            }
            Production::Seq { members } => {
                pending.extend(members.iter().rev().map(|member| (member, required)));
            }
            Production::Choice { members } => {
                let skippable = members.iter().any(|alt| matches!(alt, Production::Blank));
                let still_required = required && !skippable;
                pending.extend(members.iter().rev().map(|alt| (alt, still_required)));
            }
            Production::Optional { content } | Production::Repeat { content } => {
                let inner: &'p Production = content;
                pending.push((inner, false));
            }
            Production::Reserved { content, .. }
            | Production::Alias { content, .. }
            | Production::ImmediateToken { content }
            | Production::Token { content }
            | Production::Repeat1 { content }
            | Production::Prec { content, .. }
            | Production::PrecLeft { content, .. }
            | Production::PrecRight { content, .. }
            | Production::PrecDynamic { content, .. } => {
                let inner: &'p Production = content;
                pending.push((inner, required));
            }
            _ => {}
        }
    }
}
/// Inserts into `out` the name of every `Field` found anywhere inside
/// `production`, including fields nested in other fields.
///
/// The walk covers `Seq`/`Choice` members and the content of repeat,
/// optional, alias, token, immediate-token, precedence and reserved
/// wrappers. `Symbol` references are not followed.
pub(crate) fn collect_field_names<'p>(
    production: &'p Production,
    out: &mut std::collections::HashSet<&'p str>,
) {
    // Explicit work list; the result is a set, so visiting order is irrelevant.
    let mut pending: Vec<&'p Production> = vec![production];
    while let Some(node) = pending.pop() {
        match node {
            Production::Field { name, content } => {
                out.insert(name.as_str());
                pending.push(content);
            }
            Production::Seq { members } | Production::Choice { members } => {
                pending.extend(members.iter());
            }
            Production::Reserved { content, .. }
            | Production::ImmediateToken { content }
            | Production::Token { content }
            | Production::Alias { content, .. }
            | Production::Optional { content }
            | Production::Repeat { content }
            | Production::Repeat1 { content }
            | Production::Prec { content, .. }
            | Production::PrecLeft { content, .. }
            | Production::PrecRight { content, .. }
            | Production::PrecDynamic { content, .. } => pending.push(content),
            _ => {}
        }
    }
}
/// Like a plain field-name collection, but additionally expands `Symbol`
/// references whose name starts with `_` by walking the referenced rule from
/// `grammar.rules`.
///
/// `seen` records every underscore-prefixed symbol already expanded, so each
/// such rule is walked at most once and cycles terminate. A symbol is marked
/// as seen even when no rule with that name exists. Symbols without the `_`
/// prefix are not followed.
pub(crate) fn collect_inner_field_names_expanded<'p>(
    production: &'p Production,
    grammar: &'p crate::emit_pretty::grammar::Grammar,
    out: &mut std::collections::HashSet<&'p str>,
    seen: &mut std::collections::HashSet<&'p str>,
) {
    // Explicit work list; both outputs are sets, so visiting order is irrelevant.
    let mut pending: Vec<&'p Production> = vec![production];
    while let Some(node) = pending.pop() {
        match node {
            Production::Field { name, content } => {
                out.insert(name.as_str());
                pending.push(content);
            }
            Production::Symbol { name } => {
                if name.starts_with('_') && seen.insert(name.as_str()) {
                    if let Some(rule) = grammar.rules.get(name) {
                        pending.push(rule);
                    }
                }
            }
            Production::Seq { members } | Production::Choice { members } => {
                pending.extend(members.iter());
            }
            Production::Reserved { content, .. }
            | Production::ImmediateToken { content }
            | Production::Token { content }
            | Production::Alias { content, .. }
            | Production::Optional { content }
            | Production::Repeat { content }
            | Production::Repeat1 { content }
            | Production::Prec { content, .. }
            | Production::PrecLeft { content, .. }
            | Production::PrecRight { content, .. }
            | Production::PrecDynamic { content, .. } => pending.push(content),
            _ => {}
        }
    }
}
/// Decides whether the body of a repeat is a single field or rule reference
/// in every one of its alternatives.
///
/// * `Field` qualifies.
/// * A `Symbol` not starting with `_` qualifies when the grammar has a rule
///   of that name.
/// * A `Symbol` starting with `_` is expanded through `grammar.rules` and
///   qualifies when the referenced rule does; a symbol met a second time
///   during the same query yields `false`, which also breaks cycles.
/// * A `Choice` must be non-empty with every alternative qualifying.
/// * `Token`, precedence and `Reserved` wrappers defer to their content.
/// * Anything else (for example `Seq`) does not qualify.
pub(crate) fn repeat_body_is_whole_vertex_item(
    content: &Production,
    grammar: &crate::emit_pretty::grammar::Grammar,
) -> bool {
    fn qualifies(
        node: &Production,
        grammar: &crate::emit_pretty::grammar::Grammar,
        visited: &mut std::collections::HashSet<String>,
    ) -> bool {
        match node {
            Production::Field { .. } => true,
            Production::Symbol { name } if !name.starts_with('_') => {
                grammar.rules.contains_key(name)
            }
            Production::Symbol { name } => {
                // `insert` returns false on a repeat visit, ending the search.
                visited.insert(name.clone())
                    && grammar
                        .rules
                        .get(name)
                        .is_some_and(|rule| qualifies(rule, grammar, visited))
            }
            Production::Choice { members } => {
                if members.is_empty() {
                    return false;
                }
                // Alternatives are checked in order and stop at the first
                // failure, because `visited` makes the outcome order-sensitive.
                for alt in members {
                    if !qualifies(alt, grammar, visited) {
                        return false;
                    }
                }
                true
            }
            Production::Reserved { content, .. }
            | Production::Token { content }
            | Production::Prec { content, .. }
            | Production::PrecLeft { content, .. }
            | Production::PrecRight { content, .. }
            | Production::PrecDynamic { content, .. } => qualifies(content, grammar, visited),
            _ => false,
        }
    }

    let mut visited = std::collections::HashSet::new();
    qualifies(content, grammar, &mut visited)
}
/// Decides whether the body of a repeat can produce a sequence shaped like
/// `[` ... `]` ... `=`.
///
/// A `Seq` matches when its first member starts with the literal `[`, some
/// member starts with the literal `]`, and a member after that one starts
/// with `=` (directly, or through any alternative of a `Choice`).
///
/// The search follows `Symbol` references through `grammar.rules` (each name
/// at most once per query), tries `Choice` alternatives in order, and looks
/// through `Token`, precedence and `Reserved` wrappers.
pub(crate) fn repeat_has_bracket_keyed_member(content: &Production, grammar: &Grammar) -> bool {
    /// True when `p`, or any alternative of `p` if it is a `Choice`, starts
    /// with the literal `target`.
    fn leads_with(p: &Production, target: &str) -> bool {
        if let Production::Choice { members } = p {
            members.iter().any(|alt| leads_with(alt, target))
        } else {
            first_string_of(p) == Some(target)
        }
    }

    /// True for a sequence of the form `[` ... `]` ... `=`.
    fn is_bracket_keyed(seq_members: &[Production]) -> bool {
        if seq_members.first().and_then(first_string_of) != Some("[") {
            return false;
        }
        let Some(close_idx) = seq_members
            .iter()
            .position(|member| first_string_of(member) == Some("]"))
        else {
            return false;
        };
        seq_members[close_idx + 1..]
            .iter()
            .any(|member| leads_with(member, "="))
    }

    fn search(
        node: &Production,
        grammar: &Grammar,
        visited: &mut std::collections::HashSet<String>,
    ) -> bool {
        match node {
            Production::Seq { members } => is_bracket_keyed(members),
            Production::Symbol { name } => {
                // `insert` returns false on a repeat visit, ending the search.
                visited.insert(name.clone())
                    && grammar
                        .rules
                        .get(name)
                        .is_some_and(|rule| search(rule, grammar, visited))
            }
            Production::Choice { members } => {
                for alt in members {
                    if search(alt, grammar, visited) {
                        return true;
                    }
                }
                false
            }
            Production::Reserved { content, .. }
            | Production::Token { content }
            | Production::Prec { content, .. }
            | Production::PrecLeft { content, .. }
            | Production::PrecRight { content, .. }
            | Production::PrecDynamic { content, .. } => search(content, grammar, visited),
            _ => false,
        }
    }

    let mut visited = std::collections::HashSet::new();
    search(content, grammar, &mut visited)
}
/// Returns `true` if `production` contains a `Field` whose name appears in
/// `edge_kinds`.
///
/// The search covers `Seq`/`Choice` members and the content of repeat,
/// optional, alias, token, immediate-token, precedence and reserved
/// wrappers. The content of a `Field` is *not* searched: a field whose own
/// name is not listed ends that branch.
pub(crate) fn has_field_in(production: &Production, edge_kinds: &[&str]) -> bool {
    // Explicit work list; visiting order is irrelevant for an existence check.
    let mut pending: Vec<&Production> = vec![production];
    while let Some(node) = pending.pop() {
        match node {
            Production::Field { name, .. } => {
                if edge_kinds.contains(&name.as_str()) {
                    return true;
                }
            }
            Production::Seq { members } | Production::Choice { members } => {
                pending.extend(members.iter());
            }
            Production::Reserved { content, .. }
            | Production::ImmediateToken { content }
            | Production::Token { content }
            | Production::Alias { content, .. }
            | Production::Optional { content }
            | Production::Repeat { content }
            | Production::Repeat1 { content }
            | Production::Prec { content, .. }
            | Production::PrecLeft { content, .. }
            | Production::PrecRight { content, .. }
            | Production::PrecDynamic { content, .. } => pending.push(content),
            _ => {}
        }
    }
    false
}
/// Extracts the set of literals an aliased production can stand for.
///
/// After peeling precedence, token, immediate-token and reserved wrappers,
/// `p` must be an `Alias`. Its (peeled) content must be either a single
/// `String`, giving a one-element vector, or a `Choice` whose alternatives
/// all peel to `String`, giving them in declaration order. Any other shape
/// yields `None`.
pub(crate) fn literal_choice_set(p: &Production) -> Option<Vec<&str>> {
    /// Strips precedence, token, immediate-token and reserved wrappers.
    fn peel(p: &Production) -> &Production {
        let mut node = p;
        while let Production::Reserved { content, .. }
        | Production::ImmediateToken { content }
        | Production::Token { content }
        | Production::Prec { content, .. }
        | Production::PrecLeft { content, .. }
        | Production::PrecRight { content, .. }
        | Production::PrecDynamic { content, .. } = node
        {
            node = content;
        }
        node
    }

    let Production::Alias { content, .. } = peel(p) else {
        return None;
    };
    match peel(content) {
        Production::String { value } => Some(vec![value.as_str()]),
        // Collecting into `Option` stops at, and propagates, the first
        // alternative that is not a literal.
        Production::Choice { members } => members
            .iter()
            .map(|alt| match peel(alt) {
                Production::String { value } => Some(value.as_str()),
                _ => None,
            })
            .collect(),
        _ => None,
    }
}
/// Decides whether the alternative `alt` stands for a newline.
///
/// * A `Pattern` is tested with `is_newline_like_pattern`.
/// * A `Symbol` qualifies when it is listed in `grammar.external_newlines`,
///   or when its name starts with `_` and the rule it names contains a
///   newline pattern (per `contains_newline_pattern`).
/// * Token, immediate-token, precedence and reserved wrappers are looked
///   through.
pub(crate) fn is_newline_alt(grammar: &Grammar, alt: &Production) -> bool {
    let mut node = alt;
    loop {
        match node {
            Production::Pattern { value } => return is_newline_like_pattern(value),
            Production::Symbol { name } => {
                if grammar.external_newlines.contains(name) {
                    return true;
                }
                return name.starts_with('_')
                    && grammar
                        .rules
                        .get(name)
                        .is_some_and(contains_newline_pattern);
            }
            Production::Reserved { content, .. }
            | Production::Token { content }
            | Production::ImmediateToken { content }
            | Production::Prec { content, .. }
            | Production::PrecLeft { content, .. }
            | Production::PrecRight { content, .. }
            | Production::PrecDynamic { content, .. } => node = content,
            _ => return false,
        }
    }
}
/// Returns `true` if `prod` contains a `Pattern` accepted by
/// `is_newline_like_pattern`.
///
/// The search covers `Choice`/`Seq` members and the content of precedence,
/// token, immediate-token, optional, field, alias and reserved wrappers.
/// Repeats and `Symbol` references are not searched.
pub(crate) fn contains_newline_pattern(prod: &Production) -> bool {
    // Explicit work list; visiting order is irrelevant for an existence check.
    let mut pending: Vec<&Production> = vec![prod];
    while let Some(node) = pending.pop() {
        match node {
            Production::Pattern { value } => {
                if is_newline_like_pattern(value) {
                    return true;
                }
            }
            Production::Choice { members } | Production::Seq { members } => {
                pending.extend(members.iter());
            }
            Production::Reserved { content, .. }
            | Production::Alias { content, .. }
            | Production::Field { content, .. }
            | Production::Optional { content }
            | Production::ImmediateToken { content }
            | Production::Token { content }
            | Production::Prec { content, .. }
            | Production::PrecLeft { content, .. }
            | Production::PrecRight { content, .. }
            | Production::PrecDynamic { content, .. } => pending.push(content),
            _ => {}
        }
    }
    false
}
/// Returns `true` when `prod` is a `Pattern` accepted by
/// `is_newline_like_pattern`, possibly wrapped in field, alias, precedence,
/// token, immediate-token or reserved wrappers.
pub(crate) fn is_blank_line_rule(prod: &Production) -> bool {
    let mut node = prod;
    loop {
        match node {
            Production::Pattern { value } => return is_newline_like_pattern(value),
            Production::Reserved { content, .. }
            | Production::ImmediateToken { content }
            | Production::Token { content }
            | Production::Alias { content, .. }
            | Production::Field { content, .. }
            | Production::Prec { content, .. }
            | Production::PrecLeft { content, .. }
            | Production::PrecRight { content, .. }
            | Production::PrecDynamic { content, .. } => node = content,
            _ => return false,
        }
    }
}
pub(crate) fn is_newline_like_pattern(pattern: &str) -> bool {
if pattern.is_empty() {
return false;
}
split_top_level_alternation(pattern)
.iter()
.all(|branch| is_newline_branch(branch))
}
/// Returns `true` when a single regex branch matches nothing but newline
/// characters.
///
/// Accepted pieces are:
///
/// * the escapes `\n` and `\r`, and raw LF / CR characters;
/// * a terminated character class containing only those (and at least one);
/// * the quantifiers `?`, `*`, `+`, which are skipped.
///
/// Any other character, any other escape, an unterminated or empty class, or
/// a branch with no newline atom at all yields `false`.
pub(crate) fn is_newline_branch(branch: &str) -> bool {
    let mut chars = branch.chars();
    let mut found_newline = false;
    while let Some(ch) = chars.next() {
        match ch {
            '?' | '*' | '+' => {}
            '\n' | '\r' => found_newline = true,
            '\\' => match chars.next() {
                Some('n' | 'r') => found_newline = true,
                _ => return false,
            },
            '[' => {
                let mut class_has_newline = false;
                loop {
                    match chars.next() {
                        Some(']') => break,
                        Some('\n' | '\r') => class_has_newline = true,
                        Some('\\') => match chars.next() {
                            Some('n' | 'r') => class_has_newline = true,
                            _ => return false,
                        },
                        // Covers both a foreign member and an unterminated class.
                        _ => return false,
                    }
                }
                if !class_has_newline {
                    return false;
                }
                found_newline = true;
            }
            _ => return false,
        }
    }
    found_newline
}
/// Splits `pattern` at every `|` that is neither escaped with a backslash nor
/// inside a `[...]` character class.
///
/// The result always has at least one element (the whole pattern when no
/// such `|` exists, `""` for an empty pattern). Parentheses are not tracked,
/// so a `|` inside a group also splits.
pub(crate) fn split_top_level_alternation(pattern: &str) -> Vec<&str> {
    let mut branches = Vec::new();
    let mut branch_start = 0;
    let mut inside_class = false;
    let mut cursor = pattern.char_indices();
    while let Some((idx, ch)) = cursor.next() {
        match ch {
            '\\' => {
                // Skip the escaped character so it is never interpreted.
                cursor.next();
            }
            '[' => inside_class = true,
            ']' => inside_class = false,
            '|' if !inside_class => {
                branches.push(&pattern[branch_start..idx]);
                branch_start = idx + 1;
            }
            _ => {}
        }
    }
    branches.push(&pattern[branch_start..]);
    branches
}
/// Returns `true` when `pattern`, ignoring trailing `?`, `*` and `+`
/// characters, matches whitespace only.
///
/// Accepted cores are `\s`, a single space, `\t`, `\p{Zs}`, or a non-empty
/// bracketed class whose members are limited to space, tab and the escapes
/// `\s`, `\t`, `\r`, `\n`. An empty pattern, or one consisting only of
/// quantifier characters, is rejected.
pub(crate) fn is_whitespace_only_pattern(pattern: &str) -> bool {
    let core = pattern.trim_end_matches(|c: char| matches!(c, '?' | '*' | '+'));
    if core.is_empty() {
        return false;
    }
    if ["\\s", " ", "\\t", "\\p{Zs}"].contains(&core) {
        return true;
    }
    let Some(class) = core
        .strip_prefix('[')
        .and_then(|rest| rest.strip_suffix(']'))
    else {
        return false;
    };

    let mut members = class.chars();
    let mut found_member = false;
    while let Some(ch) = members.next() {
        let is_space_atom = match ch {
            '\\' => matches!(members.next(), Some('s' | 't' | 'r' | 'n')),
            ' ' | '\t' => true,
            _ => false,
        };
        if !is_space_atom {
            return false;
        }
        found_member = true;
    }
    found_member
}
/// Returns `true` when the first atom of `pattern` (after one optional
/// leading `^`) would also match a space.
///
/// That is the case when the pattern starts with `.`, or with a negated
/// class `[^...]` whose text up to the first `]` contains none of a literal
/// space, `\s` or `\t`. Every other pattern yields `false`.
pub(crate) fn pattern_absorbs_leading_space(pattern: &str) -> bool {
    let body = pattern.strip_prefix('^').unwrap_or(pattern);
    if body.starts_with('.') {
        return true;
    }
    let Some(after_open) = body.strip_prefix("[^") else {
        return false;
    };
    // Text up to the first `]`, or everything if the class is unterminated.
    let excluded = after_open.split(']').next().unwrap_or(after_open);
    ![" ", "\\s", "\\t"]
        .iter()
        .any(|&needle| excluded.contains(needle))
}
/// Recognises a pattern that is exactly one negated class followed by `*` or
/// `+` (with one optional leading `^`), and returns the class body.
///
/// The body is the text between `[^` and the first unescaped `]`;
/// backslash escapes inside the class are skipped as a pair. Returns `None`
/// when the pattern has a different shape, the class is unterminated, or
/// anything other than a single `*` / `+` follows the class.
pub(crate) fn unbounded_negated_class(pattern: &str) -> Option<&str> {
    let body = pattern.strip_prefix('^').unwrap_or(pattern);
    let inner = body.strip_prefix("[^")?;
    let bytes = inner.as_bytes();

    let mut pos = 0;
    let close = loop {
        // Running off the end means the class never closed.
        match *bytes.get(pos)? {
            b'\\' => pos += 2,
            b']' => break pos,
            _ => pos += 1,
        }
    };

    matches!(&inner[close + 1..], "+" | "*").then(|| &inner[..close])
}
/// Produces a sample string for a regex `pattern`.
///
/// The decoders are tried in order: `decode_simple_pattern_literal`,
/// `decode_whitespace_padded_literal`, `char_class_first_literal`. If none
/// yields a literal, a fixed fallback is chosen by substring checks on the
/// pattern text:
///
/// * `"0"` if it mentions `[0-9]` or `\d`;
/// * `"_x"` if it mentions `[a-zA-Z_]` or `\w`;
/// * `"\"\""` if it contains a double or single quote;
/// * `"_"` otherwise.
pub(crate) fn placeholder_for_pattern(pattern: &str) -> String {
    let decoded = decode_simple_pattern_literal(pattern)
        .or_else(|| decode_whitespace_padded_literal(pattern))
        .or_else(|| char_class_first_literal(pattern));
    if let Some(literal) = decoded {
        return literal;
    }

    let mentions_digits = pattern.contains("[0-9]") || pattern.contains("\\d");
    let mentions_word = pattern.contains("[a-zA-Z_]") || pattern.contains("\\w");
    let mentions_quote = pattern.contains('"') || pattern.contains('\'');
    let fallback = if mentions_digits {
        "0"
    } else if mentions_word {
        "_x"
    } else if mentions_quote {
        "\"\""
    } else {
        "_"
    };
    fallback.to_owned()
}
/// Decodes a regex that is just a literal with backslash escapes into the
/// text it matches.
///
/// Returns `None` if the pattern contains any of `[ ] ( ) * + ? | { }`, or
/// ends in a lone backslash. Escapes `\n`, `\r`, `\t` become the
/// corresponding control character; any other escaped character stands for
/// itself. Characters such as `.`, `^` and `$` are copied verbatim.
pub(crate) fn decode_simple_pattern_literal(pattern: &str) -> Option<String> {
    const STRUCTURAL: [char; 10] = ['[', ']', '(', ')', '*', '+', '?', '|', '{', '}'];
    if pattern.contains(STRUCTURAL) {
        return None;
    }

    let mut decoded = String::with_capacity(pattern.len());
    let mut chars = pattern.chars();
    while let Some(ch) = chars.next() {
        let literal = if ch == '\\' {
            // A trailing backslash has nothing to escape: not a literal.
            match chars.next()? {
                'n' => '\n',
                'r' => '\r',
                't' => '\t',
                other => other,
            }
        } else {
            ch
        };
        decoded.push(literal);
    }
    Some(decoded)
}
/// Decodes a literal that may be wrapped in whitespace runs on either side.
///
/// Steps:
/// 1. A leading run recognised by `strip_leading_ws_run` is removed, if
///    present.
/// 2. A trailing run is removed: when the remaining text contains a `[`, the
///    slice starting at the *last* `[` is dropped only if it is, in its
///    entirety, a run that `strip_leading_ws_run` accepts; when there is no
///    `[` at all, a `\s*` or `\s+` suffix is dropped instead.
/// 3. If nothing is left the result is `None`; otherwise the remainder is
///    handed to `decode_simple_pattern_literal`.
pub(crate) fn decode_whitespace_padded_literal(pattern: &str) -> Option<String> {
    let without_lead = strip_leading_ws_run(pattern).unwrap_or(pattern);

    let core = match without_lead.rfind('[') {
        Some(bracket_at) => {
            let (head, tail) = without_lead.split_at(bracket_at);
            // Only cut the tail when it is nothing but a whitespace run.
            if strip_leading_ws_run(tail) == Some("") {
                head
            } else {
                without_lead
            }
        }
        None => without_lead
            .strip_suffix("\\s*")
            .or_else(|| without_lead.strip_suffix("\\s+"))
            .unwrap_or(without_lead),
    };

    if core.is_empty() {
        None
    } else {
        decode_simple_pattern_literal(core)
    }
}
/// Strips one whitespace-run prefix from `s` and returns what follows it.
///
/// Two prefix forms are recognised:
/// * the shorthand `\s*` or `\s+`;
/// * a bracketed class immediately followed by `*` or `+`, provided
///   `is_whitespace_only_pattern` accepts the class (it is always queried in
///   its `[class]*` form, whichever quantifier was written).
///
/// The class is taken to end at the first `]`. Returns `None` when `s` does
/// not start with such a run.
fn strip_leading_ws_run(s: &str) -> Option<&str> {
    for shorthand in ["\\s*", "\\s+"] {
        if let Some(rest) = s.strip_prefix(shorthand) {
            return Some(rest);
        }
    }

    let (class, tail) = s.strip_prefix('[')?.split_once(']')?;
    // The class must carry a repetition quantifier to count as a "run".
    let remainder = tail.strip_prefix(|c: char| matches!(c, '*' | '+'))?;
    is_whitespace_only_pattern(&format!("[{class}]*")).then_some(remainder)
}
/// Returns `true` when `name` is one of the external-token names this module
/// treats as "no space": any name starting with `_immediate`, or exactly one
/// of `_concat`, `_brace_concat`, `_concat_list`, `_no_space`,
/// `_no_line_break`.
pub(crate) fn is_no_space_external(name: &str) -> bool {
    const EXACT_NAMES: [&str; 5] = [
        "_concat",
        "_brace_concat",
        "_concat_list",
        "_no_space",
        "_no_line_break",
    ];
    name.starts_with("_immediate") || EXACT_NAMES.contains(&name)
}
/// Returns the first member of a simple bracketed character class as a
/// one-character string.
///
/// The whole pattern must be of the form `[...]`. `None` is returned when the
/// class is empty, is negated (starts with `^`), contains a `-` anywhere, or
/// consists of a lone backslash.
///
/// When the first member is a backslash escape, `\n`, `\r` and `\t` map to
/// the corresponding control character and any other escaped character is
/// returned as itself (so `[\d]` yields `"d"`).
pub(crate) fn char_class_first_literal(pattern: &str) -> Option<String> {
    let body = pattern
        .strip_prefix('[')
        .and_then(|rest| rest.strip_suffix(']'))?;
    if body.starts_with('^') || body.contains('-') {
        return None;
    }

    // An empty body falls out here: the first `next()?` returns `None`.
    let mut members = body.chars();
    let literal = match members.next()? {
        '\\' => match members.next()? {
            'n' => '\n',
            'r' => '\r',
            't' => '\t',
            escaped => escaped,
        },
        plain => plain,
    };
    Some(literal.to_string())
}
/// Returns `true` when `name` is exactly one of the external-token names this
/// module treats as whitespace: `_non_newline_whitespace`, `_whitespace`,
/// `_space`, `_ws` or `whitespace`. The comparison is case-sensitive.
pub(crate) fn is_whitespace_external(name: &str) -> bool {
    const WHITESPACE_NAMES: [&str; 5] = [
        "_non_newline_whitespace",
        "_whitespace",
        "_space",
        "_ws",
        "whitespace",
    ];
    WHITESPACE_NAMES.contains(&name)
}
// Unit tests for the helpers exercised below: newline-excluding class
// detection, fixed-literal decoding, line-comment prefix extraction and
// immediate-token reduction.
#[cfg(test)]
mod line_comment_prefix_tests {
use super::*;
// Shorthand: builds a `Production::Pattern` from a regex source string.
fn pat(v: &str) -> Production {
Production::Pattern {
value: v.to_string(),
}
}
// Shorthand: builds a `Production::String` from a literal.
fn string(v: &str) -> Production {
Production::String {
value: v.to_string(),
}
}
// A negated class that excludes newline is detected whether it is nested
// inside a group, written with escapes, or expressed through a hex range
// covering `\x0a`; a positive `[\n]+` class and a plain literal are not.
#[test]
fn newline_excluding_class_detected_anywhere() {
assert!(pattern_has_newline_excluding_class(
r"(\\+(.|\r?\n)|[^\\\n])*"
));
// NOTE(review): this raw string spans a physical line break, so the class
// body holds a literal line-break character after the `\r\n` escapes.
// It may originally have been U+2028/U+2029 — confirm against the
// upstream file before reformatting these two lines.
assert!(pattern_has_newline_excluding_class(r"[^\r\n
]*"));
assert!(pattern_has_newline_excluding_class(
r"[^\x00-\x08\x0a-\x1f\x7f]"
));
assert!(!pattern_has_newline_excluding_class(r"[\n]+"));
assert!(!pattern_has_newline_excluding_class("abc"));
}
// Plain text decodes to itself; patterns using `*` or `+`, and the empty
// pattern, are rejected.
#[test]
fn fixed_literal_pattern_accepts_plain_prefix() {
assert_eq!(fixed_literal_pattern("#"), Some("#".to_string()));
assert_eq!(fixed_literal_pattern("//"), Some("//".to_string()));
assert_eq!(fixed_literal_pattern(".*"), None);
assert_eq!(fixed_literal_pattern("a+"), None);
assert_eq!(fixed_literal_pattern(""), None);
assert_eq!(fixed_literal_pattern("#="), Some("#=".to_string()));
}
// The prefix may come from a `Pattern` first member (here `#` followed
// by `.*`), not only from a `String`.
#[test]
fn julia_line_comment_prefix_from_pattern_first_member() {
let rule = Production::Seq {
members: vec![pat("#"), pat(".*")],
};
assert_eq!(extract_line_comment_prefix(&rule), Some("#".to_string()));
}
// A token wrapping a choice of a `//` line form and a `/* ... */` block
// form yields the line form's prefix.
#[test]
fn c_line_comment_prefix_from_nested_negated_class() {
let line = Production::Seq {
members: vec![string("//"), pat(r"(\\+(.|\r?\n)|[^\\\n])*")],
};
let block = Production::Seq {
members: vec![string("/*"), pat(r"[^*]*\*+([^/*][^*]*\*+)*"), string("/")],
};
let rule = Production::Token {
content: Box::new(Production::Choice {
members: vec![line, block],
}),
};
assert_eq!(extract_line_comment_prefix(&rule), Some("//".to_string()));
}
// A literal opener followed by a symbol reference (rather than a pattern
// body) produces no line-comment prefix.
#[test]
fn block_comment_with_symbol_rest_yields_no_prefix() {
let rule = Production::Seq {
members: vec![
pat("#="),
Production::Symbol {
name: "_block_comment_rest".to_string(),
},
],
};
assert_eq!(extract_line_comment_prefix(&rule), None);
}
// `//` followed by a choice of bodies (immediate token + `.*`, a pair of
// symbols, or a bare immediate `.*`) still yields `//`.
#[test]
fn rust_line_comment_prefix_through_choice_body() {
let immediate = |inner: Production| Production::ImmediateToken {
content: Box::new(inner),
};
let sym = |n: &str| Production::Symbol {
name: n.to_string(),
};
let non_doc = Production::Seq {
members: vec![immediate(pat(r"\/\/")), pat(".*")],
};
let doc = Production::Seq {
members: vec![sym("_line_doc_comment_marker"), sym("_line_doc_content")],
};
let plain = immediate(pat(".*"));
let rule = Production::Seq {
members: vec![
string("//"),
Production::Choice {
members: vec![non_doc, doc, plain],
},
],
};
assert_eq!(extract_line_comment_prefix(&rule), Some("//".to_string()));
}
// `/*`, an optional symbol-only body built from nested choices, then `*/`:
// no line-comment prefix is reported.
#[test]
fn rust_block_comment_choice_body_yields_no_prefix() {
let content = Production::Symbol {
name: "_block_comment_content".to_string(),
};
let body = Production::Choice {
members: vec![
Production::Choice {
members: vec![
Production::Seq {
members: vec![
Production::Symbol {
name: "_block_doc_comment_marker".to_string(),
},
Production::Choice {
members: vec![content.clone(), Production::Blank],
},
],
},
content,
],
},
Production::Blank,
],
};
let rule = Production::Seq {
members: vec![string("/*"), body, string("*/")],
};
assert_eq!(extract_line_comment_prefix(&rule), None);
}
// An immediate token is recognised directly, through an alias, and through
// a choice whose only other member is `Blank`; a choice with a non-blank
// string alternative, a bare string and a bare symbol are all rejected.
#[test]
fn reduces_to_immediate_through_alias_and_choice() {
let alias = |inner: Production| Production::Alias {
content: Box::new(inner),
named: true,
value: "number".to_string(),
};
let imm = || Production::ImmediateToken {
content: Box::new(pat("[0-9]+")),
};
assert!(reduces_to_immediate_token(&imm()));
assert!(reduces_to_immediate_token(&alias(imm())));
assert!(reduces_to_immediate_token(&Production::Choice {
members: vec![alias(imm()), Production::Blank],
}));
assert!(!reduces_to_immediate_token(&Production::Choice {
members: vec![alias(imm()), string("x")],
}));
assert!(!reduces_to_immediate_token(&string("*")));
assert!(!reduces_to_immediate_token(&Production::Symbol {
name: "number".to_string(),
}));
}
}