pub mod concepts;
pub mod emit;
pub mod fidelity;
pub mod import;
pub mod inference;
mod links;
pub mod runtime;
pub mod surface;
pub mod translate;
pub mod validate;
pub use concepts::{
annotate_grammar_concepts, grammar_expr_concept_id, rule_concept_id, GrammarConcept,
GRAMMAR_CONCEPTS,
};
pub use emit::{
emit_abnf, emit_bnf, emit_ebnf, emit_gbnf, emit_javascript_parser, emit_peggy, emit_pest,
emit_rust_parser, emit_tree_sitter_grammar_js, emit_tree_sitter_grammar_js_with_report,
render_rust_type, EmitReport, GrammarEmitError, JsParserArtifacts, RustParserArtifacts,
};
pub use fidelity::{
canonical_grammar_format, grammar_format_profile, GrammarFidelityLevel, GrammarFormatProfile,
GRAMMAR_CONSTRUCTS, GRAMMAR_FORMATS,
};
pub use import::{
import_abnf, import_antlr, import_bnf, import_ebnf, import_gbnf, import_lark, import_pest,
import_tree_sitter_json, GrammarImportError,
};
pub use inference::active::{
clean_structural_acceptance, learn_dfa, learn_grammar, ActiveLearningConfig,
ActiveLearningError, Dfa, GrammarAcceptorOracle, Oracle as ActiveLearningOracle,
ParserAcceptancePredicate, ParserMembershipOracle, SamplingEquivalenceOracle,
Symbol as ActiveSymbol,
};
pub use inference::advisor::{
AdviceDecision, AdviceDecisionKind, AdviceSource, ConceptNamingAdvisor, FallbackAdvisor,
MdlMergeAdvisor, MergeAdvisor, MergeCandidate, MergeRequest, MergeScore, NameCandidate,
NamingAdvisor, NamingRequest,
};
#[cfg(feature = "llm-assist")]
pub use inference::advisor::{LlmClient, LlmError, LlmMergeAdvisor, LlmNamingAdvisor};
pub use inference::cfg::{
infer_cfg, infer_cfg_with_advisors, InferenceOptions, InferenceReport, InferenceResult, Oracle,
PositiveOnlyOracle,
};
pub use inference::eval::{
evaluate, mdl, run_corpus, run_named_corpus, sample, size_symbols, BenchmarkReport, EvalError,
GoldenCorpus, GrammarOracle, MembershipOracle, MetricScores, SampleConfig, ScoringMode,
GOLDEN_CORPORA,
};
pub use inference::lexical::{
categorise, infer_lexical_classes, CharCategory, LexicalConfig, LexicalModel, Token,
};
pub use inference::minimize::{
mdl_cost, minimize, Mdl, MinimizeOptions, MinimizeReport, MinimizeResult,
};
pub use inference::prior::{
build_structural_prior, ByteSpan, Delimiter, LeafKind, PriorOptions, SeedNode, SeedTree,
StructuralPrior, WhitespacePolicy,
};
pub use inference::semantic::{
default_pattern_catalog, evaluate_atom, evaluate_clause, evaluate_constraint,
evaluate_probabilistic, mine_semantic_constraints, ConstraintAtom, ConstraintClause,
ConstraintPattern, LengthUnit, NonTerminalRef, SemanticConstraint, SemanticInferenceConfig,
};
pub use inference::sequitur::{run_sequitur, Symbol};
pub use inference::state_merging::{infer_dfa, InferredAutomaton, MergeStrategy, Sample};
pub use runtime::{register_grammar, with_grammar, GrammarParser};
pub use surface::{
grammar_from_lino, grammar_to_lino, parse_grammar_surface, write_grammar_surface,
GrammarSurfaceError,
};
pub use translate::{
grammar_concept_translation_rules, translate_grammar_surface, GrammarTranslateError,
};
pub use validate::{validate, DiagnosticKind, GrammarDiagnostic, RuleSpan, Severity};
use std::collections::BTreeSet;
use std::fmt;
/// Expression tree describing the body of a grammar rule.
///
/// Composite variants own their children, so a complete rule body is one
/// value. The [`fmt::Display`] impl renders a compact, human-readable form.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum GrammarExpr {
    /// The empty expression; displayed as `empty`.
    Empty,
    /// Literal terminal text; displayed in quoted, escaped form.
    Terminal(String),
    /// Terminal text flagged as insensitive; displayed with an `i` prefix.
    // NOTE(review): presumably "case-insensitive" — confirm with the consumers.
    TerminalInsensitive(String),
    /// A pair of bounding characters; displayed as `start..=end`.
    CharRange(char, char),
    /// Bracketed character class.
    CharClass {
        /// When `true`, the class is displayed with a leading `^`.
        negated: bool,
        /// Members of the class, in the order they were supplied.
        items: Vec<CharClassItem>,
    },
    /// Displayed as `.`.
    AnyChar,
    /// Reference to a rule by name.
    NonTerminal(String),
    /// A list of alternatives.
    Choice {
        /// Selects the display separator: ` / ` when `true`, ` | ` otherwise.
        ordered: bool,
        /// The alternatives, in order.
        alternatives: Vec<Self>,
    },
    /// Items displayed one after another, separated by a space.
    Sequence(Vec<Self>),
    /// Displayed as `(expr)?`.
    Optional(Box<Self>),
    /// Displayed as `(expr)*`.
    ZeroOrMore(Box<Self>),
    /// Displayed as `(expr)+`.
    OneOrMore(Box<Self>),
    /// Counted repetition; displayed as `(expr){min,max}` or `(expr){min,}`.
    Repeat {
        /// The repeated expression.
        expr: Box<Self>,
        /// Lower repetition bound.
        min: usize,
        /// Upper repetition bound; `None` leaves it open.
        max: Option<usize>,
    },
    /// Displayed as `&(expr)`.
    And(Box<Self>),
    /// Displayed as `!(expr)`.
    Not(Box<Self>),
    /// An expression wrapped in a capture, optionally labelled.
    Capture {
        /// Label of the capture, if any.
        label: Option<String>,
        /// The captured expression.
        expr: Box<Self>,
    },
}

impl GrammarExpr {
    /// Returns [`GrammarExpr::Empty`].
    #[must_use]
    pub const fn empty() -> Self {
        Self::Empty
    }

    /// Builds a [`GrammarExpr::Terminal`] from anything convertible to `String`.
    #[must_use]
    pub fn terminal(value: impl Into<String>) -> Self {
        let text: String = value.into();
        Self::Terminal(text)
    }

    /// Builds a [`GrammarExpr::TerminalInsensitive`] from anything convertible
    /// to `String`.
    #[must_use]
    pub fn terminal_insensitive(value: impl Into<String>) -> Self {
        let text: String = value.into();
        Self::TerminalInsensitive(text)
    }

    /// Builds a [`GrammarExpr::CharRange`] from its two bounds.
    #[must_use]
    pub const fn char_range(start: char, end: char) -> Self {
        Self::CharRange(start, end)
    }

    /// Builds a [`GrammarExpr::CharClass`], collecting `items` in iteration
    /// order.
    #[must_use]
    pub fn char_class<I>(negated: bool, items: I) -> Self
    where
        I: IntoIterator<Item = CharClassItem>,
    {
        let items: Vec<CharClassItem> = items.into_iter().collect();
        Self::CharClass { negated, items }
    }

    /// Returns [`GrammarExpr::AnyChar`].
    #[must_use]
    pub const fn any_char() -> Self {
        Self::AnyChar
    }

    /// Builds a [`GrammarExpr::NonTerminal`] referring to the rule `value`.
    #[must_use]
    pub fn non_terminal(value: impl Into<String>) -> Self {
        let name: String = value.into();
        Self::NonTerminal(name)
    }

    /// Builds a [`GrammarExpr::Choice`], collecting `alternatives` in
    /// iteration order.
    #[must_use]
    pub fn choice<I>(ordered: bool, alternatives: I) -> Self
    where
        I: IntoIterator<Item = Self>,
    {
        let alternatives: Vec<Self> = alternatives.into_iter().collect();
        Self::Choice {
            ordered,
            alternatives,
        }
    }

    /// Builds a [`GrammarExpr::Sequence`], collecting `items` in iteration
    /// order.
    #[must_use]
    pub fn sequence<I>(items: I) -> Self
    where
        I: IntoIterator<Item = Self>,
    {
        let parts: Vec<Self> = items.into_iter().collect();
        Self::Sequence(parts)
    }

    /// Wraps `expr` in [`GrammarExpr::Optional`].
    #[must_use]
    pub fn optional(expr: Self) -> Self {
        let inner = Box::new(expr);
        Self::Optional(inner)
    }

    /// Wraps `expr` in [`GrammarExpr::ZeroOrMore`].
    #[must_use]
    pub fn zero_or_more(expr: Self) -> Self {
        let inner = Box::new(expr);
        Self::ZeroOrMore(inner)
    }

    /// Wraps `expr` in [`GrammarExpr::OneOrMore`].
    #[must_use]
    pub fn one_or_more(expr: Self) -> Self {
        let inner = Box::new(expr);
        Self::OneOrMore(inner)
    }

    /// Wraps `expr` in [`GrammarExpr::Repeat`] with the given bounds.
    ///
    /// The bounds are stored as given; no check relates `min` to `max`.
    #[must_use]
    pub fn repeat(expr: Self, min: usize, max: Option<usize>) -> Self {
        let expr = Box::new(expr);
        Self::Repeat { expr, min, max }
    }

    /// Wraps `expr` in [`GrammarExpr::And`].
    #[must_use]
    pub fn and(expr: Self) -> Self {
        let inner = Box::new(expr);
        Self::And(inner)
    }

    /// Wraps `expr` in [`GrammarExpr::Not`].
    // `not` is an associated constructor here, not an operator
    // implementation, hence the lint exemption.
    #[must_use]
    #[allow(clippy::should_implement_trait)]
    pub fn not(expr: Self) -> Self {
        let inner = Box::new(expr);
        Self::Not(inner)
    }

    /// Wraps `expr` in a [`GrammarExpr::Capture`] carrying `label`.
    #[must_use]
    pub fn capture(label: impl Into<String>, expr: Self) -> Self {
        let label = Some(label.into());
        let expr = Box::new(expr);
        Self::Capture { label, expr }
    }

    /// Wraps `expr` in a [`GrammarExpr::Capture`] without a label.
    #[must_use]
    pub fn capture_unlabeled(expr: Self) -> Self {
        let expr = Box::new(expr);
        Self::Capture { label: None, expr }
    }

    /// Inserts the name of every `NonTerminal` reachable from `self` into
    /// `names`.
    ///
    /// The tree is walked with an explicit work stack; visiting order is
    /// irrelevant because the results land in an ordered set.
    fn collect_nonterminals(&self, names: &mut BTreeSet<String>) {
        let mut pending = vec![self];
        while let Some(node) = pending.pop() {
            match node {
                Self::NonTerminal(name) => {
                    names.insert(name.clone());
                }
                Self::Choice {
                    alternatives: children,
                    ..
                }
                | Self::Sequence(children) => pending.extend(children.iter()),
                Self::Optional(inner)
                | Self::ZeroOrMore(inner)
                | Self::OneOrMore(inner)
                | Self::And(inner)
                | Self::Not(inner)
                | Self::Capture { expr: inner, .. }
                | Self::Repeat { expr: inner, .. } => pending.push(inner),
                // Leaves hold no rule references.
                Self::Empty
                | Self::Terminal(_)
                | Self::TerminalInsensitive(_)
                | Self::CharRange(_, _)
                | Self::CharClass { .. }
                | Self::AnyChar => {}
            }
        }
    }
}

/// Compact textual rendering of an expression.
///
/// Operands of unary operators are always parenthesised; the members of a
/// choice or sequence are joined by their separator without extra
/// parentheses.
impl fmt::Display for GrammarExpr {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            // Fixed spellings and bare names.
            Self::Empty => f.write_str("empty"),
            Self::AnyChar => f.write_str("."),
            Self::NonTerminal(name) => f.write_str(name),
            // Terminals use the `Debug` form so quotes and escapes are visible.
            Self::Terminal(text) => write!(f, "{text:?}"),
            Self::TerminalInsensitive(text) => write!(f, "i{text:?}"),
            Self::CharRange(low, high) => write!(f, "{low:?}..={high:?}"),
            Self::CharClass { negated, items } => {
                f.write_str(if *negated { "[^" } else { "[" })?;
                items.iter().try_for_each(|item| write!(f, "{item}"))?;
                f.write_str("]")
            }
            // N-ary forms differ only in their separator.
            Self::Sequence(items) => write_joined(f, items, " "),
            Self::Choice {
                ordered: true,
                alternatives,
            } => write_joined(f, alternatives, " / "),
            Self::Choice {
                ordered: false,
                alternatives,
            } => write_joined(f, alternatives, " | "),
            // Unary forms: parenthesised operand plus a prefix or suffix.
            Self::Optional(inner) => write!(f, "({inner})?"),
            Self::ZeroOrMore(inner) => write!(f, "({inner})*"),
            Self::OneOrMore(inner) => write!(f, "({inner})+"),
            Self::And(inner) => write!(f, "&({inner})"),
            Self::Not(inner) => write!(f, "!({inner})"),
            Self::Repeat {
                expr,
                min,
                max: Some(max),
            } => write!(f, "({expr}){{{min},{max}}}"),
            Self::Repeat {
                expr,
                min,
                max: None,
            } => write!(f, "({expr}){{{min},}}"),
            Self::Capture {
                label: Some(label),
                expr,
            } => write!(f, "{label}:({expr})"),
            Self::Capture { label: None, expr } => write!(f, "capture({expr})"),
        }
    }
}
/// One member of a character class: a single character or a character range.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum CharClassItem {
    /// A single character.
    Char(char),
    /// A pair of bounding characters, displayed as `start-end`.
    Range(char, char),
}

impl CharClassItem {
    /// Builds a [`CharClassItem::Char`].
    #[must_use]
    pub const fn char(value: char) -> Self {
        Self::Char(value)
    }

    /// Builds a [`CharClassItem::Range`] from its two bounds.
    #[must_use]
    pub const fn range(start: char, end: char) -> Self {
        Self::Range(start, end)
    }
}

/// Renders the item with every character passed through
/// [`char::escape_default`]; range bounds are joined by `-`.
impl fmt::Display for CharClassItem {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match *self {
            Self::Char(single) => write!(f, "{}", single.escape_default()),
            Self::Range(low, high) => {
                let (low, high) = (low.escape_default(), high.escape_default());
                write!(f, "{low}-{high}")
            }
        }
    }
}
/// Kind tag attached to a grammar rule.
///
/// Each variant has a lowercase textual tag, see [`RuleKind::as_str`].
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum RuleKind {
    /// Tagged `normal`; the kind assigned by `GrammarRule::new`.
    Normal,
    /// Tagged `atomic`.
    Atomic,
    /// Tagged `silent`.
    Silent,
    /// Tagged `token`.
    Token,
}

impl RuleKind {
    /// Every variant in declaration order, so tag parsing can reuse
    /// [`RuleKind::as_str`] instead of repeating the string table.
    const ALL: [Self; 4] = [Self::Normal, Self::Atomic, Self::Silent, Self::Token];

    /// Returns the textual tag of this kind.
    #[must_use]
    pub const fn as_str(self) -> &'static str {
        match self {
            Self::Normal => "normal",
            Self::Atomic => "atomic",
            Self::Silent => "silent",
            Self::Token => "token",
        }
    }

    /// Parses a tag produced by [`RuleKind::as_str`].
    ///
    /// The comparison is exact; any other text yields `None`.
    pub(crate) fn from_tag(value: &str) -> Option<Self> {
        Self::ALL
            .iter()
            .copied()
            .find(|kind| kind.as_str() == value)
    }
}

/// Writes the same text as [`RuleKind::as_str`].
impl fmt::Display for RuleKind {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let tag = self.as_str();
        f.write_str(tag)
    }
}
/// A single named rule of a [`Grammar`].
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct GrammarRule {
    /// Name under which the rule is defined and looked up.
    pub name: String,
    /// Body of the rule.
    pub expr: GrammarExpr,
    /// Kind tag; [`RuleKind::Normal`] unless overridden.
    pub kind: RuleKind,
    /// Optional concept identifier attached to the rule.
    pub concept: Option<String>,
    /// Optional documentation text attached to the rule.
    pub doc: Option<String>,
}

impl GrammarRule {
    /// Creates a [`RuleKind::Normal`] rule with no concept and no doc text.
    #[must_use]
    pub fn new(name: impl Into<String>, expr: GrammarExpr) -> Self {
        let name = name.into();
        Self {
            name,
            expr,
            kind: RuleKind::Normal,
            concept: None,
            doc: None,
        }
    }

    /// Returns the rule with its kind replaced by `kind`.
    #[must_use]
    pub const fn with_kind(mut self, kind: RuleKind) -> Self {
        self.kind = kind;
        self
    }

    /// Returns the rule with its concept set to `concept`.
    #[must_use]
    pub fn with_concept(self, concept: impl Into<String>) -> Self {
        Self {
            concept: Some(concept.into()),
            ..self
        }
    }

    /// Returns the rule with its documentation text set to `doc`.
    #[must_use]
    pub fn with_doc(self, doc: impl Into<String>) -> Self {
        Self {
            doc: Some(doc.into()),
            ..self
        }
    }

    /// Name of the rule.
    #[must_use]
    pub fn name(&self) -> &str {
        self.name.as_str()
    }

    /// Body of the rule.
    #[must_use]
    pub const fn expr(&self) -> &GrammarExpr {
        &self.expr
    }

    /// Kind tag of the rule.
    #[must_use]
    pub const fn kind(&self) -> RuleKind {
        self.kind
    }

    /// Concept identifier, if one was attached.
    #[must_use]
    pub fn concept(&self) -> Option<&str> {
        Option::as_deref(&self.concept)
    }

    /// Documentation text, if any was attached.
    #[must_use]
    pub fn doc(&self) -> Option<&str> {
        Option::as_deref(&self.doc)
    }
}
/// Identifies a grammar notation.
///
/// Each variant has a lowercase textual tag, see [`GrammarFormat::as_str`].
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum GrammarFormat {
    /// Tagged `meta-language`.
    MetaLanguage,
    /// Tagged `bnf`.
    Bnf,
    /// Tagged `ebnf`.
    Ebnf,
    /// Tagged `abnf`.
    Abnf,
    /// Tagged `peg`.
    Peg,
    /// Tagged `antlr`.
    Antlr,
    /// Tagged `lark`.
    Lark,
    /// Tagged `gbnf`.
    Gbnf,
    /// Tagged `tree-sitter`.
    TreeSitter,
    /// Tagged `inferred`.
    Inferred,
}

impl GrammarFormat {
    /// Every variant in declaration order, so tag parsing can reuse
    /// [`GrammarFormat::as_str`] instead of repeating the string table.
    const ALL: [Self; 10] = [
        Self::MetaLanguage,
        Self::Bnf,
        Self::Ebnf,
        Self::Abnf,
        Self::Peg,
        Self::Antlr,
        Self::Lark,
        Self::Gbnf,
        Self::TreeSitter,
        Self::Inferred,
    ];

    /// Returns the textual tag of this format.
    #[must_use]
    pub const fn as_str(self) -> &'static str {
        match self {
            Self::MetaLanguage => "meta-language",
            Self::Bnf => "bnf",
            Self::Ebnf => "ebnf",
            Self::Abnf => "abnf",
            Self::Peg => "peg",
            Self::Antlr => "antlr",
            Self::Lark => "lark",
            Self::Gbnf => "gbnf",
            Self::TreeSitter => "tree-sitter",
            Self::Inferred => "inferred",
        }
    }

    /// Parses a tag produced by [`GrammarFormat::as_str`].
    ///
    /// The comparison is exact; any other text yields `None`.
    pub(crate) fn from_tag(value: &str) -> Option<Self> {
        Self::ALL
            .iter()
            .copied()
            .find(|format| format.as_str() == value)
    }
}

/// Writes the same text as [`GrammarFormat::as_str`].
impl fmt::Display for GrammarFormat {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let tag = self.as_str();
        f.write_str(tag)
    }
}
/// An ordered collection of rules with an optional start rule name and an
/// optional record of the notation it came from.
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub struct Grammar {
    /// Rules in insertion order; names are not deduplicated.
    rules: Vec<GrammarRule>,
    /// Explicit start rule name, if one was set.
    start: Option<String>,
    /// Source notation, if one was recorded.
    source_format: Option<GrammarFormat>,
}

impl Grammar {
    /// Creates a grammar with no rules, no start rule and no source format.
    #[must_use]
    pub const fn new() -> Self {
        Self {
            rules: Vec::new(),
            start: None,
            source_format: None,
        }
    }

    /// Returns a fresh [`GrammarBuilder`].
    #[must_use]
    pub const fn builder() -> GrammarBuilder {
        GrammarBuilder::new()
    }

    /// Returns an [`ExprBuilder`] for constructing rule bodies.
    #[must_use]
    pub const fn expr() -> ExprBuilder {
        ExprBuilder
    }

    /// Returns the grammar with `rule` appended.
    #[must_use]
    pub fn with_rule(mut self, rule: GrammarRule) -> Self {
        self.add_rule(rule);
        self
    }

    /// Returns the grammar with its start rule name set to `start`.
    #[must_use]
    pub fn with_start(mut self, start: impl Into<String>) -> Self {
        self.set_start(start);
        self
    }

    /// Returns the grammar with its source format set to `source_format`.
    #[must_use]
    pub const fn with_source_format(mut self, source_format: GrammarFormat) -> Self {
        self.source_format = Some(source_format);
        self
    }

    /// Appends `rule` after the existing rules.
    pub fn add_rule(&mut self, rule: GrammarRule) {
        self.rules.push(rule);
    }

    /// Sets the start rule name; the name is not checked against the rules.
    pub fn set_start(&mut self, start: impl Into<String>) {
        let name: String = start.into();
        self.start = Some(name);
    }

    /// Removes the explicit start rule name.
    pub fn clear_start(&mut self) {
        self.start = None;
    }

    /// Records `source_format` as the source notation.
    pub const fn set_source_format(&mut self, source_format: GrammarFormat) {
        self.source_format = Some(source_format);
    }

    /// All rules in insertion order.
    #[must_use]
    pub fn rules(&self) -> &[GrammarRule] {
        self.rules.as_slice()
    }

    /// Returns the first rule named `name`, if any.
    #[must_use]
    pub fn rule(&self, name: &str) -> Option<&GrammarRule> {
        self.rules.iter().find(|candidate| candidate.name == name)
    }

    /// The explicit start rule name, if one was set.
    #[must_use]
    pub fn start(&self) -> Option<&str> {
        self.start.as_deref()
    }

    /// Resolves the start rule.
    ///
    /// With an explicit start name this is the rule of that name (`None` if
    /// no such rule exists); without one it is the first rule, if any.
    #[must_use]
    pub fn start_rule(&self) -> Option<&GrammarRule> {
        self.start()
            .map_or_else(|| self.rules.first(), |name| self.rule(name))
    }

    /// The recorded source notation, if any.
    #[must_use]
    pub const fn source_format(&self) -> Option<GrammarFormat> {
        self.source_format
    }

    /// Names of all rules in insertion order, duplicates included.
    #[must_use]
    pub fn rule_names(&self) -> Vec<&str> {
        self.rules.iter().map(|rule| rule.name.as_str()).collect()
    }

    /// Names of every non-terminal referenced by any rule body, sorted and
    /// deduplicated.
    #[must_use]
    pub fn referenced_nonterminals(&self) -> BTreeSet<String> {
        self.rules.iter().fold(BTreeSet::new(), |mut names, rule| {
            rule.expr.collect_nonterminals(&mut names);
            names
        })
    }

    /// Names that are referenced by some rule body but defined by no rule,
    /// sorted and deduplicated.
    #[must_use]
    pub fn undefined_nonterminals(&self) -> BTreeSet<String> {
        // Borrow the defined names rather than cloning each one: the lookup
        // set only has to live for the duration of the filter below.
        let defined: BTreeSet<&str> = self.rules.iter().map(|rule| rule.name.as_str()).collect();
        // The referenced set is already owned, so surviving names are moved
        // into the result instead of being cloned a second time.
        self.referenced_nonterminals()
            .into_iter()
            .filter(|name| !defined.contains(name.as_str()))
            .collect()
    }
}
/// Chainable builder that accumulates a [`Grammar`].
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub struct GrammarBuilder {
    /// The grammar assembled so far.
    grammar: Grammar,
}

impl GrammarBuilder {
    /// Starts from an empty [`Grammar`].
    #[must_use]
    pub const fn new() -> Self {
        Self {
            grammar: Grammar::new(),
        }
    }

    /// Records `source_format` on the grammar being built.
    #[must_use]
    pub const fn source_format(mut self, source_format: GrammarFormat) -> Self {
        self.grammar.source_format = Some(source_format);
        self
    }

    /// Sets the start rule name of the grammar being built.
    #[must_use]
    pub fn start(mut self, start: impl Into<String>) -> Self {
        self.grammar.set_start(start);
        self
    }

    /// Appends a [`RuleKind::Normal`] rule named `name` with body `expr`.
    #[must_use]
    pub fn rule(self, name: impl Into<String>, expr: GrammarExpr) -> Self {
        self.grammar_rule(GrammarRule::new(name, expr))
    }

    /// Appends an already constructed rule.
    #[must_use]
    pub fn grammar_rule(mut self, rule: GrammarRule) -> Self {
        self.grammar.add_rule(rule);
        self
    }

    /// Appends a rule named `name` with body `expr` and the given `kind`.
    #[must_use]
    pub fn rule_with_kind(
        self,
        name: impl Into<String>,
        expr: GrammarExpr,
        kind: RuleKind,
    ) -> Self {
        let rule = GrammarRule::new(name, expr).with_kind(kind);
        self.grammar_rule(rule)
    }

    /// Finishes building and returns the accumulated grammar.
    #[must_use]
    pub fn build(self) -> Grammar {
        let Self { grammar } = self;
        grammar
    }
}
/// Stateless helper offering short method names for building
/// [`GrammarExpr`] values.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub struct ExprBuilder;

impl ExprBuilder {
    /// Returns [`GrammarExpr::Empty`].
    #[must_use]
    pub const fn empty(self) -> GrammarExpr {
        GrammarExpr::empty()
    }

    /// Short alias for [`ExprBuilder::terminal`].
    #[must_use]
    pub fn term(self, value: impl Into<String>) -> GrammarExpr {
        self.terminal(value)
    }

    /// Builds a [`GrammarExpr::Terminal`].
    #[must_use]
    pub fn terminal(self, value: impl Into<String>) -> GrammarExpr {
        GrammarExpr::Terminal(value.into())
    }

    /// Builds a [`GrammarExpr::TerminalInsensitive`].
    #[must_use]
    pub fn terminal_insensitive(self, value: impl Into<String>) -> GrammarExpr {
        GrammarExpr::TerminalInsensitive(value.into())
    }

    /// Builds a [`GrammarExpr::CharRange`] whose two bounds are both `value`.
    #[must_use]
    pub const fn char(self, value: char) -> GrammarExpr {
        self.char_range(value, value)
    }

    /// Builds a [`GrammarExpr::CharRange`] from its two bounds.
    #[must_use]
    pub const fn char_range(self, start: char, end: char) -> GrammarExpr {
        GrammarExpr::char_range(start, end)
    }

    /// Builds a [`GrammarExpr::CharClass`], collecting `items` in iteration
    /// order.
    #[must_use]
    pub fn char_class<I>(self, negated: bool, items: I) -> GrammarExpr
    where
        I: IntoIterator<Item = CharClassItem>,
    {
        GrammarExpr::CharClass {
            negated,
            items: items.into_iter().collect(),
        }
    }

    /// Returns [`GrammarExpr::AnyChar`].
    #[must_use]
    pub const fn any(self) -> GrammarExpr {
        GrammarExpr::any_char()
    }

    /// Short alias for [`ExprBuilder::non_terminal`].
    #[must_use]
    pub fn nt(self, value: impl Into<String>) -> GrammarExpr {
        self.non_terminal(value)
    }

    /// Builds a [`GrammarExpr::NonTerminal`] referring to the rule `value`.
    #[must_use]
    pub fn non_terminal(self, value: impl Into<String>) -> GrammarExpr {
        GrammarExpr::NonTerminal(value.into())
    }

    /// Builds a [`GrammarExpr::Choice`] with the given `ordered` flag.
    #[must_use]
    pub fn choice<I>(self, ordered: bool, alternatives: I) -> GrammarExpr
    where
        I: IntoIterator<Item = GrammarExpr>,
    {
        GrammarExpr::Choice {
            ordered,
            alternatives: alternatives.into_iter().collect(),
        }
    }

    /// Builds a [`GrammarExpr::Choice`] with `ordered` set to `true`.
    #[must_use]
    pub fn choice_ordered<I>(self, alternatives: I) -> GrammarExpr
    where
        I: IntoIterator<Item = GrammarExpr>,
    {
        self.choice(true, alternatives)
    }

    /// Builds a [`GrammarExpr::Choice`] with `ordered` set to `false`.
    #[must_use]
    pub fn choice_unordered<I>(self, alternatives: I) -> GrammarExpr
    where
        I: IntoIterator<Item = GrammarExpr>,
    {
        self.choice(false, alternatives)
    }

    /// Builds a [`GrammarExpr::Sequence`], collecting `items` in iteration
    /// order.
    #[must_use]
    pub fn seq<I>(self, items: I) -> GrammarExpr
    where
        I: IntoIterator<Item = GrammarExpr>,
    {
        GrammarExpr::Sequence(items.into_iter().collect())
    }

    /// Wraps `expr` in [`GrammarExpr::Optional`].
    #[must_use]
    pub fn opt(self, expr: GrammarExpr) -> GrammarExpr {
        GrammarExpr::Optional(Box::new(expr))
    }

    /// Wraps `expr` in [`GrammarExpr::ZeroOrMore`].
    #[must_use]
    pub fn rep0(self, expr: GrammarExpr) -> GrammarExpr {
        GrammarExpr::ZeroOrMore(Box::new(expr))
    }

    /// Wraps `expr` in [`GrammarExpr::OneOrMore`].
    #[must_use]
    pub fn rep1(self, expr: GrammarExpr) -> GrammarExpr {
        GrammarExpr::OneOrMore(Box::new(expr))
    }

    /// Wraps `expr` in [`GrammarExpr::Repeat`] with the given bounds.
    #[must_use]
    pub fn repeat(self, expr: GrammarExpr, min: usize, max: Option<usize>) -> GrammarExpr {
        GrammarExpr::Repeat {
            expr: Box::new(expr),
            min,
            max,
        }
    }

    /// Wraps `expr` in [`GrammarExpr::And`].
    #[must_use]
    pub fn and(self, expr: GrammarExpr) -> GrammarExpr {
        GrammarExpr::And(Box::new(expr))
    }

    /// Wraps `expr` in [`GrammarExpr::Not`].
    #[must_use]
    pub fn not(self, expr: GrammarExpr) -> GrammarExpr {
        GrammarExpr::Not(Box::new(expr))
    }

    /// Wraps `expr` in a [`GrammarExpr::Capture`]; `label` is converted to a
    /// `String` when present and left as `None` otherwise.
    #[must_use]
    pub fn capture(self, label: Option<impl Into<String>>, expr: GrammarExpr) -> GrammarExpr {
        GrammarExpr::Capture {
            label: label.map(Into::into),
            expr: Box::new(expr),
        }
    }

    /// Wraps `expr` in a [`GrammarExpr::Capture`] without a label.
    #[must_use]
    pub fn capture_unlabeled(self, expr: GrammarExpr) -> GrammarExpr {
        GrammarExpr::Capture {
            label: None,
            expr: Box::new(expr),
        }
    }
}
/// Writes every expression in `expressions` to `formatter`, placing
/// `separator` between consecutive entries.
///
/// An empty slice writes nothing. The first formatting error is returned
/// immediately.
fn write_joined(
    formatter: &mut fmt::Formatter<'_>,
    expressions: &[GrammarExpr],
    separator: &str,
) -> fmt::Result {
    for (index, expression) in expressions.iter().enumerate() {
        // The separator goes before every entry except the first.
        if index > 0 {
            formatter.write_str(separator)?;
        }
        write!(formatter, "{expression}")?;
    }
    Ok(())
}