gazelle/grammar.rs
//! Grammar types - both the public AST and the internal representation types.
2
/// An interned symbol ID for O(1) lookups.
///
/// Wraps a raw `u32`. Layout of the ID space:
/// - IDs `0..num_terminals`: terminals (EOF is always terminal 0)
/// - IDs `num_terminals..` onwards: non-terminals
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct SymbolId(pub(crate) u32);

impl SymbolId {
    /// The EOF symbol ID (always 0).
    pub const EOF: SymbolId = SymbolId(0);

    /// Create a `SymbolId` from a raw `u32`.
    ///
    /// The value is stored as given; no validation is performed here.
    #[doc(hidden)]
    pub const fn new(id: u32) -> Self {
        SymbolId(id)
    }
}
20
// ============================================================================
// Public AST types for grammar definitions (require alloc)
// ============================================================================
24
25use alloc::string::String;
26use alloc::vec::Vec;
27
28/// A grammar definition, typically produced by [`parse_grammar`](crate::parse_grammar)
29/// or built programmatically with fields.
30#[derive(Debug, Clone, PartialEq, Eq)]
31pub struct Grammar {
32 /// Name of the start symbol.
33 pub start: String,
34 /// Expected number of reduce/reduce conflicts.
35 pub expect_rr: usize,
36 /// Expected number of shift/reduce conflicts.
37 pub expect_sr: usize,
38 /// Terminal definitions.
39 pub terminals: Vec<TerminalDef>,
40 /// Grammar rules (productions).
41 pub rules: Vec<Rule>,
42}
43
/// How a terminal's shift/reduce conflicts are resolved.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TerminalKind {
    /// Normal terminal — conflicts are reported as errors.
    Plain,
    /// `prec` — resolved at runtime by comparing `Precedence` levels.
    Prec,
    /// `shift` — conflicts are resolved statically in favor of shift.
    Shift,
    /// `reduce` — conflicts are resolved statically in favor of reduce.
    Reduce,
    /// `conflict` — resolved at runtime by the lexer passing `Resolve::Shift` or `Resolve::Reduce`.
    Conflict,
}
58
59/// A terminal definition in the grammar.
60#[derive(Debug, Clone, PartialEq, Eq)]
61pub struct TerminalDef {
62 /// Terminal name (e.g., "NUM", "PLUS").
63 pub name: String,
64 /// Whether this terminal carries a typed payload.
65 pub has_type: bool,
66 /// How shift/reduce conflicts on this terminal are resolved.
67 pub kind: TerminalKind,
68 /// Optional regex pattern for automatic lexer generation.
69 pub pattern: Option<String>,
70}
71
72/// A rule (production) in the grammar.
73#[derive(Debug, Clone, PartialEq, Eq)]
74pub struct Rule {
75 /// Non-terminal name (left-hand side).
76 pub name: String,
77 /// Alternatives (right-hand sides).
78 pub alts: Vec<Alt>,
79}
80
81/// An alternative (right-hand side) of a rule.
82#[derive(Debug, Clone, PartialEq, Eq)]
83pub struct Alt {
84 /// Terms in this alternative.
85 pub terms: Vec<Term>,
86 /// Action name (e.g., `=> binop`).
87 pub name: String,
88}
89
/// A term in a grammar rule.
///
/// Variant order is significant: the derived `PartialOrd`/`Ord` compare by
/// declaration order first, then by payload.
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub enum Term {
    /// Plain symbol reference.
    Symbol(String),
    /// `?` - optional (zero or one).
    Optional(String),
    /// `*` - zero or more.
    ZeroOrMore(String),
    /// `+` - one or more.
    OneOrMore(String),
    /// `%` - one or more separated by the given symbol.
    SeparatedBy {
        /// Name of the repeated symbol.
        symbol: String,
        /// Name of the separator symbol.
        sep: String,
    },
    /// `_` - empty production marker.
    Empty,
}
105}