sipha_parse/grammar/
mod.rs

1//! Grammar rule definitions.
2//!
3//! This module defines the `GrammarRule` enum, which provides a declarative
4//! way to specify grammar rules. Rules can be combined to build complex
5//! grammars using sequences, choices, repetitions, and more.
6
7mod builder;
8mod evaluator;
9mod validation;
10
11pub use builder::GrammarRuleBuilder;
12pub use evaluator::GrammarRuleParser;
13pub use validation::validate_rule;
14
15use sipha_core::traits::{RuleId, TokenKind};
16
/// Precedence level handed to the Pratt expression parser.
///
/// A thin newtype over `u8`. Equality and ordering are derived, so two
/// precedences compare exactly as their wrapped numbers do.
///
/// This is a minimal placeholder: it will be moved to `sipha-pratt` in the
/// final refactoring.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub struct Precedence(pub u8);
21
22/// High-level grammar building block.
23///
24/// # Serialization
25///
26/// When the `serde` feature is enabled, this type implements `Serialize` and `Deserialize`.
27/// Note that `K` and `R` must also implement these traits for serialization to work.
28#[derive(Clone, Debug)]
29#[cfg_attr(
30    feature = "serde",
31    derive(serde::Serialize, serde::Deserialize),
32    serde(
33        bound = "K: serde::Serialize + serde::de::DeserializeOwned, R: serde::Serialize + serde::de::DeserializeOwned"
34    )
35)]
36pub enum GrammarRule<K: TokenKind, R: RuleId> {
37    /// Sequence: `a b c`.
38    Sequence(Vec<GrammarRule<K, R>>),
39    /// Choice: `a | b | c`.
40    Choice(Vec<GrammarRule<K, R>>),
41    /// Optional: `a?`.
42    Optional(Box<GrammarRule<K, R>>),
43    /// Zero or more: `a*`.
44    ZeroOrMore(Box<GrammarRule<K, R>>),
45    /// One or more: `a+`.
46    OneOrMore(Box<GrammarRule<K, R>>),
47    /// Range repetition: `a{min,max}`.
48    Range {
49        /// Rule that must repeat.
50        rule: Box<GrammarRule<K, R>>,
51        /// Minimum number of occurrences (inclusive).
52        min: usize,
53        /// Maximum number of occurrences (inclusive).
54        max: usize,
55    },
56    /// Delegate to the Pratt parser for expression parsing.
57    PrattExpr(Precedence),
58    /// Terminal token literal.
59    Token(K),
60    /// Rule reference.
61    Rule(R),
62    /// Conditional rule that depends on the parser position.
63    Conditional {
64        /// Nested rule to evaluate when the condition succeeds.
65        rule: Box<GrammarRule<K, R>>,
66        /// Condition receives the parser position and returns whether the rule
67        /// should run.
68        #[cfg_attr(
69            feature = "serde",
70            serde(skip_serializing, deserialize_with = "deserialize_condition")
71        )]
72        condition: fn(usize) -> bool,
73    },
74    /// Negative lookahead: succeeds if the rule does NOT match.
75    NegativeLookahead(Box<GrammarRule<K, R>>),
76    /// Positive lookahead: succeeds if the rule matches, but doesn't consume input.
77    PositiveLookahead(Box<GrammarRule<K, R>>),
78}
79
/// Placeholder condition that rejects every parser position.
///
/// Function pointers cannot be serialized, so this value stands in for the
/// `condition` of a deserialized `GrammarRule::Conditional`.
#[cfg(feature = "serde")]
const DEFAULT_CONDITION: fn(usize) -> bool = |_position| false;
82
/// Serde `deserialize_with` hook for the `condition` field of
/// `GrammarRule::Conditional`.
///
/// The deserializer is deliberately left untouched: nothing is read from the
/// input, and the result is always `Ok(DEFAULT_CONDITION)`.
#[cfg(feature = "serde")]
fn deserialize_condition<'de, D: serde::Deserializer<'de>>(
    _deserializer: D,
) -> Result<fn(usize) -> bool, D::Error> {
    Ok(DEFAULT_CONDITION)
}