pub mod lexer;
pub mod serialized;
/// Identifies which kind of grammar an [`Atn`] was built for.
///
/// The value is supplied to [`Atn::new`] and returned unchanged by
/// [`Atn::grammar_type`].
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum AtnType {
/// The ATN belongs to a lexer grammar.
Lexer,
/// The ATN belongs to a parser grammar.
Parser,
}
/// An ATN: a flat list of states plus the tables that index into it.
///
/// Every field is private; the methods on `Atn` expose them as read-only
/// slices and provide the setters/appenders used to fill them in.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Atn {
/// Grammar kind passed to `Atn::new`.
grammar_type: AtnType,
/// Maximum token type passed to `Atn::new`.
max_token_type: i32,
/// All states in insertion order; `Atn::state` looks a state up by its
/// position in this list.
states: Vec<AtnState>,
/// Table installed by `set_rule_to_start_state`.
/// NOTE(review): presumably one start-state number per rule index — confirm.
rule_to_start_state: Vec<usize>,
/// Table installed by `set_rule_to_stop_state`.
/// NOTE(review): presumably one stop-state number per rule index — confirm.
rule_to_stop_state: Vec<usize>,
/// Table installed by `set_rule_to_token_type`.
/// NOTE(review): presumably one token type per rule index — confirm.
rule_to_token_type: Vec<i32>,
/// State numbers appended by `add_mode_start_state`, in call order.
mode_to_start_state: Vec<usize>,
/// State numbers appended by `add_decision_state`, in call order.
decision_to_state: Vec<usize>,
/// Lexer actions installed by `set_lexer_actions`.
lexer_actions: Vec<LexerAction>,
}
impl Atn {
    /// Creates an ATN with the given grammar kind and maximum token type.
    ///
    /// The state list and every lookup table start out empty.
    pub const fn new(grammar_type: AtnType, max_token_type: i32) -> Self {
        Self {
            grammar_type,
            max_token_type,
            states: Vec::new(),
            decision_to_state: Vec::new(),
            mode_to_start_state: Vec::new(),
            rule_to_start_state: Vec::new(),
            rule_to_stop_state: Vec::new(),
            rule_to_token_type: Vec::new(),
            lexer_actions: Vec::new(),
        }
    }

    /// Returns the grammar kind this ATN was created with.
    pub const fn grammar_type(&self) -> AtnType {
        self.grammar_type
    }

    /// Returns the maximum token type this ATN was created with.
    pub const fn max_token_type(&self) -> i32 {
        self.max_token_type
    }

    /// Returns every state added so far, in insertion order.
    pub fn states(&self) -> &[AtnState] {
        self.states.as_slice()
    }

    /// Returns the state stored at position `state_number`, or `None` when
    /// that position is past the end of the state list.
    pub fn state(&self, state_number: usize) -> Option<&AtnState> {
        self.states().get(state_number)
    }

    /// Mutable counterpart of [`Atn::state`].
    pub fn state_mut(&mut self, state_number: usize) -> Option<&mut AtnState> {
        self.states.as_mut_slice().get_mut(state_number)
    }

    /// Appends `state` and returns the position it was stored at.
    ///
    /// The returned position is what [`Atn::state`] expects; the state's own
    /// `state_number` field is not inspected here.
    pub fn add_state(&mut self, state: AtnState) -> usize {
        self.states.push(state);
        // The list is non-empty after the push, so this cannot underflow.
        self.states.len() - 1
    }

    /// Returns the decision table: state numbers in the order they were
    /// registered with [`Atn::add_decision_state`].
    pub fn decision_to_state(&self) -> &[usize] {
        self.decision_to_state.as_slice()
    }

    /// Appends `state_number` to the decision table.
    pub fn add_decision_state(&mut self, state_number: usize) {
        self.decision_to_state.push(state_number);
    }

    /// Returns the rule-to-start-state table.
    pub fn rule_to_start_state(&self) -> &[usize] {
        self.rule_to_start_state.as_slice()
    }

    /// Replaces the whole rule-to-start-state table.
    pub fn set_rule_to_start_state(&mut self, rule_to_start_state: Vec<usize>) {
        self.rule_to_start_state = rule_to_start_state;
    }

    /// Returns the rule-to-stop-state table.
    pub fn rule_to_stop_state(&self) -> &[usize] {
        self.rule_to_stop_state.as_slice()
    }

    /// Replaces the whole rule-to-stop-state table.
    pub fn set_rule_to_stop_state(&mut self, rule_to_stop_state: Vec<usize>) {
        self.rule_to_stop_state = rule_to_stop_state;
    }

    /// Returns the rule-to-token-type table.
    pub fn rule_to_token_type(&self) -> &[i32] {
        self.rule_to_token_type.as_slice()
    }

    /// Replaces the whole rule-to-token-type table.
    pub fn set_rule_to_token_type(&mut self, rule_to_token_type: Vec<i32>) {
        self.rule_to_token_type = rule_to_token_type;
    }

    /// Returns the mode table: state numbers in the order they were
    /// registered with [`Atn::add_mode_start_state`].
    pub fn mode_to_start_state(&self) -> &[usize] {
        self.mode_to_start_state.as_slice()
    }

    /// Appends `state_number` to the mode table.
    pub fn add_mode_start_state(&mut self, state_number: usize) {
        self.mode_to_start_state.push(state_number);
    }

    /// Returns the installed lexer actions.
    pub fn lexer_actions(&self) -> &[LexerAction] {
        self.lexer_actions.as_slice()
    }

    /// Replaces the whole list of lexer actions.
    pub fn set_lexer_actions(&mut self, lexer_actions: Vec<LexerAction>) {
        self.lexer_actions = lexer_actions;
    }
}
/// A single ATN state together with its outgoing transitions.
///
/// All fields are public. [`AtnState::new`] fills in only `state_number` and
/// `kind`; every other field starts at `None`, `false`, or empty.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct AtnState {
/// Number supplied to [`AtnState::new`].
/// NOTE(review): presumably the state's position in `Atn::states` — nothing
/// in this file enforces that; confirm with the code that builds the ATN.
pub state_number: usize,
/// Rule index attached to this state, or `None` until one is set (for
/// example through [`AtnState::with_rule_index`]).
pub rule_index: Option<usize>,
/// Kind tag of this state; [`AtnState::is_rule_stop`] tests it against
/// `AtnStateKind::RuleStop`.
pub kind: AtnStateKind,
/// Optional state number; not read anywhere in this file.
/// NOTE(review): presumably the end state paired with a block start — confirm.
pub end_state: Option<usize>,
/// Optional state number; not read anywhere in this file.
/// NOTE(review): presumably the loop-back state paired with a loop — confirm.
pub loop_back_state: Option<usize>,
/// Flag, `false` by default; not read anywhere in this file.
pub non_greedy: bool,
/// Flag, `false` by default; not read anywhere in this file.
pub precedence_rule_decision: bool,
/// Flag, `false` by default; not read anywhere in this file.
pub left_recursive_rule: bool,
/// Outgoing transitions in the order they were added with
/// [`AtnState::add_transition`].
pub transitions: Vec<Transition>,
}
impl AtnState {
    /// Creates a state with the given number and kind.
    ///
    /// Everything else starts out unset: no rule index, no end or loop-back
    /// state, all flags `false`, and no transitions.
    pub const fn new(state_number: usize, kind: AtnStateKind) -> Self {
        Self {
            state_number,
            rule_index: None,
            kind,
            end_state: None,
            loop_back_state: None,
            non_greedy: false,
            precedence_rule_decision: false,
            left_recursive_rule: false,
            transitions: Vec::new(),
        }
    }

    /// Builder-style helper: returns `self` with `rule_index` set to
    /// `Some(rule_index)`.
    #[must_use]
    pub const fn with_rule_index(mut self, rule_index: usize) -> Self {
        self.rule_index = Some(rule_index);
        self
    }

    /// Appends `transition` to this state's outgoing transitions.
    pub fn add_transition(&mut self, transition: Transition) {
        self.transitions.push(transition);
    }

    /// Returns `true` when this state's kind is [`AtnStateKind::RuleStop`].
    ///
    /// Written with `matches!` rather than `==` so the predicate can be a
    /// `const fn`, like the other simple predicates in this module
    /// (`PartialEq::eq` is not callable in const contexts).
    pub const fn is_rule_stop(&self) -> bool {
        matches!(self.kind, AtnStateKind::RuleStop)
    }
}
/// Kind tag stored in [`AtnState::kind`].
///
/// Within this file only `RuleStop` is inspected (by
/// [`AtnState::is_rule_stop`]); the remaining variants are carried as data.
/// NOTE(review): the variant names appear to mirror ANTLR's ATN state types —
/// confirm against the code that constructs states.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum AtnStateKind {
Invalid,
Basic,
RuleStart,
BlockStart,
PlusBlockStart,
StarBlockStart,
TokenStart,
/// The kind [`AtnState::is_rule_stop`] checks for.
RuleStop,
BlockEnd,
StarLoopBack,
StarLoopEntry,
PlusLoopBack,
LoopEnd,
}
/// An edge leading to another ATN state.
///
/// Every variant carries `target`, the state number returned by
/// [`Transition::target`]. The variants differ in what, if anything, they
/// match; see [`Transition::matches`] and [`Transition::is_epsilon`].
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum Transition {
/// Epsilon edge: reported by `is_epsilon`, never matches a symbol.
Epsilon {
target: usize,
},
/// Matches exactly the symbol equal to `label`.
Atom {
target: usize,
label: i32,
},
/// Matches any symbol in the inclusive range `start..=stop`.
Range {
target: usize,
start: i32,
stop: i32,
},
/// Matches any symbol contained in `set`.
Set {
target: usize,
set: IntervalSet,
},
/// Matches any symbol that lies within the vocabulary bounds passed to
/// `matches` and is NOT contained in `set`.
NotSet {
target: usize,
set: IntervalSet,
},
/// Matches any symbol that lies within the vocabulary bounds passed to
/// `matches`.
Wildcard {
target: usize,
},
/// Reported by `is_epsilon`; never matches a symbol. The extra fields are
/// carried as data and are not interpreted in this file.
Rule {
target: usize,
rule_index: usize,
follow_state: usize,
precedence: i32,
},
/// Reported by `is_epsilon`; never matches a symbol. The extra fields are
/// carried as data and are not interpreted in this file.
Predicate {
target: usize,
rule_index: usize,
pred_index: usize,
context_dependent: bool,
},
/// Reported by `is_epsilon`; never matches a symbol. The extra fields are
/// carried as data and are not interpreted in this file.
Action {
target: usize,
rule_index: usize,
action_index: Option<usize>,
context_dependent: bool,
},
/// Reported by `is_epsilon`; never matches a symbol. `precedence` is
/// carried as data and is not interpreted in this file.
Precedence {
target: usize,
precedence: i32,
},
}
impl Transition {
    /// Returns the number of the state this transition leads to.
    pub const fn target(&self) -> usize {
        // Every variant has a `target` field, so one irrefutable or-pattern
        // covers them all; adding a variant without updating this is a
        // compile error.
        let (Self::Epsilon { target }
        | Self::Atom { target, .. }
        | Self::Range { target, .. }
        | Self::Set { target, .. }
        | Self::NotSet { target, .. }
        | Self::Wildcard { target }
        | Self::Rule { target, .. }
        | Self::Predicate { target, .. }
        | Self::Action { target, .. }
        | Self::Precedence { target, .. }) = self;
        *target
    }

    /// Returns `true` for the variants that are followed without matching an
    /// input symbol: `Epsilon`, `Rule`, `Predicate`, `Action` and
    /// `Precedence`.
    pub const fn is_epsilon(&self) -> bool {
        match self {
            Self::Epsilon { .. }
            | Self::Rule { .. }
            | Self::Predicate { .. }
            | Self::Action { .. }
            | Self::Precedence { .. } => true,
            Self::Atom { .. }
            | Self::Range { .. }
            | Self::Set { .. }
            | Self::NotSet { .. }
            | Self::Wildcard { .. } => false,
        }
    }

    /// Returns whether this transition matches `symbol`.
    ///
    /// `min_vocabulary..=max_vocabulary` bounds the symbols that `NotSet` and
    /// `Wildcard` may match; `Atom`, `Range` and `Set` ignore those bounds.
    /// The variants for which [`Transition::is_epsilon`] is `true` never
    /// match.
    pub fn matches(&self, symbol: i32, min_vocabulary: i32, max_vocabulary: i32) -> bool {
        let vocabulary = min_vocabulary..=max_vocabulary;
        match self {
            Self::Epsilon { .. }
            | Self::Rule { .. }
            | Self::Predicate { .. }
            | Self::Action { .. }
            | Self::Precedence { .. } => false,
            Self::Wildcard { .. } => vocabulary.contains(&symbol),
            Self::NotSet { set, .. } => vocabulary.contains(&symbol) && !set.contains(symbol),
            Self::Set { set, .. } => set.contains(symbol),
            Self::Range { start, stop, .. } => (*start..=*stop).contains(&symbol),
            Self::Atom { label, .. } => symbol == *label,
        }
    }
}
/// A set of `i32` values stored as inclusive `(start, stop)` ranges.
///
/// Invariant kept by every mutating method: `ranges` is sorted by `start`,
/// and consecutive ranges neither overlap nor touch (at least one value lies
/// between them). Insertion and lookup rely on this ordering to use binary
/// search instead of re-sorting or scanning the whole list.
#[derive(Clone, Debug, Default, Eq, PartialEq)]
pub struct IntervalSet {
    ranges: Vec<(i32, i32)>,
}

impl IntervalSet {
    /// Creates an empty set.
    ///
    /// `const`, like the other constructors in this module, so it can be used
    /// in `const` and `static` initialisers.
    pub const fn new() -> Self {
        Self { ranges: Vec::new() }
    }

    /// Creates a set holding the single inclusive range between `start` and
    /// `stop` (the bounds may be given in either order).
    pub fn from_range(start: i32, stop: i32) -> Self {
        let mut set = Self::new();
        set.add_range(start, stop);
        set
    }

    /// Adds the single value `value` to the set.
    pub fn add(&mut self, value: i32) {
        self.add_range(value, value);
    }

    /// Adds every value between `start` and `stop`, inclusive.
    ///
    /// If `start > stop` the bounds are swapped. Stored ranges that overlap
    /// the new range, or sit directly next to it, are merged with it so the
    /// set stays in its canonical sorted, non-touching form.
    pub fn add_range(&mut self, start: i32, stop: i32) {
        let (start, stop) = if start <= stop {
            (start, stop)
        } else {
            (stop, start)
        };

        // Index of the first stored range that reaches at least `start - 1`,
        // i.e. the first one that overlaps or touches the new range from the
        // left. `saturating_add` keeps a range ending at `i32::MAX` from
        // overflowing; such a range touches everything to its right.
        let first = self
            .ranges
            .partition_point(|&(_, range_stop)| range_stop.saturating_add(1) < start);
        // One past the last stored range that begins no later than
        // `stop + 1`, i.e. the last one that overlaps or touches from the
        // right.
        let end = self
            .ranges
            .partition_point(|&(range_start, _)| range_start <= stop.saturating_add(1));

        if first >= end {
            // Nothing overlaps or touches: slot the new range in, in order.
            self.ranges.insert(first, (start, stop));
        } else {
            // Collapse `ranges[first..end]` and the new range into one entry.
            let merged_start = start.min(self.ranges[first].0);
            let merged_stop = stop.max(self.ranges[end - 1].1);
            self.ranges[first] = (merged_start, merged_stop);
            self.ranges.drain(first + 1..end);
        }
    }

    /// Returns `true` when `value` lies inside one of the stored ranges.
    pub fn contains(&self, value: i32) -> bool {
        // Ranges are sorted and disjoint, so the only candidate is the first
        // range whose `stop` is not below `value`.
        let candidate = self.ranges.partition_point(|&(_, stop)| stop < value);
        self.ranges
            .get(candidate)
            .is_some_and(|&(start, _)| start <= value)
    }

    /// Returns the stored ranges: sorted by `start`, merged, each inclusive.
    pub fn ranges(&self) -> &[(i32, i32)] {
        &self.ranges
    }

    /// Returns `true` when the set holds no values.
    pub const fn is_empty(&self) -> bool {
        self.ranges.is_empty()
    }
}
/// An action a lexer rule can request; interpreted by
/// [`LexerActionResult::apply`].
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum LexerAction {
/// Sets the result's `channel` to the carried value.
Channel(i32),
/// Custom action identified by rule and action index. `apply` deliberately
/// does nothing for this variant.
Custom { rule_index: i32, action_index: i32 },
/// Calls `set_mode` on the lexer with the carried value.
Mode(i32),
/// Sets the result's `more` flag.
More,
/// Calls `pop_mode` on the lexer; its return value is discarded.
PopMode,
/// Calls `push_mode` on the lexer with the carried value.
PushMode(i32),
/// Sets the result's `skip` flag.
Skip,
/// Sets the result's `token_type` to the carried value.
Type(i32),
}
/// Accumulates the effect of the lexer actions passed to
/// [`LexerActionResult::apply`].
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct LexerActionResult {
/// Token type; overwritten by `LexerAction::Type`.
pub token_type: i32,
/// Channel; overwritten by `LexerAction::Channel`.
pub channel: i32,
/// Set to `true` by `LexerAction::Skip`; starts out `false`.
pub skip: bool,
/// Set to `true` by `LexerAction::More`; starts out `false`.
pub more: bool,
}
impl LexerActionResult {
    /// Creates a result with the given token type and channel and with both
    /// the `skip` and `more` flags cleared.
    pub const fn new(token_type: i32, channel: i32) -> Self {
        let (skip, more) = (false, false);
        Self {
            token_type,
            channel,
            skip,
            more,
        }
    }

    /// Applies one lexer action.
    ///
    /// Actions that describe the token (`Type`, `Channel`, `Skip`, `More`)
    /// update `self`; mode actions (`Mode`, `PushMode`, `PopMode`) are
    /// forwarded to `lexer`; `Custom` is intentionally a no-op here.
    pub fn apply<I, F>(&mut self, action: &LexerAction, lexer: &mut crate::lexer::BaseLexer<I, F>)
    where
        I: crate::char_stream::CharStream,
        F: crate::token::TokenFactory,
    {
        use crate::lexer::Lexer;

        match *action {
            // Nothing to record for custom actions.
            LexerAction::Custom { .. } => {}

            // Actions that only touch the accumulated result.
            LexerAction::Type(token_type) => self.token_type = token_type,
            LexerAction::Channel(channel) => self.channel = channel,
            LexerAction::Skip => self.skip = true,
            LexerAction::More => self.more = true,

            // Actions that change the lexer's mode.
            LexerAction::Mode(mode) => lexer.set_mode(mode),
            LexerAction::PushMode(mode) => lexer.push_mode(mode),
            LexerAction::PopMode => {
                // The value returned by `pop_mode` is not needed here.
                lexer.pop_mode();
            }
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A set built from one range contains exactly the values of that range
    /// and reports it back as a single `(start, stop)` pair.
    #[test]
    fn interval_set_handles_ranges() {
        let interval = IntervalSet::from_range(2, 4);

        for member in 2..=4 {
            assert!(interval.contains(member));
        }
        assert!(!interval.contains(5));

        assert_eq!(interval.ranges(), &[(2, 4)]);
    }
}