use std::{
fmt::{Debug, Display},
hash::Hash,
ops::Range,
sync::{Arc, Mutex},
};
use crate::{
earley::{grammar::ParamValue, lexer::MatchingLexemesIdx, ParamCond},
HashMap, HashSet, Instant,
};
use anyhow::{bail, ensure, Result};
use derivre::{NextByte, RegexAst, StateID};
use serde::{Deserialize, Serialize};
use toktrie::{
parse_numeric_token, Recognizer, SimpleVob, TokEnv, TokTrie, TokenId, INVALID_TOKEN,
};
use crate::{
api::{ParserLimits, StopReason},
earley::{lexer::Lexer, lexerspec::LexemeClass},
id32_type,
};
use super::{
grammar::{CGrammar, CSymIdx, CSymbol, RhsPtr},
lexer::{LexerResult, PreLexeme},
lexerspec::{Lexeme, LexemeIdx, LexemeSpec, LexerSpec},
perf::ParserPerfCounters,
regexvec::{LexemeSet, LexerStats},
};
const TRACE: bool = false;
const DEBUG: bool = true;
pub(crate) const ITEM_TRACE: bool = false;
macro_rules! trace {
($($arg:tt)*) => {
if cfg!(feature = "logging") && TRACE {
eprintln!($($arg)*);
}
}
}
macro_rules! debug {
($($arg:tt)*) => {
if cfg!(feature = "logging") && DEBUG {
eprintln!($($arg)*);
}
}
}
macro_rules! debug_def {
($s:expr, $($arg:tt)*) => {
if cfg!(feature = "logging") && DEBUG && $s.scratch.log_enabled() {
eprintln!($($arg)*);
}
}
}
macro_rules! item_trace {
($($arg:tt)*) => {
if ITEM_TRACE {
eprint!(" ");
eprintln!($($arg)*);
}
}
}
#[derive(Clone, Copy, PartialEq, Eq, Hash)]
struct Item {
data: u64,
}
#[derive(Clone)]
struct SavedParserState {
lexer_stack_length: usize,
}
#[derive(Debug, Default, Serialize, Deserialize, Clone)]
pub struct ParserStats {
pub compute_time_us: u64,
pub rows: usize,
pub cached_rows: usize,
pub all_items: usize,
pub lexer_cost: u64,
pub slices_applied: usize,
pub trie_nodes_walked: usize,
pub definitive_bytes: usize,
pub lexer_ops: usize,
pub num_lex_errors: usize,
pub num_lexemes: usize,
}
#[derive(Debug, Clone)]
pub struct XorShift {
seed: u32,
}
impl XorShift {
pub fn new(seed: u32) -> Self {
XorShift { seed }
}
pub fn new_str(s: &str) -> Self {
XorShift {
seed: XorShift::fnv1a_32(s.as_bytes()),
}
}
#[allow(clippy::should_implement_trait)]
pub fn next(&mut self) -> u32 {
let mut x = self.seed;
x ^= x << 13;
x ^= x >> 17;
x ^= x << 5;
self.seed = x;
x
}
pub fn from_range(&mut self, r: Range<usize>) -> usize {
assert!(r.start < r.end);
assert!(r.end < u32::MAX as usize);
r.start + (self.next() as usize) % (r.end - r.start)
}
pub fn one_in(&mut self, n: u32) -> bool {
self.next().is_multiple_of(n)
}
pub fn next_alt(&mut self) -> u32 {
let mut x = self.seed;
x ^= x << 15;
x ^= x >> 4;
x ^= x << 23;
self.seed = x;
x
}
pub fn fnv1a_32(s: &[u8]) -> u32 {
let mut hash: u32 = 0x811c9dc5;
for byte in s {
hash ^= *byte as u32;
hash = hash.wrapping_mul(0x01000193);
}
hash
}
pub fn sample_from_vob(&mut self, vob: &SimpleVob) -> u32 {
let nset = vob.num_set();
assert!(nset > 0);
if nset > vob.len() / 10 {
loop {
let idx = self.from_range(0..vob.len());
if vob[idx] {
return idx as u32;
}
}
} else {
let choices = vob.to_list();
choices[self.from_range(0..choices.len())]
}
}
}
impl Default for XorShift {
fn default() -> Self {
XorShift { seed: 0xdeadf00d }
}
}
#[derive(Debug, Default, Clone)]
pub struct ParserMetrics {
pub rand: XorShift,
pub message: String,
pub slicer_leftover_us: usize,
}
impl ParserStats {
pub fn delta(&self, previous: &ParserStats) -> ParserStats {
ParserStats {
rows: self.rows.saturating_sub(previous.rows),
cached_rows: self.cached_rows.saturating_sub(previous.cached_rows),
definitive_bytes: self
.definitive_bytes
.saturating_sub(previous.definitive_bytes),
lexer_ops: self.lexer_ops.saturating_sub(previous.lexer_ops),
num_lexemes: self.num_lexemes.saturating_sub(previous.num_lexemes),
num_lex_errors: self.num_lex_errors.saturating_sub(previous.num_lex_errors),
all_items: self.all_items.saturating_sub(previous.all_items),
lexer_cost: self.lexer_cost.saturating_sub(previous.lexer_cost),
compute_time_us: self
.compute_time_us
.saturating_sub(previous.compute_time_us),
slices_applied: self.slices_applied.saturating_sub(previous.slices_applied),
trie_nodes_walked: self
.trie_nodes_walked
.saturating_sub(previous.trie_nodes_walked),
}
}
pub fn max(&self, other: &ParserStats) -> ParserStats {
ParserStats {
rows: self.rows.max(other.rows),
cached_rows: self.cached_rows.max(other.cached_rows),
definitive_bytes: self.definitive_bytes.max(other.definitive_bytes),
lexer_ops: self.lexer_ops.max(other.lexer_ops),
num_lexemes: self.num_lexemes.max(other.num_lexemes),
num_lex_errors: self.num_lex_errors.max(other.num_lex_errors),
all_items: self.all_items.max(other.all_items),
lexer_cost: self.lexer_cost.max(other.lexer_cost),
compute_time_us: self.compute_time_us.max(other.compute_time_us),
slices_applied: self.slices_applied.max(other.slices_applied),
trie_nodes_walked: self.trie_nodes_walked.max(other.trie_nodes_walked),
}
}
}
impl Display for ParserStats {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{}", serde_json::to_string_pretty(self).unwrap())
}
}
id32_type!(GrammarStackPtr);
#[derive(Clone, Debug)]
struct GrammarStackNode {
back_ptr: GrammarStackPtr,
token_horizon: u32,
grammar_id: LexemeClass,
start_item: Item,
start_item_idx: usize,
}
#[derive(Clone)]
struct Row {
first_item: u32,
last_item: u32,
grammar_stack_ptr: GrammarStackPtr,
lexer_start_state: StateID,
lexeme_idx: MatchingLexemesIdx,
}
impl Row {
fn item_indices(&self) -> Range<usize> {
self.first_item as usize..self.last_item as usize
}
}
impl Item {
#[allow(dead_code)]
const NULL: Self = Item { data: 0 };
fn new(rule: RhsPtr, start: usize) -> Self {
Item {
data: rule.as_index() as u64 | ((start as u64) << 32),
}
}
fn rhs_ptr(&self) -> RhsPtr {
RhsPtr::from_index(self.data as u32)
}
fn start_pos(&self) -> usize {
(self.data >> 32) as usize
}
fn advance_dot(&self) -> Self {
Item {
data: self.data + 1,
}
}
fn rewind_dot(&self) -> Self {
Item {
data: self.data - 1,
}
}
}
impl Debug for Item {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let rule = self.rhs_ptr();
write!(f, "Item(rhs={} @{})", rule.as_index(), self.start_pos())
}
}
#[derive(Clone)]
struct Scratch {
grammar: Arc<CGrammar>,
row_start: usize,
row_end: usize,
items: Vec<Item>,
item_args: Vec<ParamValue>,
grammar_stack: Vec<GrammarStackNode>,
push_allowed_grammar_ids: SimpleVob,
push_allowed_lexemes: LexemeSet,
push_grm_top: GrammarStackPtr,
push_lexeme_idx: MatchingLexemesIdx,
definitive: bool,
log_override: bool,
parametric: bool,
}
#[derive(Clone)]
struct RowInfo {
start_byte_idx: usize,
lexeme: Lexeme,
token_idx_start: usize,
token_idx_stop: usize,
}
impl RowInfo {
fn apply_token_idx(&mut self, idx: usize) {
self.token_idx_start = self.token_idx_start.min(idx);
self.token_idx_stop = self.token_idx_stop.max(idx);
}
fn set_token_idx(&mut self, idx: usize) {
self.token_idx_start = idx;
self.token_idx_stop = idx;
}
fn dbg(&self, lexer: &Lexer) -> String {
format!(
"token_idx: {}-{}; b:{}; {}",
self.token_idx_start,
self.token_idx_stop,
self.start_byte_idx,
lexer.dbg_lexeme(&self.lexeme),
)
}
}
#[derive(Clone, Copy, Debug)]
struct LexerState {
row_idx: u32, lexer_state: StateID, byte: Option<u8>,
}
#[derive(Clone)]
struct Captures {
capture_list: Vec<(String, Vec<u8>)>,
capture_map: HashMap<String, Vec<u8>>,
}
impl Captures {
fn new() -> Self {
Captures {
capture_list: vec![],
capture_map: HashMap::default(),
}
}
fn push(&mut self, cap: (String, Vec<u8>)) {
let (name, bytes) = cap;
if !name.starts_with("__LIST_APPEND:") {
if let Some(old) = self.capture_map.get(&name) {
if old == &bytes {
return;
}
}
}
self.capture_list.push((name.clone(), bytes.clone()));
self.capture_map.insert(name, bytes);
}
}
#[derive(Clone)]
struct ParserState {
grammar: Arc<CGrammar>,
tok_env: TokEnv,
scratch: Scratch,
trie_lexer_stack: usize,
trie_grammar_stack: usize,
captures: Captures,
special_token_marker_token: TokenId,
lexer_stack: Vec<LexerState>,
lexer_stack_top_eos: bool,
lexer_stack_flush_position: usize,
rows: Vec<Row>,
rows_valid_end: usize,
trace_byte_stack: Vec<u8>,
trace_stats0: ParserStats,
trace_start: Instant,
row_infos: Vec<RowInfo>,
token_idx: usize,
bytes: Vec<u8>,
byte_to_token_idx: Vec<u32>,
last_force_bytes_len: usize,
stats: ParserStats,
perf_counters: Arc<ParserPerfCounters>,
limits: ParserLimits,
metrics: ParserMetrics,
max_all_items: usize,
parser_error: Option<String>,
backtrack_byte_count: usize,
bias_cache: Option<BiasCache>,
shared_box: Box<SharedState>,
}
#[derive(Clone)]
struct BiasCache {
lexer_state: StateID,
row_idx: u32,
has_pending_lexeme_bytes: bool,
mask: SimpleVob,
}
#[derive(Clone, Default)]
struct SharedState {
lexer_opt: Option<Lexer>,
}
impl SharedState {
#[inline(always)]
fn lexer_mut(&mut self) -> &mut Lexer {
self.lexer_opt.as_mut().unwrap()
}
#[inline(always)]
fn lexer(&self) -> &Lexer {
self.lexer_opt.as_ref().unwrap()
}
}
#[derive(Clone)]
pub struct Parser {
shared: Arc<Mutex<Box<SharedState>>>,
state: ParserState,
}
impl Scratch {
fn new(grammar: Arc<CGrammar>) -> Self {
Scratch {
push_allowed_lexemes: grammar.lexer_spec().alloc_lexeme_set(),
push_allowed_grammar_ids: grammar.lexer_spec().alloc_grammar_set(),
push_grm_top: GrammarStackPtr::new(0),
push_lexeme_idx: MatchingLexemesIdx::Single(LexemeIdx::new(0)),
parametric: grammar.parametric(),
grammar,
row_start: 0,
row_end: 0,
items: vec![],
item_args: vec![],
grammar_stack: vec![],
definitive: true,
log_override: false,
}
}
fn log_enabled(&self) -> bool {
self.definitive || self.log_override
}
fn new_row(&mut self, pos: usize) {
self.row_start = pos;
self.row_end = pos;
}
fn row_len(&self) -> usize {
self.row_end - self.row_start
}
fn work_row(&self, lexer_start_state: StateID) -> Row {
Row {
first_item: self.row_start as u32,
last_item: self.row_end as u32,
grammar_stack_ptr: self.push_grm_top,
lexer_start_state,
lexeme_idx: self.push_lexeme_idx,
}
}
#[inline(always)]
fn ensure_items(&mut self, n: usize) {
let k = n.saturating_sub(self.items.len());
self.items.reserve(k);
if self.parametric {
self.item_args.reserve(k);
}
}
fn push_grammar_stack(&mut self, node: GrammarStackNode) {
if self.log_enabled() {
debug!("push_grammar_stack: {:?}", node);
}
let ptr = GrammarStackPtr::new(self.grammar_stack.len());
self.grammar_stack.push(node);
self.push_grm_top = ptr;
}
fn just_add_idx(&mut self, item: Item, src_item_idx: usize, info: &str) {
let arg = if self.parametric {
self.item_args[src_item_idx]
} else {
ParamValue::default()
};
self.just_add(item, arg, info)
}
#[inline(always)]
fn just_add(&mut self, item: Item, param: ParamValue, info: &str) {
if self.items.len() == self.row_end {
self.items.push(item);
} else {
self.items[self.row_end] = item;
}
if self.parametric {
if self.item_args.len() == self.row_end {
self.item_args.push(param);
} else {
self.item_args[self.row_end] = param;
}
}
if self.log_enabled() {
debug!(
" addu: {} ({}) ::{}",
self.item_to_string(self.row_end),
info,
param
);
}
self.row_end += 1;
}
#[inline(always)]
fn find_item(&self, item: Item) -> Option<usize> {
self.items[self.row_start..self.row_end]
.iter()
.position(|&x| x == item)
.map(|x| x + self.row_start)
}
#[inline(always)]
fn add_unique(&mut self, item: Item, origin_item_idx: usize, info: &str) {
if self.parametric {
self.add_unique_arg(item, info, self.item_args[origin_item_idx])
} else {
self.add_unique_arg(item, info, ParamValue::default())
}
}
#[inline(always)]
fn add_unique_arg(&mut self, item: Item, info: &str, param: ParamValue) {
if self.parametric {
for idx in self.row_start..self.row_end {
if self.items[idx] == item && self.item_args[idx] == param {
return;
}
}
self.just_add(item, param, info);
} else {
if self.find_item(item).is_none() {
self.just_add(item, param, info);
}
}
}
fn item_to_string(&self, idx: usize) -> String {
item_to_string(
&self.grammar,
&self.items[idx],
self.item_args.get(idx).copied().unwrap_or_default(),
)
}
}
macro_rules! ensure_internal {
($cond:expr, $msg:expr) => {
ensure!($cond, "Internal error: {}", $msg)
};
}
impl ParserState {
fn new(
tok_env: TokEnv,
grammar: Arc<CGrammar>,
mut limits: ParserLimits,
perf_counters: Arc<ParserPerfCounters>,
) -> Result<(Self, Lexer)> {
let start = grammar.start();
let mut lexer = Lexer::from(grammar.lexer_spec(), &mut limits, true)?;
if limits.precompute_large_lexemes {
let t0 = crate::Instant::now();
lexer.dfa.set_fuel(limits.initial_lexer_fuel);
for spec in &grammar.lexer_spec().lexemes {
let w = lexer.dfa.lexeme_weight(spec.idx);
if w > 1000 {
let mut allowed = grammar.lexer_spec().alloc_lexeme_set();
allowed.add(spec.idx);
lexer.precompute_for(tok_env.tok_trie(), &allowed);
}
}
perf_counters.precompute.record(t0.elapsed());
if lexer.dfa.has_error() {
bail!("lexer precomputation failed; either increase limits.initial_lexer_fuel or disable limits.precompute_large_lexemes");
}
}
let scratch = Scratch::new(Arc::clone(&grammar));
let lexer_state = lexer.a_dead_state(); let spec_tok = tok_env
.tok_trie()
.greedy_tokenize(&[TokTrie::SPECIAL_TOKEN_MARKER]);
let special_marker_token = if spec_tok.len() == 1 {
spec_tok[0]
} else {
INVALID_TOKEN
};
let mut r = ParserState {
grammar,
tok_env,
special_token_marker_token: special_marker_token,
trie_lexer_stack: usize::MAX,
rows: vec![],
rows_valid_end: 0,
row_infos: vec![],
captures: Captures::new(),
scratch,
stats: ParserStats::default(),
metrics: ParserMetrics::default(),
trace_stats0: ParserStats::default(),
trace_byte_stack: vec![],
trace_start: Instant::now(),
token_idx: 0,
byte_to_token_idx: vec![],
bytes: vec![],
last_force_bytes_len: usize::MAX,
max_all_items: usize::MAX,
limits,
backtrack_byte_count: 0,
lexer_stack_top_eos: false,
lexer_stack_flush_position: 0,
lexer_stack: vec![LexerState {
row_idx: 0,
lexer_state,
byte: None,
}],
trie_grammar_stack: 0,
parser_error: None,
bias_cache: None,
shared_box: Box::new(SharedState {
lexer_opt: Some(lexer),
}),
perf_counters,
};
r.scratch.grammar_stack.push(GrammarStackNode {
back_ptr: GrammarStackPtr::new(0),
token_horizon: u32::MAX,
grammar_id: LexemeClass::ROOT,
start_item: Item::new(RhsPtr::from_index(0), 0),
start_item_idx: 0,
});
for rule in r.grammar.rules_of(start) {
r.scratch
.add_unique_arg(Item::new(*rule, 0), "init", ParamValue::default());
}
debug!("initial push");
let _ = r.push_row(0, &Lexeme::bogus());
ensure_internal!(
r.num_rows() == 1 && r.rows.len() == 1,
"initial push failed"
);
assert!(r.lexer_stack.len() == 1);
if !r.lexer_spec().allow_initial_skip {
let skip_id = r.lexer_spec().skip_id(LexemeClass::ROOT);
let mut possible = r
.lexer()
.possible_lexemes(r.rows[0].lexer_start_state)
.clone();
possible.remove(skip_id);
let new_state = r.lexer_mut().start_state(&possible);
r.rows[0].lexer_start_state = new_state;
debug!(
"disallowing initial SKIP; {}",
r.allowed_lexemes_dbg(new_state)
);
}
let state = r.rows[0].lexer_start_state;
r.lexer_stack[0].lexer_state = state;
r.assert_definitive();
let lexer = std::mem::take(&mut r.shared_box).lexer_opt.unwrap();
r.stats.lexer_cost = lexer.dfa.total_fuel_spent();
Ok((r, lexer))
}
#[inline(always)]
fn lexer(&self) -> &Lexer {
self.shared_box.lexer()
}
#[inline(always)]
fn lexer_mut(&mut self) -> &mut Lexer {
self.shared_box.lexer_mut()
}
fn with_items_limit<T>(
&mut self,
limit: usize,
lbl: &str,
f: impl FnOnce(&mut Self) -> T,
) -> T {
self.max_all_items = self.stats.all_items + limit;
let r = f(self);
if self.stats.all_items > self.max_all_items && self.parser_error.is_none() {
self.parser_error = Some(format!(
"Too many items (limit {limit}; {lbl}); try avoiding single-byte/short lexemes"
));
}
self.max_all_items = usize::MAX;
r
}
fn compute_bias(&mut self, computer: &dyn BiasComputer, start: &[u8]) -> SimpleVob {
let t0 = Instant::now();
if start.is_empty() {
let curr_state = self.lexer_state();
let has_pending = self.has_pending_lexeme_bytes();
if let Some(ref cache) = self.bias_cache {
if cache.lexer_state == curr_state.lexer_state
&& cache.row_idx == curr_state.row_idx
&& cache.has_pending_lexeme_bytes == has_pending
{
let d = t0.elapsed();
self.stats.compute_time_us += d.as_micros() as u64;
self.perf_counters.compute_bias.record(d);
return cache.mask.clone();
}
}
}
let limits = self.limits.clone();
let dfa = &mut self.lexer_mut().dfa;
dfa.set_fuel(limits.step_lexer_fuel);
dfa.set_max_states(limits.max_lexer_states);
let mut set = self.with_items_limit(limits.step_max_items, "mask", |state| {
let mut r = ParserRecognizer { state };
computer.compute_bias(&mut r, start)
});
self.stats.lexer_cost = self.lexer().dfa.total_fuel_spent();
if self.special_token_marker_token != INVALID_TOKEN {
set.disallow_token(self.special_token_marker_token);
}
if start.is_empty() {
self.run_speculative("token_ranges", |state| {
if state.flush_lexer() {
for spec in state.token_range_lexemes() {
for range in &spec.token_ranges {
set.allow_range(range.clone());
}
}
}
});
}
let eos = computer.trie().eos_token();
if eos != INVALID_TOKEN && start.is_empty() && self.lexer_allows_eos() {
set.allow_token(eos);
}
if start.is_empty() {
let curr_state = self.lexer_state();
self.bias_cache = Some(BiasCache {
lexer_state: curr_state.lexer_state,
row_idx: curr_state.row_idx,
has_pending_lexeme_bytes: self.has_pending_lexeme_bytes(),
mask: set.clone(),
});
}
let d = t0.elapsed();
self.stats.compute_time_us += d.as_micros() as u64;
self.perf_counters.compute_bias.record(d);
set
}
fn after_dots(&self) -> impl Iterator<Item = RhsPtr> + '_ {
self.curr_row()
.item_indices()
.map(|i| self.scratch.items[i].rhs_ptr())
}
fn after_dots_symdata(&self) -> impl Iterator<Item = &CSymbol> + '_ {
self.after_dots().map(|pos| self.grammar.sym_data_dot(pos))
}
fn can_advance_inner(&self) -> bool {
for data in self.after_dots_symdata() {
if data.idx == CSymIdx::NULL {
continue;
}
if data.is_terminal || data.gen_grammar.is_some() {
return true;
}
}
false
}
pub fn can_advance(&self) -> bool {
self.has_pending_lexeme_bytes() || self.can_advance_inner()
}
pub fn has_pending_lexeme_bytes(&self) -> bool {
let row_idx = self.num_rows() - 1;
for back in self.lexer_stack.iter().rev() {
if back.row_idx as usize != row_idx {
break;
}
if back.byte.is_some() {
return true;
}
}
false
}
fn row_is_accepting(&self) -> bool {
for pos in self.after_dots() {
let after_dot = self.grammar.sym_idx_dot(pos);
if after_dot == CSymIdx::NULL {
let lhs = self.grammar.sym_idx_lhs(pos);
if lhs == self.grammar.start() {
return true;
}
}
}
false
}
pub fn lexer_allows_eos(&mut self) -> bool {
if self.has_pending_lexeme_bytes() {
let lexer_state = self.lexer_state().lexer_state;
self.lexer_mut().allows_eos(lexer_state)
} else {
false
}
}
fn item_to_string(&self, idx: usize) -> String {
self.scratch.item_to_string(idx)
}
fn print_row(&self, row_idx: usize) {
let row = &self.rows[row_idx];
println!(
"row {}; lexer_stack={} top_state={:?}",
row_idx,
self.lexer_stack.len(),
self.lexer_stack.last().unwrap().lexer_state
);
println!(
" allowed: {}",
self.allowed_lexemes_dbg(row.lexer_start_state)
);
if row_idx < self.row_infos.len() {
let info = &self.row_infos[row_idx];
if info.lexeme.is_bogus() {
println!(" lexeme: placeholder");
} else {
println!(" lexeme: {}", self.lexer().dbg_lexeme(&info.lexeme));
}
} else {
println!(" speculative");
}
for i in row.item_indices() {
println!(" {}", self.item_to_string(i));
}
}
#[inline(always)]
fn lexer_state(&self) -> LexerState {
self.lexer_stack[self.lexer_stack.len() - 1]
}
#[inline(always)]
pub fn num_rows(&self) -> usize {
self.lexer_state().row_idx as usize + 1
}
#[inline(always)]
fn pop_lexer_states(&mut self, n: usize) {
self.lexer_stack
.truncate(self.lexer_stack.len().saturating_sub(n));
}
#[allow(dead_code)]
pub fn print_stats(&mut self) {
println!("stats: {:?}", self.stats);
self.stats = ParserStats::default();
}
fn assert_definitive_inner(&self) {
assert!(self.scratch.definitive);
assert!(self.backtrack_byte_count == 0);
if self.num_rows() != self.row_infos.len() {
panic!(
"num_rows={} row_infos={}",
self.num_rows(),
self.row_infos.len()
);
}
}
fn assert_definitive(&self) {
self.assert_definitive_inner();
if self.lexer_spec().can_rollback() {
self.check_lexer_bytes_invariant();
}
}
pub fn get_bytes(&self) -> &[u8] {
&self.bytes
}
fn item_lhs(&self, item: &Item) -> CSymIdx {
self.grammar.sym_idx_lhs(item.rhs_ptr())
}
#[allow(dead_code)]
fn item_sym_data(&self, item: &Item) -> &CSymbol {
self.grammar.sym_data(self.item_lhs(item))
}
fn hidden_start(&self, lexer: &mut Lexer) -> usize {
let lexer_state = self.lexer_state().lexer_state;
let hidden_len = lexer.possible_hidden_len(lexer_state);
if hidden_len == 0 {
return usize::MAX;
}
let last_lexeme_visible_len = self.curr_row_bytes().len() - hidden_len;
let prefix_len = self.row_infos[self.num_rows() - 1].start_byte_idx;
prefix_len + last_lexeme_visible_len
}
pub fn temperature(&self) -> Option<f32> {
let mut temp = -1000.0f32;
for data in self.after_dots_symdata() {
if data.is_terminal {
temp = temp.max(data.props.temperature);
}
}
if temp < 0.00000001 {
None
} else {
Some(temp)
}
}
pub fn rollback(&mut self, n_bytes: usize) -> Result<()> {
debug!("rollback: {} bytes", n_bytes);
ensure!(self.parser_error.is_none(), "rollback: parser error");
self.assert_definitive();
ensure!(
n_bytes <= self.byte_to_token_idx.len(),
"rollback: too many bytes {} > {}",
n_bytes,
self.byte_to_token_idx.len()
);
let new_len = self.byte_to_token_idx.len() - n_bytes;
self.byte_to_token_idx.truncate(new_len);
self.bytes.truncate(new_len);
self.lexer_stack.truncate(new_len + 1);
self.row_infos.truncate(self.num_rows());
self.token_idx = *self.byte_to_token_idx.last().unwrap_or(&0) as usize;
self.last_force_bytes_len = usize::MAX;
self.lexer_stack_top_eos = false;
self.rows_valid_end = self.num_rows();
self.assert_definitive();
Ok(())
}
pub fn validate_tokens(&mut self, tokens: &[TokenId]) -> usize {
self.assert_definitive();
self.run_speculative("validate_tokens", |state| {
state.scratch.log_override = true;
let mut applied_idx = state.byte_to_token_idx.len();
let tok_env = state.tok_env.clone();
let trie = tok_env.tok_trie();
let eos = trie.eos_token();
let mut recog = ParserRecognizer { state };
for (tidx, &tok) in tokens.iter().enumerate() {
let state = &mut recog.state;
if tok == eos {
if applied_idx == state.bytes.len() && state.is_accepting_inner() {
return tidx + 1;
} else {
return tidx;
}
}
if applied_idx >= state.bytes.len() {
let saved_parser_state = state.save_state();
if let Some(idx) = state.flush_and_check_numeric(tok) {
let numeric_bytes = trie.decode_as_special(tok);
let ok = state.add_numeric_token(idx, &numeric_bytes);
assert!(ok.is_ok());
continue; }
state.restore_state(saved_parser_state);
}
let token_bytes = trie.decode_raw(&[tok]);
let token_bytes = if applied_idx < state.bytes.len()
&& state.bytes[applied_idx] == TokTrie::SPECIAL_TOKEN_MARKER
{
trie.decode_as_special(tok)
} else {
token_bytes
};
for &b in &token_bytes {
if applied_idx < recog.state.bytes.len() {
if recog.state.bytes[applied_idx] == b {
applied_idx += 1;
} else {
return tidx;
}
} else {
if b != TokTrie::SPECIAL_TOKEN_MARKER && recog.try_push_byte(b) {
continue;
} else {
return tidx;
}
}
}
}
tokens.len() })
}
fn add_numeric_token(&mut self, idx: LexemeIdx, tok_bytes: &[u8]) -> Result<()> {
let lexer_state = self.lexer_state();
for &b in &tok_bytes[0..tok_bytes.len() - 1] {
self.lexer_stack.push(LexerState {
byte: Some(b),
..lexer_state
});
}
if self.scratch.definitive {
self.bytes.extend_from_slice(tok_bytes);
for _ in 0..tok_bytes.len() {
self.byte_to_token_idx
.push(self.token_idx.try_into().unwrap());
}
}
debug_def!(
self,
"add_numeric_token: idx={:?} bytes={:?}",
idx,
tok_bytes
);
let ok = self.advance_parser(PreLexeme::just_idx(MatchingLexemesIdx::Single(idx)));
ensure!(
ok,
"failed to advance parser after adding bytes ignoring lexer"
);
if self.scratch.definitive {
let row_idx = self.num_rows() - 1;
self.row_infos[row_idx].apply_token_idx(self.token_idx);
}
Ok(())
}
fn flush_and_check_numeric(&mut self, tok_id: TokenId) -> Option<LexemeIdx> {
if self.flush_lexer() {
for spec in self.token_range_lexemes() {
if spec.contains_token(tok_id) {
return Some(spec.idx);
}
}
}
None
}
pub fn apply_token(&mut self, tok_bytes: &[u8], tok_id: TokenId) -> Result<usize> {
self.assert_definitive();
item_trace!("apply_token: {:?}", String::from_utf8_lossy(tok_bytes));
let mut check_lexer_max_tokens = false;
let mut row_to_apply = self.num_rows() - 1;
let applied_idx0 = self.byte_to_token_idx.len();
while row_to_apply > 0 {
if self.row_infos[row_to_apply].start_byte_idx <= applied_idx0 {
break;
}
row_to_apply -= 1;
}
if self.tok_env.tok_trie().token(tok_id) == tok_bytes
&& self.byte_to_token_idx.len() == self.bytes.len()
{
let applies = self
.run_speculative("numeric_apply_token", |state| {
state.flush_and_check_numeric(tok_id)
})
.is_some();
if applies {
let row_idx = self.num_rows() - 1;
self.row_infos[row_idx].apply_token_idx(self.token_idx);
self.lexer_stack_flush_position = 0;
let idx = self.flush_and_check_numeric(tok_id).unwrap();
self.add_numeric_token(idx, tok_bytes)?;
if self.lexer_stack_flush_position > 0 {
assert!(self.lexer_stack_flush_position + 1 < self.lexer_stack.len());
self.lexer_stack.remove(self.lexer_stack_flush_position);
}
self.assert_definitive();
return Ok(0);
}
}
for (bidx, &b) in tok_bytes.iter().enumerate() {
check_lexer_max_tokens = false;
let applied_idx = self.byte_to_token_idx.len();
if applied_idx >= self.bytes.len() {
assert!(applied_idx == self.bytes.len());
let row_idx = self.num_rows() - 1;
self.row_infos[row_idx].apply_token_idx(self.token_idx);
let (ok, bt) = self.try_push_byte_definitive(Some(b));
if !ok {
bail!(
"token {:?} doesn't satisfy the grammar; byte {:?} fails parse",
String::from_utf8_lossy(tok_bytes),
b as char,
);
}
if bt > 0 {
self.byte_to_token_idx.truncate(self.bytes.len());
let bt = bt + (tok_bytes.len() - bidx - 1);
return Ok(bt);
}
if row_idx == self.num_rows() - 1 {
check_lexer_max_tokens = true;
}
} else {
if bidx == 0 && self.bytes[applied_idx] == TokTrie::SPECIAL_TOKEN_MARKER {
if let Some(tid) = self.tok_env.tok_trie().token_id_at_bytes(tok_bytes) {
if let Some((len, tid2)) =
parse_numeric_token(&self.bytes[applied_idx + 1..])
{
if tid == tid2 {
let tokidx = self.token_idx.try_into().unwrap();
for _ in 0..len + 1 {
self.byte_to_token_idx.push(tokidx);
}
break;
}
}
}
}
if self.bytes[applied_idx] != b {
bail!(
"token {:?} doesn't satisfy the grammar; forced bytes: got {:?}; applying {:?}",
String::from_utf8_lossy(tok_bytes),
self.bytes[applied_idx] as char,
b as char,
);
}
}
self.byte_to_token_idx
.push(self.token_idx.try_into().unwrap());
}
item_trace!(
"apply_token: ok, {}/{}",
self.byte_to_token_idx.len(),
self.bytes.len()
);
for idx in row_to_apply..self.num_rows() {
if self.row_infos[idx].start_byte_idx >= applied_idx0 {
self.row_infos[idx].set_token_idx(self.token_idx);
} else {
self.row_infos[idx].apply_token_idx(self.token_idx);
}
}
if check_lexer_max_tokens {
let row_idx = self.num_rows() - 1;
let mut pop_classes = HashSet::default();
let mut stack_ptr = self.rows[row_idx].grammar_stack_ptr;
while stack_ptr.as_usize() > 0 {
let grm_top = &self.scratch.grammar_stack[stack_ptr.as_usize()];
if grm_top.token_horizon <= self.token_idx as u32 + 1 {
pop_classes.insert(grm_top.grammar_id);
stack_ptr = grm_top.back_ptr;
} else {
break;
}
}
let info = &self.row_infos[row_idx];
let info_tokens = std::cmp::max(
0,
self.token_idx as isize + 1 - info.token_idx_start as isize,
) as usize;
let lex_state = self.lexer_state().lexer_state;
let mut limit = self.lexer_spec().alloc_lexeme_set();
let mut num_limit = 0;
{
let possible_lexemes = self.lexer().possible_lexemes(lex_state);
for lex in possible_lexemes.iter() {
let lex_spec = self.lexer_spec().lexeme_spec(lex);
let max_tokens = lex_spec.max_tokens();
let class_ok = !pop_classes.contains(&lex_spec.class());
debug!(
" max_tokens: {} max={} info={} class_ok={}",
self.lexer().dbg_lexeme(&Lexeme::single_idx(lex)),
max_tokens,
info_tokens,
class_ok
);
if info_tokens < max_tokens && class_ok {
limit.add(lex);
} else {
num_limit += 1;
}
}
}
if num_limit > 0 {
debug!(
" max_tokens limiting to: {}",
self.lexer_spec().dbg_lexeme_set(&limit)
);
let new_state = self.lexer_mut().limit_state_to(lex_state, &limit);
if new_state.is_dead() {
debug!(" limited everything; forcing EOI");
let (ok, bt) = self.try_push_byte_definitive(None);
assert!(bt == 0);
if !ok {
debug!("parse reject on max_tokens");
return Ok(0);
}
} else {
self.lexer_stack.last_mut().unwrap().lexer_state = new_state;
}
}
}
let item_count = self.curr_row().item_indices().count();
if item_count > self.limits.max_items_in_row {
bail!(
"Current row has {} items; max is {}; consider making your grammar left-recursive if it's right-recursive",
item_count,
self.limits.max_items_in_row,
);
}
if false {
self.print_row(self.num_rows() - 1);
}
self.assert_definitive();
Ok(0)
}
fn token_range_lexemes(&self) -> Vec<&LexemeSpec> {
let state = self.lexer_state().lexer_state;
let possible = self.lexer().possible_lexemes(state);
self.lexer_spec().token_range_lexemes(possible)
}
pub fn needs_force_bytes(&self) -> bool {
self.bytes.len() != self.last_force_bytes_len
}
pub fn force_bytes(&mut self) {
self.assert_definitive();
if !self.needs_force_bytes() {
return;
}
trace!("force_bytes lexer_stack {}", self.lexer_stack.len());
self.with_items_limit(self.limits.step_max_items, "ff_tokens", |s| {
while let Some(b) = s.forced_byte() {
debug!(" forced: {:?} 0x{:x}", b as char, b);
if b == TokTrie::SPECIAL_TOKEN_MARKER {
assert!(!s.has_pending_lexeme_bytes());
let specs = s.token_range_lexemes();
let mut unique_token_id = None;
'spec: for s in specs {
for range in &s.token_ranges {
if range.start() == range.end() {
let t = *range.start();
if unique_token_id.is_none() || unique_token_id == Some(t) {
unique_token_id = Some(t);
} else {
unique_token_id = None;
break 'spec;
}
} else {
unique_token_id = None;
break 'spec;
}
}
}
if let Some(token_id) = unique_token_id {
let mut bytes = format!("X[{token_id}]").into_bytes();
bytes[0] = TokTrie::SPECIAL_TOKEN_MARKER;
let mut all_ok = true;
for b in bytes {
let (ok, bt) = s.try_push_byte_definitive(Some(b));
assert!(bt == 0);
if !ok {
all_ok = false;
break;
}
}
if !all_ok {
debug!(
" force_bytes reject, special token {}, byte = {}",
token_id, b as char
);
break;
} else {
continue;
}
} else {
debug!(" non-determined special token");
break;
}
}
let (ok, bt) = s.try_push_byte_definitive(Some(b));
assert!(bt == 0);
if !ok {
debug!(" force_bytes reject {}", b as char);
break;
}
}
});
self.assert_definitive();
self.last_force_bytes_len = self.bytes.len();
let bytes = &self.bytes[self.byte_to_token_idx.len()..];
trace!(
"force_bytes exit {} lexer_stack={}",
bytes.len(),
self.lexer_stack.len()
);
}
fn special_pre_lexeme(&mut self, state: StateID) -> bool {
let possible = self.lexer().possible_lexemes(state);
let specs = self.lexer_spec().token_range_lexemes(possible);
let bytes = self.curr_row_bytes();
debug!("special_pre_lexeme: {:?}", String::from_utf8_lossy(&bytes));
let bytes = &bytes[2..bytes.len()];
if let Ok(tok_id) = std::str::from_utf8(bytes).unwrap().parse::<u32>() {
let idx = specs.iter().position(|spec| {
spec.token_ranges
.iter()
.any(|range| range.contains(&tok_id))
});
debug!(" >> tok_id={} idx={:?}", tok_id, idx);
if let Some(idx) = idx {
let pre = PreLexeme {
idx: MatchingLexemesIdx::Single(specs[idx].idx),
byte: Some(b']'),
byte_next_row: false,
};
self.advance_parser(pre)
} else {
false
}
} else {
debug!(" >> not a number; should never happen?");
false
}
}
#[inline(always)]
fn advance_lexer_or_parser(&mut self, lex_result: LexerResult, curr: LexerState) -> bool {
match lex_result {
LexerResult::State(next_state, byte) => {
self.lexer_stack.push(LexerState {
row_idx: curr.row_idx,
lexer_state: next_state,
byte: Some(byte),
});
true
}
LexerResult::Error => false,
LexerResult::Lexeme(pre_lexeme) => self.advance_parser(pre_lexeme),
LexerResult::SpecialToken(state) => self.special_pre_lexeme(state),
}
}
fn check_lexer_bytes_invariant(&self) {
let off = if self.lexer_stack_top_eos { 2 } else { 1 };
if self.lexer_stack.len() != self.bytes.len() + off {
panic!(
"lexer_stack={:?} bytes={:?} {}!={}+{off}",
self.lexer_stack,
String::from_utf8_lossy(&self.bytes),
self.lexer_stack.len(),
self.bytes.len()
);
}
}
fn trie_started_inner(&mut self, lbl: &str) {
self.assert_definitive();
self.trie_lexer_stack = self.lexer_stack.len();
self.trie_grammar_stack = self.scratch.grammar_stack.len();
self.scratch.definitive = false;
if ITEM_TRACE {
self.trace_stats0 = self.stats.clone();
self.trace_start = Instant::now();
self.trace_byte_stack.clear();
item_trace!("trie started; {}", lbl);
}
self.rows_valid_end = self.num_rows();
}
fn trie_finished_inner(&mut self) {
assert!(!self.scratch.definitive);
assert!(self.row_infos.len() <= self.num_rows());
assert!(self.scratch.grammar_stack.len() >= self.trie_grammar_stack);
self.scratch.grammar_stack.truncate(self.trie_grammar_stack);
if ITEM_TRACE {
let mut st = self.stats.clone();
st.lexer_cost = self.lexer().dfa.total_fuel_spent();
st = st.delta(&self.trace_stats0);
st.compute_time_us = self.trace_start.elapsed().as_micros() as u64;
item_trace!("trie finished: {}", serde_json::to_string(&st).unwrap());
self.trace_byte_stack.clear();
}
self.pop_lexer_states(self.lexer_stack.len() - self.trie_lexer_stack);
self.scratch.definitive = true;
self.assert_definitive();
self.rows_valid_end = self.num_rows();
self.scratch.log_override = false; self.lexer_stack_flush_position = 0;
}
fn run_speculative<T>(&mut self, lbl: &str, f: impl FnOnce(&mut Self) -> T) -> T {
self.trie_started_inner(lbl);
let r = f(self);
self.trie_finished_inner();
r
}
fn is_accepting_inner(&mut self) -> bool {
self.flush_lexer() && self.row_is_accepting()
}
pub fn is_accepting(&mut self) -> bool {
self.run_speculative("is_accepting", |s| s.is_accepting_inner())
}
fn try_push_byte_definitive(&mut self, byte: Option<u8>) -> (bool, usize) {
assert!(self.scratch.definitive);
let curr = self.lexer_state();
let res = if let Some(b) = byte {
self.stats.definitive_bytes += 1;
self.lexer_mut().advance(curr.lexer_state, b, true)
} else {
let lexeme = self.lexer_mut().force_lexeme_end(curr.lexer_state);
if lexeme.is_error() {
debug!(
" lexer fail on forced end; allowed: {}",
self.allowed_lexemes_dbg(self.rows[curr.row_idx as usize].lexer_start_state)
);
}
lexeme
};
if res.is_error() {
debug!(
" lexer fail; allowed: {}",
self.allowed_lexemes_dbg(self.rows[curr.row_idx as usize].lexer_start_state)
);
}
assert!(self.backtrack_byte_count == 0);
if self.advance_lexer_or_parser(res, curr) {
if let Some(b) = byte {
self.bytes.push(b);
}
let bt = std::mem::take(&mut self.backtrack_byte_count);
if bt > 0 {
assert!(self.lexer_spec().has_stop);
self.last_force_bytes_len = usize::MAX;
self.bytes.truncate(self.bytes.len() - bt);
}
(true, bt)
} else {
(false, 0)
}
}
fn curr_row(&self) -> &Row {
&self.rows[self.lexer_state().row_idx as usize]
}
fn forced_byte(&mut self) -> Option<u8> {
if self.is_accepting() {
debug!(" in accept state, not forcing");
return None;
}
let lex_state = self.lexer_state().lexer_state;
let quick_res = self.lexer_mut().next_byte(lex_state);
if let NextByte::ForcedByte(b) = quick_res {
return Some(b);
}
let slow_res = self.run_speculative("forced_byte", |state| {
let mut r = ParserRecognizer { state };
if let NextByte::SomeBytes2([a, b]) = quick_res {
if r.try_push_byte(a) {
r.pop_bytes(1);
if r.try_push_byte(b) {
r.pop_bytes(1);
return None;
}
}
}
let b0 = quick_res.some_bytes().first().cloned().unwrap_or(b' ');
let mut b = b0;
let mut byte_sym = None;
loop {
if r.try_push_byte(b) {
r.pop_bytes(1);
if byte_sym.is_some() {
return None; } else {
byte_sym = Some(b);
}
}
b = b.wrapping_add(1);
if b == b0 {
break;
}
}
byte_sym
});
slow_res
}
fn save_state(&self) -> SavedParserState {
SavedParserState {
lexer_stack_length: self.lexer_stack.len(),
}
}
fn restore_state(&mut self, state: SavedParserState) {
self.lexer_stack.truncate(state.lexer_stack_length);
}
fn flush_lexer(&mut self) -> bool {
if !self.has_pending_lexeme_bytes() {
return true;
}
let curr = self.lexer_state();
let lex_result = self.lexer_mut().try_lexeme_end(curr.lexer_state);
let prev_len = self.lexer_stack.len();
let r = self.advance_lexer_or_parser(lex_result, curr);
if self.lexer_stack.len() != prev_len {
assert!(self.lexer_stack.len() == prev_len + 1);
assert!(prev_len > 0);
self.lexer_stack_flush_position = prev_len;
}
assert!(self.backtrack_byte_count == 0);
r
}
pub fn scan_eos(&mut self) -> bool {
self.assert_definitive();
let lexer_eos = self.lexer_allows_eos();
debug!(" scan eos: lexer_eos={}", lexer_eos);
let prev_stack = self.lexer_stack.len();
if !self.flush_lexer() {
assert_eq!(self.lexer_stack.len(), prev_stack);
debug!(" flush_lexer() failed");
return false;
}
debug!(" flush_lexer() OK");
if lexer_eos {
return true;
}
if self.lexer_stack.len() != prev_stack {
assert_eq!(self.lexer_stack.len(), prev_stack + 1);
self.lexer_stack_top_eos = true;
}
self.assert_definitive();
false
}
fn scan_skip_lexeme(&mut self, lexeme: &Lexeme) -> bool {
let src = self.curr_row().item_indices();
let n = src.len();
if n == 0 {
return false;
}
self.scratch.ensure_items(src.end + n + 10);
self.scratch.new_row(src.end);
self.scratch.push_lexeme_idx = lexeme.idx;
let mut lex_start = Some(self.rows[self.num_rows() - 1].lexer_start_state);
for i in src {
self.scratch
.just_add_idx(self.scratch.items[i], i, "skip_lexeme");
}
let (grammar_id, max_token_ptr) = self.maybe_pop_grammar_stack(lexeme.idx);
if let Some(ptr) = max_token_ptr {
self.process_max_tokens(ptr, lexeme);
lex_start = None;
}
let push_res = self.just_push_row(grammar_id, lex_start);
assert!(push_res);
true
}
fn scan(&mut self, lexeme: &Lexeme) -> bool {
let set = self.shared_box.lexer().lexemes_from_idx(lexeme.idx);
let lex_spec = self.lexer_spec();
for lx in set.as_slice() {
if lex_spec.lexeme_spec(*lx).is_skip {
return self.scan_skip_lexeme(lexeme);
}
}
let row_idx = self.num_rows() - 1;
let items = self.rows[row_idx].item_indices();
self.scratch.ensure_items(items.end + items.len() + 10);
self.scratch.new_row(items.end);
self.scratch.push_lexeme_idx = lexeme.idx;
debug_def!(
self,
" scan: {} at row={} token={}",
self.lexer().dbg_lexeme(lexeme),
row_idx,
self.token_idx,
);
for i in items {
let item = self.scratch.items[i];
let sym = self.grammar.sym_data_dot(item.rhs_ptr());
if let Some(idx) = sym.lexeme {
if set.contains(idx) {
self.scratch.just_add_idx(item.advance_dot(), i, "scan");
}
}
}
self.push_row(self.num_rows(), lexeme)
}
fn mk_capture(&self, var_name: &str, bytes: &[u8]) -> (String, Vec<u8>) {
debug!(
" capture: {} {:?}",
var_name,
String::from_utf8_lossy(bytes)
);
let bytes = self.tok_env.tok_trie().decode_raw_to_decode(bytes);
(var_name.to_string(), bytes)
}
fn process_one_capture(
&mut self,
lhs: CSymIdx,
curr_idx: usize,
lexeme: &Lexeme,
is_lexeme: bool,
capture_start: usize,
) {
let sym_data = self.grammar.sym_data(lhs);
debug!(
" process_one_capture: {} {}-{} {}",
self.grammar.sym_name(lhs),
capture_start,
curr_idx,
if is_lexeme { "lexeme" } else { "full" }
);
if let Some(var_name) = sym_data.props.stop_capture_name.as_ref() {
let bytes = lexeme.hidden_bytes();
self.captures.push(self.mk_capture(var_name, bytes));
}
if let Some(var_name) = sym_data.props.capture_name.as_ref() {
let mut bytes = Vec::new();
if capture_start < curr_idx {
bytes = self.row_infos[capture_start..curr_idx]
.iter()
.map(|ri| ri.lexeme.upper_visible_bytes(is_lexeme))
.collect::<Vec<_>>()
.concat();
}
if is_lexeme || capture_start < curr_idx {
bytes.extend_from_slice(lexeme.upper_visible_bytes(is_lexeme));
}
self.captures.push(self.mk_capture(var_name, &bytes));
}
}
fn process_captures(&mut self, item: Item, curr_idx: usize, lexeme: &Lexeme, for_lexeme: bool) {
let rule = item.rhs_ptr();
let for_full_rule = self.grammar.sym_idx_dot(rule) == CSymIdx::NULL;
debug!(
" process_captures: for_full_rule={} for_lexeme={}; {:?}",
for_full_rule, for_lexeme, lexeme
);
if for_full_rule {
let lhs = self.grammar.sym_idx_lhs(rule);
self.process_one_capture(lhs, curr_idx, lexeme, false, item.start_pos());
}
if for_lexeme {
let prev_item = item.rewind_dot();
let lex_idx = self.grammar.sym_idx_dot(prev_item.rhs_ptr());
assert!(lex_idx != CSymIdx::NULL);
self.process_one_capture(lex_idx, curr_idx, lexeme, true, curr_idx);
}
}
#[inline(always)]
fn process_agenda(&mut self, curr_idx: usize, lexeme: &Lexeme) {
let mut agenda_ptr = self.scratch.row_start;
self.scratch.push_allowed_lexemes.clear();
self.scratch.push_allowed_grammar_ids.set_all(false);
let lexemes_end = if self.scratch.row_start == 0 {
0
} else {
self.scratch.row_end
};
while agenda_ptr < self.scratch.row_end {
let item_idx = agenda_ptr;
let item = self.scratch.items[agenda_ptr];
agenda_ptr += 1;
debug_def!(self, " agenda: {}", self.item_to_string(item_idx));
let dot_ptr = item.rhs_ptr();
let after_dot = self.grammar.sym_idx_dot(dot_ptr);
if self.scratch.definitive {
let is_lexeme = agenda_ptr <= lexemes_end;
if is_lexeme || after_dot == CSymIdx::NULL {
self.process_captures(item, curr_idx, lexeme, is_lexeme);
}
}
if after_dot == CSymIdx::NULL {
let lhs = self.grammar.sym_idx_lhs(dot_ptr);
if item.start_pos() < curr_idx {
for i in self.rows[item.start_pos()].item_indices() {
let item = self.scratch.items[i];
if self.grammar.sym_idx_dot(item.rhs_ptr()) == lhs {
self.scratch.add_unique(item.advance_dot(), i, "complete");
}
}
}
} else {
let sym_data = self.grammar.sym_data(after_dot);
if let Some(lx) = sym_data.lexeme {
self.scratch
.push_allowed_grammar_ids
.set(sym_data.props.grammar_id.as_usize(), true);
self.scratch.push_allowed_lexemes.add(lx);
}
let mut cond_nullable = false;
if sym_data.gen_grammar.is_some() {
let mut node = self.mk_grammar_stack_node(sym_data, curr_idx);
self.scratch
.add_unique(node.start_item, item_idx, "gen_grammar");
node.start_item_idx = self.scratch.find_item(node.start_item).unwrap();
self.scratch.push_grammar_stack(node);
} else {
if self.scratch.parametric {
let param_here = self.scratch.item_args[item_idx];
let param_dot = self.grammar.param_value_dot(dot_ptr).eval(param_here);
if !sym_data.cond_nullable.is_empty()
&& sym_data.cond_nullable.iter().any(|c| c.eval(param_dot))
{
cond_nullable = true;
}
for ri in 0..sym_data.rules.len() {
if !sym_data.rules_cond[ri].eval(param_dot) {
continue; }
let inner_rule = sym_data.rules[ri];
let new_item = Item::new(inner_rule, curr_idx);
self.scratch
.add_unique_arg(new_item, "predict_parametric", param_dot);
}
} else {
for rule in &sym_data.rules {
let new_item = Item::new(*rule, curr_idx);
self.scratch.add_unique(new_item, item_idx, "predict");
}
}
}
if sym_data.is_nullable || cond_nullable {
self.scratch
.add_unique(item.advance_dot(), item_idx, "null");
if self.scratch.definitive {
if let Some(var_name) = &sym_data.props.capture_name {
debug!(" capture: {} NULL", var_name);
self.captures.push((var_name.clone(), vec![]));
}
}
}
}
}
}
#[inline(always)]
fn just_push_row(&mut self, grammar_id: LexemeClass, lex_start: Option<StateID>) -> bool {
let row_len = self.scratch.row_len();
self.stats.rows += 1;
if row_len == 0 {
false
} else {
self.stats.all_items += row_len;
let lex_start = if let Some(l) = lex_start {
l
} else {
if self
.scratch
.push_allowed_grammar_ids
.get(grammar_id.as_usize())
{
let skip = self.lexer_spec().skip_id(grammar_id);
self.scratch.push_allowed_lexemes.add(skip);
}
self.shared_box
.lexer_mut()
.start_state(&self.scratch.push_allowed_lexemes)
};
debug_def!(
self,
" push row: {} {:?}",
self.allowed_lexemes_dbg(lex_start),
grammar_id
);
let idx = self.num_rows();
let row = self.scratch.work_row(lex_start);
if self.rows.is_empty() || self.rows.len() == idx {
self.rows.push(row);
} else {
self.rows[idx] = row;
}
self.rows_valid_end = idx + 1;
if self.scratch.definitive {
if self.row_infos.len() > idx {
self.row_infos.drain(idx..);
}
let mut start_byte_idx = self.bytes.len();
if start_byte_idx > 0 {
start_byte_idx += 1;
}
self.row_infos.push(RowInfo {
lexeme: Lexeme::bogus(),
token_idx_start: self.token_idx,
token_idx_stop: self.token_idx,
start_byte_idx,
});
}
true
}
}
fn process_max_tokens(&mut self, ptr: GrammarStackPtr, lexeme: &Lexeme) {
debug_def!(self, " process_max_tokens");
let curr_idx = self.num_rows();
let top = &self.scratch.grammar_stack[ptr.as_usize()];
self.scratch.push_grm_top = top.back_ptr;
let item = top.start_item.advance_dot();
self.scratch.row_end = self.scratch.row_start;
self.scratch
.just_add_idx(item, top.start_item_idx, "max_tokens");
self.process_agenda(curr_idx, lexeme);
}
#[inline(always)]
fn push_row(&mut self, curr_idx: usize, lexeme: &Lexeme) -> bool {
let (grammar_id, max_token_ptr) = self.maybe_pop_grammar_stack(lexeme.idx);
self.process_agenda(curr_idx, lexeme);
if let Some(ptr) = max_token_ptr {
assert!(curr_idx == self.num_rows(), "max_tokens on first row");
self.process_max_tokens(ptr, lexeme);
}
self.just_push_row(grammar_id, None)
}
fn mk_grammar_stack_node(&self, sym_data: &CSymbol, curr_idx: usize) -> GrammarStackNode {
assert!(sym_data.rules.len() == 1);
let start_item = Item::new(sym_data.rules[0], curr_idx);
assert!(self.grammar.sym_idx_dot(start_item.advance_dot().rhs_ptr()) == CSymIdx::NULL);
let nested_sym = self.grammar.sym_data_dot(start_item.rhs_ptr());
let token_horizon = sym_data.props.max_tokens.saturating_add(self.token_idx);
GrammarStackNode {
back_ptr: self.scratch.push_grm_top,
token_horizon: token_horizon as u32,
grammar_id: nested_sym.props.grammar_id,
start_item,
start_item_idx: usize::MAX,
}
}
#[inline(always)]
fn maybe_pop_grammar_stack(
&mut self,
lx: MatchingLexemesIdx,
) -> (LexemeClass, Option<GrammarStackPtr>) {
let set = self.lexer().lexemes_from_idx(lx);
let lex_spec = &self.lexer_spec();
let grammar_ids = HashSet::from_iter(
set.as_slice()
.iter()
.map(|&e| lex_spec.lexeme_spec(e).class()),
);
let mut max_token_ptr = None;
let mut grm_stack_top = if !self.rows.is_empty() {
self.rows[self.num_rows() - 1].grammar_stack_ptr
} else {
GrammarStackPtr::new(0)
};
while grm_stack_top.as_usize() > 0 {
let grm_top = &self.scratch.grammar_stack[grm_stack_top.as_usize()];
debug_def!(
self,
" pop grammar_stack: top={:?}, curr={:?}, #{}",
grm_top.grammar_id,
grammar_ids,
self.token_idx
);
if grammar_ids.contains(&grm_top.grammar_id) {
if grm_top.token_horizon <= self.token_idx as u32 {
debug_def!(
self,
" hit token limit horizon={} token_idx={}",
grm_top.token_horizon,
self.token_idx
);
max_token_ptr = Some(grm_stack_top);
}
break;
}
grm_stack_top = grm_top.back_ptr;
}
if grm_stack_top.as_usize() == 0 {
assert!(
grammar_ids.contains(&LexemeClass::ROOT),
"grammar stack empty for non-root grammar: {grammar_ids:?}"
);
}
self.scratch.push_grm_top = grm_stack_top;
let top_id = self.scratch.grammar_stack[grm_stack_top.as_usize()].grammar_id;
(top_id, max_token_ptr)
}
#[inline(always)]
fn curr_row_bytes(&self) -> Vec<u8> {
let mut bytes = vec![];
let row_idx = self.num_rows() - 1;
for back in self.lexer_stack.iter().rev() {
if back.row_idx as usize != row_idx {
break;
}
if let Some(b) = back.byte {
bytes.push(b);
}
}
bytes.reverse();
bytes
}
fn lexer_spec(&self) -> &LexerSpec {
self.grammar.lexer_spec()
}
fn allowed_lexemes_dbg(&self, lex_state: StateID) -> String {
self.lexer_spec()
.dbg_lexeme_set(self.lexer().possible_lexemes(lex_state))
}
#[inline(always)]
fn mk_lexeme(&self, byte: Option<u8>, pre_lexeme: PreLexeme) -> Lexeme {
let mut bytes = self.curr_row_bytes();
if let Some(byte) = byte {
bytes.push(byte);
}
let (hidden, is_suffix) = self.lexer().lexeme_props(pre_lexeme.idx);
Lexeme::new(pre_lexeme.idx, bytes, hidden, is_suffix)
}
fn has_forced_bytes(&self, allowed_lexemes: &LexemeSet, bytes: &[u8]) -> bool {
if allowed_lexemes.is_empty() {
return false;
}
let mut matched_something = false;
for lexeme_idx in allowed_lexemes.iter() {
let lex_spec = &self.lexer_spec().lexemes[lexeme_idx.as_usize()];
if lex_spec.is_skip && matches!(lex_spec.rx, RegexAst::NoMatch) {
continue;
}
if !self.lexer_spec().has_forced_bytes(lex_spec, bytes) {
return false;
}
matched_something = true;
}
matched_something
}
#[inline(always)]
fn lexer_state_for_added_row(
&mut self,
lexeme: Lexeme,
transition_byte: Option<u8>,
) -> LexerState {
let added_row = self.num_rows();
let added_row_start_state = self.rows[added_row].lexer_start_state;
let no_hidden = LexerState {
row_idx: added_row as u32,
lexer_state: self
.shared_box
.lexer_mut()
.transition_start_state(added_row_start_state, transition_byte),
byte: transition_byte,
};
if self.scratch.definitive {
self.row_infos[added_row - 1].lexeme = lexeme;
if transition_byte.is_some() {
let new_start = self.row_infos[added_row - 1]
.start_byte_idx
.saturating_sub(1);
self.row_infos[added_row].start_byte_idx -= new_start;
}
}
debug_def!(
self,
"lex: re-start {:?} (via {:?}); allowed: {}",
no_hidden.lexer_state,
transition_byte.map(|b| b as char),
self.allowed_lexemes_dbg(added_row_start_state)
);
no_hidden
}
#[inline(always)]
fn handle_hidden_bytes(
&mut self,
no_hidden: LexerState,
lexeme_byte: Option<u8>,
pre_lexeme: PreLexeme,
) -> bool {
let added_row_start_state = self.rows[self.num_rows()].lexer_start_state;
let lexeme = self.mk_lexeme(lexeme_byte, pre_lexeme);
let hidden_bytes = lexeme.hidden_bytes();
let trace_here = self.scratch.log_enabled();
if trace_here {
trace!(
" hidden_bytes: {} {:?}",
self.allowed_lexemes_dbg(added_row_start_state),
String::from_utf8_lossy(hidden_bytes)
);
}
if self.has_forced_bytes(
self.lexer().possible_lexemes(added_row_start_state),
hidden_bytes,
) {
if trace_here {
trace!(" hidden forced");
}
let mut lexer_state = added_row_start_state;
self.pop_lexer_states(hidden_bytes.len() - 1);
for idx in 0..hidden_bytes.len() {
let b = hidden_bytes[idx];
match self
.shared_box
.lexer_mut()
.advance(lexer_state, b, trace_here)
{
LexerResult::State(next_state, _) => {
lexer_state = next_state;
}
LexerResult::SpecialToken(_) => panic!("hidden byte resulted in special token"),
LexerResult::Error => panic!("hidden byte failed; {hidden_bytes:?}"),
LexerResult::Lexeme(second_lexeme) => {
if trace_here {
debug!("hidden bytes lexeme: {:?}", second_lexeme);
}
assert!(
idx == hidden_bytes.len() - 1,
"lexeme in the middle of hidden bytes"
);
self.lexer_stack.push(LexerState {
lexer_state,
byte: None,
..no_hidden
});
let r = self.advance_parser(second_lexeme);
if r {
let new_top = self.lexer_stack.pop().unwrap();
*self.lexer_stack.last_mut().unwrap() = new_top;
return true;
} else {
self.lexer_stack.pop();
return false;
}
}
}
self.lexer_stack.push(LexerState {
lexer_state,
byte: Some(b),
..no_hidden
});
}
if self.scratch.definitive {
self.assert_definitive_inner();
}
} else {
if trace_here {
debug!(" hidden not forced");
}
if self.scratch.definitive {
self.lexer_stack.push(LexerState {
lexer_state: added_row_start_state,
byte: None,
..no_hidden
});
self.assert_definitive_inner();
self.backtrack_byte_count = hidden_bytes.len();
} else {
self.lexer_stack.push(LexerState {
lexer_state: self.shared_box.lexer_mut().a_dead_state(),
byte: None,
..no_hidden
});
}
}
true
}
fn lexer_stack_top(&self) -> String {
String::from_utf8_lossy(&self.trace_byte_stack).to_string()
}
#[inline(never)]
fn advance_parser(&mut self, pre_lexeme: PreLexeme) -> bool {
if self.stats.all_items > self.max_all_items {
return false;
}
let transition_byte = if pre_lexeme.byte_next_row {
pre_lexeme.byte
} else {
None
};
let lexeme_byte = if pre_lexeme.byte_next_row {
None
} else {
pre_lexeme.byte
};
let lexeme_idx = pre_lexeme.idx;
let lexeme = if self.scratch.definitive {
self.mk_lexeme(lexeme_byte, pre_lexeme)
} else {
Lexeme::just_idx(lexeme_idx)
};
let scan_res = if !self.scratch.definitive
&& self.num_rows() < self.rows_valid_end
&& self.rows[self.num_rows()].lexeme_idx == lexeme_idx
{
self.stats.cached_rows += 1;
true
} else {
let scan_res = self.scan(&lexeme);
if scan_res && ITEM_TRACE {
let added_row = self.num_rows();
let row = &self.rows[added_row];
item_trace!(
" row: {:?} -> {}",
self.lexer_stack_top(),
row.item_indices().len()
);
if self.stats.all_items > self.max_all_items {
panic!("max items exceeded");
}
}
scan_res
};
if scan_res {
let mut no_hidden = self.lexer_state_for_added_row(lexeme, transition_byte);
let (hidden, is_suffix) = self.lexer().lexeme_props(lexeme_idx);
if hidden > 0 && !is_suffix {
return self.handle_hidden_bytes(no_hidden, lexeme_byte, pre_lexeme);
} else {
if pre_lexeme.byte_next_row && no_hidden.lexer_state.is_dead() {
if self.scratch.definitive {
self.row_infos.drain(no_hidden.row_idx as usize..);
}
return false;
}
if let Some(b) = transition_byte {
let single = self
.lexer_mut()
.check_for_single_byte_lexeme(no_hidden.lexer_state, b);
if let Some(second_lexeme) = single {
debug_def!(self, "single byte lexeme: {:?}", second_lexeme);
no_hidden.byte = None;
self.lexer_stack.push(no_hidden);
assert!(pre_lexeme.byte_next_row);
assert!(!second_lexeme.byte_next_row);
let r = self.advance_parser(second_lexeme);
if r {
let new_top = self.lexer_stack.pop().unwrap();
*self.lexer_stack.last_mut().unwrap() = new_top;
return true;
} else {
self.lexer_stack.pop();
return false;
}
}
}
debug_def!(self, " push normal: {no_hidden:?}");
self.lexer_stack.push(no_hidden);
}
if self.scratch.definitive {
self.assert_definitive_inner();
}
true
} else {
debug_def!(self, " scan failed");
false
}
}
}
pub struct ParserRecognizer<'a> {
state: &'a mut ParserState,
}
impl ParserRecognizer<'_> {
pub fn lexer_mut(&mut self) -> &mut Lexer {
self.state.lexer_mut()
}
pub fn lexer(&self) -> &Lexer {
self.state.lexer()
}
pub fn lexer_state(&self) -> StateID {
self.state.lexer_state().lexer_state
}
pub fn stats_mut(&mut self) -> &mut ParserStats {
&mut self.state.stats
}
pub fn metrics_mut(&mut self) -> &mut ParserMetrics {
&mut self.state.metrics
}
}
pub trait BiasComputer: Send + Sync {
fn compute_bias(&self, rec: &mut ParserRecognizer<'_>, start: &[u8]) -> SimpleVob;
fn trie(&self) -> &TokTrie;
}
impl Recognizer for ParserRecognizer<'_> {
#[inline(always)]
fn pop_bytes(&mut self, num: usize) {
if ITEM_TRACE {
self.state
.trace_byte_stack
.truncate(self.state.trace_byte_stack.len() - num);
}
self.state.pop_lexer_states(num);
}
fn collapse(&mut self) {
}
fn trie_started(&mut self, lbl: &str) {
self.state.trie_started_inner(lbl);
}
fn trie_finished(&mut self) {
self.state.trie_finished_inner();
}
#[inline(always)]
fn try_push_byte(&mut self, byte: u8) -> bool {
let stats = false;
let lexer_logging = false;
let curr = self.state.lexer_state();
let res = self
.state
.lexer_mut()
.advance(curr.lexer_state, byte, lexer_logging);
if ITEM_TRACE {
self.state.trace_byte_stack.push(byte);
}
if stats {
assert!(!self.state.scratch.definitive);
self.state.stats.lexer_ops += 1;
match res {
LexerResult::State(_, _) => {}
LexerResult::Error => self.state.stats.num_lex_errors += 1,
LexerResult::Lexeme(_) | LexerResult::SpecialToken(_) => {
self.state.stats.num_lexemes += 1
}
}
}
let r = self.state.advance_lexer_or_parser(res, curr);
if ITEM_TRACE && !r {
self.state.trace_byte_stack.pop();
}
r
}
fn save_stats(&mut self, nodes_walked: usize) {
self.state.stats.trie_nodes_walked += nodes_walked;
}
}
fn item_to_string(g: &CGrammar, item: &Item, param: ParamValue) -> String {
let mut r = format!(
"{} @{}",
g.rule_to_string(item.rhs_ptr(), &ParamCond::True),
item.start_pos()
);
if !param.is_default() {
r.push_str(&format!(" ::{param}"));
}
r
}
pub enum ParserError {
LexerError(String),
ParserError(String),
}
impl ParserError {
pub fn stop_reason(&self) -> StopReason {
match self {
ParserError::LexerError(_) => StopReason::LexerTooComplex,
ParserError::ParserError(_) => StopReason::ParserTooComplex,
}
}
pub fn message(&self) -> String {
match self {
ParserError::LexerError(s) => format!("lexer error: {s}"),
ParserError::ParserError(s) => format!("parser error: {s}"),
}
}
}
impl Parser {
pub fn new(
tok_env: TokEnv,
grammar: Arc<CGrammar>,
limits: ParserLimits,
perf_counters: Arc<ParserPerfCounters>,
) -> Result<Self> {
let (state, lexer) = ParserState::new(tok_env, grammar, limits, perf_counters)?;
let shared = Arc::new(Mutex::new(Box::new(SharedState {
lexer_opt: Some(lexer),
})));
Ok(Parser { shared, state })
}
pub fn compute_bias(&mut self, computer: &dyn BiasComputer, start: &[u8]) -> SimpleVob {
self.with_shared(|state| state.compute_bias(computer, start))
}
pub fn captures(&self) -> &[(String, Vec<u8>)] {
&self.state.captures.capture_list
}
pub fn get_capture(&self, name: &str) -> Option<&[u8]> {
self.state.captures.capture_map.get(name).map(|v| &v[..])
}
pub fn stats(&self) -> &ParserStats {
&self.state.stats
}
#[inline(always)]
pub fn perf_counters(&self) -> &ParserPerfCounters {
&self.state.perf_counters
}
pub fn metrics_mut(&mut self) -> &mut ParserMetrics {
&mut self.state.metrics
}
pub fn hidden_start(&self) -> usize {
let mut shared = self.shared.lock().unwrap();
self.state.hidden_start(shared.lexer_mut())
}
pub fn lexer_stats(&self) -> LexerStats {
self.shared.lock().unwrap().lexer().dfa.stats()
}
pub fn get_error(&self) -> Option<ParserError> {
let shared = self.shared.lock().unwrap();
if let Some(e) = shared.lexer().dfa.get_error() {
return Some(ParserError::LexerError(e));
}
if let Some(e) = &self.state.parser_error {
return Some(ParserError::ParserError(e.clone()));
}
None
}
pub fn with_recognizer<T>(&mut self, f: impl FnOnce(&mut ParserRecognizer) -> T) -> T {
self.with_shared(|state| {
let mut rec = ParserRecognizer { state };
f(&mut rec)
})
}
pub fn get_bytes(&self) -> &[u8] {
self.state.get_bytes()
}
pub fn force_bytes(&mut self) -> &[u8] {
if !self.state.needs_force_bytes() {
self.currently_forced_bytes()
} else {
let t0 = Instant::now();
let prev_len = self.currently_forced_bytes().len();
self.with_shared(|state| state.force_bytes());
let r = self.currently_forced_bytes();
if r.len() > prev_len {
self.state.perf_counters.force_bytes.record(t0.elapsed());
} else {
self.state
.perf_counters
.force_bytes_empty
.record(t0.elapsed());
}
r
}
}
pub fn scan_eos(&mut self) -> bool {
self.with_shared(|state| state.scan_eos())
}
pub fn grammar_warnings(&mut self) -> Vec<String> {
self.with_shared(|state| state.lexer_spec().render_warnings())
}
pub(crate) fn apply_forced(&mut self, byte_idx: usize) {
self.state.byte_to_token_idx.resize(byte_idx, 0);
}
pub(crate) fn additional_backtrack(&mut self, n_bytes: usize) {
let new_len = self.state.byte_to_token_idx.len().saturating_sub(n_bytes);
self.state.byte_to_token_idx.truncate(new_len);
}
pub fn apply_token(&mut self, tok_bytes: &[u8], tok_id: TokenId) -> Result<usize> {
let r = self.with_shared(|state| state.apply_token(tok_bytes, tok_id));
self.state.token_idx += 1;
r
}
fn with_shared<T>(&mut self, f: impl FnOnce(&mut ParserState) -> T) -> T {
let mut shared = self.shared.lock().unwrap();
self.state.shared_box = std::mem::take(&mut *shared);
let r = f(&mut self.state);
*shared = std::mem::take(&mut self.state.shared_box);
assert!(shared.lexer_opt.is_some());
r
}
pub fn rollback(&mut self, n_bytes: usize) -> Result<()> {
self.state.lexer_spec().check_rollback()?;
self.with_shared(|state| state.rollback(n_bytes))
}
pub fn validate_tokens(&mut self, tokens: &[TokenId]) -> usize {
self.with_shared(|state| {
let r = state.validate_tokens(tokens);
debug!(
"validate_tokens: {} -> {}/{}",
state.tok_env.tok_trie().tokens_dbg(tokens),
r,
tokens.len()
);
r
})
}
pub fn log_row_infos(&mut self, label: &str) {
if cfg!(feature = "logging") && DEBUG {
self.with_shared(|state| {
debug!(
"row infos {}: token_idx: {}; applied bytes: {}/{}",
label,
state.token_idx,
state.byte_to_token_idx.len(),
state.bytes.len()
);
for infos in state.row_infos.iter() {
debug!(" {}", infos.dbg(state.lexer()));
}
})
}
}
pub fn is_accepting(&mut self) -> bool {
self.with_shared(|state| state.is_accepting())
}
pub fn currently_forced_bytes(&self) -> &[u8] {
&self.state.bytes[self.state.byte_to_token_idx.len()..]
}
pub fn has_pending_lexeme_bytes(&self) -> bool {
self.state.has_pending_lexeme_bytes()
}
pub fn grammar(&self) -> &CGrammar {
&self.state.grammar
}
pub fn can_advance(&self) -> bool {
self.state.can_advance()
}
pub fn temperature(&self) -> Option<f32> {
self.state.temperature()
}
pub fn deep_clone(&self) -> Self {
let mut copy = self.clone();
let shared = self.shared.lock().unwrap();
copy.shared = Arc::new(Mutex::new(shared.clone()));
copy
}
pub fn test_trigger_lexer_error(&mut self) -> Result<()> {
self.with_shared(|_state| {
panic!("synthetic error");
})
}
pub fn invalidate_bias_cache(&mut self) {
self.state.bias_cache = None;
}
}