1use std::borrow::Cow;
2use std::cell::RefCell;
3use std::fmt::Write;
4use std::rc::Rc;
5use std::{cmp, fmt};
6use std::{mem, ops};
7
8use cfg_history::earley::History;
9use cfg_symbol::SymbolName;
10
11use occurence_map::OccurenceMap;
12use rule_builder::RuleBuilder;
13
14use crate::local_prelude::*;
15use cfg_history::{
16 BinarizedRhsRange::*, HistoryNodeBinarize, HistoryNodeEliminateNulling, RootHistoryNode,
17};
18
19#[derive(Clone, Debug)]
21pub struct Cfg {
22 sym_source: SymbolSource,
24 lexemes: SymbolBitSet,
26 rules: Vec<CfgRule>,
28 roots: MaybeSmallVec<Symbol>,
30 wrapped_roots: MaybeSmallVec<WrappedRoot>,
31 rhs_len_invariant: Option<usize>,
32 eliminate_nulling: bool,
33 tmp_stack: RefCell<Vec<Symbol>>,
34}
35
36#[derive(Clone, Debug, Eq, PartialEq)]
38pub struct CfgRule {
39 pub lhs: Symbol,
41 pub rhs: Rc<[Symbol]>,
43 pub history: History,
45}
46
47#[derive(Clone)]
49pub struct NamedCfgRule {
50 pub lhs: Symbol,
52 pub rhs: Rc<[Symbol]>,
54 pub history: Option<History>,
56 pub names: Vec<Option<SymbolName>>,
58}
59
60#[derive(Clone, Copy, Debug)]
61pub struct WrappedRoot {
62 pub start_of_input: Symbol,
63 pub inner_root: Symbol,
64 pub end_of_input: Symbol,
65 pub root: Symbol,
66}
67
68#[derive(Eq, PartialEq, Clone, Copy)]
69pub enum RhsPropertyMode {
70 All,
71 Any,
72}
73
74#[derive(Clone, Copy, Debug)]
75pub struct DotInfo {
76 pub lhs: Symbol,
77 pub predot: Option<Symbol>,
78 pub postdot: Option<Symbol>,
79 pub earley: Option<earley::rule_dot::RuleDot>,
80}
81
82impl Default for Cfg {
83 fn default() -> Self {
84 Self::with_sym_source(SymbolSource::new())
85 }
86}
87
88impl Cfg {
89 #[inline]
91 pub fn new() -> Self {
92 Self::default()
93 }
94
95 pub fn with_sym_source(sym_source: SymbolSource) -> Self {
96 Cfg {
97 sym_source,
98 lexemes: SymbolBitSet::new(),
99 rules: vec![],
100 roots: MaybeSmallVec::new(),
101 wrapped_roots: MaybeSmallVec::new(),
102 rhs_len_invariant: None,
103 eliminate_nulling: false,
104 tmp_stack: RefCell::new(vec![]),
105 }
106 }
107
108 pub fn sym<const N: usize>(&mut self) -> [Symbol; N] {
110 self.sym_source_mut().sym()
111 }
112
113 pub fn next_sym(&mut self, name: Option<Cow<str>>) -> Symbol {
115 self.sym_source_mut().next_sym(name)
116 }
117
118 pub fn lexeme(&mut self, name: Option<Cow<str>>) -> Symbol {
120 self.lexemes.reserve(self.num_syms() + 1);
121 let result = self.sym_source_mut().next_sym(name);
122 self.lexemes.set(result, true);
123 result
124 }
125
126 pub fn sym_at<const N: usize>(at: usize) -> [Symbol; N] {
127 let mut sym_source = SymbolSource::new();
128 for _ in 0..at {
129 sym_source.next_sym(None);
130 }
131 sym_source.sym()
132 }
133
134 pub fn num_syms(&self) -> usize {
136 self.sym_source().num_syms()
137 }
138
139 pub fn set_roots(&mut self, roots: impl AsRef<[Symbol]>) {
140 self.roots = roots.as_ref().iter().copied().collect();
141 }
142
143 pub fn roots(&self) -> &[Symbol] {
144 &self.roots[..]
145 }
146
147 pub fn wrapped_roots(&self) -> &[WrappedRoot] {
148 &self.wrapped_roots[..]
149 }
150
151 pub fn set_wrapped_roots(&mut self, wrapped_roots: &[WrappedRoot]) {
152 self.wrapped_roots = wrapped_roots.into();
153 }
154
155 pub fn has_roots(&self) -> bool {
156 !self.roots.is_empty()
157 }
158
159 pub fn limit_rhs_len(&mut self, limit: Option<usize>) {
168 self.rhs_len_invariant = limit;
169 let mut container = mem::take(&mut self.rules);
170 container.retain(|rule| self.maybe_process_rule(rule));
171 self.rules.extend(container);
172 }
173
174 pub fn rule_rhs_len_allowed_range(&self) -> ops::Range<usize> {
175 self.eliminate_nulling as usize..self.rhs_len_invariant.unwrap_or(usize::MAX)
176 }
177
178 pub fn sort(&mut self) {
180 self.rules.sort_by_key(|rule| (rule.lhs, rule.rhs.clone()));
181 }
182
183 pub fn sort_by(&mut self, compare: impl FnMut(&CfgRule, &CfgRule) -> cmp::Ordering) {
185 self.rules.sort_by(compare);
186 }
187
188 pub fn dedup(&mut self) {
190 self.rules.dedup_by_key(|rule| (rule.lhs, rule.rhs.clone()));
191 }
192
193 pub fn extend(&mut self, other: &Cfg) {
194 self.rules.extend(other.rules.iter().cloned());
195 }
196
197 pub fn binarize_and_eliminate_nulling_rules(&mut self) -> Cfg {
212 self.limit_rhs_len(Some(2));
213
214 let mut result = Cfg::with_sym_source(self.sym_source.clone());
215
216 let mut nullable = self.nulling_symbols();
217 self.rhs_closure_for_all(&mut nullable);
218 if nullable.iter().count() == 0 {
219 return result;
220 }
221
222 let mut rewritten_work = Cfg::new();
223 for rule in self.rules() {
224 let is_nullable = |sym: &Symbol| nullable[*sym];
225 let maybe_which = match (
226 rule.rhs.get(0).map(is_nullable),
227 rule.rhs.get(1).map(is_nullable),
228 ) {
229 (Some(true), Some(true)) => Some(All(2)),
230 (Some(true), None) => Some(All(1)),
231 (None, None) => Some(All(0)),
232 (Some(true), Some(false)) => Some(Right),
233 (Some(false), Some(true)) => Some(Left),
234 _ => None,
235 };
236 if let Some(which) = maybe_which {
237 match which {
238 All(num) => {
239 if num == 2 {
241 let history = HistoryNodeEliminateNulling {
242 prev: rule.history,
243 rhs0: rule.rhs.get(0).cloned(),
244 rhs1: rule.rhs.get(1).cloned(),
245 which,
246 }
247 .into();
248 rewritten_work
249 .rule(rule.lhs)
250 .history(history)
251 .rhs(&rule.rhs[0..1]);
252 rewritten_work
253 .rule(rule.lhs)
254 .history(history)
255 .rhs(&rule.rhs[1..2]);
256 }
257 let history = HistoryNodeEliminateNulling {
258 prev: rule.history,
259 rhs0: rule.rhs.get(0).cloned(),
260 rhs1: rule.rhs.get(1).cloned(),
261 which,
262 }
263 .into();
264 result
265 .rule(rule.lhs)
266 .history(history)
267 .rhs(&rule.rhs[which.as_range()]);
268 }
269 Left | Right => {
270 let history: History = HistoryNodeEliminateNulling {
271 prev: rule.history,
272 rhs0: rule.rhs.get(0).cloned(),
273 rhs1: rule.rhs.get(1).cloned(),
274 which,
275 }
276 .into();
277 println!("{:?}", history.nullable());
278 rewritten_work
279 .rule(rule.lhs)
280 .history(history)
281 .rhs(&rule.rhs[which.as_range()]);
282 }
283 }
284 }
285 }
286
287 self.extend(&rewritten_work);
288
289 self.rules.retain(|rule| rule.rhs.len() != 0);
290
291 let mut productive = SymbolBitSet::new();
292 productive.terminal(&*self);
294 productive.subtract_productive(&result);
295
296 self.rhs_closure_for_all(&mut productive);
297 self.rules.retain(|rule| {
298 productive[rule.lhs]
302 });
303
304 result
305 }
306
307 pub fn rules<'a>(&'a self) -> impl Iterator<Item = &'a CfgRule>
308 where
309 Self: 'a,
310 {
311 self.rules.iter()
312 }
313
314 pub fn column(&self, col: usize) -> impl Iterator<Item = DotInfo> + '_ {
315 let mapper = move |rule: &CfgRule| DotInfo {
316 lhs: rule.lhs,
317 predot: rule.rhs.get(col.wrapping_sub(1)).copied(),
318 postdot: rule.rhs.get(col).copied(),
319 earley: Some(rule.history.dot(col)),
320 };
321 self.rules().map(mapper)
322 }
323
324 pub fn sym_source(&self) -> &SymbolSource {
325 &self.sym_source
326 }
327
328 pub fn sym_source_mut(&mut self) -> &mut SymbolSource {
329 &mut self.sym_source
330 }
331
332 pub fn retain(&mut self, f: impl FnMut(&CfgRule) -> bool) {
333 self.rules.retain(f);
334 }
335
336 fn maybe_process_rule(&mut self, rule: &CfgRule) -> bool {
337 if self.rule_rhs_len_allowed_range().contains(&rule.rhs.len()) {
338 return true;
339 }
340
341 let mut rhs_rev = rule.rhs.to_vec();
351 rhs_rev.reverse();
352 let mut tail = Vec::new();
353 let mut i: u32 = 0;
354 while !rhs_rev.is_empty() {
355 let tail_idx = rhs_rev
356 .len()
357 .saturating_sub(self.rule_rhs_len_allowed_range().end);
358 tail.extend(rhs_rev.drain(tail_idx..));
359 tail.reverse();
360 let lhs;
361 if rhs_rev.is_empty() {
362 lhs = rule.lhs;
363 } else {
364 lhs = self.next_sym(None);
365 rhs_rev.push(lhs);
366 }
367 let history;
368 if i == 0 && rhs_rev.is_empty() {
369 history = rule.history;
370 } else {
371 let history_node_binarize = HistoryNodeBinarize {
372 prev: rule.history,
373 height: i,
374 full_len: rule.rhs.len(),
375 is_top: rhs_rev.is_empty(),
376 };
377 println!("{:?}", rule.rhs);
378 history = history_node_binarize.into();
379 }
380 self.rules.push(CfgRule::new(lhs, &tail[..], history));
381 tail.clear();
382 i += 1;
383 }
384
385 false
386 }
387
388 pub fn add_rule(&mut self, rule: CfgRule) {
389 if self.maybe_process_rule(&rule) {
390 self.rules.push(rule);
391 }
392 }
393
394 pub fn clear_rules(&mut self) {
395 self.rules.clear();
396 }
397
398 pub fn reverse(&mut self) {
400 for rule in &mut self.rules {
401 rule.rhs = rule.rhs.iter().copied().rev().collect();
402 }
403 }
404
405 pub fn rule(&mut self, lhs: Symbol) -> RuleBuilder<'_> {
407 RuleBuilder::new(self).rule(lhs)
408 }
409
410 pub fn precedenced_rule(&mut self, lhs: Symbol) -> PrecedencedRuleBuilder<'_> {
412 PrecedencedRuleBuilder::new(self, lhs)
413 }
414
415 pub fn rhs_closure_for_all(&self, property: &mut SymbolBitSet) {
416 self.rhs_closure(property, RhsPropertyMode::All)
417 }
418
419 pub fn rhs_closure_for_any(&self, property: &mut SymbolBitSet) {
420 self.rhs_closure(property, RhsPropertyMode::Any)
421 }
422
423 pub fn rhs_closure(&self, property: &mut SymbolBitSet, property_mode: RhsPropertyMode) {
424 let mut tmp_stack = self.tmp_stack.borrow_mut();
425 tmp_stack.extend(property.iter());
426
427 let occurence_map = OccurenceMap::from_rules(self.rules());
428
429 while let Some(work_sym) = tmp_stack.pop() {
430 for &rule_id in occurence_map.get(work_sym).rhs() {
431 let rule = &self.rules[rule_id];
432 let mut rhs_iter = rule.rhs.iter();
433 let get_property = |&sym: &Symbol| property[sym];
434 let rhs_satifies_property = match property_mode {
435 RhsPropertyMode::All => rhs_iter.all(get_property),
436 RhsPropertyMode::Any => rhs_iter.any(get_property),
437 };
438 if !property[rule.lhs] && rhs_satifies_property {
439 property.set(rule.lhs, true);
440 tmp_stack.push(rule.lhs);
441 }
442 }
443 }
444
445 tmp_stack.clear();
446 }
447
448 pub fn rhs_closure_with_values(&mut self, value: &mut [Option<u32>]) {
449 let mut tmp_stack = self.tmp_stack.borrow_mut();
450 for (maybe_sym_value, sym) in value.iter().zip(SymbolSource::generate_fresh()) {
451 if maybe_sym_value.is_some() {
452 tmp_stack.push(sym);
453 }
454 }
455
456 let occurence_map = OccurenceMap::from_rules(self.rules());
457
458 while let Some(work_sym) = tmp_stack.pop() {
459 let rules = occurence_map
460 .get(work_sym)
461 .rhs()
462 .iter()
463 .map(|&rule_id| &self.rules[rule_id]);
464 for rule in rules {
465 let maybe_work_value = rule
466 .rhs
467 .iter()
468 .try_fold(0, |acc, elem| value[elem.usize()].map(|val| acc + val));
469 if let Some(work_value) = maybe_work_value {
470 if let Some(current_value) = value[rule.lhs.usize()] {
471 if current_value <= work_value {
472 continue;
473 }
474 }
475 value[rule.lhs.usize()] = Some(work_value);
476 tmp_stack.push(rule.lhs);
477 }
478 }
479 }
480
481 tmp_stack.clear();
482 }
483
484 pub fn wrap_input(&mut self) {
485 self.wrapped_roots.clear();
486 let roots_len = self.roots.len();
487 let roots = mem::replace(&mut self.roots, MaybeSmallVec::with_capacity(roots_len));
488 for inner_root in roots {
489 let [root, start_of_input, end_of_input] = self.sym_source.with_names([
490 Some("root"),
491 Some("start_of_input"),
492 Some("end_of_input"),
493 ]);
494 self.add_rule(CfgRule {
495 lhs: root,
496 rhs: [start_of_input, inner_root, end_of_input].into(),
497 history: RootHistoryNode::Rule { lhs: root }.into(),
498 });
499 self.wrapped_roots.push(WrappedRoot {
500 root,
501 start_of_input,
502 inner_root,
503 end_of_input,
504 });
505 self.roots.push(root);
506 }
507 }
508
509 pub fn is_empty(&self) -> bool {
510 if self.wrapped_roots.is_empty() {
511 self.rules.is_empty()
512 } else {
513 let mut roots = self.roots.clone();
514 roots.sort();
515 self.rules()
516 .all(|rule| roots.binary_search(&rule.lhs).is_ok())
517 }
518 }
519
520 pub fn stringify_to_bnf(&self) -> String {
521 let mut result = String::new();
522 for rule in self.rules() {
523 let stringify_sym = |sym| format!("{}({})", self.sym_source.name_of(sym), sym.usize());
524 let lhs = stringify_sym(rule.lhs);
525 let rhs = if rule.rhs.is_empty() {
526 "()".into()
527 } else {
528 rule.rhs
529 .iter()
530 .copied()
531 .map(stringify_sym)
532 .enumerate()
533 .map(|(i, elem)| {
534 if i == 0 {
535 elem.to_string()
536 } else {
537 format!(" ~ {}", elem)
538 }
539 })
540 .collect::<String>()
541 };
542 writeln!(&mut result, "{} ::= {};", lhs, rhs).expect("writing to String failed");
543 }
544 result
545 }
546}
547
548impl CfgRule {
549 pub fn new(lhs: Symbol, rhs: impl AsRef<[Symbol]>, history: History) -> Self {
551 CfgRule {
552 lhs,
553 rhs: rhs.as_ref().into(),
554 history,
555 }
556 }
557
558 pub fn named(&self, sym_source: &SymbolSource) -> NamedCfgRule {
559 NamedCfgRule {
560 lhs: self.lhs,
561 rhs: self.rhs.clone(),
562 history: Some(self.history),
563 names: sym_source.names(),
564 }
565 }
566}
567
568impl NamedCfgRule {
569 pub fn new(names: Vec<Option<SymbolName>>) -> Self {
570 let mut iter = SymbolSource::generate_fresh();
571 NamedCfgRule {
572 lhs: iter.next().unwrap(),
573 rhs: iter.take(names.len() - 1).collect::<Vec<_>>().into(),
574 history: None,
575 names,
576 }
577 }
578
579 pub fn with_history(names: Vec<Option<SymbolName>>, history: History) -> Self {
580 let mut iter = SymbolSource::generate_fresh();
581 NamedCfgRule {
582 lhs: iter.next().unwrap(),
583 rhs: iter.take(names.len() - 1).collect::<Vec<_>>().into(),
584 history: Some(history),
585 names,
586 }
587 }
588}
589
590#[macro_export]
591macro_rules! named_cfg_rule {
592 ($lhs:ident ::= $($rhs:ident)*) => {
593 {
594 use std::rc::Rc;
595 NamedCfgRule::new(vec![Some(stringify!($lhs).into()), $(Some(stringify!($rhs).into())),*])
596 }
597 };
598}
599
600impl Eq for NamedCfgRule {
601 fn assert_receiver_is_total_eq(&self) {}
602}
603
604impl PartialEq for NamedCfgRule {
605 fn eq(&self, other: &Self) -> bool {
606 self.names[self.lhs.usize()] == other.names[other.lhs.usize()]
607 && self.rhs.len() == other.rhs.len()
608 && self
609 .rhs
610 .iter()
611 .zip(other.rhs.iter())
612 .all(|(sym_a, sym_b)| self.names[sym_a.usize()] == other.names[sym_b.usize()])
613 && (self.history.is_none() || other.history.is_none() || self.history == other.history)
614 }
615}
616
617impl fmt::Debug for NamedCfgRule {
618 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
619 let gensym = &"gensym".to_string();
620 let lhs = self.names[self.lhs.usize()].as_deref().unwrap_or(gensym);
621 let rhs = self
622 .rhs
623 .iter()
624 .map(|sym| self.names[sym.usize()].as_deref().unwrap_or(gensym))
625 .collect::<Vec<_>>();
626 f.debug_struct("NamedCfgRule")
627 .field("lhs", &lhs)
628 .field("rhs", &rhs)
629 .field("history", &self.history)
630 .finish()
631 }
632}