use boreal_parser::regex::AssertionKind;
use crate::regex::{visit, Class, Hir, VisitAction, Visitor};
pub struct HirAnalysis {
pub has_start_or_end_line: bool,
pub has_repetitions: bool,
pub has_greedy_repetitions: bool,
pub has_word_boundaries: bool,
pub has_classes: bool,
pub has_alternations: bool,
pub nb_alt_literals: Option<u32>,
}
pub fn analyze_hir(hir: &Hir, dot_all: bool) -> HirAnalysis {
visit(
hir,
HirAnalyser {
dot_all,
has_start_or_end_line: false,
has_repetitions: false,
has_greedy_repetitions: false,
has_word_boundaries: false,
has_classes: false,
has_alternations: false,
nb_alt_literals: Some(1),
alt_stack: Vec::new(),
},
)
}
struct HirAnalyser {
dot_all: bool,
has_start_or_end_line: bool,
has_repetitions: bool,
has_greedy_repetitions: bool,
has_word_boundaries: bool,
has_classes: bool,
has_alternations: bool,
nb_alt_literals: Option<u32>,
alt_stack: Vec<AltStack>,
}
struct AltStack {
prev_nb_alt_literals: Option<u32>,
branches_nb_alt_literals: Option<u32>,
}
impl Visitor for HirAnalyser {
type Output = HirAnalysis;
fn visit_pre(&mut self, hir: &Hir) -> VisitAction {
match hir {
Hir::Mask { negated, .. } => {
if let Some(count) = &mut self.nb_alt_literals {
self.nb_alt_literals = count.checked_mul(if *negated { 256 - 16 } else { 16 });
}
}
Hir::Assertion(kind) => {
match kind {
AssertionKind::StartLine | AssertionKind::EndLine => {
self.has_start_or_end_line = true;
}
AssertionKind::WordBoundary | AssertionKind::NonWordBoundary => {
self.has_word_boundaries = true;
}
}
self.nb_alt_literals = None;
}
Hir::Repetition { greedy, .. } => {
self.has_repetitions = true;
if *greedy {
self.has_greedy_repetitions = true;
}
self.nb_alt_literals = None;
}
Hir::Dot => {
if let Some(count) = &mut self.nb_alt_literals {
self.nb_alt_literals = count.checked_mul(if self.dot_all { 256 } else { 255 });
}
}
Hir::Class(Class { bitmap, .. }) => {
if let Some(count) = &mut self.nb_alt_literals {
self.nb_alt_literals = count.checked_mul(bitmap.count_ones());
}
self.has_classes = true;
}
Hir::Literal(_) | Hir::Empty | Hir::Group(_) | Hir::Concat(_) => (),
Hir::Alternation(_) => {
self.alt_stack.push(AltStack {
prev_nb_alt_literals: self.nb_alt_literals,
branches_nb_alt_literals: Some(0),
});
self.nb_alt_literals = Some(1);
self.has_alternations = true;
}
}
VisitAction::Continue
}
fn visit_alternation_in(&mut self) {
let last_pos = self.alt_stack.len() - 1;
let v = &mut self.alt_stack[last_pos];
match (v.branches_nb_alt_literals, self.nb_alt_literals) {
(Some(bc), Some(c)) => v.branches_nb_alt_literals = bc.checked_add(c),
(Some(_), None) => v.branches_nb_alt_literals = None,
(None, _) => (),
}
self.nb_alt_literals = Some(1);
}
fn visit_post(&mut self, hir: &Hir) {
if let Hir::Alternation(_) = hir {
self.visit_alternation_in();
let stack = self.alt_stack.pop().unwrap();
match (stack.prev_nb_alt_literals, stack.branches_nb_alt_literals) {
(None, _) | (Some(_), None) => self.nb_alt_literals = None,
(Some(c), Some(bc)) => {
self.nb_alt_literals = c.checked_mul(bc);
}
}
}
}
fn finish(self) -> Self::Output {
HirAnalysis {
has_start_or_end_line: self.has_start_or_end_line,
has_repetitions: self.has_repetitions,
has_greedy_repetitions: self.has_greedy_repetitions,
has_word_boundaries: self.has_word_boundaries,
has_classes: self.has_classes,
has_alternations: self.has_alternations,
nb_alt_literals: self.nb_alt_literals,
}
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::test_helpers::expr_to_hir;
fn analyze_expr(expr: &str, dot_all: bool) -> HirAnalysis {
analyze_hir(&expr_to_hir(expr), dot_all)
}
#[test]
fn test_count_alt_literals() {
#[track_caller]
fn test(expr: &str, dot_all: bool, expected: Option<u32>) {
let res = analyze_expr(expr, dot_all);
assert_eq!(res.nb_alt_literals, expected);
}
test(r"^a", false, None);
test(r"a$", false, None);
test(r"a\b", false, None);
test(r"a\B", false, None);
test(r"a?", false, None);
test(r"a+?", false, None);
test(r"a*", false, None);
test(r"a{2}", false, None);
test(r"a{1,}", false, None);
test(r"a{2,3}", false, None);
test("{ AB [1-] 01 }", false, None);
test("{ AB [-2] 01 }", false, None);
test("{ AB [1-2] 01 }", false, None);
test(r"[a-d_%]", false, Some(6));
test(r"[^ab]", false, Some(254));
test(r"\w", false, Some(63));
test(r".", false, Some(255));
test(r".", true, Some(256));
test(r"a(b)()e", false, Some(1));
test(r"a|f(b|c)|((ab)|)c|d", false, Some(6));
test("{ AB CD 01 }", false, Some(1));
test("{ AB ?D 01 }", false, Some(16));
test("{ D? FE }", false, Some(16));
test("{ AB ( 01 | 23 45) ( 67 | 89 | F0 ) CD }", false, Some(6));
test(
"{ ( 01 | ( 23 | FF ) ( ( 45 | 67 ) | 58 ( AA | BB | CC ) | DD ) ) }",
false,
Some(13),
);
test("{ ( AA | BB ) F? }", false, Some(32));
test("{ AB ?? 01 }", true, Some(256));
test("{ AB [1] 01 }", false, Some(255));
test(
"{ AB (?A | ?B | ?C | ?D | ?E | ?F | ?0) 01 }",
false,
Some(112),
);
test("{ AA ( CC | ?? | BB ) }", true, Some(258));
test("{ AA ~?D BB }", false, Some(240));
test(r"{ AA [1-3] ?A ?B }", false, None);
test(r"a\b(1|2)c", false, None);
test(r"a(\b|2)c", false, None);
test(r"a.b(|)c", false, Some(510));
test("{ AA ~?D BB }", false, Some(240));
}
#[test]
fn test_flags() {
let res = analyze_expr("^a32+", false);
assert!(res.has_start_or_end_line);
assert!(res.has_repetitions);
assert!(res.has_greedy_repetitions);
assert!(!res.has_word_boundaries);
assert!(!res.has_classes);
assert!(!res.has_alternations);
let res = analyze_expr(r"\b[Ww]o(r|R)d\b", false);
assert!(!res.has_start_or_end_line);
assert!(!res.has_repetitions);
assert!(!res.has_greedy_repetitions);
assert!(res.has_word_boundaries);
assert!(res.has_classes);
assert!(res.has_alternations);
let res = analyze_expr(r"{ 51 [-3] ( ?A ?? AF | FA ) }", false);
assert!(!res.has_start_or_end_line);
assert!(res.has_repetitions);
assert!(!res.has_greedy_repetitions);
assert!(!res.has_word_boundaries);
assert!(!res.has_classes);
assert!(res.has_alternations);
let res = analyze_expr(r"\Ba{1,3}?$", false);
assert!(res.has_start_or_end_line);
assert!(res.has_repetitions);
assert!(!res.has_greedy_repetitions);
assert!(res.has_word_boundaries);
assert!(!res.has_classes);
assert!(!res.has_alternations);
}
}