use std::{fmt, sync::LazyLock};
use crate::parser::Literal;
use regex_syntax::{
hir::{Dot, Hir, HirKind},
ParserBuilder,
};
/// Lazily built table holding the HIR produced by [`Hir::dot`] for each of the
/// six [`Dot`] variants listed below.
///
/// `Pattern::has_greedy_all` compares the sub-expression of a repetition
/// against this table to decide whether it is a "dot".
static DOT_HIRS: LazyLock<[Hir; 6]> = LazyLock::new(|| {
    [
        Dot::AnyChar,
        Dot::AnyByte,
        Dot::AnyByteExceptLF,
        Dot::AnyCharExceptLF,
        Dot::AnyByteExceptCRLF,
        Dot::AnyCharExceptCRLF,
    ]
    .map(Hir::dot)
});
/// A pattern paired with the source text it was written as and its parsed
/// `regex_syntax` HIR.
#[derive(Clone, Debug)]
pub struct Pattern {
/// `true` when the pattern is displayed as `#[token(..)]`, `false` when it
/// is displayed as `#[regex(..)]`.
is_literal: bool,
/// Source text of the pattern, echoed back by the `Display` impl.
source: String,
/// High-level intermediate representation of the pattern.
hir: Hir,
}
impl Pattern {
    /// Parses `regex` into a [`Pattern`].
    ///
    /// * `is_literal` - stored as-is; selects `#[token(..)]` vs `#[regex(..)]`
    ///   when the pattern is displayed.
    /// * `regex` - the regular expression text handed to the parser.
    /// * `source` - the text stored as the pattern's source (see
    ///   [`Pattern::source`]).
    /// * `unicode` - forwarded to the parser's `unicode` flag.
    /// * `ignore_case` - forwarded to the parser's `case_insensitive` flag.
    ///
    /// # Errors
    ///
    /// Returns the parser's error message, rendered as a `String`, when
    /// `regex` cannot be parsed.
    pub fn compile(
        is_literal: bool,
        regex: &str,
        source: String,
        unicode: bool,
        ignore_case: bool,
    ) -> Result<Pattern, String> {
        let hir = ParserBuilder::new()
            // Do not require the resulting HIR to match only valid UTF-8.
            .utf8(false)
            .unicode(unicode)
            .case_insensitive(ignore_case)
            .build()
            .parse(regex)
            .map_err(|err| err.to_string())?;

        Ok(Pattern {
            is_literal,
            source,
            hir,
        })
    }

    /// Builds a literal [`Pattern`] directly from a string or byte-string
    /// literal, without going through the regex parser.
    ///
    /// The pattern's source text is the literal's token rendered as a string.
    /// This constructor currently always returns `Ok`.
    pub fn compile_lit(source: &Literal) -> Result<Pattern, String> {
        let hir = match source {
            Literal::Utf8(lit_str) => Hir::literal(lit_str.value().as_bytes()),
            Literal::Bytes(lit_byte_str) => Hir::literal(lit_byte_str.value()),
        };

        Ok(Pattern {
            is_literal: true,
            source: source.token().to_string(),
            hir,
        })
    }

    /// Returns the priority of this pattern, computed from the shape of its
    /// HIR (see [`Pattern::complexity`] for the weights).
    pub fn priority(&self) -> usize {
        Self::complexity(&self.hir)
    }

    /// Recursively scores `hir`.
    ///
    /// * empty expressions and look-around assertions score `0`;
    /// * a literal scores `2` per character (or per byte when it is not
    ///   valid UTF-8);
    /// * a class scores `2`;
    /// * a repetition scores its minimum count times the score of its
    ///   sub-expression;
    /// * a capture scores the same as its sub-expression;
    /// * a concatenation scores the sum of its parts;
    /// * an alternation scores the minimum of its branches.
    ///
    /// Arithmetic saturates at `usize::MAX` so that nested large counted
    /// repetitions cannot overflow.
    fn complexity(hir: &Hir) -> usize {
        match hir.kind() {
            HirKind::Empty | HirKind::Look(_) => 0,
            HirKind::Literal(lit) => match std::str::from_utf8(&lit.0) {
                Ok(s) => 2 * s.chars().count(),
                Err(_) => 2 * lit.0.len(),
            },
            HirKind::Class(_) => 2,
            HirKind::Repetition(repetition) => {
                // `min` is a `u32`; saturate instead of overflowing when
                // repetitions such as `(?:a{N}){M}` are nested.
                (repetition.min as usize).saturating_mul(Self::complexity(&repetition.sub))
            }
            HirKind::Capture(capture) => Self::complexity(&capture.sub),
            HirKind::Concat(hirs) => hirs
                .iter()
                .map(Self::complexity)
                .fold(0, usize::saturating_add),
            HirKind::Alternation(hirs) => hirs.iter().map(Self::complexity).min().unwrap_or(0),
        }
    }

    /// Returns `true` if the pattern contains a greedy, unbounded repetition
    /// of a dot (e.g. `.*` or `.+`) anywhere in its HIR.
    pub fn check_for_greedy_all(&self) -> bool {
        Self::has_greedy_all(&self.hir)
    }

    /// Recursive worker for [`Pattern::check_for_greedy_all`].
    ///
    /// A repetition matches when its sub-expression is one of `DOT_HIRS`, it
    /// has no upper bound, and it is greedy. Captures, concatenations,
    /// alternations and the sub-expressions of other repetitions are searched
    /// recursively, so `(?:a.*)+` is detected as well as a bare `.*`.
    fn has_greedy_all(hir: &Hir) -> bool {
        match hir.kind() {
            HirKind::Repetition(repetition) => {
                let is_dot = DOT_HIRS.contains(&repetition.sub);
                let is_greedy_all = is_dot && repetition.max.is_none() && repetition.greedy;
                // Also look inside the repeated expression: a greedy dot-all
                // may be nested below another repetition.
                is_greedy_all || Self::has_greedy_all(&repetition.sub)
            }
            HirKind::Empty | HirKind::Literal(_) | HirKind::Class(_) | HirKind::Look(_) => false,
            HirKind::Capture(capture) => Self::has_greedy_all(&capture.sub),
            HirKind::Concat(hirs) | HirKind::Alternation(hirs) => {
                hirs.iter().any(Self::has_greedy_all)
            }
        }
    }

    /// Returns the pattern's HIR.
    pub fn hir(&self) -> &Hir {
        &self.hir
    }

    /// Returns the source text stored for this pattern.
    pub fn source(&self) -> &str {
        &self.source
    }
}
/// Renders the pattern as the attribute it is written with: `#[token(..)]`
/// for literal patterns and `#[regex(..)]` otherwise, with the stored source
/// text between the parentheses.
impl fmt::Display for Pattern {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Handle the regex case first; everything else is a literal token.
        if !self.is_literal {
            return write!(f, "#[regex({})]", self.source);
        }
        write!(f, "#[token({})]", self.source)
    }
}