#![allow(clippy::match_same_arms, clippy::too_many_lines)]
use crate::types::FuzzyLimits;
/// Boolean switches that accompany a pattern.
///
/// Every flag is `false` by default. Start from [`MatchFlags::new`] (or
/// `Default`) and turn flags on through the `with_*` builders or by writing
/// the public fields directly.
///
/// NOTE(review): the code that interprets these flags is not in this file;
/// the per-field summaries are inferred from the field names only.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub struct MatchFlags {
    /// Enabled by [`MatchFlags::with_best_match`].
    pub best_match: bool,
    /// Enabled by [`MatchFlags::with_enhance_match`].
    pub enhance_match: bool,
    /// Enabled by [`MatchFlags::with_posix`].
    pub posix: bool,
    /// Verbose-mode flag.
    pub verbose: bool,
    /// Dot-matches-all flag.
    pub dot_all: bool,
    /// Multi-line flag.
    pub multi_line: bool,
    /// Ungreedy flag.
    pub ungreedy: bool,
    /// Case-insensitive flag.
    pub case_insensitive: bool,
    /// Global-matching flag.
    pub global: bool,
    /// Unicode-mode flag.
    pub unicode: bool,
}

impl MatchFlags {
    /// Builds a flag set with every flag cleared (identical to `Default`).
    #[must_use]
    pub fn new() -> Self {
        Self::default()
    }

    /// Returns these flags with `best_match` switched on.
    #[must_use]
    pub fn with_best_match(self) -> Self {
        Self {
            best_match: true,
            ..self
        }
    }

    /// Returns these flags with `enhance_match` switched on.
    #[must_use]
    pub fn with_enhance_match(self) -> Self {
        Self {
            enhance_match: true,
            ..self
        }
    }

    /// Returns these flags with `posix` switched on.
    #[must_use]
    pub fn with_posix(self) -> Self {
        Self {
            posix: true,
            ..self
        }
    }
}
/// How much approximate matching a pattern element tolerates.
///
/// `Fuzziness::to_limits` turns each variant into an optional `FuzzyLimits`;
/// the variant summaries below describe that conversion.
#[derive(Debug, Clone, PartialEq, Default)]
pub enum Fuzziness {
    /// A single overall edit budget; becomes `FuzzyLimits::new().edits(n)`.
    Edits(u8),
    /// Fully specified limits, used as given.
    Detailed(FuzzyLimits),
    /// Constraint set converted through `MrabFuzziness::to_limits`; the only
    /// variant that can report a minimum edit count via `min_edits`.
    MrabStyle(MrabFuzziness),
    /// No setting of its own (the default): resolves to the caller-supplied
    /// default edit count, or to no limits when that default is zero.
    #[default]
    Inherited,
    /// Zero edits allowed.
    Exact,
}
/// Fine-grained fuzzy-matching constraints: per-edit-kind caps, an overall
/// error range, "unlimited" markers, per-edit costs and character
/// restrictions.
///
/// NOTE(review): the name suggests this mirrors the fuzzy-constraint syntax
/// of MRAB's `regex` module — confirm against the parser that fills it in.
#[derive(Debug, Clone, PartialEq, Default)]
pub struct MrabFuzziness {
    /// Explicit cap on insertions; wins over `unlimited_insertions`.
    pub max_insertions: Option<u8>,
    /// Explicit cap on deletions; wins over `unlimited_deletions`.
    pub max_deletions: Option<u8>,
    /// Explicit cap on substitutions; wins over `unlimited_substitutions`.
    pub max_substitutions: Option<u8>,
    /// Explicit cap on transpositions; wins over `unlimited_transpositions`.
    pub max_transpositions: Option<u8>,
    /// Explicit cap on total errors; wins over `unlimited_errors` and over
    /// any budget derived from `max_cost`.
    pub max_errors: Option<u8>,
    /// Lower bound on total errors. Not used by [`MrabFuzziness::to_limits`].
    pub min_errors: Option<u8>,
    /// Insertions are uncapped (only consulted when `max_insertions` is `None`).
    pub unlimited_insertions: bool,
    /// Deletions are uncapped (only consulted when `max_deletions` is `None`).
    pub unlimited_deletions: bool,
    /// Substitutions are uncapped (only consulted when `max_substitutions` is `None`).
    pub unlimited_substitutions: bool,
    /// Transpositions are uncapped (only consulted when `max_transpositions` is `None`).
    pub unlimited_transpositions: bool,
    /// Total errors are uncapped (only consulted when `max_errors` is `None`).
    pub unlimited_errors: bool,
    /// Cost of one insertion; treated as 1 when absent.
    pub insertion_cost: Option<u8>,
    /// Cost of one deletion; treated as 1 when absent.
    pub deletion_cost: Option<u8>,
    /// Cost of one substitution; treated as 1 when absent.
    pub substitution_cost: Option<u8>,
    /// Cost of one transposition; treated as 1 when absent.
    pub transposition_cost: Option<u8>,
    /// Cost ceiling from which an edit budget is derived when no error cap
    /// is given.
    pub max_cost: Option<u8>,
    /// Character restriction for substitutions. Not read by any method here.
    pub substitution_chars: Option<CharClass>,
    /// Character restriction for insertions. Not read by any method here.
    pub insertion_chars: Option<CharClass>,
    /// Character restriction for deletions. Not read by any method here.
    pub deletion_chars: Option<CharClass>,
}

impl MrabFuzziness {
    /// Creates a constraint set with nothing specified (same as `Default`).
    #[must_use]
    pub fn new() -> Self {
        Self::default()
    }

    /// Caps the number of insertions at `max`.
    #[must_use]
    pub fn insertions(self, max: u8) -> Self {
        Self {
            max_insertions: Some(max),
            ..self
        }
    }

    /// Caps the number of deletions at `max`.
    #[must_use]
    pub fn deletions(self, max: u8) -> Self {
        Self {
            max_deletions: Some(max),
            ..self
        }
    }

    /// Caps the number of substitutions at `max`.
    #[must_use]
    pub fn substitutions(self, max: u8) -> Self {
        Self {
            max_substitutions: Some(max),
            ..self
        }
    }

    /// Caps the total number of errors at `max`.
    #[must_use]
    pub fn errors(self, max: u8) -> Self {
        Self {
            max_errors: Some(max),
            ..self
        }
    }

    /// Sets both the minimum and the maximum total error count.
    ///
    /// The two values are stored as given; `min <= max` is not checked.
    #[must_use]
    pub fn error_range(self, min: u8, max: u8) -> Self {
        Self {
            min_errors: Some(min),
            max_errors: Some(max),
            ..self
        }
    }

    /// Converts these constraints into a `FuzzyLimits`.
    ///
    /// For each edit kind an explicit cap is applied if present; otherwise
    /// the matching `unlimited_*` flag applies the sentinel 255; otherwise
    /// that limit is left at whatever `FuzzyLimits::new()` provides.
    ///
    /// The total-edit limit follows the same rule, with one extra fallback:
    /// when neither `max_errors` nor `unlimited_errors` is set but `max_cost`
    /// is, the budget is `(max_cost - 1) / cheapest_cost`, where the cheapest
    /// cost is the smallest non-zero per-edit cost (absent costs count as 1,
    /// and 1 is used if every cost is zero).
    #[must_use]
    pub fn to_limits(&self) -> FuzzyLimits {
        const UNLIMITED: u8 = 255;

        // Explicit cap first, then the "unlimited" sentinel, else nothing.
        let cap = |explicit: Option<u8>, unlimited: bool| match (explicit, unlimited) {
            (Some(n), _) => Some(n),
            (None, true) => Some(UNLIMITED),
            (None, false) => None,
        };

        let mut limits = FuzzyLimits::new();
        if let Some(n) = cap(self.max_insertions, self.unlimited_insertions) {
            limits = limits.insertions(n);
        }
        if let Some(n) = cap(self.max_deletions, self.unlimited_deletions) {
            limits = limits.deletions(n);
        }
        if let Some(n) = cap(self.max_substitutions, self.unlimited_substitutions) {
            limits = limits.substitutions(n);
        }
        if let Some(n) = cap(self.max_transpositions, self.unlimited_transpositions) {
            limits = limits.swaps(n);
        }

        let total = cap(self.max_errors, self.unlimited_errors).or_else(|| {
            self.max_cost.map(|max_cost| {
                let costs = [
                    self.insertion_cost.unwrap_or(1),
                    self.deletion_cost.unwrap_or(1),
                    self.substitution_cost.unwrap_or(1),
                    self.transposition_cost.unwrap_or(1),
                ];
                // Zero costs are skipped so the division below cannot trap.
                let cheapest = costs
                    .iter()
                    .copied()
                    .filter(|&cost| cost > 0)
                    .min()
                    .unwrap_or(1);
                // NOTE(review): the `- 1` presumably treats `max_cost` as an
                // exclusive bound — confirm against the parser.
                max_cost.saturating_sub(1) / cheapest
            })
        });
        if let Some(n) = total {
            limits = limits.edits(n);
        }
        limits
    }

    /// Reports whether any of the `unlimited_*` flags is set.
    #[must_use]
    pub fn has_unlimited(&self) -> bool {
        [
            self.unlimited_insertions,
            self.unlimited_deletions,
            self.unlimited_substitutions,
            self.unlimited_transpositions,
            self.unlimited_errors,
        ]
        .iter()
        .any(|&flag| flag)
    }
}
impl Fuzziness {
    /// Resolves this setting into concrete limits.
    ///
    /// `default_edits` is only consulted for [`Fuzziness::Inherited`]: a
    /// non-zero value becomes an edit budget, zero yields `None`. Every other
    /// variant always returns `Some`.
    #[must_use]
    pub fn to_limits(&self, default_edits: u8) -> Option<FuzzyLimits> {
        let resolved = match self {
            // Nothing to inherit: report "no limits" rather than a zero budget.
            Fuzziness::Inherited if default_edits == 0 => return None,
            Fuzziness::Inherited => FuzzyLimits::new().edits(default_edits),
            Fuzziness::Exact => FuzzyLimits::new().edits(0),
            Fuzziness::Edits(budget) => FuzzyLimits::new().edits(*budget),
            Fuzziness::Detailed(explicit) => explicit.clone(),
            Fuzziness::MrabStyle(constraints) => constraints.to_limits(),
        };
        Some(resolved)
    }

    /// Returns the minimum number of edits required, which only
    /// [`Fuzziness::MrabStyle`] can carry (its `min_errors` field).
    #[must_use]
    pub fn min_edits(&self) -> Option<u8> {
        if let Fuzziness::MrabStyle(constraints) = self {
            constraints.min_errors
        } else {
            None
        }
    }
}
/// Syntax tree of a parsed pattern.
///
/// NOTE(review): the code that builds and interprets these nodes is outside
/// this file; variant summaries are inferred from names and field types.
#[derive(Debug, Clone, PartialEq)]
pub enum Ast {
    /// The empty pattern (see [`Ast::is_empty`]).
    Empty,
    /// A run of literal text with its own fuzziness setting.
    Literal {
        text: String,
        fuzziness: Fuzziness,
    },
    /// A single character.
    Char(char),
    /// A character class.
    CharClass(CharClass),
    /// A sequence of sub-expressions.
    Concat(Vec<Ast>),
    /// A choice between sub-expressions.
    Alternation(Vec<Ast>),
    /// A repeated sub-expression.
    Quantified {
        expr: Box<Ast>,
        quantifier: Quantifier,
        greedy: bool,
    },
    /// A capturing group; `name` is `Some` for named groups.
    Group {
        index: usize,
        name: Option<String>,
        expr: Box<Ast>,
    },
    /// A group without a capture index, carrying a fuzziness setting.
    NonCapturingGroup {
        expr: Box<Ast>,
        fuzziness: Fuzziness,
    },
    /// A zero-width anchor.
    Anchor(Anchor),
    /// A lookahead assertion; `positive` presumably selects the polarity.
    Lookahead {
        positive: bool,
        expr: Box<Ast>,
    },
    /// A lookbehind assertion; `positive` presumably selects the polarity.
    Lookbehind {
        positive: bool,
        expr: Box<Ast>,
    },
    /// A reference to capture group number `group`.
    Backreference {
        group: usize,
        fuzziness: Fuzziness,
    },
    /// A reference to a list identified by `name`.
    NamedList {
        name: String,
    },
    /// Marker node named "reset match start".
    ResetMatchStart,
    /// An atomic group.
    AtomicGroup {
        expr: Box<Ast>,
    },
    /// Recursion into the whole pattern.
    RecursivePattern,
    /// Recursion into the group with the given number.
    RecursiveGroup {
        group: usize,
    },
    /// Recursion into the group with the given name.
    RecursiveNamedGroup {
        name: String,
    },
}

impl Ast {
    /// Builds a literal node whose fuzziness is [`Fuzziness::Inherited`].
    pub fn literal(text: impl Into<String>) -> Self {
        Self::literal_fuzzy(text, Fuzziness::Inherited)
    }

    /// Builds a literal node with an explicit fuzziness setting.
    pub fn literal_fuzzy(text: impl Into<String>, fuzziness: Fuzziness) -> Self {
        let text = text.into();
        Self::Literal { text, fuzziness }
    }

    /// Wraps `expr` in a quantifier node, boxing it.
    #[must_use]
    pub fn quantified(expr: Ast, quantifier: Quantifier, greedy: bool) -> Self {
        let expr = Box::new(expr);
        Self::Quantified {
            expr,
            quantifier,
            greedy,
        }
    }

    /// Builds an unnamed capturing group with the given index.
    #[must_use]
    pub fn group(index: usize, expr: Ast) -> Self {
        let expr = Box::new(expr);
        Self::Group {
            index,
            name: None,
            expr,
        }
    }

    /// Builds a named capturing group with the given index.
    pub fn named_group(index: usize, name: impl Into<String>, expr: Ast) -> Self {
        let name = Some(name.into());
        let expr = Box::new(expr);
        Self::Group { index, name, expr }
    }

    /// Reports whether this node is exactly [`Ast::Empty`].
    ///
    /// Composite nodes with no children (for example an empty `Concat`) are
    /// not considered empty.
    #[must_use]
    pub fn is_empty(&self) -> bool {
        matches!(self, Self::Empty)
    }
}
/// A set of characters described by a list of items, optionally negated.
#[derive(Debug, Clone, PartialEq)]
pub struct CharClass {
    /// When `true`, the class matches exactly the characters that no item
    /// matches.
    pub negated: bool,
    /// The members of the class; a character is "in" the class when any
    /// item matches it.
    pub items: Vec<CharClassItem>,
}

impl CharClass {
    /// Builds a class from its parts.
    #[must_use]
    pub fn new(negated: bool, items: Vec<CharClassItem>) -> Self {
        Self { negated, items }
    }

    /// Class holding only [`NamedClass::AnyExceptNewline`].
    #[must_use]
    pub fn any() -> Self {
        Self::new(false, vec![CharClassItem::Named(NamedClass::AnyExceptNewline)])
    }

    /// Class holding only [`NamedClass::Any`].
    #[must_use]
    pub fn any_with_newlines() -> Self {
        Self::new(false, vec![CharClassItem::Named(NamedClass::Any)])
    }

    /// Class holding only [`NamedClass::Digit`].
    #[must_use]
    pub fn digit() -> Self {
        Self::new(false, vec![CharClassItem::Named(NamedClass::Digit)])
    }

    /// Class holding only [`NamedClass::Word`].
    #[must_use]
    pub fn word() -> Self {
        Self::new(false, vec![CharClassItem::Named(NamedClass::Word)])
    }

    /// Class holding only [`NamedClass::Whitespace`].
    #[must_use]
    pub fn whitespace() -> Self {
        Self::new(false, vec![CharClassItem::Named(NamedClass::Whitespace)])
    }

    /// Tests `ch` against the class using each item's plain `matches`.
    ///
    /// With no items, the result is simply `negated`.
    #[must_use]
    pub fn matches(&self, ch: char) -> bool {
        let hit = self.items.iter().any(|item| item.matches(ch));
        // Negation flips the membership result.
        hit != self.negated
    }

    /// Tests `ch` against the class with Unicode-aware named classes.
    #[must_use]
    pub fn matches_unicode(&self, ch: char) -> bool {
        let hit = self
            .items
            .iter()
            .any(|item| item.matches_with_unicode(ch, true));
        hit != self.negated
    }
}
/// One member of a character class.
#[derive(Debug, Clone, PartialEq)]
pub enum CharClassItem {
    /// Exactly this character.
    Single(char),
    /// Every character from the first to the second, both inclusive. A range
    /// whose start is greater than its end matches nothing.
    Range(char, char),
    /// A predefined class.
    Named(NamedClass),
}

impl CharClassItem {
    /// Tests `ch` against this item with Unicode handling of named classes
    /// switched off.
    #[must_use]
    pub fn matches(&self, ch: char) -> bool {
        self.matches_with_unicode(ch, false)
    }

    /// Tests `ch` against this item.
    ///
    /// `unicode` only affects the `Named` variant, where it is forwarded to
    /// `NamedClass::matches_with_unicode`.
    #[must_use]
    pub fn matches_with_unicode(&self, ch: char, unicode: bool) -> bool {
        match self {
            Self::Single(expected) => ch == *expected,
            Self::Range(first, last) => (*first..=*last).contains(&ch),
            Self::Named(class) => class.matches_with_unicode(ch, unicode),
        }
    }
}
/// Predefined character classes.
///
/// Each `Not*` variant matches exactly the characters its positive
/// counterpart rejects (under the same `unicode` setting).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum NamedClass {
    /// ASCII digits; in Unicode mode also the entries of `unicode_digit`.
    Digit,
    /// Complement of `Digit`.
    NotDigit,
    /// ASCII alphanumerics and `_`; in Unicode mode any alphanumeric, `_`,
    /// and the entries of `unicode_word_char`.
    Word,
    /// Complement of `Word`.
    NotWord,
    /// ASCII whitespace; in Unicode mode `char::is_whitespace` plus the
    /// entries of `unicode_whitespace`.
    Whitespace,
    /// Complement of `Whitespace`.
    NotWhitespace,
    /// Every character.
    Any,
    /// Every character except `\n` and `\r`.
    AnyExceptNewline,
}

impl NamedClass {
    /// Tests `ch` against this class with Unicode handling switched off.
    #[must_use]
    pub fn matches(&self, ch: char) -> bool {
        self.matches_with_unicode(ch, false)
    }

    /// Tests `ch` against this class; `unicode` widens the digit, word and
    /// whitespace classes beyond ASCII.
    #[must_use]
    pub fn matches_with_unicode(&self, ch: char, unicode: bool) -> bool {
        // The three positive predicates; the `Not*` arms negate them.
        let is_digit = || ch.is_ascii_digit() || (unicode && unicode_digit(ch));
        let is_word = || {
            if unicode {
                ch.is_alphanumeric() || ch == '_' || unicode_word_char(ch)
            } else {
                ch.is_ascii_alphanumeric() || ch == '_'
            }
        };
        let is_space = || {
            if unicode {
                ch.is_whitespace() || unicode_whitespace(ch)
            } else {
                ch.is_ascii_whitespace()
            }
        };

        match self {
            Self::Digit => is_digit(),
            Self::NotDigit => !is_digit(),
            Self::Word => is_word(),
            Self::NotWord => !is_word(),
            Self::Whitespace => is_space(),
            Self::NotWhitespace => !is_space(),
            Self::Any => true,
            Self::AnyExceptNewline => !matches!(ch, '\n' | '\r'),
        }
    }
}
/// Reports whether `ch` falls in one of the non-ASCII decimal-digit runs
/// listed below.
///
/// ASCII `0`-`9` are intentionally not part of this table; the caller in
/// this file checks `is_ascii_digit()` separately.
fn unicode_digit(ch: char) -> bool {
    matches!(
        ch,
        '\u{0660}'..='\u{0669}'
            | '\u{06F0}'..='\u{06F9}'
            | '\u{0966}'..='\u{096F}'
            | '\u{0E50}'..='\u{0E59}'
            | '\u{FF10}'..='\u{FF19}'
            // Osmanya digits stop at U+104A9. The previous upper bound of
            // U+104D9 also swept in Osage letters (U+104B0 onward), which
            // made the digit class accept letters.
            | '\u{104A0}'..='\u{104A9}'
            | '\u{1D7CE}'..='\u{1D7FF}'
    )
}
/// Reports whether `ch` lies in one of the hand-picked code-point ranges
/// that Unicode mode treats as word characters in addition to
/// `char::is_alphanumeric` and `_`.
///
/// NOTE(review): the table includes U+2000..=U+206F, which contains space and
/// punctuation code points (several are also listed by `unicode_whitespace`),
/// plus symbol and pictograph ranges. That looks deliberate but should be
/// confirmed, because it affects word-boundary behaviour.
fn unicode_word_char(ch: char) -> bool {
    // Inclusive (first, last) pairs, kept in their original order.
    const WORD_RANGES: &[(char, char)] = &[
        ('\u{00C0}', '\u{024F}'),
        ('\u{0250}', '\u{02AF}'),
        ('\u{02B0}', '\u{02FF}'),
        ('\u{0300}', '\u{036F}'),
        ('\u{0370}', '\u{03FF}'),
        ('\u{0400}', '\u{04FF}'),
        ('\u{0500}', '\u{052F}'),
        ('\u{0530}', '\u{058F}'),
        ('\u{0590}', '\u{05FF}'),
        ('\u{0600}', '\u{06FF}'),
        ('\u{0900}', '\u{097F}'),
        ('\u{4E00}', '\u{9FFF}'),
        ('\u{3400}', '\u{4DBF}'),
        ('\u{F900}', '\u{FAFF}'),
        ('\u{2000}', '\u{206F}'),
        ('\u{2070}', '\u{209F}'),
        ('\u{20A0}', '\u{20CF}'),
        ('\u{2100}', '\u{214F}'),
        ('\u{2150}', '\u{218F}'),
        ('\u{2190}', '\u{21FF}'),
        ('\u{2200}', '\u{22FF}'),
        ('\u{2300}', '\u{23FF}'),
        ('\u{2460}', '\u{24FF}'),
        ('\u{2500}', '\u{257F}'),
        ('\u{2580}', '\u{259F}'),
        ('\u{25A0}', '\u{25FF}'),
        ('\u{2600}', '\u{26FF}'),
        ('\u{2700}', '\u{27BF}'),
        ('\u{FB00}', '\u{FB4F}'),
        ('\u{FB50}', '\u{FDFF}'),
        ('\u{FE70}', '\u{FEFF}'),
        ('\u{FF00}', '\u{FFEF}'),
        ('\u{1F600}', '\u{1F64F}'),
        ('\u{1F300}', '\u{1F5FF}'),
        ('\u{1F680}', '\u{1F6FF}'),
        ('\u{1F900}', '\u{1F9FF}'),
        ('\u{1FA00}', '\u{1FA6F}'),
        ('\u{1FA70}', '\u{1FAFF}'),
        ('\u{1F170}', '\u{1F19A}'),
        ('\u{00B5}', '\u{00B5}'),
    ];

    WORD_RANGES
        .iter()
        .any(|&(first, last)| first <= ch && ch <= last)
}
/// Reports whether `ch` is one of the non-ASCII space or line-separator code
/// points listed below.
///
/// ASCII whitespace is not part of this table; the caller in this file
/// combines the result with `char::is_whitespace`.
fn unicode_whitespace(ch: char) -> bool {
    // Code points that stand alone, followed by the one contiguous run.
    const SINGLES: [char; 8] = [
        '\u{0085}', '\u{00A0}', '\u{1680}', '\u{2028}', '\u{2029}', '\u{202F}', '\u{205F}',
        '\u{3000}',
    ];
    let in_run = '\u{2000}' <= ch && ch <= '\u{200A}';

    in_run || SINGLES.contains(&ch)
}
/// Repetition bounds attached to a quantified expression.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Quantifier {
    /// Zero or more repetitions (no upper bound).
    ZeroOrMore,
    /// One or more repetitions (no upper bound).
    OneOrMore,
    /// Zero or one repetition.
    ZeroOrOne,
    /// Exactly the given number of repetitions.
    Exactly(usize),
    /// At least the given number of repetitions (no upper bound).
    AtLeast(usize),
    /// Between the first and the second count; the values are stored as
    /// given and their order is not checked here.
    Between(usize, usize),
}

impl Quantifier {
    /// Smallest number of repetitions this quantifier accepts.
    #[must_use]
    pub fn min(&self) -> usize {
        match *self {
            Self::Exactly(count) => count,
            Self::AtLeast(lower) | Self::Between(lower, _) => lower,
            Self::OneOrMore => 1,
            Self::ZeroOrOne | Self::ZeroOrMore => 0,
        }
    }

    /// Largest number of repetitions this quantifier accepts, or `None` when
    /// it is unbounded.
    #[must_use]
    pub fn max(&self) -> Option<usize> {
        match *self {
            Self::ZeroOrOne => Some(1),
            Self::Exactly(upper) | Self::Between(_, upper) => Some(upper),
            Self::AtLeast(_) | Self::OneOrMore | Self::ZeroOrMore => None,
        }
    }
}
/// Zero-width position assertions carried by `Ast::Anchor`.
///
/// NOTE(review): the exact positions each variant accepts are decided by the
/// matcher, which is outside this file; summaries follow the variant names.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Anchor {
    /// Start anchor.
    Start,
    /// End anchor.
    End,
    /// Word-boundary assertion.
    WordBoundary,
    /// Negated word-boundary assertion.
    NotWordBoundary,
}