use {
regex_automata::meta::Regex,
regex_syntax::hir::{
self, Hir,
literal::{Literal, Seq},
},
};
use crate::{config::ConfiguredHIR, error::Error};
#[derive(Clone, Debug)]
pub(crate) struct InnerLiterals {
seq: Seq,
}
impl InnerLiterals {
pub(crate) fn new(chir: &ConfiguredHIR, re: &Regex) -> InnerLiterals {
if chir.config().line_terminator.is_none() {
log::trace!(
"skipping inner literal extraction, \
no line terminator is set"
);
return InnerLiterals::none();
}
if re.is_accelerated() {
if !chir.hir().properties().look_set().contains_word_unicode() {
log::trace!(
"skipping inner literal extraction, \
existing regex is believed to already be accelerated",
);
return InnerLiterals::none();
}
}
if chir.hir().properties().is_alternation_literal() {
log::trace!(
"skipping inner literal extraction, \
found alternation of literals, deferring to regex engine",
);
return InnerLiterals::none();
}
let seq = Extractor::new().extract_untagged(chir.hir());
InnerLiterals { seq }
}
pub(crate) fn none() -> InnerLiterals {
InnerLiterals { seq: Seq::infinite() }
}
pub(crate) fn one_regex(&self) -> Result<Option<Regex>, Error> {
let Some(lits) = self.seq.literals() else { return Ok(None) };
if lits.is_empty() {
return Ok(None);
}
let mut alts = vec![];
for lit in lits.iter() {
alts.push(Hir::literal(lit.as_bytes()));
}
let hir = Hir::alternation(alts);
log::debug!("extracted fast line regex: {:?}", hir.to_string());
let re = Regex::builder()
.configure(Regex::config().utf8_empty(false))
.build_from_hir(&hir)
.map_err(Error::regex)?;
Ok(Some(re))
}
}
#[derive(Debug)]
struct Extractor {
limit_class: usize,
limit_repeat: usize,
limit_literal_len: usize,
limit_total: usize,
}
impl Extractor {
fn new() -> Extractor {
Extractor {
limit_class: 10,
limit_repeat: 10,
limit_literal_len: 100,
limit_total: 64,
}
}
fn extract_untagged(&self, hir: &Hir) -> Seq {
let mut seq = self.extract(hir);
log::trace!("extracted inner literals: {:?}", seq.seq);
seq.seq.optimize_for_prefix_by_preference();
log::trace!(
"extracted inner literals after optimization: {:?}",
seq.seq
);
if !seq.is_good() {
log::trace!(
"throwing away inner literals because they might be slow"
);
seq.make_infinite();
}
seq.seq
}
fn extract(&self, hir: &Hir) -> TSeq {
use regex_syntax::hir::HirKind::*;
match *hir.kind() {
Empty | Look(_) => TSeq::singleton(self::Literal::exact(vec![])),
Literal(hir::Literal(ref bytes)) => {
let mut seq =
TSeq::singleton(self::Literal::exact(bytes.to_vec()));
self.enforce_literal_len(&mut seq);
seq
}
Class(hir::Class::Unicode(ref cls)) => {
self.extract_class_unicode(cls)
}
Class(hir::Class::Bytes(ref cls)) => self.extract_class_bytes(cls),
Repetition(ref rep) => self.extract_repetition(rep),
Capture(hir::Capture { ref sub, .. }) => self.extract(sub),
Concat(ref hirs) => self.extract_concat(hirs.iter()),
Alternation(ref hirs) => self.extract_alternation(hirs.iter()),
}
}
fn extract_concat<'a, I: Iterator<Item = &'a Hir>>(&self, it: I) -> TSeq {
let mut seq = TSeq::singleton(self::Literal::exact(vec![]));
let mut prev: Option<TSeq> = None;
for hir in it {
if seq.is_inexact() {
if seq.is_empty() {
return seq;
}
if seq.is_really_good() {
return seq;
}
prev = Some(match prev {
None => seq,
Some(prev) => prev.choose(seq),
});
seq = TSeq::singleton(self::Literal::exact(vec![]));
seq.make_not_prefix();
}
seq = self.cross(seq, self.extract(hir));
}
if let Some(prev) = prev { prev.choose(seq) } else { seq }
}
fn extract_alternation<'a, I: Iterator<Item = &'a Hir>>(
&self,
it: I,
) -> TSeq {
let mut seq = TSeq::empty();
for hir in it {
if !seq.is_finite() {
break;
}
seq = self.union(seq, &mut self.extract(hir));
}
seq
}
fn extract_repetition(&self, rep: &hir::Repetition) -> TSeq {
let mut subseq = self.extract(&rep.sub);
match *rep {
hir::Repetition { min: 0, max, greedy, .. } => {
if max != Some(1) {
subseq.make_inexact();
}
let mut empty = TSeq::singleton(Literal::exact(vec![]));
if !greedy {
std::mem::swap(&mut subseq, &mut empty);
}
self.union(subseq, &mut empty)
}
hir::Repetition { min, max: Some(max), .. } if min == max => {
assert!(min > 0); let limit =
u32::try_from(self.limit_repeat).unwrap_or(u32::MAX);
let mut seq = TSeq::singleton(Literal::exact(vec![]));
for _ in 0..std::cmp::min(min, limit) {
if seq.is_inexact() {
break;
}
seq = self.cross(seq, subseq.clone());
}
if usize::try_from(min).is_err() || min > limit {
seq.make_inexact();
}
seq
}
hir::Repetition { min, max: Some(max), .. } if min < max => {
assert!(min > 0); let limit =
u32::try_from(self.limit_repeat).unwrap_or(u32::MAX);
let mut seq = TSeq::singleton(Literal::exact(vec![]));
for _ in 0..std::cmp::min(min, limit) {
if seq.is_inexact() {
break;
}
seq = self.cross(seq, subseq.clone());
}
seq.make_inexact();
seq
}
hir::Repetition { .. } => {
subseq.make_inexact();
subseq
}
}
}
fn extract_class_unicode(&self, cls: &hir::ClassUnicode) -> TSeq {
if self.class_over_limit_unicode(cls) {
return TSeq::infinite();
}
let mut seq = TSeq::empty();
for r in cls.iter() {
for ch in r.start()..=r.end() {
seq.push(Literal::from(ch));
}
}
self.enforce_literal_len(&mut seq);
seq
}
fn extract_class_bytes(&self, cls: &hir::ClassBytes) -> TSeq {
if self.class_over_limit_bytes(cls) {
return TSeq::infinite();
}
let mut seq = TSeq::empty();
for r in cls.iter() {
for b in r.start()..=r.end() {
seq.push(Literal::from(b));
}
}
self.enforce_literal_len(&mut seq);
seq
}
fn class_over_limit_unicode(&self, cls: &hir::ClassUnicode) -> bool {
let mut count = 0;
for r in cls.iter() {
if count > self.limit_class {
return true;
}
count += r.len();
}
count > self.limit_class
}
fn class_over_limit_bytes(&self, cls: &hir::ClassBytes) -> bool {
let mut count = 0;
for r in cls.iter() {
if count > self.limit_class {
return true;
}
count += r.len();
}
count > self.limit_class
}
fn cross(&self, mut seq1: TSeq, mut seq2: TSeq) -> TSeq {
if !seq2.prefix {
return seq1.choose(seq2);
}
if seq1
.max_cross_len(&seq2)
.map_or(false, |len| len > self.limit_total)
{
seq2.make_infinite();
}
seq1.cross_forward(&mut seq2);
assert!(seq1.len().map_or(true, |x| x <= self.limit_total));
self.enforce_literal_len(&mut seq1);
seq1
}
fn union(&self, mut seq1: TSeq, seq2: &mut TSeq) -> TSeq {
if seq1.max_union_len(seq2).map_or(false, |len| len > self.limit_total)
{
seq1.keep_first_bytes(4);
seq2.keep_first_bytes(4);
seq1.dedup();
seq2.dedup();
if seq1
.max_union_len(seq2)
.map_or(false, |len| len > self.limit_total)
{
seq2.make_infinite();
}
}
seq1.union(seq2);
assert!(seq1.len().map_or(true, |x| x <= self.limit_total));
seq1.prefix = seq1.prefix && seq2.prefix;
seq1
}
fn enforce_literal_len(&self, seq: &mut TSeq) {
seq.keep_first_bytes(self.limit_literal_len);
}
}
#[derive(Clone, Debug)]
struct TSeq {
seq: Seq,
prefix: bool,
}
#[allow(dead_code)]
impl TSeq {
fn empty() -> TSeq {
TSeq { seq: Seq::empty(), prefix: true }
}
fn infinite() -> TSeq {
TSeq { seq: Seq::infinite(), prefix: true }
}
fn singleton(lit: Literal) -> TSeq {
TSeq { seq: Seq::singleton(lit), prefix: true }
}
fn new<I, B>(it: I) -> TSeq
where
I: IntoIterator<Item = B>,
B: AsRef<[u8]>,
{
TSeq { seq: Seq::new(it), prefix: true }
}
fn literals(&self) -> Option<&[Literal]> {
self.seq.literals()
}
fn push(&mut self, lit: Literal) {
self.seq.push(lit);
}
fn make_inexact(&mut self) {
self.seq.make_inexact();
}
fn make_infinite(&mut self) {
self.seq.make_infinite();
}
fn cross_forward(&mut self, other: &mut TSeq) {
assert!(other.prefix);
self.seq.cross_forward(&mut other.seq);
}
fn union(&mut self, other: &mut TSeq) {
self.seq.union(&mut other.seq);
}
fn dedup(&mut self) {
self.seq.dedup();
}
fn sort(&mut self) {
self.seq.sort();
}
fn keep_first_bytes(&mut self, len: usize) {
self.seq.keep_first_bytes(len);
}
fn is_finite(&self) -> bool {
self.seq.is_finite()
}
fn is_empty(&self) -> bool {
self.seq.is_empty()
}
fn len(&self) -> Option<usize> {
self.seq.len()
}
fn is_exact(&self) -> bool {
self.seq.is_exact()
}
fn is_inexact(&self) -> bool {
self.seq.is_inexact()
}
fn max_union_len(&self, other: &TSeq) -> Option<usize> {
self.seq.max_union_len(&other.seq)
}
fn max_cross_len(&self, other: &TSeq) -> Option<usize> {
assert!(other.prefix);
self.seq.max_cross_len(&other.seq)
}
fn min_literal_len(&self) -> Option<usize> {
self.seq.min_literal_len()
}
fn max_literal_len(&self) -> Option<usize> {
self.seq.max_literal_len()
}
fn make_not_prefix(&mut self) {
self.prefix = false;
}
fn is_good(&self) -> bool {
if self.has_poisonous_literal() {
return false;
}
let Some(min) = self.min_literal_len() else { return false };
let Some(len) = self.len() else { return false };
if min <= 1 {
return len <= 3;
}
min >= 2 && len <= 64
}
fn is_really_good(&self) -> bool {
if self.has_poisonous_literal() {
return false;
}
let Some(min) = self.min_literal_len() else { return false };
let Some(len) = self.len() else { return false };
min >= 3 && len <= 8
}
fn has_poisonous_literal(&self) -> bool {
let Some(lits) = self.literals() else { return false };
lits.iter().any(is_poisonous)
}
fn choose(self, other: TSeq) -> TSeq {
let (mut seq1, mut seq2) = (self, other);
seq1.make_inexact();
seq2.make_inexact();
if !seq1.is_finite() {
return seq2;
} else if !seq2.is_finite() {
return seq1;
}
if seq1.has_poisonous_literal() {
return seq2;
} else if seq2.has_poisonous_literal() {
return seq1;
}
let Some(min1) = seq1.min_literal_len() else { return seq2 };
let Some(min2) = seq2.min_literal_len() else { return seq1 };
if min1 < min2 {
return seq2;
} else if min2 < min1 {
return seq1;
}
let len1 = seq1.len().unwrap();
let len2 = seq2.len().unwrap();
if len1 < len2 {
return seq2;
} else if len2 < len1 {
return seq1;
}
seq1
}
}
impl FromIterator<Literal> for TSeq {
fn from_iter<T: IntoIterator<Item = Literal>>(it: T) -> TSeq {
TSeq { seq: Seq::from_iter(it), prefix: true }
}
}
fn is_poisonous(lit: &Literal) -> bool {
use regex_syntax::hir::literal::rank;
lit.is_empty() || (lit.len() == 1 && rank(lit.as_bytes()[0]) >= 250)
}
#[cfg(test)]
mod tests {
use super::*;
fn e(pattern: impl AsRef<str>) -> Seq {
let pattern = pattern.as_ref();
let hir = regex_syntax::ParserBuilder::new()
.utf8(false)
.build()
.parse(pattern)
.unwrap();
Extractor::new().extract_untagged(&hir)
}
#[allow(non_snake_case)]
fn E(x: &str) -> Literal {
Literal::exact(x.as_bytes())
}
#[allow(non_snake_case)]
fn I(x: &str) -> Literal {
Literal::inexact(x.as_bytes())
}
fn seq<I: IntoIterator<Item = Literal>>(it: I) -> Seq {
Seq::from_iter(it)
}
fn inexact<I>(it: I) -> Seq
where
I: IntoIterator<Item = Literal>,
{
Seq::from_iter(it)
}
fn exact<B: AsRef<[u8]>, I: IntoIterator<Item = B>>(it: I) -> Seq {
Seq::new(it)
}
#[test]
fn various() {
assert_eq!(e(r"foo"), seq([E("foo")]));
assert_eq!(e(r"[a-z]foo[a-z]"), seq([I("foo")]));
assert_eq!(e(r"[a-z](foo)(bar)[a-z]"), seq([I("foobar")]));
assert_eq!(e(r"[a-z]([a-z]foo)(bar[a-z])[a-z]"), seq([I("foo")]));
assert_eq!(e(r"[a-z]([a-z]foo)([a-z]foo)[a-z]"), seq([I("foo")]));
assert_eq!(e(r"(\d{1,3}\.){3}\d{1,3}"), seq([I(".")]));
assert_eq!(e(r"[a-z]([a-z]foo){3}[a-z]"), seq([I("foo")]));
assert_eq!(e(r"[a-z](foo[a-z]){3}[a-z]"), seq([I("foo")]));
assert_eq!(e(r"[a-z]([a-z]foo[a-z]){3}[a-z]"), seq([I("foo")]));
assert_eq!(
e(r"[a-z]([a-z]foo){3}(bar[a-z]){3}[a-z]"),
seq([I("foo")])
);
}
#[test]
fn heuristics() {
assert_eq!(e(r"[a-z]+(ab|cd|ef)[a-z]+hiya[a-z]+"), seq([I("hiya")]));
assert_eq!(
e(r"[a-z]+(abc|def|ghi)[a-z]+hiya[a-z]+"),
seq([I("abc"), I("def"), I("ghi")])
);
}
#[test]
fn literal() {
assert_eq!(exact(["a"]), e("a"));
assert_eq!(exact(["aaaaa"]), e("aaaaa"));
assert_eq!(exact(["A", "a"]), e("(?i-u)a"));
assert_eq!(exact(["AB", "Ab", "aB", "ab"]), e("(?i-u)ab"));
assert_eq!(exact(["abC", "abc"]), e("ab(?i-u)c"));
assert_eq!(Seq::infinite(), e(r"(?-u:\xFF)"));
assert_eq!(exact([b"Z"]), e(r"Z"));
assert_eq!(exact(["☃"]), e("☃"));
assert_eq!(exact(["☃"]), e("(?i)☃"));
assert_eq!(exact(["☃☃☃☃☃"]), e("☃☃☃☃☃"));
assert_eq!(exact(["Δ"]), e("Δ"));
assert_eq!(exact(["δ"]), e("δ"));
assert_eq!(exact(["Δ", "δ"]), e("(?i)Δ"));
assert_eq!(exact(["Δ", "δ"]), e("(?i)δ"));
assert_eq!(exact(["S", "s", "ſ"]), e("(?i)S"));
assert_eq!(exact(["S", "s", "ſ"]), e("(?i)s"));
assert_eq!(exact(["S", "s", "ſ"]), e("(?i)ſ"));
let letters = "ͱͳͷΐάέήίΰαβγδεζηθικλμνξοπρςστυφχψωϊϋ";
assert_eq!(exact([letters]), e(letters));
}
#[test]
fn class() {
assert_eq!(exact(["a", "b", "c"]), e("[abc]"));
assert_eq!(exact(["a1b", "a2b", "a3b"]), e("a[123]b"));
assert_eq!(exact(["δ", "ε"]), e("[εδ]"));
assert_eq!(exact(["Δ", "Ε", "δ", "ε", "ϵ"]), e(r"(?i)[εδ]"));
}
#[test]
fn look() {
assert_eq!(exact(["ab"]), e(r"a\Ab"));
assert_eq!(exact(["ab"]), e(r"a\zb"));
assert_eq!(exact(["ab"]), e(r"a(?m:^)b"));
assert_eq!(exact(["ab"]), e(r"a(?m:$)b"));
assert_eq!(exact(["ab"]), e(r"a\bb"));
assert_eq!(exact(["ab"]), e(r"a\Bb"));
assert_eq!(exact(["ab"]), e(r"a(?-u:\b)b"));
assert_eq!(exact(["ab"]), e(r"a(?-u:\B)b"));
assert_eq!(exact(["ab"]), e(r"^ab"));
assert_eq!(exact(["ab"]), e(r"$ab"));
assert_eq!(exact(["ab"]), e(r"(?m:^)ab"));
assert_eq!(exact(["ab"]), e(r"(?m:$)ab"));
assert_eq!(exact(["ab"]), e(r"\bab"));
assert_eq!(exact(["ab"]), e(r"\Bab"));
assert_eq!(exact(["ab"]), e(r"(?-u:\b)ab"));
assert_eq!(exact(["ab"]), e(r"(?-u:\B)ab"));
assert_eq!(exact(["ab"]), e(r"ab^"));
assert_eq!(exact(["ab"]), e(r"ab$"));
assert_eq!(exact(["ab"]), e(r"ab(?m:^)"));
assert_eq!(exact(["ab"]), e(r"ab(?m:$)"));
assert_eq!(exact(["ab"]), e(r"ab\b"));
assert_eq!(exact(["ab"]), e(r"ab\B"));
assert_eq!(exact(["ab"]), e(r"ab(?-u:\b)"));
assert_eq!(exact(["ab"]), e(r"ab(?-u:\B)"));
assert_eq!(seq([I("aZ"), E("ab")]), e(r"^aZ*b"));
}
#[test]
fn repetition() {
assert_eq!(Seq::infinite(), e(r"a?"));
assert_eq!(Seq::infinite(), e(r"a??"));
assert_eq!(Seq::infinite(), e(r"a*"));
assert_eq!(Seq::infinite(), e(r"a*?"));
assert_eq!(inexact([I("a")]), e(r"a+"));
assert_eq!(inexact([I("a")]), e(r"(a+)+"));
assert_eq!(exact(["ab"]), e(r"aZ{0}b"));
assert_eq!(exact(["aZb", "ab"]), e(r"aZ?b"));
assert_eq!(exact(["ab", "aZb"]), e(r"aZ??b"));
assert_eq!(inexact([I("aZ"), E("ab")]), e(r"aZ*b"));
assert_eq!(inexact([E("ab"), I("aZ")]), e(r"aZ*?b"));
assert_eq!(inexact([I("aZ")]), e(r"aZ+b"));
assert_eq!(inexact([I("aZ")]), e(r"aZ+?b"));
assert_eq!(exact(["aZZb"]), e(r"aZ{2}b"));
assert_eq!(inexact([I("aZZ")]), e(r"aZ{2,3}b"));
assert_eq!(Seq::infinite(), e(r"(abc)?"));
assert_eq!(Seq::infinite(), e(r"(abc)??"));
assert_eq!(inexact([I("a"), E("b")]), e(r"a*b"));
assert_eq!(inexact([E("b"), I("a")]), e(r"a*?b"));
assert_eq!(inexact([I("ab")]), e(r"ab+"));
assert_eq!(inexact([I("a"), I("b")]), e(r"a*b+"));
assert_eq!(inexact([I("a"), I("b"), E("c")]), e(r"a*b*c"));
assert_eq!(inexact([I("a"), I("b"), E("c")]), e(r"(a+)?(b+)?c"));
assert_eq!(inexact([I("a"), I("b"), E("c")]), e(r"(a+|)(b+|)c"));
assert_eq!(Seq::infinite(), e(r"a*b*c*"));
assert_eq!(inexact([I("a"), I("b"), I("c")]), e(r"a*b*c+"));
assert_eq!(inexact([I("a"), I("b")]), e(r"a*b+c"));
assert_eq!(inexact([I("a"), I("b")]), e(r"a*b+c*"));
assert_eq!(inexact([I("ab"), E("a")]), e(r"ab*"));
assert_eq!(inexact([I("ab"), E("ac")]), e(r"ab*c"));
assert_eq!(inexact([I("ab")]), e(r"ab+"));
assert_eq!(inexact([I("ab")]), e(r"ab+c"));
assert_eq!(inexact([I("z"), E("azb")]), e(r"z*azb"));
let expected =
exact(["aaa", "aab", "aba", "abb", "baa", "bab", "bba", "bbb"]);
assert_eq!(expected, e(r"[ab]{3}"));
let expected = inexact([
I("aaa"),
I("aab"),
I("aba"),
I("abb"),
I("baa"),
I("bab"),
I("bba"),
I("bbb"),
]);
assert_eq!(expected, e(r"[ab]{3,4}"));
}
#[test]
fn concat() {
assert_eq!(exact(["abcxyz"]), e(r"abc()xyz"));
assert_eq!(exact(["abcxyz"]), e(r"(abc)(xyz)"));
assert_eq!(exact(["abcmnoxyz"]), e(r"abc()mno()xyz"));
assert_eq!(Seq::infinite(), e(r"abc[a&&b]xyz"));
assert_eq!(exact(["abcxyz"]), e(r"abc[a&&b]*xyz"));
}
#[test]
fn alternation() {
assert_eq!(exact(["abc", "mno", "xyz"]), e(r"abc|mno|xyz"));
assert_eq!(
inexact([E("abc"), I("mZ"), E("mo"), E("xyz")]),
e(r"abc|mZ*o|xyz")
);
assert_eq!(exact(["abc", "xyz"]), e(r"abc|M[a&&b]N|xyz"));
assert_eq!(exact(["abc", "MN", "xyz"]), e(r"abc|M[a&&b]*N|xyz"));
assert_eq!(exact(["aaa"]), e(r"(?:|aa)aaa"));
assert_eq!(Seq::infinite(), e(r"(?:|aa)(?:aaa)*"));
assert_eq!(Seq::infinite(), e(r"(?:|aa)(?:aaa)*?"));
assert_eq!(Seq::infinite(), e(r"a|b*"));
assert_eq!(inexact([E("a"), I("b")]), e(r"a|b+"));
assert_eq!(inexact([I("a"), E("b"), E("c")]), e(r"a*b|c"));
assert_eq!(Seq::infinite(), e(r"a|(?:b|c*)"));
assert_eq!(inexact([I("a"), I("b"), E("c")]), e(r"(a|b)*c|(a|ab)*c"));
assert_eq!(
exact(["abef", "abgh", "cdef", "cdgh"]),
e(r"(ab|cd)(ef|gh)")
);
assert_eq!(
exact([
"abefij", "abefkl", "abghij", "abghkl", "cdefij", "cdefkl",
"cdghij", "cdghkl",
]),
e(r"(ab|cd)(ef|gh)(ij|kl)")
);
}
#[test]
fn impossible() {
assert_eq!(Seq::infinite(), e(r"[a&&b]"));
assert_eq!(Seq::infinite(), e(r"a[a&&b]"));
assert_eq!(Seq::infinite(), e(r"[a&&b]b"));
assert_eq!(Seq::infinite(), e(r"a[a&&b]b"));
assert_eq!(exact(["a", "b"]), e(r"a|[a&&b]|b"));
assert_eq!(exact(["a", "b"]), e(r"a|c[a&&b]|b"));
assert_eq!(exact(["a", "b"]), e(r"a|[a&&b]d|b"));
assert_eq!(exact(["a", "b"]), e(r"a|c[a&&b]d|b"));
assert_eq!(Seq::infinite(), e(r"[a&&b]*"));
assert_eq!(exact(["MN"]), e(r"M[a&&b]*N"));
}
#[test]
fn anything() {
assert_eq!(Seq::infinite(), e(r"."));
assert_eq!(Seq::infinite(), e(r"(?s)."));
assert_eq!(Seq::infinite(), e(r"[A-Za-z]"));
assert_eq!(Seq::infinite(), e(r"[A-Z]"));
assert_eq!(Seq::infinite(), e(r"[A-Z]{0}"));
assert_eq!(Seq::infinite(), e(r"[A-Z]?"));
assert_eq!(Seq::infinite(), e(r"[A-Z]*"));
assert_eq!(Seq::infinite(), e(r"[A-Z]+"));
assert_eq!(seq([I("1")]), e(r"1[A-Z]"));
assert_eq!(seq([I("1")]), e(r"1[A-Z]2"));
assert_eq!(seq([I("123")]), e(r"[A-Z]+123"));
assert_eq!(seq([I("123")]), e(r"[A-Z]+123[A-Z]+"));
assert_eq!(Seq::infinite(), e(r"1|[A-Z]|3"));
assert_eq!(seq([E("1"), I("2"), E("3")]), e(r"1|2[A-Z]|3"),);
assert_eq!(seq([E("1"), I("2"), E("3")]), e(r"1|[A-Z]2[A-Z]|3"),);
assert_eq!(seq([E("1"), I("2"), E("3")]), e(r"1|[A-Z]2|3"),);
assert_eq!(seq([E("1"), I("2"), E("4")]), e(r"1|2[A-Z]3|4"),);
assert_eq!(seq([I("2")]), e(r"(?:|1)[A-Z]2"));
assert_eq!(inexact([I("a")]), e(r"a.z"));
}
#[test]
fn empty() {
assert_eq!(Seq::infinite(), e(r""));
assert_eq!(Seq::infinite(), e(r"^"));
assert_eq!(Seq::infinite(), e(r"$"));
assert_eq!(Seq::infinite(), e(r"(?m:^)"));
assert_eq!(Seq::infinite(), e(r"(?m:$)"));
assert_eq!(Seq::infinite(), e(r"\b"));
assert_eq!(Seq::infinite(), e(r"\B"));
assert_eq!(Seq::infinite(), e(r"(?-u:\b)"));
assert_eq!(Seq::infinite(), e(r"(?-u:\B)"));
}
#[test]
fn crazy_repeats() {
assert_eq!(Seq::infinite(), e(r"(?:){4294967295}"));
assert_eq!(Seq::infinite(), e(r"(?:){64}{64}{64}{64}{64}{64}"));
assert_eq!(Seq::infinite(), e(r"x{0}{4294967295}"));
assert_eq!(Seq::infinite(), e(r"(?:|){4294967295}"));
assert_eq!(
Seq::infinite(),
e(r"(?:){8}{8}{8}{8}{8}{8}{8}{8}{8}{8}{8}{8}{8}{8}")
);
let repa = "a".repeat(100);
assert_eq!(
inexact([I(&repa)]),
e(r"a{8}{8}{8}{8}{8}{8}{8}{8}{8}{8}{8}{8}{8}{8}")
);
}
#[test]
fn optimize() {
let s = e(r"foobarfoobar|foobar|foobarzfoobar|foobarfoobar");
assert_eq!(seq([I("foobar")]), s);
let s = e(r"abba|akka|abccba");
assert_eq!(exact(["abba", "akka", "abccba"]), s);
let s = e(r"sam|samwise");
assert_eq!(seq([E("sam")]), s);
let s = e(r"foobarfoo|foo||foozfoo|foofoo");
assert_eq!(Seq::infinite(), s);
let s = e(r"foobarfoo|foo| |foofoo");
assert_eq!(Seq::infinite(), s);
}
#[test]
fn case_insensitive_alternation() {
let s = e(r"(?i:e.x|ex)");
assert_eq!(s, seq([I("X"), I("x")]));
}
}