// iregex 0.2.0
//
// Intermediate representation for Regular Expressions.
// Documentation
use std::{fs, ops::Range};

use iregex::{Alternation, Atom, CompoundAutomaton, Concatenation, IRegEx};
use iregex_automata::{any_char, dot::DotDisplay, nfa::U32StateBuilder, Map, RangeSet, NFA};

/// An anchored, starred token over `{a, b, c}` must yield no match on
/// `"abcd"`: the very first item of the match iterator has to be `None`.
#[test]
fn no_matches_anchored() {
	// Character set accepted by the starred token.
	let abc = ['a', 'b', 'c'].into_iter().collect();

	// Each case pairs an expression root with an input it must not match.
	let cases = [(Atom::<_, ()>::star(Atom::Token(abc).into()).into(), "abcd")];

	for (pattern, input) in cases {
		let regex = IRegEx::anchored(pattern);
		let builder = U32StateBuilder::default();
		let automaton = regex.compile(builder).unwrap();
		let mut found = automaton.matches(input.chars());
		assert_eq!(found.next(), None);
	}
}

/// Anchored expressions run against inputs they accept in full must report
/// exactly one match, spanning `0..input.len()`, and nothing afterwards.
#[test]
fn single_match_anchored() {
	let cases = [
		// Empty concatenation against the empty input.
		(Concatenation::<_, ()>::new().into(), ""),
		// A single `any_char()` token against a one-character input.
		(Atom::Token(any_char()).into(), "a"),
		// A starred `any_char()` token against a longer input.
		(Atom::star(Atom::Token(any_char()).into()).into(), "abcd"),
	];

	for (pattern, input) in cases {
		let regex = IRegEx::anchored(pattern);
		let builder = U32StateBuilder::default();
		let automaton = regex.compile(builder).unwrap();
		let mut found = automaton.matches(input.chars());

		// `str::len` counts bytes; every input above is ASCII, so this equals
		// the number of `char`s fed to the automaton.
		let whole_input = 0..input.len();
		assert_eq!(found.next(), Some(whole_input));
		assert_eq!(found.next(), None);
	}
}

/// Unanchored expressions must report the single expected range inside the
/// input, followed by the end of the match iterator.
#[test]
fn single_match_unanchored() {
	let b = ['b'].into_iter().collect::<RangeSet<char>>();

	// A lone `b` token.
	let single_b = Atom::Token(b.clone()).into();

	// Two `b` tokens in sequence.
	let double_b = [Atom::Token(b.clone()).into(), Atom::Token(b).into()]
		.into_iter()
		.collect::<Concatenation>()
		.into();

	// (expression root, input, range the only match must cover)
	let cases = [(single_b, "aba", 1..2), (double_b, "abba", 1..3)];

	for (pattern, input, span) in cases {
		let regex = IRegEx::unanchored(pattern);
		let builder = U32StateBuilder::default();
		let automaton = regex.compile(builder).unwrap();
		let mut found = automaton.matches(input.chars());
		assert_eq!(found.next(), Some(span));
		assert_eq!(found.next(), None);
	}
}

/// Unanchored expressions must report every expected range, in order.
///
/// When a case disagrees with its expectation, the compiled automaton is
/// dumped through `write_compound_automaton` under the base name
/// `many_matches_unanchored_<case index>` before the assertion fails.
#[test]
fn many_matches_unanchored() {
	let token_a = Atom::Token(['a'].into_iter().collect());
	let token_b = Atom::Token(['b'].into_iter().collect());

	// (expression root, input, every range the matcher must yield)
	let cases: [(Alternation, &str, &[Range<usize>]); 3] = [
		// Empty concatenation: one empty range per position, end included.
		(
			Concatenation::new().into(),
			"aaa",
			&[0..0, 1..1, 2..2, 3..3],
		),
		// Single `any_char()` token: one range per character.
		(Atom::Token(any_char()).into(), "aaa", &[0..1, 1..2, 2..3]),
		// Alternation of `a` and `b`: one range per character of "abab".
		(
			[Concatenation::from(token_a), Concatenation::from(token_b)]
				.into_iter()
				.collect(),
			"abab",
			&[0..1, 1..2, 2..3, 3..4],
		),
	];

	for (i, (pattern, input, expected)) in cases.into_iter().enumerate() {
		let regex = IRegEx::unanchored(pattern);
		let builder = U32StateBuilder::default();
		let automaton = regex.compile(builder).unwrap();
		let found = automaton.matches(input.chars()).collect::<Vec<_>>();

		// Dump the automaton first so it is on disk when the assertion panics.
		let mismatch = found != expected;
		if mismatch {
			write_compound_automaton(format!("many_matches_unanchored_{i}"), &automaton);
		}

		assert_eq!(found, expected);
	}
}

/// Dumps the three parts of a compound automaton as DOT files.
///
/// The files are named `{basename}_prefix.dot`, `{basename}_root.dot` and
/// `{basename}_suffix.dot`.
///
/// Panics if `root` or `suffix` has no entry for the `()` key, or if a file
/// cannot be written.
fn write_compound_automaton(basename: String, aut: &CompoundAutomaton) {
	let prefix_path = format!("{basename}_prefix.dot");
	let root_path = format!("{basename}_root.dot");
	let suffix_path = format!("{basename}_suffix.dot");

	// Each lookup stays next to its write, so a missing entry panics only
	// after the preceding parts have been written.
	write_automaton(prefix_path, &aut.prefix);
	write_automaton(root_path, &aut.root.get(&()).unwrap());
	write_automaton(suffix_path, &aut.suffix.get(&()).unwrap());
}

/// Renders `aut` through its `dot()` display and writes the text to `path`.
///
/// Panics if the file cannot be written.
fn write_automaton(path: String, aut: &NFA) {
	let rendered = aut.dot().to_string();
	fs::write(path, rendered).unwrap();
}