iregex/ir/
atom.rs

1use std::hash::Hash;
2
3use iregex_automata::{
4	nfa::{BuildNFA, StateBuilder, Tags},
5	Class, Map, RangeSet, Token, NFA,
6};
7
8use crate::{Boundary, CaptureTag};
9
10use super::{Alternation, CaptureGroupId, Repeat};
11
/// Atom of the regular expression intermediate representation.
///
/// `T` is the token type (`char` by default) and `B` is the boundary type
/// (`()` by default).
#[derive(Debug, Clone)]
pub enum Atom<T = char, B = ()> {
	/// Boundary, described by a value of type `B`.
	Boundary(B),

	/// Token belonging to the given set.
	Token(RangeSet<T>),

	/// Repetition of the given alternation, as specified by the `Repeat`
	/// value.
	Repeat(Alternation<T, B>, Repeat),

	/// Capture group: the given alternation, labelled with a capture group
	/// identifier.
	Capture(CaptureGroupId, Alternation<T, B>),
}
26
27impl<T, B> Atom<T, B> {
28	pub fn alternation(alt: Alternation<T, B>) -> Self {
29		Self::Repeat(alt, Repeat::ONCE)
30	}
31
32	pub fn star(inner: Alternation<T, B>) -> Self {
33		Self::Repeat(inner, Repeat::STAR)
34	}
35}
36
37impl<T, B, Q, C> BuildNFA<T, Q, C, CaptureTag> for Atom<T, B>
38where
39	T: Token,
40	B: Boundary<T, Class = C>,
41	Q: Copy + Ord,
42	C: Clone + Eq + Hash + Class<T>,
43{
44	fn build_nfa_from<S: StateBuilder<T, Q, C>>(
45		&self,
46		state_builder: &mut S,
47		nfa: &mut NFA<Q, T>,
48		tags: &mut Tags<Q, CaptureTag>,
49		class: &B::Class,
50	) -> Result<(Q, C::Map<Q>), S::Error> {
51		match self {
52			Self::Boundary(boundary) => {
53				let a = state_builder.next_state(nfa, class.clone())?;
54				let mut output: C::Map<Q> = Default::default();
55				if let Some(b_class) = boundary.apply(class) {
56					let b = state_builder.next_state(nfa, b_class.clone())?;
57					output.set(b_class, b);
58				}
59				Ok((a, output))
60			}
61			Self::Token(set) => {
62				let a = state_builder.next_state(nfa, class.clone())?;
63				let mut output: C::Map<Q> = Default::default();
64				for (b_class, set) in class.classify(set).into_entries() {
65					let b = state_builder.next_state(nfa, b_class.clone())?;
66					nfa.add(a, Some(set.into_owned()), b);
67					output.set(b_class, b);
68				}
69
70				Ok((a, output))
71			}
72			Self::Repeat(alt, r) => r.build_nfa_for(alt, state_builder, nfa, tags, class),
73			Self::Capture(_, alt) => alt.build_nfa_from(state_builder, nfa, tags, class),
74		}
75	}
76}