iregex/ir/
concatenation.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
use iregex_automata::{
	nfa::{BuildNFA, StateBuilder, Tags},
	Class, Map, Token, NFA,
};
use std::{hash::Hash, ops::Deref};

use crate::{Boundary, CaptureTag};

use super::Atom;

/// Regular expression atom sequence.
///
/// A thin wrapper around a `Vec` of [`Atom`]s, kept in insertion order.
/// The type parameters default to `T = char` and `B = ()`; they are passed
/// through unchanged to [`Atom`].
#[derive(Debug, Clone)]
pub struct Concatenation<T = char, B = ()>(Vec<Atom<T, B>>);

impl<T, B> Default for Concatenation<T, B> {
	fn default() -> Self {
		Self(Vec::new())
	}
}

impl<T, B> Concatenation<T, B> {
	pub fn new() -> Self {
		Self::default()
	}

	pub fn push(&mut self, atom: Atom<T, B>) {
		self.0.push(atom)
	}
}

/// Wraps a single atom into a one-element concatenation.
impl<T, B> From<Atom<T, B>> for Concatenation<T, B> {
	fn from(value: Atom<T, B>) -> Self {
		Self(Vec::from([value]))
	}
}

/// Gives read-only slice access to the underlying atoms.
impl<T, B> Deref for Concatenation<T, B> {
	type Target = [Atom<T, B>];

	fn deref(&self) -> &Self::Target {
		&self.0[..]
	}
}

/// Iterates over the atoms by reference, in sequence order.
impl<'a, T, B> IntoIterator for &'a Concatenation<T, B> {
	type Item = &'a Atom<T, B>;
	type IntoIter = std::slice::Iter<'a, Atom<T, B>>;

	fn into_iter(self) -> Self::IntoIter {
		let atoms: &'a [Atom<T, B>] = &self.0;
		atoms.iter()
	}
}

impl<T, B> IntoIterator for Concatenation<T, B> {
	type IntoIter = std::vec::IntoIter<Atom<T, B>>;
	type Item = Atom<T, B>;

	fn into_iter(self) -> Self::IntoIter {
		self.0.into_iter()
	}
}

/// Collects atoms into a concatenation, preserving the iterator's order.
impl<T, B> FromIterator<Atom<T, B>> for Concatenation<T, B> {
	fn from_iter<I: IntoIterator<Item = Atom<T, B>>>(iter: I) -> Self {
		let atoms: Vec<Atom<T, B>> = iter.into_iter().collect();
		Self(atoms)
	}
}

/// NFA construction for a sequence of atoms.
///
/// Each atom is built in order, and the states returned by one atom are
/// linked to the entry state of the next.
impl<T, B, Q, C> BuildNFA<T, Q, C, CaptureTag> for Concatenation<T, B>
where
	T: Token,
	B: Boundary<T, Class = C>,
	Q: Copy + Ord,
	C: Clone + Eq + Hash + Class<T>,
{
	/// Adds the states and transitions of this concatenation to `nfa`.
	///
	/// Three cases are distinguished:
	/// - no atoms: a single fresh state is created for `class` and returned
	///   both as the first tuple element and as the only entry of the map;
	/// - exactly one atom: the call is forwarded to that atom unchanged;
	/// - several atoms: a fresh state is created, then every atom is built
	///   once per class left over by the previous step and chained to it.
	///
	/// Returns a state together with a map from classes to states.
	/// NOTE(review): presumably these are the entry state and the per-class
	/// exit states; the exact contract is defined by `BuildNFA` — confirm.
	///
	/// # Errors
	///
	/// Propagates any `S::Error` raised by `state_builder.next_state` or by
	/// the atoms' own `build_nfa_from`.
	fn build_nfa_from<S: StateBuilder<T, Q, C>>(
		&self,
		state_builder: &mut S,
		nfa: &mut NFA<Q, T>,
		tags: &mut Tags<Q, CaptureTag>,
		class: &C,
	) -> Result<(Q, C::Map<Q>), S::Error> {
		match self.0.as_slice() {
			[] => {
				// Empty sequence: one state serves as both ends.
				let a = state_builder.next_state(nfa, class.clone())?;
				Ok((a, Map::singleton(class.clone(), a)))
			}
			// A single atom needs no extra states or linking.
			[atom] => atom.build_nfa_from(state_builder, nfa, tags, class),
			list => {
				// State returned as the first element of the result.
				let a = state_builder.next_state(nfa, class.clone())?;

				// For each class, the state reached so far and a flag telling
				// whether that state is a merge state created below.
				let mut map: C::Map<(Q, bool)> = Map::singleton(class.clone(), (a, false));

				for atom in list {
					// Take the previous step's entries, leaving `map` in its
					// default state to be refilled with this atom's results.
					for (class, (b, _)) in std::mem::take(&mut map).into_entries() {
						let (atom_a, atom_b_map) =
							atom.build_nfa_from(state_builder, nfa, tags, &class)?;
						// Link the previous state to the atom's first state with
						// a `None` label (presumably an epsilon transition —
						// confirm against `NFA::add`).
						nfa.add(b, None, atom_a);
						for (b_class, atom_b) in atom_b_map.into_entries() {
							// When the entry is the one supplied by the closure,
							// `*c == atom_b` and the block below is skipped.
							let (c, merging) =
								map.get_mut_or_insert_with(&b_class, || (atom_b, false));

							// A different state is already recorded for this
							// class: both must lead to a common state.
							if *c != atom_b {
								if *merging {
									// `*c` is already a merge state; route into it.
									nfa.add(atom_b, None, *c);
								} else {
									// Create a merge state, route both states
									// into it and record it for this class.
									let d = state_builder.next_state(nfa, b_class)?;
									nfa.add(atom_b, None, d);
									nfa.add(*c, None, d);
									*c = d;
									*merging = true;
								}
							}
						}
					}
				}

				// Drop the merge flags; only the states are returned.
				Ok((a, map.into_entries().map(|(c, (q, _))| (c, q)).collect()))
			}
		}
	}
}