plotnik_compiler/emit/
regex_table.rs1use std::collections::HashMap;
7
8use regex_automata::dfa::dense;
9use regex_automata::dfa::sparse::DFA;
10
11use plotnik_bytecode::StringId;
12
13use super::EmitError;
14
15#[derive(Debug)]
17struct RegexEntry {
18 string_id: StringId,
20 dfa_bytes: Vec<u8>,
22}
23
24#[derive(Debug, Default)]
31pub struct RegexTableBuilder {
32 lookup: HashMap<StringId, u16>,
34 entries: Vec<Option<RegexEntry>>,
36}
37
38impl RegexTableBuilder {
39 pub fn new() -> Self {
40 Self {
41 lookup: HashMap::new(),
42 entries: vec![None], }
44 }
45
46 pub fn intern(&mut self, pattern: &str, string_id: StringId) -> Result<u16, EmitError> {
50 if let Some(&id) = self.lookup.get(&string_id) {
51 return Ok(id);
52 }
53
54 let dense = dense::DFA::builder()
56 .configure(
57 dense::DFA::config()
58 .start_kind(regex_automata::dfa::StartKind::Unanchored)
59 .minimize(true),
60 )
61 .build(pattern)
62 .map_err(|e| EmitError::RegexCompile(pattern.to_string(), e.to_string()))?;
63
64 let sparse = dense
65 .to_sparse()
66 .map_err(|e| EmitError::RegexCompile(pattern.to_string(), e.to_string()))?;
67
68 let dfa_bytes = sparse.to_bytes_little_endian();
69
70 let id = self.entries.len() as u16;
71 if id == u16::MAX {
72 return Err(EmitError::TooManyRegexes(self.entries.len()));
73 }
74
75 self.entries.push(Some(RegexEntry {
76 string_id,
77 dfa_bytes,
78 }));
79 self.lookup.insert(string_id, id);
80 Ok(id)
81 }
82
83 pub fn len(&self) -> usize {
85 self.entries.len()
86 }
87
88 pub fn is_empty(&self) -> bool {
90 self.entries.len() <= 1
91 }
92
93 pub fn validate(&self) -> Result<(), EmitError> {
95 if self.entries.len() > 65535 {
96 return Err(EmitError::TooManyRegexes(self.entries.len()));
97 }
98 Ok(())
99 }
100
101 pub fn get(&self, string_id: StringId) -> Option<u16> {
103 self.lookup.get(&string_id).copied()
104 }
105
106 pub fn emit(&self) -> (Vec<u8>, Vec<u8>) {
113 let mut blob = Vec::new();
114 let mut table = Vec::with_capacity(self.entries.len() * 8 + 4);
115
116 for entry in &self.entries {
117 let rem = blob.len() % 4;
119 if rem != 0 {
120 blob.resize(blob.len() + (4 - rem), 0);
121 }
122
123 let (string_id, offset) = if let Some(e) = entry {
124 blob.extend_from_slice(&e.dfa_bytes);
125 (e.string_id.get(), (blob.len() - e.dfa_bytes.len()) as u32)
126 } else {
127 (0, 0)
128 };
129
130 table.extend_from_slice(&string_id.to_le_bytes());
132 table.extend_from_slice(&0u16.to_le_bytes()); table.extend_from_slice(&offset.to_le_bytes());
134 }
135
136 table.extend_from_slice(&0u16.to_le_bytes()); table.extend_from_slice(&0u16.to_le_bytes()); table.extend_from_slice(&(blob.len() as u32).to_le_bytes());
140
141 (blob, table)
142 }
143}
144
145pub fn deserialize_dfa(bytes: &[u8]) -> Result<DFA<&[u8]>, String> {
150 DFA::from_bytes(bytes)
153 .map(|(dfa, _)| dfa)
154 .map_err(|e| e.to_string())
155}