Skip to main content

lextrail_test/
assemble.rs

1use std::collections::{HashMap, HashSet, VecDeque};
2use std::mem;
3
4use crate::build::{Symbol, SymbolVar, UUId};
5use crate::guide::{CFGGraph, IdState, State, TrailLayer, build_cfg_graph};
6use crate::helpers::TrailError;
7
8#[derive(Debug, Clone)]
9struct ASMNode {
10    value: u8,
11    id: UUId,
12}
13
14impl Default for ASMNode {
15    fn default() -> Self {
16        Self {
17            value: 0,
18            id: UUId::new(),
19        }
20    }
21}
22
23#[derive(Debug, Clone)]
24pub struct ASMGraph {
25    nodes: HashMap<UUId, ASMNode>,
26    heads: HashSet<UUId>,
27    edges: HashMap<UUId, HashSet<UUId>>,
28    tails: HashSet<UUId>,
29}
30
31impl Default for ASMGraph {
32    fn default() -> Self {
33        Self {
34            nodes: HashMap::new(),
35            heads: HashSet::new(),
36            edges: HashMap::new(),
37            tails: HashSet::new(),
38        }
39    }
40}
41
42#[derive(Clone, PartialEq, Eq)]
43struct ASMStep {
44    accumulator: Vec<u8>,
45    id: IdState,
46}
47
48impl Default for ASMStep {
49    fn default() -> Self {
50        Self {
51            accumulator: Vec::new(),
52            id: IdState::Unset,
53        }
54    }
55}
56
57#[derive(Clone)]
58enum ASMToken {
59    At(VecDeque<u8>),
60    End,
61}
62
63impl ASMToken {
64    pub fn new(s: &str) -> Self {
65        Self::At(s.bytes().collect())
66    }
67}
68
69#[derive(Clone)]
70struct ASMFrame<'a> {
71    layers: Vec<TrailLayer<'a>>,
72    step: ASMStep,
73    token: ASMToken,
74}
75
76#[derive(Clone)]
77pub struct ASMProposal<'a> {
78    frame: ASMFrame<'a>,
79    value: String,
80}
81
82pub type ASMState<'a> = State<ASMProposal<'a>>;
83
84pub struct ASMSchema {
85    pub cfg: CFGGraph,
86    pub asm: ASMGraph,
87}
88
89pub type ASM<'a> = (ASMSchema, ASMState<'a>);
90
91type UTF8 = Vec<u8>;
92
93pub fn build_asm_graph(alphabet: Vec<String>) -> ASMGraph {
94    let (mut graph, mut id) = (ASMGraph::default(), UUId::new());
95    let (nodes, tails) = (&mut graph.nodes, &mut graph.tails);
96
97    let tokens: Vec<UTF8> = alphabet.into_iter().map(|s| s.into_bytes()).collect();
98
99    for token in tokens {
100        for (i, byte) in token.iter().enumerate() {
101            let candidates = if i == 0 {
102                &mut graph.heads
103            } else {
104                graph.edges.entry(id).or_insert(HashSet::new())
105            };
106
107            let opt_id = candidates
108                .iter()
109                .find_map(|id| (nodes[id].value == *byte).then_some(*id));
110
111            match opt_id {
112                Some(value) => id = value,
113                None => {
114                    let new_node = ASMNode {
115                        value: *byte,
116                        ..ASMNode::default()
117                    };
118                    let new_id = new_node.id;
119
120                    candidates.insert(new_id);
121                    nodes.insert(new_id, new_node);
122
123                    id = new_id;
124                }
125            }
126        }
127        tails.insert(id);
128    }
129
130    return graph;
131}
132
133pub fn asm_cfg<'a>(cfg: &str, alphabet: Vec<String>) -> Result<ASM<'a>, TrailError> {
134    return Ok((
135        ASMSchema {
136            cfg: build_cfg_graph(cfg)?,
137            asm: build_asm_graph(alphabet),
138        },
139        ASMState::default(),
140    ));
141}
142
143pub fn asm_rex<'a>(rex: &str, alphabet: Vec<String>) -> Result<ASM<'a>, TrailError> {
144    return Ok((
145        ASMSchema {
146            cfg: build_cfg_graph(&format!("/{rex}/"))?,
147            asm: build_asm_graph(alphabet),
148        },
149        ASMState::default(),
150    ));
151}
152
153pub fn asm_exp<'a>(exp: &str, alphabet: Vec<String>) -> Result<ASM<'a>, TrailError> {
154    return Ok((
155        ASMSchema {
156            cfg: build_cfg_graph(&format!("start: {exp}"))?,
157            asm: build_asm_graph(alphabet),
158        },
159        ASMState::default(),
160    ));
161}
162
163fn assemble<'a>(graph: &ASMGraph, frame: &mut ASMFrame<'a>) -> Vec<ASMProposal<'a>> {
164    let mut proposals: Vec<ASMProposal> = Vec::new();
165
166    let (nodes, tails) = (&graph.nodes, &graph.tails);
167    let (token, step) = (&frame.token, &frame.step);
168
169    let mut successors = if let IdState::Set(uuid) = step.id {
170        graph.edges.get(&uuid).cloned().unwrap_or_default()
171    } else {
172        graph.heads.clone()
173    };
174
175    let mut bytes = if let ASMToken::At(value) = token {
176        value.clone()
177    } else {
178        unreachable!()
179    };
180
181    while !bytes.is_empty() {
182        let found = successors.iter().find(|uuid| nodes[uuid].value == bytes[0]);
183
184        match found {
185            Some(uuid) => {
186                let byte = bytes.pop_front().unwrap();
187
188                frame.step.id = IdState::Set(*uuid);
189                frame.step.accumulator.push(byte);
190
191                if tails.contains(uuid) {
192                    let mut final_frame = frame.clone();
193                    final_frame.token = if bytes.is_empty() {
194                        ASMToken::End
195                    } else {
196                        ASMToken::At(bytes.clone())
197                    };
198
199                    let final_token = mem::take(&mut final_frame.step).accumulator;
200                    let final_value = String::from_utf8(final_token.into_iter().collect()).unwrap();
201
202                    proposals.push(ASMProposal {
203                        frame: final_frame,
204                        value: final_value,
205                    });
206                }
207
208                match graph.edges.get(&uuid) {
209                    Some(uuids) => successors = uuids.clone(),
210                    None => break,
211                }
212            }
213            None => break,
214        }
215    }
216
217    frame.token = if bytes.is_empty() {
218        ASMToken::End
219    } else {
220        ASMToken::At(bytes)
221    };
222
223    return proposals;
224}
225
226fn asm_run<'a>(schema: &'a ASMSchema, state: &mut ASMState<'a>) {
227    let (cfg, asm) = (&schema.cfg, &schema.asm);
228    let (proposals, backrefs) = (&mut state.proposals, &mut state.backrefs);
229
230    for proposal in &*proposals {
231        let checkpoint = proposal.frame.layers.last().unwrap();
232
233        let id = if let IdState::Set(uuid) = checkpoint.id {
234            uuid
235        } else {
236            unreachable!()
237        };
238
239        if let Symbol::TERMINAL { value, tags, .. } = &checkpoint.graph.nodes[&id] {
240            for tag in tags {
241                backrefs
242                    .entry(tag.clone())
243                    .or_insert_with(String::new)
244                    .push_str(value);
245            }
246        }
247    }
248
249    let mut frames = if proposals.is_empty() {
250        let start = vec![TrailLayer {
251            graph: &cfg[&SymbolVar::new("start")],
252            id: IdState::Unset,
253        }];
254
255        vec![ASMFrame {
256            layers: start,
257            step: ASMStep::default(),
258            token: ASMToken::End,
259        }]
260    } else {
261        proposals.drain(..).map(|proposal| proposal.frame).collect()
262    };
263
264    while !frames.is_empty() {
265        let mut frame = frames.pop().unwrap();
266
267        let checkpoint = frame
268            .layers
269            .last()
270            .expect("Empty proposal states are neither processed, nor pushed to the state.");
271
272        let (id, graph) = (checkpoint.id, checkpoint.graph);
273
274        let nodes = &graph.nodes;
275
276        let successors = match id {
277            IdState::Set(uuid) => {
278                if let ASMToken::End = frame.token {
279                    graph.edges.get(&uuid).cloned().unwrap_or_default()
280                } else {
281                    HashSet::from([uuid])
282                }
283            }
284            IdState::Unset => graph.heads.clone(),
285        };
286
287        if successors.is_empty() {
288            let layers = &mut frame.layers;
289
290            layers.pop();
291
292            if !layers.is_empty() {
293                frames.push(frame);
294            }
295
296            continue;
297        }
298
299        for successor in &successors {
300            match &nodes[successor] {
301                Symbol::TERMINAL { value, .. } => {
302                    let mut next_frame = frame.clone();
303                    next_frame.layers.last_mut().unwrap().id = IdState::Set(*successor);
304
305                    if let ASMToken::End = frame.token {
306                        next_frame.token = ASMToken::new(value)
307                    }
308
309                    let assembled = assemble(&asm, &mut next_frame);
310                    proposals.extend(assembled);
311
312                    if let ASMToken::End = next_frame.token {
313                        frames.push(next_frame)
314                    }
315                }
316                Symbol::VARIABLE { value, .. } => {
317                    let mut next_frame = frame.clone();
318                    next_frame.layers.last_mut().unwrap().id = IdState::Set(*successor);
319
320                    let next_layer = TrailLayer {
321                        graph: &cfg[&value],
322                        id: IdState::Unset,
323                    };
324                    next_frame.layers.push(next_layer);
325
326                    frames.push(next_frame);
327                }
328                Symbol::REFERENCE { value, .. } => {
329                    let mut next_frame = frame.clone();
330                    next_frame.layers.last_mut().unwrap().id = IdState::Set(*successor);
331
332                    if let ASMToken::End = frame.token {
333                        next_frame.token = ASMToken::new(&backrefs[value])
334                    }
335
336                    let assembled = assemble(&asm, &mut next_frame);
337                    proposals.extend(assembled);
338
339                    if let ASMToken::End = next_frame.token {
340                        frames.push(next_frame)
341                    }
342                }
343                Symbol::END { .. } if frame.step == ASMStep::default() => {
344                    let mut next_frame = frame.clone();
345                    next_frame.layers.last_mut().unwrap().id = IdState::Set(*successor);
346
347                    proposals.push(ASMProposal {
348                        frame: next_frame,
349                        value: String::new(),
350                    });
351                }
352                _ => (),
353            }
354        }
355    }
356}
357
358pub fn get_next_tokens<'a, 'b>(
359    schema: &'a ASMSchema,
360    state: &mut ASMState<'a>,
361    value: &String,
362) -> Result<Vec<String>, TrailError> {
363    let is_initial = state.proposals.is_empty();
364
365    state.proposals.retain(|p| p.value == *value);
366
367    if state.proposals.is_empty() && !is_initial {
368        return Err(TrailError(format!(
369            "Symbol `{:#?}` has no previous state.",
370            value
371        )));
372    }
373
374    asm_run(schema, state);
375
376    Ok(state.proposals.iter().map(|p| p.value.clone()).collect())
377}