1use std::collections::{HashMap, HashSet};
6
7use dcbor::prelude::*;
8
9use super::{Matcher, Path, Pattern};
10use crate::{Quantifier, Reluctance};
11
12#[derive(Debug, Clone, Copy, PartialEq, Eq)]
14pub enum Axis {
15 ArrayElement,
17 MapKey,
19 MapValue,
21 TaggedContent,
23}
24
25impl Axis {
26 pub fn children(&self, cbor: &CBOR) -> Vec<CBOR> {
28 match (self, cbor.as_case()) {
29 (Axis::ArrayElement, CBORCase::Array(arr)) => arr.clone(),
30 (Axis::MapKey, CBORCase::Map(map)) => {
31 map.iter().map(|(k, _v)| k.clone()).collect()
32 }
33 (Axis::MapValue, CBORCase::Map(map)) => {
34 map.iter().map(|(_k, v)| v.clone()).collect()
35 }
36 (Axis::TaggedContent, CBORCase::Tagged(_, content)) => {
37 vec![content.clone()]
38 }
39 _ => Vec::new(),
40 }
41 }
42}
43
44#[derive(Debug, Clone)]
46pub enum Instr {
47 MatchPredicate(usize),
49 MatchStructure(usize),
51 Split { a: usize, b: usize },
53 Jump(usize),
55 PushAxis(Axis),
57 Pop,
59 Save,
61 Accept,
63 Search {
65 pat_idx: usize,
66 capture_map: Vec<(String, usize)>,
67 },
68 ExtendSequence,
70 CombineSequence,
72 NotMatch { pat_idx: usize },
74 Repeat {
76 pat_idx: usize,
77 quantifier: Quantifier,
78 },
79 CaptureStart(usize),
81 CaptureEnd(usize),
83}
84
85#[derive(Debug, Clone)]
86pub struct Program {
87 pub code: Vec<Instr>,
88 pub literals: Vec<Pattern>,
89 pub capture_names: Vec<String>,
90}
91
92#[derive(Clone)]
94struct Thread {
95 pc: usize,
96 cbor: CBOR,
97 path: Path,
98 saved_paths: Vec<Path>,
100 captures: Vec<Vec<Path>>,
101 capture_stack: Vec<Vec<usize>>,
102}
103
104#[allow(clippy::panic)]
111pub(crate) fn atomic_paths(
112 p: &crate::pattern::Pattern,
113 cbor: &CBOR,
114) -> Vec<Path> {
115 use crate::pattern::Pattern::*;
116 match p {
117 Value(v) => v.paths(cbor),
118 Structure(s) => s.paths(cbor),
119 Meta(meta) => match meta {
120 crate::pattern::meta::MetaPattern::Any(_) => {
121 vec![vec![cbor.clone()]]
122 }
123 crate::pattern::meta::MetaPattern::Search(_) => {
124 panic!(
125 "SearchPattern should be compiled to Search instruction, not MatchPredicate"
126 );
127 }
128 _ => panic!(
129 "non-atomic meta pattern used in MatchPredicate: {:?}",
130 meta
131 ),
132 },
133 }
134}
135
136fn repeat_paths(
137 pat: &Pattern,
138 cbor: &CBOR,
139 path: &Path,
140 quantifier: Quantifier,
141) -> Vec<(CBOR, Path)> {
142 let mut states: Vec<Vec<(CBOR, Path)>> =
144 vec![vec![(cbor.clone(), path.clone())]];
145 let bound = quantifier.max().unwrap_or(usize::MAX);
146
147 for _ in 0..bound {
149 let mut next = Vec::new();
150 for (c, pth) in states.last().unwrap().iter() {
151 for sub_path in pat.paths(c) {
152 if let Some(last) = sub_path.last() {
153 if last.to_cbor_data() == c.to_cbor_data() {
154 continue; }
156 let mut combined = pth.clone();
157 if sub_path.first() == Some(c) {
158 combined.extend(sub_path.iter().skip(1).cloned());
159 } else {
160 combined.extend(sub_path.iter().cloned());
161 }
162 next.push((last.clone(), combined));
163 }
164 }
165 }
166 if next.is_empty() {
167 break; }
169 states.push(next);
170 }
171
172 let has_zero_rep = quantifier.min() == 0;
174 let zero_rep_result = if has_zero_rep {
175 vec![(cbor.clone(), path.clone())]
176 } else {
177 vec![]
178 };
179
180 let max_possible = states.len() - 1;
182 let max_allowed = bound.min(max_possible);
183
184 if max_allowed < quantifier.min() && quantifier.min() > 0 {
186 return Vec::new();
187 }
188
189 let min_count = if quantifier.min() == 0 {
192 1
193 } else {
194 quantifier.min()
195 };
196 let max_count = if max_allowed < min_count {
197 return zero_rep_result;
198 } else {
199 max_allowed
200 };
201
202 let count_range = min_count..=max_count;
203
204 let counts: Vec<usize> = match quantifier.reluctance() {
206 Reluctance::Greedy => count_range.rev().collect(),
207 Reluctance::Lazy => count_range.collect(),
208 Reluctance::Possessive => {
209 if max_count >= min_count {
210 vec![max_count]
211 } else {
212 vec![]
213 }
214 }
215 };
216
217 let mut out = Vec::new();
219
220 if matches!(quantifier.reluctance(), Reluctance::Greedy) {
222 for c in counts {
224 if let Some(list) = states.get(c) {
225 out.extend(list.clone());
226 }
227 }
228
229 if has_zero_rep && out.is_empty() {
232 out.push((cbor.clone(), path.clone()));
233 }
234 } else {
235 if has_zero_rep {
237 out.push((cbor.clone(), path.clone()));
238 }
239
240 for c in counts {
242 if let Some(list) = states.get(c) {
243 out.extend(list.clone());
244 }
245 }
246 }
247
248 out
249}
250
251fn run_thread(
256 prog: &Program,
257 start: Thread,
258 out: &mut Vec<(Path, Vec<Vec<Path>>)>,
259) -> bool {
260 use Instr::*;
261 let mut produced = false;
262 let mut stack = vec![start];
263
264 while let Some(mut th) = stack.pop() {
265 loop {
266 match prog.code[th.pc] {
267 MatchPredicate(idx) => {
268 if atomic_paths(&prog.literals[idx], &th.cbor).is_empty() {
269 break;
270 }
271 th.pc += 1;
272 }
273 MatchStructure(idx) => {
274 if let crate::pattern::Pattern::Structure(sp) =
277 &prog.literals[idx]
278 {
279 let (structure_paths, structure_captures) =
280 sp.paths_with_captures(&th.cbor);
281
282 if structure_paths.is_empty() {
283 break;
284 }
285
286 for (i, name) in prog.capture_names.iter().enumerate() {
288 if let Some(captured_paths) =
289 structure_captures.get(name)
290 {
291 while th.captures.len() <= i {
293 th.captures.push(Vec::new());
294 }
295 th.captures[i].extend(captured_paths.clone());
296 }
297 }
298
299 if structure_paths.len() == 1
301 && structure_paths[0].len() == 1
302 {
303 th.pc += 1;
305 } else {
306 for structure_path in structure_paths {
309 if let Some(target) = structure_path.last() {
310 let mut new_thread = th.clone();
311 new_thread.cbor = target.clone();
312 new_thread.path.extend(
313 structure_path.iter().skip(1).cloned(),
314 );
315 new_thread.pc += 1;
316 stack.push(new_thread);
317 }
318 }
319 break;
320 }
321 } else {
322 panic!(
323 "MatchStructure used with non-structure pattern"
324 );
325 }
326 }
327 Split { a, b } => {
328 let mut th2 = th.clone();
329 th2.pc = b;
330 stack.push(th2);
331 th.pc = a;
332 }
333 Jump(addr) => {
334 th.pc = addr;
335 }
336 PushAxis(axis) => {
337 let children = axis.children(&th.cbor);
338 for child in children {
339 let mut new_thread = th.clone();
340 new_thread.cbor = child.clone();
341 new_thread.path.push(child);
342 new_thread.pc += 1;
343 stack.push(new_thread);
344 }
345 break;
346 }
347 Pop => {
348 if th.path.is_empty() {
349 break;
350 }
351 th.path.pop();
352 if let Some(parent) = th.path.last() {
353 th.cbor = parent.clone();
354 }
355 th.pc += 1;
356 }
357 Save => {
358 out.push((th.path.clone(), th.captures.clone()));
359 produced = true;
360 th.pc += 1;
361 }
362 Accept => {
363 out.push((th.path.clone(), th.captures.clone()));
364 produced = true;
365 break;
366 }
367 Search { pat_idx, ref capture_map } => {
368 let (search_results, captures) =
370 prog.literals[pat_idx].paths_with_captures(&th.cbor);
371
372 for search_path in search_results {
373 let mut new_thread = th.clone();
374 new_thread.path = search_path.clone();
375
376 for (name, capture_idx) in capture_map {
379 if *capture_idx < new_thread.captures.len() {
380 if let Some(capture_paths) = captures.get(name)
381 {
382 for capture_path in capture_paths {
383 new_thread.captures[*capture_idx]
384 .push(capture_path.clone());
385 }
386 }
387 }
388 }
389
390 new_thread.pc += 1;
391 stack.push(new_thread);
392 }
393 break;
394 }
395 ExtendSequence => {
396 th.saved_paths.push(th.path.clone());
397 if let Some(last) = th.path.last().cloned() {
398 th.path = vec![last.clone()];
399 th.cbor = last;
400 }
401 th.pc += 1;
402 }
403 CombineSequence => {
404 if let Some(saved) = th.saved_paths.pop() {
405 let mut combined = saved;
406 if th.path.len() > 1 {
407 combined.extend(th.path.iter().skip(1).cloned());
408 }
409 th.path = combined;
410 }
411 th.pc += 1;
412 }
413 NotMatch { pat_idx } => {
414 if !prog.literals[pat_idx].paths(&th.cbor).is_empty() {
415 break; }
417 th.pc += 1;
418 }
419 Repeat { pat_idx, quantifier } => {
420 let repeat_results = repeat_paths(
421 &prog.literals[pat_idx],
422 &th.cbor,
423 &th.path,
424 quantifier,
425 );
426 for (result_cbor, result_path) in repeat_results {
427 let mut new_thread = th.clone();
428 new_thread.cbor = result_cbor;
429 new_thread.path = result_path;
430 new_thread.pc += 1;
431 stack.push(new_thread);
432 }
433 break;
434 }
435 CaptureStart(idx) => {
436 while th.captures.len() <= idx {
438 th.captures.push(Vec::new());
439 }
440 while th.capture_stack.len() <= idx {
441 th.capture_stack.push(Vec::new());
442 }
443 th.capture_stack[idx].push(th.path.len());
445 th.pc += 1;
446 }
447 CaptureEnd(idx) => {
448 if let Some(_start_len) = th
450 .capture_stack
451 .get_mut(idx)
452 .and_then(|stack| stack.pop())
453 {
454 let captured_path = th.path.clone();
458 if let Some(captures) = th.captures.get_mut(idx) {
459 captures.push(captured_path);
460 }
461 }
462 th.pc += 1;
463 }
464 }
465 }
466 }
467
468 produced
469}
470
471pub fn run(
474 prog: &Program,
475 root: &CBOR,
476) -> (Vec<Path>, HashMap<String, Vec<Path>>) {
477 let start = Thread {
478 pc: 0,
479 cbor: root.clone(),
480 path: vec![root.clone()],
481 saved_paths: Vec::new(),
482 captures: Vec::new(),
483 capture_stack: Vec::new(),
484 };
485
486 let mut results = Vec::new();
487 run_thread(prog, start, &mut results);
488
489 let mut seen_paths = HashSet::new();
491 let paths: Vec<Path> = results
492 .iter()
493 .filter_map(|(path, _)| {
494 if seen_paths.contains(path) {
495 None } else {
497 seen_paths.insert(path.clone());
498 Some(path.clone()) }
500 })
501 .collect();
502
503 let mut captures = HashMap::new();
507 for (i, name) in prog.capture_names.iter().enumerate() {
508 let mut captured_paths = Vec::new();
509 for (_, thread_captures) in &results {
510 if let Some(capture_group) = thread_captures.get(i) {
511 captured_paths.extend(capture_group.clone());
512 }
513 }
514
515 if !captured_paths.is_empty() {
518 let mut seen_capture_paths = HashSet::new();
519 let deduplicated_captured_paths: Vec<Path> = captured_paths
520 .into_iter()
521 .filter(|path| {
522 if seen_capture_paths.contains(path) {
523 false } else {
525 seen_capture_paths.insert(path.clone());
526 true }
528 })
529 .collect();
530 captures.insert(name.clone(), deduplicated_captured_paths);
531 }
532 }
533
534 (paths, captures)
535}
536
537pub struct Vm;
539
540impl Vm {
541 pub fn run(
543 prog: &Program,
544 root: &CBOR,
545 ) -> (Vec<Path>, HashMap<String, Vec<Path>>) {
546 run(prog, root)
547 }
548}