1use std::collections::{HashMap, HashSet};
6
7use dcbor::prelude::*;
8
9use super::{Matcher, Path, Pattern};
10use crate::{Quantifier, Reluctance};
11
12#[derive(Debug, Clone, Copy, PartialEq, Eq)]
14pub enum Axis {
15 ArrayElement,
17 MapKey,
19 MapValue,
21 TaggedContent,
23}
24
25impl Axis {
26 pub fn children(&self, cbor: &CBOR) -> Vec<CBOR> {
28 match (self, cbor.as_case()) {
29 (Axis::ArrayElement, CBORCase::Array(arr)) => arr.clone(),
30 (Axis::MapKey, CBORCase::Map(map)) => {
31 map.iter().map(|(k, _v)| k.clone()).collect()
32 }
33 (Axis::MapValue, CBORCase::Map(map)) => {
34 map.iter().map(|(_k, v)| v.clone()).collect()
35 }
36 (Axis::TaggedContent, CBORCase::Tagged(_, content)) => {
37 vec![content.clone()]
38 }
39 _ => Vec::new(),
40 }
41 }
42}
43
44#[derive(Debug, Clone)]
46pub enum Instr {
47 MatchPredicate(usize),
49 MatchStructure(usize),
51 Split { a: usize, b: usize },
53 Jump(usize),
55 PushAxis(Axis),
57 Pop,
59 Save,
61 Accept,
63 Search {
65 pat_idx: usize,
66 capture_map: Vec<(String, usize)>,
67 },
68 ExtendSequence,
70 CombineSequence,
72 NotMatch { pat_idx: usize },
74 Repeat {
76 pat_idx: usize,
77 quantifier: Quantifier,
78 },
79 CaptureStart(usize),
81 CaptureEnd(usize),
83}
84
85#[derive(Debug, Clone)]
86pub struct Program {
87 pub code: Vec<Instr>,
88 pub literals: Vec<Pattern>,
89 pub capture_names: Vec<String>,
90}
91
92#[derive(Clone)]
94struct Thread {
95 pc: usize,
96 cbor: CBOR,
97 path: Path,
98 saved_paths: Vec<Path>,
100 captures: Vec<Vec<Path>>,
101 capture_stack: Vec<Vec<usize>>,
102}
103
104#[allow(clippy::panic)]
111pub(crate) fn atomic_paths(
112 p: &crate::pattern::Pattern,
113 cbor: &CBOR,
114) -> Vec<Path> {
115 use crate::pattern::Pattern::*;
116 match p {
117 Value(v) => v.paths(cbor),
118 Structure(s) => s.paths(cbor),
119 Meta(meta) => match meta {
120 crate::pattern::meta::MetaPattern::Any(_) => {
121 vec![vec![cbor.clone()]]
122 }
123 crate::pattern::meta::MetaPattern::Search(_) => {
124 panic!(
125 "SearchPattern should be compiled to Search instruction, not MatchPredicate"
126 );
127 }
128 _ => panic!(
129 "non-atomic meta pattern used in MatchPredicate: {:?}",
130 meta
131 ),
132 },
133 }
134}
135
136fn repeat_paths(
137 pat: &Pattern,
138 cbor: &CBOR,
139 path: &Path,
140 quantifier: Quantifier,
141) -> Vec<(CBOR, Path)> {
142 let mut states: Vec<Vec<(CBOR, Path)>> =
144 vec![vec![(cbor.clone(), path.clone())]];
145 let bound = quantifier.max().unwrap_or(usize::MAX);
146
147 for _ in 0..bound {
149 let mut next = Vec::new();
150 for (c, pth) in states.last().unwrap().iter() {
151 for sub_path in pat.paths(c) {
152 if let Some(last) = sub_path.last() {
153 if last.to_cbor_data() == c.to_cbor_data() {
154 continue; }
156 let mut combined = pth.clone();
157 if sub_path.first() == Some(c) {
158 combined.extend(sub_path.iter().skip(1).cloned());
159 } else {
160 combined.extend(sub_path.iter().cloned());
161 }
162 next.push((last.clone(), combined));
163 }
164 }
165 }
166 if next.is_empty() {
167 break; }
169 states.push(next);
170 }
171
172 let has_zero_rep = quantifier.min() == 0;
174 let zero_rep_result = if has_zero_rep {
175 vec![(cbor.clone(), path.clone())]
176 } else {
177 vec![]
178 };
179
180 let max_possible = states.len() - 1;
182 let max_allowed = bound.min(max_possible);
183
184 if max_allowed < quantifier.min() && quantifier.min() > 0 {
186 return Vec::new();
187 }
188
189 let min_count = if quantifier.min() == 0 {
192 1
193 } else {
194 quantifier.min()
195 };
196 let max_count = if max_allowed < min_count {
197 return zero_rep_result;
198 } else {
199 max_allowed
200 };
201
202 let count_range = min_count..=max_count;
203
204 let counts: Vec<usize> = match quantifier.reluctance() {
206 Reluctance::Greedy => count_range.rev().collect(),
207 Reluctance::Lazy => count_range.collect(),
208 Reluctance::Possessive => {
209 if max_count >= min_count {
210 vec![max_count]
211 } else {
212 vec![]
213 }
214 }
215 };
216
217 let mut out = Vec::new();
219
220 if matches!(quantifier.reluctance(), Reluctance::Greedy) {
222 for c in counts {
224 if let Some(list) = states.get(c) {
225 out.extend(list.clone());
226 }
227 }
228
229 if has_zero_rep && out.is_empty() {
232 out.push((cbor.clone(), path.clone()));
233 }
234 } else {
235 if has_zero_rep {
237 out.push((cbor.clone(), path.clone()));
238 }
239
240 for c in counts {
242 if let Some(list) = states.get(c) {
243 out.extend(list.clone());
244 }
245 }
246 }
247
248 out
249}
250
251fn run_thread(
256 prog: &Program,
257 start: Thread,
258 out: &mut Vec<(Path, Vec<Vec<Path>>)>,
259) -> bool {
260 use Instr::*;
261 let mut produced = false;
262 let mut stack = vec![start];
263
264 while let Some(mut th) = stack.pop() {
265 loop {
266 match prog.code[th.pc] {
267 MatchPredicate(idx) => {
268 if atomic_paths(&prog.literals[idx], &th.cbor).is_empty() {
269 break;
270 }
271 th.pc += 1;
272 }
273 MatchStructure(idx) => {
274 if let crate::pattern::Pattern::Structure(sp) =
277 &prog.literals[idx]
278 {
279 let (structure_paths, structure_captures) =
280 sp.paths_with_captures(&th.cbor);
281
282 if structure_paths.is_empty() {
283 break;
284 }
285
286 for (i, name) in prog.capture_names.iter().enumerate() {
288 if let Some(captured_paths) =
289 structure_captures.get(name)
290 {
291 while th.captures.len() <= i {
293 th.captures.push(Vec::new());
294 }
295 th.captures[i].extend(captured_paths.clone());
296 }
297 }
298
299 if structure_paths.len() == 1
301 && structure_paths[0].len() == 1
302 {
303 th.pc += 1;
305 } else {
306 for structure_path in structure_paths {
309 if let Some(target) = structure_path.last() {
310 let mut new_thread = th.clone();
311 new_thread.cbor = target.clone();
312 new_thread.path.extend(
313 structure_path.iter().skip(1).cloned(),
314 );
315 new_thread.pc += 1;
316 stack.push(new_thread);
317 }
318 }
319 break;
320 }
321 } else {
322 panic!(
323 "MatchStructure used with non-structure pattern"
324 );
325 }
326 }
327 Split { a, b } => {
328 let mut th2 = th.clone();
329 th2.pc = b;
330 stack.push(th2);
331 th.pc = a;
332 }
333 Jump(addr) => {
334 th.pc = addr;
335 }
336 PushAxis(axis) => {
337 let children = axis.children(&th.cbor);
338 for child in children {
339 let mut new_thread = th.clone();
340 new_thread.cbor = child.clone();
341 new_thread.path.push(child);
342 new_thread.pc += 1;
343 stack.push(new_thread);
344 }
345 break;
346 }
347 Pop => {
348 if th.path.is_empty() {
349 break;
350 }
351 th.path.pop();
352 if let Some(parent) = th.path.last() {
353 th.cbor = parent.clone();
354 }
355 th.pc += 1;
356 }
357 Save => {
358 out.push((th.path.clone(), th.captures.clone()));
359 produced = true;
360 th.pc += 1;
361 }
362 Accept => {
363 out.push((th.path.clone(), th.captures.clone()));
364 produced = true;
365 break;
366 }
367 Search { pat_idx, ref capture_map } => {
368 let (search_results, captures) =
370 prog.literals[pat_idx].paths_with_captures(&th.cbor);
371
372 for search_path in search_results {
373 let mut new_thread = th.clone();
374 new_thread.path = search_path.clone();
375
376 for (name, capture_idx) in capture_map {
379 if *capture_idx < new_thread.captures.len()
380 && let Some(capture_paths) = captures.get(name)
381 {
382 for capture_path in capture_paths {
383 new_thread.captures[*capture_idx]
384 .push(capture_path.clone());
385 }
386 }
387 }
388
389 new_thread.pc += 1;
390 stack.push(new_thread);
391 }
392 break;
393 }
394 ExtendSequence => {
395 th.saved_paths.push(th.path.clone());
396 if let Some(last) = th.path.last().cloned() {
397 th.path = vec![last.clone()];
398 th.cbor = last;
399 }
400 th.pc += 1;
401 }
402 CombineSequence => {
403 if let Some(saved) = th.saved_paths.pop() {
404 let mut combined = saved;
405 if th.path.len() > 1 {
406 combined.extend(th.path.iter().skip(1).cloned());
407 }
408 th.path = combined;
409 }
410 th.pc += 1;
411 }
412 NotMatch { pat_idx } => {
413 if !prog.literals[pat_idx].paths(&th.cbor).is_empty() {
414 break; }
416 th.pc += 1;
417 }
418 Repeat { pat_idx, quantifier } => {
419 let repeat_results = repeat_paths(
420 &prog.literals[pat_idx],
421 &th.cbor,
422 &th.path,
423 quantifier,
424 );
425 for (result_cbor, result_path) in repeat_results {
426 let mut new_thread = th.clone();
427 new_thread.cbor = result_cbor;
428 new_thread.path = result_path;
429 new_thread.pc += 1;
430 stack.push(new_thread);
431 }
432 break;
433 }
434 CaptureStart(idx) => {
435 while th.captures.len() <= idx {
437 th.captures.push(Vec::new());
438 }
439 while th.capture_stack.len() <= idx {
440 th.capture_stack.push(Vec::new());
441 }
442 th.capture_stack[idx].push(th.path.len());
444 th.pc += 1;
445 }
446 CaptureEnd(idx) => {
447 if let Some(_start_len) = th
449 .capture_stack
450 .get_mut(idx)
451 .and_then(|stack| stack.pop())
452 {
453 let captured_path = th.path.clone();
457 if let Some(captures) = th.captures.get_mut(idx) {
458 captures.push(captured_path);
459 }
460 }
461 th.pc += 1;
462 }
463 }
464 }
465 }
466
467 produced
468}
469
470pub fn run(
473 prog: &Program,
474 root: &CBOR,
475) -> (Vec<Path>, HashMap<String, Vec<Path>>) {
476 let start = Thread {
477 pc: 0,
478 cbor: root.clone(),
479 path: vec![root.clone()],
480 saved_paths: Vec::new(),
481 captures: Vec::new(),
482 capture_stack: Vec::new(),
483 };
484
485 let mut results = Vec::new();
486 run_thread(prog, start, &mut results);
487
488 let mut seen_paths = HashSet::new();
490 let paths: Vec<Path> = results
491 .iter()
492 .filter_map(|(path, _)| {
493 if seen_paths.contains(path) {
494 None } else {
496 seen_paths.insert(path.clone());
497 Some(path.clone()) }
499 })
500 .collect();
501
502 let mut captures = HashMap::new();
506 for (i, name) in prog.capture_names.iter().enumerate() {
507 let mut captured_paths = Vec::new();
508 for (_, thread_captures) in &results {
509 if let Some(capture_group) = thread_captures.get(i) {
510 captured_paths.extend(capture_group.clone());
511 }
512 }
513
514 if !captured_paths.is_empty() {
517 let mut seen_capture_paths = HashSet::new();
518 let deduplicated_captured_paths: Vec<Path> = captured_paths
519 .into_iter()
520 .filter(|path| {
521 if seen_capture_paths.contains(path) {
522 false } else {
524 seen_capture_paths.insert(path.clone());
525 true }
527 })
528 .collect();
529 captures.insert(name.clone(), deduplicated_captured_paths);
530 }
531 }
532
533 (paths, captures)
534}
535
536pub struct Vm;
538
539impl Vm {
540 pub fn run(
542 prog: &Program,
543 root: &CBOR,
544 ) -> (Vec<Path>, HashMap<String, Vec<Path>>) {
545 run(prog, root)
546 }
547}