Skip to main content

uni_query/query/df_graph/
nfa.rs

1use std::collections::HashSet;
2use uni_store::storage::direction::Direction;
3
4/// NFA state identifier.
5pub type NfaStateId = u16;
6
7/// Default maximum hops for unbounded VLP patterns (`*` without upper bound).
8pub const DEFAULT_MAX_HOPS: usize = 128;
9
10/// Path traversal semantics controlling which paths are valid.
11#[derive(Clone, Debug, PartialEq)]
12pub enum PathMode {
13    /// No restrictions on repeated edges or nodes.
14    Walk,
15    /// No repeated edges (OpenCypher default for VLP).
16    Trail,
17    /// No repeated nodes.
18    Acyclic,
19    /// No repeated nodes except start may equal end.
20    Simple,
21}
22
23/// Selects which subset of matching paths to return.
24#[derive(Clone, Debug)]
25pub enum PathSelector {
26    /// All matching paths (default).
27    All,
28    /// One arbitrary path per endpoint pair.
29    Any,
30    /// One shortest path per endpoint pair.
31    AnyShortest,
32    /// All shortest paths per endpoint pair.
33    AllShortest,
34    /// K shortest paths per endpoint pair.
35    ShortestK(usize),
36}
37
38/// Determines the BFS strategy based on how the VLP result is consumed.
39#[derive(Clone, Debug)]
40pub enum VlpOutputMode {
41    /// No path_variable, no step_variable — only endpoints and hop count.
42    EndpointsOnly,
43    /// Only `length(p)` or `min/max(length(p))` is used.
44    LengthOnly { needs_max: bool },
45    /// Only `count(p)` is used.
46    CountOnly,
47    /// Path variable is used in RETURN.
48    FullPath,
49    /// Step variable is bound (e.g., `[r*1..3]`).
50    StepVariable,
51    /// `shortestPath()` or `allShortestPaths()`.
52    ShortestPath { selector: PathSelector },
53    /// EXISTS pattern with VLP.
54    Existential,
55}
56
57/// Constraint on a vertex at a given NFA state (for QPP intermediate nodes).
58///
59/// V1: label-only constraint checked via L0 visibility (O(1) for cached lookups).
60/// Future: multi-label, property predicates, WHERE clauses.
61#[derive(Clone, Debug, PartialEq)]
62pub enum VertexConstraint {
63    /// Vertex must have this label.
64    Label(String),
65}
66
67/// One step (hop) in a Quantified Path Pattern sub-pattern.
68#[derive(Clone, Debug)]
69pub struct QppStep {
70    pub edge_type_ids: Vec<u32>,
71    pub direction: Direction,
72    pub target_constraint: Option<VertexConstraint>,
73}
74
75/// A single NFA transition between states.
76#[derive(Clone, Debug)]
77pub struct NfaTransition {
78    pub from: NfaStateId,
79    pub to: NfaStateId,
80    pub edge_type_ids: Vec<u32>,
81    pub direction: Direction,
82}
83
84/// Non-deterministic finite automaton for variable-length path matching.
85///
86/// For a simple VLP pattern `[:TYPE*min..max]`, this is a linear chain:
87/// ```text
88/// q0 --TYPE--> q1 --TYPE--> q2 --TYPE--> ... --TYPE--> q(max)
89///              ^                                        ^
90///              accepting if >= min_hops                  accepting
91/// ```
92///
93/// For a QPP pattern `((a)-[:T1]->(b:Person)-[:T2]->(c)){2,4}`:
94/// ```text
95/// q0 --T1--> q1[Person] --T2--> q2 --T1--> q3[Person] --T2--> q4 --T1--> ...
96///                                ^                              ^
97///                                accepting (1 iter)             accepting (2 iter)
98/// ```
99/// State constraints are checked during BFS expansion.
100#[derive(Clone, Debug)]
101pub struct PathNfa {
102    transitions: Vec<NfaTransition>,
103    accepting_states: HashSet<NfaStateId>,
104    start_state: NfaStateId,
105    num_states: u16,
106    /// Index into `transitions`: `transitions_by_state[state] = (start_idx, end_idx)`.
107    transitions_by_state: Vec<(usize, usize)>,
108    /// Per-state vertex constraint. `state_constraints[i]` is the constraint
109    /// that must hold on a vertex reaching NFA state `i`. `None` = no constraint.
110    state_constraints: Vec<Option<VertexConstraint>>,
111}
112
113impl PathNfa {
114    /// Compile a simple VLP pattern into a linear-chain NFA.
115    ///
116    /// Creates states `q0..q(max_hops)`, with transitions from `qi` to `qi+1`.
117    /// Accepting states are `q(min_hops)..=q(max_hops)`.
118    pub fn from_vlp(
119        edge_type_ids: Vec<u32>,
120        direction: Direction,
121        min_hops: usize,
122        max_hops: usize,
123    ) -> Self {
124        // Gracefully handle empty intervals (min > max) — return a trivial NFA
125        // with no accepting states. The BFS will find nothing and return 0 results.
126        if min_hops > max_hops {
127            return Self {
128                transitions: Vec::new(),
129                accepting_states: HashSet::new(),
130                start_state: 0,
131                num_states: 1,
132                transitions_by_state: vec![(0, 0)],
133                state_constraints: vec![None],
134            };
135        }
136
137        let num_states = (max_hops + 1) as u16;
138
139        // Build transitions: q0->q1, q1->q2, ..., q(max-1)->q(max)
140        let mut transitions = Vec::with_capacity(max_hops);
141        for i in 0..max_hops {
142            transitions.push(NfaTransition {
143                from: i as NfaStateId,
144                to: (i + 1) as NfaStateId,
145                edge_type_ids: edge_type_ids.clone(),
146                direction,
147            });
148        }
149
150        // Accepting states: q(min)..=q(max)
151        let accepting_states: HashSet<NfaStateId> =
152            (min_hops..=max_hops).map(|s| s as NfaStateId).collect();
153
154        // Build transitions_by_state index.
155        // State i (where i < max_hops) has exactly one transition at index i.
156        // State max_hops has no outgoing transitions.
157        let mut transitions_by_state = Vec::with_capacity(num_states as usize);
158        for i in 0..num_states {
159            if (i as usize) < max_hops {
160                transitions_by_state.push((i as usize, i as usize + 1));
161            } else {
162                let len = transitions.len();
163                transitions_by_state.push((len, len));
164            }
165        }
166
167        // No state constraints for simple VLP patterns.
168        let state_constraints = vec![None; num_states as usize];
169
170        Self {
171            transitions,
172            accepting_states,
173            start_state: 0,
174            num_states,
175            transitions_by_state,
176            state_constraints,
177        }
178    }
179
180    /// Get all transitions from a given NFA state.
181    pub fn transitions_from(&self, state: NfaStateId) -> &[NfaTransition] {
182        if (state as usize) < self.transitions_by_state.len() {
183            let (start, end) = self.transitions_by_state[state as usize];
184            &self.transitions[start..end]
185        } else {
186            &[]
187        }
188    }
189
190    /// Check if a state is an accepting state.
191    pub fn is_accepting(&self, state: NfaStateId) -> bool {
192        self.accepting_states.contains(&state)
193    }
194
195    /// Get the start state of the NFA.
196    pub fn start_state(&self) -> NfaStateId {
197        self.start_state
198    }
199
200    /// Get the total number of states.
201    pub fn num_states(&self) -> u16 {
202        self.num_states
203    }
204
205    /// Get a reference to the accepting states set.
206    pub fn accepting_states(&self) -> &HashSet<NfaStateId> {
207        &self.accepting_states
208    }
209
210    /// Get the state constraint for a given NFA state.
211    pub fn state_constraint(&self, state: NfaStateId) -> Option<&VertexConstraint> {
212        self.state_constraints
213            .get(state as usize)
214            .and_then(|c| c.as_ref())
215    }
216
217    /// Compile a QPP (Quantified Path Pattern) into an NFA with per-state constraints.
218    ///
219    /// `steps`: the sub-pattern hops (e.g., 2 steps for `(a)-[:T1]->(b:Person)-[:T2]->(c)`)
220    /// `min_iterations`, `max_iterations`: quantifier bounds (iterations, NOT hops)
221    ///
222    /// For a 2-step sub-pattern with `{2,4}`:
223    /// - Total hops = 2 * 4 = 8
224    /// - States: q0, q1, q2, q3, q4, q5, q6, q7, q8 (9 states)
225    /// - Accepting at iteration boundaries: q4, q6, q8 (iterations 2, 3, 4)
226    /// - State constraints: step\[0\].target_constraint at q1, q3, q5, q7;
227    ///   step\[1\].target_constraint at q2, q4, q6, q8
228    pub fn from_qpp(steps: Vec<QppStep>, min_iterations: usize, max_iterations: usize) -> Self {
229        assert!(!steps.is_empty(), "QPP must have at least one step");
230
231        // Gracefully handle empty intervals (min > max) — return a trivial NFA
232        // with no accepting states.
233        if min_iterations > max_iterations {
234            return Self {
235                transitions: Vec::new(),
236                accepting_states: HashSet::new(),
237                start_state: 0,
238                num_states: 1,
239                transitions_by_state: vec![(0, 0)],
240                state_constraints: vec![None],
241            };
242        }
243
244        let hops_per_iter = steps.len();
245        let total_hops = hops_per_iter * max_iterations;
246        let num_states = (total_hops + 1) as u16;
247
248        // Build transitions: one per hop, cycling through steps
249        let mut transitions = Vec::with_capacity(total_hops);
250        let mut state_constraints = vec![None; num_states as usize];
251
252        for hop in 0..total_hops {
253            let step = &steps[hop % hops_per_iter];
254            let from = hop as NfaStateId;
255            let to = (hop + 1) as NfaStateId;
256
257            transitions.push(NfaTransition {
258                from,
259                to,
260                edge_type_ids: step.edge_type_ids.clone(),
261                direction: step.direction,
262            });
263
264            // Apply the step's target constraint to the destination state
265            if let Some(ref constraint) = step.target_constraint {
266                state_constraints[to as usize] = Some(constraint.clone());
267            }
268        }
269
270        // Accepting states at iteration boundaries >= min_iterations
271        let mut accepting_states = HashSet::new();
272        for iter in min_iterations..=max_iterations {
273            accepting_states.insert((iter * hops_per_iter) as NfaStateId);
274        }
275
276        // Build transitions_by_state index (same structure as from_vlp: linear chain)
277        let mut transitions_by_state = Vec::with_capacity(num_states as usize);
278        for i in 0..num_states {
279            if (i as usize) < total_hops {
280                transitions_by_state.push((i as usize, i as usize + 1));
281            } else {
282                let len = transitions.len();
283                transitions_by_state.push((len, len));
284            }
285        }
286
287        Self {
288            transitions,
289            accepting_states,
290            start_state: 0,
291            num_states,
292            transitions_by_state,
293            state_constraints,
294        }
295    }
296}
297
298#[cfg(test)]
299mod tests {
300    use super::*;
301
302    #[test]
303    fn test_vlp_to_nfa_basic() {
304        // [:KNOWS*2..5] → 6 states (q0..q5), accepting {q2, q3, q4, q5}
305        let nfa = PathNfa::from_vlp(vec![1], Direction::Outgoing, 2, 5);
306        assert_eq!(nfa.num_states(), 6);
307        assert_eq!(nfa.start_state(), 0);
308        assert_eq!(nfa.transitions.len(), 5);
309        assert_eq!(
310            nfa.accepting_states(),
311            &[2, 3, 4, 5].into_iter().collect::<HashSet<NfaStateId>>()
312        );
313        // VLP has no state constraints
314        for i in 0..6 {
315            assert!(nfa.state_constraint(i).is_none());
316        }
317    }
318
319    #[test]
320    fn test_vlp_to_nfa_unbounded() {
321        // [:KNOWS*] → equivalent to *1..DEFAULT_MAX_HOPS
322        let nfa = PathNfa::from_vlp(vec![1], Direction::Outgoing, 1, DEFAULT_MAX_HOPS);
323        assert_eq!(nfa.num_states(), (DEFAULT_MAX_HOPS + 1) as u16);
324        assert!(!nfa.is_accepting(0));
325        assert!(nfa.is_accepting(1));
326        assert!(nfa.is_accepting(DEFAULT_MAX_HOPS as NfaStateId));
327        assert_eq!(nfa.transitions.len(), DEFAULT_MAX_HOPS);
328    }
329
330    #[test]
331    fn test_vlp_to_nfa_zero_min() {
332        // [:KNOWS*0..3] → 4 states, q0 IS accepting
333        let nfa = PathNfa::from_vlp(vec![1], Direction::Outgoing, 0, 3);
334        assert_eq!(nfa.num_states(), 4);
335        assert!(nfa.is_accepting(0));
336        assert!(nfa.is_accepting(1));
337        assert!(nfa.is_accepting(2));
338        assert!(nfa.is_accepting(3));
339    }
340
341    #[test]
342    fn test_vlp_to_nfa_exact() {
343        // [:KNOWS*3] → 4 states, only q3 is accepting
344        let nfa = PathNfa::from_vlp(vec![1], Direction::Outgoing, 3, 3);
345        assert_eq!(nfa.num_states(), 4);
346        assert!(!nfa.is_accepting(0));
347        assert!(!nfa.is_accepting(1));
348        assert!(!nfa.is_accepting(2));
349        assert!(nfa.is_accepting(3));
350    }
351
352    #[test]
353    fn test_vlp_to_nfa_multi_type() {
354        // [:KNOWS|LIKES*1..3] → transitions carry both type IDs
355        let nfa = PathNfa::from_vlp(vec![1, 2], Direction::Outgoing, 1, 3);
356        assert_eq!(nfa.num_states(), 4);
357        assert_eq!(nfa.transitions.len(), 3);
358        for t in &nfa.transitions {
359            assert_eq!(t.edge_type_ids, vec![1, 2]);
360        }
361    }
362
363    #[test]
364    fn test_vlp_to_nfa_direction_both() {
365        // Undirected VLP: all transitions carry Direction::Both
366        let nfa = PathNfa::from_vlp(vec![1], Direction::Both, 1, 2);
367        assert_eq!(nfa.num_states(), 3);
368        for t in &nfa.transitions {
369            assert_eq!(t.direction, Direction::Both);
370        }
371    }
372
373    #[test]
374    fn test_nfa_transitions_from() {
375        let nfa = PathNfa::from_vlp(vec![1], Direction::Outgoing, 1, 3);
376        // q0 has one outgoing transition (q0->q1)
377        let t0 = nfa.transitions_from(0);
378        assert_eq!(t0.len(), 1);
379        assert_eq!(t0[0].from, 0);
380        assert_eq!(t0[0].to, 1);
381
382        // q2 has one outgoing transition (q2->q3)
383        let t2 = nfa.transitions_from(2);
384        assert_eq!(t2.len(), 1);
385        assert_eq!(t2[0].from, 2);
386        assert_eq!(t2[0].to, 3);
387
388        // q3 (max state) has no outgoing transitions
389        let t3 = nfa.transitions_from(3);
390        assert_eq!(t3.len(), 0);
391
392        // Out-of-range state returns empty slice
393        let t99 = nfa.transitions_from(99);
394        assert_eq!(t99.len(), 0);
395    }
396
397    #[test]
398    fn test_nfa_is_accepting() {
399        let nfa = PathNfa::from_vlp(vec![1], Direction::Outgoing, 2, 4);
400        assert!(!nfa.is_accepting(0));
401        assert!(!nfa.is_accepting(1));
402        assert!(nfa.is_accepting(2));
403        assert!(nfa.is_accepting(3));
404        assert!(nfa.is_accepting(4));
405        assert!(!nfa.is_accepting(5)); // doesn't exist
406    }
407
408    // --- QPP tests ---
409
410    #[test]
411    fn test_qpp_two_hop_basic() {
412        // ((a)-[:T1]->(b)-[:T2]->(c)){1,3}
413        // 2 steps × 3 max iterations = 6 total hops, 7 states
414        // Accepting at iteration boundaries: 1*2=2, 2*2=4, 3*2=6
415        let steps = vec![
416            QppStep {
417                edge_type_ids: vec![1],
418                direction: Direction::Outgoing,
419                target_constraint: None,
420            },
421            QppStep {
422                edge_type_ids: vec![2],
423                direction: Direction::Outgoing,
424                target_constraint: None,
425            },
426        ];
427        let nfa = PathNfa::from_qpp(steps, 1, 3);
428
429        assert_eq!(nfa.num_states(), 7);
430        assert_eq!(nfa.transitions.len(), 6);
431
432        // Accepting at q2, q4, q6
433        assert!(!nfa.is_accepting(0));
434        assert!(!nfa.is_accepting(1));
435        assert!(nfa.is_accepting(2));
436        assert!(!nfa.is_accepting(3));
437        assert!(nfa.is_accepting(4));
438        assert!(!nfa.is_accepting(5));
439        assert!(nfa.is_accepting(6));
440    }
441
442    #[test]
443    fn test_qpp_state_constraints() {
444        // ((a)-[:KNOWS]->(b:Person)-[:WORKS_AT]->(c:Company)){1,2}
445        // Step 0: KNOWS, target=Person (at odd states: 1, 3)
446        // Step 1: WORKS_AT, target=Company (at even states: 2, 4)
447        let steps = vec![
448            QppStep {
449                edge_type_ids: vec![10],
450                direction: Direction::Outgoing,
451                target_constraint: Some(VertexConstraint::Label("Person".to_string())),
452            },
453            QppStep {
454                edge_type_ids: vec![20],
455                direction: Direction::Outgoing,
456                target_constraint: Some(VertexConstraint::Label("Company".to_string())),
457            },
458        ];
459        let nfa = PathNfa::from_qpp(steps, 1, 2);
460
461        assert_eq!(nfa.num_states(), 5); // 2*2 + 1 = 5
462
463        // State constraints: q1=Person, q2=Company, q3=Person, q4=Company
464        assert!(nfa.state_constraint(0).is_none());
465        assert_eq!(
466            nfa.state_constraint(1),
467            Some(&VertexConstraint::Label("Person".to_string()))
468        );
469        assert_eq!(
470            nfa.state_constraint(2),
471            Some(&VertexConstraint::Label("Company".to_string()))
472        );
473        assert_eq!(
474            nfa.state_constraint(3),
475            Some(&VertexConstraint::Label("Person".to_string()))
476        );
477        assert_eq!(
478            nfa.state_constraint(4),
479            Some(&VertexConstraint::Label("Company".to_string()))
480        );
481    }
482
483    #[test]
484    fn test_qpp_transitions_alternate() {
485        // ((a)-[:T1]->(b)-[:T2]->(c)){1,2}
486        // Transitions should alternate: T1, T2, T1, T2
487        let steps = vec![
488            QppStep {
489                edge_type_ids: vec![1],
490                direction: Direction::Outgoing,
491                target_constraint: None,
492            },
493            QppStep {
494                edge_type_ids: vec![2],
495                direction: Direction::Incoming,
496                target_constraint: None,
497            },
498        ];
499        let nfa = PathNfa::from_qpp(steps, 1, 2);
500
501        assert_eq!(nfa.transitions.len(), 4);
502        assert_eq!(nfa.transitions[0].edge_type_ids, vec![1]);
503        assert_eq!(nfa.transitions[0].direction, Direction::Outgoing);
504        assert_eq!(nfa.transitions[1].edge_type_ids, vec![2]);
505        assert_eq!(nfa.transitions[1].direction, Direction::Incoming);
506        assert_eq!(nfa.transitions[2].edge_type_ids, vec![1]);
507        assert_eq!(nfa.transitions[2].direction, Direction::Outgoing);
508        assert_eq!(nfa.transitions[3].edge_type_ids, vec![2]);
509        assert_eq!(nfa.transitions[3].direction, Direction::Incoming);
510    }
511
512    #[test]
513    fn test_qpp_single_hop_equiv() {
514        // ((a)-[:T]->(b)){2,4} should be equivalent to [:T*2..4]
515        let qpp_steps = vec![QppStep {
516            edge_type_ids: vec![1],
517            direction: Direction::Outgoing,
518            target_constraint: None,
519        }];
520        let qpp_nfa = PathNfa::from_qpp(qpp_steps, 2, 4);
521        let vlp_nfa = PathNfa::from_vlp(vec![1], Direction::Outgoing, 2, 4);
522
523        assert_eq!(qpp_nfa.num_states(), vlp_nfa.num_states());
524        assert_eq!(qpp_nfa.accepting_states(), vlp_nfa.accepting_states());
525        assert_eq!(qpp_nfa.transitions.len(), vlp_nfa.transitions.len());
526    }
527
528    #[test]
529    fn test_qpp_accepting_at_boundaries() {
530        // ((a)-[:T1]->(b)-[:T2]->(c)-[:T3]->(d)){2,3}
531        // 3 steps, accepting at 2*3=6 and 3*3=9
532        let steps = vec![
533            QppStep {
534                edge_type_ids: vec![1],
535                direction: Direction::Outgoing,
536                target_constraint: None,
537            },
538            QppStep {
539                edge_type_ids: vec![2],
540                direction: Direction::Outgoing,
541                target_constraint: None,
542            },
543            QppStep {
544                edge_type_ids: vec![3],
545                direction: Direction::Outgoing,
546                target_constraint: None,
547            },
548        ];
549        let nfa = PathNfa::from_qpp(steps, 2, 3);
550
551        assert_eq!(nfa.num_states(), 10); // 3*3 + 1
552
553        // Only q6 and q9 are accepting
554        for i in 0..10u16 {
555            if i == 6 || i == 9 {
556                assert!(nfa.is_accepting(i), "State {i} should be accepting");
557            } else {
558                assert!(!nfa.is_accepting(i), "State {i} should not be accepting");
559            }
560        }
561    }
562
563    #[test]
564    fn test_qpp_zero_min() {
565        // ((a)-[:T]->(b)){0,3} — zero iterations makes start state accepting
566        let steps = vec![QppStep {
567            edge_type_ids: vec![1],
568            direction: Direction::Outgoing,
569            target_constraint: None,
570        }];
571        let nfa = PathNfa::from_qpp(steps, 0, 3);
572
573        assert!(nfa.is_accepting(0)); // Zero iterations
574        assert!(nfa.is_accepting(1)); // 1 iteration
575        assert!(nfa.is_accepting(2)); // 2 iterations
576        assert!(nfa.is_accepting(3)); // 3 iterations
577    }
578
579    #[test]
580    fn test_qpp_unbounded_capped() {
581        // ((a)-[:T1]->(b)-[:T2]->(c)){2,} — cap at DEFAULT_MAX_HOPS / hops_per_iter
582        let steps = vec![
583            QppStep {
584                edge_type_ids: vec![1],
585                direction: Direction::Outgoing,
586                target_constraint: None,
587            },
588            QppStep {
589                edge_type_ids: vec![2],
590                direction: Direction::Outgoing,
591                target_constraint: None,
592            },
593        ];
594        let max_iter = DEFAULT_MAX_HOPS / steps.len();
595        let nfa = PathNfa::from_qpp(steps, 2, max_iter);
596
597        let expected_states = (max_iter * 2 + 1) as u16;
598        assert_eq!(nfa.num_states(), expected_states);
599        assert!(nfa.is_accepting((2 * 2) as NfaStateId)); // min: 2 iterations
600        assert!(nfa.is_accepting((max_iter * 2) as NfaStateId)); // max
601        assert!(!nfa.is_accepting(1)); // Not at iteration boundary
602    }
603
604    #[test]
605    fn test_vlp_empty_interval_no_panic() {
606        // [*3..1] is an empty interval — min > max
607        // Should NOT panic, should produce an NFA with 0 accepting states
608        let nfa = PathNfa::from_vlp(vec![1], Direction::Outgoing, 3, 1);
609        assert!(
610            nfa.accepting_states().is_empty(),
611            "Empty interval NFA should have no accepting states"
612        );
613        // The NFA should be well-formed (1 state, no transitions)
614        assert_eq!(nfa.num_states(), 1);
615        assert_eq!(nfa.start_state(), 0);
616    }
617
618    #[test]
619    fn test_qpp_empty_interval_no_panic() {
620        // QPP with min_iterations > max_iterations — should not panic
621        let steps = vec![QppStep {
622            edge_type_ids: vec![1],
623            direction: Direction::Outgoing,
624            target_constraint: None,
625        }];
626        let nfa = PathNfa::from_qpp(steps, 3, 1);
627        assert!(
628            nfa.accepting_states().is_empty(),
629            "Empty interval QPP NFA should have no accepting states"
630        );
631        assert_eq!(nfa.num_states(), 1);
632    }
633
634    #[test]
635    fn test_vertex_constraint_label() {
636        let c = VertexConstraint::Label("Person".to_string());
637        assert_eq!(c, VertexConstraint::Label("Person".to_string()));
638        assert_ne!(c, VertexConstraint::Label("Company".to_string()));
639    }
640}