triblespace_core/query.rs
1//! Query facilities for matching tribles by declaring patterns of constraints.
2//! Build queries with the [`find!`](crate::prelude::find) macro which binds variables and
3//! combines constraint expressions:
4//!
5//! ```
6//! # use triblespace_core::prelude::*;
7//! # use triblespace_core::prelude::inlineencodings::ShortString;
8//! let results = find!((x: Inline<ShortString>), x.is("foo".to_inline())).collect::<Vec<_>>();
9//! ```
10//!
11//! Variables are converted via [`TryFromInline`](crate::inline::TryFromInline). By default,
12//! conversion failures silently skip the row (filter semantics). Append `?` to a variable
13//! to receive `Result<T, E>` instead, letting the caller handle errors explicitly.
14//!
15//! For a tour of the language see the "Query Language" chapter in the book.
16//! Conceptual background on schemas and join strategy appears in the
17//! "Query Engine" and "Atreides Join" chapters.
18/// [`ConstantConstraint`] — pins a variable to a single value.
19pub mod constantconstraint;
20/// [`EqualityConstraint`](equalityconstraint::EqualityConstraint) — constrains two variables to have the same value.
21pub mod equalityconstraint;
22/// [`KeysConstraint`](hashmapconstraint::KeysConstraint) — constrains a variable to HashMap keys.
23pub mod hashmapconstraint;
24/// [`SetConstraint`](hashsetconstraint::SetConstraint) — constrains a variable to HashSet members.
25pub mod hashsetconstraint;
26/// [`IgnoreConstraint`] — hides variables from the outer query.
27pub mod ignore;
28/// [`IntersectionConstraint`](intersectionconstraint::IntersectionConstraint) — logical AND.
29pub mod intersectionconstraint;
30/// [`PatchValueConstraint`](patchconstraint::PatchValueConstraint) and [`PatchIdConstraint`](patchconstraint::PatchIdConstraint) — constrains variables to PATCH entries.
31pub mod patchconstraint;
32/// [`InlineRange`](rangeconstraint::InlineRange) — restricts a variable to a byte-lexicographic range.
33pub mod rangeconstraint;
34/// [`RegularPathConstraint`] — regular path expressions over graphs.
35pub mod regularpathconstraint;
36/// [`SortedSliceConstraint`](sortedsliceconstraint::SortedSliceConstraint) — constrains a variable to values in a sorted slice (binary search confirm).
37pub mod sortedsliceconstraint;
38/// [`UnionConstraint`](unionconstraint::UnionConstraint) — logical OR.
39pub mod unionconstraint;
40mod variableset;
41
42use std::cmp::Reverse;
43use std::fmt;
44use std::iter::FromIterator;
45use std::marker::PhantomData;
46
47use arrayvec::ArrayVec;
48use constantconstraint::*;
49/// Re-export of [`IgnoreConstraint`].
50pub use ignore::IgnoreConstraint;
51
52use crate::inline::encodings::genid::GenId;
53use crate::inline::RawInline;
54use crate::inline::Inline;
55use crate::inline::InlineEncoding;
56
57/// Re-export of [`PathOp`].
58pub use regularpathconstraint::PathOp;
59/// Re-export of [`RegularPathConstraint`].
60pub use regularpathconstraint::RegularPathConstraint;
61/// Re-export of [`VariableSet`](variableset::VariableSet).
62pub use variableset::VariableSet;
63
64/// Types storing tribles can implement this trait to expose them to queries.
65/// The trait provides a method to create a constraint for a given trible pattern.
66pub trait TriblePattern {
67 /// The type of the constraint created by the pattern method.
68 ///
69 /// `Send + Sync` is required so the resulting constraint tree can be
70 /// used with the `parallel` feature's rayon iterators. Every in-tree
71 /// pattern backend (TribleSet, SuccinctArchive) satisfies this; custom
72 /// implementations should hold their data behind `Arc` or similar.
73 type PatternConstraint<'a>: Constraint<'a> + Send + Sync
74 where
75 Self: 'a;
76
77 /// Create a constraint for a given trible pattern.
78 /// The method takes three variables, one for each part of the trible.
79 /// The schemas of the entities and attributes are always [GenId], while the value
80 /// schema can be any type implementing [InlineEncoding] and is specified as a type parameter.
81 ///
82 /// This method is usually not called directly, but rather through typed query language
83 /// macros like [pattern!][crate::macros::pattern].
84 fn pattern<'a, V: InlineEncoding>(
85 &'a self,
86 e: Variable<GenId>,
87 a: Variable<GenId>,
88 v: Variable<V>,
89 ) -> Self::PatternConstraint<'a>;
90}
91
92/// Low-level identifier for a variable in a query.
93pub type VariableId = usize;
94
95/// Context for creating variables in a query.
96/// The context keeps track of the next index to assign to a variable.
97/// This allows for the creation of new anonymous variables in higher-level query languages.
98#[derive(Debug)]
99pub struct VariableContext {
100 /// The index that will be assigned to the next variable.
101 pub next_index: VariableId,
102}
103
104impl Default for VariableContext {
105 fn default() -> Self {
106 Self::new()
107 }
108}
109
110impl VariableContext {
111 /// Create a new variable context.
112 /// The context starts with an index of 0.
113 pub fn new() -> Self {
114 VariableContext { next_index: 0 }
115 }
116
117 /// Create a new variable.
118 /// The variable is assigned the next available index.
119 ///
120 /// Panics if the number of variables exceeds 128.
121 ///
122 /// This method is usually not called directly, but rather through typed query language
123 /// macros like [find!][crate::query].
124 pub fn next_variable<T: InlineEncoding>(&mut self) -> Variable<T> {
125 assert!(
126 self.next_index < 128,
127 "currently queries support at most 128 variables"
128 );
129 let v = Variable::new(self.next_index);
130 self.next_index += 1;
131 v
132 }
133}
134
135/// A placeholder for unknowns in a query.
136/// Within the query engine each variable is identified by an integer,
137/// which can be accessed via the `index` property.
138/// Variables also have an associated type which is used to parse the [Inline]s
139/// found by the query engine.
140#[derive(Debug)]
141pub struct Variable<T: InlineEncoding> {
142 /// The integer index identifying this variable in the [`Binding`].
143 pub index: VariableId,
144 typed: PhantomData<T>,
145}
146
147impl<T: InlineEncoding> Copy for Variable<T> {}
148
149impl<T: InlineEncoding> Clone for Variable<T> {
150 fn clone(&self) -> Self {
151 *self
152 }
153}
154
155impl<T: InlineEncoding> Variable<T> {
156 /// Creates a variable with the given index.
157 pub fn new(index: VariableId) -> Self {
158 Variable {
159 index,
160 typed: PhantomData,
161 }
162 }
163
164 /// Extracts the bound value for this variable from `binding`.
165 ///
166 /// # Panics
167 ///
168 /// Panics if the variable has not been bound.
169 pub fn extract(self, binding: &Binding) -> &Inline<T> {
170 let raw = binding.get(self.index).unwrap_or_else(|| {
171 panic!(
172 "query variable (idx {}) was never bound before projection. This usually means the variable was projected in `find!` but never appeared in any constraint. If you intended a pure existence query, use `find!((), ...)` or `exists!(constraint)`.",
173 self.index
174 )
175 });
176 Inline::as_transmute_raw(raw)
177 }
178}
179
180/// Collections can implement this trait so that they can be used in queries.
181/// The returned constraint will filter the values assigned to the variable
182/// to only those that are contained in the collection.
183pub trait ContainsConstraint<'a, T: InlineEncoding> {
184 /// The concrete constraint type produced by [`has`](ContainsConstraint::has).
185 type Constraint: Constraint<'a>;
186
187 /// Create a constraint that filters the values assigned to the variable
188 /// to only those that are contained in the collection.
189 ///
190 /// The returned constraint will usually perform a conversion between the
191 /// concrete rust type stored in the collection a [Inline] of the appropriate schema
192 /// type for the variable.
193 fn has(self, v: Variable<T>) -> Self::Constraint;
194}
195
196impl<T: InlineEncoding> Variable<T> {
197 /// Create a constraint so that only a specific value can be assigned to the variable.
198 pub fn is(self, constant: Inline<T>) -> ConstantConstraint {
199 ConstantConstraint::new(self, constant)
200 }
201}
202
203/// The binding keeps track of the values assigned to variables in a query.
204/// It maps variables to values - by their index - via a simple array,
205/// and keeps track of which variables are bound.
206/// It is used to store intermediate results and to pass information
207/// between different constraints.
208/// The binding is mutable, as it is modified by the query engine.
209/// It is not thread-safe and should not be shared between threads.
210/// The binding is a simple data structure that is cheap to clone.
211/// It is not intended to be used as a long-term storage for query results.
212#[derive(Clone, Debug)]
213pub struct Binding {
214 /// Bitset tracking which variables have been assigned a value.
215 pub bound: VariableSet,
216 values: [RawInline; 128],
217}
218
219impl Binding {
220 /// Binds `variable` to `value`.
221 pub fn set(&mut self, variable: VariableId, value: &RawInline) {
222 self.values[variable] = *value;
223 self.bound.set(variable);
224 }
225
226 /// Unset a variable in the binding.
227 /// This is used to backtrack in the query engine.
228 pub fn unset(&mut self, variable: VariableId) {
229 self.bound.unset(variable);
230 }
231
232 /// Check if a variable is bound in the binding.
233 pub fn get(&self, variable: VariableId) -> Option<&RawInline> {
234 if self.bound.is_set(variable) {
235 Some(&self.values[variable])
236 } else {
237 None
238 }
239 }
240}
241
242impl Default for Binding {
243 fn default() -> Self {
244 Self {
245 bound: VariableSet::new_empty(),
246 values: [[0; 32]; 128],
247 }
248 }
249}
250
251/// The cooperative protocol that every query participant implements.
252///
253/// A constraint restricts the values that can be assigned to query variables.
254/// The query engine does not plan joins in advance; instead it consults
255/// constraints directly during a depth-first search over partial bindings.
256/// Each constraint reports which variables it touches, estimates how many
257/// candidates remain, enumerates concrete values on demand, and signals
258/// whether its requirements are still satisfiable. This protocol is the
259/// sole interface between the engine and the data — whether that data lives
260/// in a [`TribleSet`](crate::trible::TribleSet), a [`HashMap`](std::collections::HashMap),
261/// or a custom application predicate.
262///
263/// # The protocol
264///
265/// The engine drives the search by calling five methods in a fixed rhythm:
266///
267/// | Method | Role | Called when |
268/// |--------|------|------------|
269/// | [`variables`](Constraint::variables) | Declares which variables the constraint touches. | Once, at query start. |
270/// | [`estimate`](Constraint::estimate) | Predicts the candidate count for a variable. | Before each binding decision. |
271/// | [`propose`](Constraint::propose) | Enumerates candidate values for a variable. | On the most selective constraint. |
272/// | [`confirm`](Constraint::confirm) | Filters candidates proposed by another constraint. | On all remaining constraints. |
273/// | [`satisfied`](Constraint::satisfied) | Checks whether fully-bound sub-constraints still hold. | Before propose/confirm in composite constraints. |
274///
275/// [`influence`](Constraint::influence) completes the picture by telling the
276/// engine which estimates to refresh when a variable is bound or unbound.
277///
278/// # Statelessness
279///
280/// Constraints are stateless: every method receives the current [`Binding`]
281/// as a parameter rather than maintaining internal bookkeeping. This lets
282/// the engine backtrack freely by unsetting variables in the binding
283/// without notifying the constraints.
284///
285/// # Composability
286///
287/// Constraints combine via [`IntersectionConstraint`](crate::query::intersectionconstraint::IntersectionConstraint)
288/// (logical AND — built by [`and!`](crate::and)) and
289/// [`UnionConstraint`](crate::query::unionconstraint::UnionConstraint)
290/// (logical OR — built by [`or!`](crate::or)). Because every constraint
291/// speaks the same protocol, heterogeneous data sources mix freely in a
292/// single query.
293///
294/// # Implementing a custom constraint
295///
296/// A new constraint only needs to implement [`variables`](Constraint::variables),
297/// [`estimate`](Constraint::estimate), [`propose`](Constraint::propose), and
298/// [`confirm`](Constraint::confirm). Override [`satisfied`](Constraint::satisfied)
299/// when the constraint can detect unsatisfiability before the engine asks
300/// about individual variables (e.g. a fully-bound triple lookup that found
301/// no match). Override [`influence`](Constraint::influence) when binding one
302/// variable changes the estimates for a non-obvious set of others.
303pub trait Constraint<'a> {
304 /// Returns the set of variables this constraint touches.
305 ///
306 /// Called once at query start. The engine uses this to build influence
307 /// graphs and to determine which constraints participate when a
308 /// particular variable is being bound.
309 fn variables(&self) -> VariableSet;
310
311 /// Estimates the number of candidate values for `variable` given the
312 /// current partial `binding`.
313 ///
314 /// Returns `None` when `variable` is not constrained by this constraint.
315 /// The estimate need not be exact — it guides variable ordering, not
316 /// correctness. Tighter estimates lead to better search pruning; see the
317 /// [Atreides join](crate) family for how different estimate fidelities
318 /// affect performance.
319 fn estimate(&self, variable: VariableId, binding: &Binding) -> Option<usize>;
320
321 /// Enumerates candidate values for `variable` into `proposals`.
322 ///
323 /// Called on the constraint with the lowest estimate for the variable
324 /// being bound. Values are appended to `proposals`; the engine may
325 /// already have values in the vector from a previous round.
326 ///
327 /// Does nothing when `variable` is not constrained by this constraint.
328 fn propose(&self, variable: VariableId, binding: &Binding, proposals: &mut Vec<RawInline>);
329
330 /// Filters `proposals` to remove values for `variable` that violate
331 /// this constraint.
332 ///
333 /// Called on every constraint *except* the one that proposed, in order
334 /// of increasing estimate. Implementations remove entries from
335 /// `proposals` that are inconsistent with the current `binding`.
336 ///
337 /// Does nothing when `variable` is not constrained by this constraint.
338 fn confirm(&self, variable: VariableId, binding: &Binding, proposals: &mut Vec<RawInline>);
339
340 /// Returns whether this constraint is consistent with the current
341 /// `binding`.
342 ///
343 /// The default implementation returns `true`. Override this when the
344 /// constraint can cheaply detect that no solution exists — for example,
345 /// a `TribleSetConstraint`
346 /// whose entity, attribute, and value are all bound but the triple is
347 /// absent from the dataset.
348 ///
349 /// Composite constraints propagate this check to their children:
350 /// [`IntersectionConstraint`](crate::query::intersectionconstraint::IntersectionConstraint)
351 /// requires *all* children to be satisfied, while
352 /// [`UnionConstraint`](crate::query::unionconstraint::UnionConstraint)
353 /// requires *at least one*. The union uses this to skip dead variants
354 /// in propose and confirm, preventing values from a satisfied variant
355 /// from leaking through a dead one.
356 fn satisfied(&self, _binding: &Binding) -> bool {
357 true
358 }
359
360 /// Returns the set of variables whose estimates may change when
361 /// `variable` is bound or unbound.
362 ///
363 /// The default includes every variable this constraint touches except
364 /// `variable` itself. Returns an empty set when `variable` is not part
365 /// of this constraint.
366 fn influence(&self, variable: VariableId) -> VariableSet {
367 let mut vars = self.variables();
368 if vars.is_set(variable) {
369 vars.unset(variable);
370 vars
371 } else {
372 VariableSet::new_empty()
373 }
374 }
375}
376
377impl<'a, T: Constraint<'a> + ?Sized> Constraint<'a> for Box<T> {
378 fn variables(&self) -> VariableSet {
379 let inner: &T = self;
380 inner.variables()
381 }
382
383 fn estimate(&self, variable: VariableId, binding: &Binding) -> Option<usize> {
384 let inner: &T = self;
385 inner.estimate(variable, binding)
386 }
387
388 fn propose(&self, variable: VariableId, binding: &Binding, proposals: &mut Vec<RawInline>) {
389 let inner: &T = self;
390 inner.propose(variable, binding, proposals)
391 }
392
393 fn confirm(&self, variable: VariableId, binding: &Binding, proposals: &mut Vec<RawInline>) {
394 let inner: &T = self;
395 inner.confirm(variable, binding, proposals)
396 }
397
398 fn satisfied(&self, binding: &Binding) -> bool {
399 let inner: &T = self;
400 inner.satisfied(binding)
401 }
402
403 fn influence(&self, variable: VariableId) -> VariableSet {
404 let inner: &T = self;
405 inner.influence(variable)
406 }
407}
408
409impl<'a, T: Constraint<'a> + ?Sized> Constraint<'a> for std::sync::Arc<T> {
410 fn variables(&self) -> VariableSet {
411 let inner: &T = self;
412 inner.variables()
413 }
414
415 fn estimate(&self, variable: VariableId, binding: &Binding) -> Option<usize> {
416 let inner: &T = self;
417 inner.estimate(variable, binding)
418 }
419
420 fn propose(&self, variable: VariableId, binding: &Binding, proposals: &mut Vec<RawInline>) {
421 let inner: &T = self;
422 inner.propose(variable, binding, proposals)
423 }
424
425 fn confirm(&self, variable: VariableId, binding: &Binding, proposal: &mut Vec<RawInline>) {
426 let inner: &T = self;
427 inner.confirm(variable, binding, proposal)
428 }
429
430 fn satisfied(&self, binding: &Binding) -> bool {
431 let inner: &T = self;
432 inner.satisfied(binding)
433 }
434
435 fn influence(&self, variable: VariableId) -> VariableSet {
436 let inner: &T = self;
437 inner.influence(variable)
438 }
439}
440
441/// A query is an iterator over the results of a query.
442/// It takes a constraint and a post-processing function as input,
443/// and returns the results of the query as a stream of values.
444/// The query engine uses a depth-first search to find solutions to the query,
445/// proposing values for the variables and backtracking when it reaches a dead end.
446/// The query engine is designed to be simple and efficient, providing low, consistent,
447/// and predictable latency, skew resistance, and no required (or possible) tuning.
448/// The query engine is designed to be used in combination with the [Constraint] trait,
449/// which provides a simple and flexible way to implement constraints that can be used
450/// to filter the results of a query.
451///
452/// This struct is usually not created directly, but rather through the `find!` macro,
453/// which provides a convenient way to declare variables and concrete types for them.
454/// And which sets up the nessecairy context for higher-level query languages
455/// like the one provided by the [`crate::macros`] module.
456pub struct Query<C, P: Fn(&Binding) -> Option<R>, R> {
457 constraint: C,
458 postprocessing: P,
459 mode: Search,
460 binding: Binding,
461 influences: [VariableSet; 128],
462 estimates: [usize; 128],
463 touched_variables: VariableSet,
464 stack: ArrayVec<VariableId, 128>,
465 unbound: ArrayVec<VariableId, 128>,
466 values: ArrayVec<Option<Vec<RawInline>>, 128>,
467}
468
469// Manual `Clone` impl, because `#[derive(Clone)]` would require `R: Clone`
470// which isn't actually needed — `R` only appears in `P`'s return type.
471#[cfg(feature = "parallel")]
472impl<C, P, R> Clone for Query<C, P, R>
473where
474 C: Clone,
475 P: Fn(&Binding) -> Option<R> + Clone,
476{
477 fn clone(&self) -> Self {
478 Self {
479 constraint: self.constraint.clone(),
480 postprocessing: self.postprocessing.clone(),
481 mode: self.mode,
482 binding: self.binding.clone(),
483 influences: self.influences,
484 estimates: self.estimates,
485 touched_variables: self.touched_variables,
486 stack: self.stack.clone(),
487 unbound: self.unbound.clone(),
488 values: self.values.clone(),
489 }
490 }
491}
492
493impl<'a, C: Constraint<'a>, P: Fn(&Binding) -> Option<R>, R> Query<C, P, R> {
494 /// Picks the next unbound variable, refreshes estimates touched by
495 /// the most recent binding, re-sorts `unbound`, pushes the chosen
496 /// variable onto the stack, and fills its proposal vector via
497 /// [`Constraint::propose`]. Leaves `mode = NextValue`. The caller is
498 /// responsible for ensuring `unbound` is non-empty.
499 ///
500 /// Shared between [`Iterator::next`]'s `NextVariable` branch and the
501 /// [`UnindexedProducer::split`](crate::query::QueryParIter) implementation
502 /// — the "push + propose" dance is identical in both.
503 fn push_next_variable(&mut self) {
504 let mut stale_estimates = VariableSet::new_empty();
505 while let Some(variable) = self.touched_variables.drain_next_ascending() {
506 stale_estimates = stale_estimates.union(self.influences[variable]);
507 }
508 // Bound variables can't be influenced by the unbound ones, so skip.
509 stale_estimates = stale_estimates.subtract(self.binding.bound);
510
511 if !stale_estimates.is_empty() {
512 while let Some(v) = stale_estimates.drain_next_ascending() {
513 self.estimates[v] = self
514 .constraint
515 .estimate(v, &self.binding)
516 .expect("unconstrained variable in query");
517 }
518 self.unbound.sort_unstable_by_key(|v| {
519 (
520 Reverse(
521 self.estimates[*v]
522 .checked_ilog2()
523 .map(|magnitude| magnitude + 1)
524 .unwrap_or(0),
525 ),
526 self.influences[*v].count(),
527 )
528 });
529 }
530
531 let variable = self.unbound.pop().expect("non-empty unbound");
532 let estimate = self.estimates[variable];
533 self.stack.push(variable);
534 let values = self.values[variable].get_or_insert(Vec::new());
535 values.clear();
536 values.reserve_exact(estimate.saturating_sub(values.capacity()));
537 self.constraint.propose(variable, &self.binding, values);
538 }
539
540 /// Create a new query.
541 /// The query takes a constraint and a post-processing function as input,
542 /// and returns the results of the query as a stream of values.
543 /// The post-processing function returns `Option<R>`: returning `None`
544 /// skips the current binding and continues the search.
545 ///
546 /// This method is usually not called directly, but rather through the [find!] macro,
547 pub fn new(constraint: C, postprocessing: P) -> Self {
548 let variables = constraint.variables();
549 let influences = std::array::from_fn(|v| {
550 if variables.is_set(v) {
551 constraint.influence(v)
552 } else {
553 VariableSet::new_empty()
554 }
555 });
556 let binding = Binding::default();
557 let estimates = std::array::from_fn(|v| {
558 if variables.is_set(v) {
559 constraint
560 .estimate(v, &binding)
561 .expect("unconstrained variable in query")
562 } else {
563 usize::MAX
564 }
565 });
566 let mut unbound = ArrayVec::from_iter(variables);
567 unbound.sort_unstable_by_key(|v| {
568 (
569 Reverse(
570 estimates[*v]
571 .checked_ilog2()
572 .map(|magnitude| magnitude + 1)
573 .unwrap_or(0),
574 ),
575 influences[*v].count(),
576 )
577 });
578
579 Query {
580 constraint,
581 postprocessing,
582 mode: Search::NextVariable,
583 binding,
584 influences,
585 estimates,
586 touched_variables: VariableSet::new_empty(),
587 stack: ArrayVec::new(),
588 unbound,
589 values: ArrayVec::from([const { None }; 128]),
590 }
591 }
592}
593
594/// The search mode of the query engine.
595/// The query engine uses a depth-first search to find solutions to the query,
596/// proposing values for the variables and backtracking when it reaches a dead end.
597/// The search mode is used to keep track of the current state of the search.
598/// The search mode can be one of the following:
599/// - `NextVariable` - The query engine is looking for the next variable to assign a value to.
600/// - `NextValue` - The query engine is looking for the next value to assign to a variable.
601/// - `Backtrack` - The query engine is backtracking to try a different value for a variable.
602/// - `Done` - The query engine has finished the search and there are no more results.
603#[derive(Copy, Clone, Debug)]
604enum Search {
605 NextVariable,
606 NextValue,
607 Backtrack,
608 Done,
609}
610
611impl<'a, C: Constraint<'a>, P: Fn(&Binding) -> Option<R>, R> Iterator for Query<C, P, R> {
612 type Item = R;
613
614 fn next(&mut self) -> Option<Self::Item> {
615 loop {
616 match &self.mode {
617 Search::NextVariable => {
618 self.mode = Search::NextValue;
619 if self.unbound.is_empty() {
620 if let Some(result) = (self.postprocessing)(&self.binding) {
621 return Some(result);
622 }
623 // Post-processing rejected this binding; continue
624 // searching (mode is already NextValue).
625 continue;
626 }
627 self.push_next_variable();
628 }
629 Search::NextValue => {
630 if let Some(&variable) = self.stack.last() {
631 if let Some(assignment) = self.values[variable]
632 .as_mut()
633 .expect("values should be initialized")
634 .pop()
635 {
636 self.binding.set(variable, &assignment);
637 self.touched_variables.set(variable);
638 self.mode = Search::NextVariable;
639 } else {
640 self.mode = Search::Backtrack;
641 }
642 } else {
643 self.mode = Search::Done;
644 return None;
645 }
646 }
647 Search::Backtrack => {
648 if let Some(variable) = self.stack.pop() {
649 self.binding.unset(variable);
650 // Note that we did not update estiamtes for the unbound variables
651 // as we are backtracking, so the estimates are still valid.
652 // Since we choose this variable before, we know that it would
653 // still go last in the unbound list.
654 self.unbound.push(variable);
655
656 // However, we need to update the touched variables,
657 // as we are backtracking and the variable is no longer bound.
658 // We're essentially restoring the estimate of the touched variables
659 // to the state before we bound this variable.
660 self.touched_variables.set(variable);
661 self.mode = Search::NextValue;
662 } else {
663 self.mode = Search::Done;
664 return None;
665 }
666 }
667 Search::Done => {
668 return None;
669 }
670 }
671 }
672 }
673}
674
675impl<'a, C: Constraint<'a>, P: Fn(&Binding) -> Option<R>, R> fmt::Debug for Query<C, P, R> {
676 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
677 f.debug_struct("Query")
678 .field("constraint", &std::any::type_name::<C>())
679 .field("mode", &self.mode)
680 .field("binding", &self.binding)
681 .field("stack", &self.stack)
682 .field("unbound", &self.unbound)
683 .finish()
684 }
685}
686
687// ---------------------------------------------------------------------------
688// Parallel execution via rayon.
689//
690// `Query` implements `IntoParallelIterator` with `Iter = QueryParIter`.
691// `QueryParIter` is a separate wrapper type implementing `ParallelIterator`
692// + `UnindexedProducer`, distinct from `Query` itself to avoid method-name
693// ambiguity between `Iterator` and `ParallelIterator` — methods like
694// `.count()`, `.collect()`, `.map()` exist on both.
695//
696// Usage: `find!(...).into_par_iter().map(...).collect::<Vec<_>>()`.
697//
698// The producer's `split` uses the "split-or-descend" rule: while the current
699// top-of-stack has a single remaining proposal, bind it and descend one level;
700// when the top has ≥2 remaining, bisect them between two sub-queries. This
701// keeps the invariant that every non-top stack level has zero remaining
702// proposals, so backtracking out of a sub-search unwinds cleanly to done
703// without any re-enumeration across clones.
704//
705// `fold_with` is the terminal leaf: it just drives the existing sequential
706// `Iterator::next()` and feeds results into the folder. No duplicated
707// execution logic.
708// ---------------------------------------------------------------------------
709
710#[cfg(feature = "parallel")]
711pub use parallel::QueryParIter;
712
713#[cfg(feature = "parallel")]
714mod parallel {
715 use super::*;
716 use rayon::iter::plumbing::{
717 bridge_unindexed, Folder, UnindexedConsumer, UnindexedProducer,
718 };
719 use rayon::iter::{IntoParallelIterator, ParallelIterator};
720
721 /// Parallel iterator over the results of a [`Query`]. Obtained via
722 /// [`IntoParallelIterator::into_par_iter`] on a `Query`.
723 ///
724 /// Drives rayon's work-stealing scheduler through an `UnindexedProducer`
725 /// impl on the underlying query state. The sequential `Iterator::next`
726 /// on `Query` is reused as the fold leaf — parallel execution is purely
727 /// additional, no duplicated engine logic.
728 ///
729 /// The inner query is stored in a [`Box`] so rayon's work-stealing
730 /// `split` (which clones the producer) doesn't memcpy ~15 KB of query
731 /// state on every fork — just a Box pointer copy, with the heap alloc
732 /// paid only by the child.
733 ///
734 /// `split_budget` bounds the number of splits this sub-producer will
735 /// perform. Rayon's default `Splitter` *resets* its budget on every
736 /// stolen task, so on a busy thread pool the split tree could grow
737 /// unboundedly deep — the Query always has more proposals to bisect.
738 /// A bounded per-producer budget (`num_threads²`) caps the split tree
739 /// at ~N² leaves — enough for each worker to have roughly N chunks to
740 /// rebalance via stealing — regardless of stealing pressure.
741 pub struct QueryParIter<C, P: Fn(&Binding) -> Option<R>, R> {
742 inner: Box<Query<C, P, R>>,
743 split_budget: usize,
744 }
745
746 impl<'a, C, P, R> IntoParallelIterator for Query<C, P, R>
747 where
748 C: Constraint<'a> + Clone + Send + 'a,
749 P: Fn(&Binding) -> Option<R> + Clone + Send,
750 R: Send,
751 {
752 type Item = R;
753 type Iter = QueryParIter<C, P, R>;
754
755 fn into_par_iter(self) -> Self::Iter {
756 // num_threads² chunks: intuition is "every worker has one spare
757 // chunk for every other worker," giving N²/N = N chunks apiece
758 // for rebalancing. log₂(N²) = 2·log₂(N), so depth stays modest
759 // (8 on a 16-thread box, 10 on a 32-thread) — well below any
760 // stack concern.
761 let n = rayon::current_num_threads();
762 let split_budget = n.saturating_mul(n).max(2);
763 QueryParIter {
764 inner: Box::new(self),
765 split_budget,
766 }
767 }
768 }
769
770 impl<'a, C, P, R> UnindexedProducer for QueryParIter<C, P, R>
771 where
772 C: Constraint<'a> + Clone + Send + 'a,
773 P: Fn(&Binding) -> Option<R> + Clone + Send,
774 R: Send,
775 {
776 type Item = R;
777
778 /// Advance the Query's state machine until either the current
779 /// top-of-stack has ≥2 remaining proposals (bisect, return a
780 /// right half) or the sub-query is exhausted (return `None`,
781 /// leaving `self` as a leaf that `fold_with` will fold
782 /// sequentially). Single-value levels are descended through —
783 /// see the module doc comment for why this preserves correctness
784 /// without re-enumeration.
785 fn split(mut self) -> (Self, Option<Self>) {
786 if self.split_budget == 0 {
787 return (self, None);
788 }
789 self.split_budget -= 1;
790 let q = &mut *self.inner;
791 loop {
792 // Advance the state machine until we're in NextValue with
793 // a populated top — the only state where split-or-descend
794 // makes sense.
795 while !matches!(q.mode, Search::NextValue) {
796 match q.mode {
797 Search::NextVariable => {
798 q.mode = Search::NextValue;
799 if q.unbound.is_empty() {
800 // All variables bound. Leaf — fold_with
801 // will drive sequential `next()` to yield
802 // the one postprocessed result.
803 q.mode = Search::NextVariable;
804 return (self, None);
805 }
806 q.push_next_variable();
807 }
808 Search::Backtrack => {
809 if let Some(variable) = q.stack.pop() {
810 q.binding.unset(variable);
811 q.unbound.push(variable);
812 q.touched_variables.set(variable);
813 q.mode = Search::NextValue;
814 } else {
815 q.mode = Search::Done;
816 return (self, None);
817 }
818 }
819 Search::Done => return (self, None),
820 Search::NextValue => unreachable!(),
821 }
822 }
823
824 // mode == NextValue. Inspect top-of-stack's remaining
825 // proposals.
826 let Some(&top) = q.stack.last() else {
827 return (self, None);
828 };
829 let top_len = q.values[top].as_ref().map_or(0, |v| v.len());
830 match top_len {
831 0 => q.mode = Search::Backtrack,
832 1 => {
833 // Descend: pop the single value, bind it,
834 // transition to NextVariable so the outer loop
835 // runs propose.
836 let assignment = q.values[top].as_mut().unwrap().pop().unwrap();
837 q.binding.set(top, &assignment);
838 q.touched_variables.set(top);
839 q.mode = Search::NextVariable;
840 }
841 _ => {
842 // Bisect the remaining proposals; clone the rest
843 // of the query state into the right half. Clone
844 // cost is one ~15 KB arraycopy per
845 // rayon-requested split — rayon only asks under
846 // stealing pressure.
847 let vals = q.values[top].as_mut().unwrap();
848 let mid = vals.len() / 2;
849 let right_vals: Vec<RawInline> = vals.drain(mid..).collect();
850 let mut right = q.clone();
851 right.values[top] = Some(right_vals);
852
853 let left_budget = self.split_budget / 2;
854 let right_budget = self.split_budget - left_budget;
855 self.split_budget = left_budget;
856 return (
857 self,
858 Some(QueryParIter {
859 inner: Box::new(right),
860 split_budget: right_budget,
861 }),
862 );
863 }
864 }
865 }
866 }
867
868 fn fold_with<F: Folder<R>>(self, mut folder: F) -> F {
869 let QueryParIter { inner: mut q, .. } = self;
870 while !folder.full() {
871 match q.next() {
872 Some(item) => folder = folder.consume(item),
873 None => break,
874 }
875 }
876 folder
877 }
878 }
879
880 impl<'a, C, P, R> ParallelIterator for QueryParIter<C, P, R>
881 where
882 C: Constraint<'a> + Clone + Send + 'a,
883 P: Fn(&Binding) -> Option<R> + Clone + Send,
884 R: Send,
885 {
886 type Item = R;
887
888 fn drive_unindexed<Con>(self, consumer: Con) -> Con::Result
889 where
890 Con: UnindexedConsumer<Self::Item>,
891 {
892 bridge_unindexed(self, consumer)
893 }
894 }
895
896}
897
898/// Iterate over query results, converting each variable via
899/// [`TryFromInline`](crate::inline::TryFromInline).
900///
901/// The macro takes two arguments: a tuple of variables with optional type
902/// annotations, and a constraint expression. It injects a `__local_find_context!`
903/// macro that provides the variable context to nested query macros like
904/// [`pattern!`](crate::macros::pattern) and [`ignore!`](crate::ignore).
905///
906/// # Variable syntax
907///
908/// | Syntax | Meaning |
909/// |--------|---------|
910/// | `name` | inferred type, filter on conversion failure |
911/// | `name: Type` | explicit type, filter on conversion failure |
912/// | `name?` | inferred type, yield `Result<T, E>` (no filter) |
913/// | `name: Type?` | explicit type, yield `Result<T, E>` (no filter) |
914///
915/// The unit form `find!((), constraint)` projects no variables and yields one
916/// `()` for every matching row. This is useful when you only care about
917/// existence, counting, or composing the query without returning values.
918///
919/// **Filter semantics (default):** when a variable's conversion fails the
920/// entire row is silently skipped — like a constraint that doesn't match.
921/// For types whose `TryFromInline::Error = Infallible` the error branch is
922/// dead code, so no rows can ever be accidentally filtered.
923///
924/// **`?` pass-through:** appending `?` to a variable makes it yield
925/// `Result<T, E>` directly. Both `Ok` and `Err` values pass through with
926/// no filtering, matching Rust's `?` semantics of "bubble the error to the
927/// caller."
928///
929/// # Examples
930///
931/// ```
932/// # use triblespace_core::prelude::*;
933/// # use triblespace_core::prelude::inlineencodings::ShortString;
934/// // Filter semantics — rows where conversion fails are skipped:
935/// let results = find!((x: Inline<ShortString>), x.is("foo".to_inline())).collect::<Vec<_>>();
936/// ```
937#[macro_export]
938macro_rules! find {
939 ($($tokens:tt)*) => {
940 {
941 #[allow(unused_mut, unused_variables)]
942 let mut ctx = $crate::query::VariableContext::new();
943
944 macro_rules! __local_find_context {
945 () => { &mut ctx }
946 }
947
948 $crate::macros::__find_impl!($crate, ctx, $($tokens)*)
949 }
950 };
951}
952/// Re-export of the [`find!`] macro.
953pub use find;
954
955/// Returns `true` when a query produces at least one row.
956///
957/// This is equivalent to calling `find!(...).next().is_some()`, but reads more
958/// directly for existence checks.
959///
960/// # Forms
961///
962/// - `exists!(constraint)` checks a pure constraint with no projected
963/// variables.
964/// - `exists!((vars...), constraint)` uses the same variable/conversion syntax
965/// as [`find!`] before checking whether any row survives projection.
966///
967/// ```rust,ignore
968/// exists!(pattern!(&kb, [{ ?person @ social::name: "Alice" }]))
969/// ```
970///
971/// ```rust,ignore
972/// exists!(
973/// (name: Inline<_>),
974/// pattern!(&kb, [{ ?person @ social::name: ?name }])
975/// )
976/// ```
977#[macro_export]
978macro_rules! exists {
979 (($($vars:tt)*), $Constraint:expr) => {
980 $crate::query::find!(($($vars)*), $Constraint).next().is_some()
981 };
982 ($Constraint:expr) => {
983 $crate::query::find!((), $Constraint).next().is_some()
984 };
985}
986/// Re-export of the [`exists!`] macro.
987pub use exists;
988
989/// Introduces one or more temporary query variables for a nested constraint.
990///
991/// `temp!` is only meaningful inside macros that provide a local query context,
992/// such as [`find!`], [`exists!`], or macros expanded from them like
993/// [`pattern!`](crate::macros::pattern). Each identifier becomes a fresh query
994/// variable that is scoped to the wrapped body.
995///
996/// ```rust,ignore
997/// find!(
998/// (person: Inline<_>),
999/// temp!((friend), and!(
1000/// pattern!(&kb, [{ ?person @ social::friend: ?friend }]),
1001/// pattern!(&kb, [{ ?friend @ social::name: "Bob" }])
1002/// ))
1003/// )
1004/// ```
1005#[macro_export]
1006macro_rules! temp {
1007 (($Var:ident), $body:expr) => {{
1008 let $Var = __local_find_context!().next_variable();
1009 $body
1010 }};
1011 (($Var:ident,), $body:expr) => {
1012 $crate::temp!(($Var), $body)
1013 };
1014 (($Var:ident, $($rest:ident),+ $(,)?), $body:expr) => {{
1015 $crate::temp!(
1016 ($Var),
1017 $crate::temp!(($($rest),+), $body)
1018 )
1019 }};
1020}
1021/// Re-export of the [`temp!`] macro.
1022pub use temp;
1023
1024#[cfg(test)]
1025mod tests {
1026 use inlineencodings::ShortString;
1027
1028 use crate::ignore;
1029 use crate::prelude::inlineencodings::*;
1030 use crate::prelude::*;
1031
1032 use crate::examples::literature;
1033
1034 use fake::faker::lorem::en::Sentence;
1035 use fake::faker::lorem::en::Words;
1036 use fake::faker::name::raw::*;
1037 use fake::locales::*;
1038 use fake::Fake;
1039
1040 use std::collections::HashSet;
1041
1042 use super::*;
1043
1044 pub mod knights {
1045 use crate::prelude::*;
1046
1047 attributes! {
1048 "8143F46E812E88C4544E7094080EC523" as loves: inlineencodings::GenId;
1049 "D6E0F2A6E5214E1330565B4D4138E55C" as name: inlineencodings::ShortString;
1050 }
1051 }
1052
1053 mod social {
1054 use crate::prelude::*;
1055
1056 attributes! {
1057 "A19EC1D9DD534BA9896223A457A6B9C9" as name: inlineencodings::ShortString;
1058 "C21DE0AA5BA3446AB886C9640BA60244" as friend: inlineencodings::GenId;
1059 }
1060 }
1061
1062 #[test]
1063 fn and_set() {
1064 let mut books = HashSet::<String>::new();
1065 let mut movies = HashSet::<Inline<ShortString>>::new();
1066
1067 books.insert("LOTR".to_string());
1068 books.insert("Dragonrider".to_string());
1069 books.insert("Highlander".to_string());
1070
1071 movies.insert("LOTR".to_inline());
1072 movies.insert("Highlander".to_inline());
1073
1074 let inter: Vec<_> =
1075 find!((a: Inline<ShortString>), and!(books.has(a), movies.has(a))).collect();
1076
1077 assert_eq!(inter.len(), 2);
1078
1079 let cross: Vec<_> =
1080 find!((a: Inline<ShortString>, b: Inline<ShortString>), and!(books.has(a), movies.has(b))).collect();
1081
1082 assert_eq!(cross.len(), 6);
1083
1084 let one: Vec<_> = find!((a: Inline<ShortString>),
1085 and!(books.has(a), a.is(ShortString::inline_from("LOTR")))
1086 )
1087 .collect();
1088
1089 assert_eq!(one.len(), 1);
1090 }
1091
1092 #[test]
1093 fn pattern() {
1094 let mut kb = TribleSet::new();
1095 (0..1000).for_each(|_| {
1096 let author = fucid();
1097 let book = fucid();
1098 kb += entity! { &author @
1099 literature::firstname: FirstName(EN).fake::<String>(),
1100 literature::lastname: LastName(EN).fake::<String>(),
1101 };
1102 kb += entity! { &book @
1103 literature::author: &author,
1104 literature::title: Words(1..3).fake::<Vec<String>>().join(" "),
1105 literature::quote: Sentence(5..25).fake::<String>().to_blob().get_handle()
1106 };
1107 });
1108
1109 let author = fucid();
1110 let book = fucid();
1111 kb += entity! { &author @
1112 literature::firstname: "Frank",
1113 literature::lastname: "Herbert",
1114 };
1115 kb += entity! { &book @
1116 literature::author: &author,
1117 literature::title: "Dune",
1118 literature::quote: "I must not fear. Fear is the \
1119 mind-killer. Fear is the little-death that brings total \
1120 obliteration. I will face my fear. I will permit it to \
1121 pass over me and through me. And when it has gone past I \
1122 will turn the inner eye to see its path. Where the fear \
1123 has gone there will be nothing. Only I will remain.".to_blob().get_handle()
1124 };
1125
1126 (0..100).for_each(|_| {
1127 let author = fucid();
1128 let book = fucid();
1129 kb += entity! { &author @
1130 literature::firstname: "Fake",
1131 literature::lastname: "Herbert",
1132 };
1133 kb += entity! { &book @
1134 literature::author: &author,
1135 literature::title: Words(1..3).fake::<Vec<String>>().join(" "),
1136 literature::quote: Sentence(5..25).fake::<String>().to_blob().get_handle()
1137 };
1138 });
1139
1140 let r: Vec<_> = find!(
1141 (author: Inline<_>, book: Inline<_>, title: Inline<_>, quote: Inline<_>),
1142 pattern!(&kb, [
1143 {?author @
1144 literature::firstname: "Frank",
1145 literature::lastname: "Herbert"},
1146 {?book @
1147 literature::author: ?author,
1148 literature::title: ?title,
1149 literature::quote: ?quote
1150 }]))
1151 .collect();
1152
1153 assert_eq!(1, r.len())
1154 }
1155
1156 #[test]
1157 fn constant() {
1158 let r: Vec<_> = find! {
1159 (string: Inline<_>, number: Inline<_>),
1160 and!(
1161 string.is(ShortString::inline_from("Hello World!")),
1162 number.is(I256BE::inline_from(42))
1163 )
1164 }
1165 .collect();
1166
1167 assert_eq!(1, r.len())
1168 }
1169
1170 #[test]
1171 fn exists_true() {
1172 assert!(exists!((a: Inline<_>), a.is(I256BE::inline_from(42))));
1173 }
1174
1175 #[test]
1176 fn exists_false() {
1177 assert!(!exists!(
1178 (a: Inline<_>),
1179 and!(a.is(I256BE::inline_from(1)), a.is(I256BE::inline_from(2)))
1180 ));
1181 }
1182
1183 #[test]
1184 fn exists_no_variables_true() {
1185 let mut ctx = VariableContext::new();
1186 let a = ctx.next_variable::<I256BE>();
1187 assert!(exists!(a.is(I256BE::inline_from(42))));
1188 }
1189
1190 #[test]
1191 fn find_no_variables_yields_unit() {
1192 let mut ctx = VariableContext::new();
1193 let a = ctx.next_variable::<I256BE>();
1194 let rows: Vec<()> = find!((), a.is(I256BE::inline_from(42))).collect();
1195 assert_eq!(rows, vec![()]);
1196 }
1197
1198 #[test]
1199 fn temp_variables_span_patterns() {
1200 use social::*;
1201
1202 let mut kb = TribleSet::new();
1203 let alice = fucid();
1204 let bob = fucid();
1205
1206 kb += entity! { &alice @ name: "Alice", friend: &bob };
1207 kb += entity! { &bob @ name: "Bob" };
1208
1209 let matches: Vec<_> = find!(
1210 (person_name: Inline<_>),
1211 temp!((mutual_friend),
1212 and!(
1213 pattern!(&kb, [{ _?person @ name: ?person_name, friend: ?mutual_friend }]),
1214 pattern!(&kb, [{ ?mutual_friend @ name: "Bob" }])
1215 )
1216 )
1217 )
1218 .collect();
1219
1220 assert_eq!(matches.len(), 1);
1221 assert_eq!(matches[0].0.try_from_inline::<&str>().unwrap(), "Alice");
1222 }
1223
1224 #[test]
1225 fn ignore_skips_variables() {
1226 let results: Vec<_> = find!(
1227 (x: Inline<_>),
1228 ignore!((y), and!(x.is(I256BE::inline_from(1)), y.is(I256BE::inline_from(2))))
1229 )
1230 .collect();
1231
1232 assert_eq!(results.len(), 1);
1233 assert_eq!(results[0].0, I256BE::inline_from(1));
1234 }
1235
1236 #[test]
1237 fn estimate_override_debug_order() {
1238 use std::cell::RefCell;
1239 use std::rc::Rc;
1240
1241 let mut ctx = VariableContext::new();
1242 let a = ctx.next_variable::<ShortString>();
1243 let b = ctx.next_variable::<ShortString>();
1244
1245 let base = and!(
1246 a.is(ShortString::inline_from("A")),
1247 b.is(ShortString::inline_from("B"))
1248 );
1249
1250 let mut wrapper = crate::debug::query::EstimateOverrideConstraint::new(base);
1251 wrapper.set_estimate(a.index, 10);
1252 wrapper.set_estimate(b.index, 1);
1253
1254 let record = Rc::new(RefCell::new(Vec::new()));
1255 let debug = crate::debug::query::DebugConstraint::new(wrapper, Rc::clone(&record));
1256
1257 let q: Query<_, _, _> = Query::new(debug, |_| Some(()));
1258 let r: Vec<_> = q.collect();
1259 assert_eq!(1, r.len());
1260 assert_eq!(&*record.borrow(), &[b.index, a.index]);
1261 }
1262}