Skip to main content

triblespace_core/
query.rs

1//! Query facilities for matching tribles by declaring patterns of constraints.
2//! Build queries with the [`find!`](crate::prelude::find) macro which binds variables and
3//! combines constraint expressions:
4//!
5//! ```
6//! # use triblespace_core::prelude::*;
7//! # use triblespace_core::prelude::inlineencodings::ShortString;
8//! let results = find!((x: Inline<ShortString>), x.is("foo".to_inline())).collect::<Vec<_>>();
9//! ```
10//!
11//! Variables are converted via [`TryFromInline`](crate::inline::TryFromInline). By default,
12//! conversion failures silently skip the row (filter semantics). Append `?` to a variable
13//! to receive `Result<T, E>` instead, letting the caller handle errors explicitly.
14//!
15//! For a tour of the language see the "Query Language" chapter in the book.
16//! Conceptual background on schemas and join strategy appears in the
17//! "Query Engine" and "Atreides Join" chapters.
18/// [`ConstantConstraint`] — pins a variable to a single value.
19pub mod constantconstraint;
20/// [`EqualityConstraint`](equalityconstraint::EqualityConstraint) — constrains two variables to have the same value.
21pub mod equalityconstraint;
22/// [`KeysConstraint`](hashmapconstraint::KeysConstraint) — constrains a variable to HashMap keys.
23pub mod hashmapconstraint;
24/// [`SetConstraint`](hashsetconstraint::SetConstraint) — constrains a variable to HashSet members.
25pub mod hashsetconstraint;
26/// [`IgnoreConstraint`] — hides variables from the outer query.
27pub mod ignore;
28/// [`IntersectionConstraint`](intersectionconstraint::IntersectionConstraint) — logical AND.
29pub mod intersectionconstraint;
30/// [`PatchValueConstraint`](patchconstraint::PatchValueConstraint) and [`PatchIdConstraint`](patchconstraint::PatchIdConstraint) — constrains variables to PATCH entries.
31pub mod patchconstraint;
32/// [`InlineRange`](rangeconstraint::InlineRange) — restricts a variable to a byte-lexicographic range.
33pub mod rangeconstraint;
34/// [`RegularPathConstraint`] — regular path expressions over graphs.
35pub mod regularpathconstraint;
36/// [`SortedSliceConstraint`](sortedsliceconstraint::SortedSliceConstraint) — constrains a variable to values in a sorted slice (binary search confirm).
37pub mod sortedsliceconstraint;
38/// [`UnionConstraint`](unionconstraint::UnionConstraint) — logical OR.
39pub mod unionconstraint;
40mod variableset;
41
42use std::cmp::Reverse;
43use std::fmt;
44use std::iter::FromIterator;
45use std::marker::PhantomData;
46
47use arrayvec::ArrayVec;
48use constantconstraint::*;
49/// Re-export of [`IgnoreConstraint`].
50pub use ignore::IgnoreConstraint;
51
52use crate::inline::encodings::genid::GenId;
53use crate::inline::RawInline;
54use crate::inline::Inline;
55use crate::inline::InlineEncoding;
56
57/// Re-export of [`PathOp`].
58pub use regularpathconstraint::PathOp;
59/// Re-export of [`RegularPathConstraint`].
60pub use regularpathconstraint::RegularPathConstraint;
61/// Re-export of [`VariableSet`](variableset::VariableSet).
62pub use variableset::VariableSet;
63
64/// Types storing tribles can implement this trait to expose them to queries.
65/// The trait provides a method to create a constraint for a given trible pattern.
66pub trait TriblePattern {
67    /// The type of the constraint created by the pattern method.
68    ///
69    /// `Send + Sync` is required so the resulting constraint tree can be
70    /// used with the `parallel` feature's rayon iterators. Every in-tree
71    /// pattern backend (TribleSet, SuccinctArchive) satisfies this; custom
72    /// implementations should hold their data behind `Arc` or similar.
73    type PatternConstraint<'a>: Constraint<'a> + Send + Sync
74    where
75        Self: 'a;
76
77    /// Create a constraint for a given trible pattern.
78    /// The method takes three variables, one for each part of the trible.
79    /// The schemas of the entities and attributes are always [GenId], while the value
80    /// schema can be any type implementing [InlineEncoding] and is specified as a type parameter.
81    ///
82    /// This method is usually not called directly, but rather through typed query language
83    /// macros like [pattern!][crate::macros::pattern].
84    fn pattern<'a, V: InlineEncoding>(
85        &'a self,
86        e: Variable<GenId>,
87        a: Variable<GenId>,
88        v: Variable<V>,
89    ) -> Self::PatternConstraint<'a>;
90}
91
92/// Low-level identifier for a variable in a query.
93pub type VariableId = usize;
94
95/// Context for creating variables in a query.
96/// The context keeps track of the next index to assign to a variable.
97/// This allows for the creation of new anonymous variables in higher-level query languages.
98#[derive(Debug)]
99pub struct VariableContext {
100    /// The index that will be assigned to the next variable.
101    pub next_index: VariableId,
102}
103
104impl Default for VariableContext {
105    fn default() -> Self {
106        Self::new()
107    }
108}
109
110impl VariableContext {
111    /// Create a new variable context.
112    /// The context starts with an index of 0.
113    pub fn new() -> Self {
114        VariableContext { next_index: 0 }
115    }
116
117    /// Create a new variable.
118    /// The variable is assigned the next available index.
119    ///
120    /// Panics if the number of variables exceeds 128.
121    ///
122    /// This method is usually not called directly, but rather through typed query language
123    /// macros like [find!][crate::query].
124    pub fn next_variable<T: InlineEncoding>(&mut self) -> Variable<T> {
125        assert!(
126            self.next_index < 128,
127            "currently queries support at most 128 variables"
128        );
129        let v = Variable::new(self.next_index);
130        self.next_index += 1;
131        v
132    }
133}
134
135/// A placeholder for unknowns in a query.
136/// Within the query engine each variable is identified by an integer,
137/// which can be accessed via the `index` property.
138/// Variables also have an associated type which is used to parse the [Inline]s
139/// found by the query engine.
140#[derive(Debug)]
141pub struct Variable<T: InlineEncoding> {
142    /// The integer index identifying this variable in the [`Binding`].
143    pub index: VariableId,
144    typed: PhantomData<T>,
145}
146
147impl<T: InlineEncoding> Copy for Variable<T> {}
148
149impl<T: InlineEncoding> Clone for Variable<T> {
150    fn clone(&self) -> Self {
151        *self
152    }
153}
154
155impl<T: InlineEncoding> Variable<T> {
156    /// Creates a variable with the given index.
157    pub fn new(index: VariableId) -> Self {
158        Variable {
159            index,
160            typed: PhantomData,
161        }
162    }
163
164    /// Extracts the bound value for this variable from `binding`.
165    ///
166    /// # Panics
167    ///
168    /// Panics if the variable has not been bound.
169    pub fn extract(self, binding: &Binding) -> &Inline<T> {
170        let raw = binding.get(self.index).unwrap_or_else(|| {
171            panic!(
172                "query variable (idx {}) was never bound before projection. This usually means the variable was projected in `find!` but never appeared in any constraint. If you intended a pure existence query, use `find!((), ...)` or `exists!(constraint)`.",
173                self.index
174            )
175        });
176        Inline::as_transmute_raw(raw)
177    }
178}
179
180/// Collections can implement this trait so that they can be used in queries.
181/// The returned constraint will filter the values assigned to the variable
182/// to only those that are contained in the collection.
183pub trait ContainsConstraint<'a, T: InlineEncoding> {
184    /// The concrete constraint type produced by [`has`](ContainsConstraint::has).
185    type Constraint: Constraint<'a>;
186
187    /// Create a constraint that filters the values assigned to the variable
188    /// to only those that are contained in the collection.
189    ///
190    /// The returned constraint will usually perform a conversion between the
191    /// concrete rust type stored in the collection a [Inline] of the appropriate schema
192    /// type for the variable.
193    fn has(self, v: Variable<T>) -> Self::Constraint;
194}
195
196impl<T: InlineEncoding> Variable<T> {
197    /// Create a constraint so that only a specific value can be assigned to the variable.
198    pub fn is(self, constant: Inline<T>) -> ConstantConstraint {
199        ConstantConstraint::new(self, constant)
200    }
201}
202
203/// The binding keeps track of the values assigned to variables in a query.
204/// It maps variables to values - by their index - via a simple array,
205/// and keeps track of which variables are bound.
206/// It is used to store intermediate results and to pass information
207/// between different constraints.
208/// The binding is mutable, as it is modified by the query engine.
209/// It is not thread-safe and should not be shared between threads.
210/// The binding is a simple data structure that is cheap to clone.
211/// It is not intended to be used as a long-term storage for query results.
212#[derive(Clone, Debug)]
213pub struct Binding {
214    /// Bitset tracking which variables have been assigned a value.
215    pub bound: VariableSet,
216    values: [RawInline; 128],
217}
218
219impl Binding {
220    /// Binds `variable` to `value`.
221    pub fn set(&mut self, variable: VariableId, value: &RawInline) {
222        self.values[variable] = *value;
223        self.bound.set(variable);
224    }
225
226    /// Unset a variable in the binding.
227    /// This is used to backtrack in the query engine.
228    pub fn unset(&mut self, variable: VariableId) {
229        self.bound.unset(variable);
230    }
231
232    /// Check if a variable is bound in the binding.
233    pub fn get(&self, variable: VariableId) -> Option<&RawInline> {
234        if self.bound.is_set(variable) {
235            Some(&self.values[variable])
236        } else {
237            None
238        }
239    }
240}
241
242impl Default for Binding {
243    fn default() -> Self {
244        Self {
245            bound: VariableSet::new_empty(),
246            values: [[0; 32]; 128],
247        }
248    }
249}
250
251/// The cooperative protocol that every query participant implements.
252///
253/// A constraint restricts the values that can be assigned to query variables.
254/// The query engine does not plan joins in advance; instead it consults
255/// constraints directly during a depth-first search over partial bindings.
256/// Each constraint reports which variables it touches, estimates how many
257/// candidates remain, enumerates concrete values on demand, and signals
258/// whether its requirements are still satisfiable. This protocol is the
259/// sole interface between the engine and the data — whether that data lives
260/// in a [`TribleSet`](crate::trible::TribleSet), a [`HashMap`](std::collections::HashMap),
261/// or a custom application predicate.
262///
263/// # The protocol
264///
265/// The engine drives the search by calling five methods in a fixed rhythm:
266///
267/// | Method | Role | Called when |
268/// |--------|------|------------|
269/// | [`variables`](Constraint::variables) | Declares which variables the constraint touches. | Once, at query start. |
270/// | [`estimate`](Constraint::estimate) | Predicts the candidate count for a variable. | Before each binding decision. |
271/// | [`propose`](Constraint::propose) | Enumerates candidate values for a variable. | On the most selective constraint. |
272/// | [`confirm`](Constraint::confirm) | Filters candidates proposed by another constraint. | On all remaining constraints. |
273/// | [`satisfied`](Constraint::satisfied) | Checks whether fully-bound sub-constraints still hold. | Before propose/confirm in composite constraints. |
274///
275/// [`influence`](Constraint::influence) completes the picture by telling the
276/// engine which estimates to refresh when a variable is bound or unbound.
277///
278/// # Statelessness
279///
280/// Constraints are stateless: every method receives the current [`Binding`]
281/// as a parameter rather than maintaining internal bookkeeping. This lets
282/// the engine backtrack freely by unsetting variables in the binding
283/// without notifying the constraints.
284///
285/// # Composability
286///
287/// Constraints combine via [`IntersectionConstraint`](crate::query::intersectionconstraint::IntersectionConstraint)
288/// (logical AND — built by [`and!`](crate::and)) and
289/// [`UnionConstraint`](crate::query::unionconstraint::UnionConstraint)
290/// (logical OR — built by [`or!`](crate::or)). Because every constraint
291/// speaks the same protocol, heterogeneous data sources mix freely in a
292/// single query.
293///
294/// # Implementing a custom constraint
295///
296/// A new constraint only needs to implement [`variables`](Constraint::variables),
297/// [`estimate`](Constraint::estimate), [`propose`](Constraint::propose), and
298/// [`confirm`](Constraint::confirm). Override [`satisfied`](Constraint::satisfied)
299/// when the constraint can detect unsatisfiability before the engine asks
300/// about individual variables (e.g. a fully-bound triple lookup that found
301/// no match). Override [`influence`](Constraint::influence) when binding one
302/// variable changes the estimates for a non-obvious set of others.
303pub trait Constraint<'a> {
304    /// Returns the set of variables this constraint touches.
305    ///
306    /// Called once at query start. The engine uses this to build influence
307    /// graphs and to determine which constraints participate when a
308    /// particular variable is being bound.
309    fn variables(&self) -> VariableSet;
310
311    /// Estimates the number of candidate values for `variable` given the
312    /// current partial `binding`.
313    ///
314    /// Returns `None` when `variable` is not constrained by this constraint.
315    /// The estimate need not be exact — it guides variable ordering, not
316    /// correctness. Tighter estimates lead to better search pruning; see the
317    /// [Atreides join](crate) family for how different estimate fidelities
318    /// affect performance.
319    fn estimate(&self, variable: VariableId, binding: &Binding) -> Option<usize>;
320
321    /// Enumerates candidate values for `variable` into `proposals`.
322    ///
323    /// Called on the constraint with the lowest estimate for the variable
324    /// being bound. Values are appended to `proposals`; the engine may
325    /// already have values in the vector from a previous round.
326    ///
327    /// Does nothing when `variable` is not constrained by this constraint.
328    fn propose(&self, variable: VariableId, binding: &Binding, proposals: &mut Vec<RawInline>);
329
330    /// Filters `proposals` to remove values for `variable` that violate
331    /// this constraint.
332    ///
333    /// Called on every constraint *except* the one that proposed, in order
334    /// of increasing estimate. Implementations remove entries from
335    /// `proposals` that are inconsistent with the current `binding`.
336    ///
337    /// Does nothing when `variable` is not constrained by this constraint.
338    fn confirm(&self, variable: VariableId, binding: &Binding, proposals: &mut Vec<RawInline>);
339
340    /// Returns whether this constraint is consistent with the current
341    /// `binding`.
342    ///
343    /// The default implementation returns `true`. Override this when the
344    /// constraint can cheaply detect that no solution exists — for example,
345    /// a `TribleSetConstraint`
346    /// whose entity, attribute, and value are all bound but the triple is
347    /// absent from the dataset.
348    ///
349    /// Composite constraints propagate this check to their children:
350    /// [`IntersectionConstraint`](crate::query::intersectionconstraint::IntersectionConstraint)
351    /// requires *all* children to be satisfied, while
352    /// [`UnionConstraint`](crate::query::unionconstraint::UnionConstraint)
353    /// requires *at least one*. The union uses this to skip dead variants
354    /// in propose and confirm, preventing values from a satisfied variant
355    /// from leaking through a dead one.
356    fn satisfied(&self, _binding: &Binding) -> bool {
357        true
358    }
359
360    /// Returns the set of variables whose estimates may change when
361    /// `variable` is bound or unbound.
362    ///
363    /// The default includes every variable this constraint touches except
364    /// `variable` itself. Returns an empty set when `variable` is not part
365    /// of this constraint.
366    fn influence(&self, variable: VariableId) -> VariableSet {
367        let mut vars = self.variables();
368        if vars.is_set(variable) {
369            vars.unset(variable);
370            vars
371        } else {
372            VariableSet::new_empty()
373        }
374    }
375}
376
377impl<'a, T: Constraint<'a> + ?Sized> Constraint<'a> for Box<T> {
378    fn variables(&self) -> VariableSet {
379        let inner: &T = self;
380        inner.variables()
381    }
382
383    fn estimate(&self, variable: VariableId, binding: &Binding) -> Option<usize> {
384        let inner: &T = self;
385        inner.estimate(variable, binding)
386    }
387
388    fn propose(&self, variable: VariableId, binding: &Binding, proposals: &mut Vec<RawInline>) {
389        let inner: &T = self;
390        inner.propose(variable, binding, proposals)
391    }
392
393    fn confirm(&self, variable: VariableId, binding: &Binding, proposals: &mut Vec<RawInline>) {
394        let inner: &T = self;
395        inner.confirm(variable, binding, proposals)
396    }
397
398    fn satisfied(&self, binding: &Binding) -> bool {
399        let inner: &T = self;
400        inner.satisfied(binding)
401    }
402
403    fn influence(&self, variable: VariableId) -> VariableSet {
404        let inner: &T = self;
405        inner.influence(variable)
406    }
407}
408
409impl<'a, T: Constraint<'a> + ?Sized> Constraint<'a> for std::sync::Arc<T> {
410    fn variables(&self) -> VariableSet {
411        let inner: &T = self;
412        inner.variables()
413    }
414
415    fn estimate(&self, variable: VariableId, binding: &Binding) -> Option<usize> {
416        let inner: &T = self;
417        inner.estimate(variable, binding)
418    }
419
420    fn propose(&self, variable: VariableId, binding: &Binding, proposals: &mut Vec<RawInline>) {
421        let inner: &T = self;
422        inner.propose(variable, binding, proposals)
423    }
424
425    fn confirm(&self, variable: VariableId, binding: &Binding, proposal: &mut Vec<RawInline>) {
426        let inner: &T = self;
427        inner.confirm(variable, binding, proposal)
428    }
429
430    fn satisfied(&self, binding: &Binding) -> bool {
431        let inner: &T = self;
432        inner.satisfied(binding)
433    }
434
435    fn influence(&self, variable: VariableId) -> VariableSet {
436        let inner: &T = self;
437        inner.influence(variable)
438    }
439}
440
441/// A query is an iterator over the results of a query.
442/// It takes a constraint and a post-processing function as input,
443/// and returns the results of the query as a stream of values.
444/// The query engine uses a depth-first search to find solutions to the query,
445/// proposing values for the variables and backtracking when it reaches a dead end.
446/// The query engine is designed to be simple and efficient, providing low, consistent,
447/// and predictable latency, skew resistance, and no required (or possible) tuning.
448/// The query engine is designed to be used in combination with the [Constraint] trait,
449/// which provides a simple and flexible way to implement constraints that can be used
450/// to filter the results of a query.
451///
452/// This struct is usually not created directly, but rather through the `find!` macro,
453/// which provides a convenient way to declare variables and concrete types for them.
454/// And which sets up the nessecairy context for higher-level query languages
455/// like the one provided by the [`crate::macros`] module.
456pub struct Query<C, P: Fn(&Binding) -> Option<R>, R> {
457    constraint: C,
458    postprocessing: P,
459    mode: Search,
460    binding: Binding,
461    influences: [VariableSet; 128],
462    estimates: [usize; 128],
463    touched_variables: VariableSet,
464    stack: ArrayVec<VariableId, 128>,
465    unbound: ArrayVec<VariableId, 128>,
466    values: ArrayVec<Option<Vec<RawInline>>, 128>,
467}
468
469// Manual `Clone` impl, because `#[derive(Clone)]` would require `R: Clone`
470// which isn't actually needed — `R` only appears in `P`'s return type.
471#[cfg(feature = "parallel")]
472impl<C, P, R> Clone for Query<C, P, R>
473where
474    C: Clone,
475    P: Fn(&Binding) -> Option<R> + Clone,
476{
477    fn clone(&self) -> Self {
478        Self {
479            constraint: self.constraint.clone(),
480            postprocessing: self.postprocessing.clone(),
481            mode: self.mode,
482            binding: self.binding.clone(),
483            influences: self.influences,
484            estimates: self.estimates,
485            touched_variables: self.touched_variables,
486            stack: self.stack.clone(),
487            unbound: self.unbound.clone(),
488            values: self.values.clone(),
489        }
490    }
491}
492
493impl<'a, C: Constraint<'a>, P: Fn(&Binding) -> Option<R>, R> Query<C, P, R> {
494    /// Picks the next unbound variable, refreshes estimates touched by
495    /// the most recent binding, re-sorts `unbound`, pushes the chosen
496    /// variable onto the stack, and fills its proposal vector via
497    /// [`Constraint::propose`]. Leaves `mode = NextValue`. The caller is
498    /// responsible for ensuring `unbound` is non-empty.
499    ///
500    /// Shared between [`Iterator::next`]'s `NextVariable` branch and the
501    /// [`UnindexedProducer::split`](crate::query::QueryParIter) implementation
502    /// — the "push + propose" dance is identical in both.
503    fn push_next_variable(&mut self) {
504        let mut stale_estimates = VariableSet::new_empty();
505        while let Some(variable) = self.touched_variables.drain_next_ascending() {
506            stale_estimates = stale_estimates.union(self.influences[variable]);
507        }
508        // Bound variables can't be influenced by the unbound ones, so skip.
509        stale_estimates = stale_estimates.subtract(self.binding.bound);
510
511        if !stale_estimates.is_empty() {
512            while let Some(v) = stale_estimates.drain_next_ascending() {
513                self.estimates[v] = self
514                    .constraint
515                    .estimate(v, &self.binding)
516                    .expect("unconstrained variable in query");
517            }
518            self.unbound.sort_unstable_by_key(|v| {
519                (
520                    Reverse(
521                        self.estimates[*v]
522                            .checked_ilog2()
523                            .map(|magnitude| magnitude + 1)
524                            .unwrap_or(0),
525                    ),
526                    self.influences[*v].count(),
527                )
528            });
529        }
530
531        let variable = self.unbound.pop().expect("non-empty unbound");
532        let estimate = self.estimates[variable];
533        self.stack.push(variable);
534        let values = self.values[variable].get_or_insert(Vec::new());
535        values.clear();
536        values.reserve_exact(estimate.saturating_sub(values.capacity()));
537        self.constraint.propose(variable, &self.binding, values);
538    }
539
540    /// Create a new query.
541    /// The query takes a constraint and a post-processing function as input,
542    /// and returns the results of the query as a stream of values.
543    /// The post-processing function returns `Option<R>`: returning `None`
544    /// skips the current binding and continues the search.
545    ///
546    /// This method is usually not called directly, but rather through the [find!] macro,
547    pub fn new(constraint: C, postprocessing: P) -> Self {
548        let variables = constraint.variables();
549        let influences = std::array::from_fn(|v| {
550            if variables.is_set(v) {
551                constraint.influence(v)
552            } else {
553                VariableSet::new_empty()
554            }
555        });
556        let binding = Binding::default();
557        let estimates = std::array::from_fn(|v| {
558            if variables.is_set(v) {
559                constraint
560                    .estimate(v, &binding)
561                    .expect("unconstrained variable in query")
562            } else {
563                usize::MAX
564            }
565        });
566        let mut unbound = ArrayVec::from_iter(variables);
567        unbound.sort_unstable_by_key(|v| {
568            (
569                Reverse(
570                    estimates[*v]
571                        .checked_ilog2()
572                        .map(|magnitude| magnitude + 1)
573                        .unwrap_or(0),
574                ),
575                influences[*v].count(),
576            )
577        });
578
579        Query {
580            constraint,
581            postprocessing,
582            mode: Search::NextVariable,
583            binding,
584            influences,
585            estimates,
586            touched_variables: VariableSet::new_empty(),
587            stack: ArrayVec::new(),
588            unbound,
589            values: ArrayVec::from([const { None }; 128]),
590        }
591    }
592}
593
594/// The search mode of the query engine.
595/// The query engine uses a depth-first search to find solutions to the query,
596/// proposing values for the variables and backtracking when it reaches a dead end.
597/// The search mode is used to keep track of the current state of the search.
598/// The search mode can be one of the following:
599/// - `NextVariable` - The query engine is looking for the next variable to assign a value to.
600/// - `NextValue` - The query engine is looking for the next value to assign to a variable.
601/// - `Backtrack` - The query engine is backtracking to try a different value for a variable.
602/// - `Done` - The query engine has finished the search and there are no more results.
603#[derive(Copy, Clone, Debug)]
604enum Search {
605    NextVariable,
606    NextValue,
607    Backtrack,
608    Done,
609}
610
611impl<'a, C: Constraint<'a>, P: Fn(&Binding) -> Option<R>, R> Iterator for Query<C, P, R> {
612    type Item = R;
613
614    fn next(&mut self) -> Option<Self::Item> {
615        loop {
616            match &self.mode {
617                Search::NextVariable => {
618                    self.mode = Search::NextValue;
619                    if self.unbound.is_empty() {
620                        if let Some(result) = (self.postprocessing)(&self.binding) {
621                            return Some(result);
622                        }
623                        // Post-processing rejected this binding; continue
624                        // searching (mode is already NextValue).
625                        continue;
626                    }
627                    self.push_next_variable();
628                }
629                Search::NextValue => {
630                    if let Some(&variable) = self.stack.last() {
631                        if let Some(assignment) = self.values[variable]
632                            .as_mut()
633                            .expect("values should be initialized")
634                            .pop()
635                        {
636                            self.binding.set(variable, &assignment);
637                            self.touched_variables.set(variable);
638                            self.mode = Search::NextVariable;
639                        } else {
640                            self.mode = Search::Backtrack;
641                        }
642                    } else {
643                        self.mode = Search::Done;
644                        return None;
645                    }
646                }
647                Search::Backtrack => {
648                    if let Some(variable) = self.stack.pop() {
649                        self.binding.unset(variable);
650                        // Note that we did not update estiamtes for the unbound variables
651                        // as we are backtracking, so the estimates are still valid.
652                        // Since we choose this variable before, we know that it would
653                        // still go last in the unbound list.
654                        self.unbound.push(variable);
655
656                        // However, we need to update the touched variables,
657                        // as we are backtracking and the variable is no longer bound.
658                        // We're essentially restoring the estimate of the touched variables
659                        // to the state before we bound this variable.
660                        self.touched_variables.set(variable);
661                        self.mode = Search::NextValue;
662                    } else {
663                        self.mode = Search::Done;
664                        return None;
665                    }
666                }
667                Search::Done => {
668                    return None;
669                }
670            }
671        }
672    }
673}
674
675impl<'a, C: Constraint<'a>, P: Fn(&Binding) -> Option<R>, R> fmt::Debug for Query<C, P, R> {
676    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
677        f.debug_struct("Query")
678            .field("constraint", &std::any::type_name::<C>())
679            .field("mode", &self.mode)
680            .field("binding", &self.binding)
681            .field("stack", &self.stack)
682            .field("unbound", &self.unbound)
683            .finish()
684    }
685}
686
687// ---------------------------------------------------------------------------
688// Parallel execution via rayon.
689//
690// `Query` implements `IntoParallelIterator` with `Iter = QueryParIter`.
691// `QueryParIter` is a separate wrapper type implementing `ParallelIterator`
692// + `UnindexedProducer`, distinct from `Query` itself to avoid method-name
693// ambiguity between `Iterator` and `ParallelIterator` — methods like
694// `.count()`, `.collect()`, `.map()` exist on both.
695//
696// Usage: `find!(...).into_par_iter().map(...).collect::<Vec<_>>()`.
697//
698// The producer's `split` uses the "split-or-descend" rule: while the current
699// top-of-stack has a single remaining proposal, bind it and descend one level;
700// when the top has ≥2 remaining, bisect them between two sub-queries. This
701// keeps the invariant that every non-top stack level has zero remaining
702// proposals, so backtracking out of a sub-search unwinds cleanly to done
703// without any re-enumeration across clones.
704//
705// `fold_with` is the terminal leaf: it just drives the existing sequential
706// `Iterator::next()` and feeds results into the folder. No duplicated
707// execution logic.
708// ---------------------------------------------------------------------------
709
710#[cfg(feature = "parallel")]
711pub use parallel::QueryParIter;
712
713#[cfg(feature = "parallel")]
714mod parallel {
715    use super::*;
716    use rayon::iter::plumbing::{
717        bridge_unindexed, Folder, UnindexedConsumer, UnindexedProducer,
718    };
719    use rayon::iter::{IntoParallelIterator, ParallelIterator};
720
721    /// Parallel iterator over the results of a [`Query`]. Obtained via
722    /// [`IntoParallelIterator::into_par_iter`] on a `Query`.
723    ///
724    /// Drives rayon's work-stealing scheduler through an `UnindexedProducer`
725    /// impl on the underlying query state. The sequential `Iterator::next`
726    /// on `Query` is reused as the fold leaf — parallel execution is purely
727    /// additional, no duplicated engine logic.
728    ///
729    /// The inner query is stored in a [`Box`] so rayon's work-stealing
730    /// `split` (which clones the producer) doesn't memcpy ~15 KB of query
731    /// state on every fork — just a Box pointer copy, with the heap alloc
732    /// paid only by the child.
733    ///
734    /// `split_budget` bounds the number of splits this sub-producer will
735    /// perform. Rayon's default `Splitter` *resets* its budget on every
736    /// stolen task, so on a busy thread pool the split tree could grow
737    /// unboundedly deep — the Query always has more proposals to bisect.
738    /// A bounded per-producer budget (`num_threads²`) caps the split tree
739    /// at ~N² leaves — enough for each worker to have roughly N chunks to
740    /// rebalance via stealing — regardless of stealing pressure.
741    pub struct QueryParIter<C, P: Fn(&Binding) -> Option<R>, R> {
742        inner: Box<Query<C, P, R>>,
743        split_budget: usize,
744    }
745
746    impl<'a, C, P, R> IntoParallelIterator for Query<C, P, R>
747    where
748        C: Constraint<'a> + Clone + Send + 'a,
749        P: Fn(&Binding) -> Option<R> + Clone + Send,
750        R: Send,
751    {
752        type Item = R;
753        type Iter = QueryParIter<C, P, R>;
754
755        fn into_par_iter(self) -> Self::Iter {
756            // num_threads² chunks: intuition is "every worker has one spare
757            // chunk for every other worker," giving N²/N = N chunks apiece
758            // for rebalancing. log₂(N²) = 2·log₂(N), so depth stays modest
759            // (8 on a 16-thread box, 10 on a 32-thread) — well below any
760            // stack concern.
761            let n = rayon::current_num_threads();
762            let split_budget = n.saturating_mul(n).max(2);
763            QueryParIter {
764                inner: Box::new(self),
765                split_budget,
766            }
767        }
768    }
769
770    impl<'a, C, P, R> UnindexedProducer for QueryParIter<C, P, R>
771    where
772        C: Constraint<'a> + Clone + Send + 'a,
773        P: Fn(&Binding) -> Option<R> + Clone + Send,
774        R: Send,
775    {
776        type Item = R;
777
778        /// Advance the Query's state machine until either the current
779        /// top-of-stack has ≥2 remaining proposals (bisect, return a
780        /// right half) or the sub-query is exhausted (return `None`,
781        /// leaving `self` as a leaf that `fold_with` will fold
782        /// sequentially). Single-value levels are descended through —
783        /// see the module doc comment for why this preserves correctness
784        /// without re-enumeration.
785        fn split(mut self) -> (Self, Option<Self>) {
786            if self.split_budget == 0 {
787                return (self, None);
788            }
789            self.split_budget -= 1;
790            let q = &mut *self.inner;
791            loop {
792                // Advance the state machine until we're in NextValue with
793                // a populated top — the only state where split-or-descend
794                // makes sense.
795                while !matches!(q.mode, Search::NextValue) {
796                    match q.mode {
797                        Search::NextVariable => {
798                            q.mode = Search::NextValue;
799                            if q.unbound.is_empty() {
800                                // All variables bound. Leaf — fold_with
801                                // will drive sequential `next()` to yield
802                                // the one postprocessed result.
803                                q.mode = Search::NextVariable;
804                                return (self, None);
805                            }
806                            q.push_next_variable();
807                        }
808                        Search::Backtrack => {
809                            if let Some(variable) = q.stack.pop() {
810                                q.binding.unset(variable);
811                                q.unbound.push(variable);
812                                q.touched_variables.set(variable);
813                                q.mode = Search::NextValue;
814                            } else {
815                                q.mode = Search::Done;
816                                return (self, None);
817                            }
818                        }
819                        Search::Done => return (self, None),
820                        Search::NextValue => unreachable!(),
821                    }
822                }
823
824                // mode == NextValue. Inspect top-of-stack's remaining
825                // proposals.
826                let Some(&top) = q.stack.last() else {
827                    return (self, None);
828                };
829                let top_len = q.values[top].as_ref().map_or(0, |v| v.len());
830                match top_len {
831                    0 => q.mode = Search::Backtrack,
832                    1 => {
833                        // Descend: pop the single value, bind it,
834                        // transition to NextVariable so the outer loop
835                        // runs propose.
836                        let assignment = q.values[top].as_mut().unwrap().pop().unwrap();
837                        q.binding.set(top, &assignment);
838                        q.touched_variables.set(top);
839                        q.mode = Search::NextVariable;
840                    }
841                    _ => {
842                        // Bisect the remaining proposals; clone the rest
843                        // of the query state into the right half. Clone
844                        // cost is one ~15 KB arraycopy per
845                        // rayon-requested split — rayon only asks under
846                        // stealing pressure.
847                        let vals = q.values[top].as_mut().unwrap();
848                        let mid = vals.len() / 2;
849                        let right_vals: Vec<RawInline> = vals.drain(mid..).collect();
850                        let mut right = q.clone();
851                        right.values[top] = Some(right_vals);
852
853                        let left_budget = self.split_budget / 2;
854                        let right_budget = self.split_budget - left_budget;
855                        self.split_budget = left_budget;
856                        return (
857                            self,
858                            Some(QueryParIter {
859                                inner: Box::new(right),
860                                split_budget: right_budget,
861                            }),
862                        );
863                    }
864                }
865            }
866        }
867
868        fn fold_with<F: Folder<R>>(self, mut folder: F) -> F {
869            let QueryParIter { inner: mut q, .. } = self;
870            while !folder.full() {
871                match q.next() {
872                    Some(item) => folder = folder.consume(item),
873                    None => break,
874                }
875            }
876            folder
877        }
878    }
879
880    impl<'a, C, P, R> ParallelIterator for QueryParIter<C, P, R>
881    where
882        C: Constraint<'a> + Clone + Send + 'a,
883        P: Fn(&Binding) -> Option<R> + Clone + Send,
884        R: Send,
885    {
886        type Item = R;
887
888        fn drive_unindexed<Con>(self, consumer: Con) -> Con::Result
889        where
890            Con: UnindexedConsumer<Self::Item>,
891        {
892            bridge_unindexed(self, consumer)
893        }
894    }
895
896}
897
898/// Iterate over query results, converting each variable via
899/// [`TryFromInline`](crate::inline::TryFromInline).
900///
901/// The macro takes two arguments: a tuple of variables with optional type
902/// annotations, and a constraint expression. It injects a `__local_find_context!`
903/// macro that provides the variable context to nested query macros like
904/// [`pattern!`](crate::macros::pattern) and [`ignore!`](crate::ignore).
905///
906/// # Variable syntax
907///
908/// | Syntax | Meaning |
909/// |--------|---------|
910/// | `name` | inferred type, filter on conversion failure |
911/// | `name: Type` | explicit type, filter on conversion failure |
912/// | `name?` | inferred type, yield `Result<T, E>` (no filter) |
913/// | `name: Type?` | explicit type, yield `Result<T, E>` (no filter) |
914///
915/// The unit form `find!((), constraint)` projects no variables and yields one
916/// `()` for every matching row. This is useful when you only care about
917/// existence, counting, or composing the query without returning values.
918///
919/// **Filter semantics (default):** when a variable's conversion fails the
920/// entire row is silently skipped — like a constraint that doesn't match.
921/// For types whose `TryFromInline::Error = Infallible` the error branch is
922/// dead code, so no rows can ever be accidentally filtered.
923///
924/// **`?` pass-through:** appending `?` to a variable makes it yield
925/// `Result<T, E>` directly. Both `Ok` and `Err` values pass through with
926/// no filtering, matching Rust's `?` semantics of "bubble the error to the
927/// caller."
928///
929/// # Examples
930///
931/// ```
932/// # use triblespace_core::prelude::*;
933/// # use triblespace_core::prelude::inlineencodings::ShortString;
934/// // Filter semantics — rows where conversion fails are skipped:
935/// let results = find!((x: Inline<ShortString>), x.is("foo".to_inline())).collect::<Vec<_>>();
936/// ```
937#[macro_export]
938macro_rules! find {
939    ($($tokens:tt)*) => {
940        {
941            #[allow(unused_mut, unused_variables)]
942            let mut ctx = $crate::query::VariableContext::new();
943
944            macro_rules! __local_find_context {
945                () => { &mut ctx }
946            }
947
948            $crate::macros::__find_impl!($crate, ctx, $($tokens)*)
949        }
950    };
951}
952/// Re-export of the [`find!`] macro.
953pub use find;
954
955/// Returns `true` when a query produces at least one row.
956///
957/// This is equivalent to calling `find!(...).next().is_some()`, but reads more
958/// directly for existence checks.
959///
960/// # Forms
961///
962/// - `exists!(constraint)` checks a pure constraint with no projected
963///   variables.
964/// - `exists!((vars...), constraint)` uses the same variable/conversion syntax
965///   as [`find!`] before checking whether any row survives projection.
966///
967/// ```rust,ignore
968/// exists!(pattern!(&kb, [{ ?person @ social::name: "Alice" }]))
969/// ```
970///
971/// ```rust,ignore
972/// exists!(
973///     (name: Inline<_>),
974///     pattern!(&kb, [{ ?person @ social::name: ?name }])
975/// )
976/// ```
977#[macro_export]
978macro_rules! exists {
979    (($($vars:tt)*), $Constraint:expr) => {
980        $crate::query::find!(($($vars)*), $Constraint).next().is_some()
981    };
982    ($Constraint:expr) => {
983        $crate::query::find!((), $Constraint).next().is_some()
984    };
985}
986/// Re-export of the [`exists!`] macro.
987pub use exists;
988
989/// Introduces one or more temporary query variables for a nested constraint.
990///
991/// `temp!` is only meaningful inside macros that provide a local query context,
992/// such as [`find!`], [`exists!`], or macros expanded from them like
993/// [`pattern!`](crate::macros::pattern). Each identifier becomes a fresh query
994/// variable that is scoped to the wrapped body.
995///
996/// ```rust,ignore
997/// find!(
998///     (person: Inline<_>),
999///     temp!((friend), and!(
1000///         pattern!(&kb, [{ ?person @ social::friend: ?friend }]),
1001///         pattern!(&kb, [{ ?friend @ social::name: "Bob" }])
1002///     ))
1003/// )
1004/// ```
1005#[macro_export]
1006macro_rules! temp {
1007    (($Var:ident), $body:expr) => {{
1008        let $Var = __local_find_context!().next_variable();
1009        $body
1010    }};
1011    (($Var:ident,), $body:expr) => {
1012        $crate::temp!(($Var), $body)
1013    };
1014    (($Var:ident, $($rest:ident),+ $(,)?), $body:expr) => {{
1015        $crate::temp!(
1016            ($Var),
1017            $crate::temp!(($($rest),+), $body)
1018        )
1019    }};
1020}
1021/// Re-export of the [`temp!`] macro.
1022pub use temp;
1023
1024#[cfg(test)]
1025mod tests {
1026    use inlineencodings::ShortString;
1027
1028    use crate::ignore;
1029    use crate::prelude::inlineencodings::*;
1030    use crate::prelude::*;
1031
1032    use crate::examples::literature;
1033
1034    use fake::faker::lorem::en::Sentence;
1035    use fake::faker::lorem::en::Words;
1036    use fake::faker::name::raw::*;
1037    use fake::locales::*;
1038    use fake::Fake;
1039
1040    use std::collections::HashSet;
1041
1042    use super::*;
1043
1044    pub mod knights {
1045        use crate::prelude::*;
1046
1047        attributes! {
1048            "8143F46E812E88C4544E7094080EC523" as loves: inlineencodings::GenId;
1049            "D6E0F2A6E5214E1330565B4D4138E55C" as name: inlineencodings::ShortString;
1050        }
1051    }
1052
1053    mod social {
1054        use crate::prelude::*;
1055
1056        attributes! {
1057            "A19EC1D9DD534BA9896223A457A6B9C9" as name: inlineencodings::ShortString;
1058            "C21DE0AA5BA3446AB886C9640BA60244" as friend: inlineencodings::GenId;
1059        }
1060    }
1061
1062    #[test]
1063    fn and_set() {
1064        let mut books = HashSet::<String>::new();
1065        let mut movies = HashSet::<Inline<ShortString>>::new();
1066
1067        books.insert("LOTR".to_string());
1068        books.insert("Dragonrider".to_string());
1069        books.insert("Highlander".to_string());
1070
1071        movies.insert("LOTR".to_inline());
1072        movies.insert("Highlander".to_inline());
1073
1074        let inter: Vec<_> =
1075            find!((a: Inline<ShortString>), and!(books.has(a), movies.has(a))).collect();
1076
1077        assert_eq!(inter.len(), 2);
1078
1079        let cross: Vec<_> =
1080            find!((a: Inline<ShortString>, b: Inline<ShortString>), and!(books.has(a), movies.has(b))).collect();
1081
1082        assert_eq!(cross.len(), 6);
1083
1084        let one: Vec<_> = find!((a: Inline<ShortString>),
1085            and!(books.has(a), a.is(ShortString::inline_from("LOTR")))
1086        )
1087        .collect();
1088
1089        assert_eq!(one.len(), 1);
1090    }
1091
1092    #[test]
1093    fn pattern() {
1094        let mut kb = TribleSet::new();
1095        (0..1000).for_each(|_| {
1096            let author = fucid();
1097            let book = fucid();
1098            kb += entity! { &author @
1099               literature::firstname: FirstName(EN).fake::<String>(),
1100               literature::lastname: LastName(EN).fake::<String>(),
1101            };
1102            kb += entity! { &book @
1103               literature::author: &author,
1104               literature::title: Words(1..3).fake::<Vec<String>>().join(" "),
1105               literature::quote: Sentence(5..25).fake::<String>().to_blob().get_handle()
1106            };
1107        });
1108
1109        let author = fucid();
1110        let book = fucid();
1111        kb += entity! { &author @
1112           literature::firstname: "Frank",
1113           literature::lastname: "Herbert",
1114        };
1115        kb += entity! { &book @
1116           literature::author: &author,
1117           literature::title: "Dune",
1118           literature::quote: "I must not fear. Fear is the \
1119                   mind-killer. Fear is the little-death that brings total \
1120                   obliteration. I will face my fear. I will permit it to \
1121                   pass over me and through me. And when it has gone past I \
1122                   will turn the inner eye to see its path. Where the fear \
1123                   has gone there will be nothing. Only I will remain.".to_blob().get_handle()
1124        };
1125
1126        (0..100).for_each(|_| {
1127            let author = fucid();
1128            let book = fucid();
1129            kb += entity! { &author @
1130               literature::firstname: "Fake",
1131               literature::lastname: "Herbert",
1132            };
1133            kb += entity! { &book @
1134               literature::author: &author,
1135               literature::title: Words(1..3).fake::<Vec<String>>().join(" "),
1136               literature::quote: Sentence(5..25).fake::<String>().to_blob().get_handle()
1137            };
1138        });
1139
1140        let r: Vec<_> = find!(
1141        (author: Inline<_>, book: Inline<_>, title: Inline<_>, quote: Inline<_>),
1142        pattern!(&kb, [
1143        {?author @
1144            literature::firstname: "Frank",
1145            literature::lastname: "Herbert"},
1146        {?book @
1147          literature::author: ?author,
1148          literature::title: ?title,
1149          literature::quote: ?quote
1150        }]))
1151        .collect();
1152
1153        assert_eq!(1, r.len())
1154    }
1155
1156    #[test]
1157    fn constant() {
1158        let r: Vec<_> = find! {
1159            (string: Inline<_>, number: Inline<_>),
1160            and!(
1161                string.is(ShortString::inline_from("Hello World!")),
1162                number.is(I256BE::inline_from(42))
1163            )
1164        }
1165        .collect();
1166
1167        assert_eq!(1, r.len())
1168    }
1169
1170    #[test]
1171    fn exists_true() {
1172        assert!(exists!((a: Inline<_>), a.is(I256BE::inline_from(42))));
1173    }
1174
1175    #[test]
1176    fn exists_false() {
1177        assert!(!exists!(
1178            (a: Inline<_>),
1179            and!(a.is(I256BE::inline_from(1)), a.is(I256BE::inline_from(2)))
1180        ));
1181    }
1182
1183    #[test]
1184    fn exists_no_variables_true() {
1185        let mut ctx = VariableContext::new();
1186        let a = ctx.next_variable::<I256BE>();
1187        assert!(exists!(a.is(I256BE::inline_from(42))));
1188    }
1189
1190    #[test]
1191    fn find_no_variables_yields_unit() {
1192        let mut ctx = VariableContext::new();
1193        let a = ctx.next_variable::<I256BE>();
1194        let rows: Vec<()> = find!((), a.is(I256BE::inline_from(42))).collect();
1195        assert_eq!(rows, vec![()]);
1196    }
1197
1198    #[test]
1199    fn temp_variables_span_patterns() {
1200        use social::*;
1201
1202        let mut kb = TribleSet::new();
1203        let alice = fucid();
1204        let bob = fucid();
1205
1206        kb += entity! { &alice @ name: "Alice", friend: &bob };
1207        kb += entity! { &bob @ name: "Bob" };
1208
1209        let matches: Vec<_> = find!(
1210            (person_name: Inline<_>),
1211            temp!((mutual_friend),
1212                and!(
1213                    pattern!(&kb, [{ _?person @ name: ?person_name, friend: ?mutual_friend }]),
1214                    pattern!(&kb, [{ ?mutual_friend @ name: "Bob" }])
1215                )
1216            )
1217        )
1218        .collect();
1219
1220        assert_eq!(matches.len(), 1);
1221        assert_eq!(matches[0].0.try_from_inline::<&str>().unwrap(), "Alice");
1222    }
1223
1224    #[test]
1225    fn ignore_skips_variables() {
1226        let results: Vec<_> = find!(
1227            (x: Inline<_>),
1228            ignore!((y), and!(x.is(I256BE::inline_from(1)), y.is(I256BE::inline_from(2))))
1229        )
1230        .collect();
1231
1232        assert_eq!(results.len(), 1);
1233        assert_eq!(results[0].0, I256BE::inline_from(1));
1234    }
1235
1236    #[test]
1237    fn estimate_override_debug_order() {
1238        use std::cell::RefCell;
1239        use std::rc::Rc;
1240
1241        let mut ctx = VariableContext::new();
1242        let a = ctx.next_variable::<ShortString>();
1243        let b = ctx.next_variable::<ShortString>();
1244
1245        let base = and!(
1246            a.is(ShortString::inline_from("A")),
1247            b.is(ShortString::inline_from("B"))
1248        );
1249
1250        let mut wrapper = crate::debug::query::EstimateOverrideConstraint::new(base);
1251        wrapper.set_estimate(a.index, 10);
1252        wrapper.set_estimate(b.index, 1);
1253
1254        let record = Rc::new(RefCell::new(Vec::new()));
1255        let debug = crate::debug::query::DebugConstraint::new(wrapper, Rc::clone(&record));
1256
1257        let q: Query<_, _, _> = Query::new(debug, |_| Some(()));
1258        let r: Vec<_> = q.collect();
1259        assert_eq!(1, r.len());
1260        assert_eq!(&*record.borrow(), &[b.index, a.index]);
1261    }
1262}