xlog_logic/hypergraph/eligibility.rs
1//! Eligibility analysis: decide whether a [`HypergraphRule`] can be
2//! planned as a multiway join, or must fall back to the existing
3//! binary-join lowering.
4//!
5//! "Eligible" here means **could be planned as multiway** — it does
6//! NOT mean "must use multiway". A rule with exactly two positive
7//! atoms is eligible because the planner could legally choose either
8//! a multiway plan or a binary plan; the choice is the planner's, not
9//! the analyzer's. The CPU reference evaluator (PR 2) and later GPU
10//! kernels (PR 3) consume both shapes.
11//!
12//! "Ineligible" means at least one [`Boundary`] makes multiway
13//! planning either impossible (negation, aggregation in head) or
14//! unsupported by the executor context under consideration. Each
15//! boundary is reported separately so the explain output and tests
16//! can lock in *why* a rule fell back.
17
18use super::ir::{HypergraphRule, VertexId};
19use std::collections::BTreeMap;
20use xlog_core::ScalarType;
21
22/// The maximum number of distinct join-key variables supported by
23/// the existing binary-fallback executor. Borrowed verbatim from the
24/// `pack_keys_gpu_on_stream` constraint in
25/// `xlog-cuda/src/provider/relational.rs`.
26///
27/// This is a **binary-fallback** constraint, not a hypergraph
28/// property. WCOJ eligibility uses [`ExecutorContext::WcojEligible`]
29/// and a separate context limit; hash fallback continues to use
30/// this value verbatim.
31pub const BINARY_FALLBACK_KEY_LIMIT: usize = 4;
32
33/// The widest K-clique shape the WCOJ planner architecture admits at
34/// the eligibility layer. K=7 and K=8 are accepted here so the
35/// Phase-2 templates can inherit the same planner contract.
36pub const WCOJ_ELIGIBLE_KEY_LIMIT: usize = 8;
37
38/// Executor capability context for join-key width checks.
39#[derive(Debug, Clone, Copy, PartialEq, Eq)]
40pub enum ExecutorContext {
41 /// Existing hash/binary fallback executor. The 4-key
42 /// `pack_keys_gpu_on_stream` limit remains binding.
43 HashFallback,
44 /// WCOJ-capable planner path. K5 through K8 are admissible;
45 /// K9+ remains outside the current executor contract.
46 WcojEligible,
47}
48
49impl ExecutorContext {
50 fn join_key_limit(self) -> usize {
51 match self {
52 Self::HashFallback => BINARY_FALLBACK_KEY_LIMIT,
53 Self::WcojEligible => WCOJ_ELIGIBLE_KEY_LIMIT,
54 }
55 }
56}
57
58/// Verdict for a single rule.
59#[derive(Debug, Clone, PartialEq, Eq)]
60pub enum Eligibility {
61 /// The rule could be planned as a multiway join. Note: this is
62 /// *eligibility*, not *requirement* — the eventual planner is
63 /// free to lower an Eligible rule as a binary join if its cost
64 /// model says so. Two-atom rules are Eligible: both multiway
65 /// and binary are valid lowerings.
66 Eligible,
67 /// The rule must fall back to binary-join lowering. Each
68 /// [`Boundary`] in the vector is one independent reason; the
69 /// list is non-empty.
70 Ineligible(Vec<Boundary>),
71}
72
73impl Eligibility {
74 /// Iterate over boundaries (empty for [`Eligibility::Eligible`]).
75 pub fn boundaries(&self) -> &[Boundary] {
76 match self {
77 Eligibility::Eligible => &[],
78 Eligibility::Ineligible(bs) => bs,
79 }
80 }
81}
82
83/// One reason a rule is ineligible for multiway planning.
84///
85/// Boundaries are reported independently — a rule that is *both*
86/// negated *and* over the key limit produces two boundaries, not one
87/// "first-failed" boundary. This makes the eligibility report stable
88/// against ordering changes in future analyzer passes.
89#[derive(Debug, Clone, PartialEq, Eq)]
90pub enum Boundary {
91 /// The rule is a ground fact (no body to plan).
92 GroundFact,
93 /// The rule head contains an aggregation expression. Multiway
94 /// joins underneath an aggregation re-introduce the
95 /// aggregation-boundary problem the binary lowering already
96 /// handles via `GroupBy + Project`; we leave that machinery
97 /// intact rather than re-derive it inside multiway.
98 HeadAggregation,
99 /// The body contains a [`crate::ast::BodyLiteral::Negated`]
100 /// literal. Negated literals lower to set difference (`Diff`)
101 /// in the binary path — multiway has no equivalent at this
102 /// stage of the planner foundation.
103 BodyNegation,
104 /// The body contains a [`crate::ast::BodyLiteral::IsExpr`]
105 /// computed binding. Binding-via-expression introduces a
106 /// dependency between body literals that the join graph alone
107 /// does not capture.
108 BodyIsExpr,
109 /// The body has fewer than two positive atoms. Multiway needs
110 /// at least two atoms to be meaningful; one-atom or zero-atom
111 /// bodies are not multiway candidates regardless of executor.
112 InsufficientPositiveAtoms {
113 /// Observed number of positive body atoms.
114 positive_count: usize,
115 },
116 /// The total number of distinct join-key variables exceeds the
117 /// binary-fallback executor's `pack_keys` limit. Carried as a
118 /// runtime value so the explain output can name the count.
119 /// See [`BINARY_FALLBACK_KEY_LIMIT`].
120 JoinKeysExceedBinaryFallbackLimit {
121 /// Executor context whose key-width cap was exceeded.
122 context: ExecutorContext,
123 /// Observed distinct join-key count.
124 count: usize,
125 /// Hard limit for the selected executor context.
126 limit: usize,
127 },
128 /// A join-key variable has a [`ScalarType`] not supported by
129 /// the executor under consideration.
130 ///
131 /// Emitted by [`analyze_typed`] when a join-key vertex has a
132 /// known type (derived from a body atom's relation schema —
133 /// see [`crate::hypergraph::typed::evaluate_rule_typed`] and
134 /// the typed fixpoint variants) that is outside
135 /// [`WCOJ_SUPPORTED_KEY_TYPES`]. Structural [`analyze`] never
136 /// emits this variant.
137 ///
138 /// **Locked policy (PR 5):** unknown ≠ unsupported. A vertex
139 /// whose type cannot be derived from the supplied
140 /// [`crate::hypergraph::RefRelationStore`] is **not** flagged
141 /// here. Transitive type propagation across recursive SCC
142 /// predicates is a follow-up slice.
143 UnsupportedKeyType {
144 /// Source name of the variable whose type is unsupported.
145 var: String,
146 /// Inferred scalar type that the executor cannot handle.
147 ty: ScalarType,
148 },
149}
150
151/// Analyze a [`HypergraphRule`] and return the eligibility verdict.
152///
153/// All boundaries are checked; the returned vector is in a stable
154/// order matching the [`Boundary`] enum's declaration order (modulo
155/// the one-or-zero-positive-atoms check, which is reported with the
156/// observed `positive_count`). Order stability matters for the
157/// explain-output snapshot tests.
158pub fn analyze(hg: &HypergraphRule, context: ExecutorContext) -> Eligibility {
159 let mut boundaries = Vec::new();
160
161 if hg.is_fact {
162 boundaries.push(Boundary::GroundFact);
163 }
164 if hg.head_has_aggregation {
165 boundaries.push(Boundary::HeadAggregation);
166 }
167 if hg.has_negation {
168 boundaries.push(Boundary::BodyNegation);
169 }
170 if hg.has_is_expr {
171 boundaries.push(Boundary::BodyIsExpr);
172 }
173 let positive_count = hg.hyperedge_count();
174 if !hg.is_fact && positive_count < 2 {
175 boundaries.push(Boundary::InsufficientPositiveAtoms { positive_count });
176 }
177
178 // Count distinct join-key variables. A "join-key variable" is
179 // any vertex shared by two or more hyperedges. Variables that
180 // appear in only one atom contribute to projection / output
181 // schema but are not join keys.
182 let join_key_count = count_join_keys(hg);
183 let join_key_limit = context.join_key_limit();
184 if join_key_count > join_key_limit {
185 boundaries.push(Boundary::JoinKeysExceedBinaryFallbackLimit {
186 context,
187 count: join_key_count,
188 limit: join_key_limit,
189 });
190 }
191
192 if boundaries.is_empty() {
193 Eligibility::Eligible
194 } else {
195 Eligibility::Ineligible(boundaries)
196 }
197}
198
199/// Return true when [`analyze`] says the rule is eligible in the
200/// selected executor context.
201pub fn is_eligible(hg: &HypergraphRule, context: ExecutorContext) -> bool {
202 matches!(analyze(hg, context), Eligibility::Eligible)
203}
204
205/// Count vertices that appear in two or more hyperedges. These are
206/// the variables the planner must equi-join across atoms; vertices
207/// in only one hyperedge do not constrain the cross-atom plan and
208/// are not join keys.
209fn count_join_keys(hg: &HypergraphRule) -> usize {
210 let mut occurrences: Vec<usize> = vec![0; hg.vertex_count()];
211 for edge in &hg.hyperedges {
212 // Count each vertex AT MOST ONCE per hyperedge — a self-join
213 // within a single atom (e.g. `p(X, X)`) is not a multi-atom
214 // join key.
215 for vid in edge.vertices() {
216 let VertexId(idx) = vid;
217 occurrences[idx] += 1;
218 }
219 }
220 occurrences.iter().filter(|c| **c >= 2).count()
221}
222
223/// Scalar types that the WCOJ reference evaluator supports as
224/// join-key types. Membership is checked by [`analyze_typed`] when
225/// emitting [`Boundary::UnsupportedKeyType`].
226///
227/// The set is intentionally narrow and not configurable in PR 2:
228/// only `U32`, `U64`, and `Symbol` are supported. Future PRs may
229/// widen the set as the reference evaluator and (later) GPU
230/// kernels grow type coverage; widening is a deliberate
231/// configuration change, not a parameter to this function.
232pub const WCOJ_SUPPORTED_KEY_TYPES: &[ScalarType] =
233 &[ScalarType::U32, ScalarType::U64, ScalarType::Symbol];
234
235/// Typed eligibility analysis.
236///
237/// Same as [`analyze`], but additionally consults `vertex_types` —
238/// a map from variable name to inferred [`ScalarType`] — to emit
239/// [`Boundary::UnsupportedKeyType`] for join-key vertices whose
240/// type is outside [`WCOJ_SUPPORTED_KEY_TYPES`].
241///
242/// "Join-key vertex" matches the same definition used by [`analyze`]:
243/// a vertex that appears in two or more hyperedges. Projection-only
244/// vertices (those appearing in exactly one hyperedge) are NOT
245/// checked — their types do not affect WCOJ planning.
246///
247/// **Locked policy (PR 5): unknown ≠ unsupported.** Vertices
248/// missing from `vertex_types` are NOT flagged. The
249/// [`crate::hypergraph::typed`] gate populates this map via
250/// schema-driven derivation from a
251/// [`crate::hypergraph::RefRelationStore`]; vertices anchored only
252/// through predicates absent from that store (e.g. an SCC
253/// predicate referenced recursively before its first iteration)
254/// stay absent and pass through. Transitive type propagation
255/// across recursive predicates is a follow-up slice.
256pub fn analyze_typed(
257 hg: &HypergraphRule,
258 vertex_types: &BTreeMap<String, ScalarType>,
259 context: ExecutorContext,
260) -> Eligibility {
261 // Start from the structural verdict so structural boundaries
262 // (negation, aggregation, etc.) carry through.
263 let base = analyze(hg, context);
264 let mut boundaries: Vec<Boundary> = base.boundaries().to_vec();
265
266 let join_key_ids = join_key_vertex_ids(hg);
267 for vid in join_key_ids {
268 let name = &hg.vertex(vid).name;
269 if let Some(&ty) = vertex_types.get(name) {
270 if !WCOJ_SUPPORTED_KEY_TYPES.contains(&ty) {
271 boundaries.push(Boundary::UnsupportedKeyType {
272 var: name.clone(),
273 ty,
274 });
275 }
276 }
277 }
278
279 if boundaries.is_empty() {
280 Eligibility::Eligible
281 } else {
282 Eligibility::Ineligible(boundaries)
283 }
284}
285
286/// Return the [`VertexId`]s of vertices that appear in two or more
287/// hyperedges (the "join key" set), in vertex-id order.
288fn join_key_vertex_ids(hg: &HypergraphRule) -> Vec<VertexId> {
289 let mut occurrences: Vec<usize> = vec![0; hg.vertex_count()];
290 for edge in &hg.hyperedges {
291 for vid in edge.vertices() {
292 let VertexId(idx) = vid;
293 occurrences[idx] += 1;
294 }
295 }
296 occurrences
297 .iter()
298 .enumerate()
299 .filter(|(_, c)| **c >= 2)
300 .map(|(i, _)| VertexId(i))
301 .collect()
302}