Skip to main content

sochdb_query/
filter_ir.rs

1// SPDX-License-Identifier: AGPL-3.0-or-later
2// SochDB - LLM-Optimized Embedded Database
3// Copyright (C) 2026 Sushanth Reddy Vanagala (https://github.com/sushanthpy)
4//
5// This program is free software: you can redistribute it and/or modify
6// it under the terms of the GNU Affero General Public License as published by
7// the Free Software Foundation, either version 3 of the License, or
8// (at your option) any later version.
9//
10// This program is distributed in the hope that it will be useful,
11// but WITHOUT ANY WARRANTY; without even the implied warranty of
12// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13// GNU Affero General Public License for more details.
14//
15// You should have received a copy of the GNU Affero General Public License
16// along with this program. If not, see <https://www.gnu.org/licenses/>.
17
18//! Canonical Filter IR + Planner Pushdown Contract (Task 1)
19//!
20//! This module defines a **single source of truth** for filtering behavior across
21//! all retrieval paths (vector/BM25/hybrid/context). By normalizing filters to a
22//! canonical IR and enforcing pushdown contracts, we:
23//!
24//! 1. **Prevent post-filtering by construction** - filters are applied during
25//!    candidate generation, not after
26//! 2. **Unify semantics** - "namespace = X" means the same thing everywhere
27//! 3. **Enable systematic optimization** - CNF form allows index path selection
28//!
29//! ## Filter IR Design
30//!
31//! Filters are normalized to **Conjunctive Normal Form (CNF)**: a conjunction
32//! of disjunctions of typed atoms.
33//!
34//! ```text
35//! EffectiveFilter = AuthScope ∧ UserFilter
36//!                 = (A₁ ∨ A₂) ∧ (B₁) ∧ (C₁ ∨ C₂ ∨ C₃)
37//! ```
38//!
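//! Where each atom is a typed predicate:
//! - `Eq(field, value)` / `Ne(field, value)` - equality / inequality
//! - `In(field, values)` / `NotIn(field, values)` - set membership / exclusion
//! - `Range(field, min, max)` - range with optional bounds, each inclusive or exclusive
//! - `Prefix(field, prefix)` / `Contains(field, substring)` - string matching
//! - `HasTag(tag)` - ACL tag presence (future)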
44//!
45//! ## Pushdown Contract
46//!
47//! Every executor MUST implement:
48//! ```text
49//! execute(query_op, filter_ir, auth_scope) -> results
50//! ```
51//!
52//! The executor guarantees:
53//! 1. All returned results satisfy `filter_ir ∧ auth_scope`
54//! 2. No result outside the allowed set is ever generated
55//! 3. Filter application happens BEFORE scoring (no post-filter)
56//!
57//! ## Auth Scope
58//!
59//! `AuthScope` is **non-optional** and always conjoined with user filters:
60//! ```text
61//! EffectiveFilter = AuthScope ∧ UserFilter
62//! ```
63//!
64//! This is a monotone strengthening (can only remove results, never add),
65//! ensuring security invariants hold.
66
67use std::collections::HashSet;
68use std::fmt;
69
70use serde::{Deserialize, Serialize};
71
72// ============================================================================
73// Filter Atoms - Typed Predicates
74// ============================================================================
75
/// A typed scalar value for filter comparison.
///
/// This is the value side of a [`FilterAtom`] predicate. The comparison
/// helpers on this type only relate values of the same variant; there is no
/// implicit coercion between, for example, `Int64` and `Uint64`.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum FilterValue {
    /// String value
    String(String),
    /// 64-bit signed integer
    Int64(i64),
    /// 64-bit unsigned integer (for doc_id, timestamps)
    Uint64(u64),
    /// 64-bit float
    Float64(f64),
    /// Boolean
    Bool(bool),
    /// Null
    Null,
}
92
93impl FilterValue {
94    /// Check if this value matches another for equality
95    pub fn eq_match(&self, other: &FilterValue) -> bool {
96        match (self, other) {
97            (FilterValue::String(a), FilterValue::String(b)) => a == b,
98            (FilterValue::Int64(a), FilterValue::Int64(b)) => a == b,
99            (FilterValue::Uint64(a), FilterValue::Uint64(b)) => a == b,
100            (FilterValue::Float64(a), FilterValue::Float64(b)) => (a - b).abs() < f64::EPSILON,
101            (FilterValue::Bool(a), FilterValue::Bool(b)) => a == b,
102            (FilterValue::Null, FilterValue::Null) => true,
103            _ => false,
104        }
105    }
106
107    /// Compare for ordering (returns None if incompatible types)
108    pub fn partial_cmp(&self, other: &FilterValue) -> Option<std::cmp::Ordering> {
109        match (self, other) {
110            (FilterValue::Int64(a), FilterValue::Int64(b)) => Some(a.cmp(b)),
111            (FilterValue::Uint64(a), FilterValue::Uint64(b)) => Some(a.cmp(b)),
112            (FilterValue::Float64(a), FilterValue::Float64(b)) => a.partial_cmp(b),
113            (FilterValue::String(a), FilterValue::String(b)) => Some(a.cmp(b)),
114            _ => None,
115        }
116    }
117}
118
119impl fmt::Display for FilterValue {
120    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
121        match self {
122            FilterValue::String(s) => write!(f, "'{}'", s),
123            FilterValue::Int64(i) => write!(f, "{}", i),
124            FilterValue::Uint64(u) => write!(f, "{}u64", u),
125            FilterValue::Float64(v) => write!(f, "{}", v),
126            FilterValue::Bool(b) => write!(f, "{}", b),
127            FilterValue::Null => write!(f, "NULL"),
128        }
129    }
130}
131
132impl From<&str> for FilterValue {
133    fn from(s: &str) -> Self {
134        FilterValue::String(s.to_string())
135    }
136}
137
138impl From<String> for FilterValue {
139    fn from(s: String) -> Self {
140        FilterValue::String(s)
141    }
142}
143
144impl From<i64> for FilterValue {
145    fn from(i: i64) -> Self {
146        FilterValue::Int64(i)
147    }
148}
149
150impl From<u64> for FilterValue {
151    fn from(u: u64) -> Self {
152        FilterValue::Uint64(u)
153    }
154}
155
156impl From<f64> for FilterValue {
157    fn from(f: f64) -> Self {
158        FilterValue::Float64(f)
159    }
160}
161
162impl From<bool> for FilterValue {
163    fn from(b: bool) -> Self {
164        FilterValue::Bool(b)
165    }
166}
167
/// A single filter atom - the smallest unit of filtering.
///
/// Atoms are the leaves of the CNF filter: each one is a typed predicate over
/// a named field (or, for `HasTag`, `True` and `False`, over no field at all).
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum FilterAtom {
    /// Equality: field = value
    Eq { field: String, value: FilterValue },

    /// Not equal: field != value
    Ne { field: String, value: FilterValue },

    /// Membership: field IN (v1, v2, ...)
    In {
        /// Field being tested
        field: String,
        /// Accepted values
        values: Vec<FilterValue>,
    },

    /// Not in set: field NOT IN (v1, v2, ...)
    NotIn {
        /// Field being tested
        field: String,
        /// Rejected values
        values: Vec<FilterValue>,
    },

    /// Range: min <= field <= max, where each bound is inclusive or
    /// exclusive according to its flag.
    /// Either bound can be None for open-ended ranges
    Range {
        /// Field being tested
        field: String,
        /// Lower bound, or `None` for no lower bound
        min: Option<FilterValue>,
        /// Upper bound, or `None` for no upper bound
        max: Option<FilterValue>,
        /// Whether a value equal to `min` is inside the range
        min_inclusive: bool,
        /// Whether a value equal to `max` is inside the range
        max_inclusive: bool,
    },

    /// Prefix match: field STARTS WITH prefix
    Prefix { field: String, prefix: String },

    /// Contains substring: field CONTAINS substring
    Contains { field: String, substring: String },

    /// ACL tag presence (for row-level security)
    HasTag { tag: String },

    /// Always true (identity for conjunction)
    True,

    /// Always false (identity for disjunction)
    False,
}
214
215impl FilterAtom {
216    /// Create an equality atom
217    pub fn eq(field: impl Into<String>, value: impl Into<FilterValue>) -> Self {
218        FilterAtom::Eq {
219            field: field.into(),
220            value: value.into(),
221        }
222    }
223
224    /// Create an IN atom
225    pub fn in_set(field: impl Into<String>, values: Vec<FilterValue>) -> Self {
226        FilterAtom::In {
227            field: field.into(),
228            values,
229        }
230    }
231
232    /// Create a range atom
233    pub fn range(
234        field: impl Into<String>,
235        min: Option<FilterValue>,
236        max: Option<FilterValue>,
237    ) -> Self {
238        FilterAtom::Range {
239            field: field.into(),
240            min,
241            max,
242            min_inclusive: true,
243            max_inclusive: true,
244        }
245    }
246
247    /// Create an open range (exclusive bounds)
248    pub fn range_exclusive(
249        field: impl Into<String>,
250        min: Option<FilterValue>,
251        max: Option<FilterValue>,
252    ) -> Self {
253        FilterAtom::Range {
254            field: field.into(),
255            min,
256            max,
257            min_inclusive: false,
258            max_inclusive: false,
259        }
260    }
261
262    /// Get the field name this atom filters on (if any)
263    pub fn field(&self) -> Option<&str> {
264        match self {
265            FilterAtom::Eq { field, .. } => Some(field),
266            FilterAtom::Ne { field, .. } => Some(field),
267            FilterAtom::In { field, .. } => Some(field),
268            FilterAtom::NotIn { field, .. } => Some(field),
269            FilterAtom::Range { field, .. } => Some(field),
270            FilterAtom::Prefix { field, .. } => Some(field),
271            FilterAtom::Contains { field, .. } => Some(field),
272            FilterAtom::HasTag { .. } => None,
273            FilterAtom::True | FilterAtom::False => None,
274        }
275    }
276
277    /// Check if this atom is always true
278    pub fn is_trivially_true(&self) -> bool {
279        matches!(self, FilterAtom::True)
280    }
281
282    /// Check if this atom is always false
283    pub fn is_trivially_false(&self) -> bool {
284        matches!(self, FilterAtom::False)
285    }
286
287    /// Negate this atom
288    pub fn negate(&self) -> FilterAtom {
289        match self {
290            FilterAtom::Eq { field, value } => FilterAtom::Ne {
291                field: field.clone(),
292                value: value.clone(),
293            },
294            FilterAtom::Ne { field, value } => FilterAtom::Eq {
295                field: field.clone(),
296                value: value.clone(),
297            },
298            FilterAtom::In { field, values } => FilterAtom::NotIn {
299                field: field.clone(),
300                values: values.clone(),
301            },
302            FilterAtom::NotIn { field, values } => FilterAtom::In {
303                field: field.clone(),
304                values: values.clone(),
305            },
306            FilterAtom::True => FilterAtom::False,
307            FilterAtom::False => FilterAtom::True,
308            // For complex atoms, wrap in negation via De Morgan's
309            other => other.clone(), // Simplified - full implementation would use Not wrapper
310        }
311    }
312}
313
314impl fmt::Display for FilterAtom {
315    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
316        match self {
317            FilterAtom::Eq { field, value } => write!(f, "{} = {}", field, value),
318            FilterAtom::Ne { field, value } => write!(f, "{} != {}", field, value),
319            FilterAtom::In { field, values } => {
320                let vals: Vec<_> = values.iter().map(|v| v.to_string()).collect();
321                write!(f, "{} IN ({})", field, vals.join(", "))
322            }
323            FilterAtom::NotIn { field, values } => {
324                let vals: Vec<_> = values.iter().map(|v| v.to_string()).collect();
325                write!(f, "{} NOT IN ({})", field, vals.join(", "))
326            }
327            FilterAtom::Range {
328                field,
329                min,
330                max,
331                min_inclusive,
332                max_inclusive,
333            } => {
334                let left = if *min_inclusive { "[" } else { "(" };
335                let right = if *max_inclusive { "]" } else { ")" };
336                let min_str = min
337                    .as_ref()
338                    .map(|v| v.to_string())
339                    .unwrap_or_else(|| "-∞".to_string());
340                let max_str = max
341                    .as_ref()
342                    .map(|v| v.to_string())
343                    .unwrap_or_else(|| "∞".to_string());
344                write!(f, "{} ∈ {}{}, {}{}", field, left, min_str, max_str, right)
345            }
346            FilterAtom::Prefix { field, prefix } => write!(f, "{} STARTS WITH '{}'", field, prefix),
347            FilterAtom::Contains { field, substring } => {
348                write!(f, "{} CONTAINS '{}'", field, substring)
349            }
350            FilterAtom::HasTag { tag } => write!(f, "HAS_TAG('{}')", tag),
351            FilterAtom::True => write!(f, "TRUE"),
352            FilterAtom::False => write!(f, "FALSE"),
353        }
354    }
355}
356
357// ============================================================================
358// Filter IR - Normalized Boolean Expression
359// ============================================================================
360
/// A disjunction (OR) of atoms - one clause of a CNF filter.
///
/// A clause with no atoms is treated as FALSE by `is_trivially_false` and
/// `simplify`.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct Disjunction {
    /// The atoms OR-ed together by this clause.
    pub atoms: Vec<FilterAtom>,
}
366
367impl Disjunction {
368    /// Create a disjunction from atoms
369    pub fn new(atoms: Vec<FilterAtom>) -> Self {
370        Self { atoms }
371    }
372
373    /// Create a single-atom disjunction
374    pub fn single(atom: FilterAtom) -> Self {
375        Self { atoms: vec![atom] }
376    }
377
378    /// Check if this disjunction is trivially true (contains TRUE or is empty after simplification)
379    pub fn is_trivially_true(&self) -> bool {
380        self.atoms.iter().any(|a| a.is_trivially_true())
381    }
382
383    /// Check if this disjunction is trivially false (empty or all atoms are FALSE)
384    pub fn is_trivially_false(&self) -> bool {
385        self.atoms.is_empty() || self.atoms.iter().all(|a| a.is_trivially_false())
386    }
387
388    /// Simplify this disjunction
389    pub fn simplify(self) -> Self {
390        // Remove FALSE atoms
391        let atoms: Vec<_> = self
392            .atoms
393            .into_iter()
394            .filter(|a| !a.is_trivially_false())
395            .collect();
396
397        // If any atom is TRUE, the whole disjunction is TRUE
398        if atoms.iter().any(|a| a.is_trivially_true()) {
399            return Self {
400                atoms: vec![FilterAtom::True],
401            };
402        }
403
404        // If empty, it's FALSE
405        if atoms.is_empty() {
406            return Self {
407                atoms: vec![FilterAtom::False],
408            };
409        }
410
411        Self { atoms }
412    }
413}
414
415impl fmt::Display for Disjunction {
416    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
417        if self.atoms.len() == 1 {
418            write!(f, "{}", self.atoms[0])
419        } else {
420            let parts: Vec<_> = self.atoms.iter().map(|a| a.to_string()).collect();
421            write!(f, "({})", parts.join(" OR "))
422        }
423    }
424}
425
/// Canonical Filter IR in Conjunctive Normal Form (CNF)
///
/// CNF = (A₁ ∨ A₂) ∧ (B₁) ∧ (C₁ ∨ C₂ ∨ C₃)
///
/// This representation enables:
/// 1. Systematic index path selection (each clause maps to an index)
/// 2. Easy conjunction with auth scope (just append clauses)
/// 3. Efficient serialization and transmission
///
/// A filter with no clauses matches everything (see `FilterIR::all`).
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct FilterIR {
    /// Conjunction of disjunctions (CNF form): every clause must hold.
    pub clauses: Vec<Disjunction>,
}
439
440impl FilterIR {
441    /// Create an empty filter (matches everything)
442    pub fn all() -> Self {
443        Self { clauses: vec![] }
444    }
445
446    /// Create a filter that matches nothing
447    pub fn none() -> Self {
448        Self {
449            clauses: vec![Disjunction::single(FilterAtom::False)],
450        }
451    }
452
453    /// Create a filter from a single atom
454    pub fn from_atom(atom: FilterAtom) -> Self {
455        Self {
456            clauses: vec![Disjunction::single(atom)],
457        }
458    }
459
460    /// Create a filter from a single disjunction
461    pub fn from_disjunction(disj: Disjunction) -> Self {
462        Self {
463            clauses: vec![disj],
464        }
465    }
466
467    /// Conjoin (AND) with another filter
468    ///
469    /// This is the key operation for auth scope injection:
470    /// `EffectiveFilter = AuthScope ∧ UserFilter`
471    pub fn and(mut self, other: FilterIR) -> Self {
472        self.clauses.extend(other.clauses);
473        self
474    }
475
476    /// Conjoin with a single atom
477    pub fn and_atom(mut self, atom: FilterAtom) -> Self {
478        self.clauses.push(Disjunction::single(atom));
479        self
480    }
481
482    /// Disjoin (OR) with another filter
483    ///
484    /// Note: This may expand the CNF representation
485    pub fn or(self, other: FilterIR) -> Self {
486        if self.clauses.is_empty() {
487            return other;
488        }
489        if other.clauses.is_empty() {
490            return self;
491        }
492
493        // Distribute: (A ∧ B) ∨ (C ∧ D) = (A ∨ C) ∧ (A ∨ D) ∧ (B ∨ C) ∧ (B ∨ D)
494        // This can cause exponential blowup - in practice, limit depth
495        let mut new_clauses = Vec::new();
496        for c1 in &self.clauses {
497            for c2 in &other.clauses {
498                let mut combined = c1.atoms.clone();
499                combined.extend(c2.atoms.clone());
500                new_clauses.push(Disjunction::new(combined));
501            }
502        }
503
504        FilterIR {
505            clauses: new_clauses,
506        }
507    }
508
509    /// Check if this filter matches everything
510    pub fn is_all(&self) -> bool {
511        self.clauses.is_empty() || self.clauses.iter().all(|c| c.is_trivially_true())
512    }
513
514    /// Check if this filter matches nothing
515    pub fn is_none(&self) -> bool {
516        self.clauses.iter().any(|c| c.is_trivially_false())
517    }
518
519    /// Simplify the filter
520    pub fn simplify(self) -> Self {
521        let clauses: Vec<_> = self
522            .clauses
523            .into_iter()
524            .map(|c| c.simplify())
525            .filter(|c| !c.is_trivially_true())
526            .collect();
527
528        // If any clause is FALSE, the whole conjunction is FALSE
529        if clauses.iter().any(|c| c.is_trivially_false()) {
530            return Self::none();
531        }
532
533        Self { clauses }
534    }
535
536    /// Extract atoms for a specific field
537    pub fn atoms_for_field(&self, field: &str) -> Vec<&FilterAtom> {
538        self.clauses
539            .iter()
540            .flat_map(|c| c.atoms.iter())
541            .filter(|a| a.field() == Some(field))
542            .collect()
543    }
544
545    /// Check if this filter constrains a specific field
546    pub fn constrains_field(&self, field: &str) -> bool {
547        !self.atoms_for_field(field).is_empty()
548    }
549
550    /// Get all fields constrained by this filter
551    pub fn constrained_fields(&self) -> HashSet<&str> {
552        self.clauses
553            .iter()
554            .flat_map(|c| c.atoms.iter())
555            .filter_map(|a| a.field())
556            .collect()
557    }
558}
559
560impl Default for FilterIR {
561    fn default() -> Self {
562        Self::all()
563    }
564}
565
566impl fmt::Display for FilterIR {
567    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
568        if self.clauses.is_empty() {
569            return write!(f, "TRUE");
570        }
571        let parts: Vec<_> = self.clauses.iter().map(|c| c.to_string()).collect();
572        write!(f, "{}", parts.join(" AND "))
573    }
574}
575
576// ============================================================================
577// Auth Scope - Non-Optional Security Context
578// ============================================================================
579
/// Authorization scope - ALWAYS conjoined with user filters
///
/// This is the security boundary that cannot be bypassed. It encodes:
/// - Allowed namespaces/tenants
/// - Optional project scope
/// - Token expiry
/// - Capability flags
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct AuthScope {
    /// Allowed namespaces (non-empty; at least one required).
    ///
    /// NOTE(review): the field is a public `Vec` and the struct is
    /// deserializable, so the non-empty requirement is not enforced by the
    /// type itself - confirm that callers validate it.
    pub allowed_namespaces: Vec<String>,

    /// Optional tenant ID (for multi-tenant deployments)
    pub tenant_id: Option<String>,

    /// Optional project scope
    pub project_id: Option<String>,

    /// Token expiry timestamp (Unix epoch seconds); `None` means the scope
    /// never expires.
    pub expires_at: Option<u64>,

    /// Capability flags
    pub capabilities: AuthCapabilities,

    /// Optional ACL tags the caller has access to
    pub acl_tags: Vec<String>,
}
607
/// Capability flags for authorization.
///
/// The derived `Default` grants nothing (every flag is `false`).
#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)]
pub struct AuthCapabilities {
    /// Can read vectors
    pub can_read: bool,
    /// Can write/insert vectors
    pub can_write: bool,
    /// Can delete vectors
    pub can_delete: bool,
    /// Can perform admin operations
    pub can_admin: bool,
}
620
621impl AuthScope {
622    /// Create a new auth scope for a single namespace
623    pub fn for_namespace(namespace: impl Into<String>) -> Self {
624        Self {
625            allowed_namespaces: vec![namespace.into()],
626            tenant_id: None,
627            project_id: None,
628            expires_at: None,
629            capabilities: AuthCapabilities {
630                can_read: true,
631                can_write: false,
632                can_delete: false,
633                can_admin: false,
634            },
635            acl_tags: vec![],
636        }
637    }
638
639    /// Create with full access to a namespace
640    pub fn full_access(namespace: impl Into<String>) -> Self {
641        Self {
642            allowed_namespaces: vec![namespace.into()],
643            tenant_id: None,
644            project_id: None,
645            expires_at: None,
646            capabilities: AuthCapabilities {
647                can_read: true,
648                can_write: true,
649                can_delete: true,
650                can_admin: false,
651            },
652            acl_tags: vec![],
653        }
654    }
655
656    /// Add a namespace to the allowed list
657    pub fn with_namespace(mut self, namespace: impl Into<String>) -> Self {
658        self.allowed_namespaces.push(namespace.into());
659        self
660    }
661
662    /// Set tenant ID
663    pub fn with_tenant(mut self, tenant_id: impl Into<String>) -> Self {
664        self.tenant_id = Some(tenant_id.into());
665        self
666    }
667
668    /// Set project ID
669    pub fn with_project(mut self, project_id: impl Into<String>) -> Self {
670        self.project_id = Some(project_id.into());
671        self
672    }
673
674    /// Set expiry
675    pub fn with_expiry(mut self, expires_at: u64) -> Self {
676        self.expires_at = Some(expires_at);
677        self
678    }
679
680    /// Add ACL tags
681    pub fn with_acl_tags(mut self, tags: Vec<String>) -> Self {
682        self.acl_tags = tags;
683        self
684    }
685
686    /// Check if this scope is expired
687    pub fn is_expired(&self) -> bool {
688        if let Some(expires_at) = self.expires_at {
689            let now = std::time::SystemTime::now()
690                .duration_since(std::time::UNIX_EPOCH)
691                .map(|d| d.as_secs())
692                .unwrap_or(0);
693            now > expires_at
694        } else {
695            false
696        }
697    }
698
699    /// Check if a namespace is allowed
700    pub fn is_namespace_allowed(&self, namespace: &str) -> bool {
701        self.allowed_namespaces.iter().any(|ns| ns == namespace)
702    }
703
704    /// Convert auth scope to filter IR clauses
705    ///
706    /// This generates the mandatory predicates that MUST be conjoined
707    /// with any user filter.
708    pub fn to_filter_ir(&self) -> FilterIR {
709        let mut filter = FilterIR::all();
710
711        // Namespace constraint (mandatory)
712        if self.allowed_namespaces.len() == 1 {
713            filter = filter.and_atom(FilterAtom::eq(
714                "namespace",
715                self.allowed_namespaces[0].clone(),
716            ));
717        } else if !self.allowed_namespaces.is_empty() {
718            filter = filter.and_atom(FilterAtom::in_set(
719                "namespace",
720                self.allowed_namespaces
721                    .iter()
722                    .map(|ns| FilterValue::String(ns.clone()))
723                    .collect(),
724            ));
725        }
726
727        // Tenant constraint (if present)
728        if let Some(ref tenant_id) = self.tenant_id {
729            filter = filter.and_atom(FilterAtom::eq("tenant_id", tenant_id.clone()));
730        }
731
732        // Project constraint (if present)
733        if let Some(ref project_id) = self.project_id {
734            filter = filter.and_atom(FilterAtom::eq("project_id", project_id.clone()));
735        }
736
737        // ACL tags (if present, user must have at least one matching tag)
738        // This is handled differently - the executor checks tag intersection
739        // rather than adding to filter IR (since it's "has any of these tags")
740
741        filter
742    }
743}
744
745// ============================================================================
746// Pushdown Contract - Executor Interface
747// ============================================================================
748
749/// The pushdown contract that every executor MUST implement
750///
751/// This trait enforces:
752/// 1. Filter is provided upfront, not as post-processing
753/// 2. Auth scope is non-optional
754/// 3. Results are guaranteed to satisfy the effective filter
755pub trait FilteredExecutor {
756    /// The query operation type (varies by executor)
757    type QueryOp;
758
759    /// The result type
760    type Result;
761
762    /// The error type
763    type Error;
764
765    /// Execute a query with mandatory filtering
766    ///
767    /// # Contract
768    ///
769    /// - `filter_ir`: User-provided filter (may be empty = all)
770    /// - `auth_scope`: Non-optional security context
771    ///
772    /// The executor MUST:
773    /// 1. Compute `effective_filter = auth_scope.to_filter_ir() ∧ filter_ir`
774    /// 2. Apply `effective_filter` BEFORE generating candidates
775    /// 3. Guarantee all results satisfy `effective_filter`
776    ///
777    /// The executor MUST NOT:
778    /// 1. Return any result outside `effective_filter`
779    /// 2. Apply filtering after candidate scoring
780    /// 3. Ignore or bypass `auth_scope`
781    fn execute(
782        &self,
783        query: &Self::QueryOp,
784        filter_ir: &FilterIR,
785        auth_scope: &AuthScope,
786    ) -> Result<Self::Result, Self::Error>;
787
788    /// Compute the effective filter (auth ∧ user)
789    ///
790    /// This is a convenience method that executors can use.
791    fn effective_filter(&self, filter_ir: &FilterIR, auth_scope: &AuthScope) -> FilterIR {
792        auth_scope.to_filter_ir().and(filter_ir.clone())
793    }
794}
795
796// ============================================================================
797// Filter Builder - Ergonomic Construction
798// ============================================================================
799
/// Builder for constructing filter IR ergonomically.
///
/// Each builder method appends one clause; `build` wraps the accumulated
/// clauses in a `FilterIR`, so all added constraints are AND-ed together.
#[derive(Debug, Clone, Default)]
pub struct FilterBuilder {
    /// Clauses accumulated so far, in the order they were added.
    clauses: Vec<Disjunction>,
}
805
806impl FilterBuilder {
807    /// Create a new filter builder
808    pub fn new() -> Self {
809        Self::default()
810    }
811
812    /// Add an equality constraint
813    pub fn eq(mut self, field: &str, value: impl Into<FilterValue>) -> Self {
814        self.clauses
815            .push(Disjunction::single(FilterAtom::eq(field, value)));
816        self
817    }
818
819    /// Add a not-equal constraint
820    pub fn ne(mut self, field: &str, value: impl Into<FilterValue>) -> Self {
821        self.clauses.push(Disjunction::single(FilterAtom::Ne {
822            field: field.to_string(),
823            value: value.into(),
824        }));
825        self
826    }
827
828    /// Add an IN constraint
829    pub fn in_set(mut self, field: &str, values: Vec<FilterValue>) -> Self {
830        self.clauses
831            .push(Disjunction::single(FilterAtom::in_set(field, values)));
832        self
833    }
834
835    /// Add a range constraint
836    pub fn range(
837        mut self,
838        field: &str,
839        min: Option<impl Into<FilterValue>>,
840        max: Option<impl Into<FilterValue>>,
841    ) -> Self {
842        self.clauses.push(Disjunction::single(FilterAtom::range(
843            field,
844            min.map(Into::into),
845            max.map(Into::into),
846        )));
847        self
848    }
849
850    /// Add a greater-than constraint
851    pub fn gt(mut self, field: &str, value: impl Into<FilterValue>) -> Self {
852        self.clauses.push(Disjunction::single(FilterAtom::Range {
853            field: field.to_string(),
854            min: Some(value.into()),
855            max: None,
856            min_inclusive: false,
857            max_inclusive: false,
858        }));
859        self
860    }
861
862    /// Add a greater-than-or-equal constraint
863    pub fn gte(mut self, field: &str, value: impl Into<FilterValue>) -> Self {
864        self.clauses.push(Disjunction::single(FilterAtom::Range {
865            field: field.to_string(),
866            min: Some(value.into()),
867            max: None,
868            min_inclusive: true,
869            max_inclusive: false,
870        }));
871        self
872    }
873
874    /// Add a less-than constraint
875    pub fn lt(mut self, field: &str, value: impl Into<FilterValue>) -> Self {
876        self.clauses.push(Disjunction::single(FilterAtom::Range {
877            field: field.to_string(),
878            min: None,
879            max: Some(value.into()),
880            min_inclusive: false,
881            max_inclusive: false,
882        }));
883        self
884    }
885
886    /// Add a less-than-or-equal constraint
887    pub fn lte(mut self, field: &str, value: impl Into<FilterValue>) -> Self {
888        self.clauses.push(Disjunction::single(FilterAtom::Range {
889            field: field.to_string(),
890            min: None,
891            max: Some(value.into()),
892            min_inclusive: false,
893            max_inclusive: true,
894        }));
895        self
896    }
897
898    /// Add a prefix match constraint
899    pub fn prefix(mut self, field: &str, prefix: &str) -> Self {
900        self.clauses.push(Disjunction::single(FilterAtom::Prefix {
901            field: field.to_string(),
902            prefix: prefix.to_string(),
903        }));
904        self
905    }
906
907    /// Add a contains constraint
908    pub fn contains(mut self, field: &str, substring: &str) -> Self {
909        self.clauses.push(Disjunction::single(FilterAtom::Contains {
910            field: field.to_string(),
911            substring: substring.to_string(),
912        }));
913        self
914    }
915
916    /// Add a namespace constraint (convenience method)
917    pub fn namespace(self, namespace: &str) -> Self {
918        self.eq("namespace", namespace)
919    }
920
921    /// Add a doc_id IN constraint (convenience method)
922    pub fn doc_ids(self, doc_ids: &[u64]) -> Self {
923        self.in_set(
924            "doc_id",
925            doc_ids.iter().map(|&id| FilterValue::Uint64(id)).collect(),
926        )
927    }
928
929    /// Add a time range constraint (convenience method)
930    pub fn time_range(self, field: &str, start: Option<u64>, end: Option<u64>) -> Self {
931        self.range(
932            field,
933            start.map(FilterValue::Uint64),
934            end.map(FilterValue::Uint64),
935        )
936    }
937
938    /// Add a disjunction (OR of multiple atoms)
939    pub fn or_atoms(mut self, atoms: Vec<FilterAtom>) -> Self {
940        self.clauses.push(Disjunction::new(atoms));
941        self
942    }
943
944    /// Build the filter IR
945    pub fn build(self) -> FilterIR {
946        FilterIR {
947            clauses: self.clauses,
948        }
949    }
950}
951
952// ============================================================================
953// Convenience Macros
954// ============================================================================
955
/// Create a `FilterIR` from a simple equality DSL.
///
/// Accepts zero or more comma-separated `field = value` pairs; a trailing
/// comma is allowed. Each pair becomes one `FilterBuilder::eq` clause, with the
/// stringified identifier used as the field name, and clauses are added in the
/// order written. An empty invocation expands to `FilterIR::all()`.
///
/// Only equality is supported by this macro. Use `FilterBuilder` directly for
/// ranges, sets, prefixes and other constraints.
///
/// ```ignore
/// let filter = filter_ir! {
///     namespace = "my_ns",
///     project_id = "proj_123",
/// };
/// ```
#[macro_export]
macro_rules! filter_ir {
    // Empty filter
    () => {
        $crate::filter_ir::FilterIR::all()
    };

    // One or more equality constraints, with an optional trailing comma.
    // A single repetition replaces the former recursive expansion, which
    // failed to compile for three or more fields and for trailing commas.
    ($($field:ident = $value:expr),+ $(,)?) => {
        $crate::filter_ir::FilterBuilder::new()
            $(.eq(stringify!($field), $value))+
            .build()
    };

    // Internal chaining helper, retained so that any existing
    // `filter_ir!(@chain builder, ...)` invocation keeps compiling. It applies
    // every remaining equality to `$builder` and evaluates to the builder.
    (@chain $builder:expr $(, $field:ident = $value:expr)* $(,)?) => {
        $builder $(.eq(stringify!($field), $value))*
    };
}
991
992// ============================================================================
993// Tests
994// ============================================================================
995
#[cfg(test)]
mod tests {
    use super::*;

    /// Atom constructors report the field they were created for.
    #[test]
    fn test_filter_atom_creation() {
        let equality = FilterAtom::eq("namespace", "my_ns");
        assert_eq!(equality.field(), Some("namespace"));

        let lower = Some(FilterValue::Uint64(1000));
        let upper = Some(FilterValue::Uint64(2000));
        let bounded = FilterAtom::range("timestamp", lower, upper);
        assert_eq!(bounded.field(), Some("timestamp"));
    }

    /// AND-ing two single-atom filters yields two clauses.
    #[test]
    fn test_filter_ir_conjunction() {
        let by_namespace = FilterIR::from_atom(FilterAtom::eq("namespace", "ns1"));
        let by_project = FilterIR::from_atom(FilterAtom::eq("project_id", "proj1"));

        let merged = by_namespace.and(by_project);
        assert_eq!(merged.clauses.len(), 2);
    }

    /// A namespace + tenant scope constrains exactly those two fields.
    #[test]
    fn test_auth_scope_to_filter() {
        let auth = AuthScope::for_namespace("production").with_tenant("acme_corp");
        let scoped = auth.to_filter_ir();

        assert!(scoped.constrains_field("namespace"));
        assert!(scoped.constrains_field("tenant_id"));
        assert!(!scoped.constrains_field("project_id"));
    }

    /// The effective filter is the auth scope AND-ed with the user filter.
    #[test]
    fn test_effective_filter() {
        let user_part = FilterBuilder::new()
            .eq("source", "documents")
            .time_range("created_at", Some(1000), Some(2000))
            .build();
        let auth_part = AuthScope::for_namespace("production").to_filter_ir();

        let combined = auth_part.and(user_part);

        // Should have namespace + source + time range
        assert_eq!(combined.clauses.len(), 3);
        assert!(combined.constrains_field("namespace"));
        assert!(combined.constrains_field("source"));
        assert!(combined.constrains_field("created_at"));
    }

    /// Every builder call contributes one clause.
    #[test]
    fn test_filter_builder() {
        let builder = FilterBuilder::new()
            .namespace("my_namespace")
            .eq("project_id", "proj_123")
            .doc_ids(&[1, 2, 3, 4, 5])
            .time_range("timestamp", Some(1000), None);
        let built = builder.build();

        assert_eq!(built.clauses.len(), 4);
    }

    /// Simplification drops TRUE clauses and collapses on FALSE.
    #[test]
    fn test_filter_simplification() {
        // TRUE AND X = X
        let always_true = FilterIR::from_atom(FilterAtom::True);
        let with_true = always_true.and(FilterIR::from_atom(FilterAtom::eq("x", "y")));
        let reduced_true = with_true.simplify();
        assert_eq!(reduced_true.clauses.len(), 1);

        // FALSE AND X = FALSE
        let always_false = FilterIR::from_atom(FilterAtom::False);
        let with_false = always_false.and(FilterIR::from_atom(FilterAtom::eq("x", "y")));
        let reduced_false = with_false.simplify();
        assert!(reduced_false.is_none());
    }

    /// The rendered filter mentions every constrained field name.
    #[test]
    fn test_filter_display() {
        let built = FilterBuilder::new()
            .eq("namespace", "prod")
            .range("timestamp", Some(1000i64), Some(2000i64))
            .build();

        let rendered = built.to_string();
        assert!(rendered.contains("namespace"));
        assert!(rendered.contains("timestamp"));
    }
}