Skip to main content

sochdb_query/
filter_ir.rs

1// SPDX-License-Identifier: AGPL-3.0-or-later
2// SochDB - LLM-Optimized Embedded Database
3// Copyright (C) 2026 Sushanth Reddy Vanagala (https://github.com/sushanthpy)
4//
5// This program is free software: you can redistribute it and/or modify
6// it under the terms of the GNU Affero General Public License as published by
7// the Free Software Foundation, either version 3 of the License, or
8// (at your option) any later version.
9//
10// This program is distributed in the hope that it will be useful,
11// but WITHOUT ANY WARRANTY; without even the implied warranty of
12// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13// GNU Affero General Public License for more details.
14//
15// You should have received a copy of the GNU Affero General Public License
16// along with this program. If not, see <https://www.gnu.org/licenses/>.
17
18//! Canonical Filter IR + Planner Pushdown Contract (Task 1)
19//!
20//! This module defines a **single source of truth** for filtering behavior across
21//! all retrieval paths (vector/BM25/hybrid/context). By normalizing filters to a
22//! canonical IR and enforcing pushdown contracts, we:
23//!
24//! 1. **Prevent post-filtering by construction** - filters are applied during
25//!    candidate generation, not after
26//! 2. **Unify semantics** - "namespace = X" means the same thing everywhere
27//! 3. **Enable systematic optimization** - CNF form allows index path selection
28//!
29//! ## Filter IR Design
30//!
31//! Filters are normalized to **Conjunctive Normal Form (CNF)**: a conjunction
32//! of disjunctions of typed atoms.
33//!
34//! ```text
35//! EffectiveFilter = AuthScope ∧ UserFilter
36//!                 = (A₁ ∨ A₂) ∧ (B₁) ∧ (C₁ ∨ C₂ ∨ C₃)
37//! ```
38//!
39//! Where each atom is a typed predicate:
40//! - `Eq(field, value)` - equality
41//! - `In(field, values)` - membership in set
//! - `Range(field, min, max)` - range check; each bound is optional and individually inclusive or exclusive
43//! - `HasTag(tag)` - ACL tag presence (future)
44//!
45//! ## Pushdown Contract
46//!
47//! Every executor MUST implement:
48//! ```text
49//! execute(query_op, filter_ir, auth_scope) -> results
50//! ```
51//!
52//! The executor guarantees:
53//! 1. All returned results satisfy `filter_ir ∧ auth_scope`
54//! 2. No result outside the allowed set is ever generated
55//! 3. Filter application happens BEFORE scoring (no post-filter)
56//!
57//! ## Auth Scope
58//!
59//! `AuthScope` is **non-optional** and always conjoined with user filters:
60//! ```text
61//! EffectiveFilter = AuthScope ∧ UserFilter
62//! ```
63//!
64//! This is a monotone strengthening (can only remove results, never add),
65//! ensuring security invariants hold.
66
67use std::collections::HashSet;
68use std::fmt;
69
70use serde::{Deserialize, Serialize};
71
72// ============================================================================
73// Filter Atoms - Typed Predicates
74// ============================================================================
75
76/// A typed scalar value for filter comparison
77#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
78pub enum FilterValue {
79    /// String value
80    String(String),
81    /// 64-bit signed integer
82    Int64(i64),
83    /// 64-bit unsigned integer (for doc_id, timestamps)
84    Uint64(u64),
85    /// 64-bit float
86    Float64(f64),
87    /// Boolean
88    Bool(bool),
89    /// Null
90    Null,
91}
92
93impl FilterValue {
94    /// Check if this value matches another for equality
95    pub fn eq_match(&self, other: &FilterValue) -> bool {
96        match (self, other) {
97            (FilterValue::String(a), FilterValue::String(b)) => a == b,
98            (FilterValue::Int64(a), FilterValue::Int64(b)) => a == b,
99            (FilterValue::Uint64(a), FilterValue::Uint64(b)) => a == b,
100            (FilterValue::Float64(a), FilterValue::Float64(b)) => {
101                (a - b).abs() < f64::EPSILON
102            }
103            (FilterValue::Bool(a), FilterValue::Bool(b)) => a == b,
104            (FilterValue::Null, FilterValue::Null) => true,
105            _ => false,
106        }
107    }
108
109    /// Compare for ordering (returns None if incompatible types)
110    pub fn partial_cmp(&self, other: &FilterValue) -> Option<std::cmp::Ordering> {
111        match (self, other) {
112            (FilterValue::Int64(a), FilterValue::Int64(b)) => Some(a.cmp(b)),
113            (FilterValue::Uint64(a), FilterValue::Uint64(b)) => Some(a.cmp(b)),
114            (FilterValue::Float64(a), FilterValue::Float64(b)) => a.partial_cmp(b),
115            (FilterValue::String(a), FilterValue::String(b)) => Some(a.cmp(b)),
116            _ => None,
117        }
118    }
119}
120
121impl fmt::Display for FilterValue {
122    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
123        match self {
124            FilterValue::String(s) => write!(f, "'{}'", s),
125            FilterValue::Int64(i) => write!(f, "{}", i),
126            FilterValue::Uint64(u) => write!(f, "{}u64", u),
127            FilterValue::Float64(v) => write!(f, "{}", v),
128            FilterValue::Bool(b) => write!(f, "{}", b),
129            FilterValue::Null => write!(f, "NULL"),
130        }
131    }
132}
133
134impl From<&str> for FilterValue {
135    fn from(s: &str) -> Self {
136        FilterValue::String(s.to_string())
137    }
138}
139
140impl From<String> for FilterValue {
141    fn from(s: String) -> Self {
142        FilterValue::String(s)
143    }
144}
145
146impl From<i64> for FilterValue {
147    fn from(i: i64) -> Self {
148        FilterValue::Int64(i)
149    }
150}
151
152impl From<u64> for FilterValue {
153    fn from(u: u64) -> Self {
154        FilterValue::Uint64(u)
155    }
156}
157
158impl From<f64> for FilterValue {
159    fn from(f: f64) -> Self {
160        FilterValue::Float64(f)
161    }
162}
163
164impl From<bool> for FilterValue {
165    fn from(b: bool) -> Self {
166        FilterValue::Bool(b)
167    }
168}
169
170/// A single filter atom - the smallest unit of filtering
171#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
172pub enum FilterAtom {
173    /// Equality: field = value
174    Eq {
175        field: String,
176        value: FilterValue,
177    },
178    
179    /// Not equal: field != value
180    Ne {
181        field: String,
182        value: FilterValue,
183    },
184    
185    /// Membership: field IN (v1, v2, ...)
186    In {
187        field: String,
188        values: Vec<FilterValue>,
189    },
190    
191    /// Not in set: field NOT IN (v1, v2, ...)
192    NotIn {
193        field: String,
194        values: Vec<FilterValue>,
195    },
196    
197    /// Range: min <= field <= max (inclusive)
198    /// Either bound can be None for open-ended ranges
199    Range {
200        field: String,
201        min: Option<FilterValue>,
202        max: Option<FilterValue>,
203        min_inclusive: bool,
204        max_inclusive: bool,
205    },
206    
207    /// Prefix match: field STARTS WITH prefix
208    Prefix {
209        field: String,
210        prefix: String,
211    },
212    
213    /// Contains substring: field CONTAINS substring
214    Contains {
215        field: String,
216        substring: String,
217    },
218    
219    /// ACL tag presence (for row-level security)
220    HasTag {
221        tag: String,
222    },
223    
224    /// Always true (identity for conjunction)
225    True,
226    
227    /// Always false (identity for disjunction)
228    False,
229}
230
231impl FilterAtom {
232    /// Create an equality atom
233    pub fn eq(field: impl Into<String>, value: impl Into<FilterValue>) -> Self {
234        FilterAtom::Eq {
235            field: field.into(),
236            value: value.into(),
237        }
238    }
239    
240    /// Create an IN atom
241    pub fn in_set(field: impl Into<String>, values: Vec<FilterValue>) -> Self {
242        FilterAtom::In {
243            field: field.into(),
244            values,
245        }
246    }
247    
248    /// Create a range atom
249    pub fn range(
250        field: impl Into<String>,
251        min: Option<FilterValue>,
252        max: Option<FilterValue>,
253    ) -> Self {
254        FilterAtom::Range {
255            field: field.into(),
256            min,
257            max,
258            min_inclusive: true,
259            max_inclusive: true,
260        }
261    }
262    
263    /// Create an open range (exclusive bounds)
264    pub fn range_exclusive(
265        field: impl Into<String>,
266        min: Option<FilterValue>,
267        max: Option<FilterValue>,
268    ) -> Self {
269        FilterAtom::Range {
270            field: field.into(),
271            min,
272            max,
273            min_inclusive: false,
274            max_inclusive: false,
275        }
276    }
277    
278    /// Get the field name this atom filters on (if any)
279    pub fn field(&self) -> Option<&str> {
280        match self {
281            FilterAtom::Eq { field, .. } => Some(field),
282            FilterAtom::Ne { field, .. } => Some(field),
283            FilterAtom::In { field, .. } => Some(field),
284            FilterAtom::NotIn { field, .. } => Some(field),
285            FilterAtom::Range { field, .. } => Some(field),
286            FilterAtom::Prefix { field, .. } => Some(field),
287            FilterAtom::Contains { field, .. } => Some(field),
288            FilterAtom::HasTag { .. } => None,
289            FilterAtom::True | FilterAtom::False => None,
290        }
291    }
292    
293    /// Check if this atom is always true
294    pub fn is_trivially_true(&self) -> bool {
295        matches!(self, FilterAtom::True)
296    }
297    
298    /// Check if this atom is always false
299    pub fn is_trivially_false(&self) -> bool {
300        matches!(self, FilterAtom::False)
301    }
302    
303    /// Negate this atom
304    pub fn negate(&self) -> FilterAtom {
305        match self {
306            FilterAtom::Eq { field, value } => FilterAtom::Ne {
307                field: field.clone(),
308                value: value.clone(),
309            },
310            FilterAtom::Ne { field, value } => FilterAtom::Eq {
311                field: field.clone(),
312                value: value.clone(),
313            },
314            FilterAtom::In { field, values } => FilterAtom::NotIn {
315                field: field.clone(),
316                values: values.clone(),
317            },
318            FilterAtom::NotIn { field, values } => FilterAtom::In {
319                field: field.clone(),
320                values: values.clone(),
321            },
322            FilterAtom::True => FilterAtom::False,
323            FilterAtom::False => FilterAtom::True,
324            // For complex atoms, wrap in negation via De Morgan's
325            other => other.clone(), // Simplified - full implementation would use Not wrapper
326        }
327    }
328}
329
330impl fmt::Display for FilterAtom {
331    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
332        match self {
333            FilterAtom::Eq { field, value } => write!(f, "{} = {}", field, value),
334            FilterAtom::Ne { field, value } => write!(f, "{} != {}", field, value),
335            FilterAtom::In { field, values } => {
336                let vals: Vec<_> = values.iter().map(|v| v.to_string()).collect();
337                write!(f, "{} IN ({})", field, vals.join(", "))
338            }
339            FilterAtom::NotIn { field, values } => {
340                let vals: Vec<_> = values.iter().map(|v| v.to_string()).collect();
341                write!(f, "{} NOT IN ({})", field, vals.join(", "))
342            }
343            FilterAtom::Range { field, min, max, min_inclusive, max_inclusive } => {
344                let left = if *min_inclusive { "[" } else { "(" };
345                let right = if *max_inclusive { "]" } else { ")" };
346                let min_str = min.as_ref().map(|v| v.to_string()).unwrap_or_else(|| "-∞".to_string());
347                let max_str = max.as_ref().map(|v| v.to_string()).unwrap_or_else(|| "∞".to_string());
348                write!(f, "{} ∈ {}{}, {}{}", field, left, min_str, max_str, right)
349            }
350            FilterAtom::Prefix { field, prefix } => write!(f, "{} STARTS WITH '{}'", field, prefix),
351            FilterAtom::Contains { field, substring } => write!(f, "{} CONTAINS '{}'", field, substring),
352            FilterAtom::HasTag { tag } => write!(f, "HAS_TAG('{}')", tag),
353            FilterAtom::True => write!(f, "TRUE"),
354            FilterAtom::False => write!(f, "FALSE"),
355        }
356    }
357}
358
359// ============================================================================
360// Filter IR - Normalized Boolean Expression
361// ============================================================================
362
363/// A disjunction (OR) of atoms
364#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
365pub struct Disjunction {
366    pub atoms: Vec<FilterAtom>,
367}
368
369impl Disjunction {
370    /// Create a disjunction from atoms
371    pub fn new(atoms: Vec<FilterAtom>) -> Self {
372        Self { atoms }
373    }
374    
375    /// Create a single-atom disjunction
376    pub fn single(atom: FilterAtom) -> Self {
377        Self { atoms: vec![atom] }
378    }
379    
380    /// Check if this disjunction is trivially true (contains TRUE or is empty after simplification)
381    pub fn is_trivially_true(&self) -> bool {
382        self.atoms.iter().any(|a| a.is_trivially_true())
383    }
384    
385    /// Check if this disjunction is trivially false (empty or all atoms are FALSE)
386    pub fn is_trivially_false(&self) -> bool {
387        self.atoms.is_empty() || self.atoms.iter().all(|a| a.is_trivially_false())
388    }
389    
390    /// Simplify this disjunction
391    pub fn simplify(self) -> Self {
392        // Remove FALSE atoms
393        let atoms: Vec<_> = self.atoms.into_iter()
394            .filter(|a| !a.is_trivially_false())
395            .collect();
396        
397        // If any atom is TRUE, the whole disjunction is TRUE
398        if atoms.iter().any(|a| a.is_trivially_true()) {
399            return Self { atoms: vec![FilterAtom::True] };
400        }
401        
402        // If empty, it's FALSE
403        if atoms.is_empty() {
404            return Self { atoms: vec![FilterAtom::False] };
405        }
406        
407        Self { atoms }
408    }
409}
410
411impl fmt::Display for Disjunction {
412    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
413        if self.atoms.len() == 1 {
414            write!(f, "{}", self.atoms[0])
415        } else {
416            let parts: Vec<_> = self.atoms.iter().map(|a| a.to_string()).collect();
417            write!(f, "({})", parts.join(" OR "))
418        }
419    }
420}
421
422/// Canonical Filter IR in Conjunctive Normal Form (CNF)
423///
424/// CNF = (A₁ ∨ A₂) ∧ (B₁) ∧ (C₁ ∨ C₂ ∨ C₃)
425///
426/// This representation enables:
427/// 1. Systematic index path selection (each clause maps to an index)
428/// 2. Easy conjunction with auth scope (just append clauses)
429/// 3. Efficient serialization and transmission
430#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
431pub struct FilterIR {
432    /// Conjunction of disjunctions (CNF form)
433    pub clauses: Vec<Disjunction>,
434}
435
436impl FilterIR {
437    /// Create an empty filter (matches everything)
438    pub fn all() -> Self {
439        Self { clauses: vec![] }
440    }
441    
442    /// Create a filter that matches nothing
443    pub fn none() -> Self {
444        Self {
445            clauses: vec![Disjunction::single(FilterAtom::False)],
446        }
447    }
448    
449    /// Create a filter from a single atom
450    pub fn from_atom(atom: FilterAtom) -> Self {
451        Self {
452            clauses: vec![Disjunction::single(atom)],
453        }
454    }
455    
456    /// Create a filter from a single disjunction
457    pub fn from_disjunction(disj: Disjunction) -> Self {
458        Self { clauses: vec![disj] }
459    }
460    
461    /// Conjoin (AND) with another filter
462    ///
463    /// This is the key operation for auth scope injection:
464    /// `EffectiveFilter = AuthScope ∧ UserFilter`
465    pub fn and(mut self, other: FilterIR) -> Self {
466        self.clauses.extend(other.clauses);
467        self
468    }
469    
470    /// Conjoin with a single atom
471    pub fn and_atom(mut self, atom: FilterAtom) -> Self {
472        self.clauses.push(Disjunction::single(atom));
473        self
474    }
475    
476    /// Disjoin (OR) with another filter
477    ///
478    /// Note: This may expand the CNF representation
479    pub fn or(self, other: FilterIR) -> Self {
480        if self.clauses.is_empty() {
481            return other;
482        }
483        if other.clauses.is_empty() {
484            return self;
485        }
486        
487        // Distribute: (A ∧ B) ∨ (C ∧ D) = (A ∨ C) ∧ (A ∨ D) ∧ (B ∨ C) ∧ (B ∨ D)
488        // This can cause exponential blowup - in practice, limit depth
489        let mut new_clauses = Vec::new();
490        for c1 in &self.clauses {
491            for c2 in &other.clauses {
492                let mut combined = c1.atoms.clone();
493                combined.extend(c2.atoms.clone());
494                new_clauses.push(Disjunction::new(combined));
495            }
496        }
497        
498        FilterIR { clauses: new_clauses }
499    }
500    
501    /// Check if this filter matches everything
502    pub fn is_all(&self) -> bool {
503        self.clauses.is_empty() || self.clauses.iter().all(|c| c.is_trivially_true())
504    }
505    
506    /// Check if this filter matches nothing
507    pub fn is_none(&self) -> bool {
508        self.clauses.iter().any(|c| c.is_trivially_false())
509    }
510    
511    /// Simplify the filter
512    pub fn simplify(self) -> Self {
513        let clauses: Vec<_> = self.clauses
514            .into_iter()
515            .map(|c| c.simplify())
516            .filter(|c| !c.is_trivially_true())
517            .collect();
518        
519        // If any clause is FALSE, the whole conjunction is FALSE
520        if clauses.iter().any(|c| c.is_trivially_false()) {
521            return Self::none();
522        }
523        
524        Self { clauses }
525    }
526    
527    /// Extract atoms for a specific field
528    pub fn atoms_for_field(&self, field: &str) -> Vec<&FilterAtom> {
529        self.clauses
530            .iter()
531            .flat_map(|c| c.atoms.iter())
532            .filter(|a| a.field() == Some(field))
533            .collect()
534    }
535    
536    /// Check if this filter constrains a specific field
537    pub fn constrains_field(&self, field: &str) -> bool {
538        !self.atoms_for_field(field).is_empty()
539    }
540    
541    /// Get all fields constrained by this filter
542    pub fn constrained_fields(&self) -> HashSet<&str> {
543        self.clauses
544            .iter()
545            .flat_map(|c| c.atoms.iter())
546            .filter_map(|a| a.field())
547            .collect()
548    }
549}
550
551impl Default for FilterIR {
552    fn default() -> Self {
553        Self::all()
554    }
555}
556
557impl fmt::Display for FilterIR {
558    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
559        if self.clauses.is_empty() {
560            return write!(f, "TRUE");
561        }
562        let parts: Vec<_> = self.clauses.iter().map(|c| c.to_string()).collect();
563        write!(f, "{}", parts.join(" AND "))
564    }
565}
566
567// ============================================================================
568// Auth Scope - Non-Optional Security Context
569// ============================================================================
570
571/// Authorization scope - ALWAYS conjoined with user filters
572///
573/// This is the security boundary that cannot be bypassed. It encodes:
574/// - Allowed namespaces/tenants
575/// - Optional project scope
576/// - Token expiry
577/// - Capability flags
578#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
579pub struct AuthScope {
580    /// Allowed namespaces (non-empty; at least one required)
581    pub allowed_namespaces: Vec<String>,
582    
583    /// Optional tenant ID (for multi-tenant deployments)
584    pub tenant_id: Option<String>,
585    
586    /// Optional project scope
587    pub project_id: Option<String>,
588    
589    /// Token expiry timestamp (Unix epoch seconds)
590    pub expires_at: Option<u64>,
591    
592    /// Capability flags
593    pub capabilities: AuthCapabilities,
594    
595    /// Optional ACL tags the caller has access to
596    pub acl_tags: Vec<String>,
597}
598
599/// Capability flags for authorization
600#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)]
601pub struct AuthCapabilities {
602    /// Can read vectors
603    pub can_read: bool,
604    /// Can write/insert vectors
605    pub can_write: bool,
606    /// Can delete vectors
607    pub can_delete: bool,
608    /// Can perform admin operations
609    pub can_admin: bool,
610}
611
612impl AuthScope {
613    /// Create a new auth scope for a single namespace
614    pub fn for_namespace(namespace: impl Into<String>) -> Self {
615        Self {
616            allowed_namespaces: vec![namespace.into()],
617            tenant_id: None,
618            project_id: None,
619            expires_at: None,
620            capabilities: AuthCapabilities {
621                can_read: true,
622                can_write: false,
623                can_delete: false,
624                can_admin: false,
625            },
626            acl_tags: vec![],
627        }
628    }
629    
630    /// Create with full access to a namespace
631    pub fn full_access(namespace: impl Into<String>) -> Self {
632        Self {
633            allowed_namespaces: vec![namespace.into()],
634            tenant_id: None,
635            project_id: None,
636            expires_at: None,
637            capabilities: AuthCapabilities {
638                can_read: true,
639                can_write: true,
640                can_delete: true,
641                can_admin: false,
642            },
643            acl_tags: vec![],
644        }
645    }
646    
647    /// Add a namespace to the allowed list
648    pub fn with_namespace(mut self, namespace: impl Into<String>) -> Self {
649        self.allowed_namespaces.push(namespace.into());
650        self
651    }
652    
653    /// Set tenant ID
654    pub fn with_tenant(mut self, tenant_id: impl Into<String>) -> Self {
655        self.tenant_id = Some(tenant_id.into());
656        self
657    }
658    
659    /// Set project ID
660    pub fn with_project(mut self, project_id: impl Into<String>) -> Self {
661        self.project_id = Some(project_id.into());
662        self
663    }
664    
665    /// Set expiry
666    pub fn with_expiry(mut self, expires_at: u64) -> Self {
667        self.expires_at = Some(expires_at);
668        self
669    }
670    
671    /// Add ACL tags
672    pub fn with_acl_tags(mut self, tags: Vec<String>) -> Self {
673        self.acl_tags = tags;
674        self
675    }
676    
677    /// Check if this scope is expired
678    pub fn is_expired(&self) -> bool {
679        if let Some(expires_at) = self.expires_at {
680            let now = std::time::SystemTime::now()
681                .duration_since(std::time::UNIX_EPOCH)
682                .map(|d| d.as_secs())
683                .unwrap_or(0);
684            now > expires_at
685        } else {
686            false
687        }
688    }
689    
690    /// Check if a namespace is allowed
691    pub fn is_namespace_allowed(&self, namespace: &str) -> bool {
692        self.allowed_namespaces.iter().any(|ns| ns == namespace)
693    }
694    
695    /// Convert auth scope to filter IR clauses
696    ///
697    /// This generates the mandatory predicates that MUST be conjoined
698    /// with any user filter.
699    pub fn to_filter_ir(&self) -> FilterIR {
700        let mut filter = FilterIR::all();
701        
702        // Namespace constraint (mandatory)
703        if self.allowed_namespaces.len() == 1 {
704            filter = filter.and_atom(FilterAtom::eq(
705                "namespace",
706                self.allowed_namespaces[0].clone(),
707            ));
708        } else if !self.allowed_namespaces.is_empty() {
709            filter = filter.and_atom(FilterAtom::in_set(
710                "namespace",
711                self.allowed_namespaces
712                    .iter()
713                    .map(|ns| FilterValue::String(ns.clone()))
714                    .collect(),
715            ));
716        }
717        
718        // Tenant constraint (if present)
719        if let Some(ref tenant_id) = self.tenant_id {
720            filter = filter.and_atom(FilterAtom::eq("tenant_id", tenant_id.clone()));
721        }
722        
723        // Project constraint (if present)
724        if let Some(ref project_id) = self.project_id {
725            filter = filter.and_atom(FilterAtom::eq("project_id", project_id.clone()));
726        }
727        
728        // ACL tags (if present, user must have at least one matching tag)
729        // This is handled differently - the executor checks tag intersection
730        // rather than adding to filter IR (since it's "has any of these tags")
731        
732        filter
733    }
734}
735
736// ============================================================================
737// Pushdown Contract - Executor Interface
738// ============================================================================
739
740/// The pushdown contract that every executor MUST implement
741///
742/// This trait enforces:
743/// 1. Filter is provided upfront, not as post-processing
744/// 2. Auth scope is non-optional
745/// 3. Results are guaranteed to satisfy the effective filter
746pub trait FilteredExecutor {
747    /// The query operation type (varies by executor)
748    type QueryOp;
749    
750    /// The result type
751    type Result;
752    
753    /// The error type
754    type Error;
755    
756    /// Execute a query with mandatory filtering
757    ///
758    /// # Contract
759    ///
760    /// - `filter_ir`: User-provided filter (may be empty = all)
761    /// - `auth_scope`: Non-optional security context
762    ///
763    /// The executor MUST:
764    /// 1. Compute `effective_filter = auth_scope.to_filter_ir() ∧ filter_ir`
765    /// 2. Apply `effective_filter` BEFORE generating candidates
766    /// 3. Guarantee all results satisfy `effective_filter`
767    ///
768    /// The executor MUST NOT:
769    /// 1. Return any result outside `effective_filter`
770    /// 2. Apply filtering after candidate scoring
771    /// 3. Ignore or bypass `auth_scope`
772    fn execute(
773        &self,
774        query: &Self::QueryOp,
775        filter_ir: &FilterIR,
776        auth_scope: &AuthScope,
777    ) -> Result<Self::Result, Self::Error>;
778    
779    /// Compute the effective filter (auth ∧ user)
780    ///
781    /// This is a convenience method that executors can use.
782    fn effective_filter(&self, filter_ir: &FilterIR, auth_scope: &AuthScope) -> FilterIR {
783        auth_scope.to_filter_ir().and(filter_ir.clone())
784    }
785}
786
787// ============================================================================
788// Filter Builder - Ergonomic Construction
789// ============================================================================
790
791/// Builder for constructing filter IR ergonomically
792#[derive(Debug, Clone, Default)]
793pub struct FilterBuilder {
794    clauses: Vec<Disjunction>,
795}
796
797impl FilterBuilder {
798    /// Create a new filter builder
799    pub fn new() -> Self {
800        Self::default()
801    }
802    
803    /// Add an equality constraint
804    pub fn eq(mut self, field: &str, value: impl Into<FilterValue>) -> Self {
805        self.clauses.push(Disjunction::single(FilterAtom::eq(field, value)));
806        self
807    }
808    
809    /// Add a not-equal constraint
810    pub fn ne(mut self, field: &str, value: impl Into<FilterValue>) -> Self {
811        self.clauses.push(Disjunction::single(FilterAtom::Ne {
812            field: field.to_string(),
813            value: value.into(),
814        }));
815        self
816    }
817    
818    /// Add an IN constraint
819    pub fn in_set(mut self, field: &str, values: Vec<FilterValue>) -> Self {
820        self.clauses.push(Disjunction::single(FilterAtom::in_set(field, values)));
821        self
822    }
823    
824    /// Add a range constraint
825    pub fn range(
826        mut self,
827        field: &str,
828        min: Option<impl Into<FilterValue>>,
829        max: Option<impl Into<FilterValue>>,
830    ) -> Self {
831        self.clauses.push(Disjunction::single(FilterAtom::range(
832            field,
833            min.map(Into::into),
834            max.map(Into::into),
835        )));
836        self
837    }
838    
839    /// Add a greater-than constraint
840    pub fn gt(mut self, field: &str, value: impl Into<FilterValue>) -> Self {
841        self.clauses.push(Disjunction::single(FilterAtom::Range {
842            field: field.to_string(),
843            min: Some(value.into()),
844            max: None,
845            min_inclusive: false,
846            max_inclusive: false,
847        }));
848        self
849    }
850    
851    /// Add a greater-than-or-equal constraint
852    pub fn gte(mut self, field: &str, value: impl Into<FilterValue>) -> Self {
853        self.clauses.push(Disjunction::single(FilterAtom::Range {
854            field: field.to_string(),
855            min: Some(value.into()),
856            max: None,
857            min_inclusive: true,
858            max_inclusive: false,
859        }));
860        self
861    }
862    
863    /// Add a less-than constraint
864    pub fn lt(mut self, field: &str, value: impl Into<FilterValue>) -> Self {
865        self.clauses.push(Disjunction::single(FilterAtom::Range {
866            field: field.to_string(),
867            min: None,
868            max: Some(value.into()),
869            min_inclusive: false,
870            max_inclusive: false,
871        }));
872        self
873    }
874    
875    /// Add a less-than-or-equal constraint
876    pub fn lte(mut self, field: &str, value: impl Into<FilterValue>) -> Self {
877        self.clauses.push(Disjunction::single(FilterAtom::Range {
878            field: field.to_string(),
879            min: None,
880            max: Some(value.into()),
881            min_inclusive: false,
882            max_inclusive: true,
883        }));
884        self
885    }
886    
887    /// Add a prefix match constraint
888    pub fn prefix(mut self, field: &str, prefix: &str) -> Self {
889        self.clauses.push(Disjunction::single(FilterAtom::Prefix {
890            field: field.to_string(),
891            prefix: prefix.to_string(),
892        }));
893        self
894    }
895    
896    /// Add a contains constraint
897    pub fn contains(mut self, field: &str, substring: &str) -> Self {
898        self.clauses.push(Disjunction::single(FilterAtom::Contains {
899            field: field.to_string(),
900            substring: substring.to_string(),
901        }));
902        self
903    }
904    
905    /// Add a namespace constraint (convenience method)
906    pub fn namespace(self, namespace: &str) -> Self {
907        self.eq("namespace", namespace)
908    }
909    
910    /// Add a doc_id IN constraint (convenience method)
911    pub fn doc_ids(self, doc_ids: &[u64]) -> Self {
912        self.in_set(
913            "doc_id",
914            doc_ids.iter().map(|&id| FilterValue::Uint64(id)).collect(),
915        )
916    }
917    
918    /// Add a time range constraint (convenience method)
919    pub fn time_range(self, field: &str, start: Option<u64>, end: Option<u64>) -> Self {
920        self.range(
921            field,
922            start.map(FilterValue::Uint64),
923            end.map(FilterValue::Uint64),
924        )
925    }
926    
927    /// Add a disjunction (OR of multiple atoms)
928    pub fn or_atoms(mut self, atoms: Vec<FilterAtom>) -> Self {
929        self.clauses.push(Disjunction::new(atoms));
930        self
931    }
932    
933    /// Build the filter IR
934    pub fn build(self) -> FilterIR {
935        FilterIR { clauses: self.clauses }
936    }
937}
938
939// ============================================================================
940// Convenience Macros
941// ============================================================================
942
/// Create a filter IR from a simple equality DSL
///
/// Each `field = value` pair becomes one `FilterBuilder::eq` call, using the
/// stringified field identifier as the field name. Any number of
/// comma-separated pairs is accepted, with an optional trailing comma. An
/// empty invocation expands to `FilterIR::all()`.
///
/// Only equality constraints are understood by this macro; build other
/// constraint kinds with `FilterBuilder` directly.
///
/// ```ignore
/// let filter = filter_ir! {
///     namespace = "my_ns",
///     project_id = "proj_123",
///     source = "documents",
/// };
/// ```
#[macro_export]
macro_rules! filter_ir {
    // Empty filter
    () => {
        $crate::filter_ir::FilterIR::all()
    };

    // One or more equality constraints: seed the builder with the first pair
    // and hand the remaining tokens to the internal `@chain` rules.
    ($field:ident = $value:expr $(, $($rest:tt)*)?) => {{
        let builder = $crate::filter_ir!(
            @chain
            $crate::filter_ir::FilterBuilder::new().eq(stringify!($field), $value)
            $(, $($rest)*)?
        );
        builder.build()
    }};

    // Chaining helper (internal): nothing left to consume, so the accumulated
    // builder expression is the result. Also absorbs a trailing comma.
    (@chain $builder:expr $(,)?) => {
        $builder
    };

    // Chaining helper (internal): fold one more `field = value` pair into the
    // accumulated builder expression and recurse on the rest. Recursion goes
    // through `$crate::` so it resolves even when the macro was invoked by
    // path from another crate.
    (@chain $builder:expr, $field:ident = $value:expr $(, $($rest:tt)*)?) => {
        $crate::filter_ir!(
            @chain
            $builder.eq(stringify!($field), $value)
            $(, $($rest)*)?
        )
    };
}
978
979// ============================================================================
980// Tests
981// ============================================================================
982
#[cfg(test)]
mod tests {
    use super::*;

    /// Atom constructors report the field they were built for.
    #[test]
    fn test_filter_atom_creation() {
        let equality_atom = FilterAtom::eq("namespace", "my_ns");
        assert_eq!(equality_atom.field(), Some("namespace"));

        let range_atom = FilterAtom::range(
            "timestamp",
            Some(FilterValue::Uint64(1000)),
            Some(FilterValue::Uint64(2000)),
        );
        assert_eq!(range_atom.field(), Some("timestamp"));
    }

    /// AND-ing two single-atom filters keeps both clauses.
    #[test]
    fn test_filter_ir_conjunction() {
        let namespace_filter = FilterIR::from_atom(FilterAtom::eq("namespace", "ns1"));
        let project_filter = FilterIR::from_atom(FilterAtom::eq("project_id", "proj1"));

        let conjunction = namespace_filter.and(project_filter);
        assert_eq!(conjunction.clauses.len(), 2);
    }

    /// An auth scope converts to a filter constraining exactly the scoped fields.
    #[test]
    fn test_auth_scope_to_filter() {
        let tenant_scope = AuthScope::for_namespace("production").with_tenant("acme_corp");

        let scope_filter = tenant_scope.to_filter_ir();
        assert!(scope_filter.constrains_field("namespace"));
        assert!(scope_filter.constrains_field("tenant_id"));
        assert!(!scope_filter.constrains_field("project_id"));
    }

    /// The effective filter is the auth filter AND-ed with the user filter.
    #[test]
    fn test_effective_filter() {
        let auth_scope = AuthScope::for_namespace("production");
        let requested = FilterBuilder::new()
            .eq("source", "documents")
            .time_range("created_at", Some(1000), Some(2000))
            .build();

        let effective = auth_scope.to_filter_ir().and(requested);

        // One clause each for namespace, source and the time range
        assert_eq!(effective.clauses.len(), 3);
        assert!(effective.constrains_field("namespace"));
        assert!(effective.constrains_field("source"));
        assert!(effective.constrains_field("created_at"));
    }

    /// Every builder call contributes one clause to the built filter.
    #[test]
    fn test_filter_builder() {
        let wanted_docs = [1, 2, 3, 4, 5];
        let built = FilterBuilder::new()
            .namespace("my_namespace")
            .eq("project_id", "proj_123")
            .doc_ids(&wanted_docs)
            .time_range("timestamp", Some(1000), None)
            .build();

        assert_eq!(built.clauses.len(), 4);
    }

    /// Simplification drops TRUE clauses and collapses on FALSE.
    #[test]
    fn test_filter_simplification() {
        // TRUE AND X = X
        let with_true =
            FilterIR::from_atom(FilterAtom::True).and(FilterIR::from_atom(FilterAtom::eq("x", "y")));
        let without_true = with_true.simplify();
        assert_eq!(without_true.clauses.len(), 1);

        // FALSE AND X = FALSE
        let with_false =
            FilterIR::from_atom(FilterAtom::False).and(FilterIR::from_atom(FilterAtom::eq("x", "y")));
        let collapsed = with_false.simplify();
        assert!(collapsed.is_none());
    }

    /// The rendered filter mentions every constrained field name.
    #[test]
    fn test_filter_display() {
        let rendered = FilterBuilder::new()
            .eq("namespace", "prod")
            .range("timestamp", Some(1000i64), Some(2000i64))
            .build()
            .to_string();

        assert!(rendered.contains("namespace"));
        assert!(rendered.contains("timestamp"));
    }
}