sochdb_query/
filter_ir.rs

1// Copyright 2025 Sushanth (https://github.com/sushanthpy)
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! Canonical Filter IR + Planner Pushdown Contract (Task 1)
16//!
17//! This module defines a **single source of truth** for filtering behavior across
18//! all retrieval paths (vector/BM25/hybrid/context). By normalizing filters to a
19//! canonical IR and enforcing pushdown contracts, we:
20//!
21//! 1. **Prevent post-filtering by construction** - filters are applied during
22//!    candidate generation, not after
23//! 2. **Unify semantics** - "namespace = X" means the same thing everywhere
24//! 3. **Enable systematic optimization** - CNF form allows index path selection
25//!
26//! ## Filter IR Design
27//!
28//! Filters are normalized to **Conjunctive Normal Form (CNF)**: a conjunction
29//! of disjunctions of typed atoms.
30//!
31//! ```text
32//! EffectiveFilter = AuthScope ∧ UserFilter
33//!                 = (A₁ ∨ A₂) ∧ (B₁) ∧ (C₁ ∨ C₂ ∨ C₃)
34//! ```
35//!
36//! Where each atom is a typed predicate:
37//! - `Eq(field, value)` - equality
38//! - `In(field, values)` - membership in set
//! - `Range(field, min, max)` - range; each bound is optional and may be inclusive or exclusive
40//! - `HasTag(tag)` - ACL tag presence (future)
41//!
42//! ## Pushdown Contract
43//!
44//! Every executor MUST implement:
45//! ```text
46//! execute(query_op, filter_ir, auth_scope) -> results
47//! ```
48//!
49//! The executor guarantees:
50//! 1. All returned results satisfy `filter_ir ∧ auth_scope`
51//! 2. No result outside the allowed set is ever generated
52//! 3. Filter application happens BEFORE scoring (no post-filter)
53//!
54//! ## Auth Scope
55//!
56//! `AuthScope` is **non-optional** and always conjoined with user filters:
57//! ```text
58//! EffectiveFilter = AuthScope ∧ UserFilter
59//! ```
60//!
61//! This is a monotone strengthening (can only remove results, never add),
62//! ensuring security invariants hold.
63
64use std::collections::HashSet;
65use std::fmt;
66
67use serde::{Deserialize, Serialize};
68
69// ============================================================================
70// Filter Atoms - Typed Predicates
71// ============================================================================
72
73/// A typed scalar value for filter comparison
74#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
75pub enum FilterValue {
76    /// String value
77    String(String),
78    /// 64-bit signed integer
79    Int64(i64),
80    /// 64-bit unsigned integer (for doc_id, timestamps)
81    Uint64(u64),
82    /// 64-bit float
83    Float64(f64),
84    /// Boolean
85    Bool(bool),
86    /// Null
87    Null,
88}
89
90impl FilterValue {
91    /// Check if this value matches another for equality
92    pub fn eq_match(&self, other: &FilterValue) -> bool {
93        match (self, other) {
94            (FilterValue::String(a), FilterValue::String(b)) => a == b,
95            (FilterValue::Int64(a), FilterValue::Int64(b)) => a == b,
96            (FilterValue::Uint64(a), FilterValue::Uint64(b)) => a == b,
97            (FilterValue::Float64(a), FilterValue::Float64(b)) => {
98                (a - b).abs() < f64::EPSILON
99            }
100            (FilterValue::Bool(a), FilterValue::Bool(b)) => a == b,
101            (FilterValue::Null, FilterValue::Null) => true,
102            _ => false,
103        }
104    }
105
106    /// Compare for ordering (returns None if incompatible types)
107    pub fn partial_cmp(&self, other: &FilterValue) -> Option<std::cmp::Ordering> {
108        match (self, other) {
109            (FilterValue::Int64(a), FilterValue::Int64(b)) => Some(a.cmp(b)),
110            (FilterValue::Uint64(a), FilterValue::Uint64(b)) => Some(a.cmp(b)),
111            (FilterValue::Float64(a), FilterValue::Float64(b)) => a.partial_cmp(b),
112            (FilterValue::String(a), FilterValue::String(b)) => Some(a.cmp(b)),
113            _ => None,
114        }
115    }
116}
117
118impl fmt::Display for FilterValue {
119    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
120        match self {
121            FilterValue::String(s) => write!(f, "'{}'", s),
122            FilterValue::Int64(i) => write!(f, "{}", i),
123            FilterValue::Uint64(u) => write!(f, "{}u64", u),
124            FilterValue::Float64(v) => write!(f, "{}", v),
125            FilterValue::Bool(b) => write!(f, "{}", b),
126            FilterValue::Null => write!(f, "NULL"),
127        }
128    }
129}
130
131impl From<&str> for FilterValue {
132    fn from(s: &str) -> Self {
133        FilterValue::String(s.to_string())
134    }
135}
136
137impl From<String> for FilterValue {
138    fn from(s: String) -> Self {
139        FilterValue::String(s)
140    }
141}
142
143impl From<i64> for FilterValue {
144    fn from(i: i64) -> Self {
145        FilterValue::Int64(i)
146    }
147}
148
149impl From<u64> for FilterValue {
150    fn from(u: u64) -> Self {
151        FilterValue::Uint64(u)
152    }
153}
154
155impl From<f64> for FilterValue {
156    fn from(f: f64) -> Self {
157        FilterValue::Float64(f)
158    }
159}
160
161impl From<bool> for FilterValue {
162    fn from(b: bool) -> Self {
163        FilterValue::Bool(b)
164    }
165}
166
167/// A single filter atom - the smallest unit of filtering
168#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
169pub enum FilterAtom {
170    /// Equality: field = value
171    Eq {
172        field: String,
173        value: FilterValue,
174    },
175    
176    /// Not equal: field != value
177    Ne {
178        field: String,
179        value: FilterValue,
180    },
181    
182    /// Membership: field IN (v1, v2, ...)
183    In {
184        field: String,
185        values: Vec<FilterValue>,
186    },
187    
188    /// Not in set: field NOT IN (v1, v2, ...)
189    NotIn {
190        field: String,
191        values: Vec<FilterValue>,
192    },
193    
194    /// Range: min <= field <= max (inclusive)
195    /// Either bound can be None for open-ended ranges
196    Range {
197        field: String,
198        min: Option<FilterValue>,
199        max: Option<FilterValue>,
200        min_inclusive: bool,
201        max_inclusive: bool,
202    },
203    
204    /// Prefix match: field STARTS WITH prefix
205    Prefix {
206        field: String,
207        prefix: String,
208    },
209    
210    /// Contains substring: field CONTAINS substring
211    Contains {
212        field: String,
213        substring: String,
214    },
215    
216    /// ACL tag presence (for row-level security)
217    HasTag {
218        tag: String,
219    },
220    
221    /// Always true (identity for conjunction)
222    True,
223    
224    /// Always false (identity for disjunction)
225    False,
226}
227
228impl FilterAtom {
229    /// Create an equality atom
230    pub fn eq(field: impl Into<String>, value: impl Into<FilterValue>) -> Self {
231        FilterAtom::Eq {
232            field: field.into(),
233            value: value.into(),
234        }
235    }
236    
237    /// Create an IN atom
238    pub fn in_set(field: impl Into<String>, values: Vec<FilterValue>) -> Self {
239        FilterAtom::In {
240            field: field.into(),
241            values,
242        }
243    }
244    
245    /// Create a range atom
246    pub fn range(
247        field: impl Into<String>,
248        min: Option<FilterValue>,
249        max: Option<FilterValue>,
250    ) -> Self {
251        FilterAtom::Range {
252            field: field.into(),
253            min,
254            max,
255            min_inclusive: true,
256            max_inclusive: true,
257        }
258    }
259    
260    /// Create an open range (exclusive bounds)
261    pub fn range_exclusive(
262        field: impl Into<String>,
263        min: Option<FilterValue>,
264        max: Option<FilterValue>,
265    ) -> Self {
266        FilterAtom::Range {
267            field: field.into(),
268            min,
269            max,
270            min_inclusive: false,
271            max_inclusive: false,
272        }
273    }
274    
275    /// Get the field name this atom filters on (if any)
276    pub fn field(&self) -> Option<&str> {
277        match self {
278            FilterAtom::Eq { field, .. } => Some(field),
279            FilterAtom::Ne { field, .. } => Some(field),
280            FilterAtom::In { field, .. } => Some(field),
281            FilterAtom::NotIn { field, .. } => Some(field),
282            FilterAtom::Range { field, .. } => Some(field),
283            FilterAtom::Prefix { field, .. } => Some(field),
284            FilterAtom::Contains { field, .. } => Some(field),
285            FilterAtom::HasTag { .. } => None,
286            FilterAtom::True | FilterAtom::False => None,
287        }
288    }
289    
290    /// Check if this atom is always true
291    pub fn is_trivially_true(&self) -> bool {
292        matches!(self, FilterAtom::True)
293    }
294    
295    /// Check if this atom is always false
296    pub fn is_trivially_false(&self) -> bool {
297        matches!(self, FilterAtom::False)
298    }
299    
300    /// Negate this atom
301    pub fn negate(&self) -> FilterAtom {
302        match self {
303            FilterAtom::Eq { field, value } => FilterAtom::Ne {
304                field: field.clone(),
305                value: value.clone(),
306            },
307            FilterAtom::Ne { field, value } => FilterAtom::Eq {
308                field: field.clone(),
309                value: value.clone(),
310            },
311            FilterAtom::In { field, values } => FilterAtom::NotIn {
312                field: field.clone(),
313                values: values.clone(),
314            },
315            FilterAtom::NotIn { field, values } => FilterAtom::In {
316                field: field.clone(),
317                values: values.clone(),
318            },
319            FilterAtom::True => FilterAtom::False,
320            FilterAtom::False => FilterAtom::True,
321            // For complex atoms, wrap in negation via De Morgan's
322            other => other.clone(), // Simplified - full implementation would use Not wrapper
323        }
324    }
325}
326
327impl fmt::Display for FilterAtom {
328    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
329        match self {
330            FilterAtom::Eq { field, value } => write!(f, "{} = {}", field, value),
331            FilterAtom::Ne { field, value } => write!(f, "{} != {}", field, value),
332            FilterAtom::In { field, values } => {
333                let vals: Vec<_> = values.iter().map(|v| v.to_string()).collect();
334                write!(f, "{} IN ({})", field, vals.join(", "))
335            }
336            FilterAtom::NotIn { field, values } => {
337                let vals: Vec<_> = values.iter().map(|v| v.to_string()).collect();
338                write!(f, "{} NOT IN ({})", field, vals.join(", "))
339            }
340            FilterAtom::Range { field, min, max, min_inclusive, max_inclusive } => {
341                let left = if *min_inclusive { "[" } else { "(" };
342                let right = if *max_inclusive { "]" } else { ")" };
343                let min_str = min.as_ref().map(|v| v.to_string()).unwrap_or_else(|| "-∞".to_string());
344                let max_str = max.as_ref().map(|v| v.to_string()).unwrap_or_else(|| "∞".to_string());
345                write!(f, "{} ∈ {}{}, {}{}", field, left, min_str, max_str, right)
346            }
347            FilterAtom::Prefix { field, prefix } => write!(f, "{} STARTS WITH '{}'", field, prefix),
348            FilterAtom::Contains { field, substring } => write!(f, "{} CONTAINS '{}'", field, substring),
349            FilterAtom::HasTag { tag } => write!(f, "HAS_TAG('{}')", tag),
350            FilterAtom::True => write!(f, "TRUE"),
351            FilterAtom::False => write!(f, "FALSE"),
352        }
353    }
354}
355
356// ============================================================================
357// Filter IR - Normalized Boolean Expression
358// ============================================================================
359
360/// A disjunction (OR) of atoms
361#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
362pub struct Disjunction {
363    pub atoms: Vec<FilterAtom>,
364}
365
366impl Disjunction {
367    /// Create a disjunction from atoms
368    pub fn new(atoms: Vec<FilterAtom>) -> Self {
369        Self { atoms }
370    }
371    
372    /// Create a single-atom disjunction
373    pub fn single(atom: FilterAtom) -> Self {
374        Self { atoms: vec![atom] }
375    }
376    
377    /// Check if this disjunction is trivially true (contains TRUE or is empty after simplification)
378    pub fn is_trivially_true(&self) -> bool {
379        self.atoms.iter().any(|a| a.is_trivially_true())
380    }
381    
382    /// Check if this disjunction is trivially false (empty or all atoms are FALSE)
383    pub fn is_trivially_false(&self) -> bool {
384        self.atoms.is_empty() || self.atoms.iter().all(|a| a.is_trivially_false())
385    }
386    
387    /// Simplify this disjunction
388    pub fn simplify(self) -> Self {
389        // Remove FALSE atoms
390        let atoms: Vec<_> = self.atoms.into_iter()
391            .filter(|a| !a.is_trivially_false())
392            .collect();
393        
394        // If any atom is TRUE, the whole disjunction is TRUE
395        if atoms.iter().any(|a| a.is_trivially_true()) {
396            return Self { atoms: vec![FilterAtom::True] };
397        }
398        
399        // If empty, it's FALSE
400        if atoms.is_empty() {
401            return Self { atoms: vec![FilterAtom::False] };
402        }
403        
404        Self { atoms }
405    }
406}
407
408impl fmt::Display for Disjunction {
409    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
410        if self.atoms.len() == 1 {
411            write!(f, "{}", self.atoms[0])
412        } else {
413            let parts: Vec<_> = self.atoms.iter().map(|a| a.to_string()).collect();
414            write!(f, "({})", parts.join(" OR "))
415        }
416    }
417}
418
419/// Canonical Filter IR in Conjunctive Normal Form (CNF)
420///
421/// CNF = (A₁ ∨ A₂) ∧ (B₁) ∧ (C₁ ∨ C₂ ∨ C₃)
422///
423/// This representation enables:
424/// 1. Systematic index path selection (each clause maps to an index)
425/// 2. Easy conjunction with auth scope (just append clauses)
426/// 3. Efficient serialization and transmission
427#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
428pub struct FilterIR {
429    /// Conjunction of disjunctions (CNF form)
430    pub clauses: Vec<Disjunction>,
431}
432
433impl FilterIR {
434    /// Create an empty filter (matches everything)
435    pub fn all() -> Self {
436        Self { clauses: vec![] }
437    }
438    
439    /// Create a filter that matches nothing
440    pub fn none() -> Self {
441        Self {
442            clauses: vec![Disjunction::single(FilterAtom::False)],
443        }
444    }
445    
446    /// Create a filter from a single atom
447    pub fn from_atom(atom: FilterAtom) -> Self {
448        Self {
449            clauses: vec![Disjunction::single(atom)],
450        }
451    }
452    
453    /// Create a filter from a single disjunction
454    pub fn from_disjunction(disj: Disjunction) -> Self {
455        Self { clauses: vec![disj] }
456    }
457    
458    /// Conjoin (AND) with another filter
459    ///
460    /// This is the key operation for auth scope injection:
461    /// `EffectiveFilter = AuthScope ∧ UserFilter`
462    pub fn and(mut self, other: FilterIR) -> Self {
463        self.clauses.extend(other.clauses);
464        self
465    }
466    
467    /// Conjoin with a single atom
468    pub fn and_atom(mut self, atom: FilterAtom) -> Self {
469        self.clauses.push(Disjunction::single(atom));
470        self
471    }
472    
473    /// Disjoin (OR) with another filter
474    ///
475    /// Note: This may expand the CNF representation
476    pub fn or(self, other: FilterIR) -> Self {
477        if self.clauses.is_empty() {
478            return other;
479        }
480        if other.clauses.is_empty() {
481            return self;
482        }
483        
484        // Distribute: (A ∧ B) ∨ (C ∧ D) = (A ∨ C) ∧ (A ∨ D) ∧ (B ∨ C) ∧ (B ∨ D)
485        // This can cause exponential blowup - in practice, limit depth
486        let mut new_clauses = Vec::new();
487        for c1 in &self.clauses {
488            for c2 in &other.clauses {
489                let mut combined = c1.atoms.clone();
490                combined.extend(c2.atoms.clone());
491                new_clauses.push(Disjunction::new(combined));
492            }
493        }
494        
495        FilterIR { clauses: new_clauses }
496    }
497    
498    /// Check if this filter matches everything
499    pub fn is_all(&self) -> bool {
500        self.clauses.is_empty() || self.clauses.iter().all(|c| c.is_trivially_true())
501    }
502    
503    /// Check if this filter matches nothing
504    pub fn is_none(&self) -> bool {
505        self.clauses.iter().any(|c| c.is_trivially_false())
506    }
507    
508    /// Simplify the filter
509    pub fn simplify(self) -> Self {
510        let clauses: Vec<_> = self.clauses
511            .into_iter()
512            .map(|c| c.simplify())
513            .filter(|c| !c.is_trivially_true())
514            .collect();
515        
516        // If any clause is FALSE, the whole conjunction is FALSE
517        if clauses.iter().any(|c| c.is_trivially_false()) {
518            return Self::none();
519        }
520        
521        Self { clauses }
522    }
523    
524    /// Extract atoms for a specific field
525    pub fn atoms_for_field(&self, field: &str) -> Vec<&FilterAtom> {
526        self.clauses
527            .iter()
528            .flat_map(|c| c.atoms.iter())
529            .filter(|a| a.field() == Some(field))
530            .collect()
531    }
532    
533    /// Check if this filter constrains a specific field
534    pub fn constrains_field(&self, field: &str) -> bool {
535        !self.atoms_for_field(field).is_empty()
536    }
537    
538    /// Get all fields constrained by this filter
539    pub fn constrained_fields(&self) -> HashSet<&str> {
540        self.clauses
541            .iter()
542            .flat_map(|c| c.atoms.iter())
543            .filter_map(|a| a.field())
544            .collect()
545    }
546}
547
548impl Default for FilterIR {
549    fn default() -> Self {
550        Self::all()
551    }
552}
553
554impl fmt::Display for FilterIR {
555    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
556        if self.clauses.is_empty() {
557            return write!(f, "TRUE");
558        }
559        let parts: Vec<_> = self.clauses.iter().map(|c| c.to_string()).collect();
560        write!(f, "{}", parts.join(" AND "))
561    }
562}
563
564// ============================================================================
565// Auth Scope - Non-Optional Security Context
566// ============================================================================
567
568/// Authorization scope - ALWAYS conjoined with user filters
569///
570/// This is the security boundary that cannot be bypassed. It encodes:
571/// - Allowed namespaces/tenants
572/// - Optional project scope
573/// - Token expiry
574/// - Capability flags
575#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
576pub struct AuthScope {
577    /// Allowed namespaces (non-empty; at least one required)
578    pub allowed_namespaces: Vec<String>,
579    
580    /// Optional tenant ID (for multi-tenant deployments)
581    pub tenant_id: Option<String>,
582    
583    /// Optional project scope
584    pub project_id: Option<String>,
585    
586    /// Token expiry timestamp (Unix epoch seconds)
587    pub expires_at: Option<u64>,
588    
589    /// Capability flags
590    pub capabilities: AuthCapabilities,
591    
592    /// Optional ACL tags the caller has access to
593    pub acl_tags: Vec<String>,
594}
595
596/// Capability flags for authorization
597#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)]
598pub struct AuthCapabilities {
599    /// Can read vectors
600    pub can_read: bool,
601    /// Can write/insert vectors
602    pub can_write: bool,
603    /// Can delete vectors
604    pub can_delete: bool,
605    /// Can perform admin operations
606    pub can_admin: bool,
607}
608
609impl AuthScope {
610    /// Create a new auth scope for a single namespace
611    pub fn for_namespace(namespace: impl Into<String>) -> Self {
612        Self {
613            allowed_namespaces: vec![namespace.into()],
614            tenant_id: None,
615            project_id: None,
616            expires_at: None,
617            capabilities: AuthCapabilities {
618                can_read: true,
619                can_write: false,
620                can_delete: false,
621                can_admin: false,
622            },
623            acl_tags: vec![],
624        }
625    }
626    
627    /// Create with full access to a namespace
628    pub fn full_access(namespace: impl Into<String>) -> Self {
629        Self {
630            allowed_namespaces: vec![namespace.into()],
631            tenant_id: None,
632            project_id: None,
633            expires_at: None,
634            capabilities: AuthCapabilities {
635                can_read: true,
636                can_write: true,
637                can_delete: true,
638                can_admin: false,
639            },
640            acl_tags: vec![],
641        }
642    }
643    
644    /// Add a namespace to the allowed list
645    pub fn with_namespace(mut self, namespace: impl Into<String>) -> Self {
646        self.allowed_namespaces.push(namespace.into());
647        self
648    }
649    
650    /// Set tenant ID
651    pub fn with_tenant(mut self, tenant_id: impl Into<String>) -> Self {
652        self.tenant_id = Some(tenant_id.into());
653        self
654    }
655    
656    /// Set project ID
657    pub fn with_project(mut self, project_id: impl Into<String>) -> Self {
658        self.project_id = Some(project_id.into());
659        self
660    }
661    
662    /// Set expiry
663    pub fn with_expiry(mut self, expires_at: u64) -> Self {
664        self.expires_at = Some(expires_at);
665        self
666    }
667    
668    /// Add ACL tags
669    pub fn with_acl_tags(mut self, tags: Vec<String>) -> Self {
670        self.acl_tags = tags;
671        self
672    }
673    
674    /// Check if this scope is expired
675    pub fn is_expired(&self) -> bool {
676        if let Some(expires_at) = self.expires_at {
677            let now = std::time::SystemTime::now()
678                .duration_since(std::time::UNIX_EPOCH)
679                .map(|d| d.as_secs())
680                .unwrap_or(0);
681            now > expires_at
682        } else {
683            false
684        }
685    }
686    
687    /// Check if a namespace is allowed
688    pub fn is_namespace_allowed(&self, namespace: &str) -> bool {
689        self.allowed_namespaces.iter().any(|ns| ns == namespace)
690    }
691    
692    /// Convert auth scope to filter IR clauses
693    ///
694    /// This generates the mandatory predicates that MUST be conjoined
695    /// with any user filter.
696    pub fn to_filter_ir(&self) -> FilterIR {
697        let mut filter = FilterIR::all();
698        
699        // Namespace constraint (mandatory)
700        if self.allowed_namespaces.len() == 1 {
701            filter = filter.and_atom(FilterAtom::eq(
702                "namespace",
703                self.allowed_namespaces[0].clone(),
704            ));
705        } else if !self.allowed_namespaces.is_empty() {
706            filter = filter.and_atom(FilterAtom::in_set(
707                "namespace",
708                self.allowed_namespaces
709                    .iter()
710                    .map(|ns| FilterValue::String(ns.clone()))
711                    .collect(),
712            ));
713        }
714        
715        // Tenant constraint (if present)
716        if let Some(ref tenant_id) = self.tenant_id {
717            filter = filter.and_atom(FilterAtom::eq("tenant_id", tenant_id.clone()));
718        }
719        
720        // Project constraint (if present)
721        if let Some(ref project_id) = self.project_id {
722            filter = filter.and_atom(FilterAtom::eq("project_id", project_id.clone()));
723        }
724        
725        // ACL tags (if present, user must have at least one matching tag)
726        // This is handled differently - the executor checks tag intersection
727        // rather than adding to filter IR (since it's "has any of these tags")
728        
729        filter
730    }
731}
732
733// ============================================================================
734// Pushdown Contract - Executor Interface
735// ============================================================================
736
737/// The pushdown contract that every executor MUST implement
738///
739/// This trait enforces:
740/// 1. Filter is provided upfront, not as post-processing
741/// 2. Auth scope is non-optional
742/// 3. Results are guaranteed to satisfy the effective filter
743pub trait FilteredExecutor {
744    /// The query operation type (varies by executor)
745    type QueryOp;
746    
747    /// The result type
748    type Result;
749    
750    /// The error type
751    type Error;
752    
753    /// Execute a query with mandatory filtering
754    ///
755    /// # Contract
756    ///
757    /// - `filter_ir`: User-provided filter (may be empty = all)
758    /// - `auth_scope`: Non-optional security context
759    ///
760    /// The executor MUST:
761    /// 1. Compute `effective_filter = auth_scope.to_filter_ir() ∧ filter_ir`
762    /// 2. Apply `effective_filter` BEFORE generating candidates
763    /// 3. Guarantee all results satisfy `effective_filter`
764    ///
765    /// The executor MUST NOT:
766    /// 1. Return any result outside `effective_filter`
767    /// 2. Apply filtering after candidate scoring
768    /// 3. Ignore or bypass `auth_scope`
769    fn execute(
770        &self,
771        query: &Self::QueryOp,
772        filter_ir: &FilterIR,
773        auth_scope: &AuthScope,
774    ) -> Result<Self::Result, Self::Error>;
775    
776    /// Compute the effective filter (auth ∧ user)
777    ///
778    /// This is a convenience method that executors can use.
779    fn effective_filter(&self, filter_ir: &FilterIR, auth_scope: &AuthScope) -> FilterIR {
780        auth_scope.to_filter_ir().and(filter_ir.clone())
781    }
782}
783
784// ============================================================================
785// Filter Builder - Ergonomic Construction
786// ============================================================================
787
788/// Builder for constructing filter IR ergonomically
789#[derive(Debug, Clone, Default)]
790pub struct FilterBuilder {
791    clauses: Vec<Disjunction>,
792}
793
794impl FilterBuilder {
795    /// Create a new filter builder
796    pub fn new() -> Self {
797        Self::default()
798    }
799    
800    /// Add an equality constraint
801    pub fn eq(mut self, field: &str, value: impl Into<FilterValue>) -> Self {
802        self.clauses.push(Disjunction::single(FilterAtom::eq(field, value)));
803        self
804    }
805    
806    /// Add a not-equal constraint
807    pub fn ne(mut self, field: &str, value: impl Into<FilterValue>) -> Self {
808        self.clauses.push(Disjunction::single(FilterAtom::Ne {
809            field: field.to_string(),
810            value: value.into(),
811        }));
812        self
813    }
814    
815    /// Add an IN constraint
816    pub fn in_set(mut self, field: &str, values: Vec<FilterValue>) -> Self {
817        self.clauses.push(Disjunction::single(FilterAtom::in_set(field, values)));
818        self
819    }
820    
821    /// Add a range constraint
822    pub fn range(
823        mut self,
824        field: &str,
825        min: Option<impl Into<FilterValue>>,
826        max: Option<impl Into<FilterValue>>,
827    ) -> Self {
828        self.clauses.push(Disjunction::single(FilterAtom::range(
829            field,
830            min.map(Into::into),
831            max.map(Into::into),
832        )));
833        self
834    }
835    
836    /// Add a greater-than constraint
837    pub fn gt(mut self, field: &str, value: impl Into<FilterValue>) -> Self {
838        self.clauses.push(Disjunction::single(FilterAtom::Range {
839            field: field.to_string(),
840            min: Some(value.into()),
841            max: None,
842            min_inclusive: false,
843            max_inclusive: false,
844        }));
845        self
846    }
847    
848    /// Add a greater-than-or-equal constraint
849    pub fn gte(mut self, field: &str, value: impl Into<FilterValue>) -> Self {
850        self.clauses.push(Disjunction::single(FilterAtom::Range {
851            field: field.to_string(),
852            min: Some(value.into()),
853            max: None,
854            min_inclusive: true,
855            max_inclusive: false,
856        }));
857        self
858    }
859    
860    /// Add a less-than constraint
861    pub fn lt(mut self, field: &str, value: impl Into<FilterValue>) -> Self {
862        self.clauses.push(Disjunction::single(FilterAtom::Range {
863            field: field.to_string(),
864            min: None,
865            max: Some(value.into()),
866            min_inclusive: false,
867            max_inclusive: false,
868        }));
869        self
870    }
871    
872    /// Add a less-than-or-equal constraint
873    pub fn lte(mut self, field: &str, value: impl Into<FilterValue>) -> Self {
874        self.clauses.push(Disjunction::single(FilterAtom::Range {
875            field: field.to_string(),
876            min: None,
877            max: Some(value.into()),
878            min_inclusive: false,
879            max_inclusive: true,
880        }));
881        self
882    }
883    
884    /// Add a prefix match constraint
885    pub fn prefix(mut self, field: &str, prefix: &str) -> Self {
886        self.clauses.push(Disjunction::single(FilterAtom::Prefix {
887            field: field.to_string(),
888            prefix: prefix.to_string(),
889        }));
890        self
891    }
892    
893    /// Add a contains constraint
894    pub fn contains(mut self, field: &str, substring: &str) -> Self {
895        self.clauses.push(Disjunction::single(FilterAtom::Contains {
896            field: field.to_string(),
897            substring: substring.to_string(),
898        }));
899        self
900    }
901    
902    /// Add a namespace constraint (convenience method)
903    pub fn namespace(self, namespace: &str) -> Self {
904        self.eq("namespace", namespace)
905    }
906    
907    /// Add a doc_id IN constraint (convenience method)
908    pub fn doc_ids(self, doc_ids: &[u64]) -> Self {
909        self.in_set(
910            "doc_id",
911            doc_ids.iter().map(|&id| FilterValue::Uint64(id)).collect(),
912        )
913    }
914    
915    /// Add a time range constraint (convenience method)
916    pub fn time_range(self, field: &str, start: Option<u64>, end: Option<u64>) -> Self {
917        self.range(
918            field,
919            start.map(FilterValue::Uint64),
920            end.map(FilterValue::Uint64),
921        )
922    }
923    
924    /// Add a disjunction (OR of multiple atoms)
925    pub fn or_atoms(mut self, atoms: Vec<FilterAtom>) -> Self {
926        self.clauses.push(Disjunction::new(atoms));
927        self
928    }
929    
930    /// Build the filter IR
931    pub fn build(self) -> FilterIR {
932        FilterIR { clauses: self.clauses }
933    }
934}
935
936// ============================================================================
937// Convenience Macros
938// ============================================================================
939
/// Create a filter IR from a simple DSL of comma-separated `field = value`
/// equality pairs.
///
/// Each pair expands to a `FilterBuilder::eq` call keyed by the stringified
/// field identifier, applied in the order written. An empty invocation expands
/// to `FilterIR::all()`. Any number of pairs is accepted and a trailing comma
/// is allowed.
///
/// Only equality is supported by this macro; build other constraints (ranges,
/// set membership, prefixes, ...) with `FilterBuilder` directly.
///
/// ```ignore
/// let filter = filter_ir! {
///     namespace = "my_ns",
///     project_id = "proj_123",
/// };
/// ```
#[macro_export]
macro_rules! filter_ir {
    // Internal helper, kept for backward compatibility: append equality
    // clauses to an existing builder expression and yield the builder.
    // A repetition is used instead of recursion so that every pair is folded
    // into the same method chain and nothing is moved twice or dropped.
    (@chain $builder:expr, $($field:ident = $value:expr),+ $(,)?) => {
        $builder$(.eq(stringify!($field), $value))+
    };

    // Empty filter
    () => {
        $crate::filter_ir::FilterIR::all()
    };

    // One or more equality pairs, optionally followed by a trailing comma
    ($($field:ident = $value:expr),+ $(,)?) => {
        $crate::filter_ir::FilterBuilder::new()
            $(.eq(stringify!($field), $value))+
            .build()
    };
}
975
976// ============================================================================
977// Tests
978// ============================================================================
979
#[cfg(test)]
mod tests {
    use super::*;

    /// Atoms built via the helper constructors report the field they were given.
    #[test]
    fn test_filter_atom_creation() {
        let equality = FilterAtom::eq("namespace", "my_ns");
        assert_eq!(equality.field(), Some("namespace"));

        let lower = Some(FilterValue::Uint64(1000));
        let upper = Some(FilterValue::Uint64(2000));
        let window = FilterAtom::range("timestamp", lower, upper);
        assert_eq!(window.field(), Some("timestamp"));
    }

    /// AND-ing two single-atom filters yields two clauses.
    #[test]
    fn test_filter_ir_conjunction() {
        let by_namespace = FilterIR::from_atom(FilterAtom::eq("namespace", "ns1"));
        let by_project = FilterIR::from_atom(FilterAtom::eq("project_id", "proj1"));

        let both = by_namespace.and(by_project);
        assert_eq!(both.clauses.len(), 2);
    }

    /// A namespace + tenant scope constrains exactly those two fields.
    #[test]
    fn test_auth_scope_to_filter() {
        let scope = AuthScope::for_namespace("production").with_tenant("acme_corp");
        let scoped = scope.to_filter_ir();

        assert!(scoped.constrains_field("namespace"));
        assert!(scoped.constrains_field("tenant_id"));
        assert!(!scoped.constrains_field("project_id"));
    }

    /// The effective filter is the auth scope AND-ed with the user filter.
    #[test]
    fn test_effective_filter() {
        let scope = AuthScope::for_namespace("production");
        let requested = FilterBuilder::new()
            .eq("source", "documents")
            .time_range("created_at", Some(1000), Some(2000))
            .build();

        let effective = scope.to_filter_ir().and(requested);

        // One clause each for namespace, source and the time range.
        assert_eq!(effective.clauses.len(), 3);
        assert!(effective.constrains_field("namespace"));
        assert!(effective.constrains_field("source"));
        assert!(effective.constrains_field("created_at"));
    }

    /// Every builder call contributes one clause.
    #[test]
    fn test_filter_builder() {
        let built = FilterBuilder::new()
            .namespace("my_namespace")
            .eq("project_id", "proj_123")
            .doc_ids(&[1, 2, 3, 4, 5])
            .time_range("timestamp", Some(1000), None)
            .build();

        assert_eq!(built.clauses.len(), 4);
    }

    /// Simplification drops TRUE clauses and collapses on FALSE.
    #[test]
    fn test_filter_simplification() {
        // TRUE AND X = X
        let with_true = FilterIR::from_atom(FilterAtom::True)
            .and(FilterIR::from_atom(FilterAtom::eq("x", "y")));
        let reduced = with_true.simplify();
        assert_eq!(reduced.clauses.len(), 1);

        // FALSE AND X = FALSE
        let with_false = FilterIR::from_atom(FilterAtom::False)
            .and(FilterIR::from_atom(FilterAtom::eq("x", "y")));
        let collapsed = with_false.simplify();
        assert!(collapsed.is_none());
    }

    /// The rendered filter mentions every constrained field name.
    #[test]
    fn test_filter_display() {
        let built = FilterBuilder::new()
            .eq("namespace", "prod")
            .range("timestamp", Some(1000i64), Some(2000i64))
            .build();

        let rendered = built.to_string();
        assert!(rendered.contains("namespace"));
        assert!(rendered.contains("timestamp"));
    }
}