// qexpr/lib.rs

//! `qexpr`: typed query expressions (query algebra).
//!
//! Goal: provide a small, stable AST for common retrieval query operators without
//! committing to any particular index backend or scoring model.
//!
//! This is intentionally **not** a parser. Parsing (syntax) is product-specific.
//! This crate is about a shared, typed *meaning* that multiple systems can compile
//! down to their preferred execution plan.

#![warn(missing_docs)]

use std::fmt;

#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};

15/// A query expression for retrieval.
16#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
17#[derive(Debug, Clone, PartialEq, Eq, Hash)]
18pub enum QExpr {
19    /// A single term.
20    Term(Term),
21    /// A phrase (ordered sequence of terms).
22    ///
23    /// Semantics require positional information in the target index to evaluate exactly.
24    Phrase(Phrase),
25    /// Proximity query: terms must occur within a window.
26    ///
27    /// This is the semantic payload behind operators like `NEAR/k`.
28    /// Evaluation requires positional information in the target index (or a verifier stage).
29    Near(Near),
30    /// Conjunction: all children must match.
31    And(Vec<QExpr>),
32    /// Disjunction: any child may match.
33    Or(Vec<QExpr>),
34    /// Negation: exclude matches of inner expression.
35    Not(Box<QExpr>),
36    /// Field scoping (e.g. `title:term`).
37    ///
38    /// Evaluation requires field-aware indexing (or a compiler that rewrites into field-specific terms).
39    Field(FieldName, Box<QExpr>),
40}
41
/// A normalized term token.
///
/// The wrapped string is stored exactly as supplied; this type applies no
/// normalization of its own.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct Term(pub String);

impl Term {
    /// Create a term (caller is responsible for normalization/tokenization policy).
    pub fn new(s: impl Into<String>) -> Self {
        Self(s.into())
    }

    /// Returns true if the term is empty or consists only of whitespace.
    pub fn is_blank(&self) -> bool {
        self.0.trim().is_empty()
    }
}

59/// A phrase of ordered terms.
60#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
61#[derive(Debug, Clone, PartialEq, Eq, Hash)]
62pub struct Phrase {
63    /// Ordered terms.
64    pub terms: Vec<Term>,
65}
66
67impl Phrase {
68    /// Create a phrase.
69    pub fn new(terms: Vec<Term>) -> Self {
70        Self { terms }
71    }
72
73    /// Returns true if the phrase has no terms (or all terms are blank).
74    pub fn is_blank(&self) -> bool {
75        self.terms.is_empty() || self.terms.iter().all(|t| t.is_blank())
76    }
77}
78
79/// A proximity query over ordered terms.
80///
81/// This represents constraints like “the terms occur within `window` tokens”.
82#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
83#[derive(Debug, Clone, PartialEq, Eq, Hash)]
84pub struct Near {
85    /// Terms participating in the proximity constraint.
86    ///
87    /// Must have length >= 2.
88    pub terms: Vec<Term>,
89    /// Window size in tokens.
90    ///
91    /// Interpretation: there exists an assignment of positions (one per term occurrence)
92    /// such that `max(pos) - min(pos) <= window`.
93    pub window: u32,
94    /// If true, enforce term order (like an ordered NEAR / “WITHIN k in order”).
95    ///
96    /// If false, order is ignored (unordered NEAR/k).
97    pub ordered: bool,
98}
99
100impl Near {
101    /// Create a proximity query.
102    pub fn new(terms: Vec<Term>, window: u32, ordered: bool) -> Self {
103        Self {
104            terms,
105            window,
106            ordered,
107        }
108    }
109
110    /// Returns true if the constraint is structurally blank/invalid.
111    pub fn is_blank(&self) -> bool {
112        self.terms.len() < 2 || self.terms.iter().all(|t| t.is_blank()) || self.window == 0
113    }
114}
115
/// A field name.
///
/// The wrapped string is stored exactly as supplied.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct FieldName(pub String);

impl FieldName {
    /// Create a field name.
    pub fn new(s: impl Into<String>) -> Self {
        Self(s.into())
    }

    /// Returns true if the field name is empty or consists only of whitespace.
    pub fn is_blank(&self) -> bool {
        self.0.trim().is_empty()
    }
}

/// Structural validation errors for `QExpr`.
///
/// Implements [`std::error::Error`], so it can be propagated with `?` into
/// `Box<dyn Error>` or wrapped by error-handling crates.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ValidateError {
    /// A `Term` node contained a blank term.
    BlankTerm,
    /// A `Phrase` node contained no usable terms.
    BlankPhrase,
    /// A `Near` node contained fewer than 2 usable terms or an invalid window.
    BlankNear,
    /// An `And`/`Or` node had no children.
    EmptyJunction,
    /// A `Field` node had a blank field name.
    BlankFieldName,
}

impl fmt::Display for ValidateError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Exhaustive on purpose: a new variant must get a message before this compiles.
        let msg = match self {
            Self::BlankTerm => "term is empty or whitespace-only",
            Self::BlankPhrase => "phrase contains no usable terms",
            Self::BlankNear => "proximity query has fewer than two usable terms or an invalid window",
            Self::EmptyJunction => "AND/OR node has no children",
            Self::BlankFieldName => "field name is empty or whitespace-only",
        };
        f.write_str(msg)
    }
}

impl std::error::Error for ValidateError {}

148/// Validate a query expression for basic structural invariants.
149///
150/// This does **not** attempt semantic checks like "is phrase supported by the target index".
151pub fn validate(expr: &QExpr) -> Result<(), ValidateError> {
152    match expr {
153        QExpr::Term(t) => {
154            if t.is_blank() {
155                Err(ValidateError::BlankTerm)
156            } else {
157                Ok(())
158            }
159        }
160        QExpr::Phrase(p) => {
161            if p.is_blank() {
162                Err(ValidateError::BlankPhrase)
163            } else {
164                Ok(())
165            }
166        }
167        QExpr::Near(n) => {
168            if n.is_blank() {
169                Err(ValidateError::BlankNear)
170            } else {
171                Ok(())
172            }
173        }
174        QExpr::And(xs) | QExpr::Or(xs) => {
175            if xs.is_empty() {
176                return Err(ValidateError::EmptyJunction);
177            }
178            for x in xs {
179                validate(x)?;
180            }
181            Ok(())
182        }
183        QExpr::Not(x) => validate(x),
184        QExpr::Field(name, inner) => {
185            if name.is_blank() {
186                return Err(ValidateError::BlankFieldName);
187            }
188            validate(inner)
189        }
190    }
191}
192
// Unit tests for `validate`: one rejection test per `ValidateError` variant, a nested
// propagation test, and one acceptance test over a mixed tree.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn validate_rejects_blank_term() {
        assert_eq!(
            validate(&QExpr::Term(Term::new("  "))).unwrap_err(),
            ValidateError::BlankTerm
        );
    }

    #[test]
    fn validate_rejects_blank_phrase() {
        assert_eq!(
            validate(&QExpr::Phrase(Phrase::new(vec![]))).unwrap_err(),
            ValidateError::BlankPhrase
        );
        assert_eq!(
            validate(&QExpr::Phrase(Phrase::new(vec![Term::new("  ")]))).unwrap_err(),
            ValidateError::BlankPhrase
        );
    }

    #[test]
    fn validate_rejects_blank_near() {
        // Zero window.
        assert_eq!(
            validate(&QExpr::Near(Near::new(
                vec![Term::new("a"), Term::new("b")],
                0,
                false
            )))
            .unwrap_err(),
            ValidateError::BlankNear
        );
        // Too few terms.
        assert_eq!(
            validate(&QExpr::Near(Near::new(vec![Term::new("a")], 5, false))).unwrap_err(),
            ValidateError::BlankNear
        );
    }

    #[test]
    fn validate_rejects_empty_junctions() {
        assert_eq!(
            validate(&QExpr::And(vec![])).unwrap_err(),
            ValidateError::EmptyJunction
        );
        assert_eq!(
            validate(&QExpr::Or(vec![])).unwrap_err(),
            ValidateError::EmptyJunction
        );
    }

    #[test]
    fn validate_rejects_blank_field_name() {
        let q = QExpr::Field(FieldName::new(" "), Box::new(QExpr::Term(Term::new("x"))));
        assert_eq!(validate(&q).unwrap_err(), ValidateError::BlankFieldName);
    }

    #[test]
    fn validate_propagates_nested_errors() {
        // Errors inside `Not` and `Field` wrappers surface unchanged.
        let negated = QExpr::Not(Box::new(QExpr::Term(Term::new(""))));
        assert_eq!(validate(&negated).unwrap_err(), ValidateError::BlankTerm);

        let scoped = QExpr::Field(FieldName::new("title"), Box::new(QExpr::And(vec![])));
        assert_eq!(validate(&scoped).unwrap_err(), ValidateError::EmptyJunction);
    }

    #[test]
    fn validate_accepts_non_blank_tree() {
        let q = QExpr::And(vec![
            QExpr::Term(Term::new("alpha")),
            QExpr::Phrase(Phrase::new(vec![Term::new("new"), Term::new("york")])),
            // A phrase is allowed to contain blank terms as long as it contains at least one
            // non-blank term; normalization is caller-defined.
            QExpr::Phrase(Phrase::new(vec![Term::new("  "), Term::new("x")])),
            QExpr::Near(Near::new(
                vec![Term::new("deep"), Term::new("learning")],
                5,
                false,
            )),
        ]);
        validate(&q).unwrap();
    }
}
268}