Skip to main content

nookdb_core/
query.rs

1//! Query-shaping options (`sort` / `limit` / `offset`) applied by `find`.
2use serde::Deserialize;
3
/// Sort direction for one sort key.
///
/// Deserialized from the lowercase variant name (`"asc"` / `"desc"`); any
/// other string is a decode error.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum SortDir {
    /// Smallest value first.
    Asc,
    /// Largest value first.
    Desc,
}
11
/// Decoded query options.
///
/// Sort is an ordered list of `(field, dir)` pairs; the wire encodes it as a
/// JSON array so key/priority order is preserved (a JSON object would lose
/// order through `serde_json`'s map decode).
///
/// `#[serde(default)]` means any field absent from the wire JSON takes its
/// value from `QueryOptions::default()` (no sort, no limit, offset `0`).
#[derive(Debug, Clone, Default, Deserialize)]
#[serde(default)]
pub struct QueryOptions {
    /// Sort keys as `(field, direction)` pairs, highest priority first.
    pub sort: Vec<(String, SortDir)>,
    /// Maximum number of documents to return; `None` means no cap.
    pub limit: Option<usize>,
    /// Number of leading documents to skip before `limit` is applied.
    pub offset: usize,
}
24
25impl QueryOptions {
26    /// Parses an optional wire `optionsJson`. `None`/empty → default.
27    ///
28    /// # Errors
29    /// Returns `NookError::InvalidArg` if the JSON is malformed or a value
30    /// is out of range (negative / fractional `limit`/`offset`, bad `dir`).
31    pub fn parse(options_json: Option<&str>) -> Result<Self, crate::error::NookError> {
32        match options_json {
33            None => Ok(Self::default()),
34            Some(s) if s.trim().is_empty() => Ok(Self::default()),
35            Some(s) => serde_json::from_str(s).map_err(|e| crate::error::NookError::InvalidArg {
36                msg: format!("invalid query options: {e}"),
37            }),
38        }
39    }
40
41    /// `true` when no sort keys are set.
42    #[must_use]
43    pub fn has_sort(&self) -> bool {
44        !self.sort.is_empty()
45    }
46
47    /// Validates that every sort field exists and is orderable (not an
48    /// array). Callers that sort (`apply`) AND callers that ignore sort but
49    /// must reject an invalid spec (`count`) both run this, so the
50    /// accept/reject decision is identical across read ops.
51    ///
52    /// # Errors
53    /// `NookError::Schema` if a sort field is unknown or non-orderable.
54    pub fn validate_sort_fields<'f>(
55        &self,
56        field_ty: impl Fn(&str) -> Option<&'f crate::schema::ir::FieldType>,
57    ) -> Result<(), crate::error::NookError> {
58        use crate::schema::ir::FieldType;
59        for (field, _) in &self.sort {
60            match field_ty(field) {
61                None => {
62                    return Err(crate::error::NookError::Schema {
63                        msg: format!("cannot sort on unknown field {field:?}"),
64                    })
65                }
66                Some(FieldType::Array(_)) => {
67                    return Err(crate::error::NookError::Schema {
68                        msg: format!("cannot sort on array field {field:?}"),
69                    })
70                }
71                Some(_) => {}
72            }
73        }
74        Ok(())
75    }
76
77    /// Orders `docs` in place by the configured sort keys, then applies
78    /// `offset`/`limit`. Comparison is schema-typed via `field_ty`
79    /// (a lookup `field name → Option<&FieldType>`). null/missing values
80    /// sort LAST regardless of direction; ties break by `id_field` (ascending)
81    /// for deterministic pagination.
82    ///
83    /// # Errors
84    /// `NookError::Schema` if a sort field is unknown or its type is not
85    /// orderable (array).
86    pub fn apply<'f>(
87        &self,
88        mut docs: Vec<serde_json::Value>,
89        id_field: &str,
90        field_ty: impl Fn(&str) -> Option<&'f crate::schema::ir::FieldType>,
91    ) -> Result<Vec<serde_json::Value>, crate::error::NookError> {
92        use std::cmp::Ordering;
93
94        self.validate_sort_fields(field_ty)?;
95
96        if self.has_sort() {
97            docs.sort_by(|a, b| {
98                for (field, dir) in &self.sort {
99                    let av = a.get(field);
100                    let bv = b.get(field);
101                    let ord = cmp_values(av, bv);
102                    // Desc flips only present-vs-present ordering; null/missing
103                    // stays LAST in both directions (handled via `is_absent`).
104                    let ord = if matches!(dir, SortDir::Desc) && !is_absent(av) && !is_absent(bv) {
105                        ord.reverse()
106                    } else {
107                        ord
108                    };
109                    if ord != Ordering::Equal {
110                        return ord;
111                    }
112                }
113                // Deterministic tie-break by id (ascending).
114                let aid = a.get(id_field).and_then(serde_json::Value::as_str);
115                let bid = b.get(id_field).and_then(serde_json::Value::as_str);
116                aid.cmp(&bid)
117            });
118        }
119
120        let start = self.offset.min(docs.len());
121        let mut out = docs.split_off(start);
122        if let Some(limit) = self.limit {
123            out.truncate(limit);
124        }
125        Ok(out)
126    }
127}
128
129/// `true` when a sort value is null or the field is missing — such values
130/// sort LAST regardless of direction. The single definition both
131/// `cmp_values` and `apply`'s desc-reversal consult, so the null-last rule
132/// lives in exactly one place.
133const fn is_absent(v: Option<&serde_json::Value>) -> bool {
134    matches!(v, None | Some(serde_json::Value::Null))
135}
136
137/// Total order across the JSON scalar types we sort, with null/missing LAST.
138///
139/// Present values: numbers compared numerically, everything else (string,
140/// bool, enum, date-as-ISO-string) compared by its natural `serde_json`
141/// ordering via string/bool/number arms. Mixed present types fall back to a
142/// stable type-rank so the sort never panics.
143fn cmp_values(a: Option<&serde_json::Value>, b: Option<&serde_json::Value>) -> std::cmp::Ordering {
144    use serde_json::Value;
145    use std::cmp::Ordering;
146    match (is_absent(a), is_absent(b)) {
147        (true, true) => Ordering::Equal,
148        (true, false) => Ordering::Greater, // null/missing sorts last
149        (false, true) => Ordering::Less,
150        (false, false) => {
151            let (a, b) = (a.unwrap(), b.unwrap());
152            match (a, b) {
153                (Value::Number(x), Value::Number(y)) => cmp_numbers(x, y),
154                (Value::String(x), Value::String(y)) => x.cmp(y),
155                (Value::Bool(x), Value::Bool(y)) => x.cmp(y),
156                _ => type_rank(a).cmp(&type_rank(b)),
157            }
158        }
159    }
160}
161
162/// Orders two JSON numbers. Integers are compared EXACTLY (via `i64`/`u64`)
163/// so values above `2^53` don't collapse to `Equal` through an `f64` cast;
164/// only when a side isn't integer-representable (or signs differ across the
165/// i64/u64 split) do we fall back to `f64`.
166fn cmp_numbers(x: &serde_json::Number, y: &serde_json::Number) -> std::cmp::Ordering {
167    use std::cmp::Ordering;
168    if let (Some(a), Some(b)) = (x.as_i64(), y.as_i64()) {
169        return a.cmp(&b);
170    }
171    if let (Some(a), Some(b)) = (x.as_u64(), y.as_u64()) {
172        return a.cmp(&b);
173    }
174    match (x.as_f64(), y.as_f64()) {
175        (Some(a), Some(b)) => a.partial_cmp(&b).unwrap_or(Ordering::Equal),
176        _ => Ordering::Equal,
177    }
178}
179
180/// Stable rank for mixed present types (keeps `sort_by` total + panic-free).
181const fn type_rank(v: &serde_json::Value) -> u8 {
182    use serde_json::Value;
183    match v {
184        Value::Bool(_) => 0,
185        Value::Number(_) => 1,
186        Value::String(_) => 2,
187        _ => 3,
188    }
189}
190
#[cfg(test)]
mod tests {
    use super::*;
    use crate::schema::ir::FieldType;
    use serde_json::{json, Value};

    /// Parses `raw` as wire options, panicking if it is rejected.
    fn opts(raw: &str) -> QueryOptions {
        QueryOptions::parse(Some(raw)).unwrap()
    }

    // Returns `Option` to match the `field_ty: Fn(&str) -> Option<&FieldType>`
    // shape that `apply` consumes (a real lookup can miss); the wrap is not
    // redundant at the call site.
    #[allow(clippy::unnecessary_wraps)]
    fn num_ty(_f: &str) -> Option<&'static FieldType> {
        // Leaks one tiny allocation per call; fine for tests.
        Some(Box::leak(Box::new(FieldType::Number)))
    }

    /// Projects the `"id"` string of every document, in order.
    fn ids_of(docs: &[Value]) -> Vec<String> {
        docs.iter()
            .map(|d| d["id"].as_str().unwrap().to_string())
            .collect()
    }

    /// Projects the (possibly missing) `"n"` value of every document, in order.
    fn ns_of(docs: &[Value]) -> Vec<Option<Value>> {
        docs.iter().map(|d| d.get("n").cloned()).collect()
    }

    #[test]
    fn none_and_empty_decode_to_default() {
        let absent = QueryOptions::parse(None).unwrap();
        assert_eq!(absent.offset, 0);

        let empty = QueryOptions::parse(Some("")).unwrap();
        assert!(empty.sort.is_empty());

        let blank = QueryOptions::parse(Some("  ")).unwrap();
        assert!(blank.limit.is_none());
    }

    #[test]
    fn decodes_sort_pairs_in_order() {
        let o = opts(r#"{"sort":[["status","asc"],["updatedAt","desc"]],"limit":50,"offset":10}"#);
        let expected_sort = vec![
            ("status".to_string(), SortDir::Asc),
            ("updatedAt".to_string(), SortDir::Desc),
        ];
        assert_eq!(o.sort, expected_sort);
        assert_eq!(o.limit, Some(50));
        assert_eq!(o.offset, 10);
    }

    #[test]
    fn rejects_negative_limit() {
        let parsed = QueryOptions::parse(Some(r#"{"limit":-1}"#));
        assert!(parsed.is_err());
    }

    #[test]
    fn rejects_fractional_offset() {
        let parsed = QueryOptions::parse(Some(r#"{"offset":1.5}"#));
        assert!(parsed.is_err());
    }

    #[test]
    fn rejects_unknown_direction() {
        let parsed = QueryOptions::parse(Some(r#"{"sort":[["a","up"]]}"#));
        assert!(parsed.is_err());
    }

    #[test]
    fn sorts_numbers_asc_with_nulls_last() {
        let docs = vec![
            json!({"id":"a","n":3}),
            json!({"id":"b"}),
            json!({"id":"c","n":1}),
            json!({"id":"d","n":2}),
        ];
        let out = opts(r#"{"sort":[["n","asc"]]}"#)
            .apply(docs, "id", num_ty)
            .unwrap();
        assert_eq!(
            ns_of(&out),
            vec![Some(json!(1)), Some(json!(2)), Some(json!(3)), None]
        );
    }

    #[test]
    fn sorts_desc_keeps_nulls_last() {
        let docs = vec![
            json!({"id":"a","n":1}),
            json!({"id":"b"}),
            json!({"id":"c","n":3}),
        ];
        let out = opts(r#"{"sort":[["n","desc"]]}"#)
            .apply(docs, "id", num_ty)
            .unwrap();
        assert_eq!(ns_of(&out), vec![Some(json!(3)), Some(json!(1)), None]);
    }

    #[test]
    fn sorts_large_integers_exactly() {
        // Two distinct integers above 2^53 that collapse to the same f64.
        // An `as_f64`-based comparison would rank them Equal (and then fall
        // through to the id tie-break); exact integer comparison must put
        // ...992 before ...993.
        let docs = vec![
            json!({"id":"a","n": 9_007_199_254_740_993_i64}),
            json!({"id":"b","n": 9_007_199_254_740_992_i64}),
        ];
        let out = opts(r#"{"sort":[["n","asc"]]}"#)
            .apply(docs, "id", num_ty)
            .unwrap();
        assert_eq!(ids_of(&out), vec!["b", "a"]);
    }

    #[test]
    fn ties_break_by_id_ascending() {
        let docs = vec![json!({"id":"z","n":1}), json!({"id":"a","n":1})];
        let out = opts(r#"{"sort":[["n","asc"]]}"#)
            .apply(docs, "id", num_ty)
            .unwrap();
        assert_eq!(ids_of(&out), vec!["a", "z"]);
    }

    #[test]
    fn offset_and_limit_after_sort() {
        let docs = vec![
            json!({"id":"a","n":4}),
            json!({"id":"b","n":1}),
            json!({"id":"c","n":3}),
            json!({"id":"d","n":2}),
        ];
        let out = opts(r#"{"sort":[["n","asc"]],"offset":1,"limit":2}"#)
            .apply(docs, "id", num_ty)
            .unwrap();
        let ns: Vec<i64> = out.iter().map(|d| d["n"].as_i64().unwrap()).collect();
        assert_eq!(ns, vec![2, 3]);
    }

    #[test]
    fn limit_zero_is_empty_and_offset_past_end_is_empty() {
        let single_doc = || vec![json!({"id":"a"})];

        let limited = opts(r#"{"limit":0}"#)
            .apply(single_doc(), "id", num_ty)
            .unwrap();
        assert!(limited.is_empty());

        let skipped = opts(r#"{"offset":9}"#)
            .apply(single_doc(), "id", num_ty)
            .unwrap();
        assert!(skipped.is_empty());
    }

    #[test]
    fn rejects_sort_on_unknown_field() {
        let outcome = opts(r#"{"sort":[["x","asc"]]}"#).apply(vec![], "id", |_| None);
        assert!(matches!(
            outcome.unwrap_err(),
            crate::error::NookError::Schema { .. }
        ));
    }

    #[test]
    fn rejects_sort_on_array_field() {
        let array_ty: &'static FieldType =
            Box::leak(Box::new(FieldType::Array(Box::new(FieldType::String))));
        let outcome = opts(r#"{"sort":[["tags","asc"]]}"#).apply(vec![], "id", |_| Some(array_ty));
        assert!(matches!(
            outcome.unwrap_err(),
            crate::error::NookError::Schema { .. }
        ));
    }
}