Skip to main content

luci/query/
parser.rs

1//! JSON query parser — converts ES-compatible JSON to [`ScoringExpression`] trees.
2//!
3//! Supports both the full object form and shorthand forms:
4//! - `{"term": {"status": {"value": "active"}}}` (full)
5//! - `{"term": {"status": "active"}}` (shorthand)
6//! - `{"match": {"title": {"query": "search engine"}}}` (full)
7//! - `{"match": {"title": "search engine"}}` (shorthand)
8//!
9//! See [[query-dsl]] and [[architecture-query-execution#Step 5]].
10
11use crate::core::{LuciError, Result};
12use serde_json::Value;
13
14use super::ast::{
15    FieldValueModifier, FunctionBoostMode, FunctionScoreMode, FusionMethod, GeoShapeValue,
16    QueryExpression, RankingExpression, ScoreFunction, ScoringExpression, SpanExpression,
17    SpatialRelation,
18};
19
20/// Reject keys outside the expected set for a given query body.
21///
22/// Preserves the speed of `Value::get()` while still rejecting typos
23/// — see [[fix-strict-search-parsing]].
24fn validate_keys<'a>(
25    node: &'a Value,
26    expected: &[&str],
27    ctx: &str,
28) -> Result<&'a serde_json::Map<String, Value>> {
29    let obj = node
30        .as_object()
31        .ok_or_else(|| LuciError::InvalidQuery(format!("{ctx}: must be an object")))?;
32    for key in obj.keys() {
33        if !expected.contains(&key.as_str()) {
34            let expected_list = expected
35                .iter()
36                .map(|k| format!("`{k}`"))
37                .collect::<Vec<_>>()
38                .join(", ");
39            return Err(LuciError::InvalidQuery(format!(
40                "{ctx}: unknown field `{key}`, expected one of {expected_list}"
41            )));
42        }
43    }
44    Ok(obj)
45}
46
47/// Read an optional unsigned-integer field, erroring when the key is
48/// present with a non-integer value.
49///
50/// Absent or JSON `null` yields `Ok(None)` so a *documented default* can
51/// apply — that is honest. But a value the caller *did* specify with the
52/// wrong type must never be silently replaced by the default: it returns
53/// an explicit error. See [[code-must-not-lie]].
54pub(crate) fn opt_u64(
55    obj: &serde_json::Map<String, Value>,
56    key: &str,
57    ctx: &str,
58) -> Result<Option<u64>> {
59    match obj.get(key) {
60        Some(v) if !v.is_null() => v.as_u64().map(Some).ok_or_else(|| {
61            LuciError::InvalidQuery(format!(
62                "{ctx}: \"{key}\" must be a non-negative integer, got {v}"
63            ))
64        }),
65        _ => Ok(None),
66    }
67}
68
69/// Read an optional floating-point field, erroring when the key is present
70/// with a non-numeric value. Mirrors [`opt_u64`] for `f64`-typed options
71/// (boosts, thresholds, factors). See [[code-must-not-lie]].
72pub(crate) fn opt_f64(
73    obj: &serde_json::Map<String, Value>,
74    key: &str,
75    ctx: &str,
76) -> Result<Option<f64>> {
77    match obj.get(key) {
78        Some(v) if !v.is_null() => v.as_f64().map(Some).ok_or_else(|| {
79            LuciError::InvalidQuery(format!("{ctx}: \"{key}\" must be a number, got {v}"))
80        }),
81        _ => Ok(None),
82    }
83}
84
85/// Read an optional string field, erroring when the key is present with a
86/// non-string value. Mirrors [`opt_u64`] for `&str`-typed options
87/// (analyzers, enum discriminants like `score_mode`/`order`). The borrow
88/// is tied to `obj` so callers `.map(String::from)` when they need owned
89/// data. See [[code-must-not-lie]].
90pub(crate) fn opt_str<'a>(
91    obj: &'a serde_json::Map<String, Value>,
92    key: &str,
93    ctx: &str,
94) -> Result<Option<&'a str>> {
95    match obj.get(key) {
96        Some(v) if !v.is_null() => v.as_str().map(Some).ok_or_else(|| {
97            LuciError::InvalidQuery(format!("{ctx}: \"{key}\" must be a string, got {v}"))
98        }),
99        _ => Ok(None),
100    }
101}
102
103/// Read an optional boolean field, erroring when the key is present with a
104/// non-boolean value. Mirrors [`opt_u64`] for `bool`-typed options
105/// (`in_order`, …). Booleans are strict in ES too — only JSON `true`/
106/// `false`, never a quoted string. See [[code-must-not-lie]].
107pub(crate) fn opt_bool(
108    obj: &serde_json::Map<String, Value>,
109    key: &str,
110    ctx: &str,
111) -> Result<Option<bool>> {
112    match obj.get(key) {
113        Some(v) if !v.is_null() => v.as_bool().map(Some).ok_or_else(|| {
114            LuciError::InvalidQuery(format!("{ctx}: \"{key}\" must be a boolean, got {v}"))
115        }),
116        _ => Ok(None),
117    }
118}
119
120// Strictness for the query DSL is implemented via [`validate_keys`]:
121// each per-query parser enumerates its known keys, and unrecognised
122// keys surface as a `LuciError::InvalidQuery` listing the valid ones.
123// Sub-queries recurse through the dispatcher, which re-validates at
124// each layer.
125
126/// Parse a JSON query into a `QueryExpression`.
127///
128/// Accepts either `{"query": {...}}` wrapper or a bare query object.
129/// Returns `QueryExpression::Scoring` for standard queries, or
130/// `QueryExpression::Ranking` for fusion queries.
131///
132/// See [[feature-rrf-retrievers]].
133pub fn parse_query_expression(json: &Value) -> Result<QueryExpression> {
134    let query_obj = if let Some(q) = json.get("query") {
135        q
136    } else {
137        json
138    };
139    parse_query_expression_node(query_obj)
140}
141
142/// Parse a query node that may be a scoring query or a ranking expression.
143fn parse_query_expression_node(node: &Value) -> Result<QueryExpression> {
144    let obj = node
145        .as_object()
146        .ok_or_else(|| LuciError::InvalidQuery("query must be a JSON object".into()))?;
147
148    // Check for ranking expressions first
149    if let Some(v) = obj.get("fusion") {
150        return parse_fusion_query(v);
151    }
152
153    // Everything else is a scoring expression
154    Ok(QueryExpression::Scoring(parse_query_node(node)?))
155}
156
157/// Parse a JSON query into a `ScoringExpression`.
158///
159/// Accepts either `{"query": {...}}` wrapper or a bare query object.
160pub fn parse_query(json: &Value) -> Result<ScoringExpression> {
161    // Unwrap optional "query" wrapper
162    let query_obj = if let Some(q) = json.get("query") {
163        q
164    } else {
165        json
166    };
167
168    parse_query_node(query_obj)
169}
170
171fn parse_query_node(node: &Value) -> Result<ScoringExpression> {
172    let obj = node
173        .as_object()
174        .ok_or_else(|| LuciError::InvalidQuery("query must be a JSON object".into()))?;
175
176    if obj.is_empty() {
177        return Err(LuciError::InvalidQuery("empty query object".into()));
178    }
179
180    // Each query type is identified by its key
181    if let Some(v) = obj.get("term") {
182        return parse_term_query(v);
183    }
184    if let Some(v) = obj.get("terms") {
185        return parse_terms_query(v);
186    }
187    if let Some(v) = obj.get("match") {
188        return parse_match_query(v);
189    }
190    if let Some(v) = obj.get("match_phrase") {
191        return parse_match_phrase_query(v);
192    }
193    if let Some(v) = obj.get("match_bool_prefix") {
194        return parse_match_bool_prefix_query(v);
195    }
196    if let Some(v) = obj.get("multi_match") {
197        return parse_multi_match_query(v);
198    }
199    if let Some(v) = obj.get("bool") {
200        return parse_bool_query(v);
201    }
202    if let Some(v) = obj.get("dis_max") {
203        return parse_dis_max_query(v);
204    }
205    if let Some(v) = obj.get("exists") {
206        return parse_exists_query(v);
207    }
208    if let Some(v) = obj.get("prefix") {
209        return parse_prefix_query(v);
210    }
211    if let Some(v) = obj.get("script_score") {
212        return parse_script_score_query(v);
213    }
214    if let Some(v) = obj.get("function_score") {
215        return parse_function_score_query(v);
216    }
217    if let Some(v) = obj.get("boosting") {
218        return parse_boosting_query(v);
219    }
220    if let Some(v) = obj.get("fuzzy") {
221        return parse_fuzzy_query(v);
222    }
223    if let Some(v) = obj.get("regexp") {
224        return parse_regexp_query(v);
225    }
226    if let Some(v) = obj.get("wildcard") {
227        return parse_wildcard_query(v);
228    }
229    if let Some(v) = obj.get("range") {
230        return parse_range_query(v);
231    }
232    if let Some(v) = obj.get("span_term") {
233        return parse_span_term_query(v);
234    }
235    if let Some(v) = obj.get("span_near") {
236        return parse_span_near_query(v);
237    }
238    if let Some(v) = obj.get("span_not") {
239        return parse_span_not_query(v);
240    }
241    if let Some(v) = obj.get("span_first") {
242        return parse_span_first_query(v);
243    }
244    if let Some(v) = obj.get("constant_score") {
245        return parse_constant_score_query(v);
246    }
247    if let Some(v) = obj.get("nested") {
248        return parse_nested_query(v);
249    }
250    if let Some(v) = obj.get("geo_distance") {
251        return parse_geo_distance_query(v);
252    }
253    if let Some(v) = obj.get("geo_bounding_box") {
254        return parse_geo_bbox_query(v);
255    }
256    if let Some(v) = obj.get("geo_shape") {
257        return parse_geo_shape_query(v);
258    }
259    if let Some(v) = obj.get("knn") {
260        return parse_knn_query(v);
261    }
262    if obj.contains_key("match_all") {
263        return Ok(ScoringExpression::MatchAll);
264    }
265    if obj.contains_key("match_none") {
266        return Ok(ScoringExpression::MatchNone);
267    }
268
269    let key = obj.keys().next().unwrap();
270    Err(LuciError::InvalidQuery(format!(
271        "unknown query type: {key}"
272    )))
273}
274
275/// Wrap a query expression with a Boost if boost is Some and != 1.0.
276fn maybe_boost(ast: ScoringExpression, boost: Option<f64>) -> ScoringExpression {
277    match boost {
278        Some(b) if (b - 1.0).abs() > f64::EPSILON => ScoringExpression::Boost {
279            query: Box::new(ast),
280            boost: b as f32,
281        },
282        _ => ast,
283    }
284}
285
286/// Parse `{"field": "value"}` or `{"field": {"value": "..."}}`.
287fn parse_term_query(node: &Value) -> Result<ScoringExpression> {
288    let obj = node
289        .as_object()
290        .ok_or_else(|| LuciError::InvalidQuery("term query must be an object".into()))?;
291
292    let (field, field_val) = obj
293        .iter()
294        .next()
295        .ok_or_else(|| LuciError::InvalidQuery("term query: missing field".into()))?;
296
297    let (value, boost) = match field_val {
298        Value::String(s) => (s.clone(), None),
299        Value::Number(n) => (n.to_string(), None),
300        Value::Bool(b) => (b.to_string(), None),
301        Value::Object(_) => {
302            let ctx = format!("term[{field}]");
303            let inner = validate_keys(field_val, &["value", "boost"], &ctx)?;
304            let v = inner
305                .get("value")
306                .ok_or_else(|| LuciError::InvalidQuery(format!("{ctx}: missing 'value' field")))?;
307            let value = match v {
308                Value::String(s) => s.clone(),
309                Value::Number(n) => n.to_string(),
310                Value::Bool(b) => b.to_string(),
311                _ => {
312                    return Err(LuciError::InvalidQuery(format!(
313                        "{ctx}: 'value' must be a string, number, or bool"
314                    )));
315                }
316            };
317            (value, opt_f64(inner, "boost", &ctx)?)
318        }
319        _ => {
320            return Err(LuciError::InvalidQuery(
321                "term query: invalid value type".into(),
322            ));
323        }
324    };
325
326    Ok(maybe_boost(
327        ScoringExpression::Term {
328            field: field.clone(),
329            value,
330        },
331        boost,
332    ))
333}
334
335fn parse_terms_query(node: &Value) -> Result<ScoringExpression> {
336    let obj = node
337        .as_object()
338        .ok_or_else(|| LuciError::InvalidQuery("terms query must be an object".into()))?;
339
340    let (field, values_val) = obj
341        .iter()
342        .next()
343        .ok_or_else(|| LuciError::InvalidQuery("terms query: missing field".into()))?;
344
345    let arr = values_val
346        .as_array()
347        .ok_or_else(|| LuciError::InvalidQuery("terms query: values must be an array".into()))?;
348
349    let values: Vec<String> = arr
350        .iter()
351        .map(|v| match v {
352            Value::String(s) => Ok(s.clone()),
353            Value::Number(n) => Ok(n.to_string()),
354            Value::Bool(b) => Ok(b.to_string()),
355            _ => Err(LuciError::InvalidQuery(
356                "terms query: invalid value type in array".into(),
357            )),
358        })
359        .collect::<Result<_>>()?;
360
361    Ok(ScoringExpression::Terms {
362        field: field.clone(),
363        values,
364    })
365}
366
367/// Parse `{"field": "query"}` or `{"field": {"query": "...", "analyzer": "..."}}`.
368fn parse_match_query(node: &Value) -> Result<ScoringExpression> {
369    let obj = node
370        .as_object()
371        .ok_or_else(|| LuciError::InvalidQuery("match query must be an object".into()))?;
372
373    let (field, field_val) = obj
374        .iter()
375        .next()
376        .ok_or_else(|| LuciError::InvalidQuery("match query: missing field".into()))?;
377
378    let (query, analyzer, boost) = match field_val {
379        Value::String(s) => (s.clone(), None, None),
380        Value::Object(_) => {
381            let ctx = format!("match[{field}]");
382            let inner = validate_keys(field_val, &["query", "analyzer", "boost"], &ctx)?;
383            let q = inner
384                .get("query")
385                .and_then(|v| v.as_str())
386                .ok_or_else(|| LuciError::InvalidQuery(format!("{ctx}: missing 'query' field")))?
387                .to_string();
388            let a = opt_str(inner, "analyzer", &ctx)?.map(String::from);
389            (q, a, opt_f64(inner, "boost", &ctx)?)
390        }
391        _ => {
392            return Err(LuciError::InvalidQuery(
393                "match query: invalid field value".into(),
394            ));
395        }
396    };
397
398    Ok(maybe_boost(
399        ScoringExpression::Match {
400            field: field.clone(),
401            query,
402            analyzer,
403        },
404        boost,
405    ))
406}
407
408fn parse_match_phrase_query(node: &Value) -> Result<ScoringExpression> {
409    let obj = node
410        .as_object()
411        .ok_or_else(|| LuciError::InvalidQuery("match_phrase query must be an object".into()))?;
412
413    let (field, field_val) = obj
414        .iter()
415        .next()
416        .ok_or_else(|| LuciError::InvalidQuery("match_phrase query: missing field".into()))?;
417
418    let (query, analyzer) = match field_val {
419        Value::String(s) => (s.clone(), None),
420        Value::Object(_) => {
421            let ctx = format!("match_phrase[{field}]");
422            let inner = validate_keys(field_val, &["query", "analyzer"], &ctx)?;
423            let q = inner
424                .get("query")
425                .and_then(|v| v.as_str())
426                .ok_or_else(|| LuciError::InvalidQuery(format!("{ctx}: missing 'query'")))?
427                .to_string();
428            let a = opt_str(inner, "analyzer", &ctx)?.map(String::from);
429            (q, a)
430        }
431        _ => {
432            return Err(LuciError::InvalidQuery(
433                "match_phrase: invalid field value".into(),
434            ));
435        }
436    };
437
438    Ok(ScoringExpression::MatchPhrase {
439        field: field.clone(),
440        query,
441        analyzer,
442    })
443}
444
445fn parse_match_bool_prefix_query(node: &Value) -> Result<ScoringExpression> {
446    let obj = node
447        .as_object()
448        .ok_or_else(|| LuciError::InvalidQuery("match_bool_prefix must be an object".into()))?;
449
450    let (field, field_val) = obj
451        .iter()
452        .next()
453        .ok_or_else(|| LuciError::InvalidQuery("match_bool_prefix: missing field".into()))?;
454
455    let (query, analyzer) = match field_val {
456        Value::String(s) => (s.clone(), None),
457        Value::Object(_) => {
458            let ctx = format!("match_bool_prefix[{field}]");
459            let inner = validate_keys(field_val, &["query", "analyzer"], &ctx)?;
460            let q = inner
461                .get("query")
462                .and_then(|v| v.as_str())
463                .ok_or_else(|| LuciError::InvalidQuery(format!("{ctx}: missing 'query'")))?
464                .to_string();
465            let a = opt_str(inner, "analyzer", &ctx)?.map(String::from);
466            (q, a)
467        }
468        _ => {
469            return Err(LuciError::InvalidQuery(
470                "match_bool_prefix: invalid field value".into(),
471            ));
472        }
473    };
474
475    Ok(ScoringExpression::MatchBoolPrefix {
476        field: field.clone(),
477        query,
478        analyzer,
479    })
480}
481
482fn parse_bool_query(node: &Value) -> Result<ScoringExpression> {
483    let obj = validate_keys(
484        node,
485        &[
486            "must",
487            "should",
488            "must_not",
489            "filter",
490            "minimum_should_match",
491            "boost",
492        ],
493        "bool",
494    )?;
495
496    let parse_clauses = |key: &str| -> Result<Vec<ScoringExpression>> {
497        match obj.get(key) {
498            None => Ok(Vec::new()),
499            Some(Value::Array(arr)) => arr.iter().map(parse_query_node).collect(),
500            Some(single) => Ok(vec![parse_query_node(single)?]),
501        }
502    };
503
504    let boost = opt_f64(obj, "boost", "bool")?;
505    Ok(maybe_boost(
506        ScoringExpression::Bool {
507            must: parse_clauses("must")?,
508            should: parse_clauses("should")?,
509            must_not: parse_clauses("must_not")?,
510            filter: parse_clauses("filter")?,
511            minimum_should_match: opt_u64(obj, "minimum_should_match", "bool")?.map(|v| v as u32),
512        },
513        boost,
514    ))
515}
516
517fn parse_dis_max_query(node: &Value) -> Result<ScoringExpression> {
518    let obj = validate_keys(node, &["queries", "tie_breaker", "boost"], "dis_max")?;
519    let queries = match obj.get("queries") {
520        Some(Value::Array(arr)) => arr
521            .iter()
522            .map(parse_query_node)
523            .collect::<Result<Vec<_>>>()?,
524        _ => {
525            return Err(LuciError::InvalidQuery(
526                "dis_max: missing 'queries' array".into(),
527            ));
528        }
529    };
530    let tie_breaker = opt_f64(obj, "tie_breaker", "dis_max")?.unwrap_or(0.0) as f32;
531    let boost = opt_f64(obj, "boost", "dis_max")?;
532    Ok(maybe_boost(
533        ScoringExpression::DisMax {
534            queries,
535            tie_breaker,
536        },
537        boost,
538    ))
539}
540
541fn parse_exists_query(node: &Value) -> Result<ScoringExpression> {
542    let obj = validate_keys(node, &["field"], "exists")?;
543    let field = obj
544        .get("field")
545        .and_then(|v| v.as_str())
546        .ok_or_else(|| LuciError::InvalidQuery("exists: missing 'field'".into()))?
547        .to_string();
548    Ok(ScoringExpression::Exists { field })
549}
550
551fn parse_prefix_query(node: &Value) -> Result<ScoringExpression> {
552    let obj = node
553        .as_object()
554        .ok_or_else(|| LuciError::InvalidQuery("prefix query must be an object".into()))?;
555
556    let (field, field_val) = obj
557        .iter()
558        .next()
559        .ok_or_else(|| LuciError::InvalidQuery("prefix query: missing field".into()))?;
560
561    let (value, boost) = match field_val {
562        Value::String(s) => (s.clone(), None),
563        Value::Object(_) => {
564            let ctx = format!("prefix[{field}]");
565            let inner = validate_keys(field_val, &["value", "boost"], &ctx)?;
566            let value = inner
567                .get("value")
568                .and_then(|v| v.as_str())
569                .ok_or_else(|| LuciError::InvalidQuery(format!("{ctx}: missing 'value'")))?
570                .to_string();
571            (value, opt_f64(inner, "boost", &ctx)?)
572        }
573        _ => {
574            return Err(LuciError::InvalidQuery(
575                "prefix query: invalid value type".into(),
576            ));
577        }
578    };
579
580    Ok(maybe_boost(
581        ScoringExpression::Prefix {
582            field: field.clone(),
583            value,
584        },
585        boost,
586    ))
587}
588
589fn parse_range_query(node: &Value) -> Result<ScoringExpression> {
590    let obj = node
591        .as_object()
592        .ok_or_else(|| LuciError::InvalidQuery("range query must be an object".into()))?;
593
594    let (field, field_val) = obj
595        .iter()
596        .next()
597        .ok_or_else(|| LuciError::InvalidQuery("range query: missing field".into()))?;
598
599    let ctx = format!("range[{field}]");
600    let range_obj = validate_keys(field_val, &["gte", "gt", "lte", "lt", "boost"], &ctx)?;
601    let base = ScoringExpression::Range {
602        field: field.clone(),
603        gte: opt_f64(range_obj, "gte", &ctx)?,
604        gt: opt_f64(range_obj, "gt", &ctx)?,
605        lte: opt_f64(range_obj, "lte", &ctx)?,
606        lt: opt_f64(range_obj, "lt", &ctx)?,
607    };
608    let boost = opt_f64(range_obj, "boost", &ctx)?;
609    Ok(maybe_boost(base, boost))
610}
611
612fn parse_script_score_query(node: &Value) -> Result<ScoringExpression> {
613    let obj = validate_keys(node, &["query", "script"], "script_score")?;
614    let query = match obj.get("query") {
615        Some(q) => parse_query_node(q)?,
616        None => ScoringExpression::MatchAll,
617    };
618    let script_val = obj
619        .get("script")
620        .ok_or_else(|| LuciError::InvalidQuery("script_score: missing 'script' object".into()))?;
621    let script_obj = validate_keys(script_val, &["source", "params"], "script_score.script")?;
622    let source = script_obj
623        .get("source")
624        .and_then(|v| v.as_str())
625        .ok_or_else(|| LuciError::InvalidQuery("script_score: missing 'source'".into()))?
626        .to_string();
627    let mut params = std::collections::HashMap::new();
628    if let Some(p) = script_obj.get("params") {
629        let p = p.as_object().ok_or_else(|| {
630            LuciError::InvalidQuery("script_score.script.params: must be an object".into())
631        })?;
632        for (k, v) in p {
633            let n = v.as_f64().ok_or_else(|| {
634                LuciError::InvalidQuery(format!(
635                    "script_score.script.params: \"{k}\" must be a number, got {v}"
636                ))
637            })?;
638            params.insert(k.clone(), n);
639        }
640    }
641    Ok(ScoringExpression::ScriptScore {
642        query: Box::new(query),
643        script: source,
644        params,
645    })
646}
647
648fn parse_function_score_query(node: &Value) -> Result<ScoringExpression> {
649    let obj = validate_keys(
650        node,
651        &[
652            "query",
653            "functions",
654            "field_value_factor",
655            "random_score",
656            "weight",
657            "score_mode",
658            "boost_mode",
659            "boost",
660        ],
661        "function_score",
662    )?;
663
664    let query = match obj.get("query") {
665        Some(q) => parse_query_node(q)?,
666        None => ScoringExpression::MatchAll,
667    };
668
669    let mut functions = Vec::new();
670
671    // Parse "functions" array — each element is a full score-function
672    // object with its own strict shape.
673    if let Some(Value::Array(funcs)) = obj.get("functions") {
674        for func_obj in funcs {
675            if let Some(f) = parse_score_function(func_obj)? {
676                functions.push(f);
677            }
678        }
679    }
680
681    // Parse shorthand score functions at the top level.
682    if let Some(fvf) = obj.get("field_value_factor") {
683        functions.push(parse_field_value_factor(fvf)?);
684    }
685    if let Some(rs) = obj.get("random_score") {
686        functions.push(parse_random_score(rs)?);
687    }
688    if let Some(weight) = opt_f64(obj, "weight", "function_score")? {
689        functions.push(ScoreFunction::Weight(weight as f32));
690    }
691
692    let score_mode = match opt_str(obj, "score_mode", "function_score")? {
693        Some("multiply") | None => FunctionScoreMode::Multiply,
694        Some("sum") => FunctionScoreMode::Sum,
695        Some("avg") => FunctionScoreMode::Avg,
696        Some("first") => FunctionScoreMode::First,
697        Some("max") => FunctionScoreMode::Max,
698        Some("min") => FunctionScoreMode::Min,
699        Some(other) => {
700            return Err(LuciError::InvalidQuery(format!(
701                "function_score: unknown score_mode '{other}'"
702            )));
703        }
704    };
705
706    let boost_mode = match opt_str(obj, "boost_mode", "function_score")? {
707        Some("multiply") | None => FunctionBoostMode::Multiply,
708        Some("replace") => FunctionBoostMode::Replace,
709        Some("sum") => FunctionBoostMode::Sum,
710        Some("avg") => FunctionBoostMode::Avg,
711        Some("max") => FunctionBoostMode::Max,
712        Some("min") => FunctionBoostMode::Min,
713        Some(other) => {
714            return Err(LuciError::InvalidQuery(format!(
715                "function_score: unknown boost_mode '{other}'"
716            )));
717        }
718    };
719
720    let boost = opt_f64(obj, "boost", "function_score")?;
721    let base = ScoringExpression::FunctionScore {
722        query: Box::new(query),
723        functions,
724        score_mode,
725        boost_mode,
726    };
727    Ok(maybe_boost(base, boost))
728}
729
730fn parse_score_function(node: &Value) -> Result<Option<ScoreFunction>> {
731    // `query` is accepted as a filter on the function but is not yet
732    // plumbed through to [`ScoreFunction`]; silent acceptance is
733    // intentional here.
734    let obj = validate_keys(
735        node,
736        &["query", "field_value_factor", "random_score", "weight"],
737        "function_score.functions[]",
738    )?;
739    if let Some(fvf) = obj.get("field_value_factor") {
740        return Ok(Some(parse_field_value_factor(fvf)?));
741    }
742    if let Some(rs) = obj.get("random_score") {
743        return Ok(Some(parse_random_score(rs)?));
744    }
745    if let Some(weight) = opt_f64(obj, "weight", "function_score.functions[]")? {
746        return Ok(Some(ScoreFunction::Weight(weight as f32)));
747    }
748    Ok(None)
749}
750
751fn parse_field_value_factor(node: &Value) -> Result<ScoreFunction> {
752    let obj = validate_keys(
753        node,
754        &["field", "factor", "modifier", "missing"],
755        "field_value_factor",
756    )?;
757    let field = obj
758        .get("field")
759        .and_then(|v| v.as_str())
760        .ok_or_else(|| LuciError::InvalidQuery("field_value_factor: missing 'field'".into()))?
761        .to_string();
762    let factor = opt_f64(obj, "factor", "field_value_factor")?.unwrap_or(1.0) as f32;
763    let modifier = match opt_str(obj, "modifier", "field_value_factor")? {
764        Some("log1p") => FieldValueModifier::Log1p,
765        Some("log2p") => FieldValueModifier::Log2p,
766        Some("ln1p") => FieldValueModifier::Ln1p,
767        Some("ln2p") => FieldValueModifier::Ln2p,
768        Some("sqrt") => FieldValueModifier::Sqrt,
769        Some("square") => FieldValueModifier::Square,
770        Some("reciprocal") => FieldValueModifier::Reciprocal,
771        None | Some("none") => FieldValueModifier::None,
772        Some(other) => {
773            return Err(LuciError::InvalidQuery(format!(
774                "field_value_factor: unknown modifier '{other}'"
775            )));
776        }
777    };
778    let missing = opt_f64(obj, "missing", "field_value_factor")?.unwrap_or(1.0);
779    Ok(ScoreFunction::FieldValueFactor {
780        field,
781        factor,
782        modifier,
783        missing,
784    })
785}
786
787fn parse_random_score(node: &Value) -> Result<ScoreFunction> {
788    let obj = validate_keys(node, &["seed"], "random_score")?;
789    let seed = opt_u64(obj, "seed", "random_score")?.unwrap_or(0);
790    Ok(ScoreFunction::RandomScore { seed })
791}
792
793fn parse_boosting_query(node: &Value) -> Result<ScoringExpression> {
794    let obj = validate_keys(
795        node,
796        &["positive", "negative", "negative_boost"],
797        "boosting",
798    )?;
799    let positive = obj
800        .get("positive")
801        .ok_or_else(|| LuciError::InvalidQuery("boosting: missing 'positive'".into()))?;
802    let negative = obj
803        .get("negative")
804        .ok_or_else(|| LuciError::InvalidQuery("boosting: missing 'negative'".into()))?;
805    let negative_boost = opt_f64(obj, "negative_boost", "boosting")?.unwrap_or(0.5) as f32;
806    Ok(ScoringExpression::Boosting {
807        positive: Box::new(parse_query_node(positive)?),
808        negative: Box::new(parse_query_node(negative)?),
809        negative_boost,
810    })
811}
812
813fn parse_fuzzy_query(node: &Value) -> Result<ScoringExpression> {
814    let obj = node
815        .as_object()
816        .ok_or_else(|| LuciError::InvalidQuery("fuzzy query must be an object".into()))?;
817    let (field, field_val) = obj
818        .iter()
819        .next()
820        .ok_or_else(|| LuciError::InvalidQuery("fuzzy query: missing field".into()))?;
821    let (value, fuzziness, boost) = match field_val {
822        Value::String(s) => (s.clone(), 2u32, None),
823        Value::Object(_) => {
824            let ctx = format!("fuzzy[{field}]");
825            let inner = validate_keys(field_val, &["value", "fuzziness", "boost"], &ctx)?;
826            let value = inner
827                .get("value")
828                .and_then(|v| v.as_str())
829                .ok_or_else(|| LuciError::InvalidQuery(format!("{ctx}: missing 'value'")))?
830                .to_string();
831            let fuzziness = opt_u64(inner, "fuzziness", &ctx)?.unwrap_or(2) as u32;
832            (value, fuzziness, opt_f64(inner, "boost", &ctx)?)
833        }
834        other => (other.to_string(), 2u32, None),
835    };
836    Ok(maybe_boost(
837        ScoringExpression::Fuzzy {
838            field: field.clone(),
839            value,
840            fuzziness,
841        },
842        boost,
843    ))
844}
845
846fn parse_regexp_query(node: &Value) -> Result<ScoringExpression> {
847    let obj = node
848        .as_object()
849        .ok_or_else(|| LuciError::InvalidQuery("regexp query must be an object".into()))?;
850    let (field, field_val) = obj
851        .iter()
852        .next()
853        .ok_or_else(|| LuciError::InvalidQuery("regexp query: missing field".into()))?;
854    let (value, boost) = parse_pattern_value(field_val, &format!("regexp[{field}]"))?;
855    Ok(maybe_boost(
856        ScoringExpression::Regexp {
857            field: field.clone(),
858            value,
859        },
860        boost,
861    ))
862}
863
864fn parse_wildcard_query(node: &Value) -> Result<ScoringExpression> {
865    let obj = node
866        .as_object()
867        .ok_or_else(|| LuciError::InvalidQuery("wildcard query must be an object".into()))?;
868    let (field, field_val) = obj
869        .iter()
870        .next()
871        .ok_or_else(|| LuciError::InvalidQuery("wildcard query: missing field".into()))?;
872    let (value, boost) = parse_pattern_value(field_val, &format!("wildcard[{field}]"))?;
873    Ok(maybe_boost(
874        ScoringExpression::Wildcard {
875            field: field.clone(),
876            value,
877        },
878        boost,
879    ))
880}
881
882/// Shared parser for regexp/wildcard field values (`"x"` shorthand or
883/// `{"value": "x", "boost": 2.0}`).
884fn parse_pattern_value(field_val: &Value, ctx: &str) -> Result<(String, Option<f64>)> {
885    match field_val {
886        Value::String(s) => Ok((s.clone(), None)),
887        Value::Object(_) => {
888            let inner = validate_keys(field_val, &["value", "boost"], ctx)?;
889            let value = inner
890                .get("value")
891                .and_then(|v| v.as_str())
892                .ok_or_else(|| LuciError::InvalidQuery(format!("{ctx}: missing 'value'")))?
893                .to_string();
894            Ok((value, opt_f64(inner, "boost", ctx)?))
895        }
896        other => Ok((other.to_string(), None)),
897    }
898}
899
900fn parse_multi_match_query(node: &Value) -> Result<ScoringExpression> {
901    let obj = validate_keys(
902        node,
903        &["query", "fields", "analyzer", "type", "tie_breaker"],
904        "multi_match",
905    )?;
906    let query = obj
907        .get("query")
908        .and_then(|v| v.as_str())
909        .ok_or_else(|| LuciError::InvalidQuery("multi_match: missing 'query'".into()))?
910        .to_string();
911    let fields = obj
912        .get("fields")
913        .and_then(|v| v.as_array())
914        .ok_or_else(|| LuciError::InvalidQuery("multi_match: missing 'fields' array".into()))?
915        .iter()
916        .map(|v| {
917            v.as_str().map(String::from).ok_or_else(|| {
918                LuciError::InvalidQuery(format!(
919                    "multi_match: fields[] entries must be strings, got {v}"
920                ))
921            })
922        })
923        .collect::<Result<Vec<_>>>()?;
924    let analyzer = opt_str(obj, "analyzer", "multi_match")?.map(String::from);
925    let mm_type = opt_str(obj, "type", "multi_match")?.unwrap_or("best_fields");
926    let default_tie_breaker = match mm_type {
927        "best_fields" => 0.0,
928        "most_fields" | "bool_prefix" => 1.0,
929        other => {
930            return Err(LuciError::InvalidQuery(format!(
931                "multi_match: unsupported type '{other}', expected one of \
932                 best_fields, most_fields, bool_prefix"
933            )));
934        }
935    };
936    let tie_breaker = opt_f64(obj, "tie_breaker", "multi_match")?
937        .map(|v| v as f32)
938        .unwrap_or(default_tie_breaker);
939    Ok(ScoringExpression::MultiMatch {
940        fields,
941        query,
942        analyzer,
943        tie_breaker,
944    })
945}
946
947// Top-level span parsers delegate to the SpanExpression parsers and
948// wrap in ScoringExpression::Span(...). One representation per concept
949// — no variant duplication across the two enums.
950
951fn parse_span_term_query(node: &Value) -> Result<ScoringExpression> {
952    Ok(ScoringExpression::Span(parse_span_expression_term(node)?))
953}
954
955fn parse_span_near_query(node: &Value) -> Result<ScoringExpression> {
956    Ok(ScoringExpression::Span(parse_span_expression_near(node)?))
957}
958
959fn parse_span_not_query(node: &Value) -> Result<ScoringExpression> {
960    Ok(ScoringExpression::Span(parse_span_expression_not(node)?))
961}
962
963fn parse_span_first_query(node: &Value) -> Result<ScoringExpression> {
964    Ok(ScoringExpression::Span(parse_span_expression_first(node)?))
965}
966
967/// Parse a span-typed JSON node into a [`SpanExpression`]. This is
968/// the sole producer of `SpanExpression`, so the AST type guarantees
969/// that anything inside `SpanFirst.query` or `SpanNot.{include,exclude}`
970/// is span-composable. Non-span input is rejected here with a parse
971/// error — validation happens as early as possible, at parse time.
972fn parse_span_expression_node(node: &Value) -> Result<SpanExpression> {
973    let obj = node.as_object().ok_or_else(|| {
974        LuciError::InvalidQuery("span_first/span_not inner must be a span query object".into())
975    })?;
976    if let Some(v) = obj.get("span_term") {
977        return parse_span_expression_term(v);
978    }
979    if let Some(v) = obj.get("span_near") {
980        return parse_span_expression_near(v);
981    }
982    if let Some(v) = obj.get("span_not") {
983        return parse_span_expression_not(v);
984    }
985    if let Some(v) = obj.get("span_first") {
986        return parse_span_expression_first(v);
987    }
988    Err(LuciError::InvalidQuery(
989        "span_first/span_not inner must be one of: span_term, span_near, span_not, span_first"
990            .into(),
991    ))
992}
993
994fn parse_span_expression_term(node: &Value) -> Result<SpanExpression> {
995    let obj = node
996        .as_object()
997        .ok_or_else(|| LuciError::InvalidQuery("span_term must be an object".into()))?;
998    let (field, field_val) = obj
999        .iter()
1000        .next()
1001        .ok_or_else(|| LuciError::InvalidQuery("span_term: missing field".into()))?;
1002    let value = match field_val {
1003        Value::String(s) => s.clone(),
1004        Value::Object(_) => {
1005            let ctx = format!("span_term[{field}]");
1006            let inner = validate_keys(field_val, &["value"], &ctx)?;
1007            inner
1008                .get("value")
1009                .and_then(|v| v.as_str())
1010                .ok_or_else(|| LuciError::InvalidQuery(format!("{ctx}: missing 'value'")))?
1011                .to_string()
1012        }
1013        other => other.to_string(),
1014    };
1015    Ok(SpanExpression::SpanTerm {
1016        field: field.clone(),
1017        value,
1018    })
1019}
1020
1021fn parse_span_expression_near(node: &Value) -> Result<SpanExpression> {
1022    let obj = validate_keys(node, &["clauses", "slop", "in_order"], "span_near")?;
1023    let clauses = obj
1024        .get("clauses")
1025        .and_then(|v| v.as_array())
1026        .ok_or_else(|| LuciError::InvalidQuery("span_near: missing 'clauses'".into()))?;
1027    if clauses.is_empty() {
1028        return Err(LuciError::InvalidQuery("span_near: no clauses".into()));
1029    }
1030    let slop = opt_u64(obj, "slop", "span_near")?
1031        .map(|v| v as u32)
1032        .unwrap_or(0);
1033    let in_order = opt_bool(obj, "in_order", "span_near")?.unwrap_or(true);
1034    let mut field: Option<String> = None;
1035    let mut terms: Vec<String> = Vec::with_capacity(clauses.len());
1036    for clause in clauses {
1037        let inner = clause.get("span_term").ok_or_else(|| {
1038            LuciError::InvalidQuery("span_near.clauses[] must be span_term".into())
1039        })?;
1040        let sub = parse_span_expression_term(inner)?;
1041        let SpanExpression::SpanTerm { field: f, value: v } = sub else {
1042            return Err(LuciError::InvalidQuery(
1043                "span_near.clauses[] must be span_term".into(),
1044            ));
1045        };
1046        if let Some(existing) = &field {
1047            if existing != &f {
1048                return Err(LuciError::InvalidQuery(format!(
1049                    "span_near: all clauses must be on the same field; got {existing:?} and {f:?}"
1050                )));
1051            }
1052        } else {
1053            field = Some(f);
1054        }
1055        terms.push(v);
1056    }
1057    let field = field.ok_or_else(|| LuciError::InvalidQuery("span_near: no clauses".into()))?;
1058    Ok(SpanExpression::SpanNear {
1059        field,
1060        terms,
1061        slop,
1062        in_order,
1063    })
1064}
1065
1066fn parse_span_expression_not(node: &Value) -> Result<SpanExpression> {
1067    let obj = validate_keys(node, &["include", "exclude"], "span_not")?;
1068    let include = obj
1069        .get("include")
1070        .ok_or_else(|| LuciError::InvalidQuery("span_not: missing 'include'".into()))?;
1071    let exclude = obj
1072        .get("exclude")
1073        .ok_or_else(|| LuciError::InvalidQuery("span_not: missing 'exclude'".into()))?;
1074    Ok(SpanExpression::SpanNot {
1075        include: Box::new(parse_span_expression_node(include)?),
1076        exclude: Box::new(parse_span_expression_node(exclude)?),
1077    })
1078}
1079
1080fn parse_span_expression_first(node: &Value) -> Result<SpanExpression> {
1081    let obj = validate_keys(node, &["match", "end"], "span_first")?;
1082    let match_query = obj
1083        .get("match")
1084        .ok_or_else(|| LuciError::InvalidQuery("span_first: missing 'match'".into()))?;
1085    let end = obj
1086        .get("end")
1087        .and_then(|v| v.as_u64())
1088        .ok_or_else(|| LuciError::InvalidQuery("span_first: missing 'end'".into()))?
1089        as u32;
1090    Ok(SpanExpression::SpanFirst {
1091        query: Box::new(parse_span_expression_node(match_query)?),
1092        end,
1093    })
1094}
1095
1096fn parse_constant_score_query(node: &Value) -> Result<ScoringExpression> {
1097    let obj = validate_keys(node, &["filter", "boost"], "constant_score")?;
1098    let filter = obj
1099        .get("filter")
1100        .ok_or_else(|| LuciError::InvalidQuery("constant_score: missing 'filter'".into()))?;
1101    let query = parse_query_node(filter)?;
1102    let boost = opt_f64(obj, "boost", "constant_score")?
1103        .map(|f| f as f32)
1104        .unwrap_or(1.0);
1105    Ok(ScoringExpression::ConstantScore {
1106        query: Box::new(query),
1107        boost,
1108    })
1109}
1110
1111fn parse_nested_query(node: &Value) -> Result<ScoringExpression> {
1112    let obj = validate_keys(node, &["path", "query", "inner_hits"], "nested")?;
1113    let path = obj
1114        .get("path")
1115        .and_then(|v| v.as_str())
1116        .ok_or_else(|| LuciError::InvalidQuery("nested: missing 'path'".into()))?
1117        .to_string();
1118    let query = obj
1119        .get("query")
1120        .ok_or_else(|| LuciError::InvalidQuery("nested: missing 'query'".into()))?;
1121    let inner_hits = match obj.get("inner_hits") {
1122        Some(ih) => {
1123            let ih_obj = validate_keys(ih, &["name", "size", "from"], "nested.inner_hits")?;
1124            Some(crate::query::ast::InnerHitsConfig {
1125                name: opt_str(ih_obj, "name", "nested.inner_hits")?.map(String::from),
1126                size: opt_u64(ih_obj, "size", "nested.inner_hits")?
1127                    .map(|v| v as usize)
1128                    .unwrap_or(3),
1129                from: opt_u64(ih_obj, "from", "nested.inner_hits")?
1130                    .map(|v| v as usize)
1131                    .unwrap_or(0),
1132            })
1133        }
1134        None => None,
1135    };
1136    Ok(ScoringExpression::Nested {
1137        path,
1138        query: Box::new(parse_query_node(query)?),
1139        inner_hits,
1140    })
1141}
1142
1143fn parse_geo_distance_query(node: &Value) -> Result<ScoringExpression> {
1144    let obj = node
1145        .as_object()
1146        .ok_or_else(|| LuciError::InvalidQuery("geo_distance must be an object".into()))?;
1147
1148    let distance = obj
1149        .get("distance")
1150        .and_then(|v| v.as_str())
1151        .ok_or_else(|| LuciError::InvalidQuery("geo_distance: missing 'distance'".into()))?
1152        .to_string();
1153
1154    // Find the field (any key that's not "distance")
1155    for (key, val) in obj {
1156        if key == "distance" {
1157            continue;
1158        }
1159        let point = crate::spatial::geo::GeoPoint::from_json(val).ok_or_else(|| {
1160            LuciError::InvalidQuery(format!("geo_distance: invalid geo point for field '{key}'"))
1161        })?;
1162        return Ok(ScoringExpression::GeoDistance {
1163            field: key.clone(),
1164            lat: point.lat,
1165            lon: point.lon,
1166            distance,
1167        });
1168    }
1169    Err(LuciError::InvalidQuery(
1170        "geo_distance: missing field".into(),
1171    ))
1172}
1173
1174fn parse_geo_bbox_query(node: &Value) -> Result<ScoringExpression> {
1175    let obj = node
1176        .as_object()
1177        .ok_or_else(|| LuciError::InvalidQuery("geo_bounding_box must be an object".into()))?;
1178
1179    for (key, val) in obj {
1180        let bbox = val.as_object().ok_or_else(|| {
1181            LuciError::InvalidQuery("geo_bounding_box: field value must be an object".into())
1182        })?;
1183        let tl = bbox.get("top_left").ok_or_else(|| {
1184            LuciError::InvalidQuery("geo_bounding_box: missing 'top_left'".into())
1185        })?;
1186        let br = bbox.get("bottom_right").ok_or_else(|| {
1187            LuciError::InvalidQuery("geo_bounding_box: missing 'bottom_right'".into())
1188        })?;
1189        let tl_point = crate::spatial::geo::GeoPoint::from_json(tl)
1190            .ok_or_else(|| LuciError::InvalidQuery("invalid top_left".into()))?;
1191        let br_point = crate::spatial::geo::GeoPoint::from_json(br)
1192            .ok_or_else(|| LuciError::InvalidQuery("invalid bottom_right".into()))?;
1193
1194        return Ok(ScoringExpression::GeoBoundingBox {
1195            field: key.clone(),
1196            top_left_lat: tl_point.lat,
1197            top_left_lon: tl_point.lon,
1198            bottom_right_lat: br_point.lat,
1199            bottom_right_lon: br_point.lon,
1200        });
1201    }
1202    Err(LuciError::InvalidQuery(
1203        "geo_bounding_box: missing field".into(),
1204    ))
1205}
1206
1207fn parse_geo_shape_query(node: &Value) -> Result<ScoringExpression> {
1208    let obj = node
1209        .as_object()
1210        .ok_or_else(|| LuciError::InvalidQuery("geo_shape must be an object".into()))?;
1211
1212    for (key, val) in obj {
1213        let field_obj = val.as_object().ok_or_else(|| {
1214            LuciError::InvalidQuery(format!("geo_shape: field '{key}' must be an object"))
1215        })?;
1216
1217        let shape_val = field_obj
1218            .get("shape")
1219            .ok_or_else(|| LuciError::InvalidQuery("geo_shape: missing 'shape'".into()))?;
1220
1221        let relation_str = opt_str(field_obj, "relation", "geo_shape")?.unwrap_or("intersects");
1222
1223        let relation = match relation_str {
1224            "intersects" | "INTERSECTS" => SpatialRelation::Intersects,
1225            "within" | "WITHIN" => SpatialRelation::Within,
1226            "contains" | "CONTAINS" => SpatialRelation::Contains,
1227            "disjoint" | "DISJOINT" => SpatialRelation::Disjoint,
1228            "touches" | "TOUCHES" => SpatialRelation::Touches,
1229            "crosses" | "CROSSES" => SpatialRelation::Crosses,
1230            "overlaps" | "OVERLAPS" => SpatialRelation::Overlaps,
1231            "equals" | "EQUALS" => SpatialRelation::Equals,
1232            "covers" | "COVERS" => SpatialRelation::Covers,
1233            "coveredby" | "COVEREDBY" => SpatialRelation::CoveredBy,
1234            "contains_properly" | "CONTAINS_PROPERLY" => SpatialRelation::ContainsProperly,
1235            other => {
1236                return Err(LuciError::InvalidQuery(format!(
1237                    "geo_shape: unknown relation '{other}'"
1238                )));
1239            }
1240        };
1241
1242        return Ok(ScoringExpression::GeoShape {
1243            field: key.clone(),
1244            shape: GeoShapeValue {
1245                json: shape_val.clone(),
1246            },
1247            relation,
1248        });
1249    }
1250    Err(LuciError::InvalidQuery("geo_shape: missing field".into()))
1251}
1252
1253/// Parse a kNN query: `{"knn": {"field": "...", "query_vector": [...], "k": 10}}`.
1254///
1255/// See [[feature-knn-query-type]].
1256fn parse_knn_query(node: &Value) -> Result<ScoringExpression> {
1257    let obj = validate_keys(
1258        node,
1259        &[
1260            "field",
1261            "query_vector",
1262            "k",
1263            "num_candidates",
1264            "threshold",
1265            "boost",
1266        ],
1267        "knn",
1268    )?;
1269    let field = obj
1270        .get("field")
1271        .and_then(|v| v.as_str())
1272        .ok_or_else(|| LuciError::InvalidQuery("knn requires 'field'".into()))?
1273        .to_string();
1274    let raw_vec = obj
1275        .get("query_vector")
1276        .and_then(|v| v.as_array())
1277        .ok_or_else(|| LuciError::InvalidQuery("knn requires 'query_vector'".into()))?;
1278    let query_vector: Vec<f32> = raw_vec
1279        .iter()
1280        .map(|v| {
1281            v.as_f64().map(|f| f as f32).ok_or_else(|| {
1282                LuciError::InvalidQuery("knn query_vector elements must be numbers".into())
1283            })
1284        })
1285        .collect::<Result<Vec<f32>>>()?;
1286    if query_vector.is_empty() {
1287        return Err(LuciError::InvalidQuery(
1288            "knn query_vector must not be empty".into(),
1289        ));
1290    }
1291    let k = opt_u64(obj, "k", "knn")?.unwrap_or(10) as usize;
1292    if k == 0 {
1293        return Err(LuciError::InvalidQuery("knn k must be > 0".into()));
1294    }
1295    let num_candidates = opt_u64(obj, "num_candidates", "knn")?
1296        .map(|v| v as usize)
1297        .unwrap_or((k as f64 * 1.5).ceil() as usize);
1298    let threshold = opt_f64(obj, "threshold", "knn")?.map(|v| v as f32);
1299    let boost = opt_f64(obj, "boost", "knn")?;
1300    let base = ScoringExpression::Knn {
1301        field,
1302        query_vector,
1303        k,
1304        num_candidates,
1305        threshold,
1306    };
1307    Ok(maybe_boost(base, boost))
1308}
1309
1310/// Parse a fusion query: `{"fusion": {"sources": [...], "method": "rrf"}}`.
1311///
1312/// See [[feature-rrf-retrievers]].
1313fn parse_fusion_query(node: &Value) -> Result<QueryExpression> {
1314    let obj = validate_keys(
1315        node,
1316        &[
1317            "sources",
1318            "method",
1319            "rank_constant",
1320            "rank_window_size",
1321            "weights",
1322        ],
1323        "fusion",
1324    )?;
1325
1326    let sources_arr = obj
1327        .get("sources")
1328        .and_then(|v| v.as_array())
1329        .ok_or_else(|| LuciError::InvalidQuery("fusion requires 'sources' array".into()))?;
1330    if sources_arr.len() < 2 {
1331        return Err(LuciError::InvalidQuery(
1332            "fusion requires at least 2 sources".into(),
1333        ));
1334    }
1335    let sources: Vec<QueryExpression> = sources_arr
1336        .iter()
1337        .map(parse_query_expression_node)
1338        .collect::<Result<Vec<_>>>()?;
1339
1340    let method = match opt_str(obj, "method", "fusion")? {
1341        Some("rrf") | Some("reciprocal_rank") => FusionMethod::ReciprocalRank,
1342        Some("sum") => FusionMethod::Sum,
1343        Some("arithmetic_mean") | Some("avg") => FusionMethod::ArithmeticMean,
1344        Some("harmonic_mean") => FusionMethod::HarmonicMean,
1345        Some("geometric_mean") => FusionMethod::GeometricMean,
1346        Some(other) => {
1347            return Err(LuciError::InvalidQuery(format!(
1348                "unknown fusion method: '{other}'"
1349            )));
1350        }
1351        None => FusionMethod::ReciprocalRank, // default
1352    };
1353
1354    let rank_constant = opt_f64(obj, "rank_constant", "fusion")?.unwrap_or(60.0) as f32;
1355    let rank_window_size = opt_u64(obj, "rank_window_size", "fusion")?.map(|v| v as usize);
1356    let weights = match obj.get("weights") {
1357        Some(v) => {
1358            let arr = v.as_array().ok_or_else(|| {
1359                LuciError::InvalidQuery("fusion: \"weights\" must be an array of numbers".into())
1360            })?;
1361            let ws = arr
1362                .iter()
1363                .map(|w| {
1364                    w.as_f64().map(|f| f as f32).ok_or_else(|| {
1365                        LuciError::InvalidQuery(format!(
1366                            "fusion: weights[] entries must be numbers, got {w}"
1367                        ))
1368                    })
1369                })
1370                .collect::<Result<Vec<f32>>>()?;
1371            Some(ws)
1372        }
1373        None => None,
1374    };
1375
1376    if let Some(ref ws) = weights {
1377        if ws.len() != sources.len() {
1378            return Err(LuciError::InvalidQuery(format!(
1379                "fusion weights length ({}) must match sources length ({})",
1380                ws.len(),
1381                sources.len()
1382            )));
1383        }
1384    }
1385
1386    Ok(QueryExpression::Ranking(RankingExpression::Fusion {
1387        sources,
1388        method,
1389        rank_constant,
1390        rank_window_size,
1391        weights,
1392    }))
1393}
1394
/// Parse a distance string such as `"10km"`, `"5mi"` or `"1000m"` and
/// return the distance in kilometres.
///
/// Recognised unit suffixes (case-sensitive, optional whitespace between
/// number and unit):
///
/// - kilometres: `km`, `kilometers`
/// - metres: `m`, `meters` (also the default when no unit is given)
/// - centimetres: `cm`, `centimeters`
/// - millimetres: `mm`, `millimeters`
/// - miles: `mi`, `miles`
/// - yards: `yd`, `yards`
/// - feet: `ft`, `feet`
/// - inches: `in`, `inch`
/// - nautical miles: `NM`, `nmi`, `nauticalmiles`
///
/// A string whose numeric part cannot be parsed yields `0.0`.
pub fn parse_distance_km(s: &str) -> f64 {
    // (suffix, multiplier, divisor) with km = value * multiplier / divisor.
    // Keeping both factors makes `km`, `mi` and `m` compute exactly
    // `value`, `value * 1.60934` and `value / 1000.0`. Longer suffixes come
    // before the shorter ones they end with (`nmi` before `mi`, `cm` before
    // `m`), because the first matching suffix wins.
    const UNITS: &[(&str, f64, f64)] = &[
        ("nauticalmiles", 1.852, 1.0),
        ("kilometers", 1.0, 1.0),
        ("centimeters", 1.0, 100_000.0),
        ("millimeters", 1.0, 1_000_000.0),
        ("meters", 1.0, 1000.0),
        ("miles", 1.60934, 1.0),
        ("yards", 0.9144, 1000.0),
        ("feet", 0.3048, 1000.0),
        ("inch", 0.0254, 1000.0),
        ("nmi", 1.852, 1.0),
        ("NM", 1.852, 1.0),
        ("km", 1.0, 1.0),
        ("cm", 1.0, 100_000.0),
        ("mm", 1.0, 1_000_000.0),
        ("mi", 1.60934, 1.0),
        ("yd", 0.9144, 1000.0),
        ("ft", 0.3048, 1000.0),
        ("in", 0.0254, 1000.0),
        ("m", 1.0, 1000.0),
    ];

    let s = s.trim();
    let (number, multiplier, divisor) = UNITS
        .iter()
        .find_map(|&(suffix, mul, div)| s.strip_suffix(suffix).map(|n| (n.trim(), mul, div)))
        // No unit suffix: the value is interpreted as metres.
        .unwrap_or((s, 1.0, 1000.0));
    number.parse::<f64>().unwrap_or(0.0) * multiplier / divisor
}
1409
1410#[cfg(test)]
1411mod tests {
1412    use super::*;
1413    use serde_json::json;
1414
1415    #[test]
1416    fn parse_term_shorthand() {
1417        let q = parse_query(&json!({"term": {"status": "active"}})).unwrap();
1418        assert_eq!(
1419            q,
1420            ScoringExpression::Term {
1421                field: "status".into(),
1422                value: "active".into()
1423            }
1424        );
1425    }
1426
1427    #[test]
1428    fn parse_term_full_form() {
1429        let q = parse_query(&json!({"term": {"status": {"value": "active"}}})).unwrap();
1430        assert_eq!(
1431            q,
1432            ScoringExpression::Term {
1433                field: "status".into(),
1434                value: "active".into()
1435            }
1436        );
1437    }
1438
1439    #[test]
1440    fn parse_term_numeric() {
1441        let q = parse_query(&json!({"term": {"age": 25}})).unwrap();
1442        assert_eq!(
1443            q,
1444            ScoringExpression::Term {
1445                field: "age".into(),
1446                value: "25".into()
1447            }
1448        );
1449    }
1450
1451    #[test]
1452    fn parse_terms() {
1453        let q = parse_query(&json!({"terms": {"status": ["a", "b", "c"]}})).unwrap();
1454        assert_eq!(
1455            q,
1456            ScoringExpression::Terms {
1457                field: "status".into(),
1458                values: vec!["a".into(), "b".into(), "c".into()]
1459            }
1460        );
1461    }
1462
1463    #[test]
1464    fn parse_match_shorthand() {
1465        let q = parse_query(&json!({"match": {"title": "search engine"}})).unwrap();
1466        assert_eq!(
1467            q,
1468            ScoringExpression::Match {
1469                field: "title".into(),
1470                query: "search engine".into(),
1471                analyzer: None
1472            }
1473        );
1474    }
1475
1476    #[test]
1477    fn parse_match_full_form() {
1478        let q = parse_query(&json!({
1479            "match": {"title": {"query": "search", "analyzer": "standard"}}
1480        }))
1481        .unwrap();
1482        assert_eq!(
1483            q,
1484            ScoringExpression::Match {
1485                field: "title".into(),
1486                query: "search".into(),
1487                analyzer: Some("standard".into())
1488            }
1489        );
1490    }
1491
1492    #[test]
1493    fn parse_match_phrase_shorthand() {
1494        let q = parse_query(&json!({"match_phrase": {"body": "quick brown fox"}})).unwrap();
1495        assert_eq!(
1496            q,
1497            ScoringExpression::MatchPhrase {
1498                field: "body".into(),
1499                query: "quick brown fox".into(),
1500                analyzer: None
1501            }
1502        );
1503    }
1504
1505    #[test]
1506    fn parse_match_phrase_full_form() {
1507        let q = parse_query(&json!({
1508            "match_phrase": {"body": {"query": "quick brown"}}
1509        }))
1510        .unwrap();
1511        if let ScoringExpression::MatchPhrase { query, .. } = &q {
1512            assert_eq!(query, "quick brown");
1513        } else {
1514            panic!("expected MatchPhrase");
1515        }
1516    }
1517
1518    #[test]
1519    fn parse_bool_basic() {
1520        let q = parse_query(&json!({
1521            "bool": {
1522                "must": [{"term": {"status": "active"}}],
1523                "filter": [{"exists": {"field": "title"}}]
1524            }
1525        }))
1526        .unwrap();
1527
1528        if let ScoringExpression::Bool {
1529            must,
1530            should,
1531            must_not,
1532            filter,
1533            ..
1534        } = &q
1535        {
1536            assert_eq!(must.len(), 1);
1537            assert!(should.is_empty());
1538            assert!(must_not.is_empty());
1539            assert_eq!(filter.len(), 1);
1540        } else {
1541            panic!("expected Bool");
1542        }
1543    }
1544
1545    #[test]
1546    fn parse_bool_all_clauses() {
1547        let q = parse_query(&json!({
1548            "bool": {
1549                "must": [{"match": {"title": "search"}}],
1550                "should": [{"term": {"tag": "hot"}}],
1551                "must_not": [{"term": {"status": "deleted"}}],
1552                "filter": [{"exists": {"field": "body"}}]
1553            }
1554        }))
1555        .unwrap();
1556
1557        if let ScoringExpression::Bool {
1558            must,
1559            should,
1560            must_not,
1561            filter,
1562            ..
1563        } = &q
1564        {
1565            assert_eq!(must.len(), 1);
1566            assert_eq!(should.len(), 1);
1567            assert_eq!(must_not.len(), 1);
1568            assert_eq!(filter.len(), 1);
1569        }
1570    }
1571
1572    #[test]
1573    fn parse_bool_single_clause_not_array() {
1574        // ES accepts a single query object without wrapping in array
1575        let q = parse_query(&json!({
1576            "bool": {
1577                "must": {"term": {"status": "active"}}
1578            }
1579        }))
1580        .unwrap();
1581
1582        if let ScoringExpression::Bool { must, .. } = &q {
1583            assert_eq!(must.len(), 1);
1584        }
1585    }
1586
1587    #[test]
1588    fn parse_exists() {
1589        let q = parse_query(&json!({"exists": {"field": "title"}})).unwrap();
1590        assert_eq!(
1591            q,
1592            ScoringExpression::Exists {
1593                field: "title".into()
1594            }
1595        );
1596    }
1597
1598    #[test]
1599    fn parse_prefix_shorthand() {
1600        let q = parse_query(&json!({"prefix": {"title": "sea"}})).unwrap();
1601        assert_eq!(
1602            q,
1603            ScoringExpression::Prefix {
1604                field: "title".into(),
1605                value: "sea".into()
1606            }
1607        );
1608    }
1609
1610    #[test]
1611    fn parse_prefix_full_form() {
1612        let q = parse_query(&json!({"prefix": {"title": {"value": "sea"}}})).unwrap();
1613        assert_eq!(
1614            q,
1615            ScoringExpression::Prefix {
1616                field: "title".into(),
1617                value: "sea".into()
1618            }
1619        );
1620    }
1621
1622    #[test]
1623    fn parse_constant_score() {
1624        let q = parse_query(&json!({
1625            "constant_score": {
1626                "filter": {"term": {"status": "active"}},
1627                "boost": 1.5
1628            }
1629        }))
1630        .unwrap();
1631
1632        if let ScoringExpression::ConstantScore { boost, query } = &q {
1633            assert_eq!(*boost, 1.5);
1634            assert!(matches!(query.as_ref(), ScoringExpression::Term { .. }));
1635        } else {
1636            panic!("expected ConstantScore");
1637        }
1638    }
1639
1640    #[test]
1641    fn parse_match_all() {
1642        let q = parse_query(&json!({"match_all": {}})).unwrap();
1643        assert_eq!(q, ScoringExpression::MatchAll);
1644    }
1645
1646    #[test]
1647    fn parse_match_none() {
1648        let q = parse_query(&json!({"match_none": {}})).unwrap();
1649        assert_eq!(q, ScoringExpression::MatchNone);
1650    }
1651
1652    #[test]
1653    fn parse_with_query_wrapper() {
1654        let q = parse_query(&json!({
1655            "query": {"term": {"status": "active"}}
1656        }))
1657        .unwrap();
1658        assert!(matches!(q, ScoringExpression::Term { .. }));
1659    }
1660
1661    #[test]
1662    fn parse_unknown_query_type() {
1663        let r = parse_query(&json!({"unknown_type": {"field": "val"}}));
1664        assert!(r.is_err());
1665    }
1666
1667    #[test]
1668    fn parse_empty_object() {
1669        let r = parse_query(&json!({}));
1670        assert!(r.is_err());
1671    }
1672
1673    #[test]
1674    fn parse_nested_bool() {
1675        let q = parse_query(&json!({
1676            "bool": {
1677                "must": [{
1678                    "bool": {
1679                        "should": [
1680                            {"term": {"a": "1"}},
1681                            {"term": {"b": "2"}}
1682                        ]
1683                    }
1684                }]
1685            }
1686        }))
1687        .unwrap();
1688
1689        if let ScoringExpression::Bool { must, .. } = &q {
1690            assert!(matches!(&must[0], ScoringExpression::Bool { .. }));
1691        }
1692    }
1693
1694    #[test]
1695    fn parse_deeply_nested() {
1696        let q = parse_query(&json!({
1697            "bool": {
1698                "filter": [{
1699                    "constant_score": {
1700                        "filter": {"term": {"x": "y"}},
1701                        "boost": 2.0
1702                    }
1703                }]
1704            }
1705        }))
1706        .unwrap();
1707
1708        if let ScoringExpression::Bool { filter, .. } = &q {
1709            assert!(matches!(
1710                &filter[0],
1711                ScoringExpression::ConstantScore { .. }
1712            ));
1713        }
1714    }
1715
1716    // --- kNN query parsing ---
1717
1718    #[test]
1719    fn parse_knn_query_basic() {
1720        let q = parse_query(&json!({"knn": {
1721            "field": "embedding",
1722            "query_vector": [1.0, 2.0, 3.0],
1723            "k": 5,
1724            "num_candidates": 20
1725        }}))
1726        .unwrap();
1727
1728        assert!(matches!(q, ScoringExpression::Knn { k: 5, .. }));
1729        if let ScoringExpression::Knn {
1730            field,
1731            query_vector,
1732            k,
1733            num_candidates,
1734            threshold,
1735        } = &q
1736        {
1737            assert_eq!(field, "embedding");
1738            assert_eq!(query_vector, &[1.0, 2.0, 3.0]);
1739            assert_eq!(*k, 5);
1740            assert_eq!(*num_candidates, 20);
1741            assert!(threshold.is_none());
1742        }
1743    }
1744
1745    #[test]
1746    fn parse_knn_query_defaults() {
1747        let q = parse_query(&json!({"knn": {
1748            "field": "f",
1749            "query_vector": [1.0]
1750        }}))
1751        .unwrap();
1752
1753        if let ScoringExpression::Knn {
1754            k, num_candidates, ..
1755        } = &q
1756        {
1757            assert_eq!(*k, 10); // default
1758            assert_eq!(*num_candidates, 15); // ceil(10 * 1.5)
1759        } else {
1760            panic!("expected Knn");
1761        }
1762    }
1763
1764    #[test]
1765    fn parse_knn_query_with_threshold() {
1766        let q = parse_query(&json!({"knn": {
1767            "field": "f",
1768            "query_vector": [1.0],
1769            "threshold": 0.5
1770        }}))
1771        .unwrap();
1772
1773        if let ScoringExpression::Knn { threshold, .. } = &q {
1774            assert_eq!(*threshold, Some(0.5));
1775        } else {
1776            panic!("expected Knn");
1777        }
1778    }
1779
1780    #[test]
1781    fn parse_knn_query_zero_k_rejected() {
1782        let result = parse_query(&json!({"knn": {
1783            "field": "f",
1784            "query_vector": [1.0],
1785            "k": 0
1786        }}));
1787        assert!(result.is_err());
1788    }
1789
1790    #[test]
1791    fn parse_knn_query_empty_vector_rejected() {
1792        let result = parse_query(&json!({"knn": {
1793            "field": "f",
1794            "query_vector": []
1795        }}));
1796        assert!(result.is_err());
1797    }
1798
1799    #[test]
1800    fn parse_knn_query_non_numeric_rejected() {
1801        let result = parse_query(&json!({"knn": {
1802            "field": "f",
1803            "query_vector": [1.0, "bad", 3.0]
1804        }}));
1805        assert!(result.is_err());
1806    }
1807
1808    #[test]
1809    fn parse_knn_query_missing_field_rejected() {
1810        let result = parse_query(&json!({"knn": {
1811            "query_vector": [1.0]
1812        }}));
1813        assert!(result.is_err());
1814    }
1815
1816    #[test]
1817    fn parse_knn_query_missing_vector_rejected() {
1818        let result = parse_query(&json!({"knn": {
1819            "field": "f"
1820        }}));
1821        assert!(result.is_err());
1822    }
1823
1824    // --- E10: a specified-but-mistyped knn option must error, not
1825    // silently fall back to a default. See [[code-must-not-lie]]. The
1826    // old parser took `unwrap_or(default)` on the coercion, so a string
1827    // `k` silently became 10. ---
1828
1829    #[test]
1830    fn parse_knn_query_string_k_rejected() {
1831        let result = parse_query(&json!({"knn": {
1832            "field": "f",
1833            "query_vector": [1.0],
1834            "k": "5"
1835        }}));
1836        let err = result.unwrap_err();
1837        let msg = format!("{err}");
1838        assert!(
1839            msg.contains("\"k\"") && msg.contains("integer"),
1840            "error must explain the type mismatch: {msg}"
1841        );
1842    }
1843
1844    #[test]
1845    fn parse_knn_query_float_k_rejected() {
1846        // A non-integral float is not a valid k.
1847        let result = parse_query(&json!({"knn": {
1848            "field": "f",
1849            "query_vector": [1.0],
1850            "k": 5.5
1851        }}));
1852        assert!(result.is_err(), "float k must be rejected");
1853    }
1854
1855    #[test]
1856    fn parse_knn_query_string_num_candidates_rejected() {
1857        let result = parse_query(&json!({"knn": {
1858            "field": "f",
1859            "query_vector": [1.0],
1860            "num_candidates": "20"
1861        }}));
1862        let err = result.unwrap_err();
1863        let msg = format!("{err}");
1864        assert!(
1865            msg.contains("num_candidates"),
1866            "error must name the option: {msg}"
1867        );
1868    }
1869
1870    #[test]
1871    fn parse_knn_query_string_threshold_rejected() {
1872        let result = parse_query(&json!({"knn": {
1873            "field": "f",
1874            "query_vector": [1.0],
1875            "threshold": "high"
1876        }}));
1877        let err = result.unwrap_err();
1878        let msg = format!("{err}");
1879        assert!(
1880            msg.contains("threshold") && msg.contains("number"),
1881            "error must explain the type mismatch: {msg}"
1882        );
1883    }
1884
1885    #[test]
1886    fn parse_knn_query_string_boost_rejected() {
1887        let result = parse_query(&json!({"knn": {
1888            "field": "f",
1889            "query_vector": [1.0],
1890            "boost": "2"
1891        }}));
1892        let err = result.unwrap_err();
1893        let msg = format!("{err}");
1894        assert!(msg.contains("boost"), "error must name the option: {msg}");
1895    }
1896
1897    #[test]
1898    fn parse_knn_query_null_k_uses_default() {
1899        // Explicit null is treated as omitted: the documented default
1900        // applies (honest), since the caller did not specify a value.
1901        let q = parse_query(&json!({"knn": {
1902            "field": "f",
1903            "query_vector": [1.0],
1904            "k": null
1905        }}))
1906        .unwrap();
1907        if let ScoringExpression::Knn { k, .. } = &q {
1908            assert_eq!(*k, 10);
1909        } else {
1910            panic!("expected Knn");
1911        }
1912    }
1913
1914    // --- E10: strict value types on known query fields. A known key
1915    // carrying a wrong-typed value must error, never silently default or
1916    // drop. See [[code-must-not-lie]] and [[fix-strict-search-parsing]]. ---
1917
1918    #[test]
1919    fn parse_term_string_boost_rejected() {
1920        let err = parse_query(&json!({"term": {"f": {"value": "x", "boost": "2"}}})).unwrap_err();
1921        let msg = format!("{err}");
1922        assert!(
1923            msg.contains("boost") && msg.contains("number"),
1924            "got: {msg}"
1925        );
1926    }
1927
1928    #[test]
1929    fn parse_term_valid_boost_still_parses() {
1930        // Happy path: a correctly-typed boost is unaffected.
1931        parse_query(&json!({"term": {"f": {"value": "x", "boost": 2.0}}})).unwrap();
1932    }
1933
1934    #[test]
1935    fn parse_match_non_string_analyzer_rejected() {
1936        // `analyzer` was silently dropped on a non-string before E10.
1937        let err = parse_query(&json!({"match": {"f": {"query": "x", "analyzer": 7}}})).unwrap_err();
1938        assert!(format!("{err}").contains("analyzer"), "{err}");
1939    }
1940
1941    #[test]
1942    fn parse_bool_string_minimum_should_match_rejected() {
1943        let err = parse_query(
1944            &json!({"bool": {"should": [{"term": {"f": "a"}}], "minimum_should_match": "1"}}),
1945        )
1946        .unwrap_err();
1947        assert!(format!("{err}").contains("minimum_should_match"), "{err}");
1948    }
1949
1950    #[test]
1951    fn parse_range_string_bound_rejected() {
1952        let err = parse_query(&json!({"range": {"price": {"gte": "10"}}})).unwrap_err();
1953        let msg = format!("{err}");
1954        assert!(msg.contains("gte") && msg.contains("number"), "{msg}");
1955    }
1956
1957    #[test]
1958    fn parse_function_score_non_string_score_mode_rejected() {
1959        // `.and_then(as_str)` used to map a non-string to the default.
1960        let err = parse_query(&json!({"function_score": {"score_mode": 5}})).unwrap_err();
1961        assert!(format!("{err}").contains("score_mode"), "{err}");
1962    }
1963
1964    #[test]
1965    fn parse_function_score_unknown_score_mode_rejected() {
1966        let err = parse_query(&json!({"function_score": {"score_mode": "prod"}})).unwrap_err();
1967        let msg = format!("{err}");
1968        assert!(msg.contains("score_mode") && msg.contains("prod"), "{msg}");
1969    }
1970
1971    #[test]
1972    fn parse_multi_match_non_string_field_rejected() {
1973        let err = parse_query(&json!({"multi_match": {"query": "x", "fields": ["title", 7]}}))
1974            .unwrap_err();
1975        assert!(format!("{err}").contains("fields"), "{err}");
1976    }
1977
1978    #[test]
1979    fn parse_multi_match_unsupported_type_rejected() {
1980        // `cross_fields` silently degraded to best_fields before E10.
1981        let err = parse_query(
1982            &json!({"multi_match": {"query": "x", "fields": ["a"], "type": "cross_fields"}}),
1983        )
1984        .unwrap_err();
1985        assert!(format!("{err}").contains("type"), "{err}");
1986    }
1987
1988    #[test]
1989    fn parse_fusion_non_number_weight_rejected() {
1990        // `fusion` is a ranking expression — parsed via the expression
1991        // entry point, not the scoring-query dispatcher.
1992        let err = parse_query_expression(&json!({"fusion": {
1993            "sources": [{"match": {"f": "a"}}, {"match": {"f": "b"}}],
1994            "weights": ["1.0", "2.0"]
1995        }}))
1996        .unwrap_err();
1997        assert!(format!("{err}").contains("weights"), "{err}");
1998    }
1999}