lance_index/scalar/inverted/
parser.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright The Lance Authors
3
4use super::query::{
5    BooleanQuery, BoostQuery, FtsQuery, MatchQuery, MultiMatchQuery, Occur, Operator, PhraseQuery,
6};
7use lance_core::{Error, Result};
8use serde_json::Value;
9use snafu::location;
10
/// Construction of a query type from an already-parsed JSON value.
///
/// Implemented in this file for each concrete full-text-search query type so
/// that `from_json_value` can dispatch on the query kind.
pub trait JsonParser {
    /// Builds `Self` from `value`, returning an error when the JSON does not
    /// describe a valid instance of the implementing type.
    fn from_json(value: &Value) -> Result<Self>
    where
        Self: Sized;
}
16
17impl JsonParser for MatchQuery {
18    fn from_json(value: &Value) -> Result<Self> {
19        let column = value["column"].as_str().map(String::from);
20        let terms = value["terms"]
21            .as_str()
22            .ok_or_else(|| Error::invalid_input("missing terms in match query", location!()))?
23            .to_string();
24        let boost = value["boost"]
25            .as_f64()
26            .map(|v| v as f32)
27            .unwrap_or(Self::default_boost());
28        let fuzziness = match &value["fuzziness"] {
29            Value::Number(num) if num.is_u64() => Some(num.as_u64().unwrap() as u32),
30            Value::String(s) if s.as_str() == "auto" => None,
31            Value::Null => None,
32            _ => Some(0),
33        };
34        let max_expansions = value["max_expansions"]
35            .as_u64()
36            .map(|v| v as usize)
37            .unwrap_or(Self::default_max_expansions());
38        let operator = value["operator"]
39            .as_str()
40            .map(|s| s.try_into().unwrap_or(Operator::default()))
41            .unwrap_or_default();
42        let prefix_length = value["prefix_length"]
43            .as_u64()
44            .map(|v| v as u32)
45            .unwrap_or(0);
46
47        Ok(Self {
48            column,
49            terms,
50            boost,
51            fuzziness,
52            max_expansions,
53            operator,
54            prefix_length,
55        })
56    }
57}
58
59impl JsonParser for PhraseQuery {
60    fn from_json(value: &Value) -> Result<Self> {
61        let column = value["column"].as_str().map(String::from);
62        let terms = value["terms"]
63            .as_str()
64            .ok_or_else(|| Error::invalid_input("missing terms in phrase query", location!()))?
65            .to_string();
66        let slop = value["slop"].as_u64().map(|v| v as u32).unwrap_or(0);
67
68        Ok(Self {
69            column,
70            terms,
71            slop,
72        })
73    }
74}
75
76impl JsonParser for BoostQuery {
77    fn from_json(value: &Value) -> Result<Self> {
78        let positive = value["positive"]
79            .as_object()
80            .ok_or_else(|| Error::invalid_input("missing positive in boost query", location!()))?;
81        let positive_query = from_json_value(&Value::Object(positive.clone()))?;
82
83        let negative = value["negative"]
84            .as_object()
85            .ok_or_else(|| Error::invalid_input("missing negative in boost query", location!()))?;
86        let negative_query = from_json_value(&Value::Object(negative.clone()))?;
87
88        let negative_boost = value["negative_boost"].as_f64().map(|v| v as f32);
89
90        Ok(Self::new(positive_query, negative_query, negative_boost))
91    }
92}
93
94impl JsonParser for MultiMatchQuery {
95    fn from_json(value: &Value) -> Result<Self> {
96        let query = value["match_queries"].as_array().ok_or_else(|| {
97            Error::invalid_input("missing match_queries in multi_match query", location!())
98        })?;
99        let query = query
100            .iter()
101            .map(MatchQuery::from_json)
102            .collect::<Result<Vec<MatchQuery>>>()?;
103
104        if query.is_empty() {
105            return Err(Error::invalid_input("empty multi_match query", location!()));
106        }
107
108        Ok(Self {
109            match_queries: query,
110        })
111    }
112}
113
114impl JsonParser for BooleanQuery {
115    fn from_json(value: &Value) -> Result<Self> {
116        let mut clauses = Vec::new();
117
118        if let Some(must) = value["must"].as_array() {
119            for query_val in must {
120                let query = from_json_value(query_val)?;
121                clauses.push((Occur::Must, query));
122            }
123        }
124
125        if let Some(should) = value["should"].as_array() {
126            for query_val in should {
127                let query = from_json_value(query_val)?;
128                clauses.push((Occur::Should, query));
129            }
130        }
131
132        if let Some(must_not) = value["must_not"].as_array() {
133            for query_val in must_not {
134                let query = from_json_value(query_val)?;
135                clauses.push((Occur::MustNot, query));
136            }
137        }
138
139        Ok(Self::new(clauses))
140    }
141}
142
143fn from_json_value(value: &Value) -> Result<FtsQuery> {
144    let value = value
145        .as_object()
146        .ok_or_else(|| Error::invalid_input("value must be a JSON object", location!()))?;
147    if value.len() != 1 {
148        return Err(Error::invalid_input(
149            "value must be a single JSON object",
150            location!(),
151        ));
152    }
153
154    let (query_type, query_val) = value.into_iter().next().unwrap();
155    match query_type.as_str() {
156        "match" => Ok(FtsQuery::Match(MatchQuery::from_json(query_val)?)),
157        "phrase" => Ok(FtsQuery::Phrase(PhraseQuery::from_json(query_val)?)),
158        "boost" => Ok(FtsQuery::Boost(BoostQuery::from_json(query_val)?)),
159        "multi_match" => Ok(FtsQuery::MultiMatch(MultiMatchQuery::from_json(query_val)?)),
160        "boolean" => Ok(FtsQuery::Boolean(BooleanQuery::from_json(query_val)?)),
161        _ => Err(Error::invalid_input(
162            format!("unknown fts query type: {}", query_type),
163            location!(),
164        )),
165    }
166}
167
168pub fn from_json(json: &str) -> Result<FtsQuery> {
169    let value: Value = serde_json::from_str(json)
170        .map_err(|e| Error::invalid_input(format!("invalid json: {}", e), location!()))?;
171    from_json_value(&value)
172}
173
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `json` with [`from_json`] and asserts the result equals `expected`.
    #[track_caller]
    fn assert_parses_to(json: &str, expected: FtsQuery) {
        assert_eq!(from_json(json).unwrap(), expected);
    }

    /// Match query on `column` for `terms`, with fuzziness cleared because the
    /// parser yields `None` when the `fuzziness` key is absent.
    fn match_on(column: &str, terms: &str) -> MatchQuery {
        MatchQuery::new(terms.to_string())
            .with_column(Some(column.to_string()))
            .with_fuzziness(None)
    }

    /// Phrase query on `column` for `terms`.
    fn phrase_on(column: &str, terms: &str) -> PhraseQuery {
        PhraseQuery::new(terms.to_string()).with_column(Some(column.to_string()))
    }

    // Every match-query field is set explicitly and must round-trip.
    #[test]
    fn test_from_json_match() {
        let json = r#"
        {
            "match": {
                "column": "text",
                "terms": "hello world",
                "boost": 2.0,
                "fuzziness": 1,
                "max_expansions": 10,
                "operator": "and",
                "prefix_length": 2
            }
        }
        "#;
        assert_parses_to(
            json,
            FtsQuery::Match(MatchQuery {
                column: Some("text".to_string()),
                terms: "hello world".to_string(),
                boost: 2.0,
                fuzziness: Some(1),
                max_expansions: 10,
                operator: Operator::And,
                prefix_length: 2,
            }),
        );
    }

    #[test]
    fn test_from_json_phrase() {
        let json = r#"
        {
            "phrase": {
                "column": "text",
                "terms": "hello world",
                "slop": 1
            }
        }"#;
        assert_parses_to(
            json,
            FtsQuery::Phrase(PhraseQuery {
                column: Some("text".to_string()),
                terms: "hello world".to_string(),
                slop: 1,
            }),
        );
    }

    // Nested positive/negative queries are parsed recursively.
    #[test]
    fn test_from_json_boost() {
        let json = r#"
        {
            "boost": {
                "positive": {
                    "match": {
                        "column": "title",
                        "terms": "hello"
                    }
                },
                "negative": {
                    "phrase": {
                        "column": "body",
                        "terms": "world"
                    }
                },
                "negative_boost": 0.5
            }
        }"#;
        assert_parses_to(
            json,
            FtsQuery::Boost(BoostQuery {
                positive: Box::new(FtsQuery::Match(match_on("title", "hello"))),
                negative: Box::new(FtsQuery::Phrase(phrase_on("body", "world"))),
                negative_boost: 0.5,
            }),
        );
    }

    #[test]
    fn test_from_json_multi_match() {
        let json = r#"
        {
            "multi_match": {
                "match_queries": [
                    {
                        "column": "title",
                        "terms": "hello"
                    },
                    {
                        "column": "body",
                        "terms": "world"
                    }
                ]
            }
        }"#;
        let match_queries = vec![match_on("title", "hello"), match_on("body", "world")];
        assert_parses_to(json, FtsQuery::MultiMatch(MultiMatchQuery { match_queries }));
    }

    // An empty `must_not` array contributes no clauses.
    #[test]
    fn test_from_json_boolean() {
        let json = r#"{
            "boolean": {
                "must": [
                    {
                        "match": {
                            "column": "text",
                            "terms": "hello"
                        }
                    }
                ],
                "should": [
                    {
                        "phrase": {
                            "column": "text",
                            "terms": "world"
                        }
                    }
                ],
                "must_not": []
            }
        }"#;
        let clauses = vec![
            (Occur::Must, FtsQuery::Match(match_on("text", "hello"))),
            (Occur::Should, FtsQuery::Phrase(phrase_on("text", "world"))),
        ];
        assert_parses_to(json, FtsQuery::Boolean(BooleanQuery::new(clauses)));
    }
}