Skip to main content

lance_index/scalar/inverted/
parser.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright The Lance Authors
3
4use super::query::{
5    BooleanQuery, BoostQuery, FtsQuery, MatchQuery, MultiMatchQuery, Occur, Operator, PhraseQuery,
6};
7use lance_core::{Error, Result};
8use serde_json::Value;
9
10pub trait JsonParser {
11    fn from_json(value: &Value) -> Result<Self>
12    where
13        Self: Sized;
14}
15
16impl JsonParser for MatchQuery {
17    fn from_json(value: &Value) -> Result<Self> {
18        let column = value["column"].as_str().map(String::from);
19        let terms = value["terms"]
20            .as_str()
21            .ok_or_else(|| Error::invalid_input("missing terms in match query"))?
22            .to_string();
23        let boost = value["boost"]
24            .as_f64()
25            .map(|v| v as f32)
26            .unwrap_or(Self::default_boost());
27        let fuzziness = match &value["fuzziness"] {
28            Value::Number(num) if num.is_u64() => Some(num.as_u64().unwrap() as u32),
29            Value::String(s) if s.as_str() == "auto" => None,
30            Value::Null => None,
31            _ => Some(0),
32        };
33        let max_expansions = value["max_expansions"]
34            .as_u64()
35            .map(|v| v as usize)
36            .unwrap_or(Self::default_max_expansions());
37        let operator = value["operator"]
38            .as_str()
39            .map(|s| s.try_into().unwrap_or(Operator::default()))
40            .unwrap_or_default();
41        let prefix_length = value["prefix_length"]
42            .as_u64()
43            .map(|v| v as u32)
44            .unwrap_or(0);
45
46        Ok(Self {
47            column,
48            terms,
49            boost,
50            fuzziness,
51            max_expansions,
52            operator,
53            prefix_length,
54        })
55    }
56}
57
58impl JsonParser for PhraseQuery {
59    fn from_json(value: &Value) -> Result<Self> {
60        let column = value["column"].as_str().map(String::from);
61        let terms = value["terms"]
62            .as_str()
63            .ok_or_else(|| Error::invalid_input("missing terms in phrase query"))?
64            .to_string();
65        let slop = value["slop"].as_u64().map(|v| v as u32).unwrap_or(0);
66
67        Ok(Self {
68            column,
69            terms,
70            slop,
71        })
72    }
73}
74
75impl JsonParser for BoostQuery {
76    fn from_json(value: &Value) -> Result<Self> {
77        let positive = value["positive"]
78            .as_object()
79            .ok_or_else(|| Error::invalid_input("missing positive in boost query"))?;
80        let positive_query = from_json_value(&Value::Object(positive.clone()))?;
81
82        let negative = value["negative"]
83            .as_object()
84            .ok_or_else(|| Error::invalid_input("missing negative in boost query"))?;
85        let negative_query = from_json_value(&Value::Object(negative.clone()))?;
86
87        let negative_boost = value["negative_boost"].as_f64().map(|v| v as f32);
88
89        Ok(Self::new(positive_query, negative_query, negative_boost))
90    }
91}
92
93impl JsonParser for MultiMatchQuery {
94    fn from_json(value: &Value) -> Result<Self> {
95        let query = value["match_queries"]
96            .as_array()
97            .ok_or_else(|| Error::invalid_input("missing match_queries in multi_match query"))?;
98        let query = query
99            .iter()
100            .map(MatchQuery::from_json)
101            .collect::<Result<Vec<MatchQuery>>>()?;
102
103        if query.is_empty() {
104            return Err(Error::invalid_input("empty multi_match query"));
105        }
106
107        Ok(Self {
108            match_queries: query,
109        })
110    }
111}
112
113impl JsonParser for BooleanQuery {
114    fn from_json(value: &Value) -> Result<Self> {
115        let mut clauses = Vec::new();
116
117        if let Some(must) = value["must"].as_array() {
118            for query_val in must {
119                let query = from_json_value(query_val)?;
120                clauses.push((Occur::Must, query));
121            }
122        }
123
124        if let Some(should) = value["should"].as_array() {
125            for query_val in should {
126                let query = from_json_value(query_val)?;
127                clauses.push((Occur::Should, query));
128            }
129        }
130
131        if let Some(must_not) = value["must_not"].as_array() {
132            for query_val in must_not {
133                let query = from_json_value(query_val)?;
134                clauses.push((Occur::MustNot, query));
135            }
136        }
137
138        Ok(Self::new(clauses))
139    }
140}
141
142fn from_json_value(value: &Value) -> Result<FtsQuery> {
143    let value = value
144        .as_object()
145        .ok_or_else(|| Error::invalid_input("value must be a JSON object"))?;
146    if value.len() != 1 {
147        return Err(Error::invalid_input("value must be a single JSON object"));
148    }
149
150    let (query_type, query_val) = value.into_iter().next().unwrap();
151    match query_type.as_str() {
152        "match" => Ok(FtsQuery::Match(MatchQuery::from_json(query_val)?)),
153        "phrase" => Ok(FtsQuery::Phrase(PhraseQuery::from_json(query_val)?)),
154        "boost" => Ok(FtsQuery::Boost(BoostQuery::from_json(query_val)?)),
155        "multi_match" => Ok(FtsQuery::MultiMatch(MultiMatchQuery::from_json(query_val)?)),
156        "boolean" => Ok(FtsQuery::Boolean(BooleanQuery::from_json(query_val)?)),
157        _ => Err(Error::invalid_input(format!(
158            "unknown fts query type: {}",
159            query_type
160        ))),
161    }
162}
163
164pub fn from_json(json: &str) -> Result<FtsQuery> {
165    let value: Value = serde_json::from_str(json)
166        .map_err(|e| Error::invalid_input(format!("invalid json: {}", e)))?;
167    from_json_value(&value)
168}
169
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `input` and panics if the parser rejects it.
    fn parse(input: &str) -> FtsQuery {
        from_json(input).unwrap()
    }

    /// A `match` query with every supported key set explicitly.
    #[test]
    fn test_from_json_match() {
        let input = r#"
        {
            "match": {
                "column": "text",
                "terms": "hello world",
                "boost": 2.0,
                "fuzziness": 1,
                "max_expansions": 10,
                "operator": "and",
                "prefix_length": 2
            }
        }
        "#;
        let expected = FtsQuery::Match(MatchQuery {
            column: Some("text".to_string()),
            terms: "hello world".to_string(),
            boost: 2.0,
            fuzziness: Some(1),
            max_expansions: 10,
            operator: Operator::And,
            prefix_length: 2,
        });
        assert_eq!(parse(input), expected);
    }

    /// A `phrase` query with an explicit slop.
    #[test]
    fn test_from_json_phrase() {
        let input = r#"
        {
            "phrase": {
                "column": "text",
                "terms": "hello world",
                "slop": 1
            }
        }"#;
        let expected = FtsQuery::Phrase(PhraseQuery {
            column: Some("text".to_string()),
            terms: "hello world".to_string(),
            slop: 1,
        });
        assert_eq!(parse(input), expected);
    }

    /// A `boost` query wrapping a nested `match` and a nested `phrase`.
    #[test]
    fn test_from_json_boost() {
        let input = r#"
        {
            "boost": {
                "positive": {
                    "match": {
                        "column": "title",
                        "terms": "hello"
                    }
                },
                "negative": {
                    "phrase": {
                        "column": "body",
                        "terms": "world"
                    }
                },
                "negative_boost": 0.5
            }
        }"#;

        // Omitting `fuzziness` in JSON parses as `None`, hence the explicit
        // `with_fuzziness(None)` on the expected value.
        let positive = FtsQuery::Match(
            MatchQuery::new("hello".to_string())
                .with_column(Some("title".to_string()))
                .with_fuzziness(None),
        );
        let negative = FtsQuery::Phrase(
            PhraseQuery::new("world".to_string()).with_column(Some("body".to_string())),
        );
        let expected = FtsQuery::Boost(BoostQuery {
            positive: Box::new(positive),
            negative: Box::new(negative),
            negative_boost: 0.5,
        });
        assert_eq!(parse(input), expected);
    }

    /// A `multi_match` query; its elements are bare match bodies with no
    /// `"match"` wrapper key.
    #[test]
    fn test_from_json_multi_match() {
        let input = r#"
        {
            "multi_match": {
                "match_queries": [
                    {
                        "column": "title",
                        "terms": "hello"
                    },
                    {
                        "column": "body",
                        "terms": "world"
                    }
                ]
            }
        }"#;

        let title_query = MatchQuery::new("hello".to_string())
            .with_column(Some("title".to_string()))
            .with_fuzziness(None);
        let body_query = MatchQuery::new("world".to_string())
            .with_column(Some("body".to_string()))
            .with_fuzziness(None);
        let expected = FtsQuery::MultiMatch(MultiMatchQuery {
            match_queries: vec![title_query, body_query],
        });
        assert_eq!(parse(input), expected);
    }

    /// A `boolean` query with one `must`, one `should` and an empty `must_not`.
    #[test]
    fn test_from_json_boolean() {
        let input = r#"{
            "boolean": {
                "must": [
                    {
                        "match": {
                            "column": "text",
                            "terms": "hello"
                        }
                    }
                ],
                "should": [
                    {
                        "phrase": {
                            "column": "text",
                            "terms": "world"
                        }
                    }
                ],
                "must_not": []
            }
        }"#;

        let must_clause = (
            Occur::Must,
            FtsQuery::Match(
                MatchQuery::new("hello".to_string())
                    .with_column(Some("text".to_string()))
                    .with_fuzziness(None),
            ),
        );
        let should_clause = (
            Occur::Should,
            FtsQuery::Phrase(
                PhraseQuery::new("world".to_string()).with_column(Some("text".to_string())),
            ),
        );

        let expected = FtsQuery::Boolean(BooleanQuery::new(vec![must_clause, should_clause]));
        assert_eq!(parse(input), expected);
    }
}