lance_index/scalar/inverted/
parser.rs1use super::query::{
5 BooleanQuery, BoostQuery, FtsQuery, MatchQuery, MultiMatchQuery, Occur, Operator, PhraseQuery,
6};
7use lance_core::{Error, Result};
8use serde_json::Value;
9
10pub trait JsonParser {
11 fn from_json(value: &Value) -> Result<Self>
12 where
13 Self: Sized;
14}
15
16impl JsonParser for MatchQuery {
17 fn from_json(value: &Value) -> Result<Self> {
18 let column = value["column"].as_str().map(String::from);
19 let terms = value["terms"]
20 .as_str()
21 .ok_or_else(|| Error::invalid_input("missing terms in match query"))?
22 .to_string();
23 let boost = value["boost"]
24 .as_f64()
25 .map(|v| v as f32)
26 .unwrap_or(Self::default_boost());
27 let fuzziness = match &value["fuzziness"] {
28 Value::Number(num) if num.is_u64() => Some(num.as_u64().unwrap() as u32),
29 Value::String(s) if s.as_str() == "auto" => None,
30 Value::Null => None,
31 _ => Some(0),
32 };
33 let max_expansions = value["max_expansions"]
34 .as_u64()
35 .map(|v| v as usize)
36 .unwrap_or(Self::default_max_expansions());
37 let operator = value["operator"]
38 .as_str()
39 .map(|s| s.try_into().unwrap_or(Operator::default()))
40 .unwrap_or_default();
41 let prefix_length = value["prefix_length"]
42 .as_u64()
43 .map(|v| v as u32)
44 .unwrap_or(0);
45
46 Ok(Self {
47 column,
48 terms,
49 boost,
50 fuzziness,
51 max_expansions,
52 operator,
53 prefix_length,
54 })
55 }
56}
57
58impl JsonParser for PhraseQuery {
59 fn from_json(value: &Value) -> Result<Self> {
60 let column = value["column"].as_str().map(String::from);
61 let terms = value["terms"]
62 .as_str()
63 .ok_or_else(|| Error::invalid_input("missing terms in phrase query"))?
64 .to_string();
65 let slop = value["slop"].as_u64().map(|v| v as u32).unwrap_or(0);
66
67 Ok(Self {
68 column,
69 terms,
70 slop,
71 })
72 }
73}
74
75impl JsonParser for BoostQuery {
76 fn from_json(value: &Value) -> Result<Self> {
77 let positive = value["positive"]
78 .as_object()
79 .ok_or_else(|| Error::invalid_input("missing positive in boost query"))?;
80 let positive_query = from_json_value(&Value::Object(positive.clone()))?;
81
82 let negative = value["negative"]
83 .as_object()
84 .ok_or_else(|| Error::invalid_input("missing negative in boost query"))?;
85 let negative_query = from_json_value(&Value::Object(negative.clone()))?;
86
87 let negative_boost = value["negative_boost"].as_f64().map(|v| v as f32);
88
89 Ok(Self::new(positive_query, negative_query, negative_boost))
90 }
91}
92
93impl JsonParser for MultiMatchQuery {
94 fn from_json(value: &Value) -> Result<Self> {
95 let query = value["match_queries"]
96 .as_array()
97 .ok_or_else(|| Error::invalid_input("missing match_queries in multi_match query"))?;
98 let query = query
99 .iter()
100 .map(MatchQuery::from_json)
101 .collect::<Result<Vec<MatchQuery>>>()?;
102
103 if query.is_empty() {
104 return Err(Error::invalid_input("empty multi_match query"));
105 }
106
107 Ok(Self {
108 match_queries: query,
109 })
110 }
111}
112
113impl JsonParser for BooleanQuery {
114 fn from_json(value: &Value) -> Result<Self> {
115 let mut clauses = Vec::new();
116
117 if let Some(must) = value["must"].as_array() {
118 for query_val in must {
119 let query = from_json_value(query_val)?;
120 clauses.push((Occur::Must, query));
121 }
122 }
123
124 if let Some(should) = value["should"].as_array() {
125 for query_val in should {
126 let query = from_json_value(query_val)?;
127 clauses.push((Occur::Should, query));
128 }
129 }
130
131 if let Some(must_not) = value["must_not"].as_array() {
132 for query_val in must_not {
133 let query = from_json_value(query_val)?;
134 clauses.push((Occur::MustNot, query));
135 }
136 }
137
138 Ok(Self::new(clauses))
139 }
140}
141
142fn from_json_value(value: &Value) -> Result<FtsQuery> {
143 let value = value
144 .as_object()
145 .ok_or_else(|| Error::invalid_input("value must be a JSON object"))?;
146 if value.len() != 1 {
147 return Err(Error::invalid_input("value must be a single JSON object"));
148 }
149
150 let (query_type, query_val) = value.into_iter().next().unwrap();
151 match query_type.as_str() {
152 "match" => Ok(FtsQuery::Match(MatchQuery::from_json(query_val)?)),
153 "phrase" => Ok(FtsQuery::Phrase(PhraseQuery::from_json(query_val)?)),
154 "boost" => Ok(FtsQuery::Boost(BoostQuery::from_json(query_val)?)),
155 "multi_match" => Ok(FtsQuery::MultiMatch(MultiMatchQuery::from_json(query_val)?)),
156 "boolean" => Ok(FtsQuery::Boolean(BooleanQuery::from_json(query_val)?)),
157 _ => Err(Error::invalid_input(format!(
158 "unknown fts query type: {}",
159 query_type
160 ))),
161 }
162}
163
164pub fn from_json(json: &str) -> Result<FtsQuery> {
165 let value: Value = serde_json::from_str(json)
166 .map_err(|e| Error::invalid_input(format!("invalid json: {}", e)))?;
167 from_json_value(&value)
168}
169
#[cfg(test)]
mod tests {
    use super::*;

    /// A `match` query with every supported key set explicitly.
    #[test]
    fn test_from_json_match() {
        let input = r#"
        {
            "match": {
                "column": "text",
                "terms": "hello world",
                "boost": 2.0,
                "fuzziness": 1,
                "max_expansions": 10,
                "operator": "and",
                "prefix_length": 2
            }
        }
        "#;
        let parsed = from_json(input).unwrap();
        let expected = FtsQuery::Match(MatchQuery {
            column: Some("text".to_string()),
            terms: "hello world".to_string(),
            boost: 2.0,
            fuzziness: Some(1),
            max_expansions: 10,
            operator: Operator::And,
            prefix_length: 2,
        });
        assert_eq!(parsed, expected);
    }

    /// A `phrase` query with an explicit `slop`.
    #[test]
    fn test_from_json_phrase() {
        let input = r#"
        {
            "phrase": {
                "column": "text",
                "terms": "hello world",
                "slop": 1
            }
        }"#;
        let parsed = from_json(input).unwrap();
        let expected = FtsQuery::Phrase(PhraseQuery {
            column: Some("text".to_string()),
            terms: "hello world".to_string(),
            slop: 1,
        });
        assert_eq!(parsed, expected);
    }

    /// A `boost` query wrapping a `match` (positive) and a `phrase` (negative).
    #[test]
    fn test_from_json_boost() {
        let input = r#"
        {
            "boost": {
                "positive": {
                    "match": {
                        "column": "title",
                        "terms": "hello"
                    }
                },
                "negative": {
                    "phrase": {
                        "column": "body",
                        "terms": "world"
                    }
                },
                "negative_boost": 0.5
            }
        }"#;
        let parsed = from_json(input).unwrap();

        // The parser yields `fuzziness: None` when the key is absent, so the
        // expected value sets it explicitly.
        let positive = Box::new(FtsQuery::Match(
            MatchQuery::new("hello".to_string())
                .with_column(Some("title".to_string()))
                .with_fuzziness(None),
        ));
        let negative = Box::new(FtsQuery::Phrase(
            PhraseQuery::new("world".to_string()).with_column(Some("body".to_string())),
        ));
        let expected = FtsQuery::Boost(BoostQuery {
            positive,
            negative,
            negative_boost: 0.5,
        });
        assert_eq!(parsed, expected);
    }

    /// A `multi_match` query with two bare match bodies.
    #[test]
    fn test_from_json_multi_match() {
        let input = r#"
        {
            "multi_match": {
                "match_queries": [
                    {
                        "column": "title",
                        "terms": "hello"
                    },
                    {
                        "column": "body",
                        "terms": "world"
                    }
                ]
            }
        }"#;
        let parsed = from_json(input).unwrap();

        let title_match = MatchQuery::new("hello".to_string())
            .with_column(Some("title".to_string()))
            .with_fuzziness(None);
        let body_match = MatchQuery::new("world".to_string())
            .with_column(Some("body".to_string()))
            .with_fuzziness(None);
        let expected = FtsQuery::MultiMatch(MultiMatchQuery {
            match_queries: vec![title_match, body_match],
        });
        assert_eq!(parsed, expected);
    }

    /// A `boolean` query with one `must`, one `should` and an empty `must_not`.
    #[test]
    fn test_from_json_boolean() {
        let input = r#"{
            "boolean": {
                "must": [
                    {
                        "match": {
                            "column": "text",
                            "terms": "hello"
                        }
                    }
                ],
                "should": [
                    {
                        "phrase": {
                            "column": "text",
                            "terms": "world"
                        }
                    }
                ],
                "must_not": []
            }
        }"#;
        let parsed = from_json(input).unwrap();

        let must_clause = FtsQuery::Match(
            MatchQuery::new("hello".to_string())
                .with_column(Some("text".to_string()))
                .with_fuzziness(None),
        );
        let should_clause = FtsQuery::Phrase(
            PhraseQuery::new("world".to_string()).with_column(Some("text".to_string())),
        );

        // The empty `must_not` array contributes no clauses.
        let expected = FtsQuery::Boolean(BooleanQuery::new(vec![
            (Occur::Must, must_clause),
            (Occur::Should, should_clause),
        ]));
        assert_eq!(parsed, expected);
    }
}