lance_index/scalar/inverted/
parser.rs1use super::query::{
5 BooleanQuery, BoostQuery, FtsQuery, MatchQuery, MultiMatchQuery, Occur, Operator, PhraseQuery,
6};
7use lance_core::{Error, Result};
8use serde_json::Value;
9use snafu::location;
10
11pub trait JsonParser {
12 fn from_json(value: &Value) -> Result<Self>
13 where
14 Self: Sized;
15}
16
17impl JsonParser for MatchQuery {
18 fn from_json(value: &Value) -> Result<Self> {
19 let column = value["column"].as_str().map(String::from);
20 let terms = value["terms"]
21 .as_str()
22 .ok_or_else(|| Error::invalid_input("missing terms in match query", location!()))?
23 .to_string();
24 let boost = value["boost"]
25 .as_f64()
26 .map(|v| v as f32)
27 .unwrap_or(Self::default_boost());
28 let fuzziness = match &value["fuzziness"] {
29 Value::Number(num) if num.is_u64() => Some(num.as_u64().unwrap() as u32),
30 Value::String(s) if s.as_str() == "auto" => None,
31 Value::Null => None,
32 _ => Some(0),
33 };
34 let max_expansions = value["max_expansions"]
35 .as_u64()
36 .map(|v| v as usize)
37 .unwrap_or(Self::default_max_expansions());
38 let operator = value["operator"]
39 .as_str()
40 .map(|s| s.try_into().unwrap_or(Operator::default()))
41 .unwrap_or_default();
42 let prefix_length = value["prefix_length"]
43 .as_u64()
44 .map(|v| v as u32)
45 .unwrap_or(0);
46
47 Ok(Self {
48 column,
49 terms,
50 boost,
51 fuzziness,
52 max_expansions,
53 operator,
54 prefix_length,
55 })
56 }
57}
58
59impl JsonParser for PhraseQuery {
60 fn from_json(value: &Value) -> Result<Self> {
61 let column = value["column"].as_str().map(String::from);
62 let terms = value["terms"]
63 .as_str()
64 .ok_or_else(|| Error::invalid_input("missing terms in phrase query", location!()))?
65 .to_string();
66 let slop = value["slop"].as_u64().map(|v| v as u32).unwrap_or(0);
67
68 Ok(Self {
69 column,
70 terms,
71 slop,
72 })
73 }
74}
75
76impl JsonParser for BoostQuery {
77 fn from_json(value: &Value) -> Result<Self> {
78 let positive = value["positive"]
79 .as_object()
80 .ok_or_else(|| Error::invalid_input("missing positive in boost query", location!()))?;
81 let positive_query = from_json_value(&Value::Object(positive.clone()))?;
82
83 let negative = value["negative"]
84 .as_object()
85 .ok_or_else(|| Error::invalid_input("missing negative in boost query", location!()))?;
86 let negative_query = from_json_value(&Value::Object(negative.clone()))?;
87
88 let negative_boost = value["negative_boost"].as_f64().map(|v| v as f32);
89
90 Ok(Self::new(positive_query, negative_query, negative_boost))
91 }
92}
93
94impl JsonParser for MultiMatchQuery {
95 fn from_json(value: &Value) -> Result<Self> {
96 let query = value["match_queries"].as_array().ok_or_else(|| {
97 Error::invalid_input("missing match_queries in multi_match query", location!())
98 })?;
99 let query = query
100 .iter()
101 .map(MatchQuery::from_json)
102 .collect::<Result<Vec<MatchQuery>>>()?;
103
104 if query.is_empty() {
105 return Err(Error::invalid_input("empty multi_match query", location!()));
106 }
107
108 Ok(Self {
109 match_queries: query,
110 })
111 }
112}
113
114impl JsonParser for BooleanQuery {
115 fn from_json(value: &Value) -> Result<Self> {
116 let mut clauses = Vec::new();
117
118 if let Some(must) = value["must"].as_array() {
119 for query_val in must {
120 let query = from_json_value(query_val)?;
121 clauses.push((Occur::Must, query));
122 }
123 }
124
125 if let Some(should) = value["should"].as_array() {
126 for query_val in should {
127 let query = from_json_value(query_val)?;
128 clauses.push((Occur::Should, query));
129 }
130 }
131
132 if let Some(must_not) = value["must_not"].as_array() {
133 for query_val in must_not {
134 let query = from_json_value(query_val)?;
135 clauses.push((Occur::MustNot, query));
136 }
137 }
138
139 Ok(Self::new(clauses))
140 }
141}
142
143fn from_json_value(value: &Value) -> Result<FtsQuery> {
144 let value = value
145 .as_object()
146 .ok_or_else(|| Error::invalid_input("value must be a JSON object", location!()))?;
147 if value.len() != 1 {
148 return Err(Error::invalid_input(
149 "value must be a single JSON object",
150 location!(),
151 ));
152 }
153
154 let (query_type, query_val) = value.into_iter().next().unwrap();
155 match query_type.as_str() {
156 "match" => Ok(FtsQuery::Match(MatchQuery::from_json(query_val)?)),
157 "phrase" => Ok(FtsQuery::Phrase(PhraseQuery::from_json(query_val)?)),
158 "boost" => Ok(FtsQuery::Boost(BoostQuery::from_json(query_val)?)),
159 "multi_match" => Ok(FtsQuery::MultiMatch(MultiMatchQuery::from_json(query_val)?)),
160 "boolean" => Ok(FtsQuery::Boolean(BooleanQuery::from_json(query_val)?)),
161 _ => Err(Error::invalid_input(
162 format!("unknown fts query type: {}", query_type),
163 location!(),
164 )),
165 }
166}
167
168pub fn from_json(json: &str) -> Result<FtsQuery> {
169 let value: Value = serde_json::from_str(json)
170 .map_err(|e| Error::invalid_input(format!("invalid json: {}", e), location!()))?;
171 from_json_value(&value)
172}
173
#[cfg(test)]
mod tests {
    //! Round-trip tests for the JSON query parser: one happy-path test per
    //! query type, plus tests for each rejection path of the parser.

    use super::*;

    #[test]
    fn test_from_json_match() {
        let json = r#"
        {
            "match": {
                "column": "text",
                "terms": "hello world",
                "boost": 2.0,
                "fuzziness": 1,
                "max_expansions": 10,
                "operator": "and",
                "prefix_length": 2
            }
        }
        "#;
        let fts_query = from_json(json).unwrap();
        let expected_query = FtsQuery::Match(MatchQuery {
            column: Some("text".to_string()),
            terms: "hello world".to_string(),
            boost: 2.0,
            fuzziness: Some(1),
            max_expansions: 10,
            operator: Operator::And,
            prefix_length: 2,
        });
        assert_eq!(fts_query, expected_query);
    }

    #[test]
    fn test_from_json_phrase() {
        let json = r#"
        {
            "phrase": {
                "column": "text",
                "terms": "hello world",
                "slop": 1
            }
        }"#;
        let fts_query = from_json(json).unwrap();
        let expected_query = FtsQuery::Phrase(PhraseQuery {
            column: Some("text".to_string()),
            terms: "hello world".to_string(),
            slop: 1,
        });
        assert_eq!(fts_query, expected_query);
    }

    #[test]
    fn test_from_json_boost() {
        let json = r#"
        {
            "boost": {
                "positive": {
                    "match": {
                        "column": "title",
                        "terms": "hello"
                    }
                },
                "negative": {
                    "phrase": {
                        "column": "body",
                        "terms": "world"
                    }
                },
                "negative_boost": 0.5
            }
        }"#;
        let fts_query = from_json(json).unwrap();
        // An absent `fuzziness` key parses to `None`, hence `with_fuzziness(None)`.
        let positive_query = Box::new(FtsQuery::Match(
            MatchQuery::new("hello".to_string())
                .with_column(Some("title".to_string()))
                .with_fuzziness(None),
        ));
        let negative_query = Box::new(FtsQuery::Phrase(
            PhraseQuery::new("world".to_string()).with_column(Some("body".to_string())),
        ));
        let expected_query = FtsQuery::Boost(BoostQuery {
            positive: positive_query,
            negative: negative_query,
            negative_boost: 0.5,
        });
        assert_eq!(fts_query, expected_query);
    }

    #[test]
    fn test_from_json_multi_match() {
        let json = r#"
        {
            "multi_match": {
                "match_queries": [
                    {
                        "column": "title",
                        "terms": "hello"
                    },
                    {
                        "column": "body",
                        "terms": "world"
                    }
                ]
            }
        }"#;
        let fts_query = from_json(json).unwrap();
        let match_queries = vec![
            MatchQuery::new("hello".to_string())
                .with_column(Some("title".to_string()))
                .with_fuzziness(None),
            MatchQuery::new("world".to_string())
                .with_column(Some("body".to_string()))
                .with_fuzziness(None),
        ];
        let expected_query = FtsQuery::MultiMatch(MultiMatchQuery { match_queries });
        assert_eq!(fts_query, expected_query);
    }

    #[test]
    fn test_from_json_boolean() {
        let json = r#"{
            "boolean": {
                "must": [
                    {
                        "match": {
                            "column": "text",
                            "terms": "hello"
                        }
                    }
                ],
                "should": [
                    {
                        "phrase": {
                            "column": "text",
                            "terms": "world"
                        }
                    }
                ],
                "must_not": []
            }
        }"#;
        let fts_query = from_json(json).unwrap();
        let must_query = FtsQuery::Match(
            MatchQuery::new("hello".to_string())
                .with_column(Some("text".to_string()))
                .with_fuzziness(None),
        );
        let should_query = FtsQuery::Phrase(
            PhraseQuery::new("world".to_string()).with_column(Some("text".to_string())),
        );

        let expected_query = FtsQuery::Boolean(BooleanQuery::new(vec![
            (Occur::Must, must_query),
            (Occur::Should, should_query),
        ]));
        assert_eq!(fts_query, expected_query);
    }

    #[test]
    fn test_from_json_rejects_invalid_json() {
        assert!(from_json("{not json").is_err());
        assert!(from_json("").is_err());
    }

    #[test]
    fn test_from_json_rejects_non_object() {
        assert!(from_json("[]").is_err());
        assert!(from_json(r#""match""#).is_err());
        assert!(from_json("42").is_err());
    }

    #[test]
    fn test_from_json_rejects_wrong_key_count() {
        // Zero keys.
        assert!(from_json("{}").is_err());
        // More than one key.
        let json = r#"{
            "match": { "terms": "a" },
            "phrase": { "terms": "b" }
        }"#;
        assert!(from_json(json).is_err());
    }

    #[test]
    fn test_from_json_rejects_unknown_query_type() {
        assert!(from_json(r#"{ "fuzzy": { "terms": "a" } }"#).is_err());
    }

    #[test]
    fn test_from_json_rejects_missing_terms() {
        assert!(from_json(r#"{ "match": { "column": "text" } }"#).is_err());
        assert!(from_json(r#"{ "phrase": { "column": "text" } }"#).is_err());
    }

    #[test]
    fn test_from_json_rejects_incomplete_boost() {
        let missing_positive = r#"{
            "boost": {
                "negative": { "match": { "terms": "a" } }
            }
        }"#;
        assert!(from_json(missing_positive).is_err());

        let missing_negative = r#"{
            "boost": {
                "positive": { "match": { "terms": "a" } }
            }
        }"#;
        assert!(from_json(missing_negative).is_err());
    }

    #[test]
    fn test_from_json_rejects_empty_or_missing_multi_match() {
        assert!(from_json(r#"{ "multi_match": { "match_queries": [] } }"#).is_err());
        assert!(from_json(r#"{ "multi_match": {} }"#).is_err());
    }
}