1use crate::error::PickError;
2use serde_json::Value;
3
/// Upper bound on the number of values `extract` will accumulate while
/// processing a single selector segment; exceeding it is reported as
/// `PickError::TooManyResults`.
const MAX_EXTRACT_RESULTS: usize = 1_000_000;
5
6#[derive(Debug, Clone, PartialEq)]
7pub struct Selector {
8 pub segments: Vec<Segment>,
9}
10
11#[derive(Debug, Clone, PartialEq)]
12pub struct Segment {
13 pub key: Option<String>,
14 pub indices: Vec<Index>,
15}
16
/// A single bracketed array index inside a segment.
#[derive(Clone, Debug, PartialEq)]
pub enum Index {
    /// A specific position (`[n]`); negative values count back from the end
    /// of the array.
    Number(i64),
    /// Every element of the array (`[*]`).
    Wildcard,
}
22
23impl Selector {
24 pub fn parse(input: &str) -> Result<Self, PickError> {
25 if input.is_empty() {
26 return Ok(Selector { segments: vec![] });
27 }
28
29 let mut segments = Vec::new();
30 let mut remaining = input;
31
32 while !remaining.is_empty() {
33 let (segment, rest) = parse_segment(remaining)?;
34 segments.push(segment);
35 remaining = rest;
36
37 if remaining.starts_with('.') {
38 remaining = &remaining[1..];
39 if remaining.is_empty() {
40 return Err(PickError::InvalidSelector(
41 "trailing dot in selector".into(),
42 ));
43 }
44 }
45 }
46
47 Ok(Selector { segments })
48 }
49}
50
51fn parse_segment(input: &str) -> Result<(Segment, &str), PickError> {
52 let (key, remaining) = parse_key(input)?;
53 let (indices, remaining) = parse_indices(remaining)?;
54
55 if key.is_none() && indices.is_empty() {
56 return Err(PickError::InvalidSelector(format!(
57 "unexpected character: '{}'",
58 input.chars().next().unwrap_or('?')
59 )));
60 }
61
62 Ok((Segment { key, indices }, remaining))
63}
64
65fn parse_key(input: &str) -> Result<(Option<String>, &str), PickError> {
66 if input.is_empty() {
67 return Ok((None, input));
68 }
69
70 let first = input.as_bytes()[0];
71
72 if first == b'"' {
73 let rest = &input[1..];
75 let mut key = String::new();
76 let mut chars = rest.chars();
77 let mut consumed = 0;
78 loop {
79 match chars.next() {
80 None => return Err(PickError::InvalidSelector("unterminated quoted key".into())),
81 Some('"') => {
82 consumed += 1;
83 break;
84 }
85 Some('\\') => {
86 consumed += 1;
87 match chars.next() {
88 Some('"') => {
89 key.push('"');
90 consumed += 1;
91 }
92 Some('\\') => {
93 key.push('\\');
94 consumed += 1;
95 }
96 Some(c) => {
97 key.push('\\');
98 key.push(c);
99 consumed += c.len_utf8();
100 }
101 None => {
102 return Err(PickError::InvalidSelector(
103 "unterminated quoted key".into(),
104 ));
105 }
106 }
107 }
108 Some(c) => {
109 key.push(c);
110 consumed += c.len_utf8();
111 }
112 }
113 }
114 Ok((Some(key), &rest[consumed..]))
115 } else if first == b'[' {
116 Ok((None, input))
118 } else if first.is_ascii_alphanumeric() || first == b'_' {
119 let end = input
121 .find(|c: char| !c.is_ascii_alphanumeric() && c != '_' && c != '-')
122 .unwrap_or(input.len());
123 let key = &input[..end];
124 Ok((Some(key.to_string()), &input[end..]))
125 } else {
126 Err(PickError::InvalidSelector(format!(
127 "unexpected character: '{}'",
128 first as char
129 )))
130 }
131}
132
133fn parse_indices(input: &str) -> Result<(Vec<Index>, &str), PickError> {
134 let mut indices = Vec::new();
135 let mut remaining = input;
136
137 while remaining.starts_with('[') {
138 remaining = &remaining[1..]; if remaining.starts_with('*') {
141 indices.push(Index::Wildcard);
142 remaining = &remaining[1..]; } else {
144 let end = remaining
146 .find(']')
147 .ok_or_else(|| PickError::InvalidSelector("unterminated index bracket".into()))?;
148 let num_str = &remaining[..end];
149 let n: i64 = num_str
150 .parse()
151 .map_err(|_| PickError::InvalidSelector(format!("invalid index: '{num_str}'")))?;
152 indices.push(Index::Number(n));
153 remaining = &remaining[end..];
154 }
155
156 if !remaining.starts_with(']') {
157 return Err(PickError::InvalidSelector("expected ']'".into()));
158 }
159 remaining = &remaining[1..]; }
161
162 Ok((indices, remaining))
163}
164
165fn value_type_name(v: &Value) -> &'static str {
166 match v {
167 Value::Null => "null",
168 Value::Bool(_) => "boolean",
169 Value::Number(_) => "number",
170 Value::String(_) => "string",
171 Value::Array(_) => "array",
172 Value::Object(_) => "object",
173 }
174}
175
176pub fn extract(value: &Value, selector: &Selector) -> Result<Vec<Value>, PickError> {
177 if selector.segments.is_empty() {
178 return Ok(vec![value.clone()]);
179 }
180
181 let mut current = vec![value.clone()];
182
183 for segment in &selector.segments {
184 let mut next = Vec::new();
185
186 for val in ¤t {
187 let keyed = if let Some(ref key) = segment.key {
189 match val {
190 Value::Object(map) => match map.get(key) {
191 Some(v) => vec![v.clone()],
192 None => return Err(PickError::KeyNotFound(key.clone())),
193 },
194 other => {
195 return Err(PickError::NotAnObject(
196 key.clone(),
197 value_type_name(other).into(),
198 ));
199 }
200 }
201 } else {
202 vec![val.clone()]
203 };
204
205 let mut indexed = keyed;
207 for index in &segment.indices {
208 let mut next_indexed = Vec::new();
209 for v in &indexed {
210 match index {
211 Index::Number(n) => match v {
212 Value::Array(arr) => {
213 let i = if *n < 0 {
214 let len = i64::try_from(arr.len())
215 .map_err(|_| PickError::IndexOutOfBounds(*n))?;
216 if n.unsigned_abs() > len as u64 {
217 return Err(PickError::IndexOutOfBounds(*n));
218 }
219 (len + n) as usize
220 } else {
221 *n as usize
222 };
223 match arr.get(i) {
224 Some(elem) => next_indexed.push(elem.clone()),
225 None => return Err(PickError::IndexOutOfBounds(*n)),
226 }
227 }
228 other => {
229 return Err(PickError::NotAnArray(value_type_name(other).into()));
230 }
231 },
232 Index::Wildcard => match v {
233 Value::Array(arr) => {
234 next_indexed.extend(arr.iter().cloned());
235 }
236 other => {
237 return Err(PickError::NotAnArray(value_type_name(other).into()));
238 }
239 },
240 }
241 }
242 indexed = next_indexed;
243 }
244
245 next.extend(indexed);
246 if next.len() > MAX_EXTRACT_RESULTS {
247 return Err(PickError::TooManyResults(MAX_EXTRACT_RESULTS));
248 }
249 }
250
251 current = next;
252 }
253
254 Ok(current)
255}
256
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// Parses `input`, panicking when the selector is invalid.
    fn sel(input: &str) -> Selector {
        Selector::parse(input).unwrap()
    }

    /// Collects the key of every segment of `selector`, in order.
    fn keys(selector: &Selector) -> Vec<Option<&str>> {
        selector
            .segments
            .iter()
            .map(|segment| segment.key.as_deref())
            .collect()
    }

    /// Parses `input` and runs it against `doc`.
    fn pick(doc: &Value, input: &str) -> Result<Vec<Value>, PickError> {
        extract(doc, &sel(input))
    }

    /// Asserts that `input` is rejected with `PickError::InvalidSelector`.
    fn assert_invalid(input: &str) {
        assert!(
            matches!(Selector::parse(input), Err(PickError::InvalidSelector(..))),
            "expected InvalidSelector for {:?}",
            input
        );
    }

    // Parsing

    #[test]
    fn parse_empty_selector() {
        assert!(sel("").segments.is_empty());
    }

    #[test]
    fn parse_simple_key() {
        let parsed = sel("foo");
        assert_eq!(keys(&parsed), vec![Some("foo")]);
        assert!(parsed.segments[0].indices.is_empty());
    }

    #[test]
    fn parse_nested_keys() {
        let parsed = sel("foo.bar.baz");
        assert_eq!(keys(&parsed), vec![Some("foo"), Some("bar"), Some("baz")]);
    }

    #[test]
    fn parse_array_index() {
        let parsed = sel("items[0]");
        assert_eq!(keys(&parsed), vec![Some("items")]);
        assert_eq!(parsed.segments[0].indices, vec![Index::Number(0)]);
    }

    #[test]
    fn parse_nested_with_index() {
        let parsed = sel("foo.bar[0].baz");
        assert_eq!(parsed.segments.len(), 3);
        assert_eq!(parsed.segments[1].key.as_deref(), Some("bar"));
        assert_eq!(parsed.segments[1].indices, vec![Index::Number(0)]);
    }

    #[test]
    fn parse_wildcard() {
        let parsed = sel("items[*]");
        assert_eq!(parsed.segments[0].indices, vec![Index::Wildcard]);
    }

    #[test]
    fn parse_multiple_indices() {
        let parsed = sel("matrix[0][1]");
        assert_eq!(
            parsed.segments[0].indices,
            vec![Index::Number(0), Index::Number(1)]
        );
    }

    #[test]
    fn parse_negative_index() {
        let parsed = sel("items[-1]");
        assert_eq!(parsed.segments[0].indices, vec![Index::Number(-1)]);
    }

    #[test]
    fn parse_quoted_key() {
        let parsed = sel("\"foo.bar\".baz");
        assert_eq!(keys(&parsed), vec![Some("foo.bar"), Some("baz")]);
    }

    #[test]
    fn parse_key_with_hyphens() {
        let parsed = sel("content-type");
        assert_eq!(parsed.segments[0].key.as_deref(), Some("content-type"));
    }

    #[test]
    fn parse_key_with_numbers() {
        let parsed = sel("item1.value2");
        assert_eq!(parsed.segments[0].key.as_deref(), Some("item1"));
        assert_eq!(parsed.segments[1].key.as_deref(), Some("value2"));
    }

    #[test]
    fn parse_leading_index() {
        let parsed = sel("[0].name");
        assert_eq!(keys(&parsed), vec![None, Some("name")]);
        assert_eq!(parsed.segments[0].indices, vec![Index::Number(0)]);
    }

    #[test]
    fn parse_only_index() {
        let parsed = sel("[0]");
        assert_eq!(keys(&parsed), vec![None]);
        assert_eq!(parsed.segments[0].indices, vec![Index::Number(0)]);
    }

    #[test]
    fn parse_only_wildcard() {
        let parsed = sel("[*]");
        assert_eq!(parsed.segments.len(), 1);
        assert_eq!(parsed.segments[0].indices, vec![Index::Wildcard]);
    }

    #[test]
    fn parse_trailing_dot_error() {
        assert_invalid("foo.");
    }

    #[test]
    fn parse_double_dot_error() {
        assert_invalid("foo..bar");
    }

    #[test]
    fn parse_unterminated_bracket_error() {
        assert_invalid("foo[0");
    }

    #[test]
    fn parse_empty_bracket_error() {
        assert_invalid("foo[]");
    }

    #[test]
    fn parse_invalid_index_error() {
        assert_invalid("foo[abc]");
    }

    #[test]
    fn parse_unterminated_quote_error() {
        assert_invalid("\"foo");
    }

    #[test]
    fn parse_wildcard_then_index() {
        let parsed = sel("[*][0]");
        assert_eq!(
            parsed.segments[0].indices,
            vec![Index::Wildcard, Index::Number(0)]
        );
    }

    // Extraction

    #[test]
    fn extract_empty_selector() {
        let doc = json!({"a": 1});
        assert_eq!(pick(&doc, "").unwrap(), vec![json!({"a": 1})]);
    }

    #[test]
    fn extract_simple_key() {
        let doc = json!({"name": "Alice"});
        assert_eq!(pick(&doc, "name").unwrap(), vec![json!("Alice")]);
    }

    #[test]
    fn extract_nested_key() {
        let doc = json!({"foo": {"bar": 42}});
        assert_eq!(pick(&doc, "foo.bar").unwrap(), vec![json!(42)]);
    }

    #[test]
    fn extract_array_index() {
        let doc = json!({"items": [10, 20, 30]});
        assert_eq!(pick(&doc, "items[1]").unwrap(), vec![json!(20)]);
    }

    #[test]
    fn extract_negative_index() {
        let doc = json!({"items": [10, 20, 30]});
        assert_eq!(pick(&doc, "items[-1]").unwrap(), vec![json!(30)]);
    }

    #[test]
    fn extract_negative_index_first() {
        let doc = json!({"items": [10, 20, 30]});
        assert_eq!(pick(&doc, "items[-3]").unwrap(), vec![json!(10)]);
    }

    #[test]
    fn extract_wildcard() {
        let doc = json!({"items": [{"name": "a"}, {"name": "b"}]});
        assert_eq!(
            pick(&doc, "items[*].name").unwrap(),
            vec![json!("a"), json!("b")]
        );
    }

    #[test]
    fn extract_chained_indices() {
        let doc = json!({"matrix": [[1, 2], [3, 4]]});
        assert_eq!(pick(&doc, "matrix[0][1]").unwrap(), vec![json!(2)]);
    }

    #[test]
    fn extract_leading_index() {
        let doc = json!([{"name": "first"}, {"name": "second"}]);
        assert_eq!(pick(&doc, "[0].name").unwrap(), vec![json!("first")]);
    }

    #[test]
    fn extract_key_not_found() {
        let doc = json!({"a": 1});
        assert!(matches!(pick(&doc, "b"), Err(PickError::KeyNotFound(..))));
    }

    #[test]
    fn extract_index_out_of_bounds() {
        let doc = json!({"items": [1, 2]});
        assert!(matches!(
            pick(&doc, "items[5]"),
            Err(PickError::IndexOutOfBounds(..))
        ));
    }

    #[test]
    fn extract_negative_index_out_of_bounds() {
        let doc = json!({"items": [1, 2]});
        assert!(matches!(
            pick(&doc, "items[-5]"),
            Err(PickError::IndexOutOfBounds(..))
        ));
    }

    #[test]
    fn extract_not_an_object() {
        let doc = json!("hello");
        assert!(matches!(pick(&doc, "foo"), Err(PickError::NotAnObject(..))));
    }

    #[test]
    fn extract_not_an_array() {
        let doc = json!({"foo": "bar"});
        assert!(matches!(
            pick(&doc, "foo[0]"),
            Err(PickError::NotAnArray(..))
        ));
    }

    #[test]
    fn extract_wildcard_on_non_array() {
        let doc = json!({"foo": "bar"});
        assert!(matches!(
            pick(&doc, "foo[*]"),
            Err(PickError::NotAnArray(..))
        ));
    }

    #[test]
    fn extract_null_value() {
        let doc = json!({"foo": null});
        assert_eq!(pick(&doc, "foo").unwrap(), vec![Value::Null]);
    }

    #[test]
    fn extract_boolean() {
        let doc = json!({"active": true});
        assert_eq!(pick(&doc, "active").unwrap(), vec![json!(true)]);
    }

    #[test]
    fn extract_nested_array_wildcard() {
        let doc = json!([{"items": [1, 2]}, {"items": [3, 4]}]);
        assert_eq!(
            pick(&doc, "[*].items[0]").unwrap(),
            vec![json!(1), json!(3)]
        );
    }

    #[test]
    fn extract_deep_nesting() {
        let doc = json!({"a": {"b": {"c": {"d": 99}}}});
        assert_eq!(pick(&doc, "a.b.c.d").unwrap(), vec![json!(99)]);
    }

    #[test]
    fn extract_key_on_null() {
        let doc = json!({"a": null});
        assert!(matches!(pick(&doc, "a.b"), Err(PickError::NotAnObject(..))));
    }

    #[test]
    fn extract_quoted_key_with_dot() {
        let doc = json!({"foo.bar": {"baz": 1}});
        assert_eq!(pick(&doc, "\"foo.bar\".baz").unwrap(), vec![json!(1)]);
    }

    #[test]
    fn extract_hyphenated_key() {
        let doc = json!({"content-type": "text/html"});
        assert_eq!(pick(&doc, "content-type").unwrap(), vec![json!("text/html")]);
    }

    #[test]
    fn extract_empty_array_wildcard() {
        let doc = json!({"items": []});
        assert!(pick(&doc, "items[*]").unwrap().is_empty());
    }
}