Skip to main content

pick/
selector.rs

1use crate::error::PickError;
2use serde_json::Value;
3
/// Largest number of values `extract` may accumulate while applying a single
/// selector segment; going past it aborts with `PickError::TooManyResults`.
const MAX_EXTRACT_RESULTS: usize = 1_000_000;
5
/// A parsed selector: the ordered list of segments to apply to a JSON value.
///
/// An empty `segments` list selects the input value itself.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct Selector {
    /// Segments in the order they appear in the selector text.
    pub segments: Vec<Segment>,
}

/// One step of a selector: an optional object key followed by zero or more
/// bracketed indices, e.g. `items[0][*]`.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct Segment {
    /// Object key to look up first; `None` when the segment starts with `[`.
    pub key: Option<String>,
    /// Indices applied one after another to the result of the key lookup.
    pub indices: Vec<Index>,
}

/// A single bracketed index inside a [`Segment`].
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum Index {
    /// `[n]` — a specific array position; negative values count from the end.
    Number(i64),
    /// `[*]` — every element of the array.
    Wildcard,
}
22
23impl Selector {
24    pub fn parse(input: &str) -> Result<Self, PickError> {
25        if input.is_empty() {
26            return Ok(Selector { segments: vec![] });
27        }
28
29        let mut segments = Vec::new();
30        let mut remaining = input;
31
32        while !remaining.is_empty() {
33            let (segment, rest) = parse_segment(remaining)?;
34            segments.push(segment);
35            remaining = rest;
36
37            if remaining.starts_with('.') {
38                remaining = &remaining[1..];
39                if remaining.is_empty() {
40                    return Err(PickError::InvalidSelector(
41                        "trailing dot in selector".into(),
42                    ));
43                }
44            }
45        }
46
47        Ok(Selector { segments })
48    }
49}
50
51fn parse_segment(input: &str) -> Result<(Segment, &str), PickError> {
52    let (key, remaining) = parse_key(input)?;
53    let (indices, remaining) = parse_indices(remaining)?;
54
55    if key.is_none() && indices.is_empty() {
56        return Err(PickError::InvalidSelector(format!(
57            "unexpected character: '{}'",
58            input.chars().next().unwrap_or('?')
59        )));
60    }
61
62    Ok((Segment { key, indices }, remaining))
63}
64
65fn parse_key(input: &str) -> Result<(Option<String>, &str), PickError> {
66    if input.is_empty() {
67        return Ok((None, input));
68    }
69
70    let first = input.as_bytes()[0];
71
72    if first == b'"' {
73        // Quoted key with escape support
74        let rest = &input[1..];
75        let mut key = String::new();
76        let mut chars = rest.chars();
77        let mut consumed = 0;
78        loop {
79            match chars.next() {
80                None => return Err(PickError::InvalidSelector("unterminated quoted key".into())),
81                Some('"') => {
82                    consumed += 1;
83                    break;
84                }
85                Some('\\') => {
86                    consumed += 1;
87                    match chars.next() {
88                        Some('"') => {
89                            key.push('"');
90                            consumed += 1;
91                        }
92                        Some('\\') => {
93                            key.push('\\');
94                            consumed += 1;
95                        }
96                        Some(c) => {
97                            key.push('\\');
98                            key.push(c);
99                            consumed += c.len_utf8();
100                        }
101                        None => {
102                            return Err(PickError::InvalidSelector(
103                                "unterminated quoted key".into(),
104                            ));
105                        }
106                    }
107                }
108                Some(c) => {
109                    key.push(c);
110                    consumed += c.len_utf8();
111                }
112            }
113        }
114        Ok((Some(key), &rest[consumed..]))
115    } else if first == b'[' {
116        // No key, just indices
117        Ok((None, input))
118    } else if first.is_ascii_alphanumeric() || first == b'_' {
119        // Bare key: alphanumeric, underscore, hyphen
120        let end = input
121            .find(|c: char| !c.is_ascii_alphanumeric() && c != '_' && c != '-')
122            .unwrap_or(input.len());
123        let key = &input[..end];
124        Ok((Some(key.to_string()), &input[end..]))
125    } else {
126        Err(PickError::InvalidSelector(format!(
127            "unexpected character: '{}'",
128            first as char
129        )))
130    }
131}
132
133fn parse_indices(input: &str) -> Result<(Vec<Index>, &str), PickError> {
134    let mut indices = Vec::new();
135    let mut remaining = input;
136
137    while remaining.starts_with('[') {
138        remaining = &remaining[1..]; // consume [
139
140        if remaining.starts_with('*') {
141            indices.push(Index::Wildcard);
142            remaining = &remaining[1..]; // consume *
143        } else {
144            // Parse integer
145            let end = remaining
146                .find(']')
147                .ok_or_else(|| PickError::InvalidSelector("unterminated index bracket".into()))?;
148            let num_str = &remaining[..end];
149            let n: i64 = num_str
150                .parse()
151                .map_err(|_| PickError::InvalidSelector(format!("invalid index: '{num_str}'")))?;
152            indices.push(Index::Number(n));
153            remaining = &remaining[end..];
154        }
155
156        if !remaining.starts_with(']') {
157            return Err(PickError::InvalidSelector("expected ']'".into()));
158        }
159        remaining = &remaining[1..]; // consume ]
160    }
161
162    Ok((indices, remaining))
163}
164
165fn value_type_name(v: &Value) -> &'static str {
166    match v {
167        Value::Null => "null",
168        Value::Bool(_) => "boolean",
169        Value::Number(_) => "number",
170        Value::String(_) => "string",
171        Value::Array(_) => "array",
172        Value::Object(_) => "object",
173    }
174}
175
176pub fn extract(value: &Value, selector: &Selector) -> Result<Vec<Value>, PickError> {
177    if selector.segments.is_empty() {
178        return Ok(vec![value.clone()]);
179    }
180
181    let mut current = vec![value.clone()];
182
183    for segment in &selector.segments {
184        let mut next = Vec::new();
185
186        for val in &current {
187            // Apply key if present
188            let keyed = if let Some(ref key) = segment.key {
189                match val {
190                    Value::Object(map) => match map.get(key) {
191                        Some(v) => vec![v.clone()],
192                        None => return Err(PickError::KeyNotFound(key.clone())),
193                    },
194                    other => {
195                        return Err(PickError::NotAnObject(
196                            key.clone(),
197                            value_type_name(other).into(),
198                        ));
199                    }
200                }
201            } else {
202                vec![val.clone()]
203            };
204
205            // Apply indices sequentially
206            let mut indexed = keyed;
207            for index in &segment.indices {
208                let mut next_indexed = Vec::new();
209                for v in &indexed {
210                    match index {
211                        Index::Number(n) => match v {
212                            Value::Array(arr) => {
213                                let i = if *n < 0 {
214                                    let len = i64::try_from(arr.len())
215                                        .map_err(|_| PickError::IndexOutOfBounds(*n))?;
216                                    if n.unsigned_abs() > len as u64 {
217                                        return Err(PickError::IndexOutOfBounds(*n));
218                                    }
219                                    (len + n) as usize
220                                } else {
221                                    *n as usize
222                                };
223                                match arr.get(i) {
224                                    Some(elem) => next_indexed.push(elem.clone()),
225                                    None => return Err(PickError::IndexOutOfBounds(*n)),
226                                }
227                            }
228                            other => {
229                                return Err(PickError::NotAnArray(value_type_name(other).into()));
230                            }
231                        },
232                        Index::Wildcard => match v {
233                            Value::Array(arr) => {
234                                next_indexed.extend(arr.iter().cloned());
235                            }
236                            other => {
237                                return Err(PickError::NotAnArray(value_type_name(other).into()));
238                            }
239                        },
240                    }
241                }
242                indexed = next_indexed;
243            }
244
245            next.extend(indexed);
246            if next.len() > MAX_EXTRACT_RESULTS {
247                return Err(PickError::TooManyResults(MAX_EXTRACT_RESULTS));
248            }
249        }
250
251        current = next;
252    }
253
254    Ok(current)
255}
256
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// Parses `input`, panicking when the selector is rejected.
    fn sel(input: &str) -> Selector {
        Selector::parse(input).unwrap()
    }

    /// Keys of every segment of `selector`, in order.
    fn keys(selector: &Selector) -> Vec<Option<&str>> {
        selector
            .segments
            .iter()
            .map(|segment| segment.key.as_deref())
            .collect()
    }

    /// Parses `selector` (which must be valid) and applies it to `doc`.
    fn pick(doc: &Value, selector: &str) -> Result<Vec<Value>, PickError> {
        extract(doc, &sel(selector))
    }

    // --- Selector Parsing Tests ---

    #[test]
    fn parse_empty_selector() {
        assert!(sel("").segments.is_empty());
    }

    #[test]
    fn parse_simple_key() {
        let parsed = sel("foo");
        assert_eq!(keys(&parsed), vec![Some("foo")]);
        assert!(parsed.segments[0].indices.is_empty());
    }

    #[test]
    fn parse_nested_keys() {
        let parsed = sel("foo.bar.baz");
        assert_eq!(keys(&parsed), vec![Some("foo"), Some("bar"), Some("baz")]);
    }

    #[test]
    fn parse_array_index() {
        let parsed = sel("items[0]");
        assert_eq!(keys(&parsed), vec![Some("items")]);
        assert_eq!(parsed.segments[0].indices, vec![Index::Number(0)]);
    }

    #[test]
    fn parse_nested_with_index() {
        let parsed = sel("foo.bar[0].baz");
        assert_eq!(parsed.segments.len(), 3);
        let middle = &parsed.segments[1];
        assert_eq!(middle.key.as_deref(), Some("bar"));
        assert_eq!(middle.indices, vec![Index::Number(0)]);
    }

    #[test]
    fn parse_wildcard() {
        let parsed = sel("items[*]");
        assert_eq!(parsed.segments[0].indices, vec![Index::Wildcard]);
    }

    #[test]
    fn parse_multiple_indices() {
        let parsed = sel("matrix[0][1]");
        let expected = vec![Index::Number(0), Index::Number(1)];
        assert_eq!(parsed.segments[0].indices, expected);
    }

    #[test]
    fn parse_negative_index() {
        let parsed = sel("items[-1]");
        assert_eq!(parsed.segments[0].indices, vec![Index::Number(-1)]);
    }

    #[test]
    fn parse_quoted_key() {
        let parsed = sel("\"foo.bar\".baz");
        assert_eq!(keys(&parsed), vec![Some("foo.bar"), Some("baz")]);
    }

    #[test]
    fn parse_key_with_hyphens() {
        let parsed = sel("content-type");
        assert_eq!(parsed.segments[0].key.as_deref(), Some("content-type"));
    }

    #[test]
    fn parse_key_with_numbers() {
        let parsed = sel("item1.value2");
        assert_eq!(parsed.segments[0].key.as_deref(), Some("item1"));
        assert_eq!(parsed.segments[1].key.as_deref(), Some("value2"));
    }

    #[test]
    fn parse_leading_index() {
        let parsed = sel("[0].name");
        assert_eq!(keys(&parsed), vec![None, Some("name")]);
        assert_eq!(parsed.segments[0].indices, vec![Index::Number(0)]);
    }

    #[test]
    fn parse_only_index() {
        let parsed = sel("[0]");
        assert_eq!(keys(&parsed), vec![None]);
        assert_eq!(parsed.segments[0].indices, vec![Index::Number(0)]);
    }

    #[test]
    fn parse_only_wildcard() {
        let parsed = sel("[*]");
        assert_eq!(parsed.segments.len(), 1);
        assert_eq!(parsed.segments[0].indices, vec![Index::Wildcard]);
    }

    #[test]
    fn parse_trailing_dot_error() {
        assert!(Selector::parse("foo.").is_err());
    }

    #[test]
    fn parse_double_dot_error() {
        assert!(Selector::parse("foo..bar").is_err());
    }

    #[test]
    fn parse_unterminated_bracket_error() {
        assert!(Selector::parse("foo[0").is_err());
    }

    #[test]
    fn parse_empty_bracket_error() {
        assert!(Selector::parse("foo[]").is_err());
    }

    #[test]
    fn parse_invalid_index_error() {
        assert!(Selector::parse("foo[abc]").is_err());
    }

    #[test]
    fn parse_unterminated_quote_error() {
        assert!(Selector::parse("\"foo").is_err());
    }

    #[test]
    fn parse_wildcard_then_index() {
        let parsed = sel("[*][0]");
        let expected = vec![Index::Wildcard, Index::Number(0)];
        assert_eq!(parsed.segments[0].indices, expected);
    }

    // --- Extraction Tests ---

    #[test]
    fn extract_empty_selector() {
        let doc = json!({"a": 1});
        assert_eq!(pick(&doc, "").unwrap(), vec![json!({"a": 1})]);
    }

    #[test]
    fn extract_simple_key() {
        let doc = json!({"name": "Alice"});
        assert_eq!(pick(&doc, "name").unwrap(), vec![json!("Alice")]);
    }

    #[test]
    fn extract_nested_key() {
        let doc = json!({"foo": {"bar": 42}});
        assert_eq!(pick(&doc, "foo.bar").unwrap(), vec![json!(42)]);
    }

    #[test]
    fn extract_array_index() {
        let doc = json!({"items": [10, 20, 30]});
        assert_eq!(pick(&doc, "items[1]").unwrap(), vec![json!(20)]);
    }

    #[test]
    fn extract_negative_index() {
        let doc = json!({"items": [10, 20, 30]});
        assert_eq!(pick(&doc, "items[-1]").unwrap(), vec![json!(30)]);
    }

    #[test]
    fn extract_negative_index_first() {
        let doc = json!({"items": [10, 20, 30]});
        assert_eq!(pick(&doc, "items[-3]").unwrap(), vec![json!(10)]);
    }

    #[test]
    fn extract_wildcard() {
        let doc = json!({"items": [{"name": "a"}, {"name": "b"}]});
        assert_eq!(
            pick(&doc, "items[*].name").unwrap(),
            vec![json!("a"), json!("b")]
        );
    }

    #[test]
    fn extract_chained_indices() {
        let doc = json!({"matrix": [[1, 2], [3, 4]]});
        assert_eq!(pick(&doc, "matrix[0][1]").unwrap(), vec![json!(2)]);
    }

    #[test]
    fn extract_leading_index() {
        let doc = json!([{"name": "first"}, {"name": "second"}]);
        assert_eq!(pick(&doc, "[0].name").unwrap(), vec![json!("first")]);
    }

    #[test]
    fn extract_key_not_found() {
        assert!(pick(&json!({"a": 1}), "b").is_err());
    }

    #[test]
    fn extract_index_out_of_bounds() {
        assert!(pick(&json!({"items": [1, 2]}), "items[5]").is_err());
    }

    #[test]
    fn extract_negative_index_out_of_bounds() {
        assert!(pick(&json!({"items": [1, 2]}), "items[-5]").is_err());
    }

    #[test]
    fn extract_not_an_object() {
        assert!(pick(&json!("hello"), "foo").is_err());
    }

    #[test]
    fn extract_not_an_array() {
        assert!(pick(&json!({"foo": "bar"}), "foo[0]").is_err());
    }

    #[test]
    fn extract_wildcard_on_non_array() {
        assert!(pick(&json!({"foo": "bar"}), "foo[*]").is_err());
    }

    #[test]
    fn extract_null_value() {
        let doc = json!({"foo": null});
        assert_eq!(pick(&doc, "foo").unwrap(), vec![Value::Null]);
    }

    #[test]
    fn extract_boolean() {
        let doc = json!({"active": true});
        assert_eq!(pick(&doc, "active").unwrap(), vec![json!(true)]);
    }

    #[test]
    fn extract_nested_array_wildcard() {
        let doc = json!([{"items": [1, 2]}, {"items": [3, 4]}]);
        assert_eq!(
            pick(&doc, "[*].items[0]").unwrap(),
            vec![json!(1), json!(3)]
        );
    }

    #[test]
    fn extract_deep_nesting() {
        let doc = json!({"a": {"b": {"c": {"d": 99}}}});
        assert_eq!(pick(&doc, "a.b.c.d").unwrap(), vec![json!(99)]);
    }

    #[test]
    fn extract_key_on_null() {
        assert!(pick(&json!({"a": null}), "a.b").is_err());
    }

    #[test]
    fn extract_quoted_key_with_dot() {
        let doc = json!({"foo.bar": {"baz": 1}});
        assert_eq!(pick(&doc, "\"foo.bar\".baz").unwrap(), vec![json!(1)]);
    }

    #[test]
    fn extract_hyphenated_key() {
        let doc = json!({"content-type": "text/html"});
        assert_eq!(
            pick(&doc, "content-type").unwrap(),
            vec![json!("text/html")]
        );
    }

    #[test]
    fn extract_empty_array_wildcard() {
        let doc = json!({"items": []});
        assert!(pick(&doc, "items[*]").unwrap().is_empty());
    }
}