Skip to main content

jaq_json/
funs.rs

1use crate::{read, Error, Val, ValR, ValX};
2use alloc::{boxed::Box, vec::Vec};
3use bstr::ByteSlice;
4use bytes::{BufMut, Bytes, BytesMut};
5use core::fmt;
6use jaq_core::box_iter::{then, BoxIter};
7use jaq_core::native::{bome, run, unary, v, Filter, Fun};
8use jaq_core::{DataT, Exn, RunPtr};
9use jaq_std::ValT as _;
10
11impl Val {
12    /// Return 0 for null, the absolute value for numbers, and
13    /// the length for strings, arrays, and objects.
14    ///
15    /// Fail on booleans.
16    fn length(&self) -> ValR {
17        match self {
18            Val::Null => Ok(Val::from(0usize)),
19            Val::Num(n) => Ok(Val::Num(n.length())),
20            Val::TStr(s) => Ok(Val::from(utf8_length(s))),
21            Val::BStr(b) => Ok(Val::from(b.len())),
22            Val::Arr(a) => Ok(Val::from(a.len())),
23            Val::Obj(o) => Ok(Val::from(o.len())),
24            Val::Bool(_) => Err(Error::str(format_args!("{self} has no length"))),
25        }
26    }
27
28    /// Return the indices of `y` in `self`.
29    fn indices<'a>(&'a self, y: &'a Val) -> Result<Box<dyn Iterator<Item = usize> + 'a>, Error> {
30        match (self, y) {
31            (Val::BStr(_), Val::BStr(y)) | (Val::TStr(_), Val::TStr(y)) if y.is_empty() => {
32                Ok(Box::new(core::iter::empty()))
33            }
34            (Val::Arr(_), Val::Arr(y)) if y.is_empty() => Ok(Box::new(core::iter::empty())),
35            (Val::TStr(x), Val::TStr(y)) => {
36                let index = |(i, _, _)| x.get(i..i + y.len());
37                let iw = x.char_indices().map_while(index).enumerate();
38                Ok(Box::new(iw.filter_map(|(i, w)| (w == **y).then_some(i))))
39            }
40            (Val::BStr(x), Val::BStr(y)) => {
41                let iw = x.windows(y.len()).enumerate();
42                Ok(Box::new(iw.filter_map(|(i, w)| (w == **y).then_some(i))))
43            }
44            (Val::Arr(x), Val::Arr(y)) => {
45                let iw = x.windows(y.len()).enumerate();
46                Ok(Box::new(iw.filter_map(|(i, w)| (w == **y).then_some(i))))
47            }
48            (Val::Arr(x), y) => {
49                let ix = x.iter().enumerate();
50                Ok(Box::new(ix.filter_map(move |(i, x)| (x == y).then_some(i))))
51            }
52            (x, y) => Err(Error::index(x.clone(), y.clone())),
53        }
54    }
55
56    /// `a` contains `b` iff either
57    /// * the string `b` is a substring of `a`,
58    /// * every element in the array `b` is contained in some element of the array `a`,
59    /// * for every key-value pair `k, v` in `b`,
60    ///   there is a key-value pair `k, v'` in `a` such that `v'` contains `v`, or
61    /// * `a` equals `b`.
62    fn contains(&self, other: &Self) -> bool {
63        match (self, other) {
64            (Self::BStr(l), Self::BStr(r)) | (Self::TStr(l), Self::TStr(r)) => l.contains_str(&**r),
65            (Self::Arr(l), Self::Arr(r)) => r.iter().all(|r| l.iter().any(|l| l.contains(r))),
66            (Self::Obj(l), Self::Obj(r)) => r
67                .iter()
68                .all(|(k, r)| l.get(k).is_some_and(|l| l.contains(r))),
69            _ => self == other,
70        }
71    }
72
73    fn to_bytes(&self) -> Result<Bytes, Self> {
74        match self {
75            Val::Num(n) => n
76                .as_isize()
77                .and_then(|i| u8::try_from(i).ok())
78                .map(|u| Bytes::from(Vec::from([u])))
79                .ok_or_else(|| self.clone()),
80            Val::BStr(b) | Val::TStr(b) => Ok(*b.clone()),
81            Val::Arr(a) => {
82                let mut buf = BytesMut::new();
83                for x in a.iter() {
84                    buf.put(Val::to_bytes(x)?);
85                }
86                Ok(buf.into())
87            }
88            _ => Err(self.clone()),
89        }
90    }
91
92    fn as_bytes_owned(&self) -> Option<Bytes> {
93        match self {
94            Self::BStr(b) | Self::TStr(b) => Some(*b.clone()),
95            _ => None,
96        }
97    }
98
99    fn as_utf8_bytes_owned(&self) -> Option<Bytes> {
100        self.is_utf8_str().then(|| self.as_bytes_owned()).flatten()
101    }
102
103    /// Return bytes if the value is a (byte or text) string.
104    pub fn try_as_bytes_owned(&self) -> Result<Bytes, Error> {
105        self.as_bytes_owned()
106            .ok_or_else(|| Error::typ(self.clone(), "string"))
107    }
108
109    /// Return bytes if the value is a text string.
110    pub fn try_as_utf8_bytes_owned(&self) -> Result<Bytes, Error> {
111        self.as_utf8_bytes_owned()
112            .ok_or_else(|| Error::typ(self.clone(), "string"))
113    }
114}
115
116/// Box Map, Map Error.
117fn bmme<'a>(iter: BoxIter<'a, ValR>) -> BoxIter<'a, ValX<'a>> {
118    Box::new(iter.map(|r| r.map_err(Exn::from)))
119}
120
/// Build the error that is reported when `i` could not be parsed as `fmt`.
///
/// * `i`: the input that failed to parse, shown verbatim in the message
/// * `fmt`: name of the format that was expected (such as `"JSON"`)
/// * `e`: the underlying parse error, appended after a colon
fn parse_fail(i: &impl fmt::Display, fmt: &str, e: impl fmt::Display) -> Error {
    Error::str(format_args!("cannot parse {i} as {fmt}: {e}"))
}
124
125self_cell::self_cell!(
126    struct BytesValRs {
127        owner: Bytes,
128
129        #[not_covariant]
130        dependent: ValRs,
131    }
132);
133
134impl Iterator for BytesValRs {
135    type Item = ValR;
136    fn next(&mut self) -> Option<Self::Item> {
137        self.with_dependent_mut(|_owner, iter| iter.next())
138    }
139}
140
/// Boxed iterator over value results that is valid for the lifetime `'a`.
type ValRs<'a> = BoxIter<'a, ValR>;
142
143/// Apply a function to bytes and yield the resulting value results.
144pub fn bytes_valrs(b: Bytes, f: impl FnOnce(&[u8]) -> ValRs) -> ValRs<'static> {
145    Box::new(BytesValRs::new(b, |b| f(b)))
146}
147
148/// Obtain length of string that is assumed to be UTF-8 encoded.
149///
150/// This can be removed once https://github.com/BurntSushi/bstr/pull/223 lands.
151fn utf8_length(mut s: &[u8]) -> usize {
152    let mut count = 0;
153    loop {
154        // ASCII fast path taken if two consecutive ASCII chars found
155        match s {
156            [fst, snd, ..] if *fst <= 0x7F && *snd <= 0x7F => {
157                let size = s.find_non_ascii_byte().unwrap_or(s.len());
158                count += size;
159                s = &s[size..];
160            }
161            _ => (),
162        }
163
164        let (_ch, size) = bstr::decode_utf8(s);
165        if size == 0 {
166            return count;
167        } else {
168            count += 1;
169            s = &s[size..];
170        }
171    }
172}
173
174/// Functions of the standard library.
175pub fn funs<D: for<'a> DataT<V<'a> = Val>>() -> impl Iterator<Item = Fun<D>> {
176    base().into_vec().into_iter().map(run)
177}
178
179fn base<D: for<'a> DataT<V<'a> = Val>>() -> Box<[Filter<RunPtr<D>>]> {
180    Box::new([
181        ("fromjson", v(0), |cv| {
182            bmme(then(cv.1.try_as_utf8_bytes_owned(), |s| {
183                let fail = move |r: Result<_, _>| r.map_err(|e| parse_fail(&cv.1, "JSON", e));
184                bytes_valrs(s, |s| Box::new(read::parse_many(s).map(fail)))
185            }))
186        }),
187        ("tojson", v(0), |cv| bome(Ok(Val::utf8_str(cv.1.to_json())))),
188        ("tobytes", v(0), |cv| {
189            let fail = |v| Error::str(format_args!("cannot convert {v} to bytes"));
190            bome(cv.1.to_bytes().map(Val::byte_str).map_err(fail))
191        }),
192        ("length", v(0), |cv| bome(cv.1.length())),
193        ("contains", v(1), |cv| {
194            unary(cv, |x, y| Ok(Val::from(x.contains(&y))))
195        }),
196        ("has", v(1), |cv| {
197            unary(cv, |v, k| v.index_opt(&k).map(|o| o.is_some().into()))
198        }),
199        ("indices", v(1), |cv| {
200            let to_int = |i: usize| Val::from(i as isize);
201            unary(cv, move |x, v| {
202                x.indices(&v).map(|idxs| idxs.map(to_int).collect())
203            })
204        }),
205        ("bsearch", v(1), |cv| {
206            let to_idx = |r: Result<_, _>| r.map_or_else(|i| -1 - i as isize, |i| i as isize);
207            unary(cv, move |a, x| {
208                a.as_arr().map(|a| Val::from(to_idx(a.binary_search(&x))))
209            })
210        }),
211    ])
212}