Skip to main content

savvy/sexp/
string.rs

1use savvy_ffi::{R_NaString, SET_STRING_ELT, SEXP, STRING_ELT, STRSXP};
2
3use super::na::NotAvailableValue;
4use super::utils::{assert_len, charsxp_to_str, str_to_charsxp};
5use super::{impl_common_sexp_ops, impl_common_sexp_ops_owned, Sexp};
6use crate::protect::{self, local_protect};
7
8/// An external SEXP of a character vector.
9pub struct StringSexp(pub SEXP);
10
11/// A newly-created SEXP of a character vector.
12pub struct OwnedStringSexp {
13    inner: SEXP,
14    token: SEXP,
15    len: usize,
16}
17
18// implement inner(), len(), empty(), and name()
19impl_common_sexp_ops!(StringSexp);
20impl_common_sexp_ops_owned!(OwnedStringSexp);
21
22impl StringSexp {
23    /// Returns an iterator over the underlying data of the SEXP.
24    ///
25    /// # Examples
26    ///
27    /// ```
28    /// # let str_sexp = savvy::OwnedStringSexp::try_from_slice(["a", "b", "c"])?.as_read_only();
29    /// // `str_sexp` is c("a", "b", "c")
30    /// let mut iter = str_sexp.iter();
31    /// assert_eq!(iter.next(), Some("a"));
32    /// assert_eq!(iter.collect::<Vec<&str>>(), vec!["b", "c"]);
33    /// ```
34    pub fn iter<'a>(&'a self) -> StringSexpIter<'a> {
35        StringSexpIter {
36            sexp: &self.0,
37            i: 0,
38            len: self.len(),
39        }
40    }
41
42    /// Copies the underlying data of the SEXP into a new `Vec`.
43    ///
44    /// # Examples
45    ///
46    /// ```
47    /// # let str_sexp = savvy::OwnedStringSexp::try_from_slice(["a", "b", "c"])?.as_read_only();
48    /// // `str_sexp` is c("a", "b", "c")
49    /// assert_eq!(str_sexp.to_vec(), vec!["a", "b", "c"]);
50    /// ```
51    pub fn to_vec(&self) -> Vec<&'static str> {
52        self.iter().collect()
53    }
54}
55
56impl OwnedStringSexp {
57    /// Returns the read-only version of the wrapper. This is mainly for testing
58    /// purposes.
59    pub fn as_read_only(&self) -> StringSexp {
60        StringSexp(self.inner)
61    }
62
63    /// Returns an iterator over the underlying data of the SEXP.
64    ///
65    /// # Examples
66    ///
67    /// ```
68    /// use savvy::OwnedStringSexp;
69    ///
70    /// let str_sexp = OwnedStringSexp::try_from_slice(["a", "b", "c"])?;
71    /// let mut iter = str_sexp.iter();
72    /// assert_eq!(iter.next(), Some("a"));
73    /// assert_eq!(iter.collect::<Vec<&str>>(), vec!["b", "c"]);
74    /// ```
75    pub fn iter<'a>(&'a self) -> StringSexpIter<'a> {
76        StringSexpIter {
77            sexp: &self.inner,
78            i: 0,
79            len: self.len,
80        }
81    }
82
83    /// Copies the underlying data of the SEXP into a new `Vec`.
84    pub fn to_vec(&self) -> Vec<&'static str> {
85        self.iter().collect()
86    }
87
88    /// Set the value of the `i`-th element. `i` starts from `0`.
89    ///
90    /// # Examples
91    ///
92    /// ```
93    /// use savvy::OwnedStringSexp;
94    ///
95    /// let mut str_sexp = OwnedStringSexp::new(3)?;
96    /// str_sexp.set_elt(2, "foo")?;
97    /// assert_eq!(str_sexp.to_vec(), &["", "", "foo"]);
98    /// ```
99    pub fn set_elt(&mut self, i: usize, v: &str) -> crate::error::Result<()> {
100        assert_len(self.len, i)?;
101        unsafe { self.set_elt_unchecked(i, str_to_charsxp(v)?) };
102
103        Ok(())
104    }
105
106    // Set the value of the `i`-th element.
107    // Safety: the user has to assure bounds are checked.
108    #[inline]
109    pub(crate) unsafe fn set_elt_unchecked(&mut self, i: usize, v: SEXP) {
110        unsafe { SET_STRING_ELT(self.inner, i as _, v) };
111    }
112
113    /// Set the `i`-th element to NA. `i` starts from `0`.
114    ///
115    /// # Examples
116    ///
117    /// ```
118    /// use savvy::OwnedStringSexp;
119    /// use savvy::NotAvailableValue;
120    ///
121    /// let mut str_sexp = OwnedStringSexp::new(3)?;
122    /// str_sexp.set_na(2)?;
123    /// assert_eq!(str_sexp.to_vec(), vec!["", "", <&str>::na()]);
124    /// ```
125    pub fn set_na(&mut self, i: usize) -> crate::error::Result<()> {
126        assert_len(self.len, i)?;
127
128        unsafe { self.set_elt_unchecked(i, R_NaString) };
129
130        Ok(())
131    }
132
133    /// Constructs a new string vector.
134    ///
135    /// ```
136    /// let x = savvy::OwnedStringSexp::new(3)?;
137    /// assert_eq!(x.to_vec(), vec!["", "", ""]);
138    /// ```
139    pub fn new(len: usize) -> crate::error::Result<Self> {
140        let inner = crate::alloc_vector(STRSXP, len as _)?;
141        Self::new_from_raw_sexp(inner, len)
142    }
143
144    fn new_from_raw_sexp(inner: SEXP, len: usize) -> crate::error::Result<Self> {
145        let token = protect::insert_to_preserved_list(inner);
146
147        // Note: `R_allocVector()` initializes character vectors, so we don't
148        // need to do it by ourselves. R-exts (5.9.2 Allocating storage) says:
149        //
150        // >  One distinction is that whereas the R functions always initialize
151        // >  the elements of the vector, allocVector only does so for lists,
152        // >  expressions and character vectors (the cases where the elements
153        // >  are themselves R objects).
154
155        Ok(Self { inner, token, len })
156    }
157
158    /// Constructs a new real vector from an iterator.
159    ///
160    /// Note that, if you already have a slice or vec, you can also use
161    /// [`try_from_slice`][1].
162    ///
163    /// [1]: `Self::try_from_slice()`
164    ///
165    /// # Examples
166    ///
167    /// ```
168    /// use savvy::OwnedStringSexp;
169    ///
170    /// let iter = ["foo", "❤", "bar"].into_iter().filter(|x| x.is_ascii());
171    /// let str_sexp = OwnedStringSexp::try_from_iter(iter)?;
172    /// assert_eq!(str_sexp.to_vec(), vec!["foo", "bar"]);
173    /// ```
174    pub fn try_from_iter<I, U>(iter: I) -> crate::error::Result<Self>
175    where
176        I: IntoIterator<Item = U>,
177        U: AsRef<str>,
178    {
179        let iter = iter.into_iter();
180
181        match iter.size_hint() {
182            (_, Some(upper)) => {
183                // If the maximum length is known, use it at frist. But, the
184                // iterator's length might be shorter than the reported one
185                // (e.g. `(0..10).filter(|x| x % 2 == 0)`), so it needs to be
186                // truncated to the actual length at last.
187
188                let inner = crate::alloc_vector(STRSXP, upper as _)?;
189                let _inner_guard = local_protect(inner);
190
191                let mut last_index = 0;
192                for (i, v) in iter.enumerate() {
193                    // The upper bound of size_hint() is just for optimization
194                    // and what we should not trust.
195                    assert_len(upper, i)?;
196                    unsafe { SET_STRING_ELT(inner, i as _, str_to_charsxp(v.as_ref())?) };
197
198                    last_index = i;
199                }
200
201                let new_len = last_index + 1;
202                if new_len == upper {
203                    // If the length is the same as expected, use it as it is.
204                    Self::new_from_raw_sexp(inner, upper)
205                } else {
206                    // If the length is shorter than expected, re-allocate a new
207                    // SEXP and copy the values into it.
208                    let mut out = Self::new(new_len)?;
209                    for i in 0..new_len {
210                        unsafe { out.set_elt_unchecked(i, STRING_ELT(inner, i as _)) };
211                    }
212                    Ok(out)
213                }
214            }
215            (_, None) => {
216                // When the length is not known at all, collect() it first.
217
218                let v: Vec<I::Item> = iter.collect();
219                v.try_into()
220            }
221        }
222    }
223
224    /// Constructs a new string vector from a slice or vec.
225    ///
226    /// # Examples
227    ///
228    /// ```
229    /// use savvy::OwnedStringSexp;
230    ///
231    /// let str_sexp = OwnedStringSexp::try_from_slice(["foo", "❤", "bar"])?;
232    /// assert_eq!(str_sexp.to_vec(), vec!["foo", "❤", "bar"]);
233    /// ```
234    pub fn try_from_slice<S, U>(x: S) -> crate::error::Result<Self>
235    where
236        S: AsRef<[U]>,
237        U: AsRef<str>,
238    {
239        let x_slice = x.as_ref();
240        let mut out = Self::new(x_slice.len())?;
241        for (i, v) in x_slice.iter().enumerate() {
242            // Safety: slice and OwnedStringSexp have the same length.
243            unsafe { out.set_elt_unchecked(i, str_to_charsxp(v.as_ref())?) };
244        }
245        Ok(out)
246    }
247
248    /// Constructs a new string vector from a scalar value.
249    ///
250    /// # Examples
251    ///
252    /// ```
253    /// use savvy::OwnedStringSexp;
254    ///
255    /// let str_sexp = OwnedStringSexp::try_from_scalar("❤")?;
256    /// assert_eq!(str_sexp.to_vec(), vec!["❤"]);
257    /// ```
258    pub fn try_from_scalar<T: AsRef<str>>(value: T) -> crate::error::Result<Self> {
259        let sexp = unsafe {
260            // Note: unlike `new()`, this allocates a STRSXP after creating a
261            // CHARSXP. So, the `CHARSXP` needs to be protected.
262            let charsxp = str_to_charsxp(value.as_ref())?;
263            let _charsxp_guard = local_protect(charsxp);
264            crate::unwind_protect(|| savvy_ffi::Rf_ScalarString(charsxp))?
265        };
266        Self::new_from_raw_sexp(sexp, 1)
267    }
268}
269
270impl Drop for OwnedStringSexp {
271    fn drop(&mut self) {
272        protect::release_from_preserved_list(self.token);
273    }
274}
275
276// conversions from/to StringSexp ***************
277
278impl TryFrom<Sexp> for StringSexp {
279    type Error = crate::error::Error;
280
281    fn try_from(value: Sexp) -> crate::error::Result<Self> {
282        value.assert_string()?;
283        Ok(Self(value.0))
284    }
285}
286
287impl From<StringSexp> for Sexp {
288    fn from(value: StringSexp) -> Self {
289        Self(value.inner())
290    }
291}
292
293impl From<StringSexp> for crate::error::Result<Sexp> {
294    fn from(value: StringSexp) -> Self {
295        Ok(<Sexp>::from(value))
296    }
297}
298
// conversions from/to OwnedStringSexp ***************
300
301impl<T> TryFrom<&[T]> for OwnedStringSexp
302where
303    T: AsRef<str>, // This works both for &str and String
304{
305    type Error = crate::error::Error;
306
307    fn try_from(value: &[T]) -> crate::error::Result<Self> {
308        Self::try_from_slice(value)
309    }
310}
311
312impl<T> TryFrom<Vec<T>> for OwnedStringSexp
313where
314    T: AsRef<str>, // This works both for &str and String
315{
316    type Error = crate::error::Error;
317
318    fn try_from(value: Vec<T>) -> crate::error::Result<Self> {
319        Self::try_from_slice(value)
320    }
321}
322
323impl TryFrom<&str> for OwnedStringSexp {
324    type Error = crate::error::Error;
325
326    fn try_from(value: &str) -> crate::error::Result<Self> {
327        Self::try_from_scalar(value)
328    }
329}
330
331impl TryFrom<String> for OwnedStringSexp {
332    type Error = crate::error::Error;
333
334    fn try_from(value: String) -> crate::error::Result<Self> {
335        Self::try_from_scalar(value)
336    }
337}
338
339impl From<OwnedStringSexp> for Sexp {
340    fn from(value: OwnedStringSexp) -> Self {
341        Self(value.inner())
342    }
343}
344
345impl From<OwnedStringSexp> for crate::error::Result<Sexp> {
346    fn from(value: OwnedStringSexp) -> Self {
347        Ok(<Sexp>::from(value))
348    }
349}
350
351macro_rules! impl_try_from_rust_strings {
352    ($ty: ty) => {
353        impl TryFrom<$ty> for Sexp {
354            type Error = crate::error::Error;
355
356            fn try_from(value: $ty) -> crate::error::Result<Self> {
357                <OwnedStringSexp>::try_from(value).map(|x| x.into())
358            }
359        }
360    };
361}
362
363impl_try_from_rust_strings!(&[&str]);
364impl_try_from_rust_strings!(&[String]);
365impl_try_from_rust_strings!(Vec<&str>);
366impl_try_from_rust_strings!(Vec<String>);
367impl_try_from_rust_strings!(&str);
368impl_try_from_rust_strings!(String);
369
370// Iterator for StringSexp ***************
371
372pub struct StringSexpIter<'a> {
373    pub sexp: &'a SEXP,
374    i: usize,
375    len: usize,
376}
377
378impl Iterator for StringSexpIter<'_> {
379    // The lifetime here is 'static, not 'a, in the assumption that strings in
380    // `R_StringHash`, the global `CHARSXP` cache, won't be deleted during the R
381    // session.
382    //
383    // Note that, in order to stick with 'static lifetime, I can't use
384    // `Rf_translateCharUTF8()` here because it doesn't use `R_StringHash` and
385    // allocates the string on R's side, which means it's not guaranteed to stay
386    // during the whole R session.
387    //
388    // cf.)
389    // - https://cran.r-project.org/doc/manuals/r-devel/R-ints.html#The-CHARSXP-cache
390    // - https://github.com/wch/r-source/blob/023ada039c86bf9b65983a71110c586b5994e18d/src/main/sysutils.c#L1284-L1296
391    type Item = &'static str;
392
393    fn next(&mut self) -> Option<Self::Item> {
394        let i = self.i;
395        self.i += 1;
396
397        if i >= self.len {
398            return None;
399        }
400
401        unsafe {
402            let e = STRING_ELT(*self.sexp, i as _);
403
404            // Because `None` means the end of the iterator, we cannot return
405            // `None` even for missing values.
406            if e == savvy_ffi::R_NaString {
407                return Some(Self::Item::na());
408            }
409
410            Some(charsxp_to_str(e))
411        }
412    }
413
414    fn size_hint(&self) -> (usize, Option<usize>) {
415        (self.len, Some(self.len))
416    }
417}
418
419impl ExactSizeIterator for StringSexpIter<'_> {}