savvy/sexp/string.rs
1use savvy_ffi::{R_NaString, SET_STRING_ELT, SEXP, STRING_ELT, STRSXP};
2
3use super::na::NotAvailableValue;
4use super::utils::{assert_len, charsxp_to_str, str_to_charsxp};
5use super::{impl_common_sexp_ops, impl_common_sexp_ops_owned, Sexp};
6use crate::protect::{self, local_protect};
7
8/// An external SEXP of a character vector.
9pub struct StringSexp(pub SEXP);
10
11/// A newly-created SEXP of a character vector.
12pub struct OwnedStringSexp {
13 inner: SEXP,
14 token: SEXP,
15 len: usize,
16}
17
18// implement inner(), len(), empty(), and name()
19impl_common_sexp_ops!(StringSexp);
20impl_common_sexp_ops_owned!(OwnedStringSexp);
21
22impl StringSexp {
23 /// Returns an iterator over the underlying data of the SEXP.
24 ///
25 /// # Examples
26 ///
27 /// ```
28 /// # let str_sexp = savvy::OwnedStringSexp::try_from_slice(["a", "b", "c"])?.as_read_only();
29 /// // `str_sexp` is c("a", "b", "c")
30 /// let mut iter = str_sexp.iter();
31 /// assert_eq!(iter.next(), Some("a"));
32 /// assert_eq!(iter.collect::<Vec<&str>>(), vec!["b", "c"]);
33 /// ```
34 pub fn iter<'a>(&'a self) -> StringSexpIter<'a> {
35 StringSexpIter {
36 sexp: &self.0,
37 i: 0,
38 len: self.len(),
39 }
40 }
41
42 /// Copies the underlying data of the SEXP into a new `Vec`.
43 ///
44 /// # Examples
45 ///
46 /// ```
47 /// # let str_sexp = savvy::OwnedStringSexp::try_from_slice(["a", "b", "c"])?.as_read_only();
48 /// // `str_sexp` is c("a", "b", "c")
49 /// assert_eq!(str_sexp.to_vec(), vec!["a", "b", "c"]);
50 /// ```
51 pub fn to_vec(&self) -> Vec<&'static str> {
52 self.iter().collect()
53 }
54}
55
56impl OwnedStringSexp {
57 /// Returns the read-only version of the wrapper. This is mainly for testing
58 /// purposes.
59 pub fn as_read_only(&self) -> StringSexp {
60 StringSexp(self.inner)
61 }
62
63 /// Returns an iterator over the underlying data of the SEXP.
64 ///
65 /// # Examples
66 ///
67 /// ```
68 /// use savvy::OwnedStringSexp;
69 ///
70 /// let str_sexp = OwnedStringSexp::try_from_slice(["a", "b", "c"])?;
71 /// let mut iter = str_sexp.iter();
72 /// assert_eq!(iter.next(), Some("a"));
73 /// assert_eq!(iter.collect::<Vec<&str>>(), vec!["b", "c"]);
74 /// ```
75 pub fn iter<'a>(&'a self) -> StringSexpIter<'a> {
76 StringSexpIter {
77 sexp: &self.inner,
78 i: 0,
79 len: self.len,
80 }
81 }
82
83 /// Copies the underlying data of the SEXP into a new `Vec`.
84 pub fn to_vec(&self) -> Vec<&'static str> {
85 self.iter().collect()
86 }
87
88 /// Set the value of the `i`-th element. `i` starts from `0`.
89 ///
90 /// # Examples
91 ///
92 /// ```
93 /// use savvy::OwnedStringSexp;
94 ///
95 /// let mut str_sexp = OwnedStringSexp::new(3)?;
96 /// str_sexp.set_elt(2, "foo")?;
97 /// assert_eq!(str_sexp.to_vec(), &["", "", "foo"]);
98 /// ```
99 pub fn set_elt(&mut self, i: usize, v: &str) -> crate::error::Result<()> {
100 assert_len(self.len, i)?;
101 unsafe { self.set_elt_unchecked(i, str_to_charsxp(v)?) };
102
103 Ok(())
104 }
105
106 // Set the value of the `i`-th element.
107 // Safety: the user has to assure bounds are checked.
108 #[inline]
109 pub(crate) unsafe fn set_elt_unchecked(&mut self, i: usize, v: SEXP) {
110 unsafe { SET_STRING_ELT(self.inner, i as _, v) };
111 }
112
113 /// Set the `i`-th element to NA. `i` starts from `0`.
114 ///
115 /// # Examples
116 ///
117 /// ```
118 /// use savvy::OwnedStringSexp;
119 /// use savvy::NotAvailableValue;
120 ///
121 /// let mut str_sexp = OwnedStringSexp::new(3)?;
122 /// str_sexp.set_na(2)?;
123 /// assert_eq!(str_sexp.to_vec(), vec!["", "", <&str>::na()]);
124 /// ```
125 pub fn set_na(&mut self, i: usize) -> crate::error::Result<()> {
126 assert_len(self.len, i)?;
127
128 unsafe { self.set_elt_unchecked(i, R_NaString) };
129
130 Ok(())
131 }
132
133 /// Constructs a new string vector.
134 ///
135 /// ```
136 /// let x = savvy::OwnedStringSexp::new(3)?;
137 /// assert_eq!(x.to_vec(), vec!["", "", ""]);
138 /// ```
139 pub fn new(len: usize) -> crate::error::Result<Self> {
140 let inner = crate::alloc_vector(STRSXP, len as _)?;
141 Self::new_from_raw_sexp(inner, len)
142 }
143
144 fn new_from_raw_sexp(inner: SEXP, len: usize) -> crate::error::Result<Self> {
145 let token = protect::insert_to_preserved_list(inner);
146
147 // Note: `R_allocVector()` initializes character vectors, so we don't
148 // need to do it by ourselves. R-exts (5.9.2 Allocating storage) says:
149 //
150 // > One distinction is that whereas the R functions always initialize
151 // > the elements of the vector, allocVector only does so for lists,
152 // > expressions and character vectors (the cases where the elements
153 // > are themselves R objects).
154
155 Ok(Self { inner, token, len })
156 }
157
158 /// Constructs a new real vector from an iterator.
159 ///
160 /// Note that, if you already have a slice or vec, you can also use
161 /// [`try_from_slice`][1].
162 ///
163 /// [1]: `Self::try_from_slice()`
164 ///
165 /// # Examples
166 ///
167 /// ```
168 /// use savvy::OwnedStringSexp;
169 ///
170 /// let iter = ["foo", "❤", "bar"].into_iter().filter(|x| x.is_ascii());
171 /// let str_sexp = OwnedStringSexp::try_from_iter(iter)?;
172 /// assert_eq!(str_sexp.to_vec(), vec!["foo", "bar"]);
173 /// ```
174 pub fn try_from_iter<I, U>(iter: I) -> crate::error::Result<Self>
175 where
176 I: IntoIterator<Item = U>,
177 U: AsRef<str>,
178 {
179 let iter = iter.into_iter();
180
181 match iter.size_hint() {
182 (_, Some(upper)) => {
183 // If the maximum length is known, use it at frist. But, the
184 // iterator's length might be shorter than the reported one
185 // (e.g. `(0..10).filter(|x| x % 2 == 0)`), so it needs to be
186 // truncated to the actual length at last.
187
188 let inner = crate::alloc_vector(STRSXP, upper as _)?;
189 let _inner_guard = local_protect(inner);
190
191 let mut last_index = 0;
192 for (i, v) in iter.enumerate() {
193 // The upper bound of size_hint() is just for optimization
194 // and what we should not trust.
195 assert_len(upper, i)?;
196 unsafe { SET_STRING_ELT(inner, i as _, str_to_charsxp(v.as_ref())?) };
197
198 last_index = i;
199 }
200
201 let new_len = last_index + 1;
202 if new_len == upper {
203 // If the length is the same as expected, use it as it is.
204 Self::new_from_raw_sexp(inner, upper)
205 } else {
206 // If the length is shorter than expected, re-allocate a new
207 // SEXP and copy the values into it.
208 let mut out = Self::new(new_len)?;
209 for i in 0..new_len {
210 unsafe { out.set_elt_unchecked(i, STRING_ELT(inner, i as _)) };
211 }
212 Ok(out)
213 }
214 }
215 (_, None) => {
216 // When the length is not known at all, collect() it first.
217
218 let v: Vec<I::Item> = iter.collect();
219 v.try_into()
220 }
221 }
222 }
223
224 /// Constructs a new string vector from a slice or vec.
225 ///
226 /// # Examples
227 ///
228 /// ```
229 /// use savvy::OwnedStringSexp;
230 ///
231 /// let str_sexp = OwnedStringSexp::try_from_slice(["foo", "❤", "bar"])?;
232 /// assert_eq!(str_sexp.to_vec(), vec!["foo", "❤", "bar"]);
233 /// ```
234 pub fn try_from_slice<S, U>(x: S) -> crate::error::Result<Self>
235 where
236 S: AsRef<[U]>,
237 U: AsRef<str>,
238 {
239 let x_slice = x.as_ref();
240 let mut out = Self::new(x_slice.len())?;
241 for (i, v) in x_slice.iter().enumerate() {
242 // Safety: slice and OwnedStringSexp have the same length.
243 unsafe { out.set_elt_unchecked(i, str_to_charsxp(v.as_ref())?) };
244 }
245 Ok(out)
246 }
247
248 /// Constructs a new string vector from a scalar value.
249 ///
250 /// # Examples
251 ///
252 /// ```
253 /// use savvy::OwnedStringSexp;
254 ///
255 /// let str_sexp = OwnedStringSexp::try_from_scalar("❤")?;
256 /// assert_eq!(str_sexp.to_vec(), vec!["❤"]);
257 /// ```
258 pub fn try_from_scalar<T: AsRef<str>>(value: T) -> crate::error::Result<Self> {
259 let sexp = unsafe {
260 // Note: unlike `new()`, this allocates a STRSXP after creating a
261 // CHARSXP. So, the `CHARSXP` needs to be protected.
262 let charsxp = str_to_charsxp(value.as_ref())?;
263 let _charsxp_guard = local_protect(charsxp);
264 crate::unwind_protect(|| savvy_ffi::Rf_ScalarString(charsxp))?
265 };
266 Self::new_from_raw_sexp(sexp, 1)
267 }
268}
269
270impl Drop for OwnedStringSexp {
271 fn drop(&mut self) {
272 protect::release_from_preserved_list(self.token);
273 }
274}
275
276// conversions from/to StringSexp ***************
277
278impl TryFrom<Sexp> for StringSexp {
279 type Error = crate::error::Error;
280
281 fn try_from(value: Sexp) -> crate::error::Result<Self> {
282 value.assert_string()?;
283 Ok(Self(value.0))
284 }
285}
286
287impl From<StringSexp> for Sexp {
288 fn from(value: StringSexp) -> Self {
289 Self(value.inner())
290 }
291}
292
293impl From<StringSexp> for crate::error::Result<Sexp> {
294 fn from(value: StringSexp) -> Self {
295 Ok(<Sexp>::from(value))
296 }
297}
298
// conversions from/to OwnedStringSexp ***************
300
301impl<T> TryFrom<&[T]> for OwnedStringSexp
302where
303 T: AsRef<str>, // This works both for &str and String
304{
305 type Error = crate::error::Error;
306
307 fn try_from(value: &[T]) -> crate::error::Result<Self> {
308 Self::try_from_slice(value)
309 }
310}
311
312impl<T> TryFrom<Vec<T>> for OwnedStringSexp
313where
314 T: AsRef<str>, // This works both for &str and String
315{
316 type Error = crate::error::Error;
317
318 fn try_from(value: Vec<T>) -> crate::error::Result<Self> {
319 Self::try_from_slice(value)
320 }
321}
322
323impl TryFrom<&str> for OwnedStringSexp {
324 type Error = crate::error::Error;
325
326 fn try_from(value: &str) -> crate::error::Result<Self> {
327 Self::try_from_scalar(value)
328 }
329}
330
331impl TryFrom<String> for OwnedStringSexp {
332 type Error = crate::error::Error;
333
334 fn try_from(value: String) -> crate::error::Result<Self> {
335 Self::try_from_scalar(value)
336 }
337}
338
339impl From<OwnedStringSexp> for Sexp {
340 fn from(value: OwnedStringSexp) -> Self {
341 Self(value.inner())
342 }
343}
344
345impl From<OwnedStringSexp> for crate::error::Result<Sexp> {
346 fn from(value: OwnedStringSexp) -> Self {
347 Ok(<Sexp>::from(value))
348 }
349}
350
// Generates `TryFrom<$ty> for Sexp` by first building an `OwnedStringSexp`
// from the value and then converting that into a `Sexp`.
macro_rules! impl_try_from_rust_strings {
    ($ty: ty) => {
        impl TryFrom<$ty> for Sexp {
            type Error = crate::error::Error;

            fn try_from(value: $ty) -> crate::error::Result<Self> {
                let owned = <OwnedStringSexp>::try_from(value)?;
                Ok(owned.into())
            }
        }
    };
}
362
363impl_try_from_rust_strings!(&[&str]);
364impl_try_from_rust_strings!(&[String]);
365impl_try_from_rust_strings!(Vec<&str>);
366impl_try_from_rust_strings!(Vec<String>);
367impl_try_from_rust_strings!(&str);
368impl_try_from_rust_strings!(String);
369
370// Iterator for StringSexp ***************
371
372pub struct StringSexpIter<'a> {
373 pub sexp: &'a SEXP,
374 i: usize,
375 len: usize,
376}
377
378impl Iterator for StringSexpIter<'_> {
379 // The lifetime here is 'static, not 'a, in the assumption that strings in
380 // `R_StringHash`, the global `CHARSXP` cache, won't be deleted during the R
381 // session.
382 //
383 // Note that, in order to stick with 'static lifetime, I can't use
384 // `Rf_translateCharUTF8()` here because it doesn't use `R_StringHash` and
385 // allocates the string on R's side, which means it's not guaranteed to stay
386 // during the whole R session.
387 //
388 // cf.)
389 // - https://cran.r-project.org/doc/manuals/r-devel/R-ints.html#The-CHARSXP-cache
390 // - https://github.com/wch/r-source/blob/023ada039c86bf9b65983a71110c586b5994e18d/src/main/sysutils.c#L1284-L1296
391 type Item = &'static str;
392
393 fn next(&mut self) -> Option<Self::Item> {
394 let i = self.i;
395 self.i += 1;
396
397 if i >= self.len {
398 return None;
399 }
400
401 unsafe {
402 let e = STRING_ELT(*self.sexp, i as _);
403
404 // Because `None` means the end of the iterator, we cannot return
405 // `None` even for missing values.
406 if e == savvy_ffi::R_NaString {
407 return Some(Self::Item::na());
408 }
409
410 Some(charsxp_to_str(e))
411 }
412 }
413
414 fn size_hint(&self) -> (usize, Option<usize>) {
415 (self.len, Some(self.len))
416 }
417}
418
419impl ExactSizeIterator for StringSexpIter<'_> {}