Skip to main content

scheme_rs/
strings.rs

1//! String builtins and data types.
2//!
//! Although we would like to, the R6RS standard for Scheme precludes us from
//! using the standard library String type by mandating O(1) lookups of
//! characters from indices. This means that Scheme strings are vectors of
//! Unicode code points rather than UTF-8 strings.
7
8use std::{fmt, hash::Hash, sync::Arc};
9
10use parking_lot::{MappedRwLockReadGuard, RwLock, RwLockReadGuard};
11
12use crate::{
13    Either,
14    character::{char_switch_case, to_foldcase},
15    exceptions::Exception,
16    gc::Trace,
17    registry::bridge,
18    value::{Value, ValueType},
19};
20
/// Shared backing storage for a [`WideString`].
///
/// NOTE(review): the 16-byte alignment is presumably required by how values
/// are stored/tagged elsewhere in the crate — confirm before changing it.
#[repr(align(16))]
pub(crate) struct WideStringInner {
    /// The characters of the string, guarded by a read/write lock so that
    /// shared handles can read and mutate them.
    pub(crate) chars: RwLock<Vec<char>>,
    /// Whether `string-set!` is permitted to modify this string.
    mutable: bool,
}
26
/// A string that is a vector of characters, rather than a vector of bytes
/// encoding a UTF-8 string. This is because R6RS mandates O(1) lookups of
/// character indices.
///
/// Cloning a `WideString` clones the inner `Arc`, so clones share the same
/// underlying character storage.
#[derive(Clone, Trace)]
pub struct WideString(pub(crate) Arc<WideStringInner>);
32
33impl WideString {
34    pub fn immutable(s: impl fmt::Display) -> Self {
35        Self::from(s.to_string())
36    }
37
38    pub fn mutable<V>(value: V) -> Self
39    where
40        Self: From<V>,
41    {
42        let mut this = Self::from(value);
43        Arc::get_mut(&mut this.0).unwrap().mutable = true;
44        this
45    }
46
47    pub fn as_slice(&self) -> MappedRwLockReadGuard<'_, [char]> {
48        RwLockReadGuard::map(self.0.chars.read(), |chars| chars.as_slice())
49    }
50
51    pub fn clear(&self) {
52        self.0.chars.write().clear()
53    }
54
55    pub fn len(&self) -> usize {
56        self.0.chars.read().len()
57    }
58
59    pub fn is_empty(&self) -> bool {
60        self.0.chars.read().is_empty()
61    }
62
63    pub fn get(&self, idx: usize) -> Option<char> {
64        self.0.chars.read().get(idx).copied()
65    }
66}
67
68impl From<Vec<char>> for WideString {
69    fn from(value: Vec<char>) -> Self {
70        Self(Arc::new(WideStringInner {
71            chars: RwLock::new(value),
72            mutable: false,
73        }))
74    }
75}
76
77impl From<String> for WideString {
78    fn from(value: String) -> Self {
79        Self(Arc::new(WideStringInner {
80            chars: RwLock::new(value.chars().collect()),
81            mutable: false,
82        }))
83    }
84}
85
86impl From<&str> for WideString {
87    fn from(value: &str) -> Self {
88        Self(Arc::new(WideStringInner {
89            chars: RwLock::new(value.chars().collect()),
90            mutable: false,
91        }))
92    }
93}
94
95impl From<WideString> for String {
96    fn from(value: WideString) -> Self {
97        value.0.chars.read().iter().copied().collect()
98    }
99}
100
101impl Hash for WideString {
102    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
103        self.0.chars.read().hash(state);
104    }
105}
106
107impl PartialEq for WideString {
108    fn eq(&self, rhs: &Self) -> bool {
109        *self.0.chars.read() == *rhs.0.chars.read()
110    }
111}
112
113impl PartialEq<str> for WideString {
114    fn eq(&self, rhs: &str) -> bool {
115        self.0.chars.read().iter().copied().eq(rhs.chars())
116    }
117}
118
119impl fmt::Display for WideString {
120    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
121        for char in &*self.0.chars.read() {
122            write!(f, "{char}")?;
123        }
124        Ok(())
125    }
126}
127
128impl fmt::Debug for WideString {
129    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
130        write!(f, "\"")?;
131        for char in self
132            .0
133            .chars
134            .read()
135            .iter()
136            .flat_map(|chr| chr.escape_debug())
137        {
138            write!(f, "{char}")?;
139        }
140        write!(f, "\"")
141    }
142}
143
144#[bridge(name = "string?", lib = "(rnrs base builtins (6))")]
145pub fn string_pred(arg: &Value) -> Result<Vec<Value>, Exception> {
146    Ok(vec![Value::from(arg.type_of() == ValueType::String)])
147}
148
149#[bridge(name = "make-string", lib = "(rnrs base builtins (6))")]
150pub fn make_string(k: &Value, chr: &[Value]) -> Result<Vec<Value>, Exception> {
151    let chr: char = match chr {
152        [] => '\0',
153        [chr] => chr.clone().try_into()?,
154        x => return Err(Exception::wrong_num_of_args(2, 1 + x.len())),
155    };
156    let k: usize = k.clone().try_into()?;
157    let ret = Value::from(WideString(Arc::new(WideStringInner {
158        chars: RwLock::new(std::iter::repeat_n(chr, k).collect()),
159        mutable: true,
160    })));
161    Ok(vec![ret])
162}
163
164#[bridge(name = "string", lib = "(rnrs base builtins (6))")]
165pub fn string(char: &Value, chars: &[Value]) -> Result<Vec<Value>, Exception> {
166    Ok(vec![Value::from(WideString(Arc::new(WideStringInner {
167        chars: RwLock::new(
168            Some(char)
169                .into_iter()
170                .chain(chars.iter())
171                .cloned()
172                .map(Value::try_into)
173                .collect::<Result<Vec<char>, _>>()?,
174        ),
175        mutable: true,
176    })))])
177}
178
179#[bridge(name = "string-length", lib = "(rnrs base builtins (6))")]
180pub fn string_length(s: &Value) -> Result<Vec<Value>, Exception> {
181    let s: WideString = s.clone().try_into()?;
182    Ok(vec![Value::from(s.len())])
183}
184
185#[bridge(name = "string-ref", lib = "(rnrs base builtins (6))")]
186pub fn string_ref(string: &Value, k: &Value) -> Result<Vec<Value>, Exception> {
187    let string: WideString = string.clone().try_into()?;
188    let k: usize = k.clone().try_into()?;
189    let chars = string.0.chars.read();
190    if k >= chars.len() {
191        return Err(Exception::invalid_index(k, chars.len()));
192    }
193    Ok(vec![Value::from(chars[k])])
194}
195
196#[bridge(name = "string=?", lib = "(rnrs base builtins (6))")]
197pub fn string_eq_pred(
198    string_1: &Value,
199    string_2: &Value,
200    string_n: &[Value],
201) -> Result<Vec<Value>, Exception> {
202    let string_1: WideString = string_1.clone().try_into()?;
203    let string_1_chars = string_1.0.chars.read();
204    for string_n in Some(string_2).into_iter().chain(string_n.iter()).cloned() {
205        let string_n: WideString = string_n.try_into()?;
206        if *string_1_chars != *string_n.0.chars.read() {
207            return Ok(vec![Value::from(false)]);
208        }
209    }
210    Ok(vec![Value::from(true)])
211}
212
213#[bridge(name = "string<?", lib = "(rnrs base builtins (6))")]
214pub fn string_less_pred(
215    string_1: WideString,
216    string_2: &Value,
217    string_n: &[Value],
218) -> Result<Vec<Value>, Exception> {
219    let mut prev_string = string_1;
220    for string_n in Some(string_2).into_iter().chain(string_n.iter()).cloned() {
221        let string_n: WideString = string_n.try_into()?;
222        {
223            let prev_string_read = prev_string.0.chars.read();
224            if *prev_string_read >= *string_n.0.chars.read() {
225                return Ok(vec![Value::from(false)]);
226            }
227        }
228        prev_string = string_n;
229    }
230    Ok(vec![Value::from(true)])
231}
232
233#[bridge(name = "string>?", lib = "(rnrs base builtins (6))")]
234pub fn string_greater_pred(
235    string_1: WideString,
236    string_2: &Value,
237    string_n: &[Value],
238) -> Result<Vec<Value>, Exception> {
239    let mut prev_string = string_1;
240    for string_n in Some(string_2).into_iter().chain(string_n.iter()).cloned() {
241        let string_n: WideString = string_n.try_into()?;
242        {
243            let prev_string_read = prev_string.0.chars.read();
244            if *prev_string_read <= *string_n.0.chars.read() {
245                return Ok(vec![Value::from(false)]);
246            }
247        }
248        prev_string = string_n;
249    }
250    Ok(vec![Value::from(true)])
251}
252
253#[bridge(name = "string<=?", lib = "(rnrs base builtins (6))")]
254pub fn string_less_equal_pred(
255    string_1: WideString,
256    string_2: &Value,
257    string_n: &[Value],
258) -> Result<Vec<Value>, Exception> {
259    let mut prev_string = string_1;
260    for string_n in Some(string_2).into_iter().chain(string_n.iter()).cloned() {
261        let string_n: WideString = string_n.try_into()?;
262        {
263            let prev_string_read = prev_string.0.chars.read();
264            if *prev_string_read > *string_n.0.chars.read() {
265                return Ok(vec![Value::from(false)]);
266            }
267        }
268        prev_string = string_n;
269    }
270    Ok(vec![Value::from(true)])
271}
272
273#[bridge(name = "string>=?", lib = "(rnrs base builtins (6))")]
274pub fn string_greater_equal_pred(
275    string_1: WideString,
276    string_2: &Value,
277    string_n: &[Value],
278) -> Result<Vec<Value>, Exception> {
279    let mut prev_string = string_1;
280    for string_n in Some(string_2).into_iter().chain(string_n.iter()).cloned() {
281        let string_n: WideString = string_n.try_into()?;
282        {
283            let prev_string_read = prev_string.0.chars.read();
284            if *prev_string_read < *string_n.0.chars.read() {
285                return Ok(vec![Value::from(false)]);
286            }
287        }
288        prev_string = string_n;
289    }
290    Ok(vec![Value::from(true)])
291}
292
293#[bridge(name = "substring", lib = "(rnrs base builtins (6))")]
294pub fn substring(string: WideString, start: usize, end: usize) -> Result<Vec<Value>, Exception> {
295    if start > end {
296        return Err(Exception::error(format!(
297            "start {start} is greater than end {end}"
298        )));
299    }
300    let substr = string.0.chars.read()[start..end].to_vec();
301    Ok(vec![Value::from(WideString::mutable(substr))])
302}
303
304#[bridge(name = "string-append", lib = "(rnrs base builtins (6))")]
305pub fn list(args: &[Value]) -> Result<Vec<Value>, Exception> {
306    let mut output = String::new();
307    for arg in args.iter().cloned() {
308        let arg: String = arg.try_into()?;
309        output += arg.as_str();
310    }
311    Ok(vec![Value::from(output)])
312}
313
314#[bridge(name = "string->list", lib = "(rnrs base builtins (6))")]
315pub fn string_to_list(string: WideString) -> Result<Vec<Value>, Exception> {
316    let mut list = Value::null();
317    for chr in string.0.chars.read().iter().rev() {
318        list = Value::from((Value::from(*chr), list));
319    }
320    Ok(vec![list])
321}
322
323#[bridge(name = "string-copy", lib = "(rnrs base builtins (6))")]
324pub fn string_copy(string: WideString) -> Result<Vec<Value>, Exception> {
325    let copy = string.0.chars.read().clone();
326    Ok(vec![Value::from(WideString::mutable(copy))])
327}
328
329#[bridge(name = "string->vector", lib = "(rnrs base builtins (6))")]
330pub fn string_to_vector(from: &Value, range: &[Value]) -> Result<Vec<Value>, Exception> {
331    let string: WideString = from.clone().try_into()?;
332
333    let len = string.0.chars.read().len();
334    let start: usize = range
335        .first()
336        .cloned()
337        .map(Value::try_into)
338        .transpose()?
339        .unwrap_or(0);
340    let end: usize = range
341        .get(1)
342        .cloned()
343        .map(Value::try_into)
344        .transpose()?
345        .unwrap_or(len);
346
347    if end < start {
348        return Err(Exception::error(format!(
349            "range end {end} is less than start {start}",
350        )));
351    } else if end > len {
352        return Err(Exception::invalid_range(start..end, len));
353    }
354
355    Ok(vec![Value::from(
356        string.0.chars.read()[start..end]
357            .iter()
358            .copied()
359            .map(Value::from)
360            .collect::<Vec<_>>(),
361    )])
362}
363
364#[bridge(name = "string-set!", lib = "(rnrs mutable-strings (6))")]
365pub fn string_set_bang(string: &Value, k: &Value, chr: &Value) -> Result<Vec<Value>, Exception> {
366    let string: WideString = string.clone().try_into()?;
367    let k: usize = k.clone().try_into()?;
368    let chr: char = chr.clone().try_into()?;
369    if !string.0.mutable {
370        return Err(Exception::error("string is immutable"));
371    }
372    let mut chars = string.0.chars.write();
373    if k >= chars.len() {
374        return Err(Exception::invalid_index(k, chars.len()));
375    }
376    chars[k] = chr;
377    Ok(vec![])
378}
379
380#[bridge(name = "string-foldcase", lib = "(rnrs base builtins (6))")]
381pub fn string_foldcase(string: &Value) -> Result<Vec<Value>, Exception> {
382    let string: WideString = string.try_to_scheme_type()?;
383    let folded = string
384        .0
385        .chars
386        .read()
387        .iter()
388        .flat_map(|ch| match char_switch_case(*ch, to_foldcase) {
389            Either::Left(ch) => vec![ch],
390            Either::Right(s) => s,
391        })
392        .collect::<Vec<_>>();
393    let folded = WideString(Arc::new(WideStringInner {
394        chars: RwLock::new(folded),
395        mutable: true,
396    }));
397    Ok(vec![Value::from(folded)])
398}