Skip to main content

scheme_rs/
strings.rs

//! String builtins and data types.
//!
//! Although we would like to, the R6RS Scheme standard precludes us from
//! using Rust's standard library `String` type by mandating O(1) lookups of
//! characters from indices. This means that Scheme strings are vectors of
//! Unicode code points rather than UTF-8 strings.
7
8use std::{fmt, hash::Hash, sync::Arc};
9
10use parking_lot::RwLock;
11
12use crate::{
13    Either,
14    character::{char_switch_case, to_foldcase},
15    exceptions::Exception,
16    gc::Trace,
17    registry::bridge,
18    value::{Value, ValueType},
19};
20
/// Shared storage behind a [`WideString`]: the characters themselves plus a
/// mutability flag.
// NOTE(review): the 16-byte alignment is presumably relied upon elsewhere
// (e.g. for pointer tagging) — confirm before changing it.
#[repr(align(16))]
pub(crate) struct WideStringInner {
    /// The string's contents, one `char` per element, behind a lock so they
    /// can be read and modified through a shared handle.
    pub(crate) chars: RwLock<Vec<char>>,
    /// Whether the string may be modified in place; `string-set!` rejects
    /// strings for which this is `false`.
    mutable: bool,
}
26
/// A string that is a vector of characters, rather than a vector of bytes
/// encoding a UTF-8 string. This is because R6RS mandates O(1) lookups of
/// character indices.
///
/// Cloning a `WideString` clones the inner [`Arc`], so clones share the same
/// underlying character buffer.
#[derive(Clone, Trace)]
pub struct WideString(pub(crate) Arc<WideStringInner>);
32
33impl WideString {
34    pub fn new(s: impl fmt::Display) -> Self {
35        Self::from(s.to_string())
36    }
37
38    pub fn new_mutable<V>(value: V) -> Self
39    where
40        Self: From<V>,
41    {
42        let mut this = Self::from(value);
43        Arc::get_mut(&mut this.0).unwrap().mutable = true;
44        this
45    }
46
47    pub fn clear(&self) {
48        self.0.chars.write().clear()
49    }
50
51    pub fn len(&self) -> usize {
52        self.0.chars.read().len()
53    }
54
55    pub fn is_empty(&self) -> bool {
56        self.0.chars.read().is_empty()
57    }
58
59    pub fn get(&self, idx: usize) -> Option<char> {
60        self.0.chars.read().get(idx).copied()
61    }
62}
63
64impl From<Vec<char>> for WideString {
65    fn from(value: Vec<char>) -> Self {
66        Self(Arc::new(WideStringInner {
67            chars: RwLock::new(value),
68            mutable: false,
69        }))
70    }
71}
72
73impl From<String> for WideString {
74    fn from(value: String) -> Self {
75        Self(Arc::new(WideStringInner {
76            chars: RwLock::new(value.chars().collect()),
77            mutable: false,
78        }))
79    }
80}
81
82impl From<&str> for WideString {
83    fn from(value: &str) -> Self {
84        Self(Arc::new(WideStringInner {
85            chars: RwLock::new(value.chars().collect()),
86            mutable: false,
87        }))
88    }
89}
90
91impl From<WideString> for String {
92    fn from(value: WideString) -> Self {
93        value.0.chars.read().iter().copied().collect()
94    }
95}
96
97impl Hash for WideString {
98    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
99        self.0.chars.read().hash(state);
100    }
101}
102
103impl PartialEq for WideString {
104    fn eq(&self, rhs: &Self) -> bool {
105        *self.0.chars.read() == *rhs.0.chars.read()
106    }
107}
108
109impl PartialEq<str> for WideString {
110    fn eq(&self, rhs: &str) -> bool {
111        self.0.chars.read().iter().copied().eq(rhs.chars())
112    }
113}
114
115impl fmt::Display for WideString {
116    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
117        for char in &*self.0.chars.read() {
118            write!(f, "{char}")?;
119        }
120        Ok(())
121    }
122}
123
124impl fmt::Debug for WideString {
125    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
126        write!(f, "\"")?;
127        for char in self
128            .0
129            .chars
130            .read()
131            .iter()
132            .flat_map(|chr| chr.escape_debug())
133        {
134            write!(f, "{char}")?;
135        }
136        write!(f, "\"")
137    }
138}
139
140#[bridge(name = "string?", lib = "(rnrs base builtins (6))")]
141pub fn string_pred(arg: &Value) -> Result<Vec<Value>, Exception> {
142    Ok(vec![Value::from(arg.type_of() == ValueType::String)])
143}
144
145#[bridge(name = "make-string", lib = "(rnrs base builtins (6))")]
146pub fn make_string(k: &Value, chr: &[Value]) -> Result<Vec<Value>, Exception> {
147    let chr: char = match chr {
148        [] => '\0',
149        [chr] => chr.clone().try_into()?,
150        x => return Err(Exception::wrong_num_of_args(2, 1 + x.len())),
151    };
152    let k: usize = k.clone().try_into()?;
153    let ret = Value::from(WideString(Arc::new(WideStringInner {
154        chars: RwLock::new(std::iter::repeat_n(chr, k).collect()),
155        mutable: true,
156    })));
157    Ok(vec![ret])
158}
159
160#[bridge(name = "string", lib = "(rnrs base builtins (6))")]
161pub fn string(char: &Value, chars: &[Value]) -> Result<Vec<Value>, Exception> {
162    Ok(vec![Value::from(WideString(Arc::new(WideStringInner {
163        chars: RwLock::new(
164            Some(char)
165                .into_iter()
166                .chain(chars.iter())
167                .cloned()
168                .map(Value::try_into)
169                .collect::<Result<Vec<char>, _>>()?,
170        ),
171        mutable: true,
172    })))])
173}
174
175#[bridge(name = "string-length", lib = "(rnrs base builtins (6))")]
176pub fn string_length(s: &Value) -> Result<Vec<Value>, Exception> {
177    let s: WideString = s.clone().try_into()?;
178    Ok(vec![Value::from(s.len())])
179}
180
181#[bridge(name = "string-ref", lib = "(rnrs base builtins (6))")]
182pub fn string_ref(string: &Value, k: &Value) -> Result<Vec<Value>, Exception> {
183    let string: WideString = string.clone().try_into()?;
184    let k: usize = k.clone().try_into()?;
185    let chars = string.0.chars.read();
186    if k >= chars.len() {
187        return Err(Exception::invalid_index(k, chars.len()));
188    }
189    Ok(vec![Value::from(chars[k])])
190}
191
192#[bridge(name = "string=?", lib = "(rnrs base builtins (6))")]
193pub fn string_eq_pred(
194    string_1: &Value,
195    string_2: &Value,
196    string_n: &[Value],
197) -> Result<Vec<Value>, Exception> {
198    let string_1: WideString = string_1.clone().try_into()?;
199    let string_1_chars = string_1.0.chars.read();
200    for string_n in Some(string_2).into_iter().chain(string_n.iter()).cloned() {
201        let string_n: WideString = string_n.try_into()?;
202        if *string_1_chars != *string_n.0.chars.read() {
203            return Ok(vec![Value::from(false)]);
204        }
205    }
206    Ok(vec![Value::from(true)])
207}
208
209#[bridge(name = "string<?", lib = "(rnrs base builtins (6))")]
210pub fn string_less_pred(
211    string_1: WideString,
212    string_2: &Value,
213    string_n: &[Value],
214) -> Result<Vec<Value>, Exception> {
215    let mut prev_string = string_1;
216    for string_n in Some(string_2).into_iter().chain(string_n.iter()).cloned() {
217        let string_n: WideString = string_n.try_into()?;
218        {
219            let prev_string_read = prev_string.0.chars.read();
220            if *prev_string_read >= *string_n.0.chars.read() {
221                return Ok(vec![Value::from(false)]);
222            }
223        }
224        prev_string = string_n;
225    }
226    Ok(vec![Value::from(true)])
227}
228
229#[bridge(name = "string>?", lib = "(rnrs base builtins (6))")]
230pub fn string_greater_pred(
231    string_1: WideString,
232    string_2: &Value,
233    string_n: &[Value],
234) -> Result<Vec<Value>, Exception> {
235    let mut prev_string = string_1;
236    for string_n in Some(string_2).into_iter().chain(string_n.iter()).cloned() {
237        let string_n: WideString = string_n.try_into()?;
238        {
239            let prev_string_read = prev_string.0.chars.read();
240            if *prev_string_read <= *string_n.0.chars.read() {
241                return Ok(vec![Value::from(false)]);
242            }
243        }
244        prev_string = string_n;
245    }
246    Ok(vec![Value::from(true)])
247}
248
249#[bridge(name = "string<=?", lib = "(rnrs base builtins (6))")]
250pub fn string_less_equal_pred(
251    string_1: WideString,
252    string_2: &Value,
253    string_n: &[Value],
254) -> Result<Vec<Value>, Exception> {
255    let mut prev_string = string_1;
256    for string_n in Some(string_2).into_iter().chain(string_n.iter()).cloned() {
257        let string_n: WideString = string_n.try_into()?;
258        {
259            let prev_string_read = prev_string.0.chars.read();
260            if *prev_string_read > *string_n.0.chars.read() {
261                return Ok(vec![Value::from(false)]);
262            }
263        }
264        prev_string = string_n;
265    }
266    Ok(vec![Value::from(true)])
267}
268
269#[bridge(name = "string>=?", lib = "(rnrs base builtins (6))")]
270pub fn string_greater_equal_pred(
271    string_1: WideString,
272    string_2: &Value,
273    string_n: &[Value],
274) -> Result<Vec<Value>, Exception> {
275    let mut prev_string = string_1;
276    for string_n in Some(string_2).into_iter().chain(string_n.iter()).cloned() {
277        let string_n: WideString = string_n.try_into()?;
278        {
279            let prev_string_read = prev_string.0.chars.read();
280            if *prev_string_read < *string_n.0.chars.read() {
281                return Ok(vec![Value::from(false)]);
282            }
283        }
284        prev_string = string_n;
285    }
286    Ok(vec![Value::from(true)])
287}
288
289#[bridge(name = "substring", lib = "(rnrs base builtins (6))")]
290pub fn substring(string: WideString, start: usize, end: usize) -> Result<Vec<Value>, Exception> {
291    if start > end {
292        return Err(Exception::error(format!(
293            "start ({start}) cannot be greater than end ({end})"
294        )));
295    }
296    let substr = string.0.chars.read()[start..end].to_vec();
297    Ok(vec![Value::from(WideString::new_mutable(substr))])
298}
299
300#[bridge(name = "string-append", lib = "(rnrs base builtins (6))")]
301pub fn list(args: &[Value]) -> Result<Vec<Value>, Exception> {
302    let mut output = String::new();
303    for arg in args.iter().cloned() {
304        let arg: String = arg.try_into()?;
305        output += arg.as_str();
306    }
307    Ok(vec![Value::from(output)])
308}
309
310#[bridge(name = "string->list", lib = "(rnrs base builtins (6))")]
311pub fn string_to_list(string: WideString) -> Result<Vec<Value>, Exception> {
312    let mut list = Value::null();
313    for chr in string.0.chars.read().iter().rev() {
314        list = Value::from((Value::from(*chr), list));
315    }
316    Ok(vec![list])
317}
318
319#[bridge(name = "string-copy", lib = "(rnrs base builtins (6))")]
320pub fn string_copy(string: WideString) -> Result<Vec<Value>, Exception> {
321    let copy = string.0.chars.read().clone();
322    Ok(vec![Value::from(WideString::new_mutable(copy))])
323}
324
325#[bridge(name = "string->vector", lib = "(rnrs base builtins (6))")]
326pub fn string_to_vector(from: &Value, range: &[Value]) -> Result<Vec<Value>, Exception> {
327    let string: WideString = from.clone().try_into()?;
328
329    let len = string.0.chars.read().len();
330    let start: usize = range
331        .first()
332        .cloned()
333        .map(Value::try_into)
334        .transpose()?
335        .unwrap_or(0);
336    let end: usize = range
337        .get(1)
338        .cloned()
339        .map(Value::try_into)
340        .transpose()?
341        .unwrap_or(len);
342
343    if end < start {
344        return Err(Exception::error(format!(
345            "Range end {end} cannot be less than start {start}",
346        )));
347    } else if end > len {
348        return Err(Exception::invalid_range(start..end, len));
349    }
350
351    Ok(vec![Value::from(
352        string.0.chars.read()[start..end]
353            .iter()
354            .copied()
355            .map(Value::from)
356            .collect::<Vec<_>>(),
357    )])
358}
359
360#[bridge(name = "string-set!", lib = "(rnrs mutable-strings (6))")]
361pub fn string_set_bang(string: &Value, k: &Value, chr: &Value) -> Result<Vec<Value>, Exception> {
362    let string: WideString = string.clone().try_into()?;
363    let k: usize = k.clone().try_into()?;
364    let chr: char = chr.clone().try_into()?;
365    if !string.0.mutable {
366        return Err(Exception::error("string is immutable"));
367    }
368    let mut chars = string.0.chars.write();
369    if k >= chars.len() {
370        return Err(Exception::invalid_index(k, chars.len()));
371    }
372    chars[k] = chr;
373    Ok(vec![])
374}
375
376#[bridge(name = "string-foldcase", lib = "(rnrs base builtins (6))")]
377pub fn string_foldcase(string: &Value) -> Result<Vec<Value>, Exception> {
378    let string: WideString = string.try_to_scheme_type()?;
379    let folded = string
380        .0
381        .chars
382        .read()
383        .iter()
384        .flat_map(|ch| match char_switch_case(*ch, to_foldcase) {
385            Either::Left(ch) => vec![ch],
386            Either::Right(s) => s,
387        })
388        .collect::<Vec<_>>();
389    let folded = WideString(Arc::new(WideStringInner {
390        chars: RwLock::new(folded),
391        mutable: true,
392    }));
393    Ok(vec![Value::from(folded)])
394}