scheme-rs 0.1.0

Embedded scheme for the Rust ecosystem
Documentation
//! String builtins and data types.
//!
//! Although we would like to, the R6RS standard of Scheme precludes us from
//! using the standard library String type by mandating O(1) lookups of
//! characters from indices. This means that Scheme strings are vectors of
//! Unicode code points rather than UTF-8 strings.

use std::{fmt, hash::Hash, sync::Arc};

use parking_lot::RwLock;

use crate::{
    Either,
    character::{char_switch_case, to_foldcase},
    exceptions::Exception,
    gc::Trace,
    registry::bridge,
    value::{Value, ValueType},
};

/// Shared, lock-protected storage behind a [`WideString`] handle.
// NOTE(review): the 16-byte alignment is presumably relied on elsewhere in the
// crate (e.g. for pointer tagging) — confirm before changing it.
#[repr(align(16))]
pub(crate) struct WideStringInner {
    /// The string's contents, one `char` per element, guarded by a read-write
    /// lock.
    pub(crate) chars: RwLock<Vec<char>>,
    /// Whether `string-set!` is permitted to modify this string.
    mutable: bool,
}

/// A string that is a vector of characters, rather than a vector of bytes
/// encoding a UTF-8 string. This is because R6RS mandates O(1) lookups of
/// character indices.
///
/// Cloning a `WideString` clones the inner [`Arc`], so clones refer to the
/// same underlying characters.
#[derive(Clone, Trace)]
pub struct WideString(pub(crate) Arc<WideStringInner>);

impl WideString {
    pub fn new(s: impl fmt::Display) -> Self {
        Self::from(s.to_string())
    }

    pub fn new_mutable<V>(value: V) -> Self
    where
        Self: From<V>,
    {
        let mut this = Self::from(value);
        Arc::get_mut(&mut this.0).unwrap().mutable = true;
        this
    }

    pub fn clear(&self) {
        self.0.chars.write().clear()
    }

    pub fn len(&self) -> usize {
        self.0.chars.read().len()
    }

    pub fn is_empty(&self) -> bool {
        self.0.chars.read().is_empty()
    }

    pub fn get(&self, idx: usize) -> Option<char> {
        self.0.chars.read().get(idx).copied()
    }
}

/// Wraps an existing vector of characters as an immutable string.
impl From<Vec<char>> for WideString {
    fn from(value: Vec<char>) -> Self {
        let inner = WideStringInner {
            chars: RwLock::new(value),
            mutable: false,
        };
        Self(Arc::new(inner))
    }
}

impl From<String> for WideString {
    fn from(value: String) -> Self {
        Self(Arc::new(WideStringInner {
            chars: RwLock::new(value.chars().collect()),
            mutable: false,
        }))
    }
}

impl From<&str> for WideString {
    fn from(value: &str) -> Self {
        Self(Arc::new(WideStringInner {
            chars: RwLock::new(value.chars().collect()),
            mutable: false,
        }))
    }
}

/// Encodes the characters of a [`WideString`] into a UTF-8 `String`.
impl From<WideString> for String {
    fn from(value: WideString) -> Self {
        let chars = value.0.chars.read();
        let mut encoded = String::new();
        encoded.extend(chars.iter());
        encoded
    }
}

/// Hashes the string by its current characters.
impl Hash for WideString {
    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
        let chars = self.0.chars.read();
        // Hash the underlying `Vec<char>` while the read lock is held.
        Hash::hash(&*chars, state);
    }
}

impl PartialEq for WideString {
    fn eq(&self, rhs: &Self) -> bool {
        *self.0.chars.read() == *rhs.0.chars.read()
    }
}

/// Compares the string's characters against the characters of a `str`.
impl PartialEq<str> for WideString {
    fn eq(&self, rhs: &str) -> bool {
        let chars = self.0.chars.read();
        rhs.chars().eq(chars.iter().copied())
    }
}

/// Writes the string's characters verbatim, without quotes or escaping.
impl fmt::Display for WideString {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let chars = self.0.chars.read();
        for &ch in chars.iter() {
            fmt::Write::write_char(&mut *f, ch)?;
        }
        Ok(())
    }
}

/// Writes the string surrounded by double quotes, with every character
/// passed through `char::escape_debug`.
impl fmt::Debug for WideString {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str("\"")?;
        for ch in self.0.chars.read().iter() {
            for escaped in ch.escape_debug() {
                fmt::Write::write_char(&mut *f, escaped)?;
            }
        }
        f.write_str("\"")
    }
}

// `string?`: returns a single boolean telling whether `arg` has the string
// value type.
#[bridge(name = "string?", lib = "(rnrs base builtins (6))")]
pub fn string_pred(arg: &Value) -> Result<Vec<Value>, Exception> {
    let is_string = arg.type_of() == ValueType::String;
    Ok(vec![Value::from(is_string)])
}

// `make-string`: builds a mutable string of `k` characters.
//
// `chr` holds the optional fill character: when absent the string is filled
// with `'\0'`; more than one extra argument is reported as a wrong number of
// arguments.
#[bridge(name = "make-string", lib = "(rnrs base builtins (6))")]
pub fn make_string(k: &Value, chr: &[Value]) -> Result<Vec<Value>, Exception> {
    let fill: char = match chr.len() {
        0 => '\0',
        1 => chr[0].clone().try_into()?,
        given => return Err(Exception::wrong_num_of_args(2, 1 + given)),
    };
    let length: usize = k.clone().try_into()?;
    let contents: Vec<char> = vec![fill; length];
    let made = WideString(Arc::new(WideStringInner {
        chars: RwLock::new(contents),
        mutable: true,
    }));
    Ok(vec![Value::from(made)])
}

// `string`: builds a mutable string from one or more character arguments.
//
// Fails with the conversion error of the first argument that is not a
// character.
#[bridge(name = "string", lib = "(rnrs base builtins (6))")]
pub fn string(char: &Value, chars: &[Value]) -> Result<Vec<Value>, Exception> {
    let mut contents: Vec<char> = Vec::with_capacity(chars.len() + 1);
    for value in std::iter::once(char).chain(chars) {
        contents.push(value.clone().try_into()?);
    }
    let built = WideString(Arc::new(WideStringInner {
        chars: RwLock::new(contents),
        mutable: true,
    }));
    Ok(vec![Value::from(built)])
}

// `string-length`: returns the number of characters in the string `s`.
#[bridge(name = "string-length", lib = "(rnrs base builtins (6))")]
pub fn string_length(s: &Value) -> Result<Vec<Value>, Exception> {
    let string: WideString = s.clone().try_into()?;
    let length = string.0.chars.read().len();
    Ok(vec![Value::from(length)])
}

// `string-ref`: returns the character at index `k` of `string`, or an
// invalid-index exception when `k` is not less than the string's length.
#[bridge(name = "string-ref", lib = "(rnrs base builtins (6))")]
pub fn string_ref(string: &Value, k: &Value) -> Result<Vec<Value>, Exception> {
    let target: WideString = string.clone().try_into()?;
    let index: usize = k.clone().try_into()?;
    let guard = target.0.chars.read();
    let Some(&ch) = guard.get(index) else {
        return Err(Exception::invalid_index(index, guard.len()));
    };
    Ok(vec![Value::from(ch)])
}

#[bridge(name = "string=?", lib = "(rnrs base builtins (6))")]
pub fn string_eq_pred(
    string_1: &Value,
    string_2: &Value,
    string_n: &[Value],
) -> Result<Vec<Value>, Exception> {
    let string_1: WideString = string_1.clone().try_into()?;
    let string_1_chars = string_1.0.chars.read();
    for string_n in Some(string_2).into_iter().chain(string_n.iter()).cloned() {
        let string_n: WideString = string_n.try_into()?;
        if *string_1_chars != *string_n.0.chars.read() {
            return Ok(vec![Value::from(false)]);
        }
    }
    Ok(vec![Value::from(true)])
}

// `string<?`: returns `#t` when each string compares strictly less than the
// one that follows it.
//
// Arguments are converted lazily: the first out-of-order pair returns `#f`
// without inspecting the remaining arguments.
#[bridge(name = "string<?", lib = "(rnrs base builtins (6))")]
pub fn string_less_pred(
    string_1: WideString,
    string_2: &Value,
    string_n: &[Value],
) -> Result<Vec<Value>, Exception> {
    let mut previous = string_1;
    for candidate in std::iter::once(string_2).chain(string_n) {
        let current: WideString = candidate.clone().try_into()?;
        // Both read guards are temporaries released at the end of this
        // statement.
        let in_order = *previous.0.chars.read() < *current.0.chars.read();
        if !in_order {
            return Ok(vec![Value::from(false)]);
        }
        previous = current;
    }
    Ok(vec![Value::from(true)])
}

// `string>?`: returns `#t` when each string compares strictly greater than
// the one that follows it.
//
// Arguments are converted lazily: the first out-of-order pair returns `#f`
// without inspecting the remaining arguments.
#[bridge(name = "string>?", lib = "(rnrs base builtins (6))")]
pub fn string_greater_pred(
    string_1: WideString,
    string_2: &Value,
    string_n: &[Value],
) -> Result<Vec<Value>, Exception> {
    let mut previous = string_1;
    for candidate in std::iter::once(string_2).chain(string_n) {
        let current: WideString = candidate.clone().try_into()?;
        // Both read guards are temporaries released at the end of this
        // statement.
        let in_order = *previous.0.chars.read() > *current.0.chars.read();
        if !in_order {
            return Ok(vec![Value::from(false)]);
        }
        previous = current;
    }
    Ok(vec![Value::from(true)])
}

// `string<=?`: returns `#t` when each string compares less than or equal to
// the one that follows it.
//
// Arguments are converted lazily: the first out-of-order pair returns `#f`
// without inspecting the remaining arguments.
#[bridge(name = "string<=?", lib = "(rnrs base builtins (6))")]
pub fn string_less_equal_pred(
    string_1: WideString,
    string_2: &Value,
    string_n: &[Value],
) -> Result<Vec<Value>, Exception> {
    let mut previous = string_1;
    for candidate in std::iter::once(string_2).chain(string_n) {
        let current: WideString = candidate.clone().try_into()?;
        // Both read guards are temporaries released at the end of this
        // statement.
        let in_order = *previous.0.chars.read() <= *current.0.chars.read();
        if !in_order {
            return Ok(vec![Value::from(false)]);
        }
        previous = current;
    }
    Ok(vec![Value::from(true)])
}

// `string>=?`: returns `#t` when each string compares greater than or equal
// to the one that follows it.
//
// Arguments are converted lazily: the first out-of-order pair returns `#f`
// without inspecting the remaining arguments.
#[bridge(name = "string>=?", lib = "(rnrs base builtins (6))")]
pub fn string_greater_equal_pred(
    string_1: WideString,
    string_2: &Value,
    string_n: &[Value],
) -> Result<Vec<Value>, Exception> {
    let mut previous = string_1;
    for candidate in std::iter::once(string_2).chain(string_n) {
        let current: WideString = candidate.clone().try_into()?;
        // Both read guards are temporaries released at the end of this
        // statement.
        let in_order = *previous.0.chars.read() >= *current.0.chars.read();
        if !in_order {
            return Ok(vec![Value::from(false)]);
        }
        previous = current;
    }
    Ok(vec![Value::from(true)])
}

// `substring`: returns a new mutable string holding the characters of
// `string` from index `start` (inclusive) to `end` (exclusive).
//
// Errors:
// - `start > end` is reported with a descriptive error message.
// - a range reaching past the end of the string is reported as an invalid
//   range instead of panicking on the slice.
#[bridge(name = "substring", lib = "(rnrs base builtins (6))")]
pub fn substring(string: WideString, start: usize, end: usize) -> Result<Vec<Value>, Exception> {
    if start > end {
        return Err(Exception::error(format!(
            "start ({start}) cannot be greater than end ({end})"
        )));
    }
    let chars = string.0.chars.read();
    // `get` performs the bounds check under the same guard that supplies the
    // slice, so the length cannot change between check and use.
    let Some(slice) = chars.get(start..end) else {
        return Err(Exception::invalid_range(start..end, chars.len()));
    };
    let substr = slice.to_vec();
    Ok(vec![Value::from(WideString::new_mutable(substr))])
}

// `string-append`: concatenates all arguments into a single string.
//
// Each argument is converted to a Rust `String`; the first argument that
// fails to convert aborts with its conversion error. With no arguments the
// result is the empty string.
//
// NOTE(review): the result is built via `Value::from(String)`. Confirm that
// this yields a mutable string, as `substring` and `string-copy` in this file
// return mutable strings for newly allocated results.
#[bridge(name = "string-append", lib = "(rnrs base builtins (6))")]
pub fn list(args: &[Value]) -> Result<Vec<Value>, Exception> {
    let mut joined = String::new();
    for value in args {
        let piece: String = value.clone().try_into()?;
        joined.push_str(&piece);
    }
    Ok(vec![Value::from(joined)])
}

// `string->list`: returns a list of the string's characters in order.
//
// The list is built back to front: starting from the empty list, each
// character (last one first) is consed onto the accumulated tail.
#[bridge(name = "string->list", lib = "(rnrs base builtins (6))")]
pub fn string_to_list(string: WideString) -> Result<Vec<Value>, Exception> {
    let chars = string.0.chars.read();
    let list = chars
        .iter()
        .rfold(Value::null(), |tail, &ch| Value::from((Value::from(ch), tail)));
    Ok(vec![list])
}

// `string-copy`: returns a new mutable string with the same characters as
// `string`.
#[bridge(name = "string-copy", lib = "(rnrs base builtins (6))")]
pub fn string_copy(string: WideString) -> Result<Vec<Value>, Exception> {
    let duplicate: Vec<char> = string.0.chars.read().to_vec();
    let copied = WideString::new_mutable(duplicate);
    Ok(vec![Value::from(copied)])
}

// `string->vector`: returns a vector of the characters of `from`.
//
// `range` may supply an optional start index (default 0) followed by an
// optional end index (default: the string's length); any further arguments
// are ignored. The characters in `start..end` are returned.
//
// Errors:
// - `end < start` is reported with a descriptive error message.
// - `end` greater than the string's length is reported as an invalid range.
#[bridge(name = "string->vector", lib = "(rnrs base builtins (6))")]
pub fn string_to_vector(from: &Value, range: &[Value]) -> Result<Vec<Value>, Exception> {
    let string: WideString = from.clone().try_into()?;

    // Convert the optional bounds before taking the lock so conversion errors
    // surface in the same order as the arguments.
    let start: Option<usize> = range
        .first()
        .cloned()
        .map(Value::try_into)
        .transpose()?;
    let end: Option<usize> = range
        .get(1)
        .cloned()
        .map(Value::try_into)
        .transpose()?;

    // Hold a single read guard for both the length check and the slice so the
    // string cannot shrink between validating the range and using it.
    let chars = string.0.chars.read();
    let len = chars.len();
    let start = start.unwrap_or(0);
    let end = end.unwrap_or(len);

    if end < start {
        return Err(Exception::error(format!(
            "Range end {end} cannot be less than start {start}",
        )));
    } else if end > len {
        return Err(Exception::invalid_range(start..end, len));
    }

    let items: Vec<Value> = chars[start..end]
        .iter()
        .copied()
        .map(Value::from)
        .collect();
    Ok(vec![Value::from(items)])
}

// `string-set!`: stores `chr` at index `k` of `string` and returns no values.
//
// Errors, in the order they are checked:
// - any argument of the wrong type (conversion error),
// - the string is not flagged as mutable ("string is immutable"),
// - `k` is not less than the string's length (invalid index).
#[bridge(name = "string-set!", lib = "(rnrs mutable-strings (6))")]
pub fn string_set_bang(string: &Value, k: &Value, chr: &Value) -> Result<Vec<Value>, Exception> {
    let target: WideString = string.clone().try_into()?;
    let index: usize = k.clone().try_into()?;
    let replacement: char = chr.clone().try_into()?;
    if !target.0.mutable {
        return Err(Exception::error("string is immutable"));
    }
    let mut guard = target.0.chars.write();
    let len = guard.len();
    let Some(slot) = guard.get_mut(index) else {
        return Err(Exception::invalid_index(index, len));
    };
    *slot = replacement;
    Ok(vec![])
}

// `string-foldcase`: returns a new mutable string with every character of
// `string` passed through `char_switch_case` with `to_foldcase`.
//
// A character may map to either a single character or a sequence of
// characters, so the result can be longer than the input.
#[bridge(name = "string-foldcase", lib = "(rnrs base builtins (6))")]
pub fn string_foldcase(string: &Value) -> Result<Vec<Value>, Exception> {
    let source: WideString = string.try_to_scheme_type()?;
    let mut folded: Vec<char> = Vec::new();
    for &ch in source.0.chars.read().iter() {
        match char_switch_case(ch, to_foldcase) {
            Either::Left(single) => folded.push(single),
            Either::Right(many) => folded.extend(many),
        }
    }
    let result = WideString(Arc::new(WideStringInner {
        chars: RwLock::new(folded),
        mutable: true,
    }));
    Ok(vec![Value::from(result)])
}