magnus 0.4.4

High level Ruby bindings for Rust.
Documentation
//! Types for working with Ruby’s String class.

use std::{
    borrow::Cow,
    fmt, io,
    iter::Iterator,
    mem::transmute,
    ops::Deref,
    os::raw::{c_char, c_long},
    path::{Path, PathBuf},
    ptr::{self, NonNull},
    slice, str,
};

#[cfg(ruby_gte_3_0)]
use rb_sys::rb_str_to_interned_str;
#[cfg(all(ruby_gte_3_0, ruby_lt_3_2))]
use rb_sys::ruby_rstring_consts::RSTRING_EMBED_LEN_SHIFT;
#[cfg(ruby_lt_3_0)]
use rb_sys::ruby_rstring_flags::RSTRING_EMBED_LEN_SHIFT;
use rb_sys::{
    self, rb_enc_str_coderange, rb_enc_str_new, rb_str_buf_append, rb_str_buf_new, rb_str_cat,
    rb_str_conv_enc, rb_str_new, rb_str_new_frozen, rb_str_new_shared, rb_str_strlen,
    rb_str_to_str, rb_utf8_str_new, rb_utf8_str_new_static, ruby_coderange_type,
    ruby_rstring_flags, ruby_value_type, VALUE,
};

use crate::{
    debug_assert_value,
    encoding::{self, Coderange, EncodingCapable, RbEncoding},
    error::{protect, Error},
    exception,
    object::Object,
    try_convert::TryConvert,
    value::{private, NonZeroValue, ReprValue, Value},
};

/// A Value pointer to a RString struct, Ruby's internal representation of
/// strings.
///
/// All [`Value`] methods should be available on this type through [`Deref`],
/// but some may be missed by this documentation.
#[derive(Clone, Copy)]
#[repr(transparent)]
pub struct RString(NonZeroValue);

impl RString {
    /// Return `Some(RString)` if `val` is a `RString`, `None` otherwise.
    ///
    /// # Examples
    ///
    /// ```
    /// use magnus::{eval, RString};
    /// # let _cleanup = unsafe { magnus::embed::init() };
    ///
    /// assert!(RString::from_value(eval(r#""example""#).unwrap()).is_some());
    /// assert!(RString::from_value(eval(":example").unwrap()).is_none());
    /// ```
    #[inline]
    pub fn from_value(val: Value) -> Option<Self> {
        unsafe {
            (val.rb_type() == ruby_value_type::RUBY_T_STRING)
                .then(|| Self(NonZeroValue::new_unchecked(val)))
        }
    }

    pub(crate) fn ref_from_value(val: &Value) -> Option<&Self> {
        unsafe {
            (val.rb_type() == ruby_value_type::RUBY_T_STRING)
                .then(|| &*(val as *const _ as *const RString))
        }
    }

    #[inline]
    pub(crate) unsafe fn from_rb_value_unchecked(val: VALUE) -> Self {
        Self(NonZeroValue::new_unchecked(Value::new(val)))
    }

    fn as_internal(self) -> NonNull<rb_sys::RString> {
        // safe as inner value is NonZero
        unsafe { NonNull::new_unchecked(self.0.get().as_rb_value() as *mut _) }
    }

    /// Create a new Ruby string from the Rust string `s`.
    ///
    /// The encoding of the Ruby string will be UTF-8.
    ///
    /// # Examples
    ///
    /// ```
    /// use magnus::{eval, RString};
    /// # let _cleanup = unsafe { magnus::embed::init() };
    ///
    /// let val = RString::new("example");
    /// let res: bool = eval!(r#"val == "example""#, val).unwrap();
    /// assert!(res);
    /// ```
    pub fn new(s: &str) -> Self {
        let len = s.len();
        let ptr = s.as_ptr();
        unsafe {
            Self::from_rb_value_unchecked(rb_utf8_str_new(ptr as *const c_char, len as c_long))
        }
    }

    /// Implementation detail of [`r_string`].
    #[doc(hidden)]
    #[inline]
    pub unsafe fn new_lit(ptr: *const c_char, len: c_long) -> Self {
        Self::from_rb_value_unchecked(rb_utf8_str_new_static(ptr, len))
    }

    /// Create a new Ruby string with capacity `n`.
    ///
    /// The encoding will be set to ASCII-8BIT (aka BINARY). See also
    /// [`with_capacity`](RString::with_capacity).
    ///
    /// # Examples
    ///
    /// ```
    /// use magnus::{eval, RString};
    /// # let _cleanup = unsafe { magnus::embed::init() };
    ///
    /// let buf = RString::buf_new(4096);
    /// buf.cat(&[13, 14, 10, 13, 11, 14, 14, 15]);
    /// let res: bool = eval!(r#"buf == "\r\x0E\n\r\v\x0E\x0E\x0F""#, buf).unwrap();
    /// assert!(res);
    /// ```
    pub fn buf_new(n: usize) -> Self {
        unsafe { Self::from_rb_value_unchecked(rb_str_buf_new(n as c_long)) }
    }

    /// Create a new Ruby string with capacity `n`.
    ///
    /// The encoding will be set to UTF-8. See also
    /// [`buf_new`](RString::buf_new).
    ///
    /// # Examples
    ///
    /// ```
    /// use magnus::{eval, RString};
    /// # let _cleanup = unsafe { magnus::embed::init() };
    ///
    /// let s = RString::with_capacity(9);
    /// s.cat("foo");
    /// s.cat("bar");
    /// s.cat("baz");
    /// let res: bool = eval!(r#"s == "foobarbaz""#, s).unwrap();
    /// assert!(res);
    /// ```
    pub fn with_capacity(n: usize) -> Self {
        let s = Self::buf_new(n);
        s.enc_associate(encoding::Index::utf8()).unwrap();
        s
    }

    /// Create a new Ruby string from the Rust slice `s`.
    ///
    /// The encoding of the Ruby string will be set to ASCII-8BIT (aka BINARY).
    ///
    /// # Examples
    ///
    /// ```
    /// use magnus::{eval, RString};
    /// # let _cleanup = unsafe { magnus::embed::init() };
    ///
    /// let buf = RString::from_slice(&[13, 14, 10, 13, 11, 14, 14, 15]);
    /// let res: bool = eval!(r#"buf == "\r\x0E\n\r\v\x0E\x0E\x0F""#, buf).unwrap();
    /// assert!(res);
    /// ```
    pub fn from_slice(s: &[u8]) -> Self {
        let len = s.len();
        let ptr = s.as_ptr();
        unsafe { Self::from_rb_value_unchecked(rb_str_new(ptr as *const c_char, len as c_long)) }
    }

    /// Create a new Ruby string from the value `s` with the encoding `enc`.
    ///
    /// # Examples
    ///
    /// ```
    /// use magnus::{eval, encoding::RbEncoding, RString};
    /// # let _cleanup = unsafe { magnus::embed::init() };
    ///
    /// let val = RString::enc_new("example", RbEncoding::usascii());
    /// let res: bool = eval!(r#"val == "example""#, val).unwrap();
    /// assert!(res);
    /// ```
    ///
    /// ```
    /// use magnus::{eval, encoding::RbEncoding, RString};
    /// # let _cleanup = unsafe { magnus::embed::init() };
    ///
    /// let val = RString::enc_new([255, 128, 128], RbEncoding::ascii8bit());
    /// let res: bool = eval!(r#"val == "\xFF\x80\x80".force_encoding("BINARY")"#, val).unwrap();
    /// assert!(res);
    /// ```
    pub fn enc_new<T, E>(s: T, enc: E) -> Self
    where
        T: AsRef<[u8]>,
        E: Into<RbEncoding>,
    {
        let s = s.as_ref();
        let len = s.len();
        let ptr = s.as_ptr();
        unsafe {
            Self::from_rb_value_unchecked(rb_enc_str_new(
                ptr as *const c_char,
                len as c_long,
                enc.into().as_ptr(),
            ))
        }
    }

    /// Create a new Ruby string from the Rust char `c`.
    ///
    /// The encoding of the Ruby string will be UTF-8.
    ///
    /// # Examples
    ///
    /// ```
    /// use magnus::{eval, RString};
    /// # let _cleanup = unsafe { magnus::embed::init() };
    ///
    /// let c = RString::from_char('a');
    /// let res: bool = eval!(r#"c == "a""#, c).unwrap();
    /// assert!(res);
    /// ```
    ///
    /// ```
    /// use magnus::{eval, RString};
    /// # let _cleanup = unsafe { magnus::embed::init() };
    ///
    /// let c = RString::from_char('🦀');
    /// let res: bool = eval!(r#"c == "🦀""#, c).unwrap();
    /// assert!(res);
    /// ```
    pub fn from_char(c: char) -> Self {
        let mut buf = [0; 4];
        Self::new(c.encode_utf8(&mut buf[..]))
    }

    /// Create a new Ruby string containing the codepoint `code` in the
    /// encoding `enc`.
    ///
    /// The encoding of the Ruby string will be the passed encoding `enc`.
    ///
    /// # Examples
    ///
    /// ```
    /// use magnus::{eval, encoding::RbEncoding, RString};
    /// # let _cleanup = unsafe { magnus::embed::init() };
    ///
    /// let c = RString::chr(97, RbEncoding::usascii()).unwrap();
    /// let res: bool = eval!(r#"c == "a""#, c).unwrap();
    /// assert!(res);
    /// ```
    ///
    /// ```
    /// use magnus::{eval, encoding::RbEncoding, RString};
    /// # let _cleanup = unsafe { magnus::embed::init() };
    ///
    /// let c = RString::chr(129408, RbEncoding::utf8()).unwrap();
    /// let res: bool = eval!(r#"c == "🦀""#, c).unwrap();
    /// assert!(res);
    /// ```
    pub fn chr<T>(code: u32, enc: T) -> Result<Self, Error>
    where
        T: Into<RbEncoding>,
    {
        enc.into().chr(code)
    }

    /// Create a new Ruby string that shares the same backing data as `s`.
    ///
    /// Both string objects will point at the same underlying data until one is
    /// modified, and only then will the data be duplicated. This operation is
    /// cheep, and useful for cases where you may need to modify a string, but
    /// don't want to mutate a value passed to your function.
    ///
    /// # Examples
    ///
    /// ```
    /// use magnus::{eval, RString};
    /// # let _cleanup = unsafe { magnus::embed::init() };
    ///
    /// let s = RString::new("example");
    /// let dup = RString::new_shared(s);
    /// let res: bool = eval!("s == dup", s, dup).unwrap();
    /// assert!(res);
    /// // mutating one doesn't mutate both
    /// dup.cat("foo");
    /// let res: bool = eval!("s != dup", s, dup).unwrap();
    /// assert!(res);
    /// ```
    pub fn new_shared(s: Self) -> Self {
        unsafe { Self::from_rb_value_unchecked(rb_str_new_shared(s.as_rb_value())) }
    }

    /// Create a new Ruby string that is a frozen copy of `s`.
    ///
    /// This can be used to get a copy of a string that is guranteed not to be
    /// modified while you are referencing it.
    ///
    /// # Examples
    ///
    /// ```
    /// use magnus::{eval, RString};
    /// # let _cleanup = unsafe { magnus::embed::init() };
    ///
    /// let orig = RString::new("example");
    /// let frozen = RString::new_frozen(orig);
    /// let res: bool = eval!(r#"frozen == "example""#, frozen).unwrap();
    /// assert!(res);
    /// // mutating original doesn't impact the frozen copy
    /// orig.cat("foo");
    /// let res: bool = eval!(r#"frozen == "example""#, frozen).unwrap();
    /// assert!(res);
    /// ```
    pub fn new_frozen(s: Self) -> Self {
        unsafe { Self::from_rb_value_unchecked(rb_str_new_frozen(s.as_rb_value())) }
    }

    /// Return `self` as a slice of bytes.
    ///
    /// # Safety
    ///
    /// This is directly viewing memory owned and managed by Ruby. Ruby may
    /// modify or free the memory backing the returned slice, the caller must
    /// ensure this does not happen.
    ///
    /// Ruby must not be allowed to garbage collect or modify `self` while a
    /// refrence to the slice is held.
    ///
    /// # Examples
    ///
    /// ```
    /// use magnus::RString;
    /// # let _cleanup = unsafe { magnus::embed::init() };
    ///
    /// let s = RString::new("example");
    /// // safe as we don't give Ruby the chance to mess with the string while
    /// // we hold a refrence to the slice.
    /// unsafe { assert_eq!(s.as_slice(), [101, 120, 97, 109, 112, 108, 101]) };
    /// ```
    pub unsafe fn as_slice(&self) -> &[u8] {
        self.as_slice_unconstrained()
    }

    unsafe fn as_slice_unconstrained<'a>(self) -> &'a [u8] {
        #[cfg(ruby_gte_3_1)]
        unsafe fn embedded_ary_ptr(rstring: RString) -> *const u8 {
            &rstring.as_internal().as_ref().as_.embed.ary as *const _ as *const u8
        }

        #[cfg(ruby_lt_3_1)]
        unsafe fn embedded_ary_ptr(rstring: RString) -> *const u8 {
            &rstring.as_internal().as_ref().as_.ary as *const _ as *const u8
        }

        debug_assert_value!(self);
        let r_basic = self.r_basic_unchecked();
        let f = r_basic.as_ref().flags;
        if (f & ruby_rstring_flags::RSTRING_NOEMBED as VALUE) != 0 {
            let h = self.as_internal().as_ref().as_.heap;
            slice::from_raw_parts(h.ptr as *const u8, h.len as usize)
        } else {
            slice::from_raw_parts(embedded_ary_ptr(self), embed_len(self, f) as usize)
        }
    }

    /// Return an iterator over `self`'s codepoints.
    ///
    /// # Safety
    ///
    /// The returned iterator references memory owned and managed by Ruby. Ruby
    /// may modify or free that memory, the caller must ensure this does not
    /// happen at any time while still holding a reference to the iterator.
    ///
    /// # Examples
    ///
    /// ```
    /// use magnus::{Error, RString};
    /// # let _cleanup = unsafe { magnus::embed::init() };
    ///
    /// let s = RString::new("🦀 café");
    ///
    /// let codepoints = unsafe {
    ///     // ensure string isn't mutated during iteration by creating a
    ///     // frozen copy and iterating over that
    ///     let f = RString::new_frozen(s);
    ///     f.codepoints().collect::<Result<Vec<_>, Error>>().unwrap()
    /// };
    ///
    /// assert_eq!(codepoints, [129408, 32, 99, 97, 102, 233]);
    /// ```
    pub unsafe fn codepoints(&self) -> Codepoints {
        Codepoints {
            slice: self.as_slice(),
            encoding: self.enc_get().into(),
        }
    }

    /// Return an iterator over `self`'s chars as slices of bytes.
    ///
    /// # Safety
    ///
    /// The returned iterator references memory owned and managed by Ruby. Ruby
    /// may modify or free that memory, the caller must ensure this does not
    /// happen at any time while still holding a reference to the iterator.
    ///
    /// # Examples
    ///
    /// ```
    /// use magnus::RString;
    /// # let _cleanup = unsafe { magnus::embed::init() };
    ///
    /// let s = RString::new("🦀 café");
    ///
    /// // ensure string isn't mutated during iteration by creating a frozen
    /// // copy and iterating over that
    /// let f = RString::new_frozen(s);
    /// let codepoints = unsafe {
    ///     f.char_bytes().collect::<Vec<_>>()
    /// };
    ///
    /// assert_eq!(codepoints, [&[240, 159, 166, 128][..], &[32], &[99], &[97], &[102], &[195, 169]]);
    /// ```
    pub unsafe fn char_bytes(&self) -> CharBytes {
        CharBytes {
            slice: self.as_slice(),
            encoding: self.enc_get().into(),
        }
    }

    /// Returns true if the encoding for this string is UTF-8 or US-ASCII,
    /// false otherwise.
    ///
    /// The encoding on a Ruby String is just a label, it provides no guarantee
    /// that the String really is valid UTF-8.
    ///
    /// # Examples
    ///
    /// ```
    /// use magnus::{eval, RString};
    /// # let _cleanup = unsafe { magnus::embed::init() };
    ///
    /// let s: RString = eval!(r#""café""#).unwrap();
    /// assert!(s.is_utf8_compatible_encoding());
    /// ```
    ///
    /// ```
    /// use magnus::{eval, RString};
    /// # let _cleanup = unsafe { magnus::embed::init() };
    ///
    /// let s: RString = eval!(r#""café".encode("ISO-8859-1")"#).unwrap();
    /// assert!(!s.is_utf8_compatible_encoding());
    /// ```
    pub fn is_utf8_compatible_encoding(self) -> bool {
        let encindex = self.enc_get();
        // us-ascii is a 100% compatible subset of utf8
        encindex == encoding::Index::utf8() || encindex == encoding::Index::usascii()
    }

    /// Returns a new string by reencoding `self` from its current encoding to
    /// the given `enc`.
    ///
    /// # Examples
    ///
    /// ```
    /// use magnus::{encoding::RbEncoding, eval, RString};
    /// # let _cleanup = unsafe { magnus::embed::init() };
    ///
    /// let s: RString = eval!(r#""café".encode("ISO-8859-1")"#).unwrap();
    /// // safe as we don't give Ruby the chance to mess with the string while
    /// // we hold a refrence to the slice.
    /// unsafe { assert_eq!(s.as_slice(), &[99, 97, 102, 233]) };
    /// let e = s.conv_enc(RbEncoding::utf8()).unwrap();
    /// unsafe { assert_eq!(e.as_slice(), &[99, 97, 102, 195, 169]) };
    /// ```
    pub fn conv_enc<T>(self, enc: T) -> Result<Self, Error>
    where
        T: Into<RbEncoding>,
    {
        protect(|| unsafe {
            Self::from_rb_value_unchecked(rb_str_conv_enc(
                self.as_rb_value(),
                ptr::null_mut(),
                enc.into().as_ptr(),
            ))
        })
    }

    /// Returns the cached coderange value that describes how `self` relates to
    /// its encoding.
    ///
    /// See also [`enc_coderange_scan`](RString::enc_coderange_scan).
    ///
    /// # Examples
    ///
    /// ```
    /// use magnus::{encoding::Coderange, RString};
    /// # let _cleanup = unsafe { magnus::embed::init() };
    ///
    /// // Coderange is unknown on creation.
    /// let s = RString::new("test");
    /// assert_eq!(s.enc_coderange(), Coderange::Unknown);
    ///
    /// // Methods that operate on the string using the encoding will set the
    /// // coderange as a side effect.
    /// let _: usize = s.funcall("length", ()).unwrap();
    /// assert_eq!(s.enc_coderange(), Coderange::SevenBit);
    ///
    /// // Operations with two strings with known coderanges will set it
    /// // appropriately.
    /// let t = RString::new("🦀");
    /// let _: usize = t.funcall("length", ()).unwrap();
    /// assert_eq!(t.enc_coderange(), Coderange::Valid);
    /// s.append(t).unwrap();
    /// assert_eq!(s.enc_coderange(), Coderange::Valid);
    ///
    /// // Operations that modify the string with an unknown coderange will
    /// // set the coderange back to unknown.
    /// s.cat([128]);
    /// assert_eq!(s.enc_coderange(), Coderange::Unknown);
    ///
    /// // Which may leave the string with a broken encoding.
    /// let _: usize = s.funcall("length", ()).unwrap();
    /// assert_eq!(s.enc_coderange(), Coderange::Broken);
    /// ```
    pub fn enc_coderange(self) -> Coderange {
        unsafe {
            transmute(
                (self.r_basic_unchecked().as_ref().flags
                    & ruby_coderange_type::RUBY_ENC_CODERANGE_MASK as VALUE) as u32,
            )
        }
    }

    /// Scans `self` to establish its coderange.
    ///
    /// If the coderange is already known, simply returns the known value.
    /// See also [`enc_coderange`](RString::enc_coderange).
    ///
    /// # Examples
    ///
    /// ```
    /// use magnus::{encoding::Coderange, RString};
    /// # let _cleanup = unsafe { magnus::embed::init() };
    ///
    /// let s = RString::new("test");
    /// assert_eq!(s.enc_coderange_scan(), Coderange::SevenBit);
    /// ```
    pub fn enc_coderange_scan(self) -> Coderange {
        unsafe { transmute(rb_enc_str_coderange(self.as_rb_value()) as u32) }
    }

    /// Clear `self`'s cached coderange, setting it to `Unknown`.
    ///
    /// # Examples
    ///
    /// ```
    /// use magnus::{encoding::Coderange, RString};
    /// # let _cleanup = unsafe { magnus::embed::init() };
    ///
    /// let s = RString::new("🦀");
    /// // trigger setting coderange
    /// let _: usize = s.funcall("length", ()).unwrap();
    /// assert_eq!(s.enc_coderange(), Coderange::Valid);
    ///
    /// s.enc_coderange_clear();
    /// assert_eq!(s.enc_coderange(), Coderange::Unknown);
    /// ```
    pub fn enc_coderange_clear(self) {
        unsafe {
            self.r_basic_unchecked().as_mut().flags &=
                !(ruby_coderange_type::RUBY_ENC_CODERANGE_MASK as VALUE)
        }
    }

    /// Sets `self`'s cached coderange.
    ///
    /// Rather than using the method it is recommended to set the coderange to
    /// `Unknown` with [`enc_coderange_clear`](RString::enc_coderange_clear)
    /// and let Ruby determine the coderange lazily when needed.
    ///
    /// # Safety
    ///
    /// This must be set correctly. `SevenBit` if all codepoints are within
    /// 0..=127, `Valid` if the string is valid for its encoding, or `Broken`
    /// if it is not. `Unknown` can be set safely with
    /// [`enc_coderange_clear`](RString::enc_coderange_clear).
    ///
    /// # Examples
    ///
    /// ```
    /// use magnus::{encoding::{self, Coderange, EncodingCapable}, exception, Error, RString};
    /// # let _cleanup = unsafe { magnus::embed::init() };
    ///
    /// fn crabbify(s: RString) -> Result<(), Error> {
    ///     if s.enc_get() != encoding::Index::utf8() {
    ///         return Err(Error::new(exception::encoding_error(), "expected utf-8"));
    ///     }
    ///     let original = s.enc_coderange();
    ///     // ::cat() will clear the coderange
    ///     s.cat("🦀");
    ///     // we added a multibyte char, so if we started with `SevenBit` it
    ///     // should be upgraded to `Valid`, and if it was `Valid` it is still
    ///     // `Valid`.
    ///     if original == Coderange::SevenBit || original == Coderange::Valid {
    ///         unsafe {
    ///             s.enc_coderange_set(Coderange::Valid);
    ///         }
    ///     }
    ///     Ok(())
    /// }
    ///
    /// let s = RString::new("test");
    /// // trigger setting coderange
    /// let _: usize = s.funcall("length", ()).unwrap();
    ///
    /// crabbify(s).unwrap();
    /// assert_eq!(s.enc_coderange(), Coderange::Valid);
    /// ```
    pub unsafe fn enc_coderange_set(self, cr: Coderange) {
        self.enc_coderange_clear();
        self.r_basic_unchecked().as_mut().flags |= cr as VALUE;
    }

    /// Returns a Rust `&str` reference to the value of `self`.
    ///
    /// Returns `None` if `self`'s encoding is not UTF-8 (or US-ASCII), or if
    /// the string is not valid UTF-8.
    ///
    /// # Safety
    ///
    /// This is directly viewing memory owned and managed by Ruby. Ruby may
    /// modify or free the memory backing the returned str, the caller must
    /// ensure this does not happen.
    ///
    /// Ruby must not be allowed to garbage collect or modify `self` while a
    /// refrence to the str is held.
    ///
    /// # Examples
    ///
    /// ```
    /// use magnus::RString;
    /// # let _cleanup = unsafe { magnus::embed::init() };
    ///
    /// let s = RString::new("example");
    /// // safe as we don't give Ruby the chance to mess with the string while
    /// // we hold a refrence to the slice.
    /// unsafe { assert_eq!(s.test_as_str().unwrap(), "example") };
    /// ```
    pub unsafe fn test_as_str(&self) -> Option<&str> {
        self.test_as_str_unconstrained()
    }

    /// Returns a Rust `&str` reference to the value of `self`.
    ///
    /// Errors if `self`'s encoding is not UTF-8 (or US-ASCII), or if the
    /// string is not valid UTF-8.
    ///
    /// # Safety
    ///
    /// This is directly viewing memory owned and managed by Ruby. Ruby may
    /// modify or free the memory backing the returned str, the caller must
    /// ensure this does not happen.
    ///
    /// Ruby must not be allowed to garbage collect or modify `self` while a
    /// refrence to the str is held.
    ///
    /// # Examples
    ///
    /// ```
    /// use magnus::RString;
    /// # let _cleanup = unsafe { magnus::embed::init() };
    ///
    /// let s = RString::new("example");
    /// // safe as we don't give Ruby the chance to mess with the string while
    /// // we hold a refrence to the slice.
    /// unsafe { assert_eq!(s.as_str().unwrap(), "example") };
    /// ```
    pub unsafe fn as_str(&self) -> Result<&str, Error> {
        self.as_str_unconstrained()
    }

    unsafe fn test_as_str_unconstrained<'a>(self) -> Option<&'a str> {
        let enc = self.enc_get();
        let cr = self.enc_coderange_scan();
        ((self.is_utf8_compatible_encoding()
            && (cr == Coderange::SevenBit || cr == Coderange::Valid))
            || (enc == encoding::Index::ascii8bit() && cr == Coderange::SevenBit))
            .then(|| str::from_utf8_unchecked(self.as_slice_unconstrained()))
    }

    unsafe fn as_str_unconstrained<'a>(self) -> Result<&'a str, Error> {
        self.test_as_str_unconstrained().ok_or_else(|| {
            let msg: Cow<'static, str> = if self.is_utf8_compatible_encoding() {
                format!(
                    "expected utf-8, got {}",
                    RbEncoding::from(self.enc_get()).name()
                )
                .into()
            } else {
                "invalid byte sequence in UTF-8".into()
            };
            Error::new(exception::encoding_error(), msg)
        })
    }

    /// Returns `self` as a Rust string, ignoring the Ruby encoding and
    /// dropping any non-UTF-8 characters. If `self` is valid UTF-8 this will
    /// return a `&str` reference.
    ///
    /// # Safety
    ///
    /// This may return a direct view of memory owned and managed by Ruby. Ruby
    /// may modify or free the memory backing the returned str, the caller must
    /// ensure this does not happen.
    ///
    /// Ruby must not be allowed to garbage collect or modify `self` while a
    /// refrence to the str is held.
    ///
    /// # Examples
    ///
    /// ```
    /// use magnus::RString;
    /// # let _cleanup = unsafe { magnus::embed::init() };
    ///
    /// let s = RString::new("example");
    /// // safe as we don't give Ruby the chance to mess with the string while
    /// // we hold a refrence to the slice.
    /// unsafe { assert_eq!(s.to_string_lossy(), "example") };
    /// ```
    #[allow(clippy::wrong_self_convention)]
    pub unsafe fn to_string_lossy(&self) -> Cow<'_, str> {
        String::from_utf8_lossy(self.as_slice())
    }

    /// Returns `self` as an owned Rust `String`. The Ruby string will be
    /// reencoded as UTF-8 if required. Errors if the string can not be encoded
    /// as UTF-8.
    ///
    /// # Examples
    ///
    /// ```
    /// use magnus::RString;
    /// # let _cleanup = unsafe { magnus::embed::init() };
    ///
    /// let s = RString::new("example");
    /// assert_eq!(s.to_string().unwrap(), "example");
    /// ```
    pub fn to_string(self) -> Result<String, Error> {
        let utf8 = if self.is_utf8_compatible_encoding() {
            self
        } else {
            self.conv_enc(RbEncoding::utf8())?
        };
        str::from_utf8(unsafe { utf8.as_slice() })
            .map(ToOwned::to_owned)
            .map_err(|e| Error::new(exception::encoding_error(), format!("{}", e)))
    }

    /// Converts `self` to a [`char`]. Errors if the string is more than one
    /// character or can not be encoded as UTF-8.
    ///
    /// # Examples
    ///
    /// ```
    /// use magnus::RString;
    /// # let _cleanup = unsafe { magnus::embed::init() };
    ///
    /// let s = RString::new("a");
    /// assert_eq!(s.to_char().unwrap(), 'a');
    /// ```
    pub fn to_char(self) -> Result<char, Error> {
        let utf8 = if self.is_utf8_compatible_encoding() {
            self
        } else {
            self.conv_enc(RbEncoding::utf8())?
        };
        unsafe {
            str::from_utf8(utf8.as_slice())
                .map_err(|e| Error::new(exception::encoding_error(), format!("{}", e)))?
                .parse()
                .map_err(|e| {
                    Error::new(
                        exception::type_error(),
                        format!("could not convert string to char, {}", e),
                    )
                })
        }
    }

    /// Returns whether `self` is a frozen interned string. Interned strings
    /// are usually string literals with the in files with the
    /// `# frozen_string_literal: true` 'magic comment'.
    ///
    /// Interned strings won't be garbage collected or modified, so should be
    /// safe to store on the heap or hold a `&str` refrence to. See
    /// [`as_interned_str`](RString::as_interned_str).
    ///
    /// # Examples
    ///
    /// ```
    /// use magnus::{eval, RString};
    /// # let _cleanup = unsafe { magnus::embed::init() };
    ///
    /// let s: RString = eval!(r#"
    /// ## frozen_string_literal: true
    ///
    /// "example"
    /// "#).unwrap();
    /// assert!(s.is_interned());
    /// ```
    ///
    /// ```
    /// use magnus::{eval, RString};
    /// # let _cleanup = unsafe { magnus::embed::init() };
    ///
    /// let s: RString = eval!(r#""example""#).unwrap();
    /// assert!(!s.is_interned());
    /// ```
    pub fn is_interned(self) -> bool {
        unsafe {
            self.r_basic_unchecked().as_ref().flags & ruby_rstring_flags::RSTRING_FSTR as VALUE != 0
        }
    }

    /// Returns `Some(FString)` if self is interned, `None` otherwise.
    ///
    /// Interned strings won't be garbage collected or modified, so should be
    /// safe to store on the heap or hold a `&str` refrence to. The `FString`
    /// type returned by this function provides a way to encode this property
    /// into the type system, and provides safe methods to access the string
    /// as a `&str` or slice.
    ///
    /// # Examples
    ///
    /// ```
    /// use magnus::{eval, RString};
    /// # let _cleanup = unsafe { magnus::embed::init() };
    ///
    /// let s: RString = eval!(r#"
    /// ## frozen_string_literal: true
    ///
    /// "example"
    /// "#).unwrap();
    /// assert!(s.as_interned_str().is_some());
    /// ```
    ///
    /// ```
    /// use magnus::{eval, RString};
    /// # let _cleanup = unsafe { magnus::embed::init() };
    ///
    /// let s: RString = eval!(r#""example""#).unwrap();
    /// assert!(s.as_interned_str().is_none());
    /// ```
    pub fn as_interned_str(self) -> Option<FString> {
        self.is_interned().then(|| FString(self))
    }

    /// Interns self and returns a [`FString`]. Be aware that once interned a
    /// string will never be garbage collected.
    ///
    /// # Examples
    ///
    /// ```
    /// use magnus::{eval, RString};
    /// # let _cleanup = unsafe { magnus::embed::init() };
    ///
    /// let fstring = RString::new("example").to_interned_str();
    /// assert_eq!(fstring.as_str().unwrap(), "example");
    /// ```
    #[cfg(any(ruby_gte_3_0, docsrs))]
    #[cfg_attr(docsrs, doc(cfg(ruby_gte_3_0)))]
    pub fn to_interned_str(self) -> FString {
        unsafe {
            FString(RString::from_rb_value_unchecked(rb_str_to_interned_str(
                self.as_rb_value(),
            )))
        }
    }

    /// Mutate `self`, adding `other` to the end. Errors if `self` and
    /// other`'s encodings are not compatible.
    ///
    /// # Examples
    ///
    /// ```
    /// use magnus::RString;
    /// # let _cleanup = unsafe { magnus::embed::init() };
    ///
    /// let a = RString::new("foo");
    /// let b = RString::new("bar");
    /// a.append(b).unwrap();
    /// assert_eq!(a.to_string().unwrap(), "foobar");
    /// ```
    pub fn append(self, other: Self) -> Result<(), Error> {
        protect(|| unsafe {
            Value::new(rb_str_buf_append(self.as_rb_value(), other.as_rb_value()))
        })?;
        Ok(())
    }

    /// Mutate `self`, adding `buf` to the end.
    ///
    /// Note: This ignore's `self`'s encoding, and may result in `self`
    /// containing invalid bytes for its encoding. It's assumed this will more
    /// often be used with ASCII-8BIT (aka BINARY) encoded strings. See
    /// [`buf_new`](RString::buf_new) and [`from_slice`](RString::from_slice).
    ///
    /// # Examples
    ///
    /// ```
    /// use magnus::{eval, RString};
    /// # let _cleanup = unsafe { magnus::embed::init() };
    ///
    /// let buf = RString::buf_new(4096);
    /// buf.cat(&[102, 111, 111]);
    /// buf.cat("bar");
    /// assert_eq!(buf.to_string().unwrap(), "foobar");
    /// ```
    pub fn cat<T: AsRef<[u8]>>(self, buf: T) {
        let buf = buf.as_ref();
        let len = buf.len();
        let ptr = buf.as_ptr();
        unsafe {
            rb_str_cat(self.as_rb_value(), ptr as *const c_char, len as c_long);
        }
    }

    /// Returns the number of bytes in `self`.
    ///
    /// See also [`length`](RString::length).
    ///
    /// # Examples
    ///
    /// ```
    /// use magnus::{eval, RString};
    /// # let _cleanup = unsafe { magnus::embed::init() };
    ///
    /// let s = RString::new("🦀 Hello, Ferris");
    /// assert_eq!(s.len(), 18);
    /// ```
    pub fn len(self) -> usize {
        debug_assert_value!(self);
        unsafe {
            let r_basic = self.r_basic_unchecked();
            let f = r_basic.as_ref().flags;
            if (f & ruby_rstring_flags::RSTRING_NOEMBED as VALUE) != 0 {
                let h = self.as_internal().as_ref().as_.heap;
                h.len as usize
            } else {
                embed_len(self, f) as usize
            }
        }
    }

    /// Returns the number of characters in `self`.
    ///
    /// See also [`len`](RString::len).
    ///
    /// # Examples
    ///
    /// ```
    /// use magnus::{eval, RString};
    /// # let _cleanup = unsafe { magnus::embed::init() };
    ///
    /// let s = RString::new("🦀 Hello, Ferris");
    /// assert_eq!(s.length(), 15);
    /// ```
    pub fn length(self) -> usize {
        unsafe { rb_str_strlen(self.as_rb_value()) as usize }
    }

    /// Return whether self contains any characters or not.
    ///
    /// # Examples
    ///
    /// ```
    /// use magnus::RString;
    /// # let _cleanup = unsafe { magnus::embed::init() };
    ///
    /// let s = RString::new("");
    /// assert!(s.is_empty());
    /// ```
    pub fn is_empty(self) -> bool {
        self.len() == 0
    }
}

#[cfg(ruby_gte_3_2)]
unsafe fn embed_len(value: RString, _: VALUE) -> c_long {
    value.as_internal().as_ref().as_.embed.len
}

#[cfg(ruby_lt_3_2)]
unsafe fn embed_len(_: RString, mut f: VALUE) -> VALUE {
    f &= ruby_rstring_flags::RSTRING_EMBED_LEN_MASK as VALUE;
    f >>= RSTRING_EMBED_LEN_SHIFT as VALUE;
    f
}

impl Deref for RString {
    type Target = Value;

    fn deref(&self) -> &Self::Target {
        self.0.get_ref()
    }
}

impl fmt::Display for RString {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "{}", unsafe { self.to_s_infallible() })
    }
}

impl fmt::Debug for RString {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "{}", self.inspect())
    }
}

impl EncodingCapable for RString {}

impl io::Write for RString {
    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
        let len = buf.len();
        self.cat(buf);
        Ok(len)
    }

    fn flush(&mut self) -> io::Result<()> {
        Ok(())
    }
}

impl From<&str> for RString {
    fn from(val: &str) -> Self {
        RString::new(val)
    }
}

impl From<String> for RString {
    fn from(val: String) -> Self {
        RString::new(&val)
    }
}

impl From<RString> for Value {
    fn from(val: RString) -> Self {
        *val
    }
}

impl From<&str> for Value {
    fn from(val: &str) -> Self {
        RString::new(val).into()
    }
}

impl From<String> for Value {
    fn from(val: String) -> Self {
        val.as_str().into()
    }
}

impl From<char> for Value {
    fn from(val: char) -> Self {
        RString::from_char(val).into()
    }
}

#[cfg(unix)]
impl From<&Path> for Value {
    fn from(val: &Path) -> Self {
        use std::os::unix::ffi::OsStrExt;
        RString::from_slice(val.as_os_str().as_bytes()).into()
    }
}

#[cfg(not(unix))]
impl From<&Path> for Value {
    fn from(val: &Path) -> Self {
        RString::new(val.to_string_lossy().as_ref()).into()
    }
}

impl From<PathBuf> for Value {
    fn from(val: PathBuf) -> Self {
        val.as_path().into()
    }
}

impl Object for RString {}

unsafe impl private::ReprValue for RString {
    fn to_value(self) -> Value {
        *self
    }

    unsafe fn from_value_unchecked(val: Value) -> Self {
        Self(NonZeroValue::new_unchecked(val))
    }
}

impl ReprValue for RString {}

impl TryConvert for RString {
    fn try_convert(val: Value) -> Result<Self, Error> {
        match Self::from_value(val) {
            Some(i) => Ok(i),
            None => protect(|| {
                debug_assert_value!(val);
                unsafe { Self::from_rb_value_unchecked(rb_str_to_str(val.as_rb_value())) }
            }),
        }
    }
}

/// FString contains an RString known to be interned.
///
/// Interned strings won't be garbage collected or modified, so should be
/// safe to store on the heap or hold a `&str` refrence to. `FString` provides
/// a way to encode this property into the type system, and provides safe
/// methods to access the string as a `&str` or slice.
#[derive(Clone, Copy)]
#[repr(transparent)]
pub struct FString(RString);

impl FString {
    /// Returns the interned string as a [`RString`].
    pub fn as_r_string(self) -> RString {
        self.0
    }

    /// Returns the interned string as a slice of bytes.
    ///
    /// # Examples
    ///
    /// ```
    /// use magnus::{eval, RString};
    /// # let _cleanup = unsafe { magnus::embed::init() };
    ///
    /// let s: RString = eval!(r#"
    /// ## frozen_string_literal: true
    ///
    /// "example"
    /// "#).unwrap();
    /// let fstring = s.as_interned_str().unwrap();
    /// assert_eq!(fstring.as_slice(), &[101, 120, 97, 109, 112, 108, 101]);
    /// ```
    pub fn as_slice(self) -> &'static [u8] {
        unsafe { self.as_r_string().as_slice_unconstrained() }
    }

    /// Returns the interned string as a `&str` or `None` string contains
    /// invliad UTF-8.
    ///
    /// # Examples
    ///
    /// ```
    /// use magnus::{eval, RString};
    /// # let _cleanup = unsafe { magnus::embed::init() };
    ///
    /// let s: RString = eval!(r#"# frozen_string_literal: true
    /// "example"
    /// "#).unwrap();
    /// let fstring = s.as_interned_str().unwrap();
    /// assert_eq!(fstring.test_as_str().unwrap(), "example");
    /// ```
    pub fn test_as_str(self) -> Option<&'static str> {
        unsafe { self.as_r_string().test_as_str_unconstrained() }
    }

    /// Returns the interned string as a `&str`. Errors if the string contains
    /// invliad UTF-8.
    ///
    /// # Examples
    ///
    /// ```
    /// use magnus::{eval, RString};
    /// # let _cleanup = unsafe { magnus::embed::init() };
    ///
    /// let s: RString = eval!(r#"# frozen_string_literal: true
    /// "example"
    /// "#).unwrap();
    /// let fstring = s.as_interned_str().unwrap();
    /// assert_eq!(fstring.as_str().unwrap(), "example");
    /// ```
    pub fn as_str(self) -> Result<&'static str, Error> {
        unsafe { self.as_r_string().as_str_unconstrained() }
    }

    /// Returns interned string as a Rust string, ignoring the Ruby encoding
    /// and dropping any non-UTF-8 characters. If the string is valid UTF-8
    /// this will return a `&str` reference.
    ///
    /// # Examples
    ///
    /// ```
    /// use magnus::{eval, RString};
    /// # let _cleanup = unsafe { magnus::embed::init() };
    ///
    /// let s: RString = eval!(r#"
    /// ## frozen_string_literal: true
    ///
    /// "example"
    /// "#).unwrap();
    /// let fstring = s.as_interned_str().unwrap();
    /// assert_eq!(fstring.to_string_lossy(), "example");
    /// ```
    pub fn to_string_lossy(self) -> Cow<'static, str> {
        String::from_utf8_lossy(self.as_slice())
    }
}

impl Deref for FString {
    type Target = Value;

    fn deref(&self) -> &Self::Target {
        self.0.deref()
    }
}

impl fmt::Display for FString {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "{}", unsafe { self.as_r_string().to_s_infallible() })
    }
}

impl fmt::Debug for FString {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "{}", self.as_r_string().inspect())
    }
}

impl From<FString> for Value {
    fn from(val: FString) -> Self {
        *val.as_r_string()
    }
}

unsafe impl private::ReprValue for FString {
    fn to_value(self) -> Value {
        *self
    }

    unsafe fn from_value_unchecked(val: Value) -> Self {
        Self(RString::from_value_unchecked(val))
    }
}

impl ReprValue for FString {}

/// An iterator over a Ruby string's codepoints.
pub struct Codepoints<'a> {
    slice: &'a [u8],
    encoding: RbEncoding,
}

impl<'a> Iterator for Codepoints<'a> {
    type Item = Result<u32, Error>;

    fn next(&mut self) -> Option<Self::Item> {
        if self.slice.is_empty() {
            return None;
        }
        match self.encoding.codepoint_len(self.slice) {
            Ok((codepoint, len)) => {
                self.slice = &self.slice[len..];
                Some(Ok(codepoint))
            }
            Err(e) => {
                self.slice = &self.slice[self.slice.len()..];
                Some(Err(e))
            }
        }
    }
}

/// An iterator over a Ruby string's chars as slices of bytes.
pub struct CharBytes<'a> {
    slice: &'a [u8],
    encoding: RbEncoding,
}

impl<'a> Iterator for CharBytes<'a> {
    type Item = &'a [u8];

    fn next(&mut self) -> Option<Self::Item> {
        if self.slice.is_empty() {
            return None;
        }
        let len = self.encoding.mbclen(self.slice);
        let bytes = &self.slice[..len];
        self.slice = &self.slice[len..];
        Some(bytes)
    }
}

/// Create a [`RString`] from a Rust str literal.
///
/// # Examples
///
/// ```
/// use magnus::{eval, r_string};
/// # let _cleanup = unsafe { magnus::embed::init() };
///
/// let s = r_string!("Hello, world!");
/// let res: bool = eval!(r#"s == "Hello, world!""#, s).unwrap();
/// assert!(res);
/// ```
#[macro_export]
macro_rules! r_string {
    ($lit:expr) => {{
        let s = concat!($lit, "\0");
        let len = s.len() - 1;
        unsafe { $crate::RString::new_lit(s.as_ptr() as *const _, len as _) }
    }};
}