magnus 0.4.4 - Docs.rs

//! Types and functions for working with encodings.
//!
//! This module defines 3 types for working with encodings, these types can
//! be converted back and forth with [`From`]/[`Into`] like so:
//! ``` text
//! Encoding <-> RbEncoding <-> Index
//!       |______________________^
//! ```
//! Many functions that require an encoding take thier arguments as
//! `Into<RbEncoding>` or `Into<Index>` to ease working with the different
//! types. The type specified for the `Into` conversion hints at the type the
//! function nativly works with, and thus will avoid any conversion cost.
//!
//! [`Encoding`] and [`RbEncoding`] both implement [`TryConvert`] and
//! `Into<Value>` so can be used as parameters and return values in functions
//! bound to Ruby. Both convert from either an instance of `Encoding` or a
//! string of an encoding name, and convert to an instance of `Encoding`.

use std::{
    convert::TryInto,
    ffi::{CStr, CString},
    fmt,
    ops::{Deref, Range},
    os::raw::{c_char, c_int},
    ptr::{self, NonNull},
};

use rb_sys::{
    rb_ascii8bit_encindex, rb_ascii8bit_encoding, rb_default_external_encoding,
    rb_default_internal_encoding, rb_enc_ascget, rb_enc_associate_index, rb_enc_check,
    rb_enc_codelen, rb_enc_codepoint_len, rb_enc_compatible, rb_enc_copy, rb_enc_default_external,
    rb_enc_default_internal, rb_enc_fast_mbclen, rb_enc_find, rb_enc_find_index,
    rb_enc_from_encoding, rb_enc_from_index, rb_enc_get_index, rb_enc_mbclen,
    rb_enc_precise_mbclen, rb_enc_set_index, rb_enc_to_index, rb_enc_uint_chr, rb_encoding,
    rb_filesystem_encindex, rb_filesystem_encoding, rb_find_encoding, rb_locale_encindex,
    rb_locale_encoding, rb_to_encoding, rb_to_encoding_index, rb_usascii_encindex,
    rb_usascii_encoding, rb_utf8_encindex, rb_utf8_encoding,
};

use crate::{
    class,
    error::{protect, Error},
    exception,
    object::Object,
    r_string::RString,
    try_convert::TryConvert,
    value::{private, NonZeroValue, ReprValue, Value, QNIL},
};

/// Wrapper type for a Value known to be an instance of Ruby's Encoding class.
///
/// This is the representation of an encoding exposed to Ruby code.
///
/// All [`Value`] methods should be available on this type through [`Deref`],
/// but some may be missed by this documentation.
#[derive(Clone, Copy)]
#[repr(transparent)]
pub struct Encoding(NonZeroValue);

impl Encoding {
    /// Return `Some(Encoding)` if `val` is an `Encoding`, `None` otherwise.
    ///
    /// # Examples
    ///
    /// ```
    /// use magnus::{eval, encoding::Encoding};
    /// # let _cleanup = unsafe { magnus::embed::init() };
    ///
    /// assert!(Encoding::from_value(eval("Encoding::US_ASCII").unwrap()).is_some());
    /// assert!(Encoding::from_value(eval("nil").unwrap()).is_none());
    /// ```
    #[inline]
    pub fn from_value(val: Value) -> Option<Self> {
        unsafe {
            val.is_kind_of(class::encoding())
                .then(|| Self(NonZeroValue::new_unchecked(val)))
        }
    }

    /// Returns the default internal encoding as a Ruby object.
    ///
    /// This is the encoding used for anything out-of-process, such as reading
    /// from files or sockets.
    pub fn default_external() -> Self {
        Self::from_value(Value::new(unsafe { rb_enc_default_external() })).unwrap()
    }

    /// Returns the default external encoding as a Ruby object.
    ///
    /// If set, any out-of-process data is transcoded from the default external
    /// encoding to the default internal encoding.
    pub fn default_internal() -> Option<Self> {
        Self::from_value(Value::new(unsafe { rb_enc_default_internal() }))
    }
}

impl Deref for Encoding {
    type Target = Value;

    fn deref(&self) -> &Self::Target {
        self.0.get_ref()
    }
}

impl fmt::Display for Encoding {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "{}", unsafe { self.to_s_infallible() })
    }
}

impl fmt::Debug for Encoding {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "{}", self.deref().inspect())
    }
}

impl From<Encoding> for Index {
    fn from(val: Encoding) -> Self {
        let i = unsafe { rb_to_encoding_index(val.as_rb_value()) };
        if i == -1 {
            panic!("got encoding index -1");
        }
        Index(i)
    }
}

impl From<Encoding> for RbEncoding {
    fn from(val: Encoding) -> Self {
        let ptr = unsafe { rb_find_encoding(val.as_rb_value()) };
        RbEncoding::new(ptr).expect("got NULL rb_encoding")
    }
}

impl From<Encoding> for Value {
    fn from(val: Encoding) -> Self {
        *val
    }
}

impl Object for Encoding {}

unsafe impl private::ReprValue for Encoding {
    fn to_value(self) -> Value {
        *self
    }

    unsafe fn from_value_unchecked(val: Value) -> Self {
        Self(NonZeroValue::new_unchecked(val))
    }
}

impl ReprValue for Encoding {}

impl TryConvert for Encoding {
    fn try_convert(val: Value) -> Result<Self, Error> {
        if let Some(enc) = Self::from_value(val) {
            return Ok(enc);
        }
        RbEncoding::try_convert(val).map(Into::into)
    }
}

/// Ruby's internal encoding type.
///
/// This type contains the data for an encoding, and is used with operations
/// such as converting a string from one encoding to another, or reading a
/// string character by character.
#[repr(transparent)]
pub struct RbEncoding(NonNull<rb_encoding>);

impl RbEncoding {
    fn new(inner: *mut rb_encoding) -> Option<Self> {
        NonNull::new(inner).map(Self)
    }

    /// Returns the encoding that represents ASCII-8BIT a.k.a. binary.
    pub fn ascii8bit() -> Self {
        Self::new(unsafe { rb_ascii8bit_encoding() }).unwrap()
    }

    /// Returns the encoding that represents UTF-8.
    pub fn utf8() -> Self {
        Self::new(unsafe { rb_utf8_encoding() }).unwrap()
    }

    /// Returns the encoding that represents US-ASCII.
    pub fn usascii() -> Self {
        Self::new(unsafe { rb_usascii_encoding() }).unwrap()
    }

    /// Returns the encoding that represents the process' current locale.
    ///
    /// This is dynamic. If you change the process' locale that should also
    /// change the return value of this function.
    pub fn locale() -> Self {
        Self::new(unsafe { rb_locale_encoding() }).unwrap()
    }

    /// Returns the filesystem encoding.
    ///
    /// This is the encoding that Ruby expects data from the OS' file system
    /// to be encoded as, such as directory names.
    pub fn filesystem() -> Self {
        Self::new(unsafe { rb_filesystem_encoding() }).unwrap()
    }

    /// Returns the default external encoding.
    ///
    /// This is the encoding used for anything out-of-process, such as reading
    /// from files or sockets.
    pub fn default_external() -> Self {
        Self::new(unsafe { rb_default_external_encoding() }).unwrap()
    }

    /// Returns the default internal encoding.
    ///
    /// If set, any out-of-process data is transcoded from the default external
    /// encoding to the default internal encoding.
    pub fn default_internal() -> Option<Self> {
        Self::new(unsafe { rb_default_internal_encoding() })
    }

    /// Returns the encoding with the name or alias `name`.
    ///
    /// # Examples
    ///
    /// ```
    /// use magnus::{eval, encoding::RbEncoding};
    /// # let _cleanup = unsafe { magnus::embed::init() };
    ///
    /// assert_eq!(RbEncoding::find("UTF-8").unwrap().name(), "UTF-8");
    /// assert_eq!(RbEncoding::find("BINARY").unwrap().name(), "ASCII-8BIT");
    /// ```
    pub fn find(name: &str) -> Option<Self> {
        let name = CString::new(name).unwrap();
        let ptr = unsafe { rb_enc_find(name.as_ptr()) };
        Self::new(ptr)
    }

    pub(crate) fn as_ptr(&self) -> *mut rb_encoding {
        self.0.as_ptr()
    }

    /// Returns the canonical name of the encoding.
    ///
    /// # Examples
    ///
    /// ```
    /// use magnus::{eval, encoding::RbEncoding};
    /// # let _cleanup = unsafe { magnus::embed::init() };
    ///
    /// assert_eq!(RbEncoding::utf8().name(), "UTF-8");
    /// assert_eq!(RbEncoding::find("UTF-16").unwrap().name(), "UTF-16");
    /// ```
    ///
    /// # Panics
    ///
    /// Panics if the name is not valid UTF-8. Encoding names are expected to
    /// be ASCII only.
    pub fn name(&self) -> &str {
        unsafe { CStr::from_ptr(self.0.as_ref().name).to_str().unwrap() }
    }

    /// Returns the minimum number of bytes the encoding needs to represent a
    /// single character.
    ///
    /// # Examples
    ///
    /// ```
    /// use magnus::{eval, encoding::RbEncoding};
    /// # let _cleanup = unsafe { magnus::embed::init() };
    ///
    /// assert_eq!(RbEncoding::usascii().mbminlen(), 1);
    /// assert_eq!(RbEncoding::utf8().mbminlen(), 1);
    /// ```
    pub fn mbminlen(&self) -> usize {
        unsafe { self.0.as_ref().min_enc_len as usize }
    }

    /// Returns the maximum number of bytes the encoding may need to represent
    /// a single character.
    ///
    /// # Examples
    ///
    /// ```
    /// use magnus::{eval, encoding::RbEncoding};
    /// # let _cleanup = unsafe { magnus::embed::init() };
    ///
    /// assert_eq!(RbEncoding::usascii().mbmaxlen(), 1);
    /// assert_eq!(RbEncoding::utf8().mbmaxlen(), 4);
    /// ```
    pub fn mbmaxlen(&self) -> usize {
        unsafe { self.0.as_ref().max_enc_len as usize }
    }

    /// Returns the number of bytes of the first character in `slice`.
    ///
    /// If the first byte of `slice` is mid way through a character this will
    /// return the number of bytes until the next character boundry.
    ///
    /// If the slice ends before the last byte of the character this will
    /// return the number of bytes until the end of the slice.
    ///
    /// See also [`fast_mbclen`](RbEncoding::fast_mbclen) and
    /// [`precise_mbclen`](RbEncoding::precise_mbclen).
    ///
    /// # Examples
    ///
    /// ```
    /// use magnus::{eval, encoding::{EncodingCapable, RbEncoding}, RString};
    /// # let _cleanup = unsafe { magnus::embed::init() };
    ///
    /// let s = RString::new("🦀 café");
    /// let encoding: RbEncoding = s.enc_get().into();
    /// let mut chars = 0;
    ///
    /// unsafe {
    ///     let mut bytes = s.as_slice();
    ///     assert_eq!(bytes.len(), 10);
    ///
    ///     while !bytes.is_empty() {
    ///         chars += 1;
    ///         let len = encoding.mbclen(bytes);
    ///         bytes = &bytes[len..];
    ///     }
    /// }
    ///
    /// assert_eq!(chars, 6);
    /// ```
    pub fn mbclen(&self, slice: &[u8]) -> usize {
        let Range { start: p, end: e } = slice.as_ptr_range();
        unsafe { rb_enc_mbclen(p as *const c_char, e as *const c_char, self.as_ptr()) as usize }
    }

    /// Returns the number of bytes of the first character in `slice`.
    ///
    /// If the first byte of `slice` is mid way through a character this will
    /// return the number of bytes until the next character boundry.
    ///
    /// If the slice ends before the last byte of the character this will
    /// return the theoretical number of bytes until the end of the character,
    /// which will be past the end of the slice. If the string has been read
    /// from an IO source this may indicate more data needs to be read.
    ///
    /// See also [`mbclen`](RbEncoding::mbclen) and
    /// [`precise_mbclen`](RbEncoding::precise_mbclen).
    ///
    /// # Examples
    ///
    /// ```
    /// use magnus::{eval, encoding::{EncodingCapable, RbEncoding}, RString};
    /// # let _cleanup = unsafe { magnus::embed::init() };
    ///
    /// let s = RString::new("🦀 café");
    /// let encoding: RbEncoding = s.enc_get().into();
    /// let mut chars = 0;
    ///
    /// unsafe {
    ///     let mut bytes = s.as_slice();
    ///     assert_eq!(bytes.len(), 10);
    ///
    ///     while !bytes.is_empty() {
    ///         chars += 1;
    ///         let len = encoding.fast_mbclen(bytes);
    ///         bytes = &bytes[len..];
    ///     }
    /// }
    ///
    /// assert_eq!(chars, 6);
    /// ```
    pub fn fast_mbclen(&self, slice: &[u8]) -> usize {
        let Range { start: p, end: e } = slice.as_ptr_range();
        unsafe {
            rb_enc_fast_mbclen(p as *const c_char, e as *const c_char, self.as_ptr()) as usize
        }
    }

    /// Returns the number of bytes of the first character in `slice`.
    ///
    /// See also [`mbclen`](RbEncoding::mbclen) and
    /// [`fast_mbclen`](RbEncoding::fast_mbclen).
    ///
    /// # Examples
    ///
    /// ```
    /// use magnus::{eval, encoding::{EncodingCapable, MbcLen, RbEncoding}, RString};
    /// # let _cleanup = unsafe { magnus::embed::init() };
    ///
    /// let s = RString::new("🦀 café");
    /// let encoding: RbEncoding = s.enc_get().into();
    /// let mut chars = 0;
    ///
    /// unsafe {
    ///     let mut bytes = s.as_slice();
    ///     assert_eq!(bytes.len(), 10);
    ///
    ///     while !bytes.is_empty() {
    ///         chars += 1;
    ///         match encoding.precise_mbclen(bytes) {
    ///             MbcLen::CharFound(len) => bytes = &bytes[len..],
    ///             MbcLen::NeedMore(len) => panic!("Met end of string expecting {} bytes", len),
    ///             MbcLen::Invalid => panic!("corrupted string"),
    ///         }
    ///     }
    /// }
    ///
    /// assert_eq!(chars, 6);
    /// ```
    pub fn precise_mbclen(&self, slice: &[u8]) -> MbcLen {
        let Range { start: p, end: e } = slice.as_ptr_range();
        let r =
            unsafe { rb_enc_precise_mbclen(p as *const c_char, e as *const c_char, self.as_ptr()) };
        if 0 < r {
            MbcLen::CharFound(r as usize)
        } else if r < -1 {
            MbcLen::NeedMore((-1 - r) as usize)
        } else if r == -1 {
            MbcLen::Invalid
        } else {
            unreachable!()
        }
    }

    /// If the first character in `slice` is included in ASCII return it and
    /// its encoded length in `slice`, otherwise returns None.
    ///
    /// Typically the length will be 1, but some encodings such as UTF-16 will
    /// encode ASCII characters in 2 bytes.
    ///
    /// # Examples
    ///
    /// ```
    /// use magnus::{eval, encoding::{EncodingCapable, RbEncoding}, RString};
    /// # let _cleanup = unsafe { magnus::embed::init() };
    ///
    /// let s = RString::new("example");
    /// let encoding: RbEncoding = s.enc_get().into();
    /// let mut chars = Vec::new();
    ///
    /// unsafe {
    ///     let mut bytes = s.as_slice();
    ///
    ///     while !bytes.is_empty() {
    ///         match encoding.ascget(bytes) {
    ///             Some((char, len)) => {
    ///                 chars.push(char);
    ///                 bytes = &bytes[len..];
    ///             }
    ///             None => panic!("string not ASCII"),
    ///         }
    ///     }
    /// }
    ///
    /// assert_eq!(chars, [101, 120, 97, 109, 112, 108, 101]);
    /// ```
    pub fn ascget(&self, slice: &[u8]) -> Option<(u8, usize)> {
        let Range { start: p, end: e } = slice.as_ptr_range();
        let mut len = 0;
        let c = unsafe {
            rb_enc_ascget(
                p as *const c_char,
                e as *const c_char,
                &mut len as *mut _,
                self.as_ptr(),
            )
        };
        if len == 0 {
            panic!("{:?}", slice);
        }
        (c > -1).then(|| (c as u8, len as usize))
    }

    /// Returns the codepoint and length in bytes of the first character in
    /// `slice`.
    ///
    /// # Examples
    ///
    /// ```
    /// use magnus::{eval, encoding::{EncodingCapable, RbEncoding}, RString};
    /// # let _cleanup = unsafe { magnus::embed::init() };
    ///
    /// let s = RString::new("🦀 café");
    /// let encoding: RbEncoding = s.enc_get().into();
    /// let mut codepoints = Vec::new();
    ///
    /// unsafe {
    ///     let mut bytes = s.as_slice();
    ///
    ///     while !bytes.is_empty() {
    ///         let (codepoint, len) = encoding.codepoint_len(bytes).unwrap();
    ///         codepoints.push(codepoint);
    ///         bytes = &bytes[len..];
    ///     }
    /// }
    ///
    /// assert_eq!(codepoints, [129408, 32, 99, 97, 102, 233]);
    /// ```
    pub fn codepoint_len(&self, slice: &[u8]) -> Result<(u32, usize), Error> {
        let Range { start: p, end: e } = slice.as_ptr_range();
        let mut len = 0;
        let mut c = 0;
        protect(|| {
            c = unsafe {
                rb_enc_codepoint_len(
                    p as *const c_char,
                    e as *const c_char,
                    &mut len as *mut _,
                    self.as_ptr(),
                )
            };
            QNIL
        })?;
        Ok((c as u32, len as usize))
    }

    /// Returns the number of bytes required to represent the code point `code`
    /// in the encoding of `self`.
    ///
    /// # Examples
    ///
    /// ```
    /// use magnus::{eval, encoding::RbEncoding};
    /// # let _cleanup = unsafe { magnus::embed::init() };
    ///
    /// assert_eq!(RbEncoding::utf8().codelen(97).unwrap(), 1);
    /// assert_eq!(RbEncoding::utf8().codelen(129408).unwrap(), 4);
    /// ```
    pub fn codelen(&self, code: u32) -> Result<usize, Error> {
        let code = code
            .try_into()
            .map_err(|e: <usize as TryInto<c_int>>::Error| {
                Error::new(exception::arg_error(), e.to_string())
            })?;
        let mut len = 0;
        protect(|| {
            unsafe { len = rb_enc_codelen(code, self.as_ptr()) as usize }
            QNIL
        })?;
        Ok(len)
    }

    /// Encode the codepoint `code` as a series of bytes in the encoding `self`
    /// and return the result as a Ruby string.
    ///
    /// # Examples
    ///
    /// ```
    /// use magnus::{eval, encoding::RbEncoding};
    /// # let _cleanup = unsafe { magnus::embed::init() };
    ///
    /// let c = RbEncoding::usascii().chr(97).unwrap();
    /// let res: bool = eval!(r#"c == "a""#, c).unwrap();
    /// assert!(res);
    /// ```
    ///
    /// ```
    /// use magnus::{eval, encoding::RbEncoding};
    /// # let _cleanup = unsafe { magnus::embed::init() };
    ///
    /// let c = RbEncoding::utf8().chr(129408).unwrap();
    /// let res: bool = eval!(r#"c == "🦀""#, c).unwrap();
    /// assert!(res);
    /// ```
    pub fn chr(&self, code: u32) -> Result<RString, Error> {
        protect(|| unsafe {
            RString::from_rb_value_unchecked(rb_enc_uint_chr(code, self.as_ptr()))
        })
    }

    /// Returns `true` if the first character in `slice` is a newline in the
    /// encoding `self`, `false` otherwise.
    ///
    /// # Examples
    ///
    /// ```
    /// use magnus::{eval, encoding::RbEncoding};
    /// # let _cleanup = unsafe { magnus::embed::init() };
    ///
    /// assert!(RbEncoding::utf8().is_mbc_newline(&[10]));
    /// assert!(!RbEncoding::utf8().is_mbc_newline(&[32]));
    /// ```
    pub fn is_mbc_newline(&self, slice: &[u8]) -> bool {
        let Range { start: p, end: e } = slice.as_ptr_range();
        unsafe {
            self.0.as_ref().is_mbc_newline.unwrap()(p as *const _, e as *const _, self.as_ptr())
                != 0
        }
    }

    /// Returns whether the given codepoint `code` is of the character type
    /// `ctype` in the encoding `self`.
    ///
    /// # Examples
    ///
    /// ```
    /// use magnus::{eval, encoding::{CType, RbEncoding}};
    /// # let _cleanup = unsafe { magnus::embed::init() };
    ///
    /// assert!(RbEncoding::utf8().is_code_ctype(9, CType::Space));   // "\t"
    /// assert!(RbEncoding::utf8().is_code_ctype(32, CType::Space));  // " "
    /// assert!(!RbEncoding::utf8().is_code_ctype(65, CType::Space)); // "A"
    /// assert!(RbEncoding::utf8().is_code_ctype(65, CType::Alnum));  // "A"
    /// assert!(RbEncoding::utf8().is_code_ctype(65, CType::Upper));  // "A"
    /// ```
    pub fn is_code_ctype(&self, code: u32, ctype: CType) -> bool {
        unsafe { self.0.as_ref().is_code_ctype.unwrap()(code, ctype as _, self.as_ptr()) != 0 }
    }
}

/// Return value for [`RbEncoding::precise_mbclen`].
pub enum MbcLen {
    /// Found a valid char, value is the char's length.
    CharFound(usize),
    /// The slice ended before the end of the current char. Value is the
    /// theoretical total length of the char.
    NeedMore(usize),
    /// The bytes at the start of the slice are not valid for the encoding.
    Invalid,
}

/// A character type.
#[repr(u32)]
#[derive(Debug, Copy, Clone)]
pub enum CType {
    /// Newline.
    Newline = 0,
    /// Alphabetical.
    Alpha = 1,
    /// Blank.
    Blank = 2,
    /// Control.
    Cntrl = 3,
    /// Digit.
    Digit = 4,
    /// Graph.
    Graph = 5,
    /// Lowercase.
    Lower = 6,
    /// Printable.
    Print = 7,
    /// Punctuation.
    Punct = 8,
    /// Whitespace.
    Space = 9,
    /// Uppercase.
    Upper = 10,
    /// Xdigit.
    Xdigit = 11,
    /// Word.
    Word = 12,
    /// Alphanumeric.
    Alnum = 13,
    /// ASCII.
    Ascii = 14,
}

impl From<RbEncoding> for Encoding {
    fn from(val: RbEncoding) -> Self {
        Encoding::from_value(Value::new(unsafe { rb_enc_from_encoding(val.as_ptr()) })).unwrap()
    }
}

impl From<RbEncoding> for Index {
    fn from(val: RbEncoding) -> Self {
        Index(unsafe { rb_enc_to_index(val.as_ptr()) })
    }
}

impl From<RbEncoding> for Value {
    fn from(val: RbEncoding) -> Self {
        *Encoding::from(val)
    }
}

impl TryConvert for RbEncoding {
    fn try_convert(val: Value) -> Result<Self, Error> {
        let mut ptr = ptr::null_mut();
        protect(|| {
            ptr = unsafe { rb_to_encoding(val.as_rb_value()) };
            QNIL
        })?;
        Ok(Self::new(ptr).unwrap())
    }
}

/// The index of an encoding in Ruby's internal encodings table.
///
/// This is the type Ruby uses to label encoding capable types, so is used with
/// operations that require reading or setting that label.
#[derive(Clone, Copy, Eq, PartialEq)]
#[repr(transparent)]
pub struct Index(c_int);

impl Index {
    /// Returns the index for ASCII-8BIT a.k.a. binary.
    pub fn ascii8bit() -> Self {
        Self(unsafe { rb_ascii8bit_encindex() })
    }

    /// Returns the index for UTF-8.
    pub fn utf8() -> Self {
        Self(unsafe { rb_utf8_encindex() })
    }

    /// Returns the index for US-ASCII.
    pub fn usascii() -> Self {
        Self(unsafe { rb_usascii_encindex() })
    }

    /// Returns the index for the process' current locale encoding.
    ///
    /// This is dynamic. If you change the process' locale that should also
    /// change the return value of this function.
    pub fn locale() -> Self {
        Self(unsafe { rb_locale_encindex() })
    }

    /// Returns the index for filesystem encoding.
    ///
    /// This is the encoding that Ruby expects data from the OS' file system
    /// to be encoded as, such as directory names.
    pub fn filesystem() -> Self {
        Self(unsafe { rb_filesystem_encindex() })
    }

    /// Returns the index for the encoding with the name or alias `name`.
    ///
    /// # Examples
    ///
    /// ```
    /// use magnus::{eval, encoding};
    /// # let _cleanup = unsafe { magnus::embed::init() };
    ///
    /// assert!(encoding::Index::find("UTF-8").is_ok());
    /// assert!(encoding::Index::find("BINARY").is_ok());
    /// assert!(encoding::Index::find("none").is_err());
    /// ```
    pub fn find(name: &str) -> Result<Self, Error> {
        let name = CString::new(name).unwrap();
        let mut i = 0;
        protect(|| {
            i = unsafe { rb_enc_find_index(name.as_ptr()) };
            QNIL
        })?;
        if i == -1 {
            return Err(Error::new(
                exception::runtime_error(),
                format!("Encoding {:?} exists, but can not be loaded", name),
            ));
        }
        Ok(Index(i))
    }

    pub(crate) fn to_int(self) -> c_int {
        self.0
    }
}

impl From<Index> for RbEncoding {
    fn from(val: Index) -> Self {
        RbEncoding::new(unsafe { rb_enc_from_index(val.to_int()) }).expect("no encoding for index")
    }
}

impl TryConvert for Index {
    fn try_convert(val: Value) -> Result<Self, Error> {
        let i = unsafe { rb_to_encoding_index(val.as_rb_value()) };
        if i == -1 && RString::from_value(val).is_some() {
            return Err(Error::new(
                exception::runtime_error(),
                format!("ArgumentError: unknown encoding name - {}", val),
            ));
        } else if i == -1 {
            return RString::try_convert(val)?.try_convert();
        }
        Ok(Index(i))
    }
}

/// Possible states of how a string matches its encoding.
#[repr(u32)]
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub enum Coderange {
    /// It is unknown if the string is valid for its encoding.
    Unknown = 0,
    /// The string is entirely within the 0 to 127 ASCII range.
    SevenBit = 1048576,
    /// The string is valid for its encoding.
    Valid = 2097152,
    /// The string holds values that are invalid for its encoding.
    Broken = 3145728,
}

/// Trait that marks Ruby types cable of having an encoding.
pub trait EncodingCapable: Deref<Target = Value> {
    /// Get the encoding of `self`.
    ///
    /// # Examples
    ///
    /// ```
    /// use magnus::{encoding::{self, EncodingCapable}, RString};
    /// # let _cleanup = unsafe { magnus::embed::init() };
    ///
    /// assert!(RString::new("example").enc_get() == encoding::Index::utf8());
    /// ```
    fn enc_get(&self) -> Index {
        let i = unsafe { rb_enc_get_index(self.as_rb_value()) };
        if i == -1 {
            panic!("{} not encoding capable", self.deref());
        }
        Index(i)
    }

    /// Set `self`'s encoding.
    ///
    /// Returns `Err` if `self` is frozen or the encoding can not be loaded.
    ///
    /// See also [`EncodingCapable::enc_associate`].
    ///
    /// # Examples
    ///
    /// ```
    /// use magnus::{encoding::{self, EncodingCapable}, RString};
    /// # let _cleanup = unsafe { magnus::embed::init() };
    ///
    /// let s = RString::new("example");
    /// assert!(s.enc_get() == encoding::Index::utf8());
    /// s.enc_set(encoding::Index::usascii());
    /// assert!(s.enc_get() == encoding::Index::usascii());
    /// ```
    fn enc_set<T>(&self, enc: T) -> Result<(), Error>
    where
        T: Into<Index>,
    {
        protect(|| {
            unsafe { rb_enc_set_index(self.as_rb_value(), enc.into().to_int()) };
            QNIL
        })?;
        Ok(())
    }

    /// Set `self`'s encoding, along with performing additional fix-up `self`'s
    /// contents.
    ///
    /// For example, Ruby's strings contain an additional terminating null byte
    /// hidden from Ruby, but allowing for easy c string interop. This method
    /// will adjust the length of that terminating char depending on the
    /// encoding.
    ///
    /// Returns `Err` if `self` is frozen or the encoding can not be loaded.
    ///
    /// # Examples
    ///
    /// ```
    /// use magnus::{encoding::{self, EncodingCapable}, RString};
    /// # let _cleanup = unsafe { magnus::embed::init() };
    ///
    /// let s = RString::new("example");
    /// assert!(s.enc_get() == encoding::Index::utf8());
    /// s.enc_associate(encoding::Index::usascii());
    /// assert!(s.enc_get() == encoding::Index::usascii());
    /// ```
    fn enc_associate<T>(&self, enc: T) -> Result<(), Error>
    where
        T: Into<Index>,
    {
        protect(|| {
            Value::new(unsafe { rb_enc_associate_index(self.as_rb_value(), enc.into().to_int()) })
        })?;
        Ok(())
    }
}

/// Returns the common encoding between `v1` and `v2`, or `None`.
///
/// Returns `None` if there is no common compatible encoding.
///
/// See also [`check`].
///
/// # Examples
///
/// ```
/// use magnus::{encoding::{self, EncodingCapable}, RString};
/// # let _cleanup = unsafe { magnus::embed::init() };
///
/// let a = RString::new("a");
/// let b = RString::new("b");
///
/// assert!(a.enc_get() == encoding::Index::utf8());
/// b.enc_set(encoding::Index::usascii());
///
/// assert_eq!(encoding::compatible(a, b).unwrap().name(), "UTF-8");
/// ```
pub fn compatible<T, U>(v1: T, v2: U) -> Option<RbEncoding>
where
    T: EncodingCapable,
    U: EncodingCapable,
{
    RbEncoding::new(unsafe { rb_enc_compatible(v1.as_rb_value(), v2.as_rb_value()) })
}

/// Returns the common encoding between `v1` and `v2`, or `Err`.
///
/// Returns `Err` if there is no common compatible encoding.
///
/// See also [`compatible`].
///
/// # Examples
///
/// ```
/// use magnus::{encoding::{self, EncodingCapable}, RString};
/// # let _cleanup = unsafe { magnus::embed::init() };
///
/// let a = RString::new("a");
/// let b = RString::new("b");
///
/// assert!(a.enc_get() == encoding::Index::utf8());
/// b.enc_set(encoding::Index::usascii());
///
/// assert_eq!(encoding::check(a, b).unwrap().name(), "UTF-8");
/// ```
pub fn check<T, U>(v1: T, v2: U) -> Result<RbEncoding, Error>
where
    T: EncodingCapable,
    U: EncodingCapable,
{
    let mut ptr = ptr::null_mut();
    protect(|| {
        ptr = unsafe { rb_enc_check(v1.as_rb_value(), v2.as_rb_value()) };
        QNIL
    })?;
    Ok(RbEncoding::new(ptr).unwrap())
}

/// Compies the encoding from `src` to `dst`.
///
/// This does not reconcode `dst.`
///
/// Similar to [`EncodingCapable::enc_associate`], except takes the encoding of
/// `src` rather than an encoding object or index.
///
/// # Examples
///
/// ```
/// use magnus::{encoding::{self, EncodingCapable}, RString};
/// # let _cleanup = unsafe { magnus::embed::init() };
///
/// let a = RString::new("a");
/// assert!(a.enc_get() == encoding::Index::utf8());
/// let b = RString::new("b");
/// assert!(b.enc_get() == encoding::Index::utf8());
///
/// a.enc_set(encoding::Index::usascii());
/// encoding::copy(b, a).unwrap();
///
/// assert!(b.enc_get() == encoding::Index::usascii());
/// ```
pub fn copy<T, U>(dst: T, src: U) -> Result<(), Error>
where
    T: EncodingCapable,
    U: EncodingCapable,
{
    protect(|| {
        unsafe { rb_enc_copy(dst.as_rb_value(), src.as_rb_value()) };
        QNIL
    })?;
    Ok(())
}