use std::{
ffi::{CStr, CString},
fmt,
ops::Range,
os::raw::{c_char, c_int},
ptr::{self, NonNull},
};
use rb_sys::{
rb_ascii8bit_encindex, rb_ascii8bit_encoding, rb_default_external_encoding,
rb_default_internal_encoding, rb_enc_ascget, rb_enc_associate_index, rb_enc_check,
rb_enc_codelen, rb_enc_codepoint_len, rb_enc_compatible, rb_enc_copy, rb_enc_default_external,
rb_enc_default_internal, rb_enc_fast_mbclen, rb_enc_find, rb_enc_find_index,
rb_enc_from_encoding, rb_enc_from_index, rb_enc_get_index, rb_enc_mbclen,
rb_enc_precise_mbclen, rb_enc_set_index, rb_enc_to_index, rb_enc_uint_chr, rb_encoding,
rb_filesystem_encindex, rb_filesystem_encoding, rb_find_encoding, rb_locale_encindex,
rb_locale_encoding, rb_to_encoding, rb_to_encoding_index, rb_usascii_encindex,
rb_usascii_encoding, rb_utf8_encindex, rb_utf8_encoding,
};
use crate::{
error::{protect, Error},
into_value::IntoValue,
object::Object,
r_string::RString,
try_convert::TryConvert,
value::{
private::{self, ReprValue as _},
NonZeroValue, ReprValue, Value,
},
Ruby,
};
impl Ruby {
    /// Returns the default external encoding as a Ruby `Encoding` object.
    ///
    /// # Panics
    ///
    /// Panics if the value returned by `rb_enc_default_external` is not an
    /// instance of Ruby's `Encoding` class.
    pub fn enc_default_external(&self) -> Encoding {
        let raw = unsafe { rb_enc_default_external() };
        Encoding::from_value(Value::new(raw)).unwrap()
    }

    /// Returns the default internal encoding as a Ruby `Encoding` object.
    ///
    /// Yields `None` when the value returned by `rb_enc_default_internal` is
    /// not an `Encoding` instance (presumably `nil` when no default internal
    /// encoding is configured).
    pub fn enc_default_internal(&self) -> Option<Encoding> {
        let raw = unsafe { rb_enc_default_internal() };
        Encoding::from_value(Value::new(raw))
    }
}
/// Wrapper around a Ruby value that is an instance of Ruby's `Encoding` class.
///
/// Instances are only created through [`Encoding::from_value`], which checks
/// the class of the value before wrapping it. The type is a transparent
/// wrapper over `NonZeroValue`.
#[derive(Clone, Copy)]
#[repr(transparent)]
pub struct Encoding(NonZeroValue);
impl Encoding {
    /// Returns `Some(Encoding)` if `val` is an instance of Ruby's `Encoding`
    /// class (or a subclass), `None` otherwise.
    #[inline]
    pub fn from_value(val: Value) -> Option<Self> {
        unsafe {
            let encoding_class = Ruby::get_with(val).class_encoding();
            if val.is_kind_of(encoding_class) {
                // The class check above is what justifies the unchecked
                // construction of the inner `NonZeroValue`.
                Some(Self(NonZeroValue::new_unchecked(val)))
            } else {
                None
            }
        }
    }

    /// Returns the default external encoding.
    ///
    /// Obtains the Ruby handle via `get_ruby!` and delegates to
    /// [`Ruby::enc_default_external`].
    #[cfg_attr(
        not(feature = "old-api"),
        deprecated(note = "please use `Ruby::enc_default_external` instead")
    )]
    #[cfg_attr(docsrs, doc(cfg(feature = "old-api")))]
    #[inline]
    pub fn default_external() -> Self {
        get_ruby!().enc_default_external()
    }

    /// Returns the default internal encoding, if there is one.
    ///
    /// Obtains the Ruby handle via `get_ruby!` and delegates to
    /// [`Ruby::enc_default_internal`].
    #[cfg_attr(
        not(feature = "old-api"),
        deprecated(note = "please use `Ruby::enc_default_internal` instead")
    )]
    #[cfg_attr(docsrs, doc(cfg(feature = "old-api")))]
    #[inline]
    pub fn default_internal() -> Option<Self> {
        get_ruby!().enc_default_internal()
    }
}
impl fmt::Display for Encoding {
    /// Writes the text produced by `to_s_infallible` for the wrapped value.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let text = unsafe { self.to_s_infallible() };
        write!(f, "{}", text)
    }
}
impl fmt::Debug for Encoding {
    /// Writes the text produced by `inspect` for the wrapped value.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_fmt(format_args!("{}", self.inspect()))
    }
}
impl From<Encoding> for Index {
    /// Looks up the encoding index of an `Encoding` object via
    /// `rb_to_encoding_index`.
    ///
    /// # Panics
    ///
    /// Panics if the lookup reports `-1`.
    fn from(val: Encoding) -> Self {
        match unsafe { rb_to_encoding_index(val.as_rb_value()) } {
            -1 => panic!("got encoding index -1"),
            idx => Index(idx),
        }
    }
}
impl From<Encoding> for RbEncoding {
    /// Resolves an `Encoding` object to its low-level `rb_encoding` pointer
    /// via `rb_find_encoding`.
    ///
    /// # Panics
    ///
    /// Panics if the lookup yields a NULL pointer.
    fn from(val: Encoding) -> Self {
        let raw = unsafe { rb_find_encoding(val.as_rb_value()) };
        let wrapped = RbEncoding::new(raw);
        wrapped.expect("got NULL rb_encoding")
    }
}
impl IntoValue for Encoding {
#[inline]
fn into_value_with(self, _: &Ruby) -> Value {
self.0.get()
}
}
// `Encoding` wraps a Ruby value, so it opts in to the crate's object and
// value-representation traits; all three impls rely on the traits' provided
// methods and add nothing of their own.
impl Object for Encoding {}
unsafe impl private::ReprValue for Encoding {}
impl ReprValue for Encoding {}
impl TryConvert for Encoding {
    /// Converts `val` into an `Encoding`.
    ///
    /// A value that already is an `Encoding` instance is wrapped directly.
    /// Anything else is first converted to an [`RbEncoding`] and then turned
    /// back into an `Encoding` object.
    ///
    /// # Errors
    ///
    /// Returns whatever error `RbEncoding::try_convert` produces for `val`.
    fn try_convert(val: Value) -> Result<Self, Error> {
        match Self::from_value(val) {
            Some(enc) => Ok(enc),
            None => RbEncoding::try_convert(val).map(Self::from),
        }
    }
}
impl Ruby {
    /// Returns the encoding reported by `rb_ascii8bit_encoding`.
    ///
    /// # Panics
    ///
    /// Panics if Ruby returns a NULL pointer.
    pub fn ascii8bit_encoding(&self) -> RbEncoding {
        RbEncoding::new(unsafe { rb_ascii8bit_encoding() }).unwrap()
    }

    /// Returns the encoding reported by `rb_utf8_encoding`.
    ///
    /// # Panics
    ///
    /// Panics if Ruby returns a NULL pointer.
    pub fn utf8_encoding(&self) -> RbEncoding {
        RbEncoding::new(unsafe { rb_utf8_encoding() }).unwrap()
    }

    /// Returns the encoding reported by `rb_usascii_encoding`.
    ///
    /// # Panics
    ///
    /// Panics if Ruby returns a NULL pointer.
    pub fn usascii_encoding(&self) -> RbEncoding {
        RbEncoding::new(unsafe { rb_usascii_encoding() }).unwrap()
    }

    /// Returns the encoding reported by `rb_locale_encoding`.
    ///
    /// # Panics
    ///
    /// Panics if Ruby returns a NULL pointer.
    pub fn locale_encoding(&self) -> RbEncoding {
        RbEncoding::new(unsafe { rb_locale_encoding() }).unwrap()
    }

    /// Returns the encoding reported by `rb_filesystem_encoding`.
    ///
    /// # Panics
    ///
    /// Panics if Ruby returns a NULL pointer.
    pub fn filesystem_encoding(&self) -> RbEncoding {
        RbEncoding::new(unsafe { rb_filesystem_encoding() }).unwrap()
    }

    /// Returns the encoding reported by `rb_default_external_encoding`.
    ///
    /// # Panics
    ///
    /// Panics if Ruby returns a NULL pointer.
    pub fn default_external_encoding(&self) -> RbEncoding {
        RbEncoding::new(unsafe { rb_default_external_encoding() }).unwrap()
    }

    /// Returns the encoding reported by `rb_default_internal_encoding`, or
    /// `None` when Ruby returns a NULL pointer.
    pub fn default_internal_encoding(&self) -> Option<RbEncoding> {
        RbEncoding::new(unsafe { rb_default_internal_encoding() })
    }

    /// Looks up an encoding by name via `rb_enc_find`.
    ///
    /// Returns `None` when Ruby returns a NULL pointer for `name`, and also
    /// when `name` contains an interior NUL byte: such a name cannot be passed
    /// to the C API and therefore cannot identify any encoding.
    pub fn find_encoding(&self, name: &str) -> Option<RbEncoding> {
        // Treat an unrepresentable name as "not found" instead of panicking
        // on caller-supplied input.
        let name = CString::new(name).ok()?;
        let ptr = unsafe { rb_enc_find(name.as_ptr()) };
        RbEncoding::new(ptr)
    }
}
/// Handle to Ruby's low-level encoding struct.
///
/// Transparent wrapper around a non-null pointer to `rb_encoding`; values are
/// only created through `RbEncoding::new`, which rejects NULL pointers.
#[repr(transparent)]
pub struct RbEncoding(NonNull<rb_encoding>);
impl RbEncoding {
    /// Wraps a raw `rb_encoding` pointer, returning `None` for NULL.
    pub(crate) fn new(inner: *mut rb_encoding) -> Option<Self> {
        NonNull::new(inner).map(Self)
    }

    /// Returns the ASCII-8BIT encoding.
    ///
    /// Obtains the Ruby handle via `get_ruby!` and delegates to
    /// [`Ruby::ascii8bit_encoding`].
    #[cfg_attr(
        not(feature = "old-api"),
        deprecated(note = "please use `Ruby::ascii8bit_encoding` instead")
    )]
    #[cfg_attr(docsrs, doc(cfg(feature = "old-api")))]
    #[inline]
    pub fn ascii8bit() -> Self {
        get_ruby!().ascii8bit_encoding()
    }

    /// Returns the UTF-8 encoding.
    ///
    /// Obtains the Ruby handle via `get_ruby!` and delegates to
    /// [`Ruby::utf8_encoding`].
    #[cfg_attr(
        not(feature = "old-api"),
        deprecated(note = "please use `Ruby::utf8_encoding` instead")
    )]
    #[cfg_attr(docsrs, doc(cfg(feature = "old-api")))]
    #[inline]
    pub fn utf8() -> Self {
        get_ruby!().utf8_encoding()
    }

    /// Returns the US-ASCII encoding.
    ///
    /// Obtains the Ruby handle via `get_ruby!` and delegates to
    /// [`Ruby::usascii_encoding`].
    #[cfg_attr(
        not(feature = "old-api"),
        deprecated(note = "please use `Ruby::usascii_encoding` instead")
    )]
    #[cfg_attr(docsrs, doc(cfg(feature = "old-api")))]
    #[inline]
    pub fn usascii() -> Self {
        get_ruby!().usascii_encoding()
    }

    /// Returns the locale encoding.
    ///
    /// Obtains the Ruby handle via `get_ruby!` and delegates to
    /// [`Ruby::locale_encoding`].
    #[cfg_attr(
        not(feature = "old-api"),
        deprecated(note = "please use `Ruby::locale_encoding` instead")
    )]
    #[cfg_attr(docsrs, doc(cfg(feature = "old-api")))]
    #[inline]
    pub fn locale() -> Self {
        get_ruby!().locale_encoding()
    }

    /// Returns the filesystem encoding.
    ///
    /// Obtains the Ruby handle via `get_ruby!` and delegates to
    /// [`Ruby::filesystem_encoding`].
    #[cfg_attr(
        not(feature = "old-api"),
        deprecated(note = "please use `Ruby::filesystem_encoding` instead")
    )]
    #[cfg_attr(docsrs, doc(cfg(feature = "old-api")))]
    #[inline]
    pub fn filesystem() -> Self {
        get_ruby!().filesystem_encoding()
    }

    /// Returns the default external encoding.
    ///
    /// Obtains the Ruby handle via `get_ruby!` and delegates to
    /// [`Ruby::default_external_encoding`].
    #[cfg_attr(
        not(feature = "old-api"),
        deprecated(note = "please use `Ruby::default_external_encoding` instead")
    )]
    #[cfg_attr(docsrs, doc(cfg(feature = "old-api")))]
    #[inline]
    pub fn default_external() -> Self {
        get_ruby!().default_external_encoding()
    }

    /// Returns the default internal encoding, if there is one.
    ///
    /// Obtains the Ruby handle via `get_ruby!` and delegates to
    /// [`Ruby::default_internal_encoding`].
    #[cfg_attr(
        not(feature = "old-api"),
        deprecated(note = "please use `Ruby::default_internal_encoding` instead")
    )]
    #[cfg_attr(docsrs, doc(cfg(feature = "old-api")))]
    #[inline]
    pub fn default_internal() -> Option<Self> {
        get_ruby!().default_internal_encoding()
    }

    /// Looks up an encoding by name.
    ///
    /// Obtains the Ruby handle via `get_ruby!` and delegates to
    /// [`Ruby::find_encoding`].
    #[cfg_attr(
        not(feature = "old-api"),
        deprecated(note = "please use `Ruby::find_encoding` instead")
    )]
    #[cfg_attr(docsrs, doc(cfg(feature = "old-api")))]
    #[inline]
    pub fn find(name: &str) -> Option<Self> {
        get_ruby!().find_encoding(name)
    }

    /// Returns the wrapped raw `rb_encoding` pointer.
    pub(crate) fn as_ptr(&self) -> *mut rb_encoding {
        self.0.as_ptr()
    }

    /// Returns the encoding's name, read from the `name` field of the
    /// underlying `rb_encoding` struct.
    ///
    /// # Panics
    ///
    /// Panics if the stored name is not valid UTF-8.
    pub fn name(&self) -> &str {
        unsafe { CStr::from_ptr(self.0.as_ref().name).to_str().unwrap() }
    }

    /// Returns the `min_enc_len` field of the underlying struct, i.e. the
    /// minimum number of bytes this encoding uses for one character.
    pub fn mbminlen(&self) -> usize {
        unsafe { self.0.as_ref().min_enc_len as usize }
    }

    /// Returns the `max_enc_len` field of the underlying struct, i.e. the
    /// maximum number of bytes this encoding uses for one character.
    pub fn mbmaxlen(&self) -> usize {
        unsafe { self.0.as_ref().max_enc_len as usize }
    }

    /// Returns the result of `rb_enc_mbclen` for the bytes in `slice`
    /// (presumably the byte length of the character at the start of the
    /// slice; see Ruby's C API docs for how invalid input is handled).
    pub fn mbclen(&self, slice: &[u8]) -> usize {
        let Range { start: p, end: e } = slice.as_ptr_range();
        unsafe { rb_enc_mbclen(p as *const c_char, e as *const c_char, self.as_ptr()) as usize }
    }

    /// Returns the result of `rb_enc_fast_mbclen` for the bytes in `slice`.
    ///
    /// NOTE(review): the "fast" variant presumably skips validation of the
    /// input; confirm against Ruby's C API docs before relying on it with
    /// untrusted bytes.
    pub fn fast_mbclen(&self, slice: &[u8]) -> usize {
        let Range { start: p, end: e } = slice.as_ptr_range();
        unsafe {
            rb_enc_fast_mbclen(p as *const c_char, e as *const c_char, self.as_ptr()) as usize
        }
    }

    /// Classifies the start of `slice` using `rb_enc_precise_mbclen`.
    ///
    /// The raw result `r` is decoded as follows:
    ///
    /// * `r > 0`: [`MbcLen::CharFound`] carrying `r`.
    /// * `r < -1`: [`MbcLen::NeedMore`] carrying `-1 - r`.
    /// * `r == -1`: [`MbcLen::Invalid`].
    ///
    /// # Panics
    ///
    /// A raw result of `0` is treated as impossible and hits `unreachable!`.
    pub fn precise_mbclen(&self, slice: &[u8]) -> MbcLen {
        let Range { start: p, end: e } = slice.as_ptr_range();
        let r =
            unsafe { rb_enc_precise_mbclen(p as *const c_char, e as *const c_char, self.as_ptr()) };
        if 0 < r {
            MbcLen::CharFound(r as usize)
        } else if r < -1 {
            MbcLen::NeedMore((-1 - r) as usize)
        } else if r == -1 {
            MbcLen::Invalid
        } else {
            unreachable!()
        }
    }

    /// Reads an ASCII character from the start of `slice` via
    /// `rb_enc_ascget`.
    ///
    /// Returns `Some((character, length))`, where `length` is the value Ruby
    /// wrote to its length out-parameter, or `None` when Ruby reports `-1`
    /// (for example when `slice` is empty or does not start with an ASCII
    /// character).
    pub fn ascget(&self, slice: &[u8]) -> Option<(u8, usize)> {
        let Range { start: p, end: e } = slice.as_ptr_range();
        let mut len: c_int = 0;
        let c = unsafe {
            rb_enc_ascget(
                p as *const c_char,
                e as *const c_char,
                &mut len as *mut _,
                self.as_ptr(),
            )
        };
        // `len` is left untouched on the failure path, so it must not be
        // inspected (or asserted on) unless a character was actually found.
        (c > -1).then_some((c as u8, len as usize))
    }

    /// Decodes the codepoint at the start of `slice` via
    /// `rb_enc_codepoint_len`, returning `(codepoint, length)` where `length`
    /// is the value Ruby wrote to its length out-parameter.
    ///
    /// # Errors
    ///
    /// The call runs under `protect`; any exception Ruby raises is returned
    /// as `Err`.
    pub fn codepoint_len(&self, slice: &[u8]) -> Result<(u32, usize), Error> {
        let Range { start: p, end: e } = slice.as_ptr_range();
        let mut len = 0;
        let mut c = 0;
        protect(|| unsafe {
            c = rb_enc_codepoint_len(
                p as *const c_char,
                e as *const c_char,
                &mut len as *mut _,
                self.as_ptr(),
            );
            Ruby::get_unchecked().qnil()
        })?;
        Ok((c, len as usize))
    }

    /// Returns the result of `rb_enc_codelen` for `code`, i.e. the number of
    /// bytes needed to represent that codepoint in this encoding.
    ///
    /// # Errors
    ///
    /// Returns an `ArgumentError` if `code` does not fit in a C `int`, and
    /// any exception Ruby raises during the (protected) call.
    pub fn codelen(&self, code: u32) -> Result<usize, Error> {
        let handle = unsafe { Ruby::get_unchecked() };
        let code = code
            .try_into()
            .map_err(|e: <u32 as TryInto<c_int>>::Error| {
                Error::new(handle.exception_arg_error(), e.to_string())
            })?;
        let mut len = 0;
        protect(|| {
            unsafe { len = rb_enc_codelen(code, self.as_ptr()) as usize };
            handle.qnil()
        })?;
        Ok(len)
    }

    /// Builds a Ruby string from `code` in this encoding via
    /// `rb_enc_uint_chr`.
    ///
    /// # Errors
    ///
    /// The call runs under `protect`; any exception Ruby raises is returned
    /// as `Err`.
    pub fn chr(&self, code: u32) -> Result<RString, Error> {
        protect(|| unsafe {
            RString::from_rb_value_unchecked(rb_enc_uint_chr(code, self.as_ptr()))
        })
    }

    /// Calls the encoding's `is_mbc_newline` callback on `slice` and returns
    /// whether it reported a non-zero result.
    ///
    /// # Panics
    ///
    /// Panics if the encoding has no `is_mbc_newline` callback.
    pub fn is_mbc_newline(&self, slice: &[u8]) -> bool {
        let Range { start: p, end: e } = slice.as_ptr_range();
        unsafe {
            self.0.as_ref().is_mbc_newline.unwrap()(p as *const _, e as *const _, self.as_ptr())
                != 0
        }
    }

    /// Calls the encoding's `is_code_ctype` callback for `code` and `ctype`
    /// and returns whether it reported a non-zero result.
    ///
    /// # Panics
    ///
    /// Panics if the encoding has no `is_code_ctype` callback.
    pub fn is_code_ctype(&self, code: u32, ctype: CType) -> bool {
        unsafe { self.0.as_ref().is_code_ctype.unwrap()(code, ctype as _, self.as_ptr()) != 0 }
    }
}
/// Result of [`RbEncoding::precise_mbclen`].
pub enum MbcLen {
/// A character was found; carries the positive value reported by Ruby.
CharFound(usize),
/// More input is required; carries `-1 - r` for the raw result `r`.
NeedMore(usize),
/// Ruby reported `-1` for the input.
Invalid,
}
/// Character class selector passed to [`RbEncoding::is_code_ctype`].
///
/// The discriminant is cast and handed to the encoding's `is_code_ctype`
/// callback unchanged.
// NOTE(review): the numeric values presumably mirror Onigmo's
// `ONIGENC_CTYPE_*` constants — confirm against onigmo.h before changing any.
#[repr(u32)]
#[derive(Debug, Copy, Clone)]
pub enum CType {
Newline = 0,
Alpha = 1,
Blank = 2,
Cntrl = 3,
Digit = 4,
Graph = 5,
Lower = 6,
Print = 7,
Punct = 8,
Space = 9,
Upper = 10,
Xdigit = 11,
Word = 12,
Alnum = 13,
Ascii = 14,
}
impl From<RbEncoding> for Encoding {
    /// Obtains the Ruby `Encoding` object for a low-level encoding via
    /// `rb_enc_from_encoding`.
    ///
    /// # Panics
    ///
    /// Panics if the returned value is not an `Encoding` instance.
    fn from(val: RbEncoding) -> Self {
        let raw = unsafe { rb_enc_from_encoding(val.as_ptr()) };
        Encoding::from_value(Value::new(raw)).unwrap()
    }
}
impl From<RbEncoding> for Index {
    /// Obtains the encoding index of a low-level encoding via
    /// `rb_enc_to_index`.
    fn from(val: RbEncoding) -> Self {
        let raw_index = unsafe { rb_enc_to_index(val.as_ptr()) };
        Index(raw_index)
    }
}
impl IntoValue for RbEncoding {
#[inline]
fn into_value_with(self, handle: &Ruby) -> Value {
Encoding::from(self).into_value_with(handle)
}
}
impl TryConvert for RbEncoding {
    /// Converts `val` to a low-level encoding via `rb_to_encoding`.
    ///
    /// # Errors
    ///
    /// The call runs under `protect`; any exception Ruby raises is returned
    /// as `Err`.
    ///
    /// # Panics
    ///
    /// Panics if the call succeeds but yields a NULL pointer.
    fn try_convert(val: Value) -> Result<Self, Error> {
        // Filled in by the protected closure; stays NULL only if Ruby
        // returned NULL.
        let mut enc_ptr = ptr::null_mut();
        let outcome = protect(|| unsafe {
            enc_ptr = rb_to_encoding(val.as_rb_value());
            Ruby::get_unchecked().qnil()
        });
        match outcome {
            Ok(_) => Ok(Self::new(enc_ptr).unwrap()),
            Err(err) => Err(err),
        }
    }
}
impl Ruby {
pub fn ascii8bit_encindex(&self) -> Index {
Index(unsafe { rb_ascii8bit_encindex() })
}
pub fn utf8_encindex(&self) -> Index {
Index(unsafe { rb_utf8_encindex() })
}
pub fn usascii_encindex(&self) -> Index {
Index(unsafe { rb_usascii_encindex() })
}
pub fn locale_encindex(&self) -> Index {
Index(unsafe { rb_locale_encindex() })
}
pub fn filesystem_encindex(&self) -> Index {
Index(unsafe { rb_filesystem_encindex() })
}
pub fn find_encindex(&self, name: &str) -> Result<Index, Error> {
let name = CString::new(name).unwrap();
let mut i = 0;
protect(|| {
unsafe { i = rb_enc_find_index(name.as_ptr()) };
self.qnil()
})?;
if i == -1 {
return Err(Error::new(
self.exception_runtime_error(),
format!("Encoding {:?} exists, but can not be loaded", name),
));
}
Ok(Index(i))
}
}
/// The index Ruby uses to identify an encoding.
///
/// Transparent wrapper around the C `int` that Ruby's encoding-index
/// functions return and accept.
#[derive(Clone, Copy, Eq, PartialEq)]
#[repr(transparent)]
pub struct Index(c_int);
impl Index {
#[cfg_attr(
not(feature = "old-api"),
deprecated(note = "please use `Ruby::ascii8bit_encindex` instead")
)]
#[cfg_attr(docsrs, doc(cfg(feature = "old-api")))]
#[inline]
pub fn ascii8bit() -> Self {
get_ruby!().ascii8bit_encindex()
}
#[cfg_attr(
not(feature = "old-api"),
deprecated(note = "please use `Ruby::utf8_encindex` instead")
)]
#[cfg_attr(docsrs, doc(cfg(feature = "old-api")))]
#[inline]
pub fn utf8() -> Self {
get_ruby!().utf8_encindex()
}
#[cfg_attr(
not(feature = "old-api"),
deprecated(note = "please use `Ruby::usascii_encindex` instead")
)]
#[cfg_attr(docsrs, doc(cfg(feature = "old-api")))]
#[inline]
pub fn usascii() -> Self {
get_ruby!().usascii_encindex()
}
#[cfg_attr(
not(feature = "old-api"),
deprecated(note = "please use `Ruby::locale_encindex` instead")
)]
#[cfg_attr(docsrs, doc(cfg(feature = "old-api")))]
#[inline]
pub fn locale() -> Self {
get_ruby!().locale_encindex()
}
#[cfg_attr(
not(feature = "old-api"),
deprecated(note = "please use `Ruby::filesystem_encindex` instead")
)]
#[cfg_attr(docsrs, doc(cfg(feature = "old-api")))]
#[inline]
pub fn filesystem() -> Self {
get_ruby!().filesystem_encindex()
}
#[cfg_attr(
not(feature = "old-api"),
deprecated(note = "please use `Ruby::find_encindex` instead")
)]
#[cfg_attr(docsrs, doc(cfg(feature = "old-api")))]
#[inline]
pub fn find(name: &str) -> Result<Self, Error> {
get_ruby!().find_encindex(name)
}
pub(crate) fn to_int(self) -> c_int {
self.0
}
}
impl From<Index> for RbEncoding {
    /// Resolves an encoding index to its low-level encoding via
    /// `rb_enc_from_index`.
    ///
    /// # Panics
    ///
    /// Panics if Ruby returns a NULL pointer for the index.
    fn from(val: Index) -> Self {
        let raw = unsafe { rb_enc_from_index(val.to_int()) };
        RbEncoding::new(raw).expect("no encoding for index")
    }
}
impl TryConvert for Index {
fn try_convert(val: Value) -> Result<Self, Error> {
let i = unsafe { rb_to_encoding_index(val.as_rb_value()) };
if i == -1 && RString::from_value(val).is_some() {
return Err(Error::new(
Ruby::get_with(val).exception_runtime_error(),
format!("ArgumentError: unknown encoding name - {}", val),
));
} else if i == -1 {
return TryConvert::try_convert(RString::try_convert(val)?.as_value());
}
Ok(Index(i))
}
}
/// Possible states of a string's code range.
///
/// The discriminants are flag-style values: `SevenBit` is `1 << 20`, `Valid`
/// is `1 << 21`, and `Broken` is both of those bits set.
// NOTE(review): these presumably mirror Ruby's `RUBY_ENC_CODERANGE_*`
// constants — confirm against the Ruby headers before changing any value.
#[repr(u32)]
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub enum Coderange {
Unknown = 0,
SevenBit = 1048576,
Valid = 2097152,
Broken = 3145728,
}
/// Operations available on Ruby values that can carry an encoding.
pub trait EncodingCapable: ReprValue + Copy {
    /// Returns the encoding index of `self` via `rb_enc_get_index`.
    ///
    /// # Panics
    ///
    /// Panics if Ruby reports `-1` for `self`.
    fn enc_get(self) -> Index {
        match unsafe { rb_enc_get_index(self.as_rb_value()) } {
            -1 => panic!("{} not encoding capable", self.as_value()),
            idx => Index(idx),
        }
    }

    /// Sets the encoding of `self` via `rb_enc_set_index`.
    ///
    /// # Errors
    ///
    /// The call runs under `protect`; any exception Ruby raises is returned
    /// as `Err`.
    fn enc_set<T>(self, enc: T) -> Result<(), Error>
    where
        T: Into<Index>,
    {
        let outcome = protect(|| unsafe {
            rb_enc_set_index(self.as_rb_value(), enc.into().to_int());
            Ruby::get_unchecked().qnil()
        });
        outcome.map(|_| ())
    }

    /// Associates `self` with an encoding via `rb_enc_associate_index`.
    ///
    /// # Errors
    ///
    /// The call runs under `protect`; any exception Ruby raises is returned
    /// as `Err`.
    fn enc_associate<T>(self, enc: T) -> Result<(), Error>
    where
        T: Into<Index>,
    {
        let outcome = protect(|| {
            Value::new(unsafe { rb_enc_associate_index(self.as_rb_value(), enc.into().to_int()) })
        });
        outcome.map(|_| ())
    }
}
/// Returns the result of `rb_enc_compatible` for `v1` and `v2`, or `None`
/// when Ruby returns a NULL pointer (presumably meaning the two values have no
/// compatible encoding).
pub fn compatible<T, U>(v1: T, v2: U) -> Option<RbEncoding>
where
    T: EncodingCapable,
    U: EncodingCapable,
{
    let raw = unsafe { rb_enc_compatible(v1.as_rb_value(), v2.as_rb_value()) };
    RbEncoding::new(raw)
}
/// Returns the result of `rb_enc_check` for `v1` and `v2`.
///
/// # Errors
///
/// The call runs under `protect`; any exception Ruby raises is returned as
/// `Err`.
///
/// # Panics
///
/// Panics if the call succeeds but yields a NULL pointer.
pub fn check<T, U>(v1: T, v2: U) -> Result<RbEncoding, Error>
where
    T: EncodingCapable,
    U: EncodingCapable,
{
    // Filled in by the protected closure below.
    let mut enc_ptr = ptr::null_mut();
    let outcome = protect(|| unsafe {
        enc_ptr = rb_enc_check(v1.as_rb_value(), v2.as_rb_value());
        Ruby::get_with(v1).qnil()
    });
    match outcome {
        Ok(_) => Ok(RbEncoding::new(enc_ptr).unwrap()),
        Err(err) => Err(err),
    }
}
/// Copies the encoding of `src` onto `dst` via `rb_enc_copy`.
///
/// # Errors
///
/// The call runs under `protect`; any exception Ruby raises is returned as
/// `Err`.
pub fn copy<T, U>(dst: T, src: U) -> Result<(), Error>
where
    T: EncodingCapable,
    U: EncodingCapable,
{
    let outcome = protect(|| unsafe {
        rb_enc_copy(dst.as_rb_value(), src.as_rb_value());
        Ruby::get_with(dst).qnil()
    });
    outcome.map(|_| ())
}