use std::{
convert::TryInto,
ffi::{CStr, CString},
fmt,
ops::{Deref, Range},
os::raw::{c_char, c_int},
ptr::{self, NonNull},
};
use rb_sys::{
rb_ascii8bit_encindex, rb_ascii8bit_encoding, rb_default_external_encoding,
rb_default_internal_encoding, rb_enc_ascget, rb_enc_associate_index, rb_enc_check,
rb_enc_codelen, rb_enc_codepoint_len, rb_enc_compatible, rb_enc_copy, rb_enc_default_external,
rb_enc_default_internal, rb_enc_fast_mbclen, rb_enc_find, rb_enc_find_index,
rb_enc_from_encoding, rb_enc_from_index, rb_enc_get_index, rb_enc_mbclen,
rb_enc_precise_mbclen, rb_enc_set_index, rb_enc_to_index, rb_enc_uint_chr, rb_encoding,
rb_filesystem_encindex, rb_filesystem_encoding, rb_find_encoding, rb_locale_encindex,
rb_locale_encoding, rb_to_encoding, rb_to_encoding_index, rb_usascii_encindex,
rb_usascii_encoding, rb_utf8_encindex, rb_utf8_encoding,
};
use crate::{
class,
error::{protect, Error},
exception,
object::Object,
r_string::RString,
try_convert::TryConvert,
value::{private, NonZeroValue, ReprValue, Value, QNIL},
};
/// Wrapper around a Ruby [`Value`] that has been checked (see
/// `Encoding::from_value`) to be an instance of the class returned by
/// `class::encoding()`.
///
/// The wrapper is `repr(transparent)` over `NonZeroValue`, so it has the same
/// layout as the value it holds.
#[derive(Clone, Copy)]
#[repr(transparent)]
pub struct Encoding(NonZeroValue);
impl Encoding {
    /// Returns `Some(Encoding)` when `val` is a kind of `class::encoding()`,
    /// otherwise `None`.
    #[inline]
    pub fn from_value(val: Value) -> Option<Self> {
        unsafe {
            if val.is_kind_of(class::encoding()) {
                // The class check above is what justifies the unchecked wrap.
                Some(Self(NonZeroValue::new_unchecked(val)))
            } else {
                None
            }
        }
    }

    /// Returns the value produced by `rb_enc_default_external` as an
    /// `Encoding`.
    ///
    /// # Panics
    ///
    /// Panics if that value is not an encoding object.
    pub fn default_external() -> Self {
        let raw = unsafe { rb_enc_default_external() };
        Self::from_value(Value::new(raw)).unwrap()
    }

    /// Returns the value produced by `rb_enc_default_internal` as an
    /// `Encoding`, or `None` when that value is not an encoding object.
    pub fn default_internal() -> Option<Self> {
        let raw = unsafe { rb_enc_default_internal() };
        Self::from_value(Value::new(raw))
    }
}
/// Lets an `Encoding` be used wherever a `&Value` is expected.
impl Deref for Encoding {
    type Target = Value;

    fn deref(&self) -> &Value {
        let inner = &self.0;
        inner.get_ref()
    }
}
/// Formats the encoding using the result of `to_s_infallible`.
impl fmt::Display for Encoding {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let text = unsafe { self.to_s_infallible() };
        write!(f, "{}", text)
    }
}
/// Formats the encoding using the result of `inspect` on the underlying value.
impl fmt::Debug for Encoding {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let inspected = self.deref().inspect();
        write!(f, "{}", inspected)
    }
}
/// Looks up the encoding index of an `Encoding` via `rb_to_encoding_index`.
///
/// # Panics
///
/// Panics if Ruby reports the index as `-1`.
impl From<Encoding> for Index {
    fn from(val: Encoding) -> Self {
        match unsafe { rb_to_encoding_index(val.as_rb_value()) } {
            -1 => panic!("got encoding index -1"),
            idx => Index(idx),
        }
    }
}
/// Resolves an `Encoding` object to its `rb_encoding` pointer via
/// `rb_find_encoding`.
///
/// # Panics
///
/// Panics if the returned pointer is NULL.
impl From<Encoding> for RbEncoding {
    fn from(val: Encoding) -> Self {
        RbEncoding::new(unsafe { rb_find_encoding(val.as_rb_value()) })
            .expect("got NULL rb_encoding")
    }
}
impl From<Encoding> for Value {
fn from(val: Encoding) -> Self {
*val
}
}
// Empty impl: `Encoding` overrides nothing and relies on whatever `Object`
// provides by default.
impl Object for Encoding {}
// Conversions between `Encoding` and the plain `Value` it wraps.
unsafe impl private::ReprValue for Encoding {
    // Copies the wrapped `Value` out through `Deref`.
    fn to_value(self) -> Value {
        *self
    }
    // Wraps `val` without performing the class check that
    // `Encoding::from_value` does.
    // NOTE(review): presumably the caller must guarantee `val` really is an
    // encoding object — confirm against the trait's documented contract.
    unsafe fn from_value_unchecked(val: Value) -> Self {
        Self(NonZeroValue::new_unchecked(val))
    }
}
// Empty impl: nothing is overridden; `Encoding` uses whatever `ReprValue`
// provides by default.
impl ReprValue for Encoding {}
impl TryConvert for Encoding {
fn try_convert(val: Value) -> Result<Self, Error> {
if let Some(enc) = Self::from_value(val) {
return Ok(enc);
}
RbEncoding::try_convert(val).map(Into::into)
}
}
/// Non-null pointer to a Ruby `rb_encoding` struct.
///
/// The wrapper is `repr(transparent)`, so it has the same layout as the
/// `NonNull<rb_encoding>` it holds.
#[repr(transparent)]
pub struct RbEncoding(NonNull<rb_encoding>);
impl RbEncoding {
    /// Wraps a raw `rb_encoding` pointer, returning `None` if it is NULL.
    fn new(inner: *mut rb_encoding) -> Option<Self> {
        NonNull::new(inner).map(Self)
    }

    /// Returns the encoding given by `rb_ascii8bit_encoding`.
    ///
    /// # Panics
    ///
    /// Panics if Ruby returns a NULL pointer.
    pub fn ascii8bit() -> Self {
        Self::new(unsafe { rb_ascii8bit_encoding() }).unwrap()
    }

    /// Returns the encoding given by `rb_utf8_encoding`.
    ///
    /// # Panics
    ///
    /// Panics if Ruby returns a NULL pointer.
    pub fn utf8() -> Self {
        Self::new(unsafe { rb_utf8_encoding() }).unwrap()
    }

    /// Returns the encoding given by `rb_usascii_encoding`.
    ///
    /// # Panics
    ///
    /// Panics if Ruby returns a NULL pointer.
    pub fn usascii() -> Self {
        Self::new(unsafe { rb_usascii_encoding() }).unwrap()
    }

    /// Returns the encoding given by `rb_locale_encoding`.
    ///
    /// # Panics
    ///
    /// Panics if Ruby returns a NULL pointer.
    pub fn locale() -> Self {
        Self::new(unsafe { rb_locale_encoding() }).unwrap()
    }

    /// Returns the encoding given by `rb_filesystem_encoding`.
    ///
    /// # Panics
    ///
    /// Panics if Ruby returns a NULL pointer.
    pub fn filesystem() -> Self {
        Self::new(unsafe { rb_filesystem_encoding() }).unwrap()
    }

    /// Returns the encoding given by `rb_default_external_encoding`.
    ///
    /// # Panics
    ///
    /// Panics if Ruby returns a NULL pointer.
    pub fn default_external() -> Self {
        Self::new(unsafe { rb_default_external_encoding() }).unwrap()
    }

    /// Returns the encoding given by `rb_default_internal_encoding`, or `None`
    /// if Ruby returns a NULL pointer.
    pub fn default_internal() -> Option<Self> {
        Self::new(unsafe { rb_default_internal_encoding() })
    }

    /// Looks up an encoding by name using `rb_enc_find`.
    ///
    /// Returns `None` if `rb_enc_find` yields a NULL pointer, or if `name`
    /// contains an interior NUL byte and so cannot be passed to Ruby as a C
    /// string.
    pub fn find(name: &str) -> Option<Self> {
        // A name with an embedded NUL is reported as "not found" instead of
        // panicking inside this lookup.
        let name = CString::new(name).ok()?;
        let ptr = unsafe { rb_enc_find(name.as_ptr()) };
        Self::new(ptr)
    }

    /// Returns the wrapped raw pointer.
    pub(crate) fn as_ptr(&self) -> *mut rb_encoding {
        self.0.as_ptr()
    }

    /// Returns the `name` field of the underlying `rb_encoding` as a `&str`.
    ///
    /// # Panics
    ///
    /// Panics if the name is not valid UTF-8.
    pub fn name(&self) -> &str {
        unsafe { CStr::from_ptr(self.0.as_ref().name).to_str().unwrap() }
    }

    /// Returns the `min_enc_len` field of the underlying `rb_encoding`.
    pub fn mbminlen(&self) -> usize {
        unsafe { self.0.as_ref().min_enc_len as usize }
    }

    /// Returns the `max_enc_len` field of the underlying `rb_encoding`.
    pub fn mbmaxlen(&self) -> usize {
        unsafe { self.0.as_ref().max_enc_len as usize }
    }

    /// Returns the result of `rb_enc_mbclen` for the bytes of `slice`.
    pub fn mbclen(&self, slice: &[u8]) -> usize {
        let Range { start: p, end: e } = slice.as_ptr_range();
        unsafe { rb_enc_mbclen(p as *const c_char, e as *const c_char, self.as_ptr()) as usize }
    }

    /// Returns the result of `rb_enc_fast_mbclen` for the bytes of `slice`.
    pub fn fast_mbclen(&self, slice: &[u8]) -> usize {
        let Range { start: p, end: e } = slice.as_ptr_range();
        unsafe {
            rb_enc_fast_mbclen(p as *const c_char, e as *const c_char, self.as_ptr()) as usize
        }
    }

    /// Classifies the start of `slice` using `rb_enc_precise_mbclen`.
    ///
    /// The raw return value `r` is decoded as follows:
    /// * `r > 0` becomes `MbcLen::CharFound(r)`
    /// * `r < -1` becomes `MbcLen::NeedMore(-1 - r)`
    /// * `r == -1` becomes `MbcLen::Invalid`
    ///
    /// # Panics
    ///
    /// Panics if Ruby returns `0`, which this decoding treats as impossible.
    pub fn precise_mbclen(&self, slice: &[u8]) -> MbcLen {
        let Range { start: p, end: e } = slice.as_ptr_range();
        let r =
            unsafe { rb_enc_precise_mbclen(p as *const c_char, e as *const c_char, self.as_ptr()) };
        if 0 < r {
            MbcLen::CharFound(r as usize)
        } else if r < -1 {
            MbcLen::NeedMore((-1 - r) as usize)
        } else if r == -1 {
            MbcLen::Invalid
        } else {
            unreachable!()
        }
    }

    /// Reads the first character of `slice` via `rb_enc_ascget`.
    ///
    /// Returns `Some((byte, len))` with the value and the length reported by
    /// Ruby on success, or `None` when `rb_enc_ascget` returns a negative
    /// value.
    ///
    /// # Panics
    ///
    /// Panics if Ruby reports success but a length of zero.
    pub fn ascget(&self, slice: &[u8]) -> Option<(u8, usize)> {
        let Range { start: p, end: e } = slice.as_ptr_range();
        let mut len = 0;
        let c = unsafe {
            rb_enc_ascget(
                p as *const c_char,
                e as *const c_char,
                &mut len as *mut _,
                self.as_ptr(),
            )
        };
        // The failure code has to be checked before `len`: on failure Ruby
        // may leave `len` unwritten (still 0), and checking `len` first would
        // panic instead of returning `None`.
        if c < 0 {
            return None;
        }
        if len == 0 {
            panic!("{:?}", slice);
        }
        Some((c as u8, len as usize))
    }

    /// Returns the value and the length written by `rb_enc_codepoint_len` for
    /// the start of `slice`.
    ///
    /// # Errors
    ///
    /// Returns the `Error` produced by `protect` if the call fails.
    pub fn codepoint_len(&self, slice: &[u8]) -> Result<(u32, usize), Error> {
        let Range { start: p, end: e } = slice.as_ptr_range();
        let mut len = 0;
        let mut c = 0;
        protect(|| {
            c = unsafe {
                rb_enc_codepoint_len(
                    p as *const c_char,
                    e as *const c_char,
                    &mut len as *mut _,
                    self.as_ptr(),
                )
            };
            QNIL
        })?;
        Ok((c as u32, len as usize))
    }

    /// Returns the result of `rb_enc_codelen` for `code`.
    ///
    /// # Errors
    ///
    /// Returns an `ArgumentError` if `code` does not fit in a C `int`, or the
    /// `Error` produced by `protect` if the call fails.
    pub fn codelen(&self, code: u32) -> Result<usize, Error> {
        let code: c_int = code
            .try_into()
            .map_err(|e: <u32 as TryInto<c_int>>::Error| {
                Error::new(exception::arg_error(), e.to_string())
            })?;
        let mut len = 0;
        protect(|| {
            unsafe { len = rb_enc_codelen(code, self.as_ptr()) as usize }
            QNIL
        })?;
        Ok(len)
    }

    /// Returns the `RString` produced by `rb_enc_uint_chr` for `code`.
    ///
    /// # Errors
    ///
    /// Returns the `Error` produced by `protect` if the call fails.
    pub fn chr(&self, code: u32) -> Result<RString, Error> {
        protect(|| unsafe {
            RString::from_rb_value_unchecked(rb_enc_uint_chr(code, self.as_ptr()))
        })
    }

    /// Calls the encoding's `is_mbc_newline` callback on `slice` and returns
    /// whether the result is non-zero.
    ///
    /// # Panics
    ///
    /// Panics if the encoding has no `is_mbc_newline` callback.
    pub fn is_mbc_newline(&self, slice: &[u8]) -> bool {
        let Range { start: p, end: e } = slice.as_ptr_range();
        unsafe {
            self.0.as_ref().is_mbc_newline.unwrap()(p as *const _, e as *const _, self.as_ptr())
                != 0
        }
    }

    /// Calls the encoding's `is_code_ctype` callback with `code` and `ctype`
    /// and returns whether the result is non-zero.
    ///
    /// # Panics
    ///
    /// Panics if the encoding has no `is_code_ctype` callback.
    pub fn is_code_ctype(&self, code: u32, ctype: CType) -> bool {
        unsafe { self.0.as_ref().is_code_ctype.unwrap()(code, ctype as _, self.as_ptr()) != 0 }
    }
}
/// Result of `RbEncoding::precise_mbclen`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum MbcLen {
    /// A character was found; the payload is the positive value returned by
    /// Ruby.
    CharFound(usize),
    /// Ruby returned a value below `-1`; the payload is `-1 - value`.
    NeedMore(usize),
    /// Ruby returned `-1`.
    Invalid,
}
/// Character classes that can be tested with `RbEncoding::is_code_ctype`.
///
/// The discriminant is cast and passed straight to the encoding's
/// `is_code_ctype` callback, so the numeric values must not change.
// NOTE(review): the values appear to mirror Onigmo's `ONIGENC_CTYPE_*`
// constants — confirm against the Onigmo headers.
#[repr(u32)]
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub enum CType {
    Newline = 0,
    Alpha = 1,
    Blank = 2,
    Cntrl = 3,
    Digit = 4,
    Graph = 5,
    Lower = 6,
    Print = 7,
    Punct = 8,
    Space = 9,
    Upper = 10,
    Xdigit = 11,
    Word = 12,
    Alnum = 13,
    Ascii = 14,
}
/// Obtains the encoding object for an `rb_encoding` pointer via
/// `rb_enc_from_encoding`.
///
/// # Panics
///
/// Panics if the returned value is not an encoding object.
impl From<RbEncoding> for Encoding {
    fn from(val: RbEncoding) -> Self {
        let raw = unsafe { rb_enc_from_encoding(val.as_ptr()) };
        Encoding::from_value(Value::new(raw)).unwrap()
    }
}
/// Obtains the encoding index for an `rb_encoding` pointer via
/// `rb_enc_to_index`.
impl From<RbEncoding> for Index {
    fn from(val: RbEncoding) -> Self {
        let idx = unsafe { rb_enc_to_index(val.as_ptr()) };
        Index(idx)
    }
}
/// Converts to a `Value` by first converting to an `Encoding` object.
impl From<RbEncoding> for Value {
    fn from(val: RbEncoding) -> Self {
        let enc = Encoding::from(val);
        *enc
    }
}
/// Converts a `Value` to an `RbEncoding` using `rb_to_encoding`.
///
/// The call runs under `protect`, and any failure is returned as an `Error`.
///
/// # Panics
///
/// Panics if the call succeeds but yields a NULL pointer.
impl TryConvert for RbEncoding {
    fn try_convert(val: Value) -> Result<Self, Error> {
        let mut raw = ptr::null_mut();
        let outcome = protect(|| {
            raw = unsafe { rb_to_encoding(val.as_rb_value()) };
            QNIL
        });
        outcome.map(|_| Self::new(raw).unwrap())
    }
}
/// A Ruby encoding index, stored as the C `int` returned by Ruby's
/// `*_encindex` / `rb_enc_*_index` functions.
///
/// The wrapper is `repr(transparent)`, so it has the same layout as `c_int`.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
#[repr(transparent)]
pub struct Index(c_int);
impl Index {
pub fn ascii8bit() -> Self {
Self(unsafe { rb_ascii8bit_encindex() })
}
pub fn utf8() -> Self {
Self(unsafe { rb_utf8_encindex() })
}
pub fn usascii() -> Self {
Self(unsafe { rb_usascii_encindex() })
}
pub fn locale() -> Self {
Self(unsafe { rb_locale_encindex() })
}
pub fn filesystem() -> Self {
Self(unsafe { rb_filesystem_encindex() })
}
pub fn find(name: &str) -> Result<Self, Error> {
let name = CString::new(name).unwrap();
let mut i = 0;
protect(|| {
i = unsafe { rb_enc_find_index(name.as_ptr()) };
QNIL
})?;
if i == -1 {
return Err(Error::new(
exception::runtime_error(),
format!("Encoding {:?} exists, but can not be loaded", name),
));
}
Ok(Index(i))
}
pub(crate) fn to_int(self) -> c_int {
self.0
}
}
/// Resolves an encoding index to its `rb_encoding` pointer via
/// `rb_enc_from_index`.
///
/// # Panics
///
/// Panics if the returned pointer is NULL.
impl From<Index> for RbEncoding {
    fn from(val: Index) -> Self {
        let raw = unsafe { rb_enc_from_index(val.to_int()) };
        RbEncoding::new(raw).expect("no encoding for index")
    }
}
/// Converts a `Value` to an encoding `Index` using `rb_to_encoding_index`.
///
/// When Ruby reports `-1`:
/// * if `val` is already an `RString`, an error naming the unknown encoding
///   is returned;
/// * otherwise `val` is converted to an `RString` and the conversion is
///   retried on that string.
impl TryConvert for Index {
    fn try_convert(val: Value) -> Result<Self, Error> {
        let i = unsafe { rb_to_encoding_index(val.as_rb_value()) };
        if i != -1 {
            return Ok(Index(i));
        }
        if RString::from_value(val).is_some() {
            return Err(Error::new(
                exception::runtime_error(),
                format!("ArgumentError: unknown encoding name - {}", val),
            ));
        }
        RString::try_convert(val)?.try_convert()
    }
}
/// Code range states, represented as `u32` flag values.
// NOTE(review): the values look like Ruby's `RUBY_ENC_CODERANGE_*` flag bits
// (multiples of 1 << 20) — confirm against the Ruby headers.
#[repr(u32)]
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub enum Coderange {
    Unknown = 0,
    SevenBit = 1 << 20,
    Valid = 2 << 20,
    Broken = 3 << 20,
}
/// Operations for values that carry an encoding.
pub trait EncodingCapable: Deref<Target = Value> {
    /// Returns the encoding index reported by `rb_enc_get_index`.
    ///
    /// # Panics
    ///
    /// Panics if Ruby reports the index as `-1`.
    fn enc_get(&self) -> Index {
        match unsafe { rb_enc_get_index(self.as_rb_value()) } {
            -1 => panic!("{} not encoding capable", self.deref()),
            idx => Index(idx),
        }
    }

    /// Sets the encoding of `self` with `rb_enc_set_index`.
    ///
    /// # Errors
    ///
    /// Returns the `Error` produced by `protect` if the call fails.
    fn enc_set<T>(&self, enc: T) -> Result<(), Error>
    where
        T: Into<Index>,
    {
        protect(|| {
            let target = self.as_rb_value();
            // The conversion stays inside `protect`, as it may call into Ruby.
            let idx = Into::<Index>::into(enc).to_int();
            unsafe { rb_enc_set_index(target, idx) };
            QNIL
        })
        .map(|_| ())
    }

    /// Associates an encoding with `self` using `rb_enc_associate_index`.
    ///
    /// # Errors
    ///
    /// Returns the `Error` produced by `protect` if the call fails.
    fn enc_associate<T>(&self, enc: T) -> Result<(), Error>
    where
        T: Into<Index>,
    {
        protect(|| {
            let target = self.as_rb_value();
            // The conversion stays inside `protect`, as it may call into Ruby.
            let idx = Into::<Index>::into(enc).to_int();
            Value::new(unsafe { rb_enc_associate_index(target, idx) })
        })
        .map(|_| ())
    }
}
/// Returns the encoding given by `rb_enc_compatible` for `v1` and `v2`, or
/// `None` if Ruby returns a NULL pointer.
pub fn compatible<T, U>(v1: T, v2: U) -> Option<RbEncoding>
where
    T: EncodingCapable,
    U: EncodingCapable,
{
    let raw = unsafe { rb_enc_compatible(v1.as_rb_value(), v2.as_rb_value()) };
    RbEncoding::new(raw)
}
/// Returns the encoding given by `rb_enc_check` for `v1` and `v2`.
///
/// # Errors
///
/// Returns the `Error` produced by `protect` if the call fails.
///
/// # Panics
///
/// Panics if the call succeeds but yields a NULL pointer.
pub fn check<T, U>(v1: T, v2: U) -> Result<RbEncoding, Error>
where
    T: EncodingCapable,
    U: EncodingCapable,
{
    let mut raw = ptr::null_mut();
    let outcome = protect(|| {
        raw = unsafe { rb_enc_check(v1.as_rb_value(), v2.as_rb_value()) };
        QNIL
    });
    outcome.map(|_| RbEncoding::new(raw).unwrap())
}
/// Copies the encoding of `src` onto `dst` using `rb_enc_copy`.
///
/// # Errors
///
/// Returns the `Error` produced by `protect` if the call fails.
pub fn copy<T, U>(dst: T, src: U) -> Result<(), Error>
where
    T: EncodingCapable,
    U: EncodingCapable,
{
    protect(|| {
        let (to, from) = (dst.as_rb_value(), src.as_rb_value());
        unsafe { rb_enc_copy(to, from) };
        QNIL
    })
    .map(|_| ())
}