Struct magnus::encoding::RbEncoding
source · [−]#[repr(transparent)]pub struct RbEncoding(_);
Expand description
Ruby’s internal encoding type.
This type contains the data for an encoding, and is used with operations such as converting a string from one encoding to another, or reading a string character by character.
Implementations
sourceimpl RbEncoding
impl RbEncoding
sourcepub fn locale() -> Self
pub fn locale() -> Self
Returns the encoding that represents the process’ current locale.
This is dynamic: changing the process’s locale should also change the value returned by this function.
sourcepub fn filesystem() -> Self
pub fn filesystem() -> Self
Returns the filesystem encoding.
This is the encoding that Ruby expects data from the OS’ file system to be encoded as, such as directory names.
sourcepub fn default_external() -> Self
pub fn default_external() -> Self
Returns the default external encoding.
This is the encoding used for anything out-of-process, such as reading from files or sockets.
sourcepub fn default_internal() -> Option<Self>
pub fn default_internal() -> Option<Self>
Returns the default internal encoding.
If set, any out-of-process data is transcoded from the default external encoding to the default internal encoding.
sourcepub fn find(name: &str) -> Option<Self>
pub fn find(name: &str) -> Option<Self>
Returns the encoding with the name or alias `name`.
Examples
use magnus::{eval, encoding::RbEncoding};
assert_eq!(RbEncoding::find("UTF-8").unwrap().name(), "UTF-8");
assert_eq!(RbEncoding::find("BINARY").unwrap().name(), "ASCII-8BIT");
sourcepub fn mbminlen(&self) -> usize
pub fn mbminlen(&self) -> usize
Returns the minimum number of bytes the encoding needs to represent a single character.
Examples
use magnus::{eval, encoding::RbEncoding};
assert_eq!(RbEncoding::utf8().mbminlen(), 1);
assert_eq!(RbEncoding::find("UTF-16").unwrap().mbminlen(), 2);
sourcepub fn mbmaxlen(&self) -> usize
pub fn mbmaxlen(&self) -> usize
Returns the maximum number of bytes the encoding may need to represent a single character.
Examples
use magnus::{eval, encoding::RbEncoding};
assert_eq!(RbEncoding::usascii().mbmaxlen(), 1);
assert_eq!(RbEncoding::utf8().mbmaxlen(), 4);
sourcepub fn mbclen(&self, slice: &[u8]) -> usize
pub fn mbclen(&self, slice: &[u8]) -> usize
Returns the number of bytes of the first character in `slice`.
If the first byte of `slice` is midway through a character this will return the number of bytes until the next character boundary.
If the slice ends before the last byte of the character this will return the number of bytes until the end of the slice.
See also `fast_mbclen` and `precise_mbclen`.
Examples
use magnus::{eval, encoding::{EncodingCapable, RbEncoding}, RString};
let s = RString::new("🦀 café");
let encoding: RbEncoding = s.enc_get().into();
let mut chars = 0;
unsafe {
let mut bytes = s.as_slice();
assert_eq!(bytes.len(), 10);
while !bytes.is_empty() {
chars += 1;
let len = encoding.mbclen(bytes);
bytes = &bytes[len..];
}
}
assert_eq!(chars, 6);
sourcepub fn fast_mbclen(&self, slice: &[u8]) -> usize
pub fn fast_mbclen(&self, slice: &[u8]) -> usize
Returns the number of bytes of the first character in `slice`.
If the first byte of `slice` is midway through a character this will return the number of bytes until the next character boundary.
If the slice ends before the last byte of the character this will return the theoretical number of bytes until the end of the character, which will be past the end of the slice. If the string has been read from an IO source this may indicate more data needs to be read.
See also `mbclen` and `fast_mbclen`'s counterpart `precise_mbclen`.
Examples
use magnus::{eval, encoding::{EncodingCapable, RbEncoding}, RString};
let s = RString::new("🦀 café");
let encoding: RbEncoding = s.enc_get().into();
let mut chars = 0;
unsafe {
let mut bytes = s.as_slice();
assert_eq!(bytes.len(), 10);
while !bytes.is_empty() {
chars += 1;
let len = encoding.fast_mbclen(bytes);
bytes = &bytes[len..];
}
}
assert_eq!(chars, 6);
sourcepub fn precise_mbclen(&self, slice: &[u8]) -> MbcLen
pub fn precise_mbclen(&self, slice: &[u8]) -> MbcLen
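Returns the number of bytes of the first character in `slice`.
Unlike `mbclen` and `fast_mbclen`, the result is an `MbcLen`, which (as the example below shows) distinguishes a complete character (`MbcLen::CharFound`) from a slice that ends before the character is complete (`MbcLen::NeedMore`) and from an invalid byte sequence (`MbcLen::Invalid`).
See also `mbclen` and `fast_mbclen`.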
Examples
use magnus::{eval, encoding::{EncodingCapable, MbcLen, RbEncoding}, RString};
let s = RString::new("🦀 café");
let encoding: RbEncoding = s.enc_get().into();
let mut chars = 0;
unsafe {
let mut bytes = s.as_slice();
assert_eq!(bytes.len(), 10);
while !bytes.is_empty() {
chars += 1;
match encoding.precise_mbclen(bytes) {
MbcLen::CharFound(len) => bytes = &bytes[len..],
MbcLen::NeedMore(len) => panic!("Met end of string expecting {} bytes", len),
MbcLen::Invalid => panic!("corrupted string"),
}
}
}
assert_eq!(chars, 6);
sourcepub fn ascget(&self, slice: &[u8]) -> Option<(u8, usize)>
pub fn ascget(&self, slice: &[u8]) -> Option<(u8, usize)>
If the first character in `slice` is included in ASCII, returns it and its encoded length in `slice`; otherwise returns `None`.
Typically the length will be 1, but some encodings such as UTF-16 will encode ASCII characters in 2 bytes.
Examples
use magnus::{eval, encoding::{EncodingCapable, RbEncoding}, RString};
let s = RString::new("example");
let encoding: RbEncoding = s.enc_get().into();
let mut chars = Vec::new();
unsafe {
let mut bytes = s.as_slice();
while !bytes.is_empty() {
match encoding.ascget(bytes) {
Some((char, len)) => {
chars.push(char);
bytes = &bytes[len..];
}
None => panic!("string not ASCII"),
}
}
}
assert_eq!(chars, [101, 120, 97, 109, 112, 108, 101]);
sourcepub fn codepoint_len(&self, slice: &[u8]) -> Result<(u32, usize), Error>
pub fn codepoint_len(&self, slice: &[u8]) -> Result<(u32, usize), Error>
Returns the codepoint and length in bytes of the first character in `slice`.
Examples
use magnus::{eval, encoding::{EncodingCapable, RbEncoding}, RString};
let s = RString::new("🦀 café");
let encoding: RbEncoding = s.enc_get().into();
let mut codepoints = Vec::new();
unsafe {
let mut bytes = s.as_slice();
while !bytes.is_empty() {
let (codepoint, len) = encoding.codepoint_len(bytes).unwrap();
codepoints.push(codepoint);
bytes = &bytes[len..];
}
}
assert_eq!(codepoints, [129408, 32, 99, 97, 102, 233]);
sourcepub fn codelen(&self, code: u32) -> Result<usize, Error>
pub fn codelen(&self, code: u32) -> Result<usize, Error>
Returns the number of bytes required to represent the code point `code` in the encoding of `self`.
Examples
use magnus::{eval, encoding::RbEncoding};
assert_eq!(RbEncoding::utf8().codelen(97).unwrap(), 1);
assert_eq!(RbEncoding::utf8().codelen(129408).unwrap(), 4);
sourcepub fn chr(&self, code: u32) -> Result<RString, Error>
pub fn chr(&self, code: u32) -> Result<RString, Error>
Encodes the codepoint `code` as a series of bytes in the encoding `self` and returns the result as a Ruby string.
Examples
use magnus::{eval, encoding::RbEncoding};
let c = RbEncoding::usascii().chr(97).unwrap();
let res: bool = eval!(r#"c == "a""#, c).unwrap();
assert!(res);
use magnus::{eval, encoding::RbEncoding};
let c = RbEncoding::utf8().chr(129408).unwrap();
let res: bool = eval!(r#"c == "🦀""#, c).unwrap();
assert!(res);
sourcepub fn is_mbc_newline(&self, slice: &[u8]) -> bool
pub fn is_mbc_newline(&self, slice: &[u8]) -> bool
Returns `true` if the first character in `slice` is a newline in the encoding `self`, `false` otherwise.
Examples
use magnus::{eval, encoding::RbEncoding};
assert!(RbEncoding::utf8().is_mbc_newline(&[10]));
assert!(!RbEncoding::utf8().is_mbc_newline(&[32]));
sourcepub fn is_code_ctype(&self, code: u32, ctype: CType) -> bool
pub fn is_code_ctype(&self, code: u32, ctype: CType) -> bool
Returns whether the given codepoint `code` is of the character type `ctype` in the encoding `self`.
Examples
use magnus::{eval, encoding::{CType, RbEncoding}};
assert!(RbEncoding::utf8().is_code_ctype(9, CType::Space)); // "\t"
assert!(RbEncoding::utf8().is_code_ctype(32, CType::Space)); // " "
assert!(!RbEncoding::utf8().is_code_ctype(65, CType::Space)); // "A"
assert!(RbEncoding::utf8().is_code_ctype(65, CType::Alnum)); // "A"
assert!(RbEncoding::utf8().is_code_ctype(65, CType::Upper)); // "A"
Trait Implementations
sourceimpl From<Encoding> for RbEncoding
impl From<Encoding> for RbEncoding
sourceimpl From<Index> for RbEncoding
impl From<Index> for RbEncoding
sourceimpl From<RbEncoding> for Encoding
impl From<RbEncoding> for Encoding
sourcefn from(val: RbEncoding) -> Self
fn from(val: RbEncoding) -> Self
Converts to this type from the input type.
sourceimpl From<RbEncoding> for Index
impl From<RbEncoding> for Index
sourcefn from(val: RbEncoding) -> Self
fn from(val: RbEncoding) -> Self
Converts to this type from the input type.
sourceimpl From<RbEncoding> for Value
impl From<RbEncoding> for Value
sourcefn from(val: RbEncoding) -> Self
fn from(val: RbEncoding) -> Self
Converts to this type from the input type.
sourceimpl TryConvert for RbEncoding
impl TryConvert for RbEncoding
sourcefn try_convert(val: &Value) -> Result<Self, Error>
fn try_convert(val: &Value) -> Result<Self, Error>
Convert `val` into `Self`.
Auto Trait Implementations
impl RefUnwindSafe for RbEncoding
impl !Send for RbEncoding
impl !Sync for RbEncoding
impl Unpin for RbEncoding
impl UnwindSafe for RbEncoding
Blanket Implementations
sourceimpl<T> BorrowMut<T> for T where
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
const: unstable · sourcefn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more