use crate::utf16_iterators::Utf16Iterator;
use crate::traits::{CharExt, U16UtfExt};
use crate::utf8_char::Utf8Char;
use crate::errors::{Utf16SliceError, Utf16ArrayError, Utf16TupleError};
use crate::errors::{NonBmpError, EmptyStrError, FromStrError};
extern crate core;
use core::{hash,fmt};
use core::cmp::Ordering;
use core::borrow::Borrow;
use core::ops::Deref;
use core::str::FromStr;
#[cfg(feature="std")]
use core::iter::FromIterator;
#[cfg(feature="ascii")]
use core::char;
#[cfg(feature="ascii")]
extern crate ascii;
#[cfg(feature="ascii")]
use ascii::{AsciiChar,ToAsciiChar,ToAsciiCharError};
/// A single codepoint stored as UTF-16 code units.
///
/// `units[0]` holds the first (or only) unit and `units[1]` the second unit
/// of a surrogate pair. The safe constructors in this file store `0` in
/// `units[1]` when the codepoint needs only one unit.
///
/// The derived `Default` is the all-zero value, and the derived equality
/// compares both units.
#[derive(Default, Clone, Copy, PartialEq, Eq)]
pub struct Utf16Char {
    units: [u16; 2],
}
impl FromStr for Utf16Char {
    type Err = FromStrError;

    /// Parses a string that consists of exactly one codepoint.
    ///
    /// # Errors
    ///
    /// * `FromStrError::Empty` when `s` is empty.
    /// * `FromStrError::MultipleCodepoints` when bytes remain after the
    ///   first codepoint.
    fn from_str(s: &str) -> Result<Self, FromStrError> {
        let (decoded, consumed) = match Utf16Char::from_str_start(s) {
            Ok(pair) => pair,
            Err(EmptyStrError) => return Err(FromStrError::Empty),
        };
        if consumed == s.len() {
            Ok(decoded)
        } else {
            Err(FromStrError::MultipleCodepoints)
        }
    }
}
impl From<char> for Utf16Char {
    /// Encodes `c` via `to_utf16_tuple()`, storing `0` in the second slot
    /// when the tuple carries no second unit.
    fn from(c: char) -> Self {
        let units = match c.to_utf16_tuple() {
            (leading, Some(trailing)) => [leading, trailing],
            (only, None) => [only, 0],
        };
        Utf16Char { units }
    }
}
impl From<Utf8Char> for Utf16Char {
    /// Re-encodes a UTF-8 character as UTF-16 by rearranging the payload
    /// bits directly, dispatching on the length reported by `to_array()`.
    fn from(utf8: Utf8Char) -> Utf16Char {
        let (bytes, byte_count) = utf8.to_array();

        if byte_count == 1 {
            // A one-byte sequence is stored as-is.
            return Utf16Char { units: [bytes[0] as u16, 0] };
        }

        if byte_count == 4 {
            // Four bytes need a surrogate pair. The leading surrogate starts
            // from 0xd800 with the U+10000 offset (0x40 after the shift by
            // ten) already subtracted, then receives the upper payload bits.
            let leading = (0xd800 - (0x01_00_00u32 >> 10) as u16)
                + ((bytes[0] as u16 & 0x07) << 8)
                + ((bytes[1] as u16 & 0x3f) << 2)
                + ((bytes[2] as u16 & 0x30) >> 4);
            // The trailing surrogate carries the low ten payload bits.
            let trailing = 0xdc00
                | ((bytes[2] as u16 & 0x0f) << 6)
                | (bytes[3] as u16 & 0x3f);
            return Utf16Char { units: [leading, trailing] };
        }

        // Two or three bytes: both fit in a single unit.
        let first_two = ((bytes[0] as u16 & 0x1f) << 6) | (bytes[1] as u16 & 0x3f);
        let unit = if byte_count == 3 {
            (first_two << 6) | (bytes[2] as u16 & 0x3f)
        } else {
            first_two
        };
        Utf16Char { units: [unit, 0] }
    }
}
impl From<Utf16Char> for char {
    /// Decodes the stored units with `char::from_utf16_array_unchecked`.
    fn from(uc: Utf16Char) -> char {
        // `to_array()` returns the `units` field unchanged, so read it directly.
        let units = uc.units;
        char::from_utf16_array_unchecked(units)
    }
}
impl IntoIterator for Utf16Char {
type Item=u16;
type IntoIter=Utf16Iterator;
fn into_iter(self) -> Utf16Iterator {
Utf16Iterator::from(self)
}
}
#[cfg(feature="std")]
impl Extend<Utf16Char> for Vec<u16> {
    /// Appends the UTF-16 units of every character produced by `iter`.
    ///
    /// Space for one unit per character is reserved up front, based on the
    /// iterator's lower size hint. A second unit is only pushed when it is
    /// non-zero.
    fn extend<I:IntoIterator<Item=Utf16Char>>(&mut self, iter: I) {
        let chars = iter.into_iter();
        let (at_least, _) = chars.size_hint();
        self.reserve(at_least);
        for current in chars {
            let [first, second] = current.units;
            self.push(first);
            if second != 0 {
                self.push(second);
            }
        }
    }
}
#[cfg(feature="std")]
impl<'a> Extend<&'a Utf16Char> for Vec<u16> {
    /// Appends the units of borrowed characters by copying each one and
    /// delegating to the by-value `Extend<Utf16Char>` implementation.
    fn extend<I:IntoIterator<Item=&'a Utf16Char>>(&mut self, iter: I) {
        let owned = iter.into_iter().copied();
        self.extend(owned)
    }
}
#[cfg(feature="std")]
impl FromIterator<Utf16Char> for Vec<u16> {
    /// Collects the UTF-16 units of all characters into a new vector.
    fn from_iter<I:IntoIterator<Item=Utf16Char>>(iter: I) -> Self {
        let mut units = Vec::new();
        units.extend(iter);
        units
    }
}
#[cfg(feature="std")]
impl<'a> FromIterator<&'a Utf16Char> for Vec<u16> {
    /// Collects borrowed characters by copying each one and reusing the
    /// by-value `FromIterator<Utf16Char>` implementation.
    fn from_iter<I:IntoIterator<Item=&'a Utf16Char>>(iter: I) -> Self {
        iter.into_iter().copied().collect()
    }
}
#[cfg(feature="std")]
impl Extend<Utf16Char> for String {
    /// Appends every character to the string by first converting it to a
    /// `Utf8Char` and then extending with those.
    fn extend<I:IntoIterator<Item=Utf16Char>>(&mut self, iter: I) {
        let as_utf8 = iter.into_iter().map(Utf8Char::from);
        self.extend(as_utf8);
    }
}
#[cfg(feature="std")]
impl<'a> Extend<&'a Utf16Char> for String {
    /// Appends borrowed characters by copying each one and delegating to the
    /// by-value `Extend<Utf16Char>` implementation.
    fn extend<I:IntoIterator<Item=&'a Utf16Char>>(&mut self, iter: I) {
        let owned = iter.into_iter().copied();
        self.extend(owned);
    }
}
#[cfg(feature="std")]
impl FromIterator<Utf16Char> for String {
    /// Builds a new string out of all characters produced by `iter`.
    fn from_iter<I:IntoIterator<Item=Utf16Char>>(iter: I) -> Self {
        let mut text = String::new();
        text.extend(iter);
        text
    }
}
#[cfg(feature="std")]
impl<'a> FromIterator<&'a Utf16Char> for String {
    /// Builds a new string from borrowed characters by copying each one and
    /// reusing the by-value `FromIterator<Utf16Char>` implementation.
    fn from_iter<I:IntoIterator<Item=&'a Utf16Char>>(iter: I) -> Self {
        iter.into_iter().copied().collect()
    }
}
impl AsRef<[u16]> for Utf16Char {
    /// Borrows the units that are in use: the first `self.len()` elements
    /// of the internal array.
    #[inline]
    fn as_ref(&self) -> &[u16] {
        let used = self.len();
        &self.units[..used]
    }
}
impl Borrow<[u16]> for Utf16Char {
    /// Returns the same slice as the `AsRef<[u16]>` implementation.
    #[inline]
    fn borrow(&self) -> &[u16] {
        AsRef::<[u16]>::as_ref(self)
    }
}
impl Deref for Utf16Char {
    type Target = [u16];

    /// Dereferences to the same slice as the `AsRef<[u16]>` implementation.
    #[inline]
    fn deref(&self) -> &[u16] {
        AsRef::<[u16]>::as_ref(self)
    }
}
#[cfg(feature="ascii")]
impl From<AsciiChar> for Utf16Char {
    /// Stores the ASCII byte, widened to `u16`, as the only unit.
    #[inline]
    fn from(ac: AsciiChar) -> Self {
        let unit = u16::from(ac.as_byte());
        Utf16Char { units: [unit, 0] }
    }
}
#[cfg(feature="ascii")]
impl ToAsciiChar for Utf16Char {
    /// Converts by delegating to the `u16` implementation on the first unit;
    /// the second unit is not consulted.
    #[inline]
    fn to_ascii_char(self) -> Result<AsciiChar, ToAsciiCharError> {
        let first_unit = self.units[0];
        first_unit.to_ascii_char()
    }

    /// Unchecked variant that delegates to the `u16` implementation on the
    /// first unit.
    ///
    /// # Safety
    ///
    /// The requirements of the `u16` implementation of
    /// `to_ascii_char_unchecked` apply to the first unit of `self`.
    #[inline]
    unsafe fn to_ascii_char_unchecked(self) -> AsciiChar {
        let first_unit = self.units[0];
        // SAFETY: the caller's obligations are forwarded unchanged to the
        // `u16` implementation.
        unsafe { first_unit.to_ascii_char_unchecked() }
    }
}
impl hash::Hash for Utf16Char {
    /// Hashes the value exactly like the `char` returned by `to_char()`.
    fn hash<H : hash::Hasher>(&self, state: &mut H) {
        let as_char = self.to_char();
        hash::Hash::hash(&as_char, state);
    }
}
impl fmt::Debug for Utf16Char {
    /// Formats the value using the `Debug` output of the equivalent `char`.
    fn fmt(&self, fmtr: &mut fmt::Formatter) -> fmt::Result {
        let as_char = self.to_char();
        <char as fmt::Debug>::fmt(&as_char, fmtr)
    }
}
impl fmt::Display for Utf16Char {
    /// Formats the value by converting it to a `Utf8Char` and using that
    /// type's `Display` implementation.
    fn fmt(&self, fmtr: &mut fmt::Formatter) -> fmt::Result {
        let as_utf8 = Utf8Char::from(*self);
        fmt::Display::fmt(&as_utf8, fmtr)
    }
}
impl PartialOrd for Utf16Char {
    /// Always returns `Some`, wrapping the total order defined by `Ord`.
    #[inline]
    fn partial_cmp(&self, rhs: &Self) -> Option<Ordering> {
        let ordering = Ord::cmp(self, rhs);
        Some(ordering)
    }
}
impl Ord for Utf16Char {
#[inline]
fn cmp(&self, rhs: &Self) -> Ordering {
let lhs = (self.units[0] as u32, self.units[1] as u32);
let rhs = (rhs.units[0] as u32, rhs.units[1] as u32);
let lhs = (lhs.0 << (lhs.1 >> 12)) + lhs.1;
let rhs = (rhs.0 << (rhs.1 >> 12)) + rhs.1;
lhs.cmp(&rhs)
}
}
impl PartialEq<char> for Utf16Char {
    /// Encodes the `char` as a `Utf16Char` and compares the stored units.
    fn eq(&self, u32c: &char) -> bool {
        let encoded = Utf16Char::from(*u32c);
        self.units == encoded.units
    }
}
impl PartialEq<Utf16Char> for char {
    /// Encodes `self` as a `Utf16Char` and compares the stored units.
    fn eq(&self, u16c: &Utf16Char) -> bool {
        let encoded = Utf16Char::from(*self);
        encoded.units == u16c.units
    }
}
impl PartialOrd<char> for Utf16Char {
    /// Encodes the `char` as a `Utf16Char` and compares using the ordering
    /// of `Utf16Char`; the result is always `Some`.
    fn partial_cmp(&self, u32c: &char) -> Option<Ordering> {
        let encoded = Utf16Char::from(*u32c);
        Some(Ord::cmp(self, &encoded))
    }
}
impl PartialOrd<Utf16Char> for char {
    /// Encodes `self` as a `Utf16Char` and compares using the ordering of
    /// `Utf16Char`; the result is always `Some`.
    fn partial_cmp(&self, u16c: &Utf16Char) -> Option<Ordering> {
        let encoded = Utf16Char::from(*self);
        Some(Ord::cmp(&encoded, u16c))
    }
}
impl PartialEq<Utf8Char> for Utf16Char {
    /// Converts the `Utf8Char` to a `Utf16Char` and compares the stored units.
    fn eq(&self, u8c: &Utf8Char) -> bool {
        let converted = Utf16Char::from(*u8c);
        self.units == converted.units
    }
}
impl PartialOrd<Utf8Char> for Utf16Char {
    /// Converts the `Utf8Char` to a `Utf16Char` and compares using the
    /// ordering of `Utf16Char`; the result is always `Some`.
    fn partial_cmp(&self, u8c: &Utf8Char) -> Option<Ordering> {
        let converted = Utf16Char::from(*u8c);
        Some(Ord::cmp(self, &converted))
    }
}
impl PartialEq<u16> for Utf16Char {
fn eq(&self, unit: &u16) -> bool {
self.units[0] == *unit && self.units[1] == 0
}
}
impl PartialEq<u8> for Utf16Char {
fn eq(&self, byte: &u8) -> bool {
self.units[0] == *byte as u16
}
}
#[cfg(feature = "ascii")]
impl PartialEq<AsciiChar> for Utf16Char {
    /// Compares the first unit with the ASCII character cast to `u16`.
    #[inline]
    fn eq(&self, ascii: &AsciiChar) -> bool {
        let ascii_unit = *ascii as u16;
        ascii_unit == self.units[0]
    }
}
#[cfg(feature = "ascii")]
impl PartialEq<Utf16Char> for AsciiChar {
    /// Compares `self`, cast to `u16`, with the first unit of `u16c`.
    #[inline]
    fn eq(&self, u16c: &Utf16Char) -> bool {
        let ascii_unit = *self as u16;
        u16c.units[0] == ascii_unit
    }
}
#[cfg(feature = "ascii")]
impl PartialOrd<AsciiChar> for Utf16Char {
    /// Orders the first unit against the ASCII character cast to `u16`;
    /// the result is always `Some`.
    #[inline]
    fn partial_cmp(&self, ascii: &AsciiChar) -> Option<Ordering> {
        let ascii_unit = *ascii as u16;
        Some(self.units[0].cmp(&ascii_unit))
    }
}
#[cfg(feature = "ascii")]
impl PartialOrd<Utf16Char> for AsciiChar {
    /// Orders `self`, cast to `u16`, against the first unit of `u16c`;
    /// the result is always `Some`.
    #[inline]
    fn partial_cmp(&self, u16c: &Utf16Char) -> Option<Ordering> {
        let ascii_unit = *self as u16;
        Some(ascii_unit.cmp(&u16c.units[0]))
    }
}
impl Utf16Char {
    /// Encodes `c` as UTF-16.
    ///
    /// Codepoints up to U+FFFF are stored as one unit with `0` in the second
    /// slot; everything above is split into a surrogate pair.
    pub const fn new(c: char) -> Self {
        let codepoint = c as u32;
        if codepoint > 0xffff {
            // A surrogate pair carries the offset from U+10000: the upper
            // bits go into the leading unit, the low ten into the trailing one.
            let offset = codepoint.wrapping_sub(0x01_00_00);
            let leading = 0xd8_00 | (offset >> 10) as u16;
            let trailing = 0xdc_00 | (offset & 0x0_03_ff) as u16;
            Utf16Char { units: [leading, trailing] }
        } else {
            Utf16Char { units: [codepoint as u16, 0] }
        }
    }

    /// Decodes the first codepoint of `s` and returns it together with the
    /// number of UTF-8 bytes it occupied.
    ///
    /// Only the first byte is inspected to decide how many bytes to decode;
    /// the continuation bytes are masked and combined without validation.
    ///
    /// # Errors
    ///
    /// Returns `EmptyStrError` when `s` is empty.
    pub const fn from_str_start(s: &str) -> Result<(Self,usize), EmptyStrError> {
        let b = s.as_bytes();
        if b.is_empty() {
            return Err(EmptyStrError);
        }
        let lead = b[0];
        if lead < 0x80 {
            // 1 byte => 1 unit
            Ok((Utf16Char { units: [lead as u16, 0] }, 1))
        } else if lead < 0xe0 {
            // 2 bytes => 1 unit (the last byte is read first)
            let low = (b[1] & 0x3f) as u16;
            let high = (lead & 0x1f) as u16;
            Ok((Utf16Char { units: [(high << 6) | low, 0] }, 2))
        } else if lead < 0xf0 {
            // 3 bytes => 1 unit
            let low = (b[2] & 0x3f) as u16;
            let mid = (b[1] & 0x3f) as u16;
            let high = (lead & 0x0f) as u16;
            Ok((Utf16Char { units: [(high << 12) | (mid << 6) | low, 0] }, 3))
        } else {
            // 4 bytes => 2 units
            let b3 = b[3];
            let b2 = b[2];
            let b1 = b[1];
            let trailing = 0xdc00
                | (((b2 & 0x0f) as u16) << 6)
                | ((b3 & 0x3f) as u16);
            // 0xd800 with the U+10000 offset (0x40 after shifting right by
            // ten) subtracted up front.
            let base = 0xd800 - (0x01_00_00u32 >> 10) as u16;
            let leading = base
                + (((lead & 0x07) as u16) << 8)
                + (((b1 & 0x3f) as u16) << 2)
                + (((b2 & 0x30) as u16) >> 4);
            Ok((Utf16Char { units: [leading, trailing] }, 4))
        }
    }

    /// Validates and copies the codepoint at the start of `src`, returning
    /// it together with the number of units used.
    ///
    /// # Errors
    ///
    /// Validation is delegated to `char::from_utf16_slice_start`; its error
    /// is returned unchanged.
    pub fn from_slice_start(src: &[u16]) -> Result<(Self,usize), Utf16SliceError> {
        let (_, len) = char::from_utf16_slice_start(src)?;
        let second = if len == 2 { src[1] } else { 0 };
        Ok((Utf16Char { units: [src[0], second] }, len))
    }

    /// Copies the codepoint at the start of `src` without any validation.
    ///
    /// # Safety
    ///
    /// `src` must contain at least one element, and at least two when the
    /// first element is a leading surrogate: both reads are unchecked.
    /// The units themselves are not validated either.
    pub unsafe fn from_slice_start_unchecked(src: &[u16]) -> (Self,usize) {
        // SAFETY: the caller guarantees that `src` is not empty.
        let first = unsafe { *src.get_unchecked(0) };
        if !first.is_utf16_leading_surrogate() {
            return (Utf16Char { units: [first, 0] }, 1);
        }
        // SAFETY: the caller guarantees a second element after a leading surrogate.
        let second = unsafe { *src.get_unchecked(1) };
        (Utf16Char { units: [first, second] }, 2)
    }

    /// Validates a two-unit array.
    ///
    /// When the first unit is not a surrogate, the second unit is ignored
    /// and replaced with `0`.
    ///
    /// # Errors
    ///
    /// * `FirstIsTrailingSurrogate` when the first unit is in 0xdc00..=0xdfff.
    /// * `SecondIsNotTrailingSurrogate` when the first unit is a leading
    ///   surrogate but the second is not in 0xdc00..=0xdfff.
    pub const fn from_array(units: [u16; 2]) -> Result<Self,Utf16ArrayError> {
        match units[0] {
            0xd8_00..=0xdb_ff => {
                if (units[1] & 0xfc_00) == 0xdc_00 {
                    Ok(Utf16Char { units })
                } else {
                    Err(Utf16ArrayError::SecondIsNotTrailingSurrogate)
                }
            }
            0xdc_00..=0xdf_ff => Err(Utf16ArrayError::FirstIsTrailingSurrogate),
            single => Ok(Utf16Char { units: [single, 0] }),
        }
    }

    /// Wraps `units` without any validation or normalisation.
    ///
    /// # Safety
    ///
    /// Nothing is checked here. Other methods in this file derive the length
    /// from the second unit, so it presumably has to be `0` unless the array
    /// holds a valid surrogate pair — confirm against the crate's documented
    /// contract.
    pub const unsafe fn from_array_unchecked(units: [u16; 2]) -> Self {
        Self { units }
    }

    /// Checks whether a `(first, optional second)` tuple is valid UTF-16
    /// for a single codepoint.
    pub(crate) const fn validate_tuple(utf16: (u16,Option<u16>)) -> Result<(),Utf16TupleError> {
        match utf16 {
            // Leading surrogate: the verdict depends on the second unit.
            (0xd8_00..=0xdb_ff, second) => match second {
                Some(0xdc_00..=0xdf_ff) => Ok(()),
                Some(_) => Err(Utf16TupleError::SecondIsNotTrailingSurrogate),
                None => Err(Utf16TupleError::MissingSecond),
            },
            (0xdc_00..=0xdf_ff, _) => Err(Utf16TupleError::FirstIsTrailingSurrogate),
            // Everything else is a complete codepoint on its own.
            (_, Some(_)) => Err(Utf16TupleError::SuperfluousSecond),
            (_, None) => Ok(()),
        }
    }

    /// Validates a `(first, optional second)` tuple and stores it.
    ///
    /// # Errors
    ///
    /// Returns the error produced by `validate_tuple` unchanged.
    pub const fn from_tuple(utf16: (u16,Option<u16>)) -> Result<Self,Utf16TupleError> {
        match Self::validate_tuple(utf16) {
            Err(error) => Err(error),
            // SAFETY: the tuple has just passed `validate_tuple`.
            Ok(()) => Ok(unsafe { Self::from_tuple_unchecked(utf16) }),
        }
    }

    /// Stores a tuple without validation; a missing second unit becomes `0`.
    ///
    /// # Safety
    ///
    /// Nothing is checked here; `from_tuple` only calls this after
    /// `validate_tuple` has accepted the tuple.
    pub const unsafe fn from_tuple_unchecked(utf16: (u16,Option<u16>)) -> Self {
        let (first, maybe_second) = utf16;
        let units = if let Some(second) = maybe_second {
            [first, second]
        } else {
            [first, 0]
        };
        Utf16Char { units }
    }

    /// Creates a single-unit character from a codepoint given as `u16`.
    ///
    /// # Errors
    ///
    /// Returns `NonBmpError` when the value lies in the surrogate range
    /// 0xd800..=0xdfff.
    pub const fn from_bmp(bmp_codepoint: u16) -> Result<Self,NonBmpError> {
        if bmp_codepoint & 0xf800 == 0xd800 {
            Err(NonBmpError)
        } else {
            Ok(Utf16Char { units: [bmp_codepoint, 0] })
        }
    }

    /// Creates a single-unit character without checking for surrogates.
    ///
    /// # Safety
    ///
    /// The value is stored as-is; `from_bmp` is the checked counterpart and
    /// rejects 0xd800..=0xdfff.
    #[inline]
    pub const unsafe fn from_bmp_unchecked(bmp_codepoint: u16) -> Self {
        Self { units: [bmp_codepoint, 0] }
    }

    /// Returns `true` when the second unit is zero, i.e. one unit is used.
    #[inline]
    pub const fn is_bmp(self) -> bool {
        let [_, second] = self.units;
        second == 0
    }

    /// Returns the number of units in use: 2 when the highest bit of the
    /// second unit is set (true for every trailing surrogate), otherwise 1.
    #[inline]
    pub const fn len(self) -> usize {
        if self.units[1] & 0x8000 != 0 { 2 } else { 1 }
    }

    /// Returns `true` when the first unit is below 128.
    #[inline]
    pub const fn is_ascii(self) -> bool {
        self.units[0] < 128
    }

    /// Compares two characters, ignoring ASCII case when both are ASCII.
    ///
    /// When either side is not ASCII, both units must match exactly.
    pub const fn eq_ignore_ascii_case(&self, other: &Self) -> bool {
        let [own_first, own_second] = self.units;
        let [other_first, other_second] = other.units;
        if own_first <= 127 && other_first <= 127 {
            (own_first as u8).eq_ignore_ascii_case(&(other_first as u8))
        } else {
            own_first == other_first && own_second == other_second
        }
    }

    /// Converts `a..=z` to `A..=Z`; every other value is returned unchanged.
    pub const fn to_ascii_uppercase(self) -> Self {
        let unit = self.units[0];
        if unit >= b'a' as u16 && unit <= b'z' as u16 {
            Utf16Char { units: [unit - (b'a' - b'A') as u16, 0] }
        } else {
            self
        }
    }

    /// Converts `A..=Z` to `a..=z`; every other value is returned unchanged.
    pub const fn to_ascii_lowercase(self) -> Self {
        let unit = self.units[0];
        if unit >= b'A' as u16 && unit <= b'Z' as u16 {
            Utf16Char { units: [unit + (b'a' - b'A') as u16, 0] }
        } else {
            self
        }
    }

    /// In-place version of `to_ascii_uppercase()`.
    pub fn make_ascii_uppercase(&mut self) {
        let upper = (*self).to_ascii_uppercase();
        *self = upper;
    }

    /// In-place version of `to_ascii_lowercase()`.
    pub fn make_ascii_lowercase(&mut self) {
        let lower = (*self).to_ascii_lowercase();
        *self = lower;
    }

    /// Converts to a `char` through the `From<Utf16Char> for char` impl.
    pub fn to_char(self) -> char {
        Into::<char>::into(self)
    }

    /// Writes the units in use to the start of `dst` and returns how many
    /// were written.
    ///
    /// # Panics
    ///
    /// Panics when `dst` is too short to hold all units; nothing is written
    /// in that case.
    pub fn to_slice(self, dst: &mut[u16]) -> usize {
        let is_pair = self.units[1] & 0x8000 != 0;
        let last = usize::from(is_pair);
        // Checking the last index first covers every index below it.
        if dst.len() <= last {
            panic!("The provided buffer is too small.");
        }
        dst[last] = self.units[last];
        if is_pair {
            dst[0] = self.units[0];
        }
        last + 1
    }

    /// Returns both stored units, including an unused second slot.
    #[inline]
    pub const fn to_array(self) -> [u16;2] {
        self.units
    }

    /// Returns the first unit and, when the highest bit of the second unit
    /// is set, the second unit.
    #[inline]
    pub const fn to_tuple(self) -> (u16,Option<u16>) {
        let [first, second] = self.units;
        let extra = if second & 0x8000 != 0 { Some(second) } else { None };
        (first, extra)
    }
}