use std::{
borrow::Cow,
cmp::{Ordering, PartialEq, PartialOrd},
error::Error,
ffi::OsString,
fmt::{self, Debug, Display, Formatter},
hash::{BuildHasher, BuildHasherDefault, Hash, Hasher},
num::NonZeroU32,
path::PathBuf,
str::{self, FromStr},
};
use astral_util::hash::Murmur3;
use super::{StringId, Subsystem, Text, Utf16Error, Utf8Error};
/// A name made of a string stored in a [`Subsystem`] plus an optional numeric suffix.
///
/// Only the [`StringId`] of the string part, the optional non-zero number and a
/// reference to the `Subsystem` are stored; the text itself is looked up through
/// the subsystem when it is needed.
pub struct Name<'system, H = BuildHasherDefault<Murmur3>> {
/// Identifier of the string part inside `system`.
id: StringId,
/// Numeric suffix of the name; `None` when the name has no suffix.
number: Option<NonZeroU32>,
/// Subsystem used to resolve `id` back into a string.
system: &'system Subsystem<H>,
}
/// Constructors that register the string part of a name in a [`Subsystem`].
impl<'system, H> Name<'system, H>
where
    H: BuildHasher,
{
    /// Creates a `Name` from anything that can be viewed as a `str`.
    ///
    /// The text is first split into a string part and an optional trailing
    /// number; only the string part is passed to `Subsystem::create_string_id`.
    pub fn new<T>(string: T, system: &'system Subsystem<H>) -> Self
    where
        T: AsRef<str>,
    {
        let (text, suffix) = Self::split_string(string.as_ref());
        let id = system.create_string_id(text);
        // The id was just handed out by `system`, so the parts belong together.
        unsafe { Self::from_raw_parts(id, suffix, system) }
    }

    /// Creates a `Name` from a byte slice that must be valid UTF-8.
    ///
    /// # Errors
    ///
    /// Returns the `Utf8Error` built from the standard library error when `v`
    /// is not valid UTF-8.
    pub fn from_utf8(v: &[u8], system: &'system Subsystem<H>) -> Result<Self, Utf8Error> {
        str::from_utf8(v)
            .map(|text| Self::new(text, system))
            .map_err(Utf8Error::from_std)
    }

    /// Creates a `Name` from bytes, replacing invalid UTF-8 sequences with
    /// `U+FFFD` as done by `String::from_utf8_lossy`.
    pub fn from_utf8_lossy(v: &[u8], system: &'system Subsystem<H>) -> Self {
        let text = String::from_utf8_lossy(v);
        Self::new(text, system)
    }

    /// Creates a `Name` from bytes without checking that they are valid UTF-8.
    ///
    /// # Safety
    ///
    /// `v` is passed to `str::from_utf8_unchecked`, so it must be valid UTF-8.
    pub unsafe fn from_utf8_unchecked(v: &[u8], system: &'system Subsystem<H>) -> Self {
        let text = str::from_utf8_unchecked(v);
        Self::new(text, system)
    }

    /// Creates a `Name` from UTF-16 code units.
    ///
    /// # Errors
    ///
    /// Returns the `Utf16Error` built from the standard library error when `v`
    /// contains invalid UTF-16.
    pub fn from_utf16(v: &[u16], system: &'system Subsystem<H>) -> Result<Self, Utf16Error> {
        String::from_utf16(v)
            .map(|text| Self::new(text, system))
            .map_err(Utf16Error::from_std)
    }

    /// Creates a `Name` from UTF-16 code units, replacing invalid data with
    /// `U+FFFD` as done by `String::from_utf16_lossy`.
    pub fn from_utf16_lossy(v: &[u16], system: &'system Subsystem<H>) -> Self {
        let text = String::from_utf16_lossy(v);
        Self::new(text, system)
    }
}
/// Accessors and helpers that do not need `H` to be a hasher.
impl<'system, H> Name<'system, H> {
    /// Splits a trailing decimal number off `string`.
    ///
    /// The number starts at the first non-zero digit of the trailing run of
    /// ASCII digits; leading zeros of that run stay in the returned prefix, so
    /// `"foo007"` becomes `("foo00", Some(7))`. The input is returned unchanged
    /// together with `None` when there is no such digit or when the digits do
    /// not fit into a `u32`.
    fn split_string(string: &str) -> (&str, Option<NonZeroU32>) {
        let bytes = string.as_bytes();
        // Start of the trailing run of ASCII digits (`bytes.len()` if there is none).
        let digits_start = bytes
            .iter()
            .rposition(|byte| !byte.is_ascii_digit())
            .map_or(0, |index| index + 1);
        // Leading zeros cannot be reproduced from a number, so they are left in the prefix.
        let number_start = bytes[digits_start..]
            .iter()
            .position(|&byte| byte != b'0')
            .map(|offset| digits_start + offset);
        number_start
            .and_then(|index| {
                // `index` points at an ASCII byte, so it is a valid char boundary.
                let (prefix, digits) = string.split_at(index);
                let number = u32::from_str(digits).ok().and_then(NonZeroU32::new)?;
                Some((prefix, Some(number)))
            })
            .unwrap_or((string, None))
    }

    /// Returns how many decimal digits are needed to print `number`.
    fn decimal_len(number: NonZeroU32) -> usize {
        let mut remaining = number.get();
        let mut digits = 0;
        while remaining != 0 {
            remaining /= 10;
            digits += 1;
        }
        digits
    }

    /// Assembles a `Name` from its parts without any validation.
    ///
    /// # Safety
    ///
    /// Nothing here checks that `id` is known to `system`. Presumably the
    /// caller must guarantee that `id` was handed out by `system`, because the
    /// other methods pass it back to `system` unchecked (TODO confirm against
    /// `Subsystem`).
    pub unsafe fn from_raw_parts(
        id: StringId,
        number: Option<NonZeroU32>,
        system: &'system Subsystem<H>,
    ) -> Self {
        Self { id, number, system }
    }

    /// Returns the id of the string part.
    pub fn id(self) -> StringId {
        self.id
    }

    /// Returns the string part, i.e. the name without its numeric suffix.
    pub fn string_part(self) -> &'system str {
        self.system.string(self.id)
    }

    /// Returns the numeric suffix, if any.
    pub fn number(self) -> Option<NonZeroU32> {
        self.number
    }

    /// Returns the full text of the name.
    ///
    /// Borrows the string part when there is no number; otherwise the string
    /// part and the number are formatted into a newly allocated `String`.
    pub fn as_str(self) -> Cow<'system, str> {
        match self.number {
            Some(_) => Cow::Owned(self.to_string()),
            None => Cow::Borrowed(self.string_part()),
        }
    }

    /// Returns `true` when the name has no number and the subsystem reports
    /// the string part as empty.
    pub fn is_empty(self) -> bool {
        self.number.is_none() && self.system.is_empty(self.id)
    }

    /// Returns the length reported by the subsystem for the string part plus
    /// the number of decimal digits of the numeric suffix.
    pub fn len(self) -> usize {
        let string_len = self.system.len(self.id);
        match self.number {
            // Count the digits arithmetically instead of formatting the
            // number into a temporary `String`.
            Some(number) => string_len + Self::decimal_len(number),
            None => string_len,
        }
    }
}
// `Clone` and `Copy` are written by hand, presumably so that they do not
// require `H: Clone`/`H: Copy` the way `#[derive]` would: only a reference to
// the subsystem is stored.
impl<H> Clone for Name<'_, H> {
    /// Returns a bitwise copy; `Name` is `Copy`, so no `unsafe` is needed.
    fn clone(&self) -> Self {
        *self
    }
}
impl<H> Copy for Name<'_, H> {}
/// Hashes the id of the string part followed by the optional number.
///
/// NOTE(review): `PartialEq` for `Name` falls back to comparing string
/// contents when two names do not share a subsystem, while this hash is based
/// on the id. Confirm that equal strings get equal ids in every subsystem, or
/// that names of different subsystems are never used as keys of the same map.
impl<B> Hash for Name<'_, B> {
    fn hash<H: Hasher>(&self, state: &mut H) {
        let Self { id, number, .. } = self;
        Hash::hash(id, state);
        Hash::hash(number, state);
    }
}
/// Formats the full name (string part plus number) into a boxed string slice.
#[allow(box_pointers)]
impl<H> From<Name<'_, H>> for Box<str> {
    fn from(name: Name<'_, H>) -> Self {
        let rendered = name.to_string();
        rendered.into_boxed_str()
    }
}
/// Converts via [`Name::as_str`], so whether the result borrows or owns is
/// decided there.
impl<'system, H> From<Name<'system, H>> for Cow<'system, str> {
    #[inline]
    fn from(name: Name<'system, H>) -> Self {
        name.as_str()
    }
}
/// Formats the full name (string part plus number) into an owned `String`.
impl<H> From<Name<'_, H>> for String {
    #[inline]
    fn from(name: Name<'_, H>) -> Self {
        ToString::to_string(&name)
    }
}
/// Formats the full name into a `String` and converts that into an `OsString`.
impl<H> From<Name<'_, H>> for OsString {
    fn from(name: Name<'_, H>) -> Self {
        name.to_string().into()
    }
}
/// Formats the full name into a `String` and converts that into a `PathBuf`.
impl<H> From<Name<'_, H>> for PathBuf {
    fn from(name: Name<'_, H>) -> Self {
        name.to_string().into()
    }
}
/// Turns the formatted name into a boxed error by going through the standard
/// `String` to `Box<dyn Error>` conversion.
#[allow(box_pointers)]
impl<H> From<Name<'_, H>> for Box<dyn Error> {
    fn from(name: Name<'_, H>) -> Self {
        name.to_string().into()
    }
}
/// Turns the formatted name into a thread-safe boxed error by going through
/// the standard `String` to `Box<dyn Error + Send + Sync>` conversion.
#[allow(box_pointers)]
impl<H> From<Name<'_, H>> for Box<dyn Error + Send + Sync> {
    fn from(name: Name<'_, H>) -> Self {
        name.to_string().into()
    }
}
/// Prints the `Display` form of the name wrapped in double quotes.
///
/// The text is not escaped, and the flags of `fmt` are not forwarded to the
/// inner `Display` call.
impl<H> Debug for Name<'_, H> {
    fn fmt(&self, fmt: &mut Formatter<'_>) -> fmt::Result {
        fmt.write_fmt(format_args!("\"{}\"", self))
    }
}
/// Writes the string part immediately followed by the number, if there is one.
impl<H> Display for Name<'_, H> {
    fn fmt(&self, fmt: &mut Formatter<'_>) -> fmt::Result {
        let text = self.string_part();
        match self.number {
            Some(suffix) => write!(fmt, "{}{}", text, suffix),
            // Without a number the string part is formatted directly, so the
            // formatter's flags (width, padding, ...) apply to it.
            None => Display::fmt(text, fmt),
        }
    }
}
/// Appends the full text of every name yielded by the iterator.
impl<'system, H> Extend<Name<'system, H>> for String
where
    H: 'system,
{
    fn extend<I: IntoIterator<Item = Name<'system, H>>>(&mut self, iter: I) {
        iter.into_iter().for_each(|name| {
            let text = name.as_str();
            self.push_str(&text);
        });
    }
}
/// Two names are equal when their string parts and their numbers are equal.
///
/// Names that share a subsystem are compared by id without looking up any
/// string; this assumes a subsystem hands out the same id for equal strings
/// (TODO confirm against `Subsystem::create_string_id`). Names of different
/// subsystems are compared by the contents of their string parts.
impl<H> PartialEq for Name<'_, H> {
    #[inline]
    fn eq(&self, other: &Self) -> bool {
        // Compare the subsystems themselves. Taking `&self.system` would yield
        // the address of the reference field, which differs for any two
        // distinct `Name` values and would disable the id comparison.
        if std::ptr::eq(self.system, other.system) {
            self.id == other.id && self.number == other.number
        } else {
            // Check the cheap number first so mismatches skip the string lookup.
            self.number == other.number && self.string_part() == other.string_part()
        }
    }
}
impl<H> Eq for Name<'_, H> {}
/// Orders names by string part first and by number second (`None` sorts
/// before any number).
impl<H> PartialOrd for Name<'_, H> {
    #[inline]
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        // Compare the subsystems themselves; `&self.system` would be the
        // address of the reference field and differ for distinct `Name` values.
        let same_entry = std::ptr::eq(self.system, other.system)
            && self.id == other.id
            && self.number == other.number;
        if same_entry {
            // Same subsystem, id and number: equal without any string lookup.
            Some(Ordering::Equal)
        } else {
            PartialOrd::partial_cmp(
                &(self.string_part(), self.number),
                &(other.string_part(), other.number),
            )
        }
    }
}
/// Total order over names: string part first, number second (`None` sorts
/// before any number).
impl<H> Ord for Name<'_, H> {
    fn cmp(&self, other: &Self) -> Ordering {
        // Compare the subsystems themselves; `&self.system` would be the
        // address of the reference field and differ for distinct `Name` values.
        let same_entry = std::ptr::eq(self.system, other.system)
            && self.id == other.id
            && self.number == other.number;
        if same_entry {
            // Same subsystem, id and number: equal without any string lookup.
            return Ordering::Equal;
        }
        Ord::cmp(
            &(self.string_part(), self.number),
            &(other.string_part(), other.number),
        )
    }
}
/// Implements `PartialEq` and `PartialOrd` between `Name` and `$ty`, in both
/// directions.
///
/// `$ty` must be sliceable to a `str` via `[..]`. When the name carries a
/// number, the other string is split with `Name::split_string` and the
/// `(string part, number)` pairs are compared; otherwise the string part is
/// compared with the whole other string.
macro_rules! impl_cmp {
    ($ty:ty) => {
        impl<H> PartialEq<$ty> for Name<'_, H> {
            #[inline]
            fn eq(&self, other: &$ty) -> bool {
                match self.number {
                    Some(_) => PartialEq::eq(
                        &(self.string_part(), self.number),
                        &Self::split_string(&other[..]),
                    ),
                    None => PartialEq::eq(self.string_part(), &other[..]),
                }
            }
        }

        impl<H> PartialEq<Name<'_, H>> for $ty {
            #[inline]
            fn eq(&self, other: &Name<'_, H>) -> bool {
                match other.number {
                    Some(_) => PartialEq::eq(
                        &Name::<'_, H>::split_string(&self[..]),
                        &(other.string_part(), other.number),
                    ),
                    None => PartialEq::eq(&self[..], other.string_part()),
                }
            }
        }

        impl<H> PartialOrd<$ty> for Name<'_, H> {
            #[inline]
            fn partial_cmp(&self, other: &$ty) -> Option<Ordering> {
                match self.number {
                    Some(_) => PartialOrd::partial_cmp(
                        &(self.string_part(), self.number),
                        &Self::split_string(&other[..]),
                    ),
                    None => PartialOrd::partial_cmp(self.string_part(), &other[..]),
                }
            }
        }

        impl<H> PartialOrd<Name<'_, H>> for $ty {
            #[inline]
            fn partial_cmp(&self, other: &Name<'_, H>) -> Option<Ordering> {
                match other.number {
                    Some(_) => PartialOrd::partial_cmp(
                        &Name::<'_, H>::split_string(&self[..]),
                        &(other.string_part(), other.number),
                    ),
                    None => PartialOrd::partial_cmp(&self[..], other.string_part()),
                }
            }
        }
    };
}
// Generate the two-way `PartialEq`/`PartialOrd` impls between `Name` and each
// string-like type below.
impl_cmp! { str }
impl_cmp! { &str }
impl_cmp! { String }
impl_cmp! { Cow<'_, str> }
// NOTE(review): `H` is not declared at this call site; it presumably resolves
// to the `impl<H>` parameter introduced inside the macro body.
impl_cmp! { Text<'_, H> }
// Unit tests for the `Name` constructors and its comparisons with strings.
#[cfg(test)]
mod test {
#![allow(clippy::non_ascii_literal, clippy::shadow_unrelated)]
use astral::thirdparty::slog;
use super::*;
// On 64-bit targets `Name` and `Option<Name>` must both occupy 16 bytes.
#[cfg(target_pointer_width = "64")]
#[test]
fn test_size() {
assert_eq!(std::mem::size_of::<Name<'_>>(), 16);
assert_eq!(std::mem::size_of::<Option<Name<'_>>>(), 16);
}
// On 32-bit targets `Name` and `Option<Name>` must both occupy 12 bytes.
#[cfg(target_pointer_width = "32")]
#[test]
fn test_size() {
assert_eq!(std::mem::size_of::<Name<'_>>(), 12);
assert_eq!(std::mem::size_of::<Option<Name<'_>>>(), 12);
}
// `from_utf8` on valid input must equal `Name::new` on the same text.
#[test]
fn test_from_utf8() {
let logger = slog::Logger::root(slog::Discard, slog::o!());
let string_subsystem = Subsystem::new(64, &logger);
let xs = b"hello";
assert_eq!(
Name::from_utf8(xs, &string_subsystem).unwrap(),
Name::new("hello", &string_subsystem)
);
let xs = "ศไทย中华Việt Nam".as_bytes();
assert_eq!(
Name::from_utf8(xs, &string_subsystem).unwrap(),
Name::new("ศไทย中华Việt Nam", &string_subsystem)
);
}
// `from_utf8_lossy` must keep valid text and turn invalid byte sequences into U+FFFD.
#[test]
fn test_from_utf8_lossy() {
let logger = slog::Logger::root(slog::Discard, slog::o!());
let string_subsystem = Subsystem::new(64, &logger);
let xs = b"hello";
assert_eq!(Name::from_utf8_lossy(xs, &string_subsystem), "hello");
let xs = "ศไทย中华Việt Nam".as_bytes();
let ys = "ศไทย中华Việt Nam";
assert_eq!(Name::from_utf8_lossy(xs, &string_subsystem), ys);
let xs = b"Hello\xC2 There\xFF Goodbye";
assert_eq!(
Name::from_utf8_lossy(xs, &string_subsystem),
Name::new("Hello\u{FFFD} There\u{FFFD} Goodbye", &string_subsystem)
);
let xs = b"Hello\xC0\x80 There\xE6\x83 Goodbye";
assert_eq!(
Name::from_utf8_lossy(xs, &string_subsystem),
Name::new(
"Hello\u{FFFD}\u{FFFD} There\u{FFFD} Goodbye",
&string_subsystem
)
);
let xs = b"\xF5foo\xF5\x80bar";
assert_eq!(
Name::from_utf8_lossy(xs, &string_subsystem),
Name::new("\u{FFFD}foo\u{FFFD}\u{FFFD}bar", &string_subsystem)
);
let xs = b"\xF1foo\xF1\x80bar\xF1\x80\x80baz";
assert_eq!(
Name::from_utf8_lossy(xs, &string_subsystem),
Name::new("\u{FFFD}foo\u{FFFD}bar\u{FFFD}baz", &string_subsystem)
);
let xs = b"\xF4foo\xF4\x80bar\xF4\xBFbaz";
assert_eq!(
Name::from_utf8_lossy(xs, &string_subsystem),
Name::new(
"\u{FFFD}foo\u{FFFD}bar\u{FFFD}\u{FFFD}baz",
&string_subsystem
)
);
let xs = b"\xF0\x80\x80\x80foo\xF0\x90\x80\x80bar";
assert_eq!(
Name::from_utf8_lossy(xs, &string_subsystem),
Name::new(
"\u{FFFD}\u{FFFD}\u{FFFD}\u{FFFD}foo\u{10000}bar",
&string_subsystem
)
);
let xs = b"\xED\xA0\x80foo\xED\xBF\xBFbar";
assert_eq!(
Name::from_utf8_lossy(xs, &string_subsystem),
Name::new(
"\u{FFFD}\u{FFFD}\u{FFFD}foo\u{FFFD}\u{FFFD}\u{FFFD}bar",
&string_subsystem
)
);
}
// Round-trips text containing surrogate pairs between `Name` and its UTF-16 encoding.
#[test]
fn test_from_utf16() {
let logger = slog::Logger::root(slog::Discard, slog::o!());
let string_subsystem = Subsystem::new(64, &logger);
// Each pair holds a name and the UTF-16 code units of the same text.
let pairs: [(Name<'_>, Vec<u16>); 5] = [(Name::new("𐍅𐌿𐌻𐍆𐌹𐌻𐌰\n", &string_subsystem),
vec![0xd800, 0xdf45, 0xd800, 0xdf3f, 0xd800, 0xdf3b, 0xd800, 0xdf46, 0xd800,
0xdf39, 0xd800, 0xdf3b, 0xd800, 0xdf30, 0x000a]),
(Name::new("𐐒𐑉𐐮𐑀𐐲𐑋 𐐏𐐲𐑍\n", &string_subsystem),
vec![0xd801, 0xdc12, 0xd801, 0xdc49, 0xd801, 0xdc2e, 0xd801, 0xdc40, 0xd801,
0xdc32, 0xd801, 0xdc4b, 0x0020, 0xd801, 0xdc0f, 0xd801, 0xdc32, 0xd801,
0xdc4d, 0x000a]),
(Name::new("𐌀𐌖𐌋𐌄𐌑𐌉·𐌌𐌄𐌕𐌄𐌋𐌉𐌑\n", &string_subsystem),
vec![0xd800, 0xdf00, 0xd800, 0xdf16, 0xd800, 0xdf0b, 0xd800, 0xdf04, 0xd800,
0xdf11, 0xd800, 0xdf09, 0x00b7, 0xd800, 0xdf0c, 0xd800, 0xdf04, 0xd800,
0xdf15, 0xd800, 0xdf04, 0xd800, 0xdf0b, 0xd800, 0xdf09, 0xd800, 0xdf11,
0x000a]),
(Name::new("𐒋𐒘𐒈𐒑𐒛𐒒 𐒕𐒓 𐒈𐒚𐒍 𐒏𐒜𐒒𐒖𐒆 𐒕𐒆\n", &string_subsystem),
vec![0xd801, 0xdc8b, 0xd801, 0xdc98, 0xd801, 0xdc88, 0xd801, 0xdc91, 0xd801,
0xdc9b, 0xd801, 0xdc92, 0x0020, 0xd801, 0xdc95, 0xd801, 0xdc93, 0x0020,
0xd801, 0xdc88, 0xd801, 0xdc9a, 0xd801, 0xdc8d, 0x0020, 0xd801, 0xdc8f,
0xd801, 0xdc9c, 0xd801, 0xdc92, 0xd801, 0xdc96, 0xd801, 0xdc86, 0x0020,
0xd801, 0xdc95, 0xd801, 0xdc86, 0x000a]),
(Name::new("\u{20000}", &string_subsystem), vec![0xD840, 0xDC00])];
for p in &pairs {
let (s, u) = (*p).clone();
let s_str = s.as_str();
let s_as_utf16 = s_str.encode_utf16().collect::<Vec<u16>>();
let u_as_string = Name::from_utf16(&u, &string_subsystem).unwrap().as_str();
// The fixture itself must be valid UTF-16.
assert!(std::char::decode_utf16(u.iter().cloned()).all(|r| r.is_ok()));
assert_eq!(s_as_utf16, u);
assert_eq!(u_as_string, s);
assert_eq!(Name::from_utf16_lossy(&u, &string_subsystem), s);
assert_eq!(Name::from_utf16(&s_as_utf16, &string_subsystem).unwrap(), s);
assert_eq!(u_as_string.encode_utf16().collect::<Vec<u16>>(), u);
}
}
// `from_utf16` must reject inputs containing unpaired surrogates.
#[test]
fn test_utf16_invalid() {
let logger = slog::Logger::root(slog::Discard, slog::o!());
let string_subsystem = Subsystem::new(64, &logger);
assert!(Name::from_utf16(&[0xD800], &string_subsystem).is_err());
assert!(Name::from_utf16(&[0xD800, 0xD800], &string_subsystem).is_err());
assert!(Name::from_utf16(&[0x0061, 0xDC00], &string_subsystem).is_err());
assert!(Name::from_utf16(&[0xD800, 0xd801, 0xdc8b, 0xD800], &string_subsystem).is_err());
}
// `from_utf16_lossy` must replace unpaired surrogates with U+FFFD.
#[test]
fn test_from_utf16_lossy() {
let logger = slog::Logger::root(slog::Discard, slog::o!());
let string_subsystem = Subsystem::new(64, &logger);
assert_eq!(
Name::from_utf16_lossy(&[0xD800], &string_subsystem),
Name::new("\u{FFFD}", &string_subsystem)
);
assert_eq!(
Name::from_utf16_lossy(&[0xD800, 0xD800], &string_subsystem),
Name::new("\u{FFFD}\u{FFFD}", &string_subsystem)
);
assert_eq!(
Name::from_utf16_lossy(&[0x0061, 0xDC00], &string_subsystem),
Name::new("a\u{FFFD}", &string_subsystem)
);
assert_eq!(
Name::from_utf16_lossy(&[0xD800, 0xd801, 0xdc8b, 0xD800], &string_subsystem),
Name::new("\u{FFFD}𐒋\u{FFFD}", &string_subsystem)
);
}
// Compares a `Name` with `String`s assembled through `extend` and `collect`.
// The strings are built from `&str`/`char` pieces only; the `Extend<Name>`
// impl for `String` is not exercised here.
#[allow(clippy::string_extend_chars)]
#[test]
fn test_from_iterator() {
let logger = slog::Logger::root(slog::Discard, slog::o!());
let string_subsystem = Subsystem::new(64, &logger);
let s = Name::new("ศไทย中华Việt Nam", &string_subsystem);
let t = "ศไทย中华";
let u = "Việt Nam";
let mut a = t.to_string();
a.extend(u.chars());
assert_eq!(s, a);
let b: String = vec![t, u].into_iter().collect();
assert_eq!(s, b);
let mut c = t.to_string();
c.extend(vec![u]);
assert_eq!(s, c);
}
}