pub mod iter;
use iter::{Split, Lines};
use std::{ops, str, cmp, fmt, hash, borrow};
use ops::Bound;
use std::rc::Rc;
use std::sync::Arc;
use std::string::FromUtf8Error;
/// A UTF-8 string whose storage is shared through a reference counter.
///
/// Each value is a `(start, len)` window into a shared byte buffer, so
/// cloning or slicing only clones the reference counter `R` and never
/// copies the text itself.
#[derive(Clone)]
pub struct SharedGenString<R: RefCounter> {
    /// Byte offset of the visible window inside `bytes`.
    start: usize,
    /// Length in bytes of the visible window.
    len: usize,
    /// The shared buffer; the unchecked conversions in this file treat
    /// the whole buffer as valid UTF-8.
    bytes: R,
}
/// A [`SharedGenString`] whose buffer is shared through an [`Rc`].
pub type SharedString = SharedGenString<Rc<Box<[u8]>>>;
/// A [`SharedGenString`] whose buffer is shared through an [`Arc`].
pub type SharedSyncString = SharedGenString<Arc<Box<[u8]>>>;
/// Abstraction over the reference-counted pointer that owns the byte buffer.
///
/// An implementor must be cloneable, dereference to the boxed byte slice and
/// be constructible from one.
pub trait RefCounter:
    Sized + Clone + From<Box<[u8]>> + ops::Deref<Target = Box<[u8]>>
{
    /// Tries to take the buffer out of the counter.
    ///
    /// Returns the boxed bytes on success, or gives `self` back unchanged
    /// in the `Err` variant when the buffer cannot be taken.
    fn try_unwrap(self) -> Result<Box<[u8]>, Self>;
}
/// Single-threaded reference counting backed by [`Rc`].
impl RefCounter for Rc<Box<[u8]>> {
    /// Forwards to the inherent [`Rc::try_unwrap`].
    #[inline]
    fn try_unwrap(self) -> Result<Box<[u8]>, Self> {
        Rc::<Box<[u8]>>::try_unwrap(self)
    }
}
/// Atomic reference counting backed by [`Arc`].
impl RefCounter for Arc<Box<[u8]>> {
    /// Forwards to the inherent [`Arc::try_unwrap`].
    #[inline]
    fn try_unwrap(self) -> Result<Box<[u8]>, Self> {
        Arc::<Box<[u8]>>::try_unwrap(self)
    }
}
/// Inherent API of [`SharedGenString`].
///
/// Invariant relied upon by the `unsafe` blocks below: `start + len` never
/// exceeds the buffer length, the whole buffer is valid UTF-8, and both
/// `start` and `start + len` lie on char boundaries.
impl<R> SharedGenString<R>
where
    R: RefCounter,
{
    /// Creates an empty string (via the `From<&str>` conversion of `""`).
    #[inline]
    pub fn new() -> Self {
        "".into()
    }

    /// Assembles a value from its raw parts without any validation.
    ///
    /// Crate-internal: the caller must uphold the invariant described on
    /// this `impl` block.
    #[inline]
    pub(crate) fn new_raw(start: usize, len: usize, bytes: R) -> Self {
        Self { start, len, bytes }
    }

    /// Converts a byte vector into a shared string.
    ///
    /// # Errors
    ///
    /// Returns the [`FromUtf8Error`] produced by [`String::from_utf8`] when
    /// `vec` is not valid UTF-8.
    #[inline]
    pub fn from_utf8(vec: Vec<u8>) -> Result<Self, FromUtf8Error> {
        String::from_utf8(vec).map(Self::from)
    }

    /// Converts a byte vector into a shared string without checking it.
    ///
    /// # Safety
    ///
    /// `vec` must contain valid UTF-8; other methods turn the bytes into
    /// `&str` / `String` without re-validating them.
    #[inline]
    pub unsafe fn from_utf8_unchecked(vec: Vec<u8>) -> Self {
        Self {
            start: 0,
            len: vec.len(),
            bytes: vec.into_boxed_slice().into(),
        }
    }

    /// Returns the bytes of the visible window.
    #[inline]
    pub fn as_bytes(&self) -> &[u8] {
        let end = self.start + self.len;
        // SAFETY: every constructor and slicing method in this impl keeps
        // `start + len` within the buffer (see the invariant on the impl).
        unsafe { self.bytes.get_unchecked(self.start..end) }
    }

    /// Returns the bytes of the whole underlying buffer, ignoring the window.
    #[inline]
    pub fn as_full_bytes(&self) -> &[u8] {
        &self.bytes
    }

    /// Returns the visible window as a string slice.
    #[inline]
    pub fn as_str(&self) -> &str {
        self
    }

    /// Returns the whole underlying buffer as a string slice, ignoring the
    /// window.
    #[inline]
    pub fn as_full_str(&self) -> &str {
        // SAFETY: the whole buffer is valid UTF-8 (invariant on the impl).
        unsafe { str::from_utf8_unchecked(&self.bytes) }
    }

    /// Returns the length of the visible window in bytes.
    #[inline]
    pub fn len(&self) -> usize {
        self.len
    }

    /// Returns `true` when the visible window has a length of zero.
    #[inline]
    pub fn is_empty(&self) -> bool {
        self.len == 0
    }

    /// Resolves `range` against the visible window.
    ///
    /// Returns `(start, end)` as a half-open byte range relative to the
    /// window, or `None` when the range is decreasing, ends past the window,
    /// or one of its bounds cannot be represented in a `usize`.
    #[inline]
    fn validate_range<I>(&self, range: I) -> Option<(usize, usize)>
    where
        I: ops::RangeBounds<usize>,
    {
        let len = self.len();
        // `checked_add` instead of `+ 1`: a bound of `usize::MAX` would wrap
        // to 0 in release builds and make a bogus range look valid.
        let start = match range.start_bound() {
            Bound::Included(&i) => i,
            Bound::Excluded(&i) => i.checked_add(1)?,
            Bound::Unbounded => 0,
        };
        let end = match range.end_bound() {
            Bound::Included(&i) => i.checked_add(1)?,
            Bound::Excluded(&i) => i,
            Bound::Unbounded => len,
        };
        if start > end || end > len {
            None
        } else {
            Some((start, end))
        }
    }

    /// Returns the sub-string covered by `range`, sharing the same buffer.
    ///
    /// Returns `None` when the range is out of bounds or when either end
    /// does not fall on a char boundary. An empty range yields a fresh empty
    /// string that does not share the buffer.
    #[inline]
    pub fn get<I>(&self, range: I) -> Option<Self>
    where
        I: ops::RangeBounds<usize>,
    {
        let (start, end) = self.validate_range(range)?;
        // Check boundaries before the empty shortcut so that an empty range
        // inside a multi-byte char is rejected, like `str::get` does.
        let s = self.as_str();
        if !(s.is_char_boundary(start) && s.is_char_boundary(end)) {
            return None;
        }
        if start == end {
            return Some(Self::new());
        }
        Some(Self {
            start: self.start + start,
            len: end - start,
            bytes: self.bytes.clone(),
        })
    }

    /// Returns the sub-string covered by `range`, sharing the same buffer.
    ///
    /// # Panics
    ///
    /// Panics when the range is out of bounds or when either end does not
    /// fall on a char boundary.
    #[inline]
    pub fn idx<I>(&self, range: I) -> Self
    where
        I: ops::RangeBounds<usize>,
    {
        let (start, end) = self.validate_range(range).expect("invalid range");
        // Without this check the result could expose invalid UTF-8 through
        // the unchecked conversion in `Deref`.
        let s = self.as_str();
        assert!(
            s.is_char_boundary(start) && s.is_char_boundary(end),
            "range does not lie on char boundaries"
        );
        Self {
            start: self.start + start,
            len: end - start,
            bytes: self.bytes.clone(),
        }
    }

    /// Consumes the string and returns the bytes of the visible window.
    ///
    /// When the buffer can be taken out of the reference counter it is
    /// trimmed in place; otherwise the window is copied.
    #[inline]
    pub fn into_bytes(self) -> Vec<u8> {
        let Self { start, len, bytes } = self;
        match bytes.try_unwrap() {
            Ok(owned) => {
                let mut vec = owned.into_vec();
                // Drop the tail first, then the head, leaving only the window.
                vec.truncate(start + len);
                vec.drain(..start);
                vec
            }
            Err(shared) => {
                let all: &[u8] = &shared;
                all[start..start + len].to_vec()
            }
        }
    }

    /// Consumes the string and returns the bytes of the whole buffer,
    /// ignoring the window. Copies only when the buffer cannot be taken out
    /// of the reference counter.
    #[inline]
    pub fn into_full_bytes(self) -> Vec<u8> {
        match self.bytes.try_unwrap() {
            Ok(owned) => owned.into_vec(),
            Err(shared) => shared.to_vec(),
        }
    }

    /// Consumes the string and returns the visible window as a `String`.
    #[inline]
    pub fn into_string(self) -> String {
        let vec = self.into_bytes();
        // SAFETY: the window is valid UTF-8 (invariant on the impl).
        unsafe { String::from_utf8_unchecked(vec) }
    }

    /// Consumes the string and returns the whole buffer as a `String`,
    /// ignoring the window.
    #[inline]
    pub fn into_full_string(self) -> String {
        let vec = self.into_full_bytes();
        // SAFETY: the whole buffer is valid UTF-8 (invariant on the impl).
        unsafe { String::from_utf8_unchecked(vec) }
    }

    /// Consumes the string and returns an owned `String` with `ch` appended.
    #[inline]
    pub fn push(self, ch: char) -> String {
        let mut s = self.into_string();
        s.push(ch);
        s
    }

    /// Consumes the string and returns an owned `String` with `string`
    /// appended.
    #[inline]
    pub fn push_str(self, string: &str) -> String {
        let mut s = self.into_string();
        s.push_str(string);
        s
    }

    /// Splits the string at byte offset `at`.
    ///
    /// `self` keeps `[0, at)` and the returned value holds `[at, len)`;
    /// both share the same buffer.
    ///
    /// # Panics
    ///
    /// Panics when `at` is past the end or not on a char boundary.
    #[inline]
    pub fn split_off(&mut self, at: usize) -> Self {
        // `is_char_boundary` is false for `at > len`, so this also guards
        // the subtraction below.
        assert!(self.is_char_boundary(at));
        let tail_len = self.len - at;
        self.len = at;
        Self {
            start: self.start + at,
            len: tail_len,
            bytes: self.bytes.clone(),
        }
    }

    /// Consumes the string and returns a [`Split`] iterator built from the
    /// window and the separator `byte`.
    ///
    /// NOTE(review): the splitting logic lives in the `iter` module; a
    /// non-ASCII `byte` could presumably cut inside a char — confirm there.
    #[inline]
    pub fn split(self, byte: u8) -> Split<R> {
        Split::new(self.start, self.len, self.bytes, byte)
    }

    /// Consumes the string and returns a [`Lines`] iterator built from the
    /// window. The line-ending rules are defined in the `iter` module.
    #[inline]
    pub fn lines(self) -> Lines<R> {
        Lines::new(self.start, self.len, self.bytes)
    }

    /// Shortens the visible window to `new_len` bytes.
    ///
    /// Does nothing when `new_len` is not smaller than the current length.
    ///
    /// # Panics
    ///
    /// Panics when `new_len` is not on a char boundary.
    #[inline]
    pub fn truncate(&mut self, new_len: usize) {
        if new_len < self.len {
            assert!(self.is_char_boundary(new_len));
            self.len = new_len;
        }
    }
}
/// Displays the visible window exactly like the equivalent `&str`.
impl<R: RefCounter> fmt::Display for SharedGenString<R> {
    #[inline]
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let text: &str = self.as_str();
        fmt::Display::fmt(text, f)
    }
}
/// Debug-formats the visible window exactly like the equivalent `&str`;
/// the offsets and the buffer are not shown.
impl<R: RefCounter> fmt::Debug for SharedGenString<R> {
    #[inline]
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let text: &str = self.as_str();
        fmt::Debug::fmt(text, f)
    }
}
/// Hashes the visible window as a `str`, matching the `Borrow<str>` impl.
impl<R: RefCounter> hash::Hash for SharedGenString<R> {
    #[inline]
    fn hash<H: hash::Hasher>(&self, hasher: &mut H) {
        let text: &str = self.as_str();
        hash::Hash::hash(text, hasher)
    }
}
/// Lets a shared string be used wherever a `&str` is expected.
impl<R: RefCounter> ops::Deref for SharedGenString<R> {
    type Target = str;

    #[inline]
    fn deref(&self) -> &str {
        let window = self.as_bytes();
        // SAFETY: relies on the visible window always being valid UTF-8.
        unsafe { str::from_utf8_unchecked(window) }
    }
}
/// Cheap borrow of the visible window as `&str`.
impl<R: RefCounter> AsRef<str> for SharedGenString<R> {
    #[inline]
    fn as_ref(&self) -> &str {
        self.as_str()
    }
}
/// Borrows the visible window as `&str`.
impl<R: RefCounter> borrow::Borrow<str> for SharedGenString<R> {
    #[inline]
    fn borrow(&self) -> &str {
        self.as_str()
    }
}
/// Two shared strings are equal when their visible windows hold the same
/// bytes, whatever reference counter each one uses.
impl<R: RefCounter, O: RefCounter> cmp::PartialEq<SharedGenString<O>> for SharedGenString<R> {
    #[inline]
    fn eq(&self, other: &SharedGenString<O>) -> bool {
        let (lhs, rhs) = (self.as_bytes(), other.as_bytes());
        lhs == rhs
    }
}
/// Marker impl: equality between shared strings is byte equality.
impl<R> cmp::Eq for SharedGenString<R> where R: RefCounter {}
/// Compares the visible window with a `str`.
impl<R: RefCounter> cmp::PartialEq<str> for SharedGenString<R> {
    #[inline]
    fn eq(&self, other: &str) -> bool {
        self.as_bytes() == other.as_bytes()
    }
}
/// Compares the visible window with a `&str`, so string literals can be
/// used directly on the right-hand side.
impl<R: RefCounter> cmp::PartialEq<&str> for SharedGenString<R> {
    #[inline]
    fn eq(&self, other: &&str) -> bool {
        let rhs: &str = other;
        self.as_str() == rhs
    }
}
/// Allows a `str` on the left-hand side of a comparison with a
/// [`SharedString`].
impl cmp::PartialEq<SharedString> for str {
    #[inline]
    fn eq(&self, other: &SharedString) -> bool {
        let rhs: &str = other.as_str();
        self == rhs
    }
}
/// Allows a `&str` (for example a literal) on the left-hand side of a
/// comparison with a [`SharedString`].
impl cmp::PartialEq<SharedString> for &str {
    #[inline]
    fn eq(&self, other: &SharedString) -> bool {
        let lhs: &str = self;
        lhs == other.as_str()
    }
}
/// Allows a `str` on the left-hand side of a comparison with a
/// [`SharedSyncString`].
impl cmp::PartialEq<SharedSyncString> for str {
    #[inline]
    fn eq(&self, other: &SharedSyncString) -> bool {
        let rhs: &str = other.as_str();
        self == rhs
    }
}
/// Allows a `&str` (for example a literal) on the left-hand side of a
/// comparison with a [`SharedSyncString`].
impl cmp::PartialEq<SharedSyncString> for &str {
    #[inline]
    fn eq(&self, other: &SharedSyncString) -> bool {
        let lhs: &str = self;
        lhs == other.as_str()
    }
}
impl<R> Default for SharedGenString<R>
where R: RefCounter {
#[inline]
fn default() -> Self {
Self::new()
}
}
/// Takes ownership of a `String`; the window covers the whole text.
impl<R: RefCounter> From<String> for SharedGenString<R> {
    #[inline]
    fn from(s: String) -> Self {
        // Boxing does not change the byte count, so the length can be read
        // from the boxed buffer.
        let buffer: Box<[u8]> = s.into_bytes().into_boxed_slice();
        let len = buffer.len();
        Self {
            start: 0,
            len,
            bytes: R::from(buffer),
        }
    }
}
/// Copies a string slice into a new shared buffer.
impl<R: RefCounter> From<&str> for SharedGenString<R> {
    #[inline]
    fn from(s: &str) -> Self {
        let owned: String = s.to_string();
        Self::from(owned)
    }
}
impl<R> From<SharedGenString<R>> for String
where R: RefCounter {
#[inline]
fn from(s: SharedGenString<R>) -> Self {
s.into_string()
}
}
/// Unit tests for the `Rc`- and `Arc`-backed shared strings.
#[cfg(test)]
mod tests {
    use super::{SharedString, SharedSyncString};

    /// Splitting an `Rc`-backed string yields two windows of the right size.
    #[test]
    fn rc() {
        let mut hello: SharedString = "Hello, World!".into();
        assert_eq!(hello.len(), 13);
        let world = hello.split_off(7);
        assert_eq!(hello, "Hello, ");
        assert_eq!(hello.len(), 7);
        assert_eq!(world, "World!");
        assert_eq!(world.len(), 6);
    }

    /// An `Arc`-backed window can be moved to another thread.
    #[test]
    fn arc() {
        let mut hello: SharedSyncString = "Hello, World!".into();
        let world = hello.split_off(7);
        let worker = std::thread::spawn(move || {
            assert_eq!(world, "World!");
            assert_eq!(world.as_full_str(), "Hello, World!");
        });
        assert_eq!(hello, "Hello, ");
        // Join so the assertions in the worker actually run before the test
        // ends and a panic there fails the test.
        worker.join().expect("worker thread panicked");
    }

    /// Round trip between `String` and `SharedString`.
    #[test]
    fn into() {
        let hello = SharedString::from("Hello, World!");
        let s = hello.into_string();
        assert_eq!(s, "Hello, World!");
        let mut hello: SharedString = s.into();
        let world = hello.split_off(7);
        let s = world.into_string();
        assert_eq!(s, "World!");
        assert!(hello != s.as_str());
        let n_hello = SharedString::from("Hello, ");
        assert_eq!(hello, n_hello);
    }

    /// Splitting at 0 leaves `self` empty and returns everything.
    #[test]
    fn split_off_zero() {
        let mut foobar = SharedString::from("foobar");
        let n_foobar = foobar.split_off(0);
        assert_eq!("", foobar);
        assert_eq!("foobar", n_foobar);
    }

    /// Splitting inside a multi-byte char must panic.
    #[test]
    #[should_panic]
    fn panic_char_boundary() {
        let mut s = SharedString::from("abc 好 def");
        let _ = s.split_off(5);
    }

    /// Splitting past the end must panic.
    #[test]
    #[should_panic]
    fn panic_length() {
        let mut s = SharedString::from("abc");
        let _ = s.split_off(5);
    }

    /// Plain `str` range indexing works through `Deref`.
    #[test]
    fn range_as_str() {
        let raw = SharedString::from("Hello, World!");
        let hello = &raw[..5];
        let world = &raw[7..];
        assert_eq!(hello, "Hello");
        assert_eq!(world, "World!");
    }

    #[test]
    fn range_with_get() {
        let raw = SharedString::from("Hello, World!");
        let hello = raw.get(..5).unwrap();
        let world = raw.get(7..).unwrap();
        assert_eq!(hello, "Hello");
        assert_eq!(world, "World!");
    }

    #[test]
    fn range_with_idx() {
        let raw = SharedString::from("Hello, World!");
        let hello = raw.idx(..5);
        let world = raw.idx(7..);
        assert_eq!(hello, "Hello");
        assert_eq!(world, "World!");
    }

    #[test]
    fn empty() {
        let s = SharedString::from("");
        assert_eq!(s.len(), 0);
        assert!(s.is_empty());
        assert!(s.get(..).is_some());
        assert!(s.idx(..).is_empty());
        assert!(s.get(1..).is_none());
    }

    /// `Rc`- and `Arc`-backed strings compare equal by content.
    #[test]
    fn equal() {
        let rc: SharedString = "Hello, World!".into();
        let arc: SharedSyncString = "Hello, World!".into();
        assert_eq!(rc, arc);
    }

    #[test]
    fn split() {
        let fullname = SharedString::from("Albert Einstein");
        let mut split = fullname.split(b' ');
        assert_eq!(split.next().unwrap(), "Albert");
        assert_eq!(split.next().unwrap(), "Einstein");
        assert_eq!(split.next(), None);
    }

    #[test]
    fn lines() {
        let quote = SharedString::from("Wenn die Menschen nur über das sprächen,\nwas sie begreifen,\r\ndann würde es sehr still auf der Welt sein.\n\r\n");
        let mut lines = quote.lines();
        assert_eq!(
            lines.next().unwrap(),
            "Wenn die Menschen nur über das sprächen,"
        );
        assert_eq!(lines.next().unwrap(), "was sie begreifen,");
        assert_eq!(
            lines.next().unwrap(),
            "dann würde es sehr still auf der Welt sein."
        );
        assert_eq!(lines.next().unwrap(), "");
        assert_eq!(lines.next(), None);
        let empty = SharedString::from(" ");
        let mut lines = empty.lines();
        assert_eq!(" ", lines.next().unwrap());
        assert_eq!(lines.next(), None);
    }

    /// `idx` agrees with `str` range indexing on the same offsets.
    #[test]
    fn range_eq_str_range() {
        let line = "foo: bar";
        let at = line.find(':').unwrap();
        let key = &line[..at];
        let value = &line[(at + 2)..];
        assert_eq!(key, "foo");
        assert_eq!(value, "bar");
        let line = SharedString::from(line);
        let key = line.idx(..at);
        let value = line.idx((at + 2)..);
        assert_eq!(key, "foo");
        assert_eq!(value, "bar");
    }

    /// Offsets passed to `idx` are relative to the window, not the buffer.
    #[test]
    fn range_in_range() {
        let line = "date: Mon, 30 Nov 2020 22:16:22 GMT\nserver: mw1271.eqiad.wmnet\nx-content-type-options: nosniff";
        let mut lines = SharedString::from(line).lines();
        let _ = lines.next().unwrap();
        let line = lines.next().unwrap();
        let at = line.find(':').unwrap();
        assert_eq!(at, 6);
        let key = line.idx(..at);
        assert_eq!(key, "server");
        let value = line.idx((at + 2)..);
        assert_eq!(value, "mw1271.eqiad.wmnet");
    }

    #[test]
    fn truncate() {
        let mut foobar = SharedString::from("foobar");
        foobar.truncate(3);
        assert_eq!(foobar, "foo");
    }
}