use std::cmp::min;
use std::ops::Add;
use std::ops::Sub;
use std::str;
use dupe::Dupe;
use crate::convert_indices::convert_indices;
#[inline(always)]
fn is_1byte(x: u8) -> bool {
x & 0x80 == 0
}
#[inline(always)]
fn is_1bytes(x: u64) -> bool {
x & 0x8080808080808080 == 0
}
fn skip_at_most_1byte(x: &str, n: usize) -> usize {
if n == 0 {
return 0;
}
debug_assert!(x.len() >= n);
fn f(x: &str, n: usize) -> *const u8 {
let leading = min(x.as_ptr().align_offset(8), n);
let trailing = (n - leading) % 8;
let loops = (n - leading) / 8;
let mut p = x.as_ptr();
for _ in 0..leading {
if is_1byte(unsafe { *p }) {
p = unsafe { p.add(1) };
} else {
return p;
}
}
let mut p = p as *const u64;
for _ in 0..loops {
if is_1bytes(unsafe { *p }) {
p = unsafe { p.add(1) };
} else {
return p as *const u8;
}
}
let mut p = p as *const u8;
for _ in 0..trailing {
if is_1byte(unsafe { *p }) {
p = unsafe { p.add(1) };
} else {
return p;
}
}
p
}
unsafe { f(x, n).offset_from(x.as_ptr()) as usize }
}
#[inline]
pub fn at(x: &str, i: CharIndex) -> Option<char> {
if i.0 >= x.len() {
return None;
}
let n = skip_at_most_1byte(x, i.0);
let s = unsafe { x.get_unchecked(n..) };
s.chars().nth(i.0 - n)
}
#[inline]
pub fn len(x: &str) -> CharIndex {
let n = skip_at_most_1byte(x, x.len());
if n == x.len() {
CharIndex(n) } else {
CharIndex(unsafe { x.get_unchecked(n..) }.chars().count() + n)
}
}
#[inline]
pub fn count_matches_byte(x: &str, needle: u8) -> usize {
x.as_bytes().iter().filter(|x| **x == needle).count()
}
#[inline]
pub fn count_matches(x: &str, needle: &str) -> usize {
if needle.len() == 1 {
count_matches_byte(x, needle.as_bytes()[0])
} else {
x.matches(needle).count()
}
}
#[derive(PartialEq, Debug)]
pub struct StrIndices<'a> {
pub start: CharIndex,
pub haystack: &'a str,
}
#[inline]
pub fn split_at(x: &str, i: CharIndex) -> Option<(&str, &str)> {
if i.0 == 0 {
return Some(("", x));
}
if i.0 > x.len() {
return None;
}
let n = skip_at_most_1byte(x, i.0);
let s = unsafe { x.get_unchecked(n..) };
let mut c = s.chars();
for _ in 0..i.0 - n {
c.next()?;
}
Some(x.split_at(x.len() - c.as_str().len()))
}
fn split_at_end(x: &str, i: CharIndex) -> &str {
match split_at(x, i) {
Some((before, _)) => before,
None => x,
}
}
fn convert_str_indices_slow(s: &str, start: Option<i32>, end: Option<i32>) -> Option<StrIndices> {
debug_assert!(matches!(start, Some(start) if start < 0) || matches!(end, Some(end) if end < 0));
debug_assert!(
matches!((start, end), (Some(start), Some(end))
if start >= 0 || end >= 0 || (start <= end))
|| start.is_none()
|| end.is_none()
);
let len = len(s);
let (start, end) = convert_indices(len.0 as i32, start, end);
if start > end {
return None;
}
let (start, end) = (CharIndex(start), CharIndex(end));
debug_assert!(end <= len);
let s = if len.0 == s.len() {
unsafe { s.get_unchecked(start.0..end.0) }
} else {
let (_, s) = split_at(s, start).unwrap();
let (s, _) = split_at(s, end - start).unwrap();
s
};
Some(StrIndices { start, haystack: s })
}
#[inline(always)]
pub fn convert_str_indices(s: &str, start: Option<i32>, end: Option<i32>) -> Option<StrIndices> {
match (start, end) {
(None, None) => Some(StrIndices {
start: CharIndex(0),
haystack: s,
}),
(Some(start), None) if start >= 0 => {
let (_, s) = split_at(s, CharIndex(start as usize))?;
Some(StrIndices {
start: CharIndex(start as usize),
haystack: s,
})
}
(None, Some(end)) if end >= 0 => {
let s = split_at_end(s, CharIndex(end as usize));
Some(StrIndices {
start: CharIndex(0),
haystack: s,
})
}
(Some(start), Some(end)) if start >= 0 && end >= start => {
let (_, s) = split_at(s, CharIndex(start as usize))?;
let s = split_at_end(s, CharIndex((end - start) as usize));
Some(StrIndices {
start: CharIndex(start as usize),
haystack: s,
})
}
(Some(start), Some(end)) if ((start >= 0) == (end >= 0)) && start > end => None,
(start, end) => convert_str_indices_slow(s, start, end),
}
}
#[inline]
pub fn contains(haystack: &str, needle: &str) -> bool {
if needle.is_empty() {
true
} else if needle.len() == 1 {
memchr::memchr(needle.as_bytes()[0], haystack.as_bytes()).is_some()
} else if haystack.len() < needle.len() {
false
} else {
assert!(haystack.len() >= needle.len());
let needle_0 = needle.as_bytes()[0];
for start in 0..=haystack.len() - needle.len() {
if haystack.as_bytes()[start] != needle_0 {
continue;
}
if haystack.as_bytes()[start..].starts_with(needle.as_bytes()) {
return true;
}
}
false
}
}
#[derive(Eq, PartialEq, PartialOrd, Ord, Copy, Clone, Dupe, Debug)]
pub struct CharIndex(pub usize);
impl Sub for CharIndex {
type Output = CharIndex;
fn sub(self, rhs: CharIndex) -> CharIndex {
CharIndex(self.0 - rhs.0)
}
}
impl Add for CharIndex {
type Output = CharIndex;
fn add(self, rhs: CharIndex) -> CharIndex {
CharIndex(self.0 + rhs.0)
}
}
#[cfg(test)]
mod tests {
use std::iter;
use crate::fast_string::convert_str_indices;
use crate::fast_string::CharIndex;
use crate::fast_string::StrIndices;
#[test]
fn test_convert_str_indices() {
assert_eq!(
Some(StrIndices {
start: CharIndex(0),
haystack: "abc",
}),
convert_str_indices("abc", None, None)
);
assert_eq!(None, convert_str_indices("abc", Some(2), Some(1)));
assert_eq!(
Some(StrIndices {
start: CharIndex(0),
haystack: "abc",
}),
convert_str_indices("abc", Some(-10), Some(10))
);
assert_eq!(
Some(StrIndices {
start: CharIndex(1),
haystack: "",
}),
convert_str_indices("abc", Some(1), Some(1))
);
assert_eq!(
Some(StrIndices {
start: CharIndex(0),
haystack: "ab",
}),
convert_str_indices("abc", Some(-10), Some(2))
);
assert_eq!(
Some(StrIndices {
start: CharIndex(0),
haystack: "ab",
}),
convert_str_indices("abc", Some(-10), Some(-1))
);
assert_eq!(
Some(StrIndices {
start: CharIndex(0),
haystack: "s",
}),
convert_str_indices("short", Some(0), Some(-4))
);
assert_eq!(
Some(StrIndices {
start: CharIndex(0),
haystack: "fish",
}),
convert_str_indices("fish", None, Some(10))
);
}
#[test]
fn test_convert_str_indices_non_ascii() {
assert_eq!(
Some(StrIndices {
start: CharIndex(6),
haystack: "под",
}),
convert_str_indices("Город под подошвой", Some(6), Some(9))
)
}
#[test]
fn test_convert_str_indices_trigger_debug_assertions() {
fn none_ors() -> impl Iterator<Item = Option<i32>> {
iter::once(None).chain((-30..30).map(Some))
}
for s in &["", "a", "abcde", "Телемак"] {
for start in none_ors() {
for end in none_ors() {
let _ = convert_str_indices(s, start, end);
}
}
}
}
}