/// Converts a position counted in `char`s into a byte offset within `s`.
///
/// Thin wrapper: both arguments are forwarded unchanged to
/// `str_indices::chars::to_byte_idx` and its result is returned as-is.
///
/// NOTE(review): `split_at_char` feeds this result straight into
/// `str::split_at`, so it depends on the offset always being a char boundary
/// no greater than `s.len()`. `str_indices` is understood to clamp
/// past-the-end positions to `s.len()` — confirm against the crate version
/// in use.
pub fn chars_to_bytes(s: &str, char_pos: usize) -> usize {
str_indices::chars::to_byte_idx(s, char_pos)
}
/// Splits `s` in two at a position counted in `char`s rather than bytes.
///
/// The char position is translated to a byte offset with `chars_to_bytes`
/// first, because `str::split_at` only understands byte offsets.
///
/// Returns `(head, tail)`, where `head` holds the first `char_pos` chars and
/// `tail` holds everything after them; for example, splitting `"日本語"` at
/// `1` yields `("日", "本語")`.
pub fn split_at_char(s: &str, char_pos: usize) -> (&str, &str) {
    let byte_pos = chars_to_bytes(s, char_pos);
    s.split_at(byte_pos)
}
/// Takes the first `len` chars off the front of `*content` and returns them.
///
/// `*content` is advanced in place: after the call it refers to whatever
/// followed the returned prefix. Both the returned slice and the updated
/// `*content` borrow from the same underlying string for the full `'a`.
#[inline]
#[allow(unused)]
pub fn consume_chars<'a>(content: &mut &'a str, len: usize) -> &'a str {
    // Copy the inner reference out of the `&mut` so the two halves carry the
    // full `'a` lifetime instead of a shorter reborrow of `content`.
    let whole: &'a str = *content;
    let (taken, rest) = split_at_char(whole, len);
    *content = rest;
    taken
}
/// Converts a byte offset within `s` into a position counted in `char`s.
///
/// Thin wrapper: both arguments are forwarded unchanged to
/// `str_indices::chars::from_byte_idx` and its result is returned as-is.
/// This is the inverse direction of `chars_to_bytes`.
#[inline]
// NOTE(review): within the visible code this is only called from the test
// module, which is presumably why the lint is silenced — confirm before
// removing the attribute.
#[allow(unused)]
pub fn bytes_to_chars(s: &str, byte_pos: usize) -> usize {
str_indices::chars::from_byte_idx(s, byte_pos)
}
/// Returns the number of `char`s in `s`.
///
/// Thin wrapper: `s` is forwarded unchanged to `str_indices::chars::count`
/// and its result is returned as-is.
pub fn count_chars(s: &str) -> usize {
str_indices::chars::count(s)
}
#[cfg(test)]
mod test {
    // `super` is always the module that defines the functions under test,
    // regardless of where this file is mounted in the crate.
    use super::*;

    /// Reference implementation of `chars_to_bytes` built only on `std`:
    /// the byte offset where the `char_pos`-th char starts, or `s.len()` when
    /// `char_pos` is at or past the end.
    fn std_chars_to_bytes(s: &str, char_pos: usize) -> usize {
        s.char_indices().nth(char_pos).map_or(s.len(), |(i, _)| i)
    }

    /// Reference implementation of `bytes_to_chars` built only on `std`.
    /// `byte_pos` must lie on a char boundary, otherwise the slice panics.
    fn std_bytes_to_chars(s: &str, byte_pos: usize) -> usize {
        s[..byte_pos].chars().count()
    }

    /// Samples covering 1-, 2-, 3- and 4-byte UTF-8 encodings.
    const TRICKY_CHARS: &[&str] = &[
        "a", "b", "c", "1", "2", "3", " ", "\n", "©", "¥", "½", "Ύ", "Δ", "δ", "Ϡ", "←", "↯",
        "↻", "⇈", "𐆐", "𐆔", "𐆘", "𐆚",
    ];

    /// Checks every wrapper in this module against the `std` reference
    /// implementations, at each char position of `s` (including one past the
    /// last char).
    fn check_matches(s: &str) {
        let char_len = s.chars().count();
        assert_eq!(count_chars(s), char_len);

        for i in 0..=char_len {
            let expected_bytes = std_chars_to_bytes(s, i);
            // Go through this module's own wrapper so the public API is what
            // is actually under test, not just the underlying crate.
            assert_eq!(chars_to_bytes(s, i), expected_bytes);

            assert_eq!(std_bytes_to_chars(s, expected_bytes), i);
            assert_eq!(bytes_to_chars(s, expected_bytes), i);
        }
    }

    #[test]
    fn str_pos_works() {
        check_matches("hi");
        check_matches("");
        for s in TRICKY_CHARS {
            check_matches(s);
        }

        let big_str = TRICKY_CHARS.concat();
        check_matches(&big_str);
    }

    #[test]
    fn test_split_at_char() {
        assert_eq!(split_at_char("", 0), ("", ""));
        assert_eq!(split_at_char("hi", 0), ("", "hi"));
        assert_eq!(split_at_char("hi", 1), ("h", "i"));
        assert_eq!(split_at_char("hi", 2), ("hi", ""));
        assert_eq!(split_at_char("日本語", 0), ("", "日本語"));
        assert_eq!(split_at_char("日本語", 1), ("日", "本語"));
        assert_eq!(split_at_char("日本語", 2), ("日本", "語"));
        assert_eq!(split_at_char("日本語", 3), ("日本語", ""));
    }

    #[test]
    fn test_consume_chars() {
        let mut rest = "日本語hi";
        assert_eq!(consume_chars(&mut rest, 2), "日本");
        assert_eq!(rest, "語hi");
        assert_eq!(consume_chars(&mut rest, 0), "");
        assert_eq!(rest, "語hi");
        assert_eq!(consume_chars(&mut rest, 3), "語hi");
        assert_eq!(rest, "");
    }

    /// Pins the behavior `split_at_char` depends on to avoid panicking:
    /// `str_indices` documents past-the-end char positions as mapping to the
    /// one-past-the-end byte index.
    #[test]
    fn past_the_end_positions_clamp() {
        assert_eq!(chars_to_bytes("hi", 5), 2);
        assert_eq!(chars_to_bytes("日本語", 10), "日本語".len());
        assert_eq!(split_at_char("hi", 5), ("hi", ""));
    }
}