use std::cell::OnceCell;
use crate::object_pool::{ObjectPool, Pooled};
/// A line of text that can be sliced by UTF-16 code-unit indices.
///
/// The translation from UTF-16 indices to UTF-8 byte offsets is computed
/// lazily, at most once, by `substring` and cached in `utf16_byte_indices`.
#[derive(Debug)]
pub struct WithUtf16<'object_pool, 'text> {
/// The text that `substring` slices.
pub line: &'text str,
/// Lazily initialised lookup table, filled in by `substring`.
///
/// Holds `None` when `line` is pure ASCII (UTF-16 indices and byte offsets
/// coincide, so no table is needed). Otherwise holds one entry per UTF-16
/// code unit of `line`, each being the byte offset at which the character
/// owning that code unit starts. The table is derived from `line` and is
/// only meaningful for that exact text.
pub utf16_byte_indices: OnceCell<Option<Pooled<'object_pool>>>,
/// Pool that the lookup table's storage is pulled from.
object_pool: &'object_pool ObjectPool,
}
impl<'object_pool, 'text> WithUtf16<'object_pool, 'text> {
pub fn new(object_pool: &'object_pool ObjectPool, line: &'text str) -> Self {
Self {
utf16_byte_indices: OnceCell::new(),
line,
object_pool,
}
}
#[allow(unsafe_code)]
pub fn substring(
&self,
start_utf16_index: usize,
end_utf16_index: usize,
) -> &'text str {
if end_utf16_index <= start_utf16_index {
return "";
}
let utf16_byte_indices = self.utf16_byte_indices.get_or_init(|| {
if self.line.is_ascii() {
return None;
}
let mut vec = self.object_pool.pull(self.line.len());
let bytes = self.line.as_bytes();
let mut byte_pos = 0;
while byte_pos < bytes.len() {
let byte = unsafe { *bytes.get_unchecked(byte_pos) };
if byte < 0x80 {
vec.push(byte_pos);
byte_pos += 1;
} else if byte < 0xE0 {
vec.push(byte_pos);
byte_pos += 2;
} else if byte < 0xF0 {
vec.push(byte_pos);
byte_pos += 3;
} else {
vec.push(byte_pos);
vec.push(byte_pos);
byte_pos += 4;
}
}
Some(vec)
});
let utf8_len = self.line.len();
let Some(utf16_byte_indices) = utf16_byte_indices else {
let start_utf16_index = start_utf16_index.min(utf8_len);
let end_utf16_index = end_utf16_index.min(utf8_len);
return unsafe {
self.line.get_unchecked(start_utf16_index..end_utf16_index)
};
};
let start = *utf16_byte_indices
.get(start_utf16_index)
.unwrap_or(&utf8_len);
let end = *utf16_byte_indices.get(end_utf16_index).unwrap_or(&utf8_len);
unsafe {
self.line.get_unchecked(start..end)
}
}
}
#[cfg(test)]
mod tests {
    use super::WithUtf16;
    use crate::object_pool::ObjectPool;

    /// Slices `text` through a fresh `WithUtf16` backed by a fresh default
    /// pool, so every assertion starts from an empty cache.
    fn slice(text: &str, start: usize, end: usize) -> &str {
        let pool = ObjectPool::default();
        let wrapper = WithUtf16::new(&pool, text);
        wrapper.substring(start, end)
    }

    /// A range fully inside an ASCII line.
    #[test]
    fn test_substring() {
        assert_eq!(slice("foobar", 0, 3), "foo");
    }

    /// Indices past the end of the line are clamped to its length.
    #[test]
    fn test_out_of_bounds() {
        assert_eq!(slice("foobar", 0, 10), "foobar");
        assert_eq!(slice("foobar", 6, 10), "");
    }

    /// The end index precedes the start index, which yields an empty string.
    #[test]
    fn test_start_less_than_end() {
        assert_eq!(slice("foobar", 3, 2), "");
    }

    /// An empty range yields an empty string.
    #[test]
    fn test_start_and_end_equal() {
        assert_eq!(slice("foobar", 3, 3), "");
    }

    /// Each emoji takes two UTF-16 code units, so units 2..4 select the
    /// second one.
    #[test]
    fn test_multiple_byte_characters() {
        assert_eq!(slice("🙈🙉🙊🐒", 2, 4), "🙉");
    }
}