use std::borrow::Cow;
use std::ptr::NonNull;
/// One unit of text together with the offset it was recorded at and, optionally, the
/// `Char` it was derived from.
///
/// `'b` is the lifetime of borrowed text; `'a` is the lifetime of the links between
/// `Char`s.
#[derive(Debug, PartialEq)]
pub struct Char<'a, 'b: 'a> {
    /// Text of this unit. An empty string marks a sentinel (see `is_sentinel`).
    pub c: Cow<'b, str>,
    /// Offset recorded for this unit; `CharPool::build_char_array` stores the byte
    /// offset into its input text here.
    pub offset: usize,
    /// The `Char` this one was created from, or `None` if it has no predecessor.
    pub source: Option<&'a Char<'a, 'b>>,
}
impl<'a, 'b> Char<'a, 'b>
where
'b: 'a,
{
fn new(c: Cow<'b, str>, offset: usize) -> Self {
Self {
c,
offset,
source: None,
}
}
fn new_from(c: Cow<'b, str>, offset: usize, source: Option<&'a Char<'a, 'b>>) -> Self {
Self { c, offset, source }
}
fn new_sentinel(offset: usize) -> Self {
Self {
c: Cow::Borrowed(""),
offset,
source: None,
}
}
pub fn c(&self) -> Option<&str> {
if self.c.is_empty() {
None
} else {
Some(self.c.as_ref())
}
}
pub fn is_sentinel(&self) -> bool {
self.c.is_empty()
}
pub fn is_transliterated(&self) -> bool {
let mut c = self;
while let Some(source) = c.source {
if c.c != source.c {
return true;
}
c = source;
}
false
}
}
/// Owns the `Char` values created through its constructor methods.
#[derive(Debug)]
pub struct CharPool<'a, 'b: 'a> {
    // Each `Char` is boxed individually so that its address stays the same when the
    // vector grows; the pool's constructor methods return references to these boxes.
    #[allow(clippy::vec_box)]
    chars: Vec<Box<Char<'a, 'b>>>,
}
impl<'a, 'b> PartialEq for CharPool<'a, 'b>
where
'b: 'a,
{
fn eq(&self, other: &Self) -> bool {
NonNull::from(self) == NonNull::from(other)
}
}
impl<'a, 'b> CharPool<'a, 'b>
where
    'b: 'a,
{
    /// Creates an empty pool.
    // No `Default` impl is provided for this type, so the lint is silenced.
    #[allow(clippy::new_without_default)]
    pub fn new() -> Self {
        Self { chars: Vec::new() }
    }

    /// Moves `ch` into the pool and returns a reference to its boxed storage.
    ///
    /// This is the single place where the pool extends a reference's lifetime to `'a`.
    fn intern(&mut self, ch: Char<'a, 'b>) -> &'a Char<'a, 'b> {
        self.chars.push(Box::new(ch));
        let boxed = self
            .chars
            .last()
            .expect("an element was pushed on the previous line");
        let stored: *const Char<'a, 'b> = &**boxed;
        // SAFETY: `stored` points into the heap allocation of the `Box` just pushed. This
        // impl never removes or replaces a box, and growing `chars` moves only the `Box`
        // pointers, not the `Char` values, so the address stays valid while the pool is
        // alive. NOTE: `'a` is a type parameter and is not tied to the borrow of `self`,
        // so the compiler does not stop the reference from outliving the pool; callers
        // must keep the pool alive for as long as they use the returned reference.
        unsafe { &*stored }
    }

    /// Stores a new `Char` with text `c` at `offset` and no source.
    ///
    /// The returned reference must not be used after the pool has been dropped.
    pub fn new_char(&mut self, c: Cow<'b, str>, offset: usize) -> &'a Char<'a, 'b>
    where
        Self: 'a,
    {
        self.intern(Char::new(c, offset))
    }

    /// Stores a new `Char` with text `c` at `offset` whose source is `source`.
    ///
    /// The returned reference must not be used after the pool has been dropped.
    pub fn new_char_from(
        &mut self,
        c: Cow<'b, str>,
        offset: usize,
        source: &'a Char<'a, 'b>,
    ) -> &'a Char<'a, 'b>
    where
        Self: 'a,
    {
        self.intern(Char::new_from(c, offset, Some(source)))
    }

    /// Stores a copy of `original`'s text at a different `offset`, with `original`
    /// recorded as the source.
    pub fn new_with_offset(&mut self, original: &'a Char<'a, 'b>, offset: usize) -> &'a Char<'a, 'b>
    where
        Self: 'a,
    {
        self.new_char_from(original.c.clone(), offset, original)
    }

    /// Stores a sentinel (empty text, no source) at `offset`.
    pub fn new_sentinel(&mut self, offset: usize) -> &'a Char<'a, 'b>
    where
        Self: 'a,
    {
        self.intern(Char::new_sentinel(offset))
    }

    /// Splits `text` into `Char`s stored in this pool.
    ///
    /// Each code point becomes one `Char`, except that a variation selector
    /// (U+FE00–U+FE0F or U+E0100–U+E01EF) directly following an unmerged code point is
    /// merged with it into a single `Char`. A selector with nothing to attach to (at the
    /// start of `text`, or right after an already merged pair) becomes its own `Char`.
    /// Every `Char` borrows its text from `text` and records its byte offset. The result
    /// always ends with a sentinel whose offset is `text.len()`.
    pub fn build_char_array(&mut self, text: &'b str) -> Vec<&'a Char<'a, 'b>>
    where
        Self: 'a,
    {
        let mut result = Vec::new();
        // Byte offset of the most recent code point that has not been emitted yet.
        let mut pending: Option<usize> = None;
        for (offset, ch) in text.char_indices() {
            if let Some(start) = pending.take() {
                if matches!(ch, '\u{FE00}'..='\u{FE0F}' | '\u{E0100}'..='\u{E01EF}') {
                    // Base and selector are adjacent in `text`, so the pair can be
                    // borrowed as one slice instead of being copied into a new `String`.
                    let end = offset + ch.len_utf8();
                    result.push(self.new_char(Cow::Borrowed(&text[start..end]), start));
                    continue;
                }
                result.push(self.new_char(Cow::Borrowed(&text[start..offset]), start));
            }
            pending = Some(offset);
        }
        if let Some(start) = pending {
            result.push(self.new_char(Cow::Borrowed(&text[start..]), start));
        }
        result.push(self.new_sentinel(text.len()));
        result
    }
}
/// A minimal iterator interface that yields `&Char` references.
///
/// It is used as a trait object (`Box<dyn CharIterator>`) elsewhere in this file, and a
/// blanket impl makes every standard `Iterator` over `&Char` a `CharIterator`.
pub trait CharIterator<'a, 'b> {
/// Returns the next `Char`, or `None` once the sequence is exhausted.
fn next(&mut self) -> Option<&'a Char<'a, 'b>>;
}
/// Every standard iterator that yields `&Char` is also a `CharIterator`.
impl<'a, 'b: 'a, I> CharIterator<'a, 'b> for I
where
    I: Iterator<Item = &'a Char<'a, 'b>>,
{
    fn next(&mut self) -> Option<&'a Char<'a, 'b>> {
        // Name the trait explicitly: both `Iterator` and `CharIterator` define `next`.
        <I as Iterator>::next(self)
    }
}
/// Lets a boxed `CharIterator` trait object be used as a standard `Iterator`, giving it
/// access to adaptors such as `collect`.
impl<'a, 'b: 'a> Iterator for Box<dyn CharIterator<'a, 'b> + 'a> {
    type Item = &'a Char<'a, 'b>;

    fn next(&mut self) -> Option<Self::Item> {
        // Reborrow the trait object inside the box so the call dispatches to the boxed
        // value's own `CharIterator::next`.
        let inner: &mut (dyn CharIterator<'a, 'b> + 'a) = &mut **self;
        inner.next()
    }
}
/// Conversion into a [`CharIterator`], mirroring what `IntoIterator` is to `Iterator`.
pub trait IntoCharIterator<'a, 'b: 'a> {
    /// The `CharIterator` produced by [`IntoCharIterator::into_iter`].
    type IntoIter: CharIterator<'a, 'b>;

    /// Consumes `self` and returns an iterator over its `Char`s.
    fn into_iter(self) -> Self::IntoIter;
}
/// Anything that is `IntoIterator` over `&Char` is also `IntoCharIterator`, producing the
/// same iterator type.
impl<'a, 'b: 'a, I> IntoCharIterator<'a, 'b> for I
where
    I: IntoIterator<Item = &'a Char<'a, 'b>>,
{
    type IntoIter = <I as IntoIterator>::IntoIter;

    fn into_iter(self) -> Self::IntoIter {
        // Name the trait explicitly: both traits define a by-value `into_iter`.
        <I as IntoIterator>::into_iter(self)
    }
}
/// Lets a boxed `IntoCharIterator` trait object (whose iterator type is itself a boxed
/// `CharIterator`) be used where a standard `IntoIterator` over `&Char` is expected.
impl<'a, 'b> IntoIterator
for Box<dyn IntoCharIterator<'a, 'b, IntoIter = Box<dyn CharIterator<'a, 'b> + 'a>>>
where
'b: 'a,
{
type Item = &'a Char<'a, 'b>;
type IntoIter = Box<dyn CharIterator<'a, 'b> + 'a>;
// NOTE(review): `self` here is the `Box`, not the trait object inside it, so the call
// below appears to resolve to the blanket `IntoCharIterator` impl for `I: IntoIterator`.
// That impl forwards to `IntoIterator::into_iter`, i.e. back into this function, which
// looks like unbounded recursion at runtime. Confirm; if so, dispatching to the boxed
// value likely needs a `Box<Self>`-taking method on `IntoCharIterator`.
fn into_iter(self) -> Self::IntoIter {
Box::new(IntoCharIterator::into_iter(self))
}
}
/// Concatenates the text of every `Char` in `chars` into a `String`.
///
/// Sentinels (for which `Char::c` returns `None`) contribute nothing.
pub fn from_chars<'a, 'b: 'a>(chars: impl IntoIterator<Item = &'a Char<'a, 'b>>) -> String {
    let mut text = String::new();
    for ch in chars {
        if let Some(piece) = ch.c() {
            text.push_str(piece);
        }
    }
    text
}
#[cfg(test)]
mod tests {
    use super::*;

    /// ASCII input: one `Char` per byte plus the trailing sentinel.
    #[test]
    fn test_build_char_array_basic() {
        let input = "Hello";
        let mut pool = CharPool::new();
        let chars = pool.build_char_array(input);
        assert_eq!(chars.len(), 6);
        assert_eq!(chars[0].c, "H");
        assert_eq!(chars[0].offset, 0);
        assert_eq!(chars[1].c, "e");
        assert_eq!(chars[1].offset, 1);
        assert_eq!(chars[4].c, "o");
        assert_eq!(chars[4].offset, 4);
        assert_eq!(chars[5].c, "");
        assert_eq!(chars[5].offset, 5);
    }

    /// Empty input yields only the sentinel.
    #[test]
    fn test_build_char_array_empty() {
        let input = "";
        let mut pool = CharPool::new();
        let chars = pool.build_char_array(input);
        assert_eq!(chars.len(), 1);
        assert_eq!(chars[0].c, "");
        assert_eq!(chars[0].offset, 0);
    }

    /// Multi-byte code points are kept whole.
    #[test]
    fn test_build_char_array_unicode() {
        let input = "こんにちは";
        let mut pool = CharPool::new();
        let chars = pool.build_char_array(input);
        assert_eq!(chars.len(), 6);
        assert_eq!(chars[0].c, "こ");
        assert_eq!(chars[1].c, "ん");
        assert_eq!(chars[2].c, "に");
        assert_eq!(chars[3].c, "ち");
        assert_eq!(chars[4].c, "は");
        assert_eq!(chars[5].c, "");
    }

    /// Offsets are byte offsets: "𝓣" occupies four bytes after the one-byte "A".
    #[test]
    fn test_build_char_array_offsets() {
        let input = "A𝓣";
        let mut pool = CharPool::new();
        let chars = pool.build_char_array(input);
        assert_eq!(chars.len(), 3);
        assert_eq!(chars[0].c, "A");
        assert_eq!(chars[0].offset, 0);
        assert_eq!(chars[1].c, "𝓣");
        assert_eq!(chars[1].offset, 1);
        assert_eq!(chars[2].offset, 5);
    }

    /// A variation selector is merged into the code point before it.
    #[test]
    fn test_build_char_array_variation_selector() {
        let input = "a\u{FE0F}b";
        let mut pool = CharPool::new();
        let chars = pool.build_char_array(input);
        assert_eq!(chars.len(), 3);
        assert_eq!(chars[0].c, "a\u{FE0F}");
        assert_eq!(chars[0].offset, 0);
        assert_eq!(chars[1].c, "b");
        assert_eq!(chars[1].offset, 4);
        assert!(chars[2].is_sentinel());
        assert_eq!(chars[2].offset, 5);
    }

    /// A selector with no preceding code point stands alone; supplementary selectors
    /// (U+E0100..) merge like the basic ones.
    #[test]
    fn test_build_char_array_leading_and_supplementary_selector() {
        let input = "\u{FE0F}\u{845B}\u{E0100}";
        let mut pool = CharPool::new();
        let chars = pool.build_char_array(input);
        assert_eq!(chars.len(), 3);
        assert_eq!(chars[0].c, "\u{FE0F}");
        assert_eq!(chars[0].offset, 0);
        assert_eq!(chars[1].c, "\u{845B}\u{E0100}");
        assert_eq!(chars[1].offset, 3);
        assert_eq!(chars[2].offset, 10);
    }

    /// Only a change of text along the source chain counts as transliteration.
    #[test]
    fn test_is_transliterated() {
        let mut pool = CharPool::new();
        let original = pool.new_char("a".into(), 0);
        let moved = pool.new_with_offset(original, 3);
        let replaced = pool.new_char_from("b".into(), 0, original);
        assert!(!original.is_transliterated());
        assert!(!moved.is_transliterated());
        assert!(replaced.is_transliterated());
        assert_eq!(moved.c, "a");
        assert_eq!(moved.offset, 3);
        assert_eq!(moved.source, Some(original));
    }

    /// The sentinel contributes nothing to the reconstructed string.
    #[test]
    fn test_from_chars_basic() {
        let mut pool = CharPool::new();
        let chars = [
            pool.new_char("H".into(), 0),
            pool.new_char("e".into(), 1),
            pool.new_char("l".into(), 2),
            pool.new_char("l".into(), 3),
            pool.new_char("o".into(), 4),
            pool.new_sentinel(5),
        ];
        let result = from_chars(chars.iter().cloned());
        assert_eq!(result, "Hello");
    }

    /// A standard iterator can be boxed as `dyn CharIterator` and still be collected.
    #[test]
    fn test_char_iterator_dyn_compatibility() {
        let mut pool = CharPool::new();
        let chars = pool.build_char_array("Hello");
        let iter: Box<dyn CharIterator<'_, '_>> = Box::new(chars.iter().cloned());
        let collected: Vec<_> = iter.collect();
        assert_eq!(collected.len(), 6);
        assert_eq!(collected[0].c, "H");
        assert_eq!(collected[1].c, "e");
        assert_eq!(collected[5].c, "");
    }
}