use crate::{CharIter, GraphemeBoundary, GraphemeMachine, StringU8, init_array};
/// Drives a [`GraphemeMachine`] by hand over a single emoji ZWJ sequence and
/// checks that all of its scalars are gathered into one grapheme cluster.
#[test]
#[cfg(feature = "__std")]
fn t01_grapheme_cluster_logic_directly() {
    use crate::{GraphemeBoundary, GraphemeMachine, StringU8, charu};

    // U+1F9D1 + ZERO WIDTH JOINER + U+1F33E. The joiner is written as an
    // escape so that the invisible code point cannot be lost silently.
    let input = "🧑\u{200D}🌾";

    let mut machine = GraphemeMachine::new();
    let mut remain = input;
    let mut grapheme = StringU8::<32>::new();
    // Scratch space handed to `as_str_into` for each scalar.
    let mut buf = [0u8; 4];

    while let Some((ch, len)) = charu::from_str_with_len(remain) {
        let boundary = machine.next_charu(ch);
        // A split only terminates the cluster once something has been
        // collected; a split reported for the very first scalar is ignored.
        if boundary == GraphemeBoundary::Split && !grapheme.is_empty() {
            break;
        }
        // The result of the push is not inspected here.
        grapheme.try_push_str(ch.as_str_into(&mut buf));
        // `len` is used as the number of bytes to skip in the remaining input.
        remain = &remain[len as usize..];
    }

    assert_eq!(
        grapheme.as_str(),
        "🧑\u{200D}🌾",
        "Should return complete ZWJ sequence, but got '{}' from input '{}'",
        grapheme.as_str(),
        input
    );
}
/// Extracts two consecutive grapheme clusters by hand from an ASCII letter
/// followed by an emoji ZWJ sequence, sharing one [`GraphemeMachine`] and one
/// input cursor between both extractions.
#[test]
#[cfg(feature = "__std")]
fn t02_multiple_graphemes_directly() {
    use crate::{GraphemeBoundary, GraphemeMachine, StringU8, charu};

    // 'H', then U+1F9D1 + ZERO WIDTH JOINER + U+1F33E. The joiner is written
    // as an escape so that the invisible code point cannot be lost silently.
    let input = "H🧑\u{200D}🌾";

    let mut machine = GraphemeMachine::new();
    let mut remain = input;
    let mut graphemes = Vec::new();
    // Scratch space handed to `as_str_into` for each scalar.
    let mut buf = [0u8; 4];

    // Both clusters are pulled with the same procedure, so it is run twice
    // instead of being written out twice.
    for _ in 0..2 {
        let mut grapheme = StringU8::<32>::new();
        while let Some((ch, len)) = charu::from_str_with_len(remain) {
            let boundary = machine.next_charu(ch);
            // NOTE(review): when this breaks, `ch` has already been fed to the
            // machine but `remain` has not been advanced, so the same scalar
            // is fed again at the start of the next pass. Confirm that
            // feeding it twice is intended.
            if boundary == GraphemeBoundary::Split && !grapheme.is_empty() {
                break;
            }
            // The result of the push is not inspected here.
            grapheme.try_push_str(ch.as_str_into(&mut buf));
            // `len` is used as the number of bytes to skip in the remaining input.
            remain = &remain[len as usize..];
        }
        graphemes.push(grapheme.as_str().to_string());
    }

    assert_eq!(graphemes[0], "H");
    assert_eq!(graphemes[1], "🧑\u{200D}🌾");
}
/// Segments a mixed ASCII / CRLF / emoji string into grapheme clusters using
/// only fixed-capacity storage, then checks both the clusters and their count.
#[test]
#[rustfmt::skip]
fn core_basics() {
    let mut clusters: [StringU8::<16>; 32] =
        init_array![default [StringU8::<16>; 32], "safe", "unsafe_array"];
    let mut current_cluster = StringU8::<16>::new();
    let mut machine = GraphemeMachine::new();
    // Scratch space handed to `as_str_into` for each scalar.
    let mut buf = [0u8; 4];

    // The trailing emoji is U+1F9D1 + ZERO WIDTH JOINER + U+1F33E. The joiner
    // is written as an escape so that the invisible code point cannot be lost.
    let input = "Hello!\r\nBeep 🧑\u{200D}🌾";
    let mut iter = CharIter::<&str>::new(input);

    // Number of clusters stored so far.
    let mut n = 0;
    while let Some(c) = iter.next_charu() {
        // The machine is consulted for every scalar; a split only flushes
        // when there is something to flush.
        if machine.next_charu(c) == GraphemeBoundary::Split && !current_cluster.is_empty() {
            // `current_cluster` is used again after this assignment, so the
            // slot receives a copy of it.
            clusters[n] = current_cluster;
            current_cluster.clear();
            n += 1;
        }
        current_cluster.push_str(c.as_str_into(&mut buf));
    }
    // Flush whatever is left once the input is exhausted.
    if !current_cluster.is_empty() {
        clusters[n] = current_cluster;
        n += 1;
    }

    let expected = ["H", "e", "l", "l", "o", "!", "\r\n", "B", "e", "e", "p", " ", "🧑\u{200D}🌾"];
    for (i, (actual, expected_str)) in clusters.iter().zip(expected.iter()).enumerate() {
        assert_eq!(actual, expected_str, "mismatch at index {}", i);
    }
    // `zip` stops at the shorter side, so surplus clusters would slip through
    // the loop above; the count is therefore checked separately.
    assert_eq!(n, expected.len(), "unexpected number of clusters");
}
/// Segments a single emoji ZWJ sequence into grapheme clusters using heap
/// allocated `String`s and expects exactly one cluster.
#[test]
#[cfg(feature = "__std")]
fn alloc_basics() {
    let mut clusters: Vec<String> = Vec::new();
    let mut current_cluster = String::new();
    let mut machine = GraphemeMachine::new();
    // Scratch space handed to `as_str_into` for each scalar.
    let mut buf = [0u8; 4];

    // U+1F9D1 + ZERO WIDTH JOINER + U+1F33E. The joiner is written as an
    // escape so that the invisible code point cannot be lost silently.
    let input = "🧑\u{200D}🌾";
    let mut iter = CharIter::<&str>::new(input);

    while let Some(c) = iter.next_charu() {
        // The machine is consulted for every scalar; a split only flushes
        // when there is something to flush.
        if machine.next_charu(c) == GraphemeBoundary::Split && !current_cluster.is_empty() {
            // Move the finished cluster out and leave an empty string behind,
            // instead of cloning it and then clearing the original.
            clusters.push(core::mem::take(&mut current_cluster));
        }
        current_cluster.push_str(c.as_str_into(&mut buf));
    }
    // Flush whatever is left once the input is exhausted.
    if !current_cluster.is_empty() {
        clusters.push(current_cluster);
    }

    let expected = ["🧑\u{200D}🌾"];
    assert_eq!(clusters, &expected);
}
/// Calls `end_of_input` before every scalar and requires the `next_charu`
/// call that follows to report a split each time.
#[test]
fn end_of_input() {
    let mut machine = GraphemeMachine::new();
    // The trailing emoji is U+1F9D1 + ZERO WIDTH JOINER + U+1F33E. The joiner
    // is written as an escape so that the invisible code point stays visible.
    let input = "Hello!\r\nBeep 🧑\u{200D}🌾";
    let mut iter = CharIter::<&str>::new(input);

    while let Some(c) = iter.next_charu() {
        machine.end_of_input();
        assert!(
            machine.next_charu(c) == GraphemeBoundary::Split,
            "non-split after end_of_input came before {c:?}"
        );
    }
}
/// Collects the `(boundary, char)` pairs yielded by
/// `GraphemeMachine::next_char_from_str` and compares them with the expected
/// boundary reported for every scalar of the input.
#[test]
#[cfg(feature = "__std")]
fn next_char_from_str() {
    use GraphemeBoundary::*;

    let mut machine = GraphemeMachine::new();
    // The trailing emoji is U+1F9D1 + ZERO WIDTH JOINER + U+1F33E. The joiner
    // is written as an escape so the input visibly contains the `'\u{200D}'`
    // scalar listed in the expectation below.
    let input = "Hello!\r\nBeep 🧑\u{200D}🌾";
    let got: Vec<_> = machine.next_char_from_str(input).collect();

    assert_eq!(
        got,
        &[
            (Split, 'H'),
            (Split, 'e'),
            (Split, 'l'),
            (Split, 'l'),
            (Split, 'o'),
            (Split, '!'),
            // CR LF is expected to stay together.
            (Split, '\r'),
            (Continue, '\n'),
            (Split, 'B'),
            (Split, 'e'),
            (Split, 'e'),
            (Split, 'p'),
            (Split, ' '),
            // The emoji ZWJ sequence is expected to stay together.
            (Split, '🧑'),
            (Continue, '\u{200D}'),
            (Continue, '🌾'),
        ]
    );
}
/// Runs the grapheme iterator inside a `const` initializer and checks the
/// value computed at compile time.
#[test]
const fn const_iter() {
    // The trailing emoji is U+1F9D1 + ZERO WIDTH JOINER + U+1F33E. The joiner
    // is written as an escape so that the invisible code point stays visible.
    const INPUT: &str = "Hello!\r\nBeep 🧑\u{200D}🌾";
    // Only the first nine items are pulled: "Hello!", '\r', '\n' and 'B'.
    const ITEMS_TO_PULL: usize = 9;

    // `CHAR` ends up as the last scalar, among the items pulled, that the
    // machine reported as the start of a new cluster.
    const CHAR: char = const {
        let mut machine = GraphemeMachine::new();
        let mut iter = machine.next_char_from_str(INPUT);
        let mut c = '\0';
        let mut i = 0;
        while i < ITEMS_TO_PULL {
            i += 1;
            if let Some((cluster, scalar)) = iter.next() {
                if cluster.eq(GraphemeBoundary::Split) {
                    c = scalar;
                }
            }
        }
        c
    };
    assert![CHAR == 'B'];
}