#[cfg(feature = "__std")]
use super::{super::GraphemeMachineState, trie::UNICODE_GRAPHEME_CLUSTER_TESTS};
use crate::{GraphemePropCb, GraphemeProps, charu};
/// Spot-checks the value returned by `GraphemeProps::gcb_property` for a
/// hand-picked set of code points, one assertion per code point.
#[test]
fn code_point_categories() {
    // Note: with this glob import a bare `None` below resolves to
    // `GraphemePropCb::None`, not `Option::None`.
    use GraphemePropCb::*;

    let gcb = |c: char| -> GraphemePropCb {
        let props = GraphemeProps::for_charu(charu::from_char(c));
        props.gcb_property()
    };

    // Plain space and the line-ending / control characters.
    assert_eq!(gcb(' '), None);
    assert_eq!(gcb('\r'), CR);
    assert_eq!(gcb('\n'), LF);
    assert_eq!(gcb('\t'), Control);
    assert_eq!(gcb('\u{0001}'), Control);
    assert_eq!(gcb('\u{0085}'), Control);
    assert_eq!(gcb('\u{00AD}'), Control);

    // Combining marks: Extend versus SpacingMark.
    assert_eq!(gcb('\u{0301}'), Extend);
    assert_eq!(gcb('\u{0308}'), Extend);
    assert_eq!(gcb('\u{0C41}'), SpacingMark);
    assert_eq!(gcb('\u{0C42}'), SpacingMark);

    // Joiner controls.
    assert_eq!(gcb('\u{200D}'), Zwj);
    assert_eq!(gcb('\u{200C}'), Extend);

    // Regional indicators (first, middle, last of the tested range).
    assert_eq!(gcb('\u{1F1E6}'), RegionalIndicator);
    assert_eq!(gcb('\u{1F1FA}'), RegionalIndicator);
    assert_eq!(gcb('\u{1F1FF}'), RegionalIndicator);

    // Pictographic emoji, followed by two code points expected to be Extend.
    assert_eq!(gcb('\u{1F9D1}'), ExtendedPictographic);
    assert_eq!(gcb('\u{1F33E}'), ExtendedPictographic);
    assert_eq!(gcb('\u{1F600}'), ExtendedPictographic);
    assert_eq!(gcb('\u{1F469}'), ExtendedPictographic);
    assert_eq!(gcb('\u{1F3FB}'), Extend);
    assert_eq!(gcb('\u{1F3FF}'), Extend);

    // Hangul: L, V, T jamo and LV / LVT syllables.
    assert_eq!(gcb('\u{1100}'), L);
    assert_eq!(gcb('\u{1160}'), V);
    assert_eq!(gcb('\u{11A8}'), T);
    assert_eq!(gcb('\u{AC00}'), LV);
    assert_eq!(gcb('\u{AC01}'), LVT);

    // Ordinary letters from several scripts carry no special property.
    assert_eq!(gcb('a'), None);
    assert_eq!(gcb('あ'), None);
    assert_eq!(gcb('क'), None);
    assert_eq!(gcb('\u{0903}'), SpacingMark);

    assert_eq!(gcb('\u{0600}'), Prepend);

    // Further Extend samples.
    assert_eq!(gcb('\u{0C4A}'), Extend);
    assert_eq!(gcb('\u{05B0}'), Extend);
    assert_eq!(gcb('\u{05B8}'), Extend);
    assert_eq!(gcb('\u{FE0F}'), Extend);
}
/// Feeds `None, CR, LF, None` through the state machine and checks that the
/// only position reported without a boundary is the LF that follows the CR.
///
/// Each expected row is `(boundary, property, state after the step)`, as
/// produced by `transitions`.
#[test]
#[cfg(feature = "__std")]
fn crlf() {
    use GraphemeMachineState::Base;

    let other = GraphemeProps::None;
    let cr = GraphemeProps::CR;
    let lf = GraphemeProps::LF;

    let input = [other, cr, lf, other];
    let expected = [
        (true, other, Base),
        (true, cr, Base),
        // LF directly after CR stays in the same cluster.
        (false, lf, Base),
        (true, other, Base),
    ];

    let got: Vec<_> = transitions(&input).collect();
    assert_eq!(got, expected);
}
/// Checks how runs of one, two, three and four regional indicators are
/// segmented when each run is surrounded by `None` code points.
///
/// Each expected row is `(boundary, property, state after the step)`, as
/// produced by `transitions`.
#[test]
#[cfg(feature = "__std")]
fn emoji_flags() {
    use GraphemeMachineState::{AwaitRegionalPair, Base};

    let other = GraphemeProps::None;
    let ri = GraphemeProps::RegionalIndicator;

    #[rustfmt::skip]
    let input = [
        other,
        ri,
        other,
        ri, ri,
        other,
        ri, ri, ri,
        other,
        ri, ri, ri, ri,
        other,
    ];

    let expected = [
        (true, other, Base),
        // Run of one: a lone indicator starts a cluster and waits for a partner.
        (true, ri, AwaitRegionalPair),
        (true, other, Base),
        // Run of two: the second indicator joins the first.
        (true, ri, AwaitRegionalPair),
        (false, ri, Base),
        (true, other, Base),
        // Run of three: one pair, then a new cluster for the third.
        (true, ri, AwaitRegionalPair),
        (false, ri, Base),
        (true, ri, AwaitRegionalPair),
        (true, other, Base),
        // Run of four: two pairs.
        (true, ri, AwaitRegionalPair),
        (false, ri, Base),
        (true, ri, AwaitRegionalPair),
        (false, ri, Base),
        (true, other, Base),
    ];

    let got: Vec<_> = transitions(&input).collect();
    assert_eq!(got, expected);
}
/// Runs the segmentation state machine over every entry of
/// `UNICODE_GRAPHEME_CLUSTER_TESTS` and compares the clusters it produces with
/// the entry's expected clusters.
///
/// Every failing entry is printed before the test panics, so a single run
/// reports all mismatches rather than stopping at the first one.
#[test]
#[cfg(feature = "__std")]
fn unicode_test_table() {
    /// Appends the bytes accumulated in `current` (if any) to `clusters` as one
    /// finished cluster and empties `current`, keeping its allocation for reuse.
    fn flush(current: &mut Vec<u8>, clusters: &mut Vec<Box<[u8]>>) {
        if !current.is_empty() {
            clusters.push(Box::from(current.as_slice()));
            current.clear();
        }
    }

    /// Returns `true` when `got` and `want` contain the same clusters in the
    /// same order, compared byte for byte.
    fn result_matches(got: &[Box<[u8]>], want: &[&[u8]]) -> bool {
        got.len() == want.len() && got.iter().zip(want).all(|(g, w)| **g == **w)
    }

    let mut failures = 0;
    // Scratch space handed to `as_bytes_into` for each code point.
    let mut buf = [0u8; 4];

    for test in UNICODE_GRAPHEME_CLUSTER_TESTS {
        let input = str::from_utf8(test.input).expect("invalid UTF-8 in test input");
        let mut remain = input;
        let mut state = GraphemeMachineState::Base;
        let mut prev: Option<GraphemeProps> = None;
        let mut got: Vec<Box<[u8]>> = Vec::new();
        let mut current: Vec<u8> = Vec::new();

        while let Some((next, len)) = charu::from_str_with_len(remain) {
            let next_props = GraphemeProps::for_charu(next);
            let (boundary, next_state) = state.transition(prev, next_props);
            if boundary {
                // A boundary before `next` closes the cluster collected so far.
                flush(&mut current, &mut got);
            }
            current.extend_from_slice(next.as_bytes_into(&mut buf));
            remain = &remain[len as usize..];
            prev = Some(next_props);
            state = next_state;
        }
        // End of input closes whatever cluster is still open.
        flush(&mut current, &mut got);

        if !result_matches(&got, test.expected) {
            println!("- test failed: {}", test.desc);
            println!(" input: {:x?}", test.input);
            println!(" got: {:x?}", got);
            println!(" want: {:x?}", test.expected);
            failures += 1;
        }
    }

    if failures != 0 {
        panic!("{failures} tests failed");
    }
}
/// Checks segmentation of pictographic sequences with various combinations of
/// `Extend` and `Zwj` between two `ExtendedPictographic` code points.
///
/// Each expected row is `(boundary, property, state after the step)`, as
/// produced by `transitions`. Scenarios are separated by a `None` code point.
#[test]
#[cfg(feature = "__std")]
fn emoji_extend() {
    use GraphemeMachineState::{AfterZwj, Base, BeforeZwj};

    let other = GraphemeProps::None;
    let pic = GraphemeProps::ExtendedPictographic;
    let ext = GraphemeProps::Extend;
    let zwj = GraphemeProps::Zwj;

    #[rustfmt::skip]
    let input = [
        other,
        pic,
        other,
        pic, pic,
        other,
        pic, zwj, pic,
        other,
        pic, ext, pic,
        other,
        pic, ext, zwj, pic,
        other,
        pic, ext, ext, zwj, pic,
        other,
        pic, ext, ext, zwj, ext, pic,
        other,
    ];

    let expected = [
        (true, other, Base),
        // Single pictographic.
        (true, pic, BeforeZwj),
        (true, other, Base),
        // Two adjacent pictographics: separate clusters.
        (true, pic, BeforeZwj),
        (true, pic, BeforeZwj),
        (true, other, Base),
        // pic ZWJ pic: one cluster.
        (true, pic, BeforeZwj),
        (false, zwj, AfterZwj),
        (false, pic, BeforeZwj),
        (true, other, Base),
        // pic Extend pic: no ZWJ, so the second pictographic starts a new cluster.
        (true, pic, BeforeZwj),
        (false, ext, BeforeZwj),
        (true, pic, BeforeZwj),
        (true, other, Base),
        // pic Extend ZWJ pic: one cluster.
        (true, pic, BeforeZwj),
        (false, ext, BeforeZwj),
        (false, zwj, AfterZwj),
        (false, pic, BeforeZwj),
        (true, other, Base),
        // pic Extend Extend ZWJ pic: one cluster.
        (true, pic, BeforeZwj),
        (false, ext, BeforeZwj),
        (false, ext, BeforeZwj),
        (false, zwj, AfterZwj),
        (false, pic, BeforeZwj),
        (true, other, Base),
        // pic Extend Extend ZWJ Extend pic: the Extend after the ZWJ drops the
        // machine back to Base, so the final pictographic starts a new cluster.
        (true, pic, BeforeZwj),
        (false, ext, BeforeZwj),
        (false, ext, BeforeZwj),
        (false, zwj, AfterZwj),
        (false, ext, Base),
        (true, pic, BeforeZwj),
        (true, other, Base),
    ];

    let got: Vec<_> = transitions(&input).collect();
    assert_eq!(got, expected);
}
/// Drives a fresh state machine (starting in `GraphemeMachineState::Base` with
/// no previous property) over `cats`, one property at a time.
///
/// For each input property the returned iterator yields
/// `(boundary, property, state)`, where `boundary` and `state` are the two
/// values returned by `GraphemeMachineState::transition` for that step.
#[cfg(feature = "__std")]
fn transitions(
    cats: &[GraphemeProps],
) -> impl Iterator<Item = (bool, GraphemeProps, GraphemeMachineState)> + use<'_> {
    // Machine state carried from one step to the next; owned by the closure.
    let mut machine = GraphemeMachineState::Base;
    let mut previous: Option<GraphemeProps> = None;

    cats.iter().map(move |&current| {
        let (boundary, after) = machine.transition(previous, current);
        machine = after;
        previous = Some(current);
        (boundary, current, after)
    })
}