extern crate unic_segment;
use unic_segment::{WordBoundIndices, WordBounds};
/// Shape of a test table: each entry pairs an input string with the list of
/// string pieces the test below expects the word segmenter to produce for it.
type TestData = &'static [(&'static str, &'static [&'static str])];
/// Main test table, spliced in at compile time from the `.rsv` file.
// NOTE(review): presumably generated from the Unicode `WordBreakTest.txt` data — confirm.
const TEST_DATA: TestData = include!("tables/word_break_test_data.rsv");
/// Additional test cases in the same format, chained after `TEST_DATA` by the test.
const EXTRA_TEST_DATA: TestData = include!("extra_word_break_test_data.rsv");
/// Runs every entry of `TEST_DATA` followed by `EXTRA_TEST_DATA` through
/// `WordBounds` and `WordBoundIndices`, iterating both forwards and backwards,
/// and compares the results with the expected pieces and their start offsets.
#[test]
fn test_words_conformance() {
    /// Drains both iterators and asserts they produced the same items,
    /// naming the failing check and the test case in the panic message.
    fn check<A, E>(label: &str, input: &str, words: &[&str], actual: A, wanted: E)
    where
        A: Iterator,
        E: Iterator<Item = A::Item>,
        A::Item: PartialEq + std::fmt::Debug,
    {
        let testing: Vec<A::Item> = actual.collect();
        let expected: Vec<A::Item> = wanted.collect();
        assert_eq!(
            testing, expected,
            "{} test for testcase ({:?}, {:?}) failed.",
            label, input, words
        );
    }

    for case in TEST_DATA.iter().chain(EXTRA_TEST_DATA) {
        let (input, words) = *case;

        check(
            "Forward word boundaries",
            input,
            words,
            WordBounds::new(input),
            words.iter().cloned(),
        );
        check(
            "Reverse word boundaries",
            input,
            words,
            WordBounds::new(input).rev(),
            words.iter().rev().cloned(),
        );

        // Byte offset at which each expected piece starts: the running total
        // of the lengths of all pieces before it (so an empty list gives no
        // offsets at all).
        let starts: Vec<usize> = words
            .iter()
            .scan(0, |offset, piece| {
                let begin = *offset;
                *offset += piece.len();
                Some(begin)
            })
            .collect();

        check(
            "Forward word indices",
            input,
            words,
            WordBoundIndices::new(input).map(|(start, _)| start),
            starts.iter().cloned(),
        );
        check(
            "Reverse word indices",
            input,
            words,
            WordBoundIndices::new(input).rev().map(|(start, _)| start),
            starts.iter().rev().cloned(),
        );
    }
}