use super::UnicodeSegmentation;
use std::prelude::v1::*;
/// Exercises grapheme-cluster iteration in both extended and legacy mode,
/// forwards and backwards, against the `testdata` tables plus a few
/// hand-written cases, and then checks `grapheme_indices` and `size_hint`.
#[test]
fn test_graphemes() {
    use testdata::{TEST_SAME, TEST_DIFF};

    // Extra cases in the same shape as `TEST_DIFF`:
    // (input, expected extended clusters, expected legacy clusters).
    // In the extended expectations U+0600 stays attached to what follows it;
    // in the legacy expectations every code point stands alone.
    const EXTRA_DIFF: &'static [(&'static str,
                                 &'static [&'static str],
                                 &'static [&'static str])] = &[
        // Official test suite doesn't include two Prepend chars between two other chars.
        ("\u{20}\u{600}\u{600}\u{20}",
         &["\u{20}", "\u{600}\u{600}\u{20}"],
         &["\u{20}", "\u{600}", "\u{600}", "\u{20}"]),

        // Test for Prepend followed by two Any chars
        ("\u{600}\u{20}\u{20}",
         &["\u{600}\u{20}", "\u{20}"],
         &["\u{600}", "\u{20}", "\u{20}"]),
    ];

    // Compares the complete cluster sequence of `s` with `expected`, first
    // front-to-back and then back-to-front.
    //
    // The sequences are collected and compared as a whole on purpose: a
    // `zip(..).all(..)` comparison stops at the shorter side, so it would
    // accept an iterator that yields too few or too many clusters (and an
    // empty iterator would pass vacuously).
    fn check(s: &str, extended: bool, expected: &[&str]) {
        let forward = UnicodeSegmentation::graphemes(s, extended).collect::<Vec<&str>>();
        assert_eq!(forward, expected,
                   "forward graphemes of {:?} (extended: {})", s, extended);

        let backward = UnicodeSegmentation::graphemes(s, extended)
            .rev()
            .collect::<Vec<&str>>();
        let expected_rev = expected.iter().rev().cloned().collect::<Vec<&str>>();
        assert_eq!(backward, expected_rev,
                   "reverse graphemes of {:?} (extended: {})", s, extended);
    }

    // Inputs whose segmentation is the same in both modes.
    for &(s, g) in TEST_SAME {
        check(s, true, g);
        check(s, false, g);
    }

    // Inputs whose extended (`gt`) and legacy (`gf`) segmentations differ.
    for &(s, gt, gf) in TEST_DIFF.iter().chain(EXTRA_DIFF) {
        check(s, true, gt);
        check(s, false, gf);
    }

    // test the indices iterators
    // Written with escapes so the combining marks survive any re-encoding of
    // this file: a + U+0310, e + U+0301, o + U+0308 + U+0332, then CRLF
    // (3 + 3 + 5 + 2 = 13 bytes, matching the offsets asserted below).
    let s = "a\u{310}e\u{301}o\u{308}\u{332}\r\n";
    let gr_inds = UnicodeSegmentation::grapheme_indices(s, true).collect::<Vec<(usize, &str)>>();
    let b: &[_] = &[(0, "a\u{310}"), (3, "e\u{301}"), (6, "o\u{308}\u{332}"), (11, "\r\n")];
    assert_eq!(gr_inds, b);

    let gr_inds = UnicodeSegmentation::grapheme_indices(s, true)
        .rev()
        .collect::<Vec<(usize, &str)>>();
    let b: &[_] = &[(11, "\r\n"), (6, "o\u{308}\u{332}"), (3, "e\u{301}"), (0, "a\u{310}")];
    assert_eq!(gr_inds, b);

    let mut gr_inds_iter = UnicodeSegmentation::grapheme_indices(s, true);
    {
        // Borrow the iterator so it can still be inspected after `count`
        // has drained it.
        let gr_inds = gr_inds_iter.by_ref();
        let e1 = gr_inds.size_hint();
        assert_eq!(e1, (1, Some(13)));
        let c = gr_inds.count();
        assert_eq!(c, 4);
    }
    // Once exhausted, the hint must collapse to exactly zero.
    let e2 = gr_inds_iter.size_hint();
    assert_eq!(e2, (0, Some(0)));

    // make sure the reverse iterator does the right thing with "\n" at beginning of string
    let s = "\n\r\n\r";
    let gr = UnicodeSegmentation::graphemes(s, true).rev().collect::<Vec<&str>>();
    let b: &[_] = &["\r", "\r\n", "\n"];
    assert_eq!(gr, b);
}
/// Exercises word-boundary splitting, forwards and backwards, against the
/// `testdata::TEST_WORD` table plus hand-written emoji/flag cases, checking
/// both the yielded pieces and their starting byte offsets.
#[test]
fn test_words() {
    use testdata::TEST_WORD;

    // Extra (input, expected pieces) cases. All non-ASCII text is written
    // with `\u{..}` escapes so it cannot be damaged by a re-encoding of this
    // file. U+1F1E6..=U+1F1FF are the regional indicator symbols A..=Z; the
    // letters in the comments spell out the sequence used in each case.
    const EXTRA_TESTS: &'static [(&'static str, &'static [&'static str])] = &[
        // AF AX AL DZ AS AD AO: an even run, split into pairs.
        ("\u{1f1e6}\u{1f1eb}\u{1f1e6}\u{1f1fd}\u{1f1e6}\u{1f1f1}\u{1f1e9}\u{1f1ff}\u{1f1e6}\u{1f1f8}\u{1f1e6}\u{1f1e9}\u{1f1e6}\u{1f1f4}",
         &["\u{1f1e6}\u{1f1eb}", "\u{1f1e6}\u{1f1fd}", "\u{1f1e6}\u{1f1f1}", "\u{1f1e9}\u{1f1ff}",
           "\u{1f1e6}\u{1f1f8}", "\u{1f1e6}\u{1f1e9}", "\u{1f1e6}\u{1f1f4}"]),
        // AF AX AL DZ AS AD A: an odd run, the last indicator is left alone.
        ("\u{1f1e6}\u{1f1eb}\u{1f1e6}\u{1f1fd}\u{1f1e6}\u{1f1f1}\u{1f1e9}\u{1f1ff}\u{1f1e6}\u{1f1f8}\u{1f1e6}\u{1f1e9}\u{1f1e6}",
         &["\u{1f1e6}\u{1f1eb}", "\u{1f1e6}\u{1f1fd}", "\u{1f1e6}\u{1f1f1}", "\u{1f1e9}\u{1f1ff}",
           "\u{1f1e6}\u{1f1f8}", "\u{1f1e6}\u{1f1e9}", "\u{1f1e6}"]),
        // A a FAX a AL DZ AS AD A: ASCII letters interrupt the runs, so
        // pairing restarts after each interruption.
        ("\u{1f1e6}a\u{1f1eb}\u{1f1e6}\u{1f1fd}a\u{1f1e6}\u{1f1f1}\u{1f1e9}\u{1f1ff}\u{1f1e6}\u{1f1f8}\u{1f1e6}\u{1f1e9}\u{1f1e6}",
         &["\u{1f1e6}", "a", "\u{1f1eb}\u{1f1e6}", "\u{1f1fd}", "a", "\u{1f1e6}\u{1f1f1}",
           "\u{1f1e9}\u{1f1ff}", "\u{1f1e6}\u{1f1f8}", "\u{1f1e6}\u{1f1e9}", "\u{1f1e6}"]),
        // Three emoji joined by U+200D stay in one piece.
        ("\u{1f468}\u{200d}\u{1f468}\u{200d}\u{1f466}",
         &["\u{1f468}\u{200d}\u{1f468}\u{200d}\u{1f466}"]),
        // U+1F60C, then U+1F44E followed by U+1F3FC, which stays attached to it.
        ("\u{1f60c}\u{1f44e}\u{1f3fc}", &["\u{1f60c}", "\u{1f44e}\u{1f3fc}"]),
        // perhaps wrong, spaces should not be included?
        ("hello world", &["hello", " ", "world"]),
        // CA CH ZM Z, then a space and an ordinary word.
        ("\u{1f1e8}\u{1f1e6}\u{1f1e8}\u{1f1ed}\u{1f1ff}\u{1f1f2}\u{1f1ff} hi",
         &["\u{1f1e8}\u{1f1e6}", "\u{1f1e8}\u{1f1ed}", "\u{1f1ff}\u{1f1f2}", "\u{1f1ff}", " ", "hi"]),
    ];

    for &(s, w) in TEST_WORD.iter().chain(EXTRA_TESTS.iter()) {
        // Collects both iterators and compares them as whole sequences.
        // The macro is defined inside the loop because its failure message
        // names the loop variables `s` and `w`, which macro hygiene only
        // resolves when they are in scope at the definition site.
        macro_rules! assert_ {
            ($test:expr, $exp:expr, $name:expr) => {
                let testing = $test.collect::<Vec<_>>();
                let expected = $exp.collect::<Vec<_>>();
                assert_eq!(testing, expected, "{} test for testcase ({:?}, {:?}) failed.", $name, s, w)
            }
        }

        // test forward word iterator
        assert_!(s.split_word_bounds(),
                 w.iter().cloned(),
                 "Forward word boundaries");

        // test reverse word iterator
        assert_!(s.split_word_bounds().rev(),
                 w.iter().rev().cloned(),
                 "Reverse word boundaries");

        // Starting byte offset of every expected piece: the running sum of
        // the lengths of all pieces before it.
        let mut offset = 0;
        let indices = w.iter()
            .map(|piece| {
                let start = offset;
                offset += piece.len();
                start
            })
            .collect::<Vec<usize>>();

        // test forward indices iterator
        assert_!(s.split_word_bound_indices().map(|(l,_)| l),
                 indices.iter().cloned(),
                 "Forward word indices");

        // test backward indices iterator
        assert_!(s.split_word_bound_indices().rev().map(|(l,_)| l),
                 indices.iter().rev().cloned(),
                 "Reverse word indices");
    }
}