use uniworld::segment::{
grapheme_boundaries, grapheme_cluster_boundaries, sentence_boundaries, word_boundaries,
};
/// An empty input must produce no grapheme boundaries at all.
#[test]
fn grapheme_boundaries_empty() {
    assert!(grapheme_boundaries("").is_empty());
}
/// Plain ASCII text: every character is expected to start its own cluster,
/// so the reported offsets are 0, 1 and 2.
#[test]
fn grapheme_boundaries_ascii() {
    let input = "abc";
    assert_eq!(grapheme_boundaries(input), [0, 1, 2]);
}
/// The iterator-returning variant, once collected into a `Vec<usize>`,
/// is expected to yield one offset per ASCII character.
#[test]
fn grapheme_cluster_boundaries_iterator() {
    let collected = grapheme_cluster_boundaries("ab").collect::<Vec<usize>>();
    assert_eq!(collected, [0, 1]);
}
/// A base letter followed by U+0301 (combining acute accent) must not be
/// split: only the offset 0 is expected.
#[test]
fn grapheme_base_plus_extend() {
    let starts = grapheme_boundaries("e\u{0301}");
    assert_eq!(starts, [0], "e + acute should be one cluster");
}
/// `\r\n` must be kept together: the expected offsets are 0 (`a`),
/// 1 (the CRLF pair) and 3 (`b`).
#[test]
fn grapheme_crlf() {
    let text = "a\r\nb";
    let starts = grapheme_boundaries(text);
    assert_eq!(starts, [0, 1, 3], "a, CRLF, b");
}
/// Two consecutive regional-indicator symbols (U+1F1E6, U+1F1E8) are
/// expected to form a single cluster starting at offset 0.
#[test]
fn grapheme_regional_indicator_pair() {
    let flag = "\u{1F1E6}\u{1F1E8}";
    let starts = grapheme_boundaries(flag);
    assert_eq!(starts, [0], "two RIs = one cluster");
}
/// Four regional indicators are expected to pair up into two clusters.
///
/// Each indicator is 4 bytes in UTF-8, so the expected second offset, 8,
/// is where the second pair starts.
#[test]
fn grapheme_four_regional_indicators() {
    let two_flags = "\u{1F1E6}\u{1F1E8}\u{1F1E6}\u{1F1E8}";
    assert_eq!(
        grapheme_boundaries(two_flags),
        [0, 8],
        "four RIs = two clusters"
    );
}
/// With an empty input (and `None` as the second argument) the word
/// segmenter is expected to report the single offset 0.
#[test]
fn word_boundaries_empty() {
    assert_eq!(word_boundaries("", None), [0]);
}
/// Two words separated by one space: offsets are expected at the start of
/// `hello` (0), the space (5) and `world` (6).
#[test]
fn word_boundaries_hello_world() {
    let text = "hello world";
    let starts = word_boundaries(text, None);
    assert_eq!(starts, [0, 5, 6], "hello | space | world");
}
/// A lone word must not be split internally; only offset 0 is expected.
#[test]
fn word_boundaries_single_word() {
    let word = "hello";
    assert_eq!(word_boundaries(word, None), [0]);
}
/// With an empty input (and `None` as the second argument) the sentence
/// segmenter is expected to report the single offset 0.
#[test]
fn sentence_boundaries_empty() {
    assert_eq!(sentence_boundaries("", None), [0]);
}
/// Text without any sentence terminator is expected to stay one sentence,
/// i.e. only offset 0 is reported.
#[test]
fn sentence_boundaries_single_sentence() {
    let text = "Hello world";
    let starts = sentence_boundaries(text, None);
    assert_eq!(starts, [0], "no sentence break without terminator");
}
/// A period, a space and then an uppercase letter: the second sentence is
/// expected to start at offset 7 (the `W` of `World`).
#[test]
fn sentence_boundaries_period_uppercase() {
    let text = "Hello. World";
    assert_eq!(sentence_boundaries(text, None), [0, 7], "Hello. | World");
}
/// Periods inside `U.S.` must not introduce sentence breaks; only offset 0
/// is expected.
#[test]
fn sentence_boundaries_abbreviation() {
    let abbreviation = "U.S.";
    let starts = sentence_boundaries(abbreviation, None);
    assert_eq!(starts, [0], "abbreviation should be one sentence");
}
/// A CRLF after the terminating period is expected to stay attached to the
/// first sentence, so the next sentence starts at offset 4 (`b`).
#[test]
fn sentence_boundaries_crlf() {
    let text = "a.\r\nb";
    assert_eq!(sentence_boundaries(text, None), [0, 4], "a.CRLF | b");
}
/// An exclamation mark ends a sentence: the second sentence is expected to
/// start at offset 5 (the `G` of `Great.`).
#[test]
fn sentence_boundaries_exclamation() {
    let text = "Wow! Great.";
    let starts = sentence_boundaries(text, None);
    assert_eq!(starts, [0, 5], "Wow! | Great.");
}