use icu_locale_core::langid;
use icu_segmenter::options::{SentenceBreakOptions, WordBreakOptions};
use icu_segmenter::{SentenceSegmenter, WordSegmenter};
/// Verifies that the word segmenter's output for `"hello:world"` depends on
/// the `content_locale` hint supplied through `WordBreakOptions`.
///
/// The same input is segmented once with a Swedish hint and once with an
/// English hint, and the resulting byte offsets are compared against the
/// boundaries expected for each locale.
#[test]
fn word_break_with_locale() {
    let text = "hello:world";

    // Swedish: the only expected boundaries are the start and end of the
    // input, i.e. the colon does not split the word.
    // NOTE(review): presumably the Swedish tailoring treats ':' as a
    // mid-letter character — confirm against the segmenter data.
    let swedish = langid!("sv");
    let mut sv_options = WordBreakOptions::default();
    sv_options.content_locale = Some(&swedish);
    let sv_segmenter = WordSegmenter::try_new_auto(sv_options).expect("Loading should succeed!");
    let sv_breaks: Vec<usize> = sv_segmenter.as_borrowed().segment_str(text).collect();
    assert_eq!(sv_breaks, vec![0, 11], "word segmenter with Swedish");

    // English: boundaries are expected on both sides of the colon
    // (offsets 5 and 6), in addition to the start and end of the input.
    let english = langid!("en");
    let mut en_options = WordBreakOptions::default();
    en_options.content_locale = Some(&english);
    let en_segmenter = WordSegmenter::try_new_auto(en_options).expect("Loading should succeed!");
    let en_breaks: Vec<usize> = en_segmenter.as_borrowed().segment_str(text).collect();
    assert_eq!(en_breaks, vec![0, 5, 6, 11], "word segmenter with English");
}
/// Verifies that the sentence segmenter's output for `"hello; world"` depends
/// on the `content_locale` hint supplied through `SentenceBreakOptions`.
///
/// The same input is segmented once with a Greek hint and once with an
/// English hint, and the resulting byte offsets are compared against the
/// boundaries expected for each locale.
#[test]
fn sentence_break_with_locale() {
    let text = "hello; world";

    // Greek: a boundary is expected at offset 7, right after "hello; ".
    // NOTE(review): presumably the Greek tailoring treats ';' as a sentence
    // terminator (it serves as the Greek question mark) — confirm against
    // the segmenter data.
    let greek = langid!("el");
    let mut el_options = SentenceBreakOptions::default();
    el_options.content_locale = Some(&greek);
    let el_segmenter = SentenceSegmenter::try_new(el_options).expect("Loading should succeed!");
    let el_breaks: Vec<usize> = el_segmenter.as_borrowed().segment_str(text).collect();
    assert_eq!(el_breaks, vec![0, 7, 12], "sentence segmenter with Greek");

    // English: the whole input is expected to be a single sentence, so only
    // the start and end offsets are reported.
    let english = langid!("en");
    let mut en_options = SentenceBreakOptions::default();
    en_options.content_locale = Some(&english);
    let en_segmenter = SentenceSegmenter::try_new(en_options).expect("Loading should succeed!");
    let en_breaks: Vec<usize> = en_segmenter.as_borrowed().segment_str(text).collect();
    assert_eq!(en_breaks, vec![0, 12], "sentence segmenter with English");
}