#![allow(dead_code)]
use proptest::prelude::*;
/// Builds a strategy producing strings of 1 to 30 characters, each chosen from
/// the code points in `start..=end`.
///
/// Code points for which `char::from_u32` returns `None` are left out of the
/// alphabet.
// NOTE(review): if no code point in the range converts to a `char`, the
// alphabet handed to `select` is empty — confirm callers never pass such a range.
pub fn chars_in_range(start: u32, end: u32) -> BoxedStrategy<String> {
    let mut alphabet: Vec<char> = Vec::new();
    for code_point in start..=end {
        if let Some(c) = char::from_u32(code_point) {
            alphabet.push(c);
        }
    }
    let one_char = proptest::sample::select(alphabet);
    proptest::collection::vec(one_char, 1..=30)
        .prop_map(|picked: Vec<char>| picked.iter().collect::<String>())
        .boxed()
}
/// Devanagari test strings: delegates to [`chars_in_range`] with
/// U+0900..=U+097F.
pub fn devanagari_text() -> BoxedStrategy<String> {
    const FIRST: u32 = 0x0900;
    const LAST: u32 = 0x097F;
    chars_in_range(FIRST, LAST)
}
/// Bengali test strings: delegates to [`chars_in_range`] with
/// U+0980..=U+09FF.
pub fn bengali_text() -> BoxedStrategy<String> {
    const FIRST: u32 = 0x0980;
    const LAST: u32 = 0x09FF;
    chars_in_range(FIRST, LAST)
}
/// Tamil test strings: delegates to [`chars_in_range`] with
/// U+0B80..=U+0BFF.
pub fn tamil_text() -> BoxedStrategy<String> {
    const FIRST: u32 = 0x0B80;
    const LAST: u32 = 0x0BFF;
    chars_in_range(FIRST, LAST)
}
/// Test strings drawn from the single contiguous range U+0900..=U+0DFF.
///
/// That span starts where `devanagari_text` starts and ends where
/// `sinhala_text` ends, so it also takes in every code point in between.
pub fn any_indic_text() -> BoxedStrategy<String> {
    const FIRST: u32 = 0x0900;
    const LAST: u32 = 0x0DFF;
    chars_in_range(FIRST, LAST)
}
/// Hebrew test strings: delegates to [`chars_in_range`] with
/// U+0590..=U+05FF.
pub fn hebrew_text() -> BoxedStrategy<String> {
    const FIRST: u32 = 0x0590;
    const LAST: u32 = 0x05FF;
    chars_in_range(FIRST, LAST)
}
/// Hebrew presentation-form test strings: delegates to [`chars_in_range`]
/// with U+FB1D..=U+FB4F.
pub fn hebrew_presentation_text() -> BoxedStrategy<String> {
    const FIRST: u32 = 0xFB1D;
    const LAST: u32 = 0xFB4F;
    chars_in_range(FIRST, LAST)
}
/// Sinhala test strings: delegates to [`chars_in_range`] with
/// U+0D80..=U+0DFF.
pub fn sinhala_text() -> BoxedStrategy<String> {
    const FIRST: u32 = 0x0D80;
    const LAST: u32 = 0x0DFF;
    chars_in_range(FIRST, LAST)
}
/// Georgian test strings: delegates to [`chars_in_range`] with
/// U+10D0..=U+10F0.
pub fn georgian_text() -> BoxedStrategy<String> {
    const FIRST: u32 = 0x10D0;
    const LAST: u32 = 0x10F0;
    chars_in_range(FIRST, LAST)
}
/// Armenian test strings: delegates to [`chars_in_range`] with
/// U+0531..=U+0587.
pub fn armenian_text() -> BoxedStrategy<String> {
    const FIRST: u32 = 0x0531;
    const LAST: u32 = 0x0587;
    chars_in_range(FIRST, LAST)
}
/// Thai test strings: delegates to [`chars_in_range`] with
/// U+0E01..=U+0E4B.
pub fn thai_text() -> BoxedStrategy<String> {
    const FIRST: u32 = 0x0E01;
    const LAST: u32 = 0x0E4B;
    chars_in_range(FIRST, LAST)
}
/// Thai consonant test strings: delegates to [`chars_in_range`] with
/// U+0E01..=U+0E2E.
pub fn thai_consonants_strat() -> BoxedStrategy<String> {
    const FIRST: u32 = 0x0E01;
    const LAST: u32 = 0x0E2E;
    chars_in_range(FIRST, LAST)
}
/// Thai digit test strings: delegates to [`chars_in_range`] with
/// U+0E50..=U+0E59 (ten code points).
pub fn thai_digits_strat() -> BoxedStrategy<String> {
    const FIRST: u32 = 0x0E50;
    const LAST: u32 = 0x0E59;
    chars_in_range(FIRST, LAST)
}
/// Lao test strings: delegates to [`chars_in_range`] with
/// U+0E81..=U+0ECD.
pub fn lao_text() -> BoxedStrategy<String> {
    const FIRST: u32 = 0x0E81;
    const LAST: u32 = 0x0ECD;
    chars_in_range(FIRST, LAST)
}
/// Strategy that yields a string from either [`thai_text`] or [`lao_text`].
///
/// The two sub-strategies are combined with `proptest::strategy::Union`; each
/// generated string comes wholly from one of them.
pub fn any_tai_text() -> BoxedStrategy<String> {
    let branches = vec![thai_text(), lao_text()];
    let either = proptest::strategy::Union::new(branches);
    either.boxed()
}
/// Extended-Latin test strings: delegates to [`chars_in_range`] with
/// U+00C0..=U+024F.
pub fn extended_latin_text() -> BoxedStrategy<String> {
    const FIRST: u32 = 0x00C0;
    const LAST: u32 = 0x024F;
    chars_in_range(FIRST, LAST)
}
/// Cyrillic test strings: delegates to [`chars_in_range`] with
/// U+0400..=U+04FF.
pub fn cyrillic_text() -> BoxedStrategy<String> {
    const FIRST: u32 = 0x0400;
    const LAST: u32 = 0x04FF;
    chars_in_range(FIRST, LAST)
}
/// CJK test strings: delegates to [`chars_in_range`] with
/// U+4E00..=U+50FF.
pub fn cjk_text() -> BoxedStrategy<String> {
    const FIRST: u32 = 0x4E00;
    const LAST: u32 = 0x50FF;
    chars_in_range(FIRST, LAST)
}
/// Hangul test strings: delegates to [`chars_in_range`] with
/// U+AC00..=U+ACFF.
pub fn hangul_text() -> BoxedStrategy<String> {
    const FIRST: u32 = 0xAC00;
    const LAST: u32 = 0xACFF;
    chars_in_range(FIRST, LAST)
}
/// Devanagari consonant test strings: delegates to [`chars_in_range`] with
/// U+0915..=U+0939.
pub fn devanagari_consonants() -> BoxedStrategy<String> {
    const FIRST: u32 = 0x0915;
    const LAST: u32 = 0x0939;
    chars_in_range(FIRST, LAST)
}
/// Ethiopic test strings of 1 to 20 characters drawn from U+1200..=U+1357.
///
/// Code points whose low three bits are all set (offset 7 within each aligned
/// group of eight) are excluded from the alphabet.
pub fn ethiopic_text() -> BoxedStrategy<String> {
    let mut alphabet: Vec<char> = Vec::new();
    for code_point in 0x1200u32..=0x1357 {
        // Offsets 0..=6 are kept; only the last slot of each group of eight is skipped.
        if code_point & 0x07 == 7 {
            continue;
        }
        if let Some(c) = char::from_u32(code_point) {
            alphabet.push(c);
        }
    }
    let one_char = proptest::sample::select(alphabet);
    proptest::collection::vec(one_char, 1..=20)
        .prop_map(|picked: Vec<char>| picked.iter().collect::<String>())
        .boxed()
}
/// Myanmar test strings of 1 to 20 characters drawn from U+1000..=U+104B.
pub fn myanmar_text() -> BoxedStrategy<String> {
    let mut alphabet: Vec<char> = Vec::new();
    for code_point in 0x1000u32..=0x104B {
        if let Some(c) = char::from_u32(code_point) {
            alphabet.push(c);
        }
    }
    let one_char = proptest::sample::select(alphabet);
    proptest::collection::vec(one_char, 1..=20)
        .prop_map(|picked: Vec<char>| picked.iter().collect::<String>())
        .boxed()
}
/// Khmer test strings of 1 to 20 characters drawn from U+1780..=U+17E9.
pub fn khmer_text() -> BoxedStrategy<String> {
    let mut alphabet: Vec<char> = Vec::new();
    for code_point in 0x1780u32..=0x17E9 {
        if let Some(c) = char::from_u32(code_point) {
            alphabet.push(c);
        }
    }
    let one_char = proptest::sample::select(alphabet);
    proptest::collection::vec(one_char, 1..=20)
        .prop_map(|picked: Vec<char>| picked.iter().collect::<String>())
        .boxed()
}
/// Tibetan test strings of 1 to 20 characters.
///
/// The alphabet is assembled from three sub-ranges — U+0F00..=U+0F6A,
/// U+0F71..=U+0F84 and U+0F90..=U+0FBC — skipping the gaps between them. A
/// character is kept when it is alphanumeric or is not a control character.
pub fn tibetan_text() -> BoxedStrategy<String> {
    let code_points = (0x0F00u32..=0x0F6A)
        .chain(0x0F71u32..=0x0F84)
        .chain(0x0F90u32..=0x0FBC);

    let mut alphabet: Vec<char> = Vec::new();
    for code_point in code_points {
        if let Some(c) = char::from_u32(code_point) {
            if c.is_alphanumeric() || !c.is_control() {
                alphabet.push(c);
            }
        }
    }

    let one_char = proptest::sample::select(alphabet);
    proptest::collection::vec(one_char, 1..=20)
        .prop_map(|picked: Vec<char>| picked.iter().collect::<String>())
        .boxed()
}
/// Arabic test strings: delegates to [`chars_in_range`] with
/// U+0621..=U+064A.
pub fn arabic_text() -> BoxedStrategy<String> {
    const FIRST: u32 = 0x0621;
    const LAST: u32 = 0x064A;
    chars_in_range(FIRST, LAST)
}
/// Arabic presentation-form test strings: delegates to [`chars_in_range`]
/// with U+FE70..=U+FEFC.
pub fn arabic_presentation_text() -> BoxedStrategy<String> {
    const FIRST: u32 = 0xFE70;
    const LAST: u32 = 0xFEFC;
    chars_in_range(FIRST, LAST)
}
/// Syriac test strings: delegates to [`chars_in_range`] with
/// U+0710..=U+073F.
pub fn syriac_text() -> BoxedStrategy<String> {
    const FIRST: u32 = 0x0710;
    const LAST: u32 = 0x073F;
    chars_in_range(FIRST, LAST)
}
/// Thaana test strings: delegates to [`chars_in_range`] with
/// U+0780..=U+07B0.
pub fn thaana_text() -> BoxedStrategy<String> {
    const FIRST: u32 = 0x0780;
    const LAST: u32 = 0x07B0;
    chars_in_range(FIRST, LAST)
}
/// N'Ko test strings: delegates to [`chars_in_range`] with
/// U+07C0..=U+07E7.
pub fn nko_text() -> BoxedStrategy<String> {
    const FIRST: u32 = 0x07C0;
    const LAST: u32 = 0x07E7;
    chars_in_range(FIRST, LAST)
}
/// Coptic test strings: delegates to [`chars_in_range`] with
/// U+2C80..=U+2CC1.
pub fn coptic_text() -> BoxedStrategy<String> {
    const FIRST: u32 = 0x2C80;
    const LAST: u32 = 0x2CC1;
    chars_in_range(FIRST, LAST)
}
/// Cherokee test strings: delegates to [`chars_in_range`] with
/// U+13A0..=U+13F5.
pub fn cherokee_text() -> BoxedStrategy<String> {
    const FIRST: u32 = 0x13A0;
    const LAST: u32 = 0x13F5;
    chars_in_range(FIRST, LAST)
}
/// Canadian syllabics test strings (per the function name): delegates to
/// [`chars_in_range`] with U+1401..=U+1676.
pub fn canadian_text() -> BoxedStrategy<String> {
    const FIRST: u32 = 0x1401;
    const LAST: u32 = 0x1676;
    chars_in_range(FIRST, LAST)
}
/// Vai test strings: delegates to [`chars_in_range`] with
/// U+A500..=U+A62B.
pub fn vai_text() -> BoxedStrategy<String> {
    const FIRST: u32 = 0xA500;
    const LAST: u32 = 0xA62B;
    chars_in_range(FIRST, LAST)
}
/// Mongolian test strings: delegates to [`chars_in_range`] with
/// U+1820..=U+1878.
pub fn mongolian_text() -> BoxedStrategy<String> {
    const FIRST: u32 = 0x1820;
    const LAST: u32 = 0x1878;
    chars_in_range(FIRST, LAST)
}
/// Runic test strings: delegates to [`chars_in_range`] with
/// U+16A0..=U+16EA.
pub fn runic_text() -> BoxedStrategy<String> {
    const FIRST: u32 = 0x16A0;
    const LAST: u32 = 0x16EA;
    chars_in_range(FIRST, LAST)
}
/// Ogham test strings: delegates to [`chars_in_range`] with
/// U+1681..=U+169A.
pub fn ogham_text() -> BoxedStrategy<String> {
    const FIRST: u32 = 0x1681;
    const LAST: u32 = 0x169A;
    chars_in_range(FIRST, LAST)
}
/// Balinese test strings: delegates to [`chars_in_range`] with
/// U+1B05..=U+1B44.
pub fn balinese_text() -> BoxedStrategy<String> {
    const FIRST: u32 = 0x1B05;
    const LAST: u32 = 0x1B44;
    chars_in_range(FIRST, LAST)
}
/// Balinese consonant test strings: delegates to [`chars_in_range`] with
/// U+1B13..=U+1B33.
pub fn balinese_consonants() -> BoxedStrategy<String> {
    const FIRST: u32 = 0x1B13;
    const LAST: u32 = 0x1B33;
    chars_in_range(FIRST, LAST)
}
/// Javanese test strings: delegates to [`chars_in_range`] with
/// U+A984..=U+A9C0.
pub fn javanese_text() -> BoxedStrategy<String> {
    const FIRST: u32 = 0xA984;
    const LAST: u32 = 0xA9C0;
    chars_in_range(FIRST, LAST)
}
/// Javanese consonant test strings: delegates to [`chars_in_range`] with
/// U+A990..=U+A9B2.
pub fn javanese_consonants() -> BoxedStrategy<String> {
    const FIRST: u32 = 0xA990;
    const LAST: u32 = 0xA9B2;
    chars_in_range(FIRST, LAST)
}
/// Tai Le test strings: delegates to [`chars_in_range`] with
/// U+1950..=U+196D.
pub fn tai_le_text() -> BoxedStrategy<String> {
    const FIRST: u32 = 0x1950;
    const LAST: u32 = 0x196D;
    chars_in_range(FIRST, LAST)
}
/// New Tai Lue test strings: delegates to [`chars_in_range`] with
/// U+1980..=U+19C9.
pub fn new_tai_lue_text() -> BoxedStrategy<String> {
    const FIRST: u32 = 0x1980;
    const LAST: u32 = 0x19C9;
    chars_in_range(FIRST, LAST)
}
/// Maps the two boolean scheme flags onto a `disarm::api::Scheme`.
///
/// `strict_iso9` takes precedence: when it is set the result is
/// `Scheme::StrictIso9` regardless of `gost7034`. With neither flag set the
/// result is `Scheme::Default`.
fn scheme_of(strict_iso9: bool, gost7034: bool) -> disarm::api::Scheme {
    use disarm::api::Scheme;
    match (strict_iso9, gost7034) {
        (true, _) => Scheme::StrictIso9,
        (false, true) => Scheme::GostR7034,
        (false, false) => Scheme::Default,
    }
}
fn on_unknown_of(error_mode: disarm::ErrorMode, replacement: &str) -> disarm::api::OnUnknown {
use disarm::api::OnUnknown;
match error_mode {
disarm::ErrorMode::Replace => OnUnknown::Replace(replacement.to_owned()),
disarm::ErrorMode::Ignore => OnUnknown::Ignore,
disarm::ErrorMode::Preserve => OnUnknown::Preserve,
}
}
/// Runs `text` through a `disarm::api::Transliterate` builder configured from
/// flat arguments.
///
/// * `lang` — forwarded to the builder's `lang` setter when `Some`; otherwise
///   the setter is not called.
/// * `error_mode`, `replacement` — combined into the builder's `on_unknown`
///   setting via `on_unknown_of`.
/// * `tones` — forwarded to the builder's `tones` setter.
/// * `strict_iso9`, `gost7034` — combined into the builder's `scheme` via
///   `scheme_of`.
///
/// Returns whatever the builder's `run` produces for `text`.
pub fn transliterate<'a>(
    text: &'a str,
    lang: Option<&str>,
    error_mode: disarm::ErrorMode,
    replacement: &str,
    tones: bool,
    strict_iso9: bool,
    gost7034: bool,
) -> std::borrow::Cow<'a, str> {
    let scheme = scheme_of(strict_iso9, gost7034);
    let unknown_policy = on_unknown_of(error_mode, replacement);

    let mut builder = disarm::api::Transliterate::new()
        .scheme(scheme)
        .on_unknown(unknown_policy)
        .tones(tones);
    if let Some(code) = lang {
        builder = builder.lang(code);
    }
    builder.run(text)
}
/// Collects the entries reported by the builder's `find_untranslatable` for
/// `text`, as `(char, offset)` pairs.
///
/// The builder is configured with the scheme derived from `strict_iso9` /
/// `gost7034` (see `scheme_of`), the `tones` flag, and `lang` when it is
/// `Some`. Each reported item contributes its `ch` and `offset` fields, in the
/// order the builder returns them.
pub fn find_untranslatable(
    text: &str,
    lang: Option<&str>,
    tones: bool,
    strict_iso9: bool,
    gost7034: bool,
) -> Vec<(char, usize)> {
    let scheme = scheme_of(strict_iso9, gost7034);

    let mut builder = disarm::api::Transliterate::new()
        .scheme(scheme)
        .tones(tones);
    if let Some(code) = lang {
        builder = builder.lang(code);
    }

    let findings = builder.find_untranslatable(text);
    findings
        .into_iter()
        .map(|finding| (finding.ch, finding.offset))
        .collect()
}