use {
rust_icu_common::{self as common, simple_drop_impl},
rust_icu_sys::{self as sys, *},
rust_icu_uenum as uenum, rust_icu_ustring as ustring,
std::{convert::TryFrom, ptr, slice},
};
/// Rust-side handle for an ICU `UTransliterator` object.
///
/// The wrapped pointer is produced by the ICU `utrans_*` open functions called
/// from the constructors in this file.
#[derive(Debug)]
pub struct UTransliterator {
/// Non-null raw pointer to the underlying ICU transliterator.
rep: ptr::NonNull<sys::UTransliterator>,
}
// Generates the `Drop` implementation for the handle.
// NOTE(review): presumably the generated `drop` calls `utrans_close` on `rep`
// -- confirm against the definition of `simple_drop_impl!`.
simple_drop_impl!(UTransliterator, utrans_close);
impl Clone for UTransliterator {
fn clone(&self) -> Self {
UTransliterator {
rep: self.rep.clone(),
}
}
}
impl UTransliterator {
pub fn get_ids() -> Result<uenum::Enumeration, common::Error> {
let mut status = common::Error::OK_CODE;
let rep = unsafe {
assert!(common::Error::is_ok(status));
versioned_function!(utrans_openIDs)(&mut status)
};
common::Error::ok_or_warning(status)?;
assert_ne!(rep, 0 as *mut sys::UEnumeration);
let ids = unsafe { uenum::Enumeration::from_raw_parts(None, rep) };
Ok(ids)
}
pub fn register(trans: Self) -> Result<(), common::Error> {
let mut status = common::Error::OK_CODE;
unsafe {
assert!(common::Error::is_ok(status));
versioned_function!(utrans_register)(
trans.rep.as_ptr(),
&mut status,
)
}
common::Error::ok_or_warning(status)?;
std::mem::forget(trans);
Ok(())
}
/// Creates a transliterator from string arguments.
///
/// Converts `id` and the optional `rules` to `ustring::UChar` and delegates
/// to [`UTransliterator::new_ustring`].
///
/// # Errors
///
/// Returns an error if either string conversion fails, or if
/// `new_ustring` fails.
pub fn new(
    id: &str,
    rules: Option<&str>,
    dir: sys::UTransDirection,
) -> Result<Self, common::Error> {
    let id_ustr = ustring::UChar::try_from(id)?;
    let rules_ustr = if let Some(text) = rules {
        Some(ustring::UChar::try_from(text)?)
    } else {
        None
    };
    Self::new_ustring(&id_ustr, rules_ustr.as_ref(), dir)
}
/// Creates a transliterator from `ustring::UChar` arguments.
///
/// Implements `utrans_openU`. When `rules` is `None`, a null pointer and a
/// zero length are passed for the rules.
///
/// # Errors
///
/// Returns the ICU error if `utrans_openU` reports a failure status, or the
/// parse error if the parse status is rejected by `common::parse_ok`.
///
/// # Panics
///
/// Panics if ICU reports success but returns a null transliterator.
pub fn new_ustring(
    id: &ustring::UChar,
    rules: Option<&ustring::UChar>,
    dir: sys::UTransDirection,
) -> Result<Self, common::Error> {
    // Resolve the optional rules into the (pointer, length) pair ICU expects.
    let (rules_ptr, rules_len) = match rules {
        Some(r) => (r.as_c_ptr(), r.len()),
        None => (ptr::null::<sys::UChar>(), 0),
    };
    let mut status = common::Error::OK_CODE;
    let mut parse_status = common::NO_PARSE_ERROR.clone();
    let raw = unsafe {
        assert!(common::Error::is_ok(status));
        versioned_function!(utrans_openU)(
            id.as_c_ptr(),
            id.len() as i32,
            dir,
            rules_ptr,
            rules_len as i32,
            &mut parse_status,
            &mut status,
        )
    };
    common::Error::ok_or_warning(status)?;
    common::parse_ok(parse_status)?;
    assert_ne!(raw, ptr::null_mut::<sys::UTransliterator>());
    let rep = ptr::NonNull::new(raw).unwrap();
    Ok(Self { rep })
}
pub fn get_id(&self) -> Result<String, common::Error> {
let mut id_len: i32 = 0;
let rep = unsafe {
versioned_function!(utrans_getUnicodeID)(
self.rep.as_ptr(),
&mut id_len,
)
};
assert_ne!(rep, 0 as *const sys::UChar);
let id_buf =
unsafe { slice::from_raw_parts(rep, id_len as usize) }.to_vec();
let id = ustring::UChar::from(id_buf);
String::try_from(&id)
}
/// Opens the inverse of this transliterator.
///
/// Implements `utrans_openInverse`.
///
/// # Errors
///
/// Returns the ICU error if `utrans_openInverse` reports a failure status.
///
/// # Panics
///
/// Panics if ICU reports success but returns a null transliterator.
pub fn inverse(&self) -> Result<Self, common::Error> {
    let mut status = common::Error::OK_CODE;
    let raw = unsafe {
        assert!(common::Error::is_ok(status));
        versioned_function!(utrans_openInverse)(self.rep.as_ptr(), &mut status)
    };
    common::Error::ok_or_warning(status)?;
    assert_ne!(raw, ptr::null_mut::<sys::UTransliterator>());
    let rep = ptr::NonNull::new(raw).unwrap();
    Ok(Self { rep })
}
pub fn to_rules(
&self,
escape_unprintable: bool,
) -> Result<String, common::Error> {
let mut status = common::Error::OK_CODE;
let rules_len = unsafe {
assert!(common::Error::is_ok(status));
versioned_function!(utrans_toRules)(
self.rep.as_ptr(),
escape_unprintable as sys::UBool,
0 as *mut sys::UChar,
0,
&mut status,
)
};
common::Error::ok_preflight(status)?;
let mut status = common::Error::OK_CODE;
let mut rules: Vec<sys::UChar> = vec![0; rules_len as usize];
unsafe {
assert!(common::Error::is_ok(status));
versioned_function!(utrans_toRules)(
self.rep.as_ptr(),
escape_unprintable as sys::UBool,
rules.as_mut_ptr(),
rules_len,
&mut status,
);
}
common::Error::ok_or_warning(status)?;
let rules = ustring::UChar::from(rules);
String::try_from(&rules)
}
/// Sets or clears the filter from an optional string pattern.
///
/// Converts `pattern` to `ustring::UChar` when present and delegates to
/// [`UTransliterator::set_filter_ustring`].
///
/// # Errors
///
/// Returns an error if the string conversion fails, or if
/// `set_filter_ustring` fails.
pub fn set_filter(
    &mut self,
    pattern: Option<&str>,
) -> Result<(), common::Error> {
    let pattern_ustr = if let Some(text) = pattern {
        Some(ustring::UChar::try_from(text)?)
    } else {
        None
    };
    self.set_filter_ustring(pattern_ustr.as_ref())
}
pub fn set_filter_ustring(
&mut self,
pattern: Option<&ustring::UChar>,
) -> Result<(), common::Error> {
let mut status = common::Error::OK_CODE;
unsafe {
assert!(common::Error::is_ok(status));
versioned_function!(utrans_setFilter)(
self.rep.as_ptr(),
pattern.map_or(0 as *const sys::UChar, |p| p.as_c_ptr()),
pattern.as_ref().map_or(0, |p| p.len()) as i32,
&mut status,
)
}
common::Error::ok_or_warning(status)
}
/// Transliterates `text` and returns the result as a `String`.
///
/// Converts `text` to `ustring::UChar`, delegates to
/// [`UTransliterator::transliterate_ustring`], and converts the result back.
///
/// # Errors
///
/// Returns an error if either conversion fails, or if
/// `transliterate_ustring` fails.
pub fn transliterate(&self, text: &str) -> Result<String, common::Error> {
    let input = ustring::UChar::try_from(text)?;
    self.transliterate_ustring(&input)
        .and_then(|output| String::try_from(&output))
}
/// Transliterates `text` and returns the result as a new `ustring::UChar`.
///
/// Implements `utrans_transUChars`. The input is left untouched; the work is
/// done on a copy. If the first call reports a length larger than the input,
/// the call is repeated on a fresh copy resized to that length.
///
/// # Errors
///
/// Returns the ICU error if the first call's status is rejected by
/// `ok_preflight`, or if the second call's status is rejected by
/// `ok_or_warning`.
pub fn transliterate_ustring(
&self,
text: &ustring::UChar,
) -> Result<ustring::UChar, common::Error> {
let start: i32 = 0;
let text_len = text.len() as i32;
// Work on a copy; ICU writes the result into this buffer in place.
let mut trans_text = text.clone();
let mut trans_text_len = text_len;
let mut limit = text_len;
let mut status = common::Error::OK_CODE;
// First attempt: the buffer capacity passed to ICU equals the input length.
unsafe {
assert!(common::Error::is_ok(status));
versioned_function!(utrans_transUChars)(
self.rep.as_ptr(),
trans_text.as_mut_c_ptr(),
&mut trans_text_len,
text_len,
start,
&mut limit,
&mut status,
)
}
// NOTE(review): `ok_preflight` presumably tolerates a buffer-overflow status
// so the retry below can run -- confirm against `common::Error`.
common::Error::ok_preflight(status)?;
// ICU reported a length larger than the input: retry with a bigger buffer.
if trans_text_len > text_len {
// Start again from an unmodified copy of the input, grown to the
// reported length.
trans_text = text.clone();
trans_text.resize(trans_text_len as usize);
limit = text_len;
let mut status = common::Error::OK_CODE;
// The text length passed in is the original input length; the capacity is
// the enlarged size.
let mut length = text_len;
unsafe {
assert!(common::Error::is_ok(status));
versioned_function!(utrans_transUChars)(
self.rep.as_ptr(),
trans_text.as_mut_c_ptr(),
&mut length,
trans_text_len as i32,
start,
&mut limit,
&mut status,
)
}
common::Error::ok_or_warning(status)?;
}
// Trim the buffer down to `limit` when it is longer than that.
if trans_text.len() > limit as usize {
trans_text.resize(limit as usize);
}
Ok(trans_text)
}
/// Unregisters the transliterator with the given `id`.
///
/// Implements `utrans_unregisterID`. The ICU call takes no status argument,
/// so the only failure reported here is the string conversion.
///
/// # Errors
///
/// Returns an error if `id` cannot be converted to `ustring::UChar`.
pub fn unregister(id: &str) -> Result<(), common::Error> {
    let id_ustr = ustring::UChar::try_from(id)?;
    let id_len = id_ustr.len() as i32;
    unsafe {
        versioned_function!(utrans_unregisterID)(id_ustr.as_c_ptr(), id_len);
    }
    Ok(())
}
}
#[cfg(test)]
mod tests {
    use super::sys;
    use super::UTransliterator;
    use log::trace;

    const DIR_FWD: sys::UTransDirection = sys::UTransDirection::UTRANS_FORWARD;
    const DIR_REV: sys::UTransDirection = sys::UTransDirection::UTRANS_REVERSE;

    /// A compound built-in ID round-trips through `get_id` and transliterates
    /// mixed Cyrillic/accented input down to plain ASCII.
    #[test]
    fn test_builtin() {
        trace!("Available IDs");
        for entry in UTransliterator::get_ids().unwrap() {
            let available_id = entry.unwrap();
            trace!(" {}", available_id);
        }

        let compound_id = "NFC;Cyrillic-Latin;Latin-ASCII";
        let translit =
            UTransliterator::new(compound_id, None, DIR_FWD).unwrap();
        assert_eq!(translit.get_id().unwrap(), compound_id);

        let input = "\u{0446}a\u{0308}fe\u{0301}";
        assert_eq!(input.chars().count(), 6);
        assert_eq!(translit.transliterate(input).unwrap(), "cafe");
    }

    /// The inverse of a built-in transliterator reports the reversed ID.
    #[test]
    fn test_inverse() {
        let forward =
            UTransliterator::new("Latin-ASCII", None, DIR_FWD).unwrap();
        let backward = forward.inverse().unwrap();
        assert_eq!(backward.get_id().unwrap(), "ASCII-Latin");
    }

    /// A bidirectional rule works in both the forward and reverse direction.
    #[test]
    fn test_rules_based() {
        let rule_text = "a <> xyz;";

        let forward =
            UTransliterator::new("MyA-MyXYZ", Some(rule_text), DIR_FWD)
                .unwrap();
        assert_eq!(forward.transliterate("abc").unwrap(), "xyzbc");

        let reverse =
            UTransliterator::new("MyXYZ-MyA", Some(rule_text), DIR_REV)
                .unwrap();
        assert_eq!(reverse.transliterate("xyzbc").unwrap(), "abc");
    }

    /// `to_rules` returns the rule text the transliterator was built from.
    #[test]
    fn test_to_rules() {
        let rule_text = "a > xyz;";
        let translit =
            UTransliterator::new("MyA-MyXYZ", Some(rule_text), DIR_FWD)
                .unwrap();
        assert_eq!(translit.to_rules(false).unwrap(), rule_text);
    }

    /// Setting a filter changes the output; clearing it restores the
    /// unfiltered output.
    #[test]
    fn test_set_filter() {
        let rule_text = "{a}bc > x; x{b}c > y; xy{c} > z;";
        let mut translit =
            UTransliterator::new("MyABC-MyXYZ", Some(rule_text), DIR_FWD)
                .unwrap();

        translit.set_filter(Some("[ac]")).unwrap();
        assert_eq!(translit.transliterate("abc").unwrap(), "xbc");

        translit.set_filter(None).unwrap();
        assert_eq!(translit.transliterate("abc").unwrap(), "xyz");
    }

    /// Registering adds exactly one available ID and unregistering removes it.
    #[test]
    fn test_register_unregister() {
        fn available_count() -> usize {
            UTransliterator::get_ids().unwrap().count()
        }

        let before = available_count();
        let custom_id = "MyA-MyXYZ";
        let rule_text = "a > xyz;";

        let translit =
            UTransliterator::new(custom_id, Some(rule_text), DIR_FWD).unwrap();
        UTransliterator::register(translit).unwrap();
        assert_eq!(available_count(), before + 1);

        UTransliterator::unregister(custom_id).unwrap();
        assert_eq!(available_count(), before);
    }
}