1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
use {
rust_icu_common as common,
rust_icu_sys as sys,
rust_icu_sys::versioned_function,
rust_icu_sys::*,
rust_icu_ustring as ustring,
rust_icu_ustring::buffered_uchar_method_with_retry,
};
use std::convert::{TryFrom, TryInto};
/// A handle to an ICU `UNormalizer2` instance.
///
/// The handle records whether it is responsible for closing the underlying
/// ICU object; see the `Drop` implementation for how that flag is used.
#[derive(Debug)]
pub struct UNormalizer {
    /// Non-null pointer to the underlying ICU normalizer representation.
    rep: std::ptr::NonNull<sys::UNormalizer2>,

    /// When `true`, dropping this value calls `unorm2_close` on `rep`.
    /// When `false`, the pointer is left alone on drop.
    owned: bool,
}
impl Drop for UNormalizer {
    /// Releases the underlying ICU normalizer, but only if this handle owns it.
    ///
    /// Handles created with `owned == false` leave the pointer untouched.
    fn drop(&mut self) {
        if self.owned {
            // SAFETY: `rep` is non-null by construction, and `owned` states that
            // this handle is the one responsible for closing it.
            unsafe {
                versioned_function!(unorm2_close)(self.rep.as_ptr());
            }
        }
    }
}
impl UNormalizer {
pub fn new_nfc() -> Result<Self, common::Error> {
unsafe { UNormalizer::new_normalizer_unowned(versioned_function!(unorm2_getNFCInstance)) }
}
pub fn new_nfd() -> Result<Self, common::Error> {
unsafe { UNormalizer::new_normalizer_unowned(versioned_function!(unorm2_getNFDInstance)) }
}
pub fn new_nfkc() -> Result<Self, common::Error> {
unsafe { UNormalizer::new_normalizer_unowned(versioned_function!(unorm2_getNFKCInstance)) }
}
pub fn new_nfkd() -> Result<Self, common::Error> {
unsafe { UNormalizer::new_normalizer_unowned(versioned_function!(unorm2_getNFKDInstance)) }
}
pub fn new_nfkc_casefold() -> Result<Self, common::Error> {
unsafe { UNormalizer::new_normalizer_unowned(versioned_function!(unorm2_getNFKCCasefoldInstance)) }
}
unsafe fn new_normalizer_unowned(
constrfn: unsafe extern "C" fn(*mut sys::UErrorCode) -> *const sys::UNormalizer2) -> Result<Self, common::Error> {
let mut status = common::Error::OK_CODE;
let rep = {
assert!(common::Error::is_ok(status));
let ptr = constrfn(&mut status) as *mut sys::UNormalizer2;
std::ptr::NonNull::new_unchecked(ptr)
};
common::Error::ok_or_warning(status)?;
Ok(UNormalizer{ rep, owned: false })
}
pub fn normalize(&self, norm: &str) -> Result<String, common::Error> {
let norm = ustring::UChar::try_from(norm)?;
let result = self.normalize_ustring(&norm)?;
String::try_from(&result)
}
pub fn normalize_ustring(
&self,
norm: &ustring::UChar
) -> Result<ustring::UChar, common::Error> {
const CAPACITY: usize = 200;
buffered_uchar_method_with_retry!(
norm_uchar,
CAPACITY,
[ptr: *const sys::UNormalizer2, s: *const sys::UChar, l: i32,],
[]
);
let result = norm_uchar(
versioned_function!(unorm2_normalize),
self.rep.as_ptr(),
norm.as_c_ptr(),
norm.len() as i32,
)?;
Ok(result)
}
pub fn compose_pair(&self, point1: sys::UChar32, point2: sys::UChar32) -> sys::UChar32 {
let result: sys::UChar32 = unsafe {
versioned_function!(unorm2_composePair)(
self.rep.as_ptr(), point1, point2)
};
result
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Verifies `compose_pair` on an NFKC normalizer: a pair without a composite
    /// yields `-1`, and `A` followed by a combining accent yields the matching
    /// precomposed letter.
    #[test]
    fn test_compose_pair_nfkc() -> Result<(), common::Error> {
        let letter_a = 'A' as sys::UChar32;
        // Each entry is (first code point, second code point, expected result).
        let cases: [(sys::UChar32, sys::UChar32, sys::UChar32); 5] = [
            (1, 0, -1),
            (letter_a, 0x300, 'À' as sys::UChar32),
            (letter_a, 0x301, 'Á' as sys::UChar32),
            (letter_a, 0x302, 'Â' as sys::UChar32),
            (letter_a, 0x303, 'Ã' as sys::UChar32),
        ];
        for &(first, second, expected) in cases.iter() {
            let normalizer = UNormalizer::new_nfkc()?;
            let composed = normalizer.compose_pair(first, second);
            assert_eq!(composed, expected);
        }
        Ok(())
    }
}