1use std::{
2 ffi::{CStr, CString},
3 mem::forget,
4 os::raw::c_char,
5 path::Path,
6 ptr,
7};
8
/// A segment of the input text, laid out with `#[repr(C)]` so it can be read
/// directly from C.
///
/// The tests in this file expect `{0, 2}` and `{2, 4}` for a four-character
/// Thai input, which suggests the offsets count characters (not bytes) and
/// that `e` is exclusive — NOTE(review): confirm against `wordcut_engine`.
#[repr(C)]
#[derive(PartialEq, Eq, Clone, Copy, Debug)]
pub struct TextRange {
    /// Offset at which the segment starts.
    pub s: usize,
    /// Offset at which the segment ends.
    pub e: usize,
}
15
/// Opaque handle type exposed to C callers.
///
/// Values of this type are never created by the functions in this file.
/// The constructors box a `wordcut_engine::Wordcut` and cast the resulting
/// pointer to `*mut Wordcut`; the other exported functions cast the pointer
/// back before using it.
#[repr(C)]
#[derive(PartialEq, Clone, Debug)]
pub struct Wordcut {}
19
20fn wordcut_new_with_dict_path(path: &Path) -> *mut Wordcut {
21 match wordcut_engine::load_dict(path) {
22 Ok(dict) => {
23 let wordcut = wordcut_engine::Wordcut::new(dict);
24 let boxed_wordcut = Box::new(wordcut);
25 Box::into_raw(boxed_wordcut) as *mut Wordcut
26 }
27 Err(e) => {
28 eprintln!("{}", e);
29 return ptr::null::<Wordcut>() as *mut Wordcut;
30 }
31 }
32}
33
34fn wordcut_new_with_dict_and_cluster_rules_path(
35 dict_path: &Path,
36 cluster_rules_path: &Path,
37) -> *mut Wordcut {
38 match wordcut_engine::load_dict(dict_path) {
39 Ok(dict) => match wordcut_engine::load_cluster_rules(cluster_rules_path) {
40 Ok(cluster_re) => {
41 let wordcut = wordcut_engine::Wordcut::new_with_cluster_re(dict, cluster_re);
42 let boxed_wordcut = Box::new(wordcut);
43 Box::into_raw(boxed_wordcut) as *mut Wordcut
44 }
45 Err(e) => {
46 eprintln!("{}", e);
47 return ptr::null::<Wordcut>() as *mut Wordcut;
48 }
49 },
50 Err(e) => {
51 eprintln!("{}", e);
52 return ptr::null::<Wordcut>() as *mut Wordcut;
53 }
54 }
55}
56
57#[no_mangle]
58pub extern "C" fn wordcut_new_with_dict(path: *const c_char) -> *mut Wordcut {
59 let path = unsafe { CStr::from_ptr(path) }.to_str().unwrap();
60 let path = Path::new(path);
61 wordcut_new_with_dict_path(path)
62}
63
64#[no_mangle]
65pub extern "C" fn wordcut_new_with_dict_and_cluster_rules(
66 dict_path: *const c_char,
67 cluster_rules_path: *const c_char,
68) -> *mut Wordcut {
69 let dict_path = unsafe { CStr::from_ptr(dict_path) }.to_str().unwrap();
70 let dict_path = Path::new(dict_path);
71 let cluster_rules_path = unsafe { CStr::from_ptr(cluster_rules_path) }
72 .to_str()
73 .unwrap();
74 let cluster_rules_path = Path::new(cluster_rules_path);
75 wordcut_new_with_dict_and_cluster_rules_path(dict_path, cluster_rules_path)
76}
77
78#[no_mangle]
79pub extern "C" fn delete_wordcut(wordcut: *mut Wordcut) {
80 unsafe {
81 let _ = Box::from_raw(wordcut as *mut wordcut_engine::Wordcut);
82 }
83}
84
85#[no_mangle]
86pub extern "C" fn delete_text_ranges(text_ranges: *mut TextRange, range_count: usize) {
87 unsafe { Vec::from_raw_parts(text_ranges, range_count, range_count) };
88}
89
90#[no_mangle]
91pub extern "C" fn wordcut_into_text_ranges(
92 wordcut: *const Wordcut,
93 text: *const c_char,
94 range_count: *mut usize,
95) -> *mut TextRange {
96 let wordcut: *const wordcut_engine::Wordcut = wordcut as *const wordcut_engine::Wordcut;
97 let text = unsafe { CStr::from_ptr(text) }.to_str().unwrap();
98 let text_ranges = unsafe { (*wordcut).segment(text) };
99 let mut text_ranges: Vec<TextRange> = text_ranges
100 .into_iter()
101 .map(|r| TextRange { s: r.s, e: r.e })
102 .collect();
103 unsafe {
104 *range_count = text_ranges.len();
105 };
106 let p = text_ranges.as_mut_ptr();
107 forget(text_ranges);
108 return p;
109}
110
111#[no_mangle]
112pub extern "C" fn wordcut_into_strings(
113 wordcut: *const Wordcut,
114 text: *const c_char,
115 string_count: *mut usize,
116) -> *mut *mut c_char {
117 let wordcut: *const wordcut_engine::Wordcut = wordcut as *const wordcut_engine::Wordcut;
118 let text = unsafe { CStr::from_ptr(text) }.to_str().unwrap();
119 let strings = unsafe { (*wordcut).segment_into_strings(text) };
120 let mut strings: Vec<*mut c_char> = strings
121 .into_iter()
122 .map(|s| CString::new(s).unwrap().into_raw())
123 .collect();
124 unsafe {
125 *string_count = strings.len();
126 };
127 let p = strings.as_mut_ptr();
128 forget(strings);
129 return p;
130}
131
/// C entry point: frees an array of strings returned by
/// `wordcut_into_strings`, including every string it contains.
///
/// Passing a null `strings` is a no-op. Otherwise `strings` and
/// `string_count` must be exactly the pointer and count reported by
/// `wordcut_into_strings`, and neither the array nor its strings may be used
/// afterwards.
///
/// The array is rebuilt as a `Vec` whose capacity equals `string_count`, so
/// the allocation must hold exactly that many pointers.
#[no_mangle]
pub extern "C" fn delete_strings(strings: *mut *mut c_char, string_count: usize) {
    if strings.is_null() {
        // `Vec::from_raw_parts` requires a non-null pointer.
        return;
    }
    // SAFETY: per this function's contract the pointer/count pair describes a
    // live allocation of `string_count` pointers made by the global allocator.
    let raw_strings = unsafe { Vec::from_raw_parts(strings, string_count, string_count) };
    for raw in raw_strings {
        if raw.is_null() {
            continue;
        }
        // Dropping a raw pointer frees nothing; the string has to be turned
        // back into a `CString` for its buffer to be released.
        // SAFETY: every non-null element was produced by `CString::into_raw`
        // and has not been freed yet.
        drop(unsafe { CString::from_raw(raw) });
    }
}
139
140#[no_mangle]
141pub extern "C" fn wordcut_put_delimiters(
142 wordcut: *const Wordcut,
143 text: *const c_char,
144 delim: *const c_char,
145) -> *mut c_char {
146 let wordcut: *const wordcut_engine::Wordcut = wordcut as *const wordcut_engine::Wordcut;
147 let text = unsafe { CStr::from_ptr(text) }.to_str().unwrap();
148 let delim = unsafe { CStr::from_ptr(delim) }.to_str().unwrap();
149 let segmented_text = unsafe { (*wordcut).put_delimiters(text, delim) };
150 let p = CString::new(segmented_text).unwrap().into_raw();
151 return p;
152}
153
#[cfg(test)]
mod tests {
    //! Exercises the exported C entry points against the bundled Thai data
    //! files (`data/thai.txt`, `data/thai_cluster_rules.txt`).
    //!
    //! Input strings stay owned by `CString` values and are passed with
    //! `as_ptr()`, so nothing is leaked. Everything the library hands back is
    //! released again.

    use super::*;
    use std::ffi::CString;

    /// Loads the Thai dictionary and fails the test immediately if that does
    /// not work, instead of dereferencing a null handle later.
    fn load_thai_wordcut() -> *mut Wordcut {
        let dict_path = CString::new("data/thai.txt").unwrap();
        let wordcut = wordcut_new_with_dict(dict_path.as_ptr());
        assert!(!wordcut.is_null(), "failed to load data/thai.txt");
        wordcut
    }

    #[test]
    fn test_wordcut_into_text_ranges() {
        let text = CString::new("ลากา").unwrap();
        let wordcut = load_thai_wordcut();
        let mut range_count = 0;
        let text_ranges = wordcut_into_text_ranges(wordcut, text.as_ptr(), &mut range_count);
        assert!(!text_ranges.is_null());
        assert_eq!(range_count, 2);
        // SAFETY: the array holds `range_count == 2` elements.
        unsafe {
            assert_eq!(*text_ranges, TextRange { s: 0, e: 2 });
            assert_eq!(*text_ranges.add(1), TextRange { s: 2, e: 4 });
        }
        delete_text_ranges(text_ranges, range_count);
        delete_wordcut(wordcut);
    }

    #[test]
    fn test_wordcut_into_strings() {
        let text = CString::new("ลากา").unwrap();
        let wordcut = load_thai_wordcut();
        let mut string_count = 0;
        let segmented_strings = wordcut_into_strings(wordcut, text.as_ptr(), &mut string_count);
        assert!(!segmented_strings.is_null());
        assert_eq!(string_count, 2);
        // SAFETY: the array holds `string_count == 2` NUL-terminated strings.
        unsafe {
            let s0 = CStr::from_ptr(*segmented_strings).to_str().unwrap();
            let s1 = CStr::from_ptr(*segmented_strings.add(1))
                .to_str()
                .unwrap();
            assert_eq!(s0, "ลา");
            assert_eq!(s1, "กา");
        }
        delete_strings(segmented_strings, string_count);
        delete_wordcut(wordcut);
    }

    #[test]
    fn test_wordcut_put_delimiters() {
        let text = CString::new("ลากา").unwrap();
        let delim = CString::new("---").unwrap();
        let wordcut = load_thai_wordcut();
        let segmented_text = wordcut_put_delimiters(wordcut, text.as_ptr(), delim.as_ptr());
        assert!(!segmented_text.is_null());
        // SAFETY: the result was produced by `CString::into_raw`, so it is a
        // valid NUL-terminated string and may be reclaimed with `from_raw`.
        unsafe {
            let s = CStr::from_ptr(segmented_text).to_str().unwrap();
            assert_eq!(s, "ลา---กา");
            drop(CString::from_raw(segmented_text));
        }
        delete_wordcut(wordcut);
    }

    #[test]
    fn test_wordcut_put_delimiters_with_cluster_rules() {
        let text = CString::new("เมลามา").unwrap();
        let delim = CString::new("---").unwrap();
        let dict_path = CString::new("data/thai.txt").unwrap();
        let cluster_rules_path = CString::new("data/thai_cluster_rules.txt").unwrap();
        let wordcut = wordcut_new_with_dict_and_cluster_rules(
            dict_path.as_ptr(),
            cluster_rules_path.as_ptr(),
        );
        assert!(!wordcut.is_null(), "failed to load dictionary or cluster rules");
        let segmented_text = wordcut_put_delimiters(wordcut, text.as_ptr(), delim.as_ptr());
        assert!(!segmented_text.is_null());
        // SAFETY: the result was produced by `CString::into_raw`, so it is a
        // valid NUL-terminated string and may be reclaimed with `from_raw`.
        unsafe {
            let s = CStr::from_ptr(segmented_text).to_str().unwrap();
            assert_eq!(s, "เม---ลา---มา");
            drop(CString::from_raw(segmented_text));
        }
        delete_wordcut(wordcut);
    }
}