wordcutw/
lib.rs

1use std::{
2    ffi::{CStr, CString},
3    mem::forget,
4    os::raw::c_char,
5    path::Path,
6    ptr,
7};
8
/// One segment of the input text, laid out for C callers.
///
/// `s` marks where the segment starts and `e` where it ends. In this file's
/// tests the four-character text "ลากา" is reported as `0..2` followed by
/// `2..4`, so the offsets there count characters rather than UTF-8 bytes and
/// `e` is exclusive.
///
/// The type is plain data (two `usize` fields), so it is `Copy` and `Eq` in
/// addition to the comparison, cloning and debug support it already had.
#[repr(C)]
#[derive(PartialEq, Eq, Clone, Copy, Debug)]
pub struct TextRange {
    /// Start offset of the segment.
    pub s: usize,
    /// End offset of the segment.
    pub e: usize,
}
15
/// Opaque handle type used in the C-facing signatures of this file.
///
/// The struct carries no data of its own: the exported functions cast
/// pointers to it to and from `wordcut_engine::Wordcut`, so a `*mut Wordcut`
/// is only meaningful when it came from one of the `wordcut_new_*` functions.
#[derive(Clone, Debug, PartialEq)]
#[repr(C)]
pub struct Wordcut {}
19
20fn wordcut_new_with_dict_path(path: &Path) -> *mut Wordcut {
21    match wordcut_engine::load_dict(path) {
22        Ok(dict) => {
23            let wordcut = wordcut_engine::Wordcut::new(dict);
24            let boxed_wordcut = Box::new(wordcut);
25            Box::into_raw(boxed_wordcut) as *mut Wordcut
26        }
27        Err(e) => {
28            eprintln!("{}", e);
29            return ptr::null::<Wordcut>() as *mut Wordcut;
30        }
31    }
32}
33
34fn wordcut_new_with_dict_and_cluster_rules_path(
35    dict_path: &Path,
36    cluster_rules_path: &Path,
37) -> *mut Wordcut {
38    match wordcut_engine::load_dict(dict_path) {
39        Ok(dict) => match wordcut_engine::load_cluster_rules(cluster_rules_path) {
40            Ok(cluster_re) => {
41                let wordcut = wordcut_engine::Wordcut::new_with_cluster_re(dict, cluster_re);
42                let boxed_wordcut = Box::new(wordcut);
43                Box::into_raw(boxed_wordcut) as *mut Wordcut
44            }
45            Err(e) => {
46                eprintln!("{}", e);
47                return ptr::null::<Wordcut>() as *mut Wordcut;
48            }
49        },
50        Err(e) => {
51            eprintln!("{}", e);
52            return ptr::null::<Wordcut>() as *mut Wordcut;
53        }
54    }
55}
56
57#[no_mangle]
58pub extern "C" fn wordcut_new_with_dict(path: *const c_char) -> *mut Wordcut {
59    let path = unsafe { CStr::from_ptr(path) }.to_str().unwrap();
60    let path = Path::new(path);
61    wordcut_new_with_dict_path(path)
62}
63
64#[no_mangle]
65pub extern "C" fn wordcut_new_with_dict_and_cluster_rules(
66    dict_path: *const c_char,
67    cluster_rules_path: *const c_char,
68) -> *mut Wordcut {
69    let dict_path = unsafe { CStr::from_ptr(dict_path) }.to_str().unwrap();
70    let dict_path = Path::new(dict_path);
71    let cluster_rules_path = unsafe { CStr::from_ptr(cluster_rules_path) }
72        .to_str()
73        .unwrap();
74    let cluster_rules_path = Path::new(cluster_rules_path);
75    wordcut_new_with_dict_and_cluster_rules_path(dict_path, cluster_rules_path)
76}
77
78#[no_mangle]
79pub extern "C" fn delete_wordcut(wordcut: *mut Wordcut) {
80    unsafe {
81        let _ = Box::from_raw(wordcut as *mut wordcut_engine::Wordcut);
82    }
83}
84
85#[no_mangle]
86pub extern "C" fn delete_text_ranges(text_ranges: *mut TextRange, range_count: usize) {
87    unsafe { Vec::from_raw_parts(text_ranges, range_count, range_count) };
88}
89
90#[no_mangle]
91pub extern "C" fn wordcut_into_text_ranges(
92    wordcut: *const Wordcut,
93    text: *const c_char,
94    range_count: *mut usize,
95) -> *mut TextRange {
96    let wordcut: *const wordcut_engine::Wordcut = wordcut as *const wordcut_engine::Wordcut;
97    let text = unsafe { CStr::from_ptr(text) }.to_str().unwrap();
98    let text_ranges = unsafe { (*wordcut).segment(text) };
99    let mut text_ranges: Vec<TextRange> = text_ranges
100        .into_iter()
101        .map(|r| TextRange { s: r.s, e: r.e })
102        .collect();
103    unsafe {
104        *range_count = text_ranges.len();
105    };
106    let p = text_ranges.as_mut_ptr();
107    forget(text_ranges);
108    return p;
109}
110
111#[no_mangle]
112pub extern "C" fn wordcut_into_strings(
113    wordcut: *const Wordcut,
114    text: *const c_char,
115    string_count: *mut usize,
116) -> *mut *mut c_char {
117    let wordcut: *const wordcut_engine::Wordcut = wordcut as *const wordcut_engine::Wordcut;
118    let text = unsafe { CStr::from_ptr(text) }.to_str().unwrap();
119    let strings = unsafe { (*wordcut).segment_into_strings(text) };
120    let mut strings: Vec<*mut c_char> = strings
121        .into_iter()
122        .map(|s| CString::new(s).unwrap().into_raw())
123        .collect();
124    unsafe {
125        *string_count = strings.len();
126    };
127    let p = strings.as_mut_ptr();
128    forget(strings);
129    return p;
130}
131
/// Releases an array of C strings returned by `wordcut_into_strings`,
/// including every string it points to.
///
/// Passing a null `strings` pointer is a no-op, and null entries inside the
/// array are skipped. The array allocation is rebuilt as a vector whose
/// length and capacity both equal `string_count`, so that value must be
/// exactly the count reported when the array was produced.
///
/// # Safety
///
/// A non-null `strings` must come from `wordcut_into_strings`, must be paired
/// with the count that call reported, and neither the array nor its strings
/// may be used or released again afterwards.
#[no_mangle]
pub extern "C" fn delete_strings(strings: *mut *mut c_char, string_count: usize) {
    if strings.is_null() {
        return;
    }
    // SAFETY: non-null, and per the contract above the pointer/count pair
    // describes an array allocation owned by this library.
    let raw_strings = unsafe { Vec::from_raw_parts(strings, string_count, string_count) };
    for raw in raw_strings {
        if raw.is_null() {
            continue;
        }
        // Dropping a bare `*mut c_char` frees nothing; the string has to be
        // turned back into the `CString` it was created from.
        // SAFETY: each non-null entry was produced by `CString::into_raw`.
        unsafe { drop(CString::from_raw(raw)) };
    }
}
139
140#[no_mangle]
141pub extern "C" fn wordcut_put_delimiters(
142    wordcut: *const Wordcut,
143    text: *const c_char,
144    delim: *const c_char,
145) -> *mut c_char {
146    let wordcut: *const wordcut_engine::Wordcut = wordcut as *const wordcut_engine::Wordcut;
147    let text = unsafe { CStr::from_ptr(text) }.to_str().unwrap();
148    let delim = unsafe { CStr::from_ptr(delim) }.to_str().unwrap();
149    let segmented_text = unsafe { (*wordcut).put_delimiters(text, delim) };
150    let p = CString::new(segmented_text).unwrap().into_raw();
151    return p;
152}
153
#[cfg(test)]
mod tests {
    use super::*;
    use std::ffi::CString;

    /// Dictionary path handed to the constructors by every test.
    const DICT_PATH: &str = "data/thai.txt";
    /// Cluster-rules path used by the cluster-rules test.
    const CLUSTER_RULES_PATH: &str = "data/thai_cluster_rules.txt";

    /// Builds an owned C string. The tests keep it alive and lend out
    /// `as_ptr()` instead of leaking it through `into_raw()`.
    fn c_string(s: &str) -> CString {
        CString::new(s).unwrap()
    }

    /// Creates a dictionary-only segmenter and fails the test cleanly, rather
    /// than dereferencing null later, when the dictionary cannot be loaded.
    fn new_wordcut() -> *mut Wordcut {
        let dict_path = c_string(DICT_PATH);
        let wordcut = wordcut_new_with_dict(dict_path.as_ptr());
        assert!(!wordcut.is_null(), "could not load {}", DICT_PATH);
        wordcut
    }

    /// Takes back ownership of a string returned by `wordcut_put_delimiters`
    /// so it is freed when the test ends.
    fn take_c_string(raw: *mut c_char) -> String {
        assert!(!raw.is_null());
        // SAFETY: the pointer was produced by `CString::into_raw` inside
        // `wordcut_put_delimiters` and is reclaimed exactly once here.
        let owned = unsafe { CString::from_raw(raw) };
        owned.into_string().unwrap()
    }

    #[test]
    fn test_wordcut_into_text_ranges() {
        let text = c_string("ลากา");
        let wordcut = new_wordcut();
        let mut range_count = 0usize;
        let text_ranges = wordcut_into_text_ranges(wordcut, text.as_ptr(), &mut range_count);
        assert!(!text_ranges.is_null());
        assert_eq!(range_count, 2);
        // SAFETY: the array is non-null and holds `range_count` (2) elements.
        unsafe {
            assert_eq!(*text_ranges, TextRange { s: 0, e: 2 });
            assert_eq!(*text_ranges.add(1), TextRange { s: 2, e: 4 });
        }
        delete_text_ranges(text_ranges, range_count);
        delete_wordcut(wordcut);
    }

    #[test]
    fn test_wordcut_into_strings() {
        let text = c_string("ลากา");
        let wordcut = new_wordcut();
        let mut string_count = 0usize;
        let segmented_strings = wordcut_into_strings(wordcut, text.as_ptr(), &mut string_count);
        assert!(!segmented_strings.is_null());
        assert_eq!(string_count, 2);
        // SAFETY: the array is non-null and holds `string_count` (2) valid,
        // NUL-terminated strings.
        unsafe {
            let s0 = CStr::from_ptr(*segmented_strings).to_str().unwrap();
            let s1 = CStr::from_ptr(*segmented_strings.add(1))
                .to_str()
                .unwrap();
            assert_eq!(s0, "ลา");
            assert_eq!(s1, "กา");
        }
        delete_strings(segmented_strings, string_count);
        delete_wordcut(wordcut);
    }

    #[test]
    fn test_wordcut_put_delimiters() {
        let text = c_string("ลากา");
        let delim = c_string("---");
        let wordcut = new_wordcut();
        let segmented_text = wordcut_put_delimiters(wordcut, text.as_ptr(), delim.as_ptr());
        assert_eq!(take_c_string(segmented_text), "ลา---กา");
        delete_wordcut(wordcut);
    }

    #[test]
    fn test_wordcut_put_delimiters_with_cluster_rules() {
        let text = c_string("เมลามา");
        let delim = c_string("---");
        let dict_path = c_string(DICT_PATH);
        let cluster_rules_path = c_string(CLUSTER_RULES_PATH);
        let wordcut =
            wordcut_new_with_dict_and_cluster_rules(dict_path.as_ptr(), cluster_rules_path.as_ptr());
        assert!(
            !wordcut.is_null(),
            "could not load {} with {}",
            DICT_PATH,
            CLUSTER_RULES_PATH
        );
        let segmented_text = wordcut_put_delimiters(wordcut, text.as_ptr(), delim.as_ptr());
        assert_eq!(take_c_string(segmented_text), "เม---ลา---มา");
        delete_wordcut(wordcut);
    }
}