opencc_rust/
lib.rs

1/*!
2Open Chinese Convert(OpenCC, 開放中文轉換) binding for the Rust language for conversion between Traditional Chinese and Simplified Chinese.
3
4## Compilation
5
6To compile this crate, you need to compile the OpenCC C++ library first. You can install OpenCC in your operating system, or in somewhere in your file system. As for the latter, you need to set the following environment variables to link the OpenCC library:
7
8* `OPENCC_LIB_DIRS`: The directories of library files, like `-L`. Use `:` to separate.
9* `OPENCC_LIBS`: The library names that you want to link, like `-l`. Use `:` to separate. Typically, it contains **opencc:marisa**.
10* `OPENCC_INCLUDE_DIRS`: The directories of header files, like `-i`. Use `:` to separate.
11* `OPENCC_STATIC`: Whether to use `static` or `dylib`.
12* `OPENCC_DYLIB_STDCPP`: If you use `static` linking, and your OpenCC library is compiled by the GNU C, this environment variable should be set.
13
14## Examples
15
16```rust
17use opencc_rust::*;
18
19let opencc = OpenCC::new(DefaultConfig::TW2SP).unwrap();
20
21let s = opencc.convert("涼風有訊");
22
23assert_eq!("凉风有讯", &s);
24
25let s = opencc.convert_to_buffer(",秋月無邊", s);
26
27assert_eq!("凉风有讯,秋月无边", &s);
28```
29
30```rust
31use opencc_rust::*;
32
33let opencc = OpenCC::new(DefaultConfig::S2TWP).unwrap();
34
35let s = opencc.convert("凉风有讯");
36
37assert_eq!("涼風有訊", &s);
38
39let s = opencc.convert_to_buffer(",秋月无边", s);
40
41assert_eq!("涼風有訊,秋月無邊", &s);
42```
43
44## Static Dictionaries
45
46Usually, OpenCC needs to be executed on an environment where OpenCC is installed. If you want to make it portable, you can enable the `static-dictionaries` feature.
47
48```toml
49[dependencies.opencc-rust]
50version = "*"
51features = ["static-dictionaries"]
52```
53Then, the `generate_static_dictionary` and `generate_static_dictionaries` functions are available.
54
55The default OpenCC dictionaries will be compiled into the binary file by `lazy_static_include` crate. And you can use the two functions to recover them on demand.
56
57For example,
58
59```rust,ignore
60use opencc_rust::*;
61
62let output_path = "/path/to/dictionaries-directory";
63
64generate_static_dictionary(&output_path, DefaultConfig::TW2SP).unwrap();
65
66let opencc = OpenCC::new(Path::join(&output_path, DefaultConfig::TW2SP)).unwrap();
67
68assert_eq!("凉风有讯", &opencc.convert("涼風有訊"));
69```
70*/
71
72#[cfg(feature = "static-dictionaries")]
73#[macro_use]
74extern crate lazy_static;
75
76#[cfg(feature = "static-dictionaries")]
77#[macro_use]
78extern crate lazy_static_include;
79
80#[cfg(feature = "static-dictionaries")]
81use std::fs::{self, File};
82#[cfg(feature = "static-dictionaries")]
83use std::io::Write;
84use std::{
85    ffi::{CStr, CString},
86    path::Path,
87};
88
89use libc::{c_char, c_int, c_void, size_t};
90
91#[link(name = "opencc")]
92extern "C" {
93    pub fn opencc_open(config_file_path: *const c_char) -> *mut c_void;
94    pub fn opencc_close(opencc: *mut c_void) -> c_int;
95    pub fn opencc_convert_utf8(
96        opencc: *mut c_void,
97        input: *const c_char,
98        length: size_t,
99    ) -> *mut c_char;
100    pub fn opencc_convert_utf8_to_buffer(
101        opencc: *mut c_void,
102        input: *const c_char,
103        length: size_t,
104        output: *mut c_char,
105    ) -> size_t;
106    pub fn opencc_convert_utf8_free(str: *mut c_char);
107    pub fn opencc_error() -> *const c_char;
108}
109
110#[cfg(feature = "static-dictionaries")]
111struct SD(&'static str, &'static [u8]);
112
113#[cfg(feature = "static-dictionaries")]
114macro_rules! new_sd_instance {
115    ($name:ident, $file_name:expr) => {
116        lazy_static! {
117            static ref $name: SD = {
118                lazy_static_include_bytes! {
119                    RES => ("opencc", $file_name)
120                }
121
122                SD($file_name, &RES)
123            };
124        }
125    };
126}
127
128#[cfg(feature = "static-dictionaries")]
129new_sd_instance!(HK2S_JSON, "hk2s.json");
130#[cfg(feature = "static-dictionaries")]
131new_sd_instance!(HK2T_JSON, "hk2t.json");
132#[cfg(feature = "static-dictionaries")]
133new_sd_instance!(HKVARIANTS_OCD, "HKVariants.ocd2");
134#[cfg(feature = "static-dictionaries")]
135new_sd_instance!(HKVARIANTS_REV_OCD, "HKVariantsRev.ocd2");
136#[cfg(feature = "static-dictionaries")]
137new_sd_instance!(HKVARIANTS_REV_PHRASES_OCD, "HKVariantsRevPhrases.ocd2");
138#[cfg(feature = "static-dictionaries")]
139new_sd_instance!(JP2T_JSON, "jp2t.json");
140#[cfg(feature = "static-dictionaries")]
141new_sd_instance!(JPSHINJITAI_CHARATERS_OCD, "JPShinjitaiCharacters.ocd2");
142#[cfg(feature = "static-dictionaries")]
143new_sd_instance!(JPSHINJITAI_PHRASES_OCD, "JPShinjitaiPhrases.ocd2");
144#[cfg(feature = "static-dictionaries")]
145new_sd_instance!(JPVARIANTS_OCD, "JPVariants.ocd2");
146#[cfg(feature = "static-dictionaries")]
147new_sd_instance!(JPVARIANTS_REV_OCD, "JPVariantsRev.ocd2");
148#[cfg(feature = "static-dictionaries")]
149new_sd_instance!(S2HK_JSON, "s2hk.json");
150#[cfg(feature = "static-dictionaries")]
151new_sd_instance!(S2T_JSON, "s2t.json");
152#[cfg(feature = "static-dictionaries")]
153new_sd_instance!(S2TW_JSON, "s2tw.json");
154#[cfg(feature = "static-dictionaries")]
155new_sd_instance!(S2TWP_JSON, "s2twp.json");
156#[cfg(feature = "static-dictionaries")]
157new_sd_instance!(STCHARACTERS_OCD, "STCharacters.ocd2");
158#[cfg(feature = "static-dictionaries")]
159new_sd_instance!(STPHRASES_OCD, "STPhrases.ocd2");
160#[cfg(feature = "static-dictionaries")]
161new_sd_instance!(T2HK_JSON, "t2hk.json");
162#[cfg(feature = "static-dictionaries")]
163new_sd_instance!(T2JP_JSON, "t2jp.json");
164#[cfg(feature = "static-dictionaries")]
165new_sd_instance!(T2S_JSON, "t2s.json");
166#[cfg(feature = "static-dictionaries")]
167new_sd_instance!(T2TW_JSON, "t2tw.json");
168#[cfg(feature = "static-dictionaries")]
169new_sd_instance!(TSCHARACTERS_OCD, "TSCharacters.ocd2");
170#[cfg(feature = "static-dictionaries")]
171new_sd_instance!(TSPHRASES_OCD, "TSPhrases.ocd2");
172#[cfg(feature = "static-dictionaries")]
173new_sd_instance!(TW2S_JSON, "tw2s.json");
174#[cfg(feature = "static-dictionaries")]
175new_sd_instance!(TW2SP_JSON, "tw2sp.json");
176#[cfg(feature = "static-dictionaries")]
177new_sd_instance!(TW2T_JSON, "tw2t.json");
178#[cfg(feature = "static-dictionaries")]
179new_sd_instance!(TWPHRASES_OCD, "TWPhrases.ocd2");
180#[cfg(feature = "static-dictionaries")]
181new_sd_instance!(TWPHRASES_REV_OCD, "TWPhrasesRev.ocd2");
182#[cfg(feature = "static-dictionaries")]
183new_sd_instance!(TWVARIANTS_OCD, "TWVariants.ocd2");
184#[cfg(feature = "static-dictionaries")]
185new_sd_instance!(TWVARIANTS_REV_OCD, "TWVariantsRev.ocd2");
186#[cfg(feature = "static-dictionaries")]
187new_sd_instance!(TWVARIANTS_REV_PHRASES_OCD, "TWVariantsRevPhrases.ocd2");
188
189/// Default configs.
190#[derive(Debug, Copy, Clone)]
191pub enum DefaultConfig {
192    /// Traditional Chinese (Hong Kong Standard) to Simplified Chinese
193    HK2S,
194    /// Traditional Chinese (Hong Kong Standard) to Traditional Chinese
195    HK2T,
196    /// New Japanese Kanji (Shinjitai) to Traditional Chinese Characters (Kyūjitai)
197    JP2T,
198    /// Simplified Chinese to Traditional Chinese
199    S2T,
200    /// Simplified Chinese to Traditional Chinese (Taiwan Standard)
201    S2TW,
202    /// Simplified Chinese to Traditional Chinese (Taiwan Standard) with Taiwanese idiom
203    S2TWP,
204    /// Traditional Chinese (OpenCC Standard) to Hong Kong Standard
205    T2HK,
206    /// Traditional Chinese Characters (Kyūjitai) to New Japanese Kanji (Shinjitai)
207    T2JP,
208    /// Traditional Chinese (OpenCC Standard) to Taiwan Standard
209    T2TW,
210    /// Traditional Chinese to Simplified Chinese
211    T2S,
212    /// Simplified Chinese to Traditional Chinese (Hong Kong Standard)
213    S2HK,
214    /// Traditional Chinese (Taiwan Standard) to Simplified Chinese
215    TW2S,
216    /// Traditional Chinese (Taiwan Standard) to Simplified Chinese with Mainland Chinese idiom
217    TW2SP,
218    /// Traditional Chinese (Taiwan Standard) to Traditional Chinese
219    TW2T,
220}
221
222impl DefaultConfig {
223    /// Get the file name for this default config.
224    pub fn get_file_name(self) -> &'static str {
225        match self {
226            DefaultConfig::HK2S => "hk2s.json",
227            DefaultConfig::HK2T => "hk2t.json",
228            DefaultConfig::JP2T => "jp2t.json",
229            DefaultConfig::S2HK => "s2hk.json",
230            DefaultConfig::S2T => "s2t.json",
231            DefaultConfig::S2TW => "s2tw.json",
232            DefaultConfig::S2TWP => "s2twp.json",
233            DefaultConfig::T2HK => "t2hk.json",
234            DefaultConfig::T2JP => "t2jp.json",
235            DefaultConfig::T2S => "t2s.json",
236            DefaultConfig::T2TW => "t2tw.json",
237            DefaultConfig::TW2S => "tw2s.json",
238            DefaultConfig::TW2SP => "tw2sp.json",
239            DefaultConfig::TW2T => "tw2t.json",
240        }
241    }
242}
243
244impl AsRef<Path> for DefaultConfig {
245    fn as_ref(&self) -> &Path {
246        Path::new(self.get_file_name())
247    }
248}
249
250impl AsRef<str> for DefaultConfig {
251    fn as_ref(&self) -> &str {
252        self.get_file_name()
253    }
254}
255
256/// OpenCC binding for Rust.
257pub struct OpenCC {
258    opencc: *mut c_void,
259}
260
261unsafe impl Send for OpenCC {}
262
263unsafe impl Sync for OpenCC {}
264
265impl OpenCC {
266    /// Create a new OpenCC instance through a file provided by its path.
267    pub fn new<P: AsRef<Path>>(config_file_path: P) -> Result<OpenCC, &'static str> {
268        let config_file_path =
269            CString::new(config_file_path.as_ref().as_os_str().to_str().unwrap()).unwrap();
270
271        let opencc = unsafe { opencc_open(config_file_path.as_ptr()) };
272
273        let v: size_t = opencc as size_t;
274        if v == !0 {
275            return Err("Cannot use this config file path.");
276        }
277
278        Ok(OpenCC {
279            opencc,
280        })
281    }
282
283    /// Convert a string to another string.
284    pub fn convert<S: AsRef<str>>(&self, input: S) -> String {
285        let input = input.as_ref();
286
287        let length = input.len();
288        let input = CString::new(input).unwrap();
289
290        let result_ptr = unsafe { opencc_convert_utf8(self.opencc, input.as_ptr(), length) };
291        let result_cstr = unsafe { CStr::from_ptr(result_ptr) };
292        let result = result_cstr.to_string_lossy().to_string();
293
294        unsafe {
295            opencc_convert_utf8_free(result_ptr);
296        }
297
298        result
299    }
300
301    /// Convert a string to another string and store into a buffer.
302    pub fn convert_to_buffer<S: AsRef<str>>(&self, input: S, output: String) -> String {
303        let input = input.as_ref();
304
305        let length = input.len();
306        let input = CString::new(input).unwrap();
307
308        let mut output = output.into_bytes();
309        let o_len = output.len();
310
311        output.reserve(length * 2);
312
313        let input_ptr = unsafe { output.as_ptr().add(output.len()) as *mut c_char };
314
315        let size = unsafe {
316            opencc_convert_utf8_to_buffer(self.opencc, input.as_ptr(), length, input_ptr)
317        };
318
319        unsafe {
320            output.set_len(o_len + size);
321        }
322
323        unsafe { String::from_utf8_unchecked(output) }
324    }
325}
326
327impl Drop for OpenCC {
328    fn drop(&mut self) {
329        if !self.opencc.is_null() {
330            unsafe {
331                opencc_close(self.opencc);
332            }
333        }
334    }
335}
336
337#[cfg(feature = "static-dictionaries")]
338fn generate_static_dictionary_inner<P: AsRef<Path>>(
339    path: P,
340    config: DefaultConfig,
341) -> Result<(), &'static str> {
342    let path = path.as_ref();
343
344    let mut output_data: Vec<&SD> = Vec::new();
345
346    match config {
347        DefaultConfig::HK2S => {
348            output_data.push(&HK2S_JSON);
349            output_data.push(&TSPHRASES_OCD);
350            output_data.push(&HKVARIANTS_REV_PHRASES_OCD);
351            output_data.push(&HKVARIANTS_REV_OCD);
352            output_data.push(&TSCHARACTERS_OCD);
353        },
354        DefaultConfig::HK2T => {
355            output_data.push(&HK2T_JSON);
356            output_data.push(&HKVARIANTS_REV_PHRASES_OCD);
357            output_data.push(&HKVARIANTS_REV_OCD);
358        },
359        DefaultConfig::JP2T => {
360            output_data.push(&JP2T_JSON);
361            output_data.push(&JPSHINJITAI_PHRASES_OCD);
362            output_data.push(&JPSHINJITAI_CHARATERS_OCD);
363            output_data.push(&JPVARIANTS_REV_OCD);
364        },
365        DefaultConfig::S2HK => {
366            output_data.push(&S2HK_JSON);
367            output_data.push(&STPHRASES_OCD);
368            output_data.push(&STCHARACTERS_OCD);
369            output_data.push(&HKVARIANTS_OCD);
370        },
371        DefaultConfig::S2T => {
372            output_data.push(&S2T_JSON);
373            output_data.push(&STPHRASES_OCD);
374            output_data.push(&STCHARACTERS_OCD);
375        },
376        DefaultConfig::S2TW => {
377            output_data.push(&S2TW_JSON);
378            output_data.push(&STPHRASES_OCD);
379            output_data.push(&STCHARACTERS_OCD);
380            output_data.push(&TWVARIANTS_OCD);
381        },
382        DefaultConfig::S2TWP => {
383            output_data.push(&S2TWP_JSON);
384            output_data.push(&STPHRASES_OCD);
385            output_data.push(&STCHARACTERS_OCD);
386            output_data.push(&TWPHRASES_OCD);
387            output_data.push(&TWVARIANTS_OCD);
388        },
389        DefaultConfig::T2HK => {
390            output_data.push(&T2HK_JSON);
391            output_data.push(&HKVARIANTS_OCD);
392        },
393        DefaultConfig::T2JP => {
394            output_data.push(&T2JP_JSON);
395            output_data.push(&JPVARIANTS_OCD);
396        },
397        DefaultConfig::T2S => {
398            output_data.push(&T2S_JSON);
399            output_data.push(&TSPHRASES_OCD);
400            output_data.push(&TSCHARACTERS_OCD);
401        },
402        DefaultConfig::T2TW => {
403            output_data.push(&T2TW_JSON);
404            output_data.push(&TWVARIANTS_OCD);
405        },
406        DefaultConfig::TW2S => {
407            output_data.push(&TW2S_JSON);
408            output_data.push(&TSPHRASES_OCD);
409            output_data.push(&TWVARIANTS_REV_PHRASES_OCD);
410            output_data.push(&TWVARIANTS_REV_OCD);
411            output_data.push(&TSCHARACTERS_OCD);
412        },
413        DefaultConfig::TW2SP => {
414            output_data.push(&TW2SP_JSON);
415            output_data.push(&TSPHRASES_OCD);
416            output_data.push(&TWPHRASES_REV_OCD);
417            output_data.push(&TWVARIANTS_REV_PHRASES_OCD);
418            output_data.push(&TWVARIANTS_REV_OCD);
419            output_data.push(&TSCHARACTERS_OCD);
420        },
421        DefaultConfig::TW2T => {
422            output_data.push(&TW2T_JSON);
423            output_data.push(&TWVARIANTS_REV_PHRASES_OCD);
424            output_data.push(&TWVARIANTS_REV_OCD);
425        },
426    }
427
428    for data in output_data {
429        let output_path = path.join(data.0);
430
431        if output_path.exists() {
432            if output_path.is_file() {
433                continue;
434            } else {
435                return Err("The dictionary is not correct.");
436            }
437        }
438
439        let mut file = File::create(output_path).map_err(|_| "Cannot create a new file.")?;
440
441        file.write(data.1).map_err(|_| "Cannot write data to a file.")?;
442
443        file.flush().map_err(|_| "Cannot flush file.")?;
444    }
445
446    Ok(())
447}
448
449#[cfg(feature = "static-dictionaries")]
450/// Generate files for a specific dictionary. These files are used for opening a new OpenCC instance.
451pub fn generate_static_dictionary<P: AsRef<Path>>(
452    path: P,
453    config: DefaultConfig,
454) -> Result<(), &'static str> {
455    let path = path.as_ref();
456
457    if path.exists() {
458        if !path.is_dir() {
459            return Err("The path of static dictionaries needs to be a directory.");
460        }
461    } else {
462        match fs::create_dir_all(path) {
463            Ok(_) => (),
464            Err(_) => return Err("Cannot create new directories."),
465        }
466    }
467
468    generate_static_dictionary_inner(path, config)
469}
470
471#[cfg(feature = "static-dictionaries")]
472/// Generate files for specific dictionaries. These files are used for opening a new OpenCC instance.
473pub fn generate_static_dictionaries<P: AsRef<Path>>(
474    path: P,
475    configs: &[DefaultConfig],
476) -> Result<(), &'static str> {
477    let path = path.as_ref();
478
479    if path.exists() {
480        if !path.is_dir() {
481            return Err("The path of static dictionaries needs to be a directory.");
482        }
483    } else {
484        match fs::create_dir_all(path) {
485            Ok(_) => (),
486            Err(_) => return Err("Cannot create new directories."),
487        }
488    }
489
490    for config in configs.iter().copied() {
491        generate_static_dictionary_inner(path, config)?
492    }
493
494    Ok(())
495}