opencc_rs/
lib.rs

1//! OpenCC bindings for Rust
2
3use std::ffi::{CStr, CString, c_void};
4use std::{fs, io};
5
6use libc::uintptr_t;
7use thiserror::Error;
8
/// OpenCC bindings for Rust.
///
/// Owns one raw OpenCC handle per configuration passed to [`OpenCC::new`].
/// [`OpenCC::convert`] feeds the input through the handles in order, and the
/// handles are closed when the value is dropped.
#[derive(Debug)]
pub struct OpenCC {
    /// Raw handles returned by `opencc_sys::opencc_open`, in the order the
    /// configurations were supplied.
    openccs: Vec<*mut c_void>,
}
13
14impl OpenCC {
15    /// Create a new OpenCC instance with the given configuration
16    pub fn new<T>(configs: T) -> Result<OpenCC, Error>
17    where
18        T: AsRef<[Config]>,
19    {
20        let configs = configs.as_ref();
21        assert!(!configs.is_empty());
22
23        let mut openccs = Vec::new();
24
25        for config in configs {
26            let config_data = config.get_data();
27            let dir = tempfile::tempdir()?;
28            for item in &config_data {
29                let file_path = dir.path().join(item.file_name);
30                fs::write(file_path, item.content)?;
31            }
32
33            let config_file_path = dir.path().join(config_data[0].file_name);
34            let config_file_path = CString::new(config_file_path.to_str().unwrap()).unwrap();
35
36            let opencc = unsafe { opencc_sys::opencc_open(config_file_path.as_ptr()) };
37
38            let ptr = opencc as uintptr_t;
39            if ptr == uintptr_t::MAX {
40                return Err(Error::Create);
41            }
42
43            openccs.push(opencc);
44        }
45
46        Ok(OpenCC { openccs })
47    }
48
49    /// Convert a string to another string
50    pub fn convert<T>(&self, input: T) -> Result<String, Error>
51    where
52        T: AsRef<str>,
53    {
54        let mut length = input.as_ref().len();
55        let input = CString::new(input.as_ref()).unwrap();
56        let mut result_ptr = input.as_ptr().cast_mut();
57
58        let mut free = Vec::new();
59
60        for opencc in &self.openccs {
61            result_ptr = unsafe { opencc_sys::opencc_convert_utf8(*opencc, result_ptr, length) };
62            if result_ptr.is_null() {
63                return Err(Error::Convert);
64            }
65
66            free.push(result_ptr);
67
68            if self.openccs.len() > 1 {
69                length = unsafe { libc::strlen(result_ptr) };
70            }
71        }
72
73        let result_cstr = unsafe { CStr::from_ptr(result_ptr) };
74        let result = unsafe { std::str::from_utf8_unchecked(result_cstr.to_bytes()).to_string() };
75
76        for ptr in free {
77            unsafe {
78                opencc_sys::opencc_convert_utf8_free(ptr);
79            }
80        }
81
82        Ok(result)
83    }
84}
85
86impl Drop for OpenCC {
87    fn drop(&mut self) {
88        for opencc in &self.openccs {
89            if !opencc.is_null() {
90                unsafe {
91                    opencc_sys::opencc_close(*opencc);
92                }
93            }
94        }
95    }
96}
97
// `OpenCC` stores raw `*mut c_void` handles, so the compiler does not derive
// `Send`/`Sync` for it; the two impls below opt in manually.
// NOTE(review): this assumes the underlying OpenCC handles may be moved to
// another thread and used from several threads at once — TODO confirm against
// the OpenCC library's threading guarantees.
unsafe impl Send for OpenCC {}

unsafe impl Sync for OpenCC {}
101
/// Conversion configurations.
///
/// Each variant selects one conversion direction. Pass one or more of them to
/// `OpenCC::new`; the conversions are applied in the order given.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Config {
    /// Traditional Chinese (Hong Kong Standard) to Simplified Chinese
    HK2S,
    /// Traditional Chinese (Hong Kong Standard) to Traditional Chinese
    HK2T,
    /// New Japanese Kanji (Shinjitai) to Traditional Chinese Characters (Kyūjitai)
    JP2T,
    /// Simplified Chinese to Traditional Chinese
    S2T,
    /// Simplified Chinese to Traditional Chinese (Taiwan Standard)
    S2TW,
    /// Simplified Chinese to Traditional Chinese (Taiwan Standard) with Taiwanese idiom
    S2TWP,
    /// Traditional Chinese (OpenCC Standard) to Hong Kong Standard
    T2HK,
    /// Traditional Chinese Characters (Kyūjitai) to New Japanese Kanji (Shinjitai)
    T2JP,
    /// Traditional Chinese (OpenCC Standard) to Taiwan Standard
    T2TW,
    /// Traditional Chinese to Simplified Chinese
    T2S,
    /// Simplified Chinese to Traditional Chinese (Hong Kong Standard)
    S2HK,
    /// Traditional Chinese (Taiwan Standard) to Simplified Chinese
    TW2S,
    /// Traditional Chinese (Taiwan Standard) to Simplified Chinese with Mainland Chinese idiom
    TW2SP,
    /// Traditional Chinese (Taiwan Standard) to Traditional Chinese
    TW2T,
}
133
134impl Config {
135    fn get_data(&self) -> Vec<&opencc_sys::Data> {
136        match self {
137            Config::HK2S => vec![
138                &opencc_sys::HK2S_JSON,
139                &opencc_sys::TSPHRASES_OCD2,
140                &opencc_sys::HKVARIANTS_REV_PHRASES_OCD2,
141                &opencc_sys::HKVARIANTS_REV_OCD2,
142                &opencc_sys::TSCHARACTERS_OCD2,
143            ],
144            Config::HK2T => vec![
145                &opencc_sys::HK2T_JSON,
146                &opencc_sys::HKVARIANTS_REV_PHRASES_OCD2,
147                &opencc_sys::HKVARIANTS_REV_OCD2,
148            ],
149            Config::JP2T => vec![
150                &opencc_sys::JP2T_JSON,
151                &opencc_sys::JPSHINJITAI_PHRASES_OCD2,
152                &opencc_sys::JPSHINJITAI_CHARATERS_OCD2,
153                &opencc_sys::JPVARIANTS_REV_OCD2,
154            ],
155            Config::S2HK => vec![
156                &opencc_sys::S2HK_JSON,
157                &opencc_sys::STPHRASES_OCD2,
158                &opencc_sys::STCHARACTERS_OCD2,
159                &opencc_sys::HKVARIANTS_OCD2,
160            ],
161            Config::S2T => vec![
162                &opencc_sys::S2T_JSON,
163                &opencc_sys::STPHRASES_OCD2,
164                &opencc_sys::STCHARACTERS_OCD2,
165            ],
166            Config::S2TW => vec![
167                &opencc_sys::S2TW_JSON,
168                &opencc_sys::STPHRASES_OCD2,
169                &opencc_sys::STCHARACTERS_OCD2,
170                &opencc_sys::TWVARIANTS_OCD2,
171            ],
172            Config::S2TWP => vec![
173                &opencc_sys::S2TWP_JSON,
174                &opencc_sys::STPHRASES_OCD2,
175                &opencc_sys::STCHARACTERS_OCD2,
176                &opencc_sys::TWPHRASES_OCD2,
177                &opencc_sys::TWVARIANTS_OCD2,
178            ],
179            Config::T2HK => vec![&opencc_sys::T2HK_JSON, &opencc_sys::HKVARIANTS_OCD2],
180            Config::T2JP => vec![&opencc_sys::T2JP_JSON, &opencc_sys::JPVARIANTS_OCD2],
181            Config::T2S => vec![
182                &opencc_sys::T2S_JSON,
183                &opencc_sys::TSPHRASES_OCD2,
184                &opencc_sys::TSCHARACTERS_OCD2,
185            ],
186            Config::T2TW => vec![&opencc_sys::T2TW_JSON, &opencc_sys::TWVARIANTS_OCD2],
187            Config::TW2S => vec![
188                &opencc_sys::TW2S_JSON,
189                &opencc_sys::TSPHRASES_OCD2,
190                &opencc_sys::TWVARIANTS_REV_PHRASES_OCD2,
191                &opencc_sys::TWVARIANTS_REV_OCD2,
192                &opencc_sys::TSCHARACTERS_OCD2,
193            ],
194            Config::TW2SP => vec![
195                &opencc_sys::TW2SP_JSON,
196                &opencc_sys::TSPHRASES_OCD2,
197                &opencc_sys::TWPHRASES_REV_OCD2,
198                &opencc_sys::TWVARIANTS_REV_PHRASES_OCD2,
199                &opencc_sys::TWVARIANTS_REV_OCD2,
200                &opencc_sys::TSCHARACTERS_OCD2,
201            ],
202            Config::TW2T => vec![
203                &opencc_sys::TW2T_JSON,
204                &opencc_sys::TWVARIANTS_REV_PHRASES_OCD2,
205                &opencc_sys::TWVARIANTS_REV_OCD2,
206            ],
207        }
208    }
209}
210
/// Errors reported by the OpenCC bindings.
#[derive(Debug, Error)]
pub enum Error {
    /// Failed to create an OpenCC instance.
    ///
    /// Returned by `OpenCC::new` when an OpenCC handle could not be created.
    #[error("Failed to create opencc instance")]
    Create,

    /// Failed to convert the string.
    ///
    /// Returned by `OpenCC::convert` when the conversion could not be performed.
    #[error("Failed to convert the string")]
    Convert,

    /// IO error.
    ///
    /// Wraps any `std::io::Error`; the `#[from]` attribute lets `?` convert it
    /// automatically, and `transparent` forwards its message and source.
    #[error(transparent)]
    StdIO(#[from] io::Error),
}