novel_cli/utils/
convert.rs

1use std::sync::OnceLock;
2
3use color_eyre::eyre::Result;
4use opencc_rs::{Config, OpenCC};
5
6use super::{Content, Novel};
7use crate::cmd::Convert;
8
9pub fn convert<T>(novel: &mut Novel, converts: T) -> Result<()>
10where
11    T: AsRef<[Convert]>,
12{
13    if converts.as_ref().is_empty() {
14        return Ok(());
15    }
16
17    novel.name = convert_str(&novel.name, &converts, false)?;
18
19    novel.author_name = convert_str(&novel.author_name, &converts, false)?;
20
21    if novel.introduction.is_some() {
22        #[allow(clippy::needless_borrows_for_generic_args)]
23        for line in novel.introduction.as_mut().unwrap() {
24            *line = convert_str(&line, &converts, false)?;
25        }
26    }
27
28    for volume in &mut novel.volumes {
29        volume.title = convert_str(&volume.title, &converts, true)?;
30
31        for chapter in &mut volume.chapters {
32            chapter.title = convert_str(&chapter.title, &converts, true)?;
33
34            if chapter.contents.is_some() {
35                let mut contents = chapter.contents.take().unwrap();
36
37                for content in &mut contents {
38                    #[allow(clippy::needless_borrows_for_generic_args)]
39                    if let Content::Text(line) = content {
40                        *line = convert_str(&line, &converts, false)?;
41                    }
42                }
43
44                chapter.contents = Some(contents);
45            }
46        }
47    }
48
49    Ok(())
50}
51
52pub fn convert_str<T, E>(str: T, converts: E, in_heading: bool) -> Result<String>
53where
54    T: AsRef<str>,
55    E: AsRef<[Convert]>,
56{
57    let converts = converts.as_ref();
58
59    if converts.is_empty() {
60        Ok(str.as_ref().to_string())
61    } else {
62        let mut result = String::new();
63
64        static OPENCC_S2T: OnceLock<OpenCC> = OnceLock::new();
65        static OPENCC_T2S: OnceLock<OpenCC> = OnceLock::new();
66        static OPENCC_JP2T2S: OnceLock<OpenCC> = OnceLock::new();
67
68        if converts.contains(&Convert::JP2T2S) {
69            result = OPENCC_JP2T2S
70                .get_or_init(|| {
71                    OpenCC::new(vec![Config::JP2T, Config::TW2S])
72                        .expect("OpenCC initialization failure")
73                })
74                .convert(&str)?;
75        } else if converts.contains(&Convert::T2S) {
76            result = OPENCC_T2S
77                .get_or_init(|| {
78                    OpenCC::new(vec![Config::TW2S]).expect("OpenCC initialization failure")
79                })
80                .convert(&str)?;
81        } else if converts.contains(&Convert::S2T) {
82            result = OPENCC_S2T
83                .get_or_init(|| {
84                    OpenCC::new(vec![Config::S2T]).expect("OpenCC initialization failure")
85                })
86                .convert(&str)?;
87        }
88
89        if converts.contains(&Convert::CUSTOM) {
90            if result.is_empty() {
91                result = custom_convert(str, in_heading);
92            } else {
93                result = custom_convert(result, in_heading);
94            }
95
96            if converts.contains(&Convert::JP2T2S) || converts.contains(&Convert::T2S) {
97                let mut new_result = String::with_capacity(result.len());
98                for c in result.chars() {
99                    match super::CONVERT_T2S_MAP.get(&c) {
100                        Some(new) => {
101                            new_result.push(*new);
102                        }
103                        None => new_result.push(c),
104                    }
105                }
106
107                result = new_result;
108            }
109        }
110
111        Ok(result.trim().to_string())
112    }
113}
114
115#[must_use]
116fn custom_convert<T>(str: T, in_heading: bool) -> String
117where
118    T: AsRef<str>,
119{
120    if str.as_ref().is_empty() {
121        return String::default();
122    }
123
124    let mut s = String::new();
125    for c in html_escape::decode_html_entities(str.as_ref())
126        .to_string()
127        .chars()
128    {
129        match super::CONVERT_MAP.get(&c) {
130            Some(new) => {
131                s.push(*new);
132            }
133            None => s.push(c),
134        }
135    }
136
137    let mut result = String::new();
138    for (c, next_c) in s.chars().zip(s.chars().skip(1)) {
139        do_custom_convert(c, Some(next_c), &mut result, in_heading);
140    }
141    do_custom_convert(s.chars().last().unwrap(), None, &mut result, in_heading);
142
143    result
144}
145
146fn do_custom_convert(c: char, next_c: Option<char>, result: &mut String, in_heading: bool) {
147    let space = ' ';
148    let last = result.chars().last();
149
150    if
151    // https://en.wikipedia.org/wiki/Zero-width_space
152    c == '\u{200B}'
153        // https://en.wikipedia.org/wiki/Zero-width_non-joiner
154        || c == '\u{200C}'
155        // https://en.wikipedia.org/wiki/Zero-width_joiner
156        || c == '\u{200D}'
157        // https://en.wikipedia.org/wiki/Word_joiner
158        || c == '\u{2060}'
159        // https://en.wikipedia.org/wiki/Byte_order_mark
160        || c == '\u{FEFF}'
161        // https://en.wikipedia.org/wiki/Unicode_control_characters
162        || c.is_control()
163    {
164        // do nothing
165    } else if c.is_whitespace() {
166        if last.is_some_and(|c| !super::is_punctuation(c)) {
167            result.push(space)
168        }
169    } else if super::is_punctuation(c) {
170        if !in_heading && last.is_some_and(|c| c.is_whitespace()) {
171            result.pop();
172        }
173
174        if c == ':' {
175            // e.g. 08:00
176            if last.is_some_and(|c| c.is_ascii_digit())
177                && next_c.is_some_and(|c| c.is_ascii_digit())
178            {
179                result.push(':');
180            } else {
181                result.push(':');
182            }
183        } else {
184            result.push(c);
185        }
186    } else {
187        result.push(c);
188    }
189}
190
#[cfg(test)]
mod tests {
    use pretty_assertions::assert_eq;
    use testresult::TestResult;

    use super::*;

    /// Runs `convert_str` with `JP2T2S` + `CUSTOM` over a table of inputs.
    #[test]
    fn convert() -> TestResult {
        let config = vec![Convert::JP2T2S, Convert::CUSTOM];

        // (input, in_heading, expected)
        let cases = [
            ("妳", false, "你"),
            ("Q0", false, "Q0"),
            ("“安装后”", false, "“安装后”"),
            // HTML entities are decoded.
            ("&amp;", false, "&"),
            ("安裝後?", false, "安装后?"),
            // A space between two non-punctuation characters is kept.
            ("安 装", false, "安 装"),
            // The newline is a control character and is dropped.
            ("你\n好", false, "你好"),
            ("08:00", false, "08:00"),
            ("接著", false, "接着"),
            // Outside a heading the space before punctuation is removed ...
            ("第一章 “你好”", false, "第一章“你好”"),
            // ... inside a heading it is preserved.
            ("第一章 “你好”", true, "第一章 “你好”"),
        ];

        for (input, in_heading, expected) in cases {
            assert_eq!(convert_str(input, &config, in_heading)?, expected);
        }

        Ok(())
    }
}
222}