novel_cli/utils/
convert.rs

1use std::sync::OnceLock;
2
3use color_eyre::eyre::Result;
4use opencc_rs::{Config, OpenCC};
5
6use super::{Content, Novel};
7use crate::cmd::Convert;
8
9pub fn convert<T>(novel: &mut Novel, converts: T) -> Result<()>
10where
11    T: AsRef<[Convert]>,
12{
13    if converts.as_ref().is_empty() {
14        return Ok(());
15    }
16
17    novel.name = convert_str(&novel.name, &converts, false)?;
18
19    novel.author_name = convert_str(&novel.author_name, &converts, false)?;
20
21    if novel.introduction.is_some() {
22        for line in novel.introduction.as_mut().unwrap() {
23            *line = convert_str(&line, &converts, false)?;
24        }
25    }
26
27    for volume in &mut novel.volumes {
28        volume.title = convert_str(&volume.title, &converts, true)?;
29
30        for chapter in &mut volume.chapters {
31            chapter.title = convert_str(&chapter.title, &converts, true)?;
32
33            if chapter.contents.is_some() {
34                let mut contents = chapter.contents.take().unwrap();
35
36                for content in &mut contents {
37                    if let Content::Text(line) = content {
38                        *line = convert_str(&line, &converts, false)?;
39                    }
40                }
41
42                chapter.contents = Some(contents);
43            }
44        }
45    }
46
47    Ok(())
48}
49
50pub fn convert_str<T, E>(str: T, converts: E, in_heading: bool) -> Result<String>
51where
52    T: AsRef<str>,
53    E: AsRef<[Convert]>,
54{
55    let converts = converts.as_ref();
56
57    if converts.is_empty() {
58        Ok(str.as_ref().to_string())
59    } else {
60        let mut result = String::new();
61
62        static OPENCC_S2T: OnceLock<OpenCC> = OnceLock::new();
63        static OPENCC_T2S: OnceLock<OpenCC> = OnceLock::new();
64        static OPENCC_JP2T2S: OnceLock<OpenCC> = OnceLock::new();
65
66        if converts.contains(&Convert::JP2T2S) {
67            result = OPENCC_JP2T2S
68                .get_or_init(|| {
69                    OpenCC::new(vec![Config::JP2T, Config::TW2S])
70                        .expect("OpenCC initialization failure")
71                })
72                .convert(&str)?;
73        } else if converts.contains(&Convert::T2S) {
74            result = OPENCC_T2S
75                .get_or_init(|| {
76                    OpenCC::new(vec![Config::TW2S]).expect("OpenCC initialization failure")
77                })
78                .convert(&str)?;
79        } else if converts.contains(&Convert::S2T) {
80            result = OPENCC_S2T
81                .get_or_init(|| {
82                    OpenCC::new(vec![Config::S2T]).expect("OpenCC initialization failure")
83                })
84                .convert(&str)?;
85        }
86
87        if converts.contains(&Convert::CUSTOM) {
88            if result.is_empty() {
89                result = custom_convert(str, in_heading);
90            } else {
91                result = custom_convert(result, in_heading);
92            }
93
94            if converts.contains(&Convert::JP2T2S) || converts.contains(&Convert::T2S) {
95                let mut new_result = String::with_capacity(result.len());
96                for c in result.chars() {
97                    match super::CONVERT_T2S_MAP.get(&c) {
98                        Some(new) => {
99                            new_result.push(*new);
100                        }
101                        None => new_result.push(c),
102                    }
103                }
104
105                result = new_result;
106            }
107        }
108
109        Ok(result.trim().to_string())
110    }
111}
112
113#[must_use]
114fn custom_convert<T>(str: T, in_heading: bool) -> String
115where
116    T: AsRef<str>,
117{
118    if str.as_ref().is_empty() {
119        return String::default();
120    }
121
122    let mut s = String::new();
123    for c in html_escape::decode_html_entities(str.as_ref())
124        .to_string()
125        .chars()
126    {
127        match super::CONVERT_MAP.get(&c) {
128            Some(new) => {
129                s.push(*new);
130            }
131            None => s.push(c),
132        }
133    }
134
135    let mut result = String::new();
136    for (c, next_c) in s.chars().zip(s.chars().skip(1)) {
137        do_custom_convert(c, Some(next_c), &mut result, in_heading);
138    }
139    do_custom_convert(s.chars().last().unwrap(), None, &mut result, in_heading);
140
141    result
142}
143
144fn do_custom_convert(c: char, next_c: Option<char>, result: &mut String, in_heading: bool) {
145    let space = ' ';
146    let last = result.chars().last();
147
148    if
149    // https://en.wikipedia.org/wiki/Zero-width_space
150    c == '\u{200B}'
151        // https://en.wikipedia.org/wiki/Zero-width_non-joiner
152        || c == '\u{200C}'
153        // https://en.wikipedia.org/wiki/Zero-width_joiner
154        || c == '\u{200D}'
155        // https://en.wikipedia.org/wiki/Word_joiner
156        || c == '\u{2060}'
157        // https://en.wikipedia.org/wiki/Byte_order_mark
158        || c == '\u{FEFF}'
159        // https://en.wikipedia.org/wiki/Unicode_control_characters
160        || c.is_control()
161    {
162        // do nothing
163    } else if c.is_whitespace() {
164        if last.is_some_and(|c| !super::is_punctuation(c)) {
165            result.push(space)
166        }
167    } else if super::is_punctuation(c) {
168        if !in_heading && last.is_some_and(|c| c.is_whitespace()) {
169            result.pop();
170        }
171
172        if c == ':' {
173            // e.g. 08:00
174            if last.is_some_and(|c| c.is_ascii_digit())
175                && next_c.is_some_and(|c| c.is_ascii_digit())
176            {
177                result.push(':');
178            } else {
179                result.push(':');
180            }
181        } else {
182            result.push(c);
183        }
184    } else {
185        result.push(c);
186    }
187}
188
#[cfg(test)]
mod tests {
    use pretty_assertions::assert_eq;
    use testresult::TestResult;

    use super::*;

    /// Runs `convert_str` with `JP2T2S` + `CUSTOM` over a table of
    /// `(input, in_heading, expected)` cases.
    #[test]
    fn convert() -> TestResult {
        let config = vec![Convert::JP2T2S, Convert::CUSTOM];

        let cases = [
            ("妳", false, "你"),
            ("Q0", false, "Q0"),
            ("“安装后”", false, "“安装后”"),
            ("&amp;", false, "&"),
            ("安裝後?", false, "安装后?"),
            ("安 装", false, "安 装"),
            ("你\n好", false, "你好"),
            ("08:00", false, "08:00"),
            ("接著", false, "接着"),
            // Outside a heading the space before punctuation is removed ...
            ("第一章 “你好”", false, "第一章“你好”"),
            // ... while inside a heading it is kept.
            ("第一章 “你好”", true, "第一章 “你好”"),
        ];

        for (input, in_heading, expected) in cases {
            assert_eq!(
                convert_str(input, &config, in_heading)?,
                expected,
                "input: {:?}, in_heading: {}",
                input,
                in_heading
            );
        }

        Ok(())
    }
}