use anyhow::Result;
use novel_api::Timing;
use once_cell::sync::OnceCell;
use opencc_rs::{Config, OpenCC};
use tracing::info;
use crate::cmd::Convert;
use super::{Content, Novel};
/// Rewrites every piece of text in `novel` in place using the requested conversions.
///
/// The novel name, the author name, each introduction line (when an
/// introduction is present), every volume title, every chapter title and every
/// `Content::Text` entry are passed through [`convert_str`]. Nothing is touched
/// when `converts` is empty. Once everything has been converted, the elapsed
/// time is logged at `info` level.
///
/// # Errors
///
/// Stops at and returns the first error produced by [`convert_str`] or by
/// reading the elapsed time.
pub async fn convert<T>(novel: &mut Novel, converts: T) -> Result<()>
where
    T: AsRef<[Convert]>,
{
    if converts.as_ref().is_empty() {
        return Ok(());
    }

    let mut timing = Timing::new();

    novel.name = convert_str(&novel.name, &converts)?;
    novel.author_name = convert_str(&novel.author_name, &converts)?;

    if let Some(introduction) = novel.introduction.as_mut() {
        for line in introduction {
            *line = convert_str(&*line, &converts)?;
        }
    }

    for volume in &mut novel.volumes {
        volume.title = convert_str(&volume.title, &converts)?;

        for chapter in &mut volume.chapters {
            chapter.title = convert_str(&chapter.title, &converts)?;

            // The write guard is held while this chapter's contents are rewritten.
            let mut contents = chapter.contents.write().await;
            for content in contents.iter_mut() {
                if let Content::Text(text) = content {
                    *text = convert_str(&*text, &converts)?;
                }
            }
        }
    }

    info!("Time spent on `convert`: {}", timing.elapsed()?);

    Ok(())
}
/// Converts a single string according to `converts`.
///
/// * An empty `converts` returns the input unchanged.
/// * At most one OpenCC pass runs, chosen by precedence: `JP2T2S`, then `T2S`,
///   then `S2T`.
/// * When `CUSTOM` is requested, `custom_convert` is applied to the OpenCC
///   output, or to the original input if no OpenCC pass ran.
///
/// The OpenCC converters are created on first use and cached in
/// function-local statics.
///
/// # Errors
///
/// Returns an error if an OpenCC converter cannot be created or if the OpenCC
/// conversion itself fails.
pub fn convert_str<T, E>(str: T, converts: E) -> Result<String>
where
    T: AsRef<str>,
    E: AsRef<[Convert]>,
{
    static OPENCC_S2T: OnceCell<OpenCC> = OnceCell::new();
    static OPENCC_T2S: OnceCell<OpenCC> = OnceCell::new();
    static OPENCC_JP2T2S: OnceCell<OpenCC> = OnceCell::new();

    let converts = converts.as_ref();
    if converts.is_empty() {
        return Ok(str.as_ref().to_string());
    }

    // `None` means "no OpenCC pass ran". An explicit `Option` keeps that state
    // distinct from an OpenCC pass that legitimately produced an empty string.
    let converted: Option<String> = if converts.contains(&Convert::JP2T2S) {
        Some(
            OPENCC_JP2T2S
                .get_or_try_init(|| OpenCC::new(vec![Config::JP2T, Config::T2S]))?
                .convert(&str)?,
        )
    } else if converts.contains(&Convert::T2S) {
        Some(
            OPENCC_T2S
                .get_or_try_init(|| OpenCC::new(vec![Config::T2S]))?
                .convert(&str)?,
        )
    } else if converts.contains(&Convert::S2T) {
        Some(
            OPENCC_S2T
                .get_or_try_init(|| OpenCC::new(vec![Config::S2T]))?
                .convert(&str)?,
        )
    } else {
        None
    };

    let result = match (converted, converts.contains(&Convert::CUSTOM)) {
        (Some(text), true) => custom_convert(text),
        (Some(text), false) => text,
        (None, true) => custom_convert(str),
        // No known conversion was requested: hand the text back untouched
        // instead of silently dropping it.
        (None, false) => str.as_ref().to_string(),
    };

    Ok(result)
}
/// Applies the custom clean-up rules to `str` and returns the result.
///
/// The steps are:
///
/// 1. HTML entities are decoded with `html_escape::decode_html_entities`.
/// 2. Every character that has an entry in `super::CONVERT_MAP` is replaced by
///    the mapped character; all other characters are kept.
/// 3. Each character is handed to `do_custom_convert` together with the
///    character that follows it (`None` for the last one).
/// 4. Leading and trailing whitespace is trimmed from the output.
///
/// An empty input yields an empty string.
#[must_use]
fn custom_convert<T>(str: T) -> String
where
    T: AsRef<str>,
{
    let input = str.as_ref();
    if input.is_empty() {
        return String::new();
    }

    let decoded = html_escape::decode_html_entities(input);

    // A peekable iterator supplies the "next character" look-ahead directly, so
    // no intermediate `String` is built and there is no `unwrap` on the final
    // character.
    let mut mapped = decoded
        .chars()
        .map(|c| super::CONVERT_MAP.get(&c).copied().unwrap_or(c))
        .peekable();

    let mut result = String::with_capacity(decoded.len());
    while let Some(c) = mapped.next() {
        do_custom_convert(c, mapped.peek().copied(), &mut result);
    }

    result.trim().to_string()
}
fn do_custom_convert(c: char, next_c: Option<char>, result: &mut String) {
let space = ' ';
let last = result.chars().last();
if
c == '\u{200B}'
|| c == '\u{200C}'
|| c == '\u{200D}'
|| c == '\u{2060}'
|| c == '\u{FEFF}'
|| c.is_control()
{
} else if c.is_whitespace() {
if novel_api::is_some_and(last, |c| !super::is_punctuation(c)) {
result.push(space)
}
} else if super::is_punctuation(c) {
if novel_api::is_some_and(last, |c| c.is_whitespace()) {
result.pop();
}
if c == '?' {
result.push('?');
} else if c == '!' {
result.push('!');
} else if c == ',' {
result.push(',');
} else if c == ':' {
if novel_api::is_some_and(last, |c| c.is_ascii_digit())
&& novel_api::is_some_and(next_c, |c| c.is_ascii_digit())
{
result.push(':');
} else {
result.push(':');
}
} else if c == ';' {
if result.ends_with(" ") {
result.truncate(result.len() - 5);
result.push(' ');
} else if result.ends_with("<") {
result.truncate(result.len() - 3);
result.push('<');
} else if result.ends_with(">") {
result.truncate(result.len() - 3);
result.push('>');
} else if result.ends_with(""") {
result.truncate(result.len() - 5);
result.push('"');
} else if result.ends_with("&apos") {
result.truncate(result.len() - 5);
result.push('\'');
} else if result.ends_with("&") {
result.truncate(result.len() - 4);
result.push('&');
} else {
result.push(';');
}
} else if c == '(' {
result.push('(');
} else if c == ')' {
result.push(')');
} else if c == '。' || c == ',' || c == '、' {
if novel_api::is_some_and(last, |last_char| last_char == c) {
} else {
result.push(c);
}
} else {
result.push(c);
}
} else {
result.push(c);
}
}
#[cfg(test)]
mod tests {
    use pretty_assertions::assert_eq;

    use super::*;

    /// Runs `convert_str` with `JP2T2S` + `CUSTOM` over a table of
    /// `(input, expected)` pairs.
    #[test]
    fn convert() -> Result<()> {
        let config = vec![Convert::JP2T2S, Convert::CUSTOM];

        let cases = [
            ("顛覆", "颠覆"),
            ("幺", "幺"),
            ("妳", "妳"),
            ("Q0", "Q0"),
            ("“安装后”", "“安装后”"),
            ("&", "&"),
            ("安裝後?", "安装后?"),
            (",,,", ","),
            ("安 装", "安 装"),
            ("你\n好", "你好"),
            ("08:00", "08:00"),
        ];

        for &(input, expected) in cases.iter() {
            assert_eq!(convert_str(input, &config)?, expected);
        }

        Ok(())
    }
}