tran/
src.rs

1use std::{
2  collections::{HashMap, HashSet},
3  marker::PhantomData,
4  ops::Range,
5};
6
7use aok::Result;
8use tran_trait::ParseResult;
9use xhash::xhash;
10
11use crate::{Cached, Parser, TranCache};
12
13pub struct Src<P: Parser> {
14  pub lang: u16,
15  pub hash_li: Vec<Vec<u8>>,
16  pub range_li: Vec<Range<usize>>,
17  pub title_pos: HashSet<usize>,
18  pub txt: String,
19  _parse: PhantomData<P>,
20}
21
22pub fn new<P: crate::Parser>(lang: u16, txt: impl Into<String>) -> Result<Src<P>> {
23  let txt = txt.into();
24  let ParseResult {
25    range_li,
26    title_pos,
27  } = P::parse(&txt)?;
28  Ok(Src {
29    lang,
30    hash_li: range_li
31      .iter()
32      .map(|i| xhash(&txt[i.start..i.end]))
33      .collect(),
34    title_pos,
35    range_li,
36    txt,
37    _parse: PhantomData,
38  })
39}
40
/// Outcome of a cache lookup performed by `Src::cache`.
///
/// All three fields are keyed by segment position.
pub struct Cache {
  /// Lookup result per segment; `Src::get` treats `None` as "needs translation".
  pub cached: Vec<Option<String>>,
  /// Hash that was used for each segment in the lookup. Where a term
  /// replacement happened, this is the hash of the replaced text.
  pub hash_li: Vec<Vec<u8>>,
  /// Segment position mapped to the segment text after term replacement;
  /// only segments that were actually changed have an entry.
  pub pos_term: HashMap<usize, String>,
}
46
47impl<P: Parser> Src<P> {
48  pub async fn cache(
49    &self,
50    cache: &impl TranCache,
51    to_lang: u16,
52    term: &Option<tran_term::Term>,
53  ) -> Result<Cache> {
54    // TODO optimize for zh - zh-tw
55    let mut pos_term = HashMap::new();
56    let txt_li: Vec<&str> = self
57      .range_li
58      .iter()
59      .map(|i| &self.txt[i.start..i.end])
60      .collect();
61
62    macro_rules! cache {
63      ($hash_li:expr) => {
64        cache.get(self.lang, to_lang, $hash_li, &txt_li[..]).await?
65      };
66    }
67
68    // 如果有术语,先抽取,然后翻译完成后再还原
69    Ok(if let Some(term) = term {
70      let mut hash_li = self.hash_li.clone();
71      for (pos, t) in txt_li.iter().enumerate() {
72        if let Some(t) = term.replace(t, |s| {
73          let s = htmlize::escape_text(s);
74          format!(r#"<code t>{s}</code>"#)
75        }) {
76          hash_li[pos] = xhash(&t);
77          pos_term.insert(pos, t);
78        }
79      }
80      Cache {
81        cached: cache!(&hash_li),
82        hash_li,
83        pos_term,
84      }
85    } else {
86      Cache {
87        cached: cache!(&self.hash_li),
88        hash_li: self.hash_li.clone(),
89        pos_term,
90      }
91    })
92  }
93
94  pub async fn get(
95    &self,
96    cache: &impl TranCache,
97    to_lang: u16,
98    term: &Option<tran_term::Term>,
99  ) -> Result<Cached> {
100    let Cache {
101      cached,
102      mut pos_term,
103      ..
104    } = self.cache(cache, to_lang, term).await?;
105    let mut to_tran_li: Vec<String> = vec![];
106    let mut to_tran_pos: Vec<usize> = vec![];
107    Ok(Cached {
108      cached: cached
109        .into_iter()
110        .enumerate()
111        .map(|(pos, s)| {
112          if let Some(s) = s {
113            s
114          } else {
115            let range = &self.range_li[pos];
116            to_tran_li.push(
117              pos_term
118                .remove(&pos)
119                .unwrap_or(self.txt[range.start..range.end].to_owned()),
120            );
121            to_tran_pos.push(pos);
122
123            s_::EMPTY
124          }
125        })
126        .collect(),
127      to_tran_li,
128      to_tran_pos,
129    })
130  }
131}