1#![cfg_attr(docsrs, feature(doc_cfg))]
2
3#[cfg(feature = "yml")]
4pub mod yml;
5
6use std::{
7 borrow::Borrow,
8 collections::{HashMap, HashSet},
9};
10
11use daachorse::{CharwiseDoubleArrayAhoCorasick, CharwiseDoubleArrayAhoCorasickBuilder, MatchKind};
12use htmlize::{escape_attribute, unescape_attribute};
13use tag_replace::{TagReplace, word_push};
14use thiserror::Error;
15use unicode_categories::UnicodeCategories;
16use unicode_segmentation::UnicodeSegmentation;
17
/// Shared [`TagReplace`] instance constructed with the arguments `"code"` and `"v"`.
///
/// [`Term::replace`] emits markers shaped like `<code v="…">…</code>`, and
/// [`Term::restore`] passes its input through this instance's `replace` method.
///
/// NOTE(review): presumably `"code"` is the tag name and `"v"` the attribute whose
/// value is handed to the `replace` callback — confirm against the `tag_replace` crate.
#[static_init::dynamic]
pub static CODE: TagReplace = TagReplace::new("code", "v");
20
/// Errors produced by this module.
#[derive(Error, Debug)]
pub enum Error {
    /// An error reported by `daachorse`; [`Term::load`] returns it when the
    /// automaton builder fails. Displayed as `daachorse: <inner error>`.
    #[error("daachorse: {0}")]
    Daachorse(daachorse::errors::DaachorseError),
}
26
/// Returns `s` with its first character converted to uppercase.
///
/// Only the leading character is touched; the remainder is copied verbatim.
/// An empty input yields an empty `String`. The uppercase form of one
/// character may be several characters long (for example `ß` becomes `SS`).
fn capitalize_first_letter(s: impl AsRef<str>) -> String {
    let text = s.as_ref();

    let Some(head) = text.chars().next() else {
        return String::new();
    };

    // Everything after the first character, sliced on its UTF-8 width.
    let tail = &text[head.len_utf8()..];

    let mut out = String::with_capacity(text.len());
    out.extend(head.to_uppercase());
    out.push_str(tail);
    out
}
34
/// A table of term replacements together with the automaton used to find
/// the terms in text.
pub struct Term {
    /// Replacement values keyed by term. [`Term::load`] stores every key lowercased.
    pub map: HashMap<String, String>,
    /// Automaton that [`Term::load`] builds from the keys of `map`, configured
    /// for leftmost-longest matching.
    pub ac: CharwiseDoubleArrayAhoCorasick<usize>,
}
39
40impl Term {
41 pub fn load<'a, S1: AsRef<str> + 'a, S2: AsRef<str> + 'a, T: Borrow<(S1, S2)>>(
42 from_to_iter: impl IntoIterator<Item = T>,
43 ) -> Result<Self, Error> {
44 let mut map = HashMap::new();
45
46 let mut exist = HashSet::new();
47
48 for i in from_to_iter {
49 let (key, value) = i.borrow();
50 let lower_key = key.as_ref().to_lowercase();
51 if lower_key.is_empty() || exist.contains(&lower_key) {
52 continue;
53 }
54 exist.insert(lower_key.clone());
55
56 let value = value.as_ref().into();
57 map.insert(lower_key, value);
58 }
59
60 match CharwiseDoubleArrayAhoCorasickBuilder::new()
61 .match_kind(MatchKind::LeftmostLongest)
62 .build(map.keys())
63 {
64 Ok(ac) => Ok(Term { map, ac }),
65 Err(err) => Err(Error::Daachorse(err)),
66 }
67 }
68
69 pub fn restore(&self, txt: impl AsRef<str>) -> String {
70 let txt = txt.as_ref();
71 CODE.replace(txt, |li, _origin, val: &str| {
72 word_push(li, unescape_attribute(val));
73 })
74 }
75
76 pub fn replace(&mut self, txt: impl AsRef<str>) -> Option<String> {
77 let txt = txt.as_ref();
78 let txt_lower = txt.to_lowercase();
79 let mut li = vec![];
80 let mut pos = 0;
81 for mat in self.ac.leftmost_find_iter(&txt_lower) {
82 let start = mat.start();
83 let end = mat.end();
84 let matched = &txt_lower[start..end];
85
86 macro_rules! is_word {
87 ($prev:expr, $next:expr) => {
88 if let Some(last) = $prev.chars().last()
89 && let Some(first) = $next.chars().next()
90 {
91 let t = last.to_string() + &first.to_string();
92 if !t.contains('_') {
93 let mut t = t.split_word_bounds();
94 match (t.next(), t.next()) {
95 (Some(_), None) => continue,
96 _ => {}
97 };
98 }
99 }
100 };
101 }
102
103 if let Some(val) = self.map.get(matched) {
104 let mut val = val.to_owned();
105
106 is_word!(&txt[..start], matched);
108 is_word!(matched, &txt[end..]);
109
110 if start > pos {
111 li.push(txt[pos..start].to_owned());
112 }
113
114 let org = &txt[start..end];
115 if let Some(c) = org.chars().next()
116 && (
117 c.is_uppercase() || start == 0
118 )
120 {
121 let pos = c.to_string().len();
122 if org.len() > 1 && org[pos..].chars().all(char::is_uppercase) {
123 val = val.to_uppercase();
124 } else {
125 val = capitalize_first_letter(val);
126 }
127 } else if let Some(last) = txt[..start].trim_end().chars().last()
128 && !"{}()_*[]~".contains(last)
129 && last.is_punctuation()
130 {
131 val = capitalize_first_letter(val)
132 }
133
134 let val = escape_attribute(val);
135 li.push(format!(r#"<code v="{val}">{matched}</code>"#));
136 }
137 pos = end;
138 }
139 if !li.is_empty() {
140 if pos < txt.len() {
141 li.push(txt[pos..].into());
142 }
143 let r = li.concat();
144 return Some(r);
145 }
146 None
147 }
148}
149
150impl PartialEq for Term {
151 fn eq(&self, other: &Self) -> bool {
152 self.map == other.map
153 }
154}