tep_lib/common/rule.rs
1/// Overview: Rule model and default rules set.
2use std::collections::HashSet;
3use structopt::lazy_static::lazy_static;
4
/// A single literal text substitution: every occurrence of `from` is meant to
/// be rewritten as `to`.
///
/// Both sides are `'static` string slices, so a `Rule` is cheap to copy,
/// compare and hash.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct Rule {
    /// The text to look for.
    pub from: &'static str,
    /// The text that replaces `from`.
    pub to: &'static str,
}

impl Rule {
    /// Returns the built-in rule set as a freshly allocated vector.
    ///
    /// The rules are returned in the order they are listed in the table
    /// below (groups S1 through S5); callers that care about precedence can
    /// rely on that order being stable.
    pub fn default_rules() -> Vec<Self> {
        // `(from, to)` pairs, kept as a flat table so the whole rule set can
        // be read at a glance.
        //
        // NOTE(review): group S5 is listed after S4, and every S5 pattern
        // starts with a pattern that S4 also matches. If the consumer applies
        // rules one after another in order, S5 would never fire - confirm
        // against the code that applies these rules before reordering.
        //
        // NOTE(review): a few entries map a string to itself as written here
        // (e.g. "~" and the ",》" / "?》" family). Presumably the `from` side
        // was meant to be a full-width form - confirm against the original
        // file encoding.
        const TABLE: &[(&str, &str)] = &[
            // S1: punctuation directly followed by a closing quote/bracket
            // becomes the ASCII pair plus one trailing space.
            ("。”", ".\" "),
            (",”", ",\" "),
            ("?”", "?\" "),
            ("!”", "!\" "),
            (";”", ";\" "),
            ("……”", "...\" "),
            ("…”", "...\" "),
            ("。’", ".' "),
            (",’", ",' "),
            ("?’", "?' "),
            ("!’", "!' "),
            (";’", ";' "),
            ("……’", "...' "),
            ("…’", "...' "),
            ("。」", ".' "),
            (",」", ",' "),
            ("?」", "?' "),
            ("!」", "!' "),
            (";」", ";' "),
            ("……」", "...' "),
            ("…」", "...' "),
            ("。』", ".\" "),
            (",』", ",\" "),
            ("?』", "?\" "),
            ("!』", "!\" "),
            (";』", ";\" "),
            ("……』", "...\" "),
            ("…』", "...\" "),
            ("。)", ".) "),
            (",)", ",) "),
            ("?)", "?) "),
            ("!)", "!) "),
            (";)", ";) "),
            ("……)", "...) "),
            ("…)", "...) "),
            ("。】", ".] "),
            (",】", ",] "),
            ("?】", "?] "),
            ("!】", "!] "),
            (";】", ";] "),
            ("……】", "...] "),
            ("…】", "...] "),
            // S2: opening quotes/brackets get one leading space.
            ("“", " \""),
            ("‘", " '"),
            ("「", " '"),
            ("『", " \""),
            ("(", " ("),
            ("【", " ["),
            // S3: closing quotes/brackets get one trailing space.
            ("”", "\" "),
            ("’", "' "),
            ("」", "' "),
            ("』", "\" "),
            (")", ") "),
            ("】", "] "),
            // S4: stand-alone punctuation; dashes are padded on both sides,
            // everything else gets one trailing space.
            ("——", " - "),
            ("。", ". "),
            (",", ", "),
            ("?", "? "),
            ("!", "! "),
            (";", "; "),
            ("……", "... "),
            ("…", "... "),
            (":", ": "),
            ("、", ", "),
            ("—", " - "),
            ("~", "~"),
            // S5: punctuation directly followed by `》` or `〉` keeps the
            // bracket and gets no trailing space.
            ("。》", ".》"),
            (",》", ",》"),
            ("?》", "?》"),
            ("!》", "!》"),
            (";》", ";》"),
            ("……》", "...》"),
            ("…》", "...》"),
            ("。〉", ".〉"),
            (",〉", ",〉"),
            ("?〉", "?〉"),
            ("!〉", "!〉"),
            (";〉", ";〉"),
            ("……〉", "...〉"),
            ("…〉", "...〉"),
        ];

        TABLE.iter().map(|&(from, to)| Self { from, to }).collect()
    }
}
341
342lazy_static! {
343 pub static ref SOFT_MARKS: HashSet<char> = HashSet::from(['.', ',', ';', ':', '?', '!', '-']);
344 pub static ref BREAKS: HashSet<char> = HashSet::from(['\n']);
345 pub static ref HARD_MARKS: HashSet<char> =
346 HashSet::from(['\'', '"', '(', ')', '{', '}', '[', ']', '《', '》', '〈', '〉',]);
347}