tep_lib/common/
rule.rs

//! Overview: the `Rule` model and the default rule set.
2use std::collections::HashSet;
3use structopt::lazy_static::lazy_static;
4
/// A single text-replacement rule: a source pattern and the text it maps to.
///
/// Both fields are `'static` string slices, so rules are cheap to clone and
/// compare. How a rule is applied to input text is decided by the consumer of
/// this type and is not defined here.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Rule {
    /// The text to look for.
    pub from: &'static str,
    /// The text that stands in for `from`.
    pub to: &'static str,
}

impl Rule {
    /// Returns the built-in rule set, in table order.
    ///
    /// The table is grouped into five sections (labels kept from the original
    /// comments):
    ///
    /// * `S1` - sentence punctuation immediately followed by a closing quote
    ///   or bracket; the replacement ends with a trailing space.
    /// * `S2` - opening quotes/brackets; the replacement starts with a space.
    /// * `S3` - closing quotes/brackets; the replacement ends with a space.
    /// * `S4` - stand-alone punctuation.
    /// * `S5` - sentence punctuation immediately followed by `》` or `〉`; the
    ///   closing bracket itself is kept and no space is added.
    ///
    /// The order of entries is preserved exactly. Whether consumers rely on
    /// that order is not visible from this file, so do not reorder casually.
    pub fn default_rules() -> Vec<Self> {
        // NOTE(review): nine entries below are identity mappings as written
        // (`~`, and `,》` `?》` `!》` `;》` plus their `〉` variants), and many
        // `from` patterns start with ASCII punctuation. This looks like
        // full-width forms (e.g. U+FF0C, U+FF1F) were folded to ASCII at some
        // point - confirm against the upstream table. The literals are kept
        // byte-for-byte here so behavior does not change.
        const TABLE: &[(&str, &str)] = &[
            // S1
            ("。”", ".\" "),
            (",”", ",\" "),
            ("?”", "?\" "),
            ("!”", "!\" "),
            (";”", ";\" "),
            ("……”", "...\" "),
            ("…”", "...\" "),
            ("。’", ".' "),
            (",’", ",' "),
            ("?’", "?' "),
            ("!’", "!' "),
            (";’", ";' "),
            ("……’", "...' "),
            ("…’", "...' "),
            ("。」", ".' "),
            (",」", ",' "),
            ("?」", "?' "),
            ("!」", "!' "),
            (";」", ";' "),
            ("……」", "...' "),
            ("…」", "...' "),
            ("。』", ".\" "),
            (",』", ",\" "),
            ("?』", "?\" "),
            ("!』", "!\" "),
            (";』", ";\" "),
            ("……』", "...\" "),
            ("…』", "...\" "),
            ("。)", ".) "),
            (",)", ",) "),
            ("?)", "?) "),
            ("!)", "!) "),
            (";)", ";) "),
            ("……)", "...) "),
            ("…)", "...) "),
            ("。】", ".] "),
            (",】", ",] "),
            ("?】", "?] "),
            ("!】", "!] "),
            (";】", ";] "),
            ("……】", "...] "),
            ("…】", "...] "),
            // S2
            ("“", " \""),
            ("‘", " '"),
            ("「", " '"),
            ("『", " \""),
            ("(", " ("),
            ("【", " ["),
            // S3
            ("”", "\" "),
            ("’", "' "),
            ("」", "' "),
            ("』", "\" "),
            (")", ") "),
            ("】", "] "),
            // S4
            ("——", " - "),
            ("。", ". "),
            (",", ", "),
            ("?", "? "),
            ("!", "! "),
            (";", "; "),
            ("……", "... "),
            ("…", "... "),
            (":", ": "),
            ("、", ", "),
            ("—", " - "),
            ("~", "~"),
            // S5
            ("。》", ".》"),
            (",》", ",》"),
            ("?》", "?》"),
            ("!》", "!》"),
            (";》", ";》"),
            ("……》", "...》"),
            ("…》", "...》"),
            ("。〉", ".〉"),
            (",〉", ",〉"),
            ("?〉", "?〉"),
            ("!〉", "!〉"),
            (";〉", ";〉"),
            ("……〉", "...〉"),
            ("…〉", "...〉"),
        ];

        TABLE
            .iter()
            .map(|&(from, to)| Rule { from, to })
            .collect()
    }
}
341
342lazy_static! {
343    pub static ref SOFT_MARKS: HashSet<char> = HashSet::from(['.', ',', ';', ':', '?', '!', '-']);
344    pub static ref BREAKS: HashSet<char> = HashSet::from(['\n']);
345    pub static ref HARD_MARKS: HashSet<char> =
346        HashSet::from(['\'', '"', '(', ')', '{', '}', '[', ']', '《', '》', '〈', '〉',]);
347}