/// Returns `true` when `prefix` is one of the known "reply" subject markers
/// (for example `re`, `aw`, `antw`).
///
/// Membership is tested on the UTF-8 bytes of `prefix` against the literal
/// set below via `hashify::tiny_set!`. Every cased entry in the set is
/// lower-case; `thread_name` lower-cases the token with `to_lowercase()`
/// before calling this function.
///
/// NOTE(review): "转发" appears to be a Simplified Chinese *forward* marker
/// rather than a reply marker — confirm whether it is intentionally listed
/// here instead of in `is_fwd_prefix`.
fn is_re_prefix(prefix: &str) -> bool {
    hashify::tiny_set! {prefix.as_bytes(),
        "re",
        "res",
        "sv",
        "antw",
        "ref",
        "aw",
        "απ",
        "השב",
        "vá",
        "r",
        "rif",
        "bls",
        "odp",
        "ynt",
        "atb",
        "رد",
        "回复",
        "转发",
    }
}
29
/// Returns `true` when `prefix` is one of the known "forward" subject markers
/// (for example `fwd`, `fw`, `wg`).
///
/// Membership is tested on the UTF-8 bytes of `prefix` against the literal
/// set below via `hashify::tiny_set!`. Every cased entry in the set is
/// lower-case; both callers in this file (`thread_name` and
/// `trim_trailing_fwd`) lower-case their input with `to_lowercase()` first.
///
/// Notes on individual entries:
/// * `"i̇lt"` is spelled with `i` followed by U+0307 (combining dot above);
///   presumably this is the lower-cased form of a dotted capital `İ` —
///   confirm before "normalising" it.
/// * `"إعادة توجيه"` contains a space. `thread_name` closes a token at the
///   first whitespace, so that entry can only match through
///   `trim_trailing_fwd`, which passes the whole parenthesised content.
///
/// NOTE(review): "回覆" appears to be a Traditional Chinese *reply* marker
/// rather than a forward marker — confirm whether it is intentionally listed
/// here (the `parse_trail_fwd` test currently relies on it).
fn is_fwd_prefix(prefix: &str) -> bool {
    hashify::tiny_set! {prefix.as_bytes(),
        "fwd",
        "fw",
        "rv",
        "enc",
        "vs",
        "doorst",
        "vl",
        "tr",
        "wg",
        "πρθ",
        "הועבר",
        "továbbítás",
        "i",
        "fs",
        "trs",
        "vb",
        "pd",
        "i̇lt",
        "yml",
        "إعادة توجيه",
        "回覆",
        "轉寄",
    }
}
56
/// Extracts the base thread name from a subject line.
///
/// The scan walks `text` once from the left and skips:
/// * reply/forward prefixes terminated by `:` (`re:`, `fwd:`), optionally
///   followed by a bracketed counter (`re[5]:`);
/// * leading bracketed blobs such as `[mailing-list]`;
/// * the prefixes inside a forward blob (`[fwd: re: hello]` yields `hello`).
///
/// The chosen remainder is passed through [`trim_trailing_fwd`], which trims
/// whitespace and trailing `(fwd)`-style markers.
///
/// The return value is always a sub-slice of `text` (possibly empty); no
/// allocation is returned to the caller. Prefix matching is case-insensitive
/// because each candidate token is lower-cased before lookup.
pub fn thread_name(text: &str) -> &str {
    // Byte range of the token currently being read. `token_end == 0` means
    // "token not closed yet"; it is set at the first whitespace or delimiter.
    let mut token_start = 0;
    let mut token_end = 0;

    // Byte offset just past the last accepted `prefix:` outside a blob.
    let mut thread_name_start = 0;
    // Byte range of the content that follows the prefixes inside a forward
    // blob (`[fwd: <content>]`). `fwd_start == 0` means "none found".
    let mut fwd_start = 0;
    let mut fwd_end = 0;
    // Byte offset just past the last `]` that closed a blob which was not the
    // bracketed counter of a prefix.
    let mut last_blob_end = 0;

    // Currently between `[` and `]`.
    let mut in_blob = false;
    // The current blob contained a `:` that did not follow a known prefix;
    // later `:` characters in this blob are treated as ordinary text.
    let mut in_blob_ignore = false;
    // A known prefix was directly followed by `[` (e.g. `re[`); a `]:` is
    // expected to complete it.
    let mut seen_header = false;
    // A forward prefix was seen inside the current blob.
    let mut seen_blob_header = false;
    // A token is in progress (a non-whitespace, non-delimiter char was seen
    // since the last reset).
    let mut token_found = false;

    for (pos, ch) in text.char_indices() {
        match ch {
            '[' => {
                if !in_blob {
                    if token_found {
                        // `token[`: only acceptable when the token is a known
                        // prefix, as in `re[5]:`.
                        if token_end == 0 {
                            token_end = pos;
                        }
                        let prefix = text[token_start..token_end].to_lowercase();
                        if is_re_prefix(prefix.as_ref()) || is_fwd_prefix(prefix.as_ref()) {
                            seen_header = true;
                        } else {
                            break;
                        }
                    }
                    token_found = false;
                    in_blob = true;
                } else {
                    // Nested `[` inside a blob: stop scanning.
                    break;
                }
            }
            ']' if in_blob => {
                if seen_blob_header && token_found {
                    // Forward blob with content after its prefixes: remember
                    // that content (from the token start up to this `]`).
                    fwd_start = token_start;
                    fwd_end = pos;
                }
                if !seen_header {
                    // Not a prefix counter such as the `[5]` in `re[5]:`.
                    last_blob_end = pos + 1;
                }
                in_blob = false;
                token_found = false;
                seen_blob_header = false;
                in_blob_ignore = false;
            }
            ':' if !in_blob => {
                if (seen_header && token_found) || (!seen_header && !token_found) {
                    // Either text appeared between `prefix[..]` and `:`, or
                    // the `:` has no token in front of it: not a prefix.
                    break;
                } else if !seen_header {
                    // Plain `token:`; accept it only if the token is a known
                    // reply/forward prefix.
                    if token_end == 0 {
                        token_end = pos;
                    }
                    let prefix = text[token_start..token_end].to_lowercase();
                    if !is_re_prefix(prefix.as_ref()) && !is_fwd_prefix(prefix.as_ref()) {
                        break;
                    }
                } else {
                    // `prefix[..]:` completed.
                    seen_header = false;
                }
                thread_name_start = pos + 1;
                token_found = false;
            }
            ':' if in_blob && !in_blob_ignore => {
                if token_end == 0 {
                    token_end = pos;
                }

                let prefix = text[token_start..token_end].to_lowercase();
                if is_fwd_prefix(prefix.as_ref()) {
                    token_found = false;
                    seen_blob_header = true;
                } else if seen_blob_header && is_re_prefix(prefix.as_ref()) {
                    // Reply prefixes are only skipped inside a blob once a
                    // forward prefix has been seen in it.
                    token_found = false;
                } else {
                    in_blob_ignore = true;
                }
            }
            _ if ch.is_whitespace() => {
                // The first whitespace after a token start closes the token.
                if token_end == 0 {
                    token_end = pos;
                }
            }
            _ => {
                if !token_found {
                    token_start = pos;
                    token_end = 0;
                    token_found = true;
                } else if !in_blob && pos - token_start > 21 {
                    // More than 21 bytes since the token started without
                    // reaching a delimiter: stop looking for prefixes.
                    // NOTE(review): 21 presumably covers the longest entry in
                    // the prefix tables (measured in bytes) — confirm.
                    break;
                }
            }
        }
    }

    // Prefer the text after a leading plain blob (e.g. `[list] subject`),
    // provided something is left once it has been trimmed.
    // NOTE(review): the second disjunct implies the first
    // (`last_blob_end > fwd_start > thread_name_start`), so it looks redundant.
    if last_blob_end > thread_name_start
        || (fwd_start > 0 && last_blob_end > fwd_start && fwd_start > thread_name_start)
    {
        let result = trim_trailing_fwd(&text[last_blob_end..]);
        if !result.is_empty() {
            return result;
        }
    }

    // Otherwise use the content of a forward blob that follows the last
    // accepted prefix, if it is non-empty after trimming.
    if fwd_start > 0 && thread_name_start < fwd_start {
        let result = trim_trailing_fwd(&text[fwd_start..fwd_end]);
        if !result.is_empty() {
            return result;
        }
    }

    // Fall back to everything after the last accepted prefix.
    trim_trailing_fwd(&text[thread_name_start..])
}
173
174pub fn trim_trailing_fwd(text: &str) -> &str {
175 let mut in_parentheses = false;
176 let mut trim_end = true;
177 let mut end_found = false;
178
179 let mut text_start = 0;
180 let mut text_end = text.len();
181 let mut fwd_end = 0;
182
183 for (pos, ch) in text.char_indices().rev() {
184 match ch {
185 '(' if !end_found => {
186 if in_parentheses {
187 in_parentheses = false;
188 if fwd_end - pos > 2
189 && is_fwd_prefix(text[pos + 1..fwd_end].to_lowercase().as_ref())
190 {
191 text_end = pos;
192 trim_end = true;
193 continue;
194 }
195 }
196 end_found = true;
197 }
198 ')' if !end_found => {
199 if !in_parentheses {
200 in_parentheses = true;
201 fwd_end = pos;
202 } else {
203 end_found = true;
204 }
205 }
206 _ if ch.is_whitespace() => {
207 if trim_end {
208 text_end = pos;
209 }
210 continue;
211 }
212 _ => {
213 if !in_parentheses && !end_found {
214 end_found = true;
215 }
216 }
217 }
218
219 if trim_end {
220 trim_end = false;
221 }
222 text_start = pos;
223 }
224
225 if text_end >= text_start {
226 &text[text_start..text_end]
227 } else {
228 ""
229 }
230}
231
// Table-driven unit tests: each case is an `(input, expected)` pair.
#[cfg(test)]
mod tests {
    use crate::parsers::fields::thread::{thread_name, trim_trailing_fwd};

    // Checks `thread_name` end to end: prefix stripping, bracketed counters,
    // list blobs, forward blobs, non-ASCII prefixes and trailing markers.
    #[test]
    fn parse_thread_name() {
        let tests = [
            ("re: hello", "hello"),
            ("re:re: hello", "hello"),
            ("re:fwd: hello", "hello"),
            ("fwd[5]:re[5]: hello", "hello"),
            ("fwd[99]: re[40]: hello", "hello"),
            (": hello", ": hello"),
            ("z: hello", "z: hello"),
            ("re:: hello", ": hello"),
            ("[10] hello", "hello"),
            ("fwd[a]: hello", "hello"),
            ("re:", ""),
            ("re::", ":"),
            ("", ""),
            (" ", ""),
            ("回复: 轉寄: 轉寄", "轉寄"),
            ("aw[50]: wg: aw[1]: hallo", "hallo"),
            ("res: rv: enc: továbbítás: ", ""),
            ("[fwd: hello world]", "hello world"),
            ("re: enc: re[5]: [fwd: hello world]", "hello world"),
            ("[fwd: re: fw: hello world]", "hello world"),
            ("[fwd: hello world]: another text", ": another text"),
            ("[fwd: re: fwd:] another text", "another text"),
            ("[hello world]", "[hello world]"),
            ("re: fwd[9]: [hello world]", "[hello world]"),
            ("[mailing-list] hello world", "hello world"),
            ("[mailing-list] re: hello world", "hello world"),
            ("[mailing-list] wg[8]:re: hello world", "hello world"),
            ("hello [world]", "hello [world]"),
            (" [hello] [world] ", "[hello] [world]"),
            ("[mailing-list] hello [world]", "hello [world]"),
            ("[hello [world]", "[hello [world]"),
            ("[]hello [world]", "hello [world]"),
            ("[fwd: re: re:] fwd[6]:re: fw:", ""),
            ("[fwd hello] world hello", "world hello"),
            ("[fwd: مرحبا بالعالم]", "مرحبا بالعالم"),
            ("[fwd: hello world] مرحبا بالعالم", "مرحبا بالعالم"),
            (" hello world ", "hello world"),
            (
                "[mailing-list] wg[8]:re: hello world (fwd)(fwd)",
                "hello world",
            ),
            ("[fwd: re: fw: hello world (fwd)]", "hello world"),
            (
                "res: rv: enc: továbbítás: hello world (doorst)",
                "hello world",
            ),
            ("[fwd: re: re: (fwd)] fwd[6]:re: fw: (fwd)", ""),
        ];

        for (input, expected) in tests {
            assert_eq!(thread_name(input), expected, "{input:?}");
        }
    }

    // Checks `trim_trailing_fwd` in isolation: whitespace trimming, repeated
    // trailing markers, and parenthesised text that must be left untouched.
    #[test]
    fn parse_trail_fwd() {
        let tests = [
            ("hello (fwd)", "hello"),
            (" hello (fwd)(fwd)", "hello"),
            ("hello (wg) (fwd) (fwd)", "hello"),
            ("(fwd)(fwd)", ""),
            ("(fwd)hello(fwd)", "(fwd)hello"),
            (" hello ", "hello"),
            (" hello world ", "hello world"),
            ("", ""),
            (" ", ""),
            ("hello ()(fwd)", "hello ()"),
            ("(hello)", "(hello)"),
            ("hello () (fwd) ()(fwd)", "hello () (fwd) ()"),
            (")(", ")("),
            (" 你好世界(fwd) ", "你好世界"),
            ("你好世界 (回覆)", "你好世界"),
            ("hello(fwd", "hello(fwd"),
            ("hello(fwd))", "hello(fwd))"),
        ];

        for (input, expected) in tests {
            assert_eq!(trim_trailing_fwd(input), expected, "{input:?}");
        }
    }
}