mail_parser/parsers/
preview.rs

1/*
2 * SPDX-FileCopyrightText: 2020 Stalwart Labs LLC <hello@stalw.art>
3 *
4 * SPDX-License-Identifier: Apache-2.0 OR MIT
5 */
6
7use std::borrow::Cow;
8
9use crate::decoders::html::html_to_text;
10
11pub fn preview_html<'x>(html: Cow<'_, str>, max_len: usize) -> Cow<'x, str> {
12    preview_text(html_to_text(html.as_ref()).into(), max_len)
13}
14
/// Shortens `text` so that the result is at most `max_len` bytes long.
///
/// * Text that already fits is returned unchanged (as an owned copy).
/// * Longer text is cut on a UTF-8 character boundary, never inside a
///   multi-byte character.
/// * When `max_len` is greater than 6, three bytes of the budget are reserved
///   for a trailing `"..."`; for smaller limits the text is cut without an
///   ellipsis.
///
/// The returned value is always `Cow::Owned`, so it does not borrow from `text`.
pub fn preview_text<'x>(text: Cow<'_, str>, mut max_len: usize) -> Cow<'x, str> {
    if text.len() <= max_len {
        return text.into_owned().into();
    }

    let add_dots = max_len > 6;
    if add_dots {
        // Keep room for the ellipsis inside the caller's budget.
        max_len -= 3;
    }

    // Find the end of the last character that still fits in the budget.
    let mut cut = 0;
    for (pos, ch) in text.char_indices() {
        let end = pos + ch.len_utf8();
        if end > max_len {
            break;
        }
        cut = end;
    }

    // Size the buffer for the kept prefix *and* the ellipsis so that appending
    // the dots never forces a reallocation.
    let mut result = String::with_capacity(cut + if add_dots { 3 } else { 0 });
    result.push_str(&text[..cut]);
    if add_dots {
        result.push_str("...");
    }
    result.into()
}
36
37pub fn truncate_text<'x>(text: Cow<'_, str>, max_len: usize) -> Cow<'x, str> {
38    preview_text(text, max_len)
39}
40
/// Truncates an HTML fragment to at most `max_len` bytes without cutting
/// through a tag or comment.
///
/// * Input that already fits is returned unchanged (as an owned copy).
/// * Otherwise the input is scanned character by character. If the first
///   character that no longer fits lies inside a `<...>` tag or a
///   `<!-- ... -->` comment, the whole unfinished tag/comment is dropped and
///   the cut is placed at its opening `<`; otherwise the cut is placed right
///   before that character.
/// * When `max_len` is greater than 6, three bytes of the budget are reserved
///   for a trailing `"..."`.
///
/// Only tag boundaries are respected: elements left open by the cut are not
/// closed, and a cut may still fall inside a character reference such as
/// `&amp;`. If the input starts with a tag that does not fit in the budget,
/// nothing of it is kept and the result is just the ellipsis (or an empty
/// string when no ellipsis is added).
///
/// The returned value is always `Cow::Owned`, so it does not borrow from `html`.
pub fn truncate_html<'x>(html: Cow<'_, str>, mut max_len: usize) -> Cow<'x, str> {
    if html.len() <= max_len {
        return html.into_owned().into();
    }

    let add_dots = max_len > 6;
    if add_dots {
        // Keep room for the ellipsis inside the caller's budget.
        max_len -= 3;
    }

    let source: &str = &html;
    // Byte offset of the `<` of the tag/comment currently being scanned.
    // `None` means "not inside a tag"; an `Option` is used so that a tag
    // starting at offset 0 is not mistaken for "no tag".
    let mut open_tag_start: Option<usize> = None;
    let mut in_comment = false;
    // The loop below always breaks: the input is longer than `max_len`, so at
    // the latest its final character does not fit.
    let mut cut = source.len();

    for (pos, ch) in source.char_indices() {
        if pos + ch.len_utf8() > max_len {
            // Decide using the state *before* this character: a `<` that does
            // not fit opens nothing, and everything before it is kept.
            cut = open_tag_start.unwrap_or(pos);
            break;
        }
        match ch {
            '<' if open_tag_start.is_none() => {
                open_tag_start = Some(pos);
                in_comment = source[pos + 1..].starts_with("!--");
            }
            '>' if open_tag_start.is_some() => {
                // Inside a comment only `-->` terminates; a bare `>` is content.
                if !in_comment || source[..pos].ends_with("--") {
                    open_tag_start = None;
                    in_comment = false;
                }
            }
            _ => {}
        }
    }

    let mut result = String::with_capacity(cut + if add_dots { 3 } else { 0 });
    result.push_str(&source[..cut]);
    if add_dots {
        result.push_str("...");
    }
    result.into()
}
97
#[cfg(test)]
mod tests {
    use super::{truncate_html, truncate_text};

    /// Plain-text truncation must respect the byte budget and never split a
    /// multi-byte character, for both Latin and CJK input.
    #[test]
    fn text_preview() {
        let french = concat!(
            "J'interdis aux marchands de vanter trop leurs marchandises. ",
            "Car ils se fontvite pédagogues et t'enseignent comme but ce qui ",
            "n'est par essence qu'un moyen, et te trompant ainsi sur la route ",
            "à suivre les voilà bientôt qui te dégradent, car si leur musique ",
            "est vulgaire ils te fabriquent pour te la vendre une âme vulgaire.\n",
            "— Antoine de Saint-Exupéry, Citadelle (1948)"
        );
        // The punctuation in this fixture is full-width (3 bytes per mark in
        // UTF-8). The expected cut after "子" depends on that: with half-width
        // ASCII punctuation the 107-byte budget would end later in the text.
        let chinese = concat!(
            "長沮、桀溺耦而耕,孔子過之,使子路問津焉。長沮曰:「夫執輿者為誰?」",
            "子路曰:「為孔丘。」曰:「是魯孔丘與?」曰:「是也。」曰:「是知津矣。」問於桀溺,",
            "桀溺曰:「子為誰?」曰:「為仲由。」曰:「是魯孔丘之徒與?」對曰:「然。",
            "」曰:「滔滔者天下皆是也,而誰以易之?且而與其從辟人之士也,豈若從",
            "辟世之士哉?」耰而不輟。子路行以告。夫子憮然曰:「鳥獸不可與同群,吾非斯人之徒",
            "與而誰與?天下有道,丘不與易也。」",
            "子路從而後,遇丈人,以杖荷蓧。子路問曰:「子見夫子乎?」丈人曰:「四體不勤,",
            "五穀不分。孰為夫子?」植其杖而芸。子路拱而立。止子路宿,殺雞為黍而食之,見其二",
            "子焉。明日,子路行以告。子曰:「隱者也。」使子路反見之。至則行矣。子路曰:「",
            "不仕無義。長幼之節,不可廢也;君臣之義,如之何其廢之?欲潔其身,而亂大倫。君",
            "子之仕也,行其義也。道之不行,已知之矣。」"
        );

        assert_eq!(
            truncate_text(french.into(), 110),
            "J'interdis aux marchands de vanter trop leurs marchandises. Car ils se fontvite pédagogues et t'enseignent..."
        );

        assert_eq!(
            truncate_text(chinese.into(), 110),
            "長沮、桀溺耦而耕,孔子過之,使子路問津焉。長沮曰:「夫執輿者為誰?」子..."
        );
    }

    /// HTML truncation must never leave a partially written tag or comment in
    /// the output; every case is truncated to 25 bytes.
    #[test]
    fn html_truncate() {
        const MAX_LEN: usize = 25;

        let cases = [
            (
                "<html>hello<br/>world<br/></html>",
                "<html>hello<br/>world...",
            ),
            ("<html>using &lt;><br/></html>", "<html>using &lt;><br/>..."),
            (
                "test <not br/>tag<br />test <not br/>tag<br />",
                "test <not br/>tag...",
            ),
            (
                "<>< ><tag\n/>>hello    world< br \n />",
                "<>< ><tag\n/>>hello    ...",
            ),
            (
                concat!(
                    "<head><title>ignore head</title><not head>xyz</not head></head>",
                    "<h1>&lt;body&gt;</h1>"
                ),
                "<head><title>ignore he...",
            ),
            (
                concat!(
                    "<p>what is &heartsuit;?</p><p>&#x000DF;&Abreve;&#914;&gamma; ",
                    "don&apos;t hurt me.</p>"
                ),
                "<p>what is &heartsuit;...",
            ),
            (
                "<!-- <> < < < -->the actual<!--> text",
                "<!-- <> < < < -->the a...",
            ),
            (
                "   < p >  hello < / p > < p > world < / p >   !!! < br > ",
                "   < p >  hello ...",
            ),
            (
                " <p>please unsubscribe <a href=#>here</a>.</p> ",
                " <p>please unsubscribe...",
            ),
        ];

        for (html, expected) in cases {
            assert_eq!(
                truncate_html(html.into(), MAX_LEN),
                expected,
                "unexpected truncation for input {:?}",
                html
            );
        }
    }
}