sourceannot/snippet/build/
latin1.rs

1use alloc::format;
2use alloc::string::String;
3
4use super::SourceSnippetBuilder;
5use crate::SourceSnippet;
6
7impl SourceSnippet {
8    /// Creates a snippet from a Latin-1 (ISO 8859-1) source.
9    ///
10    /// "\n" and "\r\n" are treated as line breaks.
11    ///
12    /// Control characters (except tabs and line breaks) are represented as
13    /// `<XX>` as alternative text.
14    pub fn build_from_latin1(start_line: usize, source: &[u8], tab_width: usize) -> Self {
15        Self::build_from_latin1_ex(start_line, source, |chr| {
16            if chr == b'\t' {
17                (false, " ".repeat(tab_width))
18            } else {
19                (true, format!("<{chr:02X}>"))
20            }
21        })
22    }
23
24    /// Creates a snippet from a Latin-1 (ISO 8859-1) source.
25    ///
26    /// "\n" and "\r\n" are treated as line breaks.
27    ///
28    /// `on_control` is used to handle control characters (that are not line
29    /// breaks). `on_control` also returns a boolean to indicate if the text
30    /// should be rendered as alternative.
31    pub fn build_from_latin1_ex<FnCtrl>(
32        start_line: usize,
33        source: &[u8],
34        mut on_control: FnCtrl,
35    ) -> Self
36    where
37        FnCtrl: FnMut(u8) -> (bool, String),
38    {
39        let mut snippet = SourceSnippetBuilder::new(start_line);
40
41        let mut chars = source.iter();
42        while let Some(&chr) = chars.next() {
43            if chr == b'\r' && chars.as_slice().starts_with(b"\n") {
44                snippet.next_line(&[1, 1]);
45                chars.next().unwrap();
46            } else if chr == b'\n' {
47                snippet.next_line(&[1]);
48            } else {
49                let orig_len = 1;
50
51                if matches!(chr, b' '..=b'~' | 0xA0..=0xFF) {
52                    // The width of all printable Latin-1 characters is 1.
53                    let chr_width = 1;
54                    snippet.push_char(chr.into(), chr_width, orig_len, false);
55                } else {
56                    let (alt, text) = on_control(chr);
57                    snippet.push_text(&text, orig_len, alt);
58                }
59            }
60        }
61
62        snippet.finish()
63    }
64}
65
#[cfg(test)]
mod tests {
    use alloc::format;

    use crate::range_set::RangeSet;
    use crate::snippet::{SourceLine, SourceSnippet, SourceUnitMeta};

    /// Shorthand for `SourceUnitMeta::new`.
    fn meta(width: usize, len: usize) -> SourceUnitMeta {
        SourceUnitMeta::new(width, len)
    }

    /// Expected line that contains no alternative text.
    fn plain_line(text: &str, width: usize) -> SourceLine {
        SourceLine {
            text: text.into(),
            alts: RangeSet::new(),
            width,
        }
    }

    /// Builds a snippet starting at line 0 from a source for which the
    /// control character callback must never be invoked.
    fn build_plain(source: &[u8]) -> SourceSnippet {
        SourceSnippet::build_from_latin1_ex(0, source, |_| unreachable!())
    }

    // Two lines separated by "\n", without a trailing line break.
    #[test]
    fn test_simple_1() {
        let snippet = build_plain(b"123\n456");

        assert_eq!(snippet.start_line, 0);
        assert_eq!(snippet.lines.len(), 2);
        assert_eq!(snippet.lines, [plain_line("123", 3), plain_line("456", 3)]);
        assert_eq!(snippet.line_map, [4]);
        assert_eq!(
            snippet.metas,
            [
                // "123"
                meta(1, 1),
                meta(1, 1),
                meta(1, 1),
                // "\n"
                meta(1, 0),
                // "456"
                meta(1, 1),
                meta(1, 1),
                meta(1, 1),
            ],
        );
    }

    // A trailing "\n" produces an additional, empty last line.
    #[test]
    fn test_simple_2() {
        let snippet = build_plain(b"123\n456\n");

        assert_eq!(snippet.start_line, 0);
        assert_eq!(snippet.lines.len(), 3);
        assert_eq!(
            snippet.lines,
            [
                plain_line("123", 3),
                plain_line("456", 3),
                plain_line("", 0),
            ],
        );
        assert_eq!(snippet.line_map, [4, 8]);
        assert_eq!(
            snippet.metas,
            [
                // "123"
                meta(1, 1),
                meta(1, 1),
                meta(1, 1),
                // "\n"
                meta(1, 0),
                // "456"
                meta(1, 1),
                meta(1, 1),
                meta(1, 1),
                // "\n"
                meta(1, 0),
            ],
        );
    }

    // Byte 0xFF is printable and ends up as U+00FF in the line text.
    #[test]
    fn test_non_ascii_chr() {
        let snippet = build_plain(b"123\n4\xFF6");

        assert_eq!(snippet.start_line, 0);
        assert_eq!(snippet.lines.len(), 2);
        assert_eq!(
            snippet.lines,
            [plain_line("123", 3), plain_line("4\u{FF}6", 3)],
        );
        assert_eq!(snippet.line_map, [4]);
        assert_eq!(
            snippet.metas,
            [
                // "123"
                meta(1, 1),
                meta(1, 1),
                meta(1, 1),
                // "\n"
                meta(1, 0),
                // "4", 0xFF, "6"
                meta(1, 1),
                meta(1, 2),
                meta(1, 1),
            ],
        );
    }

    // Byte 0x80 is a control character, shown as the alternative text "<80>".
    #[test]
    fn test_control_chr() {
        let snippet = SourceSnippet::build_from_latin1(0, b"123\n4\x806", 4);

        assert_eq!(snippet.start_line, 0);
        assert_eq!(snippet.lines.len(), 2);
        assert_eq!(
            snippet.lines,
            [
                plain_line("123", 3),
                SourceLine {
                    alts: RangeSet::from(1..=4),
                    ..plain_line("4<80>6", 6)
                },
            ],
        );
        assert_eq!(snippet.line_map, [4]);
        assert_eq!(
            snippet.metas,
            [
                // "123"
                meta(1, 1),
                meta(1, 1),
                meta(1, 1),
                // "\n"
                meta(1, 0),
                // "4", 0x80, "6"
                meta(1, 1),
                meta(4, 4),
                meta(1, 1),
            ],
        );
    }

    // "\r\n" is a single line break, while a lone "\r" is handed to the
    // control character callback.
    #[test]
    fn test_crlf() {
        let snippet = SourceSnippet::build_from_latin1_ex(0, b"123\r\n4\r6\r\n", |byte| {
            (true, format!("<{byte:02X}>"))
        });

        assert_eq!(snippet.start_line, 0);
        assert_eq!(snippet.lines.len(), 3);
        assert_eq!(
            snippet.lines,
            [
                plain_line("123", 3),
                SourceLine {
                    alts: RangeSet::from(1..=4),
                    ..plain_line("4<0D>6", 6)
                },
                plain_line("", 0),
            ],
        );
        assert_eq!(snippet.line_map, [5, 10]);
        assert_eq!(
            snippet.metas,
            [
                // "123"
                meta(1, 1),
                meta(1, 1),
                meta(1, 1),
                // "\r\n"
                meta(1, 0),
                meta(1, 0),
                // "4", lone "\r", "6"
                meta(1, 1),
                meta(4, 4),
                meta(1, 1),
                // "\r\n"
                meta(1, 0),
                meta(1, 0),
            ],
        );
    }

    // A tab is expanded to `tab_width` spaces that are not alternative text.
    #[test]
    fn test_tabs() {
        let snippet = SourceSnippet::build_from_latin1(0, b"123\n\t456", 4);

        assert_eq!(snippet.start_line, 0);
        assert_eq!(snippet.lines.len(), 2);
        assert_eq!(
            snippet.lines,
            [plain_line("123", 3), plain_line("    456", 7)],
        );
        assert_eq!(snippet.line_map, [4]);
        assert_eq!(
            snippet.metas,
            [
                // "123"
                meta(1, 1),
                meta(1, 1),
                meta(1, 1),
                // "\n"
                meta(1, 0),
                // "\t"
                meta(4, 4),
                // "456"
                meta(1, 1),
                meta(1, 1),
                meta(1, 1),
            ],
        );
    }
}