encoding/codec/
utf_16.rs

1// This is a part of encoding-next.
2// Copyright (c) 2013-2015, Kang Seonghoon.
3// See README.md and LICENSE.txt for details.
4
5//! UTF-16.
6
7use crate::types::*;
8use crate::util::as_char;
9use std::convert::Into;
10
/// UTF-16 (UCS Transformation Format, 16-bit), in little endian.
///
/// This is a Unicode encoding where one codepoint may use
/// 2 (up to U+FFFF) or 4 bytes (up to U+10FFFF) depending on its value.
/// It uses a "surrogate" mechanism to encode non-BMP codepoints,
/// which are represented as a pair of an upper surrogate (U+D800..DBFF)
/// followed by a lower surrogate (U+DC00..DFFF).
/// As a consequence, surrogate characters (U+D800..DFFF) cannot appear alone
/// and cannot be included in a valid Unicode string.
#[derive(Clone, Copy, Debug)]
pub struct UTF16LEEncoding;
21
22impl Encoding for UTF16LEEncoding {
23    fn name(&self) -> &'static str {
24        "utf-16le"
25    }
26    fn whatwg_name(&self) -> Option<&'static str> {
27        Some("utf-16le")
28    }
29    fn raw_encoder(&self) -> Box<dyn RawEncoder> {
30        UTF16LEEncoder::new()
31    }
32    fn raw_decoder(&self) -> Box<dyn RawDecoder> {
33        UTF16LEDecoder::new()
34    }
35}
36
/// UTF-16 (UCS Transformation Format, 16-bit), in big endian.
///
/// This is a Unicode encoding where one codepoint may use
/// 2 (up to U+FFFF) or 4 bytes (up to U+10FFFF) depending on its value.
/// It uses a "surrogate" mechanism to encode non-BMP codepoints,
/// which are represented as a pair of an upper surrogate (U+D800..DBFF)
/// followed by a lower surrogate (U+DC00..DFFF).
/// As a consequence, surrogate characters (U+D800..DFFF) cannot appear alone
/// and cannot be included in a valid Unicode string.
#[derive(Clone, Copy, Debug)]
pub struct UTF16BEEncoding;
47
48impl Encoding for UTF16BEEncoding {
49    fn name(&self) -> &'static str {
50        "utf-16be"
51    }
52    fn whatwg_name(&self) -> Option<&'static str> {
53        Some("utf-16be")
54    }
55    fn raw_encoder(&self) -> Box<dyn RawEncoder> {
56        UTF16BEEncoder::new()
57    }
58    fn raw_decoder(&self) -> Box<dyn RawDecoder> {
59        UTF16BEDecoder::new()
60    }
61}
62
63/// A shared encoder logic for UTF-16.
64#[derive(Clone, Copy)]
65struct UTF16Encoder;
66
67impl UTF16Encoder {
68    fn raw_feed<F>(
69        &mut self,
70        input: &str,
71        output: &mut dyn ByteWriter,
72        write_two_bytes: F,
73    ) -> (usize, Option<CodecError>)
74    where
75        F: Fn(&mut dyn ByteWriter, u8, u8),
76    {
77        output.writer_hint(input.len() * 2);
78
79        for ch in input.chars() {
80            match ch {
81                '\u{0}'..='\u{d7ff}' | '\u{e000}'..='\u{ffff}' => {
82                    let ch = ch as u32;
83                    write_two_bytes(output, (ch >> 8) as u8, (ch & 0xff) as u8);
84                }
85                '\u{10000}'..='\u{10ffff}' => {
86                    let ch = ch as u32 - 0x10000;
87                    write_two_bytes(output, (0xd8 | (ch >> 18)) as u8, ((ch >> 10) & 0xff) as u8);
88                    write_two_bytes(output, (0xdc | ((ch >> 8) & 0x3)) as u8, (ch & 0xff) as u8);
89                }
90            }
91        }
92        (input.len(), None)
93    }
94}
95
96/// An encoder for UTF-16 in little endian.
97#[derive(Clone, Copy)]
98pub struct UTF16LEEncoder;
99
100impl UTF16LEEncoder {
101    #[allow(clippy::new_ret_no_self)]
102    fn new() -> Box<dyn RawEncoder> {
103        Box::new(UTF16LEEncoder)
104    }
105}
106
107impl RawEncoder for UTF16LEEncoder {
108    fn from_self(&self) -> Box<dyn RawEncoder> {
109        UTF16LEEncoder::new()
110    }
111    fn raw_feed(
112        &mut self,
113        input: &str,
114        output: &mut dyn ByteWriter,
115    ) -> (usize, Option<CodecError>) {
116        UTF16Encoder.raw_feed(
117            input,
118            output,
119            |output: &mut dyn ByteWriter, msb: u8, lsb: u8| {
120                output.write_byte(lsb);
121                output.write_byte(msb);
122            },
123        )
124    }
125    fn raw_finish(&mut self, _output: &mut dyn ByteWriter) -> Option<CodecError> {
126        None
127    }
128}
129
130/// An encoder for UTF-16 in big endian.
131#[derive(Clone, Copy)]
132pub struct UTF16BEEncoder;
133
134impl UTF16BEEncoder {
135    #[allow(clippy::new_ret_no_self)]
136    fn new() -> Box<dyn RawEncoder> {
137        Box::new(UTF16BEEncoder)
138    }
139}
140
141impl RawEncoder for UTF16BEEncoder {
142    fn from_self(&self) -> Box<dyn RawEncoder> {
143        UTF16BEEncoder::new()
144    }
145    fn raw_feed(
146        &mut self,
147        input: &str,
148        output: &mut dyn ByteWriter,
149    ) -> (usize, Option<CodecError>) {
150        UTF16Encoder.raw_feed(
151            input,
152            output,
153            |output: &mut dyn ByteWriter, msb: u8, lsb: u8| {
154                output.write_byte(msb);
155                output.write_byte(lsb);
156            },
157        )
158    }
159    fn raw_finish(&mut self, _output: &mut dyn ByteWriter) -> Option<CodecError> {
160        None
161    }
162}
163
/// A shared decoder logic for UTF-16.
///
/// The decoder is a small state machine that can be fed input in arbitrary
/// slices. Whatever cannot be decoded yet at the end of a slice is kept in the
/// two fields below; `0xffff` is used in both as the "nothing pending" marker.
#[derive(Clone, Copy)]
struct UTF16Decoder {
    // First byte of a 16-bit unit whose second byte has not been seen yet,
    // or 0xffff when no byte is pending.
    leadbyte: u16,
    // Upper surrogate (0xd800..=0xdbff) still waiting for its lower surrogate,
    // or 0xffff when no surrogate is pending.
    leadsurrogate: u16,
}

impl UTF16Decoder {
    /// Creates a decoder with no pending byte and no pending surrogate.
    fn new() -> UTF16Decoder {
        UTF16Decoder {
            leadbyte: 0xffff,
            leadsurrogate: 0xffff,
        }
    }

    /// Decodes `input`, writing decoded characters to `output`.
    ///
    /// `concat_two_bytes(first, second)` combines the two bytes of a unit, given in
    /// input order, into one 16-bit code unit; the caller selects the endianness through it.
    ///
    /// Returns `(processed, error)`. `processed` is the input offset up to which
    /// complete characters have been written; trailing bytes of an incomplete unit
    /// or surrogate pair are remembered in `self` instead of being counted.
    /// On a malformed sequence an "invalid sequence" error is returned.
    /// NOTE(review): the exact contract of `CodecError::upto` is defined outside this
    /// file; the per-site comments below only describe the value that is computed.
    fn raw_feed<F>(
        &mut self,
        input: &[u8],
        output: &mut dyn StringWriter,
        concat_two_bytes: F,
    ) -> (usize, Option<CodecError>)
    where
        F: Fn(u16, u8) -> u16,
    {
        output.writer_hint(input.len() / 2); // when every codepoint is U+0000..007F

        let mut i = 0;
        let mut processed = 0;
        let len = input.len();

        // Empty input: nothing to do, pending state is left untouched.
        if i >= len {
            return (processed, None);
        }

        // Phase 1: a single byte was left over from the previous call; complete
        // that unit with the first byte of this input.
        if self.leadbyte != 0xffff {
            let ch = concat_two_bytes(self.leadbyte, input[i]);
            i += 1;
            self.leadbyte = 0xffff;
            if self.leadsurrogate != 0xffff {
                // An upper surrogate is also pending, so `ch` must be a lower surrogate.
                let upper = self.leadsurrogate;
                self.leadsurrogate = 0xffff;
                match ch {
                    0xdc00..=0xdfff => {
                        let ch = ((upper as u32 - 0xd800) << 10) + (ch as u32 - 0xdc00);
                        output.write_char(as_char(ch + 0x10000));
                        processed = i;
                    }
                    _ => {
                        // `i` is 1 here, so `upto` is -1: the unit that broke the pair
                        // started with the byte buffered from the previous call.
                        return (
                            processed,
                            Some(CodecError {
                                upto: i as isize - 2,
                                cause: "invalid sequence".into(),
                            }),
                        );
                    }
                }
            } else {
                match ch {
                    0xd800..=0xdbff => {
                        // Upper surrogate: remember it and let phase 2 look for the
                        // lower surrogate in the rest of this input.
                        self.leadsurrogate = ch;
                    }
                    0xdc00..=0xdfff => {
                        // Lower surrogate without a preceding upper surrogate.
                        return (
                            processed,
                            Some(CodecError {
                                upto: i as isize,
                                cause: "invalid sequence".into(),
                            }),
                        );
                    }
                    _ => {
                        output.write_char(as_char(ch as u32));
                        processed = i;
                    }
                }
            }
            if i >= len {
                return (processed, None);
            }
        }

        // Phase 2: an upper surrogate is pending (from an earlier call or from
        // phase 1); the next full unit must be its lower surrogate.
        if self.leadsurrogate != 0xffff {
            i += 1;
            if i >= len {
                // Only one byte is available: buffer it and wait for more input.
                self.leadbyte = input[i - 1] as u16;
                return (processed, None);
            }
            let upper = self.leadsurrogate;
            let ch = concat_two_bytes(input[i - 1] as u16, input[i]);
            i += 1;
            match ch {
                0xdc00..=0xdfff => {
                    let ch = ((upper as u32 - 0xd800) << 10) + (ch as u32 - 0xdc00);
                    output.write_char(as_char(ch + 0x10000));
                }
                _ => {
                    self.leadbyte = 0xffff;
                    self.leadsurrogate = 0xffff;
                    // `i - 2` is the offset at which the unit just read begins.
                    return (
                        processed,
                        Some(CodecError {
                            upto: i as isize - 2,
                            cause: "invalid sequence".into(),
                        }),
                    );
                }
            }
        }

        // Phase 3: no pending state remains; decode the rest unit by unit.
        self.leadbyte = 0xffff;
        self.leadsurrogate = 0xffff;
        processed = i;
        while i < len {
            // From here until the end of the iteration `i` indexes the second
            // byte of the unit being read.
            i += 1;
            if i >= len {
                // Odd trailing byte: buffer it for the next call.
                self.leadbyte = input[i - 1] as u16;
                break;
            }
            let ch = concat_two_bytes(input[i - 1] as u16, input[i]);
            match ch {
                0xd800..=0xdbff => {
                    // Move to the second byte of the unit that should hold the lower surrogate.
                    i += 2;
                    if i >= len {
                        // The pair is incomplete: keep the upper surrogate and, if
                        // present, the first byte of the following unit.
                        self.leadsurrogate = ch;
                        if i - 1 < len {
                            self.leadbyte = input[i - 1] as u16;
                        }
                        break;
                    }
                    let ch2 = concat_two_bytes(input[i - 1] as u16, input[i]);
                    match ch2 {
                        0xdc00..=0xdfff => {
                            let ch = ((ch as u32 - 0xd800) << 10) + (ch2 as u32 - 0xdc00);
                            output.write_char(as_char(ch + 0x10000));
                        }
                        _ => {
                            // `i - 1` is the offset at which the non-surrogate unit begins.
                            return (
                                processed,
                                Some(CodecError {
                                    upto: i as isize - 1,
                                    cause: "invalid sequence".into(),
                                }),
                            );
                        }
                    }
                }
                0xdc00..=0xdfff => {
                    // Lone lower surrogate; `i + 1` is the offset just past it.
                    return (
                        processed,
                        Some(CodecError {
                            upto: i as isize + 1,
                            cause: "invalid sequence".into(),
                        }),
                    );
                }
                _ => {
                    output.write_char(as_char(ch as u32));
                }
            }
            i += 1;
            processed = i;
        }
        (processed, None)
    }

    /// Finishes decoding: clears any pending state and reports an
    /// "incomplete sequence" error if a byte or an upper surrogate was still pending.
    /// Nothing is written to `_output`.
    fn raw_finish(&mut self, _output: &mut dyn StringWriter) -> Option<CodecError> {
        let leadbyte = self.leadbyte;
        let leadsurrogate = self.leadsurrogate;
        // Reset first so the decoder can be reused after finishing.
        self.leadbyte = 0xffff;
        self.leadsurrogate = 0xffff;
        if leadbyte != 0xffff || leadsurrogate != 0xffff {
            Some(CodecError {
                upto: 0,
                cause: "incomplete sequence".into(),
            })
        } else {
            None
        }
    }
}
347
348/// A decoder for UTF-16 in little endian.
349#[derive(Clone, Copy)]
350struct UTF16LEDecoder {
351    inner: UTF16Decoder,
352}
353
354impl UTF16LEDecoder {
355    #[allow(clippy::new_ret_no_self)]
356    pub fn new() -> Box<dyn RawDecoder> {
357        Box::new(UTF16LEDecoder {
358            inner: UTF16Decoder::new(),
359        })
360    }
361}
362
363impl RawDecoder for UTF16LEDecoder {
364    fn from_self(&self) -> Box<dyn RawDecoder> {
365        UTF16LEDecoder::new()
366    }
367    fn raw_feed(
368        &mut self,
369        input: &[u8],
370        output: &mut dyn StringWriter,
371    ) -> (usize, Option<CodecError>) {
372        self.inner.raw_feed(input, output, |lead: u16, trail: u8| {
373            lead | ((trail as u16) << 8)
374        })
375    }
376    fn raw_finish(&mut self, output: &mut dyn StringWriter) -> Option<CodecError> {
377        self.inner.raw_finish(output)
378    }
379}
380
381/// A decoder for UTF-16 in big endian.
382#[derive(Clone, Copy)]
383struct UTF16BEDecoder {
384    inner: UTF16Decoder,
385}
386
387impl UTF16BEDecoder {
388    #[allow(clippy::new_ret_no_self)]
389    pub fn new() -> Box<dyn RawDecoder> {
390        Box::new(UTF16BEDecoder {
391            inner: UTF16Decoder::new(),
392        })
393    }
394}
395
396impl RawDecoder for UTF16BEDecoder {
397    fn from_self(&self) -> Box<dyn RawDecoder> {
398        UTF16BEDecoder::new()
399    }
400    fn raw_feed(
401        &mut self,
402        input: &[u8],
403        output: &mut dyn StringWriter,
404    ) -> (usize, Option<CodecError>) {
405        self.inner.raw_feed(input, output, |lead: u16, trail: u8| {
406            (lead << 8) | trail as u16
407        })
408    }
409    fn raw_finish(&mut self, output: &mut dyn StringWriter) -> Option<CodecError> {
410        self.inner.raw_finish(output)
411    }
412}
413
#[cfg(test)]
mod tests {
    // Little endian and big endian are symmetric to each other, so there is no need to
    // test both. Since big endian is easier to inspect, only UTF16BEEncoding is tested.
    //
    // NOTE(review): the `assert_feed_ok!`, `assert_feed_err!`, `assert_finish_ok!` and
    // `assert_finish_err!` macros are defined outside this file. From their use here the
    // input appears to be split into "processed" and "unprocessed"/"problem"/"remaining"
    // parts followed by the expected output, with an optional leading `upto` adjustment
    // for `assert_feed_err!` -- confirm against the macro definitions.

    use super::UTF16BEEncoding;
    use crate::types::*;

    // Encodes BMP code points (U+0000, the powers of two and U+FFFF) and then
    // non-BMP code points up to U+10FFFF, checking the big-endian bytes.
    #[test]
    fn test_encoder_valid() {
        let mut e = UTF16BEEncoding.raw_encoder();
        assert_feed_ok!(
            e,
            "\u{0}\
                            \u{1}\u{02}\u{004}\u{0008}\
                            \u{10}\u{020}\u{0040}\u{80}\
                            \u{100}\u{0200}\u{400}\u{800}\
                            \u{1000}\u{2000}\u{4000}\u{8000}\
                            \u{ffff}",
            "",
            [
                0x00, 0x00, 0x00, 0x01, 0x00, 0x02, 0x00, 0x04, 0x00, 0x08, 0x00, 0x10, 0x00, 0x20,
                0x00, 0x40, 0x00, 0x80, 0x01, 0x00, 0x02, 0x00, 0x04, 0x00, 0x08, 0x00, 0x10, 0x00,
                0x20, 0x00, 0x40, 0x00, 0x80, 0x00, 0xff, 0xff
            ]
        );
        assert_feed_ok!(
            e,
            "\u{10000}\
                            \u{10001}\u{010002}\
                            \u{10004}\u{010008}\
                            \u{10010}\u{010020}\
                            \u{10040}\u{010080}\
                            \u{10100}\u{010200}\
                            \u{10400}\u{010800}\
                            \u{11000}\u{012000}\
                            \u{14000}\u{018000}\
                            \u{20000}\u{030000}\
                            \u{50000}\u{090000}\
                            \u{10FFFF}",
            "",
            [
                0xd8, 0x00, 0xdc, 0x00, 0xd8, 0x00, 0xdc, 0x01, 0xd8, 0x00, 0xdc, 0x02, 0xd8, 0x00,
                0xdc, 0x04, 0xd8, 0x00, 0xdc, 0x08, 0xd8, 0x00, 0xdc, 0x10, 0xd8, 0x00, 0xdc, 0x20,
                0xd8, 0x00, 0xdc, 0x40, 0xd8, 0x00, 0xdc, 0x80, 0xd8, 0x00, 0xdd, 0x00, 0xd8, 0x00,
                0xde, 0x00, 0xd8, 0x01, 0xdc, 0x00, 0xd8, 0x02, 0xdc, 0x00, 0xd8, 0x04, 0xdc, 0x00,
                0xd8, 0x08, 0xdc, 0x00, 0xd8, 0x10, 0xdc, 0x00, 0xd8, 0x20, 0xdc, 0x00, 0xd8, 0x40,
                0xdc, 0x00, 0xd8, 0x80, 0xdc, 0x00, 0xd9, 0x00, 0xdc, 0x00, 0xda, 0x00, 0xdc, 0x00,
                0xdb, 0xff, 0xdf, 0xff
            ]
        );
        assert_finish_ok!(e, []);
    }

    // Mirror of `test_encoder_valid`: decodes the same big-endian bytes back
    // into the same BMP and non-BMP code points.
    #[test]
    fn test_decoder_valid() {
        let mut d = UTF16BEEncoding.raw_decoder();
        assert_feed_ok!(
            d,
            [
                0x00, 0x00, 0x00, 0x01, 0x00, 0x02, 0x00, 0x04, 0x00, 0x08, 0x00, 0x10, 0x00, 0x20,
                0x00, 0x40, 0x00, 0x80, 0x01, 0x00, 0x02, 0x00, 0x04, 0x00, 0x08, 0x00, 0x10, 0x00,
                0x20, 0x00, 0x40, 0x00, 0x80, 0x00, 0xff, 0xff
            ],
            [],
            "\u{0}\
                         \u{1}\u{02}\u{004}\u{0008}\
                         \u{10}\u{020}\u{0040}\u{80}\
                         \u{100}\u{0200}\u{400}\u{800}\
                         \u{1000}\u{2000}\u{4000}\u{8000}\
                         \u{ffff}"
        );
        assert_feed_ok!(
            d,
            [
                0xd8, 0x00, 0xdc, 0x00, 0xd8, 0x00, 0xdc, 0x01, 0xd8, 0x00, 0xdc, 0x02, 0xd8, 0x00,
                0xdc, 0x04, 0xd8, 0x00, 0xdc, 0x08, 0xd8, 0x00, 0xdc, 0x10, 0xd8, 0x00, 0xdc, 0x20,
                0xd8, 0x00, 0xdc, 0x40, 0xd8, 0x00, 0xdc, 0x80, 0xd8, 0x00, 0xdd, 0x00, 0xd8, 0x00,
                0xde, 0x00, 0xd8, 0x01, 0xdc, 0x00, 0xd8, 0x02, 0xdc, 0x00, 0xd8, 0x04, 0xdc, 0x00,
                0xd8, 0x08, 0xdc, 0x00, 0xd8, 0x10, 0xdc, 0x00, 0xd8, 0x20, 0xdc, 0x00, 0xd8, 0x40,
                0xdc, 0x00, 0xd8, 0x80, 0xdc, 0x00, 0xd9, 0x00, 0xdc, 0x00, 0xda, 0x00, 0xdc, 0x00,
                0xdb, 0xff, 0xdf, 0xff
            ],
            [],
            "\u{10000}\
                         \u{10001}\u{010002}\
                         \u{10004}\u{010008}\
                         \u{10010}\u{010020}\
                         \u{10040}\u{010080}\
                         \u{10100}\u{010200}\
                         \u{10400}\u{010800}\
                         \u{11000}\u{012000}\
                         \u{14000}\u{018000}\
                         \u{20000}\u{030000}\
                         \u{50000}\u{090000}\
                         \u{10FFFF}"
        );
        assert_finish_ok!(d, "");
    }

    // BMP code points whose two bytes arrive in separate feeds.
    #[test]
    fn test_decoder_valid_partial_bmp() {
        let mut d = UTF16BEEncoding.raw_decoder();
        assert_feed_ok!(d, [], [0x12], "");
        assert_feed_ok!(d, [0x34], [], "\u{1234}");
        assert_feed_ok!(d, [], [0x56], "");
        assert_feed_ok!(d, [0x78], [], "\u{5678}");
        assert_finish_ok!(d, "");

        let mut d = UTF16BEEncoding.raw_decoder();
        assert_feed_ok!(d, [], [0x12], "");
        assert_feed_ok!(d, [0x34], [0x56], "\u{1234}");
        assert_feed_ok!(d, [0x78, 0xab, 0xcd], [], "\u{5678}\u{abcd}");
        assert_finish_ok!(d, "");
    }

    // Surrogate pairs split across feeds at every byte boundary
    // (one, two and three bytes at a time).
    #[test]
    fn test_decoder_valid_partial_non_bmp() {
        let mut d = UTF16BEEncoding.raw_decoder();
        assert_feed_ok!(d, [], [0xd8], "");
        assert_feed_ok!(d, [], [0x08], "");
        assert_feed_ok!(d, [], [0xdf], "");
        assert_feed_ok!(d, [0x45], [0xd9], "\u{12345}");
        assert_feed_ok!(d, [], [0x5e], "");
        assert_feed_ok!(d, [], [0xdc], "");
        assert_feed_ok!(d, [0x90], [], "\u{67890}");
        assert_finish_ok!(d, "");

        let mut d = UTF16BEEncoding.raw_decoder();
        assert_feed_ok!(d, [], [0xd8], "");
        assert_feed_ok!(d, [], [0x08, 0xdf], "");
        assert_feed_ok!(d, [0x45], [0xd9, 0x5e], "\u{12345}");
        assert_feed_ok!(d, [0xdc, 0x90], [], "\u{67890}");
        assert_finish_ok!(d, "");

        let mut d = UTF16BEEncoding.raw_decoder();
        assert_feed_ok!(d, [], [0xd8, 0x08, 0xdf], "");
        assert_feed_ok!(d, [0x45], [0xd9, 0x5e, 0xdc], "\u{12345}");
        assert_feed_ok!(d, [0x90], [], "\u{67890}");
        assert_finish_ok!(d, "");
    }

    // Finishing while one to three bytes of a sequence are still pending is an error.
    #[test]
    fn test_decoder_invalid_partial() {
        let mut d = UTF16BEEncoding.raw_decoder();
        assert_feed_ok!(d, [], [0x12], "");
        assert_finish_err!(d, "");

        let mut d = UTF16BEEncoding.raw_decoder();
        assert_feed_ok!(d, [], [0xd8], "");
        assert_finish_err!(d, "");

        let mut d = UTF16BEEncoding.raw_decoder();
        assert_feed_ok!(d, [], [0xd8, 0x08], "");
        assert_finish_err!(d, "");

        let mut d = UTF16BEEncoding.raw_decoder();
        assert_feed_ok!(d, [], [0xd8, 0x08, 0xdf], "");
        assert_finish_err!(d, "");
    }

    // An upper surrogate (lowest 0xd800 and highest 0xdbff) followed by a unit
    // that is not a lower surrogate, or by the end of input.
    #[test]
    fn test_decoder_invalid_lone_upper_surrogate() {
        let mut d = UTF16BEEncoding.raw_decoder();
        assert_feed_ok!(d, [], [0xd8, 0x00], "");
        assert_feed_err!(d, [], [], [0x12, 0x34], "");
        assert_feed_err!(d, [], [0xd8, 0x00], [0x56, 0x78], "");
        assert_feed_ok!(d, [], [0xd8, 0x00], "");
        assert_feed_err!(d, [], [], [0xd8, 0x00], "");
        assert_feed_ok!(d, [], [0xd8, 0x00], "");
        assert_finish_err!(d, "");

        let mut d = UTF16BEEncoding.raw_decoder();
        assert_feed_ok!(d, [], [0xdb, 0xff], "");
        assert_feed_err!(d, [], [], [0x12, 0x34], "");
        assert_feed_err!(d, [], [0xdb, 0xff], [0x56, 0x78], "");
        assert_feed_ok!(d, [], [0xdb, 0xff], "");
        assert_feed_err!(d, [], [], [0xdb, 0xff], "");
        assert_feed_ok!(d, [], [0xdb, 0xff], "");
        assert_finish_err!(d, "");
    }

    // Same as above, but with the surrogate or the following unit split between
    // feeds; the `-1` cases are those where the offending unit began in the previous feed.
    #[test]
    fn test_decoder_invalid_lone_upper_surrogate_partial() {
        let mut d = UTF16BEEncoding.raw_decoder();
        assert_feed_ok!(d, [], [0xd8], "");
        assert_feed_err!(d, [], [0x00], [0x12, 0x34], "");
        assert_feed_ok!(d, [], [0xd8, 0x00, 0x56], "");
        assert_feed_err!(d, -1, [], [], [0x56, 0x78], "");
        assert_feed_ok!(d, [], [0xd8], "");
        assert_feed_err!(d, [], [0x00], [0xd8, 0x00], "");
        assert_feed_ok!(d, [], [0xd8, 0x00, 0xdb], "");
        assert_feed_err!(d, -1, [], [], [0xdb, 0xff], "");
        assert_feed_ok!(d, [], [0xd8], "");
        assert_finish_err!(d, "");

        let mut d = UTF16BEEncoding.raw_decoder();
        assert_feed_ok!(d, [], [0xdb], "");
        assert_feed_err!(d, [], [0xff], [0x12, 0x34], "");
        assert_feed_ok!(d, [], [0xdb, 0xff, 0x56], "");
        assert_feed_err!(d, -1, [], [], [0x56, 0x78], "");
        assert_feed_ok!(d, [], [0xdb], "");
        assert_feed_err!(d, [], [0xff], [0xdb, 0xff], "");
        assert_feed_ok!(d, [], [0xdb, 0xff, 0xd8], "");
        assert_feed_err!(d, -1, [], [], [0xd8, 0x00], "");
        assert_feed_ok!(d, [], [0xdb], "");
        assert_finish_err!(d, "");
    }

    // A lower surrogate (lowest 0xdc00 and highest 0xdfff) with no preceding upper surrogate.
    #[test]
    fn test_decoder_invalid_lone_lower_surrogate() {
        let mut d = UTF16BEEncoding.raw_decoder();
        assert_feed_err!(d, [], [0xdc, 0x00], [], "");
        assert_feed_err!(d, [0x12, 0x34], [0xdc, 0x00], [0x56, 0x78], "\u{1234}");
        assert_finish_ok!(d, "");

        let mut d = UTF16BEEncoding.raw_decoder();
        assert_feed_err!(d, [], [0xdf, 0xff], [], "");
        assert_feed_err!(d, [0x12, 0x34], [0xdf, 0xff], [0x56, 0x78], "\u{1234}");
        assert_finish_ok!(d, "");
    }

    // Lone lower surrogate whose two bytes arrive in separate feeds. The second
    // half deliberately keeps using the same decoder after `assert_finish_ok!`.
    #[test]
    fn test_decoder_invalid_lone_lower_surrogate_partial() {
        let mut d = UTF16BEEncoding.raw_decoder();
        assert_feed_ok!(d, [], [0xdc], "");
        assert_feed_err!(d, [], [0x00], [], "");
        assert_feed_ok!(d, [0x12, 0x34], [0xdc], "\u{1234}");
        assert_feed_err!(d, [], [0x00], [0x56, 0x78], "");
        assert_finish_ok!(d, "");

        assert_feed_ok!(d, [], [0xdf], "");
        assert_feed_err!(d, [], [0xff], [], "");
        assert_feed_ok!(d, [0x12, 0x34], [0xdf], "\u{1234}");
        assert_feed_err!(d, [], [0xff], [0x56, 0x78], "");
        assert_finish_ok!(d, "");
    }

    // A single dangling byte at finish time is an error.
    #[test]
    fn test_decoder_invalid_one_byte_before_finish() {
        let mut d = UTF16BEEncoding.raw_decoder();
        assert_feed_ok!(d, [], [0x12], "");
        assert_finish_err!(d, "");

        let mut d = UTF16BEEncoding.raw_decoder();
        assert_feed_ok!(d, [0x12, 0x34], [0x56], "\u{1234}");
        assert_finish_err!(d, "");
    }

    // An upper surrogate plus the first byte of the next unit at finish time is an error.
    #[test]
    fn test_decoder_invalid_three_bytes_before_finish() {
        let mut d = UTF16BEEncoding.raw_decoder();
        assert_feed_ok!(d, [], [0xd8, 0x00, 0xdc], "");
        assert_finish_err!(d, "");

        let mut d = UTF16BEEncoding.raw_decoder();
        assert_feed_ok!(d, [0x12, 0x34], [0xd8, 0x00, 0xdc], "\u{1234}");
        assert_finish_err!(d, "");
    }

    // Same three dangling bytes as above, delivered over several feeds.
    #[test]
    fn test_decoder_invalid_three_bytes_before_finish_partial() {
        let mut d = UTF16BEEncoding.raw_decoder();
        assert_feed_ok!(d, [], [0xd8], "");
        assert_feed_ok!(d, [], [0x00], "");
        assert_feed_ok!(d, [], [0xdc], "");
        assert_finish_err!(d, "");

        let mut d = UTF16BEEncoding.raw_decoder();
        assert_feed_ok!(d, [0x12, 0x34], [0xd8], "\u{1234}");
        assert_feed_ok!(d, [], [0x00, 0xdc], "");
        assert_finish_err!(d, "");

        let mut d = UTF16BEEncoding.raw_decoder();
        assert_feed_ok!(d, [0x12, 0x34], [0xd8, 0x00], "\u{1234}");
        assert_feed_ok!(d, [], [0xdc], "");
        assert_finish_err!(d, "");
    }

    // After a failing finish the pending state must be gone, so the decoder
    // can be fed again from a clean slate.
    #[test]
    fn test_decoder_feed_after_finish() {
        let mut d = UTF16BEEncoding.raw_decoder();
        assert_feed_ok!(d, [0x12, 0x34], [0x12], "\u{1234}");
        assert_finish_err!(d, "");
        assert_feed_ok!(d, [0x12, 0x34], [], "\u{1234}");
        assert_finish_ok!(d, "");

        let mut d = UTF16BEEncoding.raw_decoder();
        assert_feed_ok!(d, [0xd8, 0x08, 0xdf, 0x45], [0xd8, 0x08, 0xdf], "\u{12345}");
        assert_finish_err!(d, "");
        assert_feed_ok!(d, [0xd8, 0x08, 0xdf, 0x45], [0xd8, 0x08], "\u{12345}");
        assert_finish_err!(d, "");
        assert_feed_ok!(d, [0xd8, 0x08, 0xdf, 0x45], [0xd8], "\u{12345}");
        assert_finish_err!(d, "");
        assert_feed_ok!(d, [0xd8, 0x08, 0xdf, 0x45], [], "\u{12345}");
        assert_finish_ok!(d, "");
    }
}