1use self::ISO2022JPState::{Katakana, Lead, ASCII};
8use crate::index_japanese as index;
9use crate::types::*;
10use crate::util::StrCharIndex;
11use std::convert::Into;
12use std::default::Default;
13
14#[derive(Clone, Copy)]
30pub struct EUCJPEncoding;
31
32impl Encoding for EUCJPEncoding {
33 fn name(&self) -> &'static str {
34 "euc-jp"
35 }
36 fn whatwg_name(&self) -> Option<&'static str> {
37 Some("euc-jp")
38 }
39 fn raw_encoder(&self) -> Box<dyn RawEncoder> {
40 EUCJPEncoder::new()
41 }
42 fn raw_decoder(&self) -> Box<dyn RawDecoder> {
43 EUCJP0212Decoder::new()
44 }
45}
46
47#[derive(Clone, Copy)]
49pub struct EUCJPEncoder;
50
51impl EUCJPEncoder {
52 #[allow(clippy::new_ret_no_self)]
53 pub fn new() -> Box<dyn RawEncoder> {
54 Box::new(EUCJPEncoder)
55 }
56}
57
58impl RawEncoder for EUCJPEncoder {
59 fn from_self(&self) -> Box<dyn RawEncoder> {
60 EUCJPEncoder::new()
61 }
62 fn is_ascii_compatible(&self) -> bool {
63 true
64 }
65
66 fn raw_feed(
67 &mut self,
68 input: &str,
69 output: &mut dyn ByteWriter,
70 ) -> (usize, Option<CodecError>) {
71 output.writer_hint(input.len());
72
73 for ((i, j), ch) in input.index_iter() {
74 match ch {
75 '\u{0}'..='\u{7f}' => {
76 output.write_byte(ch as u8);
77 }
78 '\u{a5}' => {
79 output.write_byte(0x5c);
80 }
81 '\u{203e}' => {
82 output.write_byte(0x7e);
83 }
84 '\u{ff61}'..='\u{ff9f}' => {
85 output.write_byte(0x8e);
86 output.write_byte((ch as usize - 0xff61 + 0xa1) as u8);
87 }
88 _ => {
89 let ptr = index::jis0208::backward(ch as u32);
90 if ptr == 0xffff {
91 return (
92 i,
93 Some(CodecError {
94 upto: j as isize,
95 cause: "unrepresentable character".into(),
96 }),
97 );
98 } else {
99 let lead = ptr / 94 + 0xa1;
100 let trail = ptr % 94 + 0xa1;
101 output.write_byte(lead as u8);
102 output.write_byte(trail as u8);
103 }
104 }
105 }
106 }
107 (input.len(), None)
108 }
109
110 fn raw_finish(&mut self, _output: &mut dyn ByteWriter) -> Option<CodecError> {
111 None
112 }
113}
114
115#[derive(Clone, Copy)]
117struct EUCJP0212Decoder {
118 st: eucjp::State,
119}
120
121impl EUCJP0212Decoder {
122 #[allow(clippy::new_ret_no_self)]
123 pub fn new() -> Box<dyn RawDecoder> {
124 Box::new(EUCJP0212Decoder {
125 st: Default::default(),
126 })
127 }
128}
129
130impl RawDecoder for EUCJP0212Decoder {
131 fn from_self(&self) -> Box<dyn RawDecoder> {
132 EUCJP0212Decoder::new()
133 }
134 fn is_ascii_compatible(&self) -> bool {
135 true
136 }
137
138 fn raw_feed(
139 &mut self,
140 input: &[u8],
141 output: &mut dyn StringWriter,
142 ) -> (usize, Option<CodecError>) {
143 let (st, processed, err) = eucjp::raw_feed(self.st, input, output, &());
144 self.st = st;
145 (processed, err)
146 }
147
148 fn raw_finish(&mut self, output: &mut dyn StringWriter) -> Option<CodecError> {
149 let (st, err) = eucjp::raw_finish(self.st, output, &());
150 self.st = st;
151 err
152 }
153}
154
155stateful_decoder! {
156 module eucjp;
157
158 internal pub fn map_two_0208_bytes(lead: u8, trail: u8) -> u32 {
159 use crate::index_japanese as index;
160
161 let lead = lead as u16;
162 let trail = trail as u16;
163 let index = match (lead, trail) {
164 (0xa1..=0xfe, 0xa1..=0xfe) => (lead - 0xa1) * 94 + trail - 0xa1,
165 _ => 0xffff,
166 };
167 index::jis0208::forward(index)
168 }
169
170 internal pub fn map_two_0212_bytes(lead: u8, trail: u8) -> u32 {
171 use crate::index_japanese as index;
172
173 let lead = lead as u16;
174 let trail = trail as u16;
175 let index = match (lead, trail) {
176 (0xa1..=0xfe, 0xa1..=0xfe) => (lead - 0xa1) * 94 + trail - 0xa1,
177 _ => 0xffff,
178 };
179 index::jis0212::forward(index)
180 }
181
182initial:
183 state S0(ctx: Context) {
185 case b @ 0x00..=0x7f => ctx.emit(b as u32);
186 case 0x8e => S1(ctx);
187 case 0x8f => S2(ctx);
188 case b @ 0xa1..=0xfe => S3(ctx, b);
189 case _ => ctx.err("invalid sequence");
190 }
191
192transient:
193 state S1(ctx: Context) {
195 case b @ 0xa1..=0xdf => ctx.emit(0xff61 + b as u32 - 0xa1);
196 case 0xa1..=0xfe => ctx.err("invalid sequence");
197 case _ => ctx.backup_and_err(1, "invalid sequence");
198 }
199
200 state S2(ctx: Context) {
203 case b @ 0xa1..=0xfe => S4(ctx, b);
204 case _ => ctx.backup_and_err(1, "invalid sequence");
205 }
206
207 state S3(ctx: Context, lead: u8) {
210 case b @ 0xa1..=0xfe => match map_two_0208_bytes(lead, b) {
211 0xffff => ctx.err("invalid sequence"),
213 ch => ctx.emit(ch as u32)
214 };
215 case _ => ctx.backup_and_err(1, "invalid sequence");
216 }
217
218 state S4(ctx: Context, lead: u8) {
221 case b @ 0xa1..=0xfe => match map_two_0212_bytes(lead, b) {
222 0xffff => ctx.err("invalid sequence"),
224 ch => ctx.emit(ch as u32)
225 };
226 case _ => ctx.backup_and_err(1, "invalid sequence");
227 }
228}
229
#[cfg(test)]
mod eucjp_tests {
    extern crate test;
    use super::EUCJPEncoding;
    use crate::testutils;
    use crate::types::*;

    #[test]
    fn test_encoder_valid() {
        let mut enc = EUCJPEncoding.raw_encoder();
        assert_feed_ok!(enc, "A", "", [0x41]);
        assert_feed_ok!(enc, "BC", "", [0x42, 0x43]);
        assert_feed_ok!(enc, "", "", []);
        // Yen sign and overline share the bytes of backslash and tilde.
        assert_feed_ok!(enc, "\u{a5}", "", [0x5c]);
        assert_feed_ok!(enc, "\u{203e}", "", [0x7e]);
        assert_feed_ok!(
            enc,
            "\u{306b}\u{307b}\u{3093}",
            "",
            [0xa4, 0xcb, 0xa4, 0xdb, 0xa4, 0xf3]
        );
        // Half-width katakana are prefixed with 0x8e.
        assert_feed_ok!(
            enc,
            "\u{ff86}\u{ff8e}\u{ff9d}",
            "",
            [0x8e, 0xc6, 0x8e, 0xce, 0x8e, 0xdd]
        );
        assert_feed_ok!(enc, "\u{65e5}\u{672c}", "", [0xc6, 0xfc, 0xcb, 0xdc]);
        assert_finish_ok!(enc, []);
    }

    #[test]
    fn test_encoder_double_mapped() {
        let mut enc = EUCJPEncoding.raw_encoder();
        assert_feed_ok!(
            enc,
            "\u{9ed1}\u{2170}\u{ffe2}",
            "",
            [0xfc, 0xee, 0xfc, 0xf1, 0xa2, 0xcc]
        );
        assert_finish_ok!(enc, []);
    }

    #[test]
    fn test_encoder_invalid() {
        let mut enc = EUCJPEncoding.raw_encoder();
        assert_feed_err!(enc, "", "\u{ffff}", "", []);
        assert_feed_err!(enc, "?", "\u{ffff}", "!", [0x3f]);
        // These characters decode from JIS X 0212 but cannot be encoded.
        assert_feed_err!(enc, "", "\u{736c}", "\u{8c78}", []);
        assert_finish_ok!(enc, []);
    }

    #[test]
    fn test_decoder_valid() {
        let mut dec = EUCJPEncoding.raw_decoder();
        assert_feed_ok!(dec, [0x41], [], "A");
        assert_feed_ok!(dec, [0x42, 0x43], [], "BC");
        assert_feed_ok!(dec, [], [], "");
        assert_feed_ok!(dec, [0x5c], [], "\\");
        assert_feed_ok!(dec, [0x7e], [], "~");
        assert_feed_ok!(
            dec,
            [0xa4, 0xcb, 0xa4, 0xdb, 0xa4, 0xf3],
            [],
            "\u{306b}\u{307b}\u{3093}"
        );
        assert_feed_ok!(
            dec,
            [0x8e, 0xc6, 0x8e, 0xce, 0x8e, 0xdd],
            [],
            "\u{ff86}\u{ff8e}\u{ff9d}"
        );
        assert_feed_ok!(dec, [0xc6, 0xfc, 0xcb, 0xdc], [], "\u{65e5}\u{672c}");
        assert_feed_ok!(dec, [0x8f, 0xcb, 0xc6, 0xec, 0xb8], [], "\u{736c}\u{8c78}");
        assert_finish_ok!(dec, "");
    }

    #[test]
    fn test_decoder_valid_partial() {
        let mut dec = EUCJPEncoding.raw_decoder();

        // Two-byte sequences split across feeds.
        assert_feed_ok!(dec, [], [0xa4], "");
        assert_feed_ok!(dec, [0xcb], [0xa4], "\u{306b}");
        assert_feed_ok!(dec, [0xdb], [0xa4], "\u{307b}");
        assert_feed_ok!(dec, [0xf3], [], "\u{3093}");

        // 0x8e-prefixed sequences split across feeds.
        assert_feed_ok!(dec, [], [0x8e], "");
        assert_feed_ok!(dec, [0xc6], [0x8e], "\u{ff86}");
        assert_feed_ok!(dec, [0xce], [0x8e], "\u{ff8e}");
        assert_feed_ok!(dec, [0xdd], [], "\u{ff9d}");

        assert_feed_ok!(dec, [], [0xc6], "");
        assert_feed_ok!(dec, [0xfc], [0xcb], "\u{65e5}");
        assert_feed_ok!(dec, [0xdc], [], "\u{672c}");

        // 0x8f-prefixed three-byte sequences split across feeds.
        assert_feed_ok!(dec, [], [0x8f], "");
        assert_feed_ok!(dec, [], [0xcb], "");
        assert_feed_ok!(dec, [0xc6], [0xec], "\u{736c}");
        assert_feed_ok!(dec, [0xb8], [], "\u{8c78}");
        assert_feed_ok!(dec, [], [0x8f, 0xcb], "");
        assert_feed_ok!(dec, [0xc6, 0xec, 0xb8], [], "\u{736c}\u{8c78}");
        assert_finish_ok!(dec, "");
    }

    #[test]
    fn test_decoder_invalid_lone_lead_immediate_test_finish() {
        // Every valid lead byte is held back by the decoder, and finishing
        // right after it is an error.
        for lead in (0x8e..0x90).chain(0xa1..0xff) {
            let mut dec = EUCJPEncoding.raw_decoder();
            assert_feed_ok!(dec, [], [lead], "");
            assert_finish_err!(dec, "");
        }

        // Bytes that can never start a sequence are rejected immediately.
        let mut dec = EUCJPEncoding.raw_decoder();
        for byte in (0x80..0x8e).chain(0x90..0xa1) {
            assert_feed_err!(dec, [], [byte], [], "");
        }
        assert_feed_err!(dec, [], [0xff], [], "");
        assert_finish_ok!(dec, "");
    }

    #[test]
    fn test_decoder_invalid_lone_lead_followed_by_space() {
        for lead in 0x80..=0xffu8 {
            let mut dec = EUCJPEncoding.raw_decoder();
            assert_feed_err!(dec, [], [lead], [0x20], "");
            assert_finish_ok!(dec, "");
        }
    }

    #[test]
    fn test_decoder_invalid_lead_followed_by_invalid_trail() {
        for lead in 0x80..=0xffu8 {
            let mut dec = EUCJPEncoding.raw_decoder();
            assert_feed_err!(dec, [], [lead], [0x80], "");
            assert_feed_err!(dec, [], [lead], [0xff], "");
            assert_finish_ok!(dec, "");
        }
    }

    #[test]
    fn test_decoder_invalid_lone_lead_for_0212_immediate_test_finish() {
        for lead in 0xa1..0xff {
            let mut dec = EUCJPEncoding.raw_decoder();
            assert_feed_ok!(dec, [], [0x8f, lead], "");
            assert_finish_err!(dec, "");
        }
    }

    #[test]
    fn test_decoder_invalid_lone_lead_for_0212_immediate_test_finish_partial() {
        for lead in 0xa1..0xff {
            let mut dec = EUCJPEncoding.raw_decoder();
            assert_feed_ok!(dec, [], [0x8f], "");
            assert_feed_ok!(dec, [], [lead], "");
            assert_finish_err!(dec, "");
        }
    }

    #[test]
    fn test_decoder_invalid_trail_for_0201() {
        for trail in 0..0xa1 {
            let mut dec = EUCJPEncoding.raw_decoder();
            assert_feed_err!(dec, [], [0x8e], [trail], "");
            assert_finish_ok!(dec, "");
        }

        for trail in 0xe0..0xff {
            let mut dec = EUCJPEncoding.raw_decoder();
            assert_feed_err!(dec, [], [0x8e, trail], [], "");
            assert_finish_ok!(dec, "");
        }
    }

    #[test]
    fn test_decoder_invalid_trail_for_0201_partial() {
        for trail in 0..0xa1 {
            let mut dec = EUCJPEncoding.raw_decoder();
            assert_feed_ok!(dec, [], [0x8e], "");
            assert_feed_err!(dec, [], [], [trail], "");
            assert_finish_ok!(dec, "");
        }

        for trail in 0xe0..0xff {
            let mut dec = EUCJPEncoding.raw_decoder();
            assert_feed_ok!(dec, [], [0x8e], "");
            assert_feed_err!(dec, [], [trail], [], "");
            assert_finish_ok!(dec, "");
        }
    }

    #[test]
    fn test_decoder_invalid_middle_for_0212() {
        for middle in 0..0xa1 {
            let mut dec = EUCJPEncoding.raw_decoder();
            assert_feed_err!(dec, [], [0x8f], [middle], "");
            assert_finish_ok!(dec, "");
        }
    }

    #[test]
    fn test_decoder_invalid_middle_for_0212_partial() {
        for middle in 0..0xa1 {
            let mut dec = EUCJPEncoding.raw_decoder();
            assert_feed_ok!(dec, [], [0x8f], "");
            assert_feed_err!(dec, [], [], [middle], "");
            assert_finish_ok!(dec, "");
        }
    }

    #[test]
    fn test_decoder_invalid_trail_for_0212() {
        for trail in 0..0xa1 {
            let mut dec = EUCJPEncoding.raw_decoder();
            assert_feed_err!(dec, [], [0x8f, 0xa1], [trail], "");
            assert_finish_ok!(dec, "");
        }
    }

    #[test]
    fn test_decoder_invalid_trail_for_0212_partial() {
        for trail in 0..0xa1 {
            let mut dec = EUCJPEncoding.raw_decoder();
            assert_feed_ok!(dec, [], [0x8f], "");
            assert_feed_ok!(dec, [], [0xa1], "");
            assert_feed_err!(dec, [], [], [trail], "");
            assert_finish_ok!(dec, "");
        }
    }

    #[test]
    fn test_decoder_feed_after_finish() {
        let mut dec = EUCJPEncoding.raw_decoder();
        assert_feed_ok!(dec, [0xa4, 0xa2], [0xa4], "\u{3042}");
        assert_finish_err!(dec, "");
        // The decoder must be usable again after a failed finish.
        assert_feed_ok!(dec, [0xa4, 0xa2], [], "\u{3042}");
        assert_finish_ok!(dec, "");
    }

    #[bench]
    fn bench_encode_short_text(bencher: &mut test::Bencher) {
        let text = testutils::JAPANESE_TEXT;
        bencher.bytes = text.len() as u64;
        bencher.iter(|| test::black_box(EUCJPEncoding.encode(text, EncoderTrap::Strict)))
    }

    #[bench]
    fn bench_decode_short_text(bencher: &mut test::Bencher) {
        let encoded = EUCJPEncoding
            .encode(testutils::JAPANESE_TEXT, EncoderTrap::Strict)
            .ok()
            .unwrap();
        bencher.bytes = encoded.len() as u64;
        bencher.iter(|| test::black_box(EUCJPEncoding.decode(&encoded, DecoderTrap::Strict)))
    }
}
496
497#[derive(Clone, Copy)]
513pub struct Windows31JEncoding;
514
515impl Encoding for Windows31JEncoding {
516 fn name(&self) -> &'static str {
517 "windows-31j"
518 }
519 fn whatwg_name(&self) -> Option<&'static str> {
520 Some("shift_jis")
521 } fn raw_encoder(&self) -> Box<dyn RawEncoder> {
523 Windows31JEncoder::new()
524 }
525 fn raw_decoder(&self) -> Box<dyn RawDecoder> {
526 Windows31JDecoder::new()
527 }
528}
529
530#[derive(Clone, Copy)]
532pub struct Windows31JEncoder;
533
534impl Windows31JEncoder {
535 #[allow(clippy::new_ret_no_self)]
536 pub fn new() -> Box<dyn RawEncoder> {
537 Box::new(Windows31JEncoder)
538 }
539}
540
541impl RawEncoder for Windows31JEncoder {
542 fn from_self(&self) -> Box<dyn RawEncoder> {
543 Windows31JEncoder::new()
544 }
545 fn is_ascii_compatible(&self) -> bool {
546 true
547 }
548
549 fn raw_feed(
550 &mut self,
551 input: &str,
552 output: &mut dyn ByteWriter,
553 ) -> (usize, Option<CodecError>) {
554 output.writer_hint(input.len());
555
556 for ((i, j), ch) in input.index_iter() {
557 match ch {
558 '\u{0}'..='\u{80}' => {
559 output.write_byte(ch as u8);
560 }
561 '\u{a5}' => {
562 output.write_byte(0x5c);
563 }
564 '\u{203e}' => {
565 output.write_byte(0x7e);
566 }
567 '\u{ff61}'..='\u{ff9f}' => {
568 output.write_byte((ch as usize - 0xff61 + 0xa1) as u8);
569 }
570 _ => {
571 let ptr = index::jis0208::backward_remapped(ch as u32);
573 if ptr == 0xffff {
574 return (
575 i,
576 Some(CodecError {
577 upto: j as isize,
578 cause: "unrepresentable character".into(),
579 }),
580 );
581 } else {
582 let lead = ptr / 188;
583 let leadoffset = if lead < 0x1f { 0x81 } else { 0xc1 };
584 let trail = ptr % 188;
585 let trailoffset = if trail < 0x3f { 0x40 } else { 0x41 };
586 output.write_byte((lead + leadoffset) as u8);
587 output.write_byte((trail + trailoffset) as u8);
588 }
589 }
590 }
591 }
592 (input.len(), None)
593 }
594
595 fn raw_finish(&mut self, _output: &mut dyn ByteWriter) -> Option<CodecError> {
596 None
597 }
598}
599
600#[derive(Clone, Copy)]
602struct Windows31JDecoder {
603 st: windows31j::State,
604}
605
606impl Windows31JDecoder {
607 #[allow(clippy::new_ret_no_self)]
608 pub fn new() -> Box<dyn RawDecoder> {
609 Box::new(Windows31JDecoder {
610 st: Default::default(),
611 })
612 }
613}
614
615impl RawDecoder for Windows31JDecoder {
616 fn from_self(&self) -> Box<dyn RawDecoder> {
617 Windows31JDecoder::new()
618 }
619 fn is_ascii_compatible(&self) -> bool {
620 true
621 }
622
623 fn raw_feed(
624 &mut self,
625 input: &[u8],
626 output: &mut dyn StringWriter,
627 ) -> (usize, Option<CodecError>) {
628 let (st, processed, err) = windows31j::raw_feed(self.st, input, output, &());
629 self.st = st;
630 (processed, err)
631 }
632
633 fn raw_finish(&mut self, output: &mut dyn StringWriter) -> Option<CodecError> {
634 let (st, err) = windows31j::raw_finish(self.st, output, &());
635 self.st = st;
636 err
637 }
638}
639
640stateful_decoder! {
641 module windows31j;
642
643 internal pub fn map_two_0208_bytes(lead: u8, trail: u8) -> u32 {
644 use crate::index_japanese as index;
645
646 let lead = lead as u16;
647 let trail = trail as u16;
648 let leadoffset = if lead < 0xa0 {0x81} else {0xc1};
649 let trailoffset = if trail < 0x7f {0x40} else {0x41};
650 let index = match (lead, trail) {
651 (0xf0..=0xf9, 0x40..=0x7e) | (0xf0..=0xf9, 0x80..=0xfc) =>
652 return (0xe000 + (lead - 0xf0) * 188 + trail - trailoffset) as u32,
653 (0x81..=0x9f, 0x40..=0x7e) | (0x81..=0x9f, 0x80..=0xfc) |
654 (0xe0..=0xfc, 0x40..=0x7e) | (0xe0..=0xfc, 0x80..=0xfc) =>
655 (lead - leadoffset) * 188 + trail - trailoffset,
656 _ => 0xffff,
657 };
658 index::jis0208::forward(index)
659 }
660
661initial:
662 state S0(ctx: Context) {
664 case b @ 0x00..=0x80 => ctx.emit(b as u32);
665 case b @ 0xa1..=0xdf => ctx.emit(0xff61 + b as u32 - 0xa1);
666 case b @ 0x81..=0x9f, b @ 0xe0..=0xfc => S1(ctx, b);
667 case _ => ctx.err("invalid sequence");
668 }
669
670transient:
671 state S1(ctx: Context, lead: u8) {
673 case b => match map_two_0208_bytes(lead, b) {
674 0xffff => ctx.backup_and_err(1, "invalid sequence"), ch => ctx.emit(ch)
676 };
677 }
678}
679
#[cfg(test)]
mod windows31j_tests {
    extern crate test;
    use super::Windows31JEncoding;
    use crate::testutils;
    use crate::types::*;

    #[test]
    fn test_encoder_valid() {
        let mut enc = Windows31JEncoding.raw_encoder();
        assert_feed_ok!(enc, "A", "", [0x41]);
        assert_feed_ok!(enc, "BC", "", [0x42, 0x43]);
        assert_feed_ok!(enc, "", "", []);
        // Yen sign and overline share the bytes of backslash and tilde.
        assert_feed_ok!(enc, "\u{a5}", "", [0x5c]);
        assert_feed_ok!(enc, "\u{203e}", "", [0x7e]);
        assert_feed_ok!(
            enc,
            "\u{306b}\u{307b}\u{3093}",
            "",
            [0x82, 0xc9, 0x82, 0xd9, 0x82, 0xf1]
        );
        // Half-width katakana are single bytes.
        assert_feed_ok!(enc, "\u{ff86}\u{ff8e}\u{ff9d}", "", [0xc6, 0xce, 0xdd]);
        assert_feed_ok!(enc, "\u{65e5}\u{672c}", "", [0x93, 0xfa, 0x96, 0x7b]);
        assert_finish_ok!(enc, []);
    }

    #[test]
    fn test_encoder_no_eudc() {
        // The U+E000.. range is decoded but never encoded.
        let mut enc = Windows31JEncoding.raw_encoder();
        assert_feed_err!(enc, "", "\u{e000}", "", []);
        assert_feed_err!(enc, "", "\u{e757}", "", []);
        assert_feed_err!(enc, "", "\u{e758}", "", []);
        assert_finish_ok!(enc, []);
    }

    #[test]
    fn test_encoder_double_mapped() {
        let mut enc = Windows31JEncoding.raw_encoder();
        assert_feed_ok!(
            enc,
            "\u{9ed1}\u{2170}\u{ffe2}",
            "",
            [0xfc, 0x4b, 0xfa, 0x40, 0x81, 0xca]
        );
        assert_finish_ok!(enc, []);
    }

    #[test]
    fn test_encoder_invalid() {
        let mut enc = Windows31JEncoding.raw_encoder();
        assert_feed_err!(enc, "", "\u{ffff}", "", []);
        assert_feed_err!(enc, "?", "\u{ffff}", "!", [0x3f]);
        assert_feed_err!(enc, "", "\u{736c}", "\u{8c78}", []);
        assert_finish_ok!(enc, []);
    }

    #[test]
    fn test_decoder_valid() {
        let mut dec = Windows31JEncoding.raw_decoder();
        assert_feed_ok!(dec, [0x41], [], "A");
        assert_feed_ok!(dec, [0x42, 0x43], [], "BC");
        assert_feed_ok!(dec, [], [], "");
        assert_feed_ok!(dec, [0x5c], [], "\\");
        assert_feed_ok!(dec, [0x7e], [], "~");
        // Byte 0x80 decodes to U+0080.
        assert_feed_ok!(dec, [0x80], [], "\u{80}");
        assert_feed_ok!(
            dec,
            [0x82, 0xc9, 0x82, 0xd9, 0x82, 0xf1],
            [],
            "\u{306b}\u{307b}\u{3093}"
        );
        assert_feed_ok!(dec, [0xc6, 0xce, 0xdd], [], "\u{ff86}\u{ff8e}\u{ff9d}");
        assert_feed_ok!(dec, [0x93, 0xfa, 0x96, 0x7b], [], "\u{65e5}\u{672c}");
        assert_finish_ok!(dec, "");
    }

    #[test]
    fn test_decoder_eudc() {
        let mut dec = Windows31JEncoding.raw_decoder();
        assert_feed_ok!(dec, [], [0xf0], "");
        assert_feed_ok!(dec, [0x40], [], "\u{e000}");
        assert_feed_ok!(dec, [0xf9, 0xfc], [], "\u{e757}");
        assert_feed_err!(dec, [], [0xf0], [0x00], "");
        assert_feed_err!(dec, [], [0xf0], [0xff], "");
        assert_finish_ok!(dec, "");
    }

    #[test]
    fn test_decoder_invalid_lone_lead_immediate_test_finish() {
        // Every valid lead byte is held back by the decoder, and finishing
        // right after it is an error.
        for lead in (0x81..0xa0).chain(0xe0..0xfd) {
            let mut dec = Windows31JEncoding.raw_decoder();
            assert_feed_ok!(dec, [], [lead], "");
            assert_finish_err!(dec, "");
        }

        // Bytes that can never start a sequence are rejected immediately.
        let mut dec = Windows31JEncoding.raw_decoder();
        assert_feed_err!(dec, [], [0xa0], [], "");
        assert_feed_err!(dec, [], [0xfd], [], "");
        assert_feed_err!(dec, [], [0xfe], [], "");
        assert_feed_err!(dec, [], [0xff], [], "");
        assert_finish_ok!(dec, "");
    }

    #[test]
    fn test_decoder_invalid_lone_lead_followed_by_space() {
        for lead in (0x81..0xa0).chain(0xe0..0xfd) {
            let mut dec = Windows31JEncoding.raw_decoder();
            assert_feed_err!(dec, [], [lead], [0x20], "");
            assert_finish_ok!(dec, "");
        }
    }

    #[test]
    fn test_decoder_invalid_lead_followed_by_invalid_trail() {
        for lead in (0x81..0xa0).chain(0xe0..0xfd) {
            let mut dec = Windows31JEncoding.raw_decoder();
            assert_feed_err!(dec, [], [lead], [0x3f], "");
            assert_feed_err!(dec, [], [lead], [0x7f], "");
            assert_feed_err!(dec, [], [lead], [0xfd], "");
            assert_feed_err!(dec, [], [lead], [0xfe], "");
            assert_feed_err!(dec, [], [lead], [0xff], "");
            assert_finish_ok!(dec, "");
        }
    }

    #[test]
    fn test_decoder_invalid_lead_followed_by_invalid_trail_partial() {
        for lead in (0x81..0xa0).chain(0xe0..0xfd) {
            let mut dec = Windows31JEncoding.raw_decoder();
            assert_feed_ok!(dec, [], [lead], "");
            assert_feed_err!(dec, [], [], [0xff], "");
            assert_finish_ok!(dec, "");
        }
    }

    #[test]
    fn test_decoder_feed_after_finish() {
        let mut dec = Windows31JEncoding.raw_decoder();
        assert_feed_ok!(dec, [0x82, 0xa0], [0x82], "\u{3042}");
        assert_finish_err!(dec, "");
        // The decoder must be usable again after a failed finish.
        assert_feed_ok!(dec, [0x82, 0xa0], [], "\u{3042}");
        assert_finish_ok!(dec, "");
    }

    #[bench]
    fn bench_encode_short_text(bencher: &mut test::Bencher) {
        let text = testutils::JAPANESE_TEXT;
        bencher.bytes = text.len() as u64;
        bencher.iter(|| test::black_box(Windows31JEncoding.encode(text, EncoderTrap::Strict)))
    }

    #[bench]
    fn bench_decode_short_text(bencher: &mut test::Bencher) {
        let encoded = Windows31JEncoding
            .encode(testutils::JAPANESE_TEXT, EncoderTrap::Strict)
            .ok()
            .unwrap();
        bencher.bytes = encoded.len() as u64;
        bencher.iter(|| test::black_box(Windows31JEncoding.decode(&encoded, DecoderTrap::Strict)))
    }
}
873
874#[derive(Clone, Copy)]
888pub struct ISO2022JPEncoding;
889
890impl Encoding for ISO2022JPEncoding {
891 fn name(&self) -> &'static str {
892 "iso-2022-jp"
893 }
894 fn whatwg_name(&self) -> Option<&'static str> {
895 Some("iso-2022-jp")
896 }
897 fn raw_encoder(&self) -> Box<dyn RawEncoder> {
898 ISO2022JPEncoder::new()
899 }
900 fn raw_decoder(&self) -> Box<dyn RawDecoder> {
901 ISO2022JPDecoder::new()
902 }
903}
904
/// Output mode the ISO-2022-JP encoder is currently in.
// The variant names mirror the character sets, hence the acronym casing.
#[allow(clippy::upper_case_acronyms)]
#[derive(PartialEq, Clone, Copy)]
enum ISO2022JPState {
    /// Single-byte ASCII output; entered with `ESC ( B`.
    ASCII,
    /// Single-byte half-width katakana output; entered with `ESC ( I`.
    Katakana,
    /// Two-byte JIS X 0208 output; entered with `ESC $ B`.
    Lead,
}
912
913#[derive(Clone, Copy)]
915pub struct ISO2022JPEncoder {
916 st: ISO2022JPState,
917}
918
919impl ISO2022JPEncoder {
920 #[allow(clippy::new_ret_no_self)]
921 pub fn new() -> Box<dyn RawEncoder> {
922 Box::new(ISO2022JPEncoder { st: ASCII })
923 }
924}
925
926impl RawEncoder for ISO2022JPEncoder {
927 fn from_self(&self) -> Box<dyn RawEncoder> {
928 ISO2022JPEncoder::new()
929 }
930 fn is_ascii_compatible(&self) -> bool {
931 true
932 }
933
934 fn raw_feed(
935 &mut self,
936 input: &str,
937 output: &mut dyn ByteWriter,
938 ) -> (usize, Option<CodecError>) {
939 output.writer_hint(input.len());
940
941 let mut st = self.st;
942 macro_rules! ensure_ASCII(
943 () => (if st != ASCII { output.write_bytes(b"\x1b(B"); st = ASCII; })
944 );
945 macro_rules! ensure_Katakana(
946 () => (if st != Katakana { output.write_bytes(b"\x1b(I"); st = Katakana; })
947 );
948 macro_rules! ensure_Lead(
949 () => (if st != Lead { output.write_bytes(b"\x1b$B"); st = Lead; })
950 );
951
952 for ((i, j), ch) in input.index_iter() {
953 match ch {
954 '\u{0}'..='\u{7f}' => {
955 ensure_ASCII!();
956 output.write_byte(ch as u8);
957 }
958 '\u{a5}' => {
959 ensure_ASCII!();
960 output.write_byte(0x5c);
961 }
962 '\u{203e}' => {
963 ensure_ASCII!();
964 output.write_byte(0x7e);
965 }
966 '\u{ff61}'..='\u{ff9f}' => {
967 ensure_Katakana!();
968 output.write_byte((ch as usize - 0xff61 + 0x21) as u8);
969 }
970 _ => {
971 let ptr = index::jis0208::backward(ch as u32);
972 if ptr == 0xffff {
973 self.st = st; return (
975 i,
976 Some(CodecError {
977 upto: j as isize,
978 cause: "unrepresentable character".into(),
979 }),
980 );
981 } else {
982 ensure_Lead!();
983 let lead = ptr / 94 + 0x21;
984 let trail = ptr % 94 + 0x21;
985 output.write_byte(lead as u8);
986 output.write_byte(trail as u8);
987 }
988 }
989 }
990 }
991
992 self.st = st;
993 (input.len(), None)
994 }
995
996 fn raw_finish(&mut self, _output: &mut dyn ByteWriter) -> Option<CodecError> {
997 None
998 }
999}
1000
1001#[derive(Clone, Copy)]
1003struct ISO2022JPDecoder {
1004 st: iso2022jp::State,
1005}
1006
1007impl ISO2022JPDecoder {
1008 #[allow(clippy::new_ret_no_self)]
1009 pub fn new() -> Box<dyn RawDecoder> {
1010 Box::new(ISO2022JPDecoder {
1011 st: Default::default(),
1012 })
1013 }
1014}
1015
1016impl RawDecoder for ISO2022JPDecoder {
1017 fn from_self(&self) -> Box<dyn RawDecoder> {
1018 ISO2022JPDecoder::new()
1019 }
1020 fn is_ascii_compatible(&self) -> bool {
1021 false
1022 }
1023
1024 fn raw_feed(
1025 &mut self,
1026 input: &[u8],
1027 output: &mut dyn StringWriter,
1028 ) -> (usize, Option<CodecError>) {
1029 let (st, processed, err) = iso2022jp::raw_feed(self.st, input, output, &());
1030 self.st = st;
1031 (processed, err)
1032 }
1033
1034 fn raw_finish(&mut self, output: &mut dyn StringWriter) -> Option<CodecError> {
1035 let (st, err) = iso2022jp::raw_finish(self.st, output, &());
1036 self.st = st;
1037 err
1038 }
1039}
1040
1041stateful_decoder! {
1042 module iso2022jp;
1043
1044 internal pub fn map_two_0208_bytes(lead: u8, trail: u8) -> u32 {
1045 use crate::index_japanese as index;
1046
1047 let lead = lead as u16;
1048 let trail = trail as u16;
1049 let index = match (lead, trail) {
1050 (0x21..=0x7e, 0x21..=0x7e) => (lead - 0x21) * 94 + trail - 0x21,
1051 _ => 0xffff,
1052 };
1053 index::jis0208::forward(index)
1054 }
1055
1056 internal pub fn map_two_0212_bytes(lead: u8, trail: u8) -> u32 {
1057 use crate::index_japanese as index;
1058
1059 let lead = lead as u16;
1060 let trail = trail as u16;
1061 let index = match (lead, trail) {
1062 (0x21..=0x7e, 0x21..=0x7e) => (lead - 0x21) * 94 + trail - 0x21,
1063 _ => 0xffff,
1064 };
1065 index::jis0212::forward(index)
1066 }
1067
1068initial:
1069 state ASCII(ctx: Context) {
1071 case 0x1b => EscapeStart(ctx);
1072 case b @ 0x00..=0x7f => ctx.emit(b as u32), ASCII(ctx);
1073 case _ => ctx.err("invalid sequence"), ASCII(ctx);
1074 final => ctx.reset();
1075 }
1076
1077checkpoint:
1078 state Lead0208(ctx: Context) {
1080 case 0x0a => ctx.emit(0x000a); case 0x1b => EscapeStart(ctx);
1082 case b => Trail0208(ctx, b);
1083 final => ctx.reset();
1084 }
1085
1086 state Lead0212(ctx: Context) {
1088 case 0x0a => ctx.emit(0x000a); case 0x1b => EscapeStart(ctx);
1090 case b => Trail0212(ctx, b);
1091 final => ctx.reset();
1092 }
1093
1094 state Katakana(ctx: Context) {
1096 case 0x1b => EscapeStart(ctx);
1097 case b @ 0x21..=0x5f => ctx.emit(0xff61 + b as u32 - 0x21), Katakana(ctx);
1098 case _ => ctx.err("invalid sequence"), Katakana(ctx);
1099 final => ctx.reset();
1100 }
1101
1102transient:
1103 state EscapeStart(ctx: Context) {
1106 case 0x24 => EscapeMiddle24(ctx); case 0x28 => EscapeMiddle28(ctx); case _ => ctx.backup_and_err(1, "invalid sequence");
1109 final => ctx.err("incomplete sequence");
1110 }
1111
1112 state EscapeMiddle24(ctx: Context) {
1115 case 0x40, 0x42 => Lead0208(ctx); case 0x28 => EscapeFinal(ctx); case _ => ctx.backup_and_err(2, "invalid sequence");
1118 final => ctx.err("incomplete sequence");
1119 }
1120
1121 state EscapeMiddle28(ctx: Context) {
1124 case 0x42, 0x4a => ctx.reset(); case 0x49 => Katakana(ctx); case _ => ctx.backup_and_err(2, "invalid sequence");
1127 final => ctx.err("incomplete sequence");
1128 }
1129
1130 state EscapeFinal(ctx: Context) {
1133 case 0x44 => Lead0212(ctx); case _ => ctx.backup_and_err(3, "invalid sequence");
1135 final => ctx.backup_and_err(1, "incomplete sequence");
1136 }
1137
1138 state Trail0208(ctx: Context, lead: u8) {
1140 case b =>
1141 match map_two_0208_bytes(lead, b) {
1142 0xffff => ctx.err("invalid sequence"),
1143 ch => ctx.emit(ch as u32)
1144 },
1145 Lead0208(ctx);
1146 final => ctx.err("incomplete sequence");
1147 }
1148
1149 state Trail0212(ctx: Context, lead: u8) {
1151 case b =>
1152 match map_two_0212_bytes(lead, b) {
1153 0xffff => ctx.err("invalid sequence"),
1154 ch => ctx.emit(ch as u32)
1155 },
1156 Lead0212(ctx);
1157 final => ctx.err("incomplete sequence");
1158 }
1159}
1160
1161#[cfg(test)]
1162mod iso2022jp_tests {
1163 extern crate test;
1164 use super::ISO2022JPEncoding;
1165 use crate::testutils;
1166 use crate::types::*;
1167
1168 #[test]
1169 fn test_encoder_valid() {
1170 let mut e = ISO2022JPEncoding.raw_encoder();
1171 assert_feed_ok!(e, "A", "", [0x41]);
1172 assert_feed_ok!(e, "BC", "", [0x42, 0x43]);
1173 assert_feed_ok!(e, "\x1b\x24\x42", "", [0x1b, 0x24, 0x42]); assert_feed_ok!(e, "", "", []);
1175 assert_feed_ok!(e, "\u{a5}", "", [0x5c]);
1176 assert_feed_ok!(e, "\u{203e}", "", [0x7e]);
1177 assert_feed_ok!(
1178 e,
1179 "\u{306b}\u{307b}\u{3093}",
1180 "",
1181 [0x1b, 0x24, 0x42, 0x24, 0x4b, 0x24, 0x5b, 0x24, 0x73]
1182 );
1183 assert_feed_ok!(e, "\u{65e5}\u{672c}", "", [0x46, 0x7c, 0x4b, 0x5c]);
1184 assert_feed_ok!(
1185 e,
1186 "\u{ff86}\u{ff8e}\u{ff9d}",
1187 "",
1188 [0x1b, 0x28, 0x49, 0x46, 0x4e, 0x5d]
1189 );
1190 assert_feed_ok!(e, "XYZ", "", [0x1b, 0x28, 0x42, 0x58, 0x59, 0x5a]);
1191 assert_finish_ok!(e, []);
1192
1193 const AD: &str = "\x20";
1200 const BD: &str = "\u{30cd}";
1201 const CD: &str = "\u{ff88}";
1202 const AE: &[u8] = &[0x1b, 0x28, 0x42, 0x20];
1203 const BE: &[u8] = &[0x1b, 0x24, 0x42, 0x25, 0x4d];
1204 const CE: &[u8] = &[0x1b, 0x28, 0x49, 0x48];
1205 let mut e = ISO2022JPEncoding.raw_encoder();
1206 let decoded: String = ["\x20", BD, CD, AD, CD, BD, AD].concat();
1207 let encoded: Vec<_> = [&[0x20][..], BE, CE, AE, CE, BE, AE].concat();
1208 assert_feed_ok!(e, decoded, "", encoded);
1209 assert_finish_ok!(e, []);
1210 }
1211
1212 #[test]
1213 fn test_encoder_invalid() {
1214 let mut e = ISO2022JPEncoding.raw_encoder();
1215 assert_feed_err!(e, "", "\u{ffff}", "", []);
1216 assert_feed_err!(e, "?", "\u{ffff}", "!", [0x3f]);
1217 assert_feed_err!(e, "", "\u{736c}", "\u{8c78}", []);
1219 assert_finish_ok!(e, []);
1220 }
1221
/// Well-formed ISO-2022-JP input decodes to the expected text, both when fed
/// piecewise into one decoder and when fed to fresh decoders.
#[test]
fn test_decoder_valid() {
    let mut decoder = ISO2022JPEncoding.raw_decoder();

    // Plain bytes before any escape sequence.
    assert_feed_ok!(decoder, [0x41], [], "A");
    assert_feed_ok!(decoder, [0x42, 0x43], [], "BC");

    // ESC ( J followed by single-byte text.
    assert_feed_ok!(decoder, [0x1b, 0x28, 0x4a, 0x44, 0x45, 0x46], [], "DEF");
    assert_feed_ok!(decoder, [], [], "");
    // 0x5c and 0x7e are expected to come out as '\' and '~' here.
    assert_feed_ok!(decoder, [0x5c], [], "\\");
    assert_feed_ok!(decoder, [0x7e], [], "~");

    // ESC $ B (given twice) followed by two-byte characters.
    assert_feed_ok!(
        decoder,
        [0x1b, 0x24, 0x42, 0x24, 0x4b, 0x1b, 0x24, 0x42, 0x24, 0x5b, 0x24, 0x73],
        [],
        "\u{306b}\u{307b}\u{3093}"
    );
    // No new escape: the two-byte state selected above is still in effect.
    assert_feed_ok!(decoder, [0x46, 0x7c, 0x4b, 0x5c], [], "\u{65e5}\u{672c}");

    // ESC ( I followed by half-width katakana.
    assert_feed_ok!(
        decoder,
        [0x1b, 0x28, 0x49, 0x46, 0x4e, 0x5d],
        [],
        "\u{ff86}\u{ff8e}\u{ff9d}"
    );

    // ESC $ ( D, one character, then ESC $ @ and another character.
    assert_feed_ok!(
        decoder,
        [0x1b, 0x24, 0x28, 0x44, 0x4b, 0x46, 0x1b, 0x24, 0x40, 0x6c, 0x38],
        [],
        "\u{736c}\u{8c78}"
    );

    // ESC ( B switches back to plain single-byte text.
    assert_feed_ok!(decoder, [0x1b, 0x28, 0x42, 0x58, 0x59, 0x5a], [], "XYZ");
    assert_finish_ok!(decoder, "");

    // A stream may end while a two-byte set is still selected.
    let mut decoder = ISO2022JPEncoding.raw_decoder();
    assert_feed_ok!(
        decoder,
        [0x1b, 0x24, 0x42, 0x24, 0x4b, 0x24, 0x5b, 0x24, 0x73],
        [],
        "\u{306b}\u{307b}\u{3093}"
    );
    assert_finish_ok!(decoder, "");

    // ... or while the ESC ( I set is selected.
    let mut decoder = ISO2022JPEncoding.raw_decoder();
    assert_feed_ok!(
        decoder,
        [0x1b, 0x28, 0x49, 0x46, 0x4e, 0x5d],
        [],
        "\u{ff86}\u{ff8e}\u{ff9d}"
    );
    assert_finish_ok!(decoder, "");

    // ... or while the ESC $ ( D set is selected.
    let mut decoder = ISO2022JPEncoding.raw_decoder();
    assert_feed_ok!(decoder, [0x1b, 0x24, 0x28, 0x44, 0x4b, 0x46], [], "\u{736c}");
    assert_finish_ok!(decoder, "");

    // Four characters, each introduced by a different escape sequence:
    //   A: U+0020 via ESC ( B
    //   B: U+30CD via ESC $ B
    //   C: U+FF88 via ESC ( I
    //   D: U+793B via ESC $ ( D
    // The order "AABBCCACBADDBDCDA" contains every ordered pair of the four,
    // so every switch from one set to another (or to itself) is exercised.
    // The very first element is a bare space with no escape in front of it.
    const BARE_SPACE: (&str, &[u8]) = ("\x20", &[0x20]);
    const A: (&str, &[u8]) = ("\x20", &[0x1b, 0x28, 0x42, 0x20]);
    const B: (&str, &[u8]) = ("\u{30cd}", &[0x1b, 0x24, 0x42, 0x25, 0x4d]);
    const C: (&str, &[u8]) = ("\u{ff88}", &[0x1b, 0x28, 0x49, 0x48]);
    const D: (&str, &[u8]) = ("\u{793b}", &[0x1b, 0x24, 0x28, 0x44, 0x50, 0x4b]);
    let sequence = [
        BARE_SPACE, A, B, B, C, C, A, C, B, A, D, D, B, D, C, D, A,
    ];

    let mut expected_text = String::new();
    let mut input_bytes: Vec<u8> = Vec::new();
    for &(text, bytes) in sequence.iter() {
        expected_text.push_str(text);
        input_bytes.extend_from_slice(bytes);
    }

    let mut decoder = ISO2022JPEncoding.raw_decoder();
    assert_feed_ok!(decoder, input_bytes, [], expected_text);
    assert_finish_ok!(decoder, "");
}
1317
/// Escape sequences and two-byte characters may be split across `raw_feed`
/// calls at any byte boundary; the decoder must resume where it stopped.
#[test]
fn test_decoder_valid_partial() {
    let mut decoder = ISO2022JPEncoding.raw_decoder();

    // ESC ( J, split after each byte and in the middle of later repetitions.
    assert_feed_ok!(decoder, [], [0x1b], "");
    assert_feed_ok!(decoder, [], [0x28], "");
    assert_feed_ok!(decoder, [0x4a, 0x41], [], "A");
    assert_feed_ok!(decoder, [], [0x1b, 0x28], "");
    assert_feed_ok!(decoder, [0x4a, 0x42], [0x1b], "B");
    assert_feed_ok!(decoder, [0x28, 0x4a, 0x43], [], "C");

    // ESC $ B, including a split between the lead and trail byte of a character.
    assert_feed_ok!(decoder, [], [0x1b], "");
    assert_feed_ok!(decoder, [], [0x24], "");
    assert_feed_ok!(decoder, [0x42], [0x24], "");
    assert_feed_ok!(decoder, [0x4b], [0x1b, 0x24], "\u{306b}");
    assert_feed_ok!(decoder, [0x42, 0x24, 0x5b], [], "\u{307b}");
    assert_feed_ok!(decoder, [], [0x1b], "");
    assert_feed_ok!(decoder, [0x24, 0x42, 0x24, 0x73], [], "\u{3093}");

    // ESC ( I.
    assert_feed_ok!(decoder, [], [0x1b], "");
    assert_feed_ok!(decoder, [], [0x28], "");
    assert_feed_ok!(decoder, [0x49, 0x46], [], "\u{ff86}");
    assert_feed_ok!(decoder, [], [0x1b, 0x28], "");
    assert_feed_ok!(decoder, [0x49, 0x4e], [0x1b], "\u{ff8e}");
    assert_feed_ok!(decoder, [0x28, 0x49, 0x5d], [], "\u{ff9d}");

    // ESC $ ( D, the four-byte escape sequence.
    assert_feed_ok!(decoder, [], [0x1b, 0x24], "");
    assert_feed_ok!(decoder, [], [0x28], "");
    assert_feed_ok!(decoder, [0x44], [0x4b], "");
    assert_feed_ok!(decoder, [0x46], [0x1b, 0x24, 0x28], "\u{736c}");
    assert_feed_ok!(decoder, [0x44, 0x4b, 0x46], [], "\u{736c}");

    assert_finish_ok!(decoder, "");
}
1352
/// Line-break handling inside non-ASCII states.
///
/// Note that the byte under test is 0x0a (line feed), despite the test's name.
#[test]
fn test_decoder_carriage_return() {
    // Between two-byte characters, 0x0a is accepted and the bytes after it
    // are decoded as plain single-byte text ("%M" and "PK").
    let mut decoder = ISO2022JPEncoding.raw_decoder();
    assert_feed_ok!(
        decoder,
        [0x1b, 0x24, 0x42, 0x25, 0x4d, 0x0a, 0x25, 0x4d],
        [],
        "\u{30cd}\n\x25\x4d"
    );
    assert_feed_ok!(
        decoder,
        [0x1b, 0x24, 0x28, 0x44, 0x50, 0x4b, 0x0a, 0x50, 0x4b],
        [],
        "\u{793b}\n\x50\x4b"
    );
    assert_finish_ok!(decoder, "");

    // Elsewhere 0x0a is an error:
    let mut decoder = ISO2022JPEncoding.raw_decoder();
    // ... after a character in the ESC ( I set,
    assert_feed_err!(decoder, [0x1b, 0x28, 0x49, 0x48], [0x0a], [], "\u{ff88}");
    // ... and in place of the trail byte of a two-byte character.
    assert_feed_err!(decoder, [0x1b, 0x24, 0x42], [0x25, 0x0a], [], "");
    assert_finish_ok!(decoder, "");
}
1377
/// Ending the stream after only the lead byte of a two-byte character is an error.
#[test]
fn test_decoder_invalid_partial() {
    // Dangling lead byte after ESC $ B.
    let mut decoder = ISO2022JPEncoding.raw_decoder();
    assert_feed_ok!(decoder, [0x1b, 0x24, 0x42, 0x24, 0x4b], [0x24], "\u{306b}");
    assert_finish_err!(decoder, "");

    // Dangling lead byte after ESC $ ( D.
    let mut decoder = ISO2022JPEncoding.raw_decoder();
    assert_feed_ok!(
        decoder,
        [0x1b, 0x24, 0x28, 0x44, 0x4b, 0x46],
        [0x50],
        "\u{736c}"
    );
    assert_finish_err!(decoder, "");
}
1388
/// Ending the stream in the middle of an escape sequence is an error, both
/// through the raw decoder and through the `decode` convenience method.
#[test]
fn test_decoder_invalid_partial_escape() {
    // Lone ESC.
    let mut decoder = ISO2022JPEncoding.raw_decoder();
    assert_feed_ok!(decoder, [], [0x1b], "");
    assert_finish_err!(decoder, "");

    // ESC $
    let mut decoder = ISO2022JPEncoding.raw_decoder();
    assert_feed_ok!(decoder, [], [0x1b, 0x24], "");
    assert_finish_err!(decoder, "");

    // ESC $ ( -- the only case here where the error carries an explicit -1 argument.
    let mut decoder = ISO2022JPEncoding.raw_decoder();
    assert_feed_ok!(decoder, [], [0x1b, 0x24, 0x28], "");
    assert_finish_err!(decoder, -1, "");

    // ESC (
    let mut decoder = ISO2022JPEncoding.raw_decoder();
    assert_feed_ok!(decoder, [], [0x1b, 0x28], "");
    assert_finish_err!(decoder, "");

    // The same truncated inputs under the Replace trap: each yields one
    // U+FFFD, and for ESC $ ( the trailing 0x28 is additionally kept as "(".
    let replaced: [(&[u8], &str); 4] = [
        (&[0x1b][..], "\u{fffd}"),
        (&[0x1b, 0x24][..], "\u{fffd}"),
        (&[0x1b, 0x24, 0x28][..], "\u{fffd}\x28"),
        (&[0x1b, 0x28][..], "\u{fffd}"),
    ];
    for &(input, expected) in replaced.iter() {
        assert_eq!(
            ISO2022JPEncoding.decode(input, DecoderTrap::Replace),
            Ok(expected.to_string())
        );
    }
}
1424
/// ESC followed by anything other than a supported designation is an error.
///
/// Between cases the decoder is fed a fixed, valid input (see `reset!`) so
/// that every case starts from the same state.
#[test]
fn test_decoder_invalid_escape() {
    let mut decoder = ISO2022JPEncoding.raw_decoder();

    // Feeds "ABC", then ESC $ B and one two-byte character (U+3000).
    macro_rules! reset {
        () => {
            assert_feed_ok!(
                decoder,
                [0x41, 0x42, 0x43, 0x1b, 0x24, 0x42, 0x21, 0x21],
                [],
                "ABC\u{3000}"
            )
        };
    }

    // ESC split from the offending byte across two feeds.
    reset!();
    assert_feed_ok!(decoder, [], [0x1b], "");
    assert_feed_err!(decoder, [], [], [0x00], "");

    // ESC LF
    reset!();
    assert_feed_err!(decoder, [], [0x1b], [0x0a], "");

    // ESC SP
    reset!();
    assert_feed_err!(decoder, [], [0x1b], [0x20], "");

    // ESC ! Z
    reset!();
    assert_feed_err!(decoder, [], [0x1b], [0x21, 0x5a], "");

    // ESC " Z
    reset!();
    assert_feed_err!(decoder, [], [0x1b], [0x22, 0x5a], "");

    // ESC $ Z
    reset!();
    assert_feed_err!(decoder, [], [0x1b], [0x24, 0x5a], "");

    // ESC $ | Z: same bytes, split after "ESC $"; the error carries -1.
    reset!();
    assert_feed_ok!(decoder, [], [0x1b, 0x24], "");
    assert_feed_err!(decoder, -1, [], [], [0x24, 0x5a], "");

    // ESC $ ( Z
    reset!();
    assert_feed_err!(decoder, [], [0x1b], [0x24, 0x28, 0x5a], "");

    // ESC $ ( | Z: same bytes, split after "ESC $ ("; the error carries -2.
    reset!();
    assert_feed_ok!(decoder, [], [0x1b, 0x24, 0x28], "");
    assert_feed_err!(decoder, -2, [], [], [0x24, 0x28, 0x5a], "");

    // ESC $ ) Z
    reset!();
    assert_feed_err!(decoder, [], [0x1b], [0x24, 0x29, 0x5a], "");

    // ESC $ * Z
    reset!();
    assert_feed_err!(decoder, [], [0x1b], [0x24, 0x2a, 0x5a], "");

    // ESC $ + Z
    reset!();
    assert_feed_err!(decoder, [], [0x1b], [0x24, 0x2b, 0x5a], "");

    // ESC $ - Z
    reset!();
    assert_feed_err!(decoder, [], [0x1b], [0x24, 0x2d, 0x5a], "");

    // ESC $ . Z
    reset!();
    assert_feed_err!(decoder, [], [0x1b], [0x24, 0x2e, 0x5a], "");

    // ESC $ / Z
    reset!();
    assert_feed_err!(decoder, [], [0x1b], [0x24, 0x2f, 0x5a], "");

    // ESC % Z
    reset!();
    assert_feed_err!(decoder, [], [0x1b], [0x25, 0x5a], "");

    // ESC % / Z
    reset!();
    assert_feed_err!(decoder, [], [0x1b], [0x25, 0x2f, 0x5a], "");

    // ESC ( Z
    reset!();
    assert_feed_err!(decoder, [], [0x1b], [0x28, 0x5a], "");

    // ESC ( | Z: same bytes, split after "ESC ("; the error carries -1.
    reset!();
    assert_feed_ok!(decoder, [], [0x1b, 0x28], "");
    assert_feed_err!(decoder, -1, [], [], [0x28, 0x5a], "");

    // ESC ) Z
    reset!();
    assert_feed_err!(decoder, [], [0x1b], [0x29, 0x5a], "");

    // ESC * Z
    reset!();
    assert_feed_err!(decoder, [], [0x1b], [0x2a, 0x5a], "");

    // ESC + Z
    reset!();
    assert_feed_err!(decoder, [], [0x1b], [0x2b, 0x5a], "");

    // ESC - Z
    reset!();
    assert_feed_err!(decoder, [], [0x1b], [0x2d, 0x5a], "");

    // ESC . Z
    reset!();
    assert_feed_err!(decoder, [], [0x1b], [0x2e, 0x5a], "");

    // ESC / Z
    reset!();
    assert_feed_err!(decoder, [], [0x1b], [0x2f, 0x5a], "");

    // ESC N
    reset!();
    assert_feed_err!(decoder, [], [0x1b], [0x4e], "");

    // ESC O
    reset!();
    assert_feed_err!(decoder, [], [0x1b], [0x4f], "");

    // ESC n
    reset!();
    assert_feed_err!(decoder, [], [0x1b], [0x6e], "");

    // ESC o
    reset!();
    assert_feed_err!(decoder, [], [0x1b], [0x6f], "");

    // ESC |
    reset!();
    assert_feed_err!(decoder, [], [0x1b], [0x7c], "");

    // ESC }
    reset!();
    assert_feed_err!(decoder, [], [0x1b], [0x7d], "");

    // ESC ~
    reset!();
    assert_feed_err!(decoder, [], [0x1b], [0x7e], "");

    // ESC 0xff
    reset!();
    assert_feed_err!(decoder, [], [0x1b], [0xff], "");

    reset!();
    assert_finish_ok!(decoder, "");
}
1507
/// Bytes outside the range accepted by the currently selected set are errors.
#[test]
fn test_decoder_invalid_out_or_range() {
    let mut decoder = ISO2022JPEncoding.raw_decoder();

    // Bytes with the high bit set, before any escape sequence.
    assert_feed_err!(decoder, [], [0x80], [], "");
    assert_feed_err!(decoder, [], [0xff], [], "");

    // After ESC $ B: 0x80 or 0x20 as either the lead or the trail byte.
    assert_feed_err!(decoder, [0x1b, 0x24, 0x42], [0x80, 0x21], [], "");
    assert_feed_err!(decoder, [0x1b, 0x24, 0x42], [0x21, 0x80], [], "");
    assert_feed_err!(decoder, [0x1b, 0x24, 0x42], [0x20, 0x21], [], "");
    assert_feed_err!(decoder, [0x1b, 0x24, 0x42], [0x21, 0x20], [], "");

    // After ESC ( I: 0x20 and 0x60 are both rejected.
    assert_feed_err!(decoder, [0x1b, 0x28, 0x49], [0x20], [], "");
    assert_feed_err!(decoder, [0x1b, 0x28, 0x49], [0x60], [], "");

    // After ESC $ ( D: the same four cases as for ESC $ B.
    assert_feed_err!(decoder, [0x1b, 0x24, 0x28, 0x44], [0x80, 0x21], [], "");
    assert_feed_err!(decoder, [0x1b, 0x24, 0x28, 0x44], [0x21, 0x80], [], "");
    assert_feed_err!(decoder, [0x1b, 0x24, 0x28, 0x44], [0x20, 0x21], [], "");
    assert_feed_err!(decoder, [0x1b, 0x24, 0x28, 0x44], [0x21, 0x20], [], "");

    assert_finish_ok!(decoder, "");
}
1525
/// A decoder is reusable after `raw_finish`, even when finishing reported an error.
#[test]
fn test_decoder_feed_after_finish() {
    let mut decoder = ISO2022JPEncoding.raw_decoder();

    // "$\"" as plain bytes, then ESC $ B, one two-byte character, and a
    // dangling lead byte (0x24) that makes finishing fail.
    assert_feed_ok!(
        decoder,
        [0x24, 0x22, 0x1b, 0x24, 0x42, 0x24, 0x22],
        [0x24],
        "\x24\x22\u{3042}"
    );
    assert_finish_err!(decoder, "");

    // The same bytes again give the same text: the leading 0x24 0x22 is read
    // as "$\"" once more rather than as a two-byte character, so the state
    // left over from the first round was discarded.
    assert_feed_ok!(
        decoder,
        [0x24, 0x22, 0x1b, 0x24, 0x42, 0x24, 0x22],
        [],
        "\x24\x22\u{3042}"
    );
    assert_finish_ok!(decoder, "");
}
1544
1545 #[bench]
1546 fn bench_encode_short_text(bencher: &mut test::Bencher) {
1547 let s = testutils::JAPANESE_TEXT;
1548 bencher.bytes = s.len() as u64;
1549 bencher.iter(|| test::black_box(ISO2022JPEncoding.encode(s, EncoderTrap::Strict)))
1550 }
1551
1552 #[bench]
1553 fn bench_decode_short_text(bencher: &mut test::Bencher) {
1554 let s = ISO2022JPEncoding
1555 .encode(testutils::JAPANESE_TEXT, EncoderTrap::Strict)
1556 .ok()
1557 .unwrap();
1558 bencher.bytes = s.len() as u64;
1559 bencher.iter(|| test::black_box(ISO2022JPEncoding.decode(&s, DecoderTrap::Strict)))
1560 }
1561}