1#![allow(clippy::unicode_not_nfc)]
2use crate::types::{Mode, Version};
4use std::slice::Iter;
5
6#[cfg(feature = "bench")]
7extern crate test;
8
9#[derive(PartialEq, Eq, Debug, Copy, Clone)]
14pub struct Segment {
15 pub mode: Mode,
17
18 pub begin: usize,
20
21 pub end: usize,
23}
24
25impl Segment {
26 pub fn encoded_len(&self, version: Version) -> usize {
29 let byte_size = self.end - self.begin;
30 let chars_count = if self.mode == Mode::Kanji { byte_size / 2 } else { byte_size };
31
32 let mode_bits_count = version.mode_bits_count();
33 let length_bits_count = self.mode.length_bits_count(version);
34 let data_bits_count = self.mode.data_bits_count(chars_count);
35
36 mode_bits_count + length_bits_count + data_bits_count
37 }
38}
39
/// Adapter that turns a byte iterator into `(index, ExclCharSet)` pairs and
/// appends one trailing `ExclCharSet::End` item after the last byte.
struct EcsIter<I> {
    /// Underlying byte source.
    base: I,
    /// Index that will be attached to the next item.
    index: usize,
    /// Set once the trailing `End` item has been produced.
    ended: bool,
}
59
60impl<'a, I: Iterator<Item = &'a u8>> Iterator for EcsIter<I> {
61 type Item = (usize, ExclCharSet);
62
63 fn next(&mut self) -> Option<(usize, ExclCharSet)> {
64 if self.ended {
65 return None;
66 }
67
68 match self.base.next() {
69 None => {
70 self.ended = true;
71 Some((self.index, ExclCharSet::End))
72 }
73 Some(c) => {
74 let old_index = self.index;
75 self.index += 1;
76 Some((old_index, ExclCharSet::from_u8(*c)))
77 }
78 }
79 }
80}
81
82pub struct Parser<'a> {
84 ecs_iter: EcsIter<Iter<'a, u8>>,
85 state: State,
86 begin: usize,
87 pending_single_byte: bool,
88}
89
90impl<'a> Parser<'a> {
91 pub fn new(data: &[u8]) -> Parser {
103 Parser {
104 ecs_iter: EcsIter { base: data.iter(), index: 0, ended: false },
105 state: State::Init,
106 begin: 0,
107 pending_single_byte: false,
108 }
109 }
110}
111
112impl<'a> Iterator for Parser<'a> {
113 type Item = Segment;
114
115 fn next(&mut self) -> Option<Segment> {
116 if self.pending_single_byte {
117 self.pending_single_byte = false;
118 self.begin += 1;
119 return Some(Segment { mode: Mode::Byte, begin: self.begin - 1, end: self.begin });
120 }
121
122 loop {
123 let (i, ecs) = match self.ecs_iter.next() {
124 None => return None,
125 Some(a) => a,
126 };
127 let (next_state, action) = STATE_TRANSITION[self.state as usize + ecs as usize];
128 self.state = next_state;
129
130 let old_begin = self.begin;
131 let push_mode = match action {
132 Action::Idle => continue,
133 Action::Numeric => Mode::Numeric,
134 Action::Alpha => Mode::Alphanumeric,
135 Action::Byte => Mode::Byte,
136 Action::Kanji => Mode::Kanji,
137 Action::KanjiAndSingleByte => {
138 let next_begin = i - 1;
139 if self.begin == next_begin {
140 Mode::Byte
141 } else {
142 self.pending_single_byte = true;
143 self.begin = next_begin;
144 return Some(Segment { mode: Mode::Kanji, begin: old_begin, end: next_begin });
145 }
146 }
147 };
148
149 self.begin = i;
150 return Some(Segment { mode: push_mode, begin: old_begin, end: i });
151 }
152 }
153}
154
#[cfg(test)]
mod parse_tests {
    use crate::optimize::{Parser, Segment};
    use crate::types::Mode;

    /// Runs the parser over `data` and gathers every segment it yields.
    fn parse(data: &[u8]) -> Vec<Segment> {
        Parser::new(data).collect()
    }

    /// Shorthand for an expected segment.
    fn seg(mode: Mode, begin: usize, end: usize) -> Segment {
        Segment { mode, begin, end }
    }

    #[test]
    fn test_parse_1() {
        assert_eq!(
            parse(b"01049123451234591597033130128%10ABC123"),
            [
                seg(Mode::Numeric, 0, 29),
                seg(Mode::Alphanumeric, 29, 30),
                seg(Mode::Numeric, 30, 32),
                seg(Mode::Alphanumeric, 32, 35),
                seg(Mode::Numeric, 35, 38),
            ]
        );
    }

    #[test]
    fn test_parse_shift_jis_example_1() {
        assert_eq!(
            parse(b"\x82\xa0\x81\x41\x41\xb1\x81\xf0"),
            [
                seg(Mode::Kanji, 0, 4),
                seg(Mode::Alphanumeric, 4, 5),
                seg(Mode::Byte, 5, 6),
                seg(Mode::Kanji, 6, 8),
            ]
        );
    }

    #[test]
    fn test_parse_utf_8() {
        // The bytes are classified as-is; no text decoding takes place.
        assert_eq!(
            parse(b"\xe3\x81\x82\xe3\x80\x81A\xef\xbd\xb1\xe2\x84\xab"),
            [
                seg(Mode::Kanji, 0, 4),
                seg(Mode::Byte, 4, 5),
                seg(Mode::Kanji, 5, 7),
                seg(Mode::Byte, 7, 10),
                seg(Mode::Kanji, 10, 12),
                seg(Mode::Byte, 12, 13),
            ]
        );
    }

    #[test]
    fn test_not_kanji_1() {
        assert_eq!(parse(b"\x81\x30"), [seg(Mode::Byte, 0, 1), seg(Mode::Numeric, 1, 2)]);
    }

    #[test]
    fn test_not_kanji_2() {
        assert_eq!(parse(b"\xeb\xc0"), [seg(Mode::Byte, 0, 1), seg(Mode::Byte, 1, 2)]);
    }

    #[test]
    fn test_not_kanji_3() {
        assert_eq!(parse(b"\x81\x7f"), [seg(Mode::Byte, 0, 1), seg(Mode::Byte, 1, 2)]);
    }

    #[test]
    fn test_not_kanji_4() {
        assert_eq!(parse(b"\x81\x40\x81"), [seg(Mode::Kanji, 0, 2), seg(Mode::Byte, 2, 3)]);
    }
}
248
249pub struct Optimizer<I> {
254 parser: I,
255 last_segment: Segment,
256 last_segment_size: usize,
257 version: Version,
258 ended: bool,
259}
260
261impl<I: Iterator<Item = Segment>> Optimizer<I> {
262 pub fn new(mut segments: I, version: Version) -> Self {
269 match segments.next() {
270 None => Self {
271 parser: segments,
272 last_segment: Segment { mode: Mode::Numeric, begin: 0, end: 0 },
273 last_segment_size: 0,
274 version,
275 ended: true,
276 },
277 Some(segment) => Self {
278 parser: segments,
279 last_segment: segment,
280 last_segment_size: segment.encoded_len(version),
281 version,
282 ended: false,
283 },
284 }
285 }
286}
287
288impl<'a> Parser<'a> {
289 pub fn optimize(self, version: Version) -> Optimizer<Parser<'a>> {
290 Optimizer::new(self, version)
291 }
292}
293
294impl<I: Iterator<Item = Segment>> Iterator for Optimizer<I> {
295 type Item = Segment;
296
297 fn next(&mut self) -> Option<Segment> {
298 if self.ended {
299 return None;
300 }
301
302 loop {
303 match self.parser.next() {
304 None => {
305 self.ended = true;
306 return Some(self.last_segment);
307 }
308 Some(segment) => {
309 let seg_size = segment.encoded_len(self.version);
310
311 let new_segment = Segment {
312 mode: self.last_segment.mode.max(segment.mode),
313 begin: self.last_segment.begin,
314 end: segment.end,
315 };
316 let new_size = new_segment.encoded_len(self.version);
317
318 if self.last_segment_size + seg_size >= new_size {
319 self.last_segment = new_segment;
320 self.last_segment_size = new_size;
321 } else {
322 let old_segment = self.last_segment;
323 self.last_segment = segment;
324 self.last_segment_size = seg_size;
325 return Some(old_segment);
326 }
327 }
328 }
329 }
330 }
331}
332
333pub fn total_encoded_len(segments: &[Segment], version: Version) -> usize {
335 segments.iter().map(|seg| seg.encoded_len(version)).sum()
336}
337
#[cfg(test)]
mod optimize_tests {
    use crate::optimize::{total_encoded_len, Optimizer, Segment};
    use crate::types::{Mode, Version};

    /// Shorthand for building a segment.
    fn seg(mode: Mode, begin: usize, end: usize) -> Segment {
        Segment { mode, begin, end }
    }

    /// Optimizes `given` and checks that the result equals `expected`.
    /// Whenever the optimizer changed the segments, the total encoded length
    /// must also have strictly decreased.
    fn test_optimization_result(given: &[Segment], expected: &[Segment], version: Version) {
        let opt_segs: Vec<Segment> = Optimizer::new(given.iter().copied(), version).collect();
        let prev_len = total_encoded_len(given, version);
        let new_len = total_encoded_len(&opt_segs, version);
        if given != opt_segs {
            assert!(prev_len > new_len, "{} > {}", prev_len, new_len);
        }
        assert_eq!(
            opt_segs,
            expected,
            "Optimization gave something better: {} < {} ({:?})",
            new_len,
            total_encoded_len(expected, version),
            opt_segs
        );
    }

    #[test]
    fn test_example_1() {
        test_optimization_result(
            &[seg(Mode::Alphanumeric, 0, 3), seg(Mode::Numeric, 3, 6), seg(Mode::Byte, 6, 10)],
            &[seg(Mode::Alphanumeric, 0, 6), seg(Mode::Byte, 6, 10)],
            Version::Normal(1),
        );
    }

    #[test]
    fn test_example_2() {
        test_optimization_result(
            &[
                seg(Mode::Numeric, 0, 29),
                seg(Mode::Alphanumeric, 29, 30),
                seg(Mode::Numeric, 30, 32),
                seg(Mode::Alphanumeric, 32, 35),
                seg(Mode::Numeric, 35, 38),
            ],
            &[seg(Mode::Numeric, 0, 29), seg(Mode::Alphanumeric, 29, 38)],
            Version::Normal(9),
        );
    }

    #[test]
    fn test_example_3() {
        test_optimization_result(
            &[
                seg(Mode::Kanji, 0, 4),
                seg(Mode::Alphanumeric, 4, 5),
                seg(Mode::Byte, 5, 6),
                seg(Mode::Kanji, 6, 8),
            ],
            &[seg(Mode::Byte, 0, 8)],
            Version::Normal(1),
        );
    }

    #[test]
    fn test_example_4() {
        test_optimization_result(
            &[seg(Mode::Kanji, 0, 10), seg(Mode::Byte, 10, 11)],
            &[seg(Mode::Kanji, 0, 10), seg(Mode::Byte, 10, 11)],
            Version::Normal(1),
        );
    }

    #[test]
    fn test_annex_j_guideline_1a() {
        test_optimization_result(
            &[seg(Mode::Numeric, 0, 3), seg(Mode::Alphanumeric, 3, 4)],
            &[seg(Mode::Numeric, 0, 3), seg(Mode::Alphanumeric, 3, 4)],
            Version::Micro(2),
        );
    }

    #[test]
    fn test_annex_j_guideline_1b() {
        test_optimization_result(
            &[seg(Mode::Numeric, 0, 2), seg(Mode::Alphanumeric, 2, 4)],
            &[seg(Mode::Alphanumeric, 0, 4)],
            Version::Micro(2),
        );
    }

    #[test]
    fn test_annex_j_guideline_1c() {
        test_optimization_result(
            &[seg(Mode::Numeric, 0, 3), seg(Mode::Alphanumeric, 3, 4)],
            &[seg(Mode::Alphanumeric, 0, 4)],
            Version::Micro(3),
        );
    }
}
453
/// Benchmarks parsing plus optimization of a fixed sample that mixes ASCII
/// and high bytes, using `Version::Normal(15)`.
#[cfg(feature = "bench")]
#[bench]
fn bench_optimize(bencher: &mut test::Bencher) {
    use crate::types::Version;

    let data: &[u8] = b"QR\x83R\x81[\x83h\x81i\x83L\x83\x85\x81[\x83A\x81[\x83\x8b\x83R\x81[\x83h\x81j\
                 \x82\xc6\x82\xcd\x81A1994\x94N\x82\xc9\x83f\x83\x93\x83\\\x81[\x82\xcc\x8aJ\
                 \x94\xad\x95\x94\x96\xe5\x81i\x8c\xbb\x8d\xdd\x82\xcd\x95\xaa\x97\xa3\x82\xb5\x83f\
                 \x83\x93\x83\\\x81[\x83E\x83F\x81[\x83u\x81j\x82\xaa\x8aJ\x94\xad\x82\xb5\x82\xbd\
                 \x83}\x83g\x83\x8a\x83b\x83N\x83X\x8c^\x93\xf1\x8e\x9f\x8c\xb3\x83R\x81[\x83h\
                 \x82\xc5\x82\xa0\x82\xe9\x81B\x82\xc8\x82\xa8\x81AQR\x83R\x81[\x83h\x82\xc6\
                 \x82\xa2\x82\xa4\x96\xbc\x8f\xcc\x81i\x82\xa8\x82\xe6\x82\xd1\x92P\x8c\xea\x81j\
                 \x82\xcd\x83f\x83\x93\x83\\\x81[\x83E\x83F\x81[\x83u\x82\xcc\x93o\x98^\x8f\xa4\
                 \x95W\x81i\x91\xe64075066\x8d\x86\x81j\x82\xc5\x82\xa0\x82\xe9\x81BQR\x82\xcd\
                 Quick Response\x82\xc9\x97R\x97\x88\x82\xb5\x81A\x8d\x82\x91\xac\x93\xc7\x82\xdd\
                 \x8e\xe6\x82\xe8\x82\xaa\x82\xc5\x82\xab\x82\xe9\x82\xe6\x82\xa4\x82\xc9\x8aJ\
                 \x94\xad\x82\xb3\x82\xea\x82\xbd\x81B\x93\x96\x8f\x89\x82\xcd\x8e\xa9\x93\xae\
                 \x8e\xd4\x95\x94\x95i\x8dH\x8f\xea\x82\xe2\x94z\x91\x97\x83Z\x83\x93\x83^\x81[\
                 \x82\xc8\x82\xc7\x82\xc5\x82\xcc\x8eg\x97p\x82\xf0\x94O\x93\xaa\x82\xc9\x8aJ\
                 \x94\xad\x82\xb3\x82\xea\x82\xbd\x82\xaa\x81A\x8c\xbb\x8d\xdd\x82\xc5\x82\xcd\x83X\
                 \x83}\x81[\x83g\x83t\x83H\x83\x93\x82\xcc\x95\x81\x8by\x82\xc8\x82\xc7\x82\xc9\
                 \x82\xe6\x82\xe8\x93\xfa\x96{\x82\xc9\x8c\xc0\x82\xe7\x82\xb8\x90\xa2\x8aE\x93I\
                 \x82\xc9\x95\x81\x8by\x82\xb5\x82\xc4\x82\xa2\x82\xe9\x81B";

    // `Parser` and `Optimizer` are lazy iterators: merely constructing them
    // only processes the first segment. Drain the iterator so the whole input
    // is parsed and optimized on every iteration, and pass both the input and
    // each produced segment through `black_box` so the work cannot be elided.
    bencher.iter(|| {
        Parser::new(test::black_box(data))
            .optimize(Version::Normal(15))
            .for_each(|segment| {
                test::black_box(segment);
            })
    });
}
479
/// Mutually exclusive byte classes used as the column index of
/// `STATE_TRANSITION`. Every byte value maps to exactly one class via
/// [`ExclCharSet::from_u8`]; `End` is only produced at end of input.
#[derive(Clone, Copy)]
enum ExclCharSet {
    /// End-of-input marker (never returned by `from_u8`).
    End = 0,

    /// Space, `$`, `%`, `*`, `+`, `-`, `.`, `/` and `:`.
    Symbol = 1,

    /// ASCII digits `0`-`9`.
    Numeric = 2,

    /// ASCII uppercase letters `A`-`Z`.
    Alpha = 3,

    /// Bytes 0x81-0x9f.
    KanjiHi1 = 4,

    /// Bytes 0xe0-0xea.
    KanjiHi2 = 5,

    /// The single byte 0xeb.
    KanjiHi3 = 6,

    /// Bytes 0x40, 0x5b-0x7e, 0x80 and 0xa0-0xbf.
    KanjiLo1 = 7,

    /// Bytes 0xc0-0xdf and 0xec-0xfc.
    KanjiLo2 = 8,

    /// Every byte not covered by the classes above.
    Byte = 9,
}

impl ExclCharSet {
    /// Classifies a single input byte. The arms are pairwise disjoint, so
    /// their order does not affect the result.
    fn from_u8(c: u8) -> Self {
        match c {
            b'0'..=b'9' => Self::Numeric,
            b'A'..=b'Z' => Self::Alpha,
            b' ' | b'$' | b'%' | b'*' | b'+' | b'-'..=b'/' | b':' => Self::Symbol,
            0x81..=0x9f => Self::KanjiHi1,
            0xe0..=0xea => Self::KanjiHi2,
            0xeb => Self::KanjiHi3,
            b'@' | b'['..=b'~' | 0x80 | 0xa0..=0xbf => Self::KanjiLo1,
            0xc0..=0xdf | 0xec..=0xfc => Self::KanjiLo2,
            _ => Self::Byte,
        }
    }
}
545
/// States of the segment parser.
///
/// Each discriminant is the index of the first entry of that state's row in
/// `STATE_TRANSITION`; adding an `ExclCharSet` discriminant selects the entry.
#[derive(Clone, Copy)]
enum State {
    /// Nothing accumulated yet.
    Init = 0,

    /// Accumulating a numeric run.
    Numeric = 10,

    /// Accumulating an alphanumeric run.
    Alpha = 20,

    /// Accumulating a byte run.
    Byte = 30,

    /// Entered on a `KanjiHi1` or `KanjiHi2` byte.
    KanjiHi12 = 40,

    /// Entered on a `KanjiHi3` byte.
    KanjiHi3 = 50,

    /// Entered from `KanjiHi12` / `KanjiHi3` when the following byte is accepted.
    Kanji = 60,
}
572
/// What the parser does after a state transition.
#[derive(Clone, Copy)]
enum Action {
    /// Emit nothing; keep reading input.
    Idle,

    /// Emit the accumulated run as a `Mode::Numeric` segment.
    Numeric,

    /// Emit the accumulated run as a `Mode::Alphanumeric` segment.
    Alpha,

    /// Emit the accumulated run as a `Mode::Byte` segment.
    Byte,

    /// Emit the accumulated run as a `Mode::Kanji` segment.
    Kanji,

    /// Emit the accumulated run without its last byte as a `Mode::Kanji`
    /// segment, then that last byte as a separate `Mode::Byte` segment (only
    /// the byte segment when nothing precedes it).
    KanjiAndSingleByte,
}
595
596static STATE_TRANSITION: [(State, Action); 70] = [
597 (State::Init, Action::Idle), (State::Alpha, Action::Idle), (State::Numeric, Action::Idle), (State::Alpha, Action::Idle), (State::KanjiHi12, Action::Idle), (State::KanjiHi12, Action::Idle), (State::KanjiHi3, Action::Idle), (State::Byte, Action::Idle), (State::Byte, Action::Idle), (State::Byte, Action::Idle), (State::Init, Action::Numeric), (State::Alpha, Action::Numeric), (State::Numeric, Action::Idle), (State::Alpha, Action::Numeric), (State::KanjiHi12, Action::Numeric), (State::KanjiHi12, Action::Numeric), (State::KanjiHi3, Action::Numeric), (State::Byte, Action::Numeric), (State::Byte, Action::Numeric), (State::Byte, Action::Numeric), (State::Init, Action::Alpha), (State::Alpha, Action::Idle), (State::Numeric, Action::Alpha), (State::Alpha, Action::Idle), (State::KanjiHi12, Action::Alpha), (State::KanjiHi12, Action::Alpha), (State::KanjiHi3, Action::Alpha), (State::Byte, Action::Alpha), (State::Byte, Action::Alpha), (State::Byte, Action::Alpha), (State::Init, Action::Byte), (State::Alpha, Action::Byte), (State::Numeric, Action::Byte), (State::Alpha, Action::Byte), (State::KanjiHi12, Action::Byte), (State::KanjiHi12, Action::Byte), (State::KanjiHi3, Action::Byte), (State::Byte, Action::Idle), (State::Byte, Action::Idle), (State::Byte, Action::Idle), (State::Init, Action::KanjiAndSingleByte), (State::Alpha, Action::KanjiAndSingleByte), (State::Numeric, Action::KanjiAndSingleByte), (State::Kanji, Action::Idle), (State::Kanji, Action::Idle), (State::Kanji, Action::Idle), (State::Kanji, Action::Idle), (State::Kanji, Action::Idle), (State::Kanji, Action::Idle), (State::Byte, Action::KanjiAndSingleByte), (State::Init, Action::KanjiAndSingleByte), (State::Alpha, Action::KanjiAndSingleByte), (State::Numeric, Action::KanjiAndSingleByte), (State::Kanji, Action::Idle), (State::Kanji, Action::Idle), (State::KanjiHi12, Action::KanjiAndSingleByte), (State::KanjiHi3, Action::KanjiAndSingleByte), (State::Kanji, Action::Idle), (State::Byte, Action::KanjiAndSingleByte), 
(State::Byte, Action::KanjiAndSingleByte), (State::Init, Action::Kanji), (State::Alpha, Action::Kanji), (State::Numeric, Action::Kanji), (State::Alpha, Action::Kanji), (State::KanjiHi12, Action::Idle), (State::KanjiHi12, Action::Idle), (State::KanjiHi3, Action::Idle), (State::Byte, Action::Kanji), (State::Byte, Action::Kanji), (State::Byte, Action::Kanji), ];
677
678