1#![allow(clippy::unicode_not_nfc)]
2use crate::types::{Mode, Version};
13use std::slice::Iter;
14
15#[derive(PartialEq, Eq, Debug, Copy, Clone)]
20pub struct Segment {
21 pub mode: Mode,
23
24 pub begin: usize,
26
27 pub end: usize,
29}
30
31impl Segment {
32 pub fn encoded_len(&self, version: Version) -> usize {
35 let byte_size = self.end - self.begin;
36 let chars_count = if self.mode == Mode::Kanji { byte_size / 2 } else { byte_size };
37
38 let mode_bits_count = version.mode_bits_count();
39 let length_bits_count = self.mode.length_bits_count(version);
40 let data_bits_count = self.mode.data_bits_count(chars_count);
41
42 mode_bits_count + length_bits_count + data_bits_count
43 }
44}
45
46struct EcsIter<I> {
61 base: I,
62 index: usize,
63 ended: bool,
64}
65
66impl<'a, I: Iterator<Item = &'a u8>> Iterator for EcsIter<I> {
67 type Item = (usize, ExclCharSet);
68
69 fn next(&mut self) -> Option<(usize, ExclCharSet)> {
70 if self.ended {
71 return None;
72 }
73
74 match self.base.next() {
75 None => {
76 self.ended = true;
77 Some((self.index, ExclCharSet::End))
78 }
79 Some(c) => {
80 let old_index = self.index;
81 self.index += 1;
82 Some((old_index, ExclCharSet::from_u8(*c)))
83 }
84 }
85 }
86}
87
88pub struct Parser<'a> {
90 ecs_iter: EcsIter<Iter<'a, u8>>,
91 state: State,
92 begin: usize,
93 pending_single_byte: bool,
94}
95
96impl<'a> Parser<'a> {
97 pub fn new(data: &[u8]) -> Parser<'_> {
109 Parser {
110 ecs_iter: EcsIter { base: data.iter(), index: 0, ended: false },
111 state: State::Init,
112 begin: 0,
113 pending_single_byte: false,
114 }
115 }
116}
117
118impl<'a> Iterator for Parser<'a> {
119 type Item = Segment;
120
121 fn next(&mut self) -> Option<Segment> {
122 if self.pending_single_byte {
123 self.pending_single_byte = false;
124 self.begin += 1;
125 return Some(Segment { mode: Mode::Byte, begin: self.begin - 1, end: self.begin });
126 }
127
128 loop {
129 let (i, ecs) = self.ecs_iter.next()?;
130 let (next_state, action) = STATE_TRANSITION[self.state as usize + ecs as usize];
131 self.state = next_state;
132
133 let old_begin = self.begin;
134 let push_mode = match action {
135 Action::Idle => continue,
136 Action::Numeric => Mode::Numeric,
137 Action::Alpha => Mode::Alphanumeric,
138 Action::Byte => Mode::Byte,
139 Action::Kanji => Mode::Kanji,
140 Action::KanjiAndSingleByte => {
141 let next_begin = i - 1;
142 if self.begin == next_begin {
143 Mode::Byte
144 } else {
145 self.pending_single_byte = true;
146 self.begin = next_begin;
147 return Some(Segment { mode: Mode::Kanji, begin: old_begin, end: next_begin });
148 }
149 }
150 };
151
152 self.begin = i;
153 return Some(Segment { mode: push_mode, begin: old_begin, end: i });
154 }
155 }
156}
157
#[cfg(test)]
mod parse_tests {
    use crate::optimize::{Parser, Segment};
    use crate::types::Mode;

    /// Runs the parser over `data` and gathers every segment it yields.
    fn parse(data: &[u8]) -> Vec<Segment> {
        Parser::new(data).collect()
    }

    /// Shorthand for building an expected segment.
    fn seg(mode: Mode, begin: usize, end: usize) -> Segment {
        Segment { mode, begin, end }
    }

    #[test]
    fn test_parse_1() {
        assert_eq!(
            parse(b"01049123451234591597033130128%10ABC123"),
            [
                seg(Mode::Numeric, 0, 29),
                seg(Mode::Alphanumeric, 29, 30),
                seg(Mode::Numeric, 30, 32),
                seg(Mode::Alphanumeric, 32, 35),
                seg(Mode::Numeric, 35, 38),
            ]
        );
    }

    #[test]
    fn test_parse_shift_jis_example_1() {
        assert_eq!(
            parse(b"\x82\xa0\x81\x41\x41\xb1\x81\xf0"),
            [
                seg(Mode::Kanji, 0, 4),
                seg(Mode::Alphanumeric, 4, 5),
                seg(Mode::Byte, 5, 6),
                seg(Mode::Kanji, 6, 8),
            ]
        );
    }

    #[test]
    fn test_parse_utf_8() {
        // The parser only classifies raw bytes, so UTF-8 input ends up as a
        // mix of Kanji and Byte segments.
        assert_eq!(
            parse(b"\xe3\x81\x82\xe3\x80\x81A\xef\xbd\xb1\xe2\x84\xab"),
            [
                seg(Mode::Kanji, 0, 4),
                seg(Mode::Byte, 4, 5),
                seg(Mode::Kanji, 5, 7),
                seg(Mode::Byte, 7, 10),
                seg(Mode::Kanji, 10, 12),
                seg(Mode::Byte, 12, 13),
            ]
        );
    }

    #[test]
    fn test_not_kanji_1() {
        // A lead byte followed by a digit is not a two-byte character.
        assert_eq!(parse(b"\x81\x30"), [seg(Mode::Byte, 0, 1), seg(Mode::Numeric, 1, 2)]);
    }

    #[test]
    fn test_not_kanji_2() {
        // 0xeb does not pair with 0xc0.
        assert_eq!(parse(b"\xeb\xc0"), [seg(Mode::Byte, 0, 1), seg(Mode::Byte, 1, 2)]);
    }

    #[test]
    fn test_not_kanji_3() {
        // 0x7f is never accepted as a second byte.
        assert_eq!(parse(b"\x81\x7f"), [seg(Mode::Byte, 0, 1), seg(Mode::Byte, 1, 2)]);
    }

    #[test]
    fn test_not_kanji_4() {
        // A trailing lead byte with no partner becomes its own Byte segment.
        assert_eq!(parse(b"\x81\x40\x81"), [seg(Mode::Kanji, 0, 2), seg(Mode::Byte, 2, 3)]);
    }
}
251
252pub struct Optimizer<I> {
257 parser: I,
258 last_segment: Segment,
259 last_segment_size: usize,
260 version: Version,
261 ended: bool,
262}
263
264impl<I: Iterator<Item = Segment>> Optimizer<I> {
265 pub fn new(mut segments: I, version: Version) -> Self {
272 match segments.next() {
273 None => Self {
274 parser: segments,
275 last_segment: Segment { mode: Mode::Numeric, begin: 0, end: 0 },
276 last_segment_size: 0,
277 version,
278 ended: true,
279 },
280 Some(segment) => Self {
281 parser: segments,
282 last_segment: segment,
283 last_segment_size: segment.encoded_len(version),
284 version,
285 ended: false,
286 },
287 }
288 }
289}
290
impl<'a> Parser<'a> {
    /// Consumes this parser and wraps it in an [`Optimizer`] that sizes
    /// segments for `version`.
    pub fn optimize(self, version: Version) -> Optimizer<Parser<'a>> {
        Optimizer::new(self, version)
    }
}
296
297impl<I: Iterator<Item = Segment>> Iterator for Optimizer<I> {
298 type Item = Segment;
299
300 fn next(&mut self) -> Option<Segment> {
301 if self.ended {
302 return None;
303 }
304
305 loop {
306 match self.parser.next() {
307 None => {
308 self.ended = true;
309 return Some(self.last_segment);
310 }
311 Some(segment) => {
312 let seg_size = segment.encoded_len(self.version);
313
314 let new_segment = Segment {
315 mode: self.last_segment.mode.max(segment.mode),
316 begin: self.last_segment.begin,
317 end: segment.end,
318 };
319 let new_size = new_segment.encoded_len(self.version);
320
321 if self.last_segment_size + seg_size >= new_size {
322 self.last_segment = new_segment;
323 self.last_segment_size = new_size;
324 } else {
325 let old_segment = self.last_segment;
326 self.last_segment = segment;
327 self.last_segment_size = seg_size;
328 return Some(old_segment);
329 }
330 }
331 }
332 }
333 }
334}
335
336pub fn total_encoded_len(segments: &[Segment], version: Version) -> usize {
338 segments.iter().map(|seg| seg.encoded_len(version)).sum()
339}
340
#[cfg(test)]
mod optimize_tests {
    use crate::optimize::{Optimizer, Segment, total_encoded_len};
    use crate::types::{Mode, Version};

    /// Shorthand for building a segment.
    fn seg(mode: Mode, begin: usize, end: usize) -> Segment {
        Segment { mode, begin, end }
    }

    /// Optimizes `given` for `version` and checks that the result equals
    /// `expected`; whenever the optimizer changed anything, the total
    /// encoded length must have strictly decreased.
    fn test_optimization_result(given: &[Segment], expected: &[Segment], version: Version) {
        let prev_len = total_encoded_len(given, version);
        let opt_segs: Vec<Segment> = Optimizer::new(given.iter().copied(), version).collect();
        let new_len = total_encoded_len(&opt_segs, version);

        if given != opt_segs {
            assert!(prev_len > new_len, "{prev_len} > {new_len}");
        }
        assert_eq!(
            opt_segs,
            expected,
            "Optimization gave something better: {} < {} ({:?})",
            new_len,
            total_encoded_len(expected, version),
            opt_segs
        );
    }

    #[test]
    fn test_example_1() {
        let given = [seg(Mode::Alphanumeric, 0, 3), seg(Mode::Numeric, 3, 6), seg(Mode::Byte, 6, 10)];
        let expected = [seg(Mode::Alphanumeric, 0, 6), seg(Mode::Byte, 6, 10)];
        test_optimization_result(&given, &expected, Version::Normal(1));
    }

    #[test]
    fn test_example_2() {
        let given = [
            seg(Mode::Numeric, 0, 29),
            seg(Mode::Alphanumeric, 29, 30),
            seg(Mode::Numeric, 30, 32),
            seg(Mode::Alphanumeric, 32, 35),
            seg(Mode::Numeric, 35, 38),
        ];
        let expected = [seg(Mode::Numeric, 0, 29), seg(Mode::Alphanumeric, 29, 38)];
        test_optimization_result(&given, &expected, Version::Normal(9));
    }

    #[test]
    fn test_example_3() {
        let given = [
            seg(Mode::Kanji, 0, 4),
            seg(Mode::Alphanumeric, 4, 5),
            seg(Mode::Byte, 5, 6),
            seg(Mode::Kanji, 6, 8),
        ];
        let expected = [seg(Mode::Byte, 0, 8)];
        test_optimization_result(&given, &expected, Version::Normal(1));
    }

    #[test]
    fn test_example_4() {
        // Already optimal: the output must equal the input.
        let segments = [seg(Mode::Kanji, 0, 10), seg(Mode::Byte, 10, 11)];
        test_optimization_result(&segments, &segments, Version::Normal(1));
    }

    #[test]
    fn test_annex_j_guideline_1a() {
        // Three digits followed by one alphanumeric stay separate at Micro(2).
        let segments = [seg(Mode::Numeric, 0, 3), seg(Mode::Alphanumeric, 3, 4)];
        test_optimization_result(&segments, &segments, Version::Micro(2));
    }

    #[test]
    fn test_annex_j_guideline_1b() {
        // With only two digits the runs are merged at Micro(2).
        let given = [seg(Mode::Numeric, 0, 2), seg(Mode::Alphanumeric, 2, 4)];
        let expected = [seg(Mode::Alphanumeric, 0, 4)];
        test_optimization_result(&given, &expected, Version::Micro(2));
    }

    #[test]
    fn test_annex_j_guideline_1c() {
        // Same input as guideline 1a, but at Micro(3) merging wins.
        let given = [seg(Mode::Numeric, 0, 3), seg(Mode::Alphanumeric, 3, 4)];
        let expected = [seg(Mode::Alphanumeric, 0, 4)];
        test_optimization_result(&given, &expected, Version::Micro(3));
    }
}
456
/// Mutually exclusive classes of input bytes, as seen by the parser.
///
/// The discriminant is used as the column index into `STATE_TRANSITION`.
#[derive(Copy, Clone)]
enum ExclCharSet {
    /// The end of the input.
    End = 0,

    /// Space and the punctuation characters `$ % * + - . / :`.
    Symbol = 1,

    /// ASCII digits `0`-`9`.
    Numeric = 2,

    /// Uppercase ASCII letters `A`-`Z`.
    Alpha = 3,

    /// Bytes 0x81-0x9f.
    KanjiHi1 = 4,

    /// Bytes 0xe0-0xea.
    KanjiHi2 = 5,

    /// The byte 0xeb.
    KanjiHi3 = 6,

    /// Bytes 0x40, 0x5b-0x7e, 0x80 and 0xa0-0xbf.
    KanjiLo1 = 7,

    /// Bytes 0xc0-0xdf and 0xec-0xfc.
    KanjiLo2 = 8,

    /// Every byte not covered by another class.
    Byte = 9,
}

impl ExclCharSet {
    /// Classifies a single input byte.
    fn from_u8(c: u8) -> Self {
        match c {
            b' ' | b'$' | b'%' | b'*' | b'+' | b'-'..=b'/' | b':' => Self::Symbol,
            b'0'..=b'9' => Self::Numeric,
            b'A'..=b'Z' => Self::Alpha,
            b'@' | b'['..=b'~' | 0x80 | 0xa0..=0xbf => Self::KanjiLo1,
            0x81..=0x9f => Self::KanjiHi1,
            0xc0..=0xdf | 0xec..=0xfc => Self::KanjiLo2,
            0xe0..=0xea => Self::KanjiHi2,
            0xeb => Self::KanjiHi3,
            _ => Self::Byte,
        }
    }
}
522
/// States of the segment parser's state machine.
///
/// Each discriminant is the offset of that state's row in
/// `STATE_TRANSITION`; the parser adds an `ExclCharSet` discriminant (0-9)
/// to it to select the entry, which is why the values are multiples of 10.
#[derive(Copy, Clone)]
enum State {
    /// Start state; the table also returns here on the `End` marker.
    Init = 0,

    /// Scanning a run of digits.
    Numeric = 10,

    /// Scanning a run of alphanumeric characters.
    Alpha = 20,

    /// Scanning a run of bytes that fit no narrower mode.
    Byte = 30,

    /// Just read a `KanjiHi1` or `KanjiHi2` byte; waiting for its partner.
    KanjiHi12 = 40,

    /// Just read a `KanjiHi3` byte; waiting for its partner.
    KanjiHi3 = 50,

    /// Scanning a run of complete two-byte characters.
    Kanji = 60,
}
549
/// What the parser does when it takes a transition in `STATE_TRANSITION`.
#[derive(Copy, Clone)]
enum Action {
    /// Emit nothing; keep scanning.
    Idle,

    /// Emit the bytes scanned so far as a `Mode::Numeric` segment.
    Numeric,

    /// Emit the bytes scanned so far as a `Mode::Alphanumeric` segment.
    Alpha,

    /// Emit the bytes scanned so far as a `Mode::Byte` segment.
    Byte,

    /// Emit the bytes scanned so far as a `Mode::Kanji` segment.
    Kanji,

    /// Emit everything except the last scanned byte as a `Mode::Kanji`
    /// segment, then that last byte as a one-byte `Mode::Byte` segment. If
    /// the last byte is the only one scanned, just the `Mode::Byte` segment
    /// is emitted.
    KanjiAndSingleByte,
}
572
/// Transition table of the segment parser.
///
/// `STATE_TRANSITION[state as usize + ecs as usize]` is
/// `(next_state, action)`: one row of ten entries per `State`, one column per
/// `ExclCharSet` in discriminant order.
static STATE_TRANSITION: [(State, Action); 70] = [
    // Init state:
    (State::Init, Action::Idle),      // End
    (State::Alpha, Action::Idle),     // Symbol
    (State::Numeric, Action::Idle),   // Numeric
    (State::Alpha, Action::Idle),     // Alpha
    (State::KanjiHi12, Action::Idle), // KanjiHi1
    (State::KanjiHi12, Action::Idle), // KanjiHi2
    (State::KanjiHi3, Action::Idle),  // KanjiHi3
    (State::Byte, Action::Idle),      // KanjiLo1
    (State::Byte, Action::Idle),      // KanjiLo2
    (State::Byte, Action::Idle),      // Byte
    // Numeric state:
    (State::Init, Action::Numeric),      // End
    (State::Alpha, Action::Numeric),     // Symbol
    (State::Numeric, Action::Idle),      // Numeric
    (State::Alpha, Action::Numeric),     // Alpha
    (State::KanjiHi12, Action::Numeric), // KanjiHi1
    (State::KanjiHi12, Action::Numeric), // KanjiHi2
    (State::KanjiHi3, Action::Numeric),  // KanjiHi3
    (State::Byte, Action::Numeric),      // KanjiLo1
    (State::Byte, Action::Numeric),      // KanjiLo2
    (State::Byte, Action::Numeric),      // Byte
    // Alpha state:
    (State::Init, Action::Alpha),      // End
    (State::Alpha, Action::Idle),      // Symbol
    (State::Numeric, Action::Alpha),   // Numeric
    (State::Alpha, Action::Idle),      // Alpha
    (State::KanjiHi12, Action::Alpha), // KanjiHi1
    (State::KanjiHi12, Action::Alpha), // KanjiHi2
    (State::KanjiHi3, Action::Alpha),  // KanjiHi3
    (State::Byte, Action::Alpha),      // KanjiLo1
    (State::Byte, Action::Alpha),      // KanjiLo2
    (State::Byte, Action::Alpha),      // Byte
    // Byte state:
    (State::Init, Action::Byte),      // End
    (State::Alpha, Action::Byte),     // Symbol
    (State::Numeric, Action::Byte),   // Numeric
    (State::Alpha, Action::Byte),     // Alpha
    (State::KanjiHi12, Action::Byte), // KanjiHi1
    (State::KanjiHi12, Action::Byte), // KanjiHi2
    (State::KanjiHi3, Action::Byte),  // KanjiHi3
    (State::Byte, Action::Idle),      // KanjiLo1
    (State::Byte, Action::Idle),      // KanjiLo2
    (State::Byte, Action::Idle),      // Byte
    // KanjiHi12 state:
    (State::Init, Action::KanjiAndSingleByte),    // End
    (State::Alpha, Action::KanjiAndSingleByte),   // Symbol
    (State::Numeric, Action::KanjiAndSingleByte), // Numeric
    (State::Kanji, Action::Idle),                 // Alpha
    (State::Kanji, Action::Idle),                 // KanjiHi1
    (State::Kanji, Action::Idle),                 // KanjiHi2
    (State::Kanji, Action::Idle),                 // KanjiHi3
    (State::Kanji, Action::Idle),                 // KanjiLo1
    (State::Kanji, Action::Idle),                 // KanjiLo2
    (State::Byte, Action::KanjiAndSingleByte),    // Byte
    // KanjiHi3 state:
    (State::Init, Action::KanjiAndSingleByte),      // End
    (State::Alpha, Action::KanjiAndSingleByte),     // Symbol
    (State::Numeric, Action::KanjiAndSingleByte),   // Numeric
    (State::Kanji, Action::Idle),                   // Alpha
    (State::Kanji, Action::Idle),                   // KanjiHi1
    (State::KanjiHi12, Action::KanjiAndSingleByte), // KanjiHi2
    (State::KanjiHi3, Action::KanjiAndSingleByte),  // KanjiHi3
    (State::Kanji, Action::Idle),                   // KanjiLo1
    (State::Byte, Action::KanjiAndSingleByte),      // KanjiLo2
    (State::Byte, Action::KanjiAndSingleByte),      // Byte
    // Kanji state:
    (State::Init, Action::Kanji),     // End
    (State::Alpha, Action::Kanji),    // Symbol
    (State::Numeric, Action::Kanji),  // Numeric
    (State::Alpha, Action::Kanji),    // Alpha
    (State::KanjiHi12, Action::Idle), // KanjiHi1
    (State::KanjiHi12, Action::Idle), // KanjiHi2
    (State::KanjiHi3, Action::Idle),  // KanjiHi3
    (State::Byte, Action::Kanji),     // KanjiLo1
    (State::Byte, Action::Kanji),     // KanjiLo2
    (State::Byte, Action::Kanji),     // Byte
];
654
655