use crate::types::{Mode, Version};
use std::borrow::Borrow;
use std::slice::Iter;
#[derive(PartialEq, Eq, Debug, Copy, Clone)]
pub struct Segment {
pub mode: Mode,
pub begin: usize,
pub end: usize,
}
impl Segment {
pub fn encoded_len(&self, version: Version) -> usize {
let byte_size = self.end - self.begin;
let chars_count = if self.mode == Mode::Kanji {
byte_size / 2
} else {
byte_size
};
let mode_bits_count = version.mode_bits_count();
let length_bits_count = self.mode.length_bits_count(version);
let data_bits_count = self.mode.data_bits_count(chars_count);
mode_bits_count + length_bits_count + data_bits_count
}
fn assert_invariants(&self) {
assert!(self.begin < self.end);
}
}
struct EcsIter<I> {
base: I,
index: usize,
ended: bool,
}
impl<'a, I: Iterator<Item = &'a u8>> Iterator for EcsIter<I> {
type Item = (usize, ExclCharSet);
fn next(&mut self) -> Option<(usize, ExclCharSet)> {
if self.ended {
return None;
}
match self.base.next() {
None => {
self.ended = true;
Some((self.index, ExclCharSet::End))
}
Some(c) => {
let old_index = self.index;
self.index += 1;
Some((old_index, ExclCharSet::from_u8(*c)))
}
}
}
}
pub struct Parser<'a> {
ecs_iter: EcsIter<Iter<'a, u8>>,
state: State,
begin: usize,
pending_single_byte: bool,
}
impl<'a> Parser<'a> {
pub fn new(data: &[u8]) -> Parser {
Parser {
ecs_iter: EcsIter {
base: data.iter(),
index: 0,
ended: false,
},
state: State::Init,
begin: 0,
pending_single_byte: false,
}
}
}
impl<'a> Iterator for Parser<'a> {
type Item = Segment;
fn next(&mut self) -> Option<Segment> {
if self.pending_single_byte {
self.pending_single_byte = false;
self.begin += 1;
return Some(Segment {
mode: Mode::Byte,
begin: self.begin - 1,
end: self.begin,
});
}
loop {
let (i, ecs) = match self.ecs_iter.next() {
None => return None,
Some(a) => a,
};
let (next_state, action) = STATE_TRANSITION[self.state as usize + ecs as usize];
self.state = next_state;
let old_begin = self.begin;
let push_mode = match action {
Action::Idle => continue,
Action::Numeric => Mode::Numeric,
Action::Alpha => Mode::Alphanumeric,
Action::Byte => Mode::Byte,
Action::Kanji => Mode::Kanji,
Action::KanjiAndSingleByte => {
let next_begin = i - 1;
if self.begin == next_begin {
Mode::Byte
} else {
self.pending_single_byte = true;
self.begin = next_begin;
return Some(Segment {
mode: Mode::Kanji,
begin: old_begin,
end: next_begin,
});
}
}
};
self.begin = i;
return Some(Segment {
mode: push_mode,
begin: old_begin,
end: i,
});
}
}
}
#[cfg(test)]
mod parse_tests {
use crate::optimize::{Parser, Segment};
use crate::types::Mode;
fn parse(data: &[u8]) -> Vec<Segment> {
Parser::new(data).collect()
}
#[test]
fn test_parse_1() {
let segs = parse(b"01049123451234591597033130128%10ABC123");
assert_eq!(
segs,
vec![
Segment {
mode: Mode::Numeric,
begin: 0,
end: 29
},
Segment {
mode: Mode::Alphanumeric,
begin: 29,
end: 30
},
Segment {
mode: Mode::Numeric,
begin: 30,
end: 32
},
Segment {
mode: Mode::Alphanumeric,
begin: 32,
end: 35
},
Segment {
mode: Mode::Numeric,
begin: 35,
end: 38
},
]
);
}
#[test]
fn test_parse_shift_jis_example_1() {
let segs = parse(b"\x82\xa0\x81\x41\x41\xb1\x81\xf0"); assert_eq!(
segs,
vec![
Segment {
mode: Mode::Kanji,
begin: 0,
end: 4
},
Segment {
mode: Mode::Alphanumeric,
begin: 4,
end: 5
},
Segment {
mode: Mode::Byte,
begin: 5,
end: 6
},
Segment {
mode: Mode::Kanji,
begin: 6,
end: 8
},
]
);
}
#[test]
fn test_parse_utf_8() {
let segs = parse(b"\xe3\x81\x82\xe3\x80\x81A\xef\xbd\xb1\xe2\x84\xab");
assert_eq!(
segs,
vec![
Segment {
mode: Mode::Kanji,
begin: 0,
end: 4
},
Segment {
mode: Mode::Byte,
begin: 4,
end: 5
},
Segment {
mode: Mode::Kanji,
begin: 5,
end: 7
},
Segment {
mode: Mode::Byte,
begin: 7,
end: 10
},
Segment {
mode: Mode::Kanji,
begin: 10,
end: 12
},
Segment {
mode: Mode::Byte,
begin: 12,
end: 13
},
]
);
}
#[test]
fn test_not_kanji_1() {
let segs = parse(b"\x81\x30");
assert_eq!(
segs,
vec![
Segment {
mode: Mode::Byte,
begin: 0,
end: 1
},
Segment {
mode: Mode::Numeric,
begin: 1,
end: 2
},
]
);
}
#[test]
fn test_not_kanji_2() {
let segs = parse(b"\xeb\xc0");
assert_eq!(
segs,
vec![
Segment {
mode: Mode::Byte,
begin: 0,
end: 1
},
Segment {
mode: Mode::Byte,
begin: 1,
end: 2
},
]
);
}
#[test]
fn test_not_kanji_3() {
let segs = parse(b"\x81\x7f");
assert_eq!(
segs,
vec![
Segment {
mode: Mode::Byte,
begin: 0,
end: 1
},
Segment {
mode: Mode::Byte,
begin: 1,
end: 2
},
]
);
}
#[test]
fn test_not_kanji_4() {
let segs = parse(b"\x81\x40\x81");
assert_eq!(
segs,
vec![
Segment {
mode: Mode::Kanji,
begin: 0,
end: 2
},
Segment {
mode: Mode::Byte,
begin: 2,
end: 3
},
]
);
}
}
mod shortest_path {
use super::Segment;
use crate::types::{Mode, Version};
use std::cmp::{Ordering, PartialOrd};
#[derive(Clone)]
struct Predecessor {
index_of_segment: usize,
mode: Mode,
}
#[derive(Clone)]
struct ShortestPath {
length: usize,
predecessor: Option<Predecessor>,
}
pub struct AlgorithmState<'a> {
paths: Vec<Option<ShortestPath>>,
segments: &'a [Segment],
version: Version,
}
fn get_path_index(index_of_segment: usize, mode: Mode) -> usize {
index_of_segment * Mode::ALL_MODES.len() + (mode as usize)
}
impl<'a> AlgorithmState<'a> {
fn get_shortest_path(
&self,
index_of_segment: usize,
ending_mode: Mode,
) -> &Option<ShortestPath> {
&self.paths[get_path_index(index_of_segment, ending_mode)]
}
fn get_end_of_predecessor(&self, predecessor: &Option<Predecessor>) -> usize {
match predecessor {
None => 0,
Some(predecessor) => self.segments[predecessor.index_of_segment].end,
}
}
fn compute_shortest_path(
&self,
index_of_segment: usize,
desired_mode: Mode,
) -> Option<ShortestPath> {
let curr_segment = self.segments[index_of_segment];
match curr_segment.mode.partial_cmp(&desired_mode) {
None | Some(Ordering::Greater) => return None,
Some(Ordering::Less) | Some(Ordering::Equal) => (),
}
let length_of_reencoding_curr_segment_in_desired_mode = {
let reencoded_segment = Segment {
begin: curr_segment.begin,
end: curr_segment.end,
mode: desired_mode,
};
reencoded_segment.encoded_len(self.version)
};
if index_of_segment == 0 {
return Some(ShortestPath {
length: length_of_reencoding_curr_segment_in_desired_mode,
predecessor: None,
});
}
let prev_index = index_of_segment - 1;
Mode::ALL_MODES
.iter()
.filter_map(|&prev_mode| {
self.get_shortest_path(prev_index, prev_mode)
.as_ref()
.map(|prev_path| (prev_mode, prev_path))
})
.map(|(prev_mode, prev_path)| {
if prev_mode == desired_mode {
let merged_length = {
let merged_segment = Segment {
begin: self.get_end_of_predecessor(&prev_path.predecessor),
end: curr_segment.end,
mode: desired_mode,
};
merged_segment.encoded_len(self.version)
};
let length_up_to_merged_segment = match prev_path.predecessor.as_ref() {
None => 0,
Some(predecessor) => {
self.get_shortest_path(
predecessor.index_of_segment,
predecessor.mode,
)
.as_ref()
.expect("Predecessors should point at a valid path")
.length
}
};
ShortestPath {
length: length_up_to_merged_segment + merged_length,
predecessor: prev_path.predecessor.clone(),
}
} else {
ShortestPath {
length: prev_path.length
+ length_of_reencoding_curr_segment_in_desired_mode,
predecessor: Some(Predecessor {
index_of_segment: prev_index,
mode: prev_mode,
}),
}
}
})
.min_by_key(|path| path.length)
}
pub fn find_shortest_paths(segments: &'a [Segment], version: Version) -> Self {
let mut this = AlgorithmState::<'a> {
segments,
paths: vec![None; segments.len() * Mode::ALL_MODES.len()],
version,
};
for index_of_segment in 0..segments.len() {
for &mode in Mode::ALL_MODES {
this.paths[get_path_index(index_of_segment, mode)] =
this.compute_shortest_path(index_of_segment, mode);
}
}
this
}
pub fn construct_best_segmentation(&self) -> Vec<Segment> {
let mut result = Vec::new();
let mut curr_index_of_segment = self.segments.len() - 1;
let (mut best_mode, mut shortest_path) = Mode::ALL_MODES
.iter()
.filter_map(|&mode| {
self.get_shortest_path(curr_index_of_segment, mode)
.as_ref()
.map(|shortest_path| (mode, shortest_path))
})
.min_by_key(|(_mode, path)| path.length)
.expect("At least one path should always exist");
loop {
result.push(Segment {
begin: self.get_end_of_predecessor(&shortest_path.predecessor),
end: self.segments[curr_index_of_segment].end,
mode: best_mode,
});
match shortest_path.predecessor.as_ref() {
None => {
result.reverse();
return result;
}
Some(predecessor) => {
curr_index_of_segment = predecessor.index_of_segment;
best_mode = predecessor.mode;
shortest_path = self
.get_shortest_path(curr_index_of_segment, best_mode)
.as_ref()
.expect("Predecessors should point at valid paths");
}
};
}
}
}
}
fn assert_valid_segmentation(segments: &[Segment]) {
if segments.is_empty() {
return;
}
for segment in segments.iter() {
segment.assert_invariants();
}
let consecutive_pairs = segments[..(segments.len() - 1)]
.iter()
.zip(segments[1..].iter());
for (prev, next) in consecutive_pairs {
assert_eq!(prev.end, next.begin, "Non-adjacent segments");
}
}
pub fn optimize_segmentation(segments: &[Segment], version: Version) -> Vec<Segment> {
assert_valid_segmentation(segments);
if segments.is_empty() {
return Vec::new();
}
let optimized_segments = shortest_path::AlgorithmState::find_shortest_paths(segments, version)
.construct_best_segmentation();
#[cfg(fuzzing)]
optimize_test_helpers::assert_valid_optimization(&segments, &*optimized_segments, version);
optimized_segments
}
pub fn total_encoded_len<I>(segments: I, version: Version) -> usize
where
I: IntoIterator,
I::Item: Borrow<Segment>,
{
segments
.into_iter()
.map(|seg| seg.borrow().encoded_len(version))
.sum()
}
#[cfg(any(fuzzing, test))]
mod optimize_test_helpers {
use super::{assert_valid_segmentation, total_encoded_len, Segment};
use crate::types::{Mode, Version};
use std::cmp::Ordering;
pub fn assert_segmentation_equivalence(given: &[Segment], opt: &[Segment]) {
if given.is_empty() {
assert!(opt.is_empty());
return;
}
let begin = given.first().unwrap().begin;
let end = given.last().unwrap().end;
assert!(!opt.is_empty());
assert_eq!(begin, opt.first().unwrap().begin);
assert_eq!(end, opt.last().unwrap().end);
for i in begin..end {
fn find_mode(segments: &[Segment], i: usize) {
segments
.iter()
.filter(|s| (s.begin <= i) && (i < s.end))
.next()
.expect("Expecting exactly one segment")
.mode;
}
let given_mode = find_mode(&*given, i);
let opt_mode = find_mode(&*given, i);
match given_mode.partial_cmp(&opt_mode) {
Some(Ordering::Less) | Some(Ordering::Equal) => (),
_ => panic!(
"Character #{} is {:?}, which {:?} may not represent",
i, given_mode, opt_mode,
),
}
}
}
pub fn assert_valid_optimization(input: &[Segment], optimized: &[Segment], version: Version) {
assert_valid_segmentation(input);
assert_valid_segmentation(optimized);
assert_segmentation_equivalence(input, optimized);
let input_len = total_encoded_len(input, version);
let optimized_len = total_encoded_len(optimized, version);
assert!(optimized_len <= input_len);
let single_bytes_segment_len = if input.is_empty() {
0
} else {
Segment {
begin: input.first().unwrap().begin,
end: input.last().unwrap().end,
mode: Mode::Byte,
}
.encoded_len(version)
};
assert!(optimized_len <= single_bytes_segment_len);
}
}
#[cfg(test)]
mod optimize_tests {
use super::optimize_test_helpers::*;
use super::{assert_valid_segmentation, optimize_segmentation, total_encoded_len, Segment};
use crate::types::{Mode, Version};
use std::cmp::Ordering;
fn test_optimization_result(given: Vec<Segment>, expected: Vec<Segment>, version: Version) {
assert_valid_segmentation(&given);
assert_valid_segmentation(&expected);
assert_segmentation_equivalence(&given, &expected);
let opt_segs = optimize_segmentation(given.as_slice(), version);
assert_valid_optimization(&given, &opt_segs, version);
if opt_segs != expected {
let actual_len = total_encoded_len(&opt_segs, version);
let expected_len = total_encoded_len(&expected, version);
let msg = match actual_len.cmp(&expected_len) {
Ordering::Less => "Optimization gave something better than expected",
Ordering::Equal => "Optimization gave something different, but just as short",
Ordering::Greater => "Optimization gave something worse than expected",
};
panic!(
"{}: expected_len={}; actual_len={}; opt_segs=({:?})",
msg, expected_len, actual_len, opt_segs
);
}
}
#[test]
fn test_example_1() {
test_optimization_result(
vec![
Segment {
mode: Mode::Alphanumeric,
begin: 0,
end: 3,
},
Segment {
mode: Mode::Numeric,
begin: 3,
end: 6,
},
Segment {
mode: Mode::Byte,
begin: 6,
end: 10,
},
],
vec![
Segment {
mode: Mode::Alphanumeric,
begin: 0,
end: 6,
},
Segment {
mode: Mode::Byte,
begin: 6,
end: 10,
},
],
Version::Normal(1),
);
}
#[test]
fn test_example_2() {
test_optimization_result(
vec![
Segment {
mode: Mode::Numeric,
begin: 0,
end: 29,
},
Segment {
mode: Mode::Alphanumeric,
begin: 29,
end: 30,
},
Segment {
mode: Mode::Numeric,
begin: 30,
end: 32,
},
Segment {
mode: Mode::Alphanumeric,
begin: 32,
end: 35,
},
Segment {
mode: Mode::Numeric,
begin: 35,
end: 38,
},
],
vec![
Segment {
mode: Mode::Numeric,
begin: 0,
end: 29,
},
Segment {
mode: Mode::Alphanumeric,
begin: 29,
end: 38,
},
],
Version::Normal(9),
);
}
#[test]
fn test_example_3() {
test_optimization_result(
vec![
Segment {
mode: Mode::Kanji,
begin: 0,
end: 4,
},
Segment {
mode: Mode::Alphanumeric,
begin: 4,
end: 5,
},
Segment {
mode: Mode::Byte,
begin: 5,
end: 6,
},
Segment {
mode: Mode::Kanji,
begin: 6,
end: 8,
},
],
vec![Segment {
mode: Mode::Byte,
begin: 0,
end: 8,
}],
Version::Normal(1),
);
}
#[test]
fn test_example_4() {
test_optimization_result(
vec![
Segment {
mode: Mode::Kanji,
begin: 0,
end: 10,
},
Segment {
mode: Mode::Byte,
begin: 10,
end: 11,
},
],
vec![
Segment {
mode: Mode::Kanji,
begin: 0,
end: 10,
},
Segment {
mode: Mode::Byte,
begin: 10,
end: 11,
},
],
Version::Normal(1),
);
}
#[test]
fn test_empty() {
test_optimization_result(vec![], vec![], Version::Normal(1));
}
#[test]
fn test_example_where_greedy_merging_is_suboptimal() {
let mut input = vec![Segment {
mode: Mode::Byte,
begin: 0,
end: 10,
}];
for _ in 0..5 {
for &mode in [Mode::Alphanumeric, Mode::Numeric].iter() {
let begin = input.last().unwrap().end;
let end = begin + 1;
input.push(Segment { mode, begin, end });
}
}
test_optimization_result(
input,
vec![
Segment {
mode: Mode::Byte,
begin: 0,
end: 10,
},
Segment {
mode: Mode::Alphanumeric,
begin: 10,
end: 20,
},
],
Version::Normal(40),
);
}
#[test]
fn test_example_where_merging_two_consecutive_segments_is_always_harmful() {
let mut input = vec![];
for _ in 0..5 {
for &mode in [Mode::Byte, Mode::Alphanumeric].iter() {
let begin = input.last().map(|s: &Segment| s.end).unwrap_or_default();
let end = begin + 10;
input.push(Segment { mode, begin, end });
}
}
test_optimization_result(
input,
vec![
Segment {
mode: Mode::Byte,
begin: 0,
end: 90,
},
Segment {
mode: Mode::Alphanumeric,
begin: 90,
end: 100,
},
],
Version::Normal(40),
);
}
#[test]
fn test_optimize_base64() {
let input: &[u8] = include_bytes!("../test_data/large_base64.in");
let input: Vec<Segment> = super::Parser::new(input).collect();
test_optimization_result(
input,
vec![
Segment {
mode: Mode::Byte,
begin: 0,
end: 334,
},
Segment {
mode: Mode::Alphanumeric,
begin: 334,
end: 349,
},
Segment {
mode: Mode::Byte,
begin: 349,
end: 387,
},
Segment {
mode: Mode::Alphanumeric,
begin: 387,
end: 402,
},
Segment {
mode: Mode::Byte,
begin: 402,
end: 850,
},
Segment {
mode: Mode::Alphanumeric,
begin: 850,
end: 866,
},
Segment {
mode: Mode::Byte,
begin: 866,
end: 1146,
},
Segment {
mode: Mode::Alphanumeric,
begin: 1146,
end: 1162,
},
Segment {
mode: Mode::Byte,
begin: 1162,
end: 2474,
},
Segment {
mode: Mode::Alphanumeric,
begin: 2474,
end: 2489,
},
Segment {
mode: Mode::Byte,
begin: 2489,
end: 2618,
},
Segment {
mode: Mode::Alphanumeric,
begin: 2618,
end: 2641,
},
Segment {
mode: Mode::Byte,
begin: 2641,
end: 2707,
},
Segment {
mode: Mode::Alphanumeric,
begin: 2707,
end: 2722,
},
Segment {
mode: Mode::Byte,
begin: 2722,
end: 2880,
},
],
Version::Normal(40),
);
}
#[test]
fn test_annex_j_guideline_1a() {
test_optimization_result(
vec![
Segment {
mode: Mode::Numeric,
begin: 0,
end: 3,
},
Segment {
mode: Mode::Alphanumeric,
begin: 3,
end: 4,
},
],
vec![
Segment {
mode: Mode::Numeric,
begin: 0,
end: 3,
},
Segment {
mode: Mode::Alphanumeric,
begin: 3,
end: 4,
},
],
Version::Micro(2),
);
}
#[test]
fn test_annex_j_guideline_1b() {
test_optimization_result(
vec![
Segment {
mode: Mode::Numeric,
begin: 0,
end: 2,
},
Segment {
mode: Mode::Alphanumeric,
begin: 2,
end: 4,
},
],
vec![Segment {
mode: Mode::Alphanumeric,
begin: 0,
end: 4,
}],
Version::Micro(2),
);
}
#[test]
fn test_annex_j_guideline_1c() {
test_optimization_result(
vec![
Segment {
mode: Mode::Numeric,
begin: 0,
end: 3,
},
Segment {
mode: Mode::Alphanumeric,
begin: 3,
end: 4,
},
],
vec![Segment {
mode: Mode::Alphanumeric,
begin: 0,
end: 4,
}],
Version::Micro(3),
);
}
}
#[cfg(bench)]
mod bench {
use super::{optimize_segmentation, total_encoded_len, Parser, Segment};
use crate::Version;
fn bench_optimize(data: &[u8], version: Version, bencher: &mut test::Bencher) {
bencher.iter(|| {
let segments: Vec<Segment> = Parser::new(data).collect();
let segments = optimize_segmentation(segments.as_slice(), version);
test::black_box(total_encoded_len(segments, version))
});
}
#[bench]
fn bench_optimize_example1(bencher: &mut test::Bencher) {
let data = b"QR\x83R\x81[\x83h\x81i\x83L\x83\x85\x81[\x83A\x81[\x83\x8b\x83R\x81[\x83h\x81j\
\x82\xc6\x82\xcd\x81A1994\x94N\x82\xc9\x83f\x83\x93\x83\\\x81[\x82\xcc\x8aJ\
\x94\xad\x95\x94\x96\xe5\x81i\x8c\xbb\x8d\xdd\x82\xcd\x95\xaa\x97\xa3\x82\xb5\x83f\
\x83\x93\x83\\\x81[\x83E\x83F\x81[\x83u\x81j\x82\xaa\x8aJ\x94\xad\x82\xb5\x82\xbd\
\x83}\x83g\x83\x8a\x83b\x83N\x83X\x8c^\x93\xf1\x8e\x9f\x8c\xb3\x83R\x81[\x83h\
\x82\xc5\x82\xa0\x82\xe9\x81B\x82\xc8\x82\xa8\x81AQR\x83R\x81[\x83h\x82\xc6\
\x82\xa2\x82\xa4\x96\xbc\x8f\xcc\x81i\x82\xa8\x82\xe6\x82\xd1\x92P\x8c\xea\x81j\
\x82\xcd\x83f\x83\x93\x83\\\x81[\x83E\x83F\x81[\x83u\x82\xcc\x93o\x98^\x8f\xa4\
\x95W\x81i\x91\xe64075066\x8d\x86\x81j\x82\xc5\x82\xa0\x82\xe9\x81BQR\x82\xcd\
Quick Response\x82\xc9\x97R\x97\x88\x82\xb5\x81A\x8d\x82\x91\xac\x93\xc7\x82\xdd\
\x8e\xe6\x82\xe8\x82\xaa\x82\xc5\x82\xab\x82\xe9\x82\xe6\x82\xa4\x82\xc9\x8aJ\
\x94\xad\x82\xb3\x82\xea\x82\xbd\x81B\x93\x96\x8f\x89\x82\xcd\x8e\xa9\x93\xae\
\x8e\xd4\x95\x94\x95i\x8dH\x8f\xea\x82\xe2\x94z\x91\x97\x83Z\x83\x93\x83^\x81[\
\x82\xc8\x82\xc7\x82\xc5\x82\xcc\x8eg\x97p\x82\xf0\x94O\x93\xaa\x82\xc9\x8aJ\
\x94\xad\x82\xb3\x82\xea\x82\xbd\x82\xaa\x81A\x8c\xbb\x8d\xdd\x82\xc5\x82\xcd\x83X\
\x83}\x81[\x83g\x83t\x83H\x83\x93\x82\xcc\x95\x81\x8by\x82\xc8\x82\xc7\x82\xc9\
\x82\xe6\x82\xe8\x93\xfa\x96{\x82\xc9\x8c\xc0\x82\xe7\x82\xb8\x90\xa2\x8aE\x93I\
\x82\xc9\x95\x81\x8by\x82\xb5\x82\xc4\x82\xa2\x82\xe9\x81B";
bench_optimize(data, Version::Normal(15), bencher);
}
#[bench]
fn bench_optimize_base64(bencher: &mut test::Bencher) {
let data = include_bytes!("../test_data/large_base64.in");
bench_optimize(data, Version::Normal(40), bencher);
}
}
#[derive(Copy, Clone)]
enum ExclCharSet {
End = 0,
Symbol = 1,
Numeric = 2,
Alpha = 3,
KanjiHi1 = 4,
KanjiHi2 = 5,
KanjiHi3 = 6,
KanjiLo1 = 7,
KanjiLo2 = 8,
Byte = 9,
}
impl ExclCharSet {
fn from_u8(c: u8) -> Self {
match c {
0x20 | 0x24 | 0x25 | 0x2a | 0x2b | 0x2d..=0x2f | 0x3a => ExclCharSet::Symbol,
0x30..=0x39 => ExclCharSet::Numeric,
0x41..=0x5a => ExclCharSet::Alpha,
0x81..=0x9f => ExclCharSet::KanjiHi1,
0xe0..=0xea => ExclCharSet::KanjiHi2,
0xeb => ExclCharSet::KanjiHi3,
0x40 | 0x5b..=0x7e | 0x80 | 0xa0..=0xbf => ExclCharSet::KanjiLo1,
0xc0..=0xdf | 0xec..=0xfc => ExclCharSet::KanjiLo2,
_ => ExclCharSet::Byte,
}
}
}
#[derive(Copy, Clone)]
enum State {
Init = 0,
Numeric = 10,
Alpha = 20,
Byte = 30,
KanjiHi12 = 40,
KanjiHi3 = 50,
Kanji = 60,
}
#[derive(Copy, Clone)]
enum Action {
Idle,
Numeric,
Alpha,
Byte,
Kanji,
KanjiAndSingleByte,
}
static STATE_TRANSITION: [(State, Action); 70] = [
(State::Init, Action::Idle), (State::Alpha, Action::Idle), (State::Numeric, Action::Idle), (State::Alpha, Action::Idle), (State::KanjiHi12, Action::Idle), (State::KanjiHi12, Action::Idle), (State::KanjiHi3, Action::Idle), (State::Byte, Action::Idle), (State::Byte, Action::Idle), (State::Byte, Action::Idle), (State::Init, Action::Numeric), (State::Alpha, Action::Numeric), (State::Numeric, Action::Idle), (State::Alpha, Action::Numeric), (State::KanjiHi12, Action::Numeric), (State::KanjiHi12, Action::Numeric), (State::KanjiHi3, Action::Numeric), (State::Byte, Action::Numeric), (State::Byte, Action::Numeric), (State::Byte, Action::Numeric), (State::Init, Action::Alpha), (State::Alpha, Action::Idle), (State::Numeric, Action::Alpha), (State::Alpha, Action::Idle), (State::KanjiHi12, Action::Alpha), (State::KanjiHi12, Action::Alpha), (State::KanjiHi3, Action::Alpha), (State::Byte, Action::Alpha), (State::Byte, Action::Alpha), (State::Byte, Action::Alpha), (State::Init, Action::Byte), (State::Alpha, Action::Byte), (State::Numeric, Action::Byte), (State::Alpha, Action::Byte), (State::KanjiHi12, Action::Byte), (State::KanjiHi12, Action::Byte), (State::KanjiHi3, Action::Byte), (State::Byte, Action::Idle), (State::Byte, Action::Idle), (State::Byte, Action::Idle), (State::Init, Action::KanjiAndSingleByte), (State::Alpha, Action::KanjiAndSingleByte), (State::Numeric, Action::KanjiAndSingleByte), (State::Kanji, Action::Idle), (State::Kanji, Action::Idle), (State::Kanji, Action::Idle), (State::Kanji, Action::Idle), (State::Kanji, Action::Idle), (State::Kanji, Action::Idle), (State::Byte, Action::KanjiAndSingleByte), (State::Init, Action::KanjiAndSingleByte), (State::Alpha, Action::KanjiAndSingleByte), (State::Numeric, Action::KanjiAndSingleByte), (State::Kanji, Action::Idle), (State::Kanji, Action::Idle), (State::KanjiHi12, Action::KanjiAndSingleByte), (State::KanjiHi3, Action::KanjiAndSingleByte), (State::Kanji, Action::Idle), (State::Byte, Action::KanjiAndSingleByte), (State::Byte, Action::KanjiAndSingleByte), (State::Init, Action::Kanji), (State::Alpha, Action::Kanji), (State::Numeric, Action::Kanji), (State::Alpha, Action::Kanji), (State::KanjiHi12, Action::Idle), (State::KanjiHi12, Action::Idle), (State::KanjiHi3, Action::Idle), (State::Byte, Action::Kanji), (State::Byte, Action::Kanji), (State::Byte, Action::Kanji), ];