1use crate::index_korean as index;
8use crate::types::*;
9use crate::util::StrCharIndex;
10use std::convert::Into;
11use std::default::Default;
12
13#[derive(Clone, Copy)]
25pub struct Windows949Encoding;
26
27impl Encoding for Windows949Encoding {
28 fn name(&self) -> &'static str {
29 "windows-949"
30 }
31 fn whatwg_name(&self) -> Option<&'static str> {
32 Some("euc-kr")
33 } fn raw_encoder(&self) -> Box<dyn RawEncoder> {
35 Windows949Encoder::new()
36 }
37 fn raw_decoder(&self) -> Box<dyn RawDecoder> {
38 Windows949Decoder::new()
39 }
40}
41
42#[derive(Clone, Copy)]
44pub struct Windows949Encoder;
45
46impl Windows949Encoder {
47 #[allow(clippy::new_ret_no_self)]
48 pub fn new() -> Box<dyn RawEncoder> {
49 Box::new(Windows949Encoder)
50 }
51}
52
53impl RawEncoder for Windows949Encoder {
54 fn from_self(&self) -> Box<dyn RawEncoder> {
55 Windows949Encoder::new()
56 }
57 fn is_ascii_compatible(&self) -> bool {
58 true
59 }
60
61 fn raw_feed(
62 &mut self,
63 input: &str,
64 output: &mut dyn ByteWriter,
65 ) -> (usize, Option<CodecError>) {
66 output.writer_hint(input.len());
67
68 for ((i, j), ch) in input.index_iter() {
69 if ch <= '\u{7f}' {
70 output.write_byte(ch as u8);
71 } else {
72 let ptr = index::euc_kr::backward(ch as u32);
73 if ptr == 0xffff {
74 return (
75 i,
76 Some(CodecError {
77 upto: j as isize,
78 cause: "unrepresentable character".into(),
79 }),
80 );
81 } else {
82 output.write_byte((ptr / 190 + 0x81) as u8);
83 output.write_byte((ptr % 190 + 0x41) as u8);
84 }
85 }
86 }
87 (input.len(), None)
88 }
89
90 fn raw_finish(&mut self, _output: &mut dyn ByteWriter) -> Option<CodecError> {
91 None
92 }
93}
94
95#[derive(Clone, Copy)]
97struct Windows949Decoder {
98 st: windows949::State,
99}
100
101impl Windows949Decoder {
102 #[allow(clippy::new_ret_no_self)]
103 pub fn new() -> Box<dyn RawDecoder> {
104 Box::new(Windows949Decoder {
105 st: Default::default(),
106 })
107 }
108}
109
110impl RawDecoder for Windows949Decoder {
111 fn from_self(&self) -> Box<dyn RawDecoder> {
112 Windows949Decoder::new()
113 }
114 fn is_ascii_compatible(&self) -> bool {
115 true
116 }
117
118 fn raw_feed(
119 &mut self,
120 input: &[u8],
121 output: &mut dyn StringWriter,
122 ) -> (usize, Option<CodecError>) {
123 let (st, processed, err) = windows949::raw_feed(self.st, input, output, &());
124 self.st = st;
125 (processed, err)
126 }
127
128 fn raw_finish(&mut self, output: &mut dyn StringWriter) -> Option<CodecError> {
129 let (st, err) = windows949::raw_finish(self.st, output, &());
130 self.st = st;
131 err
132 }
133}
134
// Decoder state machine for Windows-949, expanded by `stateful_decoder!` into
// the `windows949` module (the source of `windows949::State`,
// `windows949::raw_feed` and `windows949::raw_finish` used by the decoder).
//
// NOTE: only plain `//` comments are used inside the invocation; `///` doc
// comments would become `#[doc]` tokens and be handed to the macro as input.
stateful_decoder! {
    module windows949;

    // Looks up a lead/trail byte pair in the `euc_kr` forward index.
    // Pairs outside lead 0x81..=0xfe / trail 0x41..=0xfe are looked up with the
    // out-of-range index 0xffff.
    internal pub fn map_two_bytes(lead: u8, trail: u8) -> u32 {
        use crate::index_korean as index;

        // Widen first so the index arithmetic below is done in u16, not u8.
        let lead = lead as u16;
        let trail = trail as u16;
        let index = match (lead, trail) {
            // 190 trail positions (0x41..=0xfe) per lead byte, leads counted from 0x81.
            (0x81..=0xfe, 0x41..=0xfe) => (lead - 0x81) * 190 + (trail - 0x41),
            (_, _) => 0xffff,
        };
        index::euc_kr::forward(index)
    }

initial:
    // No lead byte is pending.
    state S0(ctx: Context) {
        // 0x00..=0x7f is emitted as the code point of the same value.
        case b @ 0x00..=0x7f => ctx.emit(b as u32);
        // A lead byte: remember it and wait for the trail byte.
        case b @ 0x81..=0xfe => S1(ctx, b);
        // Everything else (0x80 and 0xff) is rejected immediately.
        case _ => ctx.err("invalid sequence");
    }

transient:
    // A lead byte is pending; `b` is the candidate trail byte.
    state S1(ctx: Context, lead: u8) {
        case b => match map_two_bytes(lead, b) {
            // 0xffff from the lookup is treated as "no mapping".
            0xffff => {
                // A trail byte below 0x80 is backed up by one byte, presumably so
                // that it is decoded again on its own rather than swallowed by
                // the error -- confirm against `backup_and_err`.
                let backup = if b < 0x80 {1} else {0};
                ctx.backup_and_err(backup, "invalid sequence")
            },
            ch => ctx.emit(ch as u32)
        };
    }
}
170
// Unit tests and benchmarks for the Windows-949 encoder and decoder.
//
// NOTE(review): the `assert_feed_*!` / `assert_finish_*!` macros are defined
// elsewhere. From their use here the arguments appear to be
// `assert_feed_ok!(coder, processed, unprocessed, output)` and
// `assert_feed_err!(coder, processed, problem, remaining, output)` -- confirm
// against the macro definitions.
#[cfg(test)]
mod windows949_tests {
    extern crate test;
    use super::Windows949Encoding;
    use crate::testutils;
    use crate::types::*;

    // ASCII, empty input and Hangul all encode to the expected byte sequences.
    #[test]
    fn test_encoder_valid() {
        let mut e = Windows949Encoding.raw_encoder();
        assert_feed_ok!(e, "A", "", [0x41]);
        assert_feed_ok!(e, "BC", "", [0x42, 0x43]);
        assert_feed_ok!(e, "", "", []);
        assert_feed_ok!(e, "\u{ac00}", "", [0xb0, 0xa1]);
        assert_feed_ok!(e, "\u{b098}\u{b2e4}", "", [0xb3, 0xaa, 0xb4, 0xd9]);
        assert_feed_ok!(
            e,
            "\u{bdc1}\u{314b}\u{d7a3}",
            "",
            [0x94, 0xee, 0xa4, 0xbb, 0xc6, 0x52]
        );
        assert_finish_ok!(e, []);
    }

    // Characters without a mapping are reported as errors; text before them is
    // still encoded.
    #[test]
    fn test_encoder_invalid() {
        let mut e = Windows949Encoding.raw_encoder();
        assert_feed_err!(e, "", "\u{ffff}", "", []);
        assert_feed_err!(e, "?", "\u{ffff}", "!", [0x3f]);
        // U+FFFD must be rejected as well.
        assert_feed_err!(e, "?", "\u{fffd}", "!", [0x3f]);
        assert_finish_ok!(e, []);
    }

    // Mirror of `test_encoder_valid`, plus one extra pair (C1 64 -> U+D58F).
    #[test]
    fn test_decoder_valid() {
        let mut d = Windows949Encoding.raw_decoder();
        assert_feed_ok!(d, [0x41], [], "A");
        assert_feed_ok!(d, [0x42, 0x43], [], "BC");
        assert_feed_ok!(d, [], [], "");
        assert_feed_ok!(d, [0xb0, 0xa1], [], "\u{ac00}");
        assert_feed_ok!(d, [0xb3, 0xaa, 0xb4, 0xd9], [], "\u{b098}\u{b2e4}");
        assert_feed_ok!(
            d,
            [0x94, 0xee, 0xa4, 0xbb, 0xc6, 0x52, 0xc1, 0x64],
            [],
            "\u{bdc1}\u{314b}\u{d7a3}\u{d58f}"
        );
        assert_finish_ok!(d, "");
    }

    // A two-byte sequence split across feeds is completed by the next feed.
    #[test]
    fn test_decoder_valid_partial() {
        let mut d = Windows949Encoding.raw_decoder();
        assert_feed_ok!(d, [], [0xb0], "");
        assert_feed_ok!(d, [0xa1], [], "\u{ac00}");
        assert_feed_ok!(d, [0xb3, 0xaa], [0xb4], "\u{b098}");
        assert_feed_ok!(d, [0xd9], [0x94], "\u{b2e4}");
        assert_feed_ok!(d, [0xee, 0xa4, 0xbb], [0xc6], "\u{bdc1}\u{314b}");
        assert_feed_ok!(d, [0x52, 0xc1, 0x64], [], "\u{d7a3}\u{d58f}");
        assert_finish_ok!(d, "");
    }

    #[test]
    fn test_decoder_invalid_lone_lead_immediate_test_finish() {
        // Every lead byte 0x81..=0xfe is accepted while waiting for a trail
        // byte, so the error only surfaces at finish.
        for i in 0x81..0xff {
            let mut d = Windows949Encoding.raw_decoder();
            assert_feed_ok!(d, [], [i], "");
            assert_finish_err!(d, "");
        }

        let mut d = Windows949Encoding.raw_decoder();
        // 0x80 and 0xff are not lead bytes and fail immediately.
        assert_feed_err!(d, [], [0x80], [], "");
        assert_feed_err!(d, [], [0xff], [], "");
        assert_finish_ok!(d, "");
    }

    // For any byte 0x80..=0xff followed by a space, only that byte is reported
    // as the problem; the space is left over.
    #[test]
    fn test_decoder_invalid_lone_lead_followed_by_space() {
        for i in 0x80..0x100 {
            let i = i as u8;
            let mut d = Windows949Encoding.raw_decoder();
            assert_feed_err!(d, [], [i], [0x20], "");
            assert_finish_ok!(d, "");
        }
    }

    #[test]
    fn test_decoder_invalid_lead_followed_by_invalid_trail() {
        for i in 0x81..0xff {
            // Lead and invalid trail (0x80 / 0xff) in the same feed: both bytes
            // form the problem sequence.
            let mut d = Windows949Encoding.raw_decoder();
            assert_feed_err!(d, [], [i, 0x80], [0x20], "");
            assert_feed_err!(d, [], [i, 0xff], [0x20], "");
            assert_finish_ok!(d, "");

            // Same, with the lead byte delivered in an earlier feed.
            let mut d = Windows949Encoding.raw_decoder();
            assert_feed_ok!(d, [], [i], "");
            assert_feed_err!(d, [], [0x80], [0x20], "");
            assert_feed_ok!(d, [], [i], "");
            assert_feed_err!(d, [], [0xff], [0x20], "");
            assert_finish_ok!(d, "");
        }

        // 0x80 and 0xff are not lead bytes: each is rejected on its own and the
        // following byte is left over.
        let mut d = Windows949Encoding.raw_decoder();
        assert_feed_err!(d, [], [0x80], [0x80], "");
        assert_feed_err!(d, [], [0x80], [0xff], "");
        assert_feed_err!(d, [], [0xff], [0x80], "");
        assert_feed_err!(d, [], [0xff], [0xff], "");
        assert_finish_ok!(d, "");
    }

    #[test]
    fn test_decoder_invalid_boundary() {
        let mut d = Windows949Encoding.raw_decoder();
        // C6 52 decodes to U+D7A3 (see `test_decoder_valid`); the next pair,
        // C6 53, is invalid. The trail byte 0x53 is below 0x80, so it is not
        // part of the reported problem and is left over.
        assert_feed_ok!(d, [], [0xc6], "");
        assert_feed_err!(d, [], [], [0x53], "");
        assert_finish_ok!(d, "");
    }

    // After a failed finish (dangling lead byte) the decoder can be fed again
    // and decodes normally.
    #[test]
    fn test_decoder_feed_after_finish() {
        let mut d = Windows949Encoding.raw_decoder();
        assert_feed_ok!(d, [0xb0, 0xa1], [0xb0], "\u{ac00}");
        assert_finish_err!(d, "");
        assert_feed_ok!(d, [0xb0, 0xa1], [], "\u{ac00}");
        assert_finish_ok!(d, "");
    }

    // Benchmarks strict encoding of `testutils::KOREAN_TEXT`.
    #[bench]
    fn bench_encode_short_text(bencher: &mut test::Bencher) {
        let s = testutils::KOREAN_TEXT;
        bencher.bytes = s.len() as u64;
        bencher.iter(|| test::black_box(Windows949Encoding.encode(s, EncoderTrap::Strict)))
    }

    // Benchmarks strict decoding of the encoded form of `testutils::KOREAN_TEXT`.
    #[bench]
    fn bench_decode_short_text(bencher: &mut test::Bencher) {
        let s = Windows949Encoding
            .encode(testutils::KOREAN_TEXT, EncoderTrap::Strict)
            .ok()
            .unwrap();
        bencher.bytes = s.len() as u64;
        bencher.iter(|| test::black_box(Windows949Encoding.decode(&s, DecoderTrap::Strict)))
    }
}