encoding/codec/
ascii.rs

1// This is a part of encoding-next.
2// Copyright (c) 2013-2015, Kang Seonghoon.
3// See README.md and LICENSE.txt for details.
4
5//! 7-bit ASCII encoding.
6
7use crate::types::*;
8use std::convert::Into;
9
/**
 * ASCII, also known as ISO/IEC 646:US.
 *
 * It is both a basis and a lowest common denominator of many other encodings
 * including UTF-8, which Rust internally assumes.
 *
 * This is a field-less marker type: it carries no state, so copies are free
 * and every value behaves identically. `Debug` is derived so the type can be
 * printed in diagnostics and embedded in other `Debug`-deriving types.
 */
#[derive(Clone, Copy, Debug)]
pub struct ASCIIEncoding;
18
19impl Encoding for ASCIIEncoding {
20    fn name(&self) -> &'static str {
21        "ascii"
22    }
23    fn raw_encoder(&self) -> Box<dyn RawEncoder> {
24        ASCIIEncoder::new()
25    }
26    fn raw_decoder(&self) -> Box<dyn RawDecoder> {
27        ASCIIDecoder::new()
28    }
29}
30
/// An encoder for ASCII.
///
/// The encoder is a field-less unit struct, so it holds no state between
/// calls. `Debug` is derived so the type can be printed in diagnostics.
#[derive(Clone, Copy, Debug)]
pub struct ASCIIEncoder;
34
35impl ASCIIEncoder {
36    #[allow(clippy::new_ret_no_self)]
37    pub fn new() -> Box<dyn RawEncoder> {
38        Box::new(ASCIIEncoder)
39    }
40}
41
42impl RawEncoder for ASCIIEncoder {
43    fn from_self(&self) -> Box<dyn RawEncoder> {
44        ASCIIEncoder::new()
45    }
46    fn is_ascii_compatible(&self) -> bool {
47        true
48    }
49
50    fn raw_feed(
51        &mut self,
52        input: &str,
53        output: &mut dyn ByteWriter,
54    ) -> (usize, Option<CodecError>) {
55        output.writer_hint(input.len());
56
57        match input.as_bytes().iter().position(|&ch| ch >= 0x80) {
58            Some(first_error) => {
59                output.write_bytes(&input.as_bytes()[..first_error]);
60                let len = input[first_error..].chars().next().unwrap().len_utf8();
61                (
62                    first_error,
63                    Some(CodecError {
64                        upto: (first_error + len) as isize,
65                        cause: "unrepresentable character".into(),
66                    }),
67                )
68            }
69            None => {
70                output.write_bytes(input.as_bytes());
71                (input.len(), None)
72            }
73        }
74    }
75
76    fn raw_finish(&mut self, _output: &mut dyn ByteWriter) -> Option<CodecError> {
77        None
78    }
79}
80
/// A decoder for ASCII.
///
/// The decoder is a field-less unit struct, so it holds no state between
/// calls. `Debug` is derived so the type can be printed in diagnostics.
#[derive(Clone, Copy, Debug)]
pub struct ASCIIDecoder;
84
85impl ASCIIDecoder {
86    #[allow(clippy::new_ret_no_self)]
87    pub fn new() -> Box<dyn RawDecoder> {
88        Box::new(ASCIIDecoder)
89    }
90}
91
92impl RawDecoder for ASCIIDecoder {
93    fn from_self(&self) -> Box<dyn RawDecoder> {
94        ASCIIDecoder::new()
95    }
96    fn is_ascii_compatible(&self) -> bool {
97        true
98    }
99
100    fn raw_feed(
101        &mut self,
102        input: &[u8],
103        output: &mut dyn StringWriter,
104    ) -> (usize, Option<CodecError>) {
105        output.writer_hint(input.len());
106
107        fn write_ascii_bytes(output: &mut dyn StringWriter, buf: &[u8]) {
108            output.write_str(unsafe { std::str::from_utf8_unchecked(buf) })
109        }
110
111        match input.iter().position(|&ch| ch >= 0x80) {
112            Some(first_error) => {
113                write_ascii_bytes(output, &input[..first_error]);
114                (
115                    first_error,
116                    Some(CodecError {
117                        upto: first_error as isize + 1,
118                        cause: "invalid sequence".into(),
119                    }),
120                )
121            }
122            None => {
123                write_ascii_bytes(output, input);
124                (input.len(), None)
125            }
126        }
127    }
128
129    fn raw_finish(&mut self, _output: &mut dyn StringWriter) -> Option<CodecError> {
130        None
131    }
132}
133
#[cfg(test)]
mod tests {
    extern crate test;
    use super::ASCIIEncoding;
    use crate::testutils;
    use crate::types::*;

    /// Feeds ASCII and non-ASCII (`U+00A0`) text through the raw encoder.
    #[test]
    fn test_encoder() {
        let mut encoder = ASCIIEncoding.raw_encoder();
        assert_feed_ok!(encoder, "A", "", [0x41]);
        assert_feed_ok!(encoder, "BC", "", [0x42, 0x43]);
        assert_feed_ok!(encoder, "", "", []);
        assert_feed_err!(encoder, "", "\u{a0}", "", []);
        assert_feed_err!(encoder, "X", "\u{a0}", "Z", [0x58]);
        assert_finish_ok!(encoder, []);
    }

    /// Feeds ASCII and non-ASCII (`0xa0`) bytes through the raw decoder.
    #[test]
    fn test_decoder() {
        let mut decoder = ASCIIEncoding.raw_decoder();
        assert_feed_ok!(decoder, [0x41], [], "A");
        assert_feed_ok!(decoder, [0x42, 0x43], [], "BC");
        assert_feed_ok!(decoder, [], [], "");
        assert_feed_err!(decoder, [], [0xa0], [], "");
        assert_feed_err!(decoder, [0x58], [0xa0], [0x5a], "X");
        assert_finish_ok!(decoder, "");
    }

    /// Benchmarks strict encoding of the ASCII sample text.
    #[bench]
    fn bench_encode(bencher: &mut test::Bencher) {
        let text = testutils::ASCII_TEXT;
        bencher.bytes = text.len() as u64;
        bencher.iter(|| test::black_box(ASCIIEncoding.encode(text, EncoderTrap::Strict)))
    }

    /// Benchmarks strict decoding of the ASCII sample text.
    #[bench]
    fn bench_decode(bencher: &mut test::Bencher) {
        let bytes = testutils::ASCII_TEXT.as_bytes();
        bencher.bytes = bytes.len() as u64;
        bencher.iter(|| test::black_box(ASCIIEncoding.decode(bytes, DecoderTrap::Strict)))
    }

    /// Benchmarks encoding of the Korean sample text with the `Replace` trap.
    #[bench]
    fn bench_encode_replace(bencher: &mut test::Bencher) {
        let text = testutils::KOREAN_TEXT;
        bencher.bytes = text.len() as u64;
        bencher.iter(|| test::black_box(ASCIIEncoding.encode(text, EncoderTrap::Replace)))
    }

    /// Benchmarks decoding of the Korean sample text with the `Replace` trap.
    #[bench]
    fn bench_decode_replace(bencher: &mut test::Bencher) {
        let bytes = testutils::KOREAN_TEXT.as_bytes();
        bencher.bytes = bytes.len() as u64;
        bencher.iter(|| test::black_box(ASCIIEncoding.decode(bytes, DecoderTrap::Replace)))
    }
}