encoding/codec/
singlebyte.rs

1// This is a part of encoding-next.
2// Copyright (c) 2013-2015, Kang Seonghoon.
3// See README.md and LICENSE.txt for details.
4
5//! Common codec implementation for single-byte encodings.
6
7use crate::types::*;
8use crate::util::{as_char, StrCharIndex};
9use std::convert::Into;
10
/// Shared description of an ASCII-based single-byte encoding.
///
/// The value bundles the encoding's names with the two index functions that
/// the encoder and decoder in this module are built from.
#[derive(Clone, Copy)]
pub struct SingleByteEncoding {
    /// Name reported by `Encoding::name`.
    pub name: &'static str,
    /// Name reported by `Encoding::whatwg_name`, if there is one.
    pub whatwg_name: Option<&'static str>,
    /// Byte-to-code lookup handed to the decoder; the decoder treats a
    /// result of `0xffff` as "no mapping".
    pub index_forward: fn(u8) -> u16,
    /// Code-point-to-byte lookup handed to the encoder; the encoder treats
    /// a result of `0` as "no mapping".
    pub index_backward: fn(u32) -> u8,
}
19
20impl Encoding for SingleByteEncoding {
21    fn name(&self) -> &'static str {
22        self.name
23    }
24    fn whatwg_name(&self) -> Option<&'static str> {
25        self.whatwg_name
26    }
27    fn raw_encoder(&self) -> Box<dyn RawEncoder> {
28        SingleByteEncoder::new(self.index_backward)
29    }
30    fn raw_decoder(&self) -> Box<dyn RawDecoder> {
31        SingleByteDecoder::new(self.index_forward)
32    }
33}
34
/// Encoder half of an ASCII-based single-byte encoding.
///
/// The encoder carries no state besides the lookup function, so it is
/// freely copyable.
#[derive(Copy, Clone)]
pub struct SingleByteEncoder {
    /// Code-point-to-byte lookup; a result of `0` is treated as
    /// "unrepresentable" by `raw_feed`.
    index_backward: fn(u32) -> u8,
}
40
41impl SingleByteEncoder {
42    #[allow(clippy::new_ret_no_self)]
43    pub fn new(index_backward: fn(u32) -> u8) -> Box<dyn RawEncoder> {
44        Box::new(SingleByteEncoder { index_backward })
45    }
46}
47
48impl RawEncoder for SingleByteEncoder {
49    fn from_self(&self) -> Box<dyn RawEncoder> {
50        SingleByteEncoder::new(self.index_backward)
51    }
52    fn is_ascii_compatible(&self) -> bool {
53        true
54    }
55
56    fn raw_feed(
57        &mut self,
58        input: &str,
59        output: &mut dyn ByteWriter,
60    ) -> (usize, Option<CodecError>) {
61        output.writer_hint(input.len());
62
63        for ((i, j), ch) in input.index_iter() {
64            if ch <= '\u{7f}' {
65                output.write_byte(ch as u8);
66                continue;
67            } else {
68                let index = (self.index_backward)(ch as u32);
69                if index != 0 {
70                    output.write_byte(index);
71                } else {
72                    return (
73                        i,
74                        Some(CodecError {
75                            upto: j as isize,
76                            cause: "unrepresentable character".into(),
77                        }),
78                    );
79                }
80            }
81        }
82        (input.len(), None)
83    }
84
85    fn raw_finish(&mut self, _output: &mut dyn ByteWriter) -> Option<CodecError> {
86        None
87    }
88}
89
/// Decoder half of an ASCII-based single-byte encoding.
///
/// The decoder carries no state besides the lookup function, so it is
/// freely copyable.
#[derive(Copy, Clone)]
pub struct SingleByteDecoder {
    /// Byte-to-code lookup; a result of `0xffff` is treated as "invalid"
    /// by `raw_feed`.
    index_forward: fn(u8) -> u16,
}
95
96impl SingleByteDecoder {
97    #[allow(clippy::new_ret_no_self)]
98    pub fn new(index_forward: fn(u8) -> u16) -> Box<dyn RawDecoder> {
99        Box::new(SingleByteDecoder { index_forward })
100    }
101}
102
103impl RawDecoder for SingleByteDecoder {
104    fn from_self(&self) -> Box<dyn RawDecoder> {
105        SingleByteDecoder::new(self.index_forward)
106    }
107    fn is_ascii_compatible(&self) -> bool {
108        true
109    }
110
111    fn raw_feed(
112        &mut self,
113        input: &[u8],
114        output: &mut dyn StringWriter,
115    ) -> (usize, Option<CodecError>) {
116        output.writer_hint(input.len());
117
118        let mut i = 0;
119        let len = input.len();
120        while i < len {
121            if input[i] <= 0x7f {
122                output.write_char(input[i] as char);
123            } else {
124                let ch = (self.index_forward)(input[i]);
125                if ch != 0xffff {
126                    output.write_char(as_char(ch as u32));
127                } else {
128                    return (
129                        i,
130                        Some(CodecError {
131                            upto: i as isize + 1,
132                            cause: "invalid sequence".into(),
133                        }),
134                    );
135                }
136            }
137            i += 1;
138        }
139        (i, None)
140    }
141
142    fn raw_finish(&mut self, _output: &mut dyn StringWriter) -> Option<CodecError> {
143        None
144    }
145}
146
/// Index functions for ISO 8859-1, computed arithmetically instead of via
/// lookup tables.
pub mod iso_8859_1 {
    /// Maps a byte to the code of the same numeric value.
    #[inline]
    pub fn forward(code: u8) -> u16 {
        u16::from(code)
    }

    /// Maps a code point in `0x80..=0xff` to the byte of the same value.
    ///
    /// Every other input, including `0x00..=0x7f`, yields `0`.
    #[inline]
    pub fn backward(code: u32) -> u8 {
        match code {
            0x80..=0xff => code as u8,
            _ => 0,
        }
    }
}
162
#[cfg(test)]
mod tests {
    use crate::all::ISO_8859_2;
    use crate::types::*;

    /// Checks that the ISO 8859-2 encoder reports an error for U+FFFF and
    /// for U+10000.
    ///
    /// NOTE(review): `assert_feed_err!` is defined outside this file;
    /// presumably its arguments are (coder, accepted input, offending
    /// input, remaining input, expected output) — confirm against the
    /// macro definition.
    #[test]
    fn test_encoder_non_bmp() {
        let mut e = ISO_8859_2.raw_encoder();
        // U+FFFF is the last code point inside the Basic Multilingual Plane.
        assert_feed_err!(e, "A", "\u{FFFF}", "B", [0x41]);
        // U+10000 is the first code point outside the Basic Multilingual Plane.
        assert_feed_err!(e, "A", "\u{10000}", "B", [0x41]);
    }
}