minify/io/
unstable.rs

1use std::io::{Error, ErrorKind, Read};
2use std::{error, fmt, result, str::from_utf8};
3
/// Shorthand for a [`result::Result`] whose error type is the I/O [`Error`].
pub type Result<T> = result::Result<T, Error>;
5
/// Iterator adapter that decodes the bytes of a reader into `char`s.
///
/// The wrapped reader is stored in the public `inner` field. When `R`
/// implements `Read`, iterating yields one `Result<char, CharsError>` per
/// UTF-8 sequence pulled from the reader.
#[derive(Debug)]
pub struct Chars<R> {
    /// Reader that supplies the raw bytes.
    pub inner: R,
}
16
17impl<R: Read> Iterator for Chars<R> {
18    type Item = result::Result<char, CharsError>;
19
20    fn next(&mut self) -> Option<result::Result<char, CharsError>> {
21        let first_byte = match read_one_byte(&mut self.inner)? {
22            Ok(b) => b,
23            Err(e) => return Some(Err(CharsError::Other(e))),
24        };
25
26        let width = utf8_char_width(first_byte);
27        if width == 1 {
28            return Some(Ok(first_byte as char));
29        }
30        if width == 0 {
31            return Some(Err(CharsError::NotUtf8));
32        }
33        let mut buf = [first_byte, 0, 0, 0];
34        {
35            let mut start = 1;
36            while start < width {
37                match self.inner.read(&mut buf[start..width]) {
38                    Ok(0) => return Some(Err(CharsError::NotUtf8)),
39                    Ok(n) => start += n,
40                    Err(ref e) if e.kind() == ErrorKind::Interrupted => continue,
41                    Err(e) => return Some(Err(CharsError::Other(e))),
42                }
43            }
44        }
45        Some(match from_utf8(&buf[..width]).ok() {
46            Some(s) => Ok(s.chars().next().unwrap()),
47            None => Err(CharsError::NotUtf8),
48        })
49    }
50}
51
/// Reads exactly one byte from `reader`.
///
/// Returns `None` when the reader reports zero bytes read, `Some(Ok(byte))`
/// on success, and `Some(Err(_))` for any I/O error other than
/// `ErrorKind::Interrupted`, which is retried.
fn read_one_byte(reader: &mut dyn Read) -> Option<Result<u8>> {
    let mut slot = [0u8; 1];
    loop {
        match reader.read(&mut slot) {
            Ok(0) => return None,
            Ok(_) => return Some(Ok(slot[0])),
            Err(io_err) => {
                // Interrupted reads fall through and go around the loop again.
                if io_err.kind() != ErrorKind::Interrupted {
                    return Some(Err(io_err));
                }
            }
        }
    }
}
63
64pub fn utf8_char_width(b: u8) -> usize {
65    UTF8_CHAR_WIDTH[b as usize] as usize
66}
67
// UTF-8 sequence length indexed by the first byte of the sequence, following
// https://tools.ietf.org/html/rfc3629. An entry of 0 marks a byte that cannot
// begin a sequence.
static UTF8_CHAR_WIDTH: [u8; 256] = [
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x00..=0x0F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x10..=0x1F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x20..=0x2F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x30..=0x3F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x40..=0x4F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x50..=0x5F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x60..=0x6F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x70..=0x7F
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x80..=0x8F
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x90..=0x9F
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xA0..=0xAF
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xB0..=0xBF
    0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xC0..=0xCF
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xD0..=0xDF
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 0xE0..=0xEF
    4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xF0..=0xFF
];
87
/// Errors that the `Chars` iterator can yield.
#[derive(Debug)]
pub enum CharsError {
    /// The bytes were read successfully but do not form valid UTF-8.
    NotUtf8,

    /// The underlying reader reported an I/O error, carried here unchanged.
    Other(Error),
}
99
100impl fmt::Display for CharsError {
101    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
102        match *self {
103            Self::NotUtf8 => write!(f, "byte stream did not contain valid utf8"),
104            Self::Other(ref e) => e.fmt(f),
105        }
106    }
107}
108
109impl error::Error for CharsError {
110    fn source(&self) -> Option<&(dyn error::Error + 'static)> {
111        match *self {
112            Self::NotUtf8 => None,
113            Self::Other(ref e) => Some(e),
114        }
115    }
116}