encodingbufreader/
lib.rs

1//! # encodingbufreader
2//!
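//! `encodingbufreader` provides `BufReaderEncoding`, a `BufReader` wrapper
//! that reads text line by line and decodes it from a chosen character encoding.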
4use encoding::{DecoderTrap, EncodingRef};
5use std::io::{self, BufRead, BufReader, Result};
6
7#[derive(Debug)]
8pub struct Lines<R>
9where
10    R: io::Read,
11{
12    buf: BufReaderEncoding<R>,
13}
14
15impl<R: io::Read> Iterator for Lines<R> {
16    type Item = Result<String>;
17
18    fn next(&mut self) -> Option<Result<String>> {
19        let mut buf = String::new();
20        match self.buf.read_line(&mut buf) {
21            Ok(0) => None,
22            Ok(_n) => {
23                if buf.ends_with("\n") {
24                    buf.pop();
25                    if buf.ends_with("\r") {
26                        buf.pop();
27                    }
28                }
29                Some(Ok(buf))
30            }
31            Err(e) => Some(Err(e)),
32        }
33    }
34}
/// A buffered reader, modelled on `std::io::BufReader`, that pairs the
/// wrapped reader with a text encoding.
pub struct BufReaderEncoding<R> {
    /// Encoding associated with this reader.
    encoder: EncodingRef,
    /// Buffered wrapper around the underlying reader.
    inner: BufReader<R>,
    /// Byte buffer owned by this reader; it starts out empty.
    buf: Vec<u8>,
}
41
42impl<R> std::fmt::Debug for BufReaderEncoding<R>
43where
44    R: std::fmt::Debug,
45{
46    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
47        self.inner.fmt(f)
48    }
49}
50
51impl<R: io::Read> BufReaderEncoding<R> {
52    pub fn new(inner: R, encoder: EncodingRef) -> BufReaderEncoding<R> {
53        BufReaderEncoding {
54            encoder: encoder,
55            inner: BufReader::new(inner),
56            buf: Vec::new(),
57        }
58    }
59    pub fn with_capacity(cap: usize, inner: R, encoder: EncodingRef) -> BufReaderEncoding<R> {
60        BufReaderEncoding {
61            encoder: encoder,
62            inner: BufReader::with_capacity(cap, inner),
63            buf: Vec::new(),
64        }
65    }
66    fn append_to_string(&mut self, buf: &mut String) -> Result<usize> {
67        let len = buf.len();
68        let ret = self.inner.read_until(b'\n', &mut self.buf);
69
70        if self
71            .encoder
72            .decode_to(&self.buf[len..], DecoderTrap::Replace, buf)
73            .is_err()
74        {
75            ret.and_then(|_| {
76                Err(io::Error::new(
77                    io::ErrorKind::InvalidData,
78                    "stream did not contain valid character",
79                ))
80            })
81        } else {
82            self.buf.clear();
83            ret
84        }
85    }
86    /// Returns an iterator over the lines of this reader.
87    ///
88    /// The iterator returned from this function will yield instances of
89    /// [`io::Result`]`<`[`String`]`>`. Each string returned will *not* have a newline
90    /// byte (the 0xA byte) or CRLF (0xD, 0xA bytes) at the end.
91    ///
92    /// [`io::Result`]: type.Result.html
93    /// [`String`]: ../string/struct.String.html
94    ///
95    /// # Examples
96    ///
97    /// [`std::io::Cursor`][`Cursor`] is a type that implements `BufRead`. In
98    /// this example, we use [`Cursor`] to iterate over all the lines in a byte
99    /// slice.
100    ///
101    /// [`Cursor`]: struct.Cursor.html
102    ///
103    /// ```
104    /// use encodingbufreader::{BufReaderEncoding};
105    /// use encoding::all::UTF_8;
106    /// let bytes = "This string\nwill be read".as_bytes();
107    ///
108    /// let mut lines_iter = BufReaderEncoding::new(bytes,UTF_8).map(|l| l.unwrap());
109    /// assert_eq!(lines_iter.next(), Some(String::from("This string")));
110    /// assert_eq!(lines_iter.next(), Some(String::from("will be read")));
111    /// assert_eq!(lines_iter.next(), None);
112    /// ```
113    ///
114    /// # Errors
115    ///
116    /// Each line of the iterator has the same error semantics as [`BufRead::read_line`].
117    ///
118    /// [`BufReaderEncoding::read_line`]: BufReaderEncoding.html#method.read_line
119    pub fn lines(self) -> Lines<R> {
120        Lines { buf: self }
121    }
122    /// Read all bytes until a newline (the 0xA byte) is reached, and append
123    /// them to the provided buffer.
124    ///
125    /// This function will read bytes from the underlying stream until the
126    /// newline delimiter (the 0xA byte) or EOF is found. Once found, all bytes
127    /// up to, and including, the delimiter (if found) will be appended to
128    /// `buf`.
129    ///
130    /// If successful, this function will return the total number of bytes read.
131    ///
132    /// If this function returns `Ok(0)`, the stream has reached EOF.
133    ///
134    /// # Errors
135    ///
136    /// This function has the same error semantics as [`std::io::Read::read_until`] and will
137    /// also return an error if the read bytes are not valid encoding. If an I/O
138    /// error is encountered then `buf` may contain some bytes already read in
139    /// the event that all data read so far was valid encoding.
140    ///
141    ///
142    /// # Examples
143    ///
144    ///
145    /// ```
146    /// use encodingbufreader::{BufReaderEncoding};
147    /// use encoding::all::GB18030;
148    /// let bytes: &[u8] = &[
149    ///             213, 226, 202, 199, 210, 187, 184, 246, 215, 214, 183, 251, 180, 174, 10, 189, 171,
150    ///             187, 225, 177, 187, 182, 193, 200, 161,
151    ///         ];
152    /// let mut bufreader = BufReaderEncoding::new(bytes, GB18030);
153    /// let mut buf = String::new();
154    /// let num_bytes = bufreader
155    ///     .read_line(&mut buf)
156    ///     .expect("reading from bytes won't fail");
157    /// assert_eq!(num_bytes, 15);
158    /// assert_eq!(buf, "这是一个字符串\n");
159    /// ```
160    pub fn read_line(&mut self, buf: &mut String) -> Result<usize> {
161        self.append_to_string(buf)
162    }
163    pub fn set_encoder(&mut self, encoder: encoding::EncodingRef) {
164        self.encoder = encoder;
165    }
166}
167
168impl<R: io::Read> io::BufRead for BufReaderEncoding<R> {
169    fn fill_buf(&mut self) -> io::Result<&[u8]> {
170        self.inner.fill_buf()
171    }
172
173    fn consume(&mut self, amt: usize) {
174        self.inner.consume(amt);
175    }
176}
177impl<R: io::Read> io::Read for BufReaderEncoding<R> {
178    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
179        self.inner.read(buf)
180    }
181}
#[cfg(test)]
mod tests {
    use super::BufReaderEncoding;
    use encoding::all::{GB18030, UTF_8};

    /// Two lines of GB18030-encoded text separated by a single `\n` (byte 10).
    const GB18030_SAMPLE: &[u8] = &[
        213, 226, 202, 199, 210, 187, 184, 246, 215, 214, 183, 251, 180, 174, 10, 189, 171, 187,
        225, 177, 187, 182, 193, 200, 161,
    ];

    /// UTF-8 input is split into lines with the terminator removed.
    #[test]
    fn test_decodeuft8() {
        let reader = BufReaderEncoding::new("This string\nwill be read".as_bytes(), UTF_8);
        let lines: Vec<String> = reader.lines().map(|l| l.unwrap()).collect();
        assert_eq!(lines, ["This string", "will be read"]);
    }

    /// GB18030 input is decoded line by line through the iterator.
    #[test]
    fn test_decode_gb18030() {
        let reader = BufReaderEncoding::new(GB18030_SAMPLE, GB18030);
        let lines: Vec<String> = reader.lines().map(|l| l.unwrap()).collect();
        assert_eq!(lines, ["这是一个字符串", "将会被读取"]);
    }

    /// `read_line` reports the raw byte count and keeps the newline.
    #[test]
    fn test_decode_readline() {
        let mut reader = BufReaderEncoding::new(GB18030_SAMPLE, GB18030);
        let mut text = String::new();
        let consumed = reader
            .read_line(&mut text)
            .expect("reading from bytes won't fail");
        assert_eq!(consumed, 15);
        assert_eq!(text, "这是一个字符串\n");
    }
}