encodingbufreader/lib.rs
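//! # encodingbufreader
//!
//! `encodingbufreader` provides `BufReaderEncoding`, a buffered reader that decodes
//! each line of a byte stream from a chosen character encoding into a `String`.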
4use encoding::{DecoderTrap, EncodingRef};
5use std::io::{self, BufRead, BufReader, Result};
6
7#[derive(Debug)]
8pub struct Lines<R>
9where
10 R: io::Read,
11{
12 buf: BufReaderEncoding<R>,
13}
14
15impl<R: io::Read> Iterator for Lines<R> {
16 type Item = Result<String>;
17
18 fn next(&mut self) -> Option<Result<String>> {
19 let mut buf = String::new();
20 match self.buf.read_line(&mut buf) {
21 Ok(0) => None,
22 Ok(_n) => {
23 if buf.ends_with("\n") {
24 buf.pop();
25 if buf.ends_with("\r") {
26 buf.pop();
27 }
28 }
29 Some(Ok(buf))
30 }
31 Err(e) => Some(Err(e)),
32 }
33 }
34}
35/// Modificate std::io::BufReader
36pub struct BufReaderEncoding<R> {
37 encoder: EncodingRef,
38 inner: BufReader<R>,
39 buf: Vec<u8>,
40}
41
42impl<R> std::fmt::Debug for BufReaderEncoding<R>
43where
44 R: std::fmt::Debug,
45{
46 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
47 self.inner.fmt(f)
48 }
49}
50
51impl<R: io::Read> BufReaderEncoding<R> {
52 pub fn new(inner: R, encoder: EncodingRef) -> BufReaderEncoding<R> {
53 BufReaderEncoding {
54 encoder: encoder,
55 inner: BufReader::new(inner),
56 buf: Vec::new(),
57 }
58 }
59 pub fn with_capacity(cap: usize, inner: R, encoder: EncodingRef) -> BufReaderEncoding<R> {
60 BufReaderEncoding {
61 encoder: encoder,
62 inner: BufReader::with_capacity(cap, inner),
63 buf: Vec::new(),
64 }
65 }
66 fn append_to_string(&mut self, buf: &mut String) -> Result<usize> {
67 let len = buf.len();
68 let ret = self.inner.read_until(b'\n', &mut self.buf);
69
70 if self
71 .encoder
72 .decode_to(&self.buf[len..], DecoderTrap::Replace, buf)
73 .is_err()
74 {
75 ret.and_then(|_| {
76 Err(io::Error::new(
77 io::ErrorKind::InvalidData,
78 "stream did not contain valid character",
79 ))
80 })
81 } else {
82 self.buf.clear();
83 ret
84 }
85 }
86 /// Returns an iterator over the lines of this reader.
87 ///
88 /// The iterator returned from this function will yield instances of
89 /// [`io::Result`]`<`[`String`]`>`. Each string returned will *not* have a newline
90 /// byte (the 0xA byte) or CRLF (0xD, 0xA bytes) at the end.
91 ///
92 /// [`io::Result`]: type.Result.html
93 /// [`String`]: ../string/struct.String.html
94 ///
95 /// # Examples
96 ///
97 /// [`std::io::Cursor`][`Cursor`] is a type that implements `BufRead`. In
98 /// this example, we use [`Cursor`] to iterate over all the lines in a byte
99 /// slice.
100 ///
101 /// [`Cursor`]: struct.Cursor.html
102 ///
103 /// ```
104 /// use encodingbufreader::{BufReaderEncoding};
105 /// use encoding::all::UTF_8;
106 /// let bytes = "This string\nwill be read".as_bytes();
107 ///
108 /// let mut lines_iter = BufReaderEncoding::new(bytes,UTF_8).map(|l| l.unwrap());
109 /// assert_eq!(lines_iter.next(), Some(String::from("This string")));
110 /// assert_eq!(lines_iter.next(), Some(String::from("will be read")));
111 /// assert_eq!(lines_iter.next(), None);
112 /// ```
113 ///
114 /// # Errors
115 ///
116 /// Each line of the iterator has the same error semantics as [`BufRead::read_line`].
117 ///
118 /// [`BufReaderEncoding::read_line`]: BufReaderEncoding.html#method.read_line
119 pub fn lines(self) -> Lines<R> {
120 Lines { buf: self }
121 }
122 /// Read all bytes until a newline (the 0xA byte) is reached, and append
123 /// them to the provided buffer.
124 ///
125 /// This function will read bytes from the underlying stream until the
126 /// newline delimiter (the 0xA byte) or EOF is found. Once found, all bytes
127 /// up to, and including, the delimiter (if found) will be appended to
128 /// `buf`.
129 ///
130 /// If successful, this function will return the total number of bytes read.
131 ///
132 /// If this function returns `Ok(0)`, the stream has reached EOF.
133 ///
134 /// # Errors
135 ///
136 /// This function has the same error semantics as [`std::io::Read::read_until`] and will
137 /// also return an error if the read bytes are not valid encoding. If an I/O
138 /// error is encountered then `buf` may contain some bytes already read in
139 /// the event that all data read so far was valid encoding.
140 ///
141 ///
142 /// # Examples
143 ///
144 ///
145 /// ```
146 /// use encodingbufreader::{BufReaderEncoding};
147 /// use encoding::all::GB18030;
148 /// let bytes: &[u8] = &[
149 /// 213, 226, 202, 199, 210, 187, 184, 246, 215, 214, 183, 251, 180, 174, 10, 189, 171,
150 /// 187, 225, 177, 187, 182, 193, 200, 161,
151 /// ];
152 /// let mut bufreader = BufReaderEncoding::new(bytes, GB18030);
153 /// let mut buf = String::new();
154 /// let num_bytes = bufreader
155 /// .read_line(&mut buf)
156 /// .expect("reading from bytes won't fail");
157 /// assert_eq!(num_bytes, 15);
158 /// assert_eq!(buf, "这是一个字符串\n");
159 /// ```
160 pub fn read_line(&mut self, buf: &mut String) -> Result<usize> {
161 self.append_to_string(buf)
162 }
163 pub fn set_encoder(&mut self, encoder: encoding::EncodingRef) {
164 self.encoder = encoder;
165 }
166}
167
168impl<R: io::Read> io::BufRead for BufReaderEncoding<R> {
169 fn fill_buf(&mut self) -> io::Result<&[u8]> {
170 self.inner.fill_buf()
171 }
172
173 fn consume(&mut self, amt: usize) {
174 self.inner.consume(amt);
175 }
176}
177impl<R: io::Read> io::Read for BufReaderEncoding<R> {
178 fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
179 self.inner.read(buf)
180 }
181}
#[cfg(test)]
mod tests {
    use super::BufReaderEncoding;
    use encoding::all::{GB18030, UTF_8};

    /// Bytes that decode under GB18030 to "这是一个字符串\n将会被读取".
    const GB18030_SAMPLE: &[u8] = &[
        213, 226, 202, 199, 210, 187, 184, 246, 215, 214, 183, 251, 180, 174, 10, 189, 171,
        187, 225, 177, 187, 182, 193, 200, 161,
    ];

    /// UTF-8 input is split into lines without their terminators.
    #[test]
    fn test_decodeuft8() {
        let reader = BufReaderEncoding::new("This string\nwill be read".as_bytes(), UTF_8);
        let decoded: Vec<String> = reader.lines().map(|l| l.unwrap()).collect();
        assert_eq!(
            decoded,
            vec![String::from("This string"), String::from("will be read")]
        );
    }

    /// GB18030 input is decoded line by line.
    #[test]
    fn test_decode_gb18030() {
        let reader = BufReaderEncoding::new(GB18030_SAMPLE, GB18030);
        let decoded: Vec<String> = reader.lines().map(|l| l.unwrap()).collect();
        assert_eq!(
            decoded,
            vec![String::from("这是一个字符串"), String::from("将会被读取")]
        );
    }

    /// `read_line` reports the raw byte count and keeps the trailing newline.
    #[test]
    fn test_decode_readline() {
        let mut reader = BufReaderEncoding::new(GB18030_SAMPLE, GB18030);
        let mut line = String::new();
        let num_bytes = reader
            .read_line(&mut line)
            .expect("reading from bytes won't fail");
        assert_eq!(num_bytes, 15);
        assert_eq!(line, "这是一个字符串\n");
    }
}