use std::io::{self, Error, ErrorKind};
use std::str::{from_utf8, from_utf8_unchecked};
/// Extension of [`std::io::BufRead`] that reads UTF-8 text chunk by chunk.
///
/// # Examples
///
/// ```ignore
/// // With this trait in scope:
/// let mut reader = std::io::BufReader::new("💖".as_bytes());
/// let mut text = String::new();
/// assert_eq!(4, reader.read_utf8(&mut text).unwrap());
/// assert_eq!("💖", text);
/// ```
#[deny(missing_crate_level_docs, missing_docs, missing_doc_code_examples)]
pub trait BufRead: io::BufRead {
    /// Appends the next chunk of valid UTF-8 text from the reader to `buf`.
    ///
    /// The chunk is the longest valid UTF-8 prefix of the reader's internal
    /// buffer (as returned by `fill_buf`). When the buffer starts with a code
    /// point that is cut off by the end of the buffer, the buffer is refilled
    /// **once** and exactly that one code point is read.
    ///
    /// Returns the number of bytes appended to `buf`, which is also the number
    /// of bytes consumed from the reader. `Ok(0)` is returned when `fill_buf`
    /// yields an empty buffer.
    ///
    /// # Errors
    ///
    /// * Any error returned by `fill_buf` is propagated unchanged.
    /// * [`ErrorKind::InvalidData`] if the buffer starts with bytes that are
    ///   not valid UTF-8 (nothing is consumed in that case), or if the reader
    ///   runs out of data in the middle of a code point.
    /// * [`ErrorKind::InvalidInput`] if a code point is still incomplete after
    ///   one refill, i.e. the internal buffer is too small to hold the rest
    ///   of the code point.
    ///
    /// When completing a split code point fails, the leading bytes of that
    /// code point have already been consumed from the reader and are lost.
    ///
    /// # Examples
    ///
    /// ```ignore
    /// // With this trait in scope:
    /// let mut reader = std::io::BufReader::with_capacity(2, "💖".as_bytes());
    /// let mut text = String::new();
    /// assert_eq!(4, reader.read_utf8(&mut text).unwrap());
    /// assert_eq!("💖", text);
    /// ```
    fn read_utf8(&mut self, buf: &mut String) -> io::Result<usize> {
        let read_bytes = self.fill_buf()?;

        // Fast path: the whole buffer is valid UTF-8 (this includes the empty buffer).
        let err = match from_utf8(read_bytes) {
            Ok(s) => {
                let used = s.len();
                buf.push_str(s);
                self.consume(used);
                return Ok(used);
            }
            Err(e) => e,
        };

        // Part of the buffer is valid: hand that prefix out and leave the rest for
        // the next call.
        let used = err.valid_up_to();
        if used > 0 {
            // SAFETY: `Utf8Error::valid_up_to` guarantees that `read_bytes[..used]`
            // is valid UTF-8.
            let valid = unsafe { from_utf8_unchecked(&read_bytes[..used]) };
            buf.push_str(valid);
            self.consume(used);
            return Ok(used);
        }

        // `error_len() == Some(_)` means the bytes are invalid, not merely truncated.
        // Report it without consuming anything so the caller can still inspect them.
        if err.error_len().is_some() {
            return Err(Error::new(ErrorKind::InvalidData, err));
        }

        // The whole buffer is the beginning of a single code point (at most 3 bytes).
        // It has to be copied out and consumed before the reader can provide the
        // following bytes.
        let mut pending = Vec::with_capacity(4);
        pending.extend_from_slice(read_bytes);
        self.consume(pending.len());

        let additional_bytes = self.fill_buf()?;
        if additional_bytes.is_empty() {
            // End of stream in the middle of a code point.
            return Err(Error::new(ErrorKind::InvalidData, err));
        }

        // Add bytes one at a time until the code point is complete. The loop ends
        // after at most three bytes: a sequence is either complete or invalid by
        // the time it reaches its encoded width.
        for (taken, &byte) in additional_bytes.iter().enumerate() {
            pending.push(byte);
            match from_utf8(&pending) {
                Ok(s) => {
                    buf.push_str(s);
                    self.consume(taken + 1);
                    return Ok(pending.len());
                }
                Err(e) if e.error_len().is_some() => {
                    return Err(Error::new(ErrorKind::InvalidData, e));
                }
                // Still incomplete: keep taking bytes.
                Err(_) => {}
            }
        }

        // The refill did not provide enough bytes to finish the code point.
        Err(Error::new(
            ErrorKind::InvalidInput,
            format!(
                "Internal buffer too small to read utf-8: a code point is still \
                 incomplete after refilling the buffer, which only provided {} \
                 additional byte(s)",
                additional_bytes.len()
            ),
        ))
    }
}
/// Blanket implementation: every [`io::BufRead`] type gets the methods of
/// [`BufRead`]. The `?Sized` bound also covers unsized readers such as
/// `dyn io::BufRead`.
impl<R: io::BufRead + ?Sized> BufRead for R {}
#[cfg(test)]
mod tests {
    use crate::BufRead;
    use std::io::{BufReader, ErrorKind};

    // A single 4-byte code point that fits in the default buffer is read in one call.
    #[test]
    fn readme_simple_example() {
        let mut reader = BufReader::new("💖".as_bytes());
        let mut text = String::new();
        let read = reader.read_utf8(&mut text).unwrap();
        assert_eq!(4, read);
        assert_eq!("💖", text.as_str());
    }

    // With a 4-byte buffer the 4-byte code points straddle buffer boundaries; every
    // call must return the expected byte count and leave `text` equal to the input
    // prefix read so far.
    #[test]
    fn codepoint_on_buffer_boundary() {
        let input = "0💖0💖0u0💖0u0💖";
        let mut reader = BufReader::with_capacity(4, input.as_bytes());
        let mut text = String::new();

        let expected_lengths: [usize; 9] = [1, 4, 1, 4, 2, 1, 4, 3, 4];
        let mut total = 0;
        for &expected in expected_lengths.iter() {
            assert_eq!(expected, reader.read_utf8(&mut text).unwrap());
            total += expected;
            assert_eq!(&input[..total], text.as_str());
        }
    }

    // A 4-byte code point split evenly over two fills of a 2-byte buffer is still read.
    #[test]
    fn two_bytes_capacity() {
        let mut reader = BufReader::with_capacity(2, "💖".as_bytes());
        let mut text = String::new();
        let read = reader.read_utf8(&mut text).unwrap();
        assert_eq!(4, read);
        assert_eq!("💖", text.as_str());
    }

    // A 1-byte buffer cannot deliver a 3-byte code point: expect `InvalidInput`.
    #[test]
    fn one_byte_capacity() {
        let mut reader = BufReader::with_capacity(1, "€".as_bytes());
        let mut text = String::new();
        let outcome = reader.read_utf8(&mut text).map_err(|e| e.kind());
        assert_eq!(Err(ErrorKind::InvalidInput), outcome);
    }
}