use std::fmt;
use std::io;
use std::io::{Error, ErrorKind, Read, Write};
/// Writes `c` to `writer` as its UTF-8 byte sequence.
///
/// # Errors
///
/// Returns whatever error `writer.write_all` reports.
pub fn write_char<W: Write>(writer: &mut W, c: char) -> io::Result<()> {
    // Four bytes is the longest sequence `encode_char_utf8` ever produces.
    let mut scratch = [0u8; 4];
    writer.write_all(encode_char_utf8(c, &mut scratch))
}
/// Encodes `c` as UTF-8 into the front of `buf` and returns the bytes written.
///
/// The returned slice is the one-to-four byte prefix of `buf` that holds the
/// encoding; any bytes of `buf` past that prefix are left untouched.
///
/// # Panics
///
/// Panics if `buf` is shorter than the encoding of `c`. A four-byte buffer is
/// always large enough.
fn encode_char_utf8(c: char, buf: &mut [u8]) -> &[u8] {
    // The standard library already implements the encoder; only the length of
    // the encoded prefix is needed to hand back a shared slice of `buf`.
    let len = c.encode_utf8(buf).len();
    &buf[..len]
}
/// Returns the length in bytes of a UTF-8 sequence, judged from its first byte.
///
/// Yields `0` when `first` matches none of the four lead-byte bit patterns,
/// i.e. it looks like `10xxxxxx` or `11111xxx`.
fn utf8_char_bytes(first: u8) -> usize {
    match first {
        0x00..=0x7f => 1, // 0xxxxxxx
        0xc0..=0xdf => 2, // 110xxxxx
        0xe0..=0xef => 3, // 1110xxxx
        0xf0..=0xf7 => 4, // 11110xxx
        _ => 0,           // 10xxxxxx or 11111xxx
    }
}
/// Iterator that decodes UTF-8 characters from the bytes of a reader.
///
/// Built by [`chars`]. When `R` implements `Read`, each item is a
/// `Result<char, CharsError>`.
pub struct Chars<R> {
// The wrapped value that bytes are pulled from.
inner: R,
}
/// Wraps `reader` in a [`Chars`] iterator.
///
/// The reader is moved into the returned value; nothing is read from it here.
pub fn chars<R: Read>(reader: R) -> Chars<R> {
    let inner = reader;
    Chars { inner }
}
/// Error produced while iterating over a [`Chars`].
#[derive(Debug)]
pub enum CharsError {
/// The bytes read were not a valid UTF-8 encoded character: the first byte
/// was not a recognized lead byte, the input ended in the middle of a
/// sequence, or the collected bytes failed UTF-8 validation.
NotUtf8,
/// The underlying reader reported an I/O error (`Error` here is
/// `std::io::Error`).
Other(Error),
}
/// Decodes one `char` per call by pulling bytes from the wrapped reader.
impl<R: Read> Iterator for Chars<R> {
    type Item = Result<char, CharsError>;

    /// Reads the next UTF-8 encoded character.
    ///
    /// Returns `None` once the reader reports end of input before the first
    /// byte of a character. An unrecognized lead byte, end of input in the
    /// middle of a sequence, or bytes that fail UTF-8 validation yield
    /// `CharsError::NotUtf8`; reader errors other than `Interrupted` yield
    /// `CharsError::Other`.
    fn next(&mut self) -> Option<Result<char, CharsError>> {
        let lead = match read_a_byte(&mut self.inner) {
            Some(Ok(byte)) => byte,
            Some(Err(err)) => return Some(Err(CharsError::Other(err))),
            None => return None,
        };

        // The lead byte alone settles the two short-circuit cases.
        let len = match utf8_char_bytes(lead) {
            0 => return Some(Err(CharsError::NotUtf8)),
            1 => return Some(Ok(lead as char)),
            n => n,
        };

        // Collect the continuation bytes; a single `read` may deliver fewer
        // than requested, so keep going until the sequence is complete.
        let mut bytes = [lead, 0, 0, 0];
        let mut filled = 1;
        while filled < len {
            match self.inner.read(&mut bytes[filled..len]) {
                // End of input in the middle of a character.
                Ok(0) => return Some(Err(CharsError::NotUtf8)),
                Ok(count) => filled += count,
                // Interrupted reads are simply retried.
                Err(ref err) if err.kind() == ErrorKind::Interrupted => continue,
                Err(err) => return Some(Err(CharsError::Other(err))),
            }
        }

        Some(
            ::std::str::from_utf8(&bytes[..len])
                .map_err(|_| CharsError::NotUtf8)
                // A validated sequence of two or more bytes is a non-empty
                // string, so a first character always exists.
                .map(|text| text.chars().next().unwrap()),
        )
    }
}
/// Reads exactly one byte from `reader`.
///
/// Returns `None` when the reader reports end of input (a zero-length read),
/// `Some(Ok(byte))` on success, and `Some(Err(_))` for any I/O error other
/// than `ErrorKind::Interrupted`, which is retried.
fn read_a_byte<R: Read>(reader: &mut R) -> Option<io::Result<u8>> {
    let mut byte = [0u8; 1];
    loop {
        match reader.read(&mut byte) {
            Ok(0) => return None,
            Ok(_) => return Some(Ok(byte[0])),
            Err(err) => {
                if err.kind() != ErrorKind::Interrupted {
                    return Some(Err(err));
                }
                // Interrupted: fall through and try the read again.
            }
        }
    }
}
impl fmt::Display for CharsError {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match *self {
CharsError::NotUtf8 => "byte stream did not contain valid utf8".fmt(f),
CharsError::Other(ref e) => e.fmt(f),
}
}
}
#[cfg(test)]
mod test {
    use super::encode_char_utf8;

    /// Encodes `c` into a four-byte scratch buffer and asserts the bytes produced.
    fn do_test_encode_char_utf8(c: char, expected: &[u8]) {
        let mut scratch = [0u8; 4];
        assert_eq!(encode_char_utf8(c, &mut scratch), expected);
    }

    /// One sample character for each encoded length, one through four bytes.
    #[test]
    fn test_encode_char_utf8() {
        let cases: [(char, &[u8]); 4] = [
            ('$', &[0x24]),
            ('¢', &[0xc2, 0xa2]),
            ('€', &[0xe2, 0x82, 0xac]),
            ('\u{10348}', &[0xf0, 0x90, 0x8d, 0x88]),
        ];
        for &(c, expected) in cases.iter() {
            do_test_encode_char_utf8(c, expected);
        }
    }
}