#![cfg(feature = "std")]
use super::scan::CharSource;
use std::boxed::Box;
use std::fs::File;
use std::io::Read;
use std::path::Path;
use std::{char, error, fmt, io};
/// Treats a string slice as a character source.
///
/// Each call yields the first character of the slice and shrinks the slice so
/// that it starts just past that character. This source never fails.
impl<'a> CharSource for &'a str {
    /// Returns the next character, or `Ok(None)` once the slice is empty.
    fn next(&mut self) -> Result<Option<char>, io::Error> {
        let mut remaining = self.chars();
        match remaining.next() {
            Some(ch) => {
                // `as_str` is exactly the tail that follows the character
                // just taken, i.e. the slice minus `ch.len_utf8()` bytes.
                *self = remaining.as_str();
                Ok(Some(ch))
            }
            // An empty slice is left untouched.
            None => Ok(None),
        }
    }
}
/// File-backed character source that only accepts ASCII bytes.
pub struct AsciiFile {
    /// File the bytes are read from.
    file: File,
    /// `(storage, len, pos)`: the read buffer, the number of valid bytes the
    /// last read placed in it, and the index of the next byte to hand out.
    /// Set to `None` once the source has finished (end of file or an error).
    buf: Option<(Box<[u8]>, usize, usize)>,
}

impl AsciiFile {
    /// Wraps an already opened file, allocating an empty `CAP`-byte buffer.
    pub fn new(file: File) -> Self {
        let storage: Box<[u8]> = vec![0u8; CAP].into_boxed_slice();
        Self {
            file,
            buf: Some((storage, 0, 0)),
        }
    }

    /// Opens the file at `path` for reading and wraps it.
    ///
    /// # Errors
    ///
    /// Returns whatever error `File::open` reports.
    pub fn open<P: AsRef<Path>>(path: P) -> Result<Self, io::Error> {
        let file = File::open(path)?;
        Ok(Self::new(file))
    }
}
/// Yields the file's bytes one at a time as ASCII characters.
impl CharSource for AsciiFile {
    /// Returns the next character of the file.
    ///
    /// `Ok(None)` signals end of input. Once the end of the file has been
    /// reached or an error has been returned, the buffer is dropped and every
    /// later call returns `Ok(None)`.
    ///
    /// # Errors
    ///
    /// * `InvalidData` wrapping an [`AsciiError`] if a byte is not ASCII.
    /// * Any error reported by the underlying read, except
    ///   `ErrorKind::Interrupted`, which is retried.
    fn next(&mut self) -> Result<Option<char>, io::Error> {
        let (buf, len, pos) = match self.buf.as_mut() {
            Some((buf, len, pos)) => (buf, len, pos),
            // The source already finished on an earlier call.
            None => return Ok(None),
        };

        let outcome = loop {
            if *pos < *len {
                let byte = buf[*pos];
                if byte.is_ascii() {
                    *pos += 1;
                    return Ok(Some(byte as char));
                }
                break Err(io::Error::new(
                    io::ErrorKind::InvalidData,
                    AsciiError(byte),
                ));
            }

            // Buffer exhausted: refill it and go around again.
            match self.file.read(buf) {
                Ok(0) => break Ok(None),
                Ok(read_len) => {
                    *len = read_len;
                    *pos = 0;
                }
                // An interrupted read is transient. Treating it as fatal would
                // drop the buffer and make the rest of the file look like EOF.
                Err(ref err) if err.kind() == io::ErrorKind::Interrupted => {}
                Err(err) => break Err(err),
            }
        };

        // End of file or a hard error: release the buffer so the source stays
        // finished from now on.
        self.buf = None;
        outcome
    }
}
/// File-backed character source that decodes the file's bytes as UTF-8.
///
/// Wraps an [`OctetFile`], which supplies the raw bytes.
pub struct Utf8File(OctetFile);

impl Utf8File {
    /// Wraps an already opened file.
    pub fn new(file: File) -> Self {
        let octets = OctetFile::new(file);
        Self(octets)
    }

    /// Opens the file at `path` for reading and wraps it.
    ///
    /// # Errors
    ///
    /// Returns whatever error `File::open` reports.
    pub fn open<P: AsRef<Path>>(path: P) -> Result<Self, io::Error> {
        let file = File::open(path)?;
        Ok(Self::new(file))
    }
}
/// Decodes the wrapped byte stream as UTF-8, one scalar value per call.
impl CharSource for Utf8File {
    /// Returns the next character of the file, or `Ok(None)` at end of input.
    ///
    /// Only well-formed UTF-8 is accepted: every continuation byte must have
    /// the form `10xxxxxx`, and overlong encodings, surrogate code points and
    /// values above U+10FFFF are rejected. The decoder is entirely safe code,
    /// so malformed input can never produce an invalid `char`.
    ///
    /// # Errors
    ///
    /// * An error converted from [`Utf8Error`] if the bytes are not a
    ///   well-formed UTF-8 sequence.
    /// * `UnexpectedEof` if the file ends in the middle of a sequence.
    /// * Any error reported while reading the underlying bytes.
    fn next(&mut self) -> Result<Option<char>, io::Error> {
        let first = match self.0.next()? {
            Some(byte) => byte,
            None => return Ok(None),
        };

        // For each kind of lead byte: how many continuation bytes follow, the
        // payload bits carried by the lead byte itself, and the smallest
        // scalar value that genuinely needs a sequence of that length
        // (anything below it is an overlong encoding).
        let (tail_len, lead_bits, min_scalar) = match first {
            0x00..=0x7F => return Ok(Some(first as char)),
            0xC2..=0xDF => (1, u32::from(first & 0x1F), 0x80),
            0xE0..=0xEF => (2, u32::from(first & 0x0F), 0x800),
            0xF0..=0xF4 => (3, u32::from(first & 0x07), 0x1_0000),
            // 0x80..=0xBF are stray continuation bytes, 0xC0/0xC1 can only
            // start overlong sequences, and 0xF5..=0xFF never occur in UTF-8.
            _ => return Err(Utf8Error.into()),
        };

        let mut scalar = lead_bits;
        for _ in 0..tail_len {
            let byte = match self.0.next()? {
                Some(byte) => byte,
                None => {
                    return Err(io::Error::new(
                        io::ErrorKind::UnexpectedEof,
                        "unexpected EOF",
                    ))
                }
            };
            // Continuation bytes must look like 10xxxxxx.
            if (byte & 0xC0) != 0x80 {
                return Err(Utf8Error.into());
            }
            scalar = (scalar << 6) | u32::from(byte & 0x3F);
        }

        if scalar < min_scalar {
            return Err(Utf8Error.into());
        }

        // `from_u32` rejects surrogates and values above U+10FFFF.
        match char::from_u32(scalar) {
            Some(ch) => Ok(Some(ch)),
            None => Err(Utf8Error.into()),
        }
    }
}
/// Lets a [`Utf8Error`] be returned wherever an [`io::Error`] is expected.
impl From<Utf8Error> for io::Error {
    /// Wraps `err` as the payload of an I/O error of kind `Other`.
    fn from(err: Utf8Error) -> Self {
        let kind = io::ErrorKind::Other;
        Self::new(kind, err)
    }
}
/// Buffered reader that hands out the raw bytes of a file one at a time.
pub struct OctetFile {
    /// File the bytes are read from.
    file: File,
    /// `(storage, len, pos)`: the read buffer, the number of valid bytes the
    /// last read placed in it, and the index of the next byte to hand out.
    /// Set to `None` once the reader has finished (end of file or an error).
    buf: Option<(Box<[u8]>, usize, usize)>,
}

/// Size in bytes of the read buffer allocated for each file source.
const CAP: usize = 8 * 1024;

impl OctetFile {
    /// Wraps an already opened file, allocating an empty `CAP`-byte buffer.
    pub fn new(file: File) -> Self {
        OctetFile {
            file,
            buf: Some((vec![0u8; CAP].into_boxed_slice(), 0, 0)),
        }
    }

    /// Opens the file at `path` for reading and wraps it.
    ///
    /// # Errors
    ///
    /// Returns whatever error `File::open` reports.
    pub fn open<P: AsRef<Path>>(path: P) -> Result<Self, io::Error> {
        File::open(path).map(Self::new)
    }

    /// Returns the next byte of the file, or `Ok(None)` at end of input.
    ///
    /// Every byte value is passed through unchanged; interpreting the bytes
    /// is left to the caller. Once the end of the file has been reached or an
    /// error has been returned, the buffer is dropped and every later call
    /// returns `Ok(None)`.
    ///
    /// # Errors
    ///
    /// Any error reported by the underlying read, except
    /// `ErrorKind::Interrupted`, which is retried.
    #[inline]
    fn next(&mut self) -> Result<Option<u8>, io::Error> {
        let (buf, len, pos) = match self.buf.as_mut() {
            Some((buf, len, pos)) => (buf, len, pos),
            // The reader already finished on an earlier call.
            None => return Ok(None),
        };

        let outcome = loop {
            if *pos < *len {
                let byte = buf[*pos];
                *pos += 1;
                return Ok(Some(byte));
            }

            // Buffer exhausted: refill it and go around again. The first byte
            // of a fresh buffer is treated exactly like any other byte.
            match self.file.read(buf) {
                Ok(0) => break Ok(None),
                Ok(read_len) => {
                    *len = read_len;
                    *pos = 0;
                }
                // An interrupted read is transient. Treating it as fatal would
                // drop the buffer and make the rest of the file look like EOF.
                Err(ref err) if err.kind() == io::ErrorKind::Interrupted => {}
                Err(err) => break Err(err),
            }
        };

        // End of file or a hard error: release the buffer so the reader stays
        // finished from now on.
        self.buf = None;
        outcome
    }
}
/// Error payload carrying a byte that is not an ASCII character.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct AsciiError(u8);

impl fmt::Display for AsciiError {
    /// Writes a message containing the offending byte's numeric value.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let AsciiError(byte) = *self;
        f.write_fmt(format_args!("invalid ASCII character '{}'", byte))
    }
}

impl error::Error for AsciiError {}
/// Error payload signalling that a byte sequence is not valid UTF-8.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct Utf8Error;

impl fmt::Display for Utf8Error {
    /// Writes a fixed message; the error carries no further detail.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "invalid UTF-8 sequence")
    }
}

impl error::Error for Utf8Error {}