use std::char::REPLACEMENT_CHARACTER;
use std::fmt::{self, Formatter};
use std::fs::File;
use std::intrinsics::copy;
use std::io::{ErrorKind, Read};
use std::path::Path;
use std::str::{from_utf8, from_utf8_unchecked};
use crate::utf8::*;
use crate::whitespaces::*;
use crate::ScannerError;
use crate::BUFFER_SIZE;
/// A scanner that pulls bytes from a reader into a fixed-size internal buffer
/// and hands them out as characters, lines, tokens or raw byte runs.
///
/// The unread bytes are the `buf_length` bytes of `buf` starting at index
/// `buf_offset`.
#[derive(Educe)]
#[educe(Debug)]
pub struct Scanner<R: Read> {
// The underlying byte source; excluded from the `Debug` output.
#[educe(Debug(ignore))]
reader: R,
// Fixed-size staging buffer; formatted for `Debug` through the free `fmt` function.
#[educe(Debug(method = "fmt"))]
buf: [u8; BUFFER_SIZE],
// Number of unread bytes currently held in `buf`.
buf_length: usize,
// Index in `buf` of the first unread byte.
buf_offset: usize,
}
impl<R: Read> Scanner<R> {
    /// Creates a scanner that reads from `reader`.
    ///
    /// The internal buffer starts zero-filled and empty; nothing is read from
    /// `reader` until one of the scanning methods is called.
    #[inline]
    pub fn new(reader: R) -> Scanner<R> {
        let buf = [0; BUFFER_SIZE];

        Self {
            buf_offset: 0,
            buf_length: 0,
            buf,
            reader,
        }
    }
}
impl Scanner<File> {
    /// Opens the file at `path` and wraps it in a scanner.
    ///
    /// # Errors
    ///
    /// Returns the error produced by `File::open`, converted into a
    /// `ScannerError`, when the file cannot be opened.
    #[inline]
    pub fn scan_path<P: AsRef<Path>>(path: P) -> Result<Scanner<File>, ScannerError> {
        Ok(Scanner::new(File::open(path)?))
    }
}
impl<R: Read> Scanner<R> {
    /// Marks the first `distance` unread bytes as consumed.
    ///
    /// The read offset advances by `distance` and the unread length shrinks by
    /// the same amount. When fewer than 32 bytes of the buffer remain after the
    /// new offset, the still-unread bytes are moved to the front of the buffer
    /// and the offset is reset to 0 so later reads have room to append.
    ///
    /// `distance` must not exceed the number of unread bytes (checked with a
    /// `debug_assert!`).
    fn buf_left_shift(&mut self, distance: usize) {
        debug_assert!(self.buf_length >= distance);

        self.buf_offset += distance;
        // Shrink the length BEFORE compacting: only the bytes that are still
        // unread may be copied. Copying the old length would read `distance`
        // bytes past the unread region, which can run past the end of `buf`.
        self.buf_length -= distance;

        if BUFFER_SIZE - self.buf_offset < 32 {
            // SAFETY: the unread region `buf_offset .. buf_offset + buf_length`
            // lies inside `buf` (callers never consume more than is buffered),
            // the destination `0 .. buf_length` is inside `buf` as well, and
            // `copy` allows the two ranges to overlap.
            unsafe {
                copy(
                    self.buf.as_ptr().add(self.buf_offset),
                    self.buf.as_mut_ptr(),
                    self.buf_length,
                );
            }

            self.buf_offset = 0;
        }
    }
}
impl<R: Read> Scanner<R> {
/// Reads the next character.
///
/// Returns `Ok(None)` when the buffer is empty and the reader has no more
/// data. A byte that cannot start a UTF-8 sequence, a sequence that fails
/// validation, or a sequence cut short by the end of input consumes exactly
/// one byte and yields `REPLACEMENT_CHARACTER`.
pub fn next_char(&mut self) -> Result<Option<char>, ScannerError> {
    if self.buf_length == 0 {
        let filled = self.reader.read(&mut self.buf[self.buf_offset..])?;

        if filled == 0 {
            return Ok(None);
        }

        self.buf_length += filled;
    }

    let lead = self.buf[self.buf_offset];
    let width = utf8_char_width(lead);

    // Not a valid leading byte: consume it and report a replacement.
    if width == 0 {
        self.buf_left_shift(1);
        return Ok(Some(REPLACEMENT_CHARACTER));
    }

    // Single-byte (ASCII) character.
    if width == 1 {
        self.buf_left_shift(1);
        return Ok(Some(lead as char));
    }

    // Multi-byte character: keep reading until the whole sequence is buffered.
    while self.buf_length < width {
        let tail = self.buf_offset + self.buf_length;

        match self.reader.read(&mut self.buf[tail..]) {
            Ok(0) => {
                // Input ended in the middle of the sequence.
                self.buf_left_shift(1);
                return Ok(Some(REPLACEMENT_CHARACTER));
            }
            Ok(more) => self.buf_length += more,
            Err(ref err) if err.kind() == ErrorKind::Interrupted => (),
            Err(err) => return Err(err.into()),
        }
    }

    let start = self.buf_offset;
    let decoded = from_utf8(&self.buf[start..start + width]).map(|text| text.chars().next());

    match decoded {
        Ok(ch) => {
            self.buf_left_shift(width);
            Ok(ch)
        }
        Err(_) => {
            self.buf_left_shift(1);
            Ok(Some(REPLACEMENT_CHARACTER))
        }
    }
}
/// Reads the next line as a `String`, without its line terminator.
///
/// A line ends at `\n` or `\r`; when the terminator is immediately followed
/// by the other of the two bytes, that byte is consumed as part of the same
/// terminator. Bytes that are not valid UTF-8 are replaced with
/// `REPLACEMENT_CHARACTER`. Returns `Ok(None)` when there is nothing left to
/// read, and the text gathered so far when the input ends without a
/// terminator.
pub fn next_line(&mut self) -> Result<Option<String>, ScannerError> {
    if self.buf_length == 0 {
        let filled = self.reader.read(&mut self.buf[self.buf_offset..])?;

        if filled == 0 {
            return Ok(None);
        }

        self.buf_length += filled;
    }

    let mut line = String::new();

    loop {
        let lead = self.buf[self.buf_offset];
        let width = utf8_char_width(lead);

        if width == 0 {
            // Not a valid leading byte.
            self.buf_left_shift(1);
            line.push(REPLACEMENT_CHARACTER);
        } else if width == 1 {
            if lead == b'\n' || lead == b'\r' {
                // The byte that, directly after `lead`, belongs to the same terminator.
                let partner = if lead == b'\n' { b'\r' } else { b'\n' };

                // Only the terminator is buffered: fetch more to peek behind it.
                if self.buf_length == 1 {
                    let tail = self.buf_offset + self.buf_length;
                    let filled = self.reader.read(&mut self.buf[tail..])?;

                    if filled == 0 {
                        self.buf_left_shift(1);
                        return Ok(Some(line));
                    }

                    self.buf_length += filled;
                }

                let terminator_len = if self.buf[self.buf_offset + 1] == partner {
                    2
                } else {
                    1
                };

                self.buf_left_shift(terminator_len);

                return Ok(Some(line));
            }

            self.buf_left_shift(1);
            line.push(lead as char);
        } else {
            // Multi-byte character: make sure the whole sequence is buffered.
            while self.buf_length < width {
                let tail = self.buf_offset + self.buf_length;

                match self.reader.read(&mut self.buf[tail..]) {
                    Ok(0) => {
                        // Input ended mid-sequence: flush what is left, lossily.
                        let rest = &self.buf[self.buf_offset..tail];
                        line.push_str(&String::from_utf8_lossy(rest));
                        self.buf_left_shift(self.buf_length);

                        return Ok(Some(line));
                    }
                    Ok(more) => self.buf_length += more,
                    Err(ref err) if err.kind() == ErrorKind::Interrupted => (),
                    Err(err) => return Err(err.into()),
                }
            }

            let start = self.buf_offset;
            let encoded = &self.buf[start..start + width];

            match from_utf8(encoded) {
                Ok(text) => {
                    line.push_str(text);
                    self.buf_left_shift(width);
                }
                Err(_) => {
                    self.buf_left_shift(1);
                    line.push(REPLACEMENT_CHARACTER);
                }
            }
        }

        // Refill once everything buffered has been consumed.
        if self.buf_length == 0 {
            let filled = self.reader.read(&mut self.buf[self.buf_offset..])?;

            if filled == 0 {
                return Ok(Some(line));
            }

            self.buf_length += filled;
        }
    }
}
/// Reads the next line as raw bytes, without its line terminator.
///
/// Line endings are recognised exactly as in `next_line` (`\n` or `\r`,
/// optionally followed by the other of the two), but the bytes of the line
/// are copied verbatim with no UTF-8 validation. Returns `Ok(None)` when
/// there is nothing left to read.
pub fn next_line_raw(&mut self) -> Result<Option<Vec<u8>>, ScannerError> {
    if self.buf_length == 0 {
        let filled = self.reader.read(&mut self.buf[self.buf_offset..])?;

        if filled == 0 {
            return Ok(None);
        }

        self.buf_length += filled;
    }

    let mut line = Vec::new();

    loop {
        let lead = self.buf[self.buf_offset];
        let width = utf8_char_width(lead);

        if width == 0 {
            // Not a valid leading byte: kept as-is in raw mode.
            self.buf_left_shift(1);
            line.push(lead);
        } else if width == 1 {
            if lead == b'\n' || lead == b'\r' {
                // The byte that, directly after `lead`, belongs to the same terminator.
                let partner = if lead == b'\n' { b'\r' } else { b'\n' };

                // Only the terminator is buffered: fetch more to peek behind it.
                if self.buf_length == 1 {
                    let tail = self.buf_offset + self.buf_length;
                    let filled = self.reader.read(&mut self.buf[tail..])?;

                    if filled == 0 {
                        self.buf_left_shift(1);
                        return Ok(Some(line));
                    }

                    self.buf_length += filled;
                }

                let terminator_len = if self.buf[self.buf_offset + 1] == partner {
                    2
                } else {
                    1
                };

                self.buf_left_shift(terminator_len);

                return Ok(Some(line));
            }

            self.buf_left_shift(1);
            line.push(lead);
        } else {
            // Multi-byte sequence: make sure all of it is buffered.
            while self.buf_length < width {
                let tail = self.buf_offset + self.buf_length;

                match self.reader.read(&mut self.buf[tail..]) {
                    Ok(0) => {
                        // Input ended mid-sequence: keep whatever is left.
                        line.extend_from_slice(&self.buf[self.buf_offset..tail]);
                        self.buf_left_shift(self.buf_length);

                        return Ok(Some(line));
                    }
                    Ok(more) => self.buf_length += more,
                    Err(ref err) if err.kind() == ErrorKind::Interrupted => (),
                    Err(err) => return Err(err.into()),
                }
            }

            let start = self.buf_offset;
            line.extend_from_slice(&self.buf[start..start + width]);
            self.buf_left_shift(width);
        }

        // Refill once everything buffered has been consumed.
        if self.buf_length == 0 {
            let filled = self.reader.read(&mut self.buf[self.buf_offset..])?;

            if filled == 0 {
                return Ok(Some(line));
            }

            self.buf_length += filled;
        }
    }
}
/// Discards the next line and returns how many bytes it contained.
///
/// The line terminator (`\n` or `\r`, optionally followed by the other of the
/// two) is consumed but not included in the count. Returns `Ok(None)` when
/// there is nothing left to read.
pub fn drop_next_line(&mut self) -> Result<Option<usize>, ScannerError> {
    if self.buf_length == 0 {
        let size = self.reader.read(&mut self.buf[self.buf_offset..])?;

        if size == 0 {
            return Ok(None);
        }

        self.buf_length += size;
    }

    let mut dropped = 0;

    loop {
        let lead = self.buf[self.buf_offset];
        let width = utf8_char_width(lead);

        match width {
            0 => {
                self.buf_left_shift(1);
                dropped += 1;
            }
            1 => {
                if lead == b'\n' || lead == b'\r' {
                    // The byte that, directly after `lead`, belongs to the same terminator.
                    let partner = if lead == b'\n' { b'\r' } else { b'\n' };

                    // Only the terminator is buffered: fetch more to peek behind it.
                    if self.buf_length == 1 {
                        let tail = self.buf_offset + self.buf_length;
                        let size = self.reader.read(&mut self.buf[tail..])?;

                        if size == 0 {
                            self.buf_left_shift(1);
                            return Ok(Some(dropped));
                        }

                        self.buf_length += size;
                    }

                    if self.buf[self.buf_offset + 1] == partner {
                        self.buf_left_shift(2);
                    } else {
                        self.buf_left_shift(1);
                    }

                    return Ok(Some(dropped));
                }

                self.buf_left_shift(1);
                dropped += 1;
            }
            _ => {
                // Multi-byte sequence: make sure all of it is buffered.
                while self.buf_length < width {
                    let tail = self.buf_offset + self.buf_length;

                    match self.reader.read(&mut self.buf[tail..]) {
                        Ok(0) => {
                            // Capture the length BEFORE shifting; the shift
                            // resets `buf_length` to 0, which previously made
                            // the trailing bytes vanish from the count.
                            let remaining = self.buf_length;
                            self.buf_left_shift(remaining);
                            dropped += remaining;

                            return Ok(Some(dropped));
                        }
                        Ok(more) => self.buf_length += more,
                        Err(ref err) if err.kind() == ErrorKind::Interrupted => (),
                        Err(err) => return Err(err.into()),
                    }
                }

                self.buf_left_shift(width);
                dropped += width;
            }
        }

        // Refill once everything buffered has been consumed.
        if self.buf_length == 0 {
            let size = self.reader.read(&mut self.buf[self.buf_offset..])?;

            if size == 0 {
                return Ok(Some(dropped));
            }

            self.buf_length += size;
        }
    }
}
}
impl<R: Read> Scanner<R> {
/// Consumes leading whitespace characters.
///
/// Whitespace is whatever `is_whitespace_1` (single-byte) and
/// `is_whitespace_3` (three-byte sequences) accept. Returns `Ok(false)` only
/// when the buffer is empty and the reader has no data at the time of the
/// call; otherwise returns `Ok(true)`, including when the input runs out
/// while skipping.
pub fn skip_whitespaces(&mut self) -> Result<bool, ScannerError> {
    if self.buf_length == 0 {
        let filled = self.reader.read(&mut self.buf[self.buf_offset..])?;

        if filled == 0 {
            return Ok(false);
        }

        self.buf_length += filled;
    }

    loop {
        let lead = self.buf[self.buf_offset];
        let width = utf8_char_width(lead);

        if width == 1 {
            if !is_whitespace_1(lead) {
                return Ok(true);
            }

            self.buf_left_shift(1);
        } else if width == 3 {
            // Need all three bytes buffered before the sequence can be classified.
            while self.buf_length < width {
                let tail = self.buf_offset + self.buf_length;

                match self.reader.read(&mut self.buf[tail..]) {
                    Ok(0) => return Ok(true),
                    Ok(more) => self.buf_length += more,
                    Err(ref err) if err.kind() == ErrorKind::Interrupted => (),
                    Err(err) => return Err(err.into()),
                }
            }

            let at = self.buf_offset;

            if !is_whitespace_3(self.buf[at], self.buf[at + 1], self.buf[at + 2]) {
                return Ok(true);
            }

            self.buf_left_shift(3);
        } else {
            // Invalid leading bytes and other widths are never treated as whitespace.
            return Ok(true);
        }

        // Refill once everything buffered has been consumed.
        if self.buf_length == 0 {
            let filled = self.reader.read(&mut self.buf[self.buf_offset..])?;

            if filled == 0 {
                return Ok(true);
            }

            self.buf_length += filled;
        }
    }
}
/// Reads the next whitespace-delimited token as a `String`.
///
/// Leading whitespace is skipped first. The token ends at the first
/// whitespace character (as judged by `is_whitespace_1` / `is_whitespace_3`),
/// which is consumed but not included, or at the end of the input. Bytes that
/// are not valid UTF-8 are replaced with `REPLACEMENT_CHARACTER`.
///
/// Returns `Ok(None)` when there is nothing left to read.
// The method is named `next` without implementing `Iterator`, hence the lint allowance.
#[allow(clippy::should_implement_trait)]
pub fn next(&mut self) -> Result<Option<String>, ScannerError> {
// `skip_whitespaces` reports `false` when there was no data at all.
if !self.skip_whitespaces()? {
return Ok(None);
}
// Skipping may have drained the buffer; refill before inspecting the first byte.
if self.buf_length == 0 {
let size = self.reader.read(&mut self.buf[self.buf_offset..])?;
if size == 0 {
return Ok(None);
}
self.buf_length += size;
}
let mut temp = String::new();
loop {
let e = self.buf[self.buf_offset];
let width = utf8_char_width(e);
match width {
// Width 0: not a valid leading byte; consume it and substitute.
0 => {
self.buf_left_shift(1);
temp.push(REPLACEMENT_CHARACTER);
}
// Single-byte character: either a delimiter or part of the token.
1 => {
if is_whitespace_1(e) {
self.buf_left_shift(1);
return Ok(Some(temp));
}
self.buf_left_shift(1);
temp.push(e as char);
}
// Three-byte sequence: may be a multi-byte whitespace delimiter.
3 => {
// Keep reading until the whole sequence is buffered.
while self.buf_length < width {
match self.reader.read(&mut self.buf[(self.buf_offset + self.buf_length)..])
{
// Input ended mid-sequence: flush the leftover bytes lossily.
Ok(0) => {
temp.push_str(
String::from_utf8_lossy(
&self.buf
[self.buf_offset..(self.buf_offset + self.buf_length)],
)
.as_ref(),
);
self.buf_left_shift(self.buf_length);
return Ok(Some(temp));
}
Ok(c) => self.buf_length += c,
// Interrupted reads are retried.
Err(ref err) if err.kind() == ErrorKind::Interrupted => (),
Err(err) => return Err(err.into()),
}
}
if is_whitespace_3(
self.buf[self.buf_offset],
self.buf[self.buf_offset + 1],
self.buf[self.buf_offset + 2],
) {
self.buf_left_shift(3);
return Ok(Some(temp));
} else {
let char_str_bytes = &self.buf[self.buf_offset..(self.buf_offset + width)];
match from_utf8(char_str_bytes) {
Ok(char_str) => {
temp.push_str(char_str);
self.buf_left_shift(width);
}
// Invalid sequence: drop only the leading byte and substitute.
Err(_) => {
self.buf_left_shift(1);
temp.push(REPLACEMENT_CHARACTER);
}
}
}
}
// Any other multi-byte width: never a delimiter, just decode and append.
_ => {
while self.buf_length < width {
match self.reader.read(&mut self.buf[(self.buf_offset + self.buf_length)..])
{
// Input ended mid-sequence: flush the leftover bytes lossily.
Ok(0) => {
temp.push_str(
String::from_utf8_lossy(
&self.buf
[self.buf_offset..(self.buf_offset + self.buf_length)],
)
.as_ref(),
);
self.buf_left_shift(self.buf_length);
return Ok(Some(temp));
}
Ok(c) => self.buf_length += c,
Err(ref err) if err.kind() == ErrorKind::Interrupted => (),
Err(err) => return Err(err.into()),
}
}
let char_str_bytes = &self.buf[self.buf_offset..(self.buf_offset + width)];
match from_utf8(char_str_bytes) {
Ok(char_str) => {
temp.push_str(char_str);
self.buf_left_shift(width);
}
Err(_) => {
self.buf_left_shift(1);
temp.push(REPLACEMENT_CHARACTER);
}
}
}
}
// Refill once everything buffered has been consumed; end of input ends the token.
if self.buf_length == 0 {
let size = self.reader.read(&mut self.buf[self.buf_offset..])?;
if size == 0 {
return Ok(Some(temp));
}
self.buf_length += size;
}
}
}
/// Reads the next whitespace-delimited token as raw bytes.
///
/// Leading whitespace is skipped first. The token ends at the first
/// whitespace character (as judged by `is_whitespace_1` / `is_whitespace_3`),
/// which is consumed but not included, or at the end of the input. Bytes are
/// copied verbatim with no UTF-8 validation.
///
/// Returns `Ok(None)` when there is nothing left to read.
pub fn next_raw(&mut self) -> Result<Option<Vec<u8>>, ScannerError> {
    // `skip_whitespaces` reports `false` when there was no data at all.
    if !self.skip_whitespaces()? {
        return Ok(None);
    }

    if self.buf_length == 0 {
        let size = self.reader.read(&mut self.buf[self.buf_offset..])?;

        if size == 0 {
            return Ok(None);
        }

        self.buf_length += size;
    }

    let mut token = Vec::new();

    loop {
        let lead = self.buf[self.buf_offset];
        let width = utf8_char_width(lead);

        match width {
            0 => {
                // Not a valid leading byte: kept as-is in raw mode.
                self.buf_left_shift(1);
                token.push(lead);
            }
            1 => {
                self.buf_left_shift(1);

                if is_whitespace_1(lead) {
                    return Ok(Some(token));
                }

                token.push(lead);
            }
            _ => {
                // Multi-byte sequence: make sure all of it is buffered.
                while self.buf_length < width {
                    let tail = self.buf_offset + self.buf_length;

                    match self.reader.read(&mut self.buf[tail..]) {
                        Ok(0) => {
                            // Input ended mid-sequence. Keep the leftover bytes
                            // for every width; the three-byte case used to
                            // discard them silently.
                            token.extend_from_slice(&self.buf[self.buf_offset..tail]);
                            self.buf_left_shift(self.buf_length);

                            return Ok(Some(token));
                        }
                        Ok(more) => self.buf_length += more,
                        Err(ref err) if err.kind() == ErrorKind::Interrupted => (),
                        Err(err) => return Err(err.into()),
                    }
                }

                let start = self.buf_offset;

                // Only three-byte sequences can be multi-byte whitespace delimiters.
                if width == 3
                    && is_whitespace_3(self.buf[start], self.buf[start + 1], self.buf[start + 2])
                {
                    self.buf_left_shift(3);
                    return Ok(Some(token));
                }

                token.extend_from_slice(&self.buf[start..start + width]);
                self.buf_left_shift(width);
            }
        }

        // Refill once everything buffered has been consumed.
        if self.buf_length == 0 {
            let size = self.reader.read(&mut self.buf[self.buf_offset..])?;

            if size == 0 {
                return Ok(Some(token));
            }

            self.buf_length += size;
        }
    }
}
/// Discards the next whitespace-delimited token and returns its length in bytes.
///
/// Leading whitespace is skipped first and is not counted. The whitespace
/// character that terminates the token is consumed but not counted either.
///
/// Returns `Ok(None)` when there is nothing left to read.
pub fn drop_next(&mut self) -> Result<Option<usize>, ScannerError> {
    // `skip_whitespaces` reports `false` when there was no data at all.
    if !self.skip_whitespaces()? {
        return Ok(None);
    }

    if self.buf_length == 0 {
        let size = self.reader.read(&mut self.buf[self.buf_offset..])?;

        if size == 0 {
            return Ok(None);
        }

        self.buf_length += size;
    }

    let mut dropped = 0;

    loop {
        let lead = self.buf[self.buf_offset];
        let width = utf8_char_width(lead);

        match width {
            0 => {
                self.buf_left_shift(1);
                dropped += 1;
            }
            1 => {
                self.buf_left_shift(1);

                if is_whitespace_1(lead) {
                    return Ok(Some(dropped));
                }

                dropped += 1;
            }
            _ => {
                // Multi-byte sequence: make sure all of it is buffered.
                while self.buf_length < width {
                    let tail = self.buf_offset + self.buf_length;

                    match self.reader.read(&mut self.buf[tail..]) {
                        Ok(0) => {
                            // Capture the length BEFORE shifting; the shift
                            // resets `buf_length` to 0, which previously made
                            // the trailing bytes vanish from the count.
                            let remaining = self.buf_length;
                            self.buf_left_shift(remaining);
                            dropped += remaining;

                            return Ok(Some(dropped));
                        }
                        Ok(more) => self.buf_length += more,
                        Err(ref err) if err.kind() == ErrorKind::Interrupted => (),
                        Err(err) => return Err(err.into()),
                    }
                }

                let start = self.buf_offset;

                // Only three-byte sequences can be multi-byte whitespace delimiters.
                if width == 3
                    && is_whitespace_3(self.buf[start], self.buf[start + 1], self.buf[start + 2])
                {
                    self.buf_left_shift(3);
                    return Ok(Some(dropped));
                }

                // Multi-byte characters belong to the token, so their bytes
                // count towards its length just like single-byte ones.
                self.buf_left_shift(width);
                dropped += width;
            }
        }

        // Refill once everything buffered has been consumed.
        if self.buf_length == 0 {
            let size = self.reader.read(&mut self.buf[self.buf_offset..])?;

            if size == 0 {
                return Ok(Some(dropped));
            }

            self.buf_length += size;
        }
    }
}
}
impl<R: Read> Scanner<R> {
/// Reads up to `number_of_bytes` bytes and returns them.
///
/// Returns `Ok(None)` when the buffer is empty and the reader has no more
/// data at the time of the call. If the input ends before `number_of_bytes`
/// bytes were collected, the shorter vector gathered so far is returned.
pub fn next_bytes(&mut self, number_of_bytes: usize) -> Result<Option<Vec<u8>>, ScannerError> {
    if self.buf_length == 0 {
        let filled = self.reader.read(&mut self.buf[self.buf_offset..])?;

        if filled == 0 {
            return Ok(None);
        }

        self.buf_length += filled;
    }

    let mut collected = Vec::new();
    // Bytes still owed to the caller.
    let mut remaining = number_of_bytes;

    while remaining > 0 {
        if self.buf_length == 0 {
            let filled = self.reader.read(&mut self.buf[self.buf_offset..])?;

            if filled == 0 {
                return Ok(Some(collected));
            }

            self.buf_length += filled;
        }

        // Take as much as is buffered, but never more than requested.
        let take = remaining.min(self.buf_length);
        let start = self.buf_offset;

        collected.extend_from_slice(&self.buf[start..start + take]);
        self.buf_left_shift(take);

        remaining -= take;
    }

    Ok(Some(collected))
}
/// Discards up to `number_of_bytes` bytes and returns how many were dropped.
///
/// Returns `Ok(None)` when the buffer is empty and the reader has no more
/// data at the time of the call. If the input ends early, the number of
/// bytes actually dropped is returned.
pub fn drop_next_bytes(
    &mut self,
    number_of_bytes: usize,
) -> Result<Option<usize>, ScannerError> {
    if self.buf_length == 0 {
        let filled = self.reader.read(&mut self.buf[self.buf_offset..])?;

        if filled == 0 {
            return Ok(None);
        }

        self.buf_length += filled;
    }

    let mut dropped = 0;

    loop {
        if dropped >= number_of_bytes {
            return Ok(Some(dropped));
        }

        if self.buf_length == 0 {
            let filled = self.reader.read(&mut self.buf[self.buf_offset..])?;

            if filled == 0 {
                return Ok(Some(dropped));
            }

            self.buf_length += filled;
        }

        // Drop as much as is buffered, but never more than requested.
        let step = (number_of_bytes - dropped).min(self.buf_length);

        self.buf_left_shift(step);
        dropped += step;
    }
}
}
impl<R: Read> Scanner<R> {
#[inline]
pub fn next_u8(&mut self) -> Result<Option<u8>, ScannerError> {
let result = self.next_raw()?;
match result {
Some(s) => Ok(Some(unsafe { from_utf8_unchecked(&s) }.parse()?)),
None => Ok(None),
}
}
#[inline]
pub fn next_u16(&mut self) -> Result<Option<u16>, ScannerError> {
let result = self.next_raw()?;
match result {
Some(s) => Ok(Some(unsafe { from_utf8_unchecked(&s) }.parse()?)),
None => Ok(None),
}
}
#[inline]
pub fn next_u32(&mut self) -> Result<Option<u32>, ScannerError> {
let result = self.next_raw()?;
match result {
Some(s) => Ok(Some(unsafe { from_utf8_unchecked(&s) }.parse()?)),
None => Ok(None),
}
}
#[inline]
pub fn next_u64(&mut self) -> Result<Option<u64>, ScannerError> {
let result = self.next_raw()?;
match result {
Some(s) => Ok(Some(unsafe { from_utf8_unchecked(&s) }.parse()?)),
None => Ok(None),
}
}
#[inline]
pub fn next_u128(&mut self) -> Result<Option<u128>, ScannerError> {
let result = self.next_raw()?;
match result {
Some(s) => Ok(Some(unsafe { from_utf8_unchecked(&s) }.parse()?)),
None => Ok(None),
}
}
#[inline]
pub fn next_usize(&mut self) -> Result<Option<usize>, ScannerError> {
let result = self.next_raw()?;
match result {
Some(s) => Ok(Some(unsafe { from_utf8_unchecked(&s) }.parse()?)),
None => Ok(None),
}
}
#[inline]
pub fn next_i8(&mut self) -> Result<Option<i8>, ScannerError> {
let result = self.next_raw()?;
match result {
Some(s) => Ok(Some(unsafe { from_utf8_unchecked(&s) }.parse()?)),
None => Ok(None),
}
}
#[inline]
pub fn next_i16(&mut self) -> Result<Option<i16>, ScannerError> {
let result = self.next_raw()?;
match result {
Some(s) => Ok(Some(unsafe { from_utf8_unchecked(&s) }.parse()?)),
None => Ok(None),
}
}
#[inline]
pub fn next_i32(&mut self) -> Result<Option<i32>, ScannerError> {
let result = self.next_raw()?;
match result {
Some(s) => Ok(Some(unsafe { from_utf8_unchecked(&s) }.parse()?)),
None => Ok(None),
}
}
#[inline]
pub fn next_i64(&mut self) -> Result<Option<i64>, ScannerError> {
let result = self.next_raw()?;
match result {
Some(s) => Ok(Some(unsafe { from_utf8_unchecked(&s) }.parse()?)),
None => Ok(None),
}
}
#[inline]
pub fn next_i128(&mut self) -> Result<Option<i128>, ScannerError> {
let result = self.next_raw()?;
match result {
Some(s) => Ok(Some(unsafe { from_utf8_unchecked(&s) }.parse()?)),
None => Ok(None),
}
}
#[inline]
pub fn next_isize(&mut self) -> Result<Option<isize>, ScannerError> {
let result = self.next_raw()?;
match result {
Some(s) => Ok(Some(unsafe { from_utf8_unchecked(&s) }.parse()?)),
None => Ok(None),
}
}
#[inline]
pub fn next_f32(&mut self) -> Result<Option<f32>, ScannerError> {
let result = self.next_raw()?;
match result {
Some(s) => Ok(Some(unsafe { from_utf8_unchecked(&s) }.parse()?)),
None => Ok(None),
}
}
#[inline]
pub fn next_f64(&mut self) -> Result<Option<f64>, ScannerError> {
let result = self.next_raw()?;
match result {
Some(s) => Ok(Some(unsafe { from_utf8_unchecked(&s) }.parse()?)),
None => Ok(None),
}
}
}
/// Formats a byte slice as a debug list, e.g. `[1, 2, 3]`.
///
/// Used as the custom `Debug` method for the scanner's buffer field.
/// `finish()` must be called: it writes the closing `]` and reports any
/// error the formatter hit while writing the entries.
#[inline]
fn fmt(s: &[u8], f: &mut Formatter) -> fmt::Result {
    f.debug_list().entries(s.iter()).finish()
}