use crate::errors::{error, nil, New};
use crate::types::{byte, int, rune, string};
use std::io::{BufRead, Read, Write as _};
/// Builds the error that `Scanner::Scan` records when its buffer is full and
/// may not grow any further because of the scanner's `max_token` limit.
///
/// Each call constructs a fresh error value through `New`.
#[allow(non_snake_case)]
pub fn ErrTooLong() -> error { New("bufio.Scanner: token too long") }
/// Builds an error carrying the message "final token".
///
/// NOTE(review): nothing in this file compares against this value; `Scanner::Scan`
/// handles it like any other error returned by a split function (the token is
/// delivered, the error is stored, scanning stops). Confirm whether callers
/// expect Go's "stop without error" semantics.
#[allow(non_snake_case)]
pub fn ErrFinalToken() -> error { New("final token") }
/// Default buffer limit, in bytes (64 KiB), that `NewScanner` installs as the
/// scanner's `max_token`.
pub const MaxScanTokenSize: usize = 64 * 1024;
/// Signature of the tokenizer callback driven by `Scanner::Scan`.
///
/// `data` is the buffered input that has not been consumed yet and `at_eof`
/// tells whether the reader is exhausted. The result is
/// `(advance, token, error)`: how many bytes of `data` to consume, the token to
/// deliver (or `None` to ask for more input), and an error that stops scanning.
/// `Scan` rejects an `advance` that is negative or larger than `data.len()`.
pub type SplitFunc = fn(data: &[byte], at_eof: bool) -> (int, Option<Vec<byte>>, error);
/// Token scanner over a reader: `Scan` fills an internal buffer from `reader`
/// and hands the unconsumed part to `split` until a token is produced.
pub struct Scanner<R: Read> {
// Source of input bytes.
reader: R,
// Tokenizer callback; `NewScanner` installs `ScanLines`.
split: SplitFunc,
// Limit consulted when the buffer has to grow; exceeding it yields `ErrTooLong`.
max_token: usize,
// Backing storage for bytes read from `reader`.
buffer: Vec<byte>,
// `buffer[start..end]` is the data read but not yet consumed by `split`.
start: usize,
end: usize,
// Most recent token returned by `split`; exposed through `Bytes`/`Text`.
token: Vec<byte>,
// First error that stopped scanning (`nil` if none); exposed through `Err`.
last_err: error,
// Set once the reader reported end of input.
at_eof: bool,
// Set once scanning has finished; `Scan` then returns false immediately.
done: bool,
// Count of consecutive tokens delivered with a zero advance.
empties: usize,
}
#[allow(non_snake_case)]
pub fn NewScanner<R: Read>(r: R) -> Scanner<R> {
Scanner {
reader: r,
split: ScanLines,
max_token: MaxScanTokenSize,
buffer: Vec::with_capacity(4096),
start: 0, end: 0,
token: Vec::new(),
last_err: nil,
at_eof: false,
done: false,
empties: 0,
}
}
impl<R: Read> Scanner<R> {
pub fn Split(&mut self, f: SplitFunc) { self.split = f; }
pub fn MaxTokenSize(&mut self, n: int) { self.max_token = n.max(0) as usize; }
pub fn Err(&self) -> &error { &self.last_err }
pub fn Bytes(&self) -> &[byte] { &self.token }
pub fn Text(&self) -> &str {
std::str::from_utf8(&self.token).unwrap_or("")
}
pub fn Scan(&mut self) -> bool {
if self.done { return false; }
loop {
let data = &self.buffer[self.start..self.end];
let (advance, token, err) = (self.split)(data, self.at_eof);
if err != nil {
self.last_err = err;
self.done = true;
if let Some(tok) = token {
self.token = tok;
return true;
}
return false;
}
if advance < 0 || (advance as usize) > data.len() {
self.last_err = New("bufio.Scanner: SplitFunc returned invalid advance");
self.done = true;
return false;
}
self.start += advance as usize;
if let Some(tok) = token {
self.token = tok;
if advance > 0 { self.empties = 0; }
else {
self.empties += 1;
if self.empties > 100 {
self.last_err = New("bufio.Scanner: too many empty tokens without progress");
self.done = true;
return false;
}
}
return true;
}
if self.start > 0 {
self.buffer.copy_within(self.start..self.end, 0);
self.end -= self.start;
self.start = 0;
}
if self.end >= self.buffer.len() {
let cap = self.buffer.capacity();
let new_cap = (cap * 2).max(4096).max(self.end + 1);
if new_cap > self.max_token {
self.last_err = ErrTooLong();
self.done = true;
return false;
}
self.buffer.resize(new_cap, 0);
}
if self.at_eof {
self.done = true;
if self.end > 0 {
let data = &self.buffer[self.start..self.end];
let (_, token2, err2) = (self.split)(data, true);
if err2 != nil { self.last_err = err2; }
if let Some(tok) = token2 {
self.token = tok;
self.start = self.end;
return true;
}
}
return false;
}
let mut scratch = [0u8; 4096];
let avail = self.buffer.len() - self.end;
let want = std::cmp::min(avail, scratch.len());
match self.reader.read(&mut scratch[..want]) {
Ok(0) => {
self.at_eof = true;
}
Ok(n) => {
self.buffer[self.end..self.end + n].copy_from_slice(&scratch[..n]);
self.end += n;
}
Err(e) => {
let msg = format!("{}", e);
if msg.to_lowercase().contains("unexpectedeof") || msg.to_lowercase().contains("eof") {
self.at_eof = true;
} else {
self.last_err = New(&format!("bufio.Scanner: {}", e));
self.done = true;
return false;
}
}
}
}
}
}
/// Split function that yields every input byte as its own one-byte token.
///
/// With no data available it requests more input (`advance` 0, no token);
/// the end-of-input flag is not consulted.
#[allow(non_snake_case)]
pub fn ScanBytes(data: &[byte], _at_eof: bool) -> (int, Option<Vec<byte>>, error) {
    match data.first() {
        Some(&first) => (1, Some(vec![first]), nil),
        None => (0, None, nil),
    }
}
/// Split function that yields one UTF-8 encoded code point per token.
///
/// * ASCII bytes and well-formed multi-byte sequences are returned verbatim.
/// * A byte that cannot start or continue a well-formed sequence (stray
///   continuation byte, overlong form, surrogate, value above U+10FFFF) is
///   replaced by the encoding of `crate::unicode::RuneError`, and exactly one
///   input byte is consumed so the bytes after it are scanned normally.
/// * A sequence that is cut off but could still become valid asks for more
///   input unless `at_eof` is set, in which case it is treated as invalid.
#[allow(non_snake_case)]
pub fn ScanRunes(data: &[byte], at_eof: bool) -> (int, Option<Vec<byte>>, error) {
    if data.is_empty() {
        return (0, None, nil);
    }
    let lead = data[0];
    if lead < 0x80 {
        return (1, Some(vec![lead]), nil);
    }

    // Sequence length announced by the lead byte; 0 marks bytes that can never
    // start a well-formed sequence (0x80..=0xC1 and 0xF5..=0xFF).
    let width: usize = match lead {
        0xC2..=0xDF => 2,
        0xE0..=0xEF => 3,
        0xF0..=0xF4 => 4,
        _ => 0,
    };

    if width != 0 {
        if data.len() >= width {
            // The standard validator rejects overlong forms and surrogates too.
            if std::str::from_utf8(&data[..width]).is_ok() {
                return (width as int, Some(data[..width].to_vec()), nil);
            }
        } else if !at_eof {
            // `error_len() == None` means "ran out of input", not "bad byte".
            let may_complete = match std::str::from_utf8(data) {
                Err(e) => e.valid_up_to() == 0 && e.error_len().is_none(),
                Ok(_) => false,
            };
            if may_complete {
                return (0, None, nil);
            }
        }
    }

    // Encoding error: emit the error rune and step over a single byte.
    let mut replacement = vec![0u8; 4];
    let n = crate::unicode::utf8::EncodeRune(&mut replacement, crate::unicode::RuneError);
    replacement.truncate(n as usize);
    (1, Some(replacement), nil)
}
/// Split function that yields one line per token with the line ending removed.
///
/// A line ends at `\n`; a single `\r` directly before the line end is dropped
/// as well. At end of input the remaining bytes form a final, unterminated
/// line, which gets the same trailing-`\r` treatment as terminated lines.
/// Empty input at end of input produces no token.
#[allow(non_snake_case)]
pub fn ScanLines(data: &[byte], at_eof: bool) -> (int, Option<Vec<byte>>, error) {
    // Removes one trailing carriage return, if present.
    fn drop_cr(line: &[byte]) -> &[byte] {
        match line.split_last() {
            Some((&b'\r', rest)) => rest,
            _ => line,
        }
    }

    if at_eof && data.is_empty() {
        return (0, None, nil);
    }
    if let Some(newline) = data.iter().position(|&b| b == b'\n') {
        let line = drop_cr(&data[..newline]);
        return ((newline + 1) as int, Some(line.to_vec()), nil);
    }
    if at_eof {
        return (data.len() as int, Some(drop_cr(data).to_vec()), nil);
    }
    // No complete line yet: ask for more input.
    (0, None, nil)
}
/// Split function that yields space-separated words, with the surrounding
/// white space (as judged by `IsSpace`) removed.
///
/// Leading white space is skipped. The token ends at the next white-space
/// rune, which is consumed together with the word. If no terminating space is
/// buffered, the remaining bytes are returned as the last word at end of
/// input; otherwise only the skipped leading white space is consumed and more
/// input is requested.
#[allow(non_snake_case)]
pub fn ScanWords(data: &[byte], at_eof: bool) -> (int, Option<Vec<byte>>, error) {
    let total = data.len();

    // Find where the word begins.
    let mut word_start = 0usize;
    loop {
        if word_start >= total {
            break;
        }
        let (r, width) = decode_rune(&data[word_start..]);
        if !IsSpace(r) {
            break;
        }
        word_start += width;
    }

    // Walk the word until a white-space rune terminates it.
    let mut cursor = word_start;
    loop {
        if cursor >= total {
            break;
        }
        let (r, width) = decode_rune(&data[cursor..]);
        if IsSpace(r) {
            // Consume the terminating space, always stepping at least one byte.
            let step = if width == 0 { 1 } else { width };
            let word = data[word_start..cursor].to_vec();
            return ((cursor + step) as int, Some(word), nil);
        }
        cursor += width;
    }

    if at_eof && total > word_start {
        (total as int, Some(data[word_start..].to_vec()), nil)
    } else {
        (word_start as int, None, nil)
    }
}
/// Decodes the first UTF-8 sequence in `data`.
///
/// Returns the code point and the number of bytes it occupies. Empty input
/// yields `(RuneError, 0)`. Anything that is not a well-formed sequence — a
/// stray continuation byte, a truncated sequence, an overlong form, a
/// surrogate or a value above U+10FFFF — yields `(RuneError, 1)` so the caller
/// can step over the offending byte.
fn decode_rune(data: &[byte]) -> (rune, usize) {
    let lead = match data.first() {
        Some(&b) => b,
        None => return (crate::unicode::RuneError, 0),
    };
    if lead < 0x80 {
        return (lead as rune, 1);
    }
    // 0xC0/0xC1 only start overlong forms and 0xF5.. exceeds U+10FFFF, so
    // neither is accepted as a lead byte.
    let width = match lead {
        0xC2..=0xDF => 2,
        0xE0..=0xEF => 3,
        0xF0..=0xF4 => 4,
        _ => return (crate::unicode::RuneError, 1),
    };
    // The standard validator covers continuation bytes, overlong three/four
    // byte forms and surrogates in one step.
    let decoded = data
        .get(..width)
        .and_then(|seq| std::str::from_utf8(seq).ok())
        .and_then(|text| text.chars().next());
    match decoded {
        Some(ch) => (ch as u32 as rune, width),
        None => (crate::unicode::RuneError, 1),
    }
}
/// Number of bytes to step over the first rune of `data`: the size reported
/// by `decode_rune`, but never less than one.
fn next_rune_size(data: &[byte]) -> usize {
    match decode_rune(data).1 {
        0 => 1,
        width => width,
    }
}
/// Reports whether `r` is white space by delegating to `crate::unicode::IsSpace`.
/// `ScanWords` uses it to find word boundaries.
#[allow(non_snake_case)]
pub fn IsSpace(r: rune) -> bool {
crate::unicode::IsSpace(r)
}
/// Reads every line from `r` using a default scanner (`NewScanner`).
///
/// Returns the collected lines together with the scanner's final error, which
/// is `nil` when the input was read to the end. Lines gathered before an
/// error are still returned.
///
/// Bytes that are not valid UTF-8 are replaced with U+FFFD, the same lossy
/// conversion `Reader::ReadString` applies, so a line containing stray bytes
/// keeps the rest of its content instead of turning into an empty string.
#[allow(non_snake_case)]
pub fn ReadLines<R: Read>(r: R) -> (crate::types::slice<string>, error) {
    let mut sc = NewScanner(r);
    let mut lines = crate::types::slice::<string>::new();
    while sc.Scan() {
        // `Text()` would collapse an invalid line to ""; convert the raw bytes.
        lines.push(String::from_utf8_lossy(sc.Bytes()).into_owned().into());
    }
    let err = sc.Err().clone();
    (lines, err)
}
/// Buffered reader with Go-style `(value, error)` read methods and a single
/// byte of pushback for `UnreadByte`.
pub struct Reader<R: std::io::Read> {
    inner: std::io::BufReader<R>,
    // Byte pushed back by `UnreadByte`; every read serves it before `inner`.
    unread: Option<crate::types::byte>,
    // Last byte handed out by the most recent read. `None` when that read
    // produced nothing or the byte has already been pushed back.
    last_byte: Option<crate::types::byte>,
}

/// Wraps `r` in a buffered `Reader` with an empty pushback slot.
#[allow(non_snake_case)]
pub fn NewReader<R: std::io::Read>(r: R) -> Reader<R> {
    Reader {
        inner: std::io::BufReader::new(r),
        unread: None,
        last_byte: None,
    }
}

impl<R: std::io::Read> Reader<R> {
    /// Shared core of `ReadString` and `ReadBytes`: collects bytes up to and
    /// including the first `delim`.
    ///
    /// The result is `Ok(true)` when the delimiter was found, `Ok(false)` when
    /// the input ended first, and `Err` for an I/O failure; the bytes gathered
    /// so far are returned in every case.
    fn read_through(
        &mut self,
        delim: crate::types::byte,
    ) -> (Vec<crate::types::byte>, std::io::Result<bool>) {
        let mut buf = Vec::<u8>::new();
        if let Some(b) = self.unread.take() {
            buf.push(b);
            if b == delim {
                self.last_byte = Some(b);
                return (buf, Ok(true));
            }
        }
        let outcome = self.inner.read_until(delim, &mut buf);
        self.last_byte = buf.last().copied();
        let found = buf.last() == Some(&delim);
        (buf, outcome.map(|_| found))
    }

    /// Reads up to and including the first `delim` and returns the data as
    /// text (invalid UTF-8 is replaced with U+FFFD).
    ///
    /// If the input ends before a delimiter is seen, the data read so far is
    /// returned together with an "EOF" error.
    #[allow(non_snake_case)]
    pub fn ReadString(&mut self, delim: crate::types::byte) -> (string, error) {
        let (bytes, outcome) = self.read_through(delim);
        let text: string = String::from_utf8_lossy(&bytes).into_owned().into();
        let err = match outcome {
            Ok(true) => nil,
            Ok(false) => New("EOF"),
            Err(e) => New(&format!("bufio.ReadString: {}", e)),
        };
        (text, err)
    }

    /// Reads up to and including the first `delim` and returns the raw bytes.
    ///
    /// If the input ends before a delimiter is seen, the data read so far is
    /// returned together with an "EOF" error.
    #[allow(non_snake_case)]
    pub fn ReadBytes(&mut self, delim: crate::types::byte) -> (Vec<crate::types::byte>, error) {
        let (bytes, outcome) = self.read_through(delim);
        let err = match outcome {
            Ok(true) => nil,
            Ok(false) => New("EOF"),
            Err(e) => New(&format!("bufio.ReadBytes: {}", e)),
        };
        (bytes, err)
    }

    /// Reads a single byte. Returns `(0, "EOF")` at end of input.
    #[allow(non_snake_case)]
    pub fn ReadByte(&mut self) -> (crate::types::byte, error) {
        if let Some(b) = self.unread.take() {
            self.last_byte = Some(b);
            return (b, nil);
        }
        let mut one = [0u8; 1];
        loop {
            match self.inner.read(&mut one) {
                Ok(0) => {
                    self.last_byte = None;
                    return (0, New("EOF"));
                }
                Ok(_) => {
                    self.last_byte = Some(one[0]);
                    return (one[0], nil);
                }
                // A signal interrupted the read; nothing was consumed, so retry.
                Err(e) if e.kind() == std::io::ErrorKind::Interrupted => continue,
                Err(e) => {
                    self.last_byte = None;
                    return (0, New(&format!("bufio.ReadByte: {}", e)));
                }
            }
        }
    }

    /// Pushes the most recently read byte back so the next read returns it
    /// again.
    ///
    /// Only one byte can be unread. An error is returned when there is nothing
    /// to unread: no successful read has happened yet, the last read produced
    /// no data, or the byte was already pushed back.
    #[allow(non_snake_case)]
    pub fn UnreadByte(&mut self) -> error {
        match self.last_byte.take() {
            Some(b) => {
                self.unread = Some(b);
                nil
            }
            None => New("bufio: invalid use of UnreadByte"),
        }
    }

    /// Reads one UTF-8 encoded code point and returns it with the number of
    /// bytes consumed.
    ///
    /// Malformed input yields `RuneError`:
    /// * a byte that cannot start a sequence consumes just that byte;
    /// * a byte that should be a continuation byte but is not is pushed back,
    ///   so it is read again as the start of the next rune;
    /// * overlong forms, surrogates and values above U+10FFFF consume the
    ///   whole sequence.
    ///
    /// If the input ends or fails in the middle of a sequence, `RuneError` is
    /// returned with the number of bytes actually consumed and the read error.
    #[allow(non_snake_case)]
    pub fn ReadRune(&mut self) -> (crate::types::rune, int, error) {
        let (b0, err) = self.ReadByte();
        if err != nil {
            return (0, 0, err);
        }
        if b0 < 0x80 {
            return (b0 as crate::types::rune, 1, nil);
        }
        let width: usize = match b0 {
            0xC2..=0xDF => 2,
            0xE0..=0xEF => 3,
            0xF0..=0xF4 => 4,
            _ => return (crate::unicode::RuneError, 1, nil),
        };

        let mut raw = [b0, 0, 0, 0];
        let mut have = 1usize;
        while have < width {
            let (b, e) = self.ReadByte();
            if e != nil {
                return (crate::unicode::RuneError, have as int, e);
            }
            if (b & 0xC0) != 0x80 {
                // Not part of this sequence: hand it back for the next read.
                self.unread = Some(b);
                self.last_byte = None;
                return (crate::unicode::RuneError, have as int, nil);
            }
            raw[have] = b;
            have += 1;
        }

        let decoded = std::str::from_utf8(&raw[..width])
            .ok()
            .and_then(|text| text.chars().next());
        match decoded {
            Some(ch) => (ch as u32 as crate::types::rune, width as int, nil),
            None => (crate::unicode::RuneError, width as int, nil),
        }
    }
}
/// Buffered writer: a thin wrapper around `std::io::BufWriter` that exposes
/// Go-style write methods returning `(count, error)` / `error`.
pub struct Writer<W: std::io::Write> {
// Underlying buffered sink; data reaches `W` when the buffer is flushed.
inner: std::io::BufWriter<W>,
}
/// Wraps `w` in a `Writer` backed by a `std::io::BufWriter` created with
/// `BufWriter::new`.
#[allow(non_snake_case)]
pub fn NewWriter<W: std::io::Write>(w: W) -> Writer<W> {
Writer { inner: std::io::BufWriter::new(w) }
}
impl<W: std::io::Write> Writer<W> {
    /// Writes the whole of `s`.
    ///
    /// Returns `(s.len(), nil)` on success. On failure the count is 0 and the
    /// error describes the I/O problem; part of the data may already have been
    /// written.
    #[allow(non_snake_case)]
    pub fn WriteString(&mut self, s: impl AsRef<str>) -> (int, error) {
        let bytes = s.as_ref().as_bytes();
        // `write_all` keeps going after short writes, so a nil error always
        // means every byte was accepted.
        match self.inner.write_all(bytes) {
            Ok(()) => (bytes.len() as int, nil),
            Err(e) => (0, New(&format!("bufio.WriteString: {}", e))),
        }
    }

    /// Writes the single byte `b`.
    #[allow(non_snake_case)]
    pub fn WriteByte(&mut self, b: crate::types::byte) -> error {
        match self.inner.write_all(&[b]) {
            Ok(()) => nil,
            Err(e) => New(&format!("bufio.WriteByte: {}", e)),
        }
    }

    /// Writes the whole of `p`.
    ///
    /// Returns `(p.len(), nil)` on success. On failure the count is 0 and the
    /// error describes the I/O problem; part of the data may already have been
    /// written.
    #[allow(non_snake_case)]
    pub fn Write(&mut self, p: &[crate::types::byte]) -> (int, error) {
        match self.inner.write_all(p) {
            Ok(()) => (p.len() as int, nil),
            Err(e) => (0, New(&format!("bufio.Write: {}", e))),
        }
    }

    /// Pushes all buffered data to the underlying writer.
    #[allow(non_snake_case)]
    pub fn Flush(&mut self) -> error {
        match self.inner.flush() {
            Ok(()) => nil,
            Err(e) => New(&format!("bufio.Flush: {}", e)),
        }
    }
}
/// Lets a `Writer` be used wherever a `std::io::Write` is expected; both
/// methods forward directly to the wrapped `BufWriter`.
impl<W: std::io::Write> std::io::Write for Writer<W> {
// May accept fewer bytes than `buf.len()`, as the trait permits.
fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> { self.inner.write(buf) }
fn flush(&mut self) -> std::io::Result<()> { self.inner.flush() }
}
// Unit tests covering the scanner, the line-reading helper, the buffered
// reader and the buffered writer, all driven by in-memory cursors/vectors.
#[cfg(test)]
mod tests {
use super::*;
use std::io::Cursor;
// The default split function yields lines without "\n" or "\r\n", including
// a final line that has no terminator.
#[test]
fn scans_lines_stripping_newlines() {
let input = "alpha\nbeta\r\ngamma";
let mut sc = NewScanner(Cursor::new(input));
let mut seen: Vec<String> = Vec::new();
while sc.Scan() {
seen.push(sc.Text().into());
}
assert_eq!(seen, vec!["alpha", "beta", "gamma"]);
assert!(sc.Err() == &nil);
}
// Empty input produces no token and no error.
#[test]
fn empty_reader_scan_returns_false() {
let mut sc = NewScanner(Cursor::new(""));
assert!(!sc.Scan());
assert!(sc.Err() == &nil);
}
// `ReadLines` collects every line; a trailing newline adds no empty line.
#[test]
fn read_lines_convenience() {
let (lines, err) = ReadLines(Cursor::new("one\ntwo\nthree\n"));
assert!(err == nil);
assert_eq!(lines, vec!["one", "two", "three"]);
}
// `ReadString` includes the delimiter; the last chunk has none and comes
// back with a non-nil error.
#[test]
fn reader_read_string_until_delim() {
let mut r = NewReader(Cursor::new("alpha,beta,gamma"));
let (s, err) = r.ReadString(b',');
assert!(err == nil);
assert_eq!(s, "alpha,");
let (s, err) = r.ReadString(b',');
assert!(err == nil);
assert_eq!(s, "beta,");
let (s, err) = r.ReadString(b',');
assert!(err != nil); assert_eq!(s, "gamma");
}
// Byte and rune reads can be interleaved; 'λ' occupies two bytes.
#[test]
fn reader_read_byte_and_rune() {
let mut r = NewReader(Cursor::new("aλb"));
let (b, _) = r.ReadByte();
assert_eq!(b, b'a');
let (rune, n, err) = r.ReadRune();
assert!(err == nil);
assert_eq!(n, 2);
assert_eq!(rune, 'λ' as crate::types::rune);
let (b, _) = r.ReadByte();
assert_eq!(b, b'b');
}
// Data written through the writer is present in the sink after `Flush`.
#[test]
fn writer_buffers_and_flushes() {
let mut buf: Vec<u8> = Vec::new();
{
let mut w = NewWriter(&mut buf);
let _ = w.WriteString("hello ");
let _ = w.WriteString("world");
let _ = w.Flush();
}
assert_eq!(buf, b"hello world");
}
}