use std::io::{self, Read};
/// Size in bytes (8 KiB) of the fixed working buffer held by each `Common`.
const BUF_SIZE: usize = 8 * 1024;
/// Buffering state shared by the filtering readers in this file.
struct Common<T: Read> {
    /// Reader that supplies the unfiltered input.
    source: T,
    /// Fixed-size scratch area that holds filtered bytes.
    working_buf: [u8; BUF_SIZE],
    /// Number of filtered bytes at the front of `working_buf` that have not
    /// been handed to a caller yet.
    unconsumed_bytes: usize,
}

impl<T: Read> Common<T> {
    /// Wraps `source` with a zero-filled buffer and no bytes pending.
    fn new(source: T) -> Self {
        let working_buf = [0u8; BUF_SIZE];
        Common {
            source,
            working_buf,
            unconsumed_bytes: 0,
        }
    }
}
/// Reader adapter that lets through only the characters found in a whitelist.
///
/// The input is decoded as UTF-8. Invalid sequences are treated as U+FFFD
/// (the replacement character), which is itself dropped unless it has been
/// whitelisted.
pub struct CharWhitelist<T>
where
    T: Read,
{
    common: Common<T>,
    allowed_chars: Vec<char>,
    /// Raw input that has not been decoded yet. Between source reads this is
    /// either empty or the leading bytes of one UTF-8 sequence that was cut
    /// off by the end of a read.
    pending: Vec<u8>,
}

impl<T> CharWhitelist<T>
where
    T: Read,
{
    /// Creates a filter over `source` that keeps only the characters that
    /// occur in `allowed_chars`.
    pub fn new(source: T, allowed_chars: &str) -> Self {
        Self {
            common: Common::new(source),
            allowed_chars: allowed_chars.chars().collect(),
            pending: Vec::new(),
        }
    }

    /// Appends every whitelisted character of `text` to `out` at `*filled`,
    /// advancing `*filled` by the number of bytes written.
    fn keep_allowed(allowed: &[char], out: &mut [u8], filled: &mut usize, text: &str) {
        for c in text.chars() {
            if allowed.contains(&c) {
                *filled += c.encode_utf8(&mut out[*filled..]).len();
            }
        }
    }

    /// Refills the working buffer with filtered bytes.
    ///
    /// Always returns `Some`. `Ok(n)` means the first `n` bytes of the working
    /// buffer hold filtered output; `Ok(0)` means the source is exhausted.
    /// When the source fails, the output gathered so far is recorded in
    /// `unconsumed_bytes` so that it is not lost, and the error is returned.
    fn next(&mut self) -> Option<io::Result<usize>> {
        const REPLACEMENT: &str = "\u{FFFD}";

        let mut filled = self.common.unconsumed_bytes;
        // Output can only outgrow the input when invalid bytes are turned into
        // a whitelisted U+FFFD: one bad byte then becomes three.
        let growth = if self.allowed_chars.contains(&'\u{FFFD}') {
            REPLACEMENT.len()
        } else {
            1
        };

        loop {
            // Never hold more undecoded bytes than the free space can absorb
            // in the worst case; this also leaves room to flush `pending`.
            let capacity = (self.common.working_buf.len() - filled) / growth;
            let room = capacity.saturating_sub(self.pending.len());
            if room == 0 {
                return Some(Ok(filled));
            }

            let outcome = self
                .common
                .source
                .read(&mut self.common.working_buf[filled..filled + room]);
            let fresh = match outcome {
                Ok(0) => {
                    // End of input: a sequence that is still incomplete can
                    // never be completed, so decode it lossily.
                    let tail = String::from_utf8_lossy(&self.pending);
                    Self::keep_allowed(
                        &self.allowed_chars,
                        &mut self.common.working_buf,
                        &mut filled,
                        &tail,
                    );
                    self.pending.clear();
                    return Some(Ok(filled));
                }
                Ok(n) => n,
                Err(e) => {
                    // Keep what was already filtered for the next read call.
                    self.common.unconsumed_bytes = filled;
                    return Some(Err(e));
                }
            };

            // Decode from a copy so the filtered output can overwrite the raw
            // bytes that were just read into the working buffer.
            self.pending
                .extend_from_slice(&self.common.working_buf[filled..filled + fresh]);

            let mut decoded = 0;
            while decoded < self.pending.len() {
                let rest = &self.pending[decoded..];
                match std::str::from_utf8(rest) {
                    Ok(text) => {
                        Self::keep_allowed(
                            &self.allowed_chars,
                            &mut self.common.working_buf,
                            &mut filled,
                            text,
                        );
                        decoded += rest.len();
                    }
                    Err(err) => {
                        let valid = &rest[..err.valid_up_to()];
                        // `valid` is well-formed, so this borrows instead of
                        // allocating.
                        Self::keep_allowed(
                            &self.allowed_chars,
                            &mut self.common.working_buf,
                            &mut filled,
                            &String::from_utf8_lossy(valid),
                        );
                        decoded += valid.len();
                        match err.error_len() {
                            Some(bad) => {
                                Self::keep_allowed(
                                    &self.allowed_chars,
                                    &mut self.common.working_buf,
                                    &mut filled,
                                    REPLACEMENT,
                                );
                                decoded += bad;
                            }
                            // The chunk ends in the middle of a character:
                            // keep the tail until more input arrives.
                            None => break,
                        }
                    }
                }
            }
            self.pending.drain(..decoded);
        }
    }
}
/// Reader adapter that removes `#` comments.
///
/// Every byte from a `#` up to and including the next `\n` is dropped; all
/// other bytes pass through unchanged. `#` and `\n` are ASCII and can never
/// occur inside a multi-byte UTF-8 sequence, so the input is scanned as raw
/// bytes and is neither decoded nor re-encoded.
pub struct CommentStrip<T>
where
    T: Read,
{
    common: Common<T>,
    /// True while inside a comment whose terminating newline has not been
    /// seen yet. Persists across source reads and across calls.
    ignore_until_next_newline: bool,
}

impl<T> CommentStrip<T>
where
    T: Read,
{
    /// Creates a comment stripper over `source`, starting outside a comment.
    pub fn new(source: T) -> Self {
        Self {
            common: Common::new(source),
            ignore_until_next_newline: false,
        }
    }

    /// Refills the working buffer with comment-free bytes.
    ///
    /// Always returns `Some`. `Ok(n)` means the first `n` bytes of the working
    /// buffer hold output; `Ok(0)` is only returned once the source is
    /// exhausted. Reading continues until the buffer is full or the source
    /// ends, so a chunk that lies entirely inside a comment never ends the
    /// stream early. When the source fails, the output gathered so far is
    /// recorded in `unconsumed_bytes` so that it is not lost, and the error
    /// is returned.
    fn next(&mut self) -> Option<io::Result<usize>> {
        let mut kept = self.common.unconsumed_bytes;
        loop {
            let outcome = self
                .common
                .source
                .read(&mut self.common.working_buf[kept..]);
            let fresh = match outcome {
                // Either the source is exhausted or the buffer is full.
                Ok(0) => return Some(Ok(kept)),
                Ok(n) => n,
                Err(e) => {
                    // Keep what was already filtered for the next read call.
                    self.common.unconsumed_bytes = kept;
                    return Some(Err(e));
                }
            };

            // Compact the fresh bytes in place. The write position never
            // passes the read position, so no unread byte is overwritten.
            let mut write_at = kept;
            for read_at in kept..kept + fresh {
                let byte = self.common.working_buf[read_at];
                if self.ignore_until_next_newline {
                    // The newline that ends a comment is dropped with it.
                    if byte == b'\n' {
                        self.ignore_until_next_newline = false;
                    }
                } else if byte == b'#' {
                    self.ignore_until_next_newline = true;
                } else {
                    self.common.working_buf[write_at] = byte;
                    write_at += 1;
                }
            }
            kept = write_at;
        }
    }
}
/// Implements `std::io::Read` for a filtering reader type.
///
/// The type must have a `common` field exposing `working_buf` and
/// `unconsumed_bytes`, and a `next()` method that refills `working_buf` from
/// its start and returns `Option<io::Result<usize>>` with the number of
/// filtered bytes now available.
macro_rules! impl_read_trait_for_stream_iter {
    ($MyType:ty) => {
        impl<T> Read for $MyType
        where
            T: Read,
        {
            /// Copies filtered bytes into `out_buf` and returns how many were
            /// written.
            ///
            /// Bytes that are already buffered are served first, as a
            /// possibly short read, without touching the source. That way a
            /// source error can never discard bytes that were already
            /// filtered. `Ok(0)` is returned when `out_buf` is empty or the
            /// source is exhausted.
            fn read(&mut self, out_buf: &mut [u8]) -> io::Result<usize> {
                let buffered = self.common.unconsumed_bytes;
                if buffered > 0 || out_buf.is_empty() {
                    let give = buffered.min(out_buf.len());
                    out_buf[..give].copy_from_slice(&self.common.working_buf[..give]);
                    // Slide the remainder to the front of the buffer.
                    self.common.working_buf.copy_within(give..buffered, 0);
                    self.common.unconsumed_bytes = buffered - give;
                    return Ok(give);
                }

                // Nothing is buffered: refill, then hand out as much as fits.
                match self.next() {
                    Some(Ok(available)) => {
                        let give = available.min(out_buf.len());
                        out_buf[..give].copy_from_slice(&self.common.working_buf[..give]);
                        self.common.working_buf.copy_within(give..available, 0);
                        self.common.unconsumed_bytes = available - give;
                        Ok(give)
                    }
                    Some(Err(e)) => Err(e),
                    None => Ok(0),
                }
            }
        }
    };
}
// Generate the `Read` implementations for both filtering readers.
impl_read_trait_for_stream_iter!(CharWhitelist<T>);
impl_read_trait_for_stream_iter!(CommentStrip<T>);
/// Test helper: drains `buf` to the end and returns its contents as a
/// `String`, panicking if reading fails or the data is not valid UTF-8.
#[cfg(test)]
fn read_to_string(mut buf: impl Read) -> String {
    let mut collected = String::new();
    Read::read_to_string(&mut buf, &mut collected).unwrap();
    collected
}
/// End-to-end tests that drain a `CommentStrip` into a `String`.
#[cfg(test)]
mod test_comment_strip_iter {
    use super::{read_to_string, CommentStrip, BUF_SIZE};

    /// Runs `input` through a `CommentStrip` and returns whatever survives.
    fn strip(input: &[u8]) -> String {
        read_to_string(CommentStrip::new(input))
    }

    #[test]
    fn empty_is_empty() {
        assert!(strip(b"").is_empty());
    }

    #[test]
    fn ignore_all_short() {
        let inputs = ["#foo baz", "#foo baz\n", "# ", "# \n", "#", "#\n"];
        for input in inputs.iter() {
            assert_eq!(strip(input.as_bytes()).len(), 0);
        }
    }

    /// A lone unterminated comment, sized around the buffer boundaries.
    #[test]
    fn ignore_all_long_1() {
        for &total in &[BUF_SIZE - 1, BUF_SIZE, BUF_SIZE + 1, BUF_SIZE * 2 + 3] {
            let mut input = vec![b' '; total];
            input[0] = b'#';
            assert_eq!(input.len(), total);
            assert_eq!(strip(&input).len(), 0);
        }
    }

    /// A lone newline-terminated comment, sized around the buffer boundaries.
    #[test]
    fn ignore_all_long_2() {
        for &total in &[BUF_SIZE - 1, BUF_SIZE, BUF_SIZE + 1, BUF_SIZE * 2 + 3] {
            let mut input = vec![b' '; total];
            input[0] = b'#';
            input[total - 1] = b'\n';
            assert_eq!(input.len(), total);
            assert_eq!(strip(&input).len(), 0);
        }
    }

    #[test]
    fn keep_end_short() {
        let cases = [
            ("#\nfoo", "foo"),
            ("# \nfoo", "foo"),
            ("#\nfoo foo", "foo foo"),
            ("# \nfoo foo", "foo foo"),
            ("#\nfoo \n foo", "foo \n foo"),
            ("# \nfoo \n foo", "foo \n foo"),
        ];
        for (input, expected) in cases.iter() {
            assert_eq!(strip(input.as_bytes()), *expected);
        }
    }

    /// A long run of `#` ended by a newline, followed by content that must
    /// survive, sized around the buffer boundaries.
    #[test]
    fn keep_end_long() {
        let content = " foo \n foo ";
        let hash_counts = [
            BUF_SIZE - content.len() - 2,
            BUF_SIZE - content.len() - 1,
            BUF_SIZE - content.len(),
            BUF_SIZE + 1,
        ];
        let totals = [
            BUF_SIZE - 1,
            BUF_SIZE,
            BUF_SIZE + 1,
            BUF_SIZE + 2 + content.len(),
        ];
        for (&hashes, &total) in hash_counts.iter().zip(totals.iter()) {
            let mut input = vec![b'#'; hashes];
            input.push(b'\n');
            input.extend_from_slice(content.as_bytes());
            assert_eq!(input.len(), total);
            assert_eq!(strip(&input), content);
        }
    }
}
/// End-to-end tests that drain a `CharWhitelist` into a `String`.
#[cfg(test)]
mod test_char_whitelist_iter {
    use super::{read_to_string, CharWhitelist};

    /// A one-byte, a two-byte and a four-byte UTF-8 character back to back.
    const SAMPLE: &str = "A\u{00a1}\u{1d6a9}";

    /// Filters `input` so that only the characters in `allowed` remain.
    fn filter(input: &str, allowed: &str) -> String {
        read_to_string(CharWhitelist::new(input.as_bytes(), allowed))
    }

    #[test]
    fn empty_whitelist() {
        assert!(filter(SAMPLE, "").is_empty());
    }

    #[test]
    fn whitelist_allows_all() {
        assert_eq!(filter(SAMPLE, SAMPLE), SAMPLE);
    }

    #[test]
    fn whitelist_allows_single() {
        for allowed in ["A", "\u{00a1}", "\u{1d6a9}"].iter() {
            assert_eq!(filter(SAMPLE, allowed), *allowed);
        }
    }
}
/// Tests that check the result of a single `read` call on a `CommentStrip`.
#[cfg(test)]
mod test_comment_strip_iter_read {
    use super::{CommentStrip, BUF_SIZE};
    use std::io::Read;

    /// Issues exactly one `read` on a fresh `CommentStrip` over `input`.
    fn read_once(input: &str, out_buf: &mut [u8]) -> usize {
        CommentStrip::new(input.as_bytes()).read(out_buf).unwrap()
    }

    /// Text produced by one `read` into a buffer of `BUF_SIZE` bytes.
    fn first_read_as_text(input: &str) -> String {
        let mut out_buf = [0u8; BUF_SIZE];
        let len = read_once(input, &mut out_buf);
        String::from_utf8_lossy(&out_buf[..len]).into_owned()
    }

    #[test]
    fn just_comment_returns_empty() {
        let mut out_buf = [0u8; 1];
        assert_eq!(read_once("# foo \n", &mut out_buf), 0);
    }

    #[test]
    fn just_byte_before_comment() {
        assert_eq!(first_read_as_text("a# foo \n"), "a");
    }

    #[test]
    fn just_byte_after_comment() {
        assert_eq!(first_read_as_text("# foo \na"), "a");
    }

    #[test]
    fn just_byte_before_and_after_comment() {
        assert_eq!(first_read_as_text("a# foo \nB"), "aB");
    }
}
/// Generates a test module named `$mod_name` that exercises the `Read`
/// implementation of `$MyType` with input the filter leaves untouched.
///
/// `$MyType` must offer `new(source)` taking a single reader argument.
macro_rules! impl_tests_for_common_read {
    ($mod_name:ident, $MyType:ident) => {
        #[cfg(test)]
        mod $mod_name {
            use super::{$MyType, BUF_SIZE};
            use std::io::Read;

            /// Builds `runs` consecutive blocks of `BUF_SIZE / 2` bytes each,
            /// filled with 'a', 'b', 'c', ... in turn.
            fn letter_runs(runs: usize) -> Vec<u8> {
                (b'a'..)
                    .take(runs)
                    .flat_map(|letter| vec![letter; BUF_SIZE / 2])
                    .collect()
            }

            /// Reads through `out_buf` until a read reports zero bytes and
            /// returns everything that was read, decoded lossily.
            fn pump(reader: &mut impl Read, out_buf: &mut [u8]) -> String {
                let mut acc = String::new();
                loop {
                    let len = reader.read(out_buf).unwrap();
                    acc += &String::from_utf8_lossy(&out_buf[..len]);
                    if len < 1 {
                        break;
                    }
                }
                acc
            }

            #[test]
            fn empty() {
                let in_buf: &[u8] = &[];
                let mut out_buf = [0u8; 1];
                let mut csi = $MyType::new(in_buf);
                assert_eq!(csi.read(&mut out_buf).unwrap(), 0);
            }

            #[test]
            fn many_tiny_reads() {
                let in_buf = "abc123".as_bytes();
                let mut out_buf = [0u8; 1];
                let mut csi = $MyType::new(in_buf);
                let mut acc = String::new();
                // Exactly one read per input byte, then one more for EOF.
                for _ in 0..in_buf.len() {
                    let len = csi.read(&mut out_buf).unwrap();
                    acc += &String::from_utf8_lossy(&out_buf[..len]);
                }
                assert_eq!(acc, "abc123");
                assert_eq!(csi.read(&mut out_buf).unwrap(), 0);
            }

            #[test]
            fn big_inbuf_tiny_outbuf() {
                let in_buf = letter_runs(5);
                let mut out_buf = [0u8; 2];
                let mut csi = $MyType::new(&in_buf[..]);
                let acc = pump(&mut csi, &mut out_buf);
                assert_eq!(acc, String::from_utf8_lossy(&in_buf[..]));
            }

            #[test]
            fn big_inbuf_just_smaller_outbuf() {
                let in_buf = letter_runs(4);
                assert_eq!(in_buf.len(), BUF_SIZE * 2);
                let mut out_buf = [0u8; BUF_SIZE * 2 - 1];
                let mut csi = $MyType::new(&in_buf[..]);
                let acc = pump(&mut csi, &mut out_buf);
                assert_eq!(acc, String::from_utf8_lossy(&in_buf[..]));
            }

            #[test]
            fn big_inbuf_just_larger_outbuf() {
                let in_buf = letter_runs(4);
                assert_eq!(in_buf.len(), BUF_SIZE * 2);
                let mut out_buf = [0u8; BUF_SIZE * 2 + 1];
                let mut csi = $MyType::new(&in_buf[..]);
                let acc = pump(&mut csi, &mut out_buf);
                assert_eq!(acc, String::from_utf8_lossy(&in_buf[..]));
            }
        }
    };
}
// Run the shared `Read` behavior tests against `CommentStrip`.
impl_tests_for_common_read!(test_read_common_with_comment_strip_iter, CommentStrip);