use std::sync::LazyLock;
use bytes::{Buf, BufMut, BytesMut};
use crate::pgp::{line_writer::LineBreak, util::fill_buffer};
/// Lazily compiled byte-level pattern matching one line break to rewrite:
/// either `\r\n` or a bare `\n`. A lone `\r` is not matched by this pattern.
static RE: LazyLock<regex::bytes::Regex> =
LazyLock::new(|| regex::bytes::Regex::new(r"(\r\n|\n)").expect("valid regex"));
/// Reader adapter that rewrites line breaks while data is pulled through it.
///
/// Every `\r\n` or bare `\n` coming from `source` is replaced with the bytes of
/// the configured `line_break`; a lone `\r` is passed through unchanged.
pub struct NormalizedReader<R: std::io::Read> {
    /// Line break sequence substituted for each matched `\r\n` / `\n`.
    line_break: LineBreak,
    /// Underlying reader providing the raw, not yet normalized bytes.
    source: R,
    /// Raw chunk most recently read from `source`.
    in_buffer: [u8; BUF_SIZE / 2],
    /// Normalized bytes produced from `in_buffer` that still await delivery.
    replaced: BytesMut,
    /// Set once a read returned fewer bytes than `in_buffer` can hold.
    is_done: bool,
}
/// Initial capacity of the normalized output buffer, in bytes.
///
/// The raw input buffer is half this size (`BUF_SIZE / 2`) — presumably because
/// rewriting each `\n` to `\r\n` can at most double a chunk; confirm if changed.
const BUF_SIZE: usize = 1024;
impl<R: std::io::Read> NormalizedReader<R> {
    /// Creates a reader that normalizes the line breaks of `source` to `line_break`.
    ///
    /// Nothing is read from `source` here; the buffers start out empty/zeroed.
    pub fn new(source: R, line_break: LineBreak) -> Self {
        let in_buffer = [0u8; BUF_SIZE / 2];
        let replaced = BytesMut::with_capacity(BUF_SIZE);

        Self {
            line_break,
            source,
            in_buffer,
            replaced,
            is_done: false,
        }
    }

    /// Reads the next raw chunk into `in_buffer` and rebuilds `replaced` from it.
    ///
    /// A read that yields fewer bytes than `in_buffer` holds marks the reader as
    /// done (NOTE(review): this relies on the `fill_buffer` helper only coming up
    /// short at end of input — confirm against its implementation).
    ///
    /// # Errors
    /// Propagates any I/O error returned by the `fill_buffer` helper.
    fn fill_buffer(&mut self) -> std::io::Result<()> {
        // Remember the final byte of the previous chunk *before* it is overwritten;
        // it is needed to detect a `\r\n` pair split across two chunks.
        let tail_index = self.in_buffer.len() - 1;
        let previous_tail = self.in_buffer[tail_index];

        let filled = fill_buffer(&mut self.source, &mut self.in_buffer, None)?;
        self.is_done |= filled < self.in_buffer.len();

        self.cleanup_buffer(filled, previous_tail);
        Ok(())
    }

    /// Normalizes the first `read` bytes of `in_buffer` into `replaced`.
    ///
    /// `last_char` is the final byte of the previous chunk. When a full chunk ends
    /// in `\r`, that byte is held back (not emitted) so the next call can decide
    /// whether it belongs to a `\r\n` pair.
    fn cleanup_buffer(&mut self, read: usize, last_char: u8) {
        const CR: u8 = b'\r';
        const LF: u8 = b'\n';

        self.replaced.clear();

        // Hold back a trailing `\r` of a completely filled chunk.
        let capacity = self.in_buffer.len();
        let holds_back_cr = read == capacity && self.in_buffer[capacity - 1] == CR;
        let end = if holds_back_cr { read - 1 } else { read };

        // Resolve a `\r` that was held back at the end of the previous chunk.
        let boundary = [last_char, self.in_buffer[0]];
        let start = if last_char != CR {
            0
        } else if read > 0 && boundary[1] == LF {
            // The pair was split across chunks: emit it as one line break and
            // skip the `\n` that opens this chunk.
            let joined = RE.replace_all(&boundary, self.line_break.as_ref());
            self.replaced.extend_from_slice(&joined);
            1
        } else {
            // The held-back `\r` stands alone; pass it through unchanged.
            self.replaced.put_u8(CR);
            0
        };

        let normalized = RE.replace_all(&self.in_buffer[start..end], self.line_break.as_ref());
        self.replaced.extend_from_slice(&normalized);
    }
}
impl<R: std::io::Read> std::io::Read for NormalizedReader<R> {
    /// Copies already normalized bytes into `buf`, refilling from the source first
    /// when nothing is pending.
    ///
    /// Returns `Ok(0)` once the reader is marked done and no normalized bytes are
    /// left. At most one refill is performed per call.
    fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
        let needs_refill = self.replaced.remaining() == 0;
        if needs_refill && self.is_done {
            return Ok(0);
        }
        if needs_refill {
            self.fill_buffer()?;
        }

        // Hand out as much as both the pending data and the caller's buffer allow.
        let count = std::cmp::min(buf.len(), self.replaced.remaining());
        let (dest, _) = buf.split_at_mut(count);
        self.replaced.copy_to_slice(dest);
        Ok(count)
    }
}
/// Replaces every `\r\n` or bare `\n` in `s` with the bytes of `line_break`.
///
/// A lone `\r` is not matched and stays as it is. When `s` contains nothing to
/// rewrite, the input is returned borrowed and no allocation takes place.
///
/// # Panics
/// Panics if the rewritten bytes are not valid UTF-8. Since `s` is valid UTF-8
/// and only `\r\n` / `\n` are replaced, this would require the replacement bytes
/// themselves to be invalid — NOTE(review): confirm `LineBreak` only yields ASCII.
pub(crate) fn normalize_lines(s: &str, line_break: LineBreak) -> std::borrow::Cow<'_, str> {
    use std::borrow::Cow;

    match RE.replace_all(s.as_bytes(), line_break.as_ref()) {
        // A borrowed result is the untouched haystack, i.e. exactly the bytes of
        // `s`, so return `s` itself rather than re-validating it as UTF-8.
        Cow::Borrowed(_) => Cow::Borrowed(s),
        Cow::Owned(bytes) => Cow::Owned(String::from_utf8(bytes).expect("valid bytes in")),
    }
}
#[cfg(test)]
mod tests {
    use std::io::Read;

    use rand::{Rng, SeedableRng};
    use rand_chacha::ChaCha8Rng;

    use super::*;
    use crate::pgp::util::test::{check_strings, random_string, ChaosReader};

    /// Fixed input mixing bare `\n`, lone `\r` and `\r\n` sequences.
    const MIXED_NEWLINES: &str =
        "This is a string \n with \r some \n\r\n random newlines\r\r\n\n";

    /// Drains a `NormalizedReader` wrapped around `source` into a `String`.
    fn normalize_via_reader<R: Read>(source: R, line_break: LineBreak) -> String {
        let mut collected = String::new();
        NormalizedReader::new(source, line_break)
            .read_to_string(&mut collected)
            .unwrap();
        collected
    }

    /// `\r\n` and `\n` both become `\n`; lone `\r` is kept.
    #[test]
    fn reader_normalized_lf() {
        let out = normalize_via_reader(MIXED_NEWLINES.as_bytes(), LineBreak::Lf);

        check_strings(
            out,
            "This is a string \n with \r some \n\n random newlines\r\n\n",
        );
    }

    /// `\r\n` and `\n` both become `\r`; lone `\r` is kept.
    #[test]
    fn reader_normalized_cr() {
        let out = normalize_via_reader(MIXED_NEWLINES.as_bytes(), LineBreak::Cr);

        check_strings(
            out,
            "This is a string \r with \r some \r\r random newlines\r\r\r",
        );
    }

    /// `\n` becomes `\r\n`, existing `\r\n` stays; lone `\r` is kept.
    #[test]
    fn reader_normalized_crlf_fixed() {
        let out = normalize_via_reader(MIXED_NEWLINES.as_bytes(), LineBreak::Crlf);

        check_strings(
            "This is a string \r\n with \r some \r\n\r\n random newlines\r\r\n\r\n",
            out,
        );
    }

    /// The streaming reader must agree with `normalize_lines` on random inputs
    /// delivered through a `ChaosReader`.
    #[test]
    fn reader_normalized_crlf_random() {
        let mut rng = ChaCha8Rng::seed_from_u64(1);

        for _ in 0..100 {
            let size = rng.gen_range(1..10000);
            let input = random_string(&mut rng, size);
            let reader = ChaosReader::new(&mut rng, input.clone());

            let out = normalize_via_reader(reader, LineBreak::Crlf);
            let normalized_input = normalize_lines(&input, LineBreak::Crlf);

            check_strings(&normalized_input, out);
        }
    }

    /// Round trip LF -> CRLF -> LF on an input long enough that, when consumed
    /// in full 512-byte chunks, a `\r\n` pair straddles a chunk boundary.
    #[test]
    fn reader_normalized_crlf_then_lf_edge_case() {
        let input_string = "a \n ".repeat(512);

        let out_crlf = normalize_via_reader(input_string.as_bytes(), LineBreak::Crlf);
        let reverted = normalize_via_reader(out_crlf.as_bytes(), LineBreak::Lf);

        check_strings(input_string, reverted);
    }
}