use std::collections::VecDeque;
use std::fs::File;
use std::io::{BufRead, Read, Seek, SeekFrom, Write};
use uucore::error::UResult;
/// Size in bytes (64 KiB) of the blocks that `ReverseChunks` reads from a file.
pub const BLOCK_SIZE: u64 = 1 << 16;
/// Capacity in bytes of the fixed-size buffer stored in every `BytesChunk`.
pub const BUFFER_SIZE: usize = 8192;
/// Iterator state for reading a file in blocks of up to `BLOCK_SIZE` bytes,
/// starting at the end of the file and moving toward its beginning.
pub struct ReverseChunks<'a> {
/// The file being read; its cursor is moved by the iterator.
file: &'a File,
/// Number of bytes covered by the iteration, as computed by `new`.
size: u64,
/// Total number of blocks the iterator yields (`size / BLOCK_SIZE`, rounded up).
max_blocks_to_read: usize,
/// Number of blocks yielded so far.
block_idx: usize,
}
impl<'a> ReverseChunks<'a> {
    /// Creates an iterator over the bytes between the file's current position
    /// and its end.
    ///
    /// On Unix the covered range starts at the file's current stream position;
    /// on every other platform it starts at offset 0. As a side effect the file
    /// cursor is moved to the end of the file, which is where iteration begins.
    ///
    /// If the cursor is already past the end of the file the range is empty and
    /// the iterator yields nothing.
    ///
    /// # Panics
    ///
    /// Panics if querying the stream position or seeking to the end fails.
    pub fn new(file: &'a mut File) -> Self {
        let current = if cfg!(unix) {
            file.stream_position().unwrap()
        } else {
            0
        };
        let end = file.seek(SeekFrom::End(0)).unwrap();
        // A cursor positioned beyond EOF must not underflow the subtraction.
        let size = end.saturating_sub(current);
        // Integer ceiling division; a round-trip through `f64` would lose
        // precision for sizes above 2^53.
        let full_blocks = size / BLOCK_SIZE;
        let partial_block = u64::from(size % BLOCK_SIZE != 0);
        let max_blocks_to_read = (full_blocks + partial_block) as usize;
        ReverseChunks {
            file,
            size,
            max_blocks_to_read,
            block_idx: 0,
        }
    }
}
impl Iterator for ReverseChunks<'_> {
    type Item = Vec<u8>;

    /// Returns the next block of the file, walking from the end toward the
    /// beginning.
    ///
    /// Every block is `BLOCK_SIZE` bytes long except possibly the final one
    /// (the one closest to the start of the range), which holds the remaining
    /// `size % BLOCK_SIZE` bytes when `size` is not a multiple of `BLOCK_SIZE`.
    /// After each call the file cursor is left at the beginning of the block
    /// that was just returned.
    ///
    /// # Panics
    ///
    /// Panics if seeking or reading the file fails.
    fn next(&mut self) -> Option<Self::Item> {
        if self.block_idx >= self.max_blocks_to_read {
            return None;
        }

        let is_last_block = self.block_idx == self.max_blocks_to_read - 1;
        let remainder = self.size % BLOCK_SIZE;
        // When `size` is an exact multiple of `BLOCK_SIZE` the remainder is 0,
        // but the final block is still a full one; treating it as empty would
        // silently skip the first `BLOCK_SIZE` bytes of the range.
        let block_size = if is_last_block && remainder != 0 {
            remainder
        } else {
            BLOCK_SIZE
        };
        let step_back = -(block_size as i64);

        // Step back over the block, read it, then step back again so the
        // cursor rests at the start of the block for the next call.
        let mut buf = vec![0u8; block_size as usize];
        let pos = self.file.seek(SeekFrom::Current(step_back)).unwrap();
        self.file.read_exact(&mut buf).unwrap();
        let pos2 = self.file.seek(SeekFrom::Current(step_back)).unwrap();
        assert_eq!(pos, pos2);

        self.block_idx += 1;
        Some(buf)
    }
}
/// Fixed-size byte array of `BUFFER_SIZE` bytes used as backing storage for a `BytesChunk`.
type ChunkBuffer = [u8; BUFFER_SIZE];
/// A fixed-capacity byte buffer together with the number of bytes in it that are valid.
#[derive(Clone, PartialEq, Eq, Debug)]
pub struct BytesChunk {
/// Backing storage; only the first `bytes` entries are exposed by the accessors.
buffer: ChunkBuffer,
/// Number of valid bytes at the start of `buffer`.
bytes: usize,
}
impl BytesChunk {
    /// Creates a chunk with a zeroed buffer and no valid bytes.
    #[allow(clippy::new_without_default)]
    pub fn new() -> Self {
        Self {
            bytes: 0,
            buffer: [0; BUFFER_SIZE],
        }
    }

    /// Builds a new chunk holding the valid bytes of `chunk` from `offset`
    /// onward, moved to the front of a zeroed buffer.
    ///
    /// An `offset` at or beyond the number of valid bytes produces an empty
    /// chunk.
    fn from_chunk(chunk: &Self, offset: usize) -> Self {
        let mut copy = Self::new();
        if offset < chunk.bytes {
            let tail = &chunk.buffer[offset..chunk.bytes];
            copy.buffer[..tail.len()].copy_from_slice(tail);
            copy.bytes = tail.len();
        }
        copy
    }

    /// Returns the valid bytes of this chunk.
    pub fn get_buffer(&self) -> &[u8] {
        let valid = self.bytes;
        &self.buffer[..valid]
    }

    /// Returns the valid bytes of this chunk starting at `offset`.
    ///
    /// # Panics
    ///
    /// Panics if `offset` is greater than the number of valid bytes.
    pub fn get_buffer_with(&self, offset: usize) -> &[u8] {
        let valid = self.bytes;
        &self.buffer[offset..valid]
    }

    /// Reports whether the chunk holds at least one valid byte.
    pub fn has_data(&self) -> bool {
        self.bytes != 0
    }

    /// Performs a single `read` from `filehandle` into the buffer, replacing
    /// the previous contents.
    ///
    /// Returns `Ok(Some(n))` with the number of bytes read, or `Ok(None)` when
    /// the read returned zero bytes. A read error is propagated and leaves the
    /// valid-byte count untouched.
    pub fn fill(&mut self, filehandle: &mut impl BufRead) -> UResult<Option<usize>> {
        let read = filehandle.read(&mut self.buffer)?;
        self.bytes = read;
        match read {
            0 => Ok(None),
            n => Ok(Some(n)),
        }
    }
}
/// Queue of `BytesChunk`s used to keep the trailing `num_print` bytes of a reader.
pub struct BytesChunkBuffer {
/// Number of trailing bytes to retain.
num_print: u64,
/// Running byte count maintained by `fill`: bytes read so far minus the
/// bytes of chunks already dropped from the front of `chunks`.
bytes: u64,
/// Retained chunks, oldest first.
chunks: VecDeque<Box<BytesChunk>>,
}
impl BytesChunkBuffer {
    /// Creates an empty buffer that retains the last `num_print` bytes it is
    /// fed through [`fill`](Self::fill).
    pub fn new(num_print: u64) -> Self {
        Self {
            bytes: 0,
            num_print,
            chunks: VecDeque::new(),
        }
    }

    /// Reads `reader` to its end, keeping only the chunks needed to reproduce
    /// its last `num_print` bytes.
    ///
    /// Afterwards the first stored chunk is trimmed so that the stored chunks
    /// together hold at most `num_print` bytes. If the reader produced no data
    /// at all, nothing is stored.
    ///
    /// # Errors
    ///
    /// Propagates any error returned while reading from `reader`.
    pub fn fill(&mut self, reader: &mut impl BufRead) -> UResult<()> {
        let mut chunk = Box::new(BytesChunk::new());
        while chunk.fill(reader)?.is_some() {
            self.bytes += chunk.bytes as u64;
            // Move the box into the queue; no copy of the 8 KiB buffer needed.
            self.chunks.push_back(chunk);

            let first_len = self.chunks[0].bytes as u64;
            chunk = if self.bytes - first_len > self.num_print {
                // The oldest chunk is no longer needed: recycle its allocation
                // for the next read.
                self.bytes -= first_len;
                self.chunks
                    .pop_front()
                    .expect("a chunk was pushed just above")
            } else {
                Box::new(BytesChunk::new())
            };
        }

        let mut first = match self.chunks.pop_front() {
            Some(first) => first,
            // Nothing was read, e.g. an empty pipe.
            None => return Ok(()),
        };

        // The read loop drops at most one chunk per read. Reads may be short,
        // so chunks differ in size and several leading chunks can still be
        // entirely outside the last `num_print` bytes; discard them all.
        while self.bytes - first.bytes as u64 > self.num_print {
            match self.chunks.pop_front() {
                Some(next) => {
                    self.bytes -= first.bytes as u64;
                    first = next;
                }
                None => break,
            }
        }

        // Bytes to drop from the start of the first retained chunk. It never
        // exceeds that chunk's length, so it fits in a `usize`.
        let offset = self.bytes.saturating_sub(self.num_print) as usize;
        self.chunks
            .push_front(Box::new(BytesChunk::from_chunk(&first, offset)));
        Ok(())
    }

    /// Writes the bytes of every stored chunk, in order, to `writer`.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by `writer`.
    pub fn print(&self, writer: &mut impl Write) -> UResult<()> {
        for chunk in &self.chunks {
            writer.write_all(chunk.get_buffer())?;
        }
        Ok(())
    }

    /// Reports whether at least one chunk is stored (the chunk itself may be
    /// empty).
    pub fn has_data(&self) -> bool {
        !self.chunks.is_empty()
    }
}
/// A `BytesChunk` annotated with a line count and the delimiter that ends a line.
#[derive(Clone, Debug)]
pub struct LinesChunk {
/// The underlying bytes.
chunk: BytesChunk,
/// Number of lines attributed to `chunk`; `fill` sets it to the number of
/// `delimiter` bytes found in the buffer.
lines: usize,
/// Byte that terminates a line.
delimiter: u8,
}
impl LinesChunk {
    /// Creates an empty chunk whose lines are terminated by `delimiter`.
    pub fn new(delimiter: u8) -> Self {
        Self {
            delimiter,
            lines: 0,
            chunk: BytesChunk::new(),
        }
    }

    /// Counts the occurrences of the delimiter in the valid bytes.
    fn count_lines(&self) -> usize {
        let data = self.get_buffer();
        memchr::memchr_iter(self.delimiter, data).count()
    }

    /// Builds a new chunk from `chunk` with its first `offset` lines removed.
    ///
    /// When `offset` exceeds the line count of `chunk`, an empty chunk with the
    /// same delimiter is returned.
    fn from_chunk(chunk: &Self, offset: usize) -> Self {
        match chunk.lines.checked_sub(offset) {
            None => Self::new(chunk.delimiter),
            Some(remaining_lines) => {
                let skip_bytes = chunk.calculate_bytes_offset_from(offset);
                Self {
                    chunk: BytesChunk::from_chunk(&chunk.chunk, skip_bytes),
                    lines: remaining_lines,
                    delimiter: chunk.delimiter,
                }
            }
        }
    }

    /// Reports whether the underlying byte chunk holds any data.
    pub fn has_data(&self) -> bool {
        self.chunk.has_data()
    }

    /// Returns the valid bytes of the underlying chunk.
    pub fn get_buffer(&self) -> &[u8] {
        self.chunk.get_buffer()
    }

    /// Returns the valid bytes of the underlying chunk starting at `offset`.
    pub fn get_buffer_with(&self, offset: usize) -> &[u8] {
        self.chunk.get_buffer_with(offset)
    }

    /// Returns the number of lines currently attributed to this chunk.
    pub fn get_lines(&self) -> usize {
        self.lines
    }

    /// Refills the underlying byte chunk from `filehandle` and recounts lines.
    ///
    /// Returns `Ok(Some(n))` with the number of bytes read, or `Ok(None)` (with
    /// the line count reset to 0) when nothing was read.
    pub fn fill(&mut self, filehandle: &mut impl BufRead) -> UResult<Option<usize>> {
        let filled = self.chunk.fill(filehandle)?;
        self.lines = if filled.is_some() {
            self.count_lines()
        } else {
            0
        };
        Ok(filled)
    }

    /// Returns the byte index just past the `offset`-th delimiter.
    ///
    /// An `offset` of 0 gives 0; if the buffer has fewer than `offset`
    /// delimiters the full buffer length is returned.
    fn calculate_bytes_offset_from(&self, offset: usize) -> usize {
        let nth = match offset.checked_sub(1) {
            Some(nth) => nth,
            None => return 0,
        };
        let data = self.get_buffer();
        memchr::memchr_iter(self.delimiter, data)
            .nth(nth)
            .map_or(data.len(), |delimiter_idx| delimiter_idx + 1)
    }

    /// Writes the chunk to `writer`, skipping its first `offset` lines.
    pub fn write_lines(&self, writer: &mut impl Write, offset: usize) -> UResult<()> {
        let skip_bytes = self.calculate_bytes_offset_from(offset);
        self.write_bytes(writer, skip_bytes)
    }

    /// Writes the chunk to `writer`, skipping its first `offset` bytes.
    pub fn write_bytes(&self, writer: &mut impl Write, offset: usize) -> UResult<()> {
        let tail = self.get_buffer_with(offset);
        writer.write_all(tail)?;
        Ok(())
    }
}
/// Queue of `LinesChunk`s used to keep the trailing `num_print` lines of a reader.
pub struct LinesChunkBuffer {
/// Byte that terminates a line; passed to every `LinesChunk` created by `fill`.
delimiter: u8,
/// Running line count maintained by `fill`: lines counted so far minus the
/// lines of chunks already dropped from the front of `chunks`.
lines: u64,
/// Number of trailing lines to retain.
num_print: u64,
/// Retained chunks, oldest first.
chunks: VecDeque<Box<LinesChunk>>,
}
impl LinesChunkBuffer {
    /// Creates an empty buffer that retains the last `num_print` lines
    /// (terminated by `delimiter`) of whatever is fed through
    /// [`fill`](Self::fill).
    pub fn new(delimiter: u8, num_print: u64) -> Self {
        Self {
            delimiter,
            num_print,
            lines: 0,
            chunks: VecDeque::new(),
        }
    }

    /// Reads `reader` to its end, keeping only the chunks needed to reproduce
    /// its last `num_print` lines.
    ///
    /// Input that does not end with the delimiter is treated as ending with one
    /// more (unterminated) line. If the reader produced no data, nothing is
    /// stored.
    ///
    /// # Errors
    ///
    /// Propagates any error returned while reading from `reader`.
    pub fn fill(&mut self, reader: &mut impl BufRead) -> UResult<()> {
        let mut chunk = Box::new(LinesChunk::new(self.delimiter));
        while chunk.fill(reader)?.is_some() {
            self.lines += chunk.lines as u64;
            // Move the box into the queue; cloning would copy the whole
            // buffer on every read for no benefit.
            self.chunks.push_back(chunk);

            let first_lines = self.chunks[0].lines as u64;
            chunk = if self.lines - first_lines > self.num_print {
                // The oldest chunk is no longer needed: recycle its allocation
                // for the next read.
                self.lines -= first_lines;
                self.chunks
                    .pop_front()
                    .expect("a chunk was pushed just above")
            } else {
                Box::new(LinesChunk::new(self.delimiter))
            };
        }

        // Count a trailing line that lacks its delimiter. An empty queue means
        // the input was empty and there is nothing left to do.
        match self.chunks.back_mut() {
            None => return Ok(()),
            Some(last) => {
                if !last.get_buffer().ends_with(&[self.delimiter]) {
                    last.lines += 1;
                    self.lines += 1;
                }
            }
        }

        // Drop leading chunks while the chunks behind them still hold more
        // than `num_print` lines. The first chunk that fails this test is
        // kept, because it may hold some of the lines to print.
        let first = loop {
            // At least one chunk exists, and the last remaining chunk always
            // stops the loop, so the queue cannot run dry here.
            let candidate = self
                .chunks
                .pop_front()
                .expect("at least one chunk remains");
            let candidate_lines = candidate.lines as u64;
            if self.lines - candidate_lines > self.num_print {
                self.lines -= candidate_lines;
            } else {
                break candidate;
            }
        };

        // Lines to drop from the start of the first retained chunk. It never
        // exceeds that chunk's line count, so it fits in a `usize`.
        let skip_lines = self.lines.saturating_sub(self.num_print) as usize;
        self.chunks
            .push_front(Box::new(LinesChunk::from_chunk(&first, skip_lines)));
        Ok(())
    }

    /// Writes the bytes of every stored chunk, in order, to `writer`.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by `writer`.
    pub fn write(&self, mut writer: impl Write) -> UResult<()> {
        for chunk in &self.chunks {
            chunk.write_bytes(&mut writer, 0)?;
        }
        Ok(())
    }
}
// Unit tests for `BytesChunk::from_chunk`.
#[cfg(test)]
mod tests {
use crate::chunks::{BUFFER_SIZE, BytesChunk};
// With offset 0 the copy equals the source as long as the single non-zero
// byte (index 1) lies inside the valid range; once it falls outside the valid
// range (`bytes == 1`) or before the offset (offset 2) it is not copied and
// the new buffer stays all zeros.
#[test]
fn test_bytes_chunk_from_when_offset_is_zero() {
let mut chunk = BytesChunk::new();
chunk.bytes = BUFFER_SIZE;
chunk.buffer[1] = 1;
let other = BytesChunk::from_chunk(&chunk, 0);
assert_eq!(other, chunk);
chunk.bytes = 2;
let other = BytesChunk::from_chunk(&chunk, 0);
assert_eq!(other, chunk);
chunk.bytes = 1;
let other = BytesChunk::from_chunk(&chunk, 0);
assert_eq!(other.buffer, [0; BUFFER_SIZE]);
assert_eq!(other.bytes, chunk.bytes);
chunk.bytes = BUFFER_SIZE;
let other = BytesChunk::from_chunk(&chunk, 2);
assert_eq!(other.buffer, [0; BUFFER_SIZE]);
assert_eq!(other.bytes, BUFFER_SIZE - 2);
}
// A non-zero offset shifts the retained bytes to the front of the new buffer
// and shrinks the valid length by the offset.
#[test]
fn test_bytes_chunk_from_when_offset_is_not_zero() {
let mut chunk = BytesChunk::new();
chunk.bytes = BUFFER_SIZE;
chunk.buffer[1] = 1;
let other = BytesChunk::from_chunk(&chunk, 1);
let mut expected_buffer = [0; BUFFER_SIZE];
expected_buffer[0] = 1;
assert_eq!(other.buffer, expected_buffer);
assert_eq!(other.bytes, BUFFER_SIZE - 1);
let other = BytesChunk::from_chunk(&chunk, 2);
assert_eq!(other.buffer, [0; BUFFER_SIZE]);
assert_eq!(other.bytes, BUFFER_SIZE - 2);
}
// An offset beyond a completely full chunk yields an empty chunk.
#[test]
fn test_bytes_chunk_from_when_offset_is_larger_than_chunk_size_1() {
let mut chunk = BytesChunk::new();
chunk.bytes = BUFFER_SIZE;
let new_chunk = BytesChunk::from_chunk(&chunk, BUFFER_SIZE + 1);
assert_eq!(0, new_chunk.bytes);
}
// Any positive offset into an empty chunk yields an empty chunk.
#[test]
fn test_bytes_chunk_from_when_offset_is_larger_than_chunk_size_2() {
let mut chunk = BytesChunk::new();
chunk.bytes = 0;
let new_chunk = BytesChunk::from_chunk(&chunk, 1);
assert_eq!(0, new_chunk.bytes);
}
// An offset beyond the valid length of a partially filled chunk yields an
// empty chunk.
#[test]
fn test_bytes_chunk_from_when_offset_is_larger_than_chunk_size_3() {
let mut chunk = BytesChunk::new();
chunk.bytes = 1;
let new_chunk = BytesChunk::from_chunk(&chunk, 2);
assert_eq!(0, new_chunk.bytes);
}
// An offset exactly equal to the valid length leaves nothing to copy.
#[test]
fn test_bytes_chunk_from_when_offset_is_equal_to_chunk_size() {
let mut chunk = BytesChunk::new();
chunk.buffer[0] = 1;
chunk.bytes = 1;
let new_chunk = BytesChunk::from_chunk(&chunk, 1);
assert_eq!(0, new_chunk.bytes);
}
}