use itertools::Itertools;
use std::io::{BufRead, BufReader, Read};
use thiserror::Error;
/// Size in bytes of one tar block: headers are read as a single block of this
/// size and entry data is padded up to a multiple of it.
const BLOCK_SIZE: usize = 512;
/// Errors that can occur while walking a tar archive.
#[derive(Error, Debug)]
pub enum TzgrepError {
/// Reading from the underlying input failed.
#[error("io error: {0}")]
IoError(#[from] std::io::Error),
/// A string field of a header was not valid UTF-8.
#[error("utf-8 convert error: {0}")]
Utf8Error(#[from] std::str::Utf8Error),
/// A numeric header field contained this byte where an octal digit was
/// expected.
#[error("invalid octal: {0}")]
OctalError(u8),
}
/// Kind of archive entry described by a header block.
#[derive(Debug)]
#[non_exhaustive]
enum FileType {
/// Regular file; its contents are scanned line by line.
Regular,
/// Header with an empty name; the archive walk stops when it is seen.
EndOfArchive,
/// Any other entry type; its data blocks are skipped.
Other,
}
impl FileType {
fn new(c: u8) -> Self {
use FileType::*;
match c {
0x00 | 0x30 => Regular,
_ => Other,
}
}
}
/// The fields of a tar header block that this file makes use of.
#[derive(Debug)]
struct Header<'a> {
/// Entry name, borrowed from the header block it was parsed from.
name: &'a str,
/// Size of the entry's data in bytes, decoded from the octal size field.
size: u64,
/// Kind of entry, derived from the header's type-flag byte.
filetype: FileType,
}
impl<'a> Header<'a> {
    /// Decodes a NUL-terminated string field.
    ///
    /// Returns the bytes before the first NUL, or the whole field when it
    /// contains no NUL, interpreted as UTF-8.
    ///
    /// # Errors
    ///
    /// Returns [`TzgrepError::Utf8Error`] if those bytes are not valid UTF-8.
    fn parse_str(buf: &[u8]) -> Result<&str, TzgrepError> {
        let len = buf
            .iter()
            .find_position(|&&x| x == 0)
            .map_or(buf.len(), |(p, _)| p);
        Ok(std::str::from_utf8(&buf[..len])?)
    }

    /// Decodes an octal numeric field.
    ///
    /// Leading spaces are skipped and decoding stops at the first space or
    /// NUL, because tar writers pad numeric fields with spaces and terminate
    /// them with a space or NUL. A field with no digits decodes to `0`.
    ///
    /// # Errors
    ///
    /// Returns [`TzgrepError::OctalError`] carrying the offending byte if
    /// anything other than `'0'..='7'` appears before the terminator.
    fn parse_octal(buf: &[u8]) -> Result<u64, TzgrepError> {
        let digits = buf
            .iter()
            .skip_while(|&&c| c == b' ')
            .take_while(|&&c| c != b' ' && c != 0);
        let mut value = 0u64;
        for &c in digits {
            if !(b'0'..=b'7').contains(&c) {
                return Err(TzgrepError::OctalError(c));
            }
            value = value << 3 | u64::from(c - b'0');
        }
        Ok(value)
    }

    /// Parses the name, size and type flag out of one header block.
    ///
    /// A block whose name field is empty is reported as
    /// [`FileType::EndOfArchive`] with size `0`; its other fields are not
    /// inspected.
    ///
    /// # Errors
    ///
    /// Returns [`TzgrepError::Utf8Error`] if the name is not valid UTF-8 and
    /// [`TzgrepError::OctalError`] if the size field is malformed.
    fn new(buf: &'a [u8; BLOCK_SIZE]) -> Result<Self, TzgrepError> {
        let name = Self::parse_str(&buf[0..100])?;
        if name.is_empty() {
            return Ok(Self {
                name,
                size: 0,
                filetype: FileType::EndOfArchive,
            });
        }
        // The size field spans all 12 bytes at 124..136; the last byte is
        // usually a terminator but may be a twelfth digit.
        let size = Self::parse_octal(&buf[124..136])?;
        let filetype = FileType::new(buf[156]);
        Ok(Self {
            name,
            size,
            filetype,
        })
    }
}
fn num_blocks(size: usize) -> usize {
size.div_ceil(BLOCK_SIZE)
}
/// Calls `func` for every line read from `input`, then consumes whatever is
/// left of `input`.
///
/// `func` receives `filename`, the 1-based line number and the line text,
/// including its line terminator when one is present.
///
/// Line reporting stops at the first line that cannot be read, which includes
/// a line that is not valid UTF-8. The remainder of `input` is then read and
/// discarded so that the caller's stream ends up positioned after this input.
/// Read errors are not reported to the caller.
fn foreach_line<R: BufRead, F: FnMut(&str, usize, &str)>(
    filename: &str,
    mut input: R,
    func: &mut F,
) {
    let mut line = String::new();
    let mut line_number = 0;
    loop {
        line.clear();
        match input.read_line(&mut line) {
            // Clean end of input: nothing is left to discard.
            Ok(0) => return,
            Ok(_) => {
                line_number += 1;
                func(filename, line_number, &line);
            }
            Err(_) => break,
        }
    }
    // Best-effort drain. `io::copy` retries reads that fail with
    // `ErrorKind::Interrupted`, so a transient interruption no longer leaves
    // part of the input unread. Any other error ends the drain; it is ignored
    // here because this function has no way to report it.
    let _ = std::io::copy(&mut input, &mut std::io::sink());
}
pub fn tar_foreach<R: Read, F: FnMut(&str, usize, &str)>(
mut input: R,
func: &mut F,
) -> Result<(), TzgrepError> {
let mut buffer = [0; BLOCK_SIZE];
loop {
input.read_exact(&mut buffer)?;
let h = Header::new(&buffer)?;
use FileType::*;
match h.filetype {
Regular => {
let iw = BufReader::new(input.by_ref().take(h.size));
foreach_line(h.name, iw, func);
let size = h.size as usize;
let remain = num_blocks(size) * BLOCK_SIZE - size;
input.read_exact(&mut buffer[0..remain])?;
}
EndOfArchive => break,
_ => {
for _ in 0..num_blocks(h.size as usize) {
input.read_exact(&mut buffer)?;
}
}
}
}
Ok(())
}