use crate::read::BoundedReader;
use crate::{BookError, Result};
use aversion::group::{DataSink, DataSourceExt};
use aversion::util::cbor::CborData;
use aversion::{assign_message_ids, FromVersion, UpgradeLatest, Versioned};
use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt};
use serde::{Deserialize, Serialize};
use std::convert::TryInto;
use std::fs::File;
use std::io::{self, Cursor, Read, Seek, SeekFrom, Write};
use std::num::NonZeroU64;
use std::thread::panicking;
/// Magic number stored in `FileHeaderV1::bookwriter_magic` by `BookWriter::new`
/// and checked by `Book::new` to recognise files written by this module.
const BOOK_V1_MAGIC: u32 = 0xFF33_0001;
/// Size in bytes of the header region at the start of the file. The serialized
/// header is zero-padded to this size on write and read back as one block.
const HEADER_SIZE: usize = 4096;
/// Largest table-of-contents length, in bytes, that `Book::new` will accept
/// (0x400_0000 = 64 MiB). Larger values are rejected as a corrupt file.
const MAX_TOC_SIZE: u64 = 0x400_0000;
/// Version 1 of the file header.
///
/// `BookWriter` serializes this at the start of the file and pads it to
/// `HEADER_SIZE` bytes; `Book::new` reads it back from the same region.
#[derive(Debug, Versioned, UpgradeLatest, Serialize, Deserialize)]
pub struct FileHeaderV1 {
/// Format marker; set to `BOOK_V1_MAGIC` on write and verified on read.
bookwriter_magic: u32,
/// Caller-supplied magic number, passed to `BookWriter::new` and returned
/// by `Book::magic`.
pub user_magic: u32,
}
/// The header version currently written and expected by this module.
pub type FileHeader = FileHeaderV1;
/// A non-empty byte range within the book file.
///
/// Empty chapters are not represented by a zero-length span; their TOC entry
/// carries `None` instead.
#[derive(Debug, PartialEq, Serialize, Deserialize)]
pub struct FileSpanV1 {
/// Start of the range, measured from the beginning of the file
/// (readers seek to it with `SeekFrom::Start`).
pub offset: u64,
/// Number of bytes in the range; never zero.
pub length: NonZeroU64,
}
impl FileSpanV1 {
    /// Builds a span from an offset and a length.
    ///
    /// Returns `None` when `length` is zero, because a span must be non-empty.
    pub fn from_offset_length(offset: usize, length: usize) -> Option<Self> {
        let length = NonZeroU64::new(length as u64)?;
        Some(FileSpanV1 {
            offset: offset as u64,
            length,
        })
    }
}
type FileSpan = FileSpanV1;
/// Version 1 of a table-of-contents entry, keyed by a numeric chapter id.
///
/// Kept so that old tables can be upgraded to `TocEntryV2` (see the
/// `FromVersion<TocV1>` impl for `TocV2`).
#[derive(Debug, Serialize, Deserialize)]
struct TocEntryV1 {
/// Numeric chapter id.
id: u64,
/// Location of the chapter data, or `None` for an empty chapter.
span: Option<FileSpanV1>,
}
/// Version 2 of a table-of-contents entry, keyed by an arbitrary byte string.
#[derive(Debug, PartialEq, Serialize, Deserialize)]
pub struct TocEntryV2 {
/// Chapter id as raw bytes (see the `From` impls on `ChapterId` for how
/// strings and integers are encoded).
pub id: Box<[u8]>,
/// Location of the chapter data, or `None` for an empty chapter.
pub span: Option<FileSpanV1>,
}
/// The TOC entry version currently in use.
type TocEntry = TocEntryV2;
/// Version 1 of the table of contents: a list of `TocEntryV1` records.
#[derive(Debug, Default, Serialize, Deserialize, Versioned)]
struct TocV1(Vec<TocEntryV1>);
/// Version 2 of the table of contents: a list of `TocEntryV2` records, in the
/// order the entries were added.
#[derive(Debug, Default, Serialize, Deserialize, Versioned, UpgradeLatest)]
pub struct TocV2(Vec<TocEntryV2>);
impl FromVersion<TocV1> for TocV2 {
fn from_version(v1: TocV1) -> Self {
let entries =
v1.0.into_iter()
.map(|v1_entry| TocEntryV2 {
id: Box::new(v1_entry.id.to_be_bytes()),
span: v1_entry.span,
})
.collect();
TocV2(entries)
}
}
type Toc = TocV2;
impl Toc {
fn add(&mut self, entry: TocEntry) {
self.0.push(entry);
}
fn iter(&self) -> impl Iterator<Item = &TocEntry> {
self.0.iter()
}
fn get_chapter<Id>(&self, id: Id) -> Result<&TocEntry>
where
Id: Into<ChapterId>,
{
let id: ChapterId = id.into();
let entry = self.iter().find(|entry| entry.id == id.0);
entry.ok_or(BookError::NoChapter)
}
}
// Associates message ids with the two top-level serialized types:
// the file header gets id 1 and the table of contents gets id 2.
// NOTE(review): presumably these ids tag each serialized message so that
// `expect_message` can check the type on read; confirm against the
// `aversion` documentation. Changing them likely breaks existing files.
assign_message_ids! {
FileHeader: 1,
Toc: 2,
}
/// Identifier of a chapter: an arbitrary byte string.
///
/// The `From` impls below let callers pass byte slices, boxed slices,
/// strings, or `u64` values wherever an `Into<ChapterId>` is accepted.
pub struct ChapterId(pub Box<[u8]>);

/// Copies the bytes of the slice.
impl From<&[u8]> for ChapterId {
    fn from(slice: &[u8]) -> Self {
        ChapterId(Box::from(slice))
    }
}

/// Takes ownership of an already-boxed byte slice without copying.
impl From<Box<[u8]>> for ChapterId {
    fn from(boxed: Box<[u8]>) -> Self {
        ChapterId(boxed)
    }
}

/// Uses the UTF-8 bytes of the string as the id.
impl From<&str> for ChapterId {
    fn from(s: &str) -> Self {
        ChapterId(Box::from(s.as_bytes()))
    }
}

/// Uses the UTF-8 bytes of the string as the id, reusing its buffer.
impl From<String> for ChapterId {
    fn from(s: String) -> Self {
        let bytes: Vec<u8> = s.into_bytes();
        ChapterId(bytes.into_boxed_slice())
    }
}

/// Encodes the number as 8 big-endian bytes.
impl From<u64> for ChapterId {
    fn from(n: u64) -> Self {
        let encoded = n.to_be_bytes();
        ChapterId(Box::from(&encoded[..]))
    }
}
/// Writer for a single chapter of a book.
///
/// It owns the `BookWriter` while the chapter is open; `close()` records the
/// chapter in the table of contents and hands the book back. Dropping a
/// `ChapterWriter` that was never closed panics (see its `Drop` impl).
pub struct ChapterWriter<W> {
// `Some` while the chapter is open; taken out by `close()`.
book: Option<BookWriter<W>>,
// Chapter id as raw bytes, stored in the TOC entry on close.
id: Box<[u8]>,
// The book's `current_offset` at the moment the chapter was started.
offset: usize,
// Number of bytes written to this chapter so far.
length: usize,
}
impl<W> ChapterWriter<W>
where
    W: Write,
{
    /// Starts a chapter named `id` at the book's current write offset.
    ///
    /// The new writer takes ownership of `book`; `close()` gives it back.
    fn new<Id>(book: BookWriter<W>, id: Id) -> Self
    where
        Id: Into<ChapterId>,
    {
        let id: ChapterId = id.into();
        let offset = book.current_offset;
        ChapterWriter {
            book: Some(book),
            id: id.0,
            offset,
            length: 0,
        }
    }

    /// Finishes the chapter and returns the book so more chapters can be added.
    ///
    /// Flushes the underlying writer, appends a TOC entry for this chapter
    /// (with no span if nothing was written) and advances the book's offset
    /// by the chapter length.
    ///
    /// # Errors
    ///
    /// Returns an error if flushing the underlying writer fails. The book is
    /// dropped in that case, since the chapter could not be completed.
    pub fn close(mut self) -> Result<BookWriter<W>> {
        // Detach the book before doing anything fallible. If `self` still held
        // the book when `?` returns early, the `Drop` guard would panic and the
        // caller would never see the I/O error.
        let mut book = self
            .book
            .take()
            .expect("ChapterWriter owns its book until close() is called");
        book.writer.flush()?;

        let toc_entry = TocEntry {
            // The id is not needed after this point, so move it out rather
            // than cloning it.
            id: std::mem::take(&mut self.id),
            span: FileSpan::from_offset_length(self.offset, self.length),
        };
        book.toc.add(toc_entry);
        book.current_offset += self.length;
        Ok(book)
    }
}
impl<W> Drop for ChapterWriter<W> {
    /// Guards against forgetting to call `close()`.
    ///
    /// Panics if the writer still owns its book, unless the thread is already
    /// unwinding from another panic.
    fn drop(&mut self) {
        let already_closed = self.book.is_none();
        if already_closed || panicking() {
            return;
        }
        panic!("ChapterWriter was dropped without calling close()");
    }
}
impl<W> Write for ChapterWriter<W>
where
    W: Write,
{
    /// Forwards `buf` to the book's underlying writer and adds the number of
    /// bytes actually written to the chapter length.
    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
        let accepted = self.book.as_mut().unwrap().writer.write(buf)?;
        self.length += accepted;
        Ok(accepted)
    }

    /// Flushes the book's underlying writer.
    fn flush(&mut self) -> io::Result<()> {
        self.book.as_mut().unwrap().writer.flush()
    }
}
/// Writer that produces a book: a header, then chapter data, then a table of
/// contents followed by its length.
///
/// Chapters are added one at a time with `new_chapter`; `close` writes the
/// table of contents and returns the underlying writer.
#[derive(Debug)]
pub struct BookWriter<W> {
// Destination for all bytes of the book.
writer: W,
// Bytes accounted for so far: `HEADER_SIZE` after the header is written,
// then increased by each chapter's length when that chapter is closed.
// NOTE(review): equals the file position only if `writer` started at
// position 0 — confirm with callers.
current_offset: usize,
// Header written at construction time.
header: FileHeader,
// Entries for every chapter closed so far.
toc: Toc,
}
impl<W: Write> BookWriter<W> {
    /// Creates a book on top of `writer` and immediately writes the header.
    ///
    /// `user_magic` is stored in the header for the caller's own use.
    ///
    /// # Errors
    ///
    /// Returns an error if serializing or writing the header fails.
    pub fn new(writer: W, user_magic: u32) -> Result<Self> {
        let header = FileHeader {
            bookwriter_magic: BOOK_V1_MAGIC,
            user_magic,
        };
        let mut book = BookWriter {
            writer,
            current_offset: 0,
            header,
            toc: Toc::default(),
        };
        book.write_header()?;
        Ok(book)
    }

    /// Serializes the header, zero-pads it to `HEADER_SIZE` bytes and writes it.
    ///
    /// # Panics
    ///
    /// Panics if the serialized header is larger than `HEADER_SIZE`.
    fn write_header(&mut self) -> Result<()> {
        let mut serializer = CborData::new(Cursor::new(Vec::<u8>::new()));
        serializer.write_message(&self.header)?;

        // Unwrap the serializer, then the cursor, to get at the raw bytes.
        let mut padded = serializer.into_inner().into_inner();
        assert!(
            padded.len() <= HEADER_SIZE,
            "serialized header exceeds maximum size"
        );
        padded.resize(HEADER_SIZE, 0);

        self.writer.write_all(&padded)?;
        self.current_offset = HEADER_SIZE;
        Ok(())
    }

    /// Starts a new chapter identified by `id`.
    ///
    /// The book is moved into the returned `ChapterWriter` and handed back by
    /// `ChapterWriter::close`.
    pub fn new_chapter<Id>(self, id: Id) -> ChapterWriter<W>
    where
        Id: Into<ChapterId>,
    {
        ChapterWriter::new(self, id)
    }

    /// Finishes the book and returns the underlying writer.
    ///
    /// Writes the serialized table of contents followed by its length as a
    /// big-endian `u64`, then flushes.
    ///
    /// # Errors
    ///
    /// Returns an error if serializing the table of contents, writing, or
    /// flushing fails.
    pub fn close(mut self) -> Result<W> {
        let mut serializer = CborData::new(Cursor::new(Vec::<u8>::new()));
        serializer.write_message(&self.toc)?;

        let mut trailer = serializer.into_inner().into_inner();
        let toc_length = trailer.len() as u64;
        // Writing into a `Vec` cannot fail.
        trailer.write_u64::<BigEndian>(toc_length).unwrap();

        self.writer.write_all(&trailer)?;
        self.writer.flush()?;
        Ok(self.writer)
    }
}
/// A book opened for reading.
///
/// The header and table of contents are parsed once when the book is opened;
/// chapter data is read from `reader` on demand.
#[derive(Debug)]
pub struct Book<R> {
// Source of the book's bytes.
reader: R,
// Header parsed from the start of the file.
header: FileHeader,
// Table of contents parsed from the end of the file.
toc: Toc,
}
impl<R> Book<R> {
    /// Returns the user-defined magic number stored in the book's header.
    pub fn magic(&self) -> u32 {
        let header = &self.header;
        header.user_magic
    }
}
#[cfg(target_family = "unix")]
impl Book<File> {
    /// Returns a reader over the chapter identified by `index`.
    ///
    /// The reader only borrows the file immutably, so several chapter readers
    /// can exist at the same time. A chapter with no data yields an empty
    /// reader.
    ///
    /// # Errors
    ///
    /// Returns `BookError::NoChapter` if no chapter has this id.
    pub fn chapter_reader<Id>(&self, index: Id) -> Result<BoundedReader<&File>>
    where
        Id: Into<ChapterId>,
    {
        let entry = self.toc.get_chapter(index)?;
        let file = &self.reader;
        let bounded = match entry.span.as_ref() {
            Some(span) => BoundedReader::new(file, span.offset, span.length.into()),
            // Empty chapters have no span in the table of contents.
            None => BoundedReader::empty(file),
        };
        Ok(bounded)
    }

    /// Reads the whole chapter identified by `index` into memory.
    ///
    /// # Errors
    ///
    /// Returns `BookError::NoChapter` if no chapter has this id, or an error
    /// if reading the chapter data fails.
    ///
    /// # Panics
    ///
    /// Panics if the chapter length does not fit in `usize`.
    pub fn read_chapter<Id>(&self, index: Id) -> Result<Box<[u8]>>
    where
        Id: Into<ChapterId>,
    {
        let reader = self.chapter_reader(index)?;
        let size: usize = reader.len().try_into().unwrap();
        let mut contents = vec![0u8; size];
        reader.read_exact_at(&mut contents, 0)?;
        Ok(contents.into_boxed_slice())
    }
}
impl<R> Book<R>
where
    R: Read + Seek,
{
    /// Opens a book, parsing its header and table of contents.
    ///
    /// # Errors
    ///
    /// Returns `BookError::Serializer` if the header magic is wrong, if the
    /// recorded table-of-contents length exceeds `MAX_TOC_SIZE`, or if that
    /// length would place the table before the end of the header. I/O and
    /// deserialization failures are returned as errors too.
    pub fn new(mut reader: R) -> Result<Self> {
        // The header occupies a fixed-size region at the start of the file.
        let mut header_buf = [0u8; HEADER_SIZE];
        reader.seek(SeekFrom::Start(0))?;
        reader.read_exact(&mut header_buf)?;
        let buf_reader = &header_buf[..];
        let mut data_src = CborData::new(buf_reader);
        let header: FileHeader = data_src.expect_message()?;
        if header.bookwriter_magic != BOOK_V1_MAGIC {
            return Err(BookError::Serializer);
        }

        // The last 8 bytes hold the big-endian length of the table of
        // contents, which sits immediately before them.
        let toc_end = reader.seek(SeekFrom::End(-8))?;
        let toc_len = reader.read_u64::<BigEndian>()?;
        if toc_len > MAX_TOC_SIZE {
            return Err(BookError::Serializer);
        }

        // A corrupt or truncated file can claim a length larger than the space
        // available. Reject that instead of underflowing, and never accept a
        // table that would overlap the header region.
        let toc_offset = toc_end
            .checked_sub(toc_len)
            .filter(|&offset| offset >= HEADER_SIZE as u64)
            .ok_or(BookError::Serializer)?;

        let toc_reader = BoundedReader::new(&mut reader, toc_offset, toc_len);
        let mut data_src = CborData::new(toc_reader);
        let toc: Toc = data_src.expect_message()?;
        Ok(Book {
            reader,
            header,
            toc,
        })
    }

    /// Returns `true` if the table of contents has a chapter with this id.
    pub fn has_chapter<Id>(&self, id: Id) -> bool
    where
        Id: Into<ChapterId>,
    {
        self.toc.get_chapter(id).is_ok()
    }

    /// Returns a reader over the chapter identified by `id`.
    ///
    /// The reader borrows the book mutably, so only one can exist at a time.
    /// A chapter with no data yields an empty reader.
    ///
    /// # Errors
    ///
    /// Returns `BookError::NoChapter` if no chapter has this id, or an error
    /// if seeking to the chapter fails.
    pub fn exclusive_chapter_reader<Id>(&mut self, id: Id) -> Result<BoundedReader<&mut R>>
    where
        Id: Into<ChapterId>,
    {
        let toc_entry = self.toc.get_chapter(id)?;
        match &toc_entry.span {
            // Empty chapters have no span in the table of contents.
            None => Ok(BoundedReader::empty(&mut self.reader)),
            Some(span) => {
                self.reader.seek(SeekFrom::Start(span.offset))?;
                Ok(BoundedReader::new(
                    &mut self.reader,
                    span.offset,
                    span.length.into(),
                ))
            }
        }
    }

    /// Reads the whole chapter identified by `index` into memory.
    ///
    /// # Errors
    ///
    /// Returns `BookError::NoChapter` if no chapter has this id, or an error
    /// if reading the chapter data fails.
    pub fn exclusive_read_chapter<Id>(&mut self, index: Id) -> Result<Box<[u8]>>
    where
        Id: Into<ChapterId>,
    {
        let mut buf = vec![];
        let mut reader = self.exclusive_chapter_reader(index)?;
        reader.read_to_end(&mut buf)?;
        Ok(buf.into_boxed_slice())
    }
}
// Round-trip tests for writing and reading books, plus the TOC upgrade path.
#[cfg(test)]
mod tests {
use super::*;
use std::io::Cursor;
// A book with no chapters can be written and reopened.
#[test]
fn empty_book() {
let magic = 0x1234;
let mut cursor = Cursor::new(Vec::<u8>::new());
{
let book = BookWriter::new(&mut cursor, magic).unwrap();
book.close().unwrap();
}
// 4096 header bytes + 8 bytes for the trailing TOC length; the 9 is
// presumably the serialized size of an empty TOC.
assert_eq!(cursor.get_ref().len(), 4096 + 9 + 8);
let _ = Book::new(cursor).unwrap();
}
// A book that was never closed has only a header and must fail to open.
#[test]
fn truncated_book() {
let magic = 0x1234;
let mut cursor = Cursor::new(Vec::<u8>::new());
{
// Dropped without `close()`, so no table of contents is written.
let _book = BookWriter::new(&mut cursor, magic).unwrap();
}
assert_eq!(cursor.get_ref().len(), 4096);
Book::new(cursor).unwrap_err();
}
// Writes three chapters to an in-memory buffer and reads them back through
// the exclusive (`&mut`) reader API.
#[test]
fn simple_book() {
let magic = 0x1234;
let buffer = {
let buffer = Cursor::new(Vec::<u8>::new());
let book = BookWriter::new(buffer, magic).unwrap();
// Chapter 11 is closed without writing anything, so it is empty.
let chapter = book.new_chapter(11);
let book = chapter.close().unwrap();
let mut chapter = book.new_chapter(22);
chapter.write_all(b"This is chapter 22").unwrap();
let book = chapter.close().unwrap();
// Chapter ids can also be strings.
let mut chapter = book.new_chapter("🦀");
chapter.write_all(b"This is chapter 33").unwrap();
let book = chapter.close().unwrap();
book.close().unwrap()
};
let mut book = Book::new(buffer).unwrap();
let ch1 = book.exclusive_read_chapter(11).unwrap();
assert!(ch1.is_empty());
// An id that was never written is reported as missing.
assert!(!book.has_chapter(1));
let ch2 = book.exclusive_read_chapter(22).unwrap();
assert_eq!(ch2.as_ref(), b"This is chapter 22");
let ch2 = book.exclusive_read_chapter("🦀").unwrap();
assert_eq!(ch2.as_ref(), b"This is chapter 33");
}
// Same scenario as `simple_book`, but backed by a real file and read through
// the shared (`&self`) reader API of `Book<File>`.
#[test]
fn book_file_shared() {
let temp = tempfile::tempfile().unwrap();
let magic = 0x1234;
let file = {
let book = BookWriter::new(temp, magic).unwrap();
let chapter = book.new_chapter(11);
let book = chapter.close().unwrap();
let mut chapter = book.new_chapter(22);
chapter.write_all(b"This is chapter 22").unwrap();
let book = chapter.close().unwrap();
let mut chapter = book.new_chapter("🦀");
chapter.write_all(b"This is chapter 33").unwrap();
let book = chapter.close().unwrap();
book.close().unwrap()
};
let book = Book::new(file).unwrap();
let ch1 = book.read_chapter(11).unwrap();
assert!(ch1.is_empty());
assert!(!book.has_chapter(1));
let ch2 = book.read_chapter(22).unwrap();
assert_eq!(ch2.as_ref(), b"This is chapter 22");
let ch2 = book.read_chapter("🦀").unwrap();
assert_eq!(ch2.as_ref(), b"This is chapter 33");
}
// Upgrading a V1 TOC keeps the span and turns the numeric id into its
// big-endian byte encoding.
#[test]
fn toc_compat() {
let mut toc = Vec::new();
toc.push(TocEntryV1 {
id: 1234,
span: Some(FileSpanV1 {
length: 33.try_into().unwrap(),
offset: 44,
}),
});
let toc = TocV1(toc);
let toc = TocV2::from_version(toc);
assert_eq!(toc.0.len(), 1);
assert_eq!(
toc.0[0],
TocEntryV2 {
id: Box::new(1234u64.to_be_bytes()),
span: Some(FileSpanV1 {
length: 33.try_into().unwrap(),
offset: 44,
}),
}
)
}
}