use std::collections::HashMap;
use std::convert::AsRef;
use std::error;
use std::fmt;
use std::fs::File;
use std::io::prelude::*;
use std::mem;
use std::slice;
use std::str;
use std::sync::Arc;
use std::path::Path;
use bincode::{serialize, deserialize, Bounded, Infinite};
use crc::crc64::checksum_iso as checksum;
use memmap::{Mmap, Protection};
use page_size::get as get_page_size;
use super::{Error, FILEARCO_ID, Result};
use file_data::FileData;
/// Archive format version stored in the `version_number` field of every
/// header built by `Header::new`.
const VERSION_NUMBER: u64 = 1;
/// Handle to a FileArco v1 archive that has been opened with `FileArco::new`.
pub struct FileArco {
/// Shared archive state; `get` clones this `Arc` into every `FileRef` it returns.
inner: Arc<Inner>,
}
impl FileArco {
pub fn new<P: AsRef<Path>>(path: P) -> Result<Self> {
let map = Mmap::open_path(path.as_ref(), Protection::Read)?;
let test_header = Header::new(
get_page_size() as u64,
0,
0,
0
);
let test_header_encoded = serialize(&test_header, Infinite).unwrap();
let checksum_size = mem::size_of::<u64>();
if map.len() < test_header_encoded.len() + checksum_size {
return Err(Error::FileArcoV1(FileArcoV1Error::FileTooSmall));
}
let (header, checksum1): (Header, u64) = unsafe {
let ptr = map.ptr().offset(0);
let sl = slice::from_raw_parts(
ptr,
test_header_encoded.len()
);
(
deserialize(sl).unwrap(),
checksum(&sl)
)
};
let header_checksum: u64 = unsafe {
let ptr = map.ptr().offset(test_header_encoded.len() as isize);
let sl = slice::from_raw_parts(ptr, checksum_size);
deserialize(sl).unwrap()
};
if header.id != *FILEARCO_ID {
return Err(Error::FileArcoV1(FileArcoV1Error::NotArchive));
}
if header.version_number != 1 {
return Err(Error::FileArcoV1(FileArcoV1Error::NotV1Archive));
}
if checksum1 != header_checksum {
return Err(Error::FileArcoV1(FileArcoV1Error::CorruptedHeader));
}
if (map.len() as u64) < header.file_length {
return Err(Error::FileArcoV1(FileArcoV1Error::FileTruncated));
}
let (entries, checksum2) = unsafe {
let offset = checksum_size + test_header_encoded.len();
let ptr = map.ptr().offset(offset as isize);
let sl = slice::from_raw_parts(ptr, header.entries_length as usize);
(
deserialize(sl).unwrap(),
checksum(&sl)
)
};
if checksum2 != header.entries_checksum {
return Err(Error::FileArcoV1(FileArcoV1Error::CorruptedEntriesTable));
}
Ok(FileArco {
inner: Arc::new(Inner {
file_offset: header.file_offset,
page_size: header.page_size,
entries: entries,
map: map,
})
})
}
/// Looks up `file_path` in the entries table and returns a reference to the
/// file's bytes inside the memory map.
///
/// Returns `None` when the archive has no entry with that name, or when the
/// entry describes a byte range that does not fit inside the mapped file
/// (in which case handing out a pointer would allow out-of-bounds reads).
pub fn get<P: AsRef<str>>(&self, file_path: P) -> Option<FileRef> {
    let entry = match self.inner.entries.files.get(file_path.as_ref()) {
        Some(entry) => entry,
        None => return None,
    };

    // Offsets and lengths come from the archive file, so validate them with
    // overflow-checked arithmetic before turning them into a pointer.
    let map_length = self.inner.map.len() as u64;
    let start = match self.inner.file_offset.checked_add(entry.offset) {
        Some(start) => start,
        None => return None,
    };
    match start.checked_add(entry.length) {
        Some(end) if end <= map_length => {}
        _ => return None,
    }

    // SAFETY: `start <= start + length <= map.len()`, so the resulting
    // pointer stays within (or one past the end of) the mapping.
    let address = unsafe { self.inner.map.ptr().offset(start as isize) };
    Some(FileRef {
        address: address,
        length: entry.length,
        aligned_length: entry.aligned_length,
        checksum: entry.checksum,
        inner: self.inner.clone(),
    })
}
pub fn page_size(&self) -> u64 {
self.inner.page_size
}
pub fn make<H: Write>(file_data: FileData, mut out_file: H) -> Result<()> {
let base_path = file_data.path();
let entries = Entries::new(file_data);
let entries_encoded: Vec<u8> = serialize(&entries, Infinite).unwrap();
let header = Header::new(get_page_size() as u64,
entries_encoded.len() as u64,
entries.total_aligned_length(),
checksum(&entries_encoded));
let header_encoded = serialize(&header, Infinite).unwrap();
out_file.write_all(&header_encoded)?;
let header_checksum = checksum(&header_encoded);
let header_checksum_encoded = serialize(
&header_checksum,
Bounded(mem::size_of::<u64>() as u64)
).unwrap();
out_file.write_all(&header_checksum_encoded)?;
out_file.write_all(&entries_encoded)?;
let start_length = header_encoded.len() + header_checksum_encoded.len() +
entries_encoded.len();
let padding_length = (header.file_offset as usize) - start_length;
let padding: Vec<u8> = vec![0u8; padding_length];
out_file.write_all(&padding)?;
for (path, entry) in &entries.files {
let full_path = base_path.to_path_buf().join(Path::new(&path));
let mut in_file = File::open(full_path)?;
let mut buffer = Vec::<u8>::with_capacity(entry.length as usize);
in_file.read_to_end(&mut buffer)?;
out_file.write_all(&buffer)?;
let padding_length = entry.aligned_length - entry.length;
let padding: Vec<u8> = vec![0u8; padding_length as usize];
out_file.write_all(&padding)?;
}
Ok(())
}
}
/// Reference to one file's bytes inside a memory-mapped archive, as returned
/// by `FileArco::get`.
#[allow(dead_code)]
pub struct FileRef {
/// Pointer to the first byte of the file's contents within the mapping.
address: *const u8,
/// Length of the file's contents in bytes.
length: u64,
/// Copied from the entry's `aligned_length`; this is the length `as_raw` returns.
aligned_length: u64,
/// Copied from the entry; `is_valid` compares it with the checksum of the contents.
checksum: u64,
/// Shared archive state, which owns the `Mmap` that `address` points into.
inner: Arc<Inner>,
}
impl FileRef {
    /// Returns `true` when the checksum computed over the file's bytes equals
    /// the checksum stored for this entry.
    pub fn is_valid(&self) -> bool {
        checksum(self.as_slice()) == self.checksum
    }

    /// Returns the file's contents as a byte slice of `len()` bytes.
    pub fn as_slice(&self) -> &[u8] {
        let byte_count = self.length as usize;
        // NOTE(review): soundness relies on `address`/`length` describing bytes
        // inside the mapping held by `inner`; that is not checked here.
        unsafe { slice::from_raw_parts(self.address, byte_count) }
    }

    /// Returns the file's contents as a string slice, or an error when the
    /// bytes are not valid UTF-8.
    pub fn as_str(&self) -> Result<&str> {
        let text = str::from_utf8(self.as_slice())?;
        Ok(text)
    }

    /// Returns the start address of the contents together with the entry's
    /// aligned length.
    pub fn as_raw(&self) -> (*mut (), usize) {
        let pointer = self.address as *mut ();
        let span = self.aligned_length as usize;
        (pointer, span)
    }

    /// Returns the length of the file's contents in bytes.
    pub fn len(&self) -> u64 {
        self.length
    }
}
/// Errors specific to reading FileArco v1 archives.
#[derive(Debug)]
pub enum FileArcoV1Error {
/// The entries table failed validation (for example, its checksum does not
/// match the one recorded in the header).
CorruptedEntriesTable,
/// The header failed validation (for example, its checksum does not match
/// the one stored after it).
CorruptedHeader,
/// The file is too short to contain a header and its checksum.
FileTooSmall,
/// The file is shorter than the `file_length` recorded in its header.
FileTruncated,
/// The header's identifier does not equal `FILEARCO_ID`.
NotArchive,
/// The header's version number is not the v1 version number.
NotV1Archive,
/// Catch-all variant; nothing visible in this file constructs it.
Other,
}
impl fmt::Display for FileArcoV1Error {
    /// Writes the fixed human-readable message belonging to the variant.
    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
        let message = match *self {
            FileArcoV1Error::CorruptedEntriesTable => "Corrupted entries table",
            FileArcoV1Error::CorruptedHeader => "Corrupted header",
            FileArcoV1Error::FileTooSmall => {
                "File either too small for FileArco v1 archive or truncated"
            }
            FileArcoV1Error::FileTruncated => "File truncated",
            FileArcoV1Error::NotArchive => "Not FileArco archive",
            FileArcoV1Error::NotV1Archive => "Not FileArco v1 archive",
            FileArcoV1Error::Other => "Something weird happened",
        };
        fmt.write_str(message)
    }
}
impl error::Error for FileArcoV1Error {
fn description(&self) -> &str {
static CORRUPTED_ENTRIES_TABLE: &'static str = "Corrupted entries table";
static CORRUPTED_HEADER: &'static str = "Corrupted header";
static FILE_TOO_SMALL: &'static str = "File either too small for FileArco v1 archive or truncated";
static FILE_TRUNCATED: &'static str = "File truncated";
static NOT_ARCHIVE: &'static str = "Not FileArco archive";
static NOT_V1_ARCHIVE: &'static str = "Not FileArco v1 archive";
static OTHER: &'static str = "Something weird happened";
match *self {
FileArcoV1Error::CorruptedEntriesTable => {
CORRUPTED_ENTRIES_TABLE
},
FileArcoV1Error::CorruptedHeader => {
CORRUPTED_HEADER
},
FileArcoV1Error::FileTooSmall => {
FILE_TOO_SMALL
},
FileArcoV1Error::FileTruncated => {
FILE_TRUNCATED
},
FileArcoV1Error::NotArchive => {
NOT_ARCHIVE
},
FileArcoV1Error::NotV1Archive => {
NOT_V1_ARCHIVE
},
FileArcoV1Error::Other => {
OTHER
}
}
}
fn cause(&self) -> Option<&error::Error> { None }
}
/// State shared (through an `Arc`) between a `FileArco` and the `FileRef`s
/// it hands out.
struct Inner {
/// The header's `file_offset`; `FileArco::get` adds each entry's offset to it
/// to locate the file's contents in the map.
file_offset: u64,
/// The header's `page_size`, returned by `FileArco::page_size`.
page_size: u64,
/// Entries table decoded from the archive.
entries: Entries,
/// Memory map of the archive file, opened with `Protection::Read`.
map: Mmap,
}
/// Archive header, serialized at the very start of an archive file.
#[repr(C)]
#[derive(Serialize, Deserialize, PartialEq, Debug)]
struct Header {
/// Archive identifier; compared against `FILEARCO_ID` when opening.
id: [u8; 8],
/// Format version; `Header::new` stores `VERSION_NUMBER` here.
version_number: u64,
/// `file_offset` plus the total length of the stored file contents; an
/// archive shorter than this is rejected as truncated when opened.
file_length: u64,
/// Byte offset at which the stored file contents begin.
file_offset: u64,
/// Page size supplied to `Header::new` when the header was built.
page_size: u64,
/// Length in bytes of the serialized entries table.
entries_length: u64,
/// Checksum of the serialized entries table.
entries_checksum: u64,
}
impl Header {
fn new(page_size: u64,
entries_length: u64,
file_contents_length: u64,
entries_checksum: u64) -> Self {
let test_header = Header {
id: *FILEARCO_ID,
version_number: VERSION_NUMBER,
file_length: 0,
file_offset: 0,
page_size: page_size,
entries_length: entries_length,
entries_checksum: entries_checksum,
};
let test_header_encoded = serialize(&test_header, Infinite).unwrap();
let header_length = test_header_encoded.len() as u64;
let file_offset = get_aligned_length(header_length + entries_length);
let file_length = file_offset + file_contents_length;
Header {
id: *FILEARCO_ID,
version_number: VERSION_NUMBER,
file_length: file_length,
file_offset: file_offset,
page_size: page_size,
entries_length: entries_length,
entries_checksum: entries_checksum,
}
}
}
/// Table of the files stored in an archive.
#[derive(Serialize, Deserialize, PartialEq, Debug)]
struct Entries {
/// Maps each file's name to the entry describing where its contents are stored.
files: HashMap<String, Entry>,
}
impl Entries {
    /// Builds the entries table for `file_data`.
    ///
    /// Each datum becomes an `Entry` keyed by its name; a later datum with
    /// the same name replaces an earlier one. Offsets are then assigned
    /// back-to-back in the map's iteration order, each file occupying its
    /// `aligned_length`.
    fn new(file_data: FileData) -> Self {
        let mut files = HashMap::new();
        for datum in file_data.into_vec() {
            let length = datum.len();
            files.insert(
                datum.name(),
                Entry {
                    offset: 0,
                    length: length,
                    aligned_length: get_aligned_length(length),
                    checksum: datum.checksum(),
                },
            );
        }

        // Walk the values directly: no need to clone every key and look each
        // one up again just to update its entry.
        let mut offset = 0;
        for entry in files.values_mut() {
            entry.offset = offset;
            offset += entry.aligned_length;
        }

        Entries { files: files }
    }

    /// Returns the sum of `aligned_length` over all entries.
    fn total_aligned_length(&self) -> u64 {
        self.files.values().map(|entry| entry.aligned_length).sum()
    }
}
/// Location and integrity data for one file stored in an archive.
#[repr(C)]
#[derive(Serialize, Deserialize, PartialEq, Debug)]
struct Entry {
/// Offset of the file's contents, relative to the header's `file_offset`.
offset: u64,
/// Length of the file's contents in bytes.
length: u64,
/// `length` rounded up by `get_aligned_length`.
aligned_length: u64,
/// Checksum recorded for the file's contents.
checksum: u64,
}
/// Rounds `length` up to a multiple of the system page size; a value that
/// is already a multiple is returned unchanged.
///
/// NOTE(review): the mask arithmetic assumes the page size is a power of
/// two; this is not checked here.
#[inline]
fn get_aligned_length(length: u64) -> u64 {
    let mask = (get_page_size() as u64) - 1;
    let bumped = length + mask;
    bumped & !mask
}
#[cfg(test)]
mod tests {
use std::fs::create_dir_all;
use memadvise::{advise, Advice};
use super::super::file_data::FileDatum;
use super::*;
/// Builds the `FileData` for the three fixture files, rooted at `base_path`.
///
/// The numeric arguments are presumably each file's length in bytes and its
/// checksum — confirm against `FileDatum::new`.
fn get_file_data_stub<P: AsRef<Path>>(base_path: P) -> Result<FileData> {
    let data: Vec<FileDatum> = vec![
        FileDatum::new(String::from("Cargo.toml"), 328, 10574576474013701409),
        FileDatum::new(String::from("LICENSE-APACHE"), 10771, 8740797956101379381),
        FileDatum::new(String::from("LICENSE-MIT"), 1082, 13423357612537305206),
    ];
    let root = base_path.as_ref().to_path_buf();
    Ok(FileData::new(root, data))
}
/// Returns the names of the three fixture files, in a fixed order.
fn get_simple() -> Vec<String> {
    ["Cargo.toml", "LICENSE-APACHE", "LICENSE-MIT"]
        .iter()
        .map(|name| String::from(*name))
        .collect()
}
// Checks the rounding of `get_aligned_length` against the page size of the
// machine running the test. Hard-coding 4096 makes the test fail on systems
// whose page size is larger (for example 16 KiB or 64 KiB).
#[test]
fn test_v1_get_rounded_length() {
    let page = get_page_size() as u64;
    assert_eq!(get_aligned_length(0), 0);
    assert_eq!(get_aligned_length(1), page);
    assert_eq!(get_aligned_length(page), page);
    assert_eq!(get_aligned_length(page + 1), 2 * page);
    assert_eq!(get_aligned_length(2 * page - 1), 2 * page);
}
// Every fixture file name must appear as a key in the table built by
// `Entries::new`.
#[test]
fn test_v1_entries_new() {
    let stub = get_file_data_stub(&Path::new("testarchives/simple")).ok().unwrap();
    let table = Entries::new(stub);
    for expected_name in &get_simple() {
        assert!(table.files.contains_key(expected_name));
    }
}
// Builds an archive from the fixture directory into `tmptest/`, creating
// the output directory first when the path has a parent.
#[test]
fn test_v1_filearco_make() {
    let file_data = get_file_data_stub(Path::new("testarchives/simple")).ok().unwrap();

    let archive_path = Path::new("tmptest/test_v1_filearco_make.fac");
    match archive_path.parent() {
        Some(directory) => {
            create_dir_all(directory).ok().unwrap();
        }
        None => {}
    }

    let output = File::create(archive_path).ok().unwrap();
    FileArco::make(file_data, output).ok().unwrap();
}
// Opens the checked-in fixture archive and checks that every fixture file
// name is present in its entries table; an open error is printed and fails
// the test.
#[test]
fn test_v1_filearco_new() {
    let expected_names = get_simple();
    match FileArco::new(Path::new("testarchives/simple_v1.fac")) {
        Err(err) => {
            println!("test_v1_filearco_new {}", err);
            assert!(false);
        }
        Ok(archive) => {
            let files = &archive.inner.entries.files;
            for name in &expected_names {
                assert!(files.contains_key(name));
            }
        }
    }
}
// The fixture archive's header records a page size of 4096.
#[test]
fn test_v1_filearco_page_size() {
    let archive = FileArco::new(Path::new("testarchives/simple_v1.fac")).ok().unwrap();
    let recorded = archive.page_size();
    assert_eq!(recorded, 4096);
}
// Every fixture file must be retrievable from the archive, with the
// expected length and a matching checksum.
#[test]
fn test_v1_filearco_get() {
    let archive = FileArco::new(Path::new("testarchives/simple_v1.fac")).ok().unwrap();
    let expected = get_file_data_stub(Path::new("testarchives/simple"))
        .ok()
        .unwrap()
        .into_vec();

    for datum in &expected {
        match archive.get(datum.name()) {
            Some(fileref) => {
                assert_eq!(fileref.len(), datum.len());
                assert!(fileref.is_valid());
            }
            None => assert!(false),
        }
    }
}
// The bytes exposed by `FileRef::as_slice` must equal the contents of the
// corresponding file on disk, and the reported lengths must agree.
#[test]
fn test_v1_fileref_as_slice() {
    let archive = FileArco::new(Path::new("testarchives/simple_v1.fac")).ok().unwrap();
    let stub = get_file_data_stub(Path::new("testarchives/simple")).ok().unwrap();
    let base_path = stub.path();

    for datum in stub.into_vec() {
        let full_name = format!(
            "{}/{}",
            &base_path.to_string_lossy(),
            &datum.name()
        );
        let mut on_disk = Vec::<u8>::with_capacity(datum.len() as usize);
        let mut source = File::open(Path::new(&full_name)).ok().unwrap();
        source.read_to_end(&mut on_disk).ok().unwrap();

        let archived = archive.get(&datum.name()).unwrap();
        let reported_length = archived.len();
        let bytes = archived.as_slice();
        assert_eq!(datum.len(), bytes.len() as u64);
        assert_eq!(reported_length, bytes.len() as u64);
        assert_eq!(on_disk, bytes);
    }
}
// The pointer/length pair from `FileRef::as_raw` must be accepted by
// `advise` for both the `WillNeed` and `DontNeed` hints.
#[test]
fn test_v1_fileref_as_raw() {
    let archive = FileArco::new(Path::new("testarchives/simple_v1.fac")).ok().unwrap();
    let expected = get_file_data_stub(Path::new("testarchives/simple"))
        .ok()
        .unwrap()
        .into_vec();

    for datum in expected {
        let archived = archive.get(&datum.name()).unwrap();
        let (address, span) = archived.as_raw();
        advise(address, span, Advice::WillNeed).ok().unwrap();
        advise(address, span, Advice::DontNeed).ok().unwrap();
    }
}
}