use std::fs;
use std::path::{Component, Path, PathBuf};
use crate::FileAtlasRef;
use crate::bottle_error::{BottleError, BottleResult};
use crate::file_atlas::FileAtlas;
use crate::file_list::{Block, FileBlocks, FileList};
use crate::hashing::{Hashing, HashType};
use crate::scanner::Scanner;
/// Progress snapshot handed to the update callback of [`FileScanner`].
#[derive(Clone, Eq, PartialEq)]
pub enum ScanState {
    /// Emitted by `scan_path` right after an entry is appended to the file list.
    FileList {
        /// The entry that was just recorded.
        atlas: FileAtlasRef,
        /// Value of `FileList::total_file_count()` when the report was made.
        file_count: usize,
        /// Value of `FileList::total_size()` when the report was made.
        bytes: u64,
    },
    /// Emitted by `build_block_list` after each block read from a file.
    Blocks {
        /// Number of entries in the file list's block table.
        blocks: usize,
        /// Value of `FileList::total_file_count()` when the report was made.
        file_count: usize,
        /// Running sum of the sizes of all blocks read so far.
        bytes: u64,
        /// Value of `FileList::total_size()` when the report was made.
        total_bytes: u64,
        /// Value of `FileList::total_block_size()` when the report was made.
        unique_bytes: u64,
    },
}
/// Return the longest leading run of path components shared by every path
/// in `paths`.
///
/// Comparison is done component by component (not byte by byte), so
/// `/home/rob` and `/home/robey` share only `/home`.
///
/// A single path is its own common prefix. An empty slice yields an empty
/// `PathBuf` (previously this case indexed `paths[0]` and panicked).
fn find_common_prefix(paths: &[PathBuf]) -> PathBuf {
    match paths.split_first() {
        // Nothing to compare: there is no prefix at all.
        None => PathBuf::new(),
        // Narrow the candidate prefix against each remaining path in turn.
        Some((first, rest)) => rest.iter().fold(first.clone(), |prefix, path| {
            prefix
                .components()
                .zip(path.components())
                .take_while(|(ours, theirs)| ours == theirs)
                .map(|(ours, _)| ours)
                .collect()
        }),
    }
}
/// A path to scan, paired with the leading portion that should be hidden
/// when the path is recorded.
#[derive(Clone, Debug, Eq, PartialEq)]
struct NormalizedScanPath {
    /// The cleaned-up path that will actually be read from disk.
    pub path: PathBuf,

    /// Prefix to strip from `path` to obtain the recorded name; `None`
    /// means `path` is recorded unchanged.
    pub hidden_prefix: Option<PathBuf>,
}
fn normalize_paths(paths: &[PathBuf]) -> BottleResult<Vec<NormalizedScanPath>> {
let paths: Vec<PathBuf> = paths.iter().map(|path| {
let mut components: Vec<Component> = path.components().collect();
while let Some(Component::CurDir) = components.first() {
components.remove(0);
};
if components.iter().any(|c| {
c == &Component::CurDir || c == &Component::ParentDir
}) {
return Err(BottleError::InvalidAddPath(path.to_path_buf()));
}
Ok(components.iter().collect())
}).collect::<BottleResult<_>>()?;
let (absolute_paths, relative_paths) = paths.iter().enumerate().partition::<Vec<_>, _>(|(_, p)| p.has_root());
let relative_paths: Vec<_> = relative_paths.iter().map(|(i, path)| {
(i, NormalizedScanPath { path: path.to_path_buf(), hidden_prefix: None })
}).collect();
let absolute_paths: Vec<_> = if !absolute_paths.is_empty() {
let hidden_prefix = if absolute_paths.len() == 1 {
absolute_paths[0].1.parent().map(|p| p.to_path_buf())
} else {
Some(find_common_prefix(
&absolute_paths.iter().map(|(_, p)| p.to_path_buf()).collect::<Vec<_>>()
))
};
absolute_paths.iter().map(|(i, path)| {
(i, NormalizedScanPath { path: path.to_path_buf(), hidden_prefix: hidden_prefix.clone() })
}).collect()
} else {
vec![]
};
let mut rv = [ relative_paths, absolute_paths ].concat();
rv.sort_by(|(i, _), (j, _)| i.cmp(j));
let rv = rv.iter().map(|(_, scan_path)| scan_path.clone()).collect::<Vec<_>>();
let normalized_paths = rv.iter().map(|normalized_path| {
match &normalized_path.hidden_prefix {
None => &normalized_path.path,
Some(prefix) => normalized_path.path.strip_prefix(prefix).unwrap(),
}
}).collect::<Vec<_>>();
for i in 0..normalized_paths.len() - 1 {
for j in i + 1 .. normalized_paths.len() {
if normalized_paths[i] == normalized_paths[j] {
return Err(
BottleError::DuplicatePaths(
rv[i].path.to_path_buf(),
rv[j].path.to_path_buf(),
)
);
}
}
}
Ok(rv)
}
/// Walks paths on disk into a `FileList` and then splits the regular files
/// into blocks, reporting progress through a caller-supplied callback.
pub struct FileScanner<F>
where
    F: FnMut(ScanState),
{
    /// Splits file contents into blocks.
    scanner: Scanner,

    /// Callback invoked with a `ScanState` after every recorded entry and
    /// every block.
    updater: F,

    /// Scratch buffer lent to the scanner while reading files.
    buffer: Vec<u8>,

    /// Everything discovered so far.
    file_list: FileList,
}
impl<F: FnMut (ScanState)> FileScanner<F> {
pub fn new(
hash_type: HashType,
min_bits: u8,
pref_bits: u8,
max_bits: u8,
window_bits: u8,
buffer: Vec<u8>,
updater: F,
) -> FileScanner<F> {
FileScanner {
scanner: Scanner::new(hash_type, min_bits, pref_bits, max_bits, window_bits),
updater,
buffer,
file_list: FileList::new(),
}
}
fn build_file_list(&mut self, path: &Path, hidden_prefix: &Option<PathBuf>) -> BottleResult<()> {
for entry in fs::read_dir(path)? {
let entry = entry?;
self.scan_path(&entry.path(), hidden_prefix)?;
}
Ok(())
}
pub fn scan_paths(&mut self, paths: &[PathBuf]) -> BottleResult<()> {
let scan_paths = normalize_paths(paths)?;
for scan_path in scan_paths {
self.scan_path(&scan_path.path, &scan_path.hidden_prefix)?;
}
Ok(())
}
pub fn scan_path(&mut self, path: &Path, hidden_prefix: &Option<PathBuf>) -> BottleResult<()> {
let metadata = fs::symlink_metadata(path)?;
let path = path.to_path_buf();
let normalized_path = match hidden_prefix {
None => path.clone(),
Some(prefix) => path.strip_prefix(prefix).map_err(|_| BottleError::BadPath)?.to_path_buf(),
};
let mut atlas: Option<FileAtlas> = None;
if metadata.file_type().is_symlink() {
let target = fs::read_link(&path)?;
let mut raw_atlas: FileAtlas = (&metadata).try_into()?;
raw_atlas.size = 0;
raw_atlas.symlink_target = Some(target);
atlas = Some(raw_atlas);
} else if metadata.file_type().is_dir() || metadata.file_type().is_file() {
atlas = Some((&metadata).try_into()?);
}
if let Some(mut atlas) = atlas {
atlas.path = path.clone();
atlas.normalized_path = normalized_path;
let atlas = atlas.bobble();
self.file_list.files.push(atlas.clone());
let file_count = self.file_list.total_file_count();
let bytes = self.file_list.total_size();
(self.updater)(ScanState::FileList { atlas: atlas.clone(), file_count, bytes });
if atlas.borrow().is_folder {
self.build_file_list(&path, hidden_prefix)?;
}
}
Ok(())
}
pub fn build_block_list(&mut self, hash_type: HashType) -> std::io::Result<&FileList> {
let mut scanned_size = 0u64;
for atlas in self.file_list.files.clone().iter() {
let is_folder = atlas.borrow().is_folder;
let is_symlink = atlas.borrow().symlink_target.is_some();
if !is_folder && !is_symlink {
let mut f = fs::File::open(&atlas.borrow().path)?;
let mut blocks: Vec<Block> = Vec::new();
let mut digest = Hashing::new(hash_type);
for block in self.scanner.reader_iter(&mut f, &mut self.buffer, &mut digest) {
let block = block?;
let size = block.size;
scanned_size += size as u64;
self.file_list.add_block(atlas, &block);
blocks.push(block);
let blocks = self.file_list.blocks.len();
let file_count = self.file_list.total_file_count();
let bytes = scanned_size;
let total_bytes = self.file_list.total_size();
let unique_bytes = self.file_list.total_block_size();
(self.updater)(ScanState::Blocks { blocks, file_count, bytes, total_bytes, unique_bytes });
}
atlas.borrow_mut().contents = FileBlocks { hash: digest.finalize_reset(), blocks };
}
}
Ok(&self.file_list)
}
}
impl<F: FnMut (ScanState)> From<FileScanner<F>> for FileList {
fn from(file_scanner: FileScanner<F>) -> Self {
file_scanner.file_list
}
}
#[cfg(test)]
mod test {
    use std::path::PathBuf;
    use super::{find_common_prefix, NormalizedScanPath, normalize_paths};

    /// Run `normalize_paths` on string literals, panicking on error.
    fn normalize(paths: &[&str]) -> Vec<NormalizedScanPath> {
        let owned = paths.iter().map(PathBuf::from).collect::<Vec<PathBuf>>();
        normalize_paths(&owned).unwrap()
    }

    /// Build the expected result from `(path, hidden_prefix)` pairs.
    fn scan_paths(paths: &[(&str, Option<&str>)]) -> Vec<NormalizedScanPath> {
        let mut expected = Vec::with_capacity(paths.len());
        for &(path, hidden_prefix) in paths {
            expected.push(NormalizedScanPath {
                path: PathBuf::from(path),
                hidden_prefix: hidden_prefix.map(PathBuf::from),
            });
        }
        expected
    }

    /// The paths as they look once each hidden prefix is stripped.
    fn without_prefix(paths: &[NormalizedScanPath]) -> Vec<PathBuf> {
        paths
            .iter()
            .map(|scan_path| match &scan_path.hidden_prefix {
                Some(prefix) => scan_path.path.strip_prefix(prefix).unwrap().to_path_buf(),
                None => scan_path.path.to_path_buf(),
            })
            .collect()
    }

    #[test]
    fn common_prefix() {
        let same_tree = [
            "/home/robey/projects/rust/bitbottle/tests",
            "/home/robey/projects/rust/bitbottle/src",
            "/home/robey/projects/rust/mwgc",
        ].map(PathBuf::from);
        assert_eq!(find_common_prefix(&same_tree), PathBuf::from("/home/robey/projects/rust"));

        let only_root_shared = [
            "/home/robey/projects/rust/bitbottle/tests",
            "/etc/init.d/rc5.d",
        ].map(PathBuf::from);
        assert_eq!(find_common_prefix(&only_root_shared), PathBuf::from("/"));
    }

    #[test]
    fn normalize_relative() {
        let expected = scan_paths(&[ ("src", None), ("docs", None) ]);
        assert_eq!(normalize(&[ "src/", "docs/" ]), expected);
        assert_eq!(normalize(&[ "./src/", "docs/" ]), expected);
    }

    #[test]
    fn normalize_disallow_dots() {
        let rv = normalize_paths(&[ PathBuf::from("./src/../src") ]);
        assert!(rv.is_err());
        assert_eq!(format!("{:?}", rv.unwrap_err()), "InvalidAddPath(\"./src/../src\")");
    }

    #[test]
    fn normalize_absolute() {
        let paths = normalize(&[ "/home/robey/src/", "/home/robey/docs" ]);
        let expected = scan_paths(&[
            ("/home/robey/src", Some("/home/robey")),
            ("/home/robey/docs", Some("/home/robey")),
        ]);
        assert_eq!(paths, expected);
        assert_eq!(without_prefix(&paths), [ "src", "docs" ].map(PathBuf::from));
    }

    #[test]
    fn normalize_one_prefix() {
        let paths = normalize(&[ "/home/robey/src/" ]);
        let expected = scan_paths(&[ ("/home/robey/src", Some("/home/robey")) ]);
        assert_eq!(paths, expected);
        assert_eq!(without_prefix(&paths), [ "src" ].map(PathBuf::from));
    }

    #[test]
    fn normalize_keep_ordering() {
        let paths = normalize(&[ "target", "/home/robey/src/", "./tests", "/home/robey/docs" ]);
        let expected = scan_paths(&[
            ("target", None),
            ("/home/robey/src", Some("/home/robey")),
            ("tests", None),
            ("/home/robey/docs", Some("/home/robey")),
        ]);
        assert_eq!(paths, expected);
        assert_eq!(without_prefix(&paths), [ "target", "src", "tests", "docs" ].map(PathBuf::from));
    }

    #[test]
    fn duplicate_paths() {
        let input = [ "target", "/home/robey/src", "/home/robey/target/" ].map(PathBuf::from);
        let rv = normalize_paths(&input);
        assert!(rv.is_err());
        assert_eq!(format!("{:?}", rv.unwrap_err()), "DuplicatePaths(\"target\", \"/home/robey/target\")");
    }
}