use anyhow::Context;
use std::ffi::OsString;
use std::{fs, io::BufReader, path::PathBuf};
use crate::container::{
traverse_loose, traverse_packs, Container, ContainerInfo, CountInfo, SizeInfo,
};
use crate::io::ReaderMaker;
use crate::io_loose::insert as loose_insert;
use crate::io_packs::insert as packs_insert;
use crate::Error;
use crate::config::Config;
use crate::db::{self, print_table};
use crate::container::Compression;
use crate::utils::create_dir;
use clap::{Parser, Subcommand};
use human_bytes::human_bytes;
use std::str::FromStr;
use std::{env, fmt::Debug};
use std::io::{self, Write};
/// Compression spec used as the default value of the `--compression` options
/// (`init`, `optimize repack`) and by `optimize pack` unless compression is
/// disabled.
// NOTE(review): presumably "<algorithm>:<level>" — confirm against `Compression::from_str`.
pub const DEFAULT_COMPRESSION_ALGORITHM: &str = "zstd:-1";
// Top-level command line: an optional container location plus one subcommand.
//
// Plain `//` comments are used here on purpose: clap's derive turns `///` doc
// comments into `--help` text, which would change the program's output.
#[derive(Parser, Debug)]
#[command(version, about, long_about = None)]
struct Args {
// Container directory. When omitted, `run_cli` falls back to
// `<current dir>/container`.
#[arg(short, long, value_name = "FOLDER")]
path: Option<PathBuf>,
// The operation to perform.
#[command(subcommand)]
cmd: Commands,
}
// Subcommands of `optimize`.
//
// Plain `//` comments are used here on purpose: clap's derive turns `///` doc
// comments into `--help` text, which would change the program's output.
#[derive(Subcommand, Debug)]
enum OptimizeCommands {
// Runs `maintain::_pack_loose_internal` on the container (see `run_cli`).
Pack {
// Pack with the "none" compression spec instead of
// `DEFAULT_COMPRESSION_ALGORITHM`.
#[arg(long, default_value_t = false)]
no_compress: bool,
// Skip the clean step that follows packing. When false (the default),
// `run_cli` proceeds to that step, which is currently a `todo!()`.
#[arg(long, default_value_t = false)]
no_clean: bool,
},
// Repack with the given compression spec; the handler in `run_cli` is
// currently a `todo!()`.
Repack {
// Compression spec, defaulting to `DEFAULT_COMPRESSION_ALGORITHM`.
#[arg(short, long, default_value = DEFAULT_COMPRESSION_ALGORITHM, value_name = "COMPRESSION")]
compression: String,
},
}
// Top-level subcommands, dispatched in `run_cli`.
//
// Plain `//` comments are used here on purpose: clap's derive turns `///` doc
// comments into `--help` text, which would change the program's output.
#[derive(Subcommand, Debug)]
enum Commands {
// Create the container directory if it does not exist and initialize it
// with a fresh `Config`.
Init {
// Pack size in GiB; `run_cli` converts it to bytes (x 1024^3) before
// handing it to `Config::new`.
#[arg(short, long, default_value_t = 4, value_name = "PACK_SIZE")]
pack_size_gb: u64,
// Compression spec handed to `Config::new`.
#[arg(short, long, default_value = DEFAULT_COMPRESSION_ALGORITHM, value_name = "COMPRESSION")]
compression: String,
},
// Print the container's location, id, compression algorithm, object counts
// and sizes.
Status,
// List the contents of a store.
Inspect {
// Name of the store to list; interpreted by `run_cli`.
#[arg(short, long, default_value = "auto")]
storage_type: String,
},
// Add one or more files to the container.
AddFiles {
// Files to add; entries that are not regular files are skipped with a
// message on stderr.
#[arg(required = true, value_name = "FILE(s)")]
paths: Vec<PathBuf>,
// Destination store: "auto", "loose" or "packs". `add_file` treats
// "auto" like "loose".
#[arg(short, long, default_value = "auto", value_name = "TO")]
to: String,
},
// Maintenance operations; see `OptimizeCommands`.
Optimize {
#[command(subcommand)]
cmd: OptimizeCommands,
},
// Write the content of one stored object to stdout.
CatFile {
// Identifier of the object to print.
#[arg(required = true)]
id: String,
// Store to read from: "auto", "loose" or "packs". With "auto",
// `extract` looks in the loose store first, then in the packs.
#[arg(short, long, default_value = "auto", value_name = "FROM")]
from: String,
},
}
/// Streams the object identified by `id` from the container into `to`.
///
/// `st` selects where to look: only the loose store, only the packs, or, for
/// [`StoreType::Auto`], the loose store first with the packs as a fallback.
///
/// Returns `Ok(Some(n))` with the number of bytes written when the object was
/// found, and `Ok(None)` when none of the consulted stores contains it.
///
/// # Errors
///
/// Propagates any error from the store(s) consulted. In `Auto` mode a failure
/// while reading from the packs is returned as an error; it must not be
/// collapsed into `Ok(None)`, otherwise a corrupt or unreadable pack would be
/// reported to the user as "object not found".
fn extract(
    id: &str,
    cnt: &Container,
    st: &StoreType,
    mut to: impl Write,
) -> anyhow::Result<Option<u64>> {
    match st {
        StoreType::Loose => _extract_l(id, cnt, to),
        StoreType::Packs => _extract_p(id, cnt, to),
        StoreType::Auto => match _extract_l(id, cnt, &mut to)? {
            Some(n) => Ok(Some(n)),
            // Only a genuine "not in loose" falls through to the packs.
            None => _extract_p(id, cnt, &mut to),
        },
    }
}
/// Copies the loose object `id` into `to`.
///
/// Returns `Ok(None)` when the loose store has no such object, otherwise
/// `Ok(Some(n))` where `n` is the number of bytes copied.
///
/// # Errors
///
/// Fails when the lookup, opening the reader, or the copy fails, and when the
/// number of bytes copied differs from the object's `expected_size`.
fn _extract_l(id: &str, cnt: &Container, mut to: impl Write) -> anyhow::Result<Option<u64>> {
    match crate::io_loose::extract(id, cnt)? {
        None => Ok(None),
        Some(obj) => {
            let mut source = BufReader::new(obj.make_reader()?);
            let written =
                std::io::copy(&mut source, &mut to).with_context(|| "write object to stdout")?;
            // A size mismatch means the stored bytes are not what was recorded.
            anyhow::ensure!(
                written == obj.expected_size,
                "object has wrong size, expected: {}, got: {}, usually caused by data corruption",
                obj.expected_size,
                written
            );
            Ok(Some(written))
        }
    }
}
/// Copies the packed object `id` into `to`.
///
/// Returns `Ok(None)` when the packs have no such object, otherwise
/// `Ok(Some(n))` where `n` is the number of bytes copied.
///
/// # Errors
///
/// Fails when the lookup, opening the reader, or the copy fails, and when the
/// number of bytes copied differs from the object's `raw_size`.
fn _extract_p(id: &str, cnt: &Container, mut to: impl Write) -> anyhow::Result<Option<u64>> {
    match crate::io_packs::extract(id, cnt)? {
        None => Ok(None),
        Some(obj) => {
            let mut source = BufReader::new(obj.make_reader()?);
            let written =
                std::io::copy(&mut source, &mut to).with_context(|| "write object to stdout")?;
            // A size mismatch means the stored bytes are not what was recorded.
            anyhow::ensure!(
                written == obj.raw_size,
                "object has wrong size, expected: {}, got: {}, usually caused by data corruption",
                obj.raw_size,
                written
            );
            Ok(Some(written))
        }
    }
}
/// Which object store an operation should target.
pub enum StoreType {
/// Let the operation decide: `add_file` stores the file loose, `extract`
/// looks in the loose store first and then in the packs.
Auto,
/// The loose store only.
Loose,
/// The packs only.
Packs,
}
pub fn add_file(
file: &PathBuf,
cnt: &Container,
to: &StoreType,
) -> anyhow::Result<(String, String, u64)> {
let stat = fs::metadata(file).with_context(|| format!("stat {}", file.display()))?;
let expected_size = stat.len();
let (bytes_read, _bytes_write, hash_hex) = match to {
StoreType::Loose | StoreType::Auto => {
let (b_r, hash_hex) = loose_insert(file.clone(), cnt)?;
(b_r, b_r, hash_hex)
},
StoreType::Packs => packs_insert(file.clone(), cnt)?,
};
anyhow::ensure!(
bytes_read == expected_size,
format!(
"bytes streamed: {}, bytes source: {}",
bytes_read, expected_size
)
);
Ok((hash_hex, file.display().to_string(), expected_size))
}
pub fn stat(cnt: &Container) -> anyhow::Result<ContainerInfo> {
cnt.valid()?;
let config_path = cnt.config_file();
let config = fs::File::open(&config_path).map_err(|err| Error::ConfigFileError {
source: err,
path: config_path.clone(),
})?;
let reader = BufReader::new(config);
let config: Config = serde_json::from_reader(reader).map_err(|err| Error::ConfigFileError {
source: err.into(),
path: config_path.clone(),
})?;
let iter_loose = traverse_loose(cnt).with_context(|| "traverse loose by iter")?;
let (loose_files_count, loose_files_size) =
iter_loose
.into_iter()
.fold((0, 0), |(count, size), path| match fs::metadata(path) {
Ok(stat) => (count + 1, size + stat.len()),
Err(_) => (count, size),
});
let packs_db = cnt.packs_db();
let packs_db_size = fs::metadata(&packs_db)?.len();
let (packs_count, packs_size) = db::stat(&packs_db)?;
let iter_packs = traverse_packs(cnt).with_context(|| "traverse packs by iter")?;
let (packs_file_count, packs_file_size) =
iter_packs
.into_iter()
.fold((0, 0), |(count, size), path| match fs::metadata(path) {
Ok(stat) => (count + 1, size + stat.len()),
Err(_) => (count, size),
});
Ok(ContainerInfo {
location: cnt.path.display().to_string(),
id: config.container_id.to_string(),
compression_algorithm: config.compression_algorithm,
count: CountInfo {
loose: loose_files_count,
packs: packs_count,
packs_file: packs_file_count,
},
size: SizeInfo {
loose: loose_files_size,
packs: packs_size,
packs_file: packs_file_size,
packs_db: packs_db_size,
},
})
}
#[allow(clippy::too_many_lines)]
pub fn run_cli(args: &[OsString]) -> anyhow::Result<()> {
let args = Args::parse_from(args);
let cnt_path = args.path.unwrap_or(env::current_dir()?.join("container"));
match args.cmd {
Commands::Init {
pack_size_gb,
compression,
} => {
if !cnt_path.exists() {
create_dir(&cnt_path)?;
}
let config = Config::new(pack_size_gb * 1024 * 1024 * 1024, &compression);
let cnt = Container::new(&cnt_path);
cnt.initialize(&config).with_context(|| {
format!("unable to initialize container at {}", cnt.path.display())
})?;
}
#[allow(clippy::cast_precision_loss)]
Commands::Status => {
let cnt = Container::new(&cnt_path);
let cnt = match cnt.valid() {
Ok(cnt) => cnt,
Err(e) => anyhow::bail!(e),
};
let info = crate::stat(cnt).with_context(|| "unable to get container stat")?;
let state = String::new()
+ "[container]\n"
+ &format!("Location = {}\n", info.location)
+ &format!("Id = {}\n", info.id)
+ &format!("ZipAlgo = {}\n", info.compression_algorithm)
+ "\n[container.count]\n"
+ &format!("Loose objects = {}\n", info.count.loose)
+ &format!("Pack objects = {}\n", info.count.packs)
+ &format!("Pack Files = {}\n", info.count.packs_file)
+ "\n[container.size]\n"
+ &format!("Loose objects (raw) = {}\n", human_bytes(info.size.loose as f64))
+ &format!("Pack objects (raw) = {}\n", human_bytes(info.size.packs as f64))
+ &format!("Pack Files = {}\n", human_bytes(info.size.packs_file as f64))
+ &format!("Pack DB file = {}\n", human_bytes(info.size.packs_db as f64));
io::stdout().write_all(state.as_bytes())?;
}
#[allow(clippy::cast_precision_loss)]
Commands::AddFiles { paths, to } => {
let cnt = Container::new(&cnt_path);
let cnt = match cnt.valid() {
Ok(cnt) => cnt,
Err(e) => anyhow::bail!(e),
};
for path in paths {
if !path.is_file() {
eprintln!("Error: {} is not a file, skipped", path.display());
continue;
}
let to = match to.as_str() {
"auto" => StoreType::Auto,
"loose" => StoreType::Loose,
"packs" => StoreType::Packs,
_ => {
eprintln!("unknown store '{to}', expect 'auto', 'loose' or 'packs'");
std::process::exit(1);
}
};
let (hash_hex, filename, expected_size) = add_file(&path, cnt, &to)?;
println!(
"{} - {}: {}",
hash_hex,
filename,
human_bytes(expected_size as f64)
);
}
}
Commands::Inspect { storage_type } => match storage_type.as_str() {
"loose" => {
let cnt = Container::new(&cnt_path);
cnt.valid()?;
println!(" hash | size ");
for p in traverse_loose(&cnt).with_context(|| "traverse loose by iter")? {
let stat = fs::metadata(&p)?;
let size = stat.len();
println!("{} | {}", &p.display(), size);
}
}
"pack" => {
let cnt = Container::new(&cnt_path);
let db = cnt.packs_db();
let _ = print_table(&db);
}
_ => {
eprintln!("unknown store '{storage_type}', expect 'auto', 'loose' or 'packs'");
std::process::exit(1);
}
},
Commands::Optimize { cmd } => {
match cmd {
OptimizeCommands::Pack {
no_compress,
no_clean,
} => {
let cnt = Container::new(&cnt_path);
let cnt = match cnt.valid() {
Ok(cnt) => cnt,
Err(e) => anyhow::bail!(e),
};
let compression = if no_compress {
Compression::from_str("none")?
} else {
Compression::from_str(DEFAULT_COMPRESSION_ALGORITHM)?
};
crate::maintain::_pack_loose_internal(cnt, &compression).unwrap_or_else(
|err| {
eprintln!("failed on pack loose {err}");
std::process::exit(1);
},
);
if !no_clean {
todo!()
}
}
OptimizeCommands::Repack { compression } => {
todo!()
}
}
}
Commands::CatFile { id, from } => {
let cnt = crate::Container::new(&cnt_path);
let from = match from.as_str() {
"auto" => StoreType::Auto,
"loose" => StoreType::Loose,
"packs" => StoreType::Packs,
_ => {
eprintln!("unknown store '{from}', expect 'auto', 'loose' or 'packs'");
std::process::exit(1);
}
};
let mut to = std::io::stdout();
let n = extract(&id, &cnt, &from, &mut to)?;
if n.is_none() {
eprintln!("object {id} not found");
std::process::exit(1)
}
} };
Ok(())
}
/// Tests for `add_file` and `stat` against freshly created containers.
#[cfg(test)]
mod tests {
    use std::{collections::HashMap, io::Write};

    use tempfile::NamedTempFile;

    use crate::{
        io_packs,
        test_utils::{new_container, PACK_TARGET_SIZE},
    };

    use super::*;

    /// Ten files with distinct content yield ten loose objects.
    #[test]
    fn cli_add_ten_diff_objs_to_loose() {
        let (_tmp_dir, cnt) = new_container(PACK_TARGET_SIZE, "none");
        for i in 0..10 {
            let mut tf = NamedTempFile::new().unwrap();
            write!(tf, "test {i}").unwrap();
            let fp = tf.into_temp_path();
            // `expect` takes a plain string and would print "{i}" literally;
            // format the panic message explicitly instead.
            add_file(&fp.to_path_buf(), &cnt, &StoreType::Loose)
                .unwrap_or_else(|err| panic!("unable to add file {i}: {err:#}"));
        }
        let info = stat(&cnt).expect("fail to audit container stat");
        assert_eq!(info.count.loose, 10);
    }

    /// Ten files with identical content are stored as a single loose object.
    #[test]
    fn cli_add_ten_same_objs_to_loose() {
        let (_tmp_dir, cnt) = new_container(PACK_TARGET_SIZE, "none");
        for i in 0..10 {
            let mut tf = NamedTempFile::new().unwrap();
            write!(tf, "test x").unwrap();
            let fp = tf.into_temp_path();
            let _ = add_file(&fp.to_path_buf(), &cnt, &StoreType::Loose)
                .unwrap_or_else(|err| panic!("unable to add file {i}: {err:#}"));
        }
        let info = stat(&cnt).expect("fail to audit container stat");
        assert_eq!(info.count.loose, 1);
    }

    /// Ten distinct files added to the packs can each be read back intact.
    #[test]
    fn cli_add_ten_diff_objs_to_packs() -> anyhow::Result<()> {
        let (_tmp_dir, cnt) = new_container(PACK_TARGET_SIZE, "none");
        let orig_objs: HashMap<String, String> = (0..10)
            .map(|i| {
                let content = format!("test {i}");
                let mut tf = NamedTempFile::new().unwrap();
                // Write exactly what is compared against later.
                write!(tf, "{content}").unwrap();
                let fp = tf.into_temp_path();
                let (hash_hex, _, _) = add_file(&fp.to_path_buf(), &cnt, &StoreType::Packs)
                    .expect("add file to pack failed");
                (hash_hex, content)
            })
            .collect();
        for (hash_hex, expected_content) in orig_objs {
            let obj = io_packs::extract(&hash_hex, &cnt)?.unwrap();
            assert_eq!(
                String::from_utf8(obj.try_into().unwrap()).unwrap(),
                expected_content
            );
        }
        let info = stat(&cnt).expect("fail to audit container stat");
        assert_eq!(info.count.packs, 10);
        Ok(())
    }

    /// Ten identical files added to the packs are stored as a single object.
    #[test]
    fn cli_add_ten_same_objs_to_packs() -> anyhow::Result<()> {
        let (_tmp_dir, cnt) = new_container(PACK_TARGET_SIZE, "none");
        let orig_objs: HashMap<String, String> = (0..10)
            .map(|_| {
                let content = "test".to_string();
                let mut tf = NamedTempFile::new().unwrap();
                write!(tf, "{content}").unwrap();
                let fp = tf.into_temp_path();
                let (hash_hex, _, _) = add_file(&fp.to_path_buf(), &cnt, &StoreType::Packs)
                    .expect("add file to pack failed");
                (hash_hex, content)
            })
            .collect();
        for (hash_hex, expected_content) in orig_objs {
            let obj = io_packs::extract(&hash_hex, &cnt)?.unwrap();
            assert_eq!(
                String::from_utf8(obj.try_into().unwrap()).unwrap(),
                expected_content
            );
        }
        let info = stat(&cnt).expect("fail to audit container stat");
        assert_eq!(info.count.packs, 1);
        Ok(())
    }
}