use std::{
fs,
io::{self, Read as _, Write as _},
path::{Path, PathBuf},
};
use crate::BUFFER_CAPACITY_MAX_DEFAULT;
#[cfg(feature = "async-std")]
pub mod async_std {
pub use crate::async_std::merge::MergeAsyncExt;
}
#[cfg(feature = "tokio")]
pub mod tokio {
pub use crate::tokio::merge::MergeAsyncExt;
}
#[derive(Debug, Clone)]
pub struct Merge {
pub in_dir: Option<PathBuf>,
pub out_file: Option<PathBuf>,
pub cap_max: usize,
}
impl Merge {
pub fn new() -> Self {
Self {
in_dir: None,
out_file: None,
cap_max: BUFFER_CAPACITY_MAX_DEFAULT,
}
}
pub fn from<P: Into<Merge>>(process: P) -> Self {
process.into()
}
pub fn in_dir<InDir: AsRef<Path>>(
mut self,
path: InDir,
) -> Self {
self.in_dir = Some(path.as_ref().to_path_buf());
self
}
pub fn out_file<OutFile: AsRef<Path>>(
mut self,
path: OutFile,
) -> Self {
self.out_file = Some(path.as_ref().to_path_buf());
self
}
pub fn max_buffer_capacity(
mut self,
capacity: usize,
) -> Self {
self.cap_max = capacity;
self
}
pub fn run(&self) -> io::Result<bool> {
let in_dir: &Path = match self.in_dir {
| Some(ref p) => {
let p: &Path = p.as_ref();
if !p.exists() {
return Err(io::Error::new(
io::ErrorKind::NotFound,
"in_dir path not found",
));
}
if !p.is_dir() {
return Err(io::Error::new(
io::ErrorKind::InvalidInput,
"in_dir is not a directory",
));
}
p
},
| None => {
return Err(io::Error::new(
io::ErrorKind::InvalidInput,
"in_dir is not set",
));
},
};
let out_file: &Path = match self.out_file {
| Some(ref p) => p.as_ref(),
| None => {
return Err(io::Error::new(
io::ErrorKind::InvalidInput,
"out_file is not set",
));
},
};
let input_size: usize = if let Some(file) = fs::read_dir(in_dir)?
.filter_map(Result::ok)
.filter(|entry| entry.path().is_file())
.map(|entry| entry.path())
.next()
{
fs::metadata(file)?.len() as usize
} else {
return Err(io::Error::new(
io::ErrorKind::NotFound,
"No files found in in_dir",
));
};
let buffer_capacity: usize = input_size.min(self.cap_max);
if out_file.exists() {
if out_file.is_dir() {
fs::remove_dir_all(out_file)?;
} else {
fs::remove_file(out_file)?;
}
}
if let Some(parent) = out_file.parent() {
fs::create_dir_all(parent)?;
}
let output: fs::File = fs::OpenOptions::new()
.create(true)
.truncate(false)
.write(true)
.open(out_file)?;
let mut writer: io::BufWriter<fs::File> =
io::BufWriter::with_capacity(buffer_capacity, output);
let mut entries: Vec<PathBuf> = fs::read_dir(in_dir)?
.filter_map(Result::ok)
.filter(|entry| entry.path().is_file())
.map(|entry| entry.path())
.collect();
entries.sort_by_key(|entry| {
entry
.file_name()
.unwrap()
.to_str()
.unwrap()
.parse::<usize>()
.unwrap()
});
for entry in entries {
let input: fs::File =
fs::OpenOptions::new().read(true).open(&entry)?;
let mut reader: io::BufReader<fs::File> =
io::BufReader::with_capacity(buffer_capacity, input);
let mut buffer: Vec<u8> = vec![0; buffer_capacity];
loop {
let read: usize = reader.read(&mut buffer)?;
if read == 0 {
break;
}
writer.write_all(&buffer[..read])?;
}
}
writer.flush()?;
Ok(true)
}
}
impl Default for Merge {
fn default() -> Self {
Self::new()
}
}