use core::fmt;
use std::fmt::Display;
use std::hash::Hash;
use std::io::{ErrorKind, SeekFrom};
use std::iter::Sum;
use std::ops::{Add, AddAssign, BitXor, Deref, Mul, Sub};
use std::{fs, io};
use byte_unit::Byte;
use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt};
use bytesize::ByteSize;
use hex::FromHexError;
use itertools::{EitherOrBoth, Itertools};
use rayon::iter::{IntoParallelRefIterator, IntoParallelRefMutIterator, ParallelIterator};
use serde::*;
use smallvec::alloc::fmt::Formatter;
use smallvec::alloc::str::FromStr;
use crate::device::DiskDevices;
use crate::group::FileGroup;
use crate::log::{Log, LogExt};
use crate::path::Path;
/// Absolute byte offset within a file, stored as a `u64`.
///
/// Converts into `SeekFrom::Start` and can be moved forwards or backwards by
/// adding or subtracting a [`FileLen`].
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Debug, Serialize, Deserialize)]
pub struct FilePos(pub u64);
impl FilePos {
pub fn zero() -> FilePos {
FilePos(0)
}
}
impl Display for FilePos {
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
write!(f, "{}", self.0)
}
}
impl From<u64> for FilePos {
fn from(pos: u64) -> Self {
FilePos(pos)
}
}
impl From<usize> for FilePos {
fn from(pos: usize) -> Self {
FilePos(pos as u64)
}
}
impl From<FilePos> for u64 {
fn from(pos: FilePos) -> Self {
pos.0
}
}
impl From<FilePos> for usize {
fn from(pos: FilePos) -> Self {
pos.0 as usize
}
}
impl From<FilePos> for SeekFrom {
fn from(pos: FilePos) -> Self {
SeekFrom::Start(pos.0)
}
}
impl Add<FileLen> for FilePos {
type Output = FilePos;
fn add(self, rhs: FileLen) -> Self::Output {
FilePos(self.0 + rhs.0)
}
}
impl Sub<FileLen> for FilePos {
type Output = FilePos;
fn sub(self, rhs: FileLen) -> Self::Output {
FilePos(self.0 - rhs.0)
}
}
/// Length of a file, or of a fragment of a file, in bytes.
///
/// Parsed from strings through `byte_unit::Byte` and displayed in
/// human-readable form through `bytesize::ByteSize`.
/// The derived `Default` value is a length of zero.
#[derive(
Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Debug, Deserialize, Serialize, Default,
)]
pub struct FileLen(pub u64);
impl FileLen {
pub const MAX: FileLen = FileLen(u64::MAX);
pub fn as_pos(self) -> FilePos {
FilePos(self.0)
}
}
impl From<u64> for FileLen {
fn from(l: u64) -> Self {
FileLen(l)
}
}
impl From<usize> for FileLen {
fn from(l: usize) -> Self {
FileLen(l as u64)
}
}
impl From<FileLen> for u64 {
fn from(l: FileLen) -> Self {
l.0
}
}
impl From<FileLen> for usize {
fn from(l: FileLen) -> Self {
l.0 as usize
}
}
impl Add for FileLen {
type Output = FileLen;
fn add(self, rhs: Self) -> Self::Output {
FileLen(self.0 + rhs.0)
}
}
impl AddAssign for FileLen {
fn add_assign(&mut self, rhs: Self) {
self.0 += rhs.0
}
}
impl Sub for FileLen {
type Output = FileLen;
fn sub(self, rhs: Self) -> Self::Output {
FileLen(self.0 - rhs.0)
}
}
impl Mul<u64> for FileLen {
type Output = FileLen;
fn mul(self, rhs: u64) -> Self::Output {
FileLen(self.0 * rhs)
}
}
impl Sum<FileLen> for FileLen {
    /// Adds up all lengths produced by `iter`, starting from zero.
    ///
    /// An empty iterator yields `FileLen(0)`.
    fn sum<I: Iterator<Item = FileLen>>(iter: I) -> Self {
        let mut total = FileLen(0);
        for len in iter {
            total = total + len;
        }
        total
    }
}
impl FromStr for FileLen {
    type Err = byte_unit::ByteError;

    /// Parses a length from text accepted by `byte_unit::Byte::from_str`.
    ///
    /// The parsed byte count is narrowed to `u64` with an `as` cast.
    ///
    /// # Errors
    /// Returns the `ByteError` reported by `byte_unit` when `s` cannot be parsed.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        Byte::from_str(s).map(|parsed| FileLen(parsed.get_bytes() as u64))
    }
}
impl Display for FileLen {
    /// Writes the length in the human-readable form produced by `ByteSize`.
    ///
    /// The text goes through `Formatter::pad`, so width and alignment flags
    /// of the caller are honored.
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        let human_readable = ByteSize(self.0).to_string();
        f.pad(&human_readable)
    }
}
/// A fragment of a file, described by the borrowed path of the file together
/// with a position and a length.
pub struct FileChunk<'a> {
/// Path of the file the chunk belongs to.
pub path: &'a Path,
/// Position of the chunk within the file.
pub pos: FilePos,
/// Length of the chunk.
pub len: FileLen,
}
impl FileChunk<'_> {
/// Bundles `path`, `pos` and `len` into a chunk.
///
/// The returned chunk borrows `path`; no I/O is performed.
pub fn new(path: &Path, pos: FilePos, len: FileLen) -> FileChunk<'_> {
FileChunk { path, pos, len }
}
}
/// Integer type of the per-device file identifier stored in [`FileId`].
///
/// On Unix this is 64 bits wide.
#[cfg(unix)]
pub type InodeId = u64;
/// Integer type of the per-device file identifier stored in [`FileId`].
///
/// On Windows this is 128 bits wide.
#[cfg(windows)]
pub type InodeId = u128;
/// Identifier of a file, made of a device number and an inode-like number.
///
/// NOTE(review): presumably two paths with equal `FileId` refer to the same
/// underlying file (e.g. hard links) — confirm against callers.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
pub struct FileId {
/// Device number on Unix, volume serial number on Windows.
pub device: u64,
/// Inode number on Unix, file identifier / file index on Windows.
pub inode: InodeId,
}
impl FileId {
/// Reads the identifier of the file at path `file` from its metadata.
///
/// # Errors
/// If `fs::metadata` fails, returns an error of the same kind whose message
/// names the path and includes the original error.
#[cfg(unix)]
pub fn new(file: &Path) -> io::Result<FileId> {
use std::os::unix::fs::MetadataExt;
match fs::metadata(file.to_path_buf()) {
Ok(metadata) => Ok(FileId {
inode: metadata.ino(),
device: metadata.dev(),
}),
// Keep the original error kind, but add the path to the message.
Err(e) => Err(io::Error::new(
e.kind(),
format!("Failed to read metadata of {}: {}", file.display(), e),
)),
}
}
/// Opens the file at path `file` and reads its identifier with [`FileId::from_file`].
///
/// # Errors
/// A failure to open the file is propagated unchanged by `?`, without the
/// path in the message. A failure of `from_file` is replaced by an error of
/// kind `ErrorKind::Other` that names the path.
///
/// NOTE(review): the error returned by `from_file` is discarded (`|_|`) and
/// `io::Error::last_os_error()` is queried a second time here, so the
/// original error kind is lost — consider reusing the inner error.
#[cfg(windows)]
pub fn new(file: &Path) -> io::Result<FileId> {
Self::from_file(&fs::File::open(file.to_path_buf())?).map_err(|_| {
io::Error::new(
ErrorKind::Other,
format!(
"Failed to read file identifier of {}: {}",
file.display(),
io::Error::last_os_error()
),
)
})
}
/// Reads the identifier of an already opened file.
///
/// First asks `GetFileInformationByHandleEx` for `FILE_ID_INFO`; if that call
/// returns 0, falls back to `GetFileInformationByHandle` and builds the
/// identifier from the 32-bit high and low file index values.
///
/// # Errors
/// Returns an error of kind `ErrorKind::Other` carrying the last OS error
/// when both calls return 0.
#[cfg(windows)]
pub fn from_file(file: &fs::File) -> io::Result<FileId> {
use std::os::windows::io::*;
use winapi::ctypes::c_void;
use winapi::um::fileapi::{
GetFileInformationByHandle, BY_HANDLE_FILE_INFORMATION, FILE_ID_INFO,
};
use winapi::um::minwinbase::FileIdInfo;
use winapi::um::winbase::GetFileInformationByHandleEx;
let handle = file.as_raw_handle();
// SAFETY (review note): `handle` comes from `file`, which is borrowed for
// the whole call; both output structs are zero-initialised locals, and the
// pointer and size passed to the API describe exactly those locals.
unsafe {
let mut file_id: FILE_ID_INFO = std::mem::zeroed();
let file_id_ptr = (&mut file_id) as *mut _ as *mut c_void;
const FILE_ID_SIZE: u32 = std::mem::size_of::<FILE_ID_INFO>() as u32;
// Preferred path: the 128-bit identifier from FILE_ID_INFO.
if GetFileInformationByHandleEx(handle, FileIdInfo, file_id_ptr, FILE_ID_SIZE) != 0 {
return Ok(FileId {
device: file_id.VolumeSerialNumber as u64,
inode: u128::from_be_bytes(file_id.FileId.Identifier),
});
}
// Fallback: combine the high and low 32-bit file index values.
let mut file_info: BY_HANDLE_FILE_INFORMATION = std::mem::zeroed();
let file_info_ptr = (&mut file_info) as *mut _;
if GetFileInformationByHandle(handle, file_info_ptr) != 0 {
return Ok(FileId {
device: file_info.dwVolumeSerialNumber as u64,
inode: ((file_info.nFileIndexHigh as u128) << 32)
| file_info.nFileIndexLow as u128,
});
}
Err(io::Error::new(
ErrorKind::Other,
format!(
"Failed to read file identifier: {}",
io::Error::last_os_error()
),
))
}
}
/// Builds the identifier from already loaded metadata, without any I/O.
#[cfg(unix)]
pub fn from_metadata(metadata: &fs::Metadata) -> FileId {
use std::os::unix::fs::MetadataExt;
FileId {
inode: metadata.ino(),
device: metadata.dev(),
}
}
/// Copies the `FileId` out of any value that can be viewed as one.
pub fn of(f: impl AsRef<FileId>) -> FileId {
*f.as_ref()
}
}
/// File metadata from `std::fs` together with the [`FileId`] of the file.
///
/// Dereferences to the wrapped `fs::Metadata`.
#[derive(Debug)]
pub struct FileMetadata {
// Identifier of the file the metadata was read from.
id: FileId,
// Metadata as returned by `fs::metadata`.
metadata: fs::Metadata,
}
impl FileMetadata {
pub fn new(path: &Path) -> io::Result<FileMetadata> {
let path_buf = path.to_path_buf();
let metadata = fs::metadata(path_buf).map_err(|e| {
io::Error::new(
e.kind(),
format!("Failed to read metadata of {}: {}", path.display(), e),
)
})?;
#[cfg(unix)]
let id = FileId::from_metadata(&metadata);
#[cfg(windows)]
let id = FileId::new(&path)?;
Ok(FileMetadata { id, metadata })
}
pub fn len(&self) -> FileLen {
FileLen(self.metadata.len())
}
pub fn file_id(&self) -> FileId {
self.id
}
pub fn device_id(&self) -> u64 {
self.id.device
}
pub fn inode_id(&self) -> InodeId {
self.id.inode
}
}
impl Deref for FileMetadata {
type Target = fs::Metadata;
fn deref(&self) -> &Self::Target {
&self.metadata
}
}
impl AsRef<FileId> for FileMetadata {
fn as_ref(&self) -> &FileId {
&self.id
}
}
/// Basic information about a single file: its path, identifier, length and
/// a packed `location` value.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct FileInfo {
/// Path of the file.
pub path: Path,
/// Identifier of the file.
pub id: FileId,
/// Length of the file as reported by its metadata.
pub(crate) len: FileLen,
/// Packed value: the device index in the upper 16 bits and, in the lower
/// 48 bits, either the low bits of the inode id (initial value) or the
/// physical location shifted right by 8 (after `fetch_physical_location`).
pub(crate) location: u64,
}
/// Selects the lower 48 bits of `FileInfo::location` (the offset part).
const OFFSET_MASK: u64 = 0x0000FFFFFFFFFFFF;
/// Selects the upper 16 bits of `FileInfo::location` (the device index part).
#[cfg(target_os = "linux")]
const DEVICE_MASK: u64 = 0xFFFF000000000000;
impl FileInfo {
    /// Reads the metadata of `path` and builds its `FileInfo`.
    ///
    /// The initial `location` holds the index of the device found through
    /// `devices` in the upper 16 bits and the low 48 bits of the inode id
    /// in the remaining bits.
    ///
    /// # Errors
    /// Propagates the error of `FileMetadata::new`.
    fn new(path: Path, devices: &DiskDevices) -> io::Result<FileInfo> {
        let device_bits = (devices.get_by_path(&path).index as u64) << 48;
        let metadata = FileMetadata::new(&path)?;
        // `InodeId` is `u128` on Windows, so the cast is needed there even
        // though it is a no-op where `InodeId` is already `u64`.
        #[allow(clippy::unnecessary_cast)]
        let inode_bits = (metadata.inode_id() as u64) & OFFSET_MASK;
        Ok(FileInfo {
            id: metadata.file_id(),
            len: metadata.len(),
            location: device_bits | inode_bits,
            path,
        })
    }

    /// Returns the device index stored in the upper 16 bits of `location`.
    pub fn get_device_index(&self) -> usize {
        let device_bits = self.location >> 48;
        device_bits as usize
    }

    /// Replaces the offset part of `location` with the physical location of
    /// the file and returns the resulting `location`.
    ///
    /// When no physical location is reported, `location` is left unchanged.
    /// The device index bits are always preserved.
    ///
    /// # Errors
    /// Propagates the error of `get_physical_file_location`.
    #[cfg(target_os = "linux")]
    pub fn fetch_physical_location(&mut self) -> io::Result<u64> {
        if let Some(physical) = get_physical_file_location(&self.path)? {
            let device_bits = self.location & DEVICE_MASK;
            let offset_bits = (physical >> 8) & OFFSET_MASK;
            self.location = device_bits | offset_bits;
        }
        Ok(self.location)
    }
}
impl AsRef<FileId> for FileInfo {
fn as_ref(&self) -> &FileId {
&self.id
}
}
impl AsRef<Path> for FileInfo {
fn as_ref(&self) -> &Path {
&self.path
}
}
impl From<FileInfo> for Path {
fn from(info: FileInfo) -> Self {
info.path
}
}
/// Builds the `FileInfo` of `file`, turning any failure into `None`.
///
/// Errors of kind `ErrorKind::NotFound` are dropped silently; every other
/// error is reported through `log.warn` before `None` is returned.
pub(crate) fn file_info_or_log_err(
    file: Path,
    devices: &DiskDevices,
    log: &dyn Log,
) -> Option<FileInfo> {
    let err = match FileInfo::new(file, devices) {
        Ok(info) => return Some(info),
        Err(err) => err,
    };
    if err.kind() != ErrorKind::NotFound {
        log.warn(err);
    }
    None
}
/// Returns the `fe_physical` value of the first extent that `fiemap` reports
/// for the file at `path`, or `None` when no extent is reported.
///
/// # Errors
/// Propagates the error of the `fiemap` call and of reading the first extent.
#[cfg(target_os = "linux")]
pub(crate) fn get_physical_file_location(path: &Path) -> io::Result<Option<u64>> {
    let mut extents = fiemap::fiemap(path.to_path_buf())?;
    if let Some(first) = extents.next() {
        let extent = first?;
        return Ok(Some(extent.fe_physical));
    }
    Ok(None)
}
/// Hash of file contents, stored as a boxed byte slice of arbitrary length.
///
/// Displayed, parsed and (de)serialized as a hexadecimal string.
#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)]
pub struct FileHash(Box<[u8]>);
impl FileHash {
    /// Interprets the first 16 bytes of the hash as a little-endian `u128`.
    ///
    /// # Panics
    /// Panics if the hash is shorter than 16 bytes.
    pub fn u128_prefix(&self) -> u128 {
        let mut bytes: &[u8] = &self.0;
        bytes
            .read_u128::<LittleEndian>()
            .expect("Hash must be at least 128-bit long")
    }
}
/// Implemented by values that contain a [`FileHash`] and can lend it out.
pub trait AsFileHash {
/// Borrows the contained hash.
fn as_file_hash(&self) -> &FileHash;
}
impl AsFileHash for FileHash {
/// A hash is trivially its own hash: returns `self`.
fn as_file_hash(&self) -> &FileHash {
self
}
}
impl<T> AsFileHash for (T, FileHash) {
    /// Borrows the hash stored as the second element of the pair.
    fn as_file_hash(&self) -> &FileHash {
        let (_, hash) = self;
        hash
    }
}
impl Display for FileHash {
    /// Writes the hash as a hexadecimal string produced by `hex::encode`.
    ///
    /// The text goes through `Formatter::pad`, so width and alignment flags
    /// of the caller are honored.
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        let hex_digits = hex::encode(&self.0);
        f.pad(&hex_digits)
    }
}
impl From<&[u8]> for FileHash {
fn from(bytes: &[u8]) -> Self {
FileHash(bytes.into())
}
}
impl From<u128> for FileHash {
fn from(hash: u128) -> Self {
let mut bytes: Vec<u8> = vec![];
bytes.write_u128::<LittleEndian>(hash).unwrap();
FileHash(bytes.into_boxed_slice())
}
}
impl FromStr for FileHash {
    type Err = FromHexError;

    /// Parses a hash from its hexadecimal representation.
    ///
    /// # Errors
    /// Returns the `FromHexError` reported by `hex::decode` when `s` is not
    /// valid hexadecimal text.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        hex::decode(s).map(|bytes| FileHash(bytes.into_boxed_slice()))
    }
}
impl BitXor for FileHash {
    type Output = Self;

    /// Combines two hashes byte by byte with XOR.
    ///
    /// The result is as long as the longer operand. Where only one operand
    /// has a byte, the other is treated as zero, so that byte is carried
    /// over unchanged. This keeps XOR with an empty or shorter hash from
    /// wiping out data: `h ^ FileHash::from(&[][..]) == h`.
    fn bitxor(self, rhs: Self) -> Self::Output {
        FileHash(
            self.0
                .iter()
                .zip_longest(rhs.0.as_ref())
                .map(|pair| match pair {
                    EitherOrBoth::Both(a, b) => a ^ b,
                    // x ^ 0 == x: an unmatched byte passes through as is.
                    EitherOrBoth::Left(a) => *a,
                    EitherOrBoth::Right(b) => *b,
                })
                .collect_vec()
                .into_boxed_slice(),
        )
    }
}
impl Serialize for FileHash {
    /// Serializes the hash as its hexadecimal string representation.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        let encoded = self.to_string();
        serializer.collect_str(&encoded)
    }
}
impl<'de> Deserialize<'de> for FileHash {
fn deserialize<D>(deserializer: D) -> Result<FileHash, D::Error>
where
D: Deserializer<'de>,
{
let s = String::deserialize(deserializer)?;
let h = FileHash::from_str(&s).map_err(de::Error::custom)?;
Ok(h)
}
}
/// Common operations over collections that contain [`FileInfo`] entries.
pub(crate) trait FileCollection {
/// Returns the number of files in the collection.
fn count(&self) -> usize;
/// Returns the sum of the lengths of the files in the collection.
fn total_size(&self) -> FileLen;
/// Calls `op` on every file, with mutable access to it.
///
/// `op` must be `Sync + Send`; the implementations in this file run it
/// through rayon parallel iterators.
fn for_each_mut<OP>(&mut self, op: OP)
where
OP: Fn(&mut FileInfo) + Sync + Send;
}
impl FileCollection for Vec<FileInfo> {
    /// Returns the number of entries in the vector.
    fn count(&self) -> usize {
        Vec::len(self)
    }

    /// Sums the lengths of all files, using a parallel iterator.
    fn total_size(&self) -> FileLen {
        self.par_iter().map(|info| info.len).sum::<FileLen>()
    }

    /// Applies `op` to every file, using a parallel iterator.
    fn for_each_mut<OP>(&mut self, op: OP)
    where
        OP: Fn(&mut FileInfo) + Sync + Send,
    {
        self.par_iter_mut().for_each(|info| op(info))
    }
}
impl FileCollection for Vec<FileGroup<FileInfo>> {
    /// Adds up the file counts of all groups.
    fn count(&self) -> usize {
        self.iter().map(|group| group.file_count()).sum::<usize>()
    }

    /// Adds up the total sizes of all groups, using a parallel iterator.
    fn total_size(&self) -> FileLen {
        self.par_iter()
            .map(|group| group.total_size())
            .sum::<FileLen>()
    }

    /// Applies `op` to every file of every group, using a parallel iterator.
    fn for_each_mut<OP>(&mut self, op: OP)
    where
        OP: Fn(&mut FileInfo) + Sync + Send,
    {
        self.par_iter_mut()
            .flat_map(|group| &mut group.files)
            .for_each(|info| op(info))
    }
}
/// Access pattern for a file.
///
/// NOTE(review): not used within this file; presumably a hint telling readers
/// how a file is going to be accessed — confirm against callers.
#[derive(Copy, Clone, Debug)]
pub(crate) enum FileAccess {
/// Sequential access.
Sequential,
/// Random access.
Random,
}
#[cfg(test)]
mod test {
    use super::*;

    /// A `FileLen` is displayed in human-readable units.
    #[test]
    fn test_format_bytes() {
        let rendered = FileLen(16000).to_string();
        assert_eq!(rendered, "16.0 KB");
    }
}