use std::collections::BTreeMap;
use std::path::{Path, PathBuf};
use anyhow::Result;
#[cfg(not(unix))]
use anyhow::{anyhow, Context};
use serde::{Deserialize, Serialize};
#[cfg(unix)]
use super::file::calc_xxh128_from_file_with_callback;
#[cfg(not(unix))]
use super::file::calc_xxh128_with_callback;
use super::file::FileMeta;
use super::progress::ProgressTracker;
use crate::constants::META_VERSION;
#[cfg(not(unix))]
use crate::utils::{basename, should_skip_dir, should_skip_file};
#[cfg(not(unix))]
use std::fs;
#[cfg(not(unix))]
use std::fs::File;
/// Recursive snapshot of one directory: its name, its sub-directory snapshots
/// and the metadata of the files directly inside it.
///
/// The type is (de)serialized with serde; `from_reader` and the `meta.json`
/// loaders in this file read it as JSON.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DirSnapshot {
/// Name of this directory (last path component, or the lossy full path when
/// the path has no final component).
pub dir_name: String,
/// Snapshots of the sub-directories.
pub dirs: Vec<DirSnapshot>,
/// Metadata of the files located directly in this directory.
pub files: Vec<FileMeta>,
/// Metadata format version. `build_root` sets it on the root node; the node
/// builders in this file leave it as `None`, in which case it is omitted
/// from the serialized output.
#[serde(skip_serializing_if = "Option::is_none")]
pub v: Option<String>,
}
impl DirSnapshot {
    /// Builds the snapshot of the tree rooted at `path` and stamps the root
    /// node with the current `META_VERSION`.
    ///
    /// Files are counted first so the progress tracker knows the total.
    ///
    /// # Errors
    ///
    /// Propagates any error raised while counting or walking the tree.
    pub fn build_root(path: &Path) -> Result<Self> {
        let total_files = count_files(path)?;
        let tracker = ProgressTracker::new(total_files, "构建中...");
        let mut node = Self::build_node(path, &tracker)?;
        // `build_node` leaves `v` unset; only the root is stamped here.
        node.v = Some(META_VERSION.to_string());
        tracker.finish("构建完成");
        Ok(node)
    }

    /// Deserializes a snapshot from JSON read from `reader`.
    ///
    /// `serde_json` does not buffer its input, so callers reading from a file
    /// should pass a buffered reader.
    ///
    /// # Errors
    ///
    /// Returns the underlying `serde_json` error on I/O or parse failure.
    pub fn from_reader<R: std::io::Read>(reader: R) -> Result<Self> {
        Ok(serde_json::from_reader(reader)?)
    }

    /// Builds the node for `path` using the descriptor-relative Unix walker.
    #[cfg(unix)]
    fn build_node(path: &Path, tracker: &ProgressTracker) -> Result<Self> {
        unix_walk::build_node(path, tracker)
    }

    /// Builds the node for `path` with `std::fs` (non-Unix targets).
    ///
    /// Symlinks are skipped. A sub-directory that contains a regular
    /// `meta.json` is verified against it and loaded; any other sub-directory
    /// is built recursively.
    #[cfg(not(unix))]
    fn build_node(path: &Path, tracker: &ProgressTracker) -> Result<Self> {
        let dir_name = path
            .file_name()
            .map(basename)
            .unwrap_or_else(|| path.to_string_lossy().to_string());
        let mut dirs = Vec::new();
        let mut files = Vec::new();
        let mut entries = fs::read_dir(path)
            .with_context(|| format!("无法遍历目录: {}", path.display()))?
            .collect::<Result<Vec<_>, _>>()
            .with_context(|| format!("读取目录失败: {}", path.display()))?;
        // `read_dir` order is unspecified; sort by name for a deterministic snapshot.
        entries.sort_unstable_by_key(|e| e.file_name());
        for entry in entries {
            let file_name = entry.file_name();
            let name = file_name.to_string_lossy().to_string();
            let full_path = entry.path();
            let file_type = entry
                .file_type()
                .with_context(|| format!("无法读取类型: {}", full_path.display()))?;
            if file_type.is_symlink() {
                continue;
            }
            if file_type.is_dir() {
                if should_skip_dir(&name) {
                    continue;
                }
                let sub_meta = full_path.join("meta.json");
                // Match the Unix walker: only a regular file named `meta.json`
                // (not a directory, not a symlink) triggers verification.
                let has_meta_file = fs::symlink_metadata(&sub_meta)
                    .map(|info| info.is_file())
                    .unwrap_or(false);
                if has_meta_file {
                    dirs.push(Self::verify_and_load(&full_path, tracker)?);
                } else {
                    dirs.push(Self::build_node(&full_path, tracker)?);
                }
                continue;
            }
            if should_skip_file(&name) {
                continue;
            }
            // The size only feeds the progress display, so a metadata failure
            // falls back to 0 instead of aborting.
            let file_size = entry.metadata().map(|m| m.len()).unwrap_or(0);
            tracker.start_file(file_size, &name);
            let on_bytes = tracker.bytes_callback();
            let on_iop = tracker.iop_callback();
            let meta = FileMeta::from_path_with_callback(&full_path, on_bytes, on_iop)?;
            files.push(meta);
            tracker.finish_file();
        }
        Ok(Self {
            dir_name,
            dirs,
            files,
            v: None,
        })
    }

    /// Flattens the snapshot into a map from file path to metadata.
    ///
    /// `root` stands for this node itself: files of this node are keyed as
    /// `root/<basename>` and each sub-directory contributes under
    /// `root/<dir_name>/...`. This node's own `dir_name` is not appended.
    pub fn collect_file_map(&self, root: &Path) -> BTreeMap<PathBuf, FileMeta> {
        let mut map = BTreeMap::new();
        self.collect_into(root.to_path_buf(), &mut map);
        map
    }

    /// Recursive worker for [`Self::collect_file_map`]; `current` is the path
    /// that corresponds to `self`.
    fn collect_into(&self, current: PathBuf, map: &mut BTreeMap<PathBuf, FileMeta>) {
        for file in &self.files {
            map.insert(current.join(&file.basename), file.clone());
        }
        for dir in &self.dirs {
            let next = current.join(&dir.dir_name);
            dir.collect_into(next, map);
        }
    }

    /// Loads `path/meta.json`, re-hashes the directory and checks that both
    /// agree (non-Unix targets).
    ///
    /// # Errors
    ///
    /// Fails when `meta.json` cannot be opened or parsed, when a hash differs,
    /// when a file exists on disk but not in the snapshot, or when a file
    /// recorded in the snapshot is missing on disk.
    #[cfg(not(unix))]
    fn verify_and_load(path: &Path, tracker: &ProgressTracker) -> Result<Self> {
        let meta_path = path.join("meta.json");
        let meta_file =
            File::open(&meta_path).with_context(|| format!("无法读取: {}", meta_path.display()))?;
        // serde_json reads without buffering; wrap the file so parsing does not
        // issue a read call per tiny chunk.
        let mut snapshot: Self = serde_json::from_reader(std::io::BufReader::new(meta_file))
            .with_context(|| format!("无法解析: {}", meta_path.display()))?;
        let mut stored = snapshot.collect_file_map(path);
        let mut current = BTreeMap::new();
        walk_dir_with_progress(path, &mut current, tracker)?;
        for (file_path, hash) in current {
            if let Some(meta) = stored.remove(&file_path) {
                if hash != meta.xxh128 {
                    return Err(anyhow!(
                        "校验失败: {}\n 期望: {}\n 当前: {}",
                        file_path.display(),
                        meta.xxh128,
                        hash
                    ));
                }
            } else {
                return Err(anyhow!("文件新增: {}", file_path.display()));
            }
        }
        // Anything still in `stored` was recorded but not found on disk.
        if let Some((missing_path, _)) = stored.into_iter().next() {
            return Err(anyhow!("文件缺失: {}", missing_path.display()));
        }
        let msg = format!("✓ 校验通过: {}", path.display());
        // Print through the progress UI when there is one so bars are not garbled.
        if let Some(multi) = tracker.multi() {
            multi.suspend(|| {
                eprintln!("{msg}");
            });
        } else {
            eprintln!("{msg}");
        }
        // Use the directory's current on-disk name and drop the nested version
        // tag; the enclosing root carries the version.
        snapshot.dir_name = path
            .file_name()
            .map(basename)
            .unwrap_or_else(|| path.to_string_lossy().to_string());
        snapshot.v = None;
        Ok(snapshot)
    }
}
/// Hashes every non-skipped file under `path` and returns a map from file path
/// to its xxh128 digest string.
///
/// The files are counted first so the progress tracker can show a total.
///
/// # Errors
///
/// Propagates any error from counting or walking the tree.
pub fn scan_dir_xxh128(path: &Path) -> Result<BTreeMap<PathBuf, String>> {
    let tracker = ProgressTracker::new(count_files(path)?, "扫描中...");
    let mut hashes = BTreeMap::new();
    walk_dir_with_progress(path, &mut hashes, &tracker)?;
    tracker.finish("扫描完成");
    Ok(hashes)
}
/// Counts the files under `path` that a walk would process.
///
/// Dispatches to the descriptor-relative walker on Unix and to the `std::fs`
/// based recursion elsewhere.
fn count_files(path: &Path) -> Result<u64> {
    #[cfg(unix)]
    {
        unix_walk::count_files(path)
    }
    #[cfg(not(unix))]
    {
        let mut total = 0u64;
        count_files_recursive(path, &mut total).map(|()| total)
    }
}
/// Adds to `count` the number of non-skipped files below `path`
/// (non-Unix targets).
///
/// Symlinks are ignored; directories rejected by `should_skip_dir` are not
/// descended into.
#[cfg(not(unix))]
fn count_files_recursive(path: &Path, count: &mut u64) -> Result<()> {
    let listing = fs::read_dir(path)
        .with_context(|| format!("无法遍历目录: {}", path.display()))?
        .collect::<Result<Vec<_>, _>>()
        .with_context(|| format!("读取目录失败: {}", path.display()))?;
    for item in listing {
        let child_path = item.path();
        let kind = item
            .file_type()
            .with_context(|| format!("无法读取类型: {}", child_path.display()))?;
        if kind.is_symlink() {
            continue;
        }
        let label = item.file_name().to_string_lossy().to_string();
        if kind.is_dir() {
            if !should_skip_dir(&label) {
                count_files_recursive(&child_path, count)?;
            }
        } else if !should_skip_file(&label) {
            *count += 1;
        }
    }
    Ok(())
}
/// Hashes every non-skipped file below `path`, inserting `path -> digest`
/// pairs into `map` and reporting progress through `tracker`.
///
/// On Unix this delegates to the descriptor-relative walker; elsewhere it
/// walks with `std::fs`, visiting entries in name order and ignoring symlinks.
fn walk_dir_with_progress(
    path: &Path,
    map: &mut BTreeMap<PathBuf, String>,
    tracker: &ProgressTracker,
) -> Result<()> {
    #[cfg(unix)]
    {
        unix_walk::walk_dir_with_progress(path, map, tracker)
    }
    #[cfg(not(unix))]
    {
        let mut listing = fs::read_dir(path)
            .with_context(|| format!("无法遍历目录: {}", path.display()))?
            .collect::<Result<Vec<_>, _>>()
            .with_context(|| format!("读取目录失败: {}", path.display()))?;
        listing.sort_unstable_by_key(|item| item.file_name());
        for item in listing {
            let child_path = item.path();
            let kind = item
                .file_type()
                .with_context(|| format!("无法读取类型: {}", child_path.display()))?;
            if kind.is_symlink() {
                continue;
            }
            let label = item.file_name().to_string_lossy().to_string();
            if kind.is_dir() {
                if !should_skip_dir(&label) {
                    walk_dir_with_progress(&child_path, map, tracker)?;
                }
                continue;
            }
            if should_skip_file(&label) {
                continue;
            }
            // Size is used for progress reporting only; fall back to 0 on error.
            let size = item.metadata().map_or(0, |info| info.len());
            tracker.start_file(size, &label);
            let digest = calc_xxh128_with_callback(
                &child_path,
                tracker.bytes_callback(),
                tracker.iop_callback(),
            )?;
            map.insert(child_path, digest);
            tracker.finish_file();
        }
        Ok(())
    }
}
#[cfg(unix)]
mod unix_walk {
use std::collections::BTreeMap;
use std::ffi::{CStr, CString, OsStr, OsString};
use std::fs::File;
use std::io;
use std::mem::MaybeUninit;
use std::os::fd::{AsRawFd, FromRawFd, RawFd};
use std::os::unix::ffi::{OsStrExt, OsStringExt};
use std::os::unix::fs::MetadataExt;
use std::path::{Path, PathBuf};
use anyhow::{anyhow, Context, Result};
use super::{calc_xxh128_from_file_with_callback, DirSnapshot, FileMeta, ProgressTracker};
use crate::utils::{basename, should_skip_dir, should_skip_file};
/// An open directory descriptor used as the base for `*at` system calls.
///
/// The descriptor is stored as a `File`, so it is closed when the handle is
/// dropped.
struct DirHandle {
file: File,
}
/// One directory entry together with the `stat` data captured for it.
struct DirEntryInfo {
/// Entry name relative to its parent directory (raw OS bytes).
name: OsString,
/// Result of `fstatat(..., AT_SYMLINK_NOFOLLOW)` for the entry; later compared
/// against the opened descriptor to detect replacement.
stat: libc::stat,
}
/// Coarse file type derived from the `S_IFMT` bits of `st_mode`
/// (see `kind_from_mode`).
enum EntryKind {
Directory,
RegularFile,
Symlink,
// Anything else (every mode that is not a directory, regular file or symlink).
Other,
}
impl DirHandle {
fn open_path(path: &Path) -> Result<Self> {
let c_path = cstring_from_path(path)?;
let fd = unsafe { libc::open(c_path.as_ptr(), dir_open_flags()) };
if fd == -1 {
return Err(io::Error::last_os_error())
.with_context(|| format!("无法打开目录: {}", path.display()));
}
Ok(Self {
file: unsafe { File::from_raw_fd(fd) },
})
}
fn raw_fd(&self) -> RawFd {
self.file.as_raw_fd()
}
fn entries(&self, path: &Path) -> Result<Vec<DirEntryInfo>> {
let dup_fd = unsafe { libc::dup(self.raw_fd()) };
if dup_fd == -1 {
return Err(io::Error::last_os_error())
.with_context(|| format!("无法遍历目录: {}", path.display()));
}
let dir = unsafe { libc::fdopendir(dup_fd) };
if dir.is_null() {
let err = io::Error::last_os_error();
unsafe {
libc::close(dup_fd);
}
return Err(err).with_context(|| format!("无法遍历目录: {}", path.display()));
}
let _stream = DirStream(dir);
let mut entries = Vec::new();
loop {
let entry = unsafe { libc::readdir(dir) };
if entry.is_null() {
break;
}
let name_bytes = unsafe { CStr::from_ptr((*entry).d_name.as_ptr()) }.to_bytes();
if name_bytes == b"." || name_bytes == b".." {
continue;
}
let name = OsString::from_vec(name_bytes.to_vec());
let full_path = path.join(&name);
if let Some(stat) = self.stat_child(&name, &full_path)? {
entries.push(DirEntryInfo { name, stat });
}
}
entries.sort_unstable_by(|left, right| left.name.cmp(&right.name));
Ok(entries)
}
fn stat_child(&self, name: &OsStr, path: &Path) -> Result<Option<libc::stat>> {
let c_name = cstring_from_os_str(name, path)?;
let mut stat = MaybeUninit::<libc::stat>::uninit();
let code = unsafe {
libc::fstatat(
self.raw_fd(),
c_name.as_ptr(),
stat.as_mut_ptr(),
libc::AT_SYMLINK_NOFOLLOW,
)
};
if code == -1 {
let err = io::Error::last_os_error();
if err.kind() == io::ErrorKind::NotFound {
return Ok(None);
}
return Err(err).with_context(|| format!("无法读取类型: {}", path.display()));
}
Ok(Some(unsafe { stat.assume_init() }))
}
fn has_regular_child(&self, name: &OsStr, path: &Path) -> Result<bool> {
Ok(matches!(
self.stat_child(name, path)?
.map(|stat| kind_from_mode(stat.st_mode)),
Some(EntryKind::RegularFile)
))
}
fn open_child_dir(&self, entry: &DirEntryInfo, path: &Path) -> Result<Self> {
let c_name = cstring_from_os_str(&entry.name, path)?;
let fd = unsafe { libc::openat(self.raw_fd(), c_name.as_ptr(), dir_open_flags()) };
if fd == -1 {
return Err(io::Error::last_os_error())
.with_context(|| format!("无法打开目录: {}", path.display()));
}
let file = unsafe { File::from_raw_fd(fd) };
let info = file
.metadata()
.with_context(|| format!("无法读取目录信息: {}", path.display()))?;
if !stat_matches(&info, &entry.stat) {
return Err(anyhow!("扫描期间目录被替换: {}", path.display()));
}
Ok(Self { file })
}
fn open_child_file(&self, entry: &DirEntryInfo, path: &Path) -> Result<File> {
let c_name = cstring_from_os_str(&entry.name, path)?;
let fd = unsafe { libc::openat(self.raw_fd(), c_name.as_ptr(), file_open_flags()) };
if fd == -1 {
return Err(io::Error::last_os_error())
.with_context(|| format!("无法打开文件: {}", path.display()));
}
let file = unsafe { File::from_raw_fd(fd) };
let info = file
.metadata()
.with_context(|| format!("无法读取文件信息: {}", path.display()))?;
if !info.is_file() {
return Err(anyhow!("{} 打开后不是普通文件", path.display()));
}
if !stat_matches(&info, &entry.stat) {
return Err(anyhow!("扫描期间文件被替换: {}", path.display()));
}
Ok(file)
}
}
/// Owns a `DIR*` stream and closes it with `closedir` when dropped.
struct DirStream(*mut libc::DIR);
impl Drop for DirStream {
fn drop(&mut self) {
// SAFETY: the only construction site in this module wraps a non-null stream
// returned by `fdopendir`, and the stream is closed exactly once, here.
// The return value of `closedir` is ignored.
unsafe {
libc::closedir(self.0);
}
}
}
/// Opens `path` as a directory and builds its snapshot node.
pub(super) fn build_node(path: &Path, tracker: &ProgressTracker) -> Result<DirSnapshot> {
    DirHandle::open_path(path).and_then(|root| build_node_at(path, &root, tracker))
}
/// Opens `path` as a directory and counts the non-skipped regular files
/// beneath it.
pub(super) fn count_files(path: &Path) -> Result<u64> {
    let root = DirHandle::open_path(path)?;
    let mut total = 0u64;
    count_files_at(path, &root, &mut total).map(|()| total)
}
/// Opens `path` as a directory and hashes every non-skipped regular file
/// beneath it into `map`.
pub(super) fn walk_dir_with_progress(
    path: &Path,
    map: &mut BTreeMap<PathBuf, String>,
    tracker: &ProgressTracker,
) -> Result<()> {
    DirHandle::open_path(path)
        .and_then(|root| walk_dir_with_progress_at(path, &root, map, tracker))
}
/// Builds the snapshot node for the already-opened directory `dir`.
///
/// `path` is the display/location path of `dir`; all filesystem access goes
/// through `dir`. Symlinks are ignored. A sub-directory holding a regular
/// `meta.json` is verified and loaded, any other one is built recursively.
///
/// # Errors
///
/// Fails on any I/O error and on a special file (neither directory, regular
/// file nor symlink) whose name is not excluded by `should_skip_file`.
fn build_node_at(
    path: &Path,
    dir: &DirHandle,
    tracker: &ProgressTracker,
) -> Result<DirSnapshot> {
    let dir_name = path
        .file_name()
        .map(basename)
        .unwrap_or_else(|| path.to_string_lossy().to_string());
    let mut sub_dirs = Vec::new();
    let mut file_metas = Vec::new();
    for child in dir.entries(path)? {
        let label = child.name.to_string_lossy().into_owned();
        let child_path = path.join(&child.name);
        match kind_from_mode(child.stat.st_mode) {
            EntryKind::Symlink => {}
            EntryKind::Directory => {
                if should_skip_dir(&label) {
                    continue;
                }
                let sub_dir = dir.open_child_dir(&child, &child_path)?;
                let meta_path = child_path.join("meta.json");
                let node = if sub_dir.has_regular_child(OsStr::new("meta.json"), &meta_path)? {
                    verify_and_load_at(&child_path, &sub_dir, tracker)?
                } else {
                    build_node_at(&child_path, &sub_dir, tracker)?
                };
                sub_dirs.push(node);
            }
            EntryKind::RegularFile => {
                if should_skip_file(&label) {
                    continue;
                }
                tracker.start_file(stat_size(&child.stat), &label);
                let handle = dir.open_child_file(&child, &child_path)?;
                let meta = FileMeta::from_open_file_with_callback(
                    &child_path,
                    handle,
                    tracker.bytes_callback(),
                    tracker.iop_callback(),
                )?;
                file_metas.push(meta);
                tracker.finish_file();
            }
            EntryKind::Other => {
                if should_skip_file(&label) {
                    continue;
                }
                return Err(anyhow!(
                    "不支持的特殊文件: {} (mode {:o})",
                    child_path.display(),
                    child.stat.st_mode
                ));
            }
        }
    }
    Ok(DirSnapshot {
        dir_name,
        dirs: sub_dirs,
        files: file_metas,
        v: None,
    })
}
fn verify_and_load_at(
path: &Path,
dir: &DirHandle,
tracker: &ProgressTracker,
) -> Result<DirSnapshot> {
let meta_name = OsStr::new("meta.json");
let meta_path = path.join(meta_name);
let Some(meta_stat) = dir.stat_child(meta_name, &meta_path)? else {
return build_node_at(path, dir, tracker);
};
let meta_entry = DirEntryInfo {
name: meta_name.to_os_string(),
stat: meta_stat,
};
let meta_file = dir.open_child_file(&meta_entry, &meta_path)?;
let mut snapshot: DirSnapshot = serde_json::from_reader(meta_file)
.with_context(|| format!("无法解析: {}", meta_path.display()))?;
let mut stored = snapshot.collect_file_map(path);
let mut current = BTreeMap::new();
walk_dir_with_progress_at(path, dir, &mut current, tracker)?;
for (file_path, hash) in current {
if let Some(meta) = stored.remove(&file_path) {
if hash != meta.xxh128 {
return Err(anyhow!(
"校验失败: {}\n 期望: {}\n 当前: {}",
file_path.display(),
meta.xxh128,
hash
));
}
} else {
return Err(anyhow!("文件新增: {}", file_path.display()));
}
}
if let Some((missing_path, _)) = stored.into_iter().next() {
return Err(anyhow!("文件缺失: {}", missing_path.display()));
}
let msg = format!("✓ 校验通过: {}", path.display());
if let Some(multi) = tracker.multi() {
multi.suspend(|| {
eprintln!("{msg}");
});
} else {
eprintln!("{msg}");
}
snapshot.dir_name = path
.file_name()
.map(basename)
.unwrap_or_else(|| path.to_string_lossy().to_string());
snapshot.v = None;
Ok(snapshot)
}
/// Adds to `count` the number of non-skipped regular files below the
/// already-opened directory `dir` (located at `path`).
///
/// # Errors
///
/// Fails on any I/O error and on a special file whose name is not excluded by
/// `should_skip_file`.
fn count_files_at(path: &Path, dir: &DirHandle, count: &mut u64) -> Result<()> {
    for child in dir.entries(path)? {
        let label = child.name.to_string_lossy().into_owned();
        let child_path = path.join(&child.name);
        match kind_from_mode(child.stat.st_mode) {
            EntryKind::Symlink => {}
            EntryKind::RegularFile => {
                if !should_skip_file(&label) {
                    *count += 1;
                }
            }
            EntryKind::Directory => {
                if !should_skip_dir(&label) {
                    let sub_dir = dir.open_child_dir(&child, &child_path)?;
                    count_files_at(&child_path, &sub_dir, count)?;
                }
            }
            EntryKind::Other => {
                if should_skip_file(&label) {
                    continue;
                }
                return Err(anyhow!(
                    "不支持的特殊文件: {} (mode {:o})",
                    child_path.display(),
                    child.stat.st_mode
                ));
            }
        }
    }
    Ok(())
}
/// Hashes every non-skipped regular file below the already-opened directory
/// `dir` (located at `path`), inserting `path -> digest` into `map`.
///
/// Symlinks are ignored and skipped directories are not descended into.
///
/// # Errors
///
/// Fails on any I/O or hashing error and on a special file whose name is not
/// excluded by `should_skip_file`.
fn walk_dir_with_progress_at(
    path: &Path,
    dir: &DirHandle,
    map: &mut BTreeMap<PathBuf, String>,
    tracker: &ProgressTracker,
) -> Result<()> {
    for child in dir.entries(path)? {
        let label = child.name.to_string_lossy().into_owned();
        let child_path = path.join(&child.name);
        match kind_from_mode(child.stat.st_mode) {
            EntryKind::Symlink => {}
            EntryKind::Directory => {
                if !should_skip_dir(&label) {
                    let sub_dir = dir.open_child_dir(&child, &child_path)?;
                    walk_dir_with_progress_at(&child_path, &sub_dir, map, tracker)?;
                }
            }
            EntryKind::RegularFile => {
                if should_skip_file(&label) {
                    continue;
                }
                tracker.start_file(stat_size(&child.stat), &label);
                let handle = dir.open_child_file(&child, &child_path)?;
                let digest = calc_xxh128_from_file_with_callback(
                    &child_path,
                    handle,
                    tracker.bytes_callback(),
                    tracker.iop_callback(),
                )?;
                map.insert(child_path, digest);
                tracker.finish_file();
            }
            EntryKind::Other => {
                if should_skip_file(&label) {
                    continue;
                }
                return Err(anyhow!(
                    "不支持的特殊文件: {} (mode {:o})",
                    child_path.display(),
                    child.stat.st_mode
                ));
            }
        }
    }
    Ok(())
}
/// Classifies an `st_mode` value by its file-type (`S_IFMT`) bits.
fn kind_from_mode(mode: libc::mode_t) -> EntryKind {
    let type_bits = mode & libc::S_IFMT as libc::mode_t;
    if type_bits == libc::S_IFDIR as libc::mode_t {
        EntryKind::Directory
    } else if type_bits == libc::S_IFREG as libc::mode_t {
        EntryKind::RegularFile
    } else if type_bits == libc::S_IFLNK as libc::mode_t {
        EntryKind::Symlink
    } else {
        EntryKind::Other
    }
}
/// Returns `st_size` as an unsigned byte count, mapping a negative value to 0.
fn stat_size(stat: &libc::stat) -> u64 {
    u64::try_from(stat.st_size).unwrap_or(0)
}
/// Returns whether `info` (taken from an opened descriptor) refers to the same
/// device and inode as the previously recorded `stat`.
fn stat_matches(info: &std::fs::Metadata, stat: &libc::stat) -> bool {
    // `dev_t` and `ino_t` are not 64-bit unsigned on every Unix target, while
    // `MetadataExt` always reports `u64`; widen both fields so the comparison
    // compiles everywhere.
    let same_device = info.dev() == stat.st_dev as u64;
    let same_inode = info.ino() == stat.st_ino as u64;
    same_device && same_inode
}
fn dir_open_flags() -> libc::c_int {
libc::O_RDONLY | libc::O_CLOEXEC | libc::O_DIRECTORY | libc::O_NOFOLLOW
}
/// Flags for opening a file: read-only, close-on-exec, and refusing to follow
/// a symlink in the final path component.
fn file_open_flags() -> libc::c_int {
    libc::O_NOFOLLOW | libc::O_CLOEXEC | libc::O_RDONLY
}
fn cstring_from_path(path: &Path) -> Result<CString> {
CString::new(path.as_os_str().as_bytes())
.with_context(|| format!("路径包含 NUL 字节: {}", path.display()))
}
/// Converts `value` to a C string, failing if it contains an interior NUL
/// byte. `path` is only used in the error message.
fn cstring_from_os_str(value: &OsStr, path: &Path) -> Result<CString> {
    let raw = value.as_bytes();
    CString::new(raw).with_context(|| format!("路径包含 NUL 字节: {}", path.display()))
}
}