use camino::Utf8PathBuf;
use chrono::{DateTime, Utc};
use cordance_core::source::{SourceClass, SourceRecord};
use rayon::prelude::*;
use tracing::warn;
use crate::ScanError;
/// Number of discovered entries above which `walk` processes them with
/// rayon's parallel iterator; at or below this count it iterates sequentially.
pub const PARALLEL_THRESHOLD: usize = 256;
/// One discovered file, as produced by `discover` and consumed by
/// `process_entry`, in field order:
///
/// 0. the path as yielded by the walker (used for hashing),
/// 1. the path relative to the scan root as a string with `\` replaced by `/`,
/// 2. that same relative path as a `Utf8PathBuf`,
/// 3. the entry's metadata, or `None` when it could not be read.
type PreparedEntry = (Utf8PathBuf, String, Utf8PathBuf, Option<std::fs::Metadata>);
/// Scans the tree under `root` and returns one [`SourceRecord`] per
/// non-directory entry, sorted by the record's relative `path`.
///
/// Entries are first collected by `discover`; each one is then converted by
/// `process_entry`. The conversion runs on rayon's parallel iterator only when
/// more than [`PARALLEL_THRESHOLD`] entries were found. The final sort makes
/// the output order independent of which of the two strategies was used.
///
/// # Errors
///
/// Returns whatever [`ScanError`] `discover` reports.
pub fn walk(root: &Utf8PathBuf) -> Result<Vec<SourceRecord>, ScanError> {
    let entries = discover(root)?;

    let mut out: Vec<SourceRecord> = if entries.len() <= PARALLEL_THRESHOLD {
        // Small trees: stay on the current thread.
        entries.into_iter().map(process_entry).collect()
    } else {
        entries.into_par_iter().map(process_entry).collect()
    };

    out.sort_by(|lhs, rhs| lhs.path.cmp(&rhs.path));
    Ok(out)
}
/// Walks `root` and collects a [`PreparedEntry`] for every entry that is not a
/// directory, without reading any file contents.
///
/// The walker is an `ignore::WalkBuilder` with hidden-file filtering turned
/// off (`hidden(false)`); all other builder settings are left at their
/// defaults. Entries whose file type is unavailable are skipped along with
/// directories.
///
/// A metadata read failure is not fatal: it is logged with `warn!` and the
/// entry is kept with `None` metadata.
///
/// # Errors
///
/// * [`ScanError::Io`] when the walker yields an error. `path` is always
///   `root`; the wrapped `io::Error` keeps the original `ErrorKind` when the
///   walker error carries one, and its message is the walker's full error
///   text so the failing location is not lost.
/// * [`ScanError::NonUtf8Path`] when an entry's path is not valid UTF-8.
fn discover(root: &Utf8PathBuf) -> Result<Vec<PreparedEntry>, ScanError> {
    let walker = ignore::WalkBuilder::new(root.as_std_path())
        .hidden(false)
        .build();

    let mut prepared: Vec<PreparedEntry> = Vec::new();
    for entry in walker {
        let entry = entry.map_err(|e| {
            // Preserve the walker's message in both cases: `ScanError::Io`
            // only records `root`, so this text is the sole place the
            // offending path can surface.
            let message = e.to_string();
            let io = match e.io_error() {
                Some(inner) => std::io::Error::new(inner.kind(), message),
                None => std::io::Error::other(message),
            };
            ScanError::Io {
                path: root.clone(),
                source: io,
            }
        })?;

        // No file type (e.g. not a real filesystem entry) or a directory:
        // nothing to record.
        if entry.file_type().is_none_or(|ft| ft.is_dir()) {
            continue;
        }

        let abs_path: Utf8PathBuf = entry
            .path()
            .to_str()
            .ok_or_else(|| ScanError::NonUtf8Path(entry.path().to_string_lossy().into_owned()))
            .map(Utf8PathBuf::from)?;

        // `abs_path` is already known to be UTF-8, so the relative path can be
        // derived without another fallible conversion. If the entry is not
        // under `root`, fall back to the full path.
        // The separator rewrite is unconditional, so a literal `\` inside a
        // file name is rewritten as well.
        let rel_str = abs_path
            .strip_prefix(root)
            .unwrap_or(abs_path.as_path())
            .as_str()
            .replace('\\', "/");
        let rel_path = Utf8PathBuf::from(&rel_str);

        let meta = match entry.metadata() {
            Ok(m) => Some(m),
            Err(e) => {
                warn!("failed to read metadata for {abs_path}: {e}");
                None
            }
        };

        prepared.push((abs_path, rel_str, rel_path, meta));
    }
    Ok(prepared)
}
fn process_entry(prepared: PreparedEntry) -> SourceRecord {
let (abs_path, rel_str, rel_path, meta) = prepared;
if crate::blocked::is_blocked(&rel_str) {
let size_bytes = meta.as_ref().map_or(0, std::fs::Metadata::len);
return SourceRecord {
id: SourceRecord::stable_id(SourceClass::BlockedSurface, &rel_path),
path: rel_path,
class: SourceClass::BlockedSurface,
sha256: String::new(),
size_bytes,
modified: None,
blocked: true,
blocked_reason: crate::blocked::block_reason(&rel_str).map(str::to_string),
};
}
let sha256 = match crate::hasher::sha256_file(&abs_path) {
Ok(h) => h,
Err(e) => {
warn!(path = %abs_path, error = %e, "hash failed");
let size_bytes = meta.as_ref().map_or(0, std::fs::Metadata::len);
return SourceRecord {
id: SourceRecord::stable_id(SourceClass::BlockedSurface, &rel_path),
path: rel_path,
class: SourceClass::BlockedSurface,
sha256: String::new(),
size_bytes,
modified: None,
blocked: true,
blocked_reason: Some("hash failed (see logs)".into()),
};
}
};
let (size_bytes, modified) = meta.map_or((0, None), |m| {
let modified: Option<DateTime<Utc>> = m.modified().ok().map(DateTime::from);
(m.len(), modified)
});
let class = crate::classifier::classify(&rel_str);
SourceRecord {
id: SourceRecord::stable_id(class, &rel_path),
path: rel_path,
class,
sha256,
size_bytes,
modified,
blocked: false,
blocked_reason: None,
}
}