use std::fs;
use std::path::Path;
use std::time::SystemTime;
use ignore::WalkBuilder;
use crate::error::SrcwalkError;
/// Directory names the walker never descends into.
///
/// Matching is by exact directory name (not by path), so an entry here prunes
/// a directory with that name at any depth. Each name is listed once.
pub(crate) const SKIP_DIRS: &[&str] = &[
    ".git",
    "node_modules",
    "target",
    "dist",
    "build",
    "__pycache__",
    ".pycache",
    "vendor",
    ".next",
    ".nuxt",
    "coverage",
    ".cache",
    ".tox",
    ".venv",
    ".eggs",
    ".mypy_cache",
    ".ruff_cache",
    ".pytest_cache",
    ".turbo",
    ".parcel-cache",
    ".svelte-kit",
    "out",
    ".output",
    ".vercel",
    ".netlify",
    ".gradle",
    ".idea",
    ".scala-build",
    ".bloop",
    ".metals",
];
/// Size cutoff in bytes (16 KiB) used by `read_file_bytes`: files reported as
/// smaller than this are read into a heap buffer, everything else is
/// memory-mapped.
const MMAP_THRESHOLD: u64 = 16 * 1024;
/// Size threshold in bytes for the minified-content check.
///
/// NOTE(review): not referenced in this part of the file; presumably callers
/// only run `looks_minified` on files at least this large — confirm.
pub(crate) const MINIFIED_CHECK_THRESHOLD: u64 = 100_000;
/// Contents of a file, held either in an owned buffer or in a memory map.
///
/// Both variants dereference to `[u8]`, so callers can treat a `FileBytes`
/// as a plain byte slice regardless of how it was loaded.
pub(crate) enum FileBytes {
/// Bytes read into an owned `Vec<u8>`.
Heap(Vec<u8>),
/// Memory map of the file.
Mmap(memmap2::Mmap),
}
impl std::ops::Deref for FileBytes {
type Target = [u8];
fn deref(&self) -> &[u8] {
match self {
FileBytes::Heap(v) => v,
FileBytes::Mmap(m) => m,
}
}
}
/// Reports whether the file name of `path` follows a "minified" naming
/// convention: `<name>.min.<ext>` or `<name>-min.<ext>` (ASCII
/// case-insensitive).
///
/// Returns `false` when the path has no file name, the name is not valid
/// UTF-8, or the name contains no `.` at all.
pub(crate) fn is_minified_filename(path: &Path) -> bool {
    // Everything before the last '.' of the file name.
    let stem = match path
        .file_name()
        .and_then(|raw| raw.to_str())
        .and_then(|name| name.rsplit_once('.'))
    {
        Some((before_ext, _ext)) => before_ext,
        None => return false,
    };

    // `foo.min.js`: the stem itself carries a `min` extension.
    if Path::new(stem)
        .extension()
        .is_some_and(|inner| inner.eq_ignore_ascii_case("min"))
    {
        return true;
    }

    // `foo-min.js`: compare the last four bytes so that no lowercase copy of
    // the stem is needed and multi-byte characters cannot split a slice.
    let tail = stem.as_bytes();
    tail.len() >= 4 && tail[tail.len() - 4..].eq_ignore_ascii_case(b"-min")
}
/// Heuristic content check: returns `true` when the first 2048 bytes of
/// `bytes` (or all of them, if shorter) contain fewer than four `\n` bytes.
///
/// Empty input therefore also returns `true`.
pub(crate) fn looks_minified(bytes: &[u8]) -> bool {
    let head_len = bytes.len().min(2048);
    let head = &bytes[..head_len];
    // The fourth newline (index 3) exists exactly when there are at least four.
    memchr::memchr_iter(b'\n', head).nth(3).is_none()
}
/// Loads the contents of `path`, choosing the strategy from the
/// caller-supplied `size` (which is not re-checked against the file here).
///
/// * `size == 0` — returns `None` without touching the file.
/// * `size < MMAP_THRESHOLD` — reads the whole file into a heap buffer.
/// * otherwise — opens the file and memory-maps it.
///
/// Any I/O or mapping failure is reported as `None`.
pub(crate) fn read_file_bytes(path: &Path, size: u64) -> Option<FileBytes> {
    match size {
        0 => None,
        small if small < MMAP_THRESHOLD => fs::read(path).ok().map(FileBytes::Heap),
        _ => {
            let handle = fs::File::open(path).ok()?;
            // SAFETY: `Mmap::map` is unsafe because the mapping is only sound
            // while the underlying file is not truncated or modified by
            // anyone else. Nothing in this function enforces that.
            // NOTE(review): presumably callers accept this risk for files
            // that are being scanned — confirm.
            let mapping = unsafe { memmap2::Mmap::map(&handle) };
            mapping.ok().map(FileBytes::Mmap)
        }
    }
}
pub(crate) fn walker(scope: &Path, glob: Option<&str>) -> Result<ignore::WalkParallel, SrcwalkError> {
let threads = std::env::var("SRCWALK_THREADS")
.ok()
.and_then(|v| v.parse::<usize>().ok())
.unwrap_or_else(|| {
std::thread::available_parallelism().map_or(4, |n| {
let logical = n.get();
if logical <= 8 {
logical
} else {
(logical * 3 / 4).min(24)
}
})
});
let mut builder = WalkBuilder::new(scope);
builder
.follow_links(true)
.hidden(false)
.git_ignore(false)
.git_global(false)
.git_exclude(false)
.ignore(false)
.parents(false)
.threads(threads)
.filter_entry(|entry| {
if entry.file_type().is_some_and(|ft| ft.is_dir()) {
if let Some(name) = entry.file_name().to_str() {
return !SKIP_DIRS.contains(&name);
}
}
true
});
if let Some(pattern) = glob {
if !pattern.is_empty() {
let mut overrides = ignore::overrides::OverrideBuilder::new(scope);
overrides
.add(pattern)
.map_err(|e| SrcwalkError::InvalidQuery {
query: pattern.to_string(),
reason: format!("invalid glob: {e}"),
})?;
builder.overrides(overrides.build().map_err(|e| SrcwalkError::InvalidQuery {
query: pattern.to_string(),
reason: format!("invalid glob: {e}"),
})?);
}
}
Ok(builder.build_parallel())
}
/// Splits a query into its pattern text and a flag saying whether it was
/// written in `/.../` form.
///
/// A query that starts and ends with `/` and has at least one character in
/// between yields `(inner, true)` with the delimiters removed. Anything else
/// (including `"/"` and `"//"`) is returned unchanged with `false`.
pub(crate) fn parse_pattern(query: &str) -> (&str, bool) {
    let delimited = query
        .strip_prefix('/')
        .and_then(|rest| rest.strip_suffix('/'));
    match delimited {
        Some(inner) if !inner.is_empty() => (inner, true),
        _ => (query, false),
    }
}
/// Returns a rough line-count estimate and the modification time of `path`.
///
/// The estimate is the file length divided by 40 (an assumed average of 40
/// bytes per line), never less than 1 for a file whose metadata can be read,
/// and saturating at `u32::MAX` instead of wrapping for extremely large
/// files. The modification time falls back to `UNIX_EPOCH` when it is not
/// available.
///
/// If the metadata cannot be read at all, returns `(0, UNIX_EPOCH)`.
pub(crate) fn file_metadata(path: &Path) -> (u32, SystemTime) {
    let Ok(meta) = std::fs::metadata(path) else {
        return (0, SystemTime::UNIX_EPOCH);
    };
    let mtime = meta.modified().unwrap_or(SystemTime::UNIX_EPOCH);
    // Clamp before narrowing so the cast below is lossless; a bare `as u32`
    // would silently wrap once `len / 40` exceeds `u32::MAX`.
    let est_lines = (meta.len() / 40).clamp(1, u64::from(u32::MAX)) as u32;
    (est_lines, mtime)
}