use std::borrow::Cow;
use std::fmt::Display;
use std::hash::{DefaultHasher, Hash, Hasher};
use std::path::{Path, PathBuf};
use std::sync::{Arc, Mutex};
use std::time::SystemTime;
use anyhow::Result;
use ignore::WalkState;
use itertools::zip_eq;
use owo_colors::OwoColorize;
use prek_consts::CONFIG_FILENAMES;
use rustc_hash::{FxHashMap, FxHashSet};
use serde::{Deserialize, Serialize};
use thiserror::Error;
use tracing::{debug, error, instrument, trace};
use crate::cli::run::Selectors;
use crate::config::{self, Config, read_config};
use crate::fs::Simplified;
use crate::git::GIT_ROOT;
use crate::hook::HookSpec;
use crate::hook::{self, Hook, HookBuilder, Repo};
use crate::store::{CacheBucket, Store};
use crate::{git, store, warn_user};
/// Errors surfaced while discovering projects/workspaces and initializing hooks.
#[derive(Error, Debug)]
pub(crate) enum Error {
    /// Configuration file could not be read or parsed.
    #[error(transparent)]
    Config(#[from] config::Error),
    /// Hook construction or resolution failed.
    #[error(transparent)]
    Hook(#[from] hook::Error),
    /// Underlying git operation failed (e.g. locating the repository root).
    #[error(transparent)]
    Git(#[from] anyhow::Error),
    /// No supported configuration file was found during discovery.
    #[error(
        "No `prek.toml` or `.pre-commit-config.yaml` found in the current directory or parent directories.\n\n{} If you just added one, rerun your command with the `--refresh` flag to rescan the workspace.",
        "hint:".yellow().bold(),
    )]
    MissingConfigFile,
    /// A configured hook id does not exist in the repo's manifest.
    #[error("Hook `{hook}` not present in repo `{repo}`")]
    HookNotFound { hook: String, repo: String },
    /// Store (cache) operation failed.
    #[error(transparent)]
    Store(#[from] store::Error),
}
/// Progress callbacks for repo cloning during hook initialization.
pub(crate) trait HookInitReporter {
    /// Called when a clone of `repo` begins; returns an id used to pair the
    /// matching `on_clone_complete` call.
    fn on_clone_start(&self, repo: &str) -> usize;
    /// Called when the clone identified by `id` finishes.
    fn on_clone_complete(&self, id: usize);
    /// Called once all initialization work is done.
    fn on_complete(&self);
}
/// A single configured project: one configuration file plus the repos and
/// hooks it declares. A workspace may contain many projects.
#[derive(Clone)]
pub(crate) struct Project {
    // Root directory the project operates in.
    root: PathBuf,
    // Path of the configuration file this project was loaded from.
    // Also serves as the project's identity for `Eq`/`Hash` below.
    config_path: PathBuf,
    // Project directory relative to the workspace root; empty for the
    // workspace root project itself.
    relative_path: PathBuf,
    // Stable index assigned after workspace-wide sorting.
    idx: usize,
    // Parsed configuration.
    config: Config,
    // Initialized repos, parallel to `config.repos` (filled by `init_repos`).
    repos: Vec<Arc<Repo>>,
}
impl std::fmt::Debug for Project {
    /// Manual `Debug` that shows only the interesting fields;
    /// `finish_non_exhaustive` renders the omitted ones as `..`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut dbg = f.debug_struct("Project");
        dbg.field("relative_path", &self.relative_path);
        dbg.field("idx", &self.idx);
        dbg.field("config", &self.config);
        dbg.field("repos", &self.repos);
        dbg.finish_non_exhaustive()
    }
}
impl Display for Project {
    /// Renders the project as its workspace-relative path, or `.` for the
    /// workspace root project.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        if self.is_root() {
            return write!(f, ".");
        }
        write!(f, "{}", self.relative_path.display())
    }
}
// Project identity is defined solely by its configuration file path.
// `PartialEq`/`Eq` and `Hash` below must stay consistent with each other.
impl PartialEq for Project {
    fn eq(&self, other: &Self) -> bool {
        self.config_path == other.config_path
    }
}

impl Eq for Project {}

impl Hash for Project {
    fn hash<H: Hasher>(&self, state: &mut H) {
        // Hash the same field used by `eq` so the `Hash`/`Eq` contract holds.
        self.config_path.hash(state);
    }
}
impl Project {
    /// Loads a project from the given configuration file.
    ///
    /// `root` overrides the project root; when `None`, the directory
    /// containing the config file is used. Relative, non-URL `repo` entries
    /// are resolved against the config file's directory when they point at an
    /// existing directory, so local repos work regardless of the process cwd.
    pub(crate) fn from_config_file(
        config_path: Cow<'_, Path>,
        root: Option<PathBuf>,
    ) -> Result<Self, Error> {
        debug!(
            path = %config_path.user_display(),
            "Loading project configuration"
        );
        let mut config = read_config(&config_path)?;
        let size = config.repos.len();
        let config_dir = config_path
            .parent()
            .expect("config file must have a parent");
        // Rewrite relative local repo paths to be anchored at the config file.
        // http(s) URLs and absolute paths are left untouched, and the rewrite
        // only happens when the resolved directory actually exists on disk.
        for repo in &mut config.repos {
            if let config::Repo::Remote(remote) = repo {
                let repo_path = Path::new(&remote.repo);
                if !remote.repo.starts_with("http://")
                    && !remote.repo.starts_with("https://")
                    && repo_path.is_relative()
                {
                    let resolved = config_dir.join(repo_path);
                    if resolved.is_dir() {
                        remote.repo = resolved.to_string_lossy().into_owned();
                    }
                }
            }
        }
        let root = root.unwrap_or_else(|| config_dir.to_path_buf());
        Ok(Self {
            root,
            config,
            config_path: config_path.into_owned(),
            // `idx` and `relative_path` are filled in later by the workspace.
            idx: 0,
            relative_path: PathBuf::new(),
            repos: Vec::with_capacity(size),
        })
    }
fn find_config(path: &Path) -> Option<PathBuf> {
for name in CONFIG_FILENAMES {
let file = path.join(name);
if file.is_file() {
return Some(file);
}
}
None
}
fn find_all_configs(path: &Path) -> Vec<(&'static str, PathBuf)> {
let mut configs = Vec::new();
for &name in CONFIG_FILENAMES {
let file = path.join(name);
if file.is_file() {
configs.push((name, file));
}
}
configs
}
pub(crate) fn from_directory(path: &Path) -> Result<Self, Error> {
let present = Self::find_all_configs(path);
let Some((_, selected)) = present.first() else {
return Err(Error::MissingConfigFile);
};
if present.len() > 1 {
let found = present
.iter()
.map(|(name, _)| format!("`{name}`"))
.collect::<Vec<_>>()
.join(", ");
warn_user!(
"Multiple configuration files found ({found}); using `{selected}`",
found = found,
selected = selected.display(),
);
}
Self::from_config_file(Cow::Borrowed(selected), None)
}
    /// Discovers a single project for non-workspace commands.
    ///
    /// With an explicit `config_file`, the project is rooted at the git root;
    /// otherwise the nearest directory (from `dir` upward) containing a
    /// configuration file becomes the project.
    pub(crate) fn discover(config_file: Option<&Path>, dir: &Path) -> Result<Project, Error> {
        let git_root = GIT_ROOT.as_ref().map_err(|e| Error::Git(e.into()))?;
        if let Some(config) = config_file {
            return Project::from_config_file(config.into(), Some(git_root.clone()));
        }
        let workspace_root = Workspace::find_root(None, dir)?;
        debug!("Found project root at `{}`", workspace_root.user_display());
        Project::from_directory(&workspace_root)
    }
    /// Sets the project path relative to the workspace root.
    pub(crate) fn with_relative_path(&mut self, relative_path: PathBuf) {
        self.relative_path = relative_path;
    }

    /// Sets the project's stable index within the workspace ordering.
    fn with_idx(&mut self, idx: usize) {
        self.idx = idx;
    }

    /// The parsed configuration.
    pub(crate) fn config(&self) -> &Config {
        &self.config
    }

    /// Path of the configuration file this project was loaded from.
    pub(crate) fn config_file(&self) -> &Path {
        &self.config_path
    }

    /// The project root directory.
    pub(crate) fn path(&self) -> &Path {
        &self.root
    }

    /// Project directory relative to the workspace root (empty for the root).
    pub(crate) fn relative_path(&self) -> &Path {
        &self.relative_path
    }

    /// Whether this project is the workspace root project.
    pub(crate) fn is_root(&self) -> bool {
        self.relative_path.as_os_str().is_empty()
    }

    /// Nesting depth below the workspace root (0 for the root project).
    pub(crate) fn depth(&self) -> usize {
        self.relative_path.components().count()
    }

    /// Stable index assigned by the workspace sort.
    pub(crate) fn idx(&self) -> usize {
        self.idx
    }
    /// Initializes this project's repos and builds its hooks.
    ///
    /// Consumes `self` because each built hook keeps an `Arc` to the project.
    pub(crate) async fn init_hooks(
        mut self,
        store: &Store,
        reporter: Option<&dyn HookInitReporter>,
    ) -> Result<Vec<Hook>, Error> {
        self.init_repos(store, reporter).await?;
        let project = Arc::new(self);
        let hooks = project.internal_init_hooks().await?;
        Ok(hooks)
    }
    /// Clones every remote repo referenced by the configuration (deduplicated
    /// by key), then populates `self.repos` parallel to `config.repos`.
    ///
    /// NOTE(review): this largely mirrors `Workspace::init_repos`, which does
    /// the same over all projects; keep the two in sync.
    async fn init_repos(
        &mut self,
        store: &Store,
        reporter: Option<&dyn HookInitReporter>,
    ) -> Result<(), Error> {
        let repos = {
            // Deduplicate remotes so each unique key is cloned only once.
            let mut seen = FxHashSet::default();
            let remotes_iter = self.config.repos.iter().filter_map(|repo| match repo {
                config::Repo::Remote(repo) if seen.insert(repo.key()) => Some(repo),
                _ => None,
            });
            let cloned_repos = store.clone_repos(remotes_iter, reporter).await?;
            let mut remote_repos = FxHashMap::default();
            for (key, path) in cloned_repos {
                let repo = Arc::new(Repo::remote(
                    key.repo().to_string(),
                    key.rev().to_string(),
                    path,
                )?);
                remote_repos.insert(key, repo);
            }
            // Build the per-config repo list, sharing the cloned remotes.
            let mut repos = Vec::with_capacity(self.config.repos.len());
            for repo in &self.config.repos {
                match repo {
                    config::Repo::Remote(repo) => {
                        let key = repo.key();
                        // Every remote key was cloned above, so lookup must succeed.
                        let repo = remote_repos.get(&key).expect("repo not found");
                        repos.push(repo.clone());
                    }
                    config::Repo::Local(repo) => {
                        let repo = Repo::local(repo.hooks.clone());
                        repos.push(Arc::new(repo));
                    }
                    config::Repo::Meta(repo) => {
                        let repo = Repo::meta(repo.hooks.clone());
                        repos.push(Arc::new(repo));
                    }
                    config::Repo::Builtin(repo) => {
                        let repo = Repo::builtin(repo.hooks.clone());
                        repos.push(Arc::new(repo));
                    }
                }
            }
            repos
        };
        self.repos = repos;
        Ok(())
    }
async fn internal_init_hooks(self: Arc<Self>) -> Result<Vec<Hook>, Error> {
let mut hooks = Vec::new();
for (repo_config, repo) in zip_eq(self.config.repos.iter(), self.repos.iter()) {
match repo_config {
config::Repo::Remote(repo_config) => {
for hook_config in &repo_config.hooks {
let Some(manifest_hook) = repo.get_hook(&hook_config.id) else {
return Err(Error::HookNotFound {
hook: hook_config.id.clone(),
repo: repo.to_string(),
});
};
let mut hook_spec = manifest_hook.clone();
hook_spec.apply_remote_hook_overrides(hook_config);
let builder = HookBuilder::new(
self.clone(),
Arc::clone(repo),
hook_spec,
hooks.len(),
);
let hook = builder.build().await?;
hooks.push(hook);
}
}
config::Repo::Local(repo_config) => {
for hook_config in &repo_config.hooks {
let hook_spec = HookSpec::from(hook_config.clone());
let builder = HookBuilder::new(
self.clone(),
Arc::clone(repo),
hook_spec,
hooks.len(),
);
let hook = builder.build().await?;
hooks.push(hook);
}
}
config::Repo::Meta(repo_config) => {
for hook_config in &repo_config.hooks {
let hook_spec = HookSpec::from(hook_config.clone());
let builder = HookBuilder::new(
self.clone(),
Arc::clone(repo),
hook_spec,
hooks.len(),
);
let hook = builder.build().await?;
hooks.push(hook);
}
}
config::Repo::Builtin(repo_config) => {
for hook_config in &repo_config.hooks {
let hook_spec = HookSpec::from(hook_config.clone());
let builder = HookBuilder::new(
self.clone(),
Arc::clone(repo),
hook_spec,
hooks.len(),
);
let hook = builder.build().await?;
hooks.push(hook);
}
}
}
}
Ok(hooks)
}
}
/// Cached metadata for one project configuration file; a change in mtime or
/// size invalidates the workspace cache.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct CachedConfigFile {
    // Path of the configuration file.
    path: PathBuf,
    // Last-modified time captured when the cache entry was written.
    modified: SystemTime,
    // File size in bytes captured when the cache entry was written.
    size: u64,
}
/// On-disk (JSON) cache of workspace discovery results, keyed by workspace
/// root, so repeated runs can skip the directory walk.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub(crate) struct WorkspaceCache {
    // Cache format version; entries with a different version are discarded.
    version: u32,
    // Workspace root this cache entry describes.
    workspace_root: PathBuf,
    // When the entry was written; entries expire after `MAX_CACHE_AGE`.
    created_at: SystemTime,
    // Metadata snapshot of every discovered project config file.
    config_files: Vec<CachedConfigFile>,
}
impl WorkspaceCache {
    /// Bump when the serialized format changes; mismatched entries are discarded.
    const CURRENT_VERSION: u32 = 1;
    /// Maximum cache age in seconds (one hour) before an entry is stale.
    const MAX_CACHE_AGE: u64 = 60 * 60;
fn new(workspace_root: PathBuf, projects: &[Project]) -> Self {
let mut config_files = Vec::new();
for project in projects {
if let Ok(metadata) = std::fs::metadata(&project.config_path) {
config_files.push(CachedConfigFile {
path: project.config_path.clone(),
modified: metadata.modified().unwrap_or(SystemTime::UNIX_EPOCH),
size: metadata.len(),
});
}
}
Self {
version: Self::CURRENT_VERSION,
created_at: SystemTime::now(),
workspace_root,
config_files,
}
}
fn is_valid(&self) -> bool {
if let Ok(elapsed) = self.created_at.elapsed() {
if elapsed.as_secs() > Self::MAX_CACHE_AGE {
debug!(
"Cache is too old ({}s > {}s), invalidating",
elapsed.as_secs(),
Self::MAX_CACHE_AGE
);
return false;
}
}
for cached_file in &self.config_files {
if let Ok(metadata) = std::fs::metadata(&cached_file.path) {
let current_modified = metadata.modified().unwrap_or(SystemTime::UNIX_EPOCH);
let current_size = metadata.len();
if current_modified != cached_file.modified || current_size != cached_file.size {
debug!(
path = %cached_file.path.display(),
"Config file changed, invalidating cache"
);
return false;
}
} else {
debug!(
path = %cached_file.path.display(),
"Config file no longer exists, invalidating cache"
);
return false;
}
}
if !self.workspace_root.exists() {
debug!("Workspace root no longer exists, invalidating cache");
return false;
}
true
}
    /// Location of the cache file for `workspace_root`: a per-root file named
    /// by a hash of the root path under `<cache>/workspace/`.
    ///
    /// NOTE(review): `DefaultHasher` output is deterministic within one Rust
    /// release but not guaranteed stable across releases; a toolchain upgrade
    /// may orphan old entries, which is harmless since stale files are simply
    /// never read again.
    fn cache_path(store: &Store, workspace_root: &Path) -> PathBuf {
        let mut hasher = DefaultHasher::new();
        workspace_root.hash(&mut hasher);
        let digest = hex::encode(hasher.finish().to_le_bytes());
        store
            .cache_path(CacheBucket::Prek)
            .join("workspace")
            .join(digest)
    }
fn load(store: &Store, workspace_root: &Path, refresh: bool) -> Option<Self> {
if refresh {
return None;
}
let cache_path = Self::cache_path(store, workspace_root);
match std::fs::read_to_string(&cache_path) {
Ok(content) => match serde_json::from_str::<Self>(&content) {
Ok(cache) => {
if cache.version == Self::CURRENT_VERSION && cache.is_valid() {
Some(cache)
} else {
let _ = std::fs::remove_file(&cache_path);
None
}
}
Err(e) => {
debug!("Failed to deserialize cache: {}", e);
let _ = std::fs::remove_file(&cache_path);
None
}
},
Err(e) if e.kind() == std::io::ErrorKind::NotFound => None,
Err(e) => {
debug!("Failed to read cache file: {}", e);
None
}
}
}
    /// Serializes this cache entry as pretty JSON to its per-root cache file,
    /// creating parent directories as needed.
    fn save(&self, store: &Store) -> Result<()> {
        let cache_path = Self::cache_path(store, &self.workspace_root);
        if let Some(parent) = cache_path.parent() {
            std::fs::create_dir_all(parent)?;
        }
        let content = serde_json::to_string_pretty(self)?;
        std::fs::write(&cache_path, content)?;
        Ok(())
    }
pub(crate) fn cached_config_paths(store: &Store) -> FxHashSet<PathBuf> {
let mut paths: FxHashSet<PathBuf> = FxHashSet::default();
let workspace_cache_root = store.cache_path(CacheBucket::Prek).join("workspace");
let entries = match fs_err::read_dir(&workspace_cache_root) {
Ok(entries) => entries,
Err(err) if err.kind() == std::io::ErrorKind::NotFound => return paths,
Err(err) => {
debug!(path = %workspace_cache_root.display(), %err, "Failed to read workspace cache directory for tracking bootstrap");
return paths;
}
};
for entry in entries {
let entry = match entry {
Ok(entry) => entry,
Err(err) => {
debug!(%err, "Failed to read workspace cache entry for tracking bootstrap");
continue;
}
};
let path = entry.path();
if !path.is_file() {
continue;
}
let content = match fs_err::read_to_string(&path) {
Ok(content) => content,
Err(err) => {
debug!(path = %path.display(), %err, "Failed to read workspace cache file for tracking bootstrap");
continue;
}
};
let cache: WorkspaceCache = match serde_json::from_str(&content) {
Ok(cache) => cache,
Err(err) => {
debug!(path = %path.display(), %err, "Failed to parse workspace cache file for tracking bootstrap");
continue;
}
};
if cache.version != WorkspaceCache::CURRENT_VERSION {
continue;
}
for file in cache.config_files {
paths.insert(file.path);
}
}
paths
}
}
/// A collection of projects discovered under a common root.
pub(crate) struct Workspace {
    // Workspace root directory.
    root: PathBuf,
    // Projects active for the current run (possibly filtered by selectors).
    projects: Vec<Arc<Project>>,
    // Every project discovered in the workspace, unfiltered.
    all_projects: Vec<Project>,
}
impl Workspace {
    /// Finds the workspace root: the nearest ancestor of `dir` (inclusive)
    /// containing a configuration file, never walking above the git root.
    ///
    /// With an explicit `config_file` the git root itself is returned.
    pub(crate) fn find_root(config_file: Option<&Path>, dir: &Path) -> Result<PathBuf, Error> {
        let git_root = GIT_ROOT.as_ref().map_err(|e| Error::Git(e.into()))?;
        if config_file.is_some() {
            return Ok(git_root.clone());
        }
        let workspace_root = dir
            .ancestors()
            // Stop before leaving the repository: the git root's parent (and
            // everything above it) is out of bounds. A rootless git root
            // (no parent) imposes no bound.
            .take_while(|p| git_root.parent().map(|root| *p != root).unwrap_or(true))
            .find(|p| Project::find_config(p).is_some())
            .ok_or(Error::MissingConfigFile)?
            .to_path_buf();
        debug!("Found workspace root at `{}`", workspace_root.display());
        Ok(workspace_root)
    }
    /// Discovers the workspace: either a single explicitly-given config file,
    /// a cached discovery result, or a fresh parallel directory walk.
    ///
    /// `selectors`, when present, filters which projects are active for this
    /// run (`all_projects` still records everything found). `refresh` forces
    /// a fresh walk, ignoring the on-disk cache.
    #[instrument(level = "trace", skip(store, selectors))]
    pub(crate) fn discover(
        store: &Store,
        root: PathBuf,
        config: Option<PathBuf>,
        selectors: Option<&Selectors>,
        refresh: bool,
    ) -> Result<Self, Error> {
        // Explicit config: a one-project workspace, no walk, no cache.
        if let Some(config) = config {
            let project = Project::from_config_file(config.into(), Some(root.clone()))?;
            let arc_project = Arc::new(project.clone());
            return Ok(Self {
                root,
                projects: vec![arc_project],
                all_projects: vec![project],
            });
        }
        // Try the on-disk cache first (skipped entirely when `refresh` is set).
        let projects = if let Some(cache) = WorkspaceCache::load(store, &root, refresh) {
            debug!("Loaded workspace from cache");
            let projects: Result<Vec<_>, _> = cache
                .config_files
                .into_iter()
                .map(
                    |config_file| match Project::from_config_file(config_file.path.into(), None) {
                        Ok(mut project) => {
                            let relative_path = project
                                .config_file()
                                .parent()
                                .and_then(|p| p.strip_prefix(&root).ok())
                                .expect("Entry path should be relative to the root")
                                .to_path_buf();
                            project.with_relative_path(relative_path);
                            Ok(project)
                        }
                        Err(e) => {
                            debug!("Failed to load cached project config: {}", e);
                            Err(e)
                        }
                    },
                )
                .collect();
            match projects {
                Ok(projects) if !projects.is_empty() => Some(projects),
                // Any cached-load failure falls back to a fresh walk
                // instead of surfacing an error.
                _ => {
                    debug!("Cache invalid or empty, performing fresh discovery");
                    None
                }
            }
        } else {
            None
        };
        let mut all_projects = if let Some(projects) = projects {
            projects
        } else {
            debug!("Performing fresh workspace discovery");
            let projects = Self::discover_fresh(&root, selectors)?;
            // Persist the result for next time; failure to cache is non-fatal.
            let cache = WorkspaceCache::new(root.clone(), &projects);
            if let Err(e) = cache.save(store) {
                debug!("Failed to save workspace cache: {}", e);
            }
            projects
        };
        // Deterministic ordering (deepest first) and stable indices.
        Self::sort_and_index_projects(&mut all_projects);
        // Apply selectors, if any, to pick the active subset.
        let projects = if let Some(selectors) = selectors {
            let selected = all_projects
                .iter()
                .filter(|p| selectors.matches_path(p.relative_path()))
                .cloned()
                .map(Arc::new)
                .collect::<Vec<_>>();
            if selected.is_empty() {
                return Err(Error::MissingConfigFile);
            }
            selected
        } else {
            all_projects
                .iter()
                .cloned()
                .map(Arc::new)
                .collect::<Vec<_>>()
        };
        if projects.is_empty() {
            return Err(Error::MissingConfigFile);
        }
        Ok(Self {
            root,
            projects,
            all_projects,
        })
    }
    /// Walks the directory tree under `root` in parallel, collecting every
    /// directory that contains a configuration file.
    ///
    /// Skips cookiecutter template directories and git submodules. A broken
    /// project config aborts the walk only when that project matches the
    /// active `selectors`; otherwise it is logged and its subtree skipped.
    fn discover_fresh(root: &Path, selectors: Option<&Selectors>) -> Result<Vec<Project>, Error> {
        // Shared accumulator across walker threads: storing `Err` records a
        // fatal error and the walk is quit.
        let projects = Mutex::new(Ok(Vec::new()));
        let git_root = GIT_ROOT.as_ref().map_err(|e| Error::Git(e.into()))?;
        // Submodules are separate repositories and must not be scanned;
        // failure to list them degrades gracefully to scanning everything.
        let submodules = git::list_submodules(git_root).unwrap_or_else(|e| {
            error!("Failed to list git submodules: {e}");
            Vec::new()
        });
        ignore::WalkBuilder::new(root)
            .follow_links(false)
            .add_custom_ignore_filename(".prekignore")
            .build_parallel()
            .run(|| {
                Box::new(|result| {
                    let Ok(entry) = result else {
                        return WalkState::Continue;
                    };
                    let Some(file_type) = entry.file_type() else {
                        return WalkState::Continue;
                    };
                    // Only directories can be projects.
                    if !file_type.is_dir() {
                        return WalkState::Continue;
                    }
                    // Skip `{{ ... cookiecutter ... }}` template directories:
                    // their configs are templates, not real projects.
                    if entry.file_name().to_str().is_some_and(|filename| {
                        filename.starts_with("{{")
                            && filename.ends_with("}}")
                            && filename.contains("cookiecutter")
                    }) {
                        trace!(
                            path = %entry.path().user_display(),
                            "Skipping cookiecutter template directory"
                        );
                        return WalkState::Skip;
                    }
                    if submodules
                        .iter()
                        .any(|submodule| entry.path().starts_with(submodule))
                    {
                        trace!(
                            path = %entry.path().user_display(),
                            "Skipping git submodule"
                        );
                        return WalkState::Skip;
                    }
                    match Project::from_directory(entry.path()) {
                        Ok(mut project) => {
                            let relative_path = entry
                                .into_path()
                                .strip_prefix(root)
                                .expect("Entry path should be relative to the root")
                                .to_path_buf();
                            project.with_relative_path(relative_path);
                            // Only push while no thread has recorded an error.
                            if let Ok(projects) = projects.lock().unwrap().as_mut() {
                                projects.push(project);
                            }
                        }
                        // Directories without a config are simply not projects.
                        Err(Error::MissingConfigFile) => {}
                        Err(e) => {
                            // Fatal only when the user asked for this project
                            // explicitly via selectors.
                            if let Some(selectors) = selectors {
                                let relative_path = entry
                                    .path()
                                    .strip_prefix(root)
                                    .expect("Entry path should be relative to the root");
                                if selectors.matches_path(relative_path) {
                                    *projects.lock().unwrap() = Err(e);
                                    return WalkState::Quit;
                                }
                            }
                            error!(
                                path = %entry.path().user_display(),
                                "Skipping project due to error: {e}"
                            );
                            return WalkState::Skip;
                        }
                    }
                    WalkState::Continue
                })
            });
        let projects = projects.into_inner().unwrap()?;
        if projects.is_empty() {
            return Err(Error::MissingConfigFile);
        }
        Ok(projects)
    }
fn sort_and_index_projects(projects: &mut [Project]) {
projects.sort_by(|a, b| {
b.depth()
.cmp(&a.depth())
.then_with(|| a.relative_path.cmp(&b.relative_path))
});
for (idx, project) in projects.iter_mut().enumerate() {
project.with_idx(idx);
}
}
    /// The workspace root directory.
    pub(crate) fn root(&self) -> &Path {
        &self.root
    }

    /// Projects active for this run (after selector filtering).
    pub(crate) fn projects(&self) -> &[Arc<Project>] {
        &self.projects
    }

    /// Every project discovered in the workspace, unfiltered.
    pub(crate) fn all_projects(&self) -> &[Project] {
        &self.all_projects
    }
    /// Clones every remote repo referenced by any active project (deduplicated
    /// workspace-wide by key), then populates each project's `repos` list
    /// parallel to its `config.repos`.
    ///
    /// NOTE(review): mirrors `Project::init_repos`; keep the two in sync.
    async fn init_repos(
        &mut self,
        store: &Store,
        reporter: Option<&dyn HookInitReporter>,
    ) -> Result<(), Error> {
        let project_repos = {
            // Deduplicate remotes so each unique key is cloned only once,
            // even when several projects reference the same repo.
            let mut seen = FxHashSet::default();
            let remotes_iter = self
                .projects
                .iter()
                .flat_map(|proj| proj.config.repos.iter())
                .filter_map(|repo| match repo {
                    config::Repo::Remote(repo) if seen.insert(repo.key()) => Some(repo),
                    _ => None,
                });
            let cloned_repos = store.clone_repos(remotes_iter, reporter).await?;
            let mut remote_repos = FxHashMap::default();
            for (key, path) in cloned_repos {
                let repo = Arc::new(Repo::remote(
                    key.repo().to_string(),
                    key.rev().to_string(),
                    path,
                )?);
                remote_repos.insert(key, repo);
            }
            // Build one repo list per project, sharing the cloned remotes.
            self.projects
                .iter()
                .map(|project| {
                    let mut repos = Vec::with_capacity(project.config.repos.len());
                    for repo in &project.config.repos {
                        match repo {
                            config::Repo::Remote(repo) => {
                                let key = repo.key();
                                // Every remote key was cloned above, so lookup must succeed.
                                let repo = remote_repos.get(&key).expect("repo not found");
                                repos.push(repo.clone());
                            }
                            config::Repo::Local(repo) => {
                                let repo = Repo::local(repo.hooks.clone());
                                repos.push(Arc::new(repo));
                            }
                            config::Repo::Meta(repo) => {
                                let repo = Repo::meta(repo.hooks.clone());
                                repos.push(Arc::new(repo));
                            }
                            config::Repo::Builtin(repo) => {
                                let repo = Repo::builtin(repo.hooks.clone());
                                repos.push(Arc::new(repo));
                            }
                        }
                    }
                    Ok(repos)
                })
                .collect::<Result<Vec<_>, Error>>()?
        };
        for (project, repos) in zip_eq(self.projects.iter_mut(), project_repos) {
            // Safe while each project `Arc` is uniquely held by this
            // workspace; panics if a caller cloned one of the `Arc`s first.
            Arc::get_mut(project).unwrap().repos = repos;
        }
        Ok(())
    }
pub(crate) async fn init_hooks(
&mut self,
store: &Store,
reporter: Option<&dyn HookInitReporter>,
) -> Result<Vec<Hook>, Error> {
self.init_repos(store, reporter).await?;
let mut hooks = Vec::new();
for project in &self.projects {
let project_hooks = Arc::clone(project).internal_init_hooks().await?;
hooks.extend(project_hooks);
}
reporter.map(HookInitReporter::on_complete);
Ok(hooks)
}
    /// Verifies that every active project's configuration file is staged in
    /// git, returning an actionable error listing any that are not.
    pub(crate) async fn check_configs_staged(&self) -> Result<()> {
        let config_files = self
            .projects
            .iter()
            .map(|project| project.config_file())
            .collect::<Vec<_>>();
        let non_staged = git::files_not_staged(&config_files).await?;
        let git_root = GIT_ROOT.as_ref()?;
        if !non_staged.is_empty() {
            // Join onto the git root so displayed paths are unambiguous.
            let non_staged = non_staged
                .into_iter()
                .map(|p| git_root.join(p))
                .collect::<Vec<_>>();
            match non_staged.as_slice() {
                // Single file: suggest the exact `git add` command.
                [filename] => anyhow::bail!(
                    "prek configuration file is not staged, run `{}` to stage it",
                    format!("git add {}", filename.user_display()).cyan()
                ),
                _ => anyhow::bail!(
                    "The following configuration files are not staged, `git add` them first:\n{}",
                    non_staged
                        .iter()
                        .map(|p| format!(" {}", p.user_display()))
                        .collect::<Vec<_>>()
                        .join("\n")
                ),
            }
        }
        Ok(())
    }
}