use std::collections::HashMap;
use std::path::{Component, Path, PathBuf};
use std::sync::Arc;
use tracing::{debug, info};
use crate::datastore::DataStore;
use crate::fs::Fs;
use crate::packs::Pack;
use crate::paths::Pather;
use crate::preprocessing::baseline::{cache_filename_for, hex_sha256, Baseline};
use crate::preprocessing::divergence::DivergenceState;
use crate::preprocessing::PreprocessorRegistry;
use crate::rules::PackEntry;
use crate::{DodotError, Result};
/// Controls whether preprocessing actually renders output or only surfaces
/// previously recorded state.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PreprocessMode {
    /// Expand preprocessor files and write rendered output to the datastore.
    Active,
    /// Render nothing; surface baselines recorded by a previous active run
    /// (see `preprocess_pack_passive`).
    Passive,
}
/// Reject preprocessor output paths that are unsafe to join under a pack:
/// absolute paths, paths containing `..`, and paths with no real name
/// component at all. `preprocessor` and `source_file` are used only to build
/// the error.
fn validate_safe_relative_path(path: &Path, preprocessor: &str, source_file: &Path) -> Result<()> {
    // Shared error constructor so both failure modes report the same
    // preprocessor/source context.
    let make_err = |message: String| -> Result<()> {
        Err(DodotError::PreprocessorError {
            preprocessor: preprocessor.into(),
            source_file: source_file.to_path_buf(),
            message,
        })
    };
    let mut saw_normal = false;
    for component in path.components() {
        match component {
            Component::Normal(_) => saw_normal = true,
            // A bare `.` is harmless and simply ignored.
            Component::CurDir => {}
            // ParentDir, RootDir, and Prefix all allow escaping the pack.
            _ => {
                return make_err(format!(
                    "unsafe path in preprocessor output: {} (absolute or contains `..`)",
                    path.display()
                ));
            }
        }
    }
    if saw_normal {
        Ok(())
    } else {
        make_err(format!(
            "preprocessor produced an empty output path (\"{}\"). This usually means a file like \
             `.tmpl` or `.identity` has no stem after stripping the preprocessor extension — \
             rename the source file so that it has a non-empty name after stripping.",
            path.display()
        ))
    }
}
/// Rebuild `path` keeping only its `Normal` components, dropping `.`, `..`,
/// root, and prefix components. Callers validate safety first; this just
/// canonicalizes the spelling (e.g. `./a/b` becomes `a/b`).
fn normalize_relative(path: &Path) -> PathBuf {
    path.components()
        .filter_map(|component| match component {
            Component::Normal(name) => Some(name),
            _ => None,
        })
        .collect()
}
/// Outcome of preprocessing one pack's entries.
#[derive(Debug)]
pub struct PreprocessResult {
    /// Entries passed through untouched (gated, directories, or no matching
    /// preprocessor).
    pub regular_entries: Vec<PackEntry>,
    /// Entries synthesized by preprocessor expansion; their `absolute_path`
    /// points at the datastore copy, not the pack source.
    pub virtual_entries: Vec<PackEntry>,
    /// Maps datastore path -> original source file path in the pack.
    pub source_map: HashMap<PathBuf, PathBuf>,
    /// Rendered file contents keyed by datastore path (files only, never
    /// directories).
    pub rendered_bytes: HashMap<PathBuf, Arc<[u8]>>,
    /// Renders that were skipped to preserve divergent deployed files.
    pub skipped: Vec<SkippedRender>,
}
/// Record of a render that was skipped because the deployed file diverged
/// from its recorded baseline.
#[derive(Debug, Clone)]
pub struct SkippedRender {
    /// Name of the pack the file belongs to.
    pub pack: String,
    /// Pack-relative path of the virtual (expanded) entry.
    pub virtual_relative: PathBuf,
    /// Datastore path of the deployed file that was preserved.
    pub deployed_path: PathBuf,
    /// Which side(s) changed relative to the baseline.
    pub state: DivergenceState,
}
impl PreprocessResult {
pub fn passthrough(entries: Vec<PackEntry>) -> Self {
Self {
regular_entries: entries,
virtual_entries: Vec::new(),
source_map: HashMap::new(),
rendered_bytes: HashMap::new(),
skipped: Vec::new(),
}
}
pub fn merged_entries(&self) -> Vec<PackEntry> {
let mut all = Vec::with_capacity(self.regular_entries.len() + self.virtual_entries.len());
all.extend(self.regular_entries.iter().cloned());
all.extend(self.virtual_entries.iter().cloned());
all.sort_by(|a, b| a.relative_path.cmp(&b.relative_path));
all
}
}
// Handler name under which all preprocessed output lives in the datastore.
const PREPROCESSED_HANDLER: &str = "preprocessed";

/// Result of comparing a deployed file against its recorded baseline.
enum DivergenceCheck {
    /// Safe to (re)write the rendered output.
    Proceed,
    /// Deployed file diverged from its baseline; preserve it instead of
    /// overwriting.
    Skip {
        state: DivergenceState,
        deployed_path: PathBuf,
    },
}
/// Decide whether the deployed copy of `virtual_relative` may be overwritten.
///
/// Proceeds when any of: no baseline has been recorded, the deployed file
/// does not exist, or the deployed content still hashes to the baseline's
/// recorded render hash. Otherwise the deployed file has been edited out of
/// band, and the caller should preserve it; the returned state also says
/// whether the source changed since the baseline was taken.
///
/// # Errors
/// Propagates baseline-load and deployed-file read failures. A failure to
/// read the *source* file is treated as "source unchanged" rather than an
/// error.
fn check_divergence(
    fs: &dyn Fs,
    paths: &dyn Pather,
    pack_name: &str,
    virtual_relative: &Path,
    source_path: &Path,
) -> Result<DivergenceCheck> {
    let cache_filename = cache_filename_for(virtual_relative);
    let baseline =
        match Baseline::load(fs, paths, pack_name, PREPROCESSED_HANDLER, &cache_filename)? {
            None => return Ok(DivergenceCheck::Proceed),
            Some(b) => b,
        };
    let deployed_path = paths
        .handler_data_dir(pack_name, PREPROCESSED_HANDLER)
        .join(virtual_relative);
    // Nothing deployed yet — nothing to preserve.
    if !fs.exists(&deployed_path) {
        return Ok(DivergenceCheck::Proceed);
    }
    let deployed_bytes = fs.read_file(&deployed_path)?;
    // Deployed content still matches the recorded render: safe to rewrite.
    if hex_sha256(&deployed_bytes) == baseline.rendered_hash {
        return Ok(DivergenceCheck::Proceed);
    }
    // Deployed content diverged. Check whether the source moved too; an
    // unreadable source counts as unchanged.
    let source_changed = fs
        .read_file(source_path)
        .map(|bytes| hex_sha256(&bytes) != baseline.source_hash)
        .unwrap_or(false);
    Ok(DivergenceCheck::Skip {
        state: if source_changed {
            DivergenceState::BothChanged
        } else {
            DivergenceState::OutputChanged
        },
        deployed_path,
    })
}
/// Expand all preprocessor-owned files in a pack and merge the results with
/// the pack's regular entries.
///
/// Entries are partitioned: non-directory files whose name matches a
/// registered preprocessor are expanded; everything else (including entries
/// with a gate failure) passes through untouched. Each expanded file is
/// written into the datastore under the `preprocessed` handler and surfaced
/// as a virtual entry whose `absolute_path` points at the datastore copy.
///
/// Unless `force` is set, expanded files that participate in the divergence
/// guard (tracked renders or those with an explicit deploy mode) are checked
/// against their recorded baseline first; a divergent deployed file is
/// preserved (not overwritten) and reported in `PreprocessResult::skipped`.
///
/// In `PreprocessMode::Passive` nothing is rendered or written; see
/// `preprocess_pack_passive`.
///
/// # Errors
/// Fails on filesystem/datastore errors, when a preprocessor emits an unsafe
/// or empty relative path, when an expanded path collides with an
/// already-claimed relative path in the pack, or when a
/// reverse-merge-capable source still contains unresolved conflict markers.
/// Baseline and sidecar write failures are deliberately non-fatal (logged at
/// debug level).
#[allow(clippy::too_many_arguments)] pub fn preprocess_pack(
    entries: Vec<PackEntry>,
    registry: &PreprocessorRegistry,
    pack: &Pack,
    fs: &dyn Fs,
    datastore: &dyn DataStore,
    paths: &dyn Pather,
    mode: PreprocessMode,
    force: bool,
) -> Result<PreprocessResult> {
    // -- Partition pass -----------------------------------------------------
    let mut regular_entries = Vec::new();
    let mut preprocessor_entries = Vec::new();
    for entry in entries {
        // Gated entries are never preprocessed, even if the name matches.
        if entry.gate_failure.is_some() {
            regular_entries.push(entry);
            continue;
        }
        let filename = entry
            .relative_path
            .file_name()
            .map(|n| n.to_string_lossy().to_string())
            .unwrap_or_default();
        if !entry.is_dir && registry.is_preprocessor_file(&filename) {
            preprocessor_entries.push(entry);
        } else {
            regular_entries.push(entry);
        }
    }
    debug!(
        pack = %pack.name,
        preprocessor = preprocessor_entries.len(),
        regular = regular_entries.len(),
        "partitioned entries"
    );
    // Nothing to expand: short-circuit with a passthrough result.
    if preprocessor_entries.is_empty() {
        return Ok(PreprocessResult {
            regular_entries,
            virtual_entries: Vec::new(),
            source_map: HashMap::new(),
            rendered_bytes: HashMap::new(),
            skipped: Vec::new(),
        });
    }
    // Passive mode surfaces recorded baselines instead of rendering.
    if mode == PreprocessMode::Passive {
        return preprocess_pack_passive(
            preprocessor_entries,
            regular_entries,
            registry,
            pack,
            fs,
            paths,
        );
    }
    // -- Active expansion ---------------------------------------------------
    let mut virtual_entries = Vec::new();
    let mut source_map = HashMap::new();
    let mut rendered_bytes: HashMap<PathBuf, Arc<[u8]>> = HashMap::new();
    let mut skipped: Vec<SkippedRender> = Vec::new();
    // Relative paths already taken by regular entries; grows as expansions
    // are accepted so later expansions cannot collide with earlier ones.
    let mut claimed_paths: std::collections::HashSet<PathBuf> = regular_entries
        .iter()
        .map(|e| e.relative_path.clone())
        .collect();
    for entry in &preprocessor_entries {
        let filename = entry
            .relative_path
            .file_name()
            .map(|n| n.to_string_lossy().to_string())
            .unwrap_or_default();
        // Safe: partition pass only admitted entries with a matching
        // preprocessor.
        let preprocessor = registry
            .find_for_file(&filename)
            .expect("already checked in partition");
        info!(
            pack = %pack.name,
            preprocessor = preprocessor.name(),
            file = %filename,
            "expanding"
        );
        // Reverse-merge-capable sources must not carry unresolved conflict
        // markers; fail early before expansion.
        if preprocessor.supports_reverse_merge() {
            let source_bytes = fs.read_file(&entry.absolute_path)?;
            let source_str = String::from_utf8_lossy(&source_bytes);
            crate::preprocessing::conflict::ensure_no_unresolved_markers(
                &source_str,
                &entry.absolute_path,
            )?;
        }
        let expanded_files = preprocessor.expand(&entry.absolute_path, fs)?;
        for expanded in expanded_files {
            // Reject absolute/`..`/empty paths straight from the
            // preprocessor, before joining anything.
            validate_safe_relative_path(
                &expanded.relative_path,
                preprocessor.name(),
                &entry.absolute_path,
            )?;
            // Re-root the expanded path next to its source file in the pack.
            let virtual_relative = if let Some(parent) = entry.relative_path.parent() {
                if parent == Path::new("") {
                    expanded.relative_path.clone()
                } else {
                    parent.join(&expanded.relative_path)
                }
            } else {
                expanded.relative_path.clone()
            };
            // Validate again after joining, then normalize (drop `.` etc.).
            validate_safe_relative_path(
                &virtual_relative,
                preprocessor.name(),
                &entry.absolute_path,
            )?;
            let virtual_relative = normalize_relative(&virtual_relative);
            if claimed_paths.contains(&virtual_relative) {
                return Err(DodotError::PreprocessorCollision {
                    pack: pack.name.clone(),
                    source_file: filename.clone(),
                    expanded_name: virtual_relative.to_string_lossy().into_owned(),
                });
            }
            // -- Divergence guard: preserve deployed files the user edited --
            let mut skip_path: Option<PathBuf> = None;
            let participates_in_divergence_guard =
                expanded.tracked_render.is_some() || expanded.deploy_mode.is_some();
            if !force && !expanded.is_dir && participates_in_divergence_guard {
                match check_divergence(
                    fs,
                    paths,
                    &pack.name,
                    &virtual_relative,
                    &entry.absolute_path,
                )? {
                    DivergenceCheck::Proceed => {}
                    DivergenceCheck::Skip {
                        state,
                        deployed_path,
                    } => {
                        info!(
                            pack = %pack.name,
                            file = %virtual_relative.display(),
                            ?state,
                            "preserving divergent deployed file (skipping write)"
                        );
                        skipped.push(SkippedRender {
                            pack: pack.name.clone(),
                            virtual_relative: virtual_relative.clone(),
                            deployed_path: deployed_path.clone(),
                            state,
                        });
                        // Remember the preserved file; reused as the
                        // datastore path below instead of writing anew.
                        skip_path = Some(deployed_path);
                    }
                }
            }
            let was_skipped = skip_path.is_some();
            // Write the expanded entry (dir / file-with-mode / plain file),
            // unless the divergence guard told us to keep the deployed copy.
            let datastore_path = if let Some(p) = skip_path {
                p
            } else if expanded.is_dir {
                datastore.write_rendered_dir(
                    &pack.name,
                    PREPROCESSED_HANDLER,
                    &virtual_relative.to_string_lossy(),
                )?
            } else if let Some(mode) = expanded.deploy_mode {
                datastore.write_rendered_file_with_mode(
                    &pack.name,
                    PREPROCESSED_HANDLER,
                    &virtual_relative.to_string_lossy(),
                    &expanded.content,
                    mode,
                )?
            } else {
                datastore.write_rendered_file(
                    &pack.name,
                    PREPROCESSED_HANDLER,
                    &virtual_relative.to_string_lossy(),
                    &expanded.content,
                )?
            };
            debug!(
                pack = %pack.name,
                virtual_path = %virtual_relative.display(),
                datastore_path = %datastore_path.display(),
                is_dir = expanded.is_dir,
                skipped = was_skipped,
                "wrote expanded entry"
            );
            // Record a fresh baseline only for files we actually (re)wrote
            // and that participate in the divergence guard.
            let should_write_baseline = !expanded.is_dir
                && !was_skipped
                && (expanded.tracked_render.is_some() || expanded.deploy_mode.is_some());
            if should_write_baseline {
                let cache_filename = cache_filename_for(&virtual_relative);
                let source_bytes = fs.read_file(&entry.absolute_path)?;
                let baseline = Baseline::build(
                    &entry.absolute_path,
                    &expanded.content,
                    &source_bytes,
                    expanded.tracked_render.as_deref(),
                    expanded.context_hash.as_ref(),
                );
                // Baseline persistence is best-effort: a failure only means
                // the divergence guard cannot fire next run.
                if let Err(err) =
                    baseline.write(fs, paths, &pack.name, PREPROCESSED_HANDLER, &cache_filename)
                {
                    debug!(
                        pack = %pack.name,
                        file = %cache_filename,
                        error = %err,
                        "baseline write failed (non-fatal)"
                    );
                } else {
                    debug!(
                        pack = %pack.name,
                        file = %cache_filename,
                        "baseline written"
                    );
                }
                // Secret line ranges are stored alongside the baseline;
                // also best-effort.
                let sidecar = crate::preprocessing::baseline::SecretsSidecar::new(
                    expanded.secret_line_ranges.clone(),
                );
                if let Err(err) =
                    sidecar.write(fs, paths, &pack.name, PREPROCESSED_HANDLER, &cache_filename)
                {
                    debug!(
                        pack = %pack.name,
                        file = %cache_filename,
                        error = %err,
                        "secrets sidecar write failed (non-fatal)"
                    );
                }
            }
            // Claim the path so later expansions (this pack) collide loudly.
            claimed_paths.insert(virtual_relative.clone());
            source_map.insert(datastore_path.clone(), entry.absolute_path.clone());
            if !expanded.is_dir {
                // Surface bytes: for a preserved file prefer the deployed
                // content, falling back to the fresh render if unreadable.
                let bytes: Arc<[u8]> = if was_skipped {
                    fs.read_file(&datastore_path)
                        .map(Arc::from)
                        .unwrap_or_else(|_| Arc::from(expanded.content.clone()))
                } else {
                    Arc::from(expanded.content.clone())
                };
                rendered_bytes.insert(datastore_path.clone(), bytes);
            }
            virtual_entries.push(PackEntry {
                relative_path: virtual_relative,
                absolute_path: datastore_path,
                is_dir: expanded.is_dir,
                gate_failure: None,
            });
        }
    }
    info!(
        pack = %pack.name,
        virtual_count = virtual_entries.len(),
        "preprocessing complete"
    );
    Ok(PreprocessResult {
        regular_entries,
        virtual_entries,
        source_map,
        rendered_bytes,
        skipped,
    })
}
/// Passive counterpart of `preprocess_pack`: surface virtual entries without
/// rendering or writing anything.
///
/// Each preprocessor file's virtual path is derived by stripping the
/// preprocessor extension from its name. When a baseline from a previous
/// active run exists, its recorded rendered content is surfaced as the
/// entry's bytes; otherwise the entry appears with no rendered bytes (a
/// placeholder until `dodot up` runs). Divergence is still detected and
/// reported via `skipped` so callers can warn, but nothing is preserved or
/// overwritten here.
///
/// NOTE(review): unlike the active path, the joined virtual path is not
/// re-validated with `validate_safe_relative_path` here — presumably safe
/// because `stripped_name` operates on a bare filename; confirm.
///
/// # Errors
/// Fails only if loading a baseline fails; `check_divergence` errors are
/// intentionally swallowed (best-effort reporting in passive mode).
fn preprocess_pack_passive(
    preprocessor_entries: Vec<PackEntry>,
    regular_entries: Vec<PackEntry>,
    registry: &PreprocessorRegistry,
    pack: &Pack,
    fs: &dyn Fs,
    paths: &dyn Pather,
) -> Result<PreprocessResult> {
    let mut virtual_entries = Vec::new();
    let mut source_map = HashMap::new();
    let mut rendered_bytes: HashMap<PathBuf, Arc<[u8]>> = HashMap::new();
    let mut skipped: Vec<SkippedRender> = Vec::new();
    for entry in preprocessor_entries {
        let filename = entry
            .relative_path
            .file_name()
            .map(|n| n.to_string_lossy().to_string())
            .unwrap_or_default();
        // Safe: caller only passes entries admitted by the partition pass.
        let preprocessor = registry
            .find_for_file(&filename)
            .expect("already checked in partition");
        // Virtual name = source name minus the preprocessor extension,
        // re-rooted next to the source file within the pack.
        let stripped = preprocessor.stripped_name(&filename);
        let virtual_relative = match entry.relative_path.parent() {
            Some(parent) if parent != Path::new("") => parent.join(&stripped),
            _ => PathBuf::from(&stripped),
        };
        let virtual_relative = normalize_relative(&virtual_relative);
        // Where the rendered file would live; nothing is written here.
        let datastore_path = paths
            .handler_data_dir(&pack.name, PREPROCESSED_HANDLER)
            .join(&virtual_relative);
        let cache_filename = cache_filename_for(&virtual_relative);
        let baseline =
            match Baseline::load(fs, paths, &pack.name, PREPROCESSED_HANDLER, &cache_filename)? {
                Some(b) => Some(b),
                None => {
                    debug!(
                        pack = %pack.name,
                        file = %virtual_relative.display(),
                        "passive: no baseline yet — surfacing placeholder (run `dodot up` first)"
                    );
                    None
                }
            };
        // Report (but do not act on) divergence. Errors from the check are
        // deliberately ignored — passive mode is read-only/best-effort.
        if baseline.is_some() {
            if let Ok(DivergenceCheck::Skip {
                state,
                deployed_path,
            }) = check_divergence(
                fs,
                paths,
                &pack.name,
                &virtual_relative,
                &entry.absolute_path,
            ) {
                skipped.push(SkippedRender {
                    pack: pack.name.clone(),
                    virtual_relative: virtual_relative.clone(),
                    deployed_path,
                    state,
                });
            }
        }
        // Surface the baseline's recorded render as the entry's bytes.
        if let Some(b) = baseline {
            let bytes: Arc<[u8]> = Arc::from(b.rendered_content.into_bytes());
            rendered_bytes.insert(datastore_path.clone(), bytes);
        }
        source_map.insert(datastore_path.clone(), entry.absolute_path.clone());
        virtual_entries.push(PackEntry {
            relative_path: virtual_relative,
            absolute_path: datastore_path,
            is_dir: false,
            gate_failure: None,
        });
    }
    info!(
        pack = %pack.name,
        virtual_count = virtual_entries.len(),
        skipped_count = skipped.len(),
        "passive preprocessing complete"
    );
    Ok(PreprocessResult {
        regular_entries,
        virtual_entries,
        source_map,
        rendered_bytes,
        skipped,
    })
}
#[cfg(test)]
mod tests;