dodot_lib/preprocessing/mod.rs
1//! Preprocessing pipeline — transforms source files before handler dispatch.
2//!
3//! Preprocessors expand files whose version-controlled source differs from
4//! the deployed artifact (templates, plists, encrypted secrets). The
5//! preprocessing phase runs before handler dispatch, producing virtual
6//! entries that downstream handlers (symlink, shell, path, install,
7//! homebrew) consume transparently.
8//!
9//! See `docs/proposals/preprocessing-pipeline.lex` for the full design.
10
11pub mod baseline;
12pub mod conflict;
13pub mod divergence;
14pub mod identity;
15pub mod no_reverse;
16pub mod pipeline;
17pub mod reverse_merge;
18pub mod template;
19pub mod unarchive;
20
21pub use pipeline::PreprocessMode;
22
23use std::path::{Path, PathBuf};
24
25use serde::Serialize;
26
27use crate::fs::Fs;
28use crate::Result;
29
30/// The safety model for a preprocessor's transformation.
31#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
32pub enum TransformType {
33 /// Source generates destination; reversal is heuristic (templates).
34 Generative,
35 /// Source and destination are lossless representations (plists).
36 Representational,
37 /// Source is decoded on deploy; no reverse path (GPG).
38 Opaque,
39}
40
/// One output entry emitted by a preprocessor when it expands a source
/// file.
///
/// Build values with a plain struct literal. Tests typically write
/// `ExpandedFile { relative_path, content, ..Default::default() }` so
/// the optional cache-related fields fall back to their defaults.
#[derive(Debug, Clone, Default)]
pub struct ExpandedFile {
    /// Location of this entry relative to the expansion output; in the
    /// common case this is just a filename.
    pub relative_path: PathBuf,
    /// Raw bytes of the expanded file.
    pub content: Vec<u8>,
    /// `true` when this entry marks a directory rather than a file.
    pub is_dir: bool,
    /// Rendered output annotated with markers. Generative preprocessors
    /// that support cache-backed reverse-diff (templates) fill this in.
    /// It stays `None` for Representational and Opaque preprocessors,
    /// and for generative ones that do not track variable boundaries
    /// (e.g. unarchive).
    ///
    /// If set, the pipeline stores the string in the baseline cache.
    /// The clean filter and `dodot transform check` can then compute
    /// reverse-diffs without rendering again, which matters because a
    /// re-render can re-trigger secret-provider auth prompts.
    pub tracked_render: Option<String>,
    /// SHA-256 digest of the rendering context (variables and env
    /// values resolved at render time). `None` when the preprocessor
    /// has no meaningful notion of a context.
    ///
    /// In the baseline cache the pipeline stores this next to the
    /// source-file hash and the rendered-content hash. Both `dodot up`
    /// re-rendering and the install/homebrew sentinels consult the
    /// context hash to decide whether work is stale.
    pub context_hash: Option<[u8; 32]>,
}
74
75/// The core preprocessor abstraction.
76///
77/// Each preprocessor is a small struct that implements this trait.
78/// Preprocessors are stored in a [`PreprocessorRegistry`] and dispatched
79/// by file extension at preprocessing time.
80///
81/// Preprocessors are pure transformers — they read source files and
82/// produce expanded content. Writing to the datastore is handled by the
83/// pipeline, not by individual preprocessors.
84pub trait Preprocessor: Send + Sync {
85 /// Unique name for this preprocessor (e.g. `"template"`, `"plist"`).
86 fn name(&self) -> &str;
87
88 /// The safety model for this transformation.
89 fn transform_type(&self) -> TransformType;
90
91 /// Whether this preprocessor handles a file with the given name.
92 fn matches_extension(&self, filename: &str) -> bool;
93
94 /// Strip the preprocessor extension to get the logical filename.
95 /// e.g. `"config.toml.tmpl"` → `"config.toml"`.
96 fn stripped_name(&self, filename: &str) -> String;
97
98 /// Expand the source file into one or more output files.
99 ///
100 /// For single-file preprocessors (templates): returns one entry.
101 /// For multi-file preprocessors (archives): returns many entries.
102 ///
103 /// The `source` path points to the original file in the pack directory.
104 ///
105 /// # Memory
106 ///
107 /// Expanded content is held fully in memory via [`Vec<u8>`]. This is
108 /// appropriate for dotfile-sized payloads (configs, small scripts,
109 /// small archives). Preprocessors that may handle very large inputs
110 /// (e.g. multi-hundred-MB archives of pre-built toolchains) should
111 /// consider adding a streaming path rather than materialising the
112 /// entire decoded stream at once.
113 fn expand(&self, source: &Path, fs: &dyn Fs) -> Result<Vec<ExpandedFile>>;
114
115 /// Whether this preprocessor participates in the reverse-merge
116 /// pipeline. Reverse-merge is the cache-backed flow that lets
117 /// `dodot transform check` propagate edits from the deployed file
118 /// back into the source by writing a unified diff (and, for
119 /// ambiguous edits, dodot-conflict marker blocks).
120 ///
121 /// Default `false`. Generative preprocessors that emit a
122 /// [`tracked_render`](ExpandedFile::tracked_render) and want their
123 /// sources scanned for unresolved markers before expansion override
124 /// this to `true`. The pipeline uses the flag to:
125 ///
126 /// - Decide whether to run [`crate::preprocessing::conflict::
127 /// ensure_no_unresolved_markers`] on the source bytes before
128 /// calling `expand` — refusing to render a template that already
129 /// carries an unresolved conflict block (otherwise the markers
130 /// would deploy as garbage).
131 /// - Filter the set of files visited by `dodot transform check` to
132 /// those whose preprocessor knows how to write reverse-diffs.
133 ///
134 /// A preprocessor that returns `true` here MUST also populate
135 /// `tracked_render` on its `ExpandedFile`s; otherwise the cache
136 /// layer has no marker stream to feed into burgertocow.
137 fn supports_reverse_merge(&self) -> bool {
138 false
139 }
140}
141
142/// Registry of available preprocessors.
143///
144/// Preprocessors are checked in registration order. The first preprocessor
145/// whose `matches_extension` returns true for a filename wins.
146pub struct PreprocessorRegistry {
147 preprocessors: Vec<Box<dyn Preprocessor>>,
148}
149
150impl PreprocessorRegistry {
151 /// Create an empty registry.
152 pub fn new() -> Self {
153 Self {
154 preprocessors: Vec::new(),
155 }
156 }
157
158 /// Register a preprocessor.
159 pub fn register(&mut self, preprocessor: Box<dyn Preprocessor>) {
160 self.preprocessors.push(preprocessor);
161 }
162
163 /// Find the preprocessor that handles a given filename, if any.
164 pub fn find_for_file(&self, filename: &str) -> Option<&dyn Preprocessor> {
165 self.preprocessors
166 .iter()
167 .find(|p| p.matches_extension(filename))
168 .map(|p| p.as_ref())
169 }
170
171 /// Whether any registered preprocessor handles this filename.
172 pub fn is_preprocessor_file(&self, filename: &str) -> bool {
173 self.find_for_file(filename).is_some()
174 }
175
176 /// Whether the registry has any preprocessors registered.
177 pub fn is_empty(&self) -> bool {
178 self.preprocessors.is_empty()
179 }
180
181 /// Number of registered preprocessors.
182 pub fn len(&self) -> usize {
183 self.preprocessors.len()
184 }
185}
186
187impl Default for PreprocessorRegistry {
188 fn default() -> Self {
189 Self::new()
190 }
191}
192
193/// The default registry used on the normal execution path.
194///
195/// Contains all user-facing preprocessors:
196/// - [`unarchive::UnarchivePreprocessor`] for `.tar.gz` extraction
197/// - [`template::TemplatePreprocessor`] for Jinja2-style templates
198///
199/// The [`identity`] preprocessor is test-only and is intentionally *not*
200/// registered here (it would match innocuous-looking `.identity` files in
201/// user dotfiles).
202pub fn default_registry(
203 template_config: &crate::config::PreprocessorTemplateSection,
204 pather: &dyn crate::paths::Pather,
205) -> Result<PreprocessorRegistry> {
206 let mut registry = PreprocessorRegistry::new();
207 registry.register(Box::new(unarchive::UnarchivePreprocessor::new()));
208 registry.register(Box::new(template::TemplatePreprocessor::new(
209 template_config.extensions.clone(),
210 template_config.vars.clone(),
211 pather,
212 )?));
213 Ok(registry)
214}
215
#[cfg(test)]
mod tests {
    use super::*;

    use crate::preprocessing::identity::IdentityPreprocessor;
    use crate::preprocessing::unarchive::UnarchivePreprocessor;

    /// Minimal preprocessor whose name and claimed suffix are set by the
    /// test. Two instances can claim the same suffix while reporting
    /// different names, which lets a test observe which registration
    /// actually won the lookup.
    struct NamedStub {
        name: &'static str,
        suffix: &'static str,
    }

    impl Preprocessor for NamedStub {
        fn name(&self) -> &str {
            self.name
        }

        fn transform_type(&self) -> TransformType {
            TransformType::Opaque
        }

        fn matches_extension(&self, filename: &str) -> bool {
            filename.ends_with(self.suffix)
        }

        fn stripped_name(&self, filename: &str) -> String {
            filename
                .strip_suffix(self.suffix)
                .unwrap_or(filename)
                .to_string()
        }

        fn expand(&self, _source: &Path, _fs: &dyn Fs) -> Result<Vec<ExpandedFile>> {
            // The registry tests never expand anything.
            Ok(Vec::new())
        }
    }

    // Compile-time check: `Preprocessor` must be object-safe.
    #[allow(dead_code)]
    fn assert_object_safe(_: &dyn Preprocessor) {}

    // Compile-time check: `Preprocessor` must be usable behind a `Box`.
    #[allow(dead_code)]
    fn assert_boxable(_: Box<dyn Preprocessor>) {}

    #[test]
    fn transform_type_eq() {
        assert_eq!(TransformType::Generative, TransformType::Generative);
        assert_ne!(TransformType::Generative, TransformType::Opaque);
    }

    #[test]
    fn empty_registry() {
        let registry = PreprocessorRegistry::new();
        assert!(registry.is_empty());
        assert_eq!(registry.len(), 0);
        assert!(!registry.is_preprocessor_file("anything.txt"));
        assert!(registry.find_for_file("anything.txt").is_none());
    }

    #[test]
    fn registry_finds_preprocessor() {
        let mut registry = PreprocessorRegistry::new();
        registry.register(Box::new(IdentityPreprocessor::new()));

        assert!(!registry.is_empty());
        assert_eq!(registry.len(), 1);
        assert!(registry.is_preprocessor_file("config.toml.identity"));
        assert!(!registry.is_preprocessor_file("config.toml"));

        let found = registry.find_for_file("config.toml.identity").unwrap();
        assert_eq!(found.name(), "identity");
    }

    #[test]
    fn registry_first_match_wins() {
        // Both stubs claim ".stub" but report different names, so the
        // assertion fails if lookup does not honour registration order.
        let mut registry = PreprocessorRegistry::new();
        registry.register(Box::new(NamedStub { name: "first", suffix: ".stub" }));
        registry.register(Box::new(NamedStub { name: "second", suffix: ".stub" }));
        assert_eq!(registry.len(), 2);
        let found = registry.find_for_file("test.stub").unwrap();
        assert_eq!(found.name(), "first");

        // Swapping the registration order must swap the winner.
        let mut reversed = PreprocessorRegistry::new();
        reversed.register(Box::new(NamedStub { name: "second", suffix: ".stub" }));
        reversed.register(Box::new(NamedStub { name: "first", suffix: ".stub" }));
        let found = reversed.find_for_file("test.stub").unwrap();
        assert_eq!(found.name(), "second");
    }

    #[test]
    fn registry_duplicate_identity_registration() {
        let mut registry = PreprocessorRegistry::new();
        registry.register(Box::new(IdentityPreprocessor::new()));
        // Registering a second one that matches the same extension
        registry.register(Box::new(IdentityPreprocessor::with_extension("identity")));

        assert_eq!(registry.len(), 2);
        let found = registry.find_for_file("test.identity").unwrap();
        assert_eq!(found.name(), "identity");
    }

    #[test]
    fn registry_multiple_different_preprocessors() {
        let mut registry = PreprocessorRegistry::new();
        registry.register(Box::new(IdentityPreprocessor::new()));
        registry.register(Box::new(UnarchivePreprocessor::new()));

        assert_eq!(registry.len(), 2);

        // Each matches its own extension
        assert!(registry.is_preprocessor_file("config.toml.identity"));
        assert!(registry.is_preprocessor_file("bin.tar.gz"));

        // Each file resolves to its own preprocessor, not the other one
        let identity = registry.find_for_file("config.toml.identity").unwrap();
        assert_eq!(identity.name(), "identity");

        let unarchive = registry.find_for_file("bin.tar.gz").unwrap();
        assert_eq!(unarchive.name(), "unarchive");

        // Non-preprocessor files still return None
        assert!(registry.find_for_file("regular.txt").is_none());
    }

    #[test]
    fn registry_does_not_match_partial_extension() {
        let mut registry = PreprocessorRegistry::new();
        registry.register(Box::new(IdentityPreprocessor::new()));

        // "identity" alone is not ".identity"
        assert!(!registry.is_preprocessor_file("identity"));
        // File without the dot prefix shouldn't match
        assert!(!registry.is_preprocessor_file("fileidentity"));
    }
}