Skip to main content

dodot_lib/preprocessing/
mod.rs

1//! Preprocessing pipeline — transforms source files before handler dispatch.
2//!
3//! Preprocessors expand files whose version-controlled source differs from
4//! the deployed artifact (templates, plists, encrypted secrets). The
5//! preprocessing phase runs before handler dispatch, producing virtual
6//! entries that downstream handlers (symlink, shell, path, install,
7//! homebrew) consume transparently.
8//!
9//! See `docs/proposals/preprocessing-pipeline.lex` for the full design.
10
11pub mod baseline;
12pub mod conflict;
13pub mod divergence;
14pub mod identity;
15pub mod no_reverse;
16pub mod pipeline;
17pub mod reverse_merge;
18pub mod template;
19pub mod unarchive;
20
21pub use pipeline::PreprocessMode;
22
23use std::path::{Path, PathBuf};
24
25use serde::Serialize;
26
27use crate::fs::Fs;
28use crate::Result;
29
30/// The safety model for a preprocessor's transformation.
31#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
32pub enum TransformType {
33    /// Source generates destination; reversal is heuristic (templates).
34    Generative,
35    /// Source and destination are lossless representations (plists).
36    Representational,
37    /// Source is decoded on deploy; no reverse path (GPG).
38    Opaque,
39}
40
/// One output entry produced when a preprocessor expands a source file.
///
/// Values are built directly with a struct literal. Tests commonly write
/// `ExpandedFile { relative_path, content, ..Default::default() }` so the
/// optional cache-related fields take their default values.
#[derive(Debug, Clone, Default)]
pub struct ExpandedFile {
    /// Location of this entry relative to the expansion output; usually
    /// just the filename.
    pub relative_path: PathBuf,
    /// Bytes of the expanded file.
    pub content: Vec<u8>,
    /// `true` when this entry is a directory marker.
    pub is_dir: bool,
    /// Rendered output annotated with markers.
    ///
    /// Populated by Generative preprocessors that support cache-backed
    /// reverse-diff (templates). It is `None` for Representational and
    /// Opaque preprocessors, and for generative preprocessors that do not
    /// track variable boundaries (e.g. unarchive).
    ///
    /// When set, the pipeline stores the string in the baseline cache so
    /// that the clean filter and `dodot transform check` can compute
    /// reverse-diffs without rendering again. Avoiding a re-render
    /// matters because it can re-trigger secret-provider auth prompts.
    pub tracked_render: Option<String>,
    /// SHA-256 of the rendering context (variables and env values
    /// resolved at render time).
    ///
    /// `None` for preprocessors without a meaningful notion of context.
    /// The pipeline stores it in the baseline cache next to the
    /// source-file hash and the rendered-content hash; `dodot up`
    /// re-rendering and the install/homebrew sentinels use it to decide
    /// when work is stale.
    pub context_hash: Option<[u8; 32]>,
}
74
75/// The core preprocessor abstraction.
76///
77/// Each preprocessor is a small struct that implements this trait.
78/// Preprocessors are stored in a [`PreprocessorRegistry`] and dispatched
79/// by file extension at preprocessing time.
80///
81/// Preprocessors are pure transformers — they read source files and
82/// produce expanded content. Writing to the datastore is handled by the
83/// pipeline, not by individual preprocessors.
84pub trait Preprocessor: Send + Sync {
85    /// Unique name for this preprocessor (e.g. `"template"`, `"plist"`).
86    fn name(&self) -> &str;
87
88    /// The safety model for this transformation.
89    fn transform_type(&self) -> TransformType;
90
91    /// Whether this preprocessor handles a file with the given name.
92    fn matches_extension(&self, filename: &str) -> bool;
93
94    /// Strip the preprocessor extension to get the logical filename.
95    /// e.g. `"config.toml.tmpl"` → `"config.toml"`.
96    fn stripped_name(&self, filename: &str) -> String;
97
98    /// Expand the source file into one or more output files.
99    ///
100    /// For single-file preprocessors (templates): returns one entry.
101    /// For multi-file preprocessors (archives): returns many entries.
102    ///
103    /// The `source` path points to the original file in the pack directory.
104    ///
105    /// # Memory
106    ///
107    /// Expanded content is held fully in memory via [`Vec<u8>`]. This is
108    /// appropriate for dotfile-sized payloads (configs, small scripts,
109    /// small archives). Preprocessors that may handle very large inputs
110    /// (e.g. multi-hundred-MB archives of pre-built toolchains) should
111    /// consider adding a streaming path rather than materialising the
112    /// entire decoded stream at once.
113    fn expand(&self, source: &Path, fs: &dyn Fs) -> Result<Vec<ExpandedFile>>;
114
115    /// Whether this preprocessor participates in the reverse-merge
116    /// pipeline. Reverse-merge is the cache-backed flow that lets
117    /// `dodot transform check` propagate edits from the deployed file
118    /// back into the source by writing a unified diff (and, for
119    /// ambiguous edits, dodot-conflict marker blocks).
120    ///
121    /// Default `false`. Generative preprocessors that emit a
122    /// [`tracked_render`](ExpandedFile::tracked_render) and want their
123    /// sources scanned for unresolved markers before expansion override
124    /// this to `true`. The pipeline uses the flag to:
125    ///
126    /// - Decide whether to run [`crate::preprocessing::conflict::
127    ///   ensure_no_unresolved_markers`] on the source bytes before
128    ///   calling `expand` — refusing to render a template that already
129    ///   carries an unresolved conflict block (otherwise the markers
130    ///   would deploy as garbage).
131    /// - Filter the set of files visited by `dodot transform check` to
132    ///   those whose preprocessor knows how to write reverse-diffs.
133    ///
134    /// A preprocessor that returns `true` here MUST also populate
135    /// `tracked_render` on its `ExpandedFile`s; otherwise the cache
136    /// layer has no marker stream to feed into burgertocow.
137    fn supports_reverse_merge(&self) -> bool {
138        false
139    }
140}
141
142/// Registry of available preprocessors.
143///
144/// Preprocessors are checked in registration order. The first preprocessor
145/// whose `matches_extension` returns true for a filename wins.
146pub struct PreprocessorRegistry {
147    preprocessors: Vec<Box<dyn Preprocessor>>,
148}
149
150impl PreprocessorRegistry {
151    /// Create an empty registry.
152    pub fn new() -> Self {
153        Self {
154            preprocessors: Vec::new(),
155        }
156    }
157
158    /// Register a preprocessor.
159    pub fn register(&mut self, preprocessor: Box<dyn Preprocessor>) {
160        self.preprocessors.push(preprocessor);
161    }
162
163    /// Find the preprocessor that handles a given filename, if any.
164    pub fn find_for_file(&self, filename: &str) -> Option<&dyn Preprocessor> {
165        self.preprocessors
166            .iter()
167            .find(|p| p.matches_extension(filename))
168            .map(|p| p.as_ref())
169    }
170
171    /// Whether any registered preprocessor handles this filename.
172    pub fn is_preprocessor_file(&self, filename: &str) -> bool {
173        self.find_for_file(filename).is_some()
174    }
175
176    /// Whether the registry has any preprocessors registered.
177    pub fn is_empty(&self) -> bool {
178        self.preprocessors.is_empty()
179    }
180
181    /// Number of registered preprocessors.
182    pub fn len(&self) -> usize {
183        self.preprocessors.len()
184    }
185}
186
187impl Default for PreprocessorRegistry {
188    fn default() -> Self {
189        Self::new()
190    }
191}
192
193/// The default registry used on the normal execution path.
194///
195/// Contains all user-facing preprocessors:
196/// - [`unarchive::UnarchivePreprocessor`] for `.tar.gz` extraction
197/// - [`template::TemplatePreprocessor`] for Jinja2-style templates
198///
199/// The [`identity`] preprocessor is test-only and is intentionally *not*
200/// registered here (it would match innocuous-looking `.identity` files in
201/// user dotfiles).
202pub fn default_registry(
203    template_config: &crate::config::PreprocessorTemplateSection,
204    pather: &dyn crate::paths::Pather,
205) -> Result<PreprocessorRegistry> {
206    let mut registry = PreprocessorRegistry::new();
207    registry.register(Box::new(unarchive::UnarchivePreprocessor::new()));
208    registry.register(Box::new(template::TemplatePreprocessor::new(
209        template_config.extensions.clone(),
210        template_config.vars.clone(),
211        pather,
212    )?));
213    Ok(registry)
214}
215
#[cfg(test)]
mod tests {
    use super::*;

    use crate::preprocessing::identity::IdentityPreprocessor;
    use crate::preprocessing::unarchive::UnarchivePreprocessor;

    /// Minimal preprocessor with a configurable name and filename suffix.
    ///
    /// It lets ordering tests register several preprocessors that match
    /// the same file but report different names, so the assertion can
    /// tell which one the registry picked.
    struct SuffixStub {
        name: &'static str,
        suffix: &'static str,
    }

    impl Preprocessor for SuffixStub {
        fn name(&self) -> &str {
            self.name
        }

        fn transform_type(&self) -> TransformType {
            TransformType::Opaque
        }

        fn matches_extension(&self, filename: &str) -> bool {
            filename.ends_with(self.suffix)
        }

        fn stripped_name(&self, filename: &str) -> String {
            filename
                .strip_suffix(self.suffix)
                .unwrap_or(filename)
                .to_string()
        }

        fn expand(&self, _source: &Path, _fs: &dyn Fs) -> Result<Vec<ExpandedFile>> {
            Ok(Vec::new())
        }
    }

    // Compile-time check: `Preprocessor` must be object-safe. These
    // helpers are never called, hence the `dead_code` allowances.
    #[allow(dead_code)]
    fn assert_object_safe(_: &dyn Preprocessor) {}

    #[allow(dead_code)]
    fn assert_boxable(_: Box<dyn Preprocessor>) {}

    #[test]
    fn transform_type_eq() {
        assert_eq!(TransformType::Generative, TransformType::Generative);
        assert_ne!(TransformType::Generative, TransformType::Opaque);
    }

    #[test]
    fn empty_registry() {
        let registry = PreprocessorRegistry::new();
        assert!(registry.is_empty());
        assert_eq!(registry.len(), 0);
        assert!(!registry.is_preprocessor_file("anything.txt"));
        assert!(registry.find_for_file("anything.txt").is_none());
    }

    #[test]
    fn default_registry_value_is_empty() {
        let registry = PreprocessorRegistry::default();
        assert!(registry.is_empty());
        assert_eq!(registry.len(), 0);
    }

    #[test]
    fn registry_finds_preprocessor() {
        let mut registry = PreprocessorRegistry::new();
        registry.register(Box::new(IdentityPreprocessor::new()));

        assert!(!registry.is_empty());
        assert_eq!(registry.len(), 1);
        assert!(registry.is_preprocessor_file("config.toml.identity"));
        assert!(!registry.is_preprocessor_file("config.toml"));

        let found = registry.find_for_file("config.toml.identity").unwrap();
        assert_eq!(found.name(), "identity");
    }

    #[test]
    fn registry_first_match_wins() {
        // Both stubs match the same suffix but report different names.
        // Two `IdentityPreprocessor`s would presumably both be named
        // "identity", which would make the assertion unable to detect a
        // wrong lookup order.
        let mut registry = PreprocessorRegistry::new();
        registry.register(Box::new(SuffixStub {
            name: "first",
            suffix: ".shared",
        }));
        registry.register(Box::new(SuffixStub {
            name: "second",
            suffix: ".shared",
        }));

        assert_eq!(registry.len(), 2);
        let found = registry.find_for_file("test.shared").unwrap();
        assert_eq!(found.name(), "first");
    }

    #[test]
    fn registry_multiple_different_preprocessors() {
        let mut registry = PreprocessorRegistry::new();
        registry.register(Box::new(IdentityPreprocessor::new()));
        registry.register(Box::new(UnarchivePreprocessor::new()));

        assert_eq!(registry.len(), 2);

        // Each matches its own extension
        assert!(registry.is_preprocessor_file("config.toml.identity"));
        assert!(registry.is_preprocessor_file("bin.tar.gz"));

        // Neither matches the other
        let identity = registry.find_for_file("config.toml.identity").unwrap();
        assert_eq!(identity.name(), "identity");

        let unarchive = registry.find_for_file("bin.tar.gz").unwrap();
        assert_eq!(unarchive.name(), "unarchive");

        // Non-preprocessor files still return None
        assert!(registry.find_for_file("regular.txt").is_none());
    }

    #[test]
    fn registry_does_not_match_partial_extension() {
        let mut registry = PreprocessorRegistry::new();
        registry.register(Box::new(IdentityPreprocessor::new()));

        // "identity" alone is not ".identity"
        assert!(!registry.is_preprocessor_file("identity"));
        // File without the dot prefix shouldn't match
        assert!(!registry.is_preprocessor_file("fileidentity"));
    }

    #[test]
    fn reverse_merge_is_opt_in() {
        // The stub does not override `supports_reverse_merge`, so this
        // exercises the trait's default implementation.
        let stub = SuffixStub {
            name: "stub",
            suffix: ".stub",
        };
        assert!(!stub.supports_reverse_merge());
    }
}