Skip to main content

dodot_lib/preprocessing/
mod.rs

//! Preprocessing pipeline — transforms source files before handler dispatch.
//!
//! Preprocessors expand files whose version-controlled source differs from
//! the deployed artifact (templates, plists, encrypted secrets). The
//! preprocessing phase runs before handler dispatch, producing virtual
//! entries that downstream handlers (symlink, shell, path, install,
//! homebrew) consume transparently.
//!
//! See `docs/proposals/preprocessing-pipeline.lex` for the full design.
10
11pub mod identity;
12pub mod pipeline;
13pub mod template;
14pub mod unarchive;
15
16use std::path::{Path, PathBuf};
17
18use serde::Serialize;
19
20use crate::fs::Fs;
21use crate::Result;
22
23/// The safety model for a preprocessor's transformation.
24#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
25pub enum TransformType {
26    /// Source generates destination; reversal is heuristic (templates).
27    Generative,
28    /// Source and destination are lossless representations (plists).
29    Representational,
30    /// Source is decoded on deploy; no reverse path (GPG).
31    Opaque,
32}
33
/// One output entry yielded when a preprocessor expands a source file.
#[derive(Clone, Debug)]
pub struct ExpandedFile {
    /// Location of this entry relative to the expansion output; in the
    /// common case this is simply the filename.
    pub relative_path: PathBuf,
    /// Raw bytes of the expanded file.
    pub content: Vec<u8>,
    /// `true` when the entry is a directory marker rather than a file.
    pub is_dir: bool,
}
44
45/// The core preprocessor abstraction.
46///
47/// Each preprocessor is a small struct that implements this trait.
48/// Preprocessors are stored in a [`PreprocessorRegistry`] and dispatched
49/// by file extension at preprocessing time.
50///
51/// Preprocessors are pure transformers — they read source files and
52/// produce expanded content. Writing to the datastore is handled by the
53/// pipeline, not by individual preprocessors.
54pub trait Preprocessor: Send + Sync {
55    /// Unique name for this preprocessor (e.g. `"template"`, `"plist"`).
56    fn name(&self) -> &str;
57
58    /// The safety model for this transformation.
59    fn transform_type(&self) -> TransformType;
60
61    /// Whether this preprocessor handles a file with the given name.
62    fn matches_extension(&self, filename: &str) -> bool;
63
64    /// Strip the preprocessor extension to get the logical filename.
65    /// e.g. `"config.toml.tmpl"` → `"config.toml"`.
66    fn stripped_name(&self, filename: &str) -> String;
67
68    /// Expand the source file into one or more output files.
69    ///
70    /// For single-file preprocessors (templates): returns one entry.
71    /// For multi-file preprocessors (archives): returns many entries.
72    ///
73    /// The `source` path points to the original file in the pack directory.
74    ///
75    /// # Memory
76    ///
77    /// Expanded content is held fully in memory via [`Vec<u8>`]. This is
78    /// appropriate for dotfile-sized payloads (configs, small scripts,
79    /// small archives). Preprocessors that may handle very large inputs
80    /// (e.g. multi-hundred-MB archives of pre-built toolchains) should
81    /// consider adding a streaming path rather than materialising the
82    /// entire decoded stream at once.
83    fn expand(&self, source: &Path, fs: &dyn Fs) -> Result<Vec<ExpandedFile>>;
84}
85
86/// Registry of available preprocessors.
87///
88/// Preprocessors are checked in registration order. The first preprocessor
89/// whose `matches_extension` returns true for a filename wins.
90pub struct PreprocessorRegistry {
91    preprocessors: Vec<Box<dyn Preprocessor>>,
92}
93
94impl PreprocessorRegistry {
95    /// Create an empty registry.
96    pub fn new() -> Self {
97        Self {
98            preprocessors: Vec::new(),
99        }
100    }
101
102    /// Register a preprocessor.
103    pub fn register(&mut self, preprocessor: Box<dyn Preprocessor>) {
104        self.preprocessors.push(preprocessor);
105    }
106
107    /// Find the preprocessor that handles a given filename, if any.
108    pub fn find_for_file(&self, filename: &str) -> Option<&dyn Preprocessor> {
109        self.preprocessors
110            .iter()
111            .find(|p| p.matches_extension(filename))
112            .map(|p| p.as_ref())
113    }
114
115    /// Whether any registered preprocessor handles this filename.
116    pub fn is_preprocessor_file(&self, filename: &str) -> bool {
117        self.find_for_file(filename).is_some()
118    }
119
120    /// Whether the registry has any preprocessors registered.
121    pub fn is_empty(&self) -> bool {
122        self.preprocessors.is_empty()
123    }
124
125    /// Number of registered preprocessors.
126    pub fn len(&self) -> usize {
127        self.preprocessors.len()
128    }
129}
130
131impl Default for PreprocessorRegistry {
132    fn default() -> Self {
133        Self::new()
134    }
135}
136
137/// The default registry used on the normal execution path.
138///
139/// Contains all user-facing preprocessors:
140/// - [`unarchive::UnarchivePreprocessor`] for `.tar.gz` extraction
141/// - [`template::TemplatePreprocessor`] for Jinja2-style templates
142///
143/// The [`identity`] preprocessor is test-only and is intentionally *not*
144/// registered here (it would match innocuous-looking `.identity` files in
145/// user dotfiles).
146pub fn default_registry(
147    template_config: &crate::config::PreprocessorTemplateSection,
148    pather: &dyn crate::paths::Pather,
149) -> Result<PreprocessorRegistry> {
150    let mut registry = PreprocessorRegistry::new();
151    registry.register(Box::new(unarchive::UnarchivePreprocessor::new()));
152    registry.register(Box::new(template::TemplatePreprocessor::new(
153        template_config.extensions.clone(),
154        template_config.vars.clone(),
155        pather,
156    )?));
157    Ok(registry)
158}
159
#[cfg(test)]
mod tests {
    use super::*;

    use crate::preprocessing::identity::IdentityPreprocessor;
    use crate::preprocessing::unarchive::UnarchivePreprocessor;

    // Compile-time check: `Preprocessor` must be object-safe.
    #[allow(dead_code)]
    fn assert_object_safe(_: &dyn Preprocessor) {}

    // Compile-time check: `Preprocessor` must be usable behind a `Box`.
    #[allow(dead_code)]
    fn assert_boxable(_: Box<dyn Preprocessor>) {}

    /// Minimal preprocessor with a configurable name and filename suffix.
    ///
    /// The ordering tests need two preprocessors that accept the same file
    /// yet report different names, so that the one actually returned by the
    /// registry can be told apart.
    struct NamedStub {
        name: &'static str,
        suffix: &'static str,
    }

    impl Preprocessor for NamedStub {
        fn name(&self) -> &str {
            self.name
        }

        fn transform_type(&self) -> TransformType {
            TransformType::Opaque
        }

        fn matches_extension(&self, filename: &str) -> bool {
            filename.ends_with(self.suffix)
        }

        fn stripped_name(&self, filename: &str) -> String {
            filename
                .strip_suffix(self.suffix)
                .unwrap_or(filename)
                .to_string()
        }

        fn expand(&self, _source: &Path, _fs: &dyn Fs) -> Result<Vec<ExpandedFile>> {
            // The registry tests never expand anything.
            Ok(Vec::new())
        }
    }

    #[test]
    fn transform_type_eq() {
        assert_eq!(TransformType::Generative, TransformType::Generative);
        assert_ne!(TransformType::Generative, TransformType::Opaque);
    }

    #[test]
    fn empty_registry() {
        let registry = PreprocessorRegistry::new();
        assert!(registry.is_empty());
        assert_eq!(registry.len(), 0);
        assert!(!registry.is_preprocessor_file("anything.txt"));
        assert!(registry.find_for_file("anything.txt").is_none());
    }

    #[test]
    fn registry_finds_preprocessor() {
        let mut registry = PreprocessorRegistry::new();
        registry.register(Box::new(IdentityPreprocessor::new()));

        assert!(!registry.is_empty());
        assert_eq!(registry.len(), 1);
        assert!(registry.is_preprocessor_file("config.toml.identity"));
        assert!(!registry.is_preprocessor_file("config.toml"));

        let found = registry.find_for_file("config.toml.identity").unwrap();
        assert_eq!(found.name(), "identity");
    }

    #[test]
    fn registry_first_match_wins() {
        // Both stubs accept the same suffix but carry distinct names, so the
        // assertion fails if the registry ever returns the later entry.
        let mut registry = PreprocessorRegistry::new();
        registry.register(Box::new(NamedStub {
            name: "first",
            suffix: ".dup",
        }));
        registry.register(Box::new(NamedStub {
            name: "second",
            suffix: ".dup",
        }));

        assert_eq!(registry.len(), 2);
        let found = registry.find_for_file("test.dup").unwrap();
        assert_eq!(found.name(), "first");

        // Swapping the registration order must swap the winner.
        let mut reversed = PreprocessorRegistry::new();
        reversed.register(Box::new(NamedStub {
            name: "second",
            suffix: ".dup",
        }));
        reversed.register(Box::new(NamedStub {
            name: "first",
            suffix: ".dup",
        }));

        let found = reversed.find_for_file("test.dup").unwrap();
        assert_eq!(found.name(), "second");
    }

    #[test]
    fn registry_tolerates_duplicate_registration() {
        let mut registry = PreprocessorRegistry::new();
        registry.register(Box::new(IdentityPreprocessor::new()));
        // Registering a second one that matches the same extension
        registry.register(Box::new(IdentityPreprocessor::with_extension("identity")));

        assert_eq!(registry.len(), 2);
        let found = registry.find_for_file("test.identity").unwrap();
        assert_eq!(found.name(), "identity");
    }

    #[test]
    fn registry_multiple_different_preprocessors() {
        let mut registry = PreprocessorRegistry::new();
        registry.register(Box::new(IdentityPreprocessor::new()));
        registry.register(Box::new(UnarchivePreprocessor::new()));

        assert_eq!(registry.len(), 2);

        // Each matches its own extension
        assert!(registry.is_preprocessor_file("config.toml.identity"));
        assert!(registry.is_preprocessor_file("bin.tar.gz"));

        // Neither matches the other
        let identity = registry.find_for_file("config.toml.identity").unwrap();
        assert_eq!(identity.name(), "identity");

        let unarchive = registry.find_for_file("bin.tar.gz").unwrap();
        assert_eq!(unarchive.name(), "unarchive");

        // Non-preprocessor files still return None
        assert!(registry.find_for_file("regular.txt").is_none());
    }

    #[test]
    fn registry_does_not_match_partial_extension() {
        let mut registry = PreprocessorRegistry::new();
        registry.register(Box::new(IdentityPreprocessor::new()));

        // "identity" alone is not ".identity"
        assert!(!registry.is_preprocessor_file("identity"));
        // File without the dot prefix shouldn't match
        assert!(!registry.is_preprocessor_file("fileidentity"));
    }
}
256}