Skip to main content

dodot_lib/preprocessing/
mod.rs

1//! Preprocessing pipeline — transforms source files before handler dispatch.
2//!
3//! Preprocessors expand files whose version-controlled source differs from
4//! the deployed artifact (templates, plists, encrypted secrets). The
5//! preprocessing phase runs before handler dispatch, producing virtual
6//! entries that downstream handlers (symlink, shell, path, install,
7//! homebrew) consume transparently.
8//!
9//! See `docs/proposals/preprocessing-pipeline.lex` for the full design.
10
11pub mod age;
12pub mod baseline;
13pub mod conflict;
14pub mod divergence;
15pub mod gpg;
16pub mod identity;
17pub mod no_reverse;
18pub mod pipeline;
19pub mod reverse_merge;
20pub mod template;
21pub mod unarchive;
22
23pub use pipeline::PreprocessMode;
24
25use std::path::{Path, PathBuf};
26
27use serde::Serialize;
28
29use crate::fs::Fs;
30use crate::Result;
31
32/// The safety model for a preprocessor's transformation.
33#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
34pub enum TransformType {
35    /// Source generates destination; reversal is heuristic (templates).
36    Generative,
37    /// Source and destination are lossless representations (plists).
38    Representational,
39    /// Source is decoded on deploy; no reverse path (GPG).
40    Opaque,
41}
42
43/// One entry in a per-render secrets sidecar — a span of lines whose
44/// content was produced by a `secret(...)` call, paired with the
45/// reference that produced it.
46///
47/// Lines are 0-indexed and `start..end` is half-open. A single-line
48/// secret occupies line `start` and is encoded as `end == start + 1`
49/// (`start == end` would be an empty range and is never produced).
50/// For Phase S1 every entry is single-line: multi-line secrets are
51/// refused at resolution time per `secrets.lex` §3.4. The `end` field
52/// is preserved in the schema for forward-compatibility but the
53/// renderer never produces `end > start + 1`.
54///
55/// Persisted to disk under `<baseline>.secret.json` (see
56/// `secrets.lex` §3.3); consumed by the dry-run preview rendering
57/// (§7.4) to mask resolved values, and by the burgertocow mask
58/// integration (issue arthur-debert/burgertocow#13) to skip those
59/// lines from the reverse diff.
60#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
61pub struct SecretLineRange {
62    /// First line, 0-indexed, inclusive.
63    pub start: usize,
64    /// One past the last line, 0-indexed, exclusive. `start + 1` for
65    /// a single-line value.
66    pub end: usize,
67    /// The original `secret(...)` argument string, e.g.
68    /// `"op://Personal/DB/password"`. Surfaces in the dry-run
69    /// `[SECRET: <reference>]` placeholder.
70    pub reference: String,
71}
72
73/// A single file produced by a preprocessor's expansion.
74///
75/// Construct ad-hoc via the struct literal; tests commonly use
76/// `ExpandedFile { relative_path, content, ..Default::default() }` to
77/// fill in the optional cache-related fields.
78#[derive(Debug, Clone, Default)]
79pub struct ExpandedFile {
80    /// Path relative to the expansion output (usually just the filename).
81    pub relative_path: PathBuf,
82    /// The file content.
83    pub content: Vec<u8>,
84    /// Whether this entry is a directory marker.
85    pub is_dir: bool,
86    /// Marker-annotated rendered output, populated by Generative
87    /// preprocessors that support cache-backed reverse-diff (templates).
88    /// `None` for Representational, Opaque, or generative preprocessors
89    /// that don't track variable boundaries (e.g. unarchive).
90    ///
91    /// When present, the pipeline persists this string in the baseline
92    /// cache so the clean filter and `dodot transform check` can compute
93    /// reverse-diffs without re-rendering — the latter being important
94    /// because re-rendering can re-trigger secret-provider auth prompts.
95    pub tracked_render: Option<String>,
96    /// SHA-256 of the rendering context (variables, env values resolved
97    /// at render time). `None` for preprocessors that don't have a
98    /// meaningful context concept.
99    ///
100    /// The pipeline pairs this with the source-file hash and rendered
101    /// content hash in the baseline cache. `dodot up` re-rendering and
102    /// install/homebrew sentinels both use the context hash to decide
103    /// when work is stale.
104    pub context_hash: Option<[u8; 32]>,
105    /// Per-render secret-line tracking. Empty when no `secret(...)`
106    /// calls fired (the common case today; will be the common case
107    /// forever for templates that don't use secrets). Populated by
108    /// `TemplatePreprocessor` when a [`crate::secret::SecretRegistry`]
109    /// is wired in. The pipeline persists this as a sidecar JSON
110    /// alongside the baseline.
111    pub secret_line_ranges: Vec<SecretLineRange>,
112    /// Unix mode the rendered datastore file should be chmod'd to
113    /// after the pipeline writes it. `None` (the default) leaves
114    /// the file at whatever umask-derived mode `write_file` produced
115    /// — the pre-S3 behavior for templates / unarchive output.
116    /// Whole-file secret preprocessors (`age`, `gpg`) set this to
117    /// `Some(0o600)` to enforce `secrets.lex` §4.3: rendered
118    /// secrets land 0600 regardless of the source file's mode.
119    /// Ignored when `is_dir` is true.
120    pub deploy_mode: Option<u32>,
121}
122
123/// The core preprocessor abstraction.
124///
125/// Each preprocessor is a small struct that implements this trait.
126/// Preprocessors are stored in a [`PreprocessorRegistry`] and dispatched
127/// by file extension at preprocessing time.
128///
129/// Preprocessors are pure transformers — they read source files and
130/// produce expanded content. Writing to the datastore is handled by the
131/// pipeline, not by individual preprocessors.
132pub trait Preprocessor: Send + Sync {
133    /// Unique name for this preprocessor (e.g. `"template"`, `"plist"`).
134    fn name(&self) -> &str;
135
136    /// The safety model for this transformation.
137    fn transform_type(&self) -> TransformType;
138
139    /// Whether this preprocessor handles a file with the given name.
140    fn matches_extension(&self, filename: &str) -> bool;
141
142    /// Strip the preprocessor extension to get the logical filename.
143    /// e.g. `"config.toml.tmpl"` → `"config.toml"`.
144    fn stripped_name(&self, filename: &str) -> String;
145
146    /// Expand the source file into one or more output files.
147    ///
148    /// For single-file preprocessors (templates): returns one entry.
149    /// For multi-file preprocessors (archives): returns many entries.
150    ///
151    /// The `source` path points to the original file in the pack directory.
152    ///
153    /// # Memory
154    ///
155    /// Expanded content is held fully in memory via [`Vec<u8>`]. This is
156    /// appropriate for dotfile-sized payloads (configs, small scripts,
157    /// small archives). Preprocessors that may handle very large inputs
158    /// (e.g. multi-hundred-MB archives of pre-built toolchains) should
159    /// consider adding a streaming path rather than materialising the
160    /// entire decoded stream at once.
161    fn expand(&self, source: &Path, fs: &dyn Fs) -> Result<Vec<ExpandedFile>>;
162
163    /// Whether this preprocessor participates in the reverse-merge
164    /// pipeline. Reverse-merge is the cache-backed flow that lets
165    /// `dodot transform check` propagate edits from the deployed file
166    /// back into the source by writing a unified diff (and, for
167    /// ambiguous edits, dodot-conflict marker blocks).
168    ///
169    /// Default `false`. Generative preprocessors that emit a
170    /// [`tracked_render`](ExpandedFile::tracked_render) and want their
171    /// sources scanned for unresolved markers before expansion override
172    /// this to `true`. The pipeline uses the flag to:
173    ///
174    /// - Decide whether to run [`crate::preprocessing::conflict::
175    ///   ensure_no_unresolved_markers`] on the source bytes before
176    ///   calling `expand` — refusing to render a template that already
177    ///   carries an unresolved conflict block (otherwise the markers
178    ///   would deploy as garbage).
179    /// - Filter the set of files visited by `dodot transform check` to
180    ///   those whose preprocessor knows how to write reverse-diffs.
181    ///
182    /// A preprocessor that returns `true` here MUST also populate
183    /// `tracked_render` on its `ExpandedFile`s; otherwise the cache
184    /// layer has no marker stream to feed into burgertocow.
185    fn supports_reverse_merge(&self) -> bool {
186        false
187    }
188}
189
190/// Registry of available preprocessors.
191///
192/// Preprocessors are checked in registration order. The first preprocessor
193/// whose `matches_extension` returns true for a filename wins.
194pub struct PreprocessorRegistry {
195    preprocessors: Vec<Box<dyn Preprocessor>>,
196}
197
198impl PreprocessorRegistry {
199    /// Create an empty registry.
200    pub fn new() -> Self {
201        Self {
202            preprocessors: Vec::new(),
203        }
204    }
205
206    /// Register a preprocessor.
207    pub fn register(&mut self, preprocessor: Box<dyn Preprocessor>) {
208        self.preprocessors.push(preprocessor);
209    }
210
211    /// Find the preprocessor that handles a given filename, if any.
212    pub fn find_for_file(&self, filename: &str) -> Option<&dyn Preprocessor> {
213        self.preprocessors
214            .iter()
215            .find(|p| p.matches_extension(filename))
216            .map(|p| p.as_ref())
217    }
218
219    /// Whether any registered preprocessor handles this filename.
220    pub fn is_preprocessor_file(&self, filename: &str) -> bool {
221        self.find_for_file(filename).is_some()
222    }
223
224    /// Whether the registry has any preprocessors registered.
225    pub fn is_empty(&self) -> bool {
226        self.preprocessors.is_empty()
227    }
228
229    /// Number of registered preprocessors.
230    pub fn len(&self) -> usize {
231        self.preprocessors.len()
232    }
233}
234
235impl Default for PreprocessorRegistry {
236    fn default() -> Self {
237        Self::new()
238    }
239}
240
241/// The default registry used on the normal execution path.
242///
243/// Contains all user-facing preprocessors:
244/// - [`unarchive::UnarchivePreprocessor`] for `.tar.gz` extraction
245/// - [`template::TemplatePreprocessor`] for Jinja2-style templates
246///
247/// The [`identity`] preprocessor is test-only and is intentionally *not*
248/// registered here (it would match innocuous-looking `.identity` files in
249/// user dotfiles).
250///
251/// `secret_config` controls whether the template preprocessor gets a
252/// [`SecretRegistry`] wired in. When `[secret] enabled = true` and at
253/// least one provider is enabled, this function builds the registry,
254/// wires it onto the template preprocessor, and returns it via
255/// `out_secret_registry` so the caller can run preflight checks
256/// (`crate::secret::preflight`) before any rendering begins. When
257/// secrets are disabled, the template preprocessor is built without a
258/// registry and `secret(...)` calls in templates surface a config-
259/// pointing render error.
260pub fn default_registry(
261    preprocessor_config: &crate::config::PreprocessorSection,
262    secret_config: &crate::config::SecretSection,
263    pather: &dyn crate::paths::Pather,
264    command_runner: std::sync::Arc<dyn crate::datastore::CommandRunner>,
265) -> Result<(
266    PreprocessorRegistry,
267    Option<std::sync::Arc<crate::secret::SecretRegistry>>,
268)> {
269    use std::sync::Arc;
270
271    let mut registry = PreprocessorRegistry::new();
272    registry.register(Box::new(unarchive::UnarchivePreprocessor::new()));
273
274    let template_config = &preprocessor_config.template;
275    let mut tpl = template::TemplatePreprocessor::new(
276        template_config.extensions.clone(),
277        template_config.vars.clone(),
278        pather,
279    )?;
280
281    let secret_registry = if secret_config.enabled {
282        build_secret_registry(
283            secret_config,
284            Arc::clone(&command_runner),
285            pather.dotfiles_root(),
286        )
287    } else {
288        None
289    };
290
291    if let Some(sr) = &secret_registry {
292        tpl = tpl.with_secret_registry(Arc::clone(sr));
293    }
294
295    registry.register(Box::new(tpl));
296
297    // Whole-file secret preprocessors per `secrets.lex` §4 — opt-in
298    // via `[preprocessor.age|gpg] enabled = true`. Off by default so
299    // a fresh install never shells out to `age` / `gpg` on random
300    // files. Identity for age comes from config first; an empty
301    // string defers to the runtime defaults (`from_env`).
302    if preprocessor_config.age.enabled {
303        let identity_str = preprocessor_config.age.identity.trim();
304        let pp = if identity_str.is_empty() {
305            age::AgePreprocessor::from_env(Arc::clone(&command_runner))
306        } else {
307            age::AgePreprocessor::new(
308                Arc::clone(&command_runner),
309                std::path::PathBuf::from(identity_str),
310                preprocessor_config.age.extensions.clone(),
311            )
312        };
313        registry.register(Box::new(pp));
314    }
315
316    if preprocessor_config.gpg.enabled {
317        registry.register(Box::new(gpg::GpgPreprocessor::new(
318            Arc::clone(&command_runner),
319            preprocessor_config.gpg.extensions.clone(),
320        )));
321    }
322
323    Ok((registry, secret_registry))
324}
325
326/// Construct a [`crate::secret::SecretRegistry`] from the per-provider
327/// `[secret.providers.*]` config blocks. Each enabled provider is
328/// constructed with the shared `CommandRunner` (so tests can inject a
329/// mock runner) and registered. Returns `None` if no provider is
330/// enabled — the secrets layer treats that case as "secrets feature
331/// fully off" and templates with `secret(...)` calls fail loudly.
332///
333/// `dotfiles_root` is the anchor for relative paths in
334/// provider-specific references — currently used by the `sops`
335/// provider, whose `sops:secrets.yaml#k.p` references resolve
336/// `secrets.yaml` relative to this directory.
337///
338/// Public so `commands::up` can build a single registry from the root
339/// config to run [`crate::secret::preflight`] once per run, before any
340/// per-pack template rendering begins (`secrets.lex` §5.4).
341pub fn build_secret_registry(
342    config: &crate::config::SecretSection,
343    runner: std::sync::Arc<dyn crate::datastore::CommandRunner>,
344    dotfiles_root: &std::path::Path,
345) -> Option<std::sync::Arc<crate::secret::SecretRegistry>> {
346    use std::path::PathBuf;
347    use std::sync::Arc;
348
349    let mut reg = crate::secret::SecretRegistry::new();
350    let mut any_enabled = false;
351
352    if config.providers.pass.enabled {
353        let store_dir = if config.providers.pass.store_dir.is_empty() {
354            // Defer to env / default: PassProvider::from_env reads
355            // $PASSWORD_STORE_DIR or falls back to ~/.password-store.
356            None
357        } else {
358            Some(PathBuf::from(&config.providers.pass.store_dir))
359        };
360        let provider = match store_dir {
361            Some(dir) => crate::secret::PassProvider::new(Arc::clone(&runner), dir),
362            None => crate::secret::PassProvider::from_env(Arc::clone(&runner)),
363        };
364        reg.register(Arc::new(provider));
365        any_enabled = true;
366    }
367
368    if config.providers.op.enabled {
369        let provider = crate::secret::OpProvider::from_env(Arc::clone(&runner));
370        reg.register(Arc::new(provider));
371        any_enabled = true;
372    }
373
374    if config.providers.bw.enabled {
375        let provider = crate::secret::BwProvider::from_env(Arc::clone(&runner));
376        reg.register(Arc::new(provider));
377        any_enabled = true;
378    }
379
380    if config.providers.sops.enabled {
381        // sops anchors relative file paths (`sops:secrets.yaml#k`)
382        // at the dotfiles root, so `.sops.yaml` configuration in the
383        // repo root applies. Absolute paths in references bypass
384        // this anchor.
385        let provider =
386            crate::secret::SopsProvider::new(Arc::clone(&runner), dotfiles_root.to_path_buf());
387        reg.register(Arc::new(provider));
388        any_enabled = true;
389    }
390
391    if config.providers.keychain.enabled {
392        // macOS Keychain (`security` CLI). On non-macOS hosts the
393        // probe surfaces NotInstalled with a "use secret-tool"
394        // pointer; we still register the provider so users with
395        // mixed-platform dotfiles get a deterministic preflight
396        // failure rather than a silent "no provider for scheme"
397        // mismatch.
398        let provider = crate::secret::KeychainProvider::from_env(Arc::clone(&runner));
399        reg.register(Arc::new(provider));
400        any_enabled = true;
401    }
402
403    if config.providers.secret_tool.enabled {
404        // freedesktop Secret Service (`secret-tool` CLI). Same
405        // cross-platform stance as `keychain` above.
406        let provider = crate::secret::SecretToolProvider::from_env(Arc::clone(&runner));
407        reg.register(Arc::new(provider));
408        any_enabled = true;
409    }
410
411    if any_enabled {
412        Some(Arc::new(reg))
413    } else {
414        None
415    }
416}
417
#[cfg(test)]
mod tests {
    use super::*;

    // Compile-time check: `Preprocessor` must stay object-safe, because
    // the registry stores preprocessors as `Box<dyn Preprocessor>`.
    #[allow(dead_code)]
    fn assert_object_safe(_: &dyn Preprocessor) {}

    #[allow(dead_code)]
    fn assert_boxable(_: Box<dyn Preprocessor>) {}

    /// Minimal preprocessor with a caller-chosen name and filename
    /// suffix. Lets ordering tests register several preprocessors that
    /// match the same file yet remain distinguishable by `name()`.
    struct NamedStub {
        name: &'static str,
        suffix: &'static str,
    }

    impl Preprocessor for NamedStub {
        fn name(&self) -> &str {
            self.name
        }

        fn transform_type(&self) -> TransformType {
            TransformType::Opaque
        }

        fn matches_extension(&self, filename: &str) -> bool {
            filename.ends_with(self.suffix)
        }

        fn stripped_name(&self, filename: &str) -> String {
            filename
                .strip_suffix(self.suffix)
                .unwrap_or(filename)
                .to_string()
        }

        fn expand(&self, _source: &Path, _fs: &dyn Fs) -> Result<Vec<ExpandedFile>> {
            // Registry tests only exercise lookup; nothing is expanded.
            Ok(Vec::new())
        }
    }

    #[test]
    fn transform_type_eq() {
        assert_eq!(TransformType::Generative, TransformType::Generative);
        assert_ne!(TransformType::Generative, TransformType::Opaque);
    }

    #[test]
    fn empty_registry() {
        let registry = PreprocessorRegistry::new();
        assert!(registry.is_empty());
        assert_eq!(registry.len(), 0);
        assert!(!registry.is_preprocessor_file("anything.txt"));
        assert!(registry.find_for_file("anything.txt").is_none());
    }

    #[test]
    fn registry_finds_preprocessor() {
        let mut registry = PreprocessorRegistry::new();
        registry.register(Box::new(
            crate::preprocessing::identity::IdentityPreprocessor::new(),
        ));

        assert!(!registry.is_empty());
        assert_eq!(registry.len(), 1);
        assert!(registry.is_preprocessor_file("config.toml.identity"));
        assert!(!registry.is_preprocessor_file("config.toml"));

        let found = registry.find_for_file("config.toml.identity").unwrap();
        assert_eq!(found.name(), "identity");
    }

    #[test]
    fn registry_first_match_wins() {
        // Both stubs match the same suffix but carry different names, so
        // the assertion fails if lookup ever prefers the later one.
        let mut registry = PreprocessorRegistry::new();
        registry.register(Box::new(NamedStub {
            name: "first",
            suffix: ".stub",
        }));
        registry.register(Box::new(NamedStub {
            name: "second",
            suffix: ".stub",
        }));

        let found = registry.find_for_file("test.stub").unwrap();
        assert_eq!(found.name(), "first");

        // Reversing the registration order must reverse the winner.
        let mut reversed = PreprocessorRegistry::new();
        reversed.register(Box::new(NamedStub {
            name: "second",
            suffix: ".stub",
        }));
        reversed.register(Box::new(NamedStub {
            name: "first",
            suffix: ".stub",
        }));

        let found = reversed.find_for_file("test.stub").unwrap();
        assert_eq!(found.name(), "second");
    }

    #[test]
    fn registry_multiple_different_preprocessors() {
        let mut registry = PreprocessorRegistry::new();
        registry.register(Box::new(
            crate::preprocessing::identity::IdentityPreprocessor::new(),
        ));
        registry.register(Box::new(
            crate::preprocessing::unarchive::UnarchivePreprocessor::new(),
        ));

        assert_eq!(registry.len(), 2);

        // Each matches its own extension
        assert!(registry.is_preprocessor_file("config.toml.identity"));
        assert!(registry.is_preprocessor_file("bin.tar.gz"));

        // Neither matches the other
        let identity = registry.find_for_file("config.toml.identity").unwrap();
        assert_eq!(identity.name(), "identity");

        let unarchive = registry.find_for_file("bin.tar.gz").unwrap();
        assert_eq!(unarchive.name(), "unarchive");

        // Non-preprocessor files still return None
        assert!(registry.find_for_file("regular.txt").is_none());
    }

    /// Stand-in `CommandRunner` for `default_registry` tests — the
    /// preprocessors are constructed but never invoked, so any
    /// runner that satisfies the trait works.
    struct NoopRunner;
    impl crate::datastore::CommandRunner for NoopRunner {
        fn run(&self, _: &str, _: &[String]) -> Result<crate::datastore::CommandOutput> {
            unreachable!("default_registry tests do not invoke runners")
        }
    }

    /// Runs `default_registry` with secrets fully disabled and returns
    /// only the preprocessor registry.
    fn make_default_registry(
        preprocessor: crate::config::PreprocessorSection,
    ) -> PreprocessorRegistry {
        let env = crate::testing::TempEnvironment::builder().build();
        let secret = crate::config::SecretSection {
            enabled: false,
            providers: crate::config::SecretProvidersSection {
                pass: crate::config::SecretProviderPass {
                    enabled: false,
                    store_dir: String::new(),
                },
                op: crate::config::SecretProviderOp { enabled: false },
                bw: crate::config::SecretProviderBw { enabled: false },
                sops: crate::config::SecretProviderSops { enabled: false },
                keychain: crate::config::SecretProviderKeychain { enabled: false },
                secret_tool: crate::config::SecretProviderSecretTool { enabled: false },
            },
        };
        let runner: std::sync::Arc<dyn crate::datastore::CommandRunner> =
            std::sync::Arc::new(NoopRunner);
        let (reg, _) =
            default_registry(&preprocessor, &secret, env.paths.as_ref(), runner).unwrap();
        reg
    }

    /// Baseline preprocessor config: templates on `.tmpl`, with age and
    /// gpg present but disabled.
    fn empty_preprocessor_section() -> crate::config::PreprocessorSection {
        crate::config::PreprocessorSection {
            enabled: true,
            template: crate::config::PreprocessorTemplateSection {
                extensions: vec!["tmpl".into()],
                vars: Default::default(),
                no_reverse: Vec::new(),
            },
            age: crate::config::PreprocessorAgeSection {
                enabled: false,
                extensions: vec!["age".into()],
                identity: String::new(),
            },
            gpg: crate::config::PreprocessorGpgSection {
                enabled: false,
                extensions: vec!["gpg".into(), "asc".into()],
            },
        }
    }

    #[test]
    fn default_registry_does_not_register_age_or_gpg_when_disabled() {
        // The opt-in posture from `secrets.lex` §4.1 — without
        // explicit config flips, neither age nor gpg is registered
        // and `*.age` / `*.gpg` files in a pack flow through as
        // regular files (deployed verbatim, no decryption).
        let reg = make_default_registry(empty_preprocessor_section());
        assert!(reg.find_for_file("id_ed25519.age").is_none());
        assert!(reg.find_for_file("Brewfile.gpg").is_none());
        assert!(reg.find_for_file("notes.asc").is_none());
        // Sanity: template + unarchive are still registered (the
        // pre-S3 default set).
        assert!(reg.find_for_file("config.toml.tmpl").is_some());
        assert!(reg.find_for_file("bin.tar.gz").is_some());
    }

    #[test]
    fn default_registry_registers_age_when_enabled() {
        let mut pre = empty_preprocessor_section();
        pre.age.enabled = true;
        pre.age.identity = "/k/id.txt".into();
        let reg = make_default_registry(pre);
        let pp = reg.find_for_file("id_ed25519.age").unwrap();
        assert_eq!(pp.name(), "age");
    }

    #[test]
    fn default_registry_registers_gpg_when_enabled_for_both_extensions() {
        let mut pre = empty_preprocessor_section();
        pre.gpg.enabled = true;
        let reg = make_default_registry(pre);
        let gpg_pp = reg.find_for_file("Brewfile.gpg").unwrap();
        assert_eq!(gpg_pp.name(), "gpg");
        let asc_pp = reg.find_for_file("notes.txt.asc").unwrap();
        assert_eq!(asc_pp.name(), "gpg");
    }

    #[test]
    fn registry_does_not_match_partial_extension() {
        let mut registry = PreprocessorRegistry::new();
        registry.register(Box::new(
            crate::preprocessing::identity::IdentityPreprocessor::new(),
        ));

        // "identity" alone is not ".identity"
        assert!(!registry.is_preprocessor_file("identity"));
        // File without the dot prefix shouldn't match
        assert!(!registry.is_preprocessor_file("fileidentity"));
    }
}