dodot_lib/preprocessing/mod.rs
1//! Preprocessing pipeline — transforms source files before handler dispatch.
2//!
3//! Preprocessors expand files whose version-controlled source differs from
4//! the deployed artifact (templates, plists, encrypted secrets). The
5//! preprocessing phase runs before handler dispatch, producing virtual
6//! entries that downstream handlers (symlink, shell, path, install,
7//! homebrew) consume transparently.
8//!
9//! See `docs/proposals/preprocessing-pipeline.lex` for the full design.
10
11pub mod age;
12pub mod baseline;
13pub mod conflict;
14pub mod divergence;
15pub mod gpg;
16pub mod identity;
17pub mod no_reverse;
18pub mod pipeline;
19pub mod reverse_merge;
20pub mod template;
21pub mod unarchive;
22
23pub use pipeline::PreprocessMode;
24
25use std::path::{Path, PathBuf};
26
27use serde::Serialize;
28
29use crate::fs::Fs;
30use crate::Result;
31
32/// The safety model for a preprocessor's transformation.
33#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
34pub enum TransformType {
35 /// Source generates destination; reversal is heuristic (templates).
36 Generative,
37 /// Source and destination are lossless representations (plists).
38 Representational,
39 /// Source is decoded on deploy; no reverse path (GPG).
40 Opaque,
41}
42
43/// One entry in a per-render secrets sidecar — a span of lines whose
44/// content was produced by a `secret(...)` call, paired with the
45/// reference that produced it.
46///
47/// Lines are 0-indexed and `start..end` is half-open. A single-line
48/// secret occupies line `start` and is encoded as `end == start + 1`
49/// (`start == end` would be an empty range and is never produced).
50/// For Phase S1 every entry is single-line: multi-line secrets are
51/// refused at resolution time per `secrets.lex` §3.4. The `end` field
52/// is preserved in the schema for forward-compatibility but the
53/// renderer never produces `end > start + 1`.
54///
55/// Persisted to disk under `<baseline>.secret.json` (see
56/// `secrets.lex` §3.3); consumed by the dry-run preview rendering
57/// (§7.4) to mask resolved values, and by the burgertocow mask
58/// integration (issue arthur-debert/burgertocow#13) to skip those
59/// lines from the reverse diff.
60#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
61pub struct SecretLineRange {
62 /// First line, 0-indexed, inclusive.
63 pub start: usize,
64 /// One past the last line, 0-indexed, exclusive. `start + 1` for
65 /// a single-line value.
66 pub end: usize,
67 /// The original `secret(...)` argument string, e.g.
68 /// `"op://Personal/DB/password"`. Surfaces in the dry-run
69 /// `[SECRET: <reference>]` placeholder.
70 pub reference: String,
71}
72
73/// A single file produced by a preprocessor's expansion.
74///
75/// Construct ad-hoc via the struct literal; tests commonly use
76/// `ExpandedFile { relative_path, content, ..Default::default() }` to
77/// fill in the optional cache-related fields.
78#[derive(Debug, Clone, Default)]
79pub struct ExpandedFile {
80 /// Path relative to the expansion output (usually just the filename).
81 pub relative_path: PathBuf,
82 /// The file content.
83 pub content: Vec<u8>,
84 /// Whether this entry is a directory marker.
85 pub is_dir: bool,
86 /// Marker-annotated rendered output, populated by Generative
87 /// preprocessors that support cache-backed reverse-diff (templates).
88 /// `None` for Representational, Opaque, or generative preprocessors
89 /// that don't track variable boundaries (e.g. unarchive).
90 ///
91 /// When present, the pipeline persists this string in the baseline
92 /// cache so the clean filter and `dodot transform check` can compute
93 /// reverse-diffs without re-rendering — the latter being important
94 /// because re-rendering can re-trigger secret-provider auth prompts.
95 pub tracked_render: Option<String>,
96 /// SHA-256 of the rendering context (variables, env values resolved
97 /// at render time). `None` for preprocessors that don't have a
98 /// meaningful context concept.
99 ///
100 /// The pipeline pairs this with the source-file hash and rendered
101 /// content hash in the baseline cache. `dodot up` re-rendering and
102 /// install/homebrew sentinels both use the context hash to decide
103 /// when work is stale.
104 pub context_hash: Option<[u8; 32]>,
105 /// Per-render secret-line tracking. Empty when no `secret(...)`
106 /// calls fired (the common case today; will be the common case
107 /// forever for templates that don't use secrets). Populated by
108 /// `TemplatePreprocessor` when a [`crate::secret::SecretRegistry`]
109 /// is wired in. The pipeline persists this as a sidecar JSON
110 /// alongside the baseline.
111 pub secret_line_ranges: Vec<SecretLineRange>,
112 /// Unix mode the rendered datastore file should be chmod'd to
113 /// after the pipeline writes it. `None` (the default) leaves
114 /// the file at whatever umask-derived mode `write_file` produced
115 /// — the pre-S3 behavior for templates / unarchive output.
116 /// Whole-file secret preprocessors (`age`, `gpg`) set this to
117 /// `Some(0o600)` to enforce `secrets.lex` §4.3: rendered
118 /// secrets land 0600 regardless of the source file's mode.
119 /// Ignored when `is_dir` is true.
120 pub deploy_mode: Option<u32>,
121}
122
123/// The core preprocessor abstraction.
124///
125/// Each preprocessor is a small struct that implements this trait.
126/// Preprocessors are stored in a [`PreprocessorRegistry`] and dispatched
127/// by file extension at preprocessing time.
128///
129/// Preprocessors are pure transformers — they read source files and
130/// produce expanded content. Writing to the datastore is handled by the
131/// pipeline, not by individual preprocessors.
132pub trait Preprocessor: Send + Sync {
133 /// Unique name for this preprocessor (e.g. `"template"`, `"plist"`).
134 fn name(&self) -> &str;
135
136 /// The safety model for this transformation.
137 fn transform_type(&self) -> TransformType;
138
139 /// Whether this preprocessor handles a file with the given name.
140 fn matches_extension(&self, filename: &str) -> bool;
141
142 /// Strip the preprocessor extension to get the logical filename.
143 /// e.g. `"config.toml.tmpl"` → `"config.toml"`.
144 fn stripped_name(&self, filename: &str) -> String;
145
146 /// Expand the source file into one or more output files.
147 ///
148 /// For single-file preprocessors (templates): returns one entry.
149 /// For multi-file preprocessors (archives): returns many entries.
150 ///
151 /// The `source` path points to the original file in the pack directory.
152 ///
153 /// # Memory
154 ///
155 /// Expanded content is held fully in memory via [`Vec<u8>`]. This is
156 /// appropriate for dotfile-sized payloads (configs, small scripts,
157 /// small archives). Preprocessors that may handle very large inputs
158 /// (e.g. multi-hundred-MB archives of pre-built toolchains) should
159 /// consider adding a streaming path rather than materialising the
160 /// entire decoded stream at once.
161 fn expand(&self, source: &Path, fs: &dyn Fs) -> Result<Vec<ExpandedFile>>;
162
163 /// Whether this preprocessor participates in the reverse-merge
164 /// pipeline. Reverse-merge is the cache-backed flow that lets
165 /// `dodot transform check` propagate edits from the deployed file
166 /// back into the source by writing a unified diff (and, for
167 /// ambiguous edits, dodot-conflict marker blocks).
168 ///
169 /// Default `false`. Generative preprocessors that emit a
170 /// [`tracked_render`](ExpandedFile::tracked_render) and want their
171 /// sources scanned for unresolved markers before expansion override
172 /// this to `true`. The pipeline uses the flag to:
173 ///
174 /// - Decide whether to run [`crate::preprocessing::conflict::
175 /// ensure_no_unresolved_markers`] on the source bytes before
176 /// calling `expand` — refusing to render a template that already
177 /// carries an unresolved conflict block (otherwise the markers
178 /// would deploy as garbage).
179 /// - Filter the set of files visited by `dodot transform check` to
180 /// those whose preprocessor knows how to write reverse-diffs.
181 ///
182 /// A preprocessor that returns `true` here MUST also populate
183 /// `tracked_render` on its `ExpandedFile`s; otherwise the cache
184 /// layer has no marker stream to feed into burgertocow.
185 fn supports_reverse_merge(&self) -> bool {
186 false
187 }
188}
189
190/// Registry of available preprocessors.
191///
192/// Preprocessors are checked in registration order. The first preprocessor
193/// whose `matches_extension` returns true for a filename wins.
194pub struct PreprocessorRegistry {
195 preprocessors: Vec<Box<dyn Preprocessor>>,
196}
197
198impl PreprocessorRegistry {
199 /// Create an empty registry.
200 pub fn new() -> Self {
201 Self {
202 preprocessors: Vec::new(),
203 }
204 }
205
206 /// Register a preprocessor.
207 pub fn register(&mut self, preprocessor: Box<dyn Preprocessor>) {
208 self.preprocessors.push(preprocessor);
209 }
210
211 /// Find the preprocessor that handles a given filename, if any.
212 pub fn find_for_file(&self, filename: &str) -> Option<&dyn Preprocessor> {
213 self.preprocessors
214 .iter()
215 .find(|p| p.matches_extension(filename))
216 .map(|p| p.as_ref())
217 }
218
219 /// Whether any registered preprocessor handles this filename.
220 pub fn is_preprocessor_file(&self, filename: &str) -> bool {
221 self.find_for_file(filename).is_some()
222 }
223
224 /// Whether the registry has any preprocessors registered.
225 pub fn is_empty(&self) -> bool {
226 self.preprocessors.is_empty()
227 }
228
229 /// Number of registered preprocessors.
230 pub fn len(&self) -> usize {
231 self.preprocessors.len()
232 }
233}
234
235impl Default for PreprocessorRegistry {
236 fn default() -> Self {
237 Self::new()
238 }
239}
240
241/// The default registry used on the normal execution path.
242///
243/// Contains all user-facing preprocessors:
244/// - [`unarchive::UnarchivePreprocessor`] for `.tar.gz` extraction
245/// - [`template::TemplatePreprocessor`] for Jinja2-style templates
246///
247/// The [`identity`] preprocessor is test-only and is intentionally *not*
248/// registered here (it would match innocuous-looking `.identity` files in
249/// user dotfiles).
250///
251/// `secret_config` controls whether the template preprocessor gets a
252/// [`SecretRegistry`] wired in. When `[secret] enabled = true` and at
253/// least one provider is enabled, this function builds the registry,
254/// wires it onto the template preprocessor, and returns it via
255/// `out_secret_registry` so the caller can run preflight checks
256/// (`crate::secret::preflight`) before any rendering begins. When
257/// secrets are disabled, the template preprocessor is built without a
258/// registry and `secret(...)` calls in templates surface a config-
259/// pointing render error.
260pub fn default_registry(
261 preprocessor_config: &crate::config::PreprocessorSection,
262 secret_config: &crate::config::SecretSection,
263 pather: &dyn crate::paths::Pather,
264 command_runner: std::sync::Arc<dyn crate::datastore::CommandRunner>,
265) -> Result<(
266 PreprocessorRegistry,
267 Option<std::sync::Arc<crate::secret::SecretRegistry>>,
268)> {
269 use std::sync::Arc;
270
271 let mut registry = PreprocessorRegistry::new();
272 registry.register(Box::new(unarchive::UnarchivePreprocessor::new()));
273
274 let template_config = &preprocessor_config.template;
275 let mut tpl = template::TemplatePreprocessor::new(
276 template_config.extensions.clone(),
277 template_config.vars.clone(),
278 pather,
279 )?;
280
281 let secret_registry = if secret_config.enabled {
282 build_secret_registry(
283 secret_config,
284 Arc::clone(&command_runner),
285 pather.dotfiles_root(),
286 )
287 } else {
288 None
289 };
290
291 if let Some(sr) = &secret_registry {
292 tpl = tpl.with_secret_registry(Arc::clone(sr));
293 }
294
295 registry.register(Box::new(tpl));
296
297 // Whole-file secret preprocessors per `secrets.lex` §4 — opt-in
298 // via `[preprocessor.age|gpg] enabled = true`. Off by default so
299 // a fresh install never shells out to `age` / `gpg` on random
300 // files. Identity for age comes from config first; an empty
301 // string defers to the runtime defaults (`from_env`).
302 if preprocessor_config.age.enabled {
303 let identity_str = preprocessor_config.age.identity.trim();
304 let pp = if identity_str.is_empty() {
305 age::AgePreprocessor::from_env(Arc::clone(&command_runner))
306 } else {
307 age::AgePreprocessor::new(
308 Arc::clone(&command_runner),
309 std::path::PathBuf::from(identity_str),
310 preprocessor_config.age.extensions.clone(),
311 )
312 };
313 registry.register(Box::new(pp));
314 }
315
316 if preprocessor_config.gpg.enabled {
317 registry.register(Box::new(gpg::GpgPreprocessor::new(
318 Arc::clone(&command_runner),
319 preprocessor_config.gpg.extensions.clone(),
320 )));
321 }
322
323 Ok((registry, secret_registry))
324}
325
326/// Construct a [`crate::secret::SecretRegistry`] from the per-provider
327/// `[secret.providers.*]` config blocks. Each enabled provider is
328/// constructed with the shared `CommandRunner` (so tests can inject a
329/// mock runner) and registered. Returns `None` if no provider is
330/// enabled — the secrets layer treats that case as "secrets feature
331/// fully off" and templates with `secret(...)` calls fail loudly.
332///
333/// `dotfiles_root` is the anchor for relative paths in
334/// provider-specific references — currently used by the `sops`
335/// provider, whose `sops:secrets.yaml#k.p` references resolve
336/// `secrets.yaml` relative to this directory.
337///
338/// Public so `commands::up` can build a single registry from the root
339/// config to run [`crate::secret::preflight`] once per run, before any
340/// per-pack template rendering begins (`secrets.lex` §5.4).
341pub fn build_secret_registry(
342 config: &crate::config::SecretSection,
343 runner: std::sync::Arc<dyn crate::datastore::CommandRunner>,
344 dotfiles_root: &std::path::Path,
345) -> Option<std::sync::Arc<crate::secret::SecretRegistry>> {
346 use std::path::PathBuf;
347 use std::sync::Arc;
348
349 let mut reg = crate::secret::SecretRegistry::new();
350 let mut any_enabled = false;
351
352 if config.providers.pass.enabled {
353 let store_dir = if config.providers.pass.store_dir.is_empty() {
354 // Defer to env / default: PassProvider::from_env reads
355 // $PASSWORD_STORE_DIR or falls back to ~/.password-store.
356 None
357 } else {
358 Some(PathBuf::from(&config.providers.pass.store_dir))
359 };
360 let provider = match store_dir {
361 Some(dir) => crate::secret::PassProvider::new(Arc::clone(&runner), dir),
362 None => crate::secret::PassProvider::from_env(Arc::clone(&runner)),
363 };
364 reg.register(Arc::new(provider));
365 any_enabled = true;
366 }
367
368 if config.providers.op.enabled {
369 let provider = crate::secret::OpProvider::from_env(Arc::clone(&runner));
370 reg.register(Arc::new(provider));
371 any_enabled = true;
372 }
373
374 if config.providers.bw.enabled {
375 let provider = crate::secret::BwProvider::from_env(Arc::clone(&runner));
376 reg.register(Arc::new(provider));
377 any_enabled = true;
378 }
379
380 if config.providers.sops.enabled {
381 // sops anchors relative file paths (`sops:secrets.yaml#k`)
382 // at the dotfiles root, so `.sops.yaml` configuration in the
383 // repo root applies. Absolute paths in references bypass
384 // this anchor.
385 let provider =
386 crate::secret::SopsProvider::new(Arc::clone(&runner), dotfiles_root.to_path_buf());
387 reg.register(Arc::new(provider));
388 any_enabled = true;
389 }
390
391 if config.providers.keychain.enabled {
392 // macOS Keychain (`security` CLI). On non-macOS hosts the
393 // probe surfaces NotInstalled with a "use secret-tool"
394 // pointer; we still register the provider so users with
395 // mixed-platform dotfiles get a deterministic preflight
396 // failure rather than a silent "no provider for scheme"
397 // mismatch.
398 let provider = crate::secret::KeychainProvider::from_env(Arc::clone(&runner));
399 reg.register(Arc::new(provider));
400 any_enabled = true;
401 }
402
403 if config.providers.secret_tool.enabled {
404 // freedesktop Secret Service (`secret-tool` CLI). Same
405 // cross-platform stance as `keychain` above.
406 let provider = crate::secret::SecretToolProvider::from_env(Arc::clone(&runner));
407 reg.register(Arc::new(provider));
408 any_enabled = true;
409 }
410
411 if any_enabled {
412 Some(Arc::new(reg))
413 } else {
414 None
415 }
416}
417
#[cfg(test)]
mod tests {
    use super::*;

    // Compile-time checks: `Preprocessor` must stay object-safe and
    // boxable. The functions are never called, hence `dead_code`.
    #[allow(dead_code)]
    fn assert_object_safe(_: &dyn Preprocessor) {}

    #[allow(dead_code)]
    fn assert_boxable(_: Box<dyn Preprocessor>) {}

    /// Minimal preprocessor whose name and matched suffix are chosen
    /// by the test. Distinct names let ordering tests tell which
    /// registration a lookup returned.
    struct NamedStub {
        name: &'static str,
        suffix: &'static str,
    }

    impl Preprocessor for NamedStub {
        fn name(&self) -> &str {
            self.name
        }

        fn transform_type(&self) -> TransformType {
            TransformType::Opaque
        }

        fn matches_extension(&self, filename: &str) -> bool {
            filename.ends_with(self.suffix)
        }

        fn stripped_name(&self, filename: &str) -> String {
            filename
                .strip_suffix(self.suffix)
                .unwrap_or(filename)
                .to_string()
        }

        fn expand(&self, _source: &Path, _fs: &dyn Fs) -> Result<Vec<ExpandedFile>> {
            unreachable!("registry lookup tests never expand files")
        }
    }

    #[test]
    fn transform_type_eq() {
        assert_eq!(TransformType::Generative, TransformType::Generative);
        assert_ne!(TransformType::Generative, TransformType::Opaque);
    }

    #[test]
    fn empty_registry() {
        let registry = PreprocessorRegistry::new();
        assert!(registry.is_empty());
        assert_eq!(registry.len(), 0);
        assert!(!registry.is_preprocessor_file("anything.txt"));
        assert!(registry.find_for_file("anything.txt").is_none());
    }

    #[test]
    fn registry_finds_preprocessor() {
        let mut registry = PreprocessorRegistry::new();
        registry.register(Box::new(
            crate::preprocessing::identity::IdentityPreprocessor::new(),
        ));

        assert!(!registry.is_empty());
        assert_eq!(registry.len(), 1);
        assert!(registry.is_preprocessor_file("config.toml.identity"));
        assert!(!registry.is_preprocessor_file("config.toml"));

        let found = registry.find_for_file("config.toml.identity").unwrap();
        assert_eq!(found.name(), "identity");
    }

    #[test]
    fn registry_first_match_wins() {
        // Both stubs claim the same suffix but report different
        // names, so the assertion fails if lookup ever returns
        // anything other than the earliest registration.
        let mut registry = PreprocessorRegistry::new();
        registry.register(Box::new(NamedStub {
            name: "first",
            suffix: ".dup",
        }));
        registry.register(Box::new(NamedStub {
            name: "second",
            suffix: ".dup",
        }));

        let found = registry.find_for_file("test.dup").unwrap();
        assert_eq!(found.name(), "first");
    }

    #[test]
    fn registry_allows_duplicate_extension_registrations() {
        let mut registry = PreprocessorRegistry::new();
        registry.register(Box::new(
            crate::preprocessing::identity::IdentityPreprocessor::new(),
        ));
        // Registering a second one that matches the same extension
        registry.register(Box::new(
            crate::preprocessing::identity::IdentityPreprocessor::with_extension("identity"),
        ));

        assert_eq!(registry.len(), 2);
        let found = registry.find_for_file("test.identity").unwrap();
        assert_eq!(found.name(), "identity");
    }

    #[test]
    fn registry_multiple_different_preprocessors() {
        let mut registry = PreprocessorRegistry::new();
        registry.register(Box::new(
            crate::preprocessing::identity::IdentityPreprocessor::new(),
        ));
        registry.register(Box::new(
            crate::preprocessing::unarchive::UnarchivePreprocessor::new(),
        ));

        assert_eq!(registry.len(), 2);

        // Each matches its own extension
        assert!(registry.is_preprocessor_file("config.toml.identity"));
        assert!(registry.is_preprocessor_file("bin.tar.gz"));

        // Neither matches the other
        let identity = registry.find_for_file("config.toml.identity").unwrap();
        assert_eq!(identity.name(), "identity");

        let unarchive = registry.find_for_file("bin.tar.gz").unwrap();
        assert_eq!(unarchive.name(), "unarchive");

        // Non-preprocessor files still return None
        assert!(registry.find_for_file("regular.txt").is_none());
    }

    /// Stand-in `CommandRunner` for `default_registry` tests — the
    /// preprocessors are constructed but never invoked, so any
    /// runner that satisfies the trait works.
    struct NoopRunner;
    impl crate::datastore::CommandRunner for NoopRunner {
        fn run(&self, _: &str, _: &[String]) -> Result<crate::datastore::CommandOutput> {
            unreachable!("default_registry tests do not invoke runners")
        }
    }

    /// Runs `default_registry` with secrets fully disabled and returns
    /// only the preprocessor registry.
    fn make_default_registry(
        preprocessor: crate::config::PreprocessorSection,
    ) -> PreprocessorRegistry {
        let env = crate::testing::TempEnvironment::builder().build();
        let secret = crate::config::SecretSection {
            enabled: false,
            providers: crate::config::SecretProvidersSection {
                pass: crate::config::SecretProviderPass {
                    enabled: false,
                    store_dir: String::new(),
                },
                op: crate::config::SecretProviderOp { enabled: false },
                bw: crate::config::SecretProviderBw { enabled: false },
                sops: crate::config::SecretProviderSops { enabled: false },
                keychain: crate::config::SecretProviderKeychain { enabled: false },
                secret_tool: crate::config::SecretProviderSecretTool { enabled: false },
            },
        };
        let runner: std::sync::Arc<dyn crate::datastore::CommandRunner> =
            std::sync::Arc::new(NoopRunner);
        let (reg, _) =
            default_registry(&preprocessor, &secret, env.paths.as_ref(), runner).unwrap();
        reg
    }

    /// Preprocessor config with templates on `.tmpl` and both
    /// whole-file secret preprocessors (age, gpg) disabled.
    fn empty_preprocessor_section() -> crate::config::PreprocessorSection {
        crate::config::PreprocessorSection {
            enabled: true,
            template: crate::config::PreprocessorTemplateSection {
                extensions: vec!["tmpl".into()],
                vars: Default::default(),
                no_reverse: Vec::new(),
            },
            age: crate::config::PreprocessorAgeSection {
                enabled: false,
                extensions: vec!["age".into()],
                identity: String::new(),
            },
            gpg: crate::config::PreprocessorGpgSection {
                enabled: false,
                extensions: vec!["gpg".into(), "asc".into()],
            },
        }
    }

    #[test]
    fn default_registry_does_not_register_age_or_gpg_when_disabled() {
        // The opt-in posture from `secrets.lex` §4.1 — without
        // explicit config flips, neither age nor gpg is registered
        // and `*.age` / `*.gpg` files in a pack flow through as
        // regular files (deployed verbatim, no decryption).
        let reg = make_default_registry(empty_preprocessor_section());
        assert!(reg.find_for_file("id_ed25519.age").is_none());
        assert!(reg.find_for_file("Brewfile.gpg").is_none());
        assert!(reg.find_for_file("notes.asc").is_none());
        // Sanity: template + unarchive are still registered (the
        // pre-S3 default set).
        assert!(reg.find_for_file("config.toml.tmpl").is_some());
        assert!(reg.find_for_file("bin.tar.gz").is_some());
    }

    #[test]
    fn default_registry_registers_age_when_enabled() {
        let mut pre = empty_preprocessor_section();
        pre.age.enabled = true;
        pre.age.identity = "/k/id.txt".into();
        let reg = make_default_registry(pre);
        let pp = reg.find_for_file("id_ed25519.age").unwrap();
        assert_eq!(pp.name(), "age");
    }

    #[test]
    fn default_registry_registers_gpg_when_enabled_for_both_extensions() {
        let mut pre = empty_preprocessor_section();
        pre.gpg.enabled = true;
        let reg = make_default_registry(pre);
        let gpg_pp = reg.find_for_file("Brewfile.gpg").unwrap();
        assert_eq!(gpg_pp.name(), "gpg");
        let asc_pp = reg.find_for_file("notes.txt.asc").unwrap();
        assert_eq!(asc_pp.name(), "gpg");
    }

    #[test]
    fn registry_does_not_match_partial_extension() {
        let mut registry = PreprocessorRegistry::new();
        registry.register(Box::new(
            crate::preprocessing::identity::IdentityPreprocessor::new(),
        ));

        // "identity" alone is not ".identity"
        assert!(!registry.is_preprocessor_file("identity"));
        // File without the dot prefix shouldn't match
        assert!(!registry.is_preprocessor_file("fileidentity"));
    }
}