Skip to main content

dodot_lib/preprocessing/
pipeline.rs

1//! Preprocessing pipeline — partitions, expands, and merges entries.
2//!
3//! This module contains the core pipeline function that runs between
4//! directory walking and rule matching. It identifies preprocessor files,
5//! expands them, writes results to the datastore, checks for collisions,
6//! and produces virtual entries for the handler pipeline.
7
8use std::collections::HashMap;
9use std::path::{Component, Path, PathBuf};
10use std::sync::Arc;
11
12use tracing::{debug, info};
13
14use crate::datastore::DataStore;
15use crate::fs::Fs;
16use crate::packs::Pack;
17use crate::paths::Pather;
18use crate::preprocessing::baseline::{cache_filename_for, hex_sha256, Baseline};
19use crate::preprocessing::divergence::DivergenceState;
20use crate::preprocessing::PreprocessorRegistry;
21use crate::rules::PackEntry;
22use crate::{DodotError, Result};
23
/// Execution envelope for the preprocessing pipeline.
///
/// `secrets.lex` §7.4 ("Auth Fatigue and Passive Commands") draws a
/// hard line between two envelopes:
///
/// - **Active** (`dodot up`): evaluates templates, batches `secret()`
///   calls per provider, prompts for auth once per run, writes
///   rendered files and baselines to disk.
/// - **Passive** (`dodot status`, `dodot up --dry-run`): MUST NOT
///   evaluate templates. Drift detection runs entirely off the
///   baseline cache. No provider calls. No datastore writes. No
///   baseline writes.
///
/// This enum is the single switch the pipeline gates on:
/// `preprocess_pack` compares its `mode` argument against `Passive`
/// and, on a match, returns early through its read-only passive half.
/// Active is the pre-existing behavior; Passive is the
/// §7.4-compliant read-only path. See issue #121.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PreprocessMode {
    /// Run preprocessors, write rendered outputs to the datastore,
    /// write baselines to the cache. The original `dodot up` path.
    Active,
    /// Read everything from the baseline cache. Skip preprocessor
    /// expansion (no provider calls), skip datastore writes, skip
    /// baseline writes. For preprocessor entries with no baseline
    /// yet, surface a passthrough placeholder so callers can render
    /// "unknown — run `dodot up` first" without falling through to
    /// template evaluation.
    Passive,
}
53
54/// Validate that a preprocessor-produced path is safe to materialise in
55/// the datastore: relative, no root/prefix/parent-dir components, and
56/// not effectively empty.
57///
58/// Malicious or malformed preprocessor output (tar-slip, absolute paths,
59/// `..` segments) can escape the pack namespace and overwrite arbitrary
60/// files. Empty paths (or paths made up only of `.` components) are
61/// rejected because they would silently fail at the datastore layer with
62/// an opaque error — here we produce a clean diagnostic naming the
63/// preprocessor and source file.
64fn validate_safe_relative_path(path: &Path, preprocessor: &str, source_file: &Path) -> Result<()> {
65    let mut has_normal = false;
66    for component in path.components() {
67        match component {
68            Component::Normal(_) => has_normal = true,
69            Component::CurDir => {}
70            Component::ParentDir | Component::RootDir | Component::Prefix(_) => {
71                return Err(DodotError::PreprocessorError {
72                    preprocessor: preprocessor.into(),
73                    source_file: source_file.to_path_buf(),
74                    message: format!(
75                        "unsafe path in preprocessor output: {} (absolute or contains `..`)",
76                        path.display()
77                    ),
78                });
79            }
80        }
81    }
82    if !has_normal {
83        return Err(DodotError::PreprocessorError {
84            preprocessor: preprocessor.into(),
85            source_file: source_file.to_path_buf(),
86            message: format!(
87                "preprocessor produced an empty output path (\"{}\"). This usually means a file like \
88                 `.tmpl` or `.identity` has no stem after stripping the preprocessor extension — \
89                 rename the source file so that it has a non-empty name after stripping.",
90                path.display()
91            ),
92        });
93    }
94    Ok(())
95}
96
/// Rebuild `path` from its `Normal` components only.
///
/// Dropping `CurDir` segments makes `./foo` and `foo` compare equal,
/// which collision detection relies on. Every non-`Normal` component
/// is discarded, so this is only meaningful on paths that have already
/// passed [`validate_safe_relative_path`].
fn normalize_relative(path: &Path) -> PathBuf {
    path.components()
        .filter_map(|part| match part {
            Component::Normal(segment) => Some(segment),
            _ => None,
        })
        .collect()
}
109
/// The result of preprocessing a pack's file entries.
///
/// Produced by `preprocess_pack`; `merged_entries` flattens the
/// regular and virtual entries into one sorted list for consumers that
/// want a single sequence.
#[derive(Debug)]
pub struct PreprocessResult {
    /// Entries that were NOT preprocessed (pass through unchanged).
    pub regular_entries: Vec<PackEntry>,
    /// Virtual entries created by preprocessing (point to datastore files).
    pub virtual_entries: Vec<PackEntry>,
    /// Maps virtual entry absolute_path → original source path in pack.
    pub source_map: HashMap<PathBuf, PathBuf>,
    /// Maps virtual entry absolute_path → in-memory rendered bytes.
    /// Populated for every virtual *file* entry the pipeline produces
    /// (directories carry no bytes), in both Active and Passive modes
    /// (Passive sources the bytes from `baseline.rendered_content`).
    /// Handlers that need the rendered content for sentinel hashing
    /// (`install`, `homebrew`) consult this map first and fall back to
    /// disk read for non-template files. Without this, Passive callers
    /// — where the rendered file isn't on disk — couldn't produce
    /// correct sentinels for templated install scripts or Brewfiles.
    /// See issue #121.
    pub rendered_bytes: HashMap<PathBuf, Arc<[u8]>>,
    /// Files whose deployed bytes diverged from the cached baseline and
    /// were therefore preserved instead of being overwritten. In the
    /// Active path this is only populated when `force` is `false` and a
    /// baseline exists for the file. Surfaced to the user as warnings —
    /// see `docs/proposals/preprocessing-pipeline.lex` §6.4.
    ///
    /// NOTE(review): the docs on `preprocess_pack_passive` say the
    /// passive path also reports divergent files here — confirm.
    pub skipped: Vec<SkippedRender>,
}
136
/// One file the pipeline refused to overwrite because its deployed
/// bytes diverged from the cached render.
///
/// `dodot up` records these so the caller can warn the user that their
/// edits were preserved. Resolution paths are `dodot transform check`
/// (auto-merge via the clean filter) or `dodot up --force` (overwrite).
#[derive(Debug, Clone)]
pub struct SkippedRender {
    /// Pack name (matches `Pack::name`, the on-disk directory name).
    pub pack: String,
    /// Virtual relative path inside the pack (post-strip), e.g.
    /// `config.toml` for a source `config.toml.tmpl`.
    pub virtual_relative: PathBuf,
    /// Absolute path of the deployed file we preserved (the location
    /// under the `preprocessed` handler's data directory).
    pub deployed_path: PathBuf,
    /// Which divergence state we observed. Always `OutputChanged` or
    /// `BothChanged` — the other states never trigger a skip.
    pub state: DivergenceState,
}
156
157impl PreprocessResult {
158    /// Create a passthrough result where all entries are regular (no preprocessing).
159    pub fn passthrough(entries: Vec<PackEntry>) -> Self {
160        Self {
161            regular_entries: entries,
162            virtual_entries: Vec::new(),
163            source_map: HashMap::new(),
164            rendered_bytes: HashMap::new(),
165            skipped: Vec::new(),
166        }
167    }
168
169    /// Return all entries (regular + virtual) merged into one list, sorted by relative path.
170    pub fn merged_entries(&self) -> Vec<PackEntry> {
171        let mut all = Vec::with_capacity(self.regular_entries.len() + self.virtual_entries.len());
172        all.extend(self.regular_entries.iter().cloned());
173        all.extend(self.virtual_entries.iter().cloned());
174        all.sort_by(|a, b| a.relative_path.cmp(&b.relative_path));
175        all
176    }
177}
178
/// The handler name used for preprocessor-expanded files in the datastore.
///
/// Passed as the handler argument to the datastore write calls, to
/// `Pather::handler_data_dir`, and to the baseline cache load/write
/// calls in this module.
const PREPROCESSED_HANDLER: &str = "preprocessed";
181
/// Result of checking whether the deployed file diverges from the
/// cached baseline. Returned by `check_divergence` and used by
/// [`preprocess_pack`] to decide whether to overwrite or preserve the
/// user's edits.
enum DivergenceCheck {
    /// No baseline, no deployed file, or content matches — proceed
    /// with the normal write.
    Proceed,
    /// Deployed bytes diverge from the baseline. Skip the write to
    /// preserve user edits; surface a warning to the caller.
    Skip {
        /// Observed divergence: `OutputChanged` or `BothChanged`.
        state: DivergenceState,
        /// Location of the deployed file that is being preserved.
        deployed_path: PathBuf,
    },
}
196
197/// Compare the prospective deployed file against the cached baseline.
198///
199/// Returns [`DivergenceCheck::Skip`] when the deployed bytes have
200/// changed since the last successful render — that is the case where
201/// re-rendering would silently destroy a user edit (see
202/// `docs/proposals/preprocessing-pipeline.lex` §6.4).
203///
204/// "Define stale-vs-new from file content, not the runtime
205/// environment": this check operates purely on bytes (source + deployed
206/// hash comparisons against the baseline). Env-var rotations are
207/// intentionally invisible here — users who change a referenced env var
208/// pick up the new value via `dodot up --force`.
209fn check_divergence(
210    fs: &dyn Fs,
211    paths: &dyn Pather,
212    pack_name: &str,
213    virtual_relative: &Path,
214    source_path: &Path,
215) -> Result<DivergenceCheck> {
216    let cache_filename = cache_filename_for(virtual_relative);
217    let baseline =
218        match Baseline::load(fs, paths, pack_name, PREPROCESSED_HANDLER, &cache_filename)? {
219            Some(b) => b,
220            // First-time deploy: no baseline to compare against. Writing
221            // is correct here — there's nothing to overwrite.
222            None => return Ok(DivergenceCheck::Proceed),
223        };
224
225    let deployed_path = paths
226        .handler_data_dir(pack_name, PREPROCESSED_HANDLER)
227        .join(virtual_relative);
228    if !fs.exists(&deployed_path) {
229        // Baseline says we deployed once, but the user (or some other
230        // tool) removed the deployed file. Treat as a fresh deploy —
231        // there's nothing to preserve.
232        return Ok(DivergenceCheck::Proceed);
233    }
234
235    let deployed_bytes = fs.read_file(&deployed_path)?;
236    if hex_sha256(&deployed_bytes) == baseline.rendered_hash {
237        return Ok(DivergenceCheck::Proceed);
238    }
239
240    // Deployed file diverges. Distinguish OutputChanged from BothChanged
241    // for a sharper warning. A read failure on the source is treated as
242    // "source unchanged" — the safer assumption when we can't tell.
243    let source_changed = match fs.read_file(source_path) {
244        Ok(bytes) => hex_sha256(&bytes) != baseline.source_hash,
245        Err(_) => false,
246    };
247    let state = if source_changed {
248        DivergenceState::BothChanged
249    } else {
250        DivergenceState::OutputChanged
251    };
252
253    Ok(DivergenceCheck::Skip {
254        state,
255        deployed_path,
256    })
257}
258
/// Run the preprocessing pipeline for a pack's file entries.
///
/// 1. Partition entries into preprocessor files vs regular files.
/// 2. **In `PreprocessMode::Active`** (real `dodot up` runs): for each
///    preprocessor file, expand, write results to datastore (unless the
///    deployed file has diverged from the cached baseline — see step 5),
///    write the baseline cache record.
/// 3. Create virtual `PackEntry`s pointing to the datastore files.
/// 4. Check for collisions between virtual and regular entries.
/// 5. **Divergence guard** (Active only): unless `force` is `true`,
///    compare the prospective deployed file against the cached baseline
///    before overwriting. When the deployed bytes have changed (the
///    user edited the deployed file directly), skip the write and
///    record a [`SkippedRender`] so the caller can warn the user. See
///    `docs/proposals/preprocessing-pipeline.lex` §6.4.
/// 6. **In `PreprocessMode::Passive`** (`dodot status`, `up --dry-run`):
///    skip every disk-mutating step. Sources are never read for marker
///    scans; preprocessors are never invoked (no provider calls); the
///    datastore is not touched. Virtual entries are still produced so
///    the rest of the planner can compute intents — their bytes come
///    from `baseline.rendered_content` when a baseline exists.
///    First-time pack templates with no baseline still surface a
///    placeholder virtual entry (so `dodot status` can render them as
///    "pending" under the stripped name) but with empty
///    `rendered_bytes`. Handlers that need rendered content for
///    sentinel hashing (`install`, `homebrew`) skip intent generation
///    for those placeholders rather than erroring out — the next real
///    `dodot up` plans them normally. See [`PreprocessMode`] and
///    `docs/proposals/secrets.lex` §7.4.
/// 7. Return the result for merging into the handler pipeline.
///
/// Set `force = true` to bypass the divergence guard. Surfaces as
/// `dodot up --force` in the CLI; needed when the user knows they want
/// to overwrite a divergent deployed file (e.g. after rotating an env
/// var that a template references). Ignored in `Passive` mode (no
/// writes happen there at all).
///
/// # Errors
///
/// In `Active` mode the first failure aborts the run and is returned:
/// a source read failure, unresolved conflict markers in a
/// reverse-merge-capable source, a preprocessor `expand` failure, an
/// unsafe or empty output path, a `DodotError::PreprocessorCollision`
/// when an expanded path is already claimed, a divergence-check
/// failure, or a datastore write failure. Outputs written before the
/// failing entry are not rolled back. Baseline *write* failures are
/// logged and swallowed.
///
/// # Panics
///
/// Panics if `registry.find_for_file` returns `None` for a filename
/// that `registry.is_preprocessor_file` accepted during partitioning.
#[allow(clippy::too_many_arguments)] // pipeline core: every parameter is load-bearing
pub fn preprocess_pack(
    entries: Vec<PackEntry>,
    registry: &PreprocessorRegistry,
    pack: &Pack,
    fs: &dyn Fs,
    datastore: &dyn DataStore,
    paths: &dyn Pather,
    mode: PreprocessMode,
    force: bool,
) -> Result<PreprocessResult> {
    let mut regular_entries = Vec::new();
    let mut preprocessor_entries = Vec::new();

    // Phase 1: Partition. Directories and entries without a file name
    // (empty `filename`) are routed by the registry check like any
    // other entry; only non-directory preprocessor files are expanded.
    for entry in entries {
        let filename = entry
            .relative_path
            .file_name()
            .map(|n| n.to_string_lossy().to_string())
            .unwrap_or_default();

        if !entry.is_dir && registry.is_preprocessor_file(&filename) {
            preprocessor_entries.push(entry);
        } else {
            regular_entries.push(entry);
        }
    }

    debug!(
        pack = %pack.name,
        preprocessor = preprocessor_entries.len(),
        regular = regular_entries.len(),
        "partitioned entries"
    );

    // Nothing to preprocess: every entry passes through unchanged,
    // regardless of mode.
    if preprocessor_entries.is_empty() {
        return Ok(PreprocessResult {
            regular_entries,
            virtual_entries: Vec::new(),
            source_map: HashMap::new(),
            rendered_bytes: HashMap::new(),
            skipped: Vec::new(),
        });
    }

    // Passive mode: read everything from the baseline cache. Skip
    // template evaluation entirely (no provider calls), skip
    // datastore writes, skip baseline writes. See `PreprocessMode`.
    if mode == PreprocessMode::Passive {
        return preprocess_pack_passive(
            preprocessor_entries,
            regular_entries,
            registry,
            pack,
            fs,
            paths,
        );
    }

    // Phase 2 & 3: Expand and create virtual entries
    let mut virtual_entries = Vec::new();
    let mut source_map = HashMap::new();
    let mut rendered_bytes: HashMap<PathBuf, Arc<[u8]>> = HashMap::new();
    let mut skipped: Vec<SkippedRender> = Vec::new();

    // Tracks claimed paths for collision detection. Seeded with regular
    // entries; virtual entries are added as they're created so two
    // preprocessors can't both produce the same virtual path (e.g.
    // `config.toml.identity` and `config.toml.tmpl` both expanding to
    // `config.toml`).
    let mut claimed_paths: std::collections::HashSet<PathBuf> = regular_entries
        .iter()
        .map(|e| e.relative_path.clone())
        .collect();

    for entry in &preprocessor_entries {
        let filename = entry
            .relative_path
            .file_name()
            .map(|n| n.to_string_lossy().to_string())
            .unwrap_or_default();

        let preprocessor = registry
            .find_for_file(&filename)
            .expect("already checked in partition");

        info!(
            pack = %pack.name,
            preprocessor = preprocessor.name(),
            file = %filename,
            "expanding"
        );

        // Safety gate: refuse to expand a source carrying unresolved
        // dodot-conflict markers. Otherwise the markers would render
        // verbatim through the template engine and deploy as broken
        // config. Gated on `supports_reverse_merge` so non-tracking
        // preprocessors (unarchive, identity) don't pay the read cost
        // — their sources can't naturally carry the marker token.
        //
        // Lossy UTF-8 conversion: we read raw bytes and decode lossily
        // so a non-UTF-8 source for a reverse-merge-capable
        // preprocessor still gets a clean scan rather than failing
        // with a generic UTF-8 decode error. The marker token is
        // ASCII, so the lossy decode preserves it. Templates today
        // are always UTF-8 in practice; this is defence-in-depth for
        // future preprocessors.
        // See preprocessing-pipeline.lex §6.3.
        if preprocessor.supports_reverse_merge() {
            let source_bytes = fs.read_file(&entry.absolute_path)?;
            let source_str = String::from_utf8_lossy(&source_bytes);
            crate::preprocessing::conflict::ensure_no_unresolved_markers(
                &source_str,
                &entry.absolute_path,
            )?;
        }

        // Expand the source file
        let expanded_files = preprocessor.expand(&entry.absolute_path, fs)?;

        for expanded in expanded_files {
            // Reject unsafe paths from the preprocessor (tar-slip,
            // absolute paths, parent-dir escapes) before any disk write.
            validate_safe_relative_path(
                &expanded.relative_path,
                preprocessor.name(),
                &entry.absolute_path,
            )?;

            // Compute the virtual relative path.
            // If the source was in a subdirectory (e.g., "subdir/config.toml.identity"),
            // the virtual entry should preserve the parent (e.g., "subdir/config.toml").
            let virtual_relative = if let Some(parent) = entry.relative_path.parent() {
                if parent == Path::new("") {
                    expanded.relative_path.clone()
                } else {
                    parent.join(&expanded.relative_path)
                }
            } else {
                expanded.relative_path.clone()
            };

            // Defense-in-depth: validate the joined path too (parent
            // could only come from the pack scanner, but re-check).
            validate_safe_relative_path(
                &virtual_relative,
                preprocessor.name(),
                &entry.absolute_path,
            )?;

            // Normalise `./foo` and `foo` to the same canonical form, so
            // that collision detection and downstream comparisons don't
            // silently diverge from the datastore's own normalisation.
            let virtual_relative = normalize_relative(&virtual_relative);

            // Phase 4: Collision check (against both regular entries and
            // previously-expanded virtual entries). Runs before this
            // entry's write, but outputs already written for earlier
            // entries stay on disk when this returns an error.
            if claimed_paths.contains(&virtual_relative) {
                return Err(DodotError::PreprocessorCollision {
                    pack: pack.name.clone(),
                    source_file: filename.clone(),
                    expanded_name: virtual_relative.to_string_lossy().into_owned(),
                });
            }

            // Write expanded content to datastore, preserving directory
            // structure. Directories get mkdir'd; files get their content
            // written. `write_rendered_file` creates any needed parent
            // directories.
            //
            // Divergence guard (§6.4): for tracked-render preprocessors,
            // check whether the deployed file has diverged from the
            // cached baseline before overwriting. If it has, skip the
            // *write* and record a SkippedRender so the caller can warn
            // the user. `force = true` bypasses the guard. See
            // `check_divergence` for the byte-level rule.
            //
            // The render itself (`preprocessor.expand` above) has
            // already run by this point — moving the divergence check
            // ahead of expansion would require knowing every output
            // path before producing any of them, which the preprocessor
            // contract doesn't expose. The cost of the spurious render
            // is the cycles burned plus any one-shot side effects in
            // expand (e.g. secret-provider prompts for templates that
            // resolve `{{ secrets.X }}`). For divergent files this
            // means the prompt fires even though the rendered bytes
            // are immediately discarded; users who want to avoid that
            // should resolve the divergence (`dodot transform check`)
            // before the next `dodot up`. Tracked here for §6.4
            // follow-up; not blocking the divergence-preservation
            // contract this guard exists to keep.
            //
            // The guard itself is a read-only check against the
            // existing baseline cache. Only `Active` runs reach this
            // point: `Passive` callers (`dodot status`, `up --dry-run`)
            // took the early return into `preprocess_pack_passive`
            // above, which per its docs performs its own read-only
            // divergence check.
            let mut skip_path: Option<PathBuf> = None;
            if !force && !expanded.is_dir && expanded.tracked_render.is_some() {
                match check_divergence(
                    fs,
                    paths,
                    &pack.name,
                    &virtual_relative,
                    &entry.absolute_path,
                )? {
                    DivergenceCheck::Proceed => {}
                    DivergenceCheck::Skip {
                        state,
                        deployed_path,
                    } => {
                        info!(
                            pack = %pack.name,
                            file = %virtual_relative.display(),
                            ?state,
                            "preserving divergent deployed file (skipping write)"
                        );
                        skipped.push(SkippedRender {
                            pack: pack.name.clone(),
                            virtual_relative: virtual_relative.clone(),
                            deployed_path: deployed_path.clone(),
                            state,
                        });
                        skip_path = Some(deployed_path);
                    }
                }
            }
            let was_skipped = skip_path.is_some();

            // When the guard fired, the virtual entry points at the
            // preserved deployed file and nothing is written.
            let datastore_path = if let Some(p) = skip_path {
                p
            } else if expanded.is_dir {
                datastore.write_rendered_dir(
                    &pack.name,
                    PREPROCESSED_HANDLER,
                    &virtual_relative.to_string_lossy(),
                )?
            } else {
                datastore.write_rendered_file(
                    &pack.name,
                    PREPROCESSED_HANDLER,
                    &virtual_relative.to_string_lossy(),
                    &expanded.content,
                )?
            };

            // Emitted for skipped entries as well; the `skipped` field
            // distinguishes a real write from a preserved file.
            debug!(
                pack = %pack.name,
                virtual_path = %virtual_relative.display(),
                datastore_path = %datastore_path.display(),
                is_dir = expanded.is_dir,
                skipped = was_skipped,
                "wrote expanded entry"
            );

            // Persist a baseline record so future `dodot transform
            // check` / clean-filter calls can detect drift without
            // re-rendering. Only write when:
            //   - the entry is a file (directory entries from archive
            //     preprocessors carry no rendered content),
            //   - the preprocessor produced a tracked render (i.e. it's
            //     a generative-with-tracking preprocessor, currently
            //     just templates). Plain Generative preprocessors that
            //     don't support reverse-merge (unarchive) skip the
            //     baseline because the cache is only meaningful when
            //     paired with burgertocow tracking, AND
            //   - the divergence guard didn't skip the write (otherwise
            //     we'd update the baseline to match a render that never
            //     hit disk, breaking future divergence detection).
            //
            // Mode-gating happens at the function boundary: this whole
            // branch only runs in `PreprocessMode::Active`. Passive
            // commands take the early-return at the top of the
            // function and never reach this code.
            if let (false, Some(tracked), false) = (
                expanded.is_dir,
                expanded.tracked_render.as_deref(),
                was_skipped,
            ) {
                let cache_filename = cache_filename_for(&virtual_relative);
                // The source is re-read here; unlike the baseline write
                // below, a failure of this read is fatal.
                let source_bytes = fs.read_file(&entry.absolute_path)?;
                let baseline = Baseline::build(
                    &entry.absolute_path,
                    &expanded.content,
                    &source_bytes,
                    Some(tracked),
                    expanded.context_hash.as_ref(),
                );
                if let Err(err) =
                    baseline.write(fs, paths, &pack.name, PREPROCESSED_HANDLER, &cache_filename)
                {
                    // Baseline write failures are reported but not
                    // fatal: the deployment itself succeeded, and a
                    // missing baseline only degrades the reverse-merge
                    // experience (we'll re-baseline next `up`).
                    debug!(
                        pack = %pack.name,
                        file = %cache_filename,
                        error = %err,
                        "baseline write failed (non-fatal)"
                    );
                } else {
                    debug!(
                        pack = %pack.name,
                        file = %cache_filename,
                        "baseline written"
                    );
                }
            }

            claimed_paths.insert(virtual_relative.clone());
            source_map.insert(datastore_path.clone(), entry.absolute_path.clone());
            // Stash the rendered bytes for downstream handlers
            // (install/homebrew sentinel hashing) that would
            // otherwise read them back off disk. Skipped renders
            // (divergence guard fired) carry the *preserved deployed*
            // bytes instead — that matches the deployed file the user
            // is keeping, which is what the next sentinel should
            // commit to. Directories carry no bytes.
            if !expanded.is_dir {
                let bytes: Arc<[u8]> = if was_skipped {
                    // Read the preserved deployed file. If the read
                    // fails (race / permissions), fall back to the
                    // freshly-rendered bytes so the handler still
                    // gets a value — this only affects the sentinel,
                    // and the divergence warning has already surfaced.
                    fs.read_file(&datastore_path)
                        .map(Arc::from)
                        .unwrap_or_else(|_| Arc::from(expanded.content.clone()))
                } else {
                    Arc::from(expanded.content.clone())
                };
                rendered_bytes.insert(datastore_path.clone(), bytes);
            }

            virtual_entries.push(PackEntry {
                relative_path: virtual_relative,
                absolute_path: datastore_path,
                is_dir: expanded.is_dir,
            });
        }
    }

    info!(
        pack = %pack.name,
        virtual_count = virtual_entries.len(),
        "preprocessing complete"
    );

    Ok(PreprocessResult {
        regular_entries,
        virtual_entries,
        source_map,
        rendered_bytes,
        skipped,
    })
}
653
654/// `Passive` half of [`preprocess_pack`].
655///
656/// Walks the same set of preprocessor entries the Active path would
657/// have, but never invokes a preprocessor. For each entry, computes
658/// the would-be virtual relative path via `Preprocessor::stripped_name`.
659/// Two outcomes:
660///
661/// - **Baseline exists** (the file was rendered on a previous `up`):
662///   builds a virtual entry pointing at the would-be datastore
663///   location with `rendered_bytes` sourced from
664///   `baseline.rendered_content`. Runs the read-only divergence
665///   check so callers (status's `Health::Preserved` row) still see
666///   skipped-render rows for divergent deployed files.
667/// - **No baseline** (first-time pack template, never `up`'d):
668///   surfaces a placeholder virtual entry under the stripped name,
669///   with empty `rendered_bytes`. Status renders this as "pending"
670///   under the logical name (`config.toml` rather than the source
671///   `config.toml.tmpl`); handlers that need rendered content for
672///   sentinel hashing (install, homebrew) skip intent generation
673///   for these placeholders rather than crashing. The next real
674///   `dodot up` populates the baseline and plans intents normally.
675///
676/// Source files are not read (no marker scan); the datastore is
677/// not written; the baseline cache is not written.
678///
679/// This contract is what `secrets.lex` §7.4 demands: `dodot status`
680/// and `dodot up --dry-run` MUST NOT trigger template evaluation,
681/// MUST NOT surface provider auth prompts, and MUST NOT mutate disk
682/// state. See issue #121.
683///
684/// Limitation: this assumes a 1:1 source→virtual relationship via
685/// `stripped_name`. That holds for templates (the only shipped
686/// generative-with-tracking preprocessor) and identity-style
687/// preprocessors. Multi-output preprocessors like unarchive cannot
688/// faithfully be passively previewed; if one is added later, this
689/// function should fall back to skipping such entries (which it does
690/// today, since they have no baseline).
691fn preprocess_pack_passive(
692    preprocessor_entries: Vec<PackEntry>,
693    regular_entries: Vec<PackEntry>,
694    registry: &PreprocessorRegistry,
695    pack: &Pack,
696    fs: &dyn Fs,
697    paths: &dyn Pather,
698) -> Result<PreprocessResult> {
699    let mut virtual_entries = Vec::new();
700    let mut source_map = HashMap::new();
701    let mut rendered_bytes: HashMap<PathBuf, Arc<[u8]>> = HashMap::new();
702    let mut skipped: Vec<SkippedRender> = Vec::new();
703
704    for entry in preprocessor_entries {
705        let filename = entry
706            .relative_path
707            .file_name()
708            .map(|n| n.to_string_lossy().to_string())
709            .unwrap_or_default();
710
711        let preprocessor = registry
712            .find_for_file(&filename)
713            .expect("already checked in partition");
714
715        // Logical (stripped) virtual filename — e.g. `config.toml`
716        // for `config.toml.tmpl`. We don't run `expand()` (that would
717        // be the §7.4 violation), so we derive the would-be virtual
718        // path from `stripped_name` plus the source's parent
719        // directory.
720        let stripped = preprocessor.stripped_name(&filename);
721        let virtual_relative = match entry.relative_path.parent() {
722            Some(parent) if parent != Path::new("") => parent.join(&stripped),
723            _ => PathBuf::from(&stripped),
724        };
725        let virtual_relative = normalize_relative(&virtual_relative);
726
727        let datastore_path = paths
728            .handler_data_dir(&pack.name, PREPROCESSED_HANDLER)
729            .join(&virtual_relative);
730
731        // Try to load the cached baseline. If absent, this is a
732        // first-time template that has never been deployed: surface
733        // a placeholder virtual entry (no rendered_bytes) so callers
734        // like `dodot status` can render it as "pending" under the
735        // stripped name. Critically, we do NOT fall through to
736        // template evaluation — that's the §7.4 violation we're
737        // here to fix. Handlers that need rendered bytes for
738        // sentinel hashing (`install`, `homebrew`) will fall back
739        // to disk-read on the missing datastore path and report
740        // pending; symlink-targeted templates render cleanly as
741        // pending without needing the bytes at all.
742        let cache_filename = cache_filename_for(&virtual_relative);
743        let baseline =
744            match Baseline::load(fs, paths, &pack.name, PREPROCESSED_HANDLER, &cache_filename)? {
745                Some(b) => Some(b),
746                None => {
747                    debug!(
748                        pack = %pack.name,
749                        file = %virtual_relative.display(),
750                        "passive: no baseline yet — surfacing placeholder (run `dodot up` first)"
751                    );
752                    None
753                }
754            };
755
756        // Divergence detection (read-only): even though Passive
757        // never writes, status / dry-run callers want to know which
758        // deployed files have drifted from their baseline so they
759        // can surface the same `Health::Preserved` row that the
760        // active path does. The byte comparison is local and free
761        // of side effects — no provider calls, no template eval —
762        // so it stays inside the §7.4 envelope. Skipped only when a
763        // baseline exists (no baseline → no comparison reference).
764        if baseline.is_some() {
765            if let Ok(DivergenceCheck::Skip {
766                state,
767                deployed_path,
768            }) = check_divergence(
769                fs,
770                paths,
771                &pack.name,
772                &virtual_relative,
773                &entry.absolute_path,
774            ) {
775                skipped.push(SkippedRender {
776                    pack: pack.name.clone(),
777                    virtual_relative: virtual_relative.clone(),
778                    deployed_path,
779                    state,
780                });
781            }
782        }
783
784        // Carry the baseline's rendered content forward as the
785        // in-memory bytes for downstream sentinel hashing when a
786        // baseline exists. Without a baseline (first-time pack), no
787        // bytes are available — handlers that need them will see
788        // `m.rendered_bytes == None` and fall back to disk read,
789        // which correctly fails for the missing datastore file and
790        // shows up as "pending" in status.
791        if let Some(b) = baseline {
792            let bytes: Arc<[u8]> = Arc::from(b.rendered_content.into_bytes());
793            rendered_bytes.insert(datastore_path.clone(), bytes);
794        }
795        source_map.insert(datastore_path.clone(), entry.absolute_path.clone());
796        virtual_entries.push(PackEntry {
797            relative_path: virtual_relative,
798            absolute_path: datastore_path,
799            is_dir: false,
800        });
801    }
802
803    info!(
804        pack = %pack.name,
805        virtual_count = virtual_entries.len(),
806        skipped_count = skipped.len(),
807        "passive preprocessing complete"
808    );
809
810    Ok(PreprocessResult {
811        regular_entries,
812        virtual_entries,
813        source_map,
814        rendered_bytes,
815        skipped,
816    })
817}
818
819#[cfg(test)]
820mod tests {
821    use super::*;
822    use crate::datastore::FilesystemDataStore;
823    use crate::handlers::HandlerConfig;
824    use crate::preprocessing::identity::IdentityPreprocessor;
825    use crate::testing::TempEnvironment;
826    use std::sync::Arc;
827
828    fn make_pack(name: &str, path: PathBuf) -> Pack {
829        Pack::new(name.into(), path, HandlerConfig::default())
830    }
831
832    fn make_registry() -> PreprocessorRegistry {
833        let mut registry = PreprocessorRegistry::new();
834        registry.register(Box::new(IdentityPreprocessor::new()));
835        registry
836    }
837
838    fn make_datastore(env: &TempEnvironment) -> FilesystemDataStore {
839        let runner = Arc::new(crate::datastore::ShellCommandRunner::new(false));
840        FilesystemDataStore::new(env.fs.clone(), env.paths.clone(), runner)
841    }
842
843    #[test]
844    fn passthrough_when_no_preprocessor_files() {
845        let env = TempEnvironment::builder()
846            .pack("vim")
847            .file("vimrc", "set nocompatible")
848            .file("gvimrc", "set guifont=Mono")
849            .done()
850            .build();
851
852        let registry = make_registry();
853        let datastore = make_datastore(&env);
854        let pack = make_pack("vim", env.dotfiles_root.join("vim"));
855
856        let entries = vec![
857            PackEntry {
858                relative_path: "vimrc".into(),
859                absolute_path: env.dotfiles_root.join("vim/vimrc"),
860                is_dir: false,
861            },
862            PackEntry {
863                relative_path: "gvimrc".into(),
864                absolute_path: env.dotfiles_root.join("vim/gvimrc"),
865                is_dir: false,
866            },
867        ];
868
869        let result = preprocess_pack(
870            entries,
871            &registry,
872            &pack,
873            env.fs.as_ref(),
874            &datastore,
875            env.paths.as_ref(),
876            crate::preprocessing::PreprocessMode::Active,
877            false,
878        )
879        .unwrap();
880
881        assert_eq!(result.regular_entries.len(), 2);
882        assert!(result.virtual_entries.is_empty());
883        assert!(result.source_map.is_empty());
884    }
885
886    #[test]
887    fn identity_preprocessor_creates_virtual_entry() {
888        let env = TempEnvironment::builder()
889            .pack("app")
890            .file("config.toml.identity", "host = localhost")
891            .done()
892            .build();
893
894        let registry = make_registry();
895        let datastore = make_datastore(&env);
896        let pack = make_pack("app", env.dotfiles_root.join("app"));
897
898        let entries = vec![PackEntry {
899            relative_path: "config.toml.identity".into(),
900            absolute_path: env.dotfiles_root.join("app/config.toml.identity"),
901            is_dir: false,
902        }];
903
904        let result = preprocess_pack(
905            entries,
906            &registry,
907            &pack,
908            env.fs.as_ref(),
909            &datastore,
910            env.paths.as_ref(),
911            crate::preprocessing::PreprocessMode::Active,
912            false,
913        )
914        .unwrap();
915
916        assert!(result.regular_entries.is_empty());
917        assert_eq!(result.virtual_entries.len(), 1);
918
919        let virtual_entry = &result.virtual_entries[0];
920        assert_eq!(virtual_entry.relative_path, PathBuf::from("config.toml"));
921        assert!(!virtual_entry.is_dir);
922
923        // Verify the file was written to the datastore
924        let content = env.fs.read_to_string(&virtual_entry.absolute_path).unwrap();
925        assert_eq!(content, "host = localhost");
926
927        // Verify source map
928        assert_eq!(
929            result.source_map[&virtual_entry.absolute_path],
930            env.dotfiles_root.join("app/config.toml.identity")
931        );
932    }
933
934    #[test]
935    fn mixed_pack_partitions_correctly() {
936        let env = TempEnvironment::builder()
937            .pack("app")
938            .file("config.toml.identity", "host = localhost")
939            .file("readme.txt", "hello")
940            .done()
941            .build();
942
943        let registry = make_registry();
944        let datastore = make_datastore(&env);
945        let pack = make_pack("app", env.dotfiles_root.join("app"));
946
947        let entries = vec![
948            PackEntry {
949                relative_path: "config.toml.identity".into(),
950                absolute_path: env.dotfiles_root.join("app/config.toml.identity"),
951                is_dir: false,
952            },
953            PackEntry {
954                relative_path: "readme.txt".into(),
955                absolute_path: env.dotfiles_root.join("app/readme.txt"),
956                is_dir: false,
957            },
958        ];
959
960        let result = preprocess_pack(
961            entries,
962            &registry,
963            &pack,
964            env.fs.as_ref(),
965            &datastore,
966            env.paths.as_ref(),
967            crate::preprocessing::PreprocessMode::Active,
968            false,
969        )
970        .unwrap();
971
972        assert_eq!(result.regular_entries.len(), 1);
973        assert_eq!(
974            result.regular_entries[0].relative_path,
975            PathBuf::from("readme.txt")
976        );
977
978        assert_eq!(result.virtual_entries.len(), 1);
979        assert_eq!(
980            result.virtual_entries[0].relative_path,
981            PathBuf::from("config.toml")
982        );
983    }
984
985    #[test]
986    fn collision_detection_rejects_conflict() {
987        let env = TempEnvironment::builder()
988            .pack("app")
989            .file("config.toml.identity", "preprocessed")
990            .file("config.toml", "regular")
991            .done()
992            .build();
993
994        let registry = make_registry();
995        let datastore = make_datastore(&env);
996        let pack = make_pack("app", env.dotfiles_root.join("app"));
997
998        let entries = vec![
999            PackEntry {
1000                relative_path: "config.toml.identity".into(),
1001                absolute_path: env.dotfiles_root.join("app/config.toml.identity"),
1002                is_dir: false,
1003            },
1004            PackEntry {
1005                relative_path: "config.toml".into(),
1006                absolute_path: env.dotfiles_root.join("app/config.toml"),
1007                is_dir: false,
1008            },
1009        ];
1010
1011        let err = preprocess_pack(
1012            entries,
1013            &registry,
1014            &pack,
1015            env.fs.as_ref(),
1016            &datastore,
1017            env.paths.as_ref(),
1018            crate::preprocessing::PreprocessMode::Active,
1019            false,
1020        )
1021        .unwrap_err();
1022        assert!(
1023            matches!(err, DodotError::PreprocessorCollision { .. }),
1024            "expected PreprocessorCollision, got: {err}"
1025        );
1026    }
1027
1028    #[test]
1029    fn merged_entries_combines_and_sorts() {
1030        let result = PreprocessResult {
1031            regular_entries: vec![PackEntry {
1032                relative_path: "zebra".into(),
1033                absolute_path: "/z".into(),
1034                is_dir: false,
1035            }],
1036            virtual_entries: vec![PackEntry {
1037                relative_path: "alpha".into(),
1038                absolute_path: "/a".into(),
1039                is_dir: false,
1040            }],
1041            source_map: HashMap::new(),
1042            rendered_bytes: HashMap::new(),
1043            skipped: Vec::new(),
1044        };
1045
1046        let merged = result.merged_entries();
1047        assert_eq!(merged.len(), 2);
1048        assert_eq!(merged[0].relative_path, PathBuf::from("alpha"));
1049        assert_eq!(merged[1].relative_path, PathBuf::from("zebra"));
1050    }
1051
1052    #[test]
1053    fn empty_registry_passes_all_through() {
1054        let env = TempEnvironment::builder()
1055            .pack("app")
1056            .file("config.toml.identity", "content")
1057            .done()
1058            .build();
1059
1060        let registry = PreprocessorRegistry::new(); // empty!
1061        let datastore = make_datastore(&env);
1062        let pack = make_pack("app", env.dotfiles_root.join("app"));
1063
1064        let entries = vec![PackEntry {
1065            relative_path: "config.toml.identity".into(),
1066            absolute_path: env.dotfiles_root.join("app/config.toml.identity"),
1067            is_dir: false,
1068        }];
1069
1070        let result = preprocess_pack(
1071            entries,
1072            &registry,
1073            &pack,
1074            env.fs.as_ref(),
1075            &datastore,
1076            env.paths.as_ref(),
1077            crate::preprocessing::PreprocessMode::Active,
1078            false,
1079        )
1080        .unwrap();
1081
1082        // With no preprocessors registered, the file is treated as regular
1083        assert_eq!(result.regular_entries.len(), 1);
1084        assert!(result.virtual_entries.is_empty());
1085    }
1086
1087    #[test]
1088    fn directories_are_never_preprocessed() {
1089        let env = TempEnvironment::builder()
1090            .pack("app")
1091            .file("bin.identity/tool", "#!/bin/sh")
1092            .done()
1093            .build();
1094
1095        let registry = make_registry();
1096        let datastore = make_datastore(&env);
1097        let pack = make_pack("app", env.dotfiles_root.join("app"));
1098
1099        let entries = vec![PackEntry {
1100            relative_path: "bin.identity".into(),
1101            absolute_path: env.dotfiles_root.join("app/bin.identity"),
1102            is_dir: true, // directory — should NOT be preprocessed
1103        }];
1104
1105        let result = preprocess_pack(
1106            entries,
1107            &registry,
1108            &pack,
1109            env.fs.as_ref(),
1110            &datastore,
1111            env.paths.as_ref(),
1112            crate::preprocessing::PreprocessMode::Active,
1113            false,
1114        )
1115        .unwrap();
1116
1117        assert_eq!(result.regular_entries.len(), 1);
1118        assert!(result.virtual_entries.is_empty());
1119    }
1120
1121    #[test]
1122    fn subdirectory_preprocessor_file_preserves_parent() {
1123        let env = TempEnvironment::builder()
1124            .pack("app")
1125            .file("subdir/config.toml.identity", "nested content")
1126            .done()
1127            .build();
1128
1129        let registry = make_registry();
1130        let datastore = make_datastore(&env);
1131        let pack = make_pack("app", env.dotfiles_root.join("app"));
1132
1133        let entries = vec![PackEntry {
1134            relative_path: "subdir/config.toml.identity".into(),
1135            absolute_path: env.dotfiles_root.join("app/subdir/config.toml.identity"),
1136            is_dir: false,
1137        }];
1138
1139        let result = preprocess_pack(
1140            entries,
1141            &registry,
1142            &pack,
1143            env.fs.as_ref(),
1144            &datastore,
1145            env.paths.as_ref(),
1146            crate::preprocessing::PreprocessMode::Active,
1147            false,
1148        )
1149        .unwrap();
1150
1151        assert_eq!(result.virtual_entries.len(), 1);
1152        assert_eq!(
1153            result.virtual_entries[0].relative_path,
1154            PathBuf::from("subdir/config.toml")
1155        );
1156    }
1157
1158    #[test]
1159    fn multiple_preprocessor_files_in_one_pack() {
1160        let env = TempEnvironment::builder()
1161            .pack("app")
1162            .file("config.toml.identity", "config content")
1163            .file("settings.json.identity", "settings content")
1164            .done()
1165            .build();
1166
1167        let registry = make_registry();
1168        let datastore = make_datastore(&env);
1169        let pack = make_pack("app", env.dotfiles_root.join("app"));
1170
1171        let entries = vec![
1172            PackEntry {
1173                relative_path: "config.toml.identity".into(),
1174                absolute_path: env.dotfiles_root.join("app/config.toml.identity"),
1175                is_dir: false,
1176            },
1177            PackEntry {
1178                relative_path: "settings.json.identity".into(),
1179                absolute_path: env.dotfiles_root.join("app/settings.json.identity"),
1180                is_dir: false,
1181            },
1182        ];
1183
1184        let result = preprocess_pack(
1185            entries,
1186            &registry,
1187            &pack,
1188            env.fs.as_ref(),
1189            &datastore,
1190            env.paths.as_ref(),
1191            crate::preprocessing::PreprocessMode::Active,
1192            false,
1193        )
1194        .unwrap();
1195
1196        assert!(result.regular_entries.is_empty());
1197        assert_eq!(result.virtual_entries.len(), 2);
1198
1199        let names: Vec<String> = result
1200            .virtual_entries
1201            .iter()
1202            .map(|e| e.relative_path.to_string_lossy().to_string())
1203            .collect();
1204        assert!(names.contains(&"config.toml".to_string()));
1205        assert!(names.contains(&"settings.json".to_string()));
1206
1207        // Each should have a source_map entry
1208        assert_eq!(result.source_map.len(), 2);
1209    }
1210
1211    #[test]
1212    fn pack_with_only_preprocessor_files() {
1213        let env = TempEnvironment::builder()
1214            .pack("app")
1215            .file("only.conf.identity", "the only file")
1216            .done()
1217            .build();
1218
1219        let registry = make_registry();
1220        let datastore = make_datastore(&env);
1221        let pack = make_pack("app", env.dotfiles_root.join("app"));
1222
1223        let entries = vec![PackEntry {
1224            relative_path: "only.conf.identity".into(),
1225            absolute_path: env.dotfiles_root.join("app/only.conf.identity"),
1226            is_dir: false,
1227        }];
1228
1229        let result = preprocess_pack(
1230            entries,
1231            &registry,
1232            &pack,
1233            env.fs.as_ref(),
1234            &datastore,
1235            env.paths.as_ref(),
1236            crate::preprocessing::PreprocessMode::Active,
1237            false,
1238        )
1239        .unwrap();
1240
1241        assert!(result.regular_entries.is_empty());
1242        assert_eq!(result.virtual_entries.len(), 1);
1243        assert_eq!(result.merged_entries().len(), 1);
1244    }
1245
1246    #[test]
1247    fn source_map_is_complete() {
1248        let env = TempEnvironment::builder()
1249            .pack("app")
1250            .file("a.conf.identity", "aaa")
1251            .file("b.conf.identity", "bbb")
1252            .file("regular.txt", "ccc")
1253            .done()
1254            .build();
1255
1256        let registry = make_registry();
1257        let datastore = make_datastore(&env);
1258        let pack = make_pack("app", env.dotfiles_root.join("app"));
1259
1260        let entries = vec![
1261            PackEntry {
1262                relative_path: "a.conf.identity".into(),
1263                absolute_path: env.dotfiles_root.join("app/a.conf.identity"),
1264                is_dir: false,
1265            },
1266            PackEntry {
1267                relative_path: "b.conf.identity".into(),
1268                absolute_path: env.dotfiles_root.join("app/b.conf.identity"),
1269                is_dir: false,
1270            },
1271            PackEntry {
1272                relative_path: "regular.txt".into(),
1273                absolute_path: env.dotfiles_root.join("app/regular.txt"),
1274                is_dir: false,
1275            },
1276        ];
1277
1278        let result = preprocess_pack(
1279            entries,
1280            &registry,
1281            &pack,
1282            env.fs.as_ref(),
1283            &datastore,
1284            env.paths.as_ref(),
1285            crate::preprocessing::PreprocessMode::Active,
1286            false,
1287        )
1288        .unwrap();
1289
1290        // Every virtual entry must have a source_map entry
1291        for ve in &result.virtual_entries {
1292            assert!(
1293                result.source_map.contains_key(&ve.absolute_path),
1294                "virtual entry {} has no source_map entry",
1295                ve.absolute_path.display()
1296            );
1297        }
1298        // No regular entries in the source_map
1299        for re in &result.regular_entries {
1300            assert!(
1301                !result.source_map.contains_key(&re.absolute_path),
1302                "regular entry {} should not be in source_map",
1303                re.absolute_path.display()
1304            );
1305        }
1306    }
1307
1308    #[test]
1309    fn preprocessing_is_idempotent() {
1310        let env = TempEnvironment::builder()
1311            .pack("app")
1312            .file("config.toml.identity", "content")
1313            .done()
1314            .build();
1315
1316        let registry = make_registry();
1317        let datastore = make_datastore(&env);
1318        let pack = make_pack("app", env.dotfiles_root.join("app"));
1319
1320        let make_entries = || {
1321            vec![PackEntry {
1322                relative_path: "config.toml.identity".into(),
1323                absolute_path: env.dotfiles_root.join("app/config.toml.identity"),
1324                is_dir: false,
1325            }]
1326        };
1327
1328        let result1 = preprocess_pack(
1329            make_entries(),
1330            &registry,
1331            &pack,
1332            env.fs.as_ref(),
1333            &datastore,
1334            env.paths.as_ref(),
1335            crate::preprocessing::PreprocessMode::Active,
1336            false,
1337        )
1338        .unwrap();
1339        let result2 = preprocess_pack(
1340            make_entries(),
1341            &registry,
1342            &pack,
1343            env.fs.as_ref(),
1344            &datastore,
1345            env.paths.as_ref(),
1346            crate::preprocessing::PreprocessMode::Active,
1347            false,
1348        )
1349        .unwrap();
1350
1351        assert_eq!(result1.virtual_entries.len(), result2.virtual_entries.len());
1352        assert_eq!(
1353            result1.virtual_entries[0].relative_path,
1354            result2.virtual_entries[0].relative_path
1355        );
1356
1357        // Datastore file should be the same content
1358        let content1 = env
1359            .fs
1360            .read_to_string(&result1.virtual_entries[0].absolute_path)
1361            .unwrap();
1362        let content2 = env
1363            .fs
1364            .read_to_string(&result2.virtual_entries[0].absolute_path)
1365            .unwrap();
1366        assert_eq!(content1, content2);
1367    }
1368
1369    #[test]
1370    fn expansion_error_propagates() {
1371        let env = TempEnvironment::builder()
1372            .pack("app")
1373            .file("placeholder", "")
1374            .done()
1375            .build();
1376
1377        let registry = make_registry();
1378        let datastore = make_datastore(&env);
1379        let pack = make_pack("app", env.dotfiles_root.join("app"));
1380
1381        // Point to a file that doesn't exist — expansion should fail
1382        let entries = vec![PackEntry {
1383            relative_path: "missing.conf.identity".into(),
1384            absolute_path: env.dotfiles_root.join("app/missing.conf.identity"),
1385            is_dir: false,
1386        }];
1387
1388        let err = preprocess_pack(
1389            entries,
1390            &registry,
1391            &pack,
1392            env.fs.as_ref(),
1393            &datastore,
1394            env.paths.as_ref(),
1395            crate::preprocessing::PreprocessMode::Active,
1396            false,
1397        )
1398        .unwrap_err();
1399        assert!(
1400            matches!(err, DodotError::Fs { .. }),
1401            "expected Fs error for missing file, got: {err}"
1402        );
1403    }
1404
1405    #[test]
1406    fn inter_preprocessor_collision_detected() {
1407        // Two preprocessors produce the same logical name.
1408        // Set up: `config.toml.identity` and `config.toml.other` (custom
1409        // extension) both strip to `config.toml`. The pipeline must
1410        // detect this and refuse rather than silently overwriting.
1411        let env = TempEnvironment::builder()
1412            .pack("app")
1413            .file("config.toml.identity", "a")
1414            .file("config.toml.other", "b")
1415            .done()
1416            .build();
1417
1418        let mut registry = PreprocessorRegistry::new();
1419        registry.register(Box::new(IdentityPreprocessor::new()));
1420        registry.register(Box::new(IdentityPreprocessor::with_extension("other")));
1421
1422        let datastore = make_datastore(&env);
1423        let pack = make_pack("app", env.dotfiles_root.join("app"));
1424
1425        let entries = vec![
1426            PackEntry {
1427                relative_path: "config.toml.identity".into(),
1428                absolute_path: env.dotfiles_root.join("app/config.toml.identity"),
1429                is_dir: false,
1430            },
1431            PackEntry {
1432                relative_path: "config.toml.other".into(),
1433                absolute_path: env.dotfiles_root.join("app/config.toml.other"),
1434                is_dir: false,
1435            },
1436        ];
1437
1438        let err = preprocess_pack(
1439            entries,
1440            &registry,
1441            &pack,
1442            env.fs.as_ref(),
1443            &datastore,
1444            env.paths.as_ref(),
1445            crate::preprocessing::PreprocessMode::Active,
1446            false,
1447        )
1448        .unwrap_err();
1449        assert!(
1450            matches!(err, DodotError::PreprocessorCollision { .. }),
1451            "expected PreprocessorCollision for inter-preprocessor clash, got: {err}"
1452        );
1453    }
1454
1455    #[test]
1456    fn datastore_preserves_directory_structure() {
1457        // Preprocessor files in subdirectories should land in matching
1458        // subdirectories under the datastore, not be flattened with `__`.
1459        let env = TempEnvironment::builder()
1460            .pack("app")
1461            .file("sub/config.toml.identity", "nested")
1462            .done()
1463            .build();
1464
1465        let registry = make_registry();
1466        let datastore = make_datastore(&env);
1467        let pack = make_pack("app", env.dotfiles_root.join("app"));
1468
1469        let entries = vec![PackEntry {
1470            relative_path: "sub/config.toml.identity".into(),
1471            absolute_path: env.dotfiles_root.join("app/sub/config.toml.identity"),
1472            is_dir: false,
1473        }];
1474
1475        let result = preprocess_pack(
1476            entries,
1477            &registry,
1478            &pack,
1479            env.fs.as_ref(),
1480            &datastore,
1481            env.paths.as_ref(),
1482            crate::preprocessing::PreprocessMode::Active,
1483            false,
1484        )
1485        .unwrap();
1486
1487        assert_eq!(result.virtual_entries.len(), 1);
1488        let datastore_path = &result.virtual_entries[0].absolute_path;
1489
1490        // The datastore path should contain the subdirectory structure, not flattened
1491        let ds_str = datastore_path.to_string_lossy();
1492        assert!(
1493            ds_str.contains("sub/config.toml"),
1494            "datastore path should preserve directory structure, got: {ds_str}"
1495        );
1496        assert!(
1497            !ds_str.contains("__"),
1498            "datastore path should not contain flattening separator, got: {ds_str}"
1499        );
1500
1501        // File should actually exist at that path
1502        assert!(env.fs.exists(datastore_path));
1503        let content = env.fs.read_to_string(datastore_path).unwrap();
1504        assert_eq!(content, "nested");
1505    }
1506
1507    #[test]
1508    fn datastore_distinguishes_sibling_from_flattened_name() {
1509        // Regression test for the flatten-with-`__` edge case: a user could
1510        // have `a/b.txt` and `a__b.txt` both as preprocessor outputs, which
1511        // would have collided under the old flattening scheme. With
1512        // directory-preserving storage they live in distinct datastore paths.
1513        let env = TempEnvironment::builder()
1514            .pack("app")
1515            .file("a/b.txt.identity", "nested")
1516            .file("a__b.txt.identity", "flat")
1517            .done()
1518            .build();
1519
1520        let registry = make_registry();
1521        let datastore = make_datastore(&env);
1522        let pack = make_pack("app", env.dotfiles_root.join("app"));
1523
1524        let entries = vec![
1525            PackEntry {
1526                relative_path: "a/b.txt.identity".into(),
1527                absolute_path: env.dotfiles_root.join("app/a/b.txt.identity"),
1528                is_dir: false,
1529            },
1530            PackEntry {
1531                relative_path: "a__b.txt.identity".into(),
1532                absolute_path: env.dotfiles_root.join("app/a__b.txt.identity"),
1533                is_dir: false,
1534            },
1535        ];
1536
1537        let result = preprocess_pack(
1538            entries,
1539            &registry,
1540            &pack,
1541            env.fs.as_ref(),
1542            &datastore,
1543            env.paths.as_ref(),
1544            crate::preprocessing::PreprocessMode::Active,
1545            false,
1546        )
1547        .unwrap();
1548
1549        assert_eq!(result.virtual_entries.len(), 2);
1550
1551        // Both files must exist with distinct content
1552        let nested = result
1553            .virtual_entries
1554            .iter()
1555            .find(|e| e.relative_path == std::path::Path::new("a/b.txt"))
1556            .expect("nested entry");
1557        let flat = result
1558            .virtual_entries
1559            .iter()
1560            .find(|e| e.relative_path == std::path::Path::new("a__b.txt"))
1561            .expect("flat entry");
1562
1563        assert_ne!(nested.absolute_path, flat.absolute_path);
1564        assert_eq!(
1565            env.fs.read_to_string(&nested.absolute_path).unwrap(),
1566            "nested"
1567        );
1568        assert_eq!(env.fs.read_to_string(&flat.absolute_path).unwrap(), "flat");
1569    }
1570
1571    // ── Path-traversal defenses ─────────────────────────────────
1572
1573    /// Test-only preprocessor that emits a configurable set of
1574    /// [`crate::preprocessing::ExpandedFile`]s — lets tests inject
1575    /// unsafe paths or directory entries without needing a real archive.
1576    struct ScriptedPreprocessor {
1577        name: &'static str,
1578        extension: &'static str,
1579        outputs: Vec<crate::preprocessing::ExpandedFile>,
1580        /// Opt-in flag for tests that exercise the reverse-merge path
1581        /// (e.g. the conflict-marker safety gate). Off by default so
1582        /// existing tests of unsafe-path / directory / collision
1583        /// behaviour aren't accidentally affected by the source-content
1584        /// scan that the gate adds.
1585        supports_reverse_merge: bool,
1586    }
1587
1588    impl Default for ScriptedPreprocessor {
1589        fn default() -> Self {
1590            Self {
1591                name: "scripted",
1592                extension: ".scripted",
1593                outputs: Vec::new(),
1594                supports_reverse_merge: false,
1595            }
1596        }
1597    }
1598
1599    impl crate::preprocessing::Preprocessor for ScriptedPreprocessor {
1600        fn name(&self) -> &str {
1601            self.name
1602        }
1603        fn transform_type(&self) -> crate::preprocessing::TransformType {
1604            crate::preprocessing::TransformType::Opaque
1605        }
1606        fn matches_extension(&self, filename: &str) -> bool {
1607            filename.ends_with(self.extension)
1608        }
1609        fn stripped_name(&self, filename: &str) -> String {
1610            filename
1611                .strip_suffix(self.extension)
1612                .unwrap_or(filename)
1613                .to_string()
1614        }
1615        fn expand(
1616            &self,
1617            _source: &Path,
1618            _fs: &dyn Fs,
1619        ) -> Result<Vec<crate::preprocessing::ExpandedFile>> {
1620            Ok(self.outputs.clone())
1621        }
1622        fn supports_reverse_merge(&self) -> bool {
1623            self.supports_reverse_merge
1624        }
1625    }
1626
1627    #[test]
1628    fn rejects_absolute_path_from_preprocessor() {
1629        let env = TempEnvironment::builder()
1630            .pack("app")
1631            .file("bad.evil", "x")
1632            .done()
1633            .build();
1634
1635        let mut registry = PreprocessorRegistry::new();
1636        registry.register(Box::new(ScriptedPreprocessor {
1637            name: "evil",
1638            extension: ".evil",
1639            outputs: vec![crate::preprocessing::ExpandedFile {
1640                relative_path: PathBuf::from("/etc/passwd"),
1641                content: b"pwn".to_vec(),
1642                is_dir: false,
1643                ..Default::default()
1644            }],
1645            ..Default::default()
1646        }));
1647
1648        let datastore = make_datastore(&env);
1649        let pack = make_pack("app", env.dotfiles_root.join("app"));
1650
1651        let entries = vec![PackEntry {
1652            relative_path: "bad.evil".into(),
1653            absolute_path: env.dotfiles_root.join("app/bad.evil"),
1654            is_dir: false,
1655        }];
1656
1657        let err = preprocess_pack(
1658            entries,
1659            &registry,
1660            &pack,
1661            env.fs.as_ref(),
1662            &datastore,
1663            env.paths.as_ref(),
1664            crate::preprocessing::PreprocessMode::Active,
1665            false,
1666        )
1667        .unwrap_err();
1668        assert!(
1669            matches!(err, DodotError::PreprocessorError { ref message, .. } if message.contains("unsafe path")),
1670            "expected unsafe-path error, got: {err}"
1671        );
1672        // Verify the malicious target was not written
1673        assert!(!std::path::Path::new("/etc/passwd.dodot-would-have-written-here").exists());
1674    }
1675
1676    #[test]
1677    fn rejects_parent_dir_escape_from_preprocessor() {
1678        let env = TempEnvironment::builder()
1679            .pack("app")
1680            .file("bad.evil", "x")
1681            .done()
1682            .build();
1683
1684        let mut registry = PreprocessorRegistry::new();
1685        registry.register(Box::new(ScriptedPreprocessor {
1686            name: "evil",
1687            extension: ".evil",
1688            outputs: vec![crate::preprocessing::ExpandedFile {
1689                relative_path: PathBuf::from("../../escape.txt"),
1690                content: b"pwn".to_vec(),
1691                is_dir: false,
1692                ..Default::default()
1693            }],
1694            ..Default::default()
1695        }));
1696
1697        let datastore = make_datastore(&env);
1698        let pack = make_pack("app", env.dotfiles_root.join("app"));
1699
1700        let entries = vec![PackEntry {
1701            relative_path: "bad.evil".into(),
1702            absolute_path: env.dotfiles_root.join("app/bad.evil"),
1703            is_dir: false,
1704        }];
1705
1706        let err = preprocess_pack(
1707            entries,
1708            &registry,
1709            &pack,
1710            env.fs.as_ref(),
1711            &datastore,
1712            env.paths.as_ref(),
1713            crate::preprocessing::PreprocessMode::Active,
1714            false,
1715        )
1716        .unwrap_err();
1717        assert!(
1718            matches!(err, DodotError::PreprocessorError { ref message, .. } if message.contains("unsafe path")),
1719            "expected unsafe-path error, got: {err}"
1720        );
1721    }
1722
1723    #[test]
1724    fn directory_entry_is_mkdird_not_written_as_file() {
1725        // A preprocessor emits a directory marker followed by a file
1726        // inside it. The pipeline must mkdir the directory rather than
1727        // writing a file at the directory path (which would break the
1728        // subsequent nested file write).
1729        let env = TempEnvironment::builder()
1730            .pack("app")
1731            .file("bundle.zz", "x")
1732            .done()
1733            .build();
1734
1735        let mut registry = PreprocessorRegistry::new();
1736        registry.register(Box::new(ScriptedPreprocessor {
1737            name: "scripted",
1738            extension: ".zz",
1739            outputs: vec![
1740                crate::preprocessing::ExpandedFile {
1741                    relative_path: PathBuf::from("sub"),
1742                    content: Vec::new(),
1743                    is_dir: true,
1744                    ..Default::default()
1745                },
1746                crate::preprocessing::ExpandedFile {
1747                    relative_path: PathBuf::from("sub/nested.txt"),
1748                    content: b"hello".to_vec(),
1749                    is_dir: false,
1750                    ..Default::default()
1751                },
1752            ],
1753            ..Default::default()
1754        }));
1755
1756        let datastore = make_datastore(&env);
1757        let pack = make_pack("app", env.dotfiles_root.join("app"));
1758
1759        let entries = vec![PackEntry {
1760            relative_path: "bundle.zz".into(),
1761            absolute_path: env.dotfiles_root.join("app/bundle.zz"),
1762            is_dir: false,
1763        }];
1764
1765        let result = preprocess_pack(
1766            entries,
1767            &registry,
1768            &pack,
1769            env.fs.as_ref(),
1770            &datastore,
1771            env.paths.as_ref(),
1772            crate::preprocessing::PreprocessMode::Active,
1773            false,
1774        )
1775        .unwrap();
1776
1777        assert_eq!(result.virtual_entries.len(), 2);
1778
1779        let dir_entry = result
1780            .virtual_entries
1781            .iter()
1782            .find(|e| e.is_dir)
1783            .expect("directory entry");
1784        assert!(
1785            env.fs.is_dir(&dir_entry.absolute_path),
1786            "directory entry should be a real directory: {}",
1787            dir_entry.absolute_path.display()
1788        );
1789
1790        let file_entry = result
1791            .virtual_entries
1792            .iter()
1793            .find(|e| !e.is_dir)
1794            .expect("file entry");
1795        assert_eq!(
1796            env.fs.read_to_string(&file_entry.absolute_path).unwrap(),
1797            "hello"
1798        );
1799    }
1800
1801    #[test]
1802    fn rejects_empty_path_from_preprocessor() {
1803        // A preprocessor that produces an empty relative_path (e.g. a
1804        // template file named literally `.tmpl` whose stripped name is
1805        // empty) must be rejected with a clean PreprocessorError, not
1806        // cascaded to the datastore's opaque "empty datastore path"
1807        // message.
1808        let env = TempEnvironment::builder()
1809            .pack("app")
1810            .file("bad.zz", "x")
1811            .done()
1812            .build();
1813
1814        let mut registry = PreprocessorRegistry::new();
1815        registry.register(Box::new(ScriptedPreprocessor {
1816            name: "scripted",
1817            extension: ".zz",
1818            outputs: vec![crate::preprocessing::ExpandedFile {
1819                relative_path: PathBuf::from(""),
1820                content: b"nope".to_vec(),
1821                is_dir: false,
1822                ..Default::default()
1823            }],
1824            ..Default::default()
1825        }));
1826
1827        let datastore = make_datastore(&env);
1828        let pack = make_pack("app", env.dotfiles_root.join("app"));
1829
1830        let entries = vec![PackEntry {
1831            relative_path: "bad.zz".into(),
1832            absolute_path: env.dotfiles_root.join("app/bad.zz"),
1833            is_dir: false,
1834        }];
1835
1836        let err = preprocess_pack(
1837            entries,
1838            &registry,
1839            &pack,
1840            env.fs.as_ref(),
1841            &datastore,
1842            env.paths.as_ref(),
1843            crate::preprocessing::PreprocessMode::Active,
1844            false,
1845        )
1846        .unwrap_err();
1847        assert!(
1848            matches!(err, DodotError::PreprocessorError { ref message, .. } if message.contains("empty output path")),
1849            "expected empty-path error, got: {err}"
1850        );
1851    }
1852
1853    #[test]
1854    fn rejects_curdir_only_path_from_preprocessor() {
1855        // `./` or `.` alone normalises to empty — same rejection.
1856        let env = TempEnvironment::builder()
1857            .pack("app")
1858            .file("bad.zz", "x")
1859            .done()
1860            .build();
1861
1862        let mut registry = PreprocessorRegistry::new();
1863        registry.register(Box::new(ScriptedPreprocessor {
1864            name: "scripted",
1865            extension: ".zz",
1866            outputs: vec![crate::preprocessing::ExpandedFile {
1867                relative_path: PathBuf::from("."),
1868                content: b"nope".to_vec(),
1869                is_dir: false,
1870                ..Default::default()
1871            }],
1872            ..Default::default()
1873        }));
1874
1875        let datastore = make_datastore(&env);
1876        let pack = make_pack("app", env.dotfiles_root.join("app"));
1877
1878        let entries = vec![PackEntry {
1879            relative_path: "bad.zz".into(),
1880            absolute_path: env.dotfiles_root.join("app/bad.zz"),
1881            is_dir: false,
1882        }];
1883
1884        let err = preprocess_pack(
1885            entries,
1886            &registry,
1887            &pack,
1888            env.fs.as_ref(),
1889            &datastore,
1890            env.paths.as_ref(),
1891            crate::preprocessing::PreprocessMode::Active,
1892            false,
1893        )
1894        .unwrap_err();
1895        assert!(
1896            matches!(err, DodotError::PreprocessorError { ref message, .. } if message.contains("empty output path")),
1897            "expected empty-path error, got: {err}"
1898        );
1899    }
1900
1901    #[test]
1902    fn curdir_prefixed_paths_collide_with_plain_paths() {
1903        // Two preprocessor outputs — one `./foo` and one `foo` — must
1904        // be treated as a collision. Before normalisation these lived
1905        // at distinct HashSet keys but the same datastore path, so the
1906        // second write silently clobbered the first.
1907        let env = TempEnvironment::builder()
1908            .pack("app")
1909            .file("bundle.zz", "x")
1910            .done()
1911            .build();
1912
1913        let mut registry = PreprocessorRegistry::new();
1914        registry.register(Box::new(ScriptedPreprocessor {
1915            name: "scripted",
1916            extension: ".zz",
1917            outputs: vec![
1918                crate::preprocessing::ExpandedFile {
1919                    relative_path: PathBuf::from("foo"),
1920                    content: b"first".to_vec(),
1921                    is_dir: false,
1922                    ..Default::default()
1923                },
1924                crate::preprocessing::ExpandedFile {
1925                    relative_path: PathBuf::from("./foo"),
1926                    content: b"second".to_vec(),
1927                    is_dir: false,
1928                    ..Default::default()
1929                },
1930            ],
1931            ..Default::default()
1932        }));
1933
1934        let datastore = make_datastore(&env);
1935        let pack = make_pack("app", env.dotfiles_root.join("app"));
1936
1937        let entries = vec![PackEntry {
1938            relative_path: "bundle.zz".into(),
1939            absolute_path: env.dotfiles_root.join("app/bundle.zz"),
1940            is_dir: false,
1941        }];
1942
1943        let err = preprocess_pack(
1944            entries,
1945            &registry,
1946            &pack,
1947            env.fs.as_ref(),
1948            &datastore,
1949            env.paths.as_ref(),
1950            crate::preprocessing::PreprocessMode::Active,
1951            false,
1952        )
1953        .unwrap_err();
1954        assert!(
1955            matches!(err, DodotError::PreprocessorCollision { .. }),
1956            "expected PreprocessorCollision for ./foo vs foo, got: {err}"
1957        );
1958    }
1959
1960    #[test]
1961    fn virtual_entry_relative_path_is_normalized() {
1962        // When a preprocessor emits `./foo`, the resulting virtual entry
1963        // must carry a normalised relative path. Otherwise downstream
1964        // code (e.g. rule matching or status display) sees both shapes
1965        // and treats them as different files.
1966        let env = TempEnvironment::builder()
1967            .pack("app")
1968            .file("bundle.zz", "x")
1969            .done()
1970            .build();
1971
1972        let mut registry = PreprocessorRegistry::new();
1973        registry.register(Box::new(ScriptedPreprocessor {
1974            name: "scripted",
1975            extension: ".zz",
1976            outputs: vec![crate::preprocessing::ExpandedFile {
1977                relative_path: PathBuf::from("./nested/file.txt"),
1978                content: b"hi".to_vec(),
1979                is_dir: false,
1980                ..Default::default()
1981            }],
1982            ..Default::default()
1983        }));
1984
1985        let datastore = make_datastore(&env);
1986        let pack = make_pack("app", env.dotfiles_root.join("app"));
1987
1988        let entries = vec![PackEntry {
1989            relative_path: "bundle.zz".into(),
1990            absolute_path: env.dotfiles_root.join("app/bundle.zz"),
1991            is_dir: false,
1992        }];
1993
1994        let result = preprocess_pack(
1995            entries,
1996            &registry,
1997            &pack,
1998            env.fs.as_ref(),
1999            &datastore,
2000            env.paths.as_ref(),
2001            crate::preprocessing::PreprocessMode::Active,
2002            false,
2003        )
2004        .unwrap();
2005
2006        assert_eq!(result.virtual_entries.len(), 1);
2007        assert_eq!(
2008            result.virtual_entries[0].relative_path,
2009            PathBuf::from("nested/file.txt"),
2010            "CurDir components must be stripped from virtual entry"
2011        );
2012    }
2013
2014    // ── Baseline cache integration ──────────────────────────────
2015
2016    #[test]
2017    fn baseline_is_written_when_paths_provided_and_tracked_render_present() {
2018        // End-to-end: a scripted preprocessor that produces a tracked
2019        // render should result in a baseline JSON on disk under
2020        // `<cache>/preprocessor/<pack>/preprocessed/<file>.json`. The
2021        // baseline must round-trip through Baseline::load with all the
2022        // documented fields populated.
2023        let env = TempEnvironment::builder()
2024            .pack("app")
2025            .file("config.toml.tracked", "name = original")
2026            .done()
2027            .build();
2028
2029        let mut registry = PreprocessorRegistry::new();
2030        registry.register(Box::new(ScriptedPreprocessor {
2031            name: "tracked-scripted",
2032            extension: ".tracked",
2033            outputs: vec![crate::preprocessing::ExpandedFile {
2034                relative_path: PathBuf::from("config.toml"),
2035                content: b"name = rendered".to_vec(),
2036                is_dir: false,
2037                tracked_render: Some("name = \u{1e}rendered\u{1f}".into()),
2038                context_hash: Some([0xab; 32]),
2039            }],
2040            ..Default::default()
2041        }));
2042
2043        let datastore = make_datastore(&env);
2044        let pack = make_pack("app", env.dotfiles_root.join("app"));
2045
2046        let entries = vec![PackEntry {
2047            relative_path: "config.toml.tracked".into(),
2048            absolute_path: env.dotfiles_root.join("app/config.toml.tracked"),
2049            is_dir: false,
2050        }];
2051
2052        preprocess_pack(
2053            entries,
2054            &registry,
2055            &pack,
2056            env.fs.as_ref(),
2057            &datastore,
2058            env.paths.as_ref(),
2059            PreprocessMode::Active,
2060            false,
2061        )
2062        .unwrap();
2063
2064        let baseline = crate::preprocessing::baseline::Baseline::load(
2065            env.fs.as_ref(),
2066            env.paths.as_ref(),
2067            "app",
2068            "preprocessed",
2069            "config.toml",
2070        )
2071        .unwrap()
2072        .expect("baseline must be written for a tracked-render expansion");
2073
2074        assert_eq!(baseline.rendered_content, "name = rendered");
2075        assert_eq!(baseline.tracked_render, "name = \u{1e}rendered\u{1f}");
2076        // Source hash is the SHA of the source file's bytes.
2077        assert_eq!(baseline.source_hash.len(), 64);
2078        // Context hash matches the one the preprocessor emitted.
2079        assert!(
2080            baseline.context_hash.chars().all(|c| c == 'a' || c == 'b'),
2081            "context hash should be 0xab repeated, got: {}",
2082            baseline.context_hash
2083        );
2084        assert_eq!(baseline.context_hash.len(), 64);
2085    }
2086
2087    #[test]
2088    fn baseline_is_skipped_in_passive_mode() {
2089        // Passive callers (`dodot status`, `dodot up --dry-run`) MUST
2090        // NOT touch the baseline cache. No baseline should be written
2091        // in that case — overwriting it would erase the
2092        // divergence-detection ground truth captured at the last
2093        // `dodot up`. Per `secrets.lex` §7.4 / issue #121.
2094        let env = TempEnvironment::builder()
2095            .pack("app")
2096            .file("config.toml.tracked", "src")
2097            .done()
2098            .build();
2099
2100        let mut registry = PreprocessorRegistry::new();
2101        registry.register(Box::new(ScriptedPreprocessor {
2102            name: "tracked-scripted",
2103            extension: ".tracked",
2104            outputs: vec![crate::preprocessing::ExpandedFile {
2105                relative_path: PathBuf::from("config.toml"),
2106                content: b"x".to_vec(),
2107                is_dir: false,
2108                tracked_render: Some("x".into()),
2109                context_hash: Some([0; 32]),
2110            }],
2111            ..Default::default()
2112        }));
2113
2114        let datastore = make_datastore(&env);
2115        let pack = make_pack("app", env.dotfiles_root.join("app"));
2116        let entries = vec![PackEntry {
2117            relative_path: "config.toml.tracked".into(),
2118            absolute_path: env.dotfiles_root.join("app/config.toml.tracked"),
2119            is_dir: false,
2120        }];
2121
2122        preprocess_pack(
2123            entries,
2124            &registry,
2125            &pack,
2126            env.fs.as_ref(),
2127            &datastore,
2128            env.paths.as_ref(),
2129            crate::preprocessing::PreprocessMode::Passive,
2130            false,
2131        )
2132        .unwrap();
2133
2134        let path = env
2135            .paths
2136            .preprocessor_baseline_path("app", "preprocessed", "config.toml");
2137        assert!(
2138            !env.fs.exists(&path),
2139            "no baseline should exist after a Passive run, but found: {}",
2140            path.display()
2141        );
2142    }
2143
2144    #[test]
2145    fn baseline_is_skipped_for_preprocessors_without_tracked_render() {
2146        // The identity preprocessor (and unarchive) don't produce a
2147        // tracked render. They still go through the pipeline, but no
2148        // baseline is written — the cache is only meaningful when paired
2149        // with burgertocow's marker stream.
2150        let env = TempEnvironment::builder()
2151            .pack("app")
2152            .file("config.toml.identity", "data")
2153            .done()
2154            .build();
2155
2156        let registry = make_registry(); // identity-only
2157        let datastore = make_datastore(&env);
2158        let pack = make_pack("app", env.dotfiles_root.join("app"));
2159        let entries = vec![PackEntry {
2160            relative_path: "config.toml.identity".into(),
2161            absolute_path: env.dotfiles_root.join("app/config.toml.identity"),
2162            is_dir: false,
2163        }];
2164
2165        preprocess_pack(
2166            entries,
2167            &registry,
2168            &pack,
2169            env.fs.as_ref(),
2170            &datastore,
2171            env.paths.as_ref(),
2172            PreprocessMode::Active,
2173            false,
2174        )
2175        .unwrap();
2176
2177        let path = env
2178            .paths
2179            .preprocessor_baseline_path("app", "preprocessed", "config.toml");
2180        assert!(
2181            !env.fs.exists(&path),
2182            "identity preprocessor (no tracked render) should not write a baseline"
2183        );
2184    }
2185
2186    #[test]
2187    fn baseline_overwrites_on_repeated_up() {
2188        // Re-running `up` with a changed source file must replace the
2189        // baseline, not leave the stale one in place — otherwise drift
2190        // detection would compare against an out-of-date baseline.
2191        let env = TempEnvironment::builder()
2192            .pack("app")
2193            .file("config.toml.tracked", "first")
2194            .done()
2195            .build();
2196
2197        let outputs_first = vec![crate::preprocessing::ExpandedFile {
2198            relative_path: PathBuf::from("config.toml"),
2199            content: b"FIRST".to_vec(),
2200            is_dir: false,
2201            tracked_render: Some("FIRST".into()),
2202            context_hash: Some([1; 32]),
2203        }];
2204        let outputs_second = vec![crate::preprocessing::ExpandedFile {
2205            relative_path: PathBuf::from("config.toml"),
2206            content: b"SECOND".to_vec(),
2207            is_dir: false,
2208            tracked_render: Some("SECOND".into()),
2209            context_hash: Some([2; 32]),
2210        }];
2211
2212        let datastore = make_datastore(&env);
2213        let pack = make_pack("app", env.dotfiles_root.join("app"));
2214        let make_entries = || {
2215            vec![PackEntry {
2216                relative_path: "config.toml.tracked".into(),
2217                absolute_path: env.dotfiles_root.join("app/config.toml.tracked"),
2218                is_dir: false,
2219            }]
2220        };
2221
2222        // First run.
2223        let mut registry1 = PreprocessorRegistry::new();
2224        registry1.register(Box::new(ScriptedPreprocessor {
2225            name: "ts",
2226            extension: ".tracked",
2227            outputs: outputs_first,
2228            ..Default::default()
2229        }));
2230        preprocess_pack(
2231            make_entries(),
2232            &registry1,
2233            &pack,
2234            env.fs.as_ref(),
2235            &datastore,
2236            env.paths.as_ref(),
2237            PreprocessMode::Active,
2238            false,
2239        )
2240        .unwrap();
2241
2242        // Second run with changed outputs.
2243        let mut registry2 = PreprocessorRegistry::new();
2244        registry2.register(Box::new(ScriptedPreprocessor {
2245            name: "ts",
2246            extension: ".tracked",
2247            outputs: outputs_second,
2248            ..Default::default()
2249        }));
2250        preprocess_pack(
2251            make_entries(),
2252            &registry2,
2253            &pack,
2254            env.fs.as_ref(),
2255            &datastore,
2256            env.paths.as_ref(),
2257            PreprocessMode::Active,
2258            false,
2259        )
2260        .unwrap();
2261
2262        let baseline = crate::preprocessing::baseline::Baseline::load(
2263            env.fs.as_ref(),
2264            env.paths.as_ref(),
2265            "app",
2266            "preprocessed",
2267            "config.toml",
2268        )
2269        .unwrap()
2270        .unwrap();
2271        assert_eq!(baseline.rendered_content, "SECOND");
2272    }
2273
2274    #[test]
2275    fn end_to_end_baseline_for_real_template_preprocessor() {
2276        // Exercise the cache write through the actual TemplatePreprocessor
2277        // (rather than ScriptedPreprocessor). This pins the integration
2278        // contract: a `.tmpl` file in a pack produces a baseline that
2279        // contains the rendered content, the tracked render with markers,
2280        // and a non-empty context hash.
2281        use std::collections::HashMap;
2282        let env = TempEnvironment::builder()
2283            .pack("app")
2284            .file("greet.tmpl", "hello {{ name }}")
2285            .done()
2286            .build();
2287
2288        let mut vars = HashMap::new();
2289        vars.insert("name".into(), "Alice".into());
2290        let template_pp = crate::preprocessing::template::TemplatePreprocessor::new(
2291            vec!["tmpl".into()],
2292            vars,
2293            env.paths.as_ref(),
2294        )
2295        .unwrap();
2296        let mut registry = PreprocessorRegistry::new();
2297        registry.register(Box::new(template_pp));
2298
2299        let datastore = make_datastore(&env);
2300        let pack = make_pack("app", env.dotfiles_root.join("app"));
2301        let entries = vec![PackEntry {
2302            relative_path: "greet.tmpl".into(),
2303            absolute_path: env.dotfiles_root.join("app/greet.tmpl"),
2304            is_dir: false,
2305        }];
2306
2307        preprocess_pack(
2308            entries,
2309            &registry,
2310            &pack,
2311            env.fs.as_ref(),
2312            &datastore,
2313            env.paths.as_ref(),
2314            PreprocessMode::Active,
2315            false,
2316        )
2317        .unwrap();
2318
2319        let baseline = crate::preprocessing::baseline::Baseline::load(
2320            env.fs.as_ref(),
2321            env.paths.as_ref(),
2322            "app",
2323            "preprocessed",
2324            "greet",
2325        )
2326        .unwrap()
2327        .expect("template baseline must be written");
2328
2329        assert_eq!(baseline.rendered_content, "hello Alice");
2330        // The tracked render must contain marker bytes around "Alice".
2331        assert!(
2332            baseline.tracked_render.contains(burgertocow::VAR_START),
2333            "tracked render must contain marker bytes, got: {:?}",
2334            baseline.tracked_render
2335        );
2336        // Context hash is the template preprocessor's deterministic
2337        // hex; non-empty.
2338        assert_eq!(baseline.context_hash.len(), 64);
2339        // Rendered hash is SHA-256 hex.
2340        assert_eq!(baseline.rendered_hash.len(), 64);
2341    }
2342
2343    // ── Conflict-marker safety gate ─────────────────────────────
2344
2345    #[test]
2346    fn conflict_marker_in_template_source_blocks_expansion() {
2347        // The most important test for R2: a template source containing
2348        // a dodot-conflict marker must be refused at the pipeline level
2349        // — otherwise the markers would render verbatim through
2350        // MiniJinja and deploy into the user's config as garbage.
2351        use std::collections::HashMap;
2352        let template_with_conflict = format!(
2353            "name = Alice\n{}\nhost = \"{{{{ env.DB_HOST }}}}\"\n{}\nhost = \"prod\"\n{}\nport = 5432\n",
2354            crate::preprocessing::conflict::MARKER_START,
2355            crate::preprocessing::conflict::MARKER_MID,
2356            crate::preprocessing::conflict::MARKER_END,
2357        );
2358        let env = TempEnvironment::builder()
2359            .pack("app")
2360            .file("config.toml.tmpl", &template_with_conflict)
2361            .done()
2362            .build();
2363
2364        let template_pp = crate::preprocessing::template::TemplatePreprocessor::new(
2365            vec!["tmpl".into()],
2366            HashMap::new(),
2367            env.paths.as_ref(),
2368        )
2369        .unwrap();
2370        let mut registry = PreprocessorRegistry::new();
2371        registry.register(Box::new(template_pp));
2372
2373        let datastore = make_datastore(&env);
2374        let pack = make_pack("app", env.dotfiles_root.join("app"));
2375        let entries = vec![PackEntry {
2376            relative_path: "config.toml.tmpl".into(),
2377            absolute_path: env.dotfiles_root.join("app/config.toml.tmpl"),
2378            is_dir: false,
2379        }];
2380
2381        let err = preprocess_pack(
2382            entries,
2383            &registry,
2384            &pack,
2385            env.fs.as_ref(),
2386            &datastore,
2387            env.paths.as_ref(),
2388            PreprocessMode::Active,
2389            false,
2390        )
2391        .unwrap_err();
2392
2393        match err {
2394            DodotError::UnresolvedConflictMarker {
2395                source_file,
2396                line_numbers,
2397            } => {
2398                assert!(source_file.ends_with("config.toml.tmpl"));
2399                assert_eq!(line_numbers.len(), 3, "got: {line_numbers:?}");
2400            }
2401            other => panic!("expected UnresolvedConflictMarker, got: {other}"),
2402        }
2403
2404        // Critically: the datastore must NOT carry a partially-rendered
2405        // file from before the gate caught the markers. The pipeline
2406        // refuses on the first scan, before any disk write.
2407        let datastore_path = env
2408            .paths
2409            .data_dir()
2410            .join("packs")
2411            .join("app")
2412            .join("preprocessed")
2413            .join("config.toml");
2414        assert!(
2415            !env.fs.exists(&datastore_path),
2416            "no rendered output should land in the datastore when the gate fires"
2417        );
2418
2419        // Same for the baseline cache.
2420        let baseline_path =
2421            env.paths
2422                .preprocessor_baseline_path("app", "preprocessed", "config.toml");
2423        assert!(
2424            !env.fs.exists(&baseline_path),
2425            "no baseline should be written when the gate fires"
2426        );
2427    }
2428
2429    #[test]
2430    fn conflict_marker_gate_skipped_for_preprocessors_without_reverse_merge() {
2431        // The unarchive / identity preprocessors don't participate in
2432        // reverse-merge, so the gate doesn't read their source files
2433        // (which may not be UTF-8 anyway). Confirm that a marker token
2434        // accidentally present in such a source does NOT block the
2435        // pipeline. We use a ScriptedPreprocessor with
2436        // supports_reverse_merge=false to drive this.
2437        let env = TempEnvironment::builder()
2438            .pack("app")
2439            .file(
2440                "data.scripted",
2441                &format!(
2442                    "header\n{}\nbody\n",
2443                    crate::preprocessing::conflict::MARKER_START
2444                ),
2445            )
2446            .done()
2447            .build();
2448
2449        let mut registry = PreprocessorRegistry::new();
2450        registry.register(Box::new(ScriptedPreprocessor {
2451            name: "bytes-only",
2452            extension: ".scripted",
2453            outputs: vec![crate::preprocessing::ExpandedFile {
2454                relative_path: PathBuf::from("data"),
2455                content: b"emitted".to_vec(),
2456                is_dir: false,
2457                ..Default::default()
2458            }],
2459            supports_reverse_merge: false,
2460        }));
2461
2462        let datastore = make_datastore(&env);
2463        let pack = make_pack("app", env.dotfiles_root.join("app"));
2464        let entries = vec![PackEntry {
2465            relative_path: "data.scripted".into(),
2466            absolute_path: env.dotfiles_root.join("app/data.scripted"),
2467            is_dir: false,
2468        }];
2469
2470        let result = preprocess_pack(
2471            entries,
2472            &registry,
2473            &pack,
2474            env.fs.as_ref(),
2475            &datastore,
2476            env.paths.as_ref(),
2477            crate::preprocessing::PreprocessMode::Active,
2478            false,
2479        )
2480        .expect("non-tracking preprocessor must not be gated by markers in its source");
2481        assert_eq!(result.virtual_entries.len(), 1);
2482    }
2483
2484    #[test]
2485    fn conflict_marker_gate_runs_on_tracking_scripted_preprocessor() {
2486        // Symmetric to the test above: a ScriptedPreprocessor with
2487        // supports_reverse_merge=true must trip the gate when its
2488        // source carries marker lines, even though it's not the real
2489        // template preprocessor. This pins the gate's dispatch to the
2490        // trait flag, not a hard-coded preprocessor name check.
2491        let env = TempEnvironment::builder()
2492            .pack("app")
2493            .file(
2494                "config.toml.tracked",
2495                &format!(
2496                    "ok\n{}\nbody\n{}\n",
2497                    crate::preprocessing::conflict::MARKER_START,
2498                    crate::preprocessing::conflict::MARKER_END
2499                ),
2500            )
2501            .done()
2502            .build();
2503
2504        let mut registry = PreprocessorRegistry::new();
2505        registry.register(Box::new(ScriptedPreprocessor {
2506            name: "tracking-bytes",
2507            extension: ".tracked",
2508            outputs: vec![crate::preprocessing::ExpandedFile {
2509                relative_path: PathBuf::from("config.toml"),
2510                content: b"x".to_vec(),
2511                is_dir: false,
2512                tracked_render: Some("x".into()),
2513                context_hash: Some([0; 32]),
2514            }],
2515            supports_reverse_merge: true,
2516        }));
2517
2518        let datastore = make_datastore(&env);
2519        let pack = make_pack("app", env.dotfiles_root.join("app"));
2520        let entries = vec![PackEntry {
2521            relative_path: "config.toml.tracked".into(),
2522            absolute_path: env.dotfiles_root.join("app/config.toml.tracked"),
2523            is_dir: false,
2524        }];
2525
2526        let err = preprocess_pack(
2527            entries,
2528            &registry,
2529            &pack,
2530            env.fs.as_ref(),
2531            &datastore,
2532            env.paths.as_ref(),
2533            crate::preprocessing::PreprocessMode::Active,
2534            false,
2535        )
2536        .unwrap_err();
2537        assert!(
2538            matches!(err, DodotError::UnresolvedConflictMarker { .. }),
2539            "expected UnresolvedConflictMarker, got: {err}"
2540        );
2541    }
2542
2543    #[test]
2544    fn gate_handles_non_utf8_source_via_lossy_decode() {
2545        // Defence-in-depth: a reverse-merge-capable preprocessor with a
2546        // non-UTF-8 source must not crash the gate with a generic
2547        // UTF-8 decode error. The pipeline reads bytes and decodes
2548        // lossily before scanning for markers — the marker token is
2549        // ASCII so detection works, and a binary-ish source without
2550        // markers passes cleanly.
2551        let env = TempEnvironment::builder()
2552            .pack("app")
2553            .file("config.toml.tracked", "placeholder")
2554            .done()
2555            .build();
2556
2557        // Overwrite with non-UTF-8 bytes: a few invalid sequences plus
2558        // valid ASCII surrounding them. No markers in the bytes.
2559        let bytes: Vec<u8> = vec![
2560            b'h', b'e', b'l', b'l', b'o', b'\n', 0xff, 0xfe, b'\n', b'w', b'o', b'r', b'l', b'd',
2561            b'\n',
2562        ];
2563        env.fs
2564            .write_file(&env.dotfiles_root.join("app/config.toml.tracked"), &bytes)
2565            .unwrap();
2566
2567        let mut registry = PreprocessorRegistry::new();
2568        registry.register(Box::new(ScriptedPreprocessor {
2569            name: "tracking-bytes",
2570            extension: ".tracked",
2571            outputs: vec![crate::preprocessing::ExpandedFile {
2572                relative_path: PathBuf::from("config.toml"),
2573                content: b"x".to_vec(),
2574                is_dir: false,
2575                tracked_render: Some("x".into()),
2576                context_hash: Some([0; 32]),
2577            }],
2578            supports_reverse_merge: true,
2579        }));
2580
2581        let datastore = make_datastore(&env);
2582        let pack = make_pack("app", env.dotfiles_root.join("app"));
2583        let entries = vec![PackEntry {
2584            relative_path: "config.toml.tracked".into(),
2585            absolute_path: env.dotfiles_root.join("app/config.toml.tracked"),
2586            is_dir: false,
2587        }];
2588
2589        // Should NOT error: the gate's lossy decode handles non-UTF-8
2590        // gracefully, and there are no marker lines in the bytes.
2591        let result = preprocess_pack(
2592            entries,
2593            &registry,
2594            &pack,
2595            env.fs.as_ref(),
2596            &datastore,
2597            env.paths.as_ref(),
2598            crate::preprocessing::PreprocessMode::Active,
2599            false,
2600        )
2601        .expect("non-UTF-8 source without markers must not crash the gate");
2602        assert_eq!(result.virtual_entries.len(), 1);
2603    }
2604
2605    #[test]
2606    fn gate_detects_markers_in_non_utf8_source() {
2607        // Round-trip the lossy path: a source that's mostly invalid
2608        // UTF-8 but has a real marker line in valid ASCII still trips
2609        // the gate. This is the safety-critical scenario — we must
2610        // not silently pass a marker-bearing source just because
2611        // surrounding bytes happen to be invalid UTF-8.
2612        let env = TempEnvironment::builder()
2613            .pack("app")
2614            .file("config.toml.tracked", "placeholder")
2615            .done()
2616            .build();
2617
2618        let mut bytes: Vec<u8> = Vec::new();
2619        bytes.extend_from_slice(b"prefix\n");
2620        bytes.push(0xff);
2621        bytes.push(0xfe);
2622        bytes.push(b'\n');
2623        bytes.extend_from_slice(crate::preprocessing::conflict::MARKER_START.as_bytes());
2624        bytes.push(b'\n');
2625        bytes.extend_from_slice(b"body\n");
2626        env.fs
2627            .write_file(&env.dotfiles_root.join("app/config.toml.tracked"), &bytes)
2628            .unwrap();
2629
2630        let mut registry = PreprocessorRegistry::new();
2631        registry.register(Box::new(ScriptedPreprocessor {
2632            name: "tracking-bytes",
2633            extension: ".tracked",
2634            outputs: vec![crate::preprocessing::ExpandedFile {
2635                relative_path: PathBuf::from("config.toml"),
2636                content: b"x".to_vec(),
2637                is_dir: false,
2638                tracked_render: Some("x".into()),
2639                context_hash: Some([0; 32]),
2640            }],
2641            supports_reverse_merge: true,
2642        }));
2643
2644        let datastore = make_datastore(&env);
2645        let pack = make_pack("app", env.dotfiles_root.join("app"));
2646        let entries = vec![PackEntry {
2647            relative_path: "config.toml.tracked".into(),
2648            absolute_path: env.dotfiles_root.join("app/config.toml.tracked"),
2649            is_dir: false,
2650        }];
2651
2652        let err = preprocess_pack(
2653            entries,
2654            &registry,
2655            &pack,
2656            env.fs.as_ref(),
2657            &datastore,
2658            env.paths.as_ref(),
2659            crate::preprocessing::PreprocessMode::Active,
2660            false,
2661        )
2662        .unwrap_err();
2663        assert!(
2664            matches!(err, DodotError::UnresolvedConflictMarker { .. }),
2665            "expected UnresolvedConflictMarker even on non-UTF-8 source, got: {err}"
2666        );
2667    }
2668
2669    #[test]
2670    fn template_renders_normally_after_markers_are_resolved() {
2671        // Once the user removes the markers (the standard resolution
2672        // path), the next `dodot up` must succeed and produce the
2673        // expected rendered output. This is the round-trip check: the
2674        // gate doesn't permanently brick a pack — it just defers
2675        // expansion until the source is clean again.
2676        use std::collections::HashMap;
2677        let env = TempEnvironment::builder()
2678            .pack("app")
2679            .file("greet.tmpl", "hello {{ name }}")
2680            .done()
2681            .build();
2682
2683        let mut vars = HashMap::new();
2684        vars.insert("name".into(), "Alice".into());
2685        let template_pp = crate::preprocessing::template::TemplatePreprocessor::new(
2686            vec!["tmpl".into()],
2687            vars,
2688            env.paths.as_ref(),
2689        )
2690        .unwrap();
2691        let mut registry = PreprocessorRegistry::new();
2692        registry.register(Box::new(template_pp));
2693
2694        let datastore = make_datastore(&env);
2695        let pack = make_pack("app", env.dotfiles_root.join("app"));
2696        let entries = vec![PackEntry {
2697            relative_path: "greet.tmpl".into(),
2698            absolute_path: env.dotfiles_root.join("app/greet.tmpl"),
2699            is_dir: false,
2700        }];
2701
2702        // Round 1: clean source → success.
2703        let result = preprocess_pack(
2704            entries.clone(),
2705            &registry,
2706            &pack,
2707            env.fs.as_ref(),
2708            &datastore,
2709            env.paths.as_ref(),
2710            PreprocessMode::Active,
2711            false,
2712        )
2713        .expect("clean source should expand successfully");
2714        assert_eq!(result.virtual_entries.len(), 1);
2715
2716        // Round 2: user adds a marker → blocked.
2717        let dirty = format!(
2718            "hello\n{}\n{{{{ name }}}}\n{}\n",
2719            crate::preprocessing::conflict::MARKER_START,
2720            crate::preprocessing::conflict::MARKER_END,
2721        );
2722        env.fs
2723            .write_file(&env.dotfiles_root.join("app/greet.tmpl"), dirty.as_bytes())
2724            .unwrap();
2725        let err = preprocess_pack(
2726            entries.clone(),
2727            &registry,
2728            &pack,
2729            env.fs.as_ref(),
2730            &datastore,
2731            env.paths.as_ref(),
2732            PreprocessMode::Active,
2733            false,
2734        )
2735        .unwrap_err();
2736        assert!(matches!(err, DodotError::UnresolvedConflictMarker { .. }));
2737
2738        // Round 3: user resolves → success again.
2739        env.fs
2740            .write_file(
2741                &env.dotfiles_root.join("app/greet.tmpl"),
2742                b"hello {{ name }}",
2743            )
2744            .unwrap();
2745        let result = preprocess_pack(
2746            entries,
2747            &registry,
2748            &pack,
2749            env.fs.as_ref(),
2750            &datastore,
2751            env.paths.as_ref(),
2752            PreprocessMode::Active,
2753            false,
2754        )
2755        .expect("resolved source should expand again");
2756        assert_eq!(result.virtual_entries.len(), 1);
2757    }
2758
2759    // ── Divergence guard (issue #110, §6.4) ─────────────────────────
2760    //
2761    // Tests that `preprocess_pack` refuses to overwrite a deployed file
2762    // whose bytes have diverged from the cached baseline. The guard
2763    // reads the file content; env vars are intentionally not part of
2764    // the staleness signal — see the §6.4 banner and template.rs.
2765    //
2766    // Helper that runs the template preprocessor end-to-end. We use the
2767    // real TemplatePreprocessor here (not ScriptedPreprocessor) so the
2768    // tests pin the integration contract: a `.tmpl` source produces a
2769    // baseline that subsequent runs read back.
2770    fn run_template_preprocess(
2771        env: &TempEnvironment,
2772        pack_name: &str,
2773        force: bool,
2774    ) -> PreprocessResult {
2775        use std::collections::HashMap;
2776        let template_pp = crate::preprocessing::template::TemplatePreprocessor::new(
2777            vec!["tmpl".into()],
2778            HashMap::new(),
2779            env.paths.as_ref(),
2780        )
2781        .unwrap();
2782        let mut registry = PreprocessorRegistry::new();
2783        registry.register(Box::new(template_pp));
2784
2785        let datastore = make_datastore(env);
2786        let pack = make_pack(pack_name, env.dotfiles_root.join(pack_name));
2787        let entries = vec![PackEntry {
2788            relative_path: "config.toml.tmpl".into(),
2789            absolute_path: env.dotfiles_root.join(pack_name).join("config.toml.tmpl"),
2790            is_dir: false,
2791        }];
2792
2793        preprocess_pack(
2794            entries,
2795            &registry,
2796            &pack,
2797            env.fs.as_ref(),
2798            &datastore,
2799            env.paths.as_ref(),
2800            PreprocessMode::Active,
2801            force,
2802        )
2803        .unwrap()
2804    }
2805
2806    #[test]
2807    fn divergence_guard_skips_when_deployed_was_edited() {
2808        // Row 3 of the §6.4 matrix: source same, deployed edited.
2809        // The pipeline must preserve the user's edit (skip the write)
2810        // and report it via PreprocessResult::skipped.
2811        let env = TempEnvironment::builder()
2812            .pack("app")
2813            .file("config.toml.tmpl", "name = original")
2814            .done()
2815            .build();
2816
2817        // First run: clean deploy, baseline written.
2818        let first = run_template_preprocess(&env, "app", false);
2819        assert!(first.skipped.is_empty(), "first deploy must not skip");
2820        let deployed_path = &first.virtual_entries[0].absolute_path.clone();
2821
2822        // User edits the deployed file directly.
2823        env.fs
2824            .write_file(deployed_path, b"name = USER EDITED")
2825            .unwrap();
2826
2827        // Second run with the same source → guard fires.
2828        let second = run_template_preprocess(&env, "app", false);
2829        assert_eq!(second.skipped.len(), 1, "deployed-edit must skip");
2830        let skip = &second.skipped[0];
2831        assert_eq!(skip.state, DivergenceState::OutputChanged);
2832        assert_eq!(skip.pack, "app");
2833        assert_eq!(skip.virtual_relative, std::path::Path::new("config.toml"));
2834
2835        // The user's edit must still be on disk; the rendered content
2836        // must NOT have replaced it.
2837        let on_disk = env.fs.read_to_string(deployed_path).unwrap();
2838        assert_eq!(on_disk, "name = USER EDITED");
2839
2840        // The virtual entry must still point at the deployed file so
2841        // downstream rule matching has something to work with.
2842        assert_eq!(second.virtual_entries.len(), 1);
2843        assert_eq!(&second.virtual_entries[0].absolute_path, deployed_path);
2844    }
2845
2846    #[test]
2847    fn divergence_guard_skips_when_both_changed() {
2848        // Row 4: source AND deployed both edited. Same skip behaviour
2849        // (preserve deployed bytes), reported as BothChanged so the
2850        // user gets a sharper warning.
2851        let env = TempEnvironment::builder()
2852            .pack("app")
2853            .file("config.toml.tmpl", "name = original")
2854            .done()
2855            .build();
2856
2857        let first = run_template_preprocess(&env, "app", false);
2858        let deployed_path = first.virtual_entries[0].absolute_path.clone();
2859
2860        // Edit both the source template and the deployed file.
2861        env.fs
2862            .write_file(
2863                &env.dotfiles_root.join("app/config.toml.tmpl"),
2864                b"name = SOURCE EDITED",
2865            )
2866            .unwrap();
2867        env.fs
2868            .write_file(&deployed_path, b"name = USER EDITED")
2869            .unwrap();
2870
2871        let second = run_template_preprocess(&env, "app", false);
2872        assert_eq!(second.skipped.len(), 1);
2873        assert_eq!(second.skipped[0].state, DivergenceState::BothChanged);
2874
2875        // Deployed bytes preserved despite the source edit.
2876        let on_disk = env.fs.read_to_string(&deployed_path).unwrap();
2877        assert_eq!(on_disk, "name = USER EDITED");
2878    }
2879
2880    #[test]
2881    fn divergence_guard_proceeds_when_source_changed_only() {
2882        // Row 2: source edited, deployed still matches the cached
2883        // render. This is the normal "I edited the template, re-deploy"
2884        // path — the guard must NOT fire here.
2885        let env = TempEnvironment::builder()
2886            .pack("app")
2887            .file("config.toml.tmpl", "name = original")
2888            .done()
2889            .build();
2890
2891        let first = run_template_preprocess(&env, "app", false);
2892        let deployed_path = first.virtual_entries[0].absolute_path.clone();
2893
2894        // Source edited; deployed left untouched.
2895        env.fs
2896            .write_file(
2897                &env.dotfiles_root.join("app/config.toml.tmpl"),
2898                b"name = NEW VALUE",
2899            )
2900            .unwrap();
2901
2902        let second = run_template_preprocess(&env, "app", false);
2903        assert!(
2904            second.skipped.is_empty(),
2905            "source-only change must not trigger the guard"
2906        );
2907        let on_disk = env.fs.read_to_string(&deployed_path).unwrap();
2908        assert_eq!(on_disk, "name = NEW VALUE");
2909    }
2910
2911    #[test]
2912    fn divergence_guard_no_op_when_nothing_changed() {
2913        // Row 1: nothing changed. Re-running deploys the same content;
2914        // no skip event.
2915        let env = TempEnvironment::builder()
2916            .pack("app")
2917            .file("config.toml.tmpl", "name = original")
2918            .done()
2919            .build();
2920
2921        let _ = run_template_preprocess(&env, "app", false);
2922        let second = run_template_preprocess(&env, "app", false);
2923        assert!(second.skipped.is_empty());
2924    }
2925
2926    #[test]
2927    fn divergence_guard_overridden_by_force() {
2928        // `dodot up --force` bypasses the guard: the deployed user edit
2929        // gets clobbered by the re-rendered output. This is the
2930        // documented escape hatch (e.g. when an env-var the template
2931        // references has rotated and the user wants the new value).
2932        let env = TempEnvironment::builder()
2933            .pack("app")
2934            .file("config.toml.tmpl", "name = original")
2935            .done()
2936            .build();
2937
2938        let first = run_template_preprocess(&env, "app", false);
2939        let deployed_path = first.virtual_entries[0].absolute_path.clone();
2940
2941        env.fs
2942            .write_file(&deployed_path, b"name = USER EDITED")
2943            .unwrap();
2944
2945        let second = run_template_preprocess(&env, "app", /* force */ true);
2946        assert!(
2947            second.skipped.is_empty(),
2948            "force=true must bypass the guard"
2949        );
2950        let on_disk = env.fs.read_to_string(&deployed_path).unwrap();
2951        assert_eq!(
2952            on_disk, "name = original",
2953            "force must rewrite to the rendered content"
2954        );
2955    }
2956
2957    #[test]
2958    fn divergence_guard_baseline_stays_pinned_to_last_successful_render() {
2959        // Critical invariant: when the guard skips a write, the
2960        // baseline must NOT be updated. Otherwise the next
2961        // `transform check` would compare the user's edit against
2962        // itself and report Synced — losing the divergence signal.
2963        let env = TempEnvironment::builder()
2964            .pack("app")
2965            .file("config.toml.tmpl", "name = original")
2966            .done()
2967            .build();
2968
2969        let first = run_template_preprocess(&env, "app", false);
2970        let deployed_path = first.virtual_entries[0].absolute_path.clone();
2971
2972        // Pin the original baseline timestamp/content for comparison.
2973        let baseline_before = crate::preprocessing::baseline::Baseline::load(
2974            env.fs.as_ref(),
2975            env.paths.as_ref(),
2976            "app",
2977            "preprocessed",
2978            "config.toml",
2979        )
2980        .unwrap()
2981        .unwrap();
2982
2983        env.fs
2984            .write_file(&deployed_path, b"name = USER EDITED")
2985            .unwrap();
2986
2987        let _ = run_template_preprocess(&env, "app", false);
2988
2989        let baseline_after = crate::preprocessing::baseline::Baseline::load(
2990            env.fs.as_ref(),
2991            env.paths.as_ref(),
2992            "app",
2993            "preprocessed",
2994            "config.toml",
2995        )
2996        .unwrap()
2997        .unwrap();
2998
2999        assert_eq!(
3000            baseline_before.rendered_hash, baseline_after.rendered_hash,
3001            "baseline must not be rewritten when the guard skips"
3002        );
3003        assert_eq!(
3004            baseline_before.rendered_content, baseline_after.rendered_content,
3005            "baseline content must not change after a skipped write"
3006        );
3007    }
3008
3009    #[test]
3010    fn divergence_guard_reproceeds_when_user_undoes_their_edit() {
3011        // After the guard fires, if the user reverts their edit (or
3012        // resolves through `dodot transform check`), the next `up`
3013        // must succeed normally — the guard is not sticky.
3014        let env = TempEnvironment::builder()
3015            .pack("app")
3016            .file("config.toml.tmpl", "name = original")
3017            .done()
3018            .build();
3019
3020        let first = run_template_preprocess(&env, "app", false);
3021        let deployed_path = first.virtual_entries[0].absolute_path.clone();
3022
3023        // Edit, then revert.
3024        env.fs
3025            .write_file(&deployed_path, b"name = USER EDITED")
3026            .unwrap();
3027        let blocked = run_template_preprocess(&env, "app", false);
3028        assert_eq!(blocked.skipped.len(), 1);
3029
3030        env.fs
3031            .write_file(&deployed_path, b"name = original")
3032            .unwrap();
3033        let cleared = run_template_preprocess(&env, "app", false);
3034        assert!(
3035            cleared.skipped.is_empty(),
3036            "guard must clear once divergence is gone"
3037        );
3038    }
3039
3040    #[test]
3041    fn divergence_guard_active_for_read_only_callers() {
3042        // Read-only callers (`dodot status`) set `write_baselines =
3043        // false` but still need the divergence guard active —
3044        // otherwise status would silently re-render and overwrite a
3045        // user's deployed-file edit. This test pins the new behavior:
3046        // the guard fires regardless of `write_baselines`, and the
3047        // baseline cache stays pinned to the last `up` (no
3048        // baseline-write side effects from the read-only call).
3049        let env = TempEnvironment::builder()
3050            .pack("app")
3051            .file("config.toml.tmpl", "name = original")
3052            .done()
3053            .build();
3054
3055        // Prime the baseline with a normal `up`.
3056        let _ = run_template_preprocess(&env, "app", false);
3057        let baseline_before = crate::preprocessing::baseline::Baseline::load(
3058            env.fs.as_ref(),
3059            env.paths.as_ref(),
3060            "app",
3061            "preprocessed",
3062            "config.toml",
3063        )
3064        .unwrap()
3065        .unwrap();
3066
3067        // User edits the deployed file directly.
3068        let deployed_path = env
3069            .paths
3070            .handler_data_dir("app", "preprocessed")
3071            .join("config.toml");
3072        env.fs
3073            .write_file(&deployed_path, b"name = USER EDITED")
3074            .unwrap();
3075
3076        // Simulate `status`: write_baselines=false, force=false.
3077        use std::collections::HashMap;
3078        let template_pp = crate::preprocessing::template::TemplatePreprocessor::new(
3079            vec!["tmpl".into()],
3080            HashMap::new(),
3081            env.paths.as_ref(),
3082        )
3083        .unwrap();
3084        let mut registry = PreprocessorRegistry::new();
3085        registry.register(Box::new(template_pp));
3086        let datastore = make_datastore(&env);
3087        let pack = make_pack("app", env.dotfiles_root.join("app"));
3088        let entries = vec![PackEntry {
3089            relative_path: "config.toml.tmpl".into(),
3090            absolute_path: env.dotfiles_root.join("app/config.toml.tmpl"),
3091            is_dir: false,
3092        }];
3093        let result = preprocess_pack(
3094            entries,
3095            &registry,
3096            &pack,
3097            env.fs.as_ref(),
3098            &datastore,
3099            env.paths.as_ref(),
3100            crate::preprocessing::PreprocessMode::Passive,
3101            /* force */ false,
3102        )
3103        .unwrap();
3104        assert_eq!(
3105            result.skipped.len(),
3106            1,
3107            "guard must fire for read-only callers too"
3108        );
3109        assert_eq!(
3110            env.fs.read_to_string(&deployed_path).unwrap(),
3111            "name = USER EDITED",
3112            "user's deployed-file edit must be preserved"
3113        );
3114
3115        // The baseline cache must NOT have been touched: the read-only
3116        // call leaves the divergence-detection ground truth pinned to
3117        // the last `up`.
3118        let baseline_after = crate::preprocessing::baseline::Baseline::load(
3119            env.fs.as_ref(),
3120            env.paths.as_ref(),
3121            "app",
3122            "preprocessed",
3123            "config.toml",
3124        )
3125        .unwrap()
3126        .unwrap();
3127        assert_eq!(baseline_before, baseline_after);
3128    }
3129}