Skip to main content

dodot_lib/preprocessing/
pipeline.rs

1//! Preprocessing pipeline — partitions, expands, and merges entries.
2//!
3//! This module contains the core pipeline function that runs between
4//! directory walking and rule matching. It identifies preprocessor files,
5//! expands them, writes results to the datastore, checks for collisions,
6//! and produces virtual entries for the handler pipeline.
7
8use std::collections::HashMap;
9use std::path::{Component, Path, PathBuf};
10use std::sync::Arc;
11
12use tracing::{debug, info};
13
14use crate::datastore::DataStore;
15use crate::fs::Fs;
16use crate::packs::Pack;
17use crate::paths::Pather;
18use crate::preprocessing::baseline::{cache_filename_for, hex_sha256, Baseline};
19use crate::preprocessing::divergence::DivergenceState;
20use crate::preprocessing::PreprocessorRegistry;
21use crate::rules::PackEntry;
22use crate::{DodotError, Result};
23
/// Which execution envelope the preprocessing pipeline runs under.
///
/// `secrets.lex` §7.4 ("Auth Fatigue and Passive Commands") separates
/// commands into two strictly distinct envelopes:
///
/// - **Active** (`dodot up`): templates are evaluated, `secret()`
///   calls are batched per provider, auth is prompted for once per
///   run, and rendered files plus baselines are written to disk.
/// - **Passive** (`dodot status`, `dodot up --dry-run`): templates
///   MUST NOT be evaluated. Drift detection works exclusively from
///   the baseline cache — no provider calls, no datastore writes, no
///   baseline writes.
///
/// The pipeline branches on this one two-state switch. `Active` keeps
/// the pre-existing behavior; `Passive` is the read-only path that
/// §7.4 requires. See issue #121.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PreprocessMode {
    /// The original `dodot up` path: run the preprocessors, write
    /// their rendered outputs to the datastore, and write baselines
    /// to the cache.
    Active,
    /// Serve everything from the baseline cache. Preprocessor
    /// expansion is skipped (so no provider calls happen), and neither
    /// the datastore nor the baselines are written. A preprocessor
    /// entry that has no baseline yet is surfaced as a passthrough
    /// placeholder, letting callers render "unknown — run `dodot up`
    /// first" instead of falling through to template evaluation.
    Passive,
}
53
54/// Validate that a preprocessor-produced path is safe to materialise in
55/// the datastore: relative, no root/prefix/parent-dir components, and
56/// not effectively empty.
57///
58/// Malicious or malformed preprocessor output (tar-slip, absolute paths,
59/// `..` segments) can escape the pack namespace and overwrite arbitrary
60/// files. Empty paths (or paths made up only of `.` components) are
61/// rejected because they would silently fail at the datastore layer with
62/// an opaque error — here we produce a clean diagnostic naming the
63/// preprocessor and source file.
64fn validate_safe_relative_path(path: &Path, preprocessor: &str, source_file: &Path) -> Result<()> {
65    let mut has_normal = false;
66    for component in path.components() {
67        match component {
68            Component::Normal(_) => has_normal = true,
69            Component::CurDir => {}
70            Component::ParentDir | Component::RootDir | Component::Prefix(_) => {
71                return Err(DodotError::PreprocessorError {
72                    preprocessor: preprocessor.into(),
73                    source_file: source_file.to_path_buf(),
74                    message: format!(
75                        "unsafe path in preprocessor output: {} (absolute or contains `..`)",
76                        path.display()
77                    ),
78                });
79            }
80        }
81    }
82    if !has_normal {
83        return Err(DodotError::PreprocessorError {
84            preprocessor: preprocessor.into(),
85            source_file: source_file.to_path_buf(),
86            message: format!(
87                "preprocessor produced an empty output path (\"{}\"). This usually means a file like \
88                 `.tmpl` or `.identity` has no stem after stripping the preprocessor extension — \
89                 rename the source file so that it has a non-empty name after stripping.",
90                path.display()
91            ),
92        });
93    }
94    Ok(())
95}
96
/// Canonicalise an already-validated relative path by keeping only its
/// `Normal` components.
///
/// This makes `./foo` and `foo` compare equal as virtual paths, which
/// collision detection relies on. Every non-`Normal` component is
/// dropped without complaint, so call this only after
/// [`validate_safe_relative_path`] has accepted the path.
fn normalize_relative(path: &Path) -> PathBuf {
    path.components()
        .filter_map(|part| match part {
            Component::Normal(name) => Some(name),
            _ => None,
        })
        .collect()
}
109
110/// The result of preprocessing a pack's file entries.
111#[derive(Debug)]
112pub struct PreprocessResult {
113    /// Entries that were NOT preprocessed (pass through unchanged).
114    pub regular_entries: Vec<PackEntry>,
115    /// Virtual entries created by preprocessing (point to datastore files).
116    pub virtual_entries: Vec<PackEntry>,
117    /// Maps virtual entry absolute_path → original source path in pack.
118    pub source_map: HashMap<PathBuf, PathBuf>,
119    /// Maps virtual entry absolute_path → in-memory rendered bytes.
120    /// Populated for every virtual entry the pipeline produces, in
121    /// both Active and Passive modes (Passive sources the bytes from
122    /// `baseline.rendered_content`). Handlers that need the rendered
123    /// content for sentinel hashing (`install`, `homebrew`) consult
124    /// this map first and fall back to disk read for non-template
125    /// files. Without this, Passive callers — where the rendered
126    /// file isn't on disk — couldn't produce correct sentinels for
127    /// templated install scripts or Brewfiles. See issue #121.
128    pub rendered_bytes: HashMap<PathBuf, Arc<[u8]>>,
129    /// Files whose deployed bytes diverged from the cached baseline and
130    /// were therefore preserved instead of being overwritten. Empty
131    /// outside of `dodot up` runs that pass `force = false` and have a
132    /// baseline available. Surfaced to the user as warnings — see
133    /// `docs/proposals/preprocessing-pipeline.lex` §6.4.
134    pub skipped: Vec<SkippedRender>,
135}
136
137/// One file the pipeline refused to overwrite because its deployed
138/// bytes diverged from the cached render.
139///
140/// `dodot up` records these so the caller can warn the user that their
141/// edits were preserved. Resolution paths are `dodot transform check`
142/// (auto-merge via the clean filter) or `dodot up --force` (overwrite).
143#[derive(Debug, Clone)]
144pub struct SkippedRender {
145    /// Pack name (matches `Pack::name`, the on-disk directory name).
146    pub pack: String,
147    /// Virtual relative path inside the pack (post-strip), e.g.
148    /// `config.toml` for a source `config.toml.tmpl`.
149    pub virtual_relative: PathBuf,
150    /// Absolute path of the deployed file we preserved.
151    pub deployed_path: PathBuf,
152    /// Which divergence state we observed. Always `OutputChanged` or
153    /// `BothChanged` — the other states never trigger a skip.
154    pub state: DivergenceState,
155}
156
157impl PreprocessResult {
158    /// Create a passthrough result where all entries are regular (no preprocessing).
159    pub fn passthrough(entries: Vec<PackEntry>) -> Self {
160        Self {
161            regular_entries: entries,
162            virtual_entries: Vec::new(),
163            source_map: HashMap::new(),
164            rendered_bytes: HashMap::new(),
165            skipped: Vec::new(),
166        }
167    }
168
169    /// Return all entries (regular + virtual) merged into one list, sorted by relative path.
170    pub fn merged_entries(&self) -> Vec<PackEntry> {
171        let mut all = Vec::with_capacity(self.regular_entries.len() + self.virtual_entries.len());
172        all.extend(self.regular_entries.iter().cloned());
173        all.extend(self.virtual_entries.iter().cloned());
174        all.sort_by(|a, b| a.relative_path.cmp(&b.relative_path));
175        all
176    }
177}
178
/// Handler name under which preprocessor-expanded files are stored in
/// the datastore.
const PREPROCESSED_HANDLER: &str = "preprocessed";
181
182/// Result of checking whether the deployed file diverges from the
183/// cached baseline. Used by [`preprocess_pack`] to decide whether to
184/// overwrite or preserve the user's edits.
185enum DivergenceCheck {
186    /// No baseline, no deployed file, or content matches — proceed
187    /// with the normal write.
188    Proceed,
189    /// Deployed bytes diverge from the baseline. Skip the write to
190    /// preserve user edits; surface a warning to the caller.
191    Skip {
192        state: DivergenceState,
193        deployed_path: PathBuf,
194    },
195}
196
197/// Compare the prospective deployed file against the cached baseline.
198///
199/// Returns [`DivergenceCheck::Skip`] when the deployed bytes have
200/// changed since the last successful render — that is the case where
201/// re-rendering would silently destroy a user edit (see
202/// `docs/proposals/preprocessing-pipeline.lex` §6.4).
203///
204/// "Define stale-vs-new from file content, not the runtime
205/// environment": this check operates purely on bytes (source + deployed
206/// hash comparisons against the baseline). Env-var rotations are
207/// intentionally invisible here — users who change a referenced env var
208/// pick up the new value via `dodot up --force`.
209fn check_divergence(
210    fs: &dyn Fs,
211    paths: &dyn Pather,
212    pack_name: &str,
213    virtual_relative: &Path,
214    source_path: &Path,
215) -> Result<DivergenceCheck> {
216    let cache_filename = cache_filename_for(virtual_relative);
217    let baseline =
218        match Baseline::load(fs, paths, pack_name, PREPROCESSED_HANDLER, &cache_filename)? {
219            Some(b) => b,
220            // First-time deploy: no baseline to compare against. Writing
221            // is correct here — there's nothing to overwrite.
222            None => return Ok(DivergenceCheck::Proceed),
223        };
224
225    let deployed_path = paths
226        .handler_data_dir(pack_name, PREPROCESSED_HANDLER)
227        .join(virtual_relative);
228    if !fs.exists(&deployed_path) {
229        // Baseline says we deployed once, but the user (or some other
230        // tool) removed the deployed file. Treat as a fresh deploy —
231        // there's nothing to preserve.
232        return Ok(DivergenceCheck::Proceed);
233    }
234
235    let deployed_bytes = fs.read_file(&deployed_path)?;
236    if hex_sha256(&deployed_bytes) == baseline.rendered_hash {
237        return Ok(DivergenceCheck::Proceed);
238    }
239
240    // Deployed file diverges. Distinguish OutputChanged from BothChanged
241    // for a sharper warning. A read failure on the source is treated as
242    // "source unchanged" — the safer assumption when we can't tell.
243    let source_changed = match fs.read_file(source_path) {
244        Ok(bytes) => hex_sha256(&bytes) != baseline.source_hash,
245        Err(_) => false,
246    };
247    let state = if source_changed {
248        DivergenceState::BothChanged
249    } else {
250        DivergenceState::OutputChanged
251    };
252
253    Ok(DivergenceCheck::Skip {
254        state,
255        deployed_path,
256    })
257}
258
259/// Run the preprocessing pipeline for a pack's file entries.
260///
261/// 1. Partition entries into preprocessor files vs regular files.
262/// 2. **In `PreprocessMode::Active`** (real `dodot up` runs): for each
263///    preprocessor file, expand, write results to datastore (unless the
264///    deployed file has diverged from the cached baseline — see step 5),
265///    write the baseline cache record.
266/// 3. Create virtual `PackEntry`s pointing to the datastore files.
267/// 4. Check for collisions between virtual and regular entries.
268/// 5. **Divergence guard** (Active only): unless `force` is `true`,
269///    compare the prospective deployed file against the cached baseline
270///    before overwriting. When the deployed bytes have changed (the
271///    user edited the deployed file directly), skip the write and
272///    record a [`SkippedRender`] so the caller can warn the user. See
273///    `docs/proposals/preprocessing-pipeline.lex` §6.4.
274/// 6. **In `PreprocessMode::Passive`** (`dodot status`, `up --dry-run`):
275///    skip every disk-mutating step. Sources are never read for marker
276///    scans; preprocessors are never invoked (no provider calls); the
277///    datastore is not touched. Virtual entries are still produced so
278///    the rest of the planner can compute intents — their bytes come
279///    from `baseline.rendered_content` when a baseline exists.
280///    First-time pack templates with no baseline still surface a
281///    placeholder virtual entry (so `dodot status` can render them as
282///    "pending" under the stripped name) but with empty
283///    `rendered_bytes`. Handlers that need rendered content for
284///    sentinel hashing (`install`, `homebrew`) skip intent generation
285///    for those placeholders rather than erroring out — the next real
286///    `dodot up` plans them normally. See [`PreprocessMode`] and
287///    `docs/proposals/secrets.lex` §7.4.
288/// 7. Return the result for merging into the handler pipeline.
289///
290/// Set `force = true` to bypass the divergence guard. Surfaces as
291/// `dodot up --force` in the CLI; needed when the user knows they want
292/// to overwrite a divergent deployed file (e.g. after rotating an env
293/// var that a template references). Ignored in `Passive` mode (no
294/// writes happen there at all).
295#[allow(clippy::too_many_arguments)] // pipeline core: every parameter is load-bearing
296pub fn preprocess_pack(
297    entries: Vec<PackEntry>,
298    registry: &PreprocessorRegistry,
299    pack: &Pack,
300    fs: &dyn Fs,
301    datastore: &dyn DataStore,
302    paths: &dyn Pather,
303    mode: PreprocessMode,
304    force: bool,
305) -> Result<PreprocessResult> {
306    let mut regular_entries = Vec::new();
307    let mut preprocessor_entries = Vec::new();
308
309    // Phase 1: Partition
310    for entry in entries {
311        let filename = entry
312            .relative_path
313            .file_name()
314            .map(|n| n.to_string_lossy().to_string())
315            .unwrap_or_default();
316
317        if !entry.is_dir && registry.is_preprocessor_file(&filename) {
318            preprocessor_entries.push(entry);
319        } else {
320            regular_entries.push(entry);
321        }
322    }
323
324    debug!(
325        pack = %pack.name,
326        preprocessor = preprocessor_entries.len(),
327        regular = regular_entries.len(),
328        "partitioned entries"
329    );
330
331    if preprocessor_entries.is_empty() {
332        return Ok(PreprocessResult {
333            regular_entries,
334            virtual_entries: Vec::new(),
335            source_map: HashMap::new(),
336            rendered_bytes: HashMap::new(),
337            skipped: Vec::new(),
338        });
339    }
340
341    // Passive mode: read everything from the baseline cache. Skip
342    // template evaluation entirely (no provider calls), skip
343    // datastore writes, skip baseline writes. See `PreprocessMode`.
344    if mode == PreprocessMode::Passive {
345        return preprocess_pack_passive(
346            preprocessor_entries,
347            regular_entries,
348            registry,
349            pack,
350            fs,
351            paths,
352        );
353    }
354
355    // Phase 2 & 3: Expand and create virtual entries
356    let mut virtual_entries = Vec::new();
357    let mut source_map = HashMap::new();
358    let mut rendered_bytes: HashMap<PathBuf, Arc<[u8]>> = HashMap::new();
359    let mut skipped: Vec<SkippedRender> = Vec::new();
360
361    // Tracks claimed paths for collision detection. Seeded with regular
362    // entries; virtual entries are added as they're created so two
363    // preprocessors can't both produce the same virtual path (e.g.
364    // `config.toml.identity` and `config.toml.tmpl` both expanding to
365    // `config.toml`).
366    let mut claimed_paths: std::collections::HashSet<PathBuf> = regular_entries
367        .iter()
368        .map(|e| e.relative_path.clone())
369        .collect();
370
371    for entry in &preprocessor_entries {
372        let filename = entry
373            .relative_path
374            .file_name()
375            .map(|n| n.to_string_lossy().to_string())
376            .unwrap_or_default();
377
378        let preprocessor = registry
379            .find_for_file(&filename)
380            .expect("already checked in partition");
381
382        info!(
383            pack = %pack.name,
384            preprocessor = preprocessor.name(),
385            file = %filename,
386            "expanding"
387        );
388
389        // Safety gate: refuse to expand a source carrying unresolved
390        // dodot-conflict markers. Otherwise the markers would render
391        // verbatim through the template engine and deploy as broken
392        // config. Gated on `supports_reverse_merge` so non-tracking
393        // preprocessors (unarchive, identity) don't pay the read cost
394        // — their sources can't naturally carry the marker token.
395        //
396        // Lossy UTF-8 conversion: we read raw bytes and decode lossily
397        // so a non-UTF-8 source for a reverse-merge-capable
398        // preprocessor still gets a clean scan rather than failing
399        // with a generic UTF-8 decode error. The marker token is
400        // ASCII, so the lossy decode preserves it. Templates today
401        // are always UTF-8 in practice; this is defence-in-depth for
402        // future preprocessors.
403        // See preprocessing-pipeline.lex §6.3.
404        if preprocessor.supports_reverse_merge() {
405            let source_bytes = fs.read_file(&entry.absolute_path)?;
406            let source_str = String::from_utf8_lossy(&source_bytes);
407            crate::preprocessing::conflict::ensure_no_unresolved_markers(
408                &source_str,
409                &entry.absolute_path,
410            )?;
411        }
412
413        // Expand the source file
414        let expanded_files = preprocessor.expand(&entry.absolute_path, fs)?;
415
416        for expanded in expanded_files {
417            // Reject unsafe paths from the preprocessor (tar-slip,
418            // absolute paths, parent-dir escapes) before any disk write.
419            validate_safe_relative_path(
420                &expanded.relative_path,
421                preprocessor.name(),
422                &entry.absolute_path,
423            )?;
424
425            // Compute the virtual relative path.
426            // If the source was in a subdirectory (e.g., "subdir/config.toml.identity"),
427            // the virtual entry should preserve the parent (e.g., "subdir/config.toml").
428            let virtual_relative = if let Some(parent) = entry.relative_path.parent() {
429                if parent == Path::new("") {
430                    expanded.relative_path.clone()
431                } else {
432                    parent.join(&expanded.relative_path)
433                }
434            } else {
435                expanded.relative_path.clone()
436            };
437
438            // Defense-in-depth: validate the joined path too (parent
439            // could only come from the pack scanner, but re-check).
440            validate_safe_relative_path(
441                &virtual_relative,
442                preprocessor.name(),
443                &entry.absolute_path,
444            )?;
445
446            // Normalise `./foo` and `foo` to the same canonical form, so
447            // that collision detection and downstream comparisons don't
448            // silently diverge from the datastore's own normalisation.
449            let virtual_relative = normalize_relative(&virtual_relative);
450
451            // Phase 4: Collision check (against both regular entries and
452            // previously-expanded virtual entries)
453            if claimed_paths.contains(&virtual_relative) {
454                return Err(DodotError::PreprocessorCollision {
455                    pack: pack.name.clone(),
456                    source_file: filename.clone(),
457                    expanded_name: virtual_relative.to_string_lossy().into_owned(),
458                });
459            }
460
461            // Write expanded content to datastore, preserving directory
462            // structure. Directories get mkdir'd; files get their content
463            // written. `write_rendered_file` creates any needed parent
464            // directories.
465            //
466            // Divergence guard (§6.4): for tracked-render preprocessors,
467            // check whether the deployed file has diverged from the
468            // cached baseline before overwriting. If it has, skip the
469            // *write* and record a SkippedRender so the caller can warn
470            // the user. `force = true` bypasses the guard. See
471            // `check_divergence` for the byte-level rule.
472            //
473            // The render itself (`preprocessor.expand` above) has
474            // already run by this point — moving the divergence check
475            // ahead of expansion would require knowing every output
476            // path before producing any of them, which the preprocessor
477            // contract doesn't expose. The cost of the spurious render
478            // is the cycles burned plus any one-shot side effects in
479            // expand (e.g. secret-provider prompts for templates that
480            // resolve `{{ secrets.X }}`). For divergent files this
481            // means the prompt fires even though the rendered bytes
482            // are immediately discarded; users who want to avoid that
483            // should resolve the divergence (`dodot transform check`)
484            // before the next `dodot up`. Tracked here for §6.4
485            // follow-up; not blocking the divergence-preservation
486            // contract this guard exists to keep.
487            //
488            // The guard fires regardless of `write_baselines` — it's a
489            // read-only check against the existing cache, and read-only
490            // callers (`dodot status`) need it just as much as `dodot
491            // up` does. Without this, status would re-render and
492            // overwrite the user's edited deployed file silently.
493            let mut skip_path: Option<PathBuf> = None;
494            // Divergence-guard gate: fires for any preprocessor
495            // that produces a single file we can hash against the
496            // baseline. Templates use `tracked_render` (so they
497            // also get reverse-merge); whole-file secret
498            // preprocessors (`age` / `gpg`) signal participation
499            // via `deploy_mode = Some(0o600)`. `secrets.lex` §4.4
500            // is explicit that whole-file secrets must NOT have
501            // their deployed plaintext silently overwritten on the
502            // next `dodot up` — even though there's no auto-merge
503            // path, the §6.4 preservation contract still applies.
504            let participates_in_divergence_guard =
505                expanded.tracked_render.is_some() || expanded.deploy_mode.is_some();
506            if !force && !expanded.is_dir && participates_in_divergence_guard {
507                match check_divergence(
508                    fs,
509                    paths,
510                    &pack.name,
511                    &virtual_relative,
512                    &entry.absolute_path,
513                )? {
514                    DivergenceCheck::Proceed => {}
515                    DivergenceCheck::Skip {
516                        state,
517                        deployed_path,
518                    } => {
519                        info!(
520                            pack = %pack.name,
521                            file = %virtual_relative.display(),
522                            ?state,
523                            "preserving divergent deployed file (skipping write)"
524                        );
525                        skipped.push(SkippedRender {
526                            pack: pack.name.clone(),
527                            virtual_relative: virtual_relative.clone(),
528                            deployed_path: deployed_path.clone(),
529                            state,
530                        });
531                        skip_path = Some(deployed_path);
532                    }
533                }
534            }
535            let was_skipped = skip_path.is_some();
536
537            let datastore_path = if let Some(p) = skip_path {
538                p
539            } else if expanded.is_dir {
540                datastore.write_rendered_dir(
541                    &pack.name,
542                    PREPROCESSED_HANDLER,
543                    &virtual_relative.to_string_lossy(),
544                )?
545            } else if let Some(mode) = expanded.deploy_mode {
546                // Whole-file secret preprocessors (age / gpg) emit
547                // `deploy_mode = Some(0o600)` per `secrets.lex`
548                // §4.3. Use the atomic create-with-mode datastore
549                // path so the plaintext bytes never sit on disk
550                // under a permissive mode — closes the race window
551                // between `write_file` (lands at umask default,
552                // typically 0644) and `set_permissions` that the
553                // first cut had.
554                datastore.write_rendered_file_with_mode(
555                    &pack.name,
556                    PREPROCESSED_HANDLER,
557                    &virtual_relative.to_string_lossy(),
558                    &expanded.content,
559                    mode,
560                )?
561            } else {
562                datastore.write_rendered_file(
563                    &pack.name,
564                    PREPROCESSED_HANDLER,
565                    &virtual_relative.to_string_lossy(),
566                    &expanded.content,
567                )?
568            };
569
570            debug!(
571                pack = %pack.name,
572                virtual_path = %virtual_relative.display(),
573                datastore_path = %datastore_path.display(),
574                is_dir = expanded.is_dir,
575                skipped = was_skipped,
576                "wrote expanded entry"
577            );
578
579            // Persist a baseline record so future `dodot transform
580            // check` / clean-filter calls can detect drift without
581            // re-rendering. Only write when:
582            //   - the entry is a file (directory entries from archive
583            //     preprocessors carry no rendered content),
584            //   - the preprocessor produced a tracked render (i.e. it's
585            //     a generative-with-tracking preprocessor, currently
586            //     just templates). Plain Generative preprocessors that
587            //     don't support reverse-merge (unarchive) skip the
588            //     baseline because the cache is only meaningful when
589            //     paired with burgertocow tracking, AND
590            //   - the divergence guard didn't skip the write (otherwise
591            //     we'd update the baseline to match a render that never
592            //     hit disk, breaking future divergence detection).
593            //
594            // Mode-gating happens at the function boundary: this whole
595            // branch only runs in `PreprocessMode::Active`. Passive
596            // commands take the early-return at the top of the
597            // function and never reach this code.
598            // Baseline-write gate: write whenever the divergence
599            // guard would fire next time, so the guard has data to
600            // compare against. Templates supply `tracked_render`
601            // (which both unlocks reverse-merge and seeds the
602            // baseline); whole-file secrets supply `deploy_mode`
603            // (no marker stream — `tracked_render = None` — but
604            // rendered_hash is still meaningful for divergence
605            // detection per `secrets.lex` §4.4).
606            let should_write_baseline = !expanded.is_dir
607                && !was_skipped
608                && (expanded.tracked_render.is_some() || expanded.deploy_mode.is_some());
609            if should_write_baseline {
610                let cache_filename = cache_filename_for(&virtual_relative);
611                let source_bytes = fs.read_file(&entry.absolute_path)?;
612                let baseline = Baseline::build(
613                    &entry.absolute_path,
614                    &expanded.content,
615                    &source_bytes,
616                    expanded.tracked_render.as_deref(),
617                    expanded.context_hash.as_ref(),
618                );
619                if let Err(err) =
620                    baseline.write(fs, paths, &pack.name, PREPROCESSED_HANDLER, &cache_filename)
621                {
622                    // Baseline write failures are reported but not
623                    // fatal: the deployment itself succeeded, and a
624                    // missing baseline only degrades the reverse-merge
625                    // experience (we'll re-baseline next `up`).
626                    debug!(
627                        pack = %pack.name,
628                        file = %cache_filename,
629                        error = %err,
630                        "baseline write failed (non-fatal)"
631                    );
632                } else {
633                    debug!(
634                        pack = %pack.name,
635                        file = %cache_filename,
636                        "baseline written"
637                    );
638                }
639
640                // Secrets sidecar (secrets.lex §3.3). Always called;
641                // the writer no-ops when the render had no
642                // `secret(...)` calls AND removes a stale sidecar
643                // from a prior render that DID, so the on-disk
644                // state always matches the latest render.
645                let sidecar = crate::preprocessing::baseline::SecretsSidecar::new(
646                    expanded.secret_line_ranges.clone(),
647                );
648                if let Err(err) =
649                    sidecar.write(fs, paths, &pack.name, PREPROCESSED_HANDLER, &cache_filename)
650                {
651                    // Same non-fatal disposition as baseline writes:
652                    // a missing sidecar means the next reverse-merge
653                    // sees an empty mask and surfaces the secret
654                    // line as a regular (mask-able) divergence,
655                    // which the user can recover from by re-running
656                    // `dodot up`.
657                    debug!(
658                        pack = %pack.name,
659                        file = %cache_filename,
660                        error = %err,
661                        "secrets sidecar write failed (non-fatal)"
662                    );
663                }
664            }
665
666            claimed_paths.insert(virtual_relative.clone());
667            source_map.insert(datastore_path.clone(), entry.absolute_path.clone());
668            // Stash the rendered bytes for downstream handlers
669            // (install/homebrew sentinel hashing) that would
670            // otherwise read them back off disk. Skipped renders
671            // (divergence guard fired) carry the *preserved deployed*
672            // bytes instead — that matches the deployed file the user
673            // is keeping, which is what the next sentinel should
674            // commit to. Directories carry no bytes.
675            if !expanded.is_dir {
676                let bytes: Arc<[u8]> = if was_skipped {
677                    // Read the preserved deployed file. If the read
678                    // fails (race / permissions), fall back to the
679                    // freshly-rendered bytes so the handler still
680                    // gets a value — this only affects the sentinel,
681                    // and the divergence warning has already surfaced.
682                    fs.read_file(&datastore_path)
683                        .map(Arc::from)
684                        .unwrap_or_else(|_| Arc::from(expanded.content.clone()))
685                } else {
686                    Arc::from(expanded.content.clone())
687                };
688                rendered_bytes.insert(datastore_path.clone(), bytes);
689            }
690
691            virtual_entries.push(PackEntry {
692                relative_path: virtual_relative,
693                absolute_path: datastore_path,
694                is_dir: expanded.is_dir,
695            });
696        }
697    }
698
699    info!(
700        pack = %pack.name,
701        virtual_count = virtual_entries.len(),
702        "preprocessing complete"
703    );
704
705    Ok(PreprocessResult {
706        regular_entries,
707        virtual_entries,
708        source_map,
709        rendered_bytes,
710        skipped,
711    })
712}
713
714/// `Passive` half of [`preprocess_pack`].
715///
716/// Walks the same set of preprocessor entries the Active path would
717/// have, but never invokes a preprocessor. For each entry, computes
718/// the would-be virtual relative path via `Preprocessor::stripped_name`.
719/// Two outcomes:
720///
721/// - **Baseline exists** (the file was rendered on a previous `up`):
722///   builds a virtual entry pointing at the would-be datastore
723///   location with `rendered_bytes` sourced from
724///   `baseline.rendered_content`. Runs the read-only divergence
725///   check so callers (status's `Health::Preserved` row) still see
726///   skipped-render rows for divergent deployed files.
727/// - **No baseline** (first-time pack template, never `up`'d):
728///   surfaces a placeholder virtual entry under the stripped name,
729///   with empty `rendered_bytes`. Status renders this as "pending"
730///   under the logical name (`config.toml` rather than the source
731///   `config.toml.tmpl`); handlers that need rendered content for
732///   sentinel hashing (install, homebrew) skip intent generation
733///   for these placeholders rather than crashing. The next real
734///   `dodot up` populates the baseline and plans intents normally.
735///
736/// Source files are not read (no marker scan); the datastore is
737/// not written; the baseline cache is not written.
738///
739/// This contract is what `secrets.lex` §7.4 demands: `dodot status`
740/// and `dodot up --dry-run` MUST NOT trigger template evaluation,
741/// MUST NOT surface provider auth prompts, and MUST NOT mutate disk
742/// state. See issue #121.
743///
744/// Limitation: this assumes a 1:1 source→virtual relationship via
745/// `stripped_name`. That holds for templates (the only shipped
746/// generative-with-tracking preprocessor) and identity-style
747/// preprocessors. Multi-output preprocessors like unarchive cannot
748/// faithfully be passively previewed; if one is added later, this
749/// function should fall back to skipping such entries (which it does
750/// today, since they have no baseline).
751fn preprocess_pack_passive(
752    preprocessor_entries: Vec<PackEntry>,
753    regular_entries: Vec<PackEntry>,
754    registry: &PreprocessorRegistry,
755    pack: &Pack,
756    fs: &dyn Fs,
757    paths: &dyn Pather,
758) -> Result<PreprocessResult> {
759    let mut virtual_entries = Vec::new();
760    let mut source_map = HashMap::new();
761    let mut rendered_bytes: HashMap<PathBuf, Arc<[u8]>> = HashMap::new();
762    let mut skipped: Vec<SkippedRender> = Vec::new();
763
764    for entry in preprocessor_entries {
765        let filename = entry
766            .relative_path
767            .file_name()
768            .map(|n| n.to_string_lossy().to_string())
769            .unwrap_or_default();
770
771        let preprocessor = registry
772            .find_for_file(&filename)
773            .expect("already checked in partition");
774
775        // Logical (stripped) virtual filename — e.g. `config.toml`
776        // for `config.toml.tmpl`. We don't run `expand()` (that would
777        // be the §7.4 violation), so we derive the would-be virtual
778        // path from `stripped_name` plus the source's parent
779        // directory.
780        let stripped = preprocessor.stripped_name(&filename);
781        let virtual_relative = match entry.relative_path.parent() {
782            Some(parent) if parent != Path::new("") => parent.join(&stripped),
783            _ => PathBuf::from(&stripped),
784        };
785        let virtual_relative = normalize_relative(&virtual_relative);
786
787        let datastore_path = paths
788            .handler_data_dir(&pack.name, PREPROCESSED_HANDLER)
789            .join(&virtual_relative);
790
791        // Try to load the cached baseline. If absent, this is a
792        // first-time template that has never been deployed: surface
793        // a placeholder virtual entry (no rendered_bytes) so callers
794        // like `dodot status` can render it as "pending" under the
795        // stripped name. Critically, we do NOT fall through to
796        // template evaluation — that's the §7.4 violation we're
797        // here to fix. Handlers that need rendered bytes for
798        // sentinel hashing (`install`, `homebrew`) will fall back
799        // to disk-read on the missing datastore path and report
800        // pending; symlink-targeted templates render cleanly as
801        // pending without needing the bytes at all.
802        let cache_filename = cache_filename_for(&virtual_relative);
803        let baseline =
804            match Baseline::load(fs, paths, &pack.name, PREPROCESSED_HANDLER, &cache_filename)? {
805                Some(b) => Some(b),
806                None => {
807                    debug!(
808                        pack = %pack.name,
809                        file = %virtual_relative.display(),
810                        "passive: no baseline yet — surfacing placeholder (run `dodot up` first)"
811                    );
812                    None
813                }
814            };
815
816        // Divergence detection (read-only): even though Passive
817        // never writes, status / dry-run callers want to know which
818        // deployed files have drifted from their baseline so they
819        // can surface the same `Health::Preserved` row that the
820        // active path does. The byte comparison is local and free
821        // of side effects — no provider calls, no template eval —
822        // so it stays inside the §7.4 envelope. Skipped only when a
823        // baseline exists (no baseline → no comparison reference).
824        if baseline.is_some() {
825            if let Ok(DivergenceCheck::Skip {
826                state,
827                deployed_path,
828            }) = check_divergence(
829                fs,
830                paths,
831                &pack.name,
832                &virtual_relative,
833                &entry.absolute_path,
834            ) {
835                skipped.push(SkippedRender {
836                    pack: pack.name.clone(),
837                    virtual_relative: virtual_relative.clone(),
838                    deployed_path,
839                    state,
840                });
841            }
842        }
843
844        // Carry the baseline's rendered content forward as the
845        // in-memory bytes for downstream sentinel hashing when a
846        // baseline exists. Without a baseline (first-time pack), no
847        // bytes are available — handlers that need them will see
848        // `m.rendered_bytes == None` and fall back to disk read,
849        // which correctly fails for the missing datastore file and
850        // shows up as "pending" in status.
851        if let Some(b) = baseline {
852            let bytes: Arc<[u8]> = Arc::from(b.rendered_content.into_bytes());
853            rendered_bytes.insert(datastore_path.clone(), bytes);
854        }
855        source_map.insert(datastore_path.clone(), entry.absolute_path.clone());
856        virtual_entries.push(PackEntry {
857            relative_path: virtual_relative,
858            absolute_path: datastore_path,
859            is_dir: false,
860        });
861    }
862
863    info!(
864        pack = %pack.name,
865        virtual_count = virtual_entries.len(),
866        skipped_count = skipped.len(),
867        "passive preprocessing complete"
868    );
869
870    Ok(PreprocessResult {
871        regular_entries,
872        virtual_entries,
873        source_map,
874        rendered_bytes,
875        skipped,
876    })
877}
878
879#[cfg(test)]
880mod tests {
881    use super::*;
882    use crate::datastore::FilesystemDataStore;
883    use crate::handlers::HandlerConfig;
884    use crate::preprocessing::identity::IdentityPreprocessor;
885    use crate::testing::TempEnvironment;
886    use std::sync::Arc;
887
888    fn make_pack(name: &str, path: PathBuf) -> Pack {
889        Pack::new(name.into(), path, HandlerConfig::default())
890    }
891
892    fn make_registry() -> PreprocessorRegistry {
893        let mut registry = PreprocessorRegistry::new();
894        registry.register(Box::new(IdentityPreprocessor::new()));
895        registry
896    }
897
898    fn make_datastore(env: &TempEnvironment) -> FilesystemDataStore {
899        let runner = Arc::new(crate::datastore::ShellCommandRunner::new(false));
900        FilesystemDataStore::new(env.fs.clone(), env.paths.clone(), runner)
901    }
902
903    #[test]
904    fn passthrough_when_no_preprocessor_files() {
905        let env = TempEnvironment::builder()
906            .pack("vim")
907            .file("vimrc", "set nocompatible")
908            .file("gvimrc", "set guifont=Mono")
909            .done()
910            .build();
911
912        let registry = make_registry();
913        let datastore = make_datastore(&env);
914        let pack = make_pack("vim", env.dotfiles_root.join("vim"));
915
916        let entries = vec![
917            PackEntry {
918                relative_path: "vimrc".into(),
919                absolute_path: env.dotfiles_root.join("vim/vimrc"),
920                is_dir: false,
921            },
922            PackEntry {
923                relative_path: "gvimrc".into(),
924                absolute_path: env.dotfiles_root.join("vim/gvimrc"),
925                is_dir: false,
926            },
927        ];
928
929        let result = preprocess_pack(
930            entries,
931            &registry,
932            &pack,
933            env.fs.as_ref(),
934            &datastore,
935            env.paths.as_ref(),
936            crate::preprocessing::PreprocessMode::Active,
937            false,
938        )
939        .unwrap();
940
941        assert_eq!(result.regular_entries.len(), 2);
942        assert!(result.virtual_entries.is_empty());
943        assert!(result.source_map.is_empty());
944    }
945
946    #[test]
947    fn identity_preprocessor_creates_virtual_entry() {
948        let env = TempEnvironment::builder()
949            .pack("app")
950            .file("config.toml.identity", "host = localhost")
951            .done()
952            .build();
953
954        let registry = make_registry();
955        let datastore = make_datastore(&env);
956        let pack = make_pack("app", env.dotfiles_root.join("app"));
957
958        let entries = vec![PackEntry {
959            relative_path: "config.toml.identity".into(),
960            absolute_path: env.dotfiles_root.join("app/config.toml.identity"),
961            is_dir: false,
962        }];
963
964        let result = preprocess_pack(
965            entries,
966            &registry,
967            &pack,
968            env.fs.as_ref(),
969            &datastore,
970            env.paths.as_ref(),
971            crate::preprocessing::PreprocessMode::Active,
972            false,
973        )
974        .unwrap();
975
976        assert!(result.regular_entries.is_empty());
977        assert_eq!(result.virtual_entries.len(), 1);
978
979        let virtual_entry = &result.virtual_entries[0];
980        assert_eq!(virtual_entry.relative_path, PathBuf::from("config.toml"));
981        assert!(!virtual_entry.is_dir);
982
983        // Verify the file was written to the datastore
984        let content = env.fs.read_to_string(&virtual_entry.absolute_path).unwrap();
985        assert_eq!(content, "host = localhost");
986
987        // Verify source map
988        assert_eq!(
989            result.source_map[&virtual_entry.absolute_path],
990            env.dotfiles_root.join("app/config.toml.identity")
991        );
992    }
993
994    #[test]
995    fn mixed_pack_partitions_correctly() {
996        let env = TempEnvironment::builder()
997            .pack("app")
998            .file("config.toml.identity", "host = localhost")
999            .file("readme.txt", "hello")
1000            .done()
1001            .build();
1002
1003        let registry = make_registry();
1004        let datastore = make_datastore(&env);
1005        let pack = make_pack("app", env.dotfiles_root.join("app"));
1006
1007        let entries = vec![
1008            PackEntry {
1009                relative_path: "config.toml.identity".into(),
1010                absolute_path: env.dotfiles_root.join("app/config.toml.identity"),
1011                is_dir: false,
1012            },
1013            PackEntry {
1014                relative_path: "readme.txt".into(),
1015                absolute_path: env.dotfiles_root.join("app/readme.txt"),
1016                is_dir: false,
1017            },
1018        ];
1019
1020        let result = preprocess_pack(
1021            entries,
1022            &registry,
1023            &pack,
1024            env.fs.as_ref(),
1025            &datastore,
1026            env.paths.as_ref(),
1027            crate::preprocessing::PreprocessMode::Active,
1028            false,
1029        )
1030        .unwrap();
1031
1032        assert_eq!(result.regular_entries.len(), 1);
1033        assert_eq!(
1034            result.regular_entries[0].relative_path,
1035            PathBuf::from("readme.txt")
1036        );
1037
1038        assert_eq!(result.virtual_entries.len(), 1);
1039        assert_eq!(
1040            result.virtual_entries[0].relative_path,
1041            PathBuf::from("config.toml")
1042        );
1043    }
1044
1045    #[test]
1046    fn collision_detection_rejects_conflict() {
1047        let env = TempEnvironment::builder()
1048            .pack("app")
1049            .file("config.toml.identity", "preprocessed")
1050            .file("config.toml", "regular")
1051            .done()
1052            .build();
1053
1054        let registry = make_registry();
1055        let datastore = make_datastore(&env);
1056        let pack = make_pack("app", env.dotfiles_root.join("app"));
1057
1058        let entries = vec![
1059            PackEntry {
1060                relative_path: "config.toml.identity".into(),
1061                absolute_path: env.dotfiles_root.join("app/config.toml.identity"),
1062                is_dir: false,
1063            },
1064            PackEntry {
1065                relative_path: "config.toml".into(),
1066                absolute_path: env.dotfiles_root.join("app/config.toml"),
1067                is_dir: false,
1068            },
1069        ];
1070
1071        let err = preprocess_pack(
1072            entries,
1073            &registry,
1074            &pack,
1075            env.fs.as_ref(),
1076            &datastore,
1077            env.paths.as_ref(),
1078            crate::preprocessing::PreprocessMode::Active,
1079            false,
1080        )
1081        .unwrap_err();
1082        assert!(
1083            matches!(err, DodotError::PreprocessorCollision { .. }),
1084            "expected PreprocessorCollision, got: {err}"
1085        );
1086    }
1087
1088    #[test]
1089    fn merged_entries_combines_and_sorts() {
1090        let result = PreprocessResult {
1091            regular_entries: vec![PackEntry {
1092                relative_path: "zebra".into(),
1093                absolute_path: "/z".into(),
1094                is_dir: false,
1095            }],
1096            virtual_entries: vec![PackEntry {
1097                relative_path: "alpha".into(),
1098                absolute_path: "/a".into(),
1099                is_dir: false,
1100            }],
1101            source_map: HashMap::new(),
1102            rendered_bytes: HashMap::new(),
1103            skipped: Vec::new(),
1104        };
1105
1106        let merged = result.merged_entries();
1107        assert_eq!(merged.len(), 2);
1108        assert_eq!(merged[0].relative_path, PathBuf::from("alpha"));
1109        assert_eq!(merged[1].relative_path, PathBuf::from("zebra"));
1110    }
1111
1112    #[test]
1113    fn empty_registry_passes_all_through() {
1114        let env = TempEnvironment::builder()
1115            .pack("app")
1116            .file("config.toml.identity", "content")
1117            .done()
1118            .build();
1119
1120        let registry = PreprocessorRegistry::new(); // empty!
1121        let datastore = make_datastore(&env);
1122        let pack = make_pack("app", env.dotfiles_root.join("app"));
1123
1124        let entries = vec![PackEntry {
1125            relative_path: "config.toml.identity".into(),
1126            absolute_path: env.dotfiles_root.join("app/config.toml.identity"),
1127            is_dir: false,
1128        }];
1129
1130        let result = preprocess_pack(
1131            entries,
1132            &registry,
1133            &pack,
1134            env.fs.as_ref(),
1135            &datastore,
1136            env.paths.as_ref(),
1137            crate::preprocessing::PreprocessMode::Active,
1138            false,
1139        )
1140        .unwrap();
1141
1142        // With no preprocessors registered, the file is treated as regular
1143        assert_eq!(result.regular_entries.len(), 1);
1144        assert!(result.virtual_entries.is_empty());
1145    }
1146
1147    #[test]
1148    fn directories_are_never_preprocessed() {
1149        let env = TempEnvironment::builder()
1150            .pack("app")
1151            .file("bin.identity/tool", "#!/bin/sh")
1152            .done()
1153            .build();
1154
1155        let registry = make_registry();
1156        let datastore = make_datastore(&env);
1157        let pack = make_pack("app", env.dotfiles_root.join("app"));
1158
1159        let entries = vec![PackEntry {
1160            relative_path: "bin.identity".into(),
1161            absolute_path: env.dotfiles_root.join("app/bin.identity"),
1162            is_dir: true, // directory — should NOT be preprocessed
1163        }];
1164
1165        let result = preprocess_pack(
1166            entries,
1167            &registry,
1168            &pack,
1169            env.fs.as_ref(),
1170            &datastore,
1171            env.paths.as_ref(),
1172            crate::preprocessing::PreprocessMode::Active,
1173            false,
1174        )
1175        .unwrap();
1176
1177        assert_eq!(result.regular_entries.len(), 1);
1178        assert!(result.virtual_entries.is_empty());
1179    }
1180
1181    #[test]
1182    fn subdirectory_preprocessor_file_preserves_parent() {
1183        let env = TempEnvironment::builder()
1184            .pack("app")
1185            .file("subdir/config.toml.identity", "nested content")
1186            .done()
1187            .build();
1188
1189        let registry = make_registry();
1190        let datastore = make_datastore(&env);
1191        let pack = make_pack("app", env.dotfiles_root.join("app"));
1192
1193        let entries = vec![PackEntry {
1194            relative_path: "subdir/config.toml.identity".into(),
1195            absolute_path: env.dotfiles_root.join("app/subdir/config.toml.identity"),
1196            is_dir: false,
1197        }];
1198
1199        let result = preprocess_pack(
1200            entries,
1201            &registry,
1202            &pack,
1203            env.fs.as_ref(),
1204            &datastore,
1205            env.paths.as_ref(),
1206            crate::preprocessing::PreprocessMode::Active,
1207            false,
1208        )
1209        .unwrap();
1210
1211        assert_eq!(result.virtual_entries.len(), 1);
1212        assert_eq!(
1213            result.virtual_entries[0].relative_path,
1214            PathBuf::from("subdir/config.toml")
1215        );
1216    }
1217
1218    #[test]
1219    fn multiple_preprocessor_files_in_one_pack() {
1220        let env = TempEnvironment::builder()
1221            .pack("app")
1222            .file("config.toml.identity", "config content")
1223            .file("settings.json.identity", "settings content")
1224            .done()
1225            .build();
1226
1227        let registry = make_registry();
1228        let datastore = make_datastore(&env);
1229        let pack = make_pack("app", env.dotfiles_root.join("app"));
1230
1231        let entries = vec![
1232            PackEntry {
1233                relative_path: "config.toml.identity".into(),
1234                absolute_path: env.dotfiles_root.join("app/config.toml.identity"),
1235                is_dir: false,
1236            },
1237            PackEntry {
1238                relative_path: "settings.json.identity".into(),
1239                absolute_path: env.dotfiles_root.join("app/settings.json.identity"),
1240                is_dir: false,
1241            },
1242        ];
1243
1244        let result = preprocess_pack(
1245            entries,
1246            &registry,
1247            &pack,
1248            env.fs.as_ref(),
1249            &datastore,
1250            env.paths.as_ref(),
1251            crate::preprocessing::PreprocessMode::Active,
1252            false,
1253        )
1254        .unwrap();
1255
1256        assert!(result.regular_entries.is_empty());
1257        assert_eq!(result.virtual_entries.len(), 2);
1258
1259        let names: Vec<String> = result
1260            .virtual_entries
1261            .iter()
1262            .map(|e| e.relative_path.to_string_lossy().to_string())
1263            .collect();
1264        assert!(names.contains(&"config.toml".to_string()));
1265        assert!(names.contains(&"settings.json".to_string()));
1266
1267        // Each should have a source_map entry
1268        assert_eq!(result.source_map.len(), 2);
1269    }
1270
1271    #[test]
1272    fn pack_with_only_preprocessor_files() {
1273        let env = TempEnvironment::builder()
1274            .pack("app")
1275            .file("only.conf.identity", "the only file")
1276            .done()
1277            .build();
1278
1279        let registry = make_registry();
1280        let datastore = make_datastore(&env);
1281        let pack = make_pack("app", env.dotfiles_root.join("app"));
1282
1283        let entries = vec![PackEntry {
1284            relative_path: "only.conf.identity".into(),
1285            absolute_path: env.dotfiles_root.join("app/only.conf.identity"),
1286            is_dir: false,
1287        }];
1288
1289        let result = preprocess_pack(
1290            entries,
1291            &registry,
1292            &pack,
1293            env.fs.as_ref(),
1294            &datastore,
1295            env.paths.as_ref(),
1296            crate::preprocessing::PreprocessMode::Active,
1297            false,
1298        )
1299        .unwrap();
1300
1301        assert!(result.regular_entries.is_empty());
1302        assert_eq!(result.virtual_entries.len(), 1);
1303        assert_eq!(result.merged_entries().len(), 1);
1304    }
1305
1306    #[test]
1307    fn source_map_is_complete() {
1308        let env = TempEnvironment::builder()
1309            .pack("app")
1310            .file("a.conf.identity", "aaa")
1311            .file("b.conf.identity", "bbb")
1312            .file("regular.txt", "ccc")
1313            .done()
1314            .build();
1315
1316        let registry = make_registry();
1317        let datastore = make_datastore(&env);
1318        let pack = make_pack("app", env.dotfiles_root.join("app"));
1319
1320        let entries = vec![
1321            PackEntry {
1322                relative_path: "a.conf.identity".into(),
1323                absolute_path: env.dotfiles_root.join("app/a.conf.identity"),
1324                is_dir: false,
1325            },
1326            PackEntry {
1327                relative_path: "b.conf.identity".into(),
1328                absolute_path: env.dotfiles_root.join("app/b.conf.identity"),
1329                is_dir: false,
1330            },
1331            PackEntry {
1332                relative_path: "regular.txt".into(),
1333                absolute_path: env.dotfiles_root.join("app/regular.txt"),
1334                is_dir: false,
1335            },
1336        ];
1337
1338        let result = preprocess_pack(
1339            entries,
1340            &registry,
1341            &pack,
1342            env.fs.as_ref(),
1343            &datastore,
1344            env.paths.as_ref(),
1345            crate::preprocessing::PreprocessMode::Active,
1346            false,
1347        )
1348        .unwrap();
1349
1350        // Every virtual entry must have a source_map entry
1351        for ve in &result.virtual_entries {
1352            assert!(
1353                result.source_map.contains_key(&ve.absolute_path),
1354                "virtual entry {} has no source_map entry",
1355                ve.absolute_path.display()
1356            );
1357        }
1358        // No regular entries in the source_map
1359        for re in &result.regular_entries {
1360            assert!(
1361                !result.source_map.contains_key(&re.absolute_path),
1362                "regular entry {} should not be in source_map",
1363                re.absolute_path.display()
1364            );
1365        }
1366    }
1367
1368    #[test]
1369    fn preprocessing_is_idempotent() {
1370        let env = TempEnvironment::builder()
1371            .pack("app")
1372            .file("config.toml.identity", "content")
1373            .done()
1374            .build();
1375
1376        let registry = make_registry();
1377        let datastore = make_datastore(&env);
1378        let pack = make_pack("app", env.dotfiles_root.join("app"));
1379
1380        let make_entries = || {
1381            vec![PackEntry {
1382                relative_path: "config.toml.identity".into(),
1383                absolute_path: env.dotfiles_root.join("app/config.toml.identity"),
1384                is_dir: false,
1385            }]
1386        };
1387
1388        let result1 = preprocess_pack(
1389            make_entries(),
1390            &registry,
1391            &pack,
1392            env.fs.as_ref(),
1393            &datastore,
1394            env.paths.as_ref(),
1395            crate::preprocessing::PreprocessMode::Active,
1396            false,
1397        )
1398        .unwrap();
1399        let result2 = preprocess_pack(
1400            make_entries(),
1401            &registry,
1402            &pack,
1403            env.fs.as_ref(),
1404            &datastore,
1405            env.paths.as_ref(),
1406            crate::preprocessing::PreprocessMode::Active,
1407            false,
1408        )
1409        .unwrap();
1410
1411        assert_eq!(result1.virtual_entries.len(), result2.virtual_entries.len());
1412        assert_eq!(
1413            result1.virtual_entries[0].relative_path,
1414            result2.virtual_entries[0].relative_path
1415        );
1416
1417        // Datastore file should be the same content
1418        let content1 = env
1419            .fs
1420            .read_to_string(&result1.virtual_entries[0].absolute_path)
1421            .unwrap();
1422        let content2 = env
1423            .fs
1424            .read_to_string(&result2.virtual_entries[0].absolute_path)
1425            .unwrap();
1426        assert_eq!(content1, content2);
1427    }
1428
1429    #[test]
1430    fn expansion_error_propagates() {
1431        let env = TempEnvironment::builder()
1432            .pack("app")
1433            .file("placeholder", "")
1434            .done()
1435            .build();
1436
1437        let registry = make_registry();
1438        let datastore = make_datastore(&env);
1439        let pack = make_pack("app", env.dotfiles_root.join("app"));
1440
1441        // Point to a file that doesn't exist — expansion should fail
1442        let entries = vec![PackEntry {
1443            relative_path: "missing.conf.identity".into(),
1444            absolute_path: env.dotfiles_root.join("app/missing.conf.identity"),
1445            is_dir: false,
1446        }];
1447
1448        let err = preprocess_pack(
1449            entries,
1450            &registry,
1451            &pack,
1452            env.fs.as_ref(),
1453            &datastore,
1454            env.paths.as_ref(),
1455            crate::preprocessing::PreprocessMode::Active,
1456            false,
1457        )
1458        .unwrap_err();
1459        assert!(
1460            matches!(err, DodotError::Fs { .. }),
1461            "expected Fs error for missing file, got: {err}"
1462        );
1463    }
1464
1465    #[test]
1466    fn inter_preprocessor_collision_detected() {
1467        // Two preprocessors produce the same logical name.
1468        // Set up: `config.toml.identity` and `config.toml.other` (custom
1469        // extension) both strip to `config.toml`. The pipeline must
1470        // detect this and refuse rather than silently overwriting.
1471        let env = TempEnvironment::builder()
1472            .pack("app")
1473            .file("config.toml.identity", "a")
1474            .file("config.toml.other", "b")
1475            .done()
1476            .build();
1477
1478        let mut registry = PreprocessorRegistry::new();
1479        registry.register(Box::new(IdentityPreprocessor::new()));
1480        registry.register(Box::new(IdentityPreprocessor::with_extension("other")));
1481
1482        let datastore = make_datastore(&env);
1483        let pack = make_pack("app", env.dotfiles_root.join("app"));
1484
1485        let entries = vec![
1486            PackEntry {
1487                relative_path: "config.toml.identity".into(),
1488                absolute_path: env.dotfiles_root.join("app/config.toml.identity"),
1489                is_dir: false,
1490            },
1491            PackEntry {
1492                relative_path: "config.toml.other".into(),
1493                absolute_path: env.dotfiles_root.join("app/config.toml.other"),
1494                is_dir: false,
1495            },
1496        ];
1497
1498        let err = preprocess_pack(
1499            entries,
1500            &registry,
1501            &pack,
1502            env.fs.as_ref(),
1503            &datastore,
1504            env.paths.as_ref(),
1505            crate::preprocessing::PreprocessMode::Active,
1506            false,
1507        )
1508        .unwrap_err();
1509        assert!(
1510            matches!(err, DodotError::PreprocessorCollision { .. }),
1511            "expected PreprocessorCollision for inter-preprocessor clash, got: {err}"
1512        );
1513    }
1514
1515    #[test]
1516    fn datastore_preserves_directory_structure() {
1517        // Preprocessor files in subdirectories should land in matching
1518        // subdirectories under the datastore, not be flattened with `__`.
1519        let env = TempEnvironment::builder()
1520            .pack("app")
1521            .file("sub/config.toml.identity", "nested")
1522            .done()
1523            .build();
1524
1525        let registry = make_registry();
1526        let datastore = make_datastore(&env);
1527        let pack = make_pack("app", env.dotfiles_root.join("app"));
1528
1529        let entries = vec![PackEntry {
1530            relative_path: "sub/config.toml.identity".into(),
1531            absolute_path: env.dotfiles_root.join("app/sub/config.toml.identity"),
1532            is_dir: false,
1533        }];
1534
1535        let result = preprocess_pack(
1536            entries,
1537            &registry,
1538            &pack,
1539            env.fs.as_ref(),
1540            &datastore,
1541            env.paths.as_ref(),
1542            crate::preprocessing::PreprocessMode::Active,
1543            false,
1544        )
1545        .unwrap();
1546
1547        assert_eq!(result.virtual_entries.len(), 1);
1548        let datastore_path = &result.virtual_entries[0].absolute_path;
1549
1550        // The datastore path should contain the subdirectory structure, not flattened
1551        let ds_str = datastore_path.to_string_lossy();
1552        assert!(
1553            ds_str.contains("sub/config.toml"),
1554            "datastore path should preserve directory structure, got: {ds_str}"
1555        );
1556        assert!(
1557            !ds_str.contains("__"),
1558            "datastore path should not contain flattening separator, got: {ds_str}"
1559        );
1560
1561        // File should actually exist at that path
1562        assert!(env.fs.exists(datastore_path));
1563        let content = env.fs.read_to_string(datastore_path).unwrap();
1564        assert_eq!(content, "nested");
1565    }
1566
1567    #[test]
1568    fn datastore_distinguishes_sibling_from_flattened_name() {
1569        // Regression test for the flatten-with-`__` edge case: a user could
1570        // have `a/b.txt` and `a__b.txt` both as preprocessor outputs, which
1571        // would have collided under the old flattening scheme. With
1572        // directory-preserving storage they live in distinct datastore paths.
1573        let env = TempEnvironment::builder()
1574            .pack("app")
1575            .file("a/b.txt.identity", "nested")
1576            .file("a__b.txt.identity", "flat")
1577            .done()
1578            .build();
1579
1580        let registry = make_registry();
1581        let datastore = make_datastore(&env);
1582        let pack = make_pack("app", env.dotfiles_root.join("app"));
1583
1584        let entries = vec![
1585            PackEntry {
1586                relative_path: "a/b.txt.identity".into(),
1587                absolute_path: env.dotfiles_root.join("app/a/b.txt.identity"),
1588                is_dir: false,
1589            },
1590            PackEntry {
1591                relative_path: "a__b.txt.identity".into(),
1592                absolute_path: env.dotfiles_root.join("app/a__b.txt.identity"),
1593                is_dir: false,
1594            },
1595        ];
1596
1597        let result = preprocess_pack(
1598            entries,
1599            &registry,
1600            &pack,
1601            env.fs.as_ref(),
1602            &datastore,
1603            env.paths.as_ref(),
1604            crate::preprocessing::PreprocessMode::Active,
1605            false,
1606        )
1607        .unwrap();
1608
1609        assert_eq!(result.virtual_entries.len(), 2);
1610
1611        // Both files must exist with distinct content
1612        let nested = result
1613            .virtual_entries
1614            .iter()
1615            .find(|e| e.relative_path == std::path::Path::new("a/b.txt"))
1616            .expect("nested entry");
1617        let flat = result
1618            .virtual_entries
1619            .iter()
1620            .find(|e| e.relative_path == std::path::Path::new("a__b.txt"))
1621            .expect("flat entry");
1622
1623        assert_ne!(nested.absolute_path, flat.absolute_path);
1624        assert_eq!(
1625            env.fs.read_to_string(&nested.absolute_path).unwrap(),
1626            "nested"
1627        );
1628        assert_eq!(env.fs.read_to_string(&flat.absolute_path).unwrap(), "flat");
1629    }
1630
1631    // ── Path-traversal defenses ─────────────────────────────────
1632
1633    /// Test-only preprocessor that emits a configurable set of
1634    /// [`crate::preprocessing::ExpandedFile`]s — lets tests inject
1635    /// unsafe paths or directory entries without needing a real archive.
1636    struct ScriptedPreprocessor {
1637        name: &'static str,
1638        extension: &'static str,
1639        outputs: Vec<crate::preprocessing::ExpandedFile>,
1640        /// Opt-in flag for tests that exercise the reverse-merge path
1641        /// (e.g. the conflict-marker safety gate). Off by default so
1642        /// existing tests of unsafe-path / directory / collision
1643        /// behaviour aren't accidentally affected by the source-content
1644        /// scan that the gate adds.
1645        supports_reverse_merge: bool,
1646    }
1647
1648    impl Default for ScriptedPreprocessor {
1649        fn default() -> Self {
1650            Self {
1651                name: "scripted",
1652                extension: ".scripted",
1653                outputs: Vec::new(),
1654                supports_reverse_merge: false,
1655            }
1656        }
1657    }
1658
1659    impl crate::preprocessing::Preprocessor for ScriptedPreprocessor {
1660        fn name(&self) -> &str {
1661            self.name
1662        }
1663        fn transform_type(&self) -> crate::preprocessing::TransformType {
1664            crate::preprocessing::TransformType::Opaque
1665        }
1666        fn matches_extension(&self, filename: &str) -> bool {
1667            filename.ends_with(self.extension)
1668        }
1669        fn stripped_name(&self, filename: &str) -> String {
1670            filename
1671                .strip_suffix(self.extension)
1672                .unwrap_or(filename)
1673                .to_string()
1674        }
1675        fn expand(
1676            &self,
1677            _source: &Path,
1678            _fs: &dyn Fs,
1679        ) -> Result<Vec<crate::preprocessing::ExpandedFile>> {
1680            Ok(self.outputs.clone())
1681        }
1682        fn supports_reverse_merge(&self) -> bool {
1683            self.supports_reverse_merge
1684        }
1685    }
1686
1687    #[test]
1688    fn rejects_absolute_path_from_preprocessor() {
1689        let env = TempEnvironment::builder()
1690            .pack("app")
1691            .file("bad.evil", "x")
1692            .done()
1693            .build();
1694
1695        let mut registry = PreprocessorRegistry::new();
1696        registry.register(Box::new(ScriptedPreprocessor {
1697            name: "evil",
1698            extension: ".evil",
1699            outputs: vec![crate::preprocessing::ExpandedFile {
1700                relative_path: PathBuf::from("/etc/passwd"),
1701                content: b"pwn".to_vec(),
1702                is_dir: false,
1703                ..Default::default()
1704            }],
1705            ..Default::default()
1706        }));
1707
1708        let datastore = make_datastore(&env);
1709        let pack = make_pack("app", env.dotfiles_root.join("app"));
1710
1711        let entries = vec![PackEntry {
1712            relative_path: "bad.evil".into(),
1713            absolute_path: env.dotfiles_root.join("app/bad.evil"),
1714            is_dir: false,
1715        }];
1716
1717        let err = preprocess_pack(
1718            entries,
1719            &registry,
1720            &pack,
1721            env.fs.as_ref(),
1722            &datastore,
1723            env.paths.as_ref(),
1724            crate::preprocessing::PreprocessMode::Active,
1725            false,
1726        )
1727        .unwrap_err();
1728        assert!(
1729            matches!(err, DodotError::PreprocessorError { ref message, .. } if message.contains("unsafe path")),
1730            "expected unsafe-path error, got: {err}"
1731        );
1732        // Verify the malicious target was not written
1733        assert!(!std::path::Path::new("/etc/passwd.dodot-would-have-written-here").exists());
1734    }
1735
1736    #[test]
1737    fn deploy_mode_some_chmods_rendered_file_to_specified_mode() {
1738        // Pin the §4.3 contract: a preprocessor that emits
1739        // `deploy_mode = Some(0o600)` (the age / gpg providers do)
1740        // sees the rendered datastore file land at exactly mode
1741        // 0600. The default-None case is covered by every other
1742        // existing pipeline test (templates / unarchive pass
1743        // through with umask defaults).
1744        use std::os::unix::fs::PermissionsExt;
1745
1746        let env = TempEnvironment::builder()
1747            .pack("app")
1748            .file("secret.opaque", "src")
1749            .done()
1750            .build();
1751
1752        let mut registry = PreprocessorRegistry::new();
1753        registry.register(Box::new(ScriptedPreprocessor {
1754            name: "opaque-with-mode",
1755            extension: ".opaque",
1756            outputs: vec![crate::preprocessing::ExpandedFile {
1757                relative_path: PathBuf::from("secret"),
1758                content: b"plaintext".to_vec(),
1759                is_dir: false,
1760                deploy_mode: Some(0o600),
1761                ..Default::default()
1762            }],
1763            ..Default::default()
1764        }));
1765
1766        let datastore = make_datastore(&env);
1767        let pack = make_pack("app", env.dotfiles_root.join("app"));
1768
1769        let entries = vec![PackEntry {
1770            relative_path: "secret.opaque".into(),
1771            absolute_path: env.dotfiles_root.join("app/secret.opaque"),
1772            is_dir: false,
1773        }];
1774
1775        preprocess_pack(
1776            entries,
1777            &registry,
1778            &pack,
1779            env.fs.as_ref(),
1780            &datastore,
1781            env.paths.as_ref(),
1782            crate::preprocessing::PreprocessMode::Active,
1783            false,
1784        )
1785        .unwrap();
1786
1787        // The rendered file lives at the standard preprocessed path.
1788        let rendered = env
1789            .paths
1790            .data_dir()
1791            .join("packs/app")
1792            .join(PREPROCESSED_HANDLER)
1793            .join("secret");
1794        assert!(rendered.exists(), "rendered file should exist");
1795        let mode = std::fs::metadata(&rendered).unwrap().permissions().mode() & 0o777;
1796        assert_eq!(
1797            mode, 0o600,
1798            "deploy_mode = Some(0o600) must produce a 0600 file, got {mode:o}"
1799        );
1800    }
1801
1802    #[test]
1803    fn rejects_parent_dir_escape_from_preprocessor() {
1804        let env = TempEnvironment::builder()
1805            .pack("app")
1806            .file("bad.evil", "x")
1807            .done()
1808            .build();
1809
1810        let mut registry = PreprocessorRegistry::new();
1811        registry.register(Box::new(ScriptedPreprocessor {
1812            name: "evil",
1813            extension: ".evil",
1814            outputs: vec![crate::preprocessing::ExpandedFile {
1815                relative_path: PathBuf::from("../../escape.txt"),
1816                content: b"pwn".to_vec(),
1817                is_dir: false,
1818                ..Default::default()
1819            }],
1820            ..Default::default()
1821        }));
1822
1823        let datastore = make_datastore(&env);
1824        let pack = make_pack("app", env.dotfiles_root.join("app"));
1825
1826        let entries = vec![PackEntry {
1827            relative_path: "bad.evil".into(),
1828            absolute_path: env.dotfiles_root.join("app/bad.evil"),
1829            is_dir: false,
1830        }];
1831
1832        let err = preprocess_pack(
1833            entries,
1834            &registry,
1835            &pack,
1836            env.fs.as_ref(),
1837            &datastore,
1838            env.paths.as_ref(),
1839            crate::preprocessing::PreprocessMode::Active,
1840            false,
1841        )
1842        .unwrap_err();
1843        assert!(
1844            matches!(err, DodotError::PreprocessorError { ref message, .. } if message.contains("unsafe path")),
1845            "expected unsafe-path error, got: {err}"
1846        );
1847    }
1848
1849    #[test]
1850    fn directory_entry_is_mkdird_not_written_as_file() {
1851        // A preprocessor emits a directory marker followed by a file
1852        // inside it. The pipeline must mkdir the directory rather than
1853        // writing a file at the directory path (which would break the
1854        // subsequent nested file write).
1855        let env = TempEnvironment::builder()
1856            .pack("app")
1857            .file("bundle.zz", "x")
1858            .done()
1859            .build();
1860
1861        let mut registry = PreprocessorRegistry::new();
1862        registry.register(Box::new(ScriptedPreprocessor {
1863            name: "scripted",
1864            extension: ".zz",
1865            outputs: vec![
1866                crate::preprocessing::ExpandedFile {
1867                    relative_path: PathBuf::from("sub"),
1868                    content: Vec::new(),
1869                    is_dir: true,
1870                    ..Default::default()
1871                },
1872                crate::preprocessing::ExpandedFile {
1873                    relative_path: PathBuf::from("sub/nested.txt"),
1874                    content: b"hello".to_vec(),
1875                    is_dir: false,
1876                    ..Default::default()
1877                },
1878            ],
1879            ..Default::default()
1880        }));
1881
1882        let datastore = make_datastore(&env);
1883        let pack = make_pack("app", env.dotfiles_root.join("app"));
1884
1885        let entries = vec![PackEntry {
1886            relative_path: "bundle.zz".into(),
1887            absolute_path: env.dotfiles_root.join("app/bundle.zz"),
1888            is_dir: false,
1889        }];
1890
1891        let result = preprocess_pack(
1892            entries,
1893            &registry,
1894            &pack,
1895            env.fs.as_ref(),
1896            &datastore,
1897            env.paths.as_ref(),
1898            crate::preprocessing::PreprocessMode::Active,
1899            false,
1900        )
1901        .unwrap();
1902
1903        assert_eq!(result.virtual_entries.len(), 2);
1904
1905        let dir_entry = result
1906            .virtual_entries
1907            .iter()
1908            .find(|e| e.is_dir)
1909            .expect("directory entry");
1910        assert!(
1911            env.fs.is_dir(&dir_entry.absolute_path),
1912            "directory entry should be a real directory: {}",
1913            dir_entry.absolute_path.display()
1914        );
1915
1916        let file_entry = result
1917            .virtual_entries
1918            .iter()
1919            .find(|e| !e.is_dir)
1920            .expect("file entry");
1921        assert_eq!(
1922            env.fs.read_to_string(&file_entry.absolute_path).unwrap(),
1923            "hello"
1924        );
1925    }
1926
1927    #[test]
1928    fn rejects_empty_path_from_preprocessor() {
1929        // A preprocessor that produces an empty relative_path (e.g. a
1930        // template file named literally `.tmpl` whose stripped name is
1931        // empty) must be rejected with a clean PreprocessorError, not
1932        // cascaded to the datastore's opaque "empty datastore path"
1933        // message.
1934        let env = TempEnvironment::builder()
1935            .pack("app")
1936            .file("bad.zz", "x")
1937            .done()
1938            .build();
1939
1940        let mut registry = PreprocessorRegistry::new();
1941        registry.register(Box::new(ScriptedPreprocessor {
1942            name: "scripted",
1943            extension: ".zz",
1944            outputs: vec![crate::preprocessing::ExpandedFile {
1945                relative_path: PathBuf::from(""),
1946                content: b"nope".to_vec(),
1947                is_dir: false,
1948                ..Default::default()
1949            }],
1950            ..Default::default()
1951        }));
1952
1953        let datastore = make_datastore(&env);
1954        let pack = make_pack("app", env.dotfiles_root.join("app"));
1955
1956        let entries = vec![PackEntry {
1957            relative_path: "bad.zz".into(),
1958            absolute_path: env.dotfiles_root.join("app/bad.zz"),
1959            is_dir: false,
1960        }];
1961
1962        let err = preprocess_pack(
1963            entries,
1964            &registry,
1965            &pack,
1966            env.fs.as_ref(),
1967            &datastore,
1968            env.paths.as_ref(),
1969            crate::preprocessing::PreprocessMode::Active,
1970            false,
1971        )
1972        .unwrap_err();
1973        assert!(
1974            matches!(err, DodotError::PreprocessorError { ref message, .. } if message.contains("empty output path")),
1975            "expected empty-path error, got: {err}"
1976        );
1977    }
1978
1979    #[test]
1980    fn rejects_curdir_only_path_from_preprocessor() {
1981        // `./` or `.` alone normalises to empty — same rejection.
1982        let env = TempEnvironment::builder()
1983            .pack("app")
1984            .file("bad.zz", "x")
1985            .done()
1986            .build();
1987
1988        let mut registry = PreprocessorRegistry::new();
1989        registry.register(Box::new(ScriptedPreprocessor {
1990            name: "scripted",
1991            extension: ".zz",
1992            outputs: vec![crate::preprocessing::ExpandedFile {
1993                relative_path: PathBuf::from("."),
1994                content: b"nope".to_vec(),
1995                is_dir: false,
1996                ..Default::default()
1997            }],
1998            ..Default::default()
1999        }));
2000
2001        let datastore = make_datastore(&env);
2002        let pack = make_pack("app", env.dotfiles_root.join("app"));
2003
2004        let entries = vec![PackEntry {
2005            relative_path: "bad.zz".into(),
2006            absolute_path: env.dotfiles_root.join("app/bad.zz"),
2007            is_dir: false,
2008        }];
2009
2010        let err = preprocess_pack(
2011            entries,
2012            &registry,
2013            &pack,
2014            env.fs.as_ref(),
2015            &datastore,
2016            env.paths.as_ref(),
2017            crate::preprocessing::PreprocessMode::Active,
2018            false,
2019        )
2020        .unwrap_err();
2021        assert!(
2022            matches!(err, DodotError::PreprocessorError { ref message, .. } if message.contains("empty output path")),
2023            "expected empty-path error, got: {err}"
2024        );
2025    }
2026
2027    #[test]
2028    fn curdir_prefixed_paths_collide_with_plain_paths() {
2029        // Two preprocessor outputs — one `./foo` and one `foo` — must
2030        // be treated as a collision. Before normalisation these lived
2031        // at distinct HashSet keys but the same datastore path, so the
2032        // second write silently clobbered the first.
2033        let env = TempEnvironment::builder()
2034            .pack("app")
2035            .file("bundle.zz", "x")
2036            .done()
2037            .build();
2038
2039        let mut registry = PreprocessorRegistry::new();
2040        registry.register(Box::new(ScriptedPreprocessor {
2041            name: "scripted",
2042            extension: ".zz",
2043            outputs: vec![
2044                crate::preprocessing::ExpandedFile {
2045                    relative_path: PathBuf::from("foo"),
2046                    content: b"first".to_vec(),
2047                    is_dir: false,
2048                    ..Default::default()
2049                },
2050                crate::preprocessing::ExpandedFile {
2051                    relative_path: PathBuf::from("./foo"),
2052                    content: b"second".to_vec(),
2053                    is_dir: false,
2054                    ..Default::default()
2055                },
2056            ],
2057            ..Default::default()
2058        }));
2059
2060        let datastore = make_datastore(&env);
2061        let pack = make_pack("app", env.dotfiles_root.join("app"));
2062
2063        let entries = vec![PackEntry {
2064            relative_path: "bundle.zz".into(),
2065            absolute_path: env.dotfiles_root.join("app/bundle.zz"),
2066            is_dir: false,
2067        }];
2068
2069        let err = preprocess_pack(
2070            entries,
2071            &registry,
2072            &pack,
2073            env.fs.as_ref(),
2074            &datastore,
2075            env.paths.as_ref(),
2076            crate::preprocessing::PreprocessMode::Active,
2077            false,
2078        )
2079        .unwrap_err();
2080        assert!(
2081            matches!(err, DodotError::PreprocessorCollision { .. }),
2082            "expected PreprocessorCollision for ./foo vs foo, got: {err}"
2083        );
2084    }
2085
2086    #[test]
2087    fn virtual_entry_relative_path_is_normalized() {
2088        // When a preprocessor emits `./foo`, the resulting virtual entry
2089        // must carry a normalised relative path. Otherwise downstream
2090        // code (e.g. rule matching or status display) sees both shapes
2091        // and treats them as different files.
2092        let env = TempEnvironment::builder()
2093            .pack("app")
2094            .file("bundle.zz", "x")
2095            .done()
2096            .build();
2097
2098        let mut registry = PreprocessorRegistry::new();
2099        registry.register(Box::new(ScriptedPreprocessor {
2100            name: "scripted",
2101            extension: ".zz",
2102            outputs: vec![crate::preprocessing::ExpandedFile {
2103                relative_path: PathBuf::from("./nested/file.txt"),
2104                content: b"hi".to_vec(),
2105                is_dir: false,
2106                ..Default::default()
2107            }],
2108            ..Default::default()
2109        }));
2110
2111        let datastore = make_datastore(&env);
2112        let pack = make_pack("app", env.dotfiles_root.join("app"));
2113
2114        let entries = vec![PackEntry {
2115            relative_path: "bundle.zz".into(),
2116            absolute_path: env.dotfiles_root.join("app/bundle.zz"),
2117            is_dir: false,
2118        }];
2119
2120        let result = preprocess_pack(
2121            entries,
2122            &registry,
2123            &pack,
2124            env.fs.as_ref(),
2125            &datastore,
2126            env.paths.as_ref(),
2127            crate::preprocessing::PreprocessMode::Active,
2128            false,
2129        )
2130        .unwrap();
2131
2132        assert_eq!(result.virtual_entries.len(), 1);
2133        assert_eq!(
2134            result.virtual_entries[0].relative_path,
2135            PathBuf::from("nested/file.txt"),
2136            "CurDir components must be stripped from virtual entry"
2137        );
2138    }
2139
2140    // ── Baseline cache integration ──────────────────────────────
2141
2142    #[test]
2143    fn baseline_is_written_when_paths_provided_and_tracked_render_present() {
2144        // End-to-end: a scripted preprocessor that produces a tracked
2145        // render should result in a baseline JSON on disk under
2146        // `<cache>/preprocessor/<pack>/preprocessed/<file>.json`. The
2147        // baseline must round-trip through Baseline::load with all the
2148        // documented fields populated.
2149        let env = TempEnvironment::builder()
2150            .pack("app")
2151            .file("config.toml.tracked", "name = original")
2152            .done()
2153            .build();
2154
2155        let mut registry = PreprocessorRegistry::new();
2156        registry.register(Box::new(ScriptedPreprocessor {
2157            name: "tracked-scripted",
2158            extension: ".tracked",
2159            outputs: vec![crate::preprocessing::ExpandedFile {
2160                relative_path: PathBuf::from("config.toml"),
2161                content: b"name = rendered".to_vec(),
2162                is_dir: false,
2163                tracked_render: Some("name = \u{1e}rendered\u{1f}".into()),
2164                context_hash: Some([0xab; 32]),
2165                secret_line_ranges: Vec::new(),
2166                deploy_mode: None,
2167            }],
2168            ..Default::default()
2169        }));
2170
2171        let datastore = make_datastore(&env);
2172        let pack = make_pack("app", env.dotfiles_root.join("app"));
2173
2174        let entries = vec![PackEntry {
2175            relative_path: "config.toml.tracked".into(),
2176            absolute_path: env.dotfiles_root.join("app/config.toml.tracked"),
2177            is_dir: false,
2178        }];
2179
2180        preprocess_pack(
2181            entries,
2182            &registry,
2183            &pack,
2184            env.fs.as_ref(),
2185            &datastore,
2186            env.paths.as_ref(),
2187            PreprocessMode::Active,
2188            false,
2189        )
2190        .unwrap();
2191
2192        let baseline = crate::preprocessing::baseline::Baseline::load(
2193            env.fs.as_ref(),
2194            env.paths.as_ref(),
2195            "app",
2196            "preprocessed",
2197            "config.toml",
2198        )
2199        .unwrap()
2200        .expect("baseline must be written for a tracked-render expansion");
2201
2202        assert_eq!(baseline.rendered_content, "name = rendered");
2203        assert_eq!(baseline.tracked_render, "name = \u{1e}rendered\u{1f}");
2204        // Source hash is the SHA of the source file's bytes.
2205        assert_eq!(baseline.source_hash.len(), 64);
2206        // Context hash matches the one the preprocessor emitted.
2207        assert!(
2208            baseline.context_hash.chars().all(|c| c == 'a' || c == 'b'),
2209            "context hash should be 0xab repeated, got: {}",
2210            baseline.context_hash
2211        );
2212        assert_eq!(baseline.context_hash.len(), 64);
2213    }
2214
2215    #[test]
2216    fn baseline_is_skipped_in_passive_mode() {
2217        // Passive callers (`dodot status`, `dodot up --dry-run`) MUST
2218        // NOT touch the baseline cache. No baseline should be written
2219        // in that case — overwriting it would erase the
2220        // divergence-detection ground truth captured at the last
2221        // `dodot up`. Per `secrets.lex` §7.4 / issue #121.
2222        let env = TempEnvironment::builder()
2223            .pack("app")
2224            .file("config.toml.tracked", "src")
2225            .done()
2226            .build();
2227
2228        let mut registry = PreprocessorRegistry::new();
2229        registry.register(Box::new(ScriptedPreprocessor {
2230            name: "tracked-scripted",
2231            extension: ".tracked",
2232            outputs: vec![crate::preprocessing::ExpandedFile {
2233                relative_path: PathBuf::from("config.toml"),
2234                content: b"x".to_vec(),
2235                is_dir: false,
2236                tracked_render: Some("x".into()),
2237                context_hash: Some([0; 32]),
2238                secret_line_ranges: Vec::new(),
2239                deploy_mode: None,
2240            }],
2241            ..Default::default()
2242        }));
2243
2244        let datastore = make_datastore(&env);
2245        let pack = make_pack("app", env.dotfiles_root.join("app"));
2246        let entries = vec![PackEntry {
2247            relative_path: "config.toml.tracked".into(),
2248            absolute_path: env.dotfiles_root.join("app/config.toml.tracked"),
2249            is_dir: false,
2250        }];
2251
2252        preprocess_pack(
2253            entries,
2254            &registry,
2255            &pack,
2256            env.fs.as_ref(),
2257            &datastore,
2258            env.paths.as_ref(),
2259            crate::preprocessing::PreprocessMode::Passive,
2260            false,
2261        )
2262        .unwrap();
2263
2264        let path = env
2265            .paths
2266            .preprocessor_baseline_path("app", "preprocessed", "config.toml");
2267        assert!(
2268            !env.fs.exists(&path),
2269            "no baseline should exist after a Passive run, but found: {}",
2270            path.display()
2271        );
2272    }
2273
2274    #[test]
2275    fn baseline_is_skipped_for_preprocessors_without_tracked_render() {
2276        // The identity preprocessor (and unarchive) don't produce a
2277        // tracked render. They still go through the pipeline, but no
2278        // baseline is written — the cache is only meaningful when paired
2279        // with burgertocow's marker stream.
2280        let env = TempEnvironment::builder()
2281            .pack("app")
2282            .file("config.toml.identity", "data")
2283            .done()
2284            .build();
2285
2286        let registry = make_registry(); // identity-only
2287        let datastore = make_datastore(&env);
2288        let pack = make_pack("app", env.dotfiles_root.join("app"));
2289        let entries = vec![PackEntry {
2290            relative_path: "config.toml.identity".into(),
2291            absolute_path: env.dotfiles_root.join("app/config.toml.identity"),
2292            is_dir: false,
2293        }];
2294
2295        preprocess_pack(
2296            entries,
2297            &registry,
2298            &pack,
2299            env.fs.as_ref(),
2300            &datastore,
2301            env.paths.as_ref(),
2302            PreprocessMode::Active,
2303            false,
2304        )
2305        .unwrap();
2306
2307        let path = env
2308            .paths
2309            .preprocessor_baseline_path("app", "preprocessed", "config.toml");
2310        assert!(
2311            !env.fs.exists(&path),
2312            "identity preprocessor (no tracked render) should not write a baseline"
2313        );
2314    }
2315
2316    #[test]
2317    fn baseline_overwrites_on_repeated_up() {
2318        // Re-running `up` with a changed source file must replace the
2319        // baseline, not leave the stale one in place — otherwise drift
2320        // detection would compare against an out-of-date baseline.
2321        let env = TempEnvironment::builder()
2322            .pack("app")
2323            .file("config.toml.tracked", "first")
2324            .done()
2325            .build();
2326
2327        let outputs_first = vec![crate::preprocessing::ExpandedFile {
2328            relative_path: PathBuf::from("config.toml"),
2329            content: b"FIRST".to_vec(),
2330            is_dir: false,
2331            tracked_render: Some("FIRST".into()),
2332            context_hash: Some([1; 32]),
2333            secret_line_ranges: Vec::new(),
2334            deploy_mode: None,
2335        }];
2336        let outputs_second = vec![crate::preprocessing::ExpandedFile {
2337            relative_path: PathBuf::from("config.toml"),
2338            content: b"SECOND".to_vec(),
2339            is_dir: false,
2340            tracked_render: Some("SECOND".into()),
2341            context_hash: Some([2; 32]),
2342            secret_line_ranges: Vec::new(),
2343            deploy_mode: None,
2344        }];
2345
2346        let datastore = make_datastore(&env);
2347        let pack = make_pack("app", env.dotfiles_root.join("app"));
2348        let make_entries = || {
2349            vec![PackEntry {
2350                relative_path: "config.toml.tracked".into(),
2351                absolute_path: env.dotfiles_root.join("app/config.toml.tracked"),
2352                is_dir: false,
2353            }]
2354        };
2355
2356        // First run.
2357        let mut registry1 = PreprocessorRegistry::new();
2358        registry1.register(Box::new(ScriptedPreprocessor {
2359            name: "ts",
2360            extension: ".tracked",
2361            outputs: outputs_first,
2362            ..Default::default()
2363        }));
2364        preprocess_pack(
2365            make_entries(),
2366            &registry1,
2367            &pack,
2368            env.fs.as_ref(),
2369            &datastore,
2370            env.paths.as_ref(),
2371            PreprocessMode::Active,
2372            false,
2373        )
2374        .unwrap();
2375
2376        // Second run with changed outputs.
2377        let mut registry2 = PreprocessorRegistry::new();
2378        registry2.register(Box::new(ScriptedPreprocessor {
2379            name: "ts",
2380            extension: ".tracked",
2381            outputs: outputs_second,
2382            ..Default::default()
2383        }));
2384        preprocess_pack(
2385            make_entries(),
2386            &registry2,
2387            &pack,
2388            env.fs.as_ref(),
2389            &datastore,
2390            env.paths.as_ref(),
2391            PreprocessMode::Active,
2392            false,
2393        )
2394        .unwrap();
2395
2396        let baseline = crate::preprocessing::baseline::Baseline::load(
2397            env.fs.as_ref(),
2398            env.paths.as_ref(),
2399            "app",
2400            "preprocessed",
2401            "config.toml",
2402        )
2403        .unwrap()
2404        .unwrap();
2405        assert_eq!(baseline.rendered_content, "SECOND");
2406    }
2407
2408    #[test]
2409    fn end_to_end_baseline_for_real_template_preprocessor() {
2410        // Exercise the cache write through the actual TemplatePreprocessor
2411        // (rather than ScriptedPreprocessor). This pins the integration
2412        // contract: a `.tmpl` file in a pack produces a baseline that
2413        // contains the rendered content, the tracked render with markers,
2414        // and a non-empty context hash.
2415        use std::collections::HashMap;
2416        let env = TempEnvironment::builder()
2417            .pack("app")
2418            .file("greet.tmpl", "hello {{ name }}")
2419            .done()
2420            .build();
2421
2422        let mut vars = HashMap::new();
2423        vars.insert("name".into(), "Alice".into());
2424        let template_pp = crate::preprocessing::template::TemplatePreprocessor::new(
2425            vec!["tmpl".into()],
2426            vars,
2427            env.paths.as_ref(),
2428        )
2429        .unwrap();
2430        let mut registry = PreprocessorRegistry::new();
2431        registry.register(Box::new(template_pp));
2432
2433        let datastore = make_datastore(&env);
2434        let pack = make_pack("app", env.dotfiles_root.join("app"));
2435        let entries = vec![PackEntry {
2436            relative_path: "greet.tmpl".into(),
2437            absolute_path: env.dotfiles_root.join("app/greet.tmpl"),
2438            is_dir: false,
2439        }];
2440
2441        preprocess_pack(
2442            entries,
2443            &registry,
2444            &pack,
2445            env.fs.as_ref(),
2446            &datastore,
2447            env.paths.as_ref(),
2448            PreprocessMode::Active,
2449            false,
2450        )
2451        .unwrap();
2452
2453        let baseline = crate::preprocessing::baseline::Baseline::load(
2454            env.fs.as_ref(),
2455            env.paths.as_ref(),
2456            "app",
2457            "preprocessed",
2458            "greet",
2459        )
2460        .unwrap()
2461        .expect("template baseline must be written");
2462
2463        assert_eq!(baseline.rendered_content, "hello Alice");
2464        // The tracked render must contain marker bytes around "Alice".
2465        assert!(
2466            baseline.tracked_render.contains(burgertocow::VAR_START),
2467            "tracked render must contain marker bytes, got: {:?}",
2468            baseline.tracked_render
2469        );
2470        // Context hash is the template preprocessor's deterministic
2471        // hex; non-empty.
2472        assert_eq!(baseline.context_hash.len(), 64);
2473        // Rendered hash is SHA-256 hex.
2474        assert_eq!(baseline.rendered_hash.len(), 64);
2475    }
2476
2477    // ── Conflict-marker safety gate ─────────────────────────────
2478
2479    #[test]
2480    fn conflict_marker_in_template_source_blocks_expansion() {
2481        // The most important test for R2: a template source containing
2482        // a dodot-conflict marker must be refused at the pipeline level
2483        // — otherwise the markers would render verbatim through
2484        // MiniJinja and deploy into the user's config as garbage.
2485        use std::collections::HashMap;
2486        let template_with_conflict = format!(
2487            "name = Alice\n{}\nhost = \"{{{{ env.DB_HOST }}}}\"\n{}\nhost = \"prod\"\n{}\nport = 5432\n",
2488            crate::preprocessing::conflict::MARKER_START,
2489            crate::preprocessing::conflict::MARKER_MID,
2490            crate::preprocessing::conflict::MARKER_END,
2491        );
2492        let env = TempEnvironment::builder()
2493            .pack("app")
2494            .file("config.toml.tmpl", &template_with_conflict)
2495            .done()
2496            .build();
2497
2498        let template_pp = crate::preprocessing::template::TemplatePreprocessor::new(
2499            vec!["tmpl".into()],
2500            HashMap::new(),
2501            env.paths.as_ref(),
2502        )
2503        .unwrap();
2504        let mut registry = PreprocessorRegistry::new();
2505        registry.register(Box::new(template_pp));
2506
2507        let datastore = make_datastore(&env);
2508        let pack = make_pack("app", env.dotfiles_root.join("app"));
2509        let entries = vec![PackEntry {
2510            relative_path: "config.toml.tmpl".into(),
2511            absolute_path: env.dotfiles_root.join("app/config.toml.tmpl"),
2512            is_dir: false,
2513        }];
2514
2515        let err = preprocess_pack(
2516            entries,
2517            &registry,
2518            &pack,
2519            env.fs.as_ref(),
2520            &datastore,
2521            env.paths.as_ref(),
2522            PreprocessMode::Active,
2523            false,
2524        )
2525        .unwrap_err();
2526
2527        match err {
2528            DodotError::UnresolvedConflictMarker {
2529                source_file,
2530                line_numbers,
2531            } => {
2532                assert!(source_file.ends_with("config.toml.tmpl"));
2533                assert_eq!(line_numbers.len(), 3, "got: {line_numbers:?}");
2534            }
2535            other => panic!("expected UnresolvedConflictMarker, got: {other}"),
2536        }
2537
2538        // Critically: the datastore must NOT carry a partially-rendered
2539        // file from before the gate caught the markers. The pipeline
2540        // refuses on the first scan, before any disk write.
2541        let datastore_path = env
2542            .paths
2543            .data_dir()
2544            .join("packs")
2545            .join("app")
2546            .join("preprocessed")
2547            .join("config.toml");
2548        assert!(
2549            !env.fs.exists(&datastore_path),
2550            "no rendered output should land in the datastore when the gate fires"
2551        );
2552
2553        // Same for the baseline cache.
2554        let baseline_path =
2555            env.paths
2556                .preprocessor_baseline_path("app", "preprocessed", "config.toml");
2557        assert!(
2558            !env.fs.exists(&baseline_path),
2559            "no baseline should be written when the gate fires"
2560        );
2561    }
2562
2563    #[test]
2564    fn conflict_marker_gate_skipped_for_preprocessors_without_reverse_merge() {
2565        // The unarchive / identity preprocessors don't participate in
2566        // reverse-merge, so the gate doesn't read their source files
2567        // (which may not be UTF-8 anyway). Confirm that a marker token
2568        // accidentally present in such a source does NOT block the
2569        // pipeline. We use a ScriptedPreprocessor with
2570        // supports_reverse_merge=false to drive this.
2571        let env = TempEnvironment::builder()
2572            .pack("app")
2573            .file(
2574                "data.scripted",
2575                &format!(
2576                    "header\n{}\nbody\n",
2577                    crate::preprocessing::conflict::MARKER_START
2578                ),
2579            )
2580            .done()
2581            .build();
2582
2583        let mut registry = PreprocessorRegistry::new();
2584        registry.register(Box::new(ScriptedPreprocessor {
2585            name: "bytes-only",
2586            extension: ".scripted",
2587            outputs: vec![crate::preprocessing::ExpandedFile {
2588                relative_path: PathBuf::from("data"),
2589                content: b"emitted".to_vec(),
2590                is_dir: false,
2591                ..Default::default()
2592            }],
2593            supports_reverse_merge: false,
2594        }));
2595
2596        let datastore = make_datastore(&env);
2597        let pack = make_pack("app", env.dotfiles_root.join("app"));
2598        let entries = vec![PackEntry {
2599            relative_path: "data.scripted".into(),
2600            absolute_path: env.dotfiles_root.join("app/data.scripted"),
2601            is_dir: false,
2602        }];
2603
2604        let result = preprocess_pack(
2605            entries,
2606            &registry,
2607            &pack,
2608            env.fs.as_ref(),
2609            &datastore,
2610            env.paths.as_ref(),
2611            crate::preprocessing::PreprocessMode::Active,
2612            false,
2613        )
2614        .expect("non-tracking preprocessor must not be gated by markers in its source");
2615        assert_eq!(result.virtual_entries.len(), 1);
2616    }
2617
2618    #[test]
2619    fn conflict_marker_gate_runs_on_tracking_scripted_preprocessor() {
2620        // Symmetric to the test above: a ScriptedPreprocessor with
2621        // supports_reverse_merge=true must trip the gate when its
2622        // source carries marker lines, even though it's not the real
2623        // template preprocessor. This pins the gate's dispatch to the
2624        // trait flag, not a hard-coded preprocessor name check.
2625        let env = TempEnvironment::builder()
2626            .pack("app")
2627            .file(
2628                "config.toml.tracked",
2629                &format!(
2630                    "ok\n{}\nbody\n{}\n",
2631                    crate::preprocessing::conflict::MARKER_START,
2632                    crate::preprocessing::conflict::MARKER_END
2633                ),
2634            )
2635            .done()
2636            .build();
2637
2638        let mut registry = PreprocessorRegistry::new();
2639        registry.register(Box::new(ScriptedPreprocessor {
2640            name: "tracking-bytes",
2641            extension: ".tracked",
2642            outputs: vec![crate::preprocessing::ExpandedFile {
2643                relative_path: PathBuf::from("config.toml"),
2644                content: b"x".to_vec(),
2645                is_dir: false,
2646                tracked_render: Some("x".into()),
2647                context_hash: Some([0; 32]),
2648                secret_line_ranges: Vec::new(),
2649                deploy_mode: None,
2650            }],
2651            supports_reverse_merge: true,
2652        }));
2653
2654        let datastore = make_datastore(&env);
2655        let pack = make_pack("app", env.dotfiles_root.join("app"));
2656        let entries = vec![PackEntry {
2657            relative_path: "config.toml.tracked".into(),
2658            absolute_path: env.dotfiles_root.join("app/config.toml.tracked"),
2659            is_dir: false,
2660        }];
2661
2662        let err = preprocess_pack(
2663            entries,
2664            &registry,
2665            &pack,
2666            env.fs.as_ref(),
2667            &datastore,
2668            env.paths.as_ref(),
2669            crate::preprocessing::PreprocessMode::Active,
2670            false,
2671        )
2672        .unwrap_err();
2673        assert!(
2674            matches!(err, DodotError::UnresolvedConflictMarker { .. }),
2675            "expected UnresolvedConflictMarker, got: {err}"
2676        );
2677    }
2678
2679    #[test]
2680    fn gate_handles_non_utf8_source_via_lossy_decode() {
2681        // Defence-in-depth: a reverse-merge-capable preprocessor with a
2682        // non-UTF-8 source must not crash the gate with a generic
2683        // UTF-8 decode error. The pipeline reads bytes and decodes
2684        // lossily before scanning for markers — the marker token is
2685        // ASCII so detection works, and a binary-ish source without
2686        // markers passes cleanly.
2687        let env = TempEnvironment::builder()
2688            .pack("app")
2689            .file("config.toml.tracked", "placeholder")
2690            .done()
2691            .build();
2692
2693        // Overwrite with non-UTF-8 bytes: a few invalid sequences plus
2694        // valid ASCII surrounding them. No markers in the bytes.
2695        let bytes: Vec<u8> = vec![
2696            b'h', b'e', b'l', b'l', b'o', b'\n', 0xff, 0xfe, b'\n', b'w', b'o', b'r', b'l', b'd',
2697            b'\n',
2698        ];
2699        env.fs
2700            .write_file(&env.dotfiles_root.join("app/config.toml.tracked"), &bytes)
2701            .unwrap();
2702
2703        let mut registry = PreprocessorRegistry::new();
2704        registry.register(Box::new(ScriptedPreprocessor {
2705            name: "tracking-bytes",
2706            extension: ".tracked",
2707            outputs: vec![crate::preprocessing::ExpandedFile {
2708                relative_path: PathBuf::from("config.toml"),
2709                content: b"x".to_vec(),
2710                is_dir: false,
2711                tracked_render: Some("x".into()),
2712                context_hash: Some([0; 32]),
2713                secret_line_ranges: Vec::new(),
2714                deploy_mode: None,
2715            }],
2716            supports_reverse_merge: true,
2717        }));
2718
2719        let datastore = make_datastore(&env);
2720        let pack = make_pack("app", env.dotfiles_root.join("app"));
2721        let entries = vec![PackEntry {
2722            relative_path: "config.toml.tracked".into(),
2723            absolute_path: env.dotfiles_root.join("app/config.toml.tracked"),
2724            is_dir: false,
2725        }];
2726
2727        // Should NOT error: the gate's lossy decode handles non-UTF-8
2728        // gracefully, and there are no marker lines in the bytes.
2729        let result = preprocess_pack(
2730            entries,
2731            &registry,
2732            &pack,
2733            env.fs.as_ref(),
2734            &datastore,
2735            env.paths.as_ref(),
2736            crate::preprocessing::PreprocessMode::Active,
2737            false,
2738        )
2739        .expect("non-UTF-8 source without markers must not crash the gate");
2740        assert_eq!(result.virtual_entries.len(), 1);
2741    }
2742
2743    #[test]
2744    fn gate_detects_markers_in_non_utf8_source() {
2745        // Round-trip the lossy path: a source that's mostly invalid
2746        // UTF-8 but has a real marker line in valid ASCII still trips
2747        // the gate. This is the safety-critical scenario — we must
2748        // not silently pass a marker-bearing source just because
2749        // surrounding bytes happen to be invalid UTF-8.
2750        let env = TempEnvironment::builder()
2751            .pack("app")
2752            .file("config.toml.tracked", "placeholder")
2753            .done()
2754            .build();
2755
2756        let mut bytes: Vec<u8> = Vec::new();
2757        bytes.extend_from_slice(b"prefix\n");
2758        bytes.push(0xff);
2759        bytes.push(0xfe);
2760        bytes.push(b'\n');
2761        bytes.extend_from_slice(crate::preprocessing::conflict::MARKER_START.as_bytes());
2762        bytes.push(b'\n');
2763        bytes.extend_from_slice(b"body\n");
2764        env.fs
2765            .write_file(&env.dotfiles_root.join("app/config.toml.tracked"), &bytes)
2766            .unwrap();
2767
2768        let mut registry = PreprocessorRegistry::new();
2769        registry.register(Box::new(ScriptedPreprocessor {
2770            name: "tracking-bytes",
2771            extension: ".tracked",
2772            outputs: vec![crate::preprocessing::ExpandedFile {
2773                relative_path: PathBuf::from("config.toml"),
2774                content: b"x".to_vec(),
2775                is_dir: false,
2776                tracked_render: Some("x".into()),
2777                context_hash: Some([0; 32]),
2778                secret_line_ranges: Vec::new(),
2779                deploy_mode: None,
2780            }],
2781            supports_reverse_merge: true,
2782        }));
2783
2784        let datastore = make_datastore(&env);
2785        let pack = make_pack("app", env.dotfiles_root.join("app"));
2786        let entries = vec![PackEntry {
2787            relative_path: "config.toml.tracked".into(),
2788            absolute_path: env.dotfiles_root.join("app/config.toml.tracked"),
2789            is_dir: false,
2790        }];
2791
2792        let err = preprocess_pack(
2793            entries,
2794            &registry,
2795            &pack,
2796            env.fs.as_ref(),
2797            &datastore,
2798            env.paths.as_ref(),
2799            crate::preprocessing::PreprocessMode::Active,
2800            false,
2801        )
2802        .unwrap_err();
2803        assert!(
2804            matches!(err, DodotError::UnresolvedConflictMarker { .. }),
2805            "expected UnresolvedConflictMarker even on non-UTF-8 source, got: {err}"
2806        );
2807    }
2808
2809    #[test]
2810    fn template_renders_normally_after_markers_are_resolved() {
2811        // Once the user removes the markers (the standard resolution
2812        // path), the next `dodot up` must succeed and produce the
2813        // expected rendered output. This is the round-trip check: the
2814        // gate doesn't permanently brick a pack — it just defers
2815        // expansion until the source is clean again.
2816        use std::collections::HashMap;
2817        let env = TempEnvironment::builder()
2818            .pack("app")
2819            .file("greet.tmpl", "hello {{ name }}")
2820            .done()
2821            .build();
2822
2823        let mut vars = HashMap::new();
2824        vars.insert("name".into(), "Alice".into());
2825        let template_pp = crate::preprocessing::template::TemplatePreprocessor::new(
2826            vec!["tmpl".into()],
2827            vars,
2828            env.paths.as_ref(),
2829        )
2830        .unwrap();
2831        let mut registry = PreprocessorRegistry::new();
2832        registry.register(Box::new(template_pp));
2833
2834        let datastore = make_datastore(&env);
2835        let pack = make_pack("app", env.dotfiles_root.join("app"));
2836        let entries = vec![PackEntry {
2837            relative_path: "greet.tmpl".into(),
2838            absolute_path: env.dotfiles_root.join("app/greet.tmpl"),
2839            is_dir: false,
2840        }];
2841
2842        // Round 1: clean source → success.
2843        let result = preprocess_pack(
2844            entries.clone(),
2845            &registry,
2846            &pack,
2847            env.fs.as_ref(),
2848            &datastore,
2849            env.paths.as_ref(),
2850            PreprocessMode::Active,
2851            false,
2852        )
2853        .expect("clean source should expand successfully");
2854        assert_eq!(result.virtual_entries.len(), 1);
2855
2856        // Round 2: user adds a marker → blocked.
2857        let dirty = format!(
2858            "hello\n{}\n{{{{ name }}}}\n{}\n",
2859            crate::preprocessing::conflict::MARKER_START,
2860            crate::preprocessing::conflict::MARKER_END,
2861        );
2862        env.fs
2863            .write_file(&env.dotfiles_root.join("app/greet.tmpl"), dirty.as_bytes())
2864            .unwrap();
2865        let err = preprocess_pack(
2866            entries.clone(),
2867            &registry,
2868            &pack,
2869            env.fs.as_ref(),
2870            &datastore,
2871            env.paths.as_ref(),
2872            PreprocessMode::Active,
2873            false,
2874        )
2875        .unwrap_err();
2876        assert!(matches!(err, DodotError::UnresolvedConflictMarker { .. }));
2877
2878        // Round 3: user resolves → success again.
2879        env.fs
2880            .write_file(
2881                &env.dotfiles_root.join("app/greet.tmpl"),
2882                b"hello {{ name }}",
2883            )
2884            .unwrap();
2885        let result = preprocess_pack(
2886            entries,
2887            &registry,
2888            &pack,
2889            env.fs.as_ref(),
2890            &datastore,
2891            env.paths.as_ref(),
2892            PreprocessMode::Active,
2893            false,
2894        )
2895        .expect("resolved source should expand again");
2896        assert_eq!(result.virtual_entries.len(), 1);
2897    }
2898
2899    // ── Divergence guard (issue #110, §6.4) ─────────────────────────
2900    //
2901    // Tests that `preprocess_pack` refuses to overwrite a deployed file
2902    // whose bytes have diverged from the cached baseline. The guard
2903    // reads the file content; env vars are intentionally not part of
2904    // the staleness signal — see the §6.4 banner and template.rs.
2905    //
2906    // Helper that runs the template preprocessor end-to-end. We use the
2907    // real TemplatePreprocessor here (not ScriptedPreprocessor) so the
2908    // tests pin the integration contract: a `.tmpl` source produces a
2909    // baseline that subsequent runs read back.
2910    fn run_template_preprocess(
2911        env: &TempEnvironment,
2912        pack_name: &str,
2913        force: bool,
2914    ) -> PreprocessResult {
2915        use std::collections::HashMap;
2916        let template_pp = crate::preprocessing::template::TemplatePreprocessor::new(
2917            vec!["tmpl".into()],
2918            HashMap::new(),
2919            env.paths.as_ref(),
2920        )
2921        .unwrap();
2922        let mut registry = PreprocessorRegistry::new();
2923        registry.register(Box::new(template_pp));
2924
2925        let datastore = make_datastore(env);
2926        let pack = make_pack(pack_name, env.dotfiles_root.join(pack_name));
2927        let entries = vec![PackEntry {
2928            relative_path: "config.toml.tmpl".into(),
2929            absolute_path: env.dotfiles_root.join(pack_name).join("config.toml.tmpl"),
2930            is_dir: false,
2931        }];
2932
2933        preprocess_pack(
2934            entries,
2935            &registry,
2936            &pack,
2937            env.fs.as_ref(),
2938            &datastore,
2939            env.paths.as_ref(),
2940            PreprocessMode::Active,
2941            force,
2942        )
2943        .unwrap()
2944    }
2945
2946    #[test]
2947    fn divergence_guard_skips_when_deployed_was_edited() {
2948        // Row 3 of the §6.4 matrix: source same, deployed edited.
2949        // The pipeline must preserve the user's edit (skip the write)
2950        // and report it via PreprocessResult::skipped.
2951        let env = TempEnvironment::builder()
2952            .pack("app")
2953            .file("config.toml.tmpl", "name = original")
2954            .done()
2955            .build();
2956
2957        // First run: clean deploy, baseline written.
2958        let first = run_template_preprocess(&env, "app", false);
2959        assert!(first.skipped.is_empty(), "first deploy must not skip");
2960        let deployed_path = &first.virtual_entries[0].absolute_path.clone();
2961
2962        // User edits the deployed file directly.
2963        env.fs
2964            .write_file(deployed_path, b"name = USER EDITED")
2965            .unwrap();
2966
2967        // Second run with the same source → guard fires.
2968        let second = run_template_preprocess(&env, "app", false);
2969        assert_eq!(second.skipped.len(), 1, "deployed-edit must skip");
2970        let skip = &second.skipped[0];
2971        assert_eq!(skip.state, DivergenceState::OutputChanged);
2972        assert_eq!(skip.pack, "app");
2973        assert_eq!(skip.virtual_relative, std::path::Path::new("config.toml"));
2974
2975        // The user's edit must still be on disk; the rendered content
2976        // must NOT have replaced it.
2977        let on_disk = env.fs.read_to_string(deployed_path).unwrap();
2978        assert_eq!(on_disk, "name = USER EDITED");
2979
2980        // The virtual entry must still point at the deployed file so
2981        // downstream rule matching has something to work with.
2982        assert_eq!(second.virtual_entries.len(), 1);
2983        assert_eq!(&second.virtual_entries[0].absolute_path, deployed_path);
2984    }
2985
2986    #[test]
2987    fn divergence_guard_skips_when_both_changed() {
2988        // Row 4: source AND deployed both edited. Same skip behaviour
2989        // (preserve deployed bytes), reported as BothChanged so the
2990        // user gets a sharper warning.
2991        let env = TempEnvironment::builder()
2992            .pack("app")
2993            .file("config.toml.tmpl", "name = original")
2994            .done()
2995            .build();
2996
2997        let first = run_template_preprocess(&env, "app", false);
2998        let deployed_path = first.virtual_entries[0].absolute_path.clone();
2999
3000        // Edit both the source template and the deployed file.
3001        env.fs
3002            .write_file(
3003                &env.dotfiles_root.join("app/config.toml.tmpl"),
3004                b"name = SOURCE EDITED",
3005            )
3006            .unwrap();
3007        env.fs
3008            .write_file(&deployed_path, b"name = USER EDITED")
3009            .unwrap();
3010
3011        let second = run_template_preprocess(&env, "app", false);
3012        assert_eq!(second.skipped.len(), 1);
3013        assert_eq!(second.skipped[0].state, DivergenceState::BothChanged);
3014
3015        // Deployed bytes preserved despite the source edit.
3016        let on_disk = env.fs.read_to_string(&deployed_path).unwrap();
3017        assert_eq!(on_disk, "name = USER EDITED");
3018    }
3019
3020    #[test]
3021    fn divergence_guard_proceeds_when_source_changed_only() {
3022        // Row 2: source edited, deployed still matches the cached
3023        // render. This is the normal "I edited the template, re-deploy"
3024        // path — the guard must NOT fire here.
3025        let env = TempEnvironment::builder()
3026            .pack("app")
3027            .file("config.toml.tmpl", "name = original")
3028            .done()
3029            .build();
3030
3031        let first = run_template_preprocess(&env, "app", false);
3032        let deployed_path = first.virtual_entries[0].absolute_path.clone();
3033
3034        // Source edited; deployed left untouched.
3035        env.fs
3036            .write_file(
3037                &env.dotfiles_root.join("app/config.toml.tmpl"),
3038                b"name = NEW VALUE",
3039            )
3040            .unwrap();
3041
3042        let second = run_template_preprocess(&env, "app", false);
3043        assert!(
3044            second.skipped.is_empty(),
3045            "source-only change must not trigger the guard"
3046        );
3047        let on_disk = env.fs.read_to_string(&deployed_path).unwrap();
3048        assert_eq!(on_disk, "name = NEW VALUE");
3049    }
3050
3051    #[test]
3052    fn divergence_guard_no_op_when_nothing_changed() {
3053        // Row 1: nothing changed. Re-running deploys the same content;
3054        // no skip event.
3055        let env = TempEnvironment::builder()
3056            .pack("app")
3057            .file("config.toml.tmpl", "name = original")
3058            .done()
3059            .build();
3060
3061        let _ = run_template_preprocess(&env, "app", false);
3062        let second = run_template_preprocess(&env, "app", false);
3063        assert!(second.skipped.is_empty());
3064    }
3065
3066    #[test]
3067    fn divergence_guard_overridden_by_force() {
3068        // `dodot up --force` bypasses the guard: the deployed user edit
3069        // gets clobbered by the re-rendered output. This is the
3070        // documented escape hatch (e.g. when an env-var the template
3071        // references has rotated and the user wants the new value).
3072        let env = TempEnvironment::builder()
3073            .pack("app")
3074            .file("config.toml.tmpl", "name = original")
3075            .done()
3076            .build();
3077
3078        let first = run_template_preprocess(&env, "app", false);
3079        let deployed_path = first.virtual_entries[0].absolute_path.clone();
3080
3081        env.fs
3082            .write_file(&deployed_path, b"name = USER EDITED")
3083            .unwrap();
3084
3085        let second = run_template_preprocess(&env, "app", /* force */ true);
3086        assert!(
3087            second.skipped.is_empty(),
3088            "force=true must bypass the guard"
3089        );
3090        let on_disk = env.fs.read_to_string(&deployed_path).unwrap();
3091        assert_eq!(
3092            on_disk, "name = original",
3093            "force must rewrite to the rendered content"
3094        );
3095    }
3096
3097    #[test]
3098    fn divergence_guard_baseline_stays_pinned_to_last_successful_render() {
3099        // Critical invariant: when the guard skips a write, the
3100        // baseline must NOT be updated. Otherwise the next
3101        // `transform check` would compare the user's edit against
3102        // itself and report Synced — losing the divergence signal.
3103        let env = TempEnvironment::builder()
3104            .pack("app")
3105            .file("config.toml.tmpl", "name = original")
3106            .done()
3107            .build();
3108
3109        let first = run_template_preprocess(&env, "app", false);
3110        let deployed_path = first.virtual_entries[0].absolute_path.clone();
3111
3112        // Pin the original baseline timestamp/content for comparison.
3113        let baseline_before = crate::preprocessing::baseline::Baseline::load(
3114            env.fs.as_ref(),
3115            env.paths.as_ref(),
3116            "app",
3117            "preprocessed",
3118            "config.toml",
3119        )
3120        .unwrap()
3121        .unwrap();
3122
3123        env.fs
3124            .write_file(&deployed_path, b"name = USER EDITED")
3125            .unwrap();
3126
3127        let _ = run_template_preprocess(&env, "app", false);
3128
3129        let baseline_after = crate::preprocessing::baseline::Baseline::load(
3130            env.fs.as_ref(),
3131            env.paths.as_ref(),
3132            "app",
3133            "preprocessed",
3134            "config.toml",
3135        )
3136        .unwrap()
3137        .unwrap();
3138
3139        assert_eq!(
3140            baseline_before.rendered_hash, baseline_after.rendered_hash,
3141            "baseline must not be rewritten when the guard skips"
3142        );
3143        assert_eq!(
3144            baseline_before.rendered_content, baseline_after.rendered_content,
3145            "baseline content must not change after a skipped write"
3146        );
3147    }
3148
3149    #[test]
3150    fn divergence_guard_reproceeds_when_user_undoes_their_edit() {
3151        // After the guard fires, if the user reverts their edit (or
3152        // resolves through `dodot transform check`), the next `up`
3153        // must succeed normally — the guard is not sticky.
3154        let env = TempEnvironment::builder()
3155            .pack("app")
3156            .file("config.toml.tmpl", "name = original")
3157            .done()
3158            .build();
3159
3160        let first = run_template_preprocess(&env, "app", false);
3161        let deployed_path = first.virtual_entries[0].absolute_path.clone();
3162
3163        // Edit, then revert.
3164        env.fs
3165            .write_file(&deployed_path, b"name = USER EDITED")
3166            .unwrap();
3167        let blocked = run_template_preprocess(&env, "app", false);
3168        assert_eq!(blocked.skipped.len(), 1);
3169
3170        env.fs
3171            .write_file(&deployed_path, b"name = original")
3172            .unwrap();
3173        let cleared = run_template_preprocess(&env, "app", false);
3174        assert!(
3175            cleared.skipped.is_empty(),
3176            "guard must clear once divergence is gone"
3177        );
3178    }
3179
3180    #[test]
3181    fn divergence_guard_active_for_read_only_callers() {
3182        // Read-only callers (`dodot status`) set `write_baselines =
3183        // false` but still need the divergence guard active —
3184        // otherwise status would silently re-render and overwrite a
3185        // user's deployed-file edit. This test pins the new behavior:
3186        // the guard fires regardless of `write_baselines`, and the
3187        // baseline cache stays pinned to the last `up` (no
3188        // baseline-write side effects from the read-only call).
3189        let env = TempEnvironment::builder()
3190            .pack("app")
3191            .file("config.toml.tmpl", "name = original")
3192            .done()
3193            .build();
3194
3195        // Prime the baseline with a normal `up`.
3196        let _ = run_template_preprocess(&env, "app", false);
3197        let baseline_before = crate::preprocessing::baseline::Baseline::load(
3198            env.fs.as_ref(),
3199            env.paths.as_ref(),
3200            "app",
3201            "preprocessed",
3202            "config.toml",
3203        )
3204        .unwrap()
3205        .unwrap();
3206
3207        // User edits the deployed file directly.
3208        let deployed_path = env
3209            .paths
3210            .handler_data_dir("app", "preprocessed")
3211            .join("config.toml");
3212        env.fs
3213            .write_file(&deployed_path, b"name = USER EDITED")
3214            .unwrap();
3215
3216        // Simulate `status`: write_baselines=false, force=false.
3217        use std::collections::HashMap;
3218        let template_pp = crate::preprocessing::template::TemplatePreprocessor::new(
3219            vec!["tmpl".into()],
3220            HashMap::new(),
3221            env.paths.as_ref(),
3222        )
3223        .unwrap();
3224        let mut registry = PreprocessorRegistry::new();
3225        registry.register(Box::new(template_pp));
3226        let datastore = make_datastore(&env);
3227        let pack = make_pack("app", env.dotfiles_root.join("app"));
3228        let entries = vec![PackEntry {
3229            relative_path: "config.toml.tmpl".into(),
3230            absolute_path: env.dotfiles_root.join("app/config.toml.tmpl"),
3231            is_dir: false,
3232        }];
3233        let result = preprocess_pack(
3234            entries,
3235            &registry,
3236            &pack,
3237            env.fs.as_ref(),
3238            &datastore,
3239            env.paths.as_ref(),
3240            crate::preprocessing::PreprocessMode::Passive,
3241            /* force */ false,
3242        )
3243        .unwrap();
3244        assert_eq!(
3245            result.skipped.len(),
3246            1,
3247            "guard must fire for read-only callers too"
3248        );
3249        assert_eq!(
3250            env.fs.read_to_string(&deployed_path).unwrap(),
3251            "name = USER EDITED",
3252            "user's deployed-file edit must be preserved"
3253        );
3254
3255        // The baseline cache must NOT have been touched: the read-only
3256        // call leaves the divergence-detection ground truth pinned to
3257        // the last `up`.
3258        let baseline_after = crate::preprocessing::baseline::Baseline::load(
3259            env.fs.as_ref(),
3260            env.paths.as_ref(),
3261            "app",
3262            "preprocessed",
3263            "config.toml",
3264        )
3265        .unwrap()
3266        .unwrap();
3267        assert_eq!(baseline_before, baseline_after);
3268    }
3269}