Skip to main content

dodot_lib/preprocessing/
pipeline.rs

1//! Preprocessing pipeline — partitions, expands, and merges entries.
2//!
3//! This module contains the core pipeline function that runs between
4//! directory walking and rule matching. It identifies preprocessor files,
5//! expands them, writes results to the datastore, checks for collisions,
6//! and produces virtual entries for the handler pipeline.
7
8use std::collections::HashMap;
9use std::path::{Component, Path, PathBuf};
10
11use tracing::{debug, info};
12
13use crate::datastore::DataStore;
14use crate::fs::Fs;
15use crate::packs::Pack;
16use crate::preprocessing::PreprocessorRegistry;
17use crate::rules::PackEntry;
18use crate::{DodotError, Result};
19
20/// Validate that a preprocessor-produced path is safe to materialise in
21/// the datastore: relative, no root/prefix/parent-dir components, and
22/// not effectively empty.
23///
24/// Malicious or malformed preprocessor output (tar-slip, absolute paths,
25/// `..` segments) can escape the pack namespace and overwrite arbitrary
26/// files. Empty paths (or paths made up only of `.` components) are
27/// rejected because they would silently fail at the datastore layer with
28/// an opaque error — here we produce a clean diagnostic naming the
29/// preprocessor and source file.
30fn validate_safe_relative_path(path: &Path, preprocessor: &str, source_file: &Path) -> Result<()> {
31    let mut has_normal = false;
32    for component in path.components() {
33        match component {
34            Component::Normal(_) => has_normal = true,
35            Component::CurDir => {}
36            Component::ParentDir | Component::RootDir | Component::Prefix(_) => {
37                return Err(DodotError::PreprocessorError {
38                    preprocessor: preprocessor.into(),
39                    source_file: source_file.to_path_buf(),
40                    message: format!(
41                        "unsafe path in preprocessor output: {} (absolute or contains `..`)",
42                        path.display()
43                    ),
44                });
45            }
46        }
47    }
48    if !has_normal {
49        return Err(DodotError::PreprocessorError {
50            preprocessor: preprocessor.into(),
51            source_file: source_file.to_path_buf(),
52            message: format!(
53                "preprocessor produced an empty output path (\"{}\"). This usually means a file like \
54                 `.tmpl` or `.identity` has no stem after stripping the preprocessor extension — \
55                 rename the source file so that it has a non-empty name after stripping.",
56                path.display()
57            ),
58        });
59    }
60    Ok(())
61}
62
/// Rebuild `path` from its `Normal` components only.
///
/// This makes `./foo` and `foo` compare equal as virtual paths during
/// collision detection. Every non-`Normal` component is discarded, so this
/// must only be applied to paths that already passed
/// [`validate_safe_relative_path`].
fn normalize_relative(path: &Path) -> PathBuf {
    path.components()
        .filter_map(|part| match part {
            Component::Normal(name) => Some(name),
            _ => None,
        })
        .collect()
}
75
76/// The result of preprocessing a pack's file entries.
77#[derive(Debug)]
78pub struct PreprocessResult {
79    /// Entries that were NOT preprocessed (pass through unchanged).
80    pub regular_entries: Vec<PackEntry>,
81    /// Virtual entries created by preprocessing (point to datastore files).
82    pub virtual_entries: Vec<PackEntry>,
83    /// Maps virtual entry absolute_path → original source path in pack.
84    pub source_map: HashMap<PathBuf, PathBuf>,
85}
86
87impl PreprocessResult {
88    /// Create a passthrough result where all entries are regular (no preprocessing).
89    pub fn passthrough(entries: Vec<PackEntry>) -> Self {
90        Self {
91            regular_entries: entries,
92            virtual_entries: Vec::new(),
93            source_map: HashMap::new(),
94        }
95    }
96
97    /// Return all entries (regular + virtual) merged into one list, sorted by relative path.
98    pub fn merged_entries(&self) -> Vec<PackEntry> {
99        let mut all = Vec::with_capacity(self.regular_entries.len() + self.virtual_entries.len());
100        all.extend(self.regular_entries.iter().cloned());
101        all.extend(self.virtual_entries.iter().cloned());
102        all.sort_by(|a, b| a.relative_path.cmp(&b.relative_path));
103        all
104    }
105}
106
107/// The handler name used for preprocessor-expanded files in the datastore.
108const PREPROCESSED_HANDLER: &str = "preprocessed";
109
110/// Run the preprocessing pipeline for a pack's file entries.
111///
112/// 1. Partition entries into preprocessor files vs regular files.
113/// 2. For each preprocessor file: expand, write results to datastore.
114/// 3. Create virtual PackEntries pointing to the datastore files.
115/// 4. Check for collisions between virtual and regular entries.
116/// 5. Return the result for merging into the handler pipeline.
117pub fn preprocess_pack(
118    entries: Vec<PackEntry>,
119    registry: &PreprocessorRegistry,
120    pack: &Pack,
121    fs: &dyn Fs,
122    datastore: &dyn DataStore,
123) -> Result<PreprocessResult> {
124    let mut regular_entries = Vec::new();
125    let mut preprocessor_entries = Vec::new();
126
127    // Phase 1: Partition
128    for entry in entries {
129        let filename = entry
130            .relative_path
131            .file_name()
132            .map(|n| n.to_string_lossy().to_string())
133            .unwrap_or_default();
134
135        if !entry.is_dir && registry.is_preprocessor_file(&filename) {
136            preprocessor_entries.push(entry);
137        } else {
138            regular_entries.push(entry);
139        }
140    }
141
142    debug!(
143        pack = %pack.name,
144        preprocessor = preprocessor_entries.len(),
145        regular = regular_entries.len(),
146        "partitioned entries"
147    );
148
149    if preprocessor_entries.is_empty() {
150        return Ok(PreprocessResult {
151            regular_entries,
152            virtual_entries: Vec::new(),
153            source_map: HashMap::new(),
154        });
155    }
156
157    // Phase 2 & 3: Expand and create virtual entries
158    let mut virtual_entries = Vec::new();
159    let mut source_map = HashMap::new();
160
161    // Tracks claimed paths for collision detection. Seeded with regular
162    // entries; virtual entries are added as they're created so two
163    // preprocessors can't both produce the same virtual path (e.g.
164    // `config.toml.identity` and `config.toml.tmpl` both expanding to
165    // `config.toml`).
166    let mut claimed_paths: std::collections::HashSet<PathBuf> = regular_entries
167        .iter()
168        .map(|e| e.relative_path.clone())
169        .collect();
170
171    for entry in &preprocessor_entries {
172        let filename = entry
173            .relative_path
174            .file_name()
175            .map(|n| n.to_string_lossy().to_string())
176            .unwrap_or_default();
177
178        let preprocessor = registry
179            .find_for_file(&filename)
180            .expect("already checked in partition");
181
182        info!(
183            pack = %pack.name,
184            preprocessor = preprocessor.name(),
185            file = %filename,
186            "expanding"
187        );
188
189        // Expand the source file
190        let expanded_files = preprocessor.expand(&entry.absolute_path, fs)?;
191
192        for expanded in expanded_files {
193            // Reject unsafe paths from the preprocessor (tar-slip,
194            // absolute paths, parent-dir escapes) before any disk write.
195            validate_safe_relative_path(
196                &expanded.relative_path,
197                preprocessor.name(),
198                &entry.absolute_path,
199            )?;
200
201            // Compute the virtual relative path.
202            // If the source was in a subdirectory (e.g., "subdir/config.toml.identity"),
203            // the virtual entry should preserve the parent (e.g., "subdir/config.toml").
204            let virtual_relative = if let Some(parent) = entry.relative_path.parent() {
205                if parent == Path::new("") {
206                    expanded.relative_path.clone()
207                } else {
208                    parent.join(&expanded.relative_path)
209                }
210            } else {
211                expanded.relative_path.clone()
212            };
213
214            // Defense-in-depth: validate the joined path too (parent
215            // could only come from the pack scanner, but re-check).
216            validate_safe_relative_path(
217                &virtual_relative,
218                preprocessor.name(),
219                &entry.absolute_path,
220            )?;
221
222            // Normalise `./foo` and `foo` to the same canonical form, so
223            // that collision detection and downstream comparisons don't
224            // silently diverge from the datastore's own normalisation.
225            let virtual_relative = normalize_relative(&virtual_relative);
226
227            // Phase 4: Collision check (against both regular entries and
228            // previously-expanded virtual entries)
229            if claimed_paths.contains(&virtual_relative) {
230                return Err(DodotError::PreprocessorCollision {
231                    pack: pack.name.clone(),
232                    source_file: filename.clone(),
233                    expanded_name: virtual_relative.to_string_lossy().into_owned(),
234                });
235            }
236
237            // Write expanded content to datastore, preserving directory
238            // structure. Directories get mkdir'd; files get their content
239            // written. `write_rendered_file` creates any needed parent
240            // directories.
241            let datastore_path = if expanded.is_dir {
242                datastore.write_rendered_dir(
243                    &pack.name,
244                    PREPROCESSED_HANDLER,
245                    &virtual_relative.to_string_lossy(),
246                )?
247            } else {
248                datastore.write_rendered_file(
249                    &pack.name,
250                    PREPROCESSED_HANDLER,
251                    &virtual_relative.to_string_lossy(),
252                    &expanded.content,
253                )?
254            };
255
256            debug!(
257                pack = %pack.name,
258                virtual_path = %virtual_relative.display(),
259                datastore_path = %datastore_path.display(),
260                is_dir = expanded.is_dir,
261                "wrote expanded entry"
262            );
263
264            claimed_paths.insert(virtual_relative.clone());
265            source_map.insert(datastore_path.clone(), entry.absolute_path.clone());
266
267            virtual_entries.push(PackEntry {
268                relative_path: virtual_relative,
269                absolute_path: datastore_path,
270                is_dir: expanded.is_dir,
271            });
272        }
273    }
274
275    info!(
276        pack = %pack.name,
277        virtual_count = virtual_entries.len(),
278        "preprocessing complete"
279    );
280
281    Ok(PreprocessResult {
282        regular_entries,
283        virtual_entries,
284        source_map,
285    })
286}
287
288#[cfg(test)]
289mod tests {
290    use super::*;
291    use crate::datastore::FilesystemDataStore;
292    use crate::handlers::HandlerConfig;
293    use crate::preprocessing::identity::IdentityPreprocessor;
294    use crate::testing::TempEnvironment;
295    use std::sync::Arc;
296
297    fn make_pack(name: &str, path: PathBuf) -> Pack {
298        Pack::new(name.into(), path, HandlerConfig::default())
299    }
300
301    fn make_registry() -> PreprocessorRegistry {
302        let mut registry = PreprocessorRegistry::new();
303        registry.register(Box::new(IdentityPreprocessor::new()));
304        registry
305    }
306
307    fn make_datastore(env: &TempEnvironment) -> FilesystemDataStore {
308        let runner = Arc::new(crate::datastore::ShellCommandRunner);
309        FilesystemDataStore::new(env.fs.clone(), env.paths.clone(), runner)
310    }
311
312    #[test]
313    fn passthrough_when_no_preprocessor_files() {
314        let env = TempEnvironment::builder()
315            .pack("vim")
316            .file("vimrc", "set nocompatible")
317            .file("gvimrc", "set guifont=Mono")
318            .done()
319            .build();
320
321        let registry = make_registry();
322        let datastore = make_datastore(&env);
323        let pack = make_pack("vim", env.dotfiles_root.join("vim"));
324
325        let entries = vec![
326            PackEntry {
327                relative_path: "vimrc".into(),
328                absolute_path: env.dotfiles_root.join("vim/vimrc"),
329                is_dir: false,
330            },
331            PackEntry {
332                relative_path: "gvimrc".into(),
333                absolute_path: env.dotfiles_root.join("vim/gvimrc"),
334                is_dir: false,
335            },
336        ];
337
338        let result =
339            preprocess_pack(entries, &registry, &pack, env.fs.as_ref(), &datastore).unwrap();
340
341        assert_eq!(result.regular_entries.len(), 2);
342        assert!(result.virtual_entries.is_empty());
343        assert!(result.source_map.is_empty());
344    }
345
346    #[test]
347    fn identity_preprocessor_creates_virtual_entry() {
348        let env = TempEnvironment::builder()
349            .pack("app")
350            .file("config.toml.identity", "host = localhost")
351            .done()
352            .build();
353
354        let registry = make_registry();
355        let datastore = make_datastore(&env);
356        let pack = make_pack("app", env.dotfiles_root.join("app"));
357
358        let entries = vec![PackEntry {
359            relative_path: "config.toml.identity".into(),
360            absolute_path: env.dotfiles_root.join("app/config.toml.identity"),
361            is_dir: false,
362        }];
363
364        let result =
365            preprocess_pack(entries, &registry, &pack, env.fs.as_ref(), &datastore).unwrap();
366
367        assert!(result.regular_entries.is_empty());
368        assert_eq!(result.virtual_entries.len(), 1);
369
370        let virtual_entry = &result.virtual_entries[0];
371        assert_eq!(virtual_entry.relative_path, PathBuf::from("config.toml"));
372        assert!(!virtual_entry.is_dir);
373
374        // Verify the file was written to the datastore
375        let content = env.fs.read_to_string(&virtual_entry.absolute_path).unwrap();
376        assert_eq!(content, "host = localhost");
377
378        // Verify source map
379        assert_eq!(
380            result.source_map[&virtual_entry.absolute_path],
381            env.dotfiles_root.join("app/config.toml.identity")
382        );
383    }
384
385    #[test]
386    fn mixed_pack_partitions_correctly() {
387        let env = TempEnvironment::builder()
388            .pack("app")
389            .file("config.toml.identity", "host = localhost")
390            .file("readme.txt", "hello")
391            .done()
392            .build();
393
394        let registry = make_registry();
395        let datastore = make_datastore(&env);
396        let pack = make_pack("app", env.dotfiles_root.join("app"));
397
398        let entries = vec![
399            PackEntry {
400                relative_path: "config.toml.identity".into(),
401                absolute_path: env.dotfiles_root.join("app/config.toml.identity"),
402                is_dir: false,
403            },
404            PackEntry {
405                relative_path: "readme.txt".into(),
406                absolute_path: env.dotfiles_root.join("app/readme.txt"),
407                is_dir: false,
408            },
409        ];
410
411        let result =
412            preprocess_pack(entries, &registry, &pack, env.fs.as_ref(), &datastore).unwrap();
413
414        assert_eq!(result.regular_entries.len(), 1);
415        assert_eq!(
416            result.regular_entries[0].relative_path,
417            PathBuf::from("readme.txt")
418        );
419
420        assert_eq!(result.virtual_entries.len(), 1);
421        assert_eq!(
422            result.virtual_entries[0].relative_path,
423            PathBuf::from("config.toml")
424        );
425    }
426
427    #[test]
428    fn collision_detection_rejects_conflict() {
429        let env = TempEnvironment::builder()
430            .pack("app")
431            .file("config.toml.identity", "preprocessed")
432            .file("config.toml", "regular")
433            .done()
434            .build();
435
436        let registry = make_registry();
437        let datastore = make_datastore(&env);
438        let pack = make_pack("app", env.dotfiles_root.join("app"));
439
440        let entries = vec![
441            PackEntry {
442                relative_path: "config.toml.identity".into(),
443                absolute_path: env.dotfiles_root.join("app/config.toml.identity"),
444                is_dir: false,
445            },
446            PackEntry {
447                relative_path: "config.toml".into(),
448                absolute_path: env.dotfiles_root.join("app/config.toml"),
449                is_dir: false,
450            },
451        ];
452
453        let err =
454            preprocess_pack(entries, &registry, &pack, env.fs.as_ref(), &datastore).unwrap_err();
455        assert!(
456            matches!(err, DodotError::PreprocessorCollision { .. }),
457            "expected PreprocessorCollision, got: {err}"
458        );
459    }
460
461    #[test]
462    fn merged_entries_combines_and_sorts() {
463        let result = PreprocessResult {
464            regular_entries: vec![PackEntry {
465                relative_path: "zebra".into(),
466                absolute_path: "/z".into(),
467                is_dir: false,
468            }],
469            virtual_entries: vec![PackEntry {
470                relative_path: "alpha".into(),
471                absolute_path: "/a".into(),
472                is_dir: false,
473            }],
474            source_map: HashMap::new(),
475        };
476
477        let merged = result.merged_entries();
478        assert_eq!(merged.len(), 2);
479        assert_eq!(merged[0].relative_path, PathBuf::from("alpha"));
480        assert_eq!(merged[1].relative_path, PathBuf::from("zebra"));
481    }
482
483    #[test]
484    fn empty_registry_passes_all_through() {
485        let env = TempEnvironment::builder()
486            .pack("app")
487            .file("config.toml.identity", "content")
488            .done()
489            .build();
490
491        let registry = PreprocessorRegistry::new(); // empty!
492        let datastore = make_datastore(&env);
493        let pack = make_pack("app", env.dotfiles_root.join("app"));
494
495        let entries = vec![PackEntry {
496            relative_path: "config.toml.identity".into(),
497            absolute_path: env.dotfiles_root.join("app/config.toml.identity"),
498            is_dir: false,
499        }];
500
501        let result =
502            preprocess_pack(entries, &registry, &pack, env.fs.as_ref(), &datastore).unwrap();
503
504        // With no preprocessors registered, the file is treated as regular
505        assert_eq!(result.regular_entries.len(), 1);
506        assert!(result.virtual_entries.is_empty());
507    }
508
509    #[test]
510    fn directories_are_never_preprocessed() {
511        let env = TempEnvironment::builder()
512            .pack("app")
513            .file("bin.identity/tool", "#!/bin/sh")
514            .done()
515            .build();
516
517        let registry = make_registry();
518        let datastore = make_datastore(&env);
519        let pack = make_pack("app", env.dotfiles_root.join("app"));
520
521        let entries = vec![PackEntry {
522            relative_path: "bin.identity".into(),
523            absolute_path: env.dotfiles_root.join("app/bin.identity"),
524            is_dir: true, // directory — should NOT be preprocessed
525        }];
526
527        let result =
528            preprocess_pack(entries, &registry, &pack, env.fs.as_ref(), &datastore).unwrap();
529
530        assert_eq!(result.regular_entries.len(), 1);
531        assert!(result.virtual_entries.is_empty());
532    }
533
534    #[test]
535    fn subdirectory_preprocessor_file_preserves_parent() {
536        let env = TempEnvironment::builder()
537            .pack("app")
538            .file("subdir/config.toml.identity", "nested content")
539            .done()
540            .build();
541
542        let registry = make_registry();
543        let datastore = make_datastore(&env);
544        let pack = make_pack("app", env.dotfiles_root.join("app"));
545
546        let entries = vec![PackEntry {
547            relative_path: "subdir/config.toml.identity".into(),
548            absolute_path: env.dotfiles_root.join("app/subdir/config.toml.identity"),
549            is_dir: false,
550        }];
551
552        let result =
553            preprocess_pack(entries, &registry, &pack, env.fs.as_ref(), &datastore).unwrap();
554
555        assert_eq!(result.virtual_entries.len(), 1);
556        assert_eq!(
557            result.virtual_entries[0].relative_path,
558            PathBuf::from("subdir/config.toml")
559        );
560    }
561
562    #[test]
563    fn multiple_preprocessor_files_in_one_pack() {
564        let env = TempEnvironment::builder()
565            .pack("app")
566            .file("config.toml.identity", "config content")
567            .file("settings.json.identity", "settings content")
568            .done()
569            .build();
570
571        let registry = make_registry();
572        let datastore = make_datastore(&env);
573        let pack = make_pack("app", env.dotfiles_root.join("app"));
574
575        let entries = vec![
576            PackEntry {
577                relative_path: "config.toml.identity".into(),
578                absolute_path: env.dotfiles_root.join("app/config.toml.identity"),
579                is_dir: false,
580            },
581            PackEntry {
582                relative_path: "settings.json.identity".into(),
583                absolute_path: env.dotfiles_root.join("app/settings.json.identity"),
584                is_dir: false,
585            },
586        ];
587
588        let result =
589            preprocess_pack(entries, &registry, &pack, env.fs.as_ref(), &datastore).unwrap();
590
591        assert!(result.regular_entries.is_empty());
592        assert_eq!(result.virtual_entries.len(), 2);
593
594        let names: Vec<String> = result
595            .virtual_entries
596            .iter()
597            .map(|e| e.relative_path.to_string_lossy().to_string())
598            .collect();
599        assert!(names.contains(&"config.toml".to_string()));
600        assert!(names.contains(&"settings.json".to_string()));
601
602        // Each should have a source_map entry
603        assert_eq!(result.source_map.len(), 2);
604    }
605
606    #[test]
607    fn pack_with_only_preprocessor_files() {
608        let env = TempEnvironment::builder()
609            .pack("app")
610            .file("only.conf.identity", "the only file")
611            .done()
612            .build();
613
614        let registry = make_registry();
615        let datastore = make_datastore(&env);
616        let pack = make_pack("app", env.dotfiles_root.join("app"));
617
618        let entries = vec![PackEntry {
619            relative_path: "only.conf.identity".into(),
620            absolute_path: env.dotfiles_root.join("app/only.conf.identity"),
621            is_dir: false,
622        }];
623
624        let result =
625            preprocess_pack(entries, &registry, &pack, env.fs.as_ref(), &datastore).unwrap();
626
627        assert!(result.regular_entries.is_empty());
628        assert_eq!(result.virtual_entries.len(), 1);
629        assert_eq!(result.merged_entries().len(), 1);
630    }
631
632    #[test]
633    fn source_map_is_complete() {
634        let env = TempEnvironment::builder()
635            .pack("app")
636            .file("a.conf.identity", "aaa")
637            .file("b.conf.identity", "bbb")
638            .file("regular.txt", "ccc")
639            .done()
640            .build();
641
642        let registry = make_registry();
643        let datastore = make_datastore(&env);
644        let pack = make_pack("app", env.dotfiles_root.join("app"));
645
646        let entries = vec![
647            PackEntry {
648                relative_path: "a.conf.identity".into(),
649                absolute_path: env.dotfiles_root.join("app/a.conf.identity"),
650                is_dir: false,
651            },
652            PackEntry {
653                relative_path: "b.conf.identity".into(),
654                absolute_path: env.dotfiles_root.join("app/b.conf.identity"),
655                is_dir: false,
656            },
657            PackEntry {
658                relative_path: "regular.txt".into(),
659                absolute_path: env.dotfiles_root.join("app/regular.txt"),
660                is_dir: false,
661            },
662        ];
663
664        let result =
665            preprocess_pack(entries, &registry, &pack, env.fs.as_ref(), &datastore).unwrap();
666
667        // Every virtual entry must have a source_map entry
668        for ve in &result.virtual_entries {
669            assert!(
670                result.source_map.contains_key(&ve.absolute_path),
671                "virtual entry {} has no source_map entry",
672                ve.absolute_path.display()
673            );
674        }
675        // No regular entries in the source_map
676        for re in &result.regular_entries {
677            assert!(
678                !result.source_map.contains_key(&re.absolute_path),
679                "regular entry {} should not be in source_map",
680                re.absolute_path.display()
681            );
682        }
683    }
684
685    #[test]
686    fn preprocessing_is_idempotent() {
687        let env = TempEnvironment::builder()
688            .pack("app")
689            .file("config.toml.identity", "content")
690            .done()
691            .build();
692
693        let registry = make_registry();
694        let datastore = make_datastore(&env);
695        let pack = make_pack("app", env.dotfiles_root.join("app"));
696
697        let make_entries = || {
698            vec![PackEntry {
699                relative_path: "config.toml.identity".into(),
700                absolute_path: env.dotfiles_root.join("app/config.toml.identity"),
701                is_dir: false,
702            }]
703        };
704
705        let result1 = preprocess_pack(
706            make_entries(),
707            &registry,
708            &pack,
709            env.fs.as_ref(),
710            &datastore,
711        )
712        .unwrap();
713        let result2 = preprocess_pack(
714            make_entries(),
715            &registry,
716            &pack,
717            env.fs.as_ref(),
718            &datastore,
719        )
720        .unwrap();
721
722        assert_eq!(result1.virtual_entries.len(), result2.virtual_entries.len());
723        assert_eq!(
724            result1.virtual_entries[0].relative_path,
725            result2.virtual_entries[0].relative_path
726        );
727
728        // Datastore file should be the same content
729        let content1 = env
730            .fs
731            .read_to_string(&result1.virtual_entries[0].absolute_path)
732            .unwrap();
733        let content2 = env
734            .fs
735            .read_to_string(&result2.virtual_entries[0].absolute_path)
736            .unwrap();
737        assert_eq!(content1, content2);
738    }
739
740    #[test]
741    fn expansion_error_propagates() {
742        let env = TempEnvironment::builder()
743            .pack("app")
744            .file("placeholder", "")
745            .done()
746            .build();
747
748        let registry = make_registry();
749        let datastore = make_datastore(&env);
750        let pack = make_pack("app", env.dotfiles_root.join("app"));
751
752        // Point to a file that doesn't exist — expansion should fail
753        let entries = vec![PackEntry {
754            relative_path: "missing.conf.identity".into(),
755            absolute_path: env.dotfiles_root.join("app/missing.conf.identity"),
756            is_dir: false,
757        }];
758
759        let err =
760            preprocess_pack(entries, &registry, &pack, env.fs.as_ref(), &datastore).unwrap_err();
761        assert!(
762            matches!(err, DodotError::Fs { .. }),
763            "expected Fs error for missing file, got: {err}"
764        );
765    }
766
767    #[test]
768    fn inter_preprocessor_collision_detected() {
769        // Two preprocessors produce the same logical name.
770        // Set up: `config.toml.identity` and `config.toml.other` (custom
771        // extension) both strip to `config.toml`. The pipeline must
772        // detect this and refuse rather than silently overwriting.
773        let env = TempEnvironment::builder()
774            .pack("app")
775            .file("config.toml.identity", "a")
776            .file("config.toml.other", "b")
777            .done()
778            .build();
779
780        let mut registry = PreprocessorRegistry::new();
781        registry.register(Box::new(IdentityPreprocessor::new()));
782        registry.register(Box::new(IdentityPreprocessor::with_extension("other")));
783
784        let datastore = make_datastore(&env);
785        let pack = make_pack("app", env.dotfiles_root.join("app"));
786
787        let entries = vec![
788            PackEntry {
789                relative_path: "config.toml.identity".into(),
790                absolute_path: env.dotfiles_root.join("app/config.toml.identity"),
791                is_dir: false,
792            },
793            PackEntry {
794                relative_path: "config.toml.other".into(),
795                absolute_path: env.dotfiles_root.join("app/config.toml.other"),
796                is_dir: false,
797            },
798        ];
799
800        let err =
801            preprocess_pack(entries, &registry, &pack, env.fs.as_ref(), &datastore).unwrap_err();
802        assert!(
803            matches!(err, DodotError::PreprocessorCollision { .. }),
804            "expected PreprocessorCollision for inter-preprocessor clash, got: {err}"
805        );
806    }
807
808    #[test]
809    fn datastore_preserves_directory_structure() {
810        // Preprocessor files in subdirectories should land in matching
811        // subdirectories under the datastore, not be flattened with `__`.
812        let env = TempEnvironment::builder()
813            .pack("app")
814            .file("sub/config.toml.identity", "nested")
815            .done()
816            .build();
817
818        let registry = make_registry();
819        let datastore = make_datastore(&env);
820        let pack = make_pack("app", env.dotfiles_root.join("app"));
821
822        let entries = vec![PackEntry {
823            relative_path: "sub/config.toml.identity".into(),
824            absolute_path: env.dotfiles_root.join("app/sub/config.toml.identity"),
825            is_dir: false,
826        }];
827
828        let result =
829            preprocess_pack(entries, &registry, &pack, env.fs.as_ref(), &datastore).unwrap();
830
831        assert_eq!(result.virtual_entries.len(), 1);
832        let datastore_path = &result.virtual_entries[0].absolute_path;
833
834        // The datastore path should contain the subdirectory structure, not flattened
835        let ds_str = datastore_path.to_string_lossy();
836        assert!(
837            ds_str.contains("sub/config.toml"),
838            "datastore path should preserve directory structure, got: {ds_str}"
839        );
840        assert!(
841            !ds_str.contains("__"),
842            "datastore path should not contain flattening separator, got: {ds_str}"
843        );
844
845        // File should actually exist at that path
846        assert!(env.fs.exists(datastore_path));
847        let content = env.fs.read_to_string(datastore_path).unwrap();
848        assert_eq!(content, "nested");
849    }
850
851    #[test]
852    fn datastore_distinguishes_sibling_from_flattened_name() {
853        // Regression test for the flatten-with-`__` edge case: a user could
854        // have `a/b.txt` and `a__b.txt` both as preprocessor outputs, which
855        // would have collided under the old flattening scheme. With
856        // directory-preserving storage they live in distinct datastore paths.
857        let env = TempEnvironment::builder()
858            .pack("app")
859            .file("a/b.txt.identity", "nested")
860            .file("a__b.txt.identity", "flat")
861            .done()
862            .build();
863
864        let registry = make_registry();
865        let datastore = make_datastore(&env);
866        let pack = make_pack("app", env.dotfiles_root.join("app"));
867
868        let entries = vec![
869            PackEntry {
870                relative_path: "a/b.txt.identity".into(),
871                absolute_path: env.dotfiles_root.join("app/a/b.txt.identity"),
872                is_dir: false,
873            },
874            PackEntry {
875                relative_path: "a__b.txt.identity".into(),
876                absolute_path: env.dotfiles_root.join("app/a__b.txt.identity"),
877                is_dir: false,
878            },
879        ];
880
881        let result =
882            preprocess_pack(entries, &registry, &pack, env.fs.as_ref(), &datastore).unwrap();
883
884        assert_eq!(result.virtual_entries.len(), 2);
885
886        // Both files must exist with distinct content
887        let nested = result
888            .virtual_entries
889            .iter()
890            .find(|e| e.relative_path == std::path::Path::new("a/b.txt"))
891            .expect("nested entry");
892        let flat = result
893            .virtual_entries
894            .iter()
895            .find(|e| e.relative_path == std::path::Path::new("a__b.txt"))
896            .expect("flat entry");
897
898        assert_ne!(nested.absolute_path, flat.absolute_path);
899        assert_eq!(
900            env.fs.read_to_string(&nested.absolute_path).unwrap(),
901            "nested"
902        );
903        assert_eq!(env.fs.read_to_string(&flat.absolute_path).unwrap(), "flat");
904    }
905
906    // ── Path-traversal defenses ─────────────────────────────────
907
908    /// Test-only preprocessor that emits a configurable set of
909    /// [`crate::preprocessing::ExpandedFile`]s — lets tests inject
910    /// unsafe paths or directory entries without needing a real archive.
911    struct ScriptedPreprocessor {
912        name: &'static str,
913        extension: &'static str,
914        outputs: Vec<crate::preprocessing::ExpandedFile>,
915    }
916
917    impl crate::preprocessing::Preprocessor for ScriptedPreprocessor {
918        fn name(&self) -> &str {
919            self.name
920        }
921        fn transform_type(&self) -> crate::preprocessing::TransformType {
922            crate::preprocessing::TransformType::Opaque
923        }
924        fn matches_extension(&self, filename: &str) -> bool {
925            filename.ends_with(self.extension)
926        }
927        fn stripped_name(&self, filename: &str) -> String {
928            filename
929                .strip_suffix(self.extension)
930                .unwrap_or(filename)
931                .to_string()
932        }
933        fn expand(
934            &self,
935            _source: &Path,
936            _fs: &dyn Fs,
937        ) -> Result<Vec<crate::preprocessing::ExpandedFile>> {
938            Ok(self.outputs.clone())
939        }
940    }
941
942    #[test]
943    fn rejects_absolute_path_from_preprocessor() {
944        let env = TempEnvironment::builder()
945            .pack("app")
946            .file("bad.evil", "x")
947            .done()
948            .build();
949
950        let mut registry = PreprocessorRegistry::new();
951        registry.register(Box::new(ScriptedPreprocessor {
952            name: "evil",
953            extension: ".evil",
954            outputs: vec![crate::preprocessing::ExpandedFile {
955                relative_path: PathBuf::from("/etc/passwd"),
956                content: b"pwn".to_vec(),
957                is_dir: false,
958            }],
959        }));
960
961        let datastore = make_datastore(&env);
962        let pack = make_pack("app", env.dotfiles_root.join("app"));
963
964        let entries = vec![PackEntry {
965            relative_path: "bad.evil".into(),
966            absolute_path: env.dotfiles_root.join("app/bad.evil"),
967            is_dir: false,
968        }];
969
970        let err =
971            preprocess_pack(entries, &registry, &pack, env.fs.as_ref(), &datastore).unwrap_err();
972        assert!(
973            matches!(err, DodotError::PreprocessorError { ref message, .. } if message.contains("unsafe path")),
974            "expected unsafe-path error, got: {err}"
975        );
976        // Verify the malicious target was not written
977        assert!(!std::path::Path::new("/etc/passwd.dodot-would-have-written-here").exists());
978    }
979
980    #[test]
981    fn rejects_parent_dir_escape_from_preprocessor() {
982        let env = TempEnvironment::builder()
983            .pack("app")
984            .file("bad.evil", "x")
985            .done()
986            .build();
987
988        let mut registry = PreprocessorRegistry::new();
989        registry.register(Box::new(ScriptedPreprocessor {
990            name: "evil",
991            extension: ".evil",
992            outputs: vec![crate::preprocessing::ExpandedFile {
993                relative_path: PathBuf::from("../../escape.txt"),
994                content: b"pwn".to_vec(),
995                is_dir: false,
996            }],
997        }));
998
999        let datastore = make_datastore(&env);
1000        let pack = make_pack("app", env.dotfiles_root.join("app"));
1001
1002        let entries = vec![PackEntry {
1003            relative_path: "bad.evil".into(),
1004            absolute_path: env.dotfiles_root.join("app/bad.evil"),
1005            is_dir: false,
1006        }];
1007
1008        let err =
1009            preprocess_pack(entries, &registry, &pack, env.fs.as_ref(), &datastore).unwrap_err();
1010        assert!(
1011            matches!(err, DodotError::PreprocessorError { ref message, .. } if message.contains("unsafe path")),
1012            "expected unsafe-path error, got: {err}"
1013        );
1014    }
1015
1016    #[test]
1017    fn directory_entry_is_mkdird_not_written_as_file() {
1018        // A preprocessor emits a directory marker followed by a file
1019        // inside it. The pipeline must mkdir the directory rather than
1020        // writing a file at the directory path (which would break the
1021        // subsequent nested file write).
1022        let env = TempEnvironment::builder()
1023            .pack("app")
1024            .file("bundle.zz", "x")
1025            .done()
1026            .build();
1027
1028        let mut registry = PreprocessorRegistry::new();
1029        registry.register(Box::new(ScriptedPreprocessor {
1030            name: "scripted",
1031            extension: ".zz",
1032            outputs: vec![
1033                crate::preprocessing::ExpandedFile {
1034                    relative_path: PathBuf::from("sub"),
1035                    content: Vec::new(),
1036                    is_dir: true,
1037                },
1038                crate::preprocessing::ExpandedFile {
1039                    relative_path: PathBuf::from("sub/nested.txt"),
1040                    content: b"hello".to_vec(),
1041                    is_dir: false,
1042                },
1043            ],
1044        }));
1045
1046        let datastore = make_datastore(&env);
1047        let pack = make_pack("app", env.dotfiles_root.join("app"));
1048
1049        let entries = vec![PackEntry {
1050            relative_path: "bundle.zz".into(),
1051            absolute_path: env.dotfiles_root.join("app/bundle.zz"),
1052            is_dir: false,
1053        }];
1054
1055        let result =
1056            preprocess_pack(entries, &registry, &pack, env.fs.as_ref(), &datastore).unwrap();
1057
1058        assert_eq!(result.virtual_entries.len(), 2);
1059
1060        let dir_entry = result
1061            .virtual_entries
1062            .iter()
1063            .find(|e| e.is_dir)
1064            .expect("directory entry");
1065        assert!(
1066            env.fs.is_dir(&dir_entry.absolute_path),
1067            "directory entry should be a real directory: {}",
1068            dir_entry.absolute_path.display()
1069        );
1070
1071        let file_entry = result
1072            .virtual_entries
1073            .iter()
1074            .find(|e| !e.is_dir)
1075            .expect("file entry");
1076        assert_eq!(
1077            env.fs.read_to_string(&file_entry.absolute_path).unwrap(),
1078            "hello"
1079        );
1080    }
1081
1082    #[test]
1083    fn rejects_empty_path_from_preprocessor() {
1084        // A preprocessor that produces an empty relative_path (e.g. a
1085        // template file named literally `.tmpl` whose stripped name is
1086        // empty) must be rejected with a clean PreprocessorError, not
1087        // cascaded to the datastore's opaque "empty datastore path"
1088        // message.
1089        let env = TempEnvironment::builder()
1090            .pack("app")
1091            .file("bad.zz", "x")
1092            .done()
1093            .build();
1094
1095        let mut registry = PreprocessorRegistry::new();
1096        registry.register(Box::new(ScriptedPreprocessor {
1097            name: "scripted",
1098            extension: ".zz",
1099            outputs: vec![crate::preprocessing::ExpandedFile {
1100                relative_path: PathBuf::from(""),
1101                content: b"nope".to_vec(),
1102                is_dir: false,
1103            }],
1104        }));
1105
1106        let datastore = make_datastore(&env);
1107        let pack = make_pack("app", env.dotfiles_root.join("app"));
1108
1109        let entries = vec![PackEntry {
1110            relative_path: "bad.zz".into(),
1111            absolute_path: env.dotfiles_root.join("app/bad.zz"),
1112            is_dir: false,
1113        }];
1114
1115        let err =
1116            preprocess_pack(entries, &registry, &pack, env.fs.as_ref(), &datastore).unwrap_err();
1117        assert!(
1118            matches!(err, DodotError::PreprocessorError { ref message, .. } if message.contains("empty output path")),
1119            "expected empty-path error, got: {err}"
1120        );
1121    }
1122
1123    #[test]
1124    fn rejects_curdir_only_path_from_preprocessor() {
1125        // `./` or `.` alone normalises to empty — same rejection.
1126        let env = TempEnvironment::builder()
1127            .pack("app")
1128            .file("bad.zz", "x")
1129            .done()
1130            .build();
1131
1132        let mut registry = PreprocessorRegistry::new();
1133        registry.register(Box::new(ScriptedPreprocessor {
1134            name: "scripted",
1135            extension: ".zz",
1136            outputs: vec![crate::preprocessing::ExpandedFile {
1137                relative_path: PathBuf::from("."),
1138                content: b"nope".to_vec(),
1139                is_dir: false,
1140            }],
1141        }));
1142
1143        let datastore = make_datastore(&env);
1144        let pack = make_pack("app", env.dotfiles_root.join("app"));
1145
1146        let entries = vec![PackEntry {
1147            relative_path: "bad.zz".into(),
1148            absolute_path: env.dotfiles_root.join("app/bad.zz"),
1149            is_dir: false,
1150        }];
1151
1152        let err =
1153            preprocess_pack(entries, &registry, &pack, env.fs.as_ref(), &datastore).unwrap_err();
1154        assert!(
1155            matches!(err, DodotError::PreprocessorError { ref message, .. } if message.contains("empty output path")),
1156            "expected empty-path error, got: {err}"
1157        );
1158    }
1159
1160    #[test]
1161    fn curdir_prefixed_paths_collide_with_plain_paths() {
1162        // Two preprocessor outputs — one `./foo` and one `foo` — must
1163        // be treated as a collision. Before normalisation these lived
1164        // at distinct HashSet keys but the same datastore path, so the
1165        // second write silently clobbered the first.
1166        let env = TempEnvironment::builder()
1167            .pack("app")
1168            .file("bundle.zz", "x")
1169            .done()
1170            .build();
1171
1172        let mut registry = PreprocessorRegistry::new();
1173        registry.register(Box::new(ScriptedPreprocessor {
1174            name: "scripted",
1175            extension: ".zz",
1176            outputs: vec![
1177                crate::preprocessing::ExpandedFile {
1178                    relative_path: PathBuf::from("foo"),
1179                    content: b"first".to_vec(),
1180                    is_dir: false,
1181                },
1182                crate::preprocessing::ExpandedFile {
1183                    relative_path: PathBuf::from("./foo"),
1184                    content: b"second".to_vec(),
1185                    is_dir: false,
1186                },
1187            ],
1188        }));
1189
1190        let datastore = make_datastore(&env);
1191        let pack = make_pack("app", env.dotfiles_root.join("app"));
1192
1193        let entries = vec![PackEntry {
1194            relative_path: "bundle.zz".into(),
1195            absolute_path: env.dotfiles_root.join("app/bundle.zz"),
1196            is_dir: false,
1197        }];
1198
1199        let err =
1200            preprocess_pack(entries, &registry, &pack, env.fs.as_ref(), &datastore).unwrap_err();
1201        assert!(
1202            matches!(err, DodotError::PreprocessorCollision { .. }),
1203            "expected PreprocessorCollision for ./foo vs foo, got: {err}"
1204        );
1205    }
1206
1207    #[test]
1208    fn virtual_entry_relative_path_is_normalized() {
1209        // When a preprocessor emits `./foo`, the resulting virtual entry
1210        // must carry a normalised relative path. Otherwise downstream
1211        // code (e.g. rule matching or status display) sees both shapes
1212        // and treats them as different files.
1213        let env = TempEnvironment::builder()
1214            .pack("app")
1215            .file("bundle.zz", "x")
1216            .done()
1217            .build();
1218
1219        let mut registry = PreprocessorRegistry::new();
1220        registry.register(Box::new(ScriptedPreprocessor {
1221            name: "scripted",
1222            extension: ".zz",
1223            outputs: vec![crate::preprocessing::ExpandedFile {
1224                relative_path: PathBuf::from("./nested/file.txt"),
1225                content: b"hi".to_vec(),
1226                is_dir: false,
1227            }],
1228        }));
1229
1230        let datastore = make_datastore(&env);
1231        let pack = make_pack("app", env.dotfiles_root.join("app"));
1232
1233        let entries = vec![PackEntry {
1234            relative_path: "bundle.zz".into(),
1235            absolute_path: env.dotfiles_root.join("app/bundle.zz"),
1236            is_dir: false,
1237        }];
1238
1239        let result =
1240            preprocess_pack(entries, &registry, &pack, env.fs.as_ref(), &datastore).unwrap();
1241
1242        assert_eq!(result.virtual_entries.len(), 1);
1243        assert_eq!(
1244            result.virtual_entries[0].relative_path,
1245            PathBuf::from("nested/file.txt"),
1246            "CurDir components must be stripped from virtual entry"
1247        );
1248    }
1249}