Skip to main content

dodot_lib/preprocessing/
pipeline.rs

1//! Preprocessing pipeline — partitions, expands, and merges entries.
2//!
3//! This module contains the core pipeline function that runs between
4//! directory walking and rule matching. It identifies preprocessor files,
5//! expands them, writes results to the datastore, checks for collisions,
6//! and produces virtual entries for the handler pipeline.
7
8use std::collections::HashMap;
9use std::path::{Component, Path, PathBuf};
10
11use tracing::{debug, info};
12
13use crate::datastore::DataStore;
14use crate::fs::Fs;
15use crate::packs::Pack;
16use crate::preprocessing::PreprocessorRegistry;
17use crate::rules::PackEntry;
18use crate::{DodotError, Result};
19
20/// Validate that a preprocessor-produced path is safe to materialise in
21/// the datastore: relative, no root/prefix/parent-dir components, and
22/// not effectively empty.
23///
24/// Malicious or malformed preprocessor output (tar-slip, absolute paths,
25/// `..` segments) can escape the pack namespace and overwrite arbitrary
26/// files. Empty paths (or paths made up only of `.` components) are
27/// rejected because they would silently fail at the datastore layer with
28/// an opaque error — here we produce a clean diagnostic naming the
29/// preprocessor and source file.
30fn validate_safe_relative_path(path: &Path, preprocessor: &str, source_file: &Path) -> Result<()> {
31    let mut has_normal = false;
32    for component in path.components() {
33        match component {
34            Component::Normal(_) => has_normal = true,
35            Component::CurDir => {}
36            Component::ParentDir | Component::RootDir | Component::Prefix(_) => {
37                return Err(DodotError::PreprocessorError {
38                    preprocessor: preprocessor.into(),
39                    source_file: source_file.to_path_buf(),
40                    message: format!(
41                        "unsafe path in preprocessor output: {} (absolute or contains `..`)",
42                        path.display()
43                    ),
44                });
45            }
46        }
47    }
48    if !has_normal {
49        return Err(DodotError::PreprocessorError {
50            preprocessor: preprocessor.into(),
51            source_file: source_file.to_path_buf(),
52            message: format!(
53                "preprocessor produced an empty output path (\"{}\"). This usually means a file like \
54                 `.tmpl` or `.identity` has no stem after stripping the preprocessor extension — \
55                 rename the source file so that it has a non-empty name after stripping.",
56                path.display()
57            ),
58        });
59    }
60    Ok(())
61}
62
/// Rebuild a relative path from its normal components only.
///
/// Dropping `.` components makes `./foo` and `foo` compare equal, which
/// collision detection relies on. Every non-normal component is
/// discarded, so this must only be applied to a path that already
/// passed [`validate_safe_relative_path`].
fn normalize_relative(path: &Path) -> PathBuf {
    path.components()
        .filter_map(|part| match part {
            Component::Normal(segment) => Some(segment),
            _ => None,
        })
        .collect()
}
75
76/// The result of preprocessing a pack's file entries.
77#[derive(Debug)]
78pub struct PreprocessResult {
79    /// Entries that were NOT preprocessed (pass through unchanged).
80    pub regular_entries: Vec<PackEntry>,
81    /// Virtual entries created by preprocessing (point to datastore files).
82    pub virtual_entries: Vec<PackEntry>,
83    /// Maps virtual entry absolute_path → original source path in pack.
84    pub source_map: HashMap<PathBuf, PathBuf>,
85}
86
87impl PreprocessResult {
88    /// Create a passthrough result where all entries are regular (no preprocessing).
89    pub fn passthrough(entries: Vec<PackEntry>) -> Self {
90        Self {
91            regular_entries: entries,
92            virtual_entries: Vec::new(),
93            source_map: HashMap::new(),
94        }
95    }
96
97    /// Return all entries (regular + virtual) merged into one list, sorted by relative path.
98    pub fn merged_entries(&self) -> Vec<PackEntry> {
99        let mut all = Vec::with_capacity(self.regular_entries.len() + self.virtual_entries.len());
100        all.extend(self.regular_entries.iter().cloned());
101        all.extend(self.virtual_entries.iter().cloned());
102        all.sort_by(|a, b| a.relative_path.cmp(&b.relative_path));
103        all
104    }
105}
106
107/// The handler name used for preprocessor-expanded files in the datastore.
108const PREPROCESSED_HANDLER: &str = "preprocessed";
109
110/// Run the preprocessing pipeline for a pack's file entries.
111///
112/// 1. Partition entries into preprocessor files vs regular files.
113/// 2. For each preprocessor file: expand, write results to datastore.
114/// 3. Create virtual PackEntries pointing to the datastore files.
115/// 4. Check for collisions between virtual and regular entries.
116/// 5. Return the result for merging into the handler pipeline.
117pub fn preprocess_pack(
118    entries: Vec<PackEntry>,
119    registry: &PreprocessorRegistry,
120    pack: &Pack,
121    fs: &dyn Fs,
122    datastore: &dyn DataStore,
123) -> Result<PreprocessResult> {
124    let mut regular_entries = Vec::new();
125    let mut preprocessor_entries = Vec::new();
126
127    // Phase 1: Partition
128    for entry in entries {
129        let filename = entry
130            .relative_path
131            .file_name()
132            .map(|n| n.to_string_lossy().to_string())
133            .unwrap_or_default();
134
135        if !entry.is_dir && registry.is_preprocessor_file(&filename) {
136            preprocessor_entries.push(entry);
137        } else {
138            regular_entries.push(entry);
139        }
140    }
141
142    debug!(
143        pack = %pack.name,
144        preprocessor = preprocessor_entries.len(),
145        regular = regular_entries.len(),
146        "partitioned entries"
147    );
148
149    if preprocessor_entries.is_empty() {
150        return Ok(PreprocessResult {
151            regular_entries,
152            virtual_entries: Vec::new(),
153            source_map: HashMap::new(),
154        });
155    }
156
157    // Phase 2 & 3: Expand and create virtual entries
158    let mut virtual_entries = Vec::new();
159    let mut source_map = HashMap::new();
160
161    // Tracks claimed paths for collision detection. Seeded with regular
162    // entries; virtual entries are added as they're created so two
163    // preprocessors can't both produce the same virtual path (e.g.
164    // `config.toml.identity` and `config.toml.tmpl` both expanding to
165    // `config.toml`).
166    let mut claimed_paths: std::collections::HashSet<PathBuf> = regular_entries
167        .iter()
168        .map(|e| e.relative_path.clone())
169        .collect();
170
171    for entry in &preprocessor_entries {
172        let filename = entry
173            .relative_path
174            .file_name()
175            .map(|n| n.to_string_lossy().to_string())
176            .unwrap_or_default();
177
178        let preprocessor = registry
179            .find_for_file(&filename)
180            .expect("already checked in partition");
181
182        info!(
183            pack = %pack.name,
184            preprocessor = preprocessor.name(),
185            file = %filename,
186            "expanding"
187        );
188
189        // Expand the source file
190        let expanded_files = preprocessor.expand(&entry.absolute_path, fs)?;
191
192        for expanded in expanded_files {
193            // Reject unsafe paths from the preprocessor (tar-slip,
194            // absolute paths, parent-dir escapes) before any disk write.
195            validate_safe_relative_path(
196                &expanded.relative_path,
197                preprocessor.name(),
198                &entry.absolute_path,
199            )?;
200
201            // Compute the virtual relative path.
202            // If the source was in a subdirectory (e.g., "subdir/config.toml.identity"),
203            // the virtual entry should preserve the parent (e.g., "subdir/config.toml").
204            let virtual_relative = if let Some(parent) = entry.relative_path.parent() {
205                if parent == Path::new("") {
206                    expanded.relative_path.clone()
207                } else {
208                    parent.join(&expanded.relative_path)
209                }
210            } else {
211                expanded.relative_path.clone()
212            };
213
214            // Defense-in-depth: validate the joined path too (parent
215            // could only come from the pack scanner, but re-check).
216            validate_safe_relative_path(
217                &virtual_relative,
218                preprocessor.name(),
219                &entry.absolute_path,
220            )?;
221
222            // Normalise `./foo` and `foo` to the same canonical form, so
223            // that collision detection and downstream comparisons don't
224            // silently diverge from the datastore's own normalisation.
225            let virtual_relative = normalize_relative(&virtual_relative);
226
227            // Phase 4: Collision check (against both regular entries and
228            // previously-expanded virtual entries)
229            if claimed_paths.contains(&virtual_relative) {
230                return Err(DodotError::PreprocessorCollision {
231                    pack: pack.name.clone(),
232                    source_file: filename.clone(),
233                    expanded_name: virtual_relative.to_string_lossy().into_owned(),
234                });
235            }
236
237            // Write expanded content to datastore, preserving directory
238            // structure. Directories get mkdir'd; files get their content
239            // written. `write_rendered_file` creates any needed parent
240            // directories.
241            let datastore_path = if expanded.is_dir {
242                datastore.write_rendered_dir(
243                    &pack.name,
244                    PREPROCESSED_HANDLER,
245                    &virtual_relative.to_string_lossy(),
246                )?
247            } else {
248                datastore.write_rendered_file(
249                    &pack.name,
250                    PREPROCESSED_HANDLER,
251                    &virtual_relative.to_string_lossy(),
252                    &expanded.content,
253                )?
254            };
255
256            debug!(
257                pack = %pack.name,
258                virtual_path = %virtual_relative.display(),
259                datastore_path = %datastore_path.display(),
260                is_dir = expanded.is_dir,
261                "wrote expanded entry"
262            );
263
264            claimed_paths.insert(virtual_relative.clone());
265            source_map.insert(datastore_path.clone(), entry.absolute_path.clone());
266
267            virtual_entries.push(PackEntry {
268                relative_path: virtual_relative,
269                absolute_path: datastore_path,
270                is_dir: expanded.is_dir,
271            });
272        }
273    }
274
275    info!(
276        pack = %pack.name,
277        virtual_count = virtual_entries.len(),
278        "preprocessing complete"
279    );
280
281    Ok(PreprocessResult {
282        regular_entries,
283        virtual_entries,
284        source_map,
285    })
286}
287
288#[cfg(test)]
289mod tests {
290    use super::*;
291    use crate::datastore::FilesystemDataStore;
292    use crate::handlers::HandlerConfig;
293    use crate::preprocessing::identity::IdentityPreprocessor;
294    use crate::testing::TempEnvironment;
295    use std::sync::Arc;
296
297    fn make_pack(name: &str, path: PathBuf) -> Pack {
298        Pack {
299            name: name.into(),
300            path,
301            config: HandlerConfig::default(),
302        }
303    }
304
305    fn make_registry() -> PreprocessorRegistry {
306        let mut registry = PreprocessorRegistry::new();
307        registry.register(Box::new(IdentityPreprocessor::new()));
308        registry
309    }
310
311    fn make_datastore(env: &TempEnvironment) -> FilesystemDataStore {
312        let runner = Arc::new(crate::datastore::ShellCommandRunner);
313        FilesystemDataStore::new(env.fs.clone(), env.paths.clone(), runner)
314    }
315
316    #[test]
317    fn passthrough_when_no_preprocessor_files() {
318        let env = TempEnvironment::builder()
319            .pack("vim")
320            .file("vimrc", "set nocompatible")
321            .file("gvimrc", "set guifont=Mono")
322            .done()
323            .build();
324
325        let registry = make_registry();
326        let datastore = make_datastore(&env);
327        let pack = make_pack("vim", env.dotfiles_root.join("vim"));
328
329        let entries = vec![
330            PackEntry {
331                relative_path: "vimrc".into(),
332                absolute_path: env.dotfiles_root.join("vim/vimrc"),
333                is_dir: false,
334            },
335            PackEntry {
336                relative_path: "gvimrc".into(),
337                absolute_path: env.dotfiles_root.join("vim/gvimrc"),
338                is_dir: false,
339            },
340        ];
341
342        let result =
343            preprocess_pack(entries, &registry, &pack, env.fs.as_ref(), &datastore).unwrap();
344
345        assert_eq!(result.regular_entries.len(), 2);
346        assert!(result.virtual_entries.is_empty());
347        assert!(result.source_map.is_empty());
348    }
349
350    #[test]
351    fn identity_preprocessor_creates_virtual_entry() {
352        let env = TempEnvironment::builder()
353            .pack("app")
354            .file("config.toml.identity", "host = localhost")
355            .done()
356            .build();
357
358        let registry = make_registry();
359        let datastore = make_datastore(&env);
360        let pack = make_pack("app", env.dotfiles_root.join("app"));
361
362        let entries = vec![PackEntry {
363            relative_path: "config.toml.identity".into(),
364            absolute_path: env.dotfiles_root.join("app/config.toml.identity"),
365            is_dir: false,
366        }];
367
368        let result =
369            preprocess_pack(entries, &registry, &pack, env.fs.as_ref(), &datastore).unwrap();
370
371        assert!(result.regular_entries.is_empty());
372        assert_eq!(result.virtual_entries.len(), 1);
373
374        let virtual_entry = &result.virtual_entries[0];
375        assert_eq!(virtual_entry.relative_path, PathBuf::from("config.toml"));
376        assert!(!virtual_entry.is_dir);
377
378        // Verify the file was written to the datastore
379        let content = env.fs.read_to_string(&virtual_entry.absolute_path).unwrap();
380        assert_eq!(content, "host = localhost");
381
382        // Verify source map
383        assert_eq!(
384            result.source_map[&virtual_entry.absolute_path],
385            env.dotfiles_root.join("app/config.toml.identity")
386        );
387    }
388
389    #[test]
390    fn mixed_pack_partitions_correctly() {
391        let env = TempEnvironment::builder()
392            .pack("app")
393            .file("config.toml.identity", "host = localhost")
394            .file("readme.txt", "hello")
395            .done()
396            .build();
397
398        let registry = make_registry();
399        let datastore = make_datastore(&env);
400        let pack = make_pack("app", env.dotfiles_root.join("app"));
401
402        let entries = vec![
403            PackEntry {
404                relative_path: "config.toml.identity".into(),
405                absolute_path: env.dotfiles_root.join("app/config.toml.identity"),
406                is_dir: false,
407            },
408            PackEntry {
409                relative_path: "readme.txt".into(),
410                absolute_path: env.dotfiles_root.join("app/readme.txt"),
411                is_dir: false,
412            },
413        ];
414
415        let result =
416            preprocess_pack(entries, &registry, &pack, env.fs.as_ref(), &datastore).unwrap();
417
418        assert_eq!(result.regular_entries.len(), 1);
419        assert_eq!(
420            result.regular_entries[0].relative_path,
421            PathBuf::from("readme.txt")
422        );
423
424        assert_eq!(result.virtual_entries.len(), 1);
425        assert_eq!(
426            result.virtual_entries[0].relative_path,
427            PathBuf::from("config.toml")
428        );
429    }
430
431    #[test]
432    fn collision_detection_rejects_conflict() {
433        let env = TempEnvironment::builder()
434            .pack("app")
435            .file("config.toml.identity", "preprocessed")
436            .file("config.toml", "regular")
437            .done()
438            .build();
439
440        let registry = make_registry();
441        let datastore = make_datastore(&env);
442        let pack = make_pack("app", env.dotfiles_root.join("app"));
443
444        let entries = vec![
445            PackEntry {
446                relative_path: "config.toml.identity".into(),
447                absolute_path: env.dotfiles_root.join("app/config.toml.identity"),
448                is_dir: false,
449            },
450            PackEntry {
451                relative_path: "config.toml".into(),
452                absolute_path: env.dotfiles_root.join("app/config.toml"),
453                is_dir: false,
454            },
455        ];
456
457        let err =
458            preprocess_pack(entries, &registry, &pack, env.fs.as_ref(), &datastore).unwrap_err();
459        assert!(
460            matches!(err, DodotError::PreprocessorCollision { .. }),
461            "expected PreprocessorCollision, got: {err}"
462        );
463    }
464
465    #[test]
466    fn merged_entries_combines_and_sorts() {
467        let result = PreprocessResult {
468            regular_entries: vec![PackEntry {
469                relative_path: "zebra".into(),
470                absolute_path: "/z".into(),
471                is_dir: false,
472            }],
473            virtual_entries: vec![PackEntry {
474                relative_path: "alpha".into(),
475                absolute_path: "/a".into(),
476                is_dir: false,
477            }],
478            source_map: HashMap::new(),
479        };
480
481        let merged = result.merged_entries();
482        assert_eq!(merged.len(), 2);
483        assert_eq!(merged[0].relative_path, PathBuf::from("alpha"));
484        assert_eq!(merged[1].relative_path, PathBuf::from("zebra"));
485    }
486
487    #[test]
488    fn empty_registry_passes_all_through() {
489        let env = TempEnvironment::builder()
490            .pack("app")
491            .file("config.toml.identity", "content")
492            .done()
493            .build();
494
495        let registry = PreprocessorRegistry::new(); // empty!
496        let datastore = make_datastore(&env);
497        let pack = make_pack("app", env.dotfiles_root.join("app"));
498
499        let entries = vec![PackEntry {
500            relative_path: "config.toml.identity".into(),
501            absolute_path: env.dotfiles_root.join("app/config.toml.identity"),
502            is_dir: false,
503        }];
504
505        let result =
506            preprocess_pack(entries, &registry, &pack, env.fs.as_ref(), &datastore).unwrap();
507
508        // With no preprocessors registered, the file is treated as regular
509        assert_eq!(result.regular_entries.len(), 1);
510        assert!(result.virtual_entries.is_empty());
511    }
512
513    #[test]
514    fn directories_are_never_preprocessed() {
515        let env = TempEnvironment::builder()
516            .pack("app")
517            .file("bin.identity/tool", "#!/bin/sh")
518            .done()
519            .build();
520
521        let registry = make_registry();
522        let datastore = make_datastore(&env);
523        let pack = make_pack("app", env.dotfiles_root.join("app"));
524
525        let entries = vec![PackEntry {
526            relative_path: "bin.identity".into(),
527            absolute_path: env.dotfiles_root.join("app/bin.identity"),
528            is_dir: true, // directory — should NOT be preprocessed
529        }];
530
531        let result =
532            preprocess_pack(entries, &registry, &pack, env.fs.as_ref(), &datastore).unwrap();
533
534        assert_eq!(result.regular_entries.len(), 1);
535        assert!(result.virtual_entries.is_empty());
536    }
537
538    #[test]
539    fn subdirectory_preprocessor_file_preserves_parent() {
540        let env = TempEnvironment::builder()
541            .pack("app")
542            .file("subdir/config.toml.identity", "nested content")
543            .done()
544            .build();
545
546        let registry = make_registry();
547        let datastore = make_datastore(&env);
548        let pack = make_pack("app", env.dotfiles_root.join("app"));
549
550        let entries = vec![PackEntry {
551            relative_path: "subdir/config.toml.identity".into(),
552            absolute_path: env.dotfiles_root.join("app/subdir/config.toml.identity"),
553            is_dir: false,
554        }];
555
556        let result =
557            preprocess_pack(entries, &registry, &pack, env.fs.as_ref(), &datastore).unwrap();
558
559        assert_eq!(result.virtual_entries.len(), 1);
560        assert_eq!(
561            result.virtual_entries[0].relative_path,
562            PathBuf::from("subdir/config.toml")
563        );
564    }
565
566    #[test]
567    fn multiple_preprocessor_files_in_one_pack() {
568        let env = TempEnvironment::builder()
569            .pack("app")
570            .file("config.toml.identity", "config content")
571            .file("settings.json.identity", "settings content")
572            .done()
573            .build();
574
575        let registry = make_registry();
576        let datastore = make_datastore(&env);
577        let pack = make_pack("app", env.dotfiles_root.join("app"));
578
579        let entries = vec![
580            PackEntry {
581                relative_path: "config.toml.identity".into(),
582                absolute_path: env.dotfiles_root.join("app/config.toml.identity"),
583                is_dir: false,
584            },
585            PackEntry {
586                relative_path: "settings.json.identity".into(),
587                absolute_path: env.dotfiles_root.join("app/settings.json.identity"),
588                is_dir: false,
589            },
590        ];
591
592        let result =
593            preprocess_pack(entries, &registry, &pack, env.fs.as_ref(), &datastore).unwrap();
594
595        assert!(result.regular_entries.is_empty());
596        assert_eq!(result.virtual_entries.len(), 2);
597
598        let names: Vec<String> = result
599            .virtual_entries
600            .iter()
601            .map(|e| e.relative_path.to_string_lossy().to_string())
602            .collect();
603        assert!(names.contains(&"config.toml".to_string()));
604        assert!(names.contains(&"settings.json".to_string()));
605
606        // Each should have a source_map entry
607        assert_eq!(result.source_map.len(), 2);
608    }
609
610    #[test]
611    fn pack_with_only_preprocessor_files() {
612        let env = TempEnvironment::builder()
613            .pack("app")
614            .file("only.conf.identity", "the only file")
615            .done()
616            .build();
617
618        let registry = make_registry();
619        let datastore = make_datastore(&env);
620        let pack = make_pack("app", env.dotfiles_root.join("app"));
621
622        let entries = vec![PackEntry {
623            relative_path: "only.conf.identity".into(),
624            absolute_path: env.dotfiles_root.join("app/only.conf.identity"),
625            is_dir: false,
626        }];
627
628        let result =
629            preprocess_pack(entries, &registry, &pack, env.fs.as_ref(), &datastore).unwrap();
630
631        assert!(result.regular_entries.is_empty());
632        assert_eq!(result.virtual_entries.len(), 1);
633        assert_eq!(result.merged_entries().len(), 1);
634    }
635
636    #[test]
637    fn source_map_is_complete() {
638        let env = TempEnvironment::builder()
639            .pack("app")
640            .file("a.conf.identity", "aaa")
641            .file("b.conf.identity", "bbb")
642            .file("regular.txt", "ccc")
643            .done()
644            .build();
645
646        let registry = make_registry();
647        let datastore = make_datastore(&env);
648        let pack = make_pack("app", env.dotfiles_root.join("app"));
649
650        let entries = vec![
651            PackEntry {
652                relative_path: "a.conf.identity".into(),
653                absolute_path: env.dotfiles_root.join("app/a.conf.identity"),
654                is_dir: false,
655            },
656            PackEntry {
657                relative_path: "b.conf.identity".into(),
658                absolute_path: env.dotfiles_root.join("app/b.conf.identity"),
659                is_dir: false,
660            },
661            PackEntry {
662                relative_path: "regular.txt".into(),
663                absolute_path: env.dotfiles_root.join("app/regular.txt"),
664                is_dir: false,
665            },
666        ];
667
668        let result =
669            preprocess_pack(entries, &registry, &pack, env.fs.as_ref(), &datastore).unwrap();
670
671        // Every virtual entry must have a source_map entry
672        for ve in &result.virtual_entries {
673            assert!(
674                result.source_map.contains_key(&ve.absolute_path),
675                "virtual entry {} has no source_map entry",
676                ve.absolute_path.display()
677            );
678        }
679        // No regular entries in the source_map
680        for re in &result.regular_entries {
681            assert!(
682                !result.source_map.contains_key(&re.absolute_path),
683                "regular entry {} should not be in source_map",
684                re.absolute_path.display()
685            );
686        }
687    }
688
689    #[test]
690    fn preprocessing_is_idempotent() {
691        let env = TempEnvironment::builder()
692            .pack("app")
693            .file("config.toml.identity", "content")
694            .done()
695            .build();
696
697        let registry = make_registry();
698        let datastore = make_datastore(&env);
699        let pack = make_pack("app", env.dotfiles_root.join("app"));
700
701        let make_entries = || {
702            vec![PackEntry {
703                relative_path: "config.toml.identity".into(),
704                absolute_path: env.dotfiles_root.join("app/config.toml.identity"),
705                is_dir: false,
706            }]
707        };
708
709        let result1 = preprocess_pack(
710            make_entries(),
711            &registry,
712            &pack,
713            env.fs.as_ref(),
714            &datastore,
715        )
716        .unwrap();
717        let result2 = preprocess_pack(
718            make_entries(),
719            &registry,
720            &pack,
721            env.fs.as_ref(),
722            &datastore,
723        )
724        .unwrap();
725
726        assert_eq!(result1.virtual_entries.len(), result2.virtual_entries.len());
727        assert_eq!(
728            result1.virtual_entries[0].relative_path,
729            result2.virtual_entries[0].relative_path
730        );
731
732        // Datastore file should be the same content
733        let content1 = env
734            .fs
735            .read_to_string(&result1.virtual_entries[0].absolute_path)
736            .unwrap();
737        let content2 = env
738            .fs
739            .read_to_string(&result2.virtual_entries[0].absolute_path)
740            .unwrap();
741        assert_eq!(content1, content2);
742    }
743
744    #[test]
745    fn expansion_error_propagates() {
746        let env = TempEnvironment::builder()
747            .pack("app")
748            .file("placeholder", "")
749            .done()
750            .build();
751
752        let registry = make_registry();
753        let datastore = make_datastore(&env);
754        let pack = make_pack("app", env.dotfiles_root.join("app"));
755
756        // Point to a file that doesn't exist — expansion should fail
757        let entries = vec![PackEntry {
758            relative_path: "missing.conf.identity".into(),
759            absolute_path: env.dotfiles_root.join("app/missing.conf.identity"),
760            is_dir: false,
761        }];
762
763        let err =
764            preprocess_pack(entries, &registry, &pack, env.fs.as_ref(), &datastore).unwrap_err();
765        assert!(
766            matches!(err, DodotError::Fs { .. }),
767            "expected Fs error for missing file, got: {err}"
768        );
769    }
770
771    #[test]
772    fn inter_preprocessor_collision_detected() {
773        // Two preprocessors produce the same logical name.
774        // Set up: `config.toml.identity` and `config.toml.other` (custom
775        // extension) both strip to `config.toml`. The pipeline must
776        // detect this and refuse rather than silently overwriting.
777        let env = TempEnvironment::builder()
778            .pack("app")
779            .file("config.toml.identity", "a")
780            .file("config.toml.other", "b")
781            .done()
782            .build();
783
784        let mut registry = PreprocessorRegistry::new();
785        registry.register(Box::new(IdentityPreprocessor::new()));
786        registry.register(Box::new(IdentityPreprocessor::with_extension("other")));
787
788        let datastore = make_datastore(&env);
789        let pack = make_pack("app", env.dotfiles_root.join("app"));
790
791        let entries = vec![
792            PackEntry {
793                relative_path: "config.toml.identity".into(),
794                absolute_path: env.dotfiles_root.join("app/config.toml.identity"),
795                is_dir: false,
796            },
797            PackEntry {
798                relative_path: "config.toml.other".into(),
799                absolute_path: env.dotfiles_root.join("app/config.toml.other"),
800                is_dir: false,
801            },
802        ];
803
804        let err =
805            preprocess_pack(entries, &registry, &pack, env.fs.as_ref(), &datastore).unwrap_err();
806        assert!(
807            matches!(err, DodotError::PreprocessorCollision { .. }),
808            "expected PreprocessorCollision for inter-preprocessor clash, got: {err}"
809        );
810    }
811
812    #[test]
813    fn datastore_preserves_directory_structure() {
814        // Preprocessor files in subdirectories should land in matching
815        // subdirectories under the datastore, not be flattened with `__`.
816        let env = TempEnvironment::builder()
817            .pack("app")
818            .file("sub/config.toml.identity", "nested")
819            .done()
820            .build();
821
822        let registry = make_registry();
823        let datastore = make_datastore(&env);
824        let pack = make_pack("app", env.dotfiles_root.join("app"));
825
826        let entries = vec![PackEntry {
827            relative_path: "sub/config.toml.identity".into(),
828            absolute_path: env.dotfiles_root.join("app/sub/config.toml.identity"),
829            is_dir: false,
830        }];
831
832        let result =
833            preprocess_pack(entries, &registry, &pack, env.fs.as_ref(), &datastore).unwrap();
834
835        assert_eq!(result.virtual_entries.len(), 1);
836        let datastore_path = &result.virtual_entries[0].absolute_path;
837
838        // The datastore path should contain the subdirectory structure, not flattened
839        let ds_str = datastore_path.to_string_lossy();
840        assert!(
841            ds_str.contains("sub/config.toml"),
842            "datastore path should preserve directory structure, got: {ds_str}"
843        );
844        assert!(
845            !ds_str.contains("__"),
846            "datastore path should not contain flattening separator, got: {ds_str}"
847        );
848
849        // File should actually exist at that path
850        assert!(env.fs.exists(datastore_path));
851        let content = env.fs.read_to_string(datastore_path).unwrap();
852        assert_eq!(content, "nested");
853    }
854
855    #[test]
856    fn datastore_distinguishes_sibling_from_flattened_name() {
857        // Regression test for the flatten-with-`__` edge case: a user could
858        // have `a/b.txt` and `a__b.txt` both as preprocessor outputs, which
859        // would have collided under the old flattening scheme. With
860        // directory-preserving storage they live in distinct datastore paths.
861        let env = TempEnvironment::builder()
862            .pack("app")
863            .file("a/b.txt.identity", "nested")
864            .file("a__b.txt.identity", "flat")
865            .done()
866            .build();
867
868        let registry = make_registry();
869        let datastore = make_datastore(&env);
870        let pack = make_pack("app", env.dotfiles_root.join("app"));
871
872        let entries = vec![
873            PackEntry {
874                relative_path: "a/b.txt.identity".into(),
875                absolute_path: env.dotfiles_root.join("app/a/b.txt.identity"),
876                is_dir: false,
877            },
878            PackEntry {
879                relative_path: "a__b.txt.identity".into(),
880                absolute_path: env.dotfiles_root.join("app/a__b.txt.identity"),
881                is_dir: false,
882            },
883        ];
884
885        let result =
886            preprocess_pack(entries, &registry, &pack, env.fs.as_ref(), &datastore).unwrap();
887
888        assert_eq!(result.virtual_entries.len(), 2);
889
890        // Both files must exist with distinct content
891        let nested = result
892            .virtual_entries
893            .iter()
894            .find(|e| e.relative_path == std::path::Path::new("a/b.txt"))
895            .expect("nested entry");
896        let flat = result
897            .virtual_entries
898            .iter()
899            .find(|e| e.relative_path == std::path::Path::new("a__b.txt"))
900            .expect("flat entry");
901
902        assert_ne!(nested.absolute_path, flat.absolute_path);
903        assert_eq!(
904            env.fs.read_to_string(&nested.absolute_path).unwrap(),
905            "nested"
906        );
907        assert_eq!(env.fs.read_to_string(&flat.absolute_path).unwrap(), "flat");
908    }
909
910    // ── Path-traversal defenses ─────────────────────────────────
911
912    /// Test-only preprocessor that emits a configurable set of
913    /// [`crate::preprocessing::ExpandedFile`]s — lets tests inject
914    /// unsafe paths or directory entries without needing a real archive.
915    struct ScriptedPreprocessor {
916        name: &'static str,
917        extension: &'static str,
918        outputs: Vec<crate::preprocessing::ExpandedFile>,
919    }
920
921    impl crate::preprocessing::Preprocessor for ScriptedPreprocessor {
922        fn name(&self) -> &str {
923            self.name
924        }
925        fn transform_type(&self) -> crate::preprocessing::TransformType {
926            crate::preprocessing::TransformType::Opaque
927        }
928        fn matches_extension(&self, filename: &str) -> bool {
929            filename.ends_with(self.extension)
930        }
931        fn stripped_name(&self, filename: &str) -> String {
932            filename
933                .strip_suffix(self.extension)
934                .unwrap_or(filename)
935                .to_string()
936        }
937        fn expand(
938            &self,
939            _source: &Path,
940            _fs: &dyn Fs,
941        ) -> Result<Vec<crate::preprocessing::ExpandedFile>> {
942            Ok(self.outputs.clone())
943        }
944    }
945
946    #[test]
947    fn rejects_absolute_path_from_preprocessor() {
948        let env = TempEnvironment::builder()
949            .pack("app")
950            .file("bad.evil", "x")
951            .done()
952            .build();
953
954        let mut registry = PreprocessorRegistry::new();
955        registry.register(Box::new(ScriptedPreprocessor {
956            name: "evil",
957            extension: ".evil",
958            outputs: vec![crate::preprocessing::ExpandedFile {
959                relative_path: PathBuf::from("/etc/passwd"),
960                content: b"pwn".to_vec(),
961                is_dir: false,
962            }],
963        }));
964
965        let datastore = make_datastore(&env);
966        let pack = make_pack("app", env.dotfiles_root.join("app"));
967
968        let entries = vec![PackEntry {
969            relative_path: "bad.evil".into(),
970            absolute_path: env.dotfiles_root.join("app/bad.evil"),
971            is_dir: false,
972        }];
973
974        let err =
975            preprocess_pack(entries, &registry, &pack, env.fs.as_ref(), &datastore).unwrap_err();
976        assert!(
977            matches!(err, DodotError::PreprocessorError { ref message, .. } if message.contains("unsafe path")),
978            "expected unsafe-path error, got: {err}"
979        );
980        // Verify the malicious target was not written
981        assert!(!std::path::Path::new("/etc/passwd.dodot-would-have-written-here").exists());
982    }
983
984    #[test]
985    fn rejects_parent_dir_escape_from_preprocessor() {
986        let env = TempEnvironment::builder()
987            .pack("app")
988            .file("bad.evil", "x")
989            .done()
990            .build();
991
992        let mut registry = PreprocessorRegistry::new();
993        registry.register(Box::new(ScriptedPreprocessor {
994            name: "evil",
995            extension: ".evil",
996            outputs: vec![crate::preprocessing::ExpandedFile {
997                relative_path: PathBuf::from("../../escape.txt"),
998                content: b"pwn".to_vec(),
999                is_dir: false,
1000            }],
1001        }));
1002
1003        let datastore = make_datastore(&env);
1004        let pack = make_pack("app", env.dotfiles_root.join("app"));
1005
1006        let entries = vec![PackEntry {
1007            relative_path: "bad.evil".into(),
1008            absolute_path: env.dotfiles_root.join("app/bad.evil"),
1009            is_dir: false,
1010        }];
1011
1012        let err =
1013            preprocess_pack(entries, &registry, &pack, env.fs.as_ref(), &datastore).unwrap_err();
1014        assert!(
1015            matches!(err, DodotError::PreprocessorError { ref message, .. } if message.contains("unsafe path")),
1016            "expected unsafe-path error, got: {err}"
1017        );
1018    }
1019
1020    #[test]
1021    fn directory_entry_is_mkdird_not_written_as_file() {
1022        // A preprocessor emits a directory marker followed by a file
1023        // inside it. The pipeline must mkdir the directory rather than
1024        // writing a file at the directory path (which would break the
1025        // subsequent nested file write).
1026        let env = TempEnvironment::builder()
1027            .pack("app")
1028            .file("bundle.zz", "x")
1029            .done()
1030            .build();
1031
1032        let mut registry = PreprocessorRegistry::new();
1033        registry.register(Box::new(ScriptedPreprocessor {
1034            name: "scripted",
1035            extension: ".zz",
1036            outputs: vec![
1037                crate::preprocessing::ExpandedFile {
1038                    relative_path: PathBuf::from("sub"),
1039                    content: Vec::new(),
1040                    is_dir: true,
1041                },
1042                crate::preprocessing::ExpandedFile {
1043                    relative_path: PathBuf::from("sub/nested.txt"),
1044                    content: b"hello".to_vec(),
1045                    is_dir: false,
1046                },
1047            ],
1048        }));
1049
1050        let datastore = make_datastore(&env);
1051        let pack = make_pack("app", env.dotfiles_root.join("app"));
1052
1053        let entries = vec![PackEntry {
1054            relative_path: "bundle.zz".into(),
1055            absolute_path: env.dotfiles_root.join("app/bundle.zz"),
1056            is_dir: false,
1057        }];
1058
1059        let result =
1060            preprocess_pack(entries, &registry, &pack, env.fs.as_ref(), &datastore).unwrap();
1061
1062        assert_eq!(result.virtual_entries.len(), 2);
1063
1064        let dir_entry = result
1065            .virtual_entries
1066            .iter()
1067            .find(|e| e.is_dir)
1068            .expect("directory entry");
1069        assert!(
1070            env.fs.is_dir(&dir_entry.absolute_path),
1071            "directory entry should be a real directory: {}",
1072            dir_entry.absolute_path.display()
1073        );
1074
1075        let file_entry = result
1076            .virtual_entries
1077            .iter()
1078            .find(|e| !e.is_dir)
1079            .expect("file entry");
1080        assert_eq!(
1081            env.fs.read_to_string(&file_entry.absolute_path).unwrap(),
1082            "hello"
1083        );
1084    }
1085
1086    #[test]
1087    fn rejects_empty_path_from_preprocessor() {
1088        // A preprocessor that produces an empty relative_path (e.g. a
1089        // template file named literally `.tmpl` whose stripped name is
1090        // empty) must be rejected with a clean PreprocessorError, not
1091        // cascaded to the datastore's opaque "empty datastore path"
1092        // message.
1093        let env = TempEnvironment::builder()
1094            .pack("app")
1095            .file("bad.zz", "x")
1096            .done()
1097            .build();
1098
1099        let mut registry = PreprocessorRegistry::new();
1100        registry.register(Box::new(ScriptedPreprocessor {
1101            name: "scripted",
1102            extension: ".zz",
1103            outputs: vec![crate::preprocessing::ExpandedFile {
1104                relative_path: PathBuf::from(""),
1105                content: b"nope".to_vec(),
1106                is_dir: false,
1107            }],
1108        }));
1109
1110        let datastore = make_datastore(&env);
1111        let pack = make_pack("app", env.dotfiles_root.join("app"));
1112
1113        let entries = vec![PackEntry {
1114            relative_path: "bad.zz".into(),
1115            absolute_path: env.dotfiles_root.join("app/bad.zz"),
1116            is_dir: false,
1117        }];
1118
1119        let err =
1120            preprocess_pack(entries, &registry, &pack, env.fs.as_ref(), &datastore).unwrap_err();
1121        assert!(
1122            matches!(err, DodotError::PreprocessorError { ref message, .. } if message.contains("empty output path")),
1123            "expected empty-path error, got: {err}"
1124        );
1125    }
1126
1127    #[test]
1128    fn rejects_curdir_only_path_from_preprocessor() {
1129        // `./` or `.` alone normalises to empty — same rejection.
1130        let env = TempEnvironment::builder()
1131            .pack("app")
1132            .file("bad.zz", "x")
1133            .done()
1134            .build();
1135
1136        let mut registry = PreprocessorRegistry::new();
1137        registry.register(Box::new(ScriptedPreprocessor {
1138            name: "scripted",
1139            extension: ".zz",
1140            outputs: vec![crate::preprocessing::ExpandedFile {
1141                relative_path: PathBuf::from("."),
1142                content: b"nope".to_vec(),
1143                is_dir: false,
1144            }],
1145        }));
1146
1147        let datastore = make_datastore(&env);
1148        let pack = make_pack("app", env.dotfiles_root.join("app"));
1149
1150        let entries = vec![PackEntry {
1151            relative_path: "bad.zz".into(),
1152            absolute_path: env.dotfiles_root.join("app/bad.zz"),
1153            is_dir: false,
1154        }];
1155
1156        let err =
1157            preprocess_pack(entries, &registry, &pack, env.fs.as_ref(), &datastore).unwrap_err();
1158        assert!(
1159            matches!(err, DodotError::PreprocessorError { ref message, .. } if message.contains("empty output path")),
1160            "expected empty-path error, got: {err}"
1161        );
1162    }
1163
1164    #[test]
1165    fn curdir_prefixed_paths_collide_with_plain_paths() {
1166        // Two preprocessor outputs — one `./foo` and one `foo` — must
1167        // be treated as a collision. Before normalisation these lived
1168        // at distinct HashSet keys but the same datastore path, so the
1169        // second write silently clobbered the first.
1170        let env = TempEnvironment::builder()
1171            .pack("app")
1172            .file("bundle.zz", "x")
1173            .done()
1174            .build();
1175
1176        let mut registry = PreprocessorRegistry::new();
1177        registry.register(Box::new(ScriptedPreprocessor {
1178            name: "scripted",
1179            extension: ".zz",
1180            outputs: vec![
1181                crate::preprocessing::ExpandedFile {
1182                    relative_path: PathBuf::from("foo"),
1183                    content: b"first".to_vec(),
1184                    is_dir: false,
1185                },
1186                crate::preprocessing::ExpandedFile {
1187                    relative_path: PathBuf::from("./foo"),
1188                    content: b"second".to_vec(),
1189                    is_dir: false,
1190                },
1191            ],
1192        }));
1193
1194        let datastore = make_datastore(&env);
1195        let pack = make_pack("app", env.dotfiles_root.join("app"));
1196
1197        let entries = vec![PackEntry {
1198            relative_path: "bundle.zz".into(),
1199            absolute_path: env.dotfiles_root.join("app/bundle.zz"),
1200            is_dir: false,
1201        }];
1202
1203        let err =
1204            preprocess_pack(entries, &registry, &pack, env.fs.as_ref(), &datastore).unwrap_err();
1205        assert!(
1206            matches!(err, DodotError::PreprocessorCollision { .. }),
1207            "expected PreprocessorCollision for ./foo vs foo, got: {err}"
1208        );
1209    }
1210
1211    #[test]
1212    fn virtual_entry_relative_path_is_normalized() {
1213        // When a preprocessor emits `./foo`, the resulting virtual entry
1214        // must carry a normalised relative path. Otherwise downstream
1215        // code (e.g. rule matching or status display) sees both shapes
1216        // and treats them as different files.
1217        let env = TempEnvironment::builder()
1218            .pack("app")
1219            .file("bundle.zz", "x")
1220            .done()
1221            .build();
1222
1223        let mut registry = PreprocessorRegistry::new();
1224        registry.register(Box::new(ScriptedPreprocessor {
1225            name: "scripted",
1226            extension: ".zz",
1227            outputs: vec![crate::preprocessing::ExpandedFile {
1228                relative_path: PathBuf::from("./nested/file.txt"),
1229                content: b"hi".to_vec(),
1230                is_dir: false,
1231            }],
1232        }));
1233
1234        let datastore = make_datastore(&env);
1235        let pack = make_pack("app", env.dotfiles_root.join("app"));
1236
1237        let entries = vec![PackEntry {
1238            relative_path: "bundle.zz".into(),
1239            absolute_path: env.dotfiles_root.join("app/bundle.zz"),
1240            is_dir: false,
1241        }];
1242
1243        let result =
1244            preprocess_pack(entries, &registry, &pack, env.fs.as_ref(), &datastore).unwrap();
1245
1246        assert_eq!(result.virtual_entries.len(), 1);
1247        assert_eq!(
1248            result.virtual_entries[0].relative_path,
1249            PathBuf::from("nested/file.txt"),
1250            "CurDir components must be stripped from virtual entry"
1251        );
1252    }
1253}