1use std::io::Read;
10use std::path::{Component, Path};
11
12use crate::fs::Fs;
13use crate::preprocessing::{ExpandedFile, Preprocessor, TransformType};
14use crate::{DodotError, Result};
15
/// Reports whether an archive entry path is free of escaping components.
///
/// The path is accepted only when every component is a plain name or `.`.
/// A `..`, a root directory, or a path prefix anywhere in the path makes it
/// unsafe. A path with no components at all is accepted.
fn entry_path_is_safe(path: &Path) -> bool {
    path.components()
        .all(|part| matches!(part, Component::Normal(_) | Component::CurDir))
}
30
/// Preprocessor that expands gzip-compressed tar archives (`.tar.gz`).
///
/// The type is a stateless unit struct, so it is cheap to copy and every
/// instance is interchangeable. `Default` is derived and yields the same
/// value as [`UnarchivePreprocessor::new`].
#[derive(Debug, Clone, Copy, Default)]
pub struct UnarchivePreprocessor;

impl UnarchivePreprocessor {
    /// Creates a new preprocessor instance.
    pub fn new() -> Self {
        Self
    }
}
45
46impl Preprocessor for UnarchivePreprocessor {
47 fn name(&self) -> &str {
48 "unarchive"
49 }
50
51 fn transform_type(&self) -> TransformType {
52 TransformType::Opaque
53 }
54
55 fn matches_extension(&self, filename: &str) -> bool {
56 filename.ends_with(".tar.gz")
57 }
58
59 fn stripped_name(&self, filename: &str) -> String {
60 filename
61 .strip_suffix(".tar.gz")
62 .unwrap_or(filename)
63 .to_string()
64 }
65
66 fn expand(&self, source: &Path, fs: &dyn Fs) -> Result<Vec<ExpandedFile>> {
67 let reader = fs.open_read(source)?;
68 let gz = flate2::read::GzDecoder::new(reader);
69 let mut archive = tar::Archive::new(gz);
70
71 let mut expanded = Vec::new();
72
73 let entries = archive
74 .entries()
75 .map_err(|e| DodotError::PreprocessorError {
76 preprocessor: "unarchive".into(),
77 source_file: source.to_path_buf(),
78 message: format!("failed to read archive entries: {e}"),
79 })?;
80
81 for entry_result in entries {
82 let mut entry = entry_result.map_err(|e| DodotError::PreprocessorError {
83 preprocessor: "unarchive".into(),
84 source_file: source.to_path_buf(),
85 message: format!("failed to read archive entry: {e}"),
86 })?;
87
88 let entry_path = entry
89 .path()
90 .map_err(|e| DodotError::PreprocessorError {
91 preprocessor: "unarchive".into(),
92 source_file: source.to_path_buf(),
93 message: format!("invalid path in archive: {e}"),
94 })?
95 .into_owned();
96
97 if !entry_path_is_safe(&entry_path) {
99 return Err(DodotError::PreprocessorError {
100 preprocessor: "unarchive".into(),
101 source_file: source.to_path_buf(),
102 message: format!(
103 "unsafe entry path in archive: {} (absolute or contains `..`)",
104 entry_path.display()
105 ),
106 });
107 }
108
109 let entry_type = entry.header().entry_type();
114 if entry_type.is_dir() {
115 expanded.push(ExpandedFile {
116 relative_path: entry_path,
117 content: Vec::new(),
118 is_dir: true,
119 tracked_render: None,
120 context_hash: None,
121 });
122 } else if entry_type.is_file() {
123 let mut content = Vec::new();
124 entry
125 .read_to_end(&mut content)
126 .map_err(|e| DodotError::PreprocessorError {
127 preprocessor: "unarchive".into(),
128 source_file: source.to_path_buf(),
129 message: format!("failed to read entry content: {e}"),
130 })?;
131
132 expanded.push(ExpandedFile {
133 relative_path: entry_path,
134 content,
135 is_dir: false,
136 tracked_render: None,
137 context_hash: None,
138 });
139 } else {
140 return Err(DodotError::PreprocessorError {
141 preprocessor: "unarchive".into(),
142 source_file: source.to_path_buf(),
143 message: format!(
144 "unsupported tar entry type {:?} for {} (only regular files and directories are allowed)",
145 entry_type,
146 entry_path.display()
147 ),
148 });
149 }
150 }
151
152 Ok(expanded)
153 }
154}
155
156#[cfg(test)]
157mod tests {
158 use super::*;
159
160 #[test]
161 fn matches_tar_gz_extension() {
162 let pp = UnarchivePreprocessor::new();
163 assert!(pp.matches_extension("bin.tar.gz"));
164 assert!(pp.matches_extension("tools.tar.gz"));
165 assert!(!pp.matches_extension("file.tar"));
166 assert!(!pp.matches_extension("file.gz"));
167 assert!(!pp.matches_extension("file.zip"));
168 assert!(!pp.matches_extension("tar.gz")); }
170
171 #[test]
172 fn stripped_name_removes_extension() {
173 let pp = UnarchivePreprocessor::new();
174 assert_eq!(pp.stripped_name("bin.tar.gz"), "bin");
175 assert_eq!(pp.stripped_name("my-tools.tar.gz"), "my-tools");
176 assert_eq!(pp.stripped_name("nested.dir.tar.gz"), "nested.dir");
177 }
178
179 #[test]
180 fn trait_properties() {
181 let pp = UnarchivePreprocessor::new();
182 assert_eq!(pp.name(), "unarchive");
183 assert_eq!(pp.transform_type(), TransformType::Opaque);
184 }
185
186 #[test]
187 fn expand_extracts_tar_gz() {
188 use flate2::write::GzEncoder;
189 use flate2::Compression;
190
191 let env = crate::testing::TempEnvironment::builder()
192 .pack("tools")
193 .file("placeholder", "")
194 .done()
195 .build();
196
197 let archive_path = env.dotfiles_root.join("tools/bin.tar.gz");
199 let file = std::fs::File::create(&archive_path).unwrap();
200 let enc = GzEncoder::new(file, Compression::default());
201 let mut builder = tar::Builder::new(enc);
202
203 let content = b"#!/bin/sh\necho hello";
205 let mut header = tar::Header::new_gnu();
206 header.set_path("mytool").unwrap();
207 header.set_size(content.len() as u64);
208 header.set_mode(0o755);
209 header.set_cksum();
210 builder.append(&header, &content[..]).unwrap();
211
212 let content2 = b"#!/bin/sh\necho world";
214 let mut header2 = tar::Header::new_gnu();
215 header2.set_path("other-tool").unwrap();
216 header2.set_size(content2.len() as u64);
217 header2.set_mode(0o755);
218 header2.set_cksum();
219 builder.append(&header2, &content2[..]).unwrap();
220
221 let enc = builder.into_inner().unwrap();
222 enc.finish().unwrap();
223
224 let pp = UnarchivePreprocessor::new();
226 let result = pp.expand(&archive_path, env.fs.as_ref()).unwrap();
227
228 assert_eq!(result.len(), 2);
229
230 let names: Vec<String> = result
231 .iter()
232 .map(|f| f.relative_path.to_string_lossy().to_string())
233 .collect();
234 assert!(names.contains(&"mytool".to_string()));
235 assert!(names.contains(&"other-tool".to_string()));
236
237 let mytool = result
238 .iter()
239 .find(|f| f.relative_path.to_str() == Some("mytool"))
240 .unwrap();
241 assert_eq!(
242 String::from_utf8_lossy(&mytool.content),
243 "#!/bin/sh\necho hello"
244 );
245 assert!(!mytool.is_dir);
246 }
247
248 #[test]
249 fn expand_tar_gz_with_directory() {
250 use flate2::write::GzEncoder;
251 use flate2::Compression;
252
253 let env = crate::testing::TempEnvironment::builder()
254 .pack("tools")
255 .file("placeholder", "")
256 .done()
257 .build();
258
259 let archive_path = env.dotfiles_root.join("tools/stuff.tar.gz");
260 let file = std::fs::File::create(&archive_path).unwrap();
261 let enc = GzEncoder::new(file, Compression::default());
262 let mut builder = tar::Builder::new(enc);
263
264 let mut dir_header = tar::Header::new_gnu();
266 dir_header.set_path("subdir/").unwrap();
267 dir_header.set_size(0);
268 dir_header.set_entry_type(tar::EntryType::Directory);
269 dir_header.set_mode(0o755);
270 dir_header.set_cksum();
271 builder.append(&dir_header, &[][..]).unwrap();
272
273 let content = b"nested file";
275 let mut file_header = tar::Header::new_gnu();
276 file_header.set_path("subdir/nested.txt").unwrap();
277 file_header.set_size(content.len() as u64);
278 file_header.set_mode(0o644);
279 file_header.set_cksum();
280 builder.append(&file_header, &content[..]).unwrap();
281
282 let enc = builder.into_inner().unwrap();
283 enc.finish().unwrap();
284
285 let pp = UnarchivePreprocessor::new();
286 let result = pp.expand(&archive_path, env.fs.as_ref()).unwrap();
287
288 assert_eq!(result.len(), 2);
289
290 let dir_entry = result
291 .iter()
292 .find(|f| f.relative_path.to_str() == Some("subdir/"))
293 .expect("should have directory entry");
294 assert!(dir_entry.is_dir);
295
296 let file_entry = result
297 .iter()
298 .find(|f| f.relative_path.to_str() == Some("subdir/nested.txt"))
299 .expect("should have nested file");
300 assert!(!file_entry.is_dir);
301 assert_eq!(String::from_utf8_lossy(&file_entry.content), "nested file");
302 }
303
304 #[test]
305 fn expand_empty_tar_gz() {
306 use flate2::write::GzEncoder;
307 use flate2::Compression;
308
309 let env = crate::testing::TempEnvironment::builder()
310 .pack("tools")
311 .file("placeholder", "")
312 .done()
313 .build();
314
315 let archive_path = env.dotfiles_root.join("tools/empty.tar.gz");
316 let file = std::fs::File::create(&archive_path).unwrap();
317 let enc = GzEncoder::new(file, Compression::default());
318 let builder = tar::Builder::new(enc);
319 let enc = builder.into_inner().unwrap();
320 enc.finish().unwrap();
321
322 let pp = UnarchivePreprocessor::new();
323 let result = pp.expand(&archive_path, env.fs.as_ref()).unwrap();
324
325 assert!(result.is_empty(), "empty archive should expand to no files");
326 }
327
328 #[test]
329 fn expand_single_file_tar_gz() {
330 use flate2::write::GzEncoder;
331 use flate2::Compression;
332
333 let env = crate::testing::TempEnvironment::builder()
334 .pack("tools")
335 .file("placeholder", "")
336 .done()
337 .build();
338
339 let archive_path = env.dotfiles_root.join("tools/one.tar.gz");
340 let file = std::fs::File::create(&archive_path).unwrap();
341 let enc = GzEncoder::new(file, Compression::default());
342 let mut builder = tar::Builder::new(enc);
343
344 let content = b"single file";
345 let mut header = tar::Header::new_gnu();
346 header.set_path("only.txt").unwrap();
347 header.set_size(content.len() as u64);
348 header.set_mode(0o644);
349 header.set_cksum();
350 builder.append(&header, &content[..]).unwrap();
351
352 let enc = builder.into_inner().unwrap();
353 enc.finish().unwrap();
354
355 let pp = UnarchivePreprocessor::new();
356 let result = pp.expand(&archive_path, env.fs.as_ref()).unwrap();
357
358 assert_eq!(result.len(), 1);
359 assert_eq!(result[0].relative_path.to_str(), Some("only.txt"));
360 }
361
362 #[test]
363 fn expand_corrupted_archive_returns_error() {
364 let env = crate::testing::TempEnvironment::builder()
365 .pack("tools")
366 .file("bad.tar.gz", "this is not a valid gzip stream")
367 .done()
368 .build();
369
370 let pp = UnarchivePreprocessor::new();
371 let source = env.dotfiles_root.join("tools/bad.tar.gz");
372 let err = pp.expand(&source, env.fs.as_ref());
373
374 assert!(err.is_err(), "corrupted archive should produce an error");
375 }
376
377 #[test]
378 fn expand_missing_file_returns_error() {
379 let env = crate::testing::TempEnvironment::builder().build();
380
381 let pp = UnarchivePreprocessor::new();
382 let source = env.dotfiles_root.join("nonexistent.tar.gz");
383 let err = pp.expand(&source, env.fs.as_ref());
384
385 assert!(err.is_err(), "missing archive should produce an error");
386 }
387
388 fn write_malicious_tar_gz(archive_path: &Path, raw_path: &[u8], content: &[u8]) {
394 use flate2::write::GzEncoder;
395 use flate2::Compression;
396 use std::io::Write;
397
398 let mut header = [0u8; 512];
401
402 let name_len = raw_path.len().min(99);
404 header[..name_len].copy_from_slice(&raw_path[..name_len]);
405
406 header[100..108].copy_from_slice(b"0000644\0");
408
409 header[108..116].copy_from_slice(b"0000000\0");
411 header[116..124].copy_from_slice(b"0000000\0");
412
413 let size_str = format!("{:011o}\0", content.len());
415 header[124..136].copy_from_slice(size_str.as_bytes());
416
417 header[136..148].copy_from_slice(b"00000000000\0");
419
420 header[148..156].copy_from_slice(b" ");
422
423 header[156] = b'0';
425
426 header[257..263].copy_from_slice(b"ustar\0");
428 header[263..265].copy_from_slice(b"00");
430
431 let checksum: u32 = header.iter().map(|b| *b as u32).sum();
433 let cksum_str = format!("{checksum:06o}\0 ");
434 header[148..156].copy_from_slice(cksum_str.as_bytes());
435
436 let file = std::fs::File::create(archive_path).unwrap();
437 let mut enc = GzEncoder::new(file, Compression::default());
438 enc.write_all(&header).unwrap();
439
440 enc.write_all(content).unwrap();
442 let pad = (512 - content.len() % 512) % 512;
443 if pad > 0 {
444 enc.write_all(&vec![0u8; pad]).unwrap();
445 }
446
447 enc.write_all(&[0u8; 1024]).unwrap();
449
450 enc.finish().unwrap();
451 }
452
453 #[test]
454 fn rejects_tar_slip_absolute_path() {
455 let env = crate::testing::TempEnvironment::builder()
456 .pack("tools")
457 .file("placeholder", "")
458 .done()
459 .build();
460
461 let archive_path = env.dotfiles_root.join("tools/evil.tar.gz");
462 write_malicious_tar_gz(&archive_path, b"/etc/passwd", b"pwn");
463
464 let pp = UnarchivePreprocessor::new();
465 let err = pp.expand(&archive_path, env.fs.as_ref()).unwrap_err();
466 assert!(
467 matches!(err, DodotError::PreprocessorError { ref message, .. } if message.contains("unsafe entry path")),
468 "expected unsafe-path error, got: {err}"
469 );
470 }
471
472 #[test]
473 fn rejects_tar_slip_parent_dir() {
474 let env = crate::testing::TempEnvironment::builder()
475 .pack("tools")
476 .file("placeholder", "")
477 .done()
478 .build();
479
480 let archive_path = env.dotfiles_root.join("tools/evil.tar.gz");
481 write_malicious_tar_gz(&archive_path, b"../../escape.txt", b"pwn");
482
483 let pp = UnarchivePreprocessor::new();
484 let err = pp.expand(&archive_path, env.fs.as_ref()).unwrap_err();
485 assert!(
486 matches!(err, DodotError::PreprocessorError { ref message, .. } if message.contains("unsafe entry path")),
487 "expected unsafe-path error, got: {err}"
488 );
489 }
490
491 #[test]
492 fn rejects_symlink_entry() {
493 use flate2::write::GzEncoder;
494 use flate2::Compression;
495
496 let env = crate::testing::TempEnvironment::builder()
497 .pack("tools")
498 .file("placeholder", "")
499 .done()
500 .build();
501
502 let archive_path = env.dotfiles_root.join("tools/syms.tar.gz");
503 let file = std::fs::File::create(&archive_path).unwrap();
504 let enc = GzEncoder::new(file, Compression::default());
505 let mut builder = tar::Builder::new(enc);
506
507 let mut header = tar::Header::new_gnu();
508 header.set_path("link").unwrap();
509 header.set_size(0);
510 header.set_entry_type(tar::EntryType::Symlink);
511 header.set_link_name("/etc/passwd").unwrap();
512 header.set_mode(0o644);
513 header.set_cksum();
514 builder.append(&header, &[][..]).unwrap();
515
516 let enc = builder.into_inner().unwrap();
517 enc.finish().unwrap();
518
519 let pp = UnarchivePreprocessor::new();
520 let err = pp.expand(&archive_path, env.fs.as_ref()).unwrap_err();
521 assert!(
522 matches!(err, DodotError::PreprocessorError { ref message, .. } if message.contains("unsupported tar entry type")),
523 "expected unsupported-entry-type error, got: {err}"
524 );
525 }
526}