1use std::io::Read;
10use std::path::{Component, Path};
11
12use crate::fs::Fs;
13use crate::preprocessing::{ExpandedFile, Preprocessor, TransformType};
14use crate::{DodotError, Result};
15
/// Reports whether an archive entry path is free of escaping components.
///
/// Returns `true` only when every component of `path` is either a normal
/// name or `.`; a `..`, root, or prefix component anywhere in the path makes
/// it unsafe. A path with no components at all is considered safe.
fn entry_path_is_safe(path: &Path) -> bool {
    path.components()
        .all(|part| matches!(part, Component::Normal(_) | Component::CurDir))
}
30
/// Preprocessor for `.tar.gz` archives.
///
/// This is a unit struct: it holds no configuration or state.
pub struct UnarchivePreprocessor;

impl UnarchivePreprocessor {
    /// Creates a new preprocessor value.
    pub fn new() -> Self {
        UnarchivePreprocessor
    }
}

impl Default for UnarchivePreprocessor {
    /// Same value as [`UnarchivePreprocessor::new`].
    fn default() -> Self {
        UnarchivePreprocessor
    }
}
45
46impl Preprocessor for UnarchivePreprocessor {
47 fn name(&self) -> &str {
48 "unarchive"
49 }
50
51 fn transform_type(&self) -> TransformType {
52 TransformType::Opaque
53 }
54
55 fn matches_extension(&self, filename: &str) -> bool {
56 filename.ends_with(".tar.gz")
57 }
58
59 fn stripped_name(&self, filename: &str) -> String {
60 filename
61 .strip_suffix(".tar.gz")
62 .unwrap_or(filename)
63 .to_string()
64 }
65
66 fn expand(&self, source: &Path, fs: &dyn Fs) -> Result<Vec<ExpandedFile>> {
67 let reader = fs.open_read(source)?;
68 let gz = flate2::read::GzDecoder::new(reader);
69 let mut archive = tar::Archive::new(gz);
70
71 let mut expanded = Vec::new();
72
73 let entries = archive
74 .entries()
75 .map_err(|e| DodotError::PreprocessorError {
76 preprocessor: "unarchive".into(),
77 source_file: source.to_path_buf(),
78 message: format!("failed to read archive entries: {e}"),
79 })?;
80
81 for entry_result in entries {
82 let mut entry = entry_result.map_err(|e| DodotError::PreprocessorError {
83 preprocessor: "unarchive".into(),
84 source_file: source.to_path_buf(),
85 message: format!("failed to read archive entry: {e}"),
86 })?;
87
88 let entry_path = entry
89 .path()
90 .map_err(|e| DodotError::PreprocessorError {
91 preprocessor: "unarchive".into(),
92 source_file: source.to_path_buf(),
93 message: format!("invalid path in archive: {e}"),
94 })?
95 .into_owned();
96
97 if !entry_path_is_safe(&entry_path) {
99 return Err(DodotError::PreprocessorError {
100 preprocessor: "unarchive".into(),
101 source_file: source.to_path_buf(),
102 message: format!(
103 "unsafe entry path in archive: {} (absolute or contains `..`)",
104 entry_path.display()
105 ),
106 });
107 }
108
109 let entry_type = entry.header().entry_type();
114 if entry_type.is_dir() {
115 expanded.push(ExpandedFile {
116 relative_path: entry_path,
117 content: Vec::new(),
118 is_dir: true,
119 tracked_render: None,
120 context_hash: None,
121 secret_line_ranges: Vec::new(),
122 deploy_mode: None,
123 });
124 } else if entry_type.is_file() {
125 let mut content = Vec::new();
126 entry
127 .read_to_end(&mut content)
128 .map_err(|e| DodotError::PreprocessorError {
129 preprocessor: "unarchive".into(),
130 source_file: source.to_path_buf(),
131 message: format!("failed to read entry content: {e}"),
132 })?;
133
134 expanded.push(ExpandedFile {
135 relative_path: entry_path,
136 content,
137 is_dir: false,
138 tracked_render: None,
139 context_hash: None,
140 secret_line_ranges: Vec::new(),
141 deploy_mode: None,
142 });
143 } else {
144 return Err(DodotError::PreprocessorError {
145 preprocessor: "unarchive".into(),
146 source_file: source.to_path_buf(),
147 message: format!(
148 "unsupported tar entry type {:?} for {} (only regular files and directories are allowed)",
149 entry_type,
150 entry_path.display()
151 ),
152 });
153 }
154 }
155
156 Ok(expanded)
157 }
158}
159
#[cfg(test)]
mod tests {
    use super::*;

    use flate2::write::GzEncoder;
    use flate2::Compression;

    /// Tar builder that writes a gzip-compressed archive straight to a file.
    type TarGzBuilder = tar::Builder<GzEncoder<std::fs::File>>;

    /// Creates `archive_path` and returns a tar builder writing gzip data to it.
    fn begin_tar_gz(archive_path: &Path) -> TarGzBuilder {
        let file = std::fs::File::create(archive_path).unwrap();
        tar::Builder::new(GzEncoder::new(file, Compression::default()))
    }

    /// Appends one entry with a default GNU header, the given path, mode and
    /// content.
    fn append_file(builder: &mut TarGzBuilder, path: &str, mode: u32, content: &[u8]) {
        let mut header = tar::Header::new_gnu();
        header.set_path(path).unwrap();
        header.set_size(content.len() as u64);
        header.set_mode(mode);
        header.set_cksum();
        builder.append(&header, content).unwrap();
    }

    /// Finalizes the tar stream and then the gzip stream.
    fn finish_tar_gz(builder: TarGzBuilder) {
        let encoder = builder.into_inner().unwrap();
        encoder.finish().unwrap();
    }

    /// Only names ending in `.tar.gz` match; the bare string `tar.gz` (no
    /// leading dot) does not.
    #[test]
    fn matches_tar_gz_extension() {
        let pp = UnarchivePreprocessor::new();
        for accepted in ["bin.tar.gz", "tools.tar.gz"] {
            assert!(pp.matches_extension(accepted));
        }
        for rejected in ["file.tar", "file.gz", "file.zip", "tar.gz"] {
            assert!(!pp.matches_extension(rejected));
        }
    }

    /// Only the trailing `.tar.gz` is removed; earlier dots are kept.
    #[test]
    fn stripped_name_removes_extension() {
        let pp = UnarchivePreprocessor::new();
        let cases = [
            ("bin.tar.gz", "bin"),
            ("my-tools.tar.gz", "my-tools"),
            ("nested.dir.tar.gz", "nested.dir"),
        ];
        for (input, expected) in cases {
            assert_eq!(pp.stripped_name(input), expected);
        }
    }

    #[test]
    fn trait_properties() {
        let pp = UnarchivePreprocessor::new();
        assert_eq!(pp.name(), "unarchive");
        assert_eq!(pp.transform_type(), TransformType::Opaque);
    }

    /// Two regular files in the archive come back as two non-directory
    /// entries with their content intact.
    #[test]
    fn expand_extracts_tar_gz() {
        let env = crate::testing::TempEnvironment::builder()
            .pack("tools")
            .file("placeholder", "")
            .done()
            .build();
        let archive_path = env.dotfiles_root.join("tools/bin.tar.gz");

        let mut builder = begin_tar_gz(&archive_path);
        append_file(&mut builder, "mytool", 0o755, b"#!/bin/sh\necho hello");
        append_file(&mut builder, "other-tool", 0o755, b"#!/bin/sh\necho world");
        finish_tar_gz(builder);

        let result = UnarchivePreprocessor::new()
            .expand(&archive_path, env.fs.as_ref())
            .unwrap();

        assert_eq!(result.len(), 2);

        let has_entry = |wanted: &str| {
            result
                .iter()
                .any(|f| f.relative_path.to_string_lossy() == wanted)
        };
        assert!(has_entry("mytool"));
        assert!(has_entry("other-tool"));

        let mytool = result
            .iter()
            .find(|f| f.relative_path.to_str() == Some("mytool"))
            .unwrap();
        assert_eq!(
            String::from_utf8_lossy(&mytool.content),
            "#!/bin/sh\necho hello"
        );
        assert!(!mytool.is_dir);
    }

    /// A directory entry and a file nested under it are both returned.
    #[test]
    fn expand_tar_gz_with_directory() {
        let env = crate::testing::TempEnvironment::builder()
            .pack("tools")
            .file("placeholder", "")
            .done()
            .build();
        let archive_path = env.dotfiles_root.join("tools/stuff.tar.gz");

        let mut builder = begin_tar_gz(&archive_path);

        // Explicit directory entry: zero size, directory type flag.
        let mut dir_header = tar::Header::new_gnu();
        dir_header.set_path("subdir/").unwrap();
        dir_header.set_size(0);
        dir_header.set_entry_type(tar::EntryType::Directory);
        dir_header.set_mode(0o755);
        dir_header.set_cksum();
        builder.append(&dir_header, &[][..]).unwrap();

        append_file(&mut builder, "subdir/nested.txt", 0o644, b"nested file");
        finish_tar_gz(builder);

        let result = UnarchivePreprocessor::new()
            .expand(&archive_path, env.fs.as_ref())
            .unwrap();

        assert_eq!(result.len(), 2);

        let dir_entry = result
            .iter()
            .find(|f| f.relative_path.to_str() == Some("subdir/"))
            .expect("should have directory entry");
        assert!(dir_entry.is_dir);

        let file_entry = result
            .iter()
            .find(|f| f.relative_path.to_str() == Some("subdir/nested.txt"))
            .expect("should have nested file");
        assert!(!file_entry.is_dir);
        assert_eq!(String::from_utf8_lossy(&file_entry.content), "nested file");
    }

    /// A valid archive with no entries expands to an empty list.
    #[test]
    fn expand_empty_tar_gz() {
        let env = crate::testing::TempEnvironment::builder()
            .pack("tools")
            .file("placeholder", "")
            .done()
            .build();
        let archive_path = env.dotfiles_root.join("tools/empty.tar.gz");

        finish_tar_gz(begin_tar_gz(&archive_path));

        let result = UnarchivePreprocessor::new()
            .expand(&archive_path, env.fs.as_ref())
            .unwrap();

        assert!(result.is_empty(), "empty archive should expand to no files");
    }

    #[test]
    fn expand_single_file_tar_gz() {
        let env = crate::testing::TempEnvironment::builder()
            .pack("tools")
            .file("placeholder", "")
            .done()
            .build();
        let archive_path = env.dotfiles_root.join("tools/one.tar.gz");

        let mut builder = begin_tar_gz(&archive_path);
        append_file(&mut builder, "only.txt", 0o644, b"single file");
        finish_tar_gz(builder);

        let result = UnarchivePreprocessor::new()
            .expand(&archive_path, env.fs.as_ref())
            .unwrap();

        assert_eq!(result.len(), 1);
        assert_eq!(result[0].relative_path.to_str(), Some("only.txt"));
    }

    /// A file that is not gzip data at all must surface as an error.
    #[test]
    fn expand_corrupted_archive_returns_error() {
        let env = crate::testing::TempEnvironment::builder()
            .pack("tools")
            .file("bad.tar.gz", "this is not a valid gzip stream")
            .done()
            .build();
        let source = env.dotfiles_root.join("tools/bad.tar.gz");

        let outcome = UnarchivePreprocessor::new().expand(&source, env.fs.as_ref());

        assert!(outcome.is_err(), "corrupted archive should produce an error");
    }

    #[test]
    fn expand_missing_file_returns_error() {
        let env = crate::testing::TempEnvironment::builder().build();
        let source = env.dotfiles_root.join("nonexistent.tar.gz");

        let outcome = UnarchivePreprocessor::new().expand(&source, env.fs.as_ref());

        assert!(outcome.is_err(), "missing archive should produce an error");
    }

    /// Writes a single-entry `.tar.gz` whose entry name is `raw_path`, copied
    /// verbatim (truncated to 99 bytes) into a hand-assembled 512-byte header.
    ///
    /// The header is built byte by byte instead of through `tar::Header`,
    /// presumably because the crate's path setters refuse absolute and `..`
    /// paths — NOTE(review): confirm against the `tar` crate docs.
    fn write_malicious_tar_gz(archive_path: &Path, raw_path: &[u8], content: &[u8]) {
        use std::io::Write;

        let mut block = [0u8; 512];

        // name: at most 99 bytes so the 100-byte field stays NUL-terminated.
        let name_len = raw_path.len().min(99);
        block[..name_len].copy_from_slice(&raw_path[..name_len]);

        // mode
        block[100..108].copy_from_slice(b"0000644\0");

        // uid, gid
        block[108..116].copy_from_slice(b"0000000\0");
        block[116..124].copy_from_slice(b"0000000\0");

        // size: 11 octal digits followed by NUL
        let size_field = format!("{:011o}\0", content.len());
        block[124..136].copy_from_slice(size_field.as_bytes());

        // mtime
        block[136..148].copy_from_slice(b"00000000000\0");

        // checksum field holds eight spaces while the checksum is computed
        block[148..156].fill(b' ');

        // typeflag '0': regular file
        block[156] = b'0';

        // ustar magic and version
        block[257..263].copy_from_slice(b"ustar\0");
        block[263..265].copy_from_slice(b"00");

        // checksum: sum of all header bytes, stored as six octal digits,
        // a NUL and a space
        let checksum = block.iter().map(|&byte| u32::from(byte)).sum::<u32>();
        let checksum_field = format!("{checksum:06o}\0 ");
        block[148..156].copy_from_slice(checksum_field.as_bytes());

        let file = std::fs::File::create(archive_path).unwrap();
        let mut gz = GzEncoder::new(file, Compression::default());
        gz.write_all(&block).unwrap();

        // Entry data, zero-padded up to the next 512-byte boundary.
        gz.write_all(content).unwrap();
        let padding = (512 - content.len() % 512) % 512;
        if padding > 0 {
            gz.write_all(&vec![0u8; padding]).unwrap();
        }

        // End-of-archive marker: two all-zero 512-byte blocks.
        gz.write_all(&[0u8; 1024]).unwrap();

        gz.finish().unwrap();
    }

    /// An entry named with an absolute path is refused with an
    /// "unsafe entry path" error.
    #[test]
    fn rejects_tar_slip_absolute_path() {
        let env = crate::testing::TempEnvironment::builder()
            .pack("tools")
            .file("placeholder", "")
            .done()
            .build();
        let archive_path = env.dotfiles_root.join("tools/evil.tar.gz");
        write_malicious_tar_gz(&archive_path, b"/etc/passwd", b"pwn");

        let err = UnarchivePreprocessor::new()
            .expand(&archive_path, env.fs.as_ref())
            .unwrap_err();

        assert!(
            matches!(err, DodotError::PreprocessorError { ref message, .. } if message.contains("unsafe entry path")),
            "expected unsafe-path error, got: {err}"
        );
    }

    /// An entry whose path climbs out with `..` is refused with an
    /// "unsafe entry path" error.
    #[test]
    fn rejects_tar_slip_parent_dir() {
        let env = crate::testing::TempEnvironment::builder()
            .pack("tools")
            .file("placeholder", "")
            .done()
            .build();
        let archive_path = env.dotfiles_root.join("tools/evil.tar.gz");
        write_malicious_tar_gz(&archive_path, b"../../escape.txt", b"pwn");

        let err = UnarchivePreprocessor::new()
            .expand(&archive_path, env.fs.as_ref())
            .unwrap_err();

        assert!(
            matches!(err, DodotError::PreprocessorError { ref message, .. } if message.contains("unsafe entry path")),
            "expected unsafe-path error, got: {err}"
        );
    }

    /// A symlink entry is refused even though its own path is harmless.
    #[test]
    fn rejects_symlink_entry() {
        let env = crate::testing::TempEnvironment::builder()
            .pack("tools")
            .file("placeholder", "")
            .done()
            .build();
        let archive_path = env.dotfiles_root.join("tools/syms.tar.gz");

        let mut builder = begin_tar_gz(&archive_path);

        let mut link_header = tar::Header::new_gnu();
        link_header.set_path("link").unwrap();
        link_header.set_size(0);
        link_header.set_entry_type(tar::EntryType::Symlink);
        link_header.set_link_name("/etc/passwd").unwrap();
        link_header.set_mode(0o644);
        link_header.set_cksum();
        builder.append(&link_header, &[][..]).unwrap();

        finish_tar_gz(builder);

        let err = UnarchivePreprocessor::new()
            .expand(&archive_path, env.fs.as_ref())
            .unwrap_err();

        assert!(
            matches!(err, DodotError::PreprocessorError { ref message, .. } if message.contains("unsupported tar entry type")),
            "expected unsupported-entry-type error, got: {err}"
        );
    }
}