1use std::io::Read;
10use std::path::{Component, Path};
11
12use crate::fs::Fs;
13use crate::preprocessing::{ExpandedFile, Preprocessor, TransformType};
14use crate::{DodotError, Result};
15
/// Returns `true` when every component of `path` is a plain name or `.`.
///
/// Any `..`, root, or platform prefix component makes the path unsafe, since
/// such a path could resolve outside the directory it is joined onto.
/// A path with no components at all is reported as safe.
fn entry_path_is_safe(path: &Path) -> bool {
    path.components().all(|part| match part {
        // Only these two kinds keep the path inside its base directory.
        Component::Normal(_) | Component::CurDir => true,
        Component::ParentDir | Component::RootDir | Component::Prefix(_) => false,
    })
}
30
/// Preprocessor that unpacks `.tar.gz` archives.
///
/// The type carries no state; all behavior lives in its trait implementations.
pub struct UnarchivePreprocessor;

impl UnarchivePreprocessor {
    /// Creates a new preprocessor value.
    pub fn new() -> Self {
        UnarchivePreprocessor
    }
}

impl Default for UnarchivePreprocessor {
    /// Equivalent to [`UnarchivePreprocessor::new`].
    fn default() -> Self {
        UnarchivePreprocessor::new()
    }
}
45
46impl Preprocessor for UnarchivePreprocessor {
47 fn name(&self) -> &str {
48 "unarchive"
49 }
50
51 fn transform_type(&self) -> TransformType {
52 TransformType::Opaque
53 }
54
55 fn matches_extension(&self, filename: &str) -> bool {
56 filename.ends_with(".tar.gz")
57 }
58
59 fn stripped_name(&self, filename: &str) -> String {
60 filename
61 .strip_suffix(".tar.gz")
62 .unwrap_or(filename)
63 .to_string()
64 }
65
66 fn expand(&self, source: &Path, fs: &dyn Fs) -> Result<Vec<ExpandedFile>> {
67 let reader = fs.open_read(source)?;
68 let gz = flate2::read::GzDecoder::new(reader);
69 let mut archive = tar::Archive::new(gz);
70
71 let mut expanded = Vec::new();
72
73 let entries = archive
74 .entries()
75 .map_err(|e| DodotError::PreprocessorError {
76 preprocessor: "unarchive".into(),
77 source_file: source.to_path_buf(),
78 message: format!("failed to read archive entries: {e}"),
79 })?;
80
81 for entry_result in entries {
82 let mut entry = entry_result.map_err(|e| DodotError::PreprocessorError {
83 preprocessor: "unarchive".into(),
84 source_file: source.to_path_buf(),
85 message: format!("failed to read archive entry: {e}"),
86 })?;
87
88 let entry_path = entry
89 .path()
90 .map_err(|e| DodotError::PreprocessorError {
91 preprocessor: "unarchive".into(),
92 source_file: source.to_path_buf(),
93 message: format!("invalid path in archive: {e}"),
94 })?
95 .into_owned();
96
97 if !entry_path_is_safe(&entry_path) {
99 return Err(DodotError::PreprocessorError {
100 preprocessor: "unarchive".into(),
101 source_file: source.to_path_buf(),
102 message: format!(
103 "unsafe entry path in archive: {} (absolute or contains `..`)",
104 entry_path.display()
105 ),
106 });
107 }
108
109 let entry_type = entry.header().entry_type();
114 if entry_type.is_dir() {
115 expanded.push(ExpandedFile {
116 relative_path: entry_path,
117 content: Vec::new(),
118 is_dir: true,
119 });
120 } else if entry_type.is_file() {
121 let mut content = Vec::new();
122 entry
123 .read_to_end(&mut content)
124 .map_err(|e| DodotError::PreprocessorError {
125 preprocessor: "unarchive".into(),
126 source_file: source.to_path_buf(),
127 message: format!("failed to read entry content: {e}"),
128 })?;
129
130 expanded.push(ExpandedFile {
131 relative_path: entry_path,
132 content,
133 is_dir: false,
134 });
135 } else {
136 return Err(DodotError::PreprocessorError {
137 preprocessor: "unarchive".into(),
138 source_file: source.to_path_buf(),
139 message: format!(
140 "unsupported tar entry type {:?} for {} (only regular files and directories are allowed)",
141 entry_type,
142 entry_path.display()
143 ),
144 });
145 }
146 }
147
148 Ok(expanded)
149 }
150}
151
#[cfg(test)]
mod tests {
    use super::*;

    use flate2::write::GzEncoder;
    use flate2::Compression;

    /// Tar builder that writes through a gzip encoder into a file on disk.
    type TarGzBuilder = tar::Builder<GzEncoder<std::fs::File>>;

    /// Creates a `.tar.gz` at `archive_path`; `populate` appends the entries.
    fn write_tar_gz(archive_path: &Path, populate: impl FnOnce(&mut TarGzBuilder)) {
        let file = std::fs::File::create(archive_path).unwrap();
        let mut builder = tar::Builder::new(GzEncoder::new(file, Compression::default()));
        populate(&mut builder);
        // Finish the tar stream first, then the gzip stream wrapping it.
        let encoder = builder.into_inner().unwrap();
        encoder.finish().unwrap();
    }

    /// Appends a regular-file entry with the given path, mode and content.
    fn append_file(builder: &mut TarGzBuilder, path: &str, mode: u32, content: &[u8]) {
        let mut header = tar::Header::new_gnu();
        header.set_path(path).unwrap();
        header.set_size(content.len() as u64);
        header.set_mode(mode);
        header.set_cksum();
        builder.append(&header, content).unwrap();
    }

    /// Appends a zero-length entry of type `kind`, optionally with a link target.
    fn append_empty_entry(
        builder: &mut TarGzBuilder,
        path: &str,
        kind: tar::EntryType,
        link_target: Option<&str>,
        mode: u32,
    ) {
        let mut header = tar::Header::new_gnu();
        header.set_path(path).unwrap();
        header.set_size(0);
        header.set_entry_type(kind);
        if let Some(target) = link_target {
            header.set_link_name(target).unwrap();
        }
        header.set_mode(mode);
        header.set_cksum();
        builder.append(&header, &[][..]).unwrap();
    }

    /// Builds a temp environment with a `tools` pack, lets `write_archive`
    /// create `tools/<archive_name>`, and returns the result of expanding it.
    fn expand_in_tools_pack(
        archive_name: &str,
        write_archive: impl FnOnce(&Path),
    ) -> Result<Vec<ExpandedFile>> {
        let env = crate::testing::TempEnvironment::builder()
            .pack("tools")
            .file("placeholder", "")
            .done()
            .build();

        let archive_path = env.dotfiles_root.join(format!("tools/{archive_name}"));
        write_archive(archive_path.as_path());

        let preprocessor = UnarchivePreprocessor::new();
        preprocessor.expand(archive_path.as_path(), env.fs.as_ref())
    }

    /// Expands an archive whose entries are produced by `populate`.
    fn expand_archive_with(
        archive_name: &str,
        populate: impl FnOnce(&mut TarGzBuilder),
    ) -> Result<Vec<ExpandedFile>> {
        expand_in_tools_pack(archive_name, |path| write_tar_gz(path, populate))
    }

    /// Asserts that `err` is a `PreprocessorError` whose message contains `needle`.
    #[track_caller]
    fn assert_preprocessor_error(err: &DodotError, needle: &str, expectation: &str) {
        assert!(
            matches!(err, DodotError::PreprocessorError { message, .. } if message.contains(needle)),
            "expected {expectation} error, got: {err}"
        );
    }

    #[test]
    fn matches_tar_gz_extension() {
        let pp = UnarchivePreprocessor::new();
        for accepted in ["bin.tar.gz", "tools.tar.gz"] {
            assert!(pp.matches_extension(accepted), "{accepted} should match");
        }
        // The last case has no leading dot, so it does not end with `.tar.gz`.
        for rejected in ["file.tar", "file.gz", "file.zip", "tar.gz"] {
            assert!(!pp.matches_extension(rejected), "{rejected} should not match");
        }
    }

    #[test]
    fn stripped_name_removes_extension() {
        let pp = UnarchivePreprocessor::new();
        assert_eq!(pp.stripped_name("bin.tar.gz"), "bin");
        assert_eq!(pp.stripped_name("my-tools.tar.gz"), "my-tools");
        assert_eq!(pp.stripped_name("nested.dir.tar.gz"), "nested.dir");
    }

    #[test]
    fn trait_properties() {
        let pp = UnarchivePreprocessor::new();
        assert_eq!(pp.name(), "unarchive");
        assert_eq!(pp.transform_type(), TransformType::Opaque);
    }

    #[test]
    fn expand_extracts_tar_gz() {
        let result = expand_archive_with("bin.tar.gz", |builder| {
            append_file(builder, "mytool", 0o755, b"#!/bin/sh\necho hello");
            append_file(builder, "other-tool", 0o755, b"#!/bin/sh\necho world");
        })
        .unwrap();

        assert_eq!(result.len(), 2);

        let names: Vec<String> = result
            .iter()
            .map(|f| f.relative_path.to_string_lossy().to_string())
            .collect();
        assert!(names.contains(&"mytool".to_string()));
        assert!(names.contains(&"other-tool".to_string()));

        let mytool = result
            .iter()
            .find(|f| f.relative_path.to_str() == Some("mytool"))
            .unwrap();
        assert_eq!(
            String::from_utf8_lossy(&mytool.content),
            "#!/bin/sh\necho hello"
        );
        assert!(!mytool.is_dir);
    }

    #[test]
    fn expand_tar_gz_with_directory() {
        let result = expand_archive_with("stuff.tar.gz", |builder| {
            append_empty_entry(builder, "subdir/", tar::EntryType::Directory, None, 0o755);
            append_file(builder, "subdir/nested.txt", 0o644, b"nested file");
        })
        .unwrap();

        assert_eq!(result.len(), 2);

        let dir_entry = result
            .iter()
            .find(|f| f.relative_path.to_str() == Some("subdir/"))
            .expect("should have directory entry");
        assert!(dir_entry.is_dir);

        let file_entry = result
            .iter()
            .find(|f| f.relative_path.to_str() == Some("subdir/nested.txt"))
            .expect("should have nested file");
        assert!(!file_entry.is_dir);
        assert_eq!(String::from_utf8_lossy(&file_entry.content), "nested file");
    }

    #[test]
    fn expand_empty_tar_gz() {
        let result = expand_archive_with("empty.tar.gz", |_| {}).unwrap();

        assert!(result.is_empty(), "empty archive should expand to no files");
    }

    #[test]
    fn expand_single_file_tar_gz() {
        let result = expand_archive_with("one.tar.gz", |builder| {
            append_file(builder, "only.txt", 0o644, b"single file");
        })
        .unwrap();

        assert_eq!(result.len(), 1);
        assert_eq!(result[0].relative_path.to_str(), Some("only.txt"));
    }

    #[test]
    fn expand_corrupted_archive_returns_error() {
        let env = crate::testing::TempEnvironment::builder()
            .pack("tools")
            .file("bad.tar.gz", "this is not a valid gzip stream")
            .done()
            .build();

        let pp = UnarchivePreprocessor::new();
        let source = env.dotfiles_root.join("tools/bad.tar.gz");
        let err = pp.expand(&source, env.fs.as_ref());

        assert!(err.is_err(), "corrupted archive should produce an error");
    }

    #[test]
    fn expand_missing_file_returns_error() {
        let env = crate::testing::TempEnvironment::builder().build();

        let pp = UnarchivePreprocessor::new();
        let source = env.dotfiles_root.join("nonexistent.tar.gz");
        let err = pp.expand(&source, env.fs.as_ref());

        assert!(err.is_err(), "missing archive should produce an error");
    }

    /// Writes a single-entry `.tar.gz` whose ustar header is assembled by hand,
    /// so `raw_path` is stored verbatim in the name field.
    ///
    /// NOTE(review): presumably hand-rolled because `tar::Header::set_path`
    /// will not store absolute or `..` paths as-is — confirm against the
    /// `tar` crate documentation.
    fn write_malicious_tar_gz(archive_path: &Path, raw_path: &[u8], content: &[u8]) {
        use std::io::Write;

        let mut header = [0u8; 512];

        // name: bytes 0..100, capped at 99 so a trailing NUL always remains.
        let name_len = raw_path.len().min(99);
        header[..name_len].copy_from_slice(&raw_path[..name_len]);

        // mode, uid, gid: NUL-terminated octal fields.
        header[100..108].copy_from_slice(b"0000644\0");
        header[108..116].copy_from_slice(b"0000000\0");
        header[116..124].copy_from_slice(b"0000000\0");

        // size: 11 octal digits plus NUL.
        let size_field = format!("{:011o}\0", content.len());
        header[124..136].copy_from_slice(size_field.as_bytes());

        // mtime.
        header[136..148].copy_from_slice(b"00000000000\0");

        // The checksum is computed with its own 8-byte field set to spaces.
        // `fill` covers the whole field; a shorter literal passed to
        // `copy_from_slice` would panic on the length mismatch.
        header[148..156].fill(b' ');

        // typeflag '0': regular file.
        header[156] = b'0';

        // magic and version.
        header[257..263].copy_from_slice(b"ustar\0");
        header[263..265].copy_from_slice(b"00");

        // Unsigned byte sum of the header, stored as 6 octal digits, NUL, space.
        let checksum: u32 = header.iter().map(|&byte| u32::from(byte)).sum();
        let cksum_field = format!("{checksum:06o}\0 ");
        header[148..156].copy_from_slice(cksum_field.as_bytes());

        let file = std::fs::File::create(archive_path).unwrap();
        let mut enc = GzEncoder::new(file, Compression::default());
        enc.write_all(&header).unwrap();

        // Entry data is padded with zeros up to the next 512-byte boundary.
        enc.write_all(content).unwrap();
        let pad = (512 - content.len() % 512) % 512;
        if pad > 0 {
            enc.write_all(&vec![0u8; pad]).unwrap();
        }

        // Two all-zero 512-byte blocks mark the end of the archive.
        enc.write_all(&[0u8; 1024]).unwrap();

        enc.finish().unwrap();
    }

    #[test]
    fn rejects_tar_slip_absolute_path() {
        let err = expand_in_tools_pack("evil.tar.gz", |path| {
            write_malicious_tar_gz(path, b"/etc/passwd", b"pwn")
        })
        .unwrap_err();

        assert_preprocessor_error(&err, "unsafe entry path", "unsafe-path");
    }

    #[test]
    fn rejects_tar_slip_parent_dir() {
        let err = expand_in_tools_pack("evil.tar.gz", |path| {
            write_malicious_tar_gz(path, b"../../escape.txt", b"pwn")
        })
        .unwrap_err();

        assert_preprocessor_error(&err, "unsafe entry path", "unsafe-path");
    }

    #[test]
    fn rejects_symlink_entry() {
        let err = expand_archive_with("syms.tar.gz", |builder| {
            append_empty_entry(
                builder,
                "link",
                tar::EntryType::Symlink,
                Some("/etc/passwd"),
                0o644,
            );
        })
        .unwrap_err();

        assert_preprocessor_error(&err, "unsupported tar entry type", "unsupported-entry-type");
    }
}