Skip to main content

cargo_deb/deb/
tar.rs

1use crate::assets::{Asset, AssetSource, SymlinkKind};
2use crate::error::{CDResult, CargoDebError};
3use crate::listener::Listener;
4use crate::PackageConfig;
5use crate::util::pathbytes::AsUnixPathBytes;
6use std::collections::HashSet;
7use std::io::{Read, Write};
8use std::path::{Component, Path, PathBuf};
9use std::{fs, io};
10use tar::{EntryType, Header as TarHeader};
11
12/// Tarball for control and data files
13pub(crate) struct Tarball<W: Write> {
14    added_directories: HashSet<Box<Path>>,
15    time: u64,
16    tar: tar::Builder<W>,
17}
18
19impl<W: Write> Tarball<W> {
20    pub fn new(dest: W, time: u64) -> Self {
21        Self {
22            added_directories: HashSet::new(),
23            time,
24            tar: tar::Builder::new(dest),
25        }
26    }
27
28    /// Copies all the files to be packaged into the tar archive.
29    pub fn archive_files(mut self, package_deb: &PackageConfig, rsyncable: bool, listener: &dyn Listener) -> CDResult<W> {
30        let mut archive_data_added = 0;
31        let mut prev_is_built = false;
32        let log_display_base_dir = std::env::current_dir().unwrap_or_default();
33
34        debug_assert!(package_deb.assets.unresolved.is_empty());
35        for asset in &package_deb.assets.resolved {
36            log_asset(asset, &log_display_base_dir, listener);
37
38            if let AssetSource::Symlink(symlink_kind) = &asset.source {
39
40                let link_name;
41                let link_name = match symlink_kind {
42                    SymlinkKind::Copied {source_path} => {
43                        link_name = fs::read_link(source_path)
44                            .map_err(|e| CargoDebError::IoFile("Symlink asset", e, source_path.clone()))?;
45                        &link_name
46                    }
47                    SymlinkKind::Created { target_path:_, link_name } => {
48                        link_name
49                    }
50                };
51
52                let Some(normalized_link_name) = normalize_link_name(&asset.c.target_path, link_name) else {
53                    return Err(CargoDebError::InvalidSymlink(asset.c.target_path.clone(), link_name.clone(), "would ascend beyond the root dir"));
54                };
55                
56                self.symlink(&asset.c.target_path, &normalized_link_name)?;
57            } else {
58                let out_data = asset.source.data()?;
59                if rsyncable {
60                    if archive_data_added > 1_000_000 || prev_is_built != asset.c.is_built() {
61                        self.flush().map_err(|e| CargoDebError::Io(e).context("error while writing tar archive"))?;
62                        archive_data_added = 0;
63                    }
64                    // puts synchronization point between non-code and code assets
65                    prev_is_built = asset.c.is_built();
66                    archive_data_added += out_data.len();
67                }
68                self.file(&asset.c.target_path, &out_data, asset.c.chmod.unwrap_or(0o644))?;
69            }
70        }
71
72        self.tar.into_inner().map_err(|e| CargoDebError::Io(e).context("error while finalizing tar archive"))
73    }
74
75    fn directory(&mut self, path: &Path) -> io::Result<()> {
76        let mut header = self.header_for_path(path, true)?;
77        header.set_mtime(self.time);
78        header.set_size(0);
79        header.set_mode(0o755);
80        header.set_entry_type(EntryType::Directory);
81        header.set_cksum();
82        self.tar.append(&header, &mut io::empty())
83    }
84
85    fn add_parent_directories(&mut self, path: &Path) -> CDResult<()> {
86        debug_assert!(path.is_relative());
87
88        let dirs = path.ancestors().skip(1)
89            .take_while(|&d| !self.added_directories.contains(d))
90            .filter(|&d| !d.as_os_str().is_empty())
91            .map(Box::from)
92            .collect::<Vec<_>>();
93
94        for directory in dirs.into_iter().rev() {
95            if let Err(e) = self.directory(&directory) {
96                return Err(CargoDebError::IoFile("Can't add directory to tarball", e, directory.into()));
97            }
98            self.added_directories.insert(directory);
99        }
100        Ok(())
101    }
102
103    pub(crate) fn file<P: AsRef<Path>>(&mut self, path: P, out_data: &[u8], chmod: u32) -> CDResult<()> {
104        self.file_(path.as_ref(), out_data, chmod)
105    }
106
107    fn file_(&mut self, path: &Path, out_data: &[u8], chmod: u32) -> CDResult<()> {
108        debug_assert!(path.is_relative());
109        self.add_parent_directories(path)?;
110
111        let mut header = self.header_for_path(path, false)
112            .map_err(|e| CargoDebError::IoFile("Can't set header path", e, path.into()))?;
113        header.set_mtime(self.time);
114        header.set_mode(chmod);
115        header.set_size(out_data.len() as u64);
116        header.set_cksum();
117        self.tar.append(&header, out_data)
118            .map_err(|e| CargoDebError::IoFile("Can't add file to tarball", e, path.into()))?;
119        Ok(())
120    }
121
122    pub(crate) fn symlink(&mut self, path: &Path, link_name: &Path) -> CDResult<()> {
123        debug_assert!(path.is_relative());
124        self.add_parent_directories(path.as_ref())?;
125
126        let mut header = self.header_for_path(path, false)
127            .map_err(|e| CargoDebError::IoFile("Can't set header path", e, path.into()))?;
128        header.set_mtime(self.time);
129        header.set_entry_type(EntryType::Symlink);
130        header.set_size(0);
131        header.set_mode(0o777);
132        header.set_link_name(link_name)
133            .map_err(|e| CargoDebError::IoFile("Can't set header link name", e, path.into()))?;
134        header.set_cksum();
135        self.tar.append(&header, &mut io::empty())
136            .map_err(|e| CargoDebError::IoFile("Can't add symlink to tarball", e, path.into()))?;
137        Ok(())
138    }
139
140    #[inline]
141    fn header_for_path(&mut self, path: &Path, is_dir: bool) -> io::Result<TarHeader> {
142        debug_assert!(path.is_relative());
143        let path_bytes = path.to_bytes();
144
145        let mut header = if path_bytes.len() < 98 {
146            TarHeader::new_old()
147        } else {
148            TarHeader::new_gnu()
149        };
150        self.set_header_path(&mut header, path_bytes, is_dir)?;
151        Ok(header)
152    }
153
154    #[inline(never)]
155    fn set_header_path(&mut self, header: &mut TarHeader, path_bytes: &[u8], is_dir: bool) -> io::Result<()> {
156        debug_assert!(is_dir || path_bytes.last() != Some(&b'/'));
157        let needs_slash = is_dir && path_bytes.last() != Some(&b'/');
158
159        const PREFIX: &[u8] = b"./";
160        let (prefix, path_slot) = header.as_old_mut().name.split_at_mut(PREFIX.len());
161        prefix.copy_from_slice(PREFIX);
162        let (path_slot, zero) = path_slot.split_at_mut(path_bytes.len().min(path_slot.len()));
163        path_slot.copy_from_slice(&path_bytes[..path_slot.len()]);
164        if cfg!(target_os = "windows") {
165            for b in path_slot {
166                if *b == b'\\' { *b = b'/' }
167            }
168        }
169
170        if let Some((t, rest)) = zero.split_first_mut() {
171            if !needs_slash {
172                *t = 0;
173                return Ok(());
174            }
175            if let Some(t2) = rest.first_mut() {
176                // Lintian insists on dir paths ending with /, which Rust doesn't
177                *t = b'/';
178                *t2 = 0;
179                return Ok(());
180            }
181        }
182
183        // GNU long name extension, copied from
184        // https://github.com/alexcrichton/tar-rs/blob/a1c3036af48fa02437909112239f0632e4cfcfae/src/builder.rs#L731-L744
185        let mut header = TarHeader::new_gnu();
186        const LONG_LINK: &[u8] = b"././@LongLink\0";
187        header.as_gnu_mut().ok_or(io::ErrorKind::Other)?
188            .name[..LONG_LINK.len()].copy_from_slice(LONG_LINK);
189        header.set_mode(0o644);
190        header.set_uid(0);
191        header.set_gid(0);
192        header.set_mtime(0);
193        // include \0 in len to be compliant with GNU tar
194        let suffix = b"/\0";
195        let suffix = if needs_slash { &suffix[..] } else { &suffix[1..] };
196        header.set_size((PREFIX.len() + path_bytes.len() + suffix.len()) as u64);
197        header.set_entry_type(EntryType::new(b'L'));
198        header.set_cksum();
199        self.tar.append(&header, PREFIX.chain(path_bytes).chain(suffix))
200    }
201
202    fn flush(&mut self) -> io::Result<()> {
203        self.tar.get_mut().flush()
204    }
205
206    pub fn into_inner(self) -> io::Result<W> {
207        self.tar.into_inner()
208    }
209}
210
/// Computes the link name to store for a symlink installed at `target_path`
/// (relative to the package root) that points to `link_name`.
///
/// `link_name` may be absolute or relative to the symlink's directory. It is
/// resolved lexically (no filesystem access) and then rewritten so that:
///
/// * links that stay within the same top-level directory are relative,
/// * links into another top-level directory (or to `/` itself) are absolute,
/// * a link to the symlink's own directory is `.`.
///
/// Returns `None` if `target_path` or `link_name` would ascend beyond the root dir.
///
/// # Panics
///
/// Panics if `target_path` normalizes to the root directory itself.
fn normalize_link_name(target_path: &Path, link_name: &Path) -> Option<PathBuf> {
    // normalize symlinks according to https://www.debian.org/doc/debian-policy/ch-files.html#symbolic-links
    // like dh_link https://manpages.debian.org/testing/debhelper/dh_link.1.en.html#DESCRIPTION
    let normalized_target_path = join_lexically("/".as_ref(), target_path)?;
    let target_parent = normalized_target_path.parent().expect("the root path is an invalid target");
    let resolved_link = join_lexically(target_parent, link_name)?;

    // target_parent and resolved_link are now absolute and don't contain /./ or /../ components

    let mut target_components = target_parent.components();
    let mut link_components = resolved_link.components();

    // `nth(1)` skips the root dir and yields the top-level directory, if there is one.
    let same_top_level = matches!(
        (target_components.nth(1), link_components.nth(1)),
        (Some(target_top), Some(link_top)) if target_top == link_top
    );
    if !same_top_level {
        // the paths differ in the top level folder (after the root dir), or one of them
        // is the root dir itself, so the link must be absolute
        return Some(resolved_link);
    }

    let target_rest: Vec<Component<'_>> = target_components.collect();
    let link_rest: Vec<Component<'_>> = link_components.collect();
    let shared = target_rest.iter().zip(&link_rest).take_while(|(t, l)| t == l).count();

    // Climb out of every directory that is not shared, then descend into the link's remainder.
    // Components are pushed one by one so the result never gets a trailing separator.
    let mut link = PathBuf::new();
    for _ in shared..target_rest.len() {
        link.push("..");
    }
    link.extend(&link_rest[shared..]);

    if link.as_os_str().is_empty() {
        // The link points at the directory that contains it; an empty link name is invalid.
        link.push(".");
    }
    Some(link)
}

// Join the two paths while normalizing them lexically, so that the final path contains no /./ or /../ components
// Assumes that base is already lexically normalized.
// Returns None if at some point we attempted to ascend beyond the first component of base
fn join_lexically(base: &Path, adjoint_path: &Path) -> Option<PathBuf> {
    let mut joined = base.to_path_buf();
    for comp in adjoint_path.components() {
        match comp {
            Component::Prefix(_) => unreachable!(),
            // An absolute path restarts from the root.
            Component::RootDir => joined = PathBuf::from("/"),
            Component::CurDir => {},
            Component::ParentDir => {
                // `pop` fails when there is no parent left to ascend to.
                if !joined.pop() {
                    return None;
                }
            },
            Component::Normal(name) => joined.push(name),
        }
    }
    Some(joined)
}
292
/// Table-driven check of `normalize_link_name`: (symlink location, link name, expected result).
#[test]
fn normalized_links() {
    let examples: [(&str, &str, Option<&str>); 9] = [
        ("usr/lib/foo", "/usr/share/bar", Some("../share/bar")),
        ("usr/lib/foo", "/usr/share/./bar", Some("../share/bar")),
        ("usr/lib/foo", "/usr/share/foo/../bar", Some("../share/bar")),
        ("usr/lib/foo", "/var/lib/foo/../bar", Some("/var/lib/bar")),
        ("usr/lib/foo", "/var/lib/foo/./bar", Some("/var/lib/foo/bar")),
        ("var/run", "/run", Some("/run")),
        ("usr/share/foo", "../../../var/lib/baz", None),
        ("usr/share/foo", "../../var/lib/baz", Some("/var/lib/baz")),
        ("usr/share/foo", "../../usr/lib/baz", Some("../lib/baz")),
    ];

    for (target, link_name, result) in examples {
        let actual = normalize_link_name(Path::new(target), Path::new(link_name));
        let expected = result.map(Path::new);
        assert_eq!(actual.as_deref(), expected, "{target} -> {link_name} should normalize to {result:?}");
    }
}
311
312fn log_asset(asset: &Asset, log_display_base_dir: &Path, listener: &dyn Listener) {
313    let operation = if let AssetSource::Symlink(_) = &asset.source {
314        "Linking"
315    } else {
316        "Adding"
317    };
318    let mut log_line = format!("'{}' {}-> {}",
319        asset.processed_from.as_ref().and_then(|p| p.original_path.as_deref()).or(asset.source.source_path())
320            .map(|p| p.strip_prefix(log_display_base_dir).unwrap_or(p))
321            .unwrap_or_else(|| Path::new("-")).display(),
322        asset.processed_from.as_ref().map(|p| p.action).unwrap_or_default(),
323        asset.c.target_path.display()
324    );
325    if let Some(len) = asset.source.file_size() {
326        let (size, unit) = human_size(len);
327        use std::fmt::Write;
328        let _ = write!(&mut log_line, " ({size}{unit})");
329    }
330    listener.progress(operation, log_line);
331}
332
/// Converts a byte count into a rounded-up value and its decimal unit
/// (`B` below 1000, `KB` below 1,000,000, otherwise `MB`).
fn human_size(len: u64) -> (u64, &'static str) {
    match len {
        0..=999 => (len, "B"),
        1_000..=999_999 => (len.div_ceil(1_000), "KB"),
        _ => (len.div_ceil(1_000_000), "MB"),
    }
}
342
#[cfg(test)]
mod tests {
    use super::Tarball;
    use std::{io::{Cursor, Read}, path::Path};
    use tar::{Archive, EntryType};

    /// Modification time given to every tarball built by these tests.
    const MTIME: u64 = 1234567890;

    /// One entry the archive is expected to contain, in archive order.
    struct ExpectedEntry<'a> {
        /// Entry path without the leading `./`.
        path: &'a str,
        entry_type: EntryType,
        mode: u32,
        /// Optional extra assertions run against the entry.
        check: Option<Box<dyn Fn(&mut tar::Entry<Cursor<Vec<u8>>>) + 'a>>,
    }

    impl<'a> ExpectedEntry<'a> {
        /// Attaches extra assertions to run against the matching archive entry.
        fn with_check<F>(self, check: F) -> Self
            where F: Fn(&mut tar::Entry<Cursor<Vec<u8>>>) + 'a
        {
            Self { check: Some(Box::new(check)), ..self }
        }
    }

    /// Expectation without extra checks.
    fn expected_entry(path: &str, entry_type: EntryType, mode: u32) -> ExpectedEntry<'_> {
        ExpectedEntry { path, entry_type, mode, check: None }
    }

    /// Reads the remaining data of an entry.
    fn read_all(entry: &mut tar::Entry<Cursor<Vec<u8>>>) -> Vec<u8> {
        let mut content = Vec::new();
        entry.read_to_end(&mut content).unwrap();
        content
    }

    /// Asserts that `tarball` contains exactly `expected_entries`, in order.
    fn check_tarball_content(tarball: Vec<u8>, expected_entries: &[ExpectedEntry]) {
        let mut archive = Archive::new(Cursor::new(tarball));
        let mut remaining = expected_entries.iter();

        for entry_result in archive.entries().unwrap() {
            let expected = remaining.next().expect("mismatched number of entries");
            let mut entry = entry_result.unwrap();

            let path = entry.path().unwrap().to_string_lossy().to_string();
            assert_eq!(path.strip_prefix("./").unwrap(), expected.path);
            assert_eq!(entry.header().entry_type(), expected.entry_type);
            assert_eq!(entry.header().mode().unwrap(), expected.mode);
            assert_eq!(entry.header().mtime().unwrap(), MTIME);

            if let Some(check) = &expected.check {
                check(&mut entry);
            }
        }
        assert!(remaining.next().is_none(), "mismatched number of entries");
    }

    #[test]
    fn basic() {
        let file_content = b"Hello, world!";
        let script_content = b"#!/bin/bash\necho 'test'";

        let mut tarball = Tarball::new(Vec::new(), MTIME);
        tarball.file("test/file.txt", file_content, 0o644).unwrap();
        tarball.file("usr/bin/script", script_content, 0o755).unwrap();
        tarball.symlink(Path::new("usr/bin/link"), Path::new("script")).unwrap();
        let buffer = tarball.into_inner().unwrap();

        check_tarball_content(buffer, &[
            expected_entry("test/", EntryType::Directory, 0o755),
            expected_entry("test/file.txt", EntryType::Regular, 0o644)
                .with_check(|entry| assert_eq!(read_all(entry), file_content)),
            expected_entry("usr/", EntryType::Directory, 0o755),
            expected_entry("usr/bin/", EntryType::Directory, 0o755),
            expected_entry("usr/bin/script", EntryType::Regular, 0o755)
                .with_check(|entry| assert_eq!(read_all(entry), script_content)),
            expected_entry("usr/bin/link", EntryType::Symlink, 0o777).with_check(|entry| {
                let link_name = entry.header().link_name().unwrap().unwrap();
                assert_eq!(link_name.to_string_lossy(), "script");
            }),
        ]);
    }

    #[test]
    fn long_path() {
        let level = "long/";
        let deep_path = level.repeat(25) + "file.txt";
        let long_filename = "very_".repeat(25) + "long_filename.txt";

        let mut tarball = Tarball::new(Vec::new(), MTIME);
        tarball.file("a.txt", b"start", 0o644).unwrap();
        tarball.file(&deep_path, b"long path", 0o644).unwrap();
        tarball.file(&long_filename, b"long filename", 0o644).unwrap();
        tarball.file("b.txt", b"end", 0o644).unwrap();
        let buffer = tarball.into_inner().unwrap();

        let mut expected_entries = Vec::new();
        expected_entries.push(expected_entry("a.txt", EntryType::Regular, 0o644));
        // One directory entry per nesting level: "long/", "long/long/", ...
        for depth in 1..=25 {
            expected_entries.push(expected_entry(&deep_path[..depth * level.len()], EntryType::Directory, 0o755));
        }
        expected_entries.push(expected_entry(&deep_path, EntryType::Regular, 0o644));
        expected_entries.push(expected_entry(&long_filename, EntryType::Regular, 0o644));
        expected_entries.push(expected_entry("b.txt", EntryType::Regular, 0o644));

        check_tarball_content(buffer, &expected_entries);
    }
}
450}