Skip to main content

composefs_oci/
tar.rs

1use std::{
2    cell::RefCell,
3    collections::BTreeMap,
4    ffi::{OsStr, OsString},
5    fmt,
6    io::Read,
7    os::unix::prelude::{OsStrExt, OsStringExt},
8    path::PathBuf,
9};
10
11use anyhow::{bail, ensure, Result};
12use rustix::fs::makedev;
13use tar::{EntryType, Header, PaxExtensions};
14use tokio::io::{AsyncRead, AsyncReadExt};
15
16use composefs::{
17    dumpfile,
18    fsverity::FsVerityHashValue,
19    splitstream::{SplitStreamData, SplitStreamReader, SplitStreamWriter},
20    tree::{LeafContent, RegularFile, Stat},
21    util::{read_exactish, read_exactish_async},
22    INLINE_CONTENT_MAX,
23};
24
25fn read_header<R: Read>(reader: &mut R) -> Result<Option<Header>> {
26    let mut header = Header::new_gnu();
27    if read_exactish(reader, header.as_mut_bytes())? {
28        Ok(Some(header))
29    } else {
30        Ok(None)
31    }
32}
33
34async fn read_header_async(reader: &mut (impl AsyncRead + Unpin)) -> Result<Option<Header>> {
35    let mut header = Header::new_gnu();
36    if read_exactish_async(reader, header.as_mut_bytes()).await? {
37        Ok(Some(header))
38    } else {
39        Ok(None)
40    }
41}
42
43/// Splits the tar file from tar_stream into a Split Stream.  The store_data function is
44/// responsible for ensuring that "external data" is in the composefs repository and returns the
45/// fsverity hash value of that data.
46pub fn split(
47    tar_stream: &mut impl Read,
48    writer: &mut SplitStreamWriter<impl FsVerityHashValue>,
49) -> Result<()> {
50    while let Some(header) = read_header(tar_stream)? {
51        // the header always gets stored as inline data
52        writer.write_inline(header.as_bytes());
53
54        if header.as_bytes() == &[0u8; 512] {
55            continue;
56        }
57
58        // read the corresponding data, if there is any
59        let actual_size = header.entry_size()? as usize;
60        let storage_size = (actual_size + 511) & !511;
61        let mut buffer = vec![0u8; storage_size];
62        tar_stream.read_exact(&mut buffer)?;
63
64        if header.entry_type() == EntryType::Regular && actual_size > INLINE_CONTENT_MAX {
65            // non-empty regular file: store the data in the object store
66            let padding = buffer.split_off(actual_size);
67            writer.write_external(&buffer, padding)?;
68        } else {
69            // else: store the data inline in the split stream
70            writer.write_inline(&buffer);
71        }
72    }
73    Ok(())
74}
75
76pub async fn split_async(
77    mut tar_stream: impl AsyncRead + Unpin,
78    writer: &mut SplitStreamWriter<impl FsVerityHashValue>,
79) -> Result<()> {
80    while let Some(header) = read_header_async(&mut tar_stream).await? {
81        // the header always gets stored as inline data
82        writer.write_inline(header.as_bytes());
83
84        if header.as_bytes() == &[0u8; 512] {
85            continue;
86        }
87
88        // read the corresponding data, if there is any
89        let actual_size = header.entry_size()? as usize;
90        let storage_size = (actual_size + 511) & !511;
91        let mut buffer = vec![0u8; storage_size];
92        tar_stream.read_exact(&mut buffer).await?;
93
94        if header.entry_type() == EntryType::Regular && actual_size > INLINE_CONTENT_MAX {
95            // non-empty regular file: store the data in the object store
96            let padding = buffer.split_off(actual_size);
97            writer.write_external_async(buffer, padding).await?;
98        } else {
99            // else: store the data inline in the split stream
100            writer.write_inline(&buffer);
101        }
102    }
103    Ok(())
104}
105
106#[derive(Debug)]
107pub enum TarItem<ObjectID: FsVerityHashValue> {
108    Directory,
109    Leaf(LeafContent<ObjectID>),
110    Hardlink(OsString),
111}
112
113#[derive(Debug)]
114pub struct TarEntry<ObjectID: FsVerityHashValue> {
115    pub path: PathBuf,
116    pub stat: Stat,
117    pub item: TarItem<ObjectID>,
118}
119
120impl<ObjectID: FsVerityHashValue> fmt::Display for TarEntry<ObjectID> {
121    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
122        match self.item {
123            TarItem::Hardlink(ref target) => dumpfile::write_hardlink(fmt, &self.path, target),
124            TarItem::Directory => dumpfile::write_directory(fmt, &self.path, &self.stat, 1),
125            TarItem::Leaf(ref content) => {
126                dumpfile::write_leaf(fmt, &self.path, &self.stat, content, 1)
127            }
128        }
129    }
130}
131
/// Builds the absolute path of a tar entry.
///
/// The name is taken from `pax` if present, otherwise from `gnu` if it is
/// non-empty, otherwise from `short`. A leading `/` is prepended and, if the
/// result ends in `/`, exactly one trailing `/` is removed.
fn path_from_tar(pax: Option<Box<[u8]>>, gnu: Vec<u8>, short: &[u8]) -> PathBuf {
    // Pick the name source by priority: pax, then GNU long name, then the header field.
    let name: &[u8] = match &pax {
        Some(long) => &long[..],
        None if !gnu.is_empty() => gnu.as_slice(),
        None => short,
    };

    let mut bytes = Vec::with_capacity(name.len() + 1);
    bytes.push(b'/');
    bytes.extend_from_slice(name);

    // Directories are commonly stored with a trailing '/'; remove a single one.
    if bytes.ends_with(b"/") {
        bytes.pop();
    }

    OsString::from_vec(bytes).into()
}
152
/// Selects the target of a symlink entry.
///
/// The target is taken from `pax` if present, otherwise from `gnu` if it is
/// non-empty, otherwise from `short`. The bytes are returned unmodified.
fn symlink_target_from_tar(pax: Option<Box<[u8]>>, gnu: Vec<u8>, short: &[u8]) -> Box<OsStr> {
    let target: &[u8] = match pax.as_deref() {
        Some(long) => long,
        None if gnu.is_empty() => short,
        None => gnu.as_slice(),
    };
    Box::from(OsStr::from_bytes(target))
}
162
163pub fn get_entry<R: Read, ObjectID: FsVerityHashValue>(
164    reader: &mut SplitStreamReader<R, ObjectID>,
165) -> Result<Option<TarEntry<ObjectID>>> {
166    let mut gnu_longlink: Vec<u8> = vec![];
167    let mut gnu_longname: Vec<u8> = vec![];
168    let mut pax_longlink: Option<Box<[u8]>> = None;
169    let mut pax_longname: Option<Box<[u8]>> = None;
170    let mut xattrs = BTreeMap::new();
171
172    loop {
173        let mut buf = [0u8; 512];
174        if !reader.read_inline_exact(&mut buf)? || buf == [0u8; 512] {
175            return Ok(None);
176        }
177
178        let header = tar::Header::from_byte_slice(&buf);
179
180        let size = header.entry_size()?;
181
182        let item = match reader.read_exact(size as usize, ((size + 511) & !511) as usize)? {
183            SplitStreamData::External(id) => match header.entry_type() {
184                EntryType::Regular | EntryType::Continuous => {
185                    ensure!(
186                        size as usize > INLINE_CONTENT_MAX,
187                        "Splitstream incorrectly stored a small ({size} byte) file external"
188                    );
189                    TarItem::Leaf(LeafContent::Regular(RegularFile::External(id, size)))
190                }
191                _ => bail!("Unsupported external-chunked entry {header:?} {id:?}"),
192            },
193            SplitStreamData::Inline(content) => match header.entry_type() {
194                EntryType::GNULongLink => {
195                    gnu_longlink.extend(content);
196                    continue;
197                }
198                EntryType::GNULongName => {
199                    gnu_longname.extend(content);
200                    continue;
201                }
202                EntryType::XGlobalHeader => {
203                    todo!();
204                }
205                EntryType::XHeader => {
206                    for item in PaxExtensions::new(&content) {
207                        let extension = item?;
208                        let key = extension.key()?;
209                        let value = Box::from(extension.value_bytes());
210
211                        if key == "path" {
212                            pax_longname = Some(value);
213                        } else if key == "linkpath" {
214                            pax_longlink = Some(value);
215                        } else if let Some(xattr) = key.strip_prefix("SCHILY.xattr.") {
216                            xattrs.insert(Box::from(OsStr::new(xattr)), value);
217                        }
218                    }
219                    continue;
220                }
221                EntryType::Directory => TarItem::Directory,
222                EntryType::Regular | EntryType::Continuous => {
223                    ensure!(
224                        content.len() <= INLINE_CONTENT_MAX,
225                        "Splitstream incorrectly stored a large ({} byte) file inline",
226                        content.len()
227                    );
228                    TarItem::Leaf(LeafContent::Regular(RegularFile::Inline(content)))
229                }
230                EntryType::Link => TarItem::Hardlink({
231                    let Some(link_name) = header.link_name_bytes() else {
232                        bail!("link without a name?")
233                    };
234                    OsString::from(path_from_tar(pax_longlink, gnu_longlink, &link_name))
235                }),
236                EntryType::Symlink => TarItem::Leaf(LeafContent::Symlink({
237                    let Some(link_name) = header.link_name_bytes() else {
238                        bail!("symlink without a name?")
239                    };
240                    symlink_target_from_tar(pax_longlink, gnu_longlink, &link_name)
241                })),
242                EntryType::Block => TarItem::Leaf(LeafContent::BlockDevice(
243                    match (header.device_major()?, header.device_minor()?) {
244                        (Some(major), Some(minor)) => makedev(major, minor),
245                        _ => bail!("Device entry without device numbers?"),
246                    },
247                )),
248                EntryType::Char => TarItem::Leaf(LeafContent::CharacterDevice(
249                    match (header.device_major()?, header.device_minor()?) {
250                        (Some(major), Some(minor)) => makedev(major, minor),
251                        _ => bail!("Device entry without device numbers?"),
252                    },
253                )),
254                EntryType::Fifo => TarItem::Leaf(LeafContent::Fifo),
255                _ => {
256                    todo!("Unsupported entry {:?}", header);
257                }
258            },
259        };
260
261        return Ok(Some(TarEntry {
262            path: path_from_tar(pax_longname, gnu_longname, &header.path_bytes()),
263            stat: Stat {
264                st_uid: header.uid()? as u32,
265                st_gid: header.gid()? as u32,
266                st_mode: header.mode()?,
267                st_mtim_sec: header.mtime()? as i64,
268                xattrs: RefCell::new(xattrs),
269            },
270            item,
271        }));
272    }
273}