1use std::{
2 cell::RefCell,
3 collections::BTreeMap,
4 ffi::{OsStr, OsString},
5 fmt,
6 io::Read,
7 os::unix::prelude::{OsStrExt, OsStringExt},
8 path::PathBuf,
9};
10
11use anyhow::{bail, ensure, Result};
12use rustix::fs::makedev;
13use tar::{EntryType, Header, PaxExtensions};
14use tokio::io::{AsyncRead, AsyncReadExt};
15
16use composefs::{
17 dumpfile,
18 fsverity::FsVerityHashValue,
19 splitstream::{SplitStreamData, SplitStreamReader, SplitStreamWriter},
20 tree::{LeafContent, RegularFile, Stat},
21 util::{read_exactish, read_exactish_async},
22 INLINE_CONTENT_MAX,
23};
24
25fn read_header<R: Read>(reader: &mut R) -> Result<Option<Header>> {
26 let mut header = Header::new_gnu();
27 if read_exactish(reader, header.as_mut_bytes())? {
28 Ok(Some(header))
29 } else {
30 Ok(None)
31 }
32}
33
34async fn read_header_async(reader: &mut (impl AsyncRead + Unpin)) -> Result<Option<Header>> {
35 let mut header = Header::new_gnu();
36 if read_exactish_async(reader, header.as_mut_bytes()).await? {
37 Ok(Some(header))
38 } else {
39 Ok(None)
40 }
41}
42
43pub fn split(
47 tar_stream: &mut impl Read,
48 writer: &mut SplitStreamWriter<impl FsVerityHashValue>,
49) -> Result<()> {
50 while let Some(header) = read_header(tar_stream)? {
51 writer.write_inline(header.as_bytes());
53
54 if header.as_bytes() == &[0u8; 512] {
55 continue;
56 }
57
58 let actual_size = header.entry_size()? as usize;
60 let storage_size = (actual_size + 511) & !511;
61 let mut buffer = vec![0u8; storage_size];
62 tar_stream.read_exact(&mut buffer)?;
63
64 if header.entry_type() == EntryType::Regular && actual_size > INLINE_CONTENT_MAX {
65 let padding = buffer.split_off(actual_size);
67 writer.write_external(&buffer, padding)?;
68 } else {
69 writer.write_inline(&buffer);
71 }
72 }
73 Ok(())
74}
75
76pub async fn split_async(
77 mut tar_stream: impl AsyncRead + Unpin,
78 writer: &mut SplitStreamWriter<impl FsVerityHashValue>,
79) -> Result<()> {
80 while let Some(header) = read_header_async(&mut tar_stream).await? {
81 writer.write_inline(header.as_bytes());
83
84 if header.as_bytes() == &[0u8; 512] {
85 continue;
86 }
87
88 let actual_size = header.entry_size()? as usize;
90 let storage_size = (actual_size + 511) & !511;
91 let mut buffer = vec![0u8; storage_size];
92 tar_stream.read_exact(&mut buffer).await?;
93
94 if header.entry_type() == EntryType::Regular && actual_size > INLINE_CONTENT_MAX {
95 let padding = buffer.split_off(actual_size);
97 writer.write_external_async(buffer, padding).await?;
98 } else {
99 writer.write_inline(&buffer);
101 }
102 }
103 Ok(())
104}
105
106#[derive(Debug)]
107pub enum TarItem<ObjectID: FsVerityHashValue> {
108 Directory,
109 Leaf(LeafContent<ObjectID>),
110 Hardlink(OsString),
111}
112
113#[derive(Debug)]
114pub struct TarEntry<ObjectID: FsVerityHashValue> {
115 pub path: PathBuf,
116 pub stat: Stat,
117 pub item: TarItem<ObjectID>,
118}
119
120impl<ObjectID: FsVerityHashValue> fmt::Display for TarEntry<ObjectID> {
121 fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
122 match self.item {
123 TarItem::Hardlink(ref target) => dumpfile::write_hardlink(fmt, &self.path, target),
124 TarItem::Directory => dumpfile::write_directory(fmt, &self.path, &self.stat, 1),
125 TarItem::Leaf(ref content) => {
126 dumpfile::write_leaf(fmt, &self.path, &self.stat, content, 1)
127 }
128 }
129 }
130}
131
/// Builds the path of a tar entry from the available name sources.
///
/// The name is taken from `pax` if present, otherwise from `gnu` if it is
/// non-empty, otherwise from `short`. A leading `/` is prepended and, if the
/// result ends in `/`, exactly one trailing `/` is removed.
fn path_from_tar(pax: Option<Box<[u8]>>, gnu: Vec<u8>, short: &[u8]) -> PathBuf {
    // Pick the highest-priority name that is available.
    let name: &[u8] = match pax.as_deref() {
        Some(long) => long,
        None if gnu.is_empty() => short,
        None => gnu.as_slice(),
    };

    let mut bytes = Vec::with_capacity(name.len() + 1);
    bytes.push(b'/');
    bytes.extend_from_slice(name);

    // Directories are commonly stored with a trailing '/'; drop a single one.
    if bytes.ends_with(b"/") {
        bytes.pop();
    }

    PathBuf::from(OsString::from_vec(bytes))
}
152
/// Selects the target of a symlink from the available link-name sources.
///
/// The target is taken from `pax` if present, otherwise from `gnu` if it is
/// non-empty, otherwise from `short`. The bytes are returned unmodified.
fn symlink_target_from_tar(pax: Option<Box<[u8]>>, gnu: Vec<u8>, short: &[u8]) -> Box<OsStr> {
    let target: &[u8] = match pax.as_deref() {
        Some(long) => long,
        None if gnu.is_empty() => short,
        None => gnu.as_slice(),
    };
    Box::from(OsStr::from_bytes(target))
}
162
163pub fn get_entry<R: Read, ObjectID: FsVerityHashValue>(
164 reader: &mut SplitStreamReader<R, ObjectID>,
165) -> Result<Option<TarEntry<ObjectID>>> {
166 let mut gnu_longlink: Vec<u8> = vec![];
167 let mut gnu_longname: Vec<u8> = vec![];
168 let mut pax_longlink: Option<Box<[u8]>> = None;
169 let mut pax_longname: Option<Box<[u8]>> = None;
170 let mut xattrs = BTreeMap::new();
171
172 loop {
173 let mut buf = [0u8; 512];
174 if !reader.read_inline_exact(&mut buf)? || buf == [0u8; 512] {
175 return Ok(None);
176 }
177
178 let header = tar::Header::from_byte_slice(&buf);
179
180 let size = header.entry_size()?;
181
182 let item = match reader.read_exact(size as usize, ((size + 511) & !511) as usize)? {
183 SplitStreamData::External(id) => match header.entry_type() {
184 EntryType::Regular | EntryType::Continuous => {
185 ensure!(
186 size as usize > INLINE_CONTENT_MAX,
187 "Splitstream incorrectly stored a small ({size} byte) file external"
188 );
189 TarItem::Leaf(LeafContent::Regular(RegularFile::External(id, size)))
190 }
191 _ => bail!("Unsupported external-chunked entry {header:?} {id:?}"),
192 },
193 SplitStreamData::Inline(content) => match header.entry_type() {
194 EntryType::GNULongLink => {
195 gnu_longlink.extend(content);
196 continue;
197 }
198 EntryType::GNULongName => {
199 gnu_longname.extend(content);
200 continue;
201 }
202 EntryType::XGlobalHeader => {
203 todo!();
204 }
205 EntryType::XHeader => {
206 for item in PaxExtensions::new(&content) {
207 let extension = item?;
208 let key = extension.key()?;
209 let value = Box::from(extension.value_bytes());
210
211 if key == "path" {
212 pax_longname = Some(value);
213 } else if key == "linkpath" {
214 pax_longlink = Some(value);
215 } else if let Some(xattr) = key.strip_prefix("SCHILY.xattr.") {
216 xattrs.insert(Box::from(OsStr::new(xattr)), value);
217 }
218 }
219 continue;
220 }
221 EntryType::Directory => TarItem::Directory,
222 EntryType::Regular | EntryType::Continuous => {
223 ensure!(
224 content.len() <= INLINE_CONTENT_MAX,
225 "Splitstream incorrectly stored a large ({} byte) file inline",
226 content.len()
227 );
228 TarItem::Leaf(LeafContent::Regular(RegularFile::Inline(content)))
229 }
230 EntryType::Link => TarItem::Hardlink({
231 let Some(link_name) = header.link_name_bytes() else {
232 bail!("link without a name?")
233 };
234 OsString::from(path_from_tar(pax_longlink, gnu_longlink, &link_name))
235 }),
236 EntryType::Symlink => TarItem::Leaf(LeafContent::Symlink({
237 let Some(link_name) = header.link_name_bytes() else {
238 bail!("symlink without a name?")
239 };
240 symlink_target_from_tar(pax_longlink, gnu_longlink, &link_name)
241 })),
242 EntryType::Block => TarItem::Leaf(LeafContent::BlockDevice(
243 match (header.device_major()?, header.device_minor()?) {
244 (Some(major), Some(minor)) => makedev(major, minor),
245 _ => bail!("Device entry without device numbers?"),
246 },
247 )),
248 EntryType::Char => TarItem::Leaf(LeafContent::CharacterDevice(
249 match (header.device_major()?, header.device_minor()?) {
250 (Some(major), Some(minor)) => makedev(major, minor),
251 _ => bail!("Device entry without device numbers?"),
252 },
253 )),
254 EntryType::Fifo => TarItem::Leaf(LeafContent::Fifo),
255 _ => {
256 todo!("Unsupported entry {:?}", header);
257 }
258 },
259 };
260
261 return Ok(Some(TarEntry {
262 path: path_from_tar(pax_longname, gnu_longname, &header.path_bytes()),
263 stat: Stat {
264 st_uid: header.uid()? as u32,
265 st_gid: header.gid()? as u32,
266 st_mode: header.mode()?,
267 st_mtim_sec: header.mtime()? as i64,
268 xattrs: RefCell::new(xattrs),
269 },
270 item,
271 }));
272 }
273}