archive_trait/lib.rs
1//! Format-neutral, asynchronous archive construction and extraction.
2//!
3//! Archive formats implement [`ArchiveBuilder`] and call
4//! [`ArchiveBuilder::builder`] to reuse high-level entry addition, recursive
5//! filesystem traversal, validation, and source streaming.
6//! Archive formats implement [`Archive`] by projecting their entries into
7//! [`Member`] values. The default [`Archive::extract_in`] implementation then
8//! applies common extraction policy and filesystem behavior.
9//!
10//! Extraction assumes unique access to the destination directory. Concurrent
11//! mutation of that directory is outside the threat model.
12
13pub mod builder;
14mod component_tree;
15pub mod extract;
16mod name;
17
18use std::{
19 io,
20 marker::PhantomData,
21 path::{Path, PathBuf},
22};
23
24use thiserror::Error;
25
26pub use builder::{
27 ArchiveBuilder, BuildError, Builder, EntryMetadata, FilePayload, TraversalError,
28};
29pub use name::{NameValidator, default_name_validator};
30
/// Metadata shared by every kind of archive member.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MemberMetadata {
    /// Archive-relative path of the member, exactly as stored, before any
    /// extraction normalization is applied.
    pub path: String,
    /// Byte position of the member within the source archive.
    pub position: u64,
}
39
/// The kinds of special file that generic extraction refuses on purpose.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SpecialKind {
    /// A character device node.
    CharacterDevice,
    /// A block device node.
    BlockDevice,
    /// A FIFO (named pipe).
    Fifo,
}
50
51/// One format-neutral archive member.
52#[derive(Debug)]
53pub enum Member<P> {
54 /// A regular file with a streaming payload.
55 File {
56 /// Common member metadata.
57 metadata: MemberMetadata,
58 /// The logical, decoded payload size in bytes.
59 ///
60 /// `payload` must yield exactly this many bytes before completing
61 /// successfully. Format-level framing and compression are excluded.
62 size: u64,
63 /// Whether the archived mode carries executable intent.
64 executable: bool,
65 /// The streaming member payload.
66 payload: P,
67 },
68 /// A directory.
69 Directory {
70 /// Common member metadata.
71 metadata: MemberMetadata,
72 },
73 /// A symbolic link.
74 SymbolicLink {
75 /// Common member metadata.
76 metadata: MemberMetadata,
77 /// The archive-provided link target.
78 target: String,
79 },
80 /// A hard link, optionally followed by replacement payload bytes.
81 HardLink {
82 /// Common member metadata.
83 metadata: MemberMetadata,
84 /// The archive-provided link target.
85 target: String,
86 /// The logical, decoded replacement-payload size in bytes.
87 ///
88 /// `payload` must yield exactly this many bytes before completing
89 /// successfully. Format-level framing and compression are excluded.
90 size: u64,
91 /// The streaming member payload.
92 payload: P,
93 },
94 /// A parsed special file that cannot be extracted safely.
95 Special {
96 /// Common member metadata.
97 metadata: MemberMetadata,
98 /// The special-file kind.
99 kind: SpecialKind,
100 },
101}
102
103impl<P> Member<P> {
104 /// Returns this member's common metadata.
105 pub fn metadata(&self) -> &MemberMetadata {
106 match self {
107 Self::File { metadata, .. }
108 | Self::Directory { metadata }
109 | Self::SymbolicLink { metadata, .. }
110 | Self::HardLink { metadata, .. }
111 | Self::Special { metadata, .. } => metadata,
112 }
113 }
114
115 fn lend_payload<'a>(self) -> Member<LentPayload<'a, P>> {
116 match self {
117 Self::File {
118 metadata,
119 size,
120 executable,
121 payload,
122 } => Member::File {
123 metadata,
124 size,
125 executable,
126 payload: LentPayload::new(payload),
127 },
128 Self::Directory { metadata } => Member::Directory { metadata },
129 Self::SymbolicLink { metadata, target } => Member::SymbolicLink { metadata, target },
130 Self::HardLink {
131 metadata,
132 target,
133 size,
134 payload,
135 } => Member::HardLink {
136 metadata,
137 target,
138 size,
139 payload: LentPayload::new(payload),
140 },
141 Self::Special { metadata, kind } => Member::Special { metadata, kind },
142 }
143 }
144}
145
/// A forward-only streaming reader over the payload of one archive member.
#[expect(
    async_fn_in_trait,
    reason = "payload readers may be !Send and run on a local executor"
)]
pub trait MemberPayload: Sized {
    /// Format-specific error raised while the payload is being read.
    type Error;

    /// Fetches the next validated chunk of logical payload into `buffer`.
    ///
    /// Yields `true` once `buffer` has been replaced with a nonempty chunk.
    /// Yields `false` only when the whole payload has been consumed and
    /// validated; in that case `buffer` is left untouched so that its
    /// initialized storage stays available for reuse. Callers should not
    /// clear `buffer` between calls. A chunk may be shorter than
    /// `target_len`.
    ///
    /// Chunks hold decoded member contents, never stored or compressed bytes.
    /// The lengths of all successful chunks must add up to the `size`
    /// declared by the enclosing [`Member`]; implementations must report a
    /// mismatch as an error.
    async fn next_chunk(
        &mut self,
        buffer: &mut Vec<u8>,
        target_len: usize,
    ) -> Result<bool, Self::Error>;

    /// Consumes the payload, discarding and validating whatever bytes remain.
    async fn skip(self) -> Result<(), Self::Error>;
}
175
/// A member payload that holds its lending [`Members`] cursor borrowed for
/// `'a`.
///
/// [`Members::next`] hands out payloads inside this wrapper. Because the
/// fields are private, a payload whose concrete type does not itself borrow
/// the archive still cannot be separated from the cursor lifetime.
#[derive(Debug)]
pub struct LentPayload<'a, P> {
    /// The wrapped format-specific payload.
    payload: P,
    /// Zero-sized marker that makes the wrapper carry the lifetime `'a` as if
    /// it held a mutable borrow.
    cursor: PhantomData<&'a mut ()>,
}
186
187impl<P> LentPayload<'_, P> {
188 fn new(payload: P) -> Self {
189 Self {
190 payload,
191 cursor: PhantomData,
192 }
193 }
194}
195
196impl<P: MemberPayload> MemberPayload for LentPayload<'_, P> {
197 type Error = P::Error;
198
199 async fn next_chunk(
200 &mut self,
201 buffer: &mut Vec<u8>,
202 target_len: usize,
203 ) -> Result<bool, Self::Error> {
204 self.payload.next_chunk(buffer, target_len).await
205 }
206
207 async fn skip(self) -> Result<(), Self::Error> {
208 self.payload.skip().await
209 }
210}
211
/// A consuming, lending cursor over the members of an archive.
///
/// The cursor owns the archive it iterates. It is created by
/// [`Archive::members`] and advanced with [`Members::next`].
///
/// `Debug` is derived so that this type can be formatted like every other
/// public type in this module; the implementation is available whenever the
/// archive type `A` is itself `Debug`.
#[derive(Debug)]
pub struct Members<A> {
    /// The archive being iterated, owned for as long as the cursor lives.
    archive: A,
}
216
217impl<A: Archive> Members<A> {
218 /// Returns the next archive member.
219 ///
220 /// The returned payload borrows this cursor, so the cursor cannot advance
221 /// until that member is dropped or consumed.
222 pub async fn next<'a>(
223 &'a mut self,
224 ) -> Result<Option<Member<LentPayload<'a, A::Payload<'a>>>>, A::Error> {
225 Ok(self.archive.next_member().await?.map(Member::lend_payload))
226 }
227}
228
229/// A one-pass archive that can enumerate and extract format-neutral members.
230#[expect(
231 async_fn_in_trait,
232 reason = "archive readers may be !Send and run on a local executor"
233)]
234pub trait Archive: Sized {
235 /// The archive-format error returned during member iteration.
236 type Error;
237 /// The streaming payload type lent by each file member.
238 type Payload<'a>: MemberPayload<Error = Self::Error>
239 where
240 Self: 'a;
241
242 /// Reads the next format-neutral member for [`Members::next`].
243 ///
244 /// Implementations must drain and validate an unfinished preceding payload
245 /// before returning another member. Archive consumers should use
246 /// [`Archive::members`] rather than call this hook directly: [`Members`]
247 /// wraps each payload in [`LentPayload`] to enforce the lending cursor
248 /// contract even when a concrete payload type does not retain its lifetime.
249 async fn next_member<'a>(
250 &'a mut self,
251 ) -> Result<Option<Member<Self::Payload<'a>>>, Self::Error>;
252
253 /// Consumes this archive and returns its lending member cursor.
254 fn members(self) -> Members<Self> {
255 Members { archive: self }
256 }
257
258 /// Securely extracts this archive beneath `destination` under `policy`.
259 ///
260 /// `destination` is created if it does not already exist. Symbolic links
261 /// are preserved by default on platforms that support native creation;
262 /// hard links require explicit opt-in through [`extract::LinkPolicy`].
263 ///
264 /// Archived Unix permission modes are normalized rather than restored. New
265 /// regular files are created with mode `0o777` when executable intent is
266 /// set and `0o666` otherwise, in both cases filtered by the process umask.
267 /// Directories use the platform's default creation mode, and special mode
268 /// bits are not restored. Ownership and timestamps are likewise determined
269 /// by extraction activity rather than archived metadata.
270 ///
271 /// **IMPORTANT**: `destination` must not be concurrently modified during
272 /// extraction. No correctness or isolation guarantees are made under
273 /// external mutation.
274 ///
275 /// Extraction is streamwise: a late error can leave a partially extracted
276 /// destination. Callers requiring all-or-nothing behavior should extract
277 /// into a new temporary directory and atomically rename it afterward.
278 async fn extract_in<P: AsRef<Path>>(
279 self,
280 destination: P,
281 policy: extract::ExtractPolicy,
282 ) -> Result<(), ExtractError<Self::Error>> {
283 extract::extract(self.members(), destination.as_ref(), policy).await
284 }
285}
286
287/// A valid member feature rejected by the selected [`extract::ExtractPolicy`].
288#[derive(Clone, Debug, Eq, PartialEq, Error)]
289pub enum ExtractPolicyViolation {
290 /// An effective member name or link target was rejected.
291 #[error("archive {context} rejected by name policy: {value:?}")]
292 NameRejected {
293 /// The role of the rejected archive text.
294 context: &'static str,
295 /// The rejected UTF-8 value.
296 value: String,
297 },
298 /// A symbolic-link member appeared when links are forbidden.
299 #[error("symbolic-link members are not allowed")]
300 SymbolicLink,
301 /// A symbolic-link member requires native creation on an unsupported platform.
302 #[error("native symbolic-link creation is not supported on this platform")]
303 NativeSymlinkCreationUnsupported,
304 /// A hard-link member appeared when links are forbidden.
305 #[error("hard-link members are not allowed")]
306 HardLink,
307}
308
309/// An error produced while securely extracting an archive.
310#[derive(Debug, Error)]
311pub enum ExtractError<E> {
312 /// Reading or decoding the underlying archive failed.
313 #[error(transparent)]
314 Archive(E),
315 /// A destination filesystem operation failed.
316 #[error("failed to {operation} {path}: {source}")]
317 Filesystem {
318 /// The operation that failed.
319 operation: &'static str,
320 /// The path involved in the failed operation.
321 path: PathBuf,
322 /// The underlying I/O error.
323 #[source]
324 source: io::Error,
325 },
326 /// A blocking extraction operation failed to complete.
327 #[error("failed to complete blocking extraction operation: {0}")]
328 BlockingTask(#[from] tokio::task::JoinError),
329 /// An archive member path or link value is unsafe to extract.
330 #[error("at byte {position}: unsafe {context} {value:?}: {reason}")]
331 UnsafePath {
332 /// Source member position.
333 position: u64,
334 /// Whether this is a member path or link target.
335 context: &'static str,
336 /// Archive-provided value.
337 value: String,
338 /// Rejection reason.
339 reason: &'static str,
340 },
341 /// An archive entry collides with a path that cannot be replaced.
342 #[error("archive entry collides with existing path {path}")]
343 PathCollision {
344 /// Normalized extraction-relative path.
345 path: PathBuf,
346 },
347 /// A special member kind is deliberately excluded from extraction.
348 #[error("at byte {position}: cannot extract unsupported member type {kind:?} at {path}")]
349 UnsupportedMember {
350 /// Source member position.
351 position: u64,
352 /// Normalized extraction-relative path.
353 path: PathBuf,
354 /// Unsupported special-file kind.
355 kind: SpecialKind,
356 },
357 /// A symbolic or hard link cannot be safely resolved.
358 #[error("at byte {position}: invalid link {path} -> {target:?}: {reason}")]
359 InvalidLink {
360 /// Source member position.
361 position: u64,
362 /// Normalized link path.
363 path: PathBuf,
364 /// Archive-provided or normalized link target.
365 target: String,
366 /// Rejection reason.
367 reason: &'static str,
368 },
369 /// A structurally valid member was rejected by extraction policy.
370 #[error("at byte {position}: extraction policy rejected input: {violation}")]
371 PolicyViolation {
372 /// Source member position.
373 position: u64,
374 /// The selected policy rule that rejected the member.
375 violation: ExtractPolicyViolation,
376 },
377}
378
379impl<E> ExtractError<E> {
380 fn policy_violation(position: u64, violation: ExtractPolicyViolation) -> Self {
381 Self::PolicyViolation {
382 position,
383 violation,
384 }
385 }
386
387 fn invalid_link(position: u64, path: PathBuf, target: String, reason: &'static str) -> Self {
388 Self::InvalidLink {
389 position,
390 path,
391 target,
392 reason,
393 }
394 }
395
396 fn unsafe_path(
397 position: u64,
398 context: &'static str,
399 value: &str,
400 reason: &'static str,
401 ) -> Self {
402 Self::UnsafePath {
403 position,
404 context,
405 value: value.to_owned(),
406 reason,
407 }
408 }
409
410 fn filesystem(operation: &'static str, path: PathBuf, source: io::Error) -> Self {
411 Self::Filesystem {
412 operation,
413 path,
414 source,
415 }
416 }
417}