archive_trait/lib.rs
1//! Format-neutral, asynchronous archive construction and extraction.
2//!
3//! Archive formats implement [`ArchiveBuilder`] and call
4//! [`ArchiveBuilder::builder`] to reuse high-level entry addition, recursive
5//! filesystem traversal, validation, and source streaming.
6//! Archive formats implement [`Archive`] by projecting their entries into
7//! [`Member`] values. The default [`Archive::extract_in`] implementation then
8//! applies common extraction policy and filesystem behavior.
9//!
10//! Extraction assumes unique access to the destination directory. Concurrent
11//! mutation of that directory is outside the threat model.
12
13pub mod builder;
14mod component_tree;
15pub mod extract;
16mod name;
17
18use std::{
19 io,
20 marker::PhantomData,
21 path::{Path, PathBuf},
22};
23
24use thiserror::Error;
25
26pub use builder::{ArchiveBuilder, BuildError, Builder, EntryMetadata, TraversalError};
27pub use name::{NameValidator, default_name_validator};
28
/// Metadata shared by every kind of archive member.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MemberMetadata {
    /// Archive-relative path of the member, as recorded in the archive and
    /// before any extraction-time normalization is applied.
    pub path: String,
    /// Byte position of the member within the source archive.
    pub position: u64,
}
37
/// The kinds of special file that generic extraction refuses on purpose.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SpecialKind {
    /// Character device node.
    CharacterDevice,
    /// Block device node.
    BlockDevice,
    /// Named pipe (FIFO).
    Fifo,
}
48
/// One format-neutral archive member.
///
/// The type parameter `P` is the streaming payload carried by the
/// [`Member::File`] and [`Member::HardLink`] variants; the remaining variants
/// carry no payload. Every variant holds a [`MemberMetadata`], which
/// [`Member::metadata`] returns without the caller matching on the variant.
#[derive(Debug)]
pub enum Member<P> {
    /// A regular file with a streaming payload.
    File {
        /// Common member metadata.
        metadata: MemberMetadata,
        /// The logical, decoded payload size in bytes.
        ///
        /// `payload` must yield exactly this many bytes before completing
        /// successfully. Format-level framing and compression are excluded.
        size: u64,
        /// Whether the archived mode carries executable intent.
        ///
        /// [`Archive::extract_in`] documents how this flag selects the
        /// creation mode of newly extracted regular files.
        executable: bool,
        /// The streaming member payload.
        payload: P,
    },
    /// A directory.
    Directory {
        /// Common member metadata.
        metadata: MemberMetadata,
    },
    /// A symbolic link.
    SymbolicLink {
        /// Common member metadata.
        metadata: MemberMetadata,
        /// The archive-provided link target, exactly as stored.
        target: String,
    },
    /// A hard link, optionally followed by replacement payload bytes.
    HardLink {
        /// Common member metadata.
        metadata: MemberMetadata,
        /// The archive-provided link target, exactly as stored.
        target: String,
        /// The logical, decoded replacement-payload size in bytes.
        ///
        /// `payload` must yield exactly this many bytes before completing
        /// successfully. Format-level framing and compression are excluded.
        size: u64,
        /// The streaming member payload.
        payload: P,
    },
    /// A parsed special file that cannot be extracted safely.
    ///
    /// NOTE(review): presumably surfaced to callers as
    /// `ExtractError::UnsupportedMember`; confirm in the `extract` module.
    Special {
        /// Common member metadata.
        metadata: MemberMetadata,
        /// The special-file kind.
        kind: SpecialKind,
    },
}
100
101impl<P> Member<P> {
102 /// Returns this member's common metadata.
103 pub fn metadata(&self) -> &MemberMetadata {
104 match self {
105 Self::File { metadata, .. }
106 | Self::Directory { metadata }
107 | Self::SymbolicLink { metadata, .. }
108 | Self::HardLink { metadata, .. }
109 | Self::Special { metadata, .. } => metadata,
110 }
111 }
112
113 fn lend_payload<'a>(self) -> Member<LentPayload<'a, P>> {
114 match self {
115 Self::File {
116 metadata,
117 size,
118 executable,
119 payload,
120 } => Member::File {
121 metadata,
122 size,
123 executable,
124 payload: LentPayload::new(payload),
125 },
126 Self::Directory { metadata } => Member::Directory { metadata },
127 Self::SymbolicLink { metadata, target } => Member::SymbolicLink { metadata, target },
128 Self::HardLink {
129 metadata,
130 target,
131 size,
132 payload,
133 } => Member::HardLink {
134 metadata,
135 target,
136 size,
137 payload: LentPayload::new(payload),
138 },
139 Self::Special { metadata, kind } => Member::Special { metadata, kind },
140 }
141 }
142}
143
/// A streaming cursor over one archive member's payload.
///
/// A payload is read incrementally with [`MemberPayload::next_chunk`] or
/// discarded in one step with [`MemberPayload::skip`]. Both methods are
/// `async` and report failures through the format-specific
/// [`MemberPayload::Error`] type.
#[expect(
    async_fn_in_trait,
    reason = "payload readers may be !Send and run on a local executor"
)]
pub trait MemberPayload: Sized {
    /// The archive-format error returned while reading the payload.
    type Error;

    /// Reads the next validated, logical payload chunk into a reusable buffer.
    ///
    /// Returns `true` after replacing `buffer` with a nonempty chunk. Returns
    /// `false` only after the payload has been fully consumed and validated,
    /// leaving `buffer` unchanged so its initialized storage can be reused.
    /// Callers should not clear `buffer` between calls. Implementations may
    /// return chunks shorter than `target_len`.
    ///
    /// Successful chunks contain decoded member contents rather than stored or
    /// compressed bytes. Their total length must equal the `size` declared by
    /// the enclosing [`Member`]; a mismatch must produce an error.
    ///
    /// # Errors
    ///
    /// Returns [`MemberPayload::Error`] when reading or validating the
    /// payload fails, including the size mismatch described above.
    async fn next_chunk(
        &mut self,
        buffer: &mut Vec<u8>,
        target_len: usize,
    ) -> Result<bool, Self::Error>;

    /// Discards and validates all remaining payload bytes.
    ///
    /// Takes the payload by value, so no further chunks can be read from it
    /// afterward.
    ///
    /// # Errors
    ///
    /// Returns [`MemberPayload::Error`] when the remaining bytes cannot be
    /// read or fail validation.
    async fn skip(self) -> Result<(), Self::Error>;
}
173
/// Payload wrapper that keeps the lending [`Members`] cursor borrowed.
///
/// [`Members::next`] hands out payloads wrapped in this type. Both fields are
/// private, so a payload whose concrete type does not itself borrow the
/// archive still cannot be detached from the cursor lifetime `'a`.
#[derive(Debug)]
pub struct LentPayload<'a, P> {
    payload: P,
    cursor: PhantomData<&'a mut ()>,
}

impl<P> LentPayload<'_, P> {
    /// Wraps `payload`; the cursor lifetime is inferred at the call site.
    fn new(payload: P) -> Self {
        let cursor = PhantomData;
        Self { payload, cursor }
    }
}
193
194impl<P: MemberPayload> MemberPayload for LentPayload<'_, P> {
195 type Error = P::Error;
196
197 async fn next_chunk(
198 &mut self,
199 buffer: &mut Vec<u8>,
200 target_len: usize,
201 ) -> Result<bool, Self::Error> {
202 self.payload.next_chunk(buffer, target_len).await
203 }
204
205 async fn skip(self) -> Result<(), Self::Error> {
206 self.payload.skip().await
207 }
208}
209
/// A consuming, lending member cursor.
///
/// Created by [`Archive::members`], which moves the archive into the cursor.
/// `Debug` is derived so the cursor can be logged or embedded in other
/// `Debug` types whenever the wrapped archive is itself `Debug`; the impl does
/// not exist for archives that are not `Debug`.
#[derive(Debug)]
pub struct Members<A> {
    /// The archive being iterated, owned for as long as the cursor lives.
    archive: A,
}
214
215impl<A: Archive> Members<A> {
216 /// Returns the next archive member.
217 ///
218 /// The returned payload borrows this cursor, so the cursor cannot advance
219 /// until that member is dropped or consumed.
220 pub async fn next<'a>(
221 &'a mut self,
222 ) -> Result<Option<Member<LentPayload<'a, A::Payload<'a>>>>, A::Error> {
223 Ok(self.archive.next_member().await?.map(Member::lend_payload))
224 }
225}
226
227/// A one-pass archive that can enumerate and extract format-neutral members.
228#[expect(
229 async_fn_in_trait,
230 reason = "archive readers may be !Send and run on a local executor"
231)]
232pub trait Archive: Sized {
233 /// The archive-format error returned during member iteration.
234 type Error;
235 /// The streaming payload type lent by each file member.
236 type Payload<'a>: MemberPayload<Error = Self::Error>
237 where
238 Self: 'a;
239
240 /// Reads the next format-neutral member for [`Members::next`].
241 ///
242 /// Implementations must drain and validate an unfinished preceding payload
243 /// before returning another member. Archive consumers should use
244 /// [`Archive::members`] rather than call this hook directly: [`Members`]
245 /// wraps each payload in [`LentPayload`] to enforce the lending cursor
246 /// contract even when a concrete payload type does not retain its lifetime.
247 async fn next_member<'a>(
248 &'a mut self,
249 ) -> Result<Option<Member<Self::Payload<'a>>>, Self::Error>;
250
251 /// Consumes this archive and returns its lending member cursor.
252 fn members(self) -> Members<Self> {
253 Members { archive: self }
254 }
255
256 /// Securely extracts this archive beneath `destination` under `policy`.
257 ///
258 /// `destination` is created if it does not already exist. Symbolic links
259 /// are preserved by default on platforms that support native creation;
260 /// hard links require explicit opt-in through [`extract::LinkPolicy`].
261 ///
262 /// Archived Unix permission modes are normalized rather than restored. New
263 /// regular files are created with mode `0o777` when executable intent is
264 /// set and `0o666` otherwise, in both cases filtered by the process umask.
265 /// Directories use the platform's default creation mode, and special mode
266 /// bits are not restored. Ownership and timestamps are likewise determined
267 /// by extraction activity rather than archived metadata.
268 ///
269 /// **IMPORTANT**: `destination` must not be concurrently modified during
270 /// extraction. No correctness or isolation guarantees are made under
271 /// external mutation.
272 ///
273 /// Extraction is streamwise: a late error can leave a partially extracted
274 /// destination. Callers requiring all-or-nothing behavior should extract
275 /// into a new temporary directory and atomically rename it afterward.
276 async fn extract_in<P: AsRef<Path>>(
277 self,
278 destination: P,
279 policy: extract::ExtractPolicy,
280 ) -> Result<(), ExtractError<Self::Error>> {
281 extract::extract(self.members(), destination.as_ref(), policy).await
282 }
283}
284
285/// A valid member feature rejected by the selected [`extract::ExtractPolicy`].
286#[derive(Clone, Debug, Eq, PartialEq, Error)]
287pub enum ExtractPolicyViolation {
288 /// An effective member name or link target was rejected.
289 #[error("archive {context} rejected by name policy: {value:?}")]
290 NameRejected {
291 /// The role of the rejected archive text.
292 context: &'static str,
293 /// The rejected UTF-8 value.
294 value: String,
295 },
296 /// A symbolic-link member appeared when links are forbidden.
297 #[error("symbolic-link members are not allowed")]
298 SymbolicLink,
299 /// A symbolic-link member requires native creation on an unsupported platform.
300 #[error("native symbolic-link creation is not supported on this platform")]
301 NativeSymlinkCreationUnsupported,
302 /// A hard-link member appeared when links are forbidden.
303 #[error("hard-link members are not allowed")]
304 HardLink,
305}
306
307/// An error produced while securely extracting an archive.
308#[derive(Debug, Error)]
309pub enum ExtractError<E> {
310 /// Reading or decoding the underlying archive failed.
311 #[error(transparent)]
312 Archive(E),
313 /// A destination filesystem operation failed.
314 #[error("failed to {operation} {path}: {source}")]
315 Filesystem {
316 /// The operation that failed.
317 operation: &'static str,
318 /// The path involved in the failed operation.
319 path: PathBuf,
320 /// The underlying I/O error.
321 #[source]
322 source: io::Error,
323 },
324 /// A blocking extraction operation failed to complete.
325 #[error("failed to complete blocking extraction operation: {0}")]
326 BlockingTask(#[from] tokio::task::JoinError),
327 /// An archive member path or link value is unsafe to extract.
328 #[error("at byte {position}: unsafe {context} {value:?}: {reason}")]
329 UnsafePath {
330 /// Source member position.
331 position: u64,
332 /// Whether this is a member path or link target.
333 context: &'static str,
334 /// Archive-provided value.
335 value: String,
336 /// Rejection reason.
337 reason: &'static str,
338 },
339 /// An archive entry collides with a path that cannot be replaced.
340 #[error("archive entry collides with existing path {path}")]
341 PathCollision {
342 /// Normalized extraction-relative path.
343 path: PathBuf,
344 },
345 /// A special member kind is deliberately excluded from extraction.
346 #[error("at byte {position}: cannot extract unsupported member type {kind:?} at {path}")]
347 UnsupportedMember {
348 /// Source member position.
349 position: u64,
350 /// Normalized extraction-relative path.
351 path: PathBuf,
352 /// Unsupported special-file kind.
353 kind: SpecialKind,
354 },
355 /// A symbolic or hard link cannot be safely resolved.
356 #[error("at byte {position}: invalid link {path} -> {target:?}: {reason}")]
357 InvalidLink {
358 /// Source member position.
359 position: u64,
360 /// Normalized link path.
361 path: PathBuf,
362 /// Archive-provided or normalized link target.
363 target: String,
364 /// Rejection reason.
365 reason: &'static str,
366 },
367 /// A structurally valid member was rejected by extraction policy.
368 #[error("at byte {position}: extraction policy rejected input: {violation}")]
369 PolicyViolation {
370 /// Source member position.
371 position: u64,
372 /// The selected policy rule that rejected the member.
373 violation: ExtractPolicyViolation,
374 },
375}
376
377impl<E> ExtractError<E> {
378 fn policy_violation(position: u64, violation: ExtractPolicyViolation) -> Self {
379 Self::PolicyViolation {
380 position,
381 violation,
382 }
383 }
384
385 fn invalid_link(position: u64, path: PathBuf, target: String, reason: &'static str) -> Self {
386 Self::InvalidLink {
387 position,
388 path,
389 target,
390 reason,
391 }
392 }
393
394 fn unsafe_path(
395 position: u64,
396 context: &'static str,
397 value: &str,
398 reason: &'static str,
399 ) -> Self {
400 Self::UnsafePath {
401 position,
402 context,
403 value: value.to_owned(),
404 reason,
405 }
406 }
407
408 fn filesystem(operation: &'static str, path: PathBuf, source: io::Error) -> Self {
409 Self::Filesystem {
410 operation,
411 path,
412 source,
413 }
414 }
415}