// archive_trait/lib.rs

//! Format-neutral, asynchronous archive construction and extraction.
//!
//! For construction, archive formats implement [`ArchiveBuilder`] and call
//! [`ArchiveBuilder::builder`] to reuse high-level entry addition, recursive
//! filesystem traversal, validation, and source streaming.
//! For extraction, archive formats implement [`Archive`] by projecting their
//! entries into [`Member`] values. The default [`Archive::extract_in`]
//! implementation then applies common extraction policy and filesystem behavior.
//!
//! Extraction assumes unique access to the destination directory. Concurrent
//! mutation of that directory is outside the threat model.

13pub mod builder;
14mod component_tree;
15pub mod extract;
16mod name;
17
18use std::{
19    io,
20    marker::PhantomData,
21    path::{Path, PathBuf},
22};
23
24use thiserror::Error;
25
26pub use builder::{ArchiveBuilder, BuildError, Builder, EntryMetadata, TraversalError};
27pub use name::{NameValidator, default_name_validator};
28
/// Common metadata for one archive member.
///
/// Every [`Member`] variant carries one of these; [`Member::metadata`] returns
/// it regardless of the variant.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct MemberMetadata {
    /// The archive-relative member path before extraction normalization.
    pub path: String,
    /// The member's byte position in the source archive.
    pub position: u64,
}

/// A special-file kind that generic extraction deliberately rejects.
///
/// Carried by [`Member::Special`] and reported back through
/// [`ExtractError::UnsupportedMember`].
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum SpecialKind {
    /// A character device.
    CharacterDevice,
    /// A block device.
    BlockDevice,
    /// A FIFO.
    Fifo,
}

49/// One format-neutral archive member.
50#[derive(Debug)]
51pub enum Member<P> {
52    /// A regular file with a streaming payload.
53    File {
54        /// Common member metadata.
55        metadata: MemberMetadata,
56        /// The logical, decoded payload size in bytes.
57        ///
58        /// `payload` must yield exactly this many bytes before completing
59        /// successfully. Format-level framing and compression are excluded.
60        size: u64,
61        /// Whether the archived mode carries executable intent.
62        executable: bool,
63        /// The streaming member payload.
64        payload: P,
65    },
66    /// A directory.
67    Directory {
68        /// Common member metadata.
69        metadata: MemberMetadata,
70    },
71    /// A symbolic link.
72    SymbolicLink {
73        /// Common member metadata.
74        metadata: MemberMetadata,
75        /// The archive-provided link target.
76        target: String,
77    },
78    /// A hard link, optionally followed by replacement payload bytes.
79    HardLink {
80        /// Common member metadata.
81        metadata: MemberMetadata,
82        /// The archive-provided link target.
83        target: String,
84        /// The logical, decoded replacement-payload size in bytes.
85        ///
86        /// `payload` must yield exactly this many bytes before completing
87        /// successfully. Format-level framing and compression are excluded.
88        size: u64,
89        /// The streaming member payload.
90        payload: P,
91    },
92    /// A parsed special file that cannot be extracted safely.
93    Special {
94        /// Common member metadata.
95        metadata: MemberMetadata,
96        /// The special-file kind.
97        kind: SpecialKind,
98    },
99}
100
101impl<P> Member<P> {
102    /// Returns this member's common metadata.
103    pub fn metadata(&self) -> &MemberMetadata {
104        match self {
105            Self::File { metadata, .. }
106            | Self::Directory { metadata }
107            | Self::SymbolicLink { metadata, .. }
108            | Self::HardLink { metadata, .. }
109            | Self::Special { metadata, .. } => metadata,
110        }
111    }
112
113    fn lend_payload<'a>(self) -> Member<LentPayload<'a, P>> {
114        match self {
115            Self::File {
116                metadata,
117                size,
118                executable,
119                payload,
120            } => Member::File {
121                metadata,
122                size,
123                executable,
124                payload: LentPayload::new(payload),
125            },
126            Self::Directory { metadata } => Member::Directory { metadata },
127            Self::SymbolicLink { metadata, target } => Member::SymbolicLink { metadata, target },
128            Self::HardLink {
129                metadata,
130                target,
131                size,
132                payload,
133            } => Member::HardLink {
134                metadata,
135                target,
136                size,
137                payload: LentPayload::new(payload),
138            },
139            Self::Special { metadata, kind } => Member::Special { metadata, kind },
140        }
141    }
142}
143
/// A streaming cursor over one archive member's payload.
#[expect(
    async_fn_in_trait,
    reason = "payload readers may be !Send and run on a local executor"
)]
pub trait MemberPayload: Sized {
    /// The archive-format error returned while reading the payload.
    type Error;

    /// Reads the next validated, logical payload chunk into a reusable buffer.
    ///
    /// Returns `true` after replacing `buffer` with a nonempty chunk. Returns
    /// `false` only after the payload has been fully consumed and validated,
    /// leaving `buffer` unchanged so its initialized storage can be reused.
    /// Callers should not clear `buffer` between calls. Implementations may
    /// return chunks shorter than `target_len`.
    ///
    /// Successful chunks contain decoded member contents rather than stored or
    /// compressed bytes. Their total length must equal the `size` declared by
    /// the enclosing [`Member`]; a mismatch must produce an error.
    ///
    /// # Errors
    ///
    /// Returns [`Self::Error`] when the payload cannot be read or validated,
    /// including when the total length disagrees with the declared `size`.
    async fn next_chunk(
        &mut self,
        buffer: &mut Vec<u8>,
        target_len: usize,
    ) -> Result<bool, Self::Error>;

    /// Discards and validates all remaining payload bytes.
    ///
    /// Consumes the payload, so no further chunks can be read afterward.
    ///
    /// # Errors
    ///
    /// Returns [`Self::Error`] when the remaining bytes fail to read or
    /// validate.
    async fn skip(self) -> Result<(), Self::Error>;
}

/// A member payload that keeps its lending [`Members`] cursor borrowed.
///
/// This wrapper is returned by [`Members::next`]. Its private fields prevent a
/// payload whose concrete type does not itself borrow the archive from being
/// detached from the cursor lifetime.
#[derive(Debug)]
pub struct LentPayload<'a, P> {
    /// The wrapped format-specific payload; all reads are forwarded to it.
    payload: P,
    /// Zero-sized marker that makes the wrapper carry `'a` as though it held
    /// a `&'a mut` borrow, even when `P` itself mentions no lifetime.
    cursor: PhantomData<&'a mut ()>,
}

185impl<P> LentPayload<'_, P> {
186    fn new(payload: P) -> Self {
187        Self {
188            payload,
189            cursor: PhantomData,
190        }
191    }
192}
193
194impl<P: MemberPayload> MemberPayload for LentPayload<'_, P> {
195    type Error = P::Error;
196
197    async fn next_chunk(
198        &mut self,
199        buffer: &mut Vec<u8>,
200        target_len: usize,
201    ) -> Result<bool, Self::Error> {
202        self.payload.next_chunk(buffer, target_len).await
203    }
204
205    async fn skip(self) -> Result<(), Self::Error> {
206        self.payload.skip().await
207    }
208}
209
/// A consuming, lending member cursor.
///
/// Created by [`Archive::members`], which moves the archive into the cursor.
#[derive(Debug)]
pub struct Members<A> {
    /// The archive being iterated; owned for the cursor's whole lifetime.
    archive: A,
}

215impl<A: Archive> Members<A> {
216    /// Returns the next archive member.
217    ///
218    /// The returned payload borrows this cursor, so the cursor cannot advance
219    /// until that member is dropped or consumed.
220    pub async fn next<'a>(
221        &'a mut self,
222    ) -> Result<Option<Member<LentPayload<'a, A::Payload<'a>>>>, A::Error> {
223        Ok(self.archive.next_member().await?.map(Member::lend_payload))
224    }
225}
226
227/// A one-pass archive that can enumerate and extract format-neutral members.
228#[expect(
229    async_fn_in_trait,
230    reason = "archive readers may be !Send and run on a local executor"
231)]
232pub trait Archive: Sized {
233    /// The archive-format error returned during member iteration.
234    type Error;
235    /// The streaming payload type lent by each file member.
236    type Payload<'a>: MemberPayload<Error = Self::Error>
237    where
238        Self: 'a;
239
240    /// Reads the next format-neutral member for [`Members::next`].
241    ///
242    /// Implementations must drain and validate an unfinished preceding payload
243    /// before returning another member. Archive consumers should use
244    /// [`Archive::members`] rather than call this hook directly: [`Members`]
245    /// wraps each payload in [`LentPayload`] to enforce the lending cursor
246    /// contract even when a concrete payload type does not retain its lifetime.
247    async fn next_member<'a>(
248        &'a mut self,
249    ) -> Result<Option<Member<Self::Payload<'a>>>, Self::Error>;
250
251    /// Consumes this archive and returns its lending member cursor.
252    fn members(self) -> Members<Self> {
253        Members { archive: self }
254    }
255
256    /// Securely extracts this archive beneath `destination` under `policy`.
257    ///
258    /// `destination` is created if it does not already exist. Symbolic links
259    /// are preserved by default on platforms that support native creation;
260    /// hard links require explicit opt-in through [`extract::LinkPolicy`].
261    ///
262    /// Archived Unix permission modes are normalized rather than restored. New
263    /// regular files are created with mode `0o777` when executable intent is
264    /// set and `0o666` otherwise, in both cases filtered by the process umask.
265    /// Directories use the platform's default creation mode, and special mode
266    /// bits are not restored. Ownership and timestamps are likewise determined
267    /// by extraction activity rather than archived metadata.
268    ///
269    /// **IMPORTANT**: `destination` must not be concurrently modified during
270    /// extraction. No correctness or isolation guarantees are made under
271    /// external mutation.
272    ///
273    /// Extraction is streamwise: a late error can leave a partially extracted
274    /// destination. Callers requiring all-or-nothing behavior should extract
275    /// into a new temporary directory and atomically rename it afterward.
276    async fn extract_in<P: AsRef<Path>>(
277        self,
278        destination: P,
279        policy: extract::ExtractPolicy,
280    ) -> Result<(), ExtractError<Self::Error>> {
281        extract::extract(self.members(), destination.as_ref(), policy).await
282    }
283}
284
285/// A valid member feature rejected by the selected [`extract::ExtractPolicy`].
286#[derive(Clone, Debug, Eq, PartialEq, Error)]
287pub enum ExtractPolicyViolation {
288    /// An effective member name or link target was rejected.
289    #[error("archive {context} rejected by name policy: {value:?}")]
290    NameRejected {
291        /// The role of the rejected archive text.
292        context: &'static str,
293        /// The rejected UTF-8 value.
294        value: String,
295    },
296    /// A symbolic-link member appeared when links are forbidden.
297    #[error("symbolic-link members are not allowed")]
298    SymbolicLink,
299    /// A symbolic-link member requires native creation on an unsupported platform.
300    #[error("native symbolic-link creation is not supported on this platform")]
301    NativeSymlinkCreationUnsupported,
302    /// A hard-link member appeared when links are forbidden.
303    #[error("hard-link members are not allowed")]
304    HardLink,
305}
306
307/// An error produced while securely extracting an archive.
308#[derive(Debug, Error)]
309pub enum ExtractError<E> {
310    /// Reading or decoding the underlying archive failed.
311    #[error(transparent)]
312    Archive(E),
313    /// A destination filesystem operation failed.
314    #[error("failed to {operation} {path}: {source}")]
315    Filesystem {
316        /// The operation that failed.
317        operation: &'static str,
318        /// The path involved in the failed operation.
319        path: PathBuf,
320        /// The underlying I/O error.
321        #[source]
322        source: io::Error,
323    },
324    /// A blocking extraction operation failed to complete.
325    #[error("failed to complete blocking extraction operation: {0}")]
326    BlockingTask(#[from] tokio::task::JoinError),
327    /// An archive member path or link value is unsafe to extract.
328    #[error("at byte {position}: unsafe {context} {value:?}: {reason}")]
329    UnsafePath {
330        /// Source member position.
331        position: u64,
332        /// Whether this is a member path or link target.
333        context: &'static str,
334        /// Archive-provided value.
335        value: String,
336        /// Rejection reason.
337        reason: &'static str,
338    },
339    /// An archive entry collides with a path that cannot be replaced.
340    #[error("archive entry collides with existing path {path}")]
341    PathCollision {
342        /// Normalized extraction-relative path.
343        path: PathBuf,
344    },
345    /// A special member kind is deliberately excluded from extraction.
346    #[error("at byte {position}: cannot extract unsupported member type {kind:?} at {path}")]
347    UnsupportedMember {
348        /// Source member position.
349        position: u64,
350        /// Normalized extraction-relative path.
351        path: PathBuf,
352        /// Unsupported special-file kind.
353        kind: SpecialKind,
354    },
355    /// A symbolic or hard link cannot be safely resolved.
356    #[error("at byte {position}: invalid link {path} -> {target:?}: {reason}")]
357    InvalidLink {
358        /// Source member position.
359        position: u64,
360        /// Normalized link path.
361        path: PathBuf,
362        /// Archive-provided or normalized link target.
363        target: String,
364        /// Rejection reason.
365        reason: &'static str,
366    },
367    /// A structurally valid member was rejected by extraction policy.
368    #[error("at byte {position}: extraction policy rejected input: {violation}")]
369    PolicyViolation {
370        /// Source member position.
371        position: u64,
372        /// The selected policy rule that rejected the member.
373        violation: ExtractPolicyViolation,
374    },
375}
376
377impl<E> ExtractError<E> {
378    fn policy_violation(position: u64, violation: ExtractPolicyViolation) -> Self {
379        Self::PolicyViolation {
380            position,
381            violation,
382        }
383    }
384
385    fn invalid_link(position: u64, path: PathBuf, target: String, reason: &'static str) -> Self {
386        Self::InvalidLink {
387            position,
388            path,
389            target,
390            reason,
391        }
392    }
393
394    fn unsafe_path(
395        position: u64,
396        context: &'static str,
397        value: &str,
398        reason: &'static str,
399    ) -> Self {
400        Self::UnsafePath {
401            position,
402            context,
403            value: value.to_owned(),
404            reason,
405        }
406    }
407
408    fn filesystem(operation: &'static str, path: PathBuf, source: io::Error) -> Self {
409        Self::Filesystem {
410            operation,
411            path,
412            source,
413        }
414    }
415}