Skip to main content

archive_trait/
lib.rs

//! Format-neutral, asynchronous archive construction and extraction.
//!
//! To build archives, formats implement [`ArchiveBuilder`] and call
//! [`ArchiveBuilder::builder`] to reuse high-level entry addition, recursive
//! filesystem traversal, validation, and source streaming.
//!
//! To read archives, formats implement [`Archive`] by projecting their entries
//! into [`Member`] values. The default [`Archive::extract_in`] implementation
//! then applies common extraction policy and filesystem behavior.
//!
//! Extraction assumes unique access to the destination directory. Concurrent
//! mutation of that directory is outside the threat model.
12
13pub mod builder;
14mod component_tree;
15pub mod extract;
16mod name;
17
18use std::{
19    io,
20    marker::PhantomData,
21    path::{Path, PathBuf},
22};
23
24use thiserror::Error;
25
26pub use builder::{
27    ArchiveBuilder, BuildError, Builder, EntryMetadata, FilePayload, TraversalError,
28};
29pub use name::{NameValidator, default_name_validator};
30
/// Format-independent metadata carried by every archive member.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MemberMetadata {
    /// Archive-relative path of the member, as it is before extraction
    /// normalization.
    pub path: String,
    /// Byte position of the member within the source archive.
    pub position: u64,
}
39
/// The kinds of special file that generic extraction refuses on purpose.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SpecialKind {
    /// The member is a character device.
    CharacterDevice,
    /// The member is a block device.
    BlockDevice,
    /// The member is a FIFO.
    Fifo,
}
50
51/// One format-neutral archive member.
52#[derive(Debug)]
53pub enum Member<P> {
54    /// A regular file with a streaming payload.
55    File {
56        /// Common member metadata.
57        metadata: MemberMetadata,
58        /// The logical, decoded payload size in bytes.
59        ///
60        /// `payload` must yield exactly this many bytes before completing
61        /// successfully. Format-level framing and compression are excluded.
62        size: u64,
63        /// Whether the archived mode carries executable intent.
64        executable: bool,
65        /// The streaming member payload.
66        payload: P,
67    },
68    /// A directory.
69    Directory {
70        /// Common member metadata.
71        metadata: MemberMetadata,
72    },
73    /// A symbolic link.
74    SymbolicLink {
75        /// Common member metadata.
76        metadata: MemberMetadata,
77        /// The archive-provided link target.
78        target: String,
79    },
80    /// A hard link, optionally followed by replacement payload bytes.
81    HardLink {
82        /// Common member metadata.
83        metadata: MemberMetadata,
84        /// The archive-provided link target.
85        target: String,
86        /// The logical, decoded replacement-payload size in bytes.
87        ///
88        /// `payload` must yield exactly this many bytes before completing
89        /// successfully. Format-level framing and compression are excluded.
90        size: u64,
91        /// The streaming member payload.
92        payload: P,
93    },
94    /// A parsed special file that cannot be extracted safely.
95    Special {
96        /// Common member metadata.
97        metadata: MemberMetadata,
98        /// The special-file kind.
99        kind: SpecialKind,
100    },
101}
102
103impl<P> Member<P> {
104    /// Returns this member's common metadata.
105    pub fn metadata(&self) -> &MemberMetadata {
106        match self {
107            Self::File { metadata, .. }
108            | Self::Directory { metadata }
109            | Self::SymbolicLink { metadata, .. }
110            | Self::HardLink { metadata, .. }
111            | Self::Special { metadata, .. } => metadata,
112        }
113    }
114
115    fn lend_payload<'a>(self) -> Member<LentPayload<'a, P>> {
116        match self {
117            Self::File {
118                metadata,
119                size,
120                executable,
121                payload,
122            } => Member::File {
123                metadata,
124                size,
125                executable,
126                payload: LentPayload::new(payload),
127            },
128            Self::Directory { metadata } => Member::Directory { metadata },
129            Self::SymbolicLink { metadata, target } => Member::SymbolicLink { metadata, target },
130            Self::HardLink {
131                metadata,
132                target,
133                size,
134                payload,
135            } => Member::HardLink {
136                metadata,
137                target,
138                size,
139                payload: LentPayload::new(payload),
140            },
141            Self::Special { metadata, kind } => Member::Special { metadata, kind },
142        }
143    }
144}
145
/// A streaming cursor over the payload of a single archive member.
#[expect(
    async_fn_in_trait,
    reason = "payload readers may be !Send and run on a local executor"
)]
pub trait MemberPayload: Sized {
    /// The archive-format error that reading the payload can produce.
    type Error;

    /// Reads the next validated chunk of logical payload into a reusable
    /// buffer.
    ///
    /// A return value of `true` means `buffer` has been replaced with a
    /// nonempty chunk. `false` is returned only once the payload has been
    /// fully consumed and validated; in that case `buffer` is left unchanged
    /// so that its initialized storage can be reused. Callers should not clear
    /// `buffer` between calls. A chunk may be shorter than `target_len`.
    ///
    /// Chunks hold decoded member contents, not stored or compressed bytes.
    /// The lengths of all successful chunks must add up to the `size` declared
    /// by the enclosing [`Member`]; a mismatch must produce an error.
    async fn next_chunk(
        &mut self,
        buffer: &mut Vec<u8>,
        target_len: usize,
    ) -> Result<bool, Self::Error>;

    /// Consumes the cursor, discarding and validating every remaining payload
    /// byte.
    async fn skip(self) -> Result<(), Self::Error>;
}
175
/// A member payload that keeps the lending [`Members`] cursor borrowed.
///
/// [`Members::next`] hands out payloads wrapped in this type. Because the
/// fields are private, a payload whose concrete type does not itself borrow
/// the archive still cannot be detached from the cursor lifetime `'a`.
#[derive(Debug)]
pub struct LentPayload<'a, P> {
    /// The wrapped format-specific payload.
    payload: P,
    /// Zero-sized marker that makes the wrapper carry the lifetime `'a`.
    cursor: PhantomData<&'a mut ()>,
}
186
187impl<P> LentPayload<'_, P> {
188    fn new(payload: P) -> Self {
189        Self {
190            payload,
191            cursor: PhantomData,
192        }
193    }
194}
195
196impl<P: MemberPayload> MemberPayload for LentPayload<'_, P> {
197    type Error = P::Error;
198
199    async fn next_chunk(
200        &mut self,
201        buffer: &mut Vec<u8>,
202        target_len: usize,
203    ) -> Result<bool, Self::Error> {
204        self.payload.next_chunk(buffer, target_len).await
205    }
206
207    async fn skip(self) -> Result<(), Self::Error> {
208        self.payload.skip().await
209    }
210}
211
/// A consuming, lending member cursor.
///
/// The cursor owns the archive it iterates over and is created by
/// [`Archive::members`]. `Debug` is available whenever the wrapped archive is
/// itself `Debug`, matching the other public types in this crate.
#[derive(Debug)]
pub struct Members<A> {
    /// The archive whose members are being enumerated.
    archive: A,
}
216
217impl<A: Archive> Members<A> {
218    /// Returns the next archive member.
219    ///
220    /// The returned payload borrows this cursor, so the cursor cannot advance
221    /// until that member is dropped or consumed.
222    pub async fn next<'a>(
223        &'a mut self,
224    ) -> Result<Option<Member<LentPayload<'a, A::Payload<'a>>>>, A::Error> {
225        Ok(self.archive.next_member().await?.map(Member::lend_payload))
226    }
227}
228
229/// A one-pass archive that can enumerate and extract format-neutral members.
230#[expect(
231    async_fn_in_trait,
232    reason = "archive readers may be !Send and run on a local executor"
233)]
234pub trait Archive: Sized {
235    /// The archive-format error returned during member iteration.
236    type Error;
237    /// The streaming payload type lent by each file member.
238    type Payload<'a>: MemberPayload<Error = Self::Error>
239    where
240        Self: 'a;
241
242    /// Reads the next format-neutral member for [`Members::next`].
243    ///
244    /// Implementations must drain and validate an unfinished preceding payload
245    /// before returning another member. Archive consumers should use
246    /// [`Archive::members`] rather than call this hook directly: [`Members`]
247    /// wraps each payload in [`LentPayload`] to enforce the lending cursor
248    /// contract even when a concrete payload type does not retain its lifetime.
249    async fn next_member<'a>(
250        &'a mut self,
251    ) -> Result<Option<Member<Self::Payload<'a>>>, Self::Error>;
252
253    /// Consumes this archive and returns its lending member cursor.
254    fn members(self) -> Members<Self> {
255        Members { archive: self }
256    }
257
258    /// Securely extracts this archive beneath `destination` under `policy`.
259    ///
260    /// `destination` is created if it does not already exist. Symbolic links
261    /// are preserved by default on platforms that support native creation;
262    /// hard links require explicit opt-in through [`extract::LinkPolicy`].
263    ///
264    /// Archived Unix permission modes are normalized rather than restored. New
265    /// regular files are created with mode `0o777` when executable intent is
266    /// set and `0o666` otherwise, in both cases filtered by the process umask.
267    /// Directories use the platform's default creation mode, and special mode
268    /// bits are not restored. Ownership and timestamps are likewise determined
269    /// by extraction activity rather than archived metadata.
270    ///
271    /// **IMPORTANT**: `destination` must not be concurrently modified during
272    /// extraction. No correctness or isolation guarantees are made under
273    /// external mutation.
274    ///
275    /// Extraction is streamwise: a late error can leave a partially extracted
276    /// destination. Callers requiring all-or-nothing behavior should extract
277    /// into a new temporary directory and atomically rename it afterward.
278    async fn extract_in<P: AsRef<Path>>(
279        self,
280        destination: P,
281        policy: extract::ExtractPolicy,
282    ) -> Result<(), ExtractError<Self::Error>> {
283        extract::extract(self.members(), destination.as_ref(), policy).await
284    }
285}
286
287/// A valid member feature rejected by the selected [`extract::ExtractPolicy`].
288#[derive(Clone, Debug, Eq, PartialEq, Error)]
289pub enum ExtractPolicyViolation {
290    /// An effective member name or link target was rejected.
291    #[error("archive {context} rejected by name policy: {value:?}")]
292    NameRejected {
293        /// The role of the rejected archive text.
294        context: &'static str,
295        /// The rejected UTF-8 value.
296        value: String,
297    },
298    /// A symbolic-link member appeared when links are forbidden.
299    #[error("symbolic-link members are not allowed")]
300    SymbolicLink,
301    /// A symbolic-link member requires native creation on an unsupported platform.
302    #[error("native symbolic-link creation is not supported on this platform")]
303    NativeSymlinkCreationUnsupported,
304    /// A hard-link member appeared when links are forbidden.
305    #[error("hard-link members are not allowed")]
306    HardLink,
307}
308
309/// An error produced while securely extracting an archive.
310#[derive(Debug, Error)]
311pub enum ExtractError<E> {
312    /// Reading or decoding the underlying archive failed.
313    #[error(transparent)]
314    Archive(E),
315    /// A destination filesystem operation failed.
316    #[error("failed to {operation} {path}: {source}")]
317    Filesystem {
318        /// The operation that failed.
319        operation: &'static str,
320        /// The path involved in the failed operation.
321        path: PathBuf,
322        /// The underlying I/O error.
323        #[source]
324        source: io::Error,
325    },
326    /// A blocking extraction operation failed to complete.
327    #[error("failed to complete blocking extraction operation: {0}")]
328    BlockingTask(#[from] tokio::task::JoinError),
329    /// An archive member path or link value is unsafe to extract.
330    #[error("at byte {position}: unsafe {context} {value:?}: {reason}")]
331    UnsafePath {
332        /// Source member position.
333        position: u64,
334        /// Whether this is a member path or link target.
335        context: &'static str,
336        /// Archive-provided value.
337        value: String,
338        /// Rejection reason.
339        reason: &'static str,
340    },
341    /// An archive entry collides with a path that cannot be replaced.
342    #[error("archive entry collides with existing path {path}")]
343    PathCollision {
344        /// Normalized extraction-relative path.
345        path: PathBuf,
346    },
347    /// A special member kind is deliberately excluded from extraction.
348    #[error("at byte {position}: cannot extract unsupported member type {kind:?} at {path}")]
349    UnsupportedMember {
350        /// Source member position.
351        position: u64,
352        /// Normalized extraction-relative path.
353        path: PathBuf,
354        /// Unsupported special-file kind.
355        kind: SpecialKind,
356    },
357    /// A symbolic or hard link cannot be safely resolved.
358    #[error("at byte {position}: invalid link {path} -> {target:?}: {reason}")]
359    InvalidLink {
360        /// Source member position.
361        position: u64,
362        /// Normalized link path.
363        path: PathBuf,
364        /// Archive-provided or normalized link target.
365        target: String,
366        /// Rejection reason.
367        reason: &'static str,
368    },
369    /// A structurally valid member was rejected by extraction policy.
370    #[error("at byte {position}: extraction policy rejected input: {violation}")]
371    PolicyViolation {
372        /// Source member position.
373        position: u64,
374        /// The selected policy rule that rejected the member.
375        violation: ExtractPolicyViolation,
376    },
377}
378
379impl<E> ExtractError<E> {
380    fn policy_violation(position: u64, violation: ExtractPolicyViolation) -> Self {
381        Self::PolicyViolation {
382            position,
383            violation,
384        }
385    }
386
387    fn invalid_link(position: u64, path: PathBuf, target: String, reason: &'static str) -> Self {
388        Self::InvalidLink {
389            position,
390            path,
391            target,
392            reason,
393        }
394    }
395
396    fn unsafe_path(
397        position: u64,
398        context: &'static str,
399        value: &str,
400        reason: &'static str,
401    ) -> Self {
402        Self::UnsafePath {
403            position,
404            context,
405            value: value.to_owned(),
406            reason,
407        }
408    }
409
410    fn filesystem(operation: &'static str, path: PathBuf, source: io::Error) -> Self {
411        Self::Filesystem {
412            operation,
413            path,
414            source,
415        }
416    }
417}