Skip to main content

archive_trait/
builder.rs

1//! Format-neutral archive construction.
2//!
3//! Archive formats implement [`ArchiveBuilder`] and wrap the resulting writer
4//! in a stateful [`Builder`] to use the format-neutral construction APIs.
5
6mod traversal;
7
8use std::{
9    collections::VecDeque,
10    io::{self, Read},
11    mem,
12    ops::Range,
13    path::{Path, PathBuf},
14};
15
16use thiserror::Error;
17use tokio::io::AsyncReadExt;
18
19pub use self::traversal::TraversalError;
20use self::traversal::{TraversalEntry, TraversalKind, TraversalStream, stream_directory_entries};
21use crate::{
22    NameValidator,
23    component_tree::{ComponentTree, ROOT_NODE},
24    name::NameValidation,
25};
26
// Largest source file size that is read into the shared batch buffer during
// preparation; files larger than this are streamed chunk by chunk instead.
const BUFFERED_SOURCE_FILE_BYTES: usize = 1024 * 1024;
// Upper bound on one streamed chunk. The same value is passed to
// `set_max_buf_size` on the tokio file handle used for streaming.
const SOURCE_FILE_CHUNK_BYTES: usize = 2 * 1024 * 1024;
// A preparation batch is closed once its buffer reaches this many bytes.
// A preparation batch may exceed this target by one buffered file, so its
// payload storage remains below twice this value.
const SOURCE_FILE_PREPARATION_BATCH_BYTES: usize = BUFFERED_SOURCE_FILE_BYTES;
32
/// Minimal regular-file metadata accepted by [`Builder::add_entry`].
///
/// The [`Default`] value describes a file without executable intent.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub struct EntryMetadata {
    /// Whether the regular file carries executable intent.
    executable: bool,
}

impl EntryMetadata {
    /// Configures whether the regular file carries executable intent.
    ///
    /// This is a by-value builder method: the receiver is copied, updated,
    /// and returned. Nothing is modified in place, so discarding the return
    /// value discards the change.
    #[must_use]
    pub fn executable(mut self, executable: bool) -> Self {
        self.executable = executable;
        self
    }

    /// Returns whether this entry carries executable intent.
    #[must_use]
    pub fn is_executable(self) -> bool {
        self.executable
    }
}
51
52/// Controls format-neutral archive construction behavior.
53#[derive(Clone, Copy, Debug, Default)]
54pub struct BuilderPolicy {
55    name_validation: NameValidation,
56    symlink_policy: SymlinkPolicy,
57}
58
59/// Controls how source symbolic links are handled during recursive builds.
60#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
61pub enum SymlinkPolicy {
62    /// Reject recursive sources containing symbolic links.
63    #[default]
64    Reject,
65    /// Preserve symbolic links as link members in the resulting archive.
66    Preserve,
67    // TODO: Consider adding some kind of "Dereference" policy in the future,
68    // where symlinks get followed and replaced with their normal file/directory
69    // contents.
70}
71
72impl BuilderPolicy {
73    /// Configures validation for member names and preserved symbolic-link targets.
74    ///
75    /// Passing [`None`] disables configurable name validation. UTF-8 and
76    /// archive-format requirements still apply.
77    pub fn name_validator(mut self, validator: Option<NameValidator>) -> Self {
78        self.name_validation = NameValidation::from_validator(validator);
79        self
80    }
81
82    /// Configures how recursive builds handle source symbolic links.
83    ///
84    /// Symbolic links are **rejected by default**. Use
85    /// [`SymlinkPolicy::Preserve`] to write link members instead.
86    pub fn symlink_policy(mut self, policy: SymlinkPolicy) -> Self {
87        self.symlink_policy = policy;
88        self
89    }
90}
91
92struct BuilderState {
93    policy: BuilderPolicy,
94    entries: BuildEntries,
95    source_buffer: Vec<u8>,
96    poisoned: bool,
97}
98
99impl BuilderState {
100    fn new(policy: BuilderPolicy) -> Self {
101        Self {
102            policy,
103            entries: BuildEntries::new(),
104            source_buffer: Vec::new(),
105            poisoned: false,
106        }
107    }
108
109    fn ensure_active<E>(&self) -> Result<(), BuildError<E>> {
110        if self.poisoned {
111            return Err(BuildError::Poisoned);
112        }
113        Ok(())
114    }
115
116    // A backend write is provisionally poisoning. Completion clears this flag
117    // before the returned failure is classified; cancellation leaves it set.
118    fn begin_write(&mut self) {
119        self.poisoned = true;
120    }
121
122    fn complete_write(&mut self) {
123        self.poisoned = false;
124    }
125
126    fn poison(&mut self) {
127        self.poisoned = true;
128    }
129}
130
/// A format-neutral, uncompressed payload supplied to an [`ArchiveBuilder`]
/// implementation.
///
/// Bytes are pulled with [`EntryPayload::next_chunk`]; their total is
/// reported by [`EntryPayload::size`].
pub struct EntryPayload<'a> {
    /// Logical, uncompressed byte count of the whole payload.
    size: u64,
    /// Where the bytes come from: an in-memory slice or a streamed file.
    inner: EntryPayloadInner<'a>,
}

/// Backing storage for an [`EntryPayload`].
enum EntryPayloadInner<'a> {
    /// The whole payload is already in memory. `next_chunk` takes the slice
    /// out on its first call, so later calls observe `None`.
    Buffered(Option<&'a [u8]>),
    /// The payload is read from `file` one chunk at a time.
    Streaming {
        /// Open source file the chunks are read from.
        file: tokio::fs::File,
        /// Source path, used when reporting read failures.
        path: PathBuf,
        /// Borrowed storage that holds the chunk currently being produced.
        buffer: &'a mut Vec<u8>,
        /// Bytes of the payload that have not been yielded yet.
        remaining: u64,
        /// Bytes of the current chunk already read into `buffer`. Kept here
        /// rather than in the read future so a cancelled read can resume.
        filled: usize,
    },
}
148
149impl EntryPayload<'_> {
150    /// Returns the logical, uncompressed source size in bytes.
151    ///
152    /// This is the total number of bytes yielded by [`Self::next_chunk`], not
153    /// necessarily the size ultimately stored by the archive format.
154    pub fn size(&self) -> u64 {
155        self.size
156    }
157
158    /// Returns the next chunk of logical, uncompressed source bytes.
159    pub async fn next_chunk<E>(&mut self) -> Result<Option<&[u8]>, BuildError<E>> {
160        match &mut self.inner {
161            EntryPayloadInner::Buffered(data) => Ok(data.take().filter(|data| !data.is_empty())),
162            EntryPayloadInner::Streaming {
163                file,
164                path,
165                buffer,
166                remaining,
167                filled,
168            } => read_streaming_chunk(file, path, buffer, remaining, filled).await,
169        }
170    }
171
172    fn borrowed<E>(bytes: &[u8]) -> Result<EntryPayload<'_>, BuildError<E>> {
173        let size = u64::try_from(bytes.len())
174            .map_err(|_| arithmetic_overflow("manual entry payload size"))?;
175        Ok(EntryPayload {
176            size,
177            inner: EntryPayloadInner::Buffered(Some(bytes)),
178        })
179    }
180}
181
182async fn read_streaming_chunk<'a, E>(
183    file: &mut tokio::fs::File,
184    path: &Path,
185    buffer: &'a mut Vec<u8>,
186    remaining: &mut u64,
187    filled: &mut usize,
188) -> Result<Option<&'a [u8]>, BuildError<E>> {
189    if *remaining == 0 {
190        return Ok(None);
191    }
192
193    let chunk_size = (*remaining).min(SOURCE_FILE_CHUNK_BYTES as u64);
194    let chunk_len = usize::try_from(chunk_size)
195        .map_err(|_| arithmetic_overflow("source file read buffer size"))?;
196    buffer.resize(chunk_len, 0);
197    // Progress lives in the payload rather than this future, so cancelling and
198    // retrying `EntryPayload::next_chunk` cannot discard completed reads.
199    while *filled < chunk_len {
200        let read = file
201            .read(&mut buffer[*filled..])
202            .await
203            .map_err(|source| filesystem_error("read source file", path, source))?;
204        if read == 0 {
205            return Err(filesystem_error(
206                "read source file",
207                path,
208                io::Error::new(io::ErrorKind::UnexpectedEof, "source file was truncated"),
209            ));
210        }
211        *filled += read;
212    }
213    *remaining -= chunk_size;
214    *filled = 0;
215    Ok(Some(buffer))
216}
217
218/// A failure returned by an [`ArchiveBuilder`] format hook.
219///
220/// This distinguishes errors known to precede output from errors that may have
221/// left a partial member in the output archive.
222#[derive(Debug)]
223pub struct BuildFailure<E> {
224    error: BuildError<E>,
225    // TODO: Maybe make all failures poisoning?
226    // I'm not sure we really need the distinction here.
227    poisons_builder: bool,
228}
229
230impl<E> BuildFailure<E> {
231    /// Reports a failure that occurred before the hook wrote any output.
232    pub fn recoverable(error: BuildError<E>) -> Self {
233        Self {
234            error,
235            poisons_builder: false,
236        }
237    }
238
239    /// Reports a failure that may have left partial output.
240    pub fn poisoned(error: BuildError<E>) -> Self {
241        Self {
242            error,
243            poisons_builder: true,
244        }
245    }
246
247    fn into_parts(self) -> (BuildError<E>, bool) {
248        (self.error, self.poisons_builder)
249    }
250}
251
252/// A format-specific archive writer that can create a stateful [`Builder`].
253///
254/// The asynchronous methods on this trait are implementation hooks for
255/// [`Builder`]. Archive construction callers must not invoke them directly;
256/// doing so bypasses builder policy, collision tracking, and cancellation
257/// poisoning. Use [`Self::builder`] or [`Self::builder_with_policy`] and then
258/// the [`Builder`] APIs instead.
259///
260/// Hook implementations must return [`BuildFailure::recoverable`] only when the
261/// failed invocation wrote no output. Any failure after output may have begun
262/// must use [`BuildFailure::poisoned`].
263#[expect(
264    async_fn_in_trait,
265    reason = "archive writers may be !Send and run on a local executor"
266)]
267pub trait ArchiveBuilder: Sized {
268    /// The archive-format error returned while encoding entries.
269    type Error;
270
271    /// Wraps this format writer in a builder using default policy.
272    ///
273    /// Implementors should not override this default implementation.
274    fn builder(self) -> Builder<Self> {
275        Builder {
276            backend: self,
277            state: BuilderState::new(BuilderPolicy::default()),
278        }
279    }
280
281    /// Wraps this format writer in a builder using `policy`.
282    ///
283    /// Implementors should not override this default implementation.
284    fn builder_with_policy(self, policy: BuilderPolicy) -> Builder<Self> {
285        Builder {
286            backend: self,
287            state: BuilderState::new(policy),
288        }
289    }
290
291    /// Writes any format-specific archive terminator or index.
292    async fn finish_archive(&mut self) -> Result<(), BuildFailure<Self::Error>>;
293
294    /// Writes one regular-file member and its complete payload.
295    ///
296    /// Implementations must call [`EntryPayload::next_chunk`] through
297    /// completion and classify failures using [`BuildFailure`].
298    async fn write_file_member(
299        &mut self,
300        path: &str,
301        payload: &mut EntryPayload<'_>,
302        metadata: EntryMetadata,
303    ) -> Result<(), BuildFailure<Self::Error>>;
304
305    /// Writes one directory member.
306    async fn write_directory_member(&mut self, path: &str)
307    -> Result<(), BuildFailure<Self::Error>>;
308
309    /// Writes one symbolic-link member.
310    async fn write_symbolic_link_member(
311        &mut self,
312        path: &str,
313        target: &str,
314    ) -> Result<(), BuildFailure<Self::Error>>;
315}
316
317/// A stateful format-neutral archive construction engine.
318///
319/// Create this wrapper with [`ArchiveBuilder::builder`] or
320/// [`ArchiveBuilder::builder_with_policy`].
321pub struct Builder<B> {
322    backend: B,
323    state: BuilderState,
324}
325
326impl<B: ArchiveBuilder> Builder<B> {
327    /// Adds one regular file from an in-memory byte buffer.
328    pub async fn add_entry<P, D>(
329        &mut self,
330        path: P,
331        data: D,
332        metadata: EntryMetadata,
333    ) -> Result<(), BuildError<B::Error>>
334    where
335        P: AsRef<Path>,
336        D: AsRef<[u8]>,
337    {
338        self.state.ensure_active()?;
339        let archive_path = path.as_ref();
340        let Some(path) = archive_path.to_str() else {
341            return Err(BuildError::InvalidArchivePath {
342                path: archive_path.to_path_buf(),
343                reason: "path is not valid UTF-8",
344            });
345        };
346        if !self.state.policy.name_validation.accepts(path) {
347            return Err(BuildError::NameRejected {
348                context: "member path",
349                value: path.to_owned(),
350            });
351        }
352        let path = path.to_owned();
353        let reservation = self
354            .state
355            .entries
356            .preflight_entry(&path, ArchivedEntry::NonDirectory)?;
357        let mut payload = EntryPayload::borrowed(data.as_ref())?;
358        self.state.begin_write();
359        let result = self
360            .backend
361            .write_file_member(&path, &mut payload, metadata)
362            .await;
363        self.state.complete_write();
364        self.resolve_hook(result)?;
365        self.state.entries.commit_entry(&path, reservation);
366        Ok(())
367    }
368
369    /// Recursively adds a filesystem directory beneath its UTF-8 basename.
370    ///
371    /// Entries are visited in deterministic sorted order and files are streamed
372    /// with bounded memory. Source symbolic links are rejected by default;
373    /// [`BuilderPolicy::symlink_policy`] can instead preserve them. A late
374    /// traversal or validation failure may leave partial output and poison
375    /// this builder.
376    pub async fn add_directory<P: AsRef<Path>>(
377        &mut self,
378        source: P,
379    ) -> Result<(), BuildError<B::Error>> {
380        self.state.ensure_active()?;
381        let source = source.as_ref().to_path_buf();
382        let mut entries = stream_directory_entries(
383            source,
384            self.state.policy.name_validation,
385            self.state.policy.symlink_policy,
386        )
387        .map_err(BuildError::Traversal)?;
388        self.state.begin_write();
389        let mut traversal = DirectoryBuild {
390            entries: &mut self.state.entries,
391            source_buffer: mem::take(&mut self.state.source_buffer),
392            emitted: false,
393        };
394        let write_result =
395            write_directory_entries(&mut self.backend, &mut entries, &mut traversal).await;
396        let traversal_result = entries
397            .finish()
398            .await
399            .map_err(BuildError::Traversal)
400            .map_err(BuildFailure::recoverable);
401        let result = write_result.and(traversal_result);
402        let DirectoryBuild {
403            entries: _,
404            source_buffer,
405            emitted,
406        } = traversal;
407        self.state.complete_write();
408        self.state.source_buffer = source_buffer;
409        match result {
410            Ok(()) => Ok(()),
411            Err(error) => {
412                let (error, hook_poisoned) = error.into_parts();
413                if emitted || hook_poisoned {
414                    self.state.poison();
415                }
416                Err(error)
417            }
418        }
419    }
420
421    /// Finalizes and consumes this archive builder.
422    ///
423    /// Callers that need to retain access to an output sink should lend it to
424    /// the format writer before wrapping it rather than transferring ownership.
425    pub async fn finish(mut self) -> Result<(), BuildError<B::Error>> {
426        self.state.ensure_active()?;
427        let result = self.backend.finish_archive().await;
428        self.resolve_hook(result)
429    }
430
431    fn resolve_hook<T>(
432        &mut self,
433        result: Result<T, BuildFailure<B::Error>>,
434    ) -> Result<T, BuildError<B::Error>> {
435        match result {
436            Ok(value) => Ok(value),
437            Err(error) => {
438                let (error, poisons_builder) = error.into_parts();
439                if poisons_builder {
440                    self.state.poison();
441                }
442                Err(error)
443            }
444        }
445    }
446}
447
448async fn write_directory_entries<B: ArchiveBuilder>(
449    builder: &mut B,
450    entries: &mut TraversalStream,
451    traversal: &mut DirectoryBuild<'_>,
452) -> Result<(), BuildFailure<B::Error>> {
453    while let Some(entries) = entries.recv().await {
454        let mut entries = VecDeque::from(entries);
455        while !entries.is_empty() {
456            let buffer = mem::take(&mut traversal.source_buffer);
457            let (prepared, remaining) = prepare_directory_entries(entries, buffer)
458                .await
459                .map_err(SourceError::into_build_error)
460                .map_err(BuildFailure::recoverable)?;
461            entries = remaining;
462            let PreparedDirectoryBatch {
463                entries: prepared_entries,
464                mut buffer,
465            } = prepared;
466            let result =
467                write_prepared_directory_entries(builder, prepared_entries, &mut buffer, traversal)
468                    .await;
469            traversal.source_buffer = buffer;
470            result?;
471        }
472    }
473    Ok(())
474}
475
476async fn write_prepared_directory_entries<B: ArchiveBuilder>(
477    builder: &mut B,
478    entries: Vec<PreparedTraversalEntry>,
479    buffer: &mut Vec<u8>,
480    traversal: &mut DirectoryBuild<'_>,
481) -> Result<(), BuildFailure<B::Error>> {
482    for entry in entries {
483        let reservation = traversal
484            .entries
485            .preflight_entry(
486                &entry.archive_path,
487                if matches!(&entry.kind, PreparedTraversalKind::Directory) {
488                    ArchivedEntry::Directory { explicit: true }
489                } else {
490                    ArchivedEntry::NonDirectory
491                },
492            )
493            .map_err(BuildFailure::recoverable)?;
494        match entry.kind {
495            PreparedTraversalKind::Directory => {
496                builder.write_directory_member(&entry.archive_path).await?;
497            }
498            PreparedTraversalKind::BufferedFile { range, executable } => {
499                let data = buffer.get(range).ok_or_else(|| {
500                    BuildFailure::recoverable(arithmetic_overflow(
501                        "prepared source file buffer range",
502                    ))
503                })?;
504                let mut payload =
505                    EntryPayload::borrowed::<B::Error>(data).map_err(BuildFailure::recoverable)?;
506                builder
507                    .write_file_member(
508                        &entry.archive_path,
509                        &mut payload,
510                        EntryMetadata::default().executable(executable),
511                    )
512                    .await?;
513            }
514            PreparedTraversalKind::StreamingFile {
515                file,
516                path,
517                size,
518                executable,
519            } => {
520                let mut file = tokio::fs::File::from_std(file);
521                file.set_max_buf_size(SOURCE_FILE_CHUNK_BYTES);
522                let mut payload = EntryPayload {
523                    size,
524                    inner: EntryPayloadInner::Streaming {
525                        file,
526                        path,
527                        buffer,
528                        remaining: size,
529                        filled: 0,
530                    },
531                };
532                builder
533                    .write_file_member(
534                        &entry.archive_path,
535                        &mut payload,
536                        EntryMetadata::default().executable(executable),
537                    )
538                    .await?;
539            }
540            PreparedTraversalKind::SymbolicLink { target } => {
541                builder
542                    .write_symbolic_link_member(&entry.archive_path, &target)
543                    .await?;
544            }
545        }
546        traversal
547            .entries
548            .commit_entry(&entry.archive_path, reservation);
549        traversal.emitted = true;
550    }
551    Ok(())
552}
553
/// State threaded through one recursive directory build.
struct DirectoryBuild<'entries> {
    /// The builder's collision state, borrowed for the duration of the build.
    entries: &'entries mut BuildEntries,
    /// Scratch buffer taken from the builder and returned when the build ends.
    source_buffer: Vec<u8>,
    /// Whether at least one member has been written and committed.
    emitted: bool,
}

/// A group of entries prepared together, plus the buffer holding their data.
struct PreparedDirectoryBatch {
    /// Prepared entries, in the order they were taken from the traversal.
    entries: Vec<PreparedTraversalEntry>,
    /// Storage that the `BufferedFile` ranges of `entries` index into.
    buffer: Vec<u8>,
}

/// One traversal entry that is ready to be handed to a backend hook.
struct PreparedTraversalEntry {
    /// Archive path under which the member is written.
    archive_path: String,
    /// What kind of member this is and how to obtain its payload.
    kind: PreparedTraversalKind,
}

/// The prepared form of a traversal entry.
enum PreparedTraversalKind {
    /// A directory member.
    Directory,
    /// A regular file whose bytes were read into the batch buffer.
    BufferedFile {
        /// Byte range of this file's contents within the batch buffer.
        range: Range<usize>,
        /// Executable flag derived from the source file's metadata.
        executable: bool,
    },
    /// A regular file too large to buffer; its contents are read in chunks
    /// while the member is written.
    StreamingFile {
        /// The opened source file.
        file: std::fs::File,
        /// Source path, kept for error reporting.
        path: PathBuf,
        /// File length taken from metadata when the file was opened.
        size: u64,
        /// Executable flag derived from the source file's metadata.
        executable: bool,
    },
    /// A symbolic link member.
    SymbolicLink {
        /// The link target recorded by the traversal.
        target: String,
    },
}
586
587async fn prepare_directory_entries(
588    mut entries: VecDeque<TraversalEntry>,
589    mut buffer: Vec<u8>,
590) -> Result<(PreparedDirectoryBatch, VecDeque<TraversalEntry>), SourceError> {
591    tokio::task::spawn_blocking(move || {
592        buffer.clear();
593        let mut prepared = Vec::with_capacity(entries.len());
594        while let Some(entry) = entries.pop_front() {
595            let TraversalEntry {
596                source,
597                archive_path,
598                kind,
599            } = entry;
600            let (kind, batch_complete) = match kind {
601                TraversalKind::Directory => (PreparedTraversalKind::Directory, false),
602                TraversalKind::Regular => prepare_regular_file(source, &mut buffer)?,
603                TraversalKind::SymbolicLink { target } => {
604                    (PreparedTraversalKind::SymbolicLink { target }, false)
605                }
606            };
607            prepared.push(PreparedTraversalEntry { archive_path, kind });
608            if batch_complete {
609                break;
610            }
611        }
612        Ok((
613            PreparedDirectoryBatch {
614                entries: prepared,
615                buffer,
616            },
617            entries,
618        ))
619    })
620    .await
621    .map_err(SourceError::BlockingTask)?
622}
623
624fn prepare_regular_file(
625    path: PathBuf,
626    buffer: &mut Vec<u8>,
627) -> Result<(PreparedTraversalKind, bool), SourceError> {
628    let file = std::fs::File::open(&path)
629        .map_err(|source| SourceError::filesystem("open source file", &path, source))?;
630    let metadata = file
631        .metadata()
632        .map_err(|source| SourceError::filesystem("inspect source file", &path, source))?;
633    if !metadata.is_file() {
634        return Err(SourceError::filesystem(
635            "inspect source file",
636            &path,
637            io::Error::other("source is not a regular file"),
638        ));
639    }
640    let size = metadata.len();
641    let executable = is_executable(&metadata);
642    if size > BUFFERED_SOURCE_FILE_BYTES as u64 {
643        return Ok((
644            PreparedTraversalKind::StreamingFile {
645                file,
646                path,
647                size,
648                executable,
649            },
650            true,
651        ));
652    }
653    let payload_size = usize::try_from(size).map_err(|_| SourceError::ArithmeticOverflow {
654        context: "buffered source file size",
655    })?;
656    let start = buffer.len();
657    let end = start
658        .checked_add(payload_size)
659        .ok_or(SourceError::ArithmeticOverflow {
660            context: "buffered source batch size",
661        })?;
662    buffer.resize(end, 0);
663    (&file)
664        .read_exact(&mut buffer[start..end])
665        .map_err(|source| SourceError::filesystem("read source file", &path, source))?;
666    Ok((
667        PreparedTraversalKind::BufferedFile {
668            range: start..end,
669            executable,
670        },
671        buffer.len() >= SOURCE_FILE_PREPARATION_BATCH_BYTES,
672    ))
673}
674
675enum SourceError {
676    Filesystem {
677        operation: &'static str,
678        path: PathBuf,
679        source: io::Error,
680    },
681    BlockingTask(tokio::task::JoinError),
682    ArithmeticOverflow {
683        context: &'static str,
684    },
685}
686
687impl SourceError {
688    fn filesystem(operation: &'static str, path: &Path, source: io::Error) -> Self {
689        Self::Filesystem {
690            operation,
691            path: path.to_path_buf(),
692            source,
693        }
694    }
695
696    fn into_build_error<E>(self) -> BuildError<E> {
697        match self {
698            Self::Filesystem {
699                operation,
700                path,
701                source,
702            } => BuildError::Filesystem {
703                operation,
704                path,
705                source,
706            },
707            Self::BlockingTask(error) => BuildError::BlockingTask(error),
708            Self::ArithmeticOverflow { context } => BuildError::ArithmeticOverflow { context },
709        }
710    }
711}
712
/// A failure while constructing an archive.
///
/// `E` is the archive-format encoder error, surfaced through
/// [`BuildError::Encoder`].
#[derive(Debug, Error)]
pub enum BuildError<E> {
    /// The archive format encoder failed.
    #[error(transparent)]
    Encoder(E),
    /// Traversing a recursive source failed.
    #[error(transparent)]
    Traversal(#[from] TraversalError),
    /// A requested archive path cannot be represented by the UTF-8 builder.
    #[error("invalid archive path {path:?}: {reason}")]
    InvalidArchivePath {
        /// The rejected archive path.
        path: PathBuf,
        /// The reason the path cannot be represented.
        reason: &'static str,
    },
    /// An archive name was rejected by the configured [`BuilderPolicy`].
    #[error("archive {context} rejected by builder policy: {value:?}")]
    NameRejected {
        /// The role of the rejected archive text.
        context: &'static str,
        /// The rejected UTF-8 value.
        value: String,
    },
    /// An archive path collides with a previously reserved entry.
    #[error("archive entry collides with existing path {path}")]
    PathCollision {
        /// The conflicting normalized archive path.
        path: String,
    },
    /// A source filesystem operation failed.
    #[error("failed to {operation} {path}: {source}")]
    Filesystem {
        /// The operation that failed.
        operation: &'static str,
        /// The affected source filesystem path.
        path: PathBuf,
        /// The underlying I/O error.
        #[source]
        source: io::Error,
    },
    /// A blocking filesystem operation failed to complete.
    #[error("failed to complete blocking archive filesystem operation: {0}")]
    BlockingTask(#[from] tokio::task::JoinError),
    /// The builder cannot continue because a prior failure may have written bytes.
    ///
    /// Builder operations return this without calling the backend once the
    /// builder has been poisoned.
    #[error("archive builder is poisoned after a previous partial write")]
    Poisoned,
    /// A size computation exceeded this API's range.
    #[error("arithmetic overflow while computing {context}")]
    ArithmeticOverflow {
        /// The failed computation.
        context: &'static str,
    },
}
768
/// Collision-tracking state recorded for one archive path node.
#[derive(Clone, Copy, Debug)]
enum ArchivedEntry {
    /// A directory. `explicit` is `false` when the node was only created as
    /// the parent of a deeper committed path; directory members written
    /// during a recursive build are recorded with `explicit: true`.
    Directory { explicit: bool },
    /// Any member that is not a directory, such as a regular file or a
    /// symbolic link.
    NonDirectory,
}

/// Builder collision state keyed by literal `/`-separated archive components.
#[derive(Debug)]
struct BuildEntries(ComponentTree<Box<str>, ArchivedEntry>);

/// Proof that an entry was checked against the current collision state.
struct EntryReservation {
    /// The state to record for the path when the reservation is committed.
    entry: ArchivedEntry,
}
783
784impl BuildEntries {
785    fn new() -> Self {
786        Self(ComponentTree::new(None))
787    }
788
789    fn preflight_entry<E>(
790        &self,
791        path: &str,
792        entry: ArchivedEntry,
793    ) -> Result<EntryReservation, BuildError<E>> {
794        let mut parent = ROOT_NODE;
795        let mut components = archive_path_components(path).peekable();
796        while let Some((component, prefix)) = components.next() {
797            let Some(node) = self.0.child(parent, component) else {
798                return Ok(EntryReservation { entry });
799            };
800            if components.peek().is_some() {
801                match self.0.state(node) {
802                    Some(ArchivedEntry::Directory { .. }) => parent = node,
803                    Some(ArchivedEntry::NonDirectory) => return Err(path_collision(prefix)),
804                    None => return Ok(EntryReservation { entry }),
805                }
806            } else {
807                match (self.0.state(node), entry) {
808                    (
809                        Some(ArchivedEntry::Directory { explicit: false }),
810                        ArchivedEntry::Directory { .. },
811                    )
812                    | (None, _) => return Ok(EntryReservation { entry }),
813                    (Some(_), _) => return Err(path_collision(prefix)),
814                }
815            }
816        }
817        Ok(EntryReservation { entry })
818    }
819
820    fn commit_entry(&mut self, path: &str, reservation: EntryReservation) {
821        // The builder holds exclusive state access while the backend hook is
822        // awaited, so a successful reservation remains valid until this commit.
823        let mut parent = ROOT_NODE;
824        let mut components = archive_path_components(path).peekable();
825        while let Some((component, _)) = components.next() {
826            let node = self
827                .0
828                .ensure_child_with(parent, component, || component.into());
829            if components.peek().is_some() {
830                if self.0.state(node).is_none() {
831                    self.0
832                        .set_state(node, ArchivedEntry::Directory { explicit: false });
833                }
834            } else {
835                self.0.set_state(node, reservation.entry);
836            }
837            parent = node;
838        }
839    }
840
841    #[cfg(test)]
842    fn node_count(&self) -> usize {
843        self.0.node_count()
844    }
845
846    #[cfg(test)]
847    fn component_bytes(&self) -> usize {
848        self.0.components().map(|component| component.len()).sum()
849    }
850}
851
/// Walks `path` one `/`-separated component at a time.
///
/// Each item pairs the component text with the prefix of `path` that ends
/// with that component. Components are taken literally: empty components
/// produced by leading, trailing, or doubled separators are yielded as-is.
fn archive_path_components(path: &str) -> impl Iterator<Item = (&str, &str)> {
    path.split('/').scan(0usize, move |next_start, component| {
        let prefix_end = *next_start + component.len();
        // Step over the separator unless this was the last component.
        *next_start = prefix_end + usize::from(prefix_end < path.len());
        Some((component, &path[..prefix_end]))
    })
}
866
867fn filesystem_error<E>(operation: &'static str, path: &Path, source: io::Error) -> BuildError<E> {
868    BuildError::Filesystem {
869        operation,
870        path: path.to_path_buf(),
871        source,
872    }
873}
874
/// Builds a [`BuildError::ArithmeticOverflow`] naming the failed computation.
fn arithmetic_overflow<E>(context: &'static str) -> BuildError<E> {
    BuildError::ArithmeticOverflow { context }
}
878
879fn path_collision<E>(path: &str) -> BuildError<E> {
880    BuildError::PathCollision {
881        path: path.to_owned(),
882    }
883}
884
/// Reports whether any execute permission bit (owner, group, or other) is set.
#[cfg(unix)]
fn is_executable(metadata: &std::fs::Metadata) -> bool {
    use std::os::unix::fs::PermissionsExt;

    const ANY_EXECUTE_BIT: u32 = 0o111;

    let mode = metadata.permissions().mode();
    (mode & ANY_EXECUTE_BIT) != 0
}

/// Always reports `false` on non-Unix targets.
#[cfg(not(unix))]
fn is_executable(_metadata: &std::fs::Metadata) -> bool {
    false
}
896
897#[cfg(test)]
898mod tests {
899    use std::fs;
900
901    use tempfile::tempdir;
902
903    use super::*;
904
905    #[derive(Debug)]
906    struct TestError;
907
908    #[derive(Default)]
909    struct NoopArchiveBuilder {
910        fail_next_file: bool,
911        fail_next_directory: bool,
912    }
913
914    impl ArchiveBuilder for NoopArchiveBuilder {
915        type Error = TestError;
916
917        async fn finish_archive(&mut self) -> Result<(), BuildFailure<Self::Error>> {
918            Ok(())
919        }
920
921        async fn write_file_member(
922            &mut self,
923            _path: &str,
924            payload: &mut EntryPayload<'_>,
925            _metadata: EntryMetadata,
926        ) -> Result<(), BuildFailure<Self::Error>> {
927            if mem::take(&mut self.fail_next_file) {
928                return Err(BuildFailure::recoverable(BuildError::Encoder(TestError)));
929            }
930            loop {
931                match payload.next_chunk::<TestError>().await {
932                    Ok(Some(_)) => {}
933                    Ok(None) => return Ok(()),
934                    Err(error) => return Err(BuildFailure::recoverable(error)),
935                }
936            }
937        }
938
939        async fn write_directory_member(
940            &mut self,
941            _path: &str,
942        ) -> Result<(), BuildFailure<Self::Error>> {
943            if mem::take(&mut self.fail_next_directory) {
944                return Err(BuildFailure::recoverable(BuildError::Encoder(TestError)));
945            }
946            Ok(())
947        }
948
949        async fn write_symbolic_link_member(
950            &mut self,
951            _path: &str,
952            _target: &str,
953        ) -> Result<(), BuildFailure<Self::Error>> {
954            Ok(())
955        }
956    }
957
958    #[tokio::test]
959    async fn deep_manual_entry_uses_linear_component_storage() {
960        const COMPONENT: &str = "segment";
961        const DEPTH: usize = 4_096;
962
963        let mut path = format!("{COMPONENT}/").repeat(DEPTH);
964        path.push_str("file");
965        let mut builder = NoopArchiveBuilder::default().builder();
966        builder
967            .add_entry(&path, b"", EntryMetadata::default())
968            .await
969            .expect("deep manual entry should be added");
970
971        assert_eq!(builder.state.entries.node_count(), DEPTH + 2);
972        assert_eq!(
973            builder.state.entries.component_bytes(),
974            DEPTH * COMPONENT.len() + "file".len()
975        );
976    }
977
978    #[tokio::test]
979    async fn collision_state_preserves_literal_slash_components() {
980        let mut builder = NoopArchiveBuilder::default().builder();
981        for path in ["a//b", "a/b", "/absolute", "absolute", ".", ".."] {
982            builder
983                .add_entry(path, b"", EntryMetadata::default())
984                .await
985                .expect("distinct textual path should be added");
986        }
987
988        for (path, collision) in [("a//b", "a//b"), ("a/", "a/"), ("", ""), ("./child", ".")] {
989            assert!(matches!(
990                builder
991                    .add_entry(path, b"", EntryMetadata::default())
992                    .await,
993                Err(BuildError::PathCollision { path }) if path == collision
994            ));
995        }
996    }
997
998    #[tokio::test]
999    async fn recoverable_write_failure_does_not_commit_reservation() {
1000        let mut builder = NoopArchiveBuilder {
1001            fail_next_file: true,
1002            ..Default::default()
1003        }
1004        .builder();
1005        assert!(matches!(
1006            builder
1007                .add_entry("parent/file", b"", EntryMetadata::default())
1008                .await,
1009            Err(BuildError::Encoder(TestError))
1010        ));
1011        builder
1012            .add_entry("parent/file", b"", EntryMetadata::default())
1013            .await
1014            .expect("a recoverable failure should not reserve the path");
1015    }
1016
1017    #[tokio::test]
1018    async fn recoverable_recursive_write_failure_does_not_commit_reservation() {
1019        let temp = tempdir().expect("temporary directory should be created");
1020        let source = temp.path().join("directory");
1021        fs::create_dir(&source).expect("source directory should be created");
1022        let mut builder = NoopArchiveBuilder {
1023            fail_next_directory: true,
1024            ..Default::default()
1025        }
1026        .builder();
1027
1028        assert!(matches!(
1029            builder.add_directory(&source).await,
1030            Err(BuildError::Encoder(TestError))
1031        ));
1032        assert_eq!(builder.state.entries.node_count(), 1);
1033
1034        builder
1035            .add_directory(&source)
1036            .await
1037            .expect("a recoverable failure should not reserve the directory");
1038        assert_eq!(builder.state.entries.node_count(), 2);
1039    }
1040
1041    #[tokio::test]
1042    async fn repeated_directory_additions_use_linear_component_storage() {
1043        const DIRECTORIES: usize = 256;
1044
1045        let temp = tempdir().expect("temporary directory should be created");
1046        let mut builder = NoopArchiveBuilder::default().builder();
1047        for index in 0..DIRECTORIES {
1048            let source = temp.path().join(format!("directory-{index}"));
1049            fs::create_dir(&source).expect("source directory should be created");
1050            builder
1051                .add_directory(&source)
1052                .await
1053                .expect("empty source directory should be added");
1054        }
1055
1056        assert_eq!(builder.state.entries.node_count(), DIRECTORIES + 1);
1057    }
1058}