Skip to main content

pkgsrc/
archive.rs

1/*
2 * Copyright (c) 2026 Jonathan Perkin <jonathan@perkin.org.uk>
3 *
4 * Permission to use, copy, modify, and distribute this software for any
5 * purpose with or without fee is hereby granted, provided that the above
6 * copyright notice and this permission notice appear in all copies.
7 *
8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 */
16
17/*!
18 * Read and write pkgsrc binary packages.
19 *
20 * pkgsrc binary packages come in two formats:
21 *
22 * 1. **Unsigned packages**: Compressed tar archives (`.tgz`, `.tbz`, etc.)
23 *    containing package metadata (`+CONTENTS`, `+COMMENT`, `+DESC`, etc.)
24 *    and the package files.
25 *
26 * 2. **Signed packages**: `ar(1)` archives containing:
27 *    - `+PKG_HASH`: Hash metadata for verification
28 *    - `+PKG_GPG_SIGNATURE`: GPG signature of the hash file
29 *    - The original compressed tarball
30 *
31 * This module provides a two-layer API:
32 *
33 * ## Low-level (tar-style streaming)
34 *
35 * - [`Archive`]: Streaming access to archive entries
36 * - [`Builder`]: Create new archives by appending entries
37 *
38 * ## High-level (convenience)
39 *
40 * - [`BinaryPackage`]: Cached metadata with fast reads and convenience methods
41 * - [`SignedArchive`]: Output type for signed packages
42 *
43 * # Examples
44 *
45 * ## Fast metadata reading
46 *
47 * ```no_run
48 * use pkgsrc::archive::BinaryPackage;
49 *
50 * let pkg = BinaryPackage::open("package-1.0.tgz")?;
51 * println!("Package: {}", pkg.pkgname().unwrap_or("unknown"));
52 * println!("Comment: {}", pkg.metadata().comment());
53 *
54 * // Convert to summary for repository management
55 * let summary = pkg.to_summary()?;
56 * # Ok::<(), pkgsrc::archive::ArchiveError>(())
57 * ```
58 *
59 * ## Installing a package (iterating entries)
60 *
61 * ```no_run
62 * use pkgsrc::archive::BinaryPackage;
63 *
64 * let pkg = BinaryPackage::open("package-1.0.tgz")?;
65 *
66 * // Check dependencies first (fast, uses cached metadata)
67 * for dep in pkg.plist().depends() {
68 *     println!("Depends: {}", dep);
69 * }
70 *
71 * // Extract files (re-reads archive)
72 * pkg.extract_to("/usr/pkg")?;
73 * # Ok::<(), pkgsrc::archive::ArchiveError>(())
74 * ```
75 *
76 * ## Building a new package
77 *
78 * ```no_run
79 * use pkgsrc::archive::Builder;
80 *
81 * // Auto-detect compression from filename
82 * let mut builder = Builder::create("package-1.0.tgz")?;
83 * builder.append_metadata_file("+COMMENT", b"A test package")?;
84 * builder.append_file("bin/hello", b"#!/bin/sh\necho hello", 0o755)?;
85 * builder.finish()?;
86 * # Ok::<(), pkgsrc::archive::ArchiveError>(())
87 * ```
88 *
89 * ## Signing an existing package
90 *
91 * ```no_run
92 * use pkgsrc::archive::BinaryPackage;
93 *
94 * let pkg = BinaryPackage::open("package-1.0.tgz")?;
95 * let signature = b"GPG SIGNATURE DATA";
96 * pkg.sign(signature)?.write_to("package-1.0-signed.tgz")?;
97 * # Ok::<(), pkgsrc::archive::ArchiveError>(())
98 * ```
99 */
100
101use std::collections::HashMap;
102use std::fmt;
103use std::fmt::Write as FmtWrite;
104use std::fs::{self, File, Permissions};
105use std::io::{self, BufReader, Cursor, Read, Seek, SeekFrom, Write};
106#[cfg(unix)]
107use std::os::unix::fs::PermissionsExt;
108use std::path::{Path, PathBuf};
109use std::sync::OnceLock;
110
111use flate2::read::GzDecoder;
112use flate2::write::GzEncoder;
113use tar::{Archive as TarArchive, Builder as TarBuilder, Entries, Header};
114
115use crate::metadata::{Entry, FileRead, Metadata};
116use crate::plist::{self, Plist, PlistEntry};
117use crate::summary::Summary;
118
/// Parse an octal mode string into a `u32`.
///
/// Accepts strings made up solely of octal digits, with or without a
/// leading zero (`"0755"`, `"755"`, `"0644"`).
///
/// Returns `None` for an empty string, for any character that is not an
/// octal digit, and for values that do not fit in a `u32`.
fn parse_mode(mode_str: &str) -> Option<u32> {
    // `from_str_radix` tolerates a leading `+`, which would let "+755"
    // through as a numeric mode; insist on octal digits only.
    if !mode_str.bytes().all(|b| matches!(b, b'0'..=b'7')) {
        return None;
    }
    u32::from_str_radix(mode_str, 8).ok()
}
126
/// Default block size, in bytes, for package hashing (64 KiB).
pub const DEFAULT_BLOCK_SIZE: usize = 64 * 1024;

/// Current pkgsrc signature version.
pub const PKGSRC_SIGNATURE_VERSION: u32 = 1;

/// Leading bytes that identify gzip compressed data.
const GZIP_MAGIC: [u8; 2] = *b"\x1f\x8b";

/// Leading bytes that identify zstd compressed data.
const ZSTD_MAGIC: [u8; 4] = *b"\x28\xb5\x2f\xfd";
138
/// Result type for archive operations, with [`ArchiveError`] as the error.
pub type Result<T> = std::result::Result<T, ArchiveError>;
141
142/// Compression format for package archives.
143#[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)]
144#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
145pub enum Compression {
146    /// No compression (plain .tar)
147    None,
148    /// Gzip compression (.tgz, .tar.gz)
149    #[default]
150    Gzip,
151    /// Zstandard compression (.tzst, .tar.zst)
152    Zstd,
153}
154
155impl Compression {
156    /// Detect compression format from magic bytes.
157    #[must_use]
158    pub fn from_magic(bytes: &[u8]) -> Option<Self> {
159        if bytes.starts_with(&GZIP_MAGIC) {
160            Some(Self::Gzip)
161        } else if bytes.starts_with(&ZSTD_MAGIC) {
162            Some(Self::Zstd)
163        } else {
164            None
165        }
166    }
167
168    /// Detect compression format from file extension.
169    #[must_use]
170    pub fn from_extension(path: impl AsRef<Path>) -> Option<Self> {
171        let name = path.as_ref().file_name()?.to_str()?;
172        let lower = name.to_lowercase();
173
174        if lower.ends_with(".tgz") || lower.ends_with(".tar.gz") {
175            Some(Self::Gzip)
176        } else if lower.ends_with(".tzst") || lower.ends_with(".tar.zst") {
177            Some(Self::Zstd)
178        } else if lower.ends_with(".tar") {
179            Some(Self::None)
180        } else {
181            None
182        }
183    }
184
185    /// Return the canonical file extension for this compression type.
186    #[must_use]
187    pub fn extension(&self) -> &'static str {
188        match self {
189            Self::None => "tar",
190            Self::Gzip => "tgz",
191            Self::Zstd => "tzst",
192        }
193    }
194}
195
196impl fmt::Display for Compression {
197    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
198        match self {
199            Self::None => write!(f, "none"),
200            Self::Gzip => write!(f, "gzip"),
201            Self::Zstd => write!(f, "zstd"),
202        }
203    }
204}
205
206/**
207 * Detect the compression of an unsigned tarball from its magic bytes,
208 * falling back to the file extension and finally to gzip.
209 */
210fn detect_compression(magic: &[u8], path: Option<&Path>) -> Compression {
211    Compression::from_magic(magic)
212        .or_else(|| path.and_then(Compression::from_extension))
213        .unwrap_or(Compression::Gzip)
214}
215
216/**
217 * Wrap a reader in the decompression decoder for `compression`.
218 *
219 * The returned decoder borrows for as long as `reader` lives, so this
220 * serves both the owning callers (file and buffer readers) and the
221 * streaming signed path, which decodes a borrowed `ar` entry in place.
222 */
223fn decode<'r, R: Read + 'r>(
224    reader: R,
225    compression: Compression,
226) -> Result<Box<dyn Read + 'r>> {
227    Ok(match compression {
228        Compression::None => Box::new(reader),
229        Compression::Gzip => Box::new(GzDecoder::new(reader)),
230        Compression::Zstd => Box::new(zstd::stream::Decoder::new(reader)?),
231    })
232}
233
/// Hash algorithm used for package signing.
///
/// [`PkgHashAlgorithm::Sha512`] is the default variant.
#[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum PkgHashAlgorithm {
    /// SHA-512 (recommended, default); written as `SHA512` in `+PKG_HASH`.
    #[default]
    Sha512,
    /// SHA-256; written as `SHA256` in `+PKG_HASH`.
    Sha256,
}
244
245impl PkgHashAlgorithm {
246    /// Return the string representation as used in +PKG_HASH.
247    #[must_use]
248    pub fn as_str(&self) -> &'static str {
249        match self {
250            Self::Sha512 => "SHA512",
251            Self::Sha256 => "SHA256",
252        }
253    }
254
255    /// Return the hash output size in bytes.
256    #[must_use]
257    pub fn hash_size(&self) -> usize {
258        match self {
259            Self::Sha512 => 64,
260            Self::Sha256 => 32,
261        }
262    }
263
264    /// Compute hash of data.
265    #[must_use]
266    pub fn hash(&self, data: &[u8]) -> Vec<u8> {
267        use sha2::{Digest, Sha256, Sha512};
268        match self {
269            Self::Sha512 => Sha512::digest(data).to_vec(),
270            Self::Sha256 => Sha256::digest(data).to_vec(),
271        }
272    }
273
274    /// Format hash as lowercase hex string.
275    #[must_use]
276    pub fn hash_hex(&self, data: &[u8]) -> String {
277        let bytes = self.hash(data);
278        let mut s = String::with_capacity(bytes.len() * 2);
279        for b in &bytes {
280            let _ = write!(s, "{b:02x}");
281        }
282        s
283    }
284}
285
286impl fmt::Display for PkgHashAlgorithm {
287    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
288        write!(f, "{}", self.as_str())
289    }
290}
291
292impl std::str::FromStr for PkgHashAlgorithm {
293    type Err = ArchiveError;
294
295    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
296        match s.to_uppercase().as_str() {
297            "SHA512" => Ok(Self::Sha512),
298            "SHA256" => Ok(Self::Sha256),
299            _ => Err(ArchiveError::UnsupportedAlgorithm(s.to_string())),
300        }
301    }
302}
303
/// Error type for archive operations.
///
/// The enum is `#[non_exhaustive]`, so matches outside this crate need a
/// wildcard arm.
#[derive(Debug, thiserror::Error)]
#[non_exhaustive]
pub enum ArchiveError {
    /// I/O error; converted from [`io::Error`] via `From`, so `?` works.
    #[error("I/O error: {0}")]
    Io(#[from] io::Error),

    /// Invalid archive format.
    #[error("invalid archive format: {0}")]
    InvalidFormat(String),

    /// Invalid `+PKG_HASH` format (missing header line, malformed or
    /// unknown header field, missing package name).
    #[error("invalid +PKG_HASH format: {0}")]
    InvalidPkgHash(String),

    /// Missing required metadata.
    #[error("missing required metadata: {0}")]
    MissingMetadata(String),

    /// Invalid metadata content.
    #[error("invalid metadata: {0}")]
    InvalidMetadata(String),

    /// Plist parsing error; converted from the plist module's error type
    /// via `From`.
    #[error("plist error: {0}")]
    Plist(#[from] crate::plist::PlistError),

    /// Hash verification failed (block digest, block count or total size
    /// did not match).
    #[error("hash verification failed: {0}")]
    HashMismatch(String),

    /// Invalid package hash block size; a block size of zero is rejected.
    #[error("invalid package hash block size: {0}")]
    InvalidBlockSize(usize),

    /// Unsupported hash algorithm name.
    #[error("unsupported hash algorithm: {0}")]
    UnsupportedAlgorithm(String),

    /// Unsupported compression.
    #[error("unsupported compression: {0}")]
    UnsupportedCompression(String),

    /// Unsupported operation.
    #[error("unsupported operation: {0}")]
    UnsupportedOperation(String),

    /// Summary generation error.
    #[error("summary error: {0}")]
    Summary(String),

    /// No path available for operation.
    #[error("no path available: {0}")]
    NoPath(String),
}
360
/// Switches controlling how package files are extracted.
///
/// Every switch is off by default; turn them on with the `with_*` builder
/// methods or by setting the public fields directly.
#[derive(Clone, Debug, Default, Eq, Hash, PartialEq)]
pub struct ExtractOptions {
    /// Apply file modes from plist `@mode` directives.
    pub apply_mode: bool,
    /// Apply file ownership from plist `@owner`/`@group` directives.
    /// Note: Requires root privileges to change ownership.
    pub apply_ownership: bool,
    /// Preserve original timestamps from the archive.
    pub preserve_mtime: bool,
}

impl ExtractOptions {
    /// Options with every switch disabled (same as `Default`).
    #[must_use]
    pub fn new() -> Self {
        Self {
            apply_mode: false,
            apply_ownership: false,
            preserve_mtime: false,
        }
    }

    /// Turn on applying file modes from the plist.
    #[must_use]
    pub fn with_mode(self) -> Self {
        Self {
            apply_mode: true,
            ..self
        }
    }

    /// Turn on applying file ownership from the plist.
    #[must_use]
    pub fn with_ownership(self) -> Self {
        Self {
            apply_ownership: true,
            ..self
        }
    }

    /// Turn on preserving the original timestamps.
    #[must_use]
    pub fn with_mtime(self) -> Self {
        Self {
            preserve_mtime: true,
            ..self
        }
    }
}
401
/// Result of extracting a single file.
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
pub struct ExtractedFile {
    /// Path where the file was extracted.
    pub path: PathBuf,
    /// Whether this is a metadata file (starts with +).
    pub is_metadata: bool,
    /// MD5 checksum from plist, if present.
    pub expected_checksum: Option<String>,
    /// Mode applied to the file, or `None` if no mode was applied.
    pub mode: Option<u32>,
}
414
/// A single failure reported by [`BinaryPackage::verify_checksums`].
///
/// Pairs the affected path and its recorded checksum with the reason the
/// check failed.
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
pub struct ChecksumFailure {
    /// Path of the file that failed verification.
    pub path: PathBuf,
    /// Expected MD5 checksum recorded in the packing list.
    pub expected: String,
    /// Why verification failed.
    pub kind: ChecksumFailureKind,
}
425
/// Reason a file failed checksum verification.
///
/// Carried in [`ChecksumFailure::kind`].
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
pub enum ChecksumFailureKind {
    /// The file was not present at the expected path.
    Missing,
    /// The file was present but its checksum did not match.
    Mismatch {
        /// Checksum computed from the file on disk.
        actual: String,
    },
}
437
/// The `+PKG_HASH` file contents for signed packages.
///
/// This structure represents the hash metadata file used in signed pkgsrc
/// packages. It contains information needed to verify the package integrity.
///
/// # Format
///
/// The `+PKG_HASH` file has the following format:
///
/// ```text
/// pkgsrc signature
/// version: 1
/// pkgname: package-1.0
/// algorithm: SHA512
/// block size: 65536
/// file size: 123456
/// <hash1>
/// <hash2>
/// ...
/// ```
///
/// The `Display` and `FromStr` implementations write and read this format.
#[derive(Clone, Debug, Default, Eq, Hash, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct PkgHash {
    /// Signature format version (`version:` header).
    version: u32,
    /// Package name (`pkgname:` header).
    pkgname: String,
    /// Digest algorithm used for every block (`algorithm:` header).
    algorithm: PkgHashAlgorithm,
    /// Number of bytes hashed per block (`block size:` header).
    block_size: usize,
    /// Total size in bytes of the hashed tarball (`file size:` header).
    file_size: u64,
    /// One hex digest per block, in file order.
    hashes: Vec<String>,
}
468
469impl PkgHash {
470    /// Create a new `PkgHash` with default settings.
471    #[must_use]
472    pub fn new(pkgname: impl Into<String>) -> Self {
473        Self {
474            version: PKGSRC_SIGNATURE_VERSION,
475            pkgname: pkgname.into(),
476            algorithm: PkgHashAlgorithm::default(),
477            block_size: DEFAULT_BLOCK_SIZE,
478            file_size: 0,
479            hashes: Vec::new(),
480        }
481    }
482
483    /// Generate `PkgHash` from a tarball.
484    pub fn from_tarball<R: Read>(
485        pkgname: impl Into<String>,
486        mut reader: R,
487        algorithm: PkgHashAlgorithm,
488        block_size: usize,
489    ) -> Result<Self> {
490        if block_size == 0 {
491            return Err(ArchiveError::InvalidBlockSize(block_size));
492        }
493
494        let mut pkg_hash = PkgHash::new(pkgname);
495        pkg_hash.algorithm = algorithm;
496        pkg_hash.block_size = block_size;
497
498        let mut buffer = vec![0u8; block_size];
499        let mut total_size: u64 = 0;
500
501        loop {
502            let bytes_read = reader.read(&mut buffer)?;
503            if bytes_read == 0 {
504                break;
505            }
506
507            total_size += bytes_read as u64;
508            let hash = algorithm.hash_hex(&buffer[..bytes_read]);
509            pkg_hash.hashes.push(hash);
510        }
511
512        pkg_hash.file_size = total_size;
513        Ok(pkg_hash)
514    }
515
516    /// Return the pkgsrc signature version.
517    #[must_use]
518    pub fn version(&self) -> u32 {
519        self.version
520    }
521
522    /// Return the package name.
523    #[must_use]
524    pub fn pkgname(&self) -> &str {
525        &self.pkgname
526    }
527
528    /// Return the hash algorithm.
529    #[must_use]
530    pub fn algorithm(&self) -> PkgHashAlgorithm {
531        self.algorithm
532    }
533
534    /// Return the block size.
535    #[must_use]
536    pub fn block_size(&self) -> usize {
537        self.block_size
538    }
539
540    /// Return the original file size.
541    #[must_use]
542    pub fn file_size(&self) -> u64 {
543        self.file_size
544    }
545
546    /// Return the block hashes.
547    #[must_use]
548    pub fn hashes(&self) -> &[String] {
549        &self.hashes
550    }
551
552    /// Verify a tarball against this hash.
553    pub fn verify<R: Read>(&self, mut reader: R) -> Result<bool> {
554        if self.block_size == 0 {
555            return Err(ArchiveError::InvalidBlockSize(self.block_size));
556        }
557
558        let mut buffer = vec![0u8; self.block_size];
559        let mut hash_idx = 0;
560        let mut total_size: u64 = 0;
561
562        loop {
563            let bytes_read = reader.read(&mut buffer)?;
564            if bytes_read == 0 {
565                break;
566            }
567
568            total_size += bytes_read as u64;
569
570            if hash_idx >= self.hashes.len() {
571                return Err(ArchiveError::HashMismatch(
572                    "more data than expected".into(),
573                ));
574            }
575
576            let computed = self.algorithm.hash_hex(&buffer[..bytes_read]);
577            if computed != self.hashes[hash_idx] {
578                return Err(ArchiveError::HashMismatch(format!(
579                    "block {} hash mismatch",
580                    hash_idx
581                )));
582            }
583
584            hash_idx += 1;
585        }
586
587        if total_size != self.file_size {
588            return Err(ArchiveError::HashMismatch(format!(
589                "file size mismatch: expected {}, got {}",
590                self.file_size, total_size
591            )));
592        }
593
594        if hash_idx != self.hashes.len() {
595            return Err(ArchiveError::HashMismatch(
596                "fewer blocks than expected".into(),
597            ));
598        }
599
600        Ok(true)
601    }
602}
603
604impl fmt::Display for PkgHash {
605    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
606        writeln!(f, "pkgsrc signature")?;
607        writeln!(f, "version: {}", self.version)?;
608        writeln!(f, "pkgname: {}", self.pkgname)?;
609        writeln!(f, "algorithm: {}", self.algorithm)?;
610        writeln!(f, "block size: {}", self.block_size)?;
611        writeln!(f, "file size: {}", self.file_size)?;
612        for hash in &self.hashes {
613            writeln!(f, "{}", hash)?;
614        }
615        Ok(())
616    }
617}
618
619impl std::str::FromStr for PkgHash {
620    type Err = ArchiveError;
621
622    /**
623     * Parse a `PkgHash` from `+PKG_HASH` file contents.
624     */
625    fn from_str(s: &str) -> Result<Self> {
626        let lines: Vec<&str> = s.lines().collect();
627
628        if lines.is_empty() || lines[0] != "pkgsrc signature" {
629            return Err(ArchiveError::InvalidPkgHash(
630                "missing 'pkgsrc signature' header".into(),
631            ));
632        }
633
634        let mut pkg_hash = PkgHash::default();
635        let mut header_complete = false;
636        let mut line_idx = 1;
637
638        while line_idx < lines.len() && !header_complete {
639            let line = lines[line_idx];
640
641            if let Some((key, value)) = line.split_once(": ") {
642                match key {
643                    "version" => {
644                        pkg_hash.version = value.parse().map_err(|_| {
645                            ArchiveError::InvalidPkgHash(format!(
646                                "invalid version: {}",
647                                value
648                            ))
649                        })?;
650                    }
651                    "pkgname" => {
652                        pkg_hash.pkgname = value.to_string();
653                    }
654                    "algorithm" => {
655                        pkg_hash.algorithm = value.parse()?;
656                    }
657                    "block size" => {
658                        pkg_hash.block_size = value.parse().map_err(|_| {
659                            ArchiveError::InvalidPkgHash(format!(
660                                "invalid block size: {}",
661                                value
662                            ))
663                        })?;
664                    }
665                    "file size" => {
666                        pkg_hash.file_size = value.parse().map_err(|_| {
667                            ArchiveError::InvalidPkgHash(format!(
668                                "invalid file size: {}",
669                                value
670                            ))
671                        })?;
672                        header_complete = true;
673                    }
674                    _ => {
675                        return Err(ArchiveError::InvalidPkgHash(format!(
676                            "unknown header field: {}",
677                            key
678                        )));
679                    }
680                }
681            } else if !line.is_empty() {
682                header_complete = true;
683                line_idx -= 1;
684            }
685            line_idx += 1;
686        }
687
688        while line_idx < lines.len() {
689            let line = lines[line_idx].trim();
690            if !line.is_empty() {
691                pkg_hash.hashes.push(line.to_string());
692            }
693            line_idx += 1;
694        }
695
696        if pkg_hash.pkgname.is_empty() {
697            return Err(ArchiveError::InvalidPkgHash("missing pkgname".into()));
698        }
699
700        if pkg_hash.block_size == 0 {
701            return Err(ArchiveError::InvalidBlockSize(pkg_hash.block_size));
702        }
703
704        Ok(pkg_hash)
705    }
706}
707
/// Type of binary package archive.
///
/// [`BinaryPackage::open`] picks the signed path when the file starts with
/// the `ar` magic `!<arch>`, and the unsigned path otherwise.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum ArchiveType {
    /// Unsigned package (plain compressed tarball)
    Unsigned,
    /// Signed package (ar archive containing tarball + signatures)
    Signed,
}
717
718/// Wrapper for different decompression decoders.
719///
720/// This is an implementation detail exposed due to the generic nature of
721/// [`Archive`]. Users should not need to interact with this type directly.
722#[doc(hidden)]
723#[allow(clippy::large_enum_variant)]
724pub enum Decoder<R: Read> {
725    None(R),
726    Gzip(GzDecoder<R>),
727    Zstd(zstd::stream::Decoder<'static, BufReader<R>>),
728}
729
730impl<R: Read> Read for Decoder<R> {
731    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
732        match self {
733            Decoder::None(r) => r.read(buf),
734            Decoder::Gzip(d) => d.read(buf),
735            Decoder::Zstd(d) => d.read(buf),
736        }
737    }
738}
739
/// Low-level streaming access to package archives.
///
/// This provides tar-style streaming access to archive entries. For most use
/// cases, prefer [`BinaryPackage`] which provides cached metadata and convenience
/// methods.
///
/// # Example
///
/// ```no_run
/// use pkgsrc::archive::{Archive, Compression};
/// use std::io::Read;
///
/// let mut archive = Archive::open("package-1.0.tgz")?;
/// for entry in archive.entries()? {
///     let entry = entry?;
///     println!("{}", entry.path()?.display());
/// }
/// # Ok::<(), pkgsrc::archive::ArchiveError>(())
/// ```
pub struct Archive<R: Read> {
    /// Tar reader layered over the decompression decoder.
    inner: TarArchive<Decoder<R>>,
    /// Compression format the decoder was built for.
    compression: Compression,
}
763
764impl Archive<BufReader<File>> {
765    /// Open an archive from a file path.
766    ///
767    /// Automatically detects compression format from magic bytes.
768    pub fn open(path: impl AsRef<Path>) -> Result<Self> {
769        let path = path.as_ref();
770        let file = File::open(path)?;
771        let mut reader = BufReader::new(file);
772
773        // Read magic bytes for compression detection
774        let mut magic = [0u8; 8];
775        reader.read_exact(&mut magic)?;
776        reader.seek(SeekFrom::Start(0))?;
777
778        let compression = Compression::from_magic(&magic)
779            .or_else(|| Compression::from_extension(path))
780            .unwrap_or(Compression::Gzip);
781
782        Archive::with_compression(reader, compression)
783    }
784}
785
786impl<R: Read> Archive<R> {
787    /// Create a new archive from a reader.
788    ///
789    /// Defaults to gzip compression. Use [`Archive::with_compression`] to
790    /// specify a different format, or [`Archive::open`] to auto-detect from
791    /// a file path.
792    #[must_use = "creating an archive has no effect if not used"]
793    pub fn new(reader: R) -> Result<Self> {
794        Self::with_compression(reader, Compression::Gzip)
795    }
796
797    /// Create a new archive from a reader with explicit compression.
798    #[must_use = "creating an archive has no effect if not used"]
799    pub fn with_compression(
800        reader: R,
801        compression: Compression,
802    ) -> Result<Self> {
803        let decoder = match compression {
804            Compression::None => Decoder::None(reader),
805            Compression::Gzip => Decoder::Gzip(GzDecoder::new(reader)),
806            Compression::Zstd => {
807                Decoder::Zstd(zstd::stream::Decoder::new(reader)?)
808            }
809        };
810
811        Ok(Archive {
812            inner: TarArchive::new(decoder),
813            compression,
814        })
815    }
816
817    /// Return the compression format.
818    #[must_use]
819    pub fn compression(&self) -> Compression {
820        self.compression
821    }
822
823    /// Return an iterator over the entries in this archive.
824    #[must_use = "entries iterator must be used to iterate"]
825    pub fn entries(&mut self) -> Result<Entries<'_, Decoder<R>>> {
826        Ok(self.inner.entries()?)
827    }
828}
829
/// Options for converting a [`BinaryPackage`] to a [`Summary`].
///
/// The derived `Default` leaves every option disabled.
#[derive(Clone, Debug, Default, Eq, Hash, PartialEq)]
pub struct SummaryOptions {
    /// Compute the SHA256 checksum of the package file.
    ///
    /// This requires re-reading the entire package file, which can be slow
    /// for large packages. Default is `false`.
    pub compute_file_cksum: bool,
}
839
/// A pkgsrc binary package with cached metadata.
///
/// This provides fast access to package metadata without re-reading the
/// archive. The metadata is read once during [`BinaryPackage::open`], and subsequent
/// operations like [`BinaryPackage::archive`] or [`BinaryPackage::extract_to`] re-open
/// the archive as needed.
///
/// # Example
///
/// ```no_run
/// use pkgsrc::archive::BinaryPackage;
///
/// // Fast metadata access
/// let pkg = BinaryPackage::open("package-1.0.tgz")?;
/// println!("Name: {}", pkg.pkgname().unwrap_or("unknown"));
/// println!("Comment: {}", pkg.metadata().comment());
///
/// // Generate summary for repository
/// let summary = pkg.to_summary()?;
///
/// // Extract files (re-reads archive)
/// pkg.extract_to("/usr/pkg")?;
/// # Ok::<(), pkgsrc::archive::ArchiveError>(())
/// ```
#[derive(Debug)]
pub struct BinaryPackage {
    /// Path to the package file.
    path: PathBuf,

    /// Detected compression format.
    compression: Compression,

    /// Type of package (signed or unsigned).
    archive_type: ArchiveType,

    /// Parsed metadata from the package.
    metadata: Metadata,

    /** Package name from the packing list `@name`, captured at open. */
    pkgname: Option<String>,

    /** Packing list, materialised on first [`BinaryPackage::plist`] call. */
    plist: OnceLock<Plist>,

    /// Build info key-value pairs parsed from the `key=value` lines of
    /// `+BUILD_INFO`; a key that appears more than once keeps every value,
    /// in file order.
    build_info: HashMap<String, Vec<String>>,

    /// Package hash (for signed packages); `None` for unsigned packages.
    pkg_hash: Option<PkgHash>,

    /// GPG signature (for signed packages); `None` for unsigned packages.
    gpg_signature: Option<Vec<u8>>,

    /// Size in bytes of the package file on disk.
    file_size: u64,
}
896
897impl BinaryPackage {
898    /// Open a package from a file path.
899    ///
900    /// This reads only the metadata entries at the start of the archive,
901    /// providing fast access to package information without decompressing
902    /// the entire file.
903    pub fn open(path: impl AsRef<Path>) -> Result<Self> {
904        let path = path.as_ref();
905        let file = File::open(path)?;
906        let file_size = file.metadata()?.len();
907        let mut reader = BufReader::new(file);
908
909        // Read magic bytes
910        let mut magic = [0u8; 8];
911        reader.read_exact(&mut magic)?;
912        reader.seek(SeekFrom::Start(0))?;
913
914        // Check for ar archive (signed package)
915        if &magic[..7] == b"!<arch>" {
916            Self::read_signed(path, reader, file_size)
917        } else {
918            Self::read_unsigned(path, reader, &magic, file_size)
919        }
920    }
921
922    /**
923     * Read an unsigned package (compressed tarball).
924     */
925    fn read_unsigned<R: Read + Seek>(
926        path: &Path,
927        reader: R,
928        magic: &[u8],
929        file_size: u64,
930    ) -> Result<Self> {
931        let compression = detect_compression(magic, Some(path));
932        let decompressed = decode(reader, compression)?;
933
934        let mut archive = TarArchive::new(decompressed);
935        let mut metadata = Metadata::new();
936        let mut build_info: HashMap<String, Vec<String>> = HashMap::new();
937
938        for entry_result in archive.entries()? {
939            let mut entry = entry_result?;
940            let entry_path = entry.path()?.into_owned();
941
942            // Stop at first non-metadata file (fast path)
943            let Some(entry_type) =
944                entry_path.to_str().and_then(Entry::from_filename)
945            else {
946                break;
947            };
948
949            // Pre-allocate based on entry size to avoid reallocation during read
950            let entry_size = entry.header().size().unwrap_or(0) as usize;
951            let mut content = String::with_capacity(entry_size);
952            entry.read_to_string(&mut content).map_err(|e| {
953                io::Error::new(
954                    e.kind(),
955                    format!("{}: {}", entry_path.display(), e),
956                )
957            })?;
958            metadata.read_metadata(entry_type, &content).map_err(|e| {
959                ArchiveError::InvalidMetadata(format!(
960                    "{}: {}",
961                    entry_path.display(),
962                    e
963                ))
964            })?;
965
966            if entry_path.as_os_str() == "+BUILD_INFO" {
967                for line in content.lines() {
968                    if let Some((key, value)) = line.split_once('=') {
969                        build_info
970                            .entry(key.to_string())
971                            .or_default()
972                            .push(value.to_string());
973                    }
974                }
975            }
976        }
977
978        metadata.validate().map_err(|e| {
979            ArchiveError::MissingMetadata(format!("incomplete package: {}", e))
980        })?;
981        let pkgname = Self::validate_contents(metadata.contents().as_bytes())?;
982
983        Ok(Self {
984            path: path.to_path_buf(),
985            compression,
986            archive_type: ArchiveType::Unsigned,
987            metadata,
988            pkgname,
989            plist: OnceLock::new(),
990            build_info,
991            pkg_hash: None,
992            gpg_signature: None,
993            file_size,
994        })
995    }
996
997    /**
998     * Read a signed package (ar archive).
999     */
1000    fn read_signed<R: Read>(
1001        path: &Path,
1002        reader: R,
1003        file_size: u64,
1004    ) -> Result<Self> {
1005        let mut ar = ar::Archive::new(reader);
1006
1007        let mut pkg_hash_content: Option<String> = None;
1008        let mut gpg_signature: Option<Vec<u8>> = None;
1009        let mut metadata = Metadata::new();
1010        let mut build_info: HashMap<String, Vec<String>> = HashMap::new();
1011        let mut compression = Compression::Gzip;
1012
1013        loop {
1014            let mut entry = match ar.next_entry() {
1015                Some(Ok(entry)) => entry,
1016                Some(Err(e)) if e.kind() == io::ErrorKind::UnexpectedEof => {
1017                    break;
1018                }
1019                Some(Err(e)) => return Err(e.into()),
1020                None => break,
1021            };
1022            let name = String::from_utf8_lossy(entry.header().identifier())
1023                .to_string();
1024
1025            match name.as_str() {
1026                "+PKG_HASH" => {
1027                    let mut content = String::new();
1028                    entry.read_to_string(&mut content).map_err(|e| {
1029                        io::Error::new(e.kind(), format!("{name}: {e}"))
1030                    })?;
1031                    pkg_hash_content = Some(content);
1032                }
1033                "+PKG_GPG_SIGNATURE" => {
1034                    let mut data = Vec::new();
1035                    entry.read_to_end(&mut data)?;
1036                    gpg_signature = Some(data);
1037                }
1038                _ if name.ends_with(".tgz")
1039                    || name.ends_with(".tzst")
1040                    || name.ends_with(".tar") =>
1041                {
1042                    // Detect compression from inner tarball name
1043                    compression = Compression::from_extension(&name)
1044                        .unwrap_or(Compression::Gzip);
1045
1046                    let decompressed = decode(entry, compression)?;
1047                    let mut archive = TarArchive::new(decompressed);
1048
1049                    for tar_entry_result in archive.entries()? {
1050                        let mut tar_entry = tar_entry_result?;
1051                        let entry_path = tar_entry.path()?.into_owned();
1052
1053                        let Some(entry_type) =
1054                            entry_path.to_str().and_then(Entry::from_filename)
1055                        else {
1056                            break;
1057                        };
1058
1059                        // Pre-allocate based on entry size to avoid reallocation
1060                        let entry_size =
1061                            tar_entry.header().size().unwrap_or(0) as usize;
1062                        let mut content = String::with_capacity(entry_size);
1063                        tar_entry.read_to_string(&mut content).map_err(
1064                            |e| {
1065                                io::Error::new(
1066                                    e.kind(),
1067                                    format!("{}: {}", entry_path.display(), e),
1068                                )
1069                            },
1070                        )?;
1071                        metadata.read_metadata(entry_type, &content).map_err(
1072                            |e| {
1073                                ArchiveError::InvalidMetadata(format!(
1074                                    "{}: {}",
1075                                    entry_path.display(),
1076                                    e
1077                                ))
1078                            },
1079                        )?;
1080
1081                        if entry_path.as_os_str() == "+BUILD_INFO" {
1082                            for line in content.lines() {
1083                                if let Some((key, value)) = line.split_once('=')
1084                                {
1085                                    build_info
1086                                        .entry(key.to_string())
1087                                        .or_default()
1088                                        .push(value.to_string());
1089                                }
1090                            }
1091                        }
1092                    }
1093                    break;
1094                }
1095                _ => {}
1096            }
1097        }
1098
1099        let pkg_hash: Option<PkgHash> =
1100            pkg_hash_content.as_deref().map(str::parse).transpose()?;
1101
1102        metadata.validate().map_err(|e| {
1103            ArchiveError::MissingMetadata(format!("incomplete package: {}", e))
1104        })?;
1105        let pkgname = Self::validate_contents(metadata.contents().as_bytes())?;
1106
1107        Ok(Self {
1108            path: path.to_path_buf(),
1109            compression,
1110            archive_type: ArchiveType::Signed,
1111            metadata,
1112            pkgname,
1113            plist: OnceLock::new(),
1114            build_info,
1115            pkg_hash,
1116            gpg_signature,
1117            file_size,
1118        })
1119    }
1120
1121    /*
1122     * Validate the packing list without materialising owned entries,
1123     * capturing the `@name` value.  The owned Plist is built lazily on
1124     * first plist() access; open() still fails on malformed input.
1125     */
1126    fn validate_contents(bytes: &[u8]) -> Result<Option<String>> {
1127        let mut pkgname = None;
1128        for entry in plist::parse(bytes) {
1129            if let PlistEntry::Name(name) = entry?
1130                && pkgname.is_none()
1131            {
1132                pkgname = Some(name.into_owned());
1133            }
1134        }
1135        Ok(pkgname)
1136    }
1137
1138    /// Return the path to the package file.
1139    #[must_use]
1140    pub fn path(&self) -> &Path {
1141        &self.path
1142    }
1143
1144    /// Return the compression format.
1145    #[must_use]
1146    pub fn compression(&self) -> Compression {
1147        self.compression
1148    }
1149
1150    /// Return the archive type (signed or unsigned).
1151    #[must_use]
1152    pub fn archive_type(&self) -> ArchiveType {
1153        self.archive_type
1154    }
1155
1156    /// Return whether this package is signed.
1157    #[must_use]
1158    pub fn is_signed(&self) -> bool {
1159        self.archive_type == ArchiveType::Signed
1160    }
1161
1162    /// Return the package metadata.
1163    #[must_use]
1164    pub fn metadata(&self) -> &Metadata {
1165        &self.metadata
1166    }
1167
1168    /**
1169     * Return the packing list, materialising it on first access.
1170     */
1171    #[must_use]
1172    pub fn plist(&self) -> &Plist {
1173        self.plist.get_or_init(|| {
1174            Plist::from_bytes(self.metadata.contents().as_bytes())
1175                .expect("plist validated at open")
1176        })
1177    }
1178
1179    /**
1180     * Return the package name from the plist.
1181     */
1182    #[must_use]
1183    pub fn pkgname(&self) -> Option<&str> {
1184        self.pkgname.as_deref()
1185    }
1186
1187    /// Return the build info key-value pairs.
1188    #[must_use]
1189    pub fn build_info(&self) -> &HashMap<String, Vec<String>> {
1190        &self.build_info
1191    }
1192
1193    /// Get a specific build info value (first value if multiple exist).
1194    #[must_use]
1195    pub fn build_info_value(&self, key: &str) -> Option<&str> {
1196        self.build_info
1197            .get(key)
1198            .and_then(|v| v.first())
1199            .map(|s| s.as_str())
1200    }
1201
1202    /// Get all values for a build info key.
1203    #[must_use]
1204    pub fn build_info_values(&self, key: &str) -> Option<&[String]> {
1205        self.build_info.get(key).map(|v| v.as_slice())
1206    }
1207
1208    /// Return the package hash (for signed packages).
1209    #[must_use]
1210    pub fn pkg_hash(&self) -> Option<&PkgHash> {
1211        self.pkg_hash.as_ref()
1212    }
1213
1214    /// Return the GPG signature (for signed packages).
1215    #[must_use]
1216    pub fn gpg_signature(&self) -> Option<&[u8]> {
1217        self.gpg_signature.as_deref()
1218    }
1219
1220    /// Return the file size of the package.
1221    #[must_use]
1222    pub fn file_size(&self) -> u64 {
1223        self.file_size
1224    }
1225
1226    /// Open the archive for iteration (re-reads the file).
1227    pub fn archive(&self) -> Result<Archive<BufReader<File>>> {
1228        Archive::open(&self.path)
1229    }
1230
1231    /// Extract all files to a destination directory.
1232    ///
1233    /// This re-reads the archive and extracts all entries.
1234    pub fn extract_to(&self, dest: impl AsRef<Path>) -> Result<()> {
1235        let mut archive = self.archive()?;
1236        for entry in archive.entries()? {
1237            let mut entry = entry?;
1238            entry.unpack_in(dest.as_ref())?;
1239        }
1240        Ok(())
1241    }
1242
1243    /**
1244     * Extract files to a destination directory with plist-based permissions.
1245     *
1246     * This method extracts files and applies permissions specified in the
1247     * packing list (`@mode`, `@owner`, `@group` directives).
1248     *
1249     * # Arguments
1250     *
1251     * * `dest` - Destination directory for extraction
1252     * * `options` - Extraction options controlling mode/ownership application
1253     *
1254     * # Returns
1255     *
1256     * A vector of [`ExtractedFile`] describing each extracted file.
1257     *
1258     * # Example
1259     *
1260     * ```no_run
1261     * use pkgsrc::archive::{BinaryPackage, ExtractOptions};
1262     *
1263     * let pkg = BinaryPackage::open("package-1.0.tgz")?;
1264     * let options = ExtractOptions::new().with_mode();
1265     * let extracted = pkg.extract_with_plist("/usr/pkg", options)?;
1266     * for file in &extracted {
1267     *     println!("Extracted: {}", file.path.display());
1268     * }
1269     * # Ok::<(), pkgsrc::archive::ArchiveError>(())
1270     * ```
1271     */
1272    #[cfg(unix)]
1273    pub fn extract_with_plist(
1274        &self,
1275        dest: impl AsRef<Path>,
1276        options: ExtractOptions,
1277    ) -> Result<Vec<ExtractedFile>> {
1278        use crate::plist::FileInfo;
1279        use std::os::unix::ffi::OsStrExt;
1280
1281        let dest = dest.as_ref();
1282        let mut extracted = Vec::new();
1283
1284        if options.apply_ownership {
1285            return Err(ArchiveError::UnsupportedOperation(
1286                "plist ownership application is not implemented".into(),
1287            ));
1288        }
1289        if options.preserve_mtime {
1290            return Err(ArchiveError::UnsupportedOperation(
1291                "mtime preservation is not implemented".into(),
1292            ));
1293        }
1294
1295        // Build a map of file paths to their plist metadata
1296        let file_infos: HashMap<PathBuf, FileInfo> = self
1297            .plist()
1298            .files_with_info()
1299            .map(|info| (info.path.clone(), info))
1300            .collect();
1301
1302        let mut archive = self.archive()?;
1303        for entry_result in archive.entries()? {
1304            let mut entry = entry_result?;
1305            let entry_path = entry.path()?.into_owned();
1306
1307            // Determine if this is a metadata file
1308            let is_metadata =
1309                entry_path.as_os_str().as_bytes().starts_with(b"+");
1310
1311            // Extract the file
1312            entry.unpack_in(dest)?;
1313
1314            let full_path = dest.join(&entry_path);
1315
1316            // Look up plist metadata for this file
1317            let file_info = file_infos.get(&entry_path);
1318
1319            let mut applied_mode = None;
1320
1321            // Apply mode from plist if requested
1322            if options.apply_mode
1323                && !is_metadata
1324                && let Some(info) = file_info
1325                && let Some(mode_str) = &info.mode
1326                && let Some(mode) = parse_mode(mode_str)
1327                && full_path.exists()
1328                && !full_path.is_symlink()
1329            {
1330                fs::set_permissions(&full_path, Permissions::from_mode(mode))?;
1331                applied_mode = Some(mode);
1332            }
1333
1334            extracted.push(ExtractedFile {
1335                path: full_path,
1336                is_metadata,
1337                expected_checksum: file_info.and_then(|i| i.checksum.clone()),
1338                mode: applied_mode,
1339            });
1340        }
1341
1342        Ok(extracted)
1343    }
1344
1345    /**
1346     * Verify checksums of extracted files against plist MD5 values.
1347     *
1348     * Checks that files under `dest` match the MD5 checksums recorded in
1349     * the packing list.  Returns a [`ChecksumFailure`] for each file that
1350     * is missing or whose checksum does not match; an empty vector means
1351     * everything passed.
1352     */
1353    pub fn verify_checksums(
1354        &self,
1355        dest: impl AsRef<Path>,
1356    ) -> Result<Vec<ChecksumFailure>> {
1357        use md5::{Digest, Md5};
1358
1359        let dest = dest.as_ref();
1360        let mut failures = Vec::new();
1361
1362        for info in self.plist().files_with_info() {
1363            let Some(expected) = info.checksum else {
1364                continue;
1365            };
1366
1367            if info.symlink_target.is_some() {
1368                continue;
1369            }
1370
1371            let path = dest.join(&info.path);
1372
1373            if !path.exists() {
1374                failures.push(ChecksumFailure {
1375                    path,
1376                    expected,
1377                    kind: ChecksumFailureKind::Missing,
1378                });
1379                continue;
1380            }
1381
1382            let mut file = File::open(&path)?;
1383            let mut hasher = Md5::new();
1384            io::copy(&mut file, &mut hasher)?;
1385            let actual = format!("{:032x}", hasher.finalize());
1386
1387            if actual != expected {
1388                failures.push(ChecksumFailure {
1389                    path,
1390                    expected,
1391                    kind: ChecksumFailureKind::Mismatch { actual },
1392                });
1393            }
1394        }
1395
1396        Ok(failures)
1397    }
1398
1399    /// Sign this package.
1400    ///
1401    /// Re-reads the package file to compute hashes and create a signed archive.
1402    pub fn sign(&self, signature: &[u8]) -> Result<SignedArchive> {
1403        let pkgname = self
1404            .pkgname()
1405            .ok_or_else(|| ArchiveError::MissingMetadata("pkgname".into()))?
1406            .to_string();
1407
1408        // Read the tarball data
1409        let tarball = std::fs::read(&self.path)?;
1410
1411        // Generate hash
1412        let pkg_hash = PkgHash::from_tarball(
1413            &pkgname,
1414            Cursor::new(&tarball),
1415            PkgHashAlgorithm::Sha512,
1416            DEFAULT_BLOCK_SIZE,
1417        )?;
1418
1419        Ok(SignedArchive {
1420            pkgname,
1421            compression: self.compression,
1422            pkg_hash,
1423            signature: signature.to_vec(),
1424            tarball,
1425        })
1426    }
1427
1428    /// Convert this package to a [`Summary`] entry.
1429    ///
1430    /// This uses default options (no file checksum computation).
1431    /// Use [`to_summary_with_opts`](Self::to_summary_with_opts) for more control.
1432    pub fn to_summary(&self) -> Result<Summary> {
1433        self.to_summary_with_opts(&SummaryOptions::default())
1434    }
1435
1436    /**
1437     * Convert this package to a [`Summary`] entry with options.
1438     *
1439     * # Example
1440     *
1441     * ```no_run
1442     * use pkgsrc::archive::{BinaryPackage, SummaryOptions};
1443     *
1444     * let pkg = BinaryPackage::open("package-1.0.tgz")?;
1445     * let opts = SummaryOptions { compute_file_cksum: true };
1446     * let summary = pkg.to_summary_with_opts(&opts)?;
1447     * # Ok::<(), pkgsrc::archive::ArchiveError>(())
1448     * ```
1449     */
1450    pub fn to_summary_with_opts(
1451        &self,
1452        opts: &SummaryOptions,
1453    ) -> Result<Summary> {
1454        use sha2::{Digest, Sha256};
1455
1456        let pkgname = self
1457            .pkgname
1458            .as_deref()
1459            .map(crate::PkgName::new)
1460            .ok_or_else(|| ArchiveError::MissingMetadata("PKGNAME".into()))?;
1461
1462        /*
1463         * Collect depends and conflicts with a borrowing parse of the
1464         * packing list rather than materialising the owned Plist.
1465         */
1466        let mut conflicts: Vec<String> = Vec::new();
1467        let mut depends: Vec<String> = Vec::new();
1468        for entry in plist::parse(self.metadata.contents().as_bytes()) {
1469            match entry.expect("plist validated at open") {
1470                PlistEntry::PkgCfl(s) => conflicts.push(s.into_owned()),
1471                PlistEntry::PkgDep(s) => depends.push(s.into_owned()),
1472                _ => {}
1473            }
1474        }
1475
1476        // Helper to filter empty/whitespace-only strings
1477        let non_empty = |s: &&str| !s.trim().is_empty();
1478
1479        // Helper to convert &str to String, avoiding redundant into() calls
1480        let to_string = |s: &str| String::from(s);
1481
1482        // Compute SHA256 checksum of the package file if requested
1483        let file_cksum = if opts.compute_file_cksum && self.file_size > 0 {
1484            let mut file = File::open(&self.path)?;
1485            let mut hasher = Sha256::new();
1486            io::copy(&mut file, &mut hasher)?;
1487            let hash = hasher.finalize();
1488            const PREFIX: &str = "sha256 ";
1489            let mut s = String::with_capacity(PREFIX.len() + hash.len() * 2);
1490            s.push_str(PREFIX);
1491            for b in &hash {
1492                let _ = write!(s, "{b:02x}");
1493            }
1494            Some(s)
1495        } else {
1496            None
1497        };
1498
1499        Ok(Summary::new(
1500            pkgname,
1501            self.metadata.comment().to_string(),
1502            self.metadata.size_pkg().unwrap_or(0),
1503            to_string(self.build_info_value("BUILD_DATE").unwrap_or("")),
1504            self.build_info_value("CATEGORIES")
1505                .unwrap_or("")
1506                .split_whitespace()
1507                .map(String::from)
1508                .collect(),
1509            to_string(self.build_info_value("MACHINE_ARCH").unwrap_or("")),
1510            to_string(self.build_info_value("OPSYS").unwrap_or("")),
1511            to_string(self.build_info_value("OS_VERSION").unwrap_or("")),
1512            to_string(self.build_info_value("PKGPATH").unwrap_or("")),
1513            to_string(self.build_info_value("PKGTOOLS_VERSION").unwrap_or("")),
1514            self.metadata.desc().lines().map(String::from).collect(),
1515            // Optional fields - avoid Vec<String> allocation when empty
1516            Some(conflicts).filter(|v| !v.is_empty()),
1517            Some(depends).filter(|v| !v.is_empty()),
1518            self.build_info_value("HOMEPAGE")
1519                .filter(non_empty)
1520                .map(to_string),
1521            self.build_info_value("LICENSE").map(to_string),
1522            self.build_info_value("PKG_OPTIONS").map(to_string),
1523            self.build_info_value("PREV_PKGPATH")
1524                .filter(non_empty)
1525                .map(to_string),
1526            self.build_info_values("PROVIDES").map(|v| v.to_vec()),
1527            self.build_info_values("REQUIRES").map(|v| v.to_vec()),
1528            self.build_info_values("SUPERSEDES").map(|v| v.to_vec()),
1529            self.path
1530                .file_name()
1531                .map(|f| f.to_string_lossy().into_owned()),
1532            if self.file_size > 0 {
1533                Some(self.file_size)
1534            } else {
1535                None
1536            },
1537            file_cksum,
1538        ))
1539    }
1540}
1541
1542impl FileRead for BinaryPackage {
1543    fn pkgname(&self) -> &str {
1544        self.pkgname.as_deref().unwrap_or("")
1545    }
1546
1547    fn comment(&self) -> std::io::Result<String> {
1548        Ok(self.metadata.comment().to_string())
1549    }
1550
1551    fn contents(&self) -> std::io::Result<String> {
1552        Ok(self.metadata.contents().to_string())
1553    }
1554
1555    fn desc(&self) -> std::io::Result<String> {
1556        Ok(self.metadata.desc().to_string())
1557    }
1558
1559    fn build_info(&self) -> std::io::Result<Option<String>> {
1560        Ok(self.metadata.build_info().map(|v| v.join("\n")))
1561    }
1562
1563    fn build_version(&self) -> std::io::Result<Option<String>> {
1564        Ok(self.metadata.build_version().map(|v| v.join("\n")))
1565    }
1566
1567    fn deinstall(&self) -> std::io::Result<Option<String>> {
1568        Ok(self.metadata.deinstall().map(|s| s.to_string()))
1569    }
1570
1571    fn display(&self) -> std::io::Result<Option<String>> {
1572        Ok(self.metadata.display().map(|s| s.to_string()))
1573    }
1574
1575    fn install(&self) -> std::io::Result<Option<String>> {
1576        Ok(self.metadata.install().map(|s| s.to_string()))
1577    }
1578
1579    fn installed_info(&self) -> std::io::Result<Option<String>> {
1580        Ok(self.metadata.installed_info().map(|v| v.join("\n")))
1581    }
1582
1583    fn mtree_dirs(&self) -> std::io::Result<Option<String>> {
1584        Ok(self.metadata.mtree_dirs().map(|v| v.join("\n")))
1585    }
1586
1587    fn preserve(&self) -> std::io::Result<Option<String>> {
1588        Ok(self.metadata.preserve().map(|v| v.join("\n")))
1589    }
1590
1591    fn required_by(&self) -> std::io::Result<Option<String>> {
1592        Ok(self.metadata.required_by().map(|v| v.join("\n")))
1593    }
1594
1595    fn size_all(&self) -> std::io::Result<Option<String>> {
1596        Ok(self.metadata.size_all().map(|n| n.to_string()))
1597    }
1598
1599    fn size_pkg(&self) -> std::io::Result<Option<String>> {
1600        Ok(self.metadata.size_pkg().map(|n| n.to_string()))
1601    }
1602}
1603
1604impl TryFrom<&BinaryPackage> for Summary {
1605    type Error = ArchiveError;
1606
1607    fn try_from(pkg: &BinaryPackage) -> Result<Self> {
1608        pkg.to_summary()
1609    }
1610}
1611
/**
 * A single metadata member yielded by [`Members`].
 *
 * Each value is owned: the member's text is read completely into `content`
 * before it is handed out, so it stays valid after the iterator advances.
 */
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
pub struct MetadataMember {
    /** The kind of metadata file (`+CONTENTS`, `+BUILD_VERSION`, etc.). */
    pub entry: Entry,
    /** The full decoded contents of the member, untrimmed. */
    pub content: String,
}
1622
/**
 * Streaming reader for a binary package's leading metadata members.
 *
 * Unlike [`BinaryPackage::open`], which eagerly reads and stores every `+*`
 * member, this yields each metadata member as an owned [`MetadataMember`] on
 * demand.  Callers that only need a few fields (for example `+BUILD_VERSION`
 * and the `@blddep` lines of `+CONTENTS`) can read just those and stop,
 * without buffering members they do not care about.
 *
 * Iteration stops at the first member that is not a recognised metadata
 * file (one named by [`Entry::from_filename`]); in a well-formed package
 * that is the first regular file, so only the leading metadata is decoded.
 *
 * Signed packages are supported; the inner tarball is buffered in memory
 * before its metadata is streamed, so the saving applies to the unsigned
 * tarballs that dominate the hot path rather than to signed packages.
 *
 * # Example
 *
 * ```no_run
 * use pkgsrc::archive::MetadataReader;
 * use pkgsrc::metadata::Entry;
 * use pkgsrc::plist::{self, PlistEntry};
 *
 * let mut reader = MetadataReader::open("package-1.0.tgz")?;
 * let mut build_version = None;
 * let mut blddeps = Vec::new();
 *
 * for member in reader.members()? {
 *     let member = member?;
 *     match member.entry {
 *         Entry::BuildVersion => build_version = Some(member.content),
 *         Entry::Contents => {
 *             for entry in plist::parse(member.content.as_bytes()) {
 *                 if let PlistEntry::BldDep(d) = entry? {
 *                     blddeps.push(d.into_owned());
 *                 }
 *             }
 *         }
 *         _ => {}
 *     }
 * }
 * # Ok::<(), pkgsrc::archive::ArchiveError>(())
 * ```
 */
pub struct MetadataReader {
    /*
     * Tar reader over the decoded stream.  The decoder is boxed so signed
     * and unsigned packages share one type.
     */
    archive: TarArchive<Box<dyn Read>>,
}
1671
1672impl MetadataReader {
1673    /**
1674     * Open a package and prepare to stream its metadata members.
1675     *
1676     * Detects the signed (`ar`) and unsigned (compressed tarball) formats
1677     * from the leading magic bytes, falling back to the file extension for
1678     * compression when the magic is ambiguous.
1679     */
1680    pub fn open(path: impl AsRef<Path>) -> Result<Self> {
1681        let path = path.as_ref();
1682        let file = File::open(path)?;
1683        let reader = BufReader::new(file);
1684        Self::open_reader(reader, Some(path))
1685    }
1686
1687    /**
1688     * Build a reader from an already-opened seekable stream.  `path` is used
1689     * only as a compression-detection fallback for unsigned tarballs.
1690     */
1691    fn open_reader<R: Read + Seek + 'static>(
1692        mut reader: R,
1693        path: Option<&Path>,
1694    ) -> Result<Self> {
1695        let mut magic = [0u8; 8];
1696        reader.read_exact(&mut magic)?;
1697        reader.seek(SeekFrom::Start(0))?;
1698
1699        let decoder: Box<dyn Read> = if &magic[..7] == b"!<arch>" {
1700            Self::signed_decoder(reader)?
1701        } else {
1702            decode(reader, detect_compression(&magic, path))?
1703        };
1704
1705        Ok(Self {
1706            archive: TarArchive::new(decoder),
1707        })
1708    }
1709
1710    /**
1711     * Locate the inner tarball of a signed package and decode it.
1712     *
1713     * The tarball is buffered in memory; this avoids a second self-borrow
1714     * of the `ar` archive that streaming directly would require.
1715     */
1716    fn signed_decoder<R: Read>(reader: R) -> Result<Box<dyn Read>> {
1717        let mut ar = ar::Archive::new(reader);
1718
1719        loop {
1720            let mut entry = match ar.next_entry() {
1721                Some(Ok(entry)) => entry,
1722                Some(Err(e)) if e.kind() == io::ErrorKind::UnexpectedEof => {
1723                    break;
1724                }
1725                Some(Err(e)) => return Err(e.into()),
1726                None => break,
1727            };
1728            let name = String::from_utf8_lossy(entry.header().identifier())
1729                .into_owned();
1730
1731            if name.ends_with(".tgz")
1732                || name.ends_with(".tzst")
1733                || name.ends_with(".tar")
1734            {
1735                let compression = Compression::from_extension(&name)
1736                    .unwrap_or(Compression::Gzip);
1737                let mut data = Vec::new();
1738                entry.read_to_end(&mut data)?;
1739                return decode(Cursor::new(data), compression);
1740            }
1741        }
1742
1743        Err(ArchiveError::InvalidFormat(
1744            "signed package missing inner tarball".into(),
1745        ))
1746    }
1747
1748    /**
1749     * Return an iterator over the leading `+*` metadata members.
1750     *
1751     * This consumes the archive stream and may be called only once.
1752     */
1753    pub fn members(&mut self) -> Result<Members<'_>> {
1754        Ok(Members {
1755            entries: self.archive.entries()?,
1756            done: false,
1757        })
1758    }
1759}
1760
/**
 * Iterator over a package's leading metadata members.
 *
 * Returned by [`MetadataReader::members`].  Yields each recognised metadata
 * member as an owned [`MetadataMember`] and stops at the first member that is
 * not one.  A read or decode error is reported once, after which the iterator
 * is exhausted.
 */
pub struct Members<'a> {
    /* Underlying tar entry iterator borrowed from the reader. */
    entries: Entries<'a, Box<dyn Read>>,
    /* Set once an error or a non-metadata member has ended iteration. */
    done: bool,
}
1773
1774impl Iterator for Members<'_> {
1775    type Item = Result<MetadataMember>;
1776
1777    fn next(&mut self) -> Option<Self::Item> {
1778        if self.done {
1779            return None;
1780        }
1781
1782        let mut entry = match self.entries.next()? {
1783            Ok(entry) => entry,
1784            Err(e) => {
1785                self.done = true;
1786                return Some(Err(e.into()));
1787            }
1788        };
1789
1790        let name = match entry.path() {
1791            Ok(path) => path.into_owned(),
1792            Err(e) => {
1793                self.done = true;
1794                return Some(Err(e.into()));
1795            }
1796        };
1797
1798        /* Stop at the first member that is not a recognised metadata file. */
1799        let Some(kind) = name.to_str().and_then(Entry::from_filename) else {
1800            self.done = true;
1801            return None;
1802        };
1803
1804        let size = entry.header().size().unwrap_or(0) as usize;
1805        let mut content = String::with_capacity(size);
1806        if let Err(e) = entry.read_to_string(&mut content) {
1807            self.done = true;
1808            return Some(Err(e.into()));
1809        }
1810
1811        Some(Ok(MetadataMember {
1812            entry: kind,
1813            content,
1814        }))
1815    }
1816}
1817
1818impl std::iter::FusedIterator for Members<'_> {}
1819
/// Wrapper for different compression encoders.
///
/// Lets one concrete writer type stand for either supported format, so the
/// tar builder does not need to be generic over the encoder.
enum Encoder<W: Write> {
    /// gzip stream.
    Gzip(GzEncoder<W>),
    /// zstd stream.
    Zstd(zstd::stream::Encoder<'static, W>),
}
1825
1826impl<W: Write> Write for Encoder<W> {
1827    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
1828        match self {
1829            Encoder::Gzip(e) => e.write(buf),
1830            Encoder::Zstd(e) => e.write(buf),
1831        }
1832    }
1833
1834    fn flush(&mut self) -> io::Result<()> {
1835        match self {
1836            Encoder::Gzip(e) => e.flush(),
1837            Encoder::Zstd(e) => e.flush(),
1838        }
1839    }
1840}
1841
1842impl<W: Write> Encoder<W> {
1843    fn finish(self) -> io::Result<W> {
1844        match self {
1845            Encoder::Gzip(e) => e.finish(),
1846            Encoder::Zstd(e) => e.finish(),
1847        }
1848    }
1849}
1850
/// Build a new compressed package archive.
///
/// This provides tar-style streaming construction of package archives.
/// Supports gzip and zstd compression; uncompressed output is rejected by
/// [`Builder::with_compression`].
///
/// # Example
///
/// ```no_run
/// use pkgsrc::archive::Builder;
///
/// // Create a package with auto-detected compression from filename
/// let mut builder = Builder::create("package-1.0.tgz")?;
///
/// // Add metadata files first
/// builder.append_metadata_file("+CONTENTS", b"@name package-1.0\n")?;
/// builder.append_metadata_file("+COMMENT", b"A test package")?;
/// builder.append_metadata_file("+DESC", b"Description here")?;
///
/// // Add package files
/// builder.append_file("bin/hello", b"#!/bin/sh\necho hello", 0o755)?;
///
/// builder.finish()?;
/// # Ok::<(), pkgsrc::archive::ArchiveError>(())
/// ```
pub struct Builder<W: Write> {
    /// Tar writer layered on top of the compression encoder.
    inner: TarBuilder<Encoder<W>>,
    /// Format chosen at construction; reported by [`Builder::compression`].
    compression: Compression,
}
1879
1880impl Builder<File> {
1881    /// Create a new archive file with compression auto-detected from extension.
1882    ///
1883    /// Supported extensions:
1884    /// - `.tgz`, `.tar.gz` → gzip
1885    /// - `.tzst`, `.tar.zst` → zstd
1886    ///
1887    /// Falls back to gzip for unrecognized extensions.
1888    pub fn create(path: impl AsRef<Path>) -> Result<Self> {
1889        let path = path.as_ref();
1890        let compression =
1891            Compression::from_extension(path).unwrap_or(Compression::Gzip);
1892        let file = File::create(path)?;
1893        Self::with_compression(file, compression)
1894    }
1895}
1896
1897impl<W: Write> Builder<W> {
1898    /// Create a new archive builder with gzip compression (default).
1899    ///
1900    /// Use [`Builder::with_compression`] for other formats, or
1901    /// [`Builder::create`] to auto-detect from a file path.
1902    pub fn new(writer: W) -> Result<Self> {
1903        Self::with_compression(writer, Compression::Gzip)
1904    }
1905
1906    /// Create a new archive builder with explicit compression.
1907    pub fn with_compression(
1908        writer: W,
1909        compression: Compression,
1910    ) -> Result<Self> {
1911        let encoder = match compression {
1912            Compression::Gzip => Encoder::Gzip(GzEncoder::new(
1913                writer,
1914                flate2::Compression::default(),
1915            )),
1916            Compression::Zstd => Encoder::Zstd(zstd::stream::Encoder::new(
1917                writer,
1918                zstd::DEFAULT_COMPRESSION_LEVEL,
1919            )?),
1920            Compression::None => {
1921                return Err(ArchiveError::UnsupportedCompression(
1922                    "uncompressed archives not supported for building".into(),
1923                ));
1924            }
1925        };
1926
1927        Ok(Self {
1928            inner: TarBuilder::new(encoder),
1929            compression,
1930        })
1931    }
1932
1933    /// Return the compression format.
1934    #[must_use]
1935    pub fn compression(&self) -> Compression {
1936        self.compression
1937    }
1938
1939    /// Append a metadata file (e.g., +CONTENTS, +COMMENT).
1940    pub fn append_metadata_file(
1941        &mut self,
1942        name: &str,
1943        content: &[u8],
1944    ) -> Result<()> {
1945        let mut header = Header::new_gnu();
1946        header.set_size(content.len() as u64);
1947        header.set_mode(0o644);
1948        header.set_mtime(0);
1949        header.set_cksum();
1950
1951        self.inner.append_data(&mut header, name, content)?;
1952        Ok(())
1953    }
1954
1955    /// Append a file with the given path, content, and mode.
1956    pub fn append_file(
1957        &mut self,
1958        path: impl AsRef<Path>,
1959        content: &[u8],
1960        mode: u32,
1961    ) -> Result<()> {
1962        let mut header = Header::new_gnu();
1963        header.set_size(content.len() as u64);
1964        header.set_mode(mode);
1965        header.set_mtime(0);
1966        header.set_cksum();
1967
1968        self.inner.append_data(&mut header, path, content)?;
1969        Ok(())
1970    }
1971
1972    /// Append a file from disk.
1973    pub fn append_path(&mut self, path: impl AsRef<Path>) -> Result<()> {
1974        self.inner.append_path(path)?;
1975        Ok(())
1976    }
1977
1978    /// Finish building the archive and return the underlying writer.
1979    pub fn finish(self) -> Result<W> {
1980        let encoder = self.inner.into_inner()?;
1981        let writer = encoder.finish()?;
1982        Ok(writer)
1983    }
1984}
1985
/// A signed binary package ready to be written.
///
/// This is created by [`BinaryPackage::sign`] or [`SignedArchive::from_unsigned`].
#[derive(Debug)]
pub struct SignedArchive {
    /// Package name; also the stem of the tarball member name on write.
    pkgname: String,
    /// Compression of `tarball`; supplies the tarball member's extension.
    compression: Compression,
    /// Hash metadata, written as the `+PKG_HASH` member.
    pkg_hash: PkgHash,
    /// Signature bytes, written unchanged as `+PKG_GPG_SIGNATURE`.
    signature: Vec<u8>,
    /// The unsigned tarball bytes, written as the final member.
    tarball: Vec<u8>,
}
1997
1998impl SignedArchive {
1999    /// Create a signed archive from unsigned tarball bytes.
2000    ///
2001    /// This is useful for signing a freshly-built package without writing
2002    /// it to disk first.
2003    pub fn from_unsigned(
2004        data: Vec<u8>,
2005        pkgname: impl Into<String>,
2006        signature: &[u8],
2007        compression: Compression,
2008    ) -> Result<Self> {
2009        let pkgname = pkgname.into();
2010        let pkg_hash = PkgHash::from_tarball(
2011            &pkgname,
2012            Cursor::new(&data),
2013            PkgHashAlgorithm::Sha512,
2014            DEFAULT_BLOCK_SIZE,
2015        )?;
2016
2017        Ok(Self {
2018            pkgname,
2019            compression,
2020            pkg_hash,
2021            signature: signature.to_vec(),
2022            tarball: data,
2023        })
2024    }
2025
2026    /// Return the package name.
2027    #[must_use]
2028    pub fn pkgname(&self) -> &str {
2029        &self.pkgname
2030    }
2031
2032    /// Return the compression format of the inner tarball.
2033    #[must_use]
2034    pub fn compression(&self) -> Compression {
2035        self.compression
2036    }
2037
2038    /// Return the package hash.
2039    #[must_use]
2040    pub fn pkg_hash(&self) -> &PkgHash {
2041        &self.pkg_hash
2042    }
2043
2044    /// Write the signed package to a file.
2045    pub fn write_to(&self, path: impl AsRef<Path>) -> Result<()> {
2046        let file = File::create(path)?;
2047        self.write(file)
2048    }
2049
2050    /// Write the signed package to a writer.
2051    pub fn write<W: Write>(&self, writer: W) -> Result<()> {
2052        let mut ar = ar::Builder::new(writer);
2053
2054        // Write +PKG_HASH
2055        let hash_content = self.pkg_hash.to_string();
2056        let hash_bytes = hash_content.as_bytes();
2057        let mut header =
2058            ar::Header::new(b"+PKG_HASH".to_vec(), hash_bytes.len() as u64);
2059        header.set_mode(0o644);
2060        ar.append(&header, hash_bytes)?;
2061
2062        // Write +PKG_GPG_SIGNATURE
2063        let mut header = ar::Header::new(
2064            b"+PKG_GPG_SIGNATURE".to_vec(),
2065            self.signature.len() as u64,
2066        );
2067        header.set_mode(0o644);
2068        ar.append(&header, self.signature.as_slice())?;
2069
2070        // Write tarball with appropriate extension
2071        let tarball_name =
2072            format!("{}.{}", self.pkgname, self.compression.extension());
2073        let mut header = ar::Header::new(
2074            tarball_name.into_bytes(),
2075            self.tarball.len() as u64,
2076        );
2077        header.set_mode(0o644);
2078        ar.append(&header, self.tarball.as_slice())?;
2079
2080        Ok(())
2081    }
2082}
2083
2084#[cfg(test)]
2085mod tests {
2086    use super::*;
2087    use std::io::Cursor;
2088
2089    #[test]
2090    fn test_compression_from_magic() {
2091        assert_eq!(
2092            Compression::from_magic(&[0x1f, 0x8b, 0, 0, 0, 0]),
2093            Some(Compression::Gzip)
2094        );
2095        assert_eq!(
2096            Compression::from_magic(&[0x28, 0xb5, 0x2f, 0xfd, 0, 0]),
2097            Some(Compression::Zstd)
2098        );
2099        assert_eq!(
2100            Compression::from_magic(&[0x1f, 0x8b]),
2101            Some(Compression::Gzip)
2102        );
2103        assert_eq!(Compression::from_magic(&[0, 0, 0, 0, 0, 0]), None);
2104    }
2105
2106    #[test]
2107    fn test_compression_from_extension() {
2108        assert_eq!(
2109            Compression::from_extension("foo.tgz"),
2110            Some(Compression::Gzip)
2111        );
2112        assert_eq!(
2113            Compression::from_extension("foo.tar.gz"),
2114            Some(Compression::Gzip)
2115        );
2116        assert_eq!(
2117            Compression::from_extension("foo.tzst"),
2118            Some(Compression::Zstd)
2119        );
2120        assert_eq!(
2121            Compression::from_extension("foo.tar.zst"),
2122            Some(Compression::Zstd)
2123        );
2124        assert_eq!(
2125            Compression::from_extension("foo.tar"),
2126            Some(Compression::None)
2127        );
2128    }
2129
2130    #[test]
2131    fn test_hash_algorithm() {
2132        assert_eq!(
2133            "SHA512".parse::<PkgHashAlgorithm>().ok(),
2134            Some(PkgHashAlgorithm::Sha512)
2135        );
2136        assert_eq!(
2137            "sha256".parse::<PkgHashAlgorithm>().ok(),
2138            Some(PkgHashAlgorithm::Sha256)
2139        );
2140        assert!("MD5".parse::<PkgHashAlgorithm>().is_err());
2141
2142        assert_eq!(PkgHashAlgorithm::Sha512.as_str(), "SHA512");
2143        assert_eq!(PkgHashAlgorithm::Sha256.as_str(), "SHA256");
2144
2145        assert_eq!(PkgHashAlgorithm::Sha512.hash_size(), 64);
2146        assert_eq!(PkgHashAlgorithm::Sha256.hash_size(), 32);
2147    }
2148
2149    #[test]
2150    fn test_pkg_hash_parse() -> Result<()> {
2151        let content = "\
2152pkgsrc signature
2153version: 1
2154pkgname: test-1.0
2155algorithm: SHA512
2156block size: 65536
2157file size: 12345
2158abc123
2159def456
2160";
2161        let pkg_hash: PkgHash = content.parse()?;
2162
2163        assert_eq!(pkg_hash.version(), 1);
2164        assert_eq!(pkg_hash.pkgname(), "test-1.0");
2165        assert_eq!(pkg_hash.algorithm(), PkgHashAlgorithm::Sha512);
2166        assert_eq!(pkg_hash.block_size(), 65536);
2167        assert_eq!(pkg_hash.file_size(), 12345);
2168        assert_eq!(pkg_hash.hashes(), &["abc123", "def456"]);
2169        Ok(())
2170    }
2171
2172    #[test]
2173    fn test_pkg_hash_generate() -> Result<()> {
2174        let data = b"Hello, World!";
2175        let pkg_hash = PkgHash::from_tarball(
2176            "test-1.0",
2177            Cursor::new(data),
2178            PkgHashAlgorithm::Sha512,
2179            1024,
2180        )?;
2181
2182        assert_eq!(pkg_hash.pkgname(), "test-1.0");
2183        assert_eq!(pkg_hash.algorithm(), PkgHashAlgorithm::Sha512);
2184        assert_eq!(pkg_hash.block_size(), 1024);
2185        assert_eq!(pkg_hash.file_size(), 13);
2186        assert_eq!(pkg_hash.hashes().len(), 1);
2187        Ok(())
2188    }
2189
2190    #[test]
2191    fn test_pkg_hash_verify() -> Result<()> {
2192        let data = b"Hello, World!";
2193        let pkg_hash = PkgHash::from_tarball(
2194            "test-1.0",
2195            Cursor::new(data),
2196            PkgHashAlgorithm::Sha512,
2197            1024,
2198        )?;
2199
2200        assert!(pkg_hash.verify(Cursor::new(data))?);
2201
2202        let bad_data = b"Goodbye, World!";
2203        assert!(pkg_hash.verify(Cursor::new(bad_data)).is_err());
2204        Ok(())
2205    }
2206
2207    #[test]
2208    fn test_pkg_hash_roundtrip() -> Result<()> {
2209        let data = vec![0u8; 200_000];
2210        let pkg_hash = PkgHash::from_tarball(
2211            "test-1.0",
2212            Cursor::new(&data),
2213            PkgHashAlgorithm::Sha512,
2214            65536,
2215        )?;
2216
2217        let serialized = pkg_hash.to_string();
2218        let parsed: PkgHash = serialized.parse()?;
2219
2220        assert_eq!(pkg_hash.version(), parsed.version());
2221        assert_eq!(pkg_hash.pkgname(), parsed.pkgname());
2222        assert_eq!(pkg_hash.algorithm(), parsed.algorithm());
2223        assert_eq!(pkg_hash.block_size(), parsed.block_size());
2224        assert_eq!(pkg_hash.file_size(), parsed.file_size());
2225        assert_eq!(pkg_hash.hashes(), parsed.hashes());
2226
2227        assert!(parsed.verify(Cursor::new(&data))?);
2228        Ok(())
2229    }
2230
2231    #[test]
2232    fn test_build_package_gzip() -> Result<()> {
2233        // Use new() which defaults to gzip
2234        let mut builder = Builder::new(Vec::new())?;
2235
2236        let plist = "@name testpkg-1.0\n@cwd /opt/test\nbin/test\n";
2237        builder.append_metadata_file("+CONTENTS", plist.as_bytes())?;
2238        builder.append_metadata_file("+COMMENT", b"A test package")?;
2239        builder.append_metadata_file(
2240            "+DESC",
2241            b"This is a test.\nMultiple lines.",
2242        )?;
2243        builder.append_metadata_file(
2244            "+BUILD_INFO",
2245            b"OPSYS=NetBSD\nMACHINE_ARCH=x86_64\n",
2246        )?;
2247        builder.append_file("bin/test", b"#!/bin/sh\necho test", 0o755)?;
2248        let output = builder.finish()?;
2249
2250        assert!(!output.is_empty());
2251
2252        // Verify we can read it back using low-level Archive (default gzip)
2253        let mut archive = Archive::new(Cursor::new(&output))?;
2254        let mut found_contents = false;
2255        for entry in archive.entries()? {
2256            let entry = entry?;
2257            if entry.path()?.to_str() == Some("+CONTENTS") {
2258                found_contents = true;
2259                break;
2260            }
2261        }
2262        assert!(found_contents);
2263        Ok(())
2264    }
2265
2266    #[test]
2267    fn test_build_package_zstd() -> Result<()> {
2268        // Use with_compression for explicit zstd
2269        let mut builder =
2270            Builder::with_compression(Vec::new(), Compression::Zstd)?;
2271
2272        let plist = "@name testpkg-1.0\n@cwd /opt/test\nbin/test\n";
2273        builder.append_metadata_file("+CONTENTS", plist.as_bytes())?;
2274        builder.append_metadata_file("+COMMENT", b"A test package")?;
2275        builder.append_metadata_file(
2276            "+DESC",
2277            b"This is a test.\nMultiple lines.",
2278        )?;
2279        builder.append_file("bin/test", b"#!/bin/sh\necho test", 0o755)?;
2280        let output = builder.finish()?;
2281
2282        assert!(!output.is_empty());
2283
2284        // Verify we can read it back using low-level Archive
2285        let mut archive =
2286            Archive::with_compression(Cursor::new(&output), Compression::Zstd)?;
2287        let mut found_contents = false;
2288        for entry in archive.entries()? {
2289            let entry = entry?;
2290            if entry.path()?.to_str() == Some("+CONTENTS") {
2291                found_contents = true;
2292                break;
2293            }
2294        }
2295        assert!(found_contents);
2296        Ok(())
2297    }
2298
2299    #[test]
2300    fn test_signed_archive_from_unsigned() -> Result<()> {
2301        // Build an unsigned package (default gzip)
2302        let mut builder = Builder::new(Vec::new())?;
2303        builder.append_metadata_file("+CONTENTS", b"@name testpkg-1.0\n")?;
2304        builder.append_metadata_file("+COMMENT", b"A test package")?;
2305        builder.append_metadata_file("+DESC", b"Test description")?;
2306        let output = builder.finish()?;
2307
2308        let fake_signature = b"FAKE GPG SIGNATURE";
2309        let signed = SignedArchive::from_unsigned(
2310            output,
2311            "testpkg-1.0",
2312            fake_signature,
2313            Compression::Gzip,
2314        )?;
2315
2316        assert_eq!(signed.pkgname(), "testpkg-1.0");
2317        assert_eq!(signed.pkg_hash().algorithm(), PkgHashAlgorithm::Sha512);
2318        assert_eq!(signed.compression(), Compression::Gzip);
2319
2320        // Write to buffer and verify it's an ar archive
2321        let mut signed_output = Vec::new();
2322        signed.write(&mut signed_output)?;
2323        assert!(&signed_output[..7] == b"!<arch>");
2324        Ok(())
2325    }
2326
2327    #[test]
2328    fn test_signed_archive_zstd() -> Result<()> {
2329        // Build an unsigned zstd package
2330        let mut builder =
2331            Builder::with_compression(Vec::new(), Compression::Zstd)?;
2332        builder.append_metadata_file("+CONTENTS", b"@name testpkg-1.0\n")?;
2333        builder.append_metadata_file("+COMMENT", b"A test package")?;
2334        builder.append_metadata_file("+DESC", b"Test description")?;
2335        let output = builder.finish()?;
2336
2337        let fake_signature = b"FAKE GPG SIGNATURE";
2338        let signed = SignedArchive::from_unsigned(
2339            output,
2340            "testpkg-1.0",
2341            fake_signature,
2342            Compression::Zstd,
2343        )?;
2344
2345        assert_eq!(signed.pkgname(), "testpkg-1.0");
2346        assert_eq!(signed.compression(), Compression::Zstd);
2347
2348        // Write to buffer and verify it's an ar archive
2349        let mut signed_output = Vec::new();
2350        signed.write(&mut signed_output)?;
2351        assert!(&signed_output[..7] == b"!<arch>");
2352        Ok(())
2353    }
2354
2355    #[test]
2356    fn test_parse_mode() {
2357        // Standard octal formats
2358        assert_eq!(super::parse_mode("0755"), Some(0o755));
2359        assert_eq!(super::parse_mode("755"), Some(0o755));
2360        assert_eq!(super::parse_mode("0644"), Some(0o644));
2361        assert_eq!(super::parse_mode("644"), Some(0o644));
2362        assert_eq!(super::parse_mode("0777"), Some(0o777));
2363        assert_eq!(super::parse_mode("0400"), Some(0o400));
2364
2365        // Invalid formats
2366        assert_eq!(super::parse_mode(""), None);
2367        assert_eq!(super::parse_mode("abc"), None);
2368        assert_eq!(super::parse_mode("999"), None); // 9 is not valid octal
2369    }
2370
2371    #[test]
2372    fn test_extract_options() {
2373        let opts = ExtractOptions::new();
2374        assert!(!opts.apply_mode);
2375        assert!(!opts.apply_ownership);
2376        assert!(!opts.preserve_mtime);
2377
2378        let opts = ExtractOptions::new().with_mode().with_ownership();
2379        assert!(opts.apply_mode);
2380        assert!(opts.apply_ownership);
2381        assert!(!opts.preserve_mtime);
2382    }
2383
2384    /* Build an unsigned gzip package with metadata followed by one file. */
2385    fn build_unsigned_pkg() -> Vec<u8> {
2386        let mut builder =
2387            Builder::with_compression(Vec::new(), Compression::Gzip).unwrap();
2388        builder
2389            .append_metadata_file("+COMMENT", b"A test package")
2390            .unwrap();
2391        builder
2392            .append_metadata_file("+DESC", b"A description.\n")
2393            .unwrap();
2394        builder
2395            .append_metadata_file(
2396                "+CONTENTS",
2397                b"@name testpkg-1.0\n@pkgdep deppkg-[0-9]*\n\
2398                  @blddep deppkg-2.0\nbin/foo\n",
2399            )
2400            .unwrap();
2401        builder
2402            .append_metadata_file("+BUILD_VERSION", b"some-version-info\n")
2403            .unwrap();
2404        builder
2405            .append_file("bin/foo", b"#!/bin/sh\n", 0o755)
2406            .unwrap();
2407        builder.finish().unwrap()
2408    }
2409
2410    #[test]
2411    fn test_metadata_reader_members() -> Result<()> {
2412        let bytes = build_unsigned_pkg();
2413        let mut reader = MetadataReader::open_reader(Cursor::new(bytes), None)?;
2414
2415        let members: Vec<MetadataMember> =
2416            reader.members()?.collect::<Result<_>>()?;
2417
2418        let kinds: Vec<Entry> = members.iter().map(|m| m.entry).collect();
2419        assert_eq!(
2420            kinds,
2421            vec![
2422                Entry::Comment,
2423                Entry::Desc,
2424                Entry::Contents,
2425                Entry::BuildVersion,
2426            ]
2427        );
2428
2429        let comment =
2430            members.iter().find(|m| m.entry == Entry::Comment).unwrap();
2431        assert_eq!(comment.content, "A test package");
2432        Ok(())
2433    }
2434
2435    #[test]
2436    fn test_metadata_reader_early_break() -> Result<()> {
2437        let bytes = build_unsigned_pkg();
2438        let mut reader = MetadataReader::open_reader(Cursor::new(bytes), None)?;
2439
2440        let mut build_version = None;
2441        for member in reader.members()? {
2442            let member = member?;
2443            if member.entry == Entry::BuildVersion {
2444                build_version = Some(member.content);
2445                break;
2446            }
2447        }
2448        assert_eq!(build_version.as_deref(), Some("some-version-info\n"));
2449        Ok(())
2450    }
2451
2452    #[test]
2453    fn test_metadata_reader_blddep() -> Result<()> {
2454        use crate::plist::{self, PlistEntry};
2455
2456        let bytes = build_unsigned_pkg();
2457        let mut reader = MetadataReader::open_reader(Cursor::new(bytes), None)?;
2458
2459        let mut blddeps = Vec::new();
2460        for member in reader.members()? {
2461            let member = member?;
2462            if member.entry == Entry::Contents {
2463                for entry in plist::parse(member.content.as_bytes()) {
2464                    if let PlistEntry::BldDep(d) = entry? {
2465                        blddeps.push(d.into_owned());
2466                    }
2467                }
2468            }
2469        }
2470        assert_eq!(blddeps, vec!["deppkg-2.0".to_string()]);
2471        Ok(())
2472    }
2473
2474    #[test]
2475    fn test_binary_package_lazy_plist() -> Result<()> {
2476        let bytes = build_unsigned_pkg();
2477        let pkg = BinaryPackage::read_unsigned(
2478            Path::new("testpkg-1.0.tgz"),
2479            Cursor::new(&bytes),
2480            &bytes[..8],
2481            bytes.len() as u64,
2482        )?;
2483
2484        /* @name is captured at open without materialising the Plist. */
2485        assert_eq!(pkg.pkgname(), Some("testpkg-1.0"));
2486        assert!(pkg.plist.get().is_none());
2487
2488        /* Summary generation must not materialise it either. */
2489        let summary = pkg.to_summary()?;
2490        assert_eq!(summary.pkgname().pkgname(), "testpkg-1.0");
2491        assert_eq!(
2492            summary.depends(),
2493            Some(["deppkg-[0-9]*".to_string()].as_slice())
2494        );
2495        assert!(pkg.plist.get().is_none());
2496
2497        /* plist() materialises on demand and agrees with the metadata. */
2498        assert_eq!(pkg.plist().pkgname(), Some("testpkg-1.0"));
2499        assert_eq!(pkg.plist().depends().count(), 1);
2500        assert!(pkg.plist.get().is_some());
2501        Ok(())
2502    }
2503
2504    #[test]
2505    fn test_binary_package_invalid_plist() -> Result<()> {
2506        let mut builder = Builder::new(Vec::new())?;
2507        builder.append_metadata_file("+COMMENT", b"A test package")?;
2508        builder.append_metadata_file("+DESC", b"A description.\n")?;
2509        builder.append_metadata_file("+CONTENTS", b"@name x-1.0\n@bogus\n")?;
2510        let bytes = builder.finish()?;
2511
2512        /* A malformed packing list must still fail at open time. */
2513        let res = BinaryPackage::read_unsigned(
2514            Path::new("x-1.0.tgz"),
2515            Cursor::new(&bytes),
2516            &bytes[..8],
2517            bytes.len() as u64,
2518        );
2519        assert!(matches!(res, Err(ArchiveError::Plist(_))));
2520        Ok(())
2521    }
2522
2523    #[test]
2524    fn test_metadata_reader_signed() -> Result<()> {
2525        let tarball = build_unsigned_pkg();
2526        let signed = SignedArchive::from_unsigned(
2527            tarball,
2528            "testpkg-1.0",
2529            b"fake-signature",
2530            Compression::Gzip,
2531        )?;
2532        let mut out = Vec::new();
2533        signed.write(&mut out)?;
2534
2535        let mut reader = MetadataReader::open_reader(Cursor::new(out), None)?;
2536        let kinds: Vec<Entry> = reader
2537            .members()?
2538            .map(|m| m.map(|m| m.entry))
2539            .collect::<Result<_>>()?;
2540
2541        assert!(kinds.contains(&Entry::Contents));
2542        assert!(kinds.contains(&Entry::BuildVersion));
2543        Ok(())
2544    }
2545}