Skip to main content

pkgsrc/
archive.rs

1/*
2 * Copyright (c) 2026 Jonathan Perkin <jonathan@perkin.org.uk>
3 *
4 * Permission to use, copy, modify, and distribute this software for any
5 * purpose with or without fee is hereby granted, provided that the above
6 * copyright notice and this permission notice appear in all copies.
7 *
8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 */
16
17/*!
18 * Read and write pkgsrc binary packages.
19 *
20 * pkgsrc binary packages come in two formats:
21 *
22 * 1. **Unsigned packages**: Compressed tar archives (`.tgz`, `.tbz`, etc.)
23 *    containing package metadata (`+CONTENTS`, `+COMMENT`, `+DESC`, etc.)
24 *    and the package files.
25 *
26 * 2. **Signed packages**: `ar(1)` archives containing:
27 *    - `+PKG_HASH`: Hash metadata for verification
28 *    - `+PKG_GPG_SIGNATURE`: GPG signature of the hash file
29 *    - The original compressed tarball
30 *
31 * This module provides a two-layer API:
32 *
33 * ## Low-level (tar-style streaming)
34 *
35 * - [`Archive`]: Streaming access to archive entries
36 * - [`Builder`]: Create new archives by appending entries
37 *
38 * ## High-level (convenience)
39 *
40 * - [`BinaryPackage`]: Cached metadata with fast reads and convenience methods
41 * - [`SignedArchive`]: Output type for signed packages
42 *
43 * # Examples
44 *
45 * ## Fast metadata reading
46 *
47 * ```no_run
48 * use pkgsrc::archive::BinaryPackage;
49 *
50 * let pkg = BinaryPackage::open("package-1.0.tgz")?;
51 * println!("Package: {}", pkg.pkgname().unwrap_or("unknown"));
52 * println!("Comment: {}", pkg.metadata().comment());
53 *
54 * // Convert to summary for repository management
55 * let summary = pkg.to_summary()?;
56 * # Ok::<(), pkgsrc::archive::ArchiveError>(())
57 * ```
58 *
59 * ## Installing a package (iterating entries)
60 *
61 * ```no_run
62 * use pkgsrc::archive::BinaryPackage;
63 *
64 * let pkg = BinaryPackage::open("package-1.0.tgz")?;
65 *
66 * // Check dependencies first (fast, uses cached metadata)
67 * for dep in pkg.plist().depends() {
68 *     println!("Depends: {}", dep);
69 * }
70 *
71 * // Extract files (re-reads archive)
72 * pkg.extract_to("/usr/pkg")?;
73 * # Ok::<(), pkgsrc::archive::ArchiveError>(())
74 * ```
75 *
76 * ## Building a new package
77 *
78 * ```no_run
79 * use pkgsrc::archive::Builder;
80 *
81 * // Auto-detect compression from filename
82 * let mut builder = Builder::create("package-1.0.tgz")?;
83 * builder.append_metadata_file("+COMMENT", b"A test package")?;
84 * builder.append_file("bin/hello", b"#!/bin/sh\necho hello", 0o755)?;
85 * builder.finish()?;
86 * # Ok::<(), pkgsrc::archive::ArchiveError>(())
87 * ```
88 *
89 * ## Signing an existing package
90 *
91 * ```no_run
92 * use pkgsrc::archive::BinaryPackage;
93 *
94 * let pkg = BinaryPackage::open("package-1.0.tgz")?;
95 * let signature = b"GPG SIGNATURE DATA";
96 * pkg.sign(signature)?.write_to("package-1.0-signed.tgz")?;
97 * # Ok::<(), pkgsrc::archive::ArchiveError>(())
98 * ```
99 */
100
101use std::collections::HashMap;
102use std::ffi::OsString;
103use std::fmt;
104use std::fs::{self, File, Permissions};
105use std::io::{self, BufReader, Cursor, Read, Seek, SeekFrom, Write};
106#[cfg(unix)]
107use std::os::unix::fs::PermissionsExt;
108use std::path::{Path, PathBuf};
109
110use flate2::read::GzDecoder;
111use flate2::write::GzEncoder;
112use tar::{Archive as TarArchive, Builder as TarBuilder, Entries, Header};
113
114use crate::metadata::{Entry, FileRead, Metadata};
115use crate::plist::Plist;
116use crate::summary::Summary;
117
/// Parse an octal mode string (e.g. "0755", "755", "0644") into a `u32`.
///
/// A leading zero is accepted naturally by the radix conversion, so the
/// "0755" and "755" spellings yield the same value.  Returns `None` for
/// anything that is not valid octal (including the empty string).
fn parse_mode(mode_str: &str) -> Option<u32> {
    match u32::from_str_radix(mode_str, 8) {
        Ok(mode) => Some(mode),
        Err(_) => None,
    }
}
125
/// Default block size for package hashing (64KB).
pub const DEFAULT_BLOCK_SIZE: usize = 65536;

/// Current pkgsrc signature version, written to the `version` field of
/// the `+PKG_HASH` file.
pub const PKGSRC_SIGNATURE_VERSION: u32 = 1;

/// Magic bytes identifying gzip compressed data (RFC 1952).
const GZIP_MAGIC: [u8; 2] = [0x1f, 0x8b];

/// Magic bytes identifying zstd compressed data (frame magic number in
/// little-endian byte order).
const ZSTD_MAGIC: [u8; 4] = [0x28, 0xb5, 0x2f, 0xfd];

/// Result type for archive operations.
pub type Result<T> = std::result::Result<T, ArchiveError>;
140
141// ============================================================================
142// Compression
143// ============================================================================
144
/// Compression format for package archives.
///
/// The variant determines both how an archive stream is decoded and the
/// canonical file extension reported by [`Compression::extension`].
#[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum Compression {
    /// No compression (plain .tar)
    None,
    /// Gzip compression (.tgz, .tar.gz) — the default.
    #[default]
    Gzip,
    /// Zstandard compression (.tzst, .tar.zst)
    Zstd,
}
157
158impl Compression {
159    /// Detect compression format from magic bytes.
160    #[must_use]
161    pub fn from_magic(bytes: &[u8]) -> Option<Self> {
162        if bytes.len() < ZSTD_MAGIC.len() {
163            return None;
164        }
165        if bytes.starts_with(&GZIP_MAGIC) {
166            Some(Self::Gzip)
167        } else if bytes.starts_with(&ZSTD_MAGIC) {
168            Some(Self::Zstd)
169        } else {
170            None
171        }
172    }
173
174    /// Detect compression format from file extension.
175    #[must_use]
176    pub fn from_extension(path: impl AsRef<Path>) -> Option<Self> {
177        let name = path.as_ref().file_name()?.to_str()?;
178        let lower = name.to_lowercase();
179
180        if lower.ends_with(".tgz") || lower.ends_with(".tar.gz") {
181            Some(Self::Gzip)
182        } else if lower.ends_with(".tzst") || lower.ends_with(".tar.zst") {
183            Some(Self::Zstd)
184        } else if lower.ends_with(".tar") {
185            Some(Self::None)
186        } else {
187            None
188        }
189    }
190
191    /// Return the canonical file extension for this compression type.
192    #[must_use]
193    pub fn extension(&self) -> &'static str {
194        match self {
195            Self::None => "tar",
196            Self::Gzip => "tgz",
197            Self::Zstd => "tzst",
198        }
199    }
200}
201
202impl fmt::Display for Compression {
203    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
204        match self {
205            Self::None => write!(f, "none"),
206            Self::Gzip => write!(f, "gzip"),
207            Self::Zstd => write!(f, "zstd"),
208        }
209    }
210}
211
212// ============================================================================
213// PkgHashAlgorithm
214// ============================================================================
215
/// Hash algorithm used for package signing.
///
/// The variant names correspond to the `algorithm` field values written
/// to `+PKG_HASH` (see [`PkgHashAlgorithm::as_str`]).
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum PkgHashAlgorithm {
    /// SHA-512 (recommended, default)
    #[default]
    Sha512,
    /// SHA-256
    Sha256,
}
226
227impl PkgHashAlgorithm {
228    /// Return the string representation as used in +PKG_HASH.
229    #[must_use]
230    pub fn as_str(&self) -> &'static str {
231        match self {
232            Self::Sha512 => "SHA512",
233            Self::Sha256 => "SHA256",
234        }
235    }
236
237    /// Return the hash output size in bytes.
238    #[must_use]
239    pub fn hash_size(&self) -> usize {
240        match self {
241            Self::Sha512 => 64,
242            Self::Sha256 => 32,
243        }
244    }
245
246    /// Compute hash of data.
247    #[must_use]
248    pub fn hash(&self, data: &[u8]) -> Vec<u8> {
249        use sha2::{Digest, Sha256, Sha512};
250        match self {
251            Self::Sha512 => Sha512::digest(data).to_vec(),
252            Self::Sha256 => Sha256::digest(data).to_vec(),
253        }
254    }
255
256    /// Format hash as lowercase hex string.
257    #[must_use]
258    pub fn hash_hex(&self, data: &[u8]) -> String {
259        self.hash(data)
260            .iter()
261            .map(|b| format!("{:02x}", b))
262            .collect()
263    }
264}
265
266impl fmt::Display for PkgHashAlgorithm {
267    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
268        write!(f, "{}", self.as_str())
269    }
270}
271
272impl std::str::FromStr for PkgHashAlgorithm {
273    type Err = ArchiveError;
274
275    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
276        match s.to_uppercase().as_str() {
277            "SHA512" => Ok(Self::Sha512),
278            "SHA256" => Ok(Self::Sha256),
279            _ => Err(ArchiveError::UnsupportedAlgorithm(s.to_string())),
280        }
281    }
282}
283
284// ============================================================================
285// Error
286// ============================================================================
287
/// Error type for archive operations.
///
/// Marked `#[non_exhaustive]` so new variants can be added without a
/// semver-breaking change; callers should include a catch-all arm.
#[derive(Debug, thiserror::Error)]
#[non_exhaustive]
pub enum ArchiveError {
    /// I/O error while reading or writing an archive.
    #[error("I/O error: {0}")]
    Io(#[from] io::Error),

    /// Invalid archive format.
    #[error("invalid archive format: {0}")]
    InvalidFormat(String),

    /// Invalid +PKG_HASH format (bad header or fields).
    #[error("invalid +PKG_HASH format: {0}")]
    InvalidPkgHash(String),

    /// Missing required metadata (e.g. an incomplete package).
    #[error("missing required metadata: {0}")]
    MissingMetadata(String),

    /// Invalid metadata content.
    #[error("invalid metadata: {0}")]
    InvalidMetadata(String),

    /// Plist (`+CONTENTS`) parsing error.
    #[error("plist error: {0}")]
    Plist(#[from] crate::plist::PlistError),

    /// Hash verification failed (block hash, block count or size).
    #[error("hash verification failed: {0}")]
    HashMismatch(String),

    /// Unsupported hash algorithm name.
    #[error("unsupported hash algorithm: {0}")]
    UnsupportedAlgorithm(String),

    /// Unsupported compression.
    #[error("unsupported compression: {0}")]
    UnsupportedCompression(String),

    /// Summary generation error.
    #[error("summary error: {0}")]
    Summary(String),

    /// No path available for operation.
    #[error("no path available: {0}")]
    NoPath(String),
}
336
337// ============================================================================
338// ExtractOptions
339// ============================================================================
340
/// Options for extracting package files.
///
/// All options default to disabled (via `Default`); enable them with the
/// builder-style `with_*` methods.
#[derive(Clone, Debug, Default)]
pub struct ExtractOptions {
    /// Apply file modes from plist `@mode` directives.
    pub apply_mode: bool,
    /// Apply file ownership from plist `@owner`/`@group` directives.
    /// Note: Requires root privileges to change ownership.
    pub apply_ownership: bool,
    /// Preserve original timestamps from the archive.
    pub preserve_mtime: bool,
}
352
353impl ExtractOptions {
354    /// Create new extract options with all options disabled.
355    #[must_use]
356    pub fn new() -> Self {
357        Self::default()
358    }
359
360    /// Enable applying file modes from plist.
361    #[must_use]
362    pub fn with_mode(mut self) -> Self {
363        self.apply_mode = true;
364        self
365    }
366
367    /// Enable applying file ownership from plist.
368    #[must_use]
369    pub fn with_ownership(mut self) -> Self {
370        self.apply_ownership = true;
371        self
372    }
373
374    /// Enable preserving original timestamps.
375    #[must_use]
376    pub fn with_mtime(mut self) -> Self {
377        self.preserve_mtime = true;
378        self
379    }
380}
381
/// Result of extracting a single file.
#[derive(Clone, Debug)]
pub struct ExtractedFile {
    /// Path where the file was extracted.
    pub path: PathBuf,
    /// Whether this is a metadata file (name starts with `+`).
    pub is_metadata: bool,
    /// MD5 checksum from plist, if present.
    pub expected_checksum: Option<String>,
    /// Mode applied to the file, when one was set.
    pub mode: Option<u32>,
}
394
395// ============================================================================
396// PkgHash
397// ============================================================================
398
/// The `+PKG_HASH` file contents for signed packages.
///
/// This structure represents the hash metadata file used in signed pkgsrc
/// packages. It contains information needed to verify the package integrity.
///
/// # Format
///
/// The `+PKG_HASH` file has the following format:
///
/// ```text
/// pkgsrc signature
/// version: 1
/// pkgname: package-1.0
/// algorithm: SHA512
/// block size: 65536
/// file size: 123456
/// <hash1>
/// <hash2>
/// ...
/// ```
#[derive(Clone, Debug, Default, Eq, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct PkgHash {
    /// Signature format version (see [`PKGSRC_SIGNATURE_VERSION`]).
    version: u32,
    /// Name of the package the hashes cover.
    pkgname: String,
    /// Hash algorithm used for the block hashes.
    algorithm: PkgHashAlgorithm,
    /// Size in bytes of each hashed block (the last may be shorter).
    block_size: usize,
    /// Total size in bytes of the hashed file.
    file_size: u64,
    /// Lowercase hex hash of each block, in order.
    hashes: Vec<String>,
}
429
430impl PkgHash {
431    /// Create a new `PkgHash` with default settings.
432    #[must_use]
433    pub fn new(pkgname: impl Into<String>) -> Self {
434        Self {
435            version: PKGSRC_SIGNATURE_VERSION,
436            pkgname: pkgname.into(),
437            algorithm: PkgHashAlgorithm::default(),
438            block_size: DEFAULT_BLOCK_SIZE,
439            file_size: 0,
440            hashes: Vec::new(),
441        }
442    }
443
444    /// Parse a `PkgHash` from `+PKG_HASH` file contents.
445    pub fn parse(content: &str) -> Result<Self> {
446        let lines: Vec<&str> = content.lines().collect();
447
448        if lines.is_empty() || lines[0] != "pkgsrc signature" {
449            return Err(ArchiveError::InvalidPkgHash(
450                "missing 'pkgsrc signature' header".into(),
451            ));
452        }
453
454        let mut pkg_hash = PkgHash::default();
455        let mut header_complete = false;
456        let mut line_idx = 1;
457
458        while line_idx < lines.len() && !header_complete {
459            let line = lines[line_idx];
460
461            if let Some((key, value)) = line.split_once(": ") {
462                match key {
463                    "version" => {
464                        pkg_hash.version = value.parse().map_err(|_| {
465                            ArchiveError::InvalidPkgHash(format!(
466                                "invalid version: {}",
467                                value
468                            ))
469                        })?;
470                    }
471                    "pkgname" => {
472                        pkg_hash.pkgname = value.to_string();
473                    }
474                    "algorithm" => {
475                        pkg_hash.algorithm = value.parse()?;
476                    }
477                    "block size" => {
478                        pkg_hash.block_size = value.parse().map_err(|_| {
479                            ArchiveError::InvalidPkgHash(format!(
480                                "invalid block size: {}",
481                                value
482                            ))
483                        })?;
484                    }
485                    "file size" => {
486                        pkg_hash.file_size = value.parse().map_err(|_| {
487                            ArchiveError::InvalidPkgHash(format!(
488                                "invalid file size: {}",
489                                value
490                            ))
491                        })?;
492                        header_complete = true;
493                    }
494                    _ => {
495                        return Err(ArchiveError::InvalidPkgHash(format!(
496                            "unknown header field: {}",
497                            key
498                        )));
499                    }
500                }
501            } else if !line.is_empty() {
502                header_complete = true;
503                line_idx -= 1;
504            }
505            line_idx += 1;
506        }
507
508        while line_idx < lines.len() {
509            let line = lines[line_idx].trim();
510            if !line.is_empty() {
511                pkg_hash.hashes.push(line.to_string());
512            }
513            line_idx += 1;
514        }
515
516        if pkg_hash.pkgname.is_empty() {
517            return Err(ArchiveError::InvalidPkgHash("missing pkgname".into()));
518        }
519
520        Ok(pkg_hash)
521    }
522
523    /// Generate `PkgHash` from a tarball.
524    pub fn from_tarball<R: Read>(
525        pkgname: impl Into<String>,
526        mut reader: R,
527        algorithm: PkgHashAlgorithm,
528        block_size: usize,
529    ) -> Result<Self> {
530        let mut pkg_hash = PkgHash::new(pkgname);
531        pkg_hash.algorithm = algorithm;
532        pkg_hash.block_size = block_size;
533
534        let mut buffer = vec![0u8; block_size];
535        let mut total_size: u64 = 0;
536
537        loop {
538            let bytes_read = reader.read(&mut buffer)?;
539            if bytes_read == 0 {
540                break;
541            }
542
543            total_size += bytes_read as u64;
544            let hash = algorithm.hash_hex(&buffer[..bytes_read]);
545            pkg_hash.hashes.push(hash);
546        }
547
548        pkg_hash.file_size = total_size;
549        Ok(pkg_hash)
550    }
551
552    /// Return the pkgsrc signature version.
553    #[must_use]
554    pub fn version(&self) -> u32 {
555        self.version
556    }
557
558    /// Return the package name.
559    #[must_use]
560    pub fn pkgname(&self) -> &str {
561        &self.pkgname
562    }
563
564    /// Return the hash algorithm.
565    #[must_use]
566    pub fn algorithm(&self) -> PkgHashAlgorithm {
567        self.algorithm
568    }
569
570    /// Return the block size.
571    #[must_use]
572    pub fn block_size(&self) -> usize {
573        self.block_size
574    }
575
576    /// Return the original file size.
577    #[must_use]
578    pub fn file_size(&self) -> u64 {
579        self.file_size
580    }
581
582    /// Return the block hashes.
583    #[must_use]
584    pub fn hashes(&self) -> &[String] {
585        &self.hashes
586    }
587
588    /// Verify a tarball against this hash.
589    pub fn verify<R: Read>(&self, mut reader: R) -> Result<bool> {
590        let mut buffer = vec![0u8; self.block_size];
591        let mut hash_idx = 0;
592        let mut total_size: u64 = 0;
593
594        loop {
595            let bytes_read = reader.read(&mut buffer)?;
596            if bytes_read == 0 {
597                break;
598            }
599
600            total_size += bytes_read as u64;
601
602            if hash_idx >= self.hashes.len() {
603                return Err(ArchiveError::HashMismatch(
604                    "more data than expected".into(),
605                ));
606            }
607
608            let computed = self.algorithm.hash_hex(&buffer[..bytes_read]);
609            if computed != self.hashes[hash_idx] {
610                return Err(ArchiveError::HashMismatch(format!(
611                    "block {} hash mismatch",
612                    hash_idx
613                )));
614            }
615
616            hash_idx += 1;
617        }
618
619        if total_size != self.file_size {
620            return Err(ArchiveError::HashMismatch(format!(
621                "file size mismatch: expected {}, got {}",
622                self.file_size, total_size
623            )));
624        }
625
626        if hash_idx != self.hashes.len() {
627            return Err(ArchiveError::HashMismatch(
628                "fewer blocks than expected".into(),
629            ));
630        }
631
632        Ok(true)
633    }
634}
635
636impl fmt::Display for PkgHash {
637    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
638        writeln!(f, "pkgsrc signature")?;
639        writeln!(f, "version: {}", self.version)?;
640        writeln!(f, "pkgname: {}", self.pkgname)?;
641        writeln!(f, "algorithm: {}", self.algorithm)?;
642        writeln!(f, "block size: {}", self.block_size)?;
643        writeln!(f, "file size: {}", self.file_size)?;
644        for hash in &self.hashes {
645            writeln!(f, "{}", hash)?;
646        }
647        Ok(())
648    }
649}
650
651// ============================================================================
652// ArchiveType
653// ============================================================================
654
/// Type of binary package archive.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum ArchiveType {
    /// Unsigned package (plain compressed tarball).
    Unsigned,
    /// Signed package (`ar(1)` archive containing the tarball plus
    /// `+PKG_HASH` and `+PKG_GPG_SIGNATURE` members).
    Signed,
}
664
665// ============================================================================
666// Archive (low-level, tar-style)
667// ============================================================================
668
/// Wrapper for different decompression decoders.
///
/// This is an implementation detail exposed due to the generic nature of
/// [`Archive`]. Users should not need to interact with this type directly.
#[doc(hidden)]
#[allow(clippy::large_enum_variant)]
pub enum Decoder<R: Read> {
    /// Pass-through reader (no compression).
    None(R),
    /// Gzip decompression.
    Gzip(GzDecoder<R>),
    /// Zstandard decompression (the zstd decoder buffers its input).
    Zstd(zstd::stream::Decoder<'static, BufReader<R>>),
}
680
681impl<R: Read> Read for Decoder<R> {
682    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
683        match self {
684            Decoder::None(r) => r.read(buf),
685            Decoder::Gzip(d) => d.read(buf),
686            Decoder::Zstd(d) => d.read(buf),
687        }
688    }
689}
690
691/// Low-level streaming access to package archives.
692///
693/// This provides tar-style streaming access to archive entries. For most use
694/// cases, prefer [`BinaryPackage`] which provides cached metadata and convenience
695/// methods.
696///
697/// # Example
698///
699/// ```no_run
700/// use pkgsrc::archive::{Archive, Compression};
701/// use std::io::Read;
702///
703/// let mut archive = Archive::open("package-1.0.tgz")?;
704/// for entry in archive.entries()? {
705///     let entry = entry?;
706///     println!("{}", entry.path()?.display());
707/// }
708/// # Ok::<(), pkgsrc::archive::ArchiveError>(())
709/// ```
pub struct Archive<R: Read> {
    /// Underlying tar reader wrapping the decompression stream.
    inner: TarArchive<Decoder<R>>,
    /// Compression format this archive was opened with.
    compression: Compression,
}
714
715impl Archive<BufReader<File>> {
716    /// Open an archive from a file path.
717    ///
718    /// Automatically detects compression format from magic bytes.
719    pub fn open(path: impl AsRef<Path>) -> Result<Self> {
720        let path = path.as_ref();
721        let file = File::open(path)?;
722        let mut reader = BufReader::new(file);
723
724        // Read magic bytes for compression detection
725        let mut magic = [0u8; 8];
726        reader.read_exact(&mut magic)?;
727        reader.seek(SeekFrom::Start(0))?;
728
729        let compression = Compression::from_magic(&magic)
730            .or_else(|| Compression::from_extension(path))
731            .unwrap_or(Compression::Gzip);
732
733        Archive::with_compression(reader, compression)
734    }
735}
736
737impl<R: Read> Archive<R> {
738    /// Create a new archive from a reader.
739    ///
740    /// Defaults to gzip compression. Use [`Archive::with_compression`] to
741    /// specify a different format, or [`Archive::open`] to auto-detect from
742    /// a file path.
743    #[must_use = "creating an archive has no effect if not used"]
744    pub fn new(reader: R) -> Result<Self> {
745        Self::with_compression(reader, Compression::Gzip)
746    }
747
748    /// Create a new archive from a reader with explicit compression.
749    #[must_use = "creating an archive has no effect if not used"]
750    pub fn with_compression(
751        reader: R,
752        compression: Compression,
753    ) -> Result<Self> {
754        let decoder = match compression {
755            Compression::None => Decoder::None(reader),
756            Compression::Gzip => Decoder::Gzip(GzDecoder::new(reader)),
757            Compression::Zstd => {
758                Decoder::Zstd(zstd::stream::Decoder::new(reader)?)
759            }
760        };
761
762        Ok(Archive {
763            inner: TarArchive::new(decoder),
764            compression,
765        })
766    }
767
768    /// Return the compression format.
769    #[must_use]
770    pub fn compression(&self) -> Compression {
771        self.compression
772    }
773
774    /// Return an iterator over the entries in this archive.
775    #[must_use = "entries iterator must be used to iterate"]
776    pub fn entries(&mut self) -> Result<Entries<'_, Decoder<R>>> {
777        Ok(self.inner.entries()?)
778    }
779}
780
781// ============================================================================
782// Package (high-level, cached metadata)
783// ============================================================================
784
/// Options for converting a [`BinaryPackage`] to a [`Summary`].
///
/// `Default` leaves every option disabled.
#[derive(Debug, Clone, Default)]
pub struct SummaryOptions {
    /// Compute the SHA256 checksum of the package file.
    ///
    /// This requires re-reading the entire package file, which can be slow
    /// for large packages. Default is `false`.
    pub compute_file_cksum: bool,
}
794
795/// A pkgsrc binary package with cached metadata.
796///
797/// This provides fast access to package metadata without re-reading the
798/// archive. The metadata is read once during [`BinaryPackage::open`], and subsequent
799/// operations like [`BinaryPackage::archive`] or [`BinaryPackage::extract_to`] re-open
800/// the archive as needed.
801///
802/// # Example
803///
804/// ```no_run
805/// use pkgsrc::archive::BinaryPackage;
806///
807/// // Fast metadata access
808/// let pkg = BinaryPackage::open("package-1.0.tgz")?;
809/// println!("Name: {}", pkg.pkgname().unwrap_or("unknown"));
810/// println!("Comment: {}", pkg.metadata().comment());
811///
812/// // Generate summary for repository
813/// let summary = pkg.to_summary()?;
814///
815/// // Extract files (re-reads archive)
816/// pkg.extract_to("/usr/pkg")?;
817/// # Ok::<(), pkgsrc::archive::ArchiveError>(())
818/// ```
#[derive(Debug)]
pub struct BinaryPackage {
    /// Path to the package file (used to re-open the archive for
    /// extraction and other full reads).
    path: PathBuf,

    /// Detected compression format.
    compression: Compression,

    /// Type of package (signed or unsigned).
    archive_type: ArchiveType,

    /// Parsed metadata from the package.
    metadata: Metadata,

    /// Parsed packing list (from `+CONTENTS`).
    plist: Plist,

    /// Build info key-value pairs (from `+BUILD_INFO`; keys may repeat,
    /// hence the `Vec` of values per key).
    build_info: HashMap<String, Vec<String>>,

    /// Package hash (for signed packages).
    pkg_hash: Option<PkgHash>,

    /// GPG signature (for signed packages).
    gpg_signature: Option<Vec<u8>>,

    /// File size of the package on disk, in bytes.
    file_size: u64,
}
848
849impl BinaryPackage {
850    /// Open a package from a file path.
851    ///
852    /// This reads only the metadata entries at the start of the archive,
853    /// providing fast access to package information without decompressing
854    /// the entire file.
855    pub fn open(path: impl AsRef<Path>) -> Result<Self> {
856        let path = path.as_ref();
857        let file = File::open(path)?;
858        let file_size = file.metadata()?.len();
859        let mut reader = BufReader::new(file);
860
861        // Read magic bytes
862        let mut magic = [0u8; 8];
863        reader.read_exact(&mut magic)?;
864        reader.seek(SeekFrom::Start(0))?;
865
866        // Check for ar archive (signed package)
867        if &magic[..7] == b"!<arch>" {
868            Self::read_signed(path, reader, file_size)
869        } else {
870            Self::read_unsigned(path, reader, &magic, file_size)
871        }
872    }
873
874    /// Read an unsigned package (compressed tarball).
875    fn read_unsigned<R: Read + Seek>(
876        path: &Path,
877        reader: R,
878        magic: &[u8],
879        file_size: u64,
880    ) -> Result<Self> {
881        let compression = Compression::from_magic(magic)
882            .or_else(|| Compression::from_extension(path))
883            .unwrap_or(Compression::Gzip);
884
885        let decompressed: Box<dyn Read> = match compression {
886            Compression::None => Box::new(reader),
887            Compression::Gzip => Box::new(GzDecoder::new(reader)),
888            Compression::Zstd => Box::new(zstd::stream::Decoder::new(reader)?),
889        };
890
891        let mut archive = TarArchive::new(decompressed);
892        let mut metadata = Metadata::new();
893        let mut plist = Plist::new();
894        let mut build_info: HashMap<String, Vec<String>> = HashMap::new();
895
896        for entry_result in archive.entries()? {
897            let mut entry = entry_result?;
898            let entry_path = entry.path()?.into_owned();
899
900            // Stop at first non-metadata file (fast path)
901            let Some(entry_type) =
902                entry_path.to_str().and_then(Entry::from_filename)
903            else {
904                break;
905            };
906
907            // Pre-allocate based on entry size to avoid reallocation during read
908            let entry_size = entry.header().size().unwrap_or(0) as usize;
909            let mut content = String::with_capacity(entry_size);
910            entry.read_to_string(&mut content)?;
911            metadata.read_metadata(entry_type, &content).map_err(|e| {
912                ArchiveError::InvalidMetadata(format!(
913                    "{}: {}",
914                    entry_path.display(),
915                    e
916                ))
917            })?;
918
919            if entry_path.as_os_str() == "+CONTENTS" {
920                plist = Plist::from_bytes(content.as_bytes())?;
921            } else if entry_path.as_os_str() == "+BUILD_INFO" {
922                for line in content.lines() {
923                    if let Some((key, value)) = line.split_once('=') {
924                        build_info
925                            .entry(key.to_string())
926                            .or_default()
927                            .push(value.to_string());
928                    }
929                }
930            }
931        }
932
933        metadata.validate().map_err(|e| {
934            ArchiveError::MissingMetadata(format!("incomplete package: {}", e))
935        })?;
936
937        Ok(Self {
938            path: path.to_path_buf(),
939            compression,
940            archive_type: ArchiveType::Unsigned,
941            metadata,
942            plist,
943            build_info,
944            pkg_hash: None,
945            gpg_signature: None,
946            file_size,
947        })
948    }
949
950    /// Read a signed package (ar archive).
951    fn read_signed<R: Read>(
952        path: &Path,
953        reader: R,
954        file_size: u64,
955    ) -> Result<Self> {
956        let mut ar = ar::Archive::new(reader);
957
958        let mut pkg_hash_content: Option<String> = None;
959        let mut gpg_signature: Option<Vec<u8>> = None;
960        let mut metadata = Metadata::new();
961        let mut plist = Plist::new();
962        let mut build_info: HashMap<String, Vec<String>> = HashMap::new();
963        let mut compression = Compression::Gzip;
964
965        loop {
966            let mut entry = match ar.next_entry() {
967                Some(Ok(entry)) => entry,
968                Some(Err(e)) if e.kind() == io::ErrorKind::UnexpectedEof => {
969                    break;
970                }
971                Some(Err(e)) => return Err(e.into()),
972                None => break,
973            };
974            let name = String::from_utf8_lossy(entry.header().identifier())
975                .to_string();
976
977            match name.as_str() {
978                "+PKG_HASH" => {
979                    let mut content = String::new();
980                    entry.read_to_string(&mut content)?;
981                    pkg_hash_content = Some(content);
982                }
983                "+PKG_GPG_SIGNATURE" => {
984                    let mut data = Vec::new();
985                    entry.read_to_end(&mut data)?;
986                    gpg_signature = Some(data);
987                }
988                _ if name.ends_with(".tgz")
989                    || name.ends_with(".tzst")
990                    || name.ends_with(".tar") =>
991                {
992                    // Detect compression from inner tarball name
993                    compression = Compression::from_extension(&name)
994                        .unwrap_or(Compression::Gzip);
995
996                    let decompressed: Box<dyn Read> = match compression {
997                        Compression::None => Box::new(entry),
998                        Compression::Gzip => Box::new(GzDecoder::new(entry)),
999                        Compression::Zstd => {
1000                            Box::new(zstd::stream::Decoder::new(entry)?)
1001                        }
1002                    };
1003
1004                    let mut archive = TarArchive::new(decompressed);
1005
1006                    for tar_entry_result in archive.entries()? {
1007                        let mut tar_entry = tar_entry_result?;
1008                        let entry_path = tar_entry.path()?.into_owned();
1009
1010                        let Some(entry_type) =
1011                            entry_path.to_str().and_then(Entry::from_filename)
1012                        else {
1013                            break;
1014                        };
1015
1016                        // Pre-allocate based on entry size to avoid reallocation
1017                        let entry_size =
1018                            tar_entry.header().size().unwrap_or(0) as usize;
1019                        let mut content = String::with_capacity(entry_size);
1020                        tar_entry.read_to_string(&mut content)?;
1021                        metadata.read_metadata(entry_type, &content).map_err(
1022                            |e| {
1023                                ArchiveError::InvalidMetadata(format!(
1024                                    "{}: {}",
1025                                    entry_path.display(),
1026                                    e
1027                                ))
1028                            },
1029                        )?;
1030
1031                        if entry_path.as_os_str() == "+CONTENTS" {
1032                            plist = Plist::from_bytes(content.as_bytes())?;
1033                        } else if entry_path.as_os_str() == "+BUILD_INFO" {
1034                            for line in content.lines() {
1035                                if let Some((key, value)) = line.split_once('=')
1036                                {
1037                                    build_info
1038                                        .entry(key.to_string())
1039                                        .or_default()
1040                                        .push(value.to_string());
1041                                }
1042                            }
1043                        }
1044                    }
1045                    break;
1046                }
1047                _ => {}
1048            }
1049        }
1050
1051        let pkg_hash =
1052            pkg_hash_content.map(|c| PkgHash::parse(&c)).transpose()?;
1053
1054        metadata.validate().map_err(|e| {
1055            ArchiveError::MissingMetadata(format!("incomplete package: {}", e))
1056        })?;
1057
1058        Ok(Self {
1059            path: path.to_path_buf(),
1060            compression,
1061            archive_type: ArchiveType::Signed,
1062            metadata,
1063            plist,
1064            build_info,
1065            pkg_hash,
1066            gpg_signature,
1067            file_size,
1068        })
1069    }
1070
1071    /// Return the path to the package file.
1072    #[must_use]
1073    pub fn path(&self) -> &Path {
1074        &self.path
1075    }
1076
1077    /// Return the compression format.
1078    #[must_use]
1079    pub fn compression(&self) -> Compression {
1080        self.compression
1081    }
1082
1083    /// Return the archive type (signed or unsigned).
1084    #[must_use]
1085    pub fn archive_type(&self) -> ArchiveType {
1086        self.archive_type
1087    }
1088
1089    /// Return whether this package is signed.
1090    #[must_use]
1091    pub fn is_signed(&self) -> bool {
1092        self.archive_type == ArchiveType::Signed
1093    }
1094
1095    /// Return the package metadata.
1096    #[must_use]
1097    pub fn metadata(&self) -> &Metadata {
1098        &self.metadata
1099    }
1100
1101    /// Return the packing list.
1102    #[must_use]
1103    pub fn plist(&self) -> &Plist {
1104        &self.plist
1105    }
1106
1107    /// Return the package name from the plist.
1108    #[must_use]
1109    pub fn pkgname(&self) -> Option<&str> {
1110        self.plist.pkgname()
1111    }
1112
1113    /// Return the build info key-value pairs.
1114    #[must_use]
1115    pub fn build_info(&self) -> &HashMap<String, Vec<String>> {
1116        &self.build_info
1117    }
1118
1119    /// Get a specific build info value (first value if multiple exist).
1120    #[must_use]
1121    pub fn build_info_value(&self, key: &str) -> Option<&str> {
1122        self.build_info
1123            .get(key)
1124            .and_then(|v| v.first())
1125            .map(|s| s.as_str())
1126    }
1127
1128    /// Get all values for a build info key.
1129    #[must_use]
1130    pub fn build_info_values(&self, key: &str) -> Option<&[String]> {
1131        self.build_info.get(key).map(|v| v.as_slice())
1132    }
1133
1134    /// Return the package hash (for signed packages).
1135    #[must_use]
1136    pub fn pkg_hash(&self) -> Option<&PkgHash> {
1137        self.pkg_hash.as_ref()
1138    }
1139
1140    /// Return the GPG signature (for signed packages).
1141    #[must_use]
1142    pub fn gpg_signature(&self) -> Option<&[u8]> {
1143        self.gpg_signature.as_deref()
1144    }
1145
1146    /// Return the file size of the package.
1147    #[must_use]
1148    pub fn file_size(&self) -> u64 {
1149        self.file_size
1150    }
1151
1152    /// Open the archive for iteration (re-reads the file).
1153    pub fn archive(&self) -> Result<Archive<BufReader<File>>> {
1154        Archive::open(&self.path)
1155    }
1156
1157    /// Extract all files to a destination directory.
1158    ///
1159    /// This re-reads the archive and extracts all entries.
1160    pub fn extract_to(&self, dest: impl AsRef<Path>) -> Result<()> {
1161        let mut archive = self.archive()?;
1162        for entry in archive.entries()? {
1163            let mut entry = entry?;
1164            entry.unpack_in(dest.as_ref())?;
1165        }
1166        Ok(())
1167    }
1168
    /// Extract files to a destination directory with plist-based permissions.
    ///
    /// This method extracts files and applies permissions specified in the
    /// packing list (`@mode`, `@owner`, `@group` directives).
    ///
    /// # Arguments
    ///
    /// * `dest` - Destination directory for extraction
    /// * `options` - Extraction options controlling mode/ownership application
    ///
    /// # Returns
    ///
    /// A vector of [`ExtractedFile`] describing each extracted file.
    ///
    /// Note: ownership (`@owner`/`@group`) is currently **not** applied
    /// even when requested via `options` — see the inline comment below.
    ///
    /// # Example
    ///
    /// ```no_run
    /// use pkgsrc::archive::{BinaryPackage, ExtractOptions};
    ///
    /// let pkg = BinaryPackage::open("package-1.0.tgz")?;
    /// let options = ExtractOptions::new().with_mode();
    /// let extracted = pkg.extract_with_plist("/usr/pkg", options)?;
    /// for file in &extracted {
    ///     println!("Extracted: {}", file.path.display());
    /// }
    /// # Ok::<(), pkgsrc::archive::ArchiveError>(())
    /// ```
    #[cfg(unix)]
    pub fn extract_with_plist(
        &self,
        dest: impl AsRef<Path>,
        options: ExtractOptions,
    ) -> Result<Vec<ExtractedFile>> {
        use crate::plist::FileInfo;
        use std::os::unix::ffi::OsStrExt;

        let dest = dest.as_ref();
        let mut extracted = Vec::new();

        // Build a map of file paths to their plist metadata so each
        // archive entry can be matched with a single lookup.
        let file_infos: HashMap<OsString, FileInfo> = self
            .plist
            .files_with_info()
            .into_iter()
            .map(|info| (info.path.clone(), info))
            .collect();

        let mut archive = self.archive()?;
        for entry_result in archive.entries()? {
            let mut entry = entry_result?;
            let entry_path = entry.path()?.into_owned();

            // Metadata files (+CONTENTS, +COMMENT, ...) start with '+'.
            let is_metadata =
                entry_path.as_os_str().as_bytes().starts_with(b"+");

            // Extract the file
            entry.unpack_in(dest)?;

            let full_path = dest.join(&entry_path);

            // Look up plist metadata for this file
            let file_info = file_infos.get(entry_path.as_os_str());

            let mut applied_mode = None;

            // Apply mode from plist if requested.  Symlinks are skipped
            // so the permission change cannot affect the link target.
            if options.apply_mode && !is_metadata {
                if let Some(info) = file_info {
                    if let Some(mode_str) = &info.mode {
                        if let Some(mode) = parse_mode(mode_str) {
                            if full_path.exists() && !full_path.is_symlink() {
                                fs::set_permissions(
                                    &full_path,
                                    Permissions::from_mode(mode),
                                )?;
                                applied_mode = Some(mode);
                            }
                        }
                    }
                }
            }

            // Apply ownership from plist if requested
            // Note: This requires root privileges
            // NOTE(review): this branch is currently a no-op — ownership
            // is never actually changed even when apply_ownership is set.
            #[cfg(unix)]
            if options.apply_ownership && !is_metadata {
                if let Some(info) = file_info {
                    if info.owner.is_some() || info.group.is_some() {
                        // Ownership changes require the nix crate or libc
                        // For now, we just note it in the result but don't apply
                        // To implement: use nix::unistd::{chown, Uid, Gid}
                    }
                }
            }

            extracted.push(ExtractedFile {
                path: full_path,
                is_metadata,
                expected_checksum: file_info.and_then(|i| i.checksum.clone()),
                mode: applied_mode,
            });
        }

        Ok(extracted)
    }
1275
1276    /// Verify checksums of extracted files against plist MD5 values.
1277    ///
1278    /// This method checks that files in the destination directory match
1279    /// the MD5 checksums recorded in the packing list.
1280    ///
1281    /// # Arguments
1282    ///
1283    /// * `dest` - Directory where files were extracted
1284    ///
1285    /// # Returns
1286    ///
1287    /// A vector of tuples containing (file_path, expected_hash, actual_hash)
1288    /// for files that failed verification. Empty vector means all passed.
1289    pub fn verify_checksums(
1290        &self,
1291        dest: impl AsRef<Path>,
1292    ) -> Result<Vec<(PathBuf, String, String)>> {
1293        use md5::{Digest, Md5};
1294
1295        let dest = dest.as_ref();
1296        let mut failures = Vec::new();
1297
1298        for info in self.plist.files_with_info() {
1299            // Skip files without checksums
1300            let Some(expected) = &info.checksum else {
1301                continue;
1302            };
1303
1304            // Skip symlinks (they have Symlink: comments instead of MD5:)
1305            if info.symlink_target.is_some() {
1306                continue;
1307            }
1308
1309            let file_path = dest.join(&info.path);
1310
1311            if !file_path.exists() {
1312                failures.push((
1313                    file_path,
1314                    expected.clone(),
1315                    "FILE_NOT_FOUND".to_string(),
1316                ));
1317                continue;
1318            }
1319
1320            // Compute MD5 of the file
1321            let mut file = File::open(&file_path)?;
1322            let mut hasher = Md5::new();
1323            io::copy(&mut file, &mut hasher)?;
1324            let result = hasher.finalize();
1325            let actual = format!("{:032x}", result);
1326
1327            if actual != *expected {
1328                failures.push((file_path, expected.clone(), actual));
1329            }
1330        }
1331
1332        Ok(failures)
1333    }
1334
1335    /// Sign this package.
1336    ///
1337    /// Re-reads the package file to compute hashes and create a signed archive.
1338    pub fn sign(&self, signature: &[u8]) -> Result<SignedArchive> {
1339        let pkgname = self
1340            .pkgname()
1341            .ok_or_else(|| ArchiveError::MissingMetadata("pkgname".into()))?
1342            .to_string();
1343
1344        // Read the tarball data
1345        let tarball = std::fs::read(&self.path)?;
1346
1347        // Generate hash
1348        let pkg_hash = PkgHash::from_tarball(
1349            &pkgname,
1350            Cursor::new(&tarball),
1351            PkgHashAlgorithm::Sha512,
1352            DEFAULT_BLOCK_SIZE,
1353        )?;
1354
1355        Ok(SignedArchive {
1356            pkgname,
1357            compression: self.compression,
1358            pkg_hash,
1359            signature: signature.to_vec(),
1360            tarball,
1361        })
1362    }
1363
1364    /// Convert this package to a [`Summary`] entry.
1365    ///
1366    /// This uses default options (no file checksum computation).
1367    /// Use [`to_summary_with_opts`](Self::to_summary_with_opts) for more control.
1368    pub fn to_summary(&self) -> Result<Summary> {
1369        self.to_summary_with_opts(&SummaryOptions::default())
1370    }
1371
    /// Convert this package to a [`Summary`] entry with options.
    ///
    /// Assembles the summary fields from the cached plist, metadata, and
    /// `+BUILD_INFO` variables.  When `opts.compute_file_cksum` is set,
    /// the package file is re-read from disk to compute a SHA256 checksum.
    ///
    /// # Errors
    ///
    /// Fails when the plist has no package name, or on I/O errors while
    /// computing the optional file checksum.
    ///
    /// # Example
    ///
    /// ```no_run
    /// use pkgsrc::archive::{BinaryPackage, SummaryOptions};
    ///
    /// let pkg = BinaryPackage::open("package-1.0.tgz")?;
    /// let opts = SummaryOptions { compute_file_cksum: true };
    /// let summary = pkg.to_summary_with_opts(&opts)?;
    /// # Ok::<(), pkgsrc::archive::ArchiveError>(())
    /// ```
    pub fn to_summary_with_opts(
        &self,
        opts: &SummaryOptions,
    ) -> Result<Summary> {
        use sha2::{Digest, Sha256};

        let pkgname = self
            .plist
            .pkgname()
            .map(crate::PkgName::new)
            .ok_or_else(|| ArchiveError::MissingMetadata("PKGNAME".into()))?;

        // Helper to convert Vec<&str> to Option<Vec<String>>, avoiding allocation when empty
        fn to_opt_vec(v: Vec<&str>) -> Option<Vec<String>> {
            if v.is_empty() {
                None
            } else {
                Some(v.into_iter().map(String::from).collect())
            }
        }

        // Helper to filter empty/whitespace-only strings
        let non_empty = |s: &&str| !s.trim().is_empty();

        // Helper to convert &str to String, avoiding redundant into() calls
        let to_string = |s: &str| String::from(s);

        // Compute SHA256 checksum of the package file if requested;
        // skipped for zero-length files (nothing useful to hash).
        let file_cksum = if opts.compute_file_cksum && self.file_size > 0 {
            let mut file = File::open(&self.path)?;
            let mut hasher = Sha256::new();
            io::copy(&mut file, &mut hasher)?;
            let hash = hasher.finalize();
            Some(format!(
                "sha256 {}",
                hash.iter()
                    .map(|b| format!("{:02x}", b))
                    .collect::<String>()
            ))
        } else {
            None
        };

        // The positional arguments below must stay in exactly the order
        // Summary::new declares: mandatory fields first, then optionals.
        Ok(Summary::new(
            pkgname,
            self.metadata.comment().to_string(),
            self.metadata.size_pkg().unwrap_or(0),
            to_string(self.build_info_value("BUILD_DATE").unwrap_or("")),
            self.build_info_value("CATEGORIES")
                .unwrap_or("")
                .split_whitespace()
                .map(String::from)
                .collect(),
            to_string(self.build_info_value("MACHINE_ARCH").unwrap_or("")),
            to_string(self.build_info_value("OPSYS").unwrap_or("")),
            to_string(self.build_info_value("OS_VERSION").unwrap_or("")),
            to_string(self.build_info_value("PKGPATH").unwrap_or("")),
            to_string(self.build_info_value("PKGTOOLS_VERSION").unwrap_or("")),
            self.metadata.desc().lines().map(String::from).collect(),
            // Optional fields - avoid Vec<String> allocation when empty
            to_opt_vec(self.plist.conflicts()),
            to_opt_vec(self.plist.depends()),
            self.build_info_value("HOMEPAGE")
                .filter(non_empty)
                .map(to_string),
            self.build_info_value("LICENSE").map(to_string),
            self.build_info_value("PKG_OPTIONS").map(to_string),
            self.build_info_value("PREV_PKGPATH")
                .filter(non_empty)
                .map(to_string),
            self.build_info_values("PROVIDES").map(|v| v.to_vec()),
            self.build_info_values("REQUIRES").map(|v| v.to_vec()),
            self.build_info_values("SUPERSEDES").map(|v| v.to_vec()),
            self.path
                .file_name()
                .map(|f| f.to_string_lossy().into_owned()),
            if self.file_size > 0 {
                Some(self.file_size)
            } else {
                None
            },
            file_cksum,
        ))
    }
1468}
1469
1470impl FileRead for BinaryPackage {
1471    fn pkgname(&self) -> &str {
1472        self.plist.pkgname().unwrap_or("")
1473    }
1474
1475    fn comment(&self) -> std::io::Result<String> {
1476        Ok(self.metadata.comment().to_string())
1477    }
1478
1479    fn contents(&self) -> std::io::Result<String> {
1480        Ok(self.metadata.contents().to_string())
1481    }
1482
1483    fn desc(&self) -> std::io::Result<String> {
1484        Ok(self.metadata.desc().to_string())
1485    }
1486
1487    fn build_info(&self) -> std::io::Result<Option<String>> {
1488        Ok(self.metadata.build_info().map(|v| v.join("\n")))
1489    }
1490
1491    fn build_version(&self) -> std::io::Result<Option<String>> {
1492        Ok(self.metadata.build_version().map(|v| v.join("\n")))
1493    }
1494
1495    fn deinstall(&self) -> std::io::Result<Option<String>> {
1496        Ok(self.metadata.deinstall().map(|s| s.to_string()))
1497    }
1498
1499    fn display(&self) -> std::io::Result<Option<String>> {
1500        Ok(self.metadata.display().map(|s| s.to_string()))
1501    }
1502
1503    fn install(&self) -> std::io::Result<Option<String>> {
1504        Ok(self.metadata.install().map(|s| s.to_string()))
1505    }
1506
1507    fn installed_info(&self) -> std::io::Result<Option<String>> {
1508        Ok(self.metadata.installed_info().map(|v| v.join("\n")))
1509    }
1510
1511    fn mtree_dirs(&self) -> std::io::Result<Option<String>> {
1512        Ok(self.metadata.mtree_dirs().map(|v| v.join("\n")))
1513    }
1514
1515    fn preserve(&self) -> std::io::Result<Option<String>> {
1516        Ok(self.metadata.preserve().map(|v| v.join("\n")))
1517    }
1518
1519    fn required_by(&self) -> std::io::Result<Option<String>> {
1520        Ok(self.metadata.required_by().map(|v| v.join("\n")))
1521    }
1522
1523    fn size_all(&self) -> std::io::Result<Option<String>> {
1524        Ok(self.metadata.size_all().map(|n| n.to_string()))
1525    }
1526
1527    fn size_pkg(&self) -> std::io::Result<Option<String>> {
1528        Ok(self.metadata.size_pkg().map(|n| n.to_string()))
1529    }
1530}
1531
1532impl TryFrom<&BinaryPackage> for Summary {
1533    type Error = ArchiveError;
1534
1535    fn try_from(pkg: &BinaryPackage) -> Result<Self> {
1536        pkg.to_summary()
1537    }
1538}
1539
1540// ============================================================================
1541// Builder (low-level, tar-style)
1542// ============================================================================
1543
/// Wrapper for different compression encoders.
///
/// Unifies the gzip and zstd encoder types behind a single `Write`
/// implementation so [`Builder`] can hold either at runtime.
enum Encoder<W: Write> {
    // gzip encoder (flate2).
    Gzip(GzEncoder<W>),
    // zstd streaming encoder.
    Zstd(zstd::stream::Encoder<'static, W>),
}
1549
1550impl<W: Write> Write for Encoder<W> {
1551    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
1552        match self {
1553            Encoder::Gzip(e) => e.write(buf),
1554            Encoder::Zstd(e) => e.write(buf),
1555        }
1556    }
1557
1558    fn flush(&mut self) -> io::Result<()> {
1559        match self {
1560            Encoder::Gzip(e) => e.flush(),
1561            Encoder::Zstd(e) => e.flush(),
1562        }
1563    }
1564}
1565
1566impl<W: Write> Encoder<W> {
1567    fn finish(self) -> io::Result<W> {
1568        match self {
1569            Encoder::Gzip(e) => e.finish(),
1570            Encoder::Zstd(e) => e.finish(),
1571        }
1572    }
1573}
1574
1575/// Build a new compressed package archive.
1576///
1577/// This provides tar-style streaming construction of package archives.
1578/// Supports gzip and zstd compression.
1579///
1580/// # Example
1581///
1582/// ```no_run
1583/// use pkgsrc::archive::Builder;
1584///
1585/// // Create a package with auto-detected compression from filename
1586/// let mut builder = Builder::create("package-1.0.tgz")?;
1587///
1588/// // Add metadata files first
1589/// builder.append_metadata_file("+CONTENTS", b"@name package-1.0\n")?;
1590/// builder.append_metadata_file("+COMMENT", b"A test package")?;
1591/// builder.append_metadata_file("+DESC", b"Description here")?;
1592///
1593/// // Add package files
1594/// builder.append_file("bin/hello", b"#!/bin/sh\necho hello", 0o755)?;
1595///
1596/// builder.finish()?;
1597/// # Ok::<(), pkgsrc::archive::ArchiveError>(())
1598/// ```
pub struct Builder<W: Write> {
    // Tar writer layered on top of the compression encoder.
    inner: TarBuilder<Encoder<W>>,
    // Compression format chosen at construction time.
    compression: Compression,
}
1603
1604impl Builder<File> {
1605    /// Create a new archive file with compression auto-detected from extension.
1606    ///
1607    /// Supported extensions:
1608    /// - `.tgz`, `.tar.gz` → gzip
1609    /// - `.tzst`, `.tar.zst` → zstd
1610    ///
1611    /// Falls back to gzip for unrecognized extensions.
1612    pub fn create(path: impl AsRef<Path>) -> Result<Self> {
1613        let path = path.as_ref();
1614        let compression =
1615            Compression::from_extension(path).unwrap_or(Compression::Gzip);
1616        let file = File::create(path)?;
1617        Self::with_compression(file, compression)
1618    }
1619}
1620
1621impl<W: Write> Builder<W> {
1622    /// Create a new archive builder with gzip compression (default).
1623    ///
1624    /// Use [`Builder::with_compression`] for other formats, or
1625    /// [`Builder::create`] to auto-detect from a file path.
1626    pub fn new(writer: W) -> Result<Self> {
1627        Self::with_compression(writer, Compression::Gzip)
1628    }
1629
1630    /// Create a new archive builder with explicit compression.
1631    pub fn with_compression(
1632        writer: W,
1633        compression: Compression,
1634    ) -> Result<Self> {
1635        let encoder = match compression {
1636            Compression::Gzip => Encoder::Gzip(GzEncoder::new(
1637                writer,
1638                flate2::Compression::default(),
1639            )),
1640            Compression::Zstd => Encoder::Zstd(zstd::stream::Encoder::new(
1641                writer,
1642                zstd::DEFAULT_COMPRESSION_LEVEL,
1643            )?),
1644            Compression::None => {
1645                return Err(ArchiveError::UnsupportedCompression(
1646                    "uncompressed archives not supported for building".into(),
1647                ));
1648            }
1649        };
1650
1651        Ok(Self {
1652            inner: TarBuilder::new(encoder),
1653            compression,
1654        })
1655    }
1656
1657    /// Return the compression format.
1658    #[must_use]
1659    pub fn compression(&self) -> Compression {
1660        self.compression
1661    }
1662
1663    /// Append a metadata file (e.g., +CONTENTS, +COMMENT).
1664    pub fn append_metadata_file(
1665        &mut self,
1666        name: &str,
1667        content: &[u8],
1668    ) -> Result<()> {
1669        let mut header = Header::new_gnu();
1670        header.set_size(content.len() as u64);
1671        header.set_mode(0o644);
1672        header.set_mtime(0);
1673        header.set_cksum();
1674
1675        self.inner.append_data(&mut header, name, content)?;
1676        Ok(())
1677    }
1678
1679    /// Append a file with the given path, content, and mode.
1680    pub fn append_file(
1681        &mut self,
1682        path: impl AsRef<Path>,
1683        content: &[u8],
1684        mode: u32,
1685    ) -> Result<()> {
1686        let mut header = Header::new_gnu();
1687        header.set_size(content.len() as u64);
1688        header.set_mode(mode);
1689        header.set_mtime(0);
1690        header.set_cksum();
1691
1692        self.inner.append_data(&mut header, path, content)?;
1693        Ok(())
1694    }
1695
1696    /// Append a file from disk.
1697    pub fn append_path(&mut self, path: impl AsRef<Path>) -> Result<()> {
1698        self.inner.append_path(path)?;
1699        Ok(())
1700    }
1701
1702    /// Finish building the archive and return the underlying writer.
1703    pub fn finish(self) -> Result<W> {
1704        let encoder = self.inner.into_inner()?;
1705        let writer = encoder.finish()?;
1706        Ok(writer)
1707    }
1708}
1709
1710// ============================================================================
1711// SignedArchive
1712// ============================================================================
1713
1714/// A signed binary package ready to be written.
1715///
1716/// This is created by [`BinaryPackage::sign`] or [`SignedArchive::from_unsigned`].
#[derive(Debug)]
pub struct SignedArchive {
    // Package name; also used to derive the inner tarball member name.
    pkgname: String,
    // Compression of the embedded tarball.
    compression: Compression,
    // Block-hash metadata written out as `+PKG_HASH`.
    pkg_hash: PkgHash,
    // Detached GPG signature written out as `+PKG_GPG_SIGNATURE`.
    signature: Vec<u8>,
    // Raw bytes of the original compressed tarball.
    tarball: Vec<u8>,
}
1725
1726impl SignedArchive {
1727    /// Create a signed archive from unsigned tarball bytes.
1728    ///
1729    /// This is useful for signing a freshly-built package without writing
1730    /// it to disk first.
1731    pub fn from_unsigned(
1732        data: Vec<u8>,
1733        pkgname: impl Into<String>,
1734        signature: &[u8],
1735        compression: Compression,
1736    ) -> Result<Self> {
1737        let pkgname = pkgname.into();
1738        let pkg_hash = PkgHash::from_tarball(
1739            &pkgname,
1740            Cursor::new(&data),
1741            PkgHashAlgorithm::Sha512,
1742            DEFAULT_BLOCK_SIZE,
1743        )?;
1744
1745        Ok(Self {
1746            pkgname,
1747            compression,
1748            pkg_hash,
1749            signature: signature.to_vec(),
1750            tarball: data,
1751        })
1752    }
1753
1754    /// Return the package name.
1755    #[must_use]
1756    pub fn pkgname(&self) -> &str {
1757        &self.pkgname
1758    }
1759
1760    /// Return the compression format of the inner tarball.
1761    #[must_use]
1762    pub fn compression(&self) -> Compression {
1763        self.compression
1764    }
1765
1766    /// Return the package hash.
1767    #[must_use]
1768    pub fn pkg_hash(&self) -> &PkgHash {
1769        &self.pkg_hash
1770    }
1771
1772    /// Write the signed package to a file.
1773    pub fn write_to(&self, path: impl AsRef<Path>) -> Result<()> {
1774        let file = File::create(path)?;
1775        self.write(file)
1776    }
1777
1778    /// Write the signed package to a writer.
1779    pub fn write<W: Write>(&self, writer: W) -> Result<()> {
1780        let mut ar = ar::Builder::new(writer);
1781
1782        // Write +PKG_HASH
1783        let hash_content = self.pkg_hash.to_string();
1784        let hash_bytes = hash_content.as_bytes();
1785        let mut header =
1786            ar::Header::new(b"+PKG_HASH".to_vec(), hash_bytes.len() as u64);
1787        header.set_mode(0o644);
1788        ar.append(&header, hash_bytes)?;
1789
1790        // Write +PKG_GPG_SIGNATURE
1791        let mut header = ar::Header::new(
1792            b"+PKG_GPG_SIGNATURE".to_vec(),
1793            self.signature.len() as u64,
1794        );
1795        header.set_mode(0o644);
1796        ar.append(&header, self.signature.as_slice())?;
1797
1798        // Write tarball with appropriate extension
1799        let tarball_name =
1800            format!("{}.{}", self.pkgname, self.compression.extension());
1801        let mut header = ar::Header::new(
1802            tarball_name.into_bytes(),
1803            self.tarball.len() as u64,
1804        );
1805        header.set_mode(0o644);
1806        ar.append(&header, self.tarball.as_slice())?;
1807
1808        Ok(())
1809    }
1810}
1811
1812// ============================================================================
1813// Tests
1814// ============================================================================
1815
1816#[cfg(test)]
1817mod tests {
1818    use super::*;
1819    use std::io::Cursor;
1820
    #[test]
    fn test_compression_from_magic() {
        // gzip magic bytes: 1f 8b
        assert_eq!(
            Compression::from_magic(&[0x1f, 0x8b, 0, 0, 0, 0]),
            Some(Compression::Gzip)
        );
        // zstd magic bytes: 28 b5 2f fd
        assert_eq!(
            Compression::from_magic(&[0x28, 0xb5, 0x2f, 0xfd, 0, 0]),
            Some(Compression::Zstd)
        );
        // Unrecognized leading bytes yield no classification.
        assert_eq!(Compression::from_magic(&[0, 0, 0, 0, 0, 0]), None);
    }
1833
1834    #[test]
1835    fn test_compression_from_extension() {
1836        assert_eq!(
1837            Compression::from_extension("foo.tgz"),
1838            Some(Compression::Gzip)
1839        );
1840        assert_eq!(
1841            Compression::from_extension("foo.tar.gz"),
1842            Some(Compression::Gzip)
1843        );
1844        assert_eq!(
1845            Compression::from_extension("foo.tzst"),
1846            Some(Compression::Zstd)
1847        );
1848        assert_eq!(
1849            Compression::from_extension("foo.tar.zst"),
1850            Some(Compression::Zstd)
1851        );
1852        assert_eq!(
1853            Compression::from_extension("foo.tar"),
1854            Some(Compression::None)
1855        );
1856    }
1857
1858    #[test]
1859    fn test_hash_algorithm() {
1860        assert_eq!(
1861            "SHA512".parse::<PkgHashAlgorithm>().ok(),
1862            Some(PkgHashAlgorithm::Sha512)
1863        );
1864        assert_eq!(
1865            "sha256".parse::<PkgHashAlgorithm>().ok(),
1866            Some(PkgHashAlgorithm::Sha256)
1867        );
1868        assert!("MD5".parse::<PkgHashAlgorithm>().is_err());
1869
1870        assert_eq!(PkgHashAlgorithm::Sha512.as_str(), "SHA512");
1871        assert_eq!(PkgHashAlgorithm::Sha256.as_str(), "SHA256");
1872
1873        assert_eq!(PkgHashAlgorithm::Sha512.hash_size(), 64);
1874        assert_eq!(PkgHashAlgorithm::Sha256.hash_size(), 32);
1875    }
1876
1877    #[test]
1878    fn test_pkg_hash_parse() -> Result<()> {
1879        let content = "\
1880pkgsrc signature
1881version: 1
1882pkgname: test-1.0
1883algorithm: SHA512
1884block size: 65536
1885file size: 12345
1886abc123
1887def456
1888";
1889        let pkg_hash = PkgHash::parse(content)?;
1890
1891        assert_eq!(pkg_hash.version(), 1);
1892        assert_eq!(pkg_hash.pkgname(), "test-1.0");
1893        assert_eq!(pkg_hash.algorithm(), PkgHashAlgorithm::Sha512);
1894        assert_eq!(pkg_hash.block_size(), 65536);
1895        assert_eq!(pkg_hash.file_size(), 12345);
1896        assert_eq!(pkg_hash.hashes(), &["abc123", "def456"]);
1897        Ok(())
1898    }
1899
1900    #[test]
1901    fn test_pkg_hash_generate() -> Result<()> {
1902        let data = b"Hello, World!";
1903        let pkg_hash = PkgHash::from_tarball(
1904            "test-1.0",
1905            Cursor::new(data),
1906            PkgHashAlgorithm::Sha512,
1907            1024,
1908        )?;
1909
1910        assert_eq!(pkg_hash.pkgname(), "test-1.0");
1911        assert_eq!(pkg_hash.algorithm(), PkgHashAlgorithm::Sha512);
1912        assert_eq!(pkg_hash.block_size(), 1024);
1913        assert_eq!(pkg_hash.file_size(), 13);
1914        assert_eq!(pkg_hash.hashes().len(), 1);
1915        Ok(())
1916    }
1917
1918    #[test]
1919    fn test_pkg_hash_verify() -> Result<()> {
1920        let data = b"Hello, World!";
1921        let pkg_hash = PkgHash::from_tarball(
1922            "test-1.0",
1923            Cursor::new(data),
1924            PkgHashAlgorithm::Sha512,
1925            1024,
1926        )?;
1927
1928        assert!(pkg_hash.verify(Cursor::new(data))?);
1929
1930        let bad_data = b"Goodbye, World!";
1931        assert!(pkg_hash.verify(Cursor::new(bad_data)).is_err());
1932        Ok(())
1933    }
1934
1935    #[test]
1936    fn test_pkg_hash_roundtrip() -> Result<()> {
1937        let data = vec![0u8; 200_000];
1938        let pkg_hash = PkgHash::from_tarball(
1939            "test-1.0",
1940            Cursor::new(&data),
1941            PkgHashAlgorithm::Sha512,
1942            65536,
1943        )?;
1944
1945        let serialized = pkg_hash.to_string();
1946        let parsed = PkgHash::parse(&serialized)?;
1947
1948        assert_eq!(pkg_hash.version(), parsed.version());
1949        assert_eq!(pkg_hash.pkgname(), parsed.pkgname());
1950        assert_eq!(pkg_hash.algorithm(), parsed.algorithm());
1951        assert_eq!(pkg_hash.block_size(), parsed.block_size());
1952        assert_eq!(pkg_hash.file_size(), parsed.file_size());
1953        assert_eq!(pkg_hash.hashes(), parsed.hashes());
1954
1955        assert!(parsed.verify(Cursor::new(&data))?);
1956        Ok(())
1957    }
1958
1959    #[test]
1960    fn test_build_package_gzip() -> Result<()> {
1961        // Use new() which defaults to gzip
1962        let mut builder = Builder::new(Vec::new())?;
1963
1964        let plist = "@name testpkg-1.0\n@cwd /opt/test\nbin/test\n";
1965        builder.append_metadata_file("+CONTENTS", plist.as_bytes())?;
1966        builder.append_metadata_file("+COMMENT", b"A test package")?;
1967        builder.append_metadata_file(
1968            "+DESC",
1969            b"This is a test.\nMultiple lines.",
1970        )?;
1971        builder.append_metadata_file(
1972            "+BUILD_INFO",
1973            b"OPSYS=NetBSD\nMACHINE_ARCH=x86_64\n",
1974        )?;
1975        builder.append_file("bin/test", b"#!/bin/sh\necho test", 0o755)?;
1976        let output = builder.finish()?;
1977
1978        assert!(!output.is_empty());
1979
1980        // Verify we can read it back using low-level Archive (default gzip)
1981        let mut archive = Archive::new(Cursor::new(&output))?;
1982        let mut found_contents = false;
1983        for entry in archive.entries()? {
1984            let entry = entry?;
1985            if entry.path()?.to_str() == Some("+CONTENTS") {
1986                found_contents = true;
1987                break;
1988            }
1989        }
1990        assert!(found_contents);
1991        Ok(())
1992    }
1993
1994    #[test]
1995    fn test_build_package_zstd() -> Result<()> {
1996        // Use with_compression for explicit zstd
1997        let mut builder =
1998            Builder::with_compression(Vec::new(), Compression::Zstd)?;
1999
2000        let plist = "@name testpkg-1.0\n@cwd /opt/test\nbin/test\n";
2001        builder.append_metadata_file("+CONTENTS", plist.as_bytes())?;
2002        builder.append_metadata_file("+COMMENT", b"A test package")?;
2003        builder.append_metadata_file(
2004            "+DESC",
2005            b"This is a test.\nMultiple lines.",
2006        )?;
2007        builder.append_file("bin/test", b"#!/bin/sh\necho test", 0o755)?;
2008        let output = builder.finish()?;
2009
2010        assert!(!output.is_empty());
2011
2012        // Verify we can read it back using low-level Archive
2013        let mut archive =
2014            Archive::with_compression(Cursor::new(&output), Compression::Zstd)?;
2015        let mut found_contents = false;
2016        for entry in archive.entries()? {
2017            let entry = entry?;
2018            if entry.path()?.to_str() == Some("+CONTENTS") {
2019                found_contents = true;
2020                break;
2021            }
2022        }
2023        assert!(found_contents);
2024        Ok(())
2025    }
2026
2027    #[test]
2028    fn test_signed_archive_from_unsigned() -> Result<()> {
2029        // Build an unsigned package (default gzip)
2030        let mut builder = Builder::new(Vec::new())?;
2031        builder.append_metadata_file("+CONTENTS", b"@name testpkg-1.0\n")?;
2032        builder.append_metadata_file("+COMMENT", b"A test package")?;
2033        builder.append_metadata_file("+DESC", b"Test description")?;
2034        let output = builder.finish()?;
2035
2036        let fake_signature = b"FAKE GPG SIGNATURE";
2037        let signed = SignedArchive::from_unsigned(
2038            output,
2039            "testpkg-1.0",
2040            fake_signature,
2041            Compression::Gzip,
2042        )?;
2043
2044        assert_eq!(signed.pkgname(), "testpkg-1.0");
2045        assert_eq!(signed.pkg_hash().algorithm(), PkgHashAlgorithm::Sha512);
2046        assert_eq!(signed.compression(), Compression::Gzip);
2047
2048        // Write to buffer and verify it's an ar archive
2049        let mut signed_output = Vec::new();
2050        signed.write(&mut signed_output)?;
2051        assert!(&signed_output[..7] == b"!<arch>");
2052        Ok(())
2053    }
2054
2055    #[test]
2056    fn test_signed_archive_zstd() -> Result<()> {
2057        // Build an unsigned zstd package
2058        let mut builder =
2059            Builder::with_compression(Vec::new(), Compression::Zstd)?;
2060        builder.append_metadata_file("+CONTENTS", b"@name testpkg-1.0\n")?;
2061        builder.append_metadata_file("+COMMENT", b"A test package")?;
2062        builder.append_metadata_file("+DESC", b"Test description")?;
2063        let output = builder.finish()?;
2064
2065        let fake_signature = b"FAKE GPG SIGNATURE";
2066        let signed = SignedArchive::from_unsigned(
2067            output,
2068            "testpkg-1.0",
2069            fake_signature,
2070            Compression::Zstd,
2071        )?;
2072
2073        assert_eq!(signed.pkgname(), "testpkg-1.0");
2074        assert_eq!(signed.compression(), Compression::Zstd);
2075
2076        // Write to buffer and verify it's an ar archive
2077        let mut signed_output = Vec::new();
2078        signed.write(&mut signed_output)?;
2079        assert!(&signed_output[..7] == b"!<arch>");
2080        Ok(())
2081    }
2082
2083    #[test]
2084    fn test_parse_mode() {
2085        // Standard octal formats
2086        assert_eq!(super::parse_mode("0755"), Some(0o755));
2087        assert_eq!(super::parse_mode("755"), Some(0o755));
2088        assert_eq!(super::parse_mode("0644"), Some(0o644));
2089        assert_eq!(super::parse_mode("644"), Some(0o644));
2090        assert_eq!(super::parse_mode("0777"), Some(0o777));
2091        assert_eq!(super::parse_mode("0400"), Some(0o400));
2092
2093        // Invalid formats
2094        assert_eq!(super::parse_mode(""), None);
2095        assert_eq!(super::parse_mode("abc"), None);
2096        assert_eq!(super::parse_mode("999"), None); // 9 is not valid octal
2097    }
2098
2099    #[test]
2100    fn test_extract_options() {
2101        let opts = ExtractOptions::new();
2102        assert!(!opts.apply_mode);
2103        assert!(!opts.apply_ownership);
2104        assert!(!opts.preserve_mtime);
2105
2106        let opts = ExtractOptions::new().with_mode().with_ownership();
2107        assert!(opts.apply_mode);
2108        assert!(opts.apply_ownership);
2109        assert!(!opts.preserve_mtime);
2110    }
2111}