debian_packaging/repository/
builder.rs

1// This Source Code Form is subject to the terms of the Mozilla Public
2// License, v. 2.0. If a copy of the MPL was not distributed with this
3// file, You can obtain one at https://mozilla.org/MPL/2.0/.
4
5/*! Build your own Debian repositories.
6
7This module defines functionality for constructing Debian repositories.
8
9See <https://wiki.debian.org/DebianRepository/Format> for the format of repositories.
10
11Repositories are essentially a virtual filesystem composed of some well-defined files.
12Primitives in this module facilitate constructing your own repositories.
13*/
14
15use {
16    crate::{
17        binary_package_control::BinaryPackageControlFile,
18        control::{ControlField, ControlParagraph},
19        deb::reader::resolve_control_file,
20        error::{DebianError, Result},
21        io::{read_compressed, ContentDigest, DataResolver, MultiContentDigest, MultiDigester},
22        repository::{
23            release::{ChecksumType, ReleaseFile, DATE_FORMAT},
24            Compression, PublishEvent, RepositoryPathVerificationState, RepositoryWriter,
25        },
26    },
27    chrono::{DateTime, Utc},
28    futures::{AsyncRead, AsyncReadExt, StreamExt, TryStreamExt},
29    pgp::{crypto::hash::HashAlgorithm, types::SecretKeyTrait},
30    pgp_cleartext::cleartext_sign,
31    std::{
32        borrow::Cow,
33        collections::{BTreeMap, BTreeSet, HashMap},
34        pin::Pin,
35    },
36};
37
/// Pre-defined progress callback that is empty.
///
/// Pass this where a `progress_cb` argument is required but no progress reporting is wanted.
pub const NO_PROGRESS_CB: Option<fn(PublishEvent)> = None;

/// Pre-defined signing key argument that is empty.
///
/// Pass this where a signing key argument is required but no PGP signing is wanted.
#[allow(clippy::type_complexity)]
pub const NO_SIGNING_KEY: Option<(&pgp::SignedSecretKey, fn() -> String)> = None;
44
/// Describes the layout of the `pool` part of the repository.
///
/// This type effectively controls where `.deb` files will be placed under the repository root.
#[derive(Clone, Copy, Debug)]
pub enum PoolLayout {
    /// File paths are `<component>/<name_prefix>/<filename>`.
    ///
    /// This is the layout as used by the Debian distribution.
    ///
    /// The package name is used to derive a directory prefix. For packages beginning with `lib`,
    /// the prefix is `libz/<package>/`. For everything else, it is `<first character>/<package>/`.
    ///
    /// For example, file `zstd_1.4.8+dfsg-2.1_amd64.deb` in the `main` component will be mapped to
    /// `pool/main/libz/libzstd/zstd_1.4.8+dfsg-2.1_amd64.deb` and `python3.9_3.9.9-1_arm64.deb` in
    /// the `main` component will be mapped to `pool/main/p/python3.9/python3.9_3.9.9-1_arm64.deb`.
    ComponentThenNamePrefix,
}

impl Default for PoolLayout {
    fn default() -> Self {
        Self::ComponentThenNamePrefix
    }
}

impl PoolLayout {
    /// Compute the path to a file given the source package name and its filename.
    ///
    /// `component` is the repository component (e.g. `main`), `package` is the (source)
    /// package name used to derive the directory prefix, and `filename` is the name of
    /// the `.deb` file itself.
    pub fn path(&self, component: &str, package: &str, filename: &str) -> String {
        match self {
            Self::ComponentThenNamePrefix => {
                // Packages starting with `lib` use a 4-byte prefix (e.g. `libz` for
                // `libzstd`); everything else uses the first byte. Use checked slicing
                // so degenerate names (a package named exactly `lib`, an empty name, or
                // a multi-byte first character) fall back to the full name instead of
                // panicking on an out-of-bounds / non-char-boundary slice.
                let prefix_len = if package.starts_with("lib") { 4 } else { 1 };
                let prefix = package.get(0..prefix_len).unwrap_or(package);

                format!("pool/{}/{}/{}/{}", component, prefix, package, filename)
            }
        }
    }
}
85
/// Describes a reference to a `.deb` Debian package existing somewhere.
///
/// This trait is used as a generic way to refer to a `.deb` package, without implementations
/// necessarily having immediate access to the full content/data of that `.deb` package.
///
/// See [InMemoryDebFile] for a simple implementation that holds the entire `.deb` in memory.
pub trait DebPackageReference<'cf> {
    /// Obtain the size in bytes of the `.deb` file.
    ///
    /// This becomes the `Size` field in `Packages*` control files.
    fn deb_size_bytes(&self) -> Result<u64>;

    /// Obtains the binary digest of this file given a checksum flavor.
    ///
    /// Implementations can compute the digest at run-time or return a cached value.
    fn deb_digest(&self, checksum: ChecksumType) -> Result<ContentDigest>;

    /// Obtain the filename of this `.deb`.
    ///
    /// This should be just the file name, without any directory components.
    fn deb_filename(&self) -> Result<String>;

    /// Obtain a [BinaryPackageControlFile] representing content for a `Packages` index file.
    ///
    /// The returned content can come from a `control` file in a `control.tar` or from
    /// an existing `Packages` control file.
    ///
    /// The control file must have at least `Package`, `Version`, and `Architecture` fields.
    fn control_file_for_packages_index(&self) -> Result<BinaryPackageControlFile<'cf>>;
}
114
/// Holds the content of a `.deb` file in-memory.
///
/// This is the simplest implementation of [DebPackageReference]: the entire `.deb`
/// is buffered in memory and all trait methods operate on that buffer.
pub struct InMemoryDebFile {
    // File name of the `.deb` (no directory components).
    filename: String,
    // Raw bytes of the `.deb` file.
    data: Vec<u8>,
}

impl InMemoryDebFile {
    /// Create a new instance bound to memory.
    ///
    /// `filename` should be just the file name; `data` is the raw `.deb` content.
    pub fn new(filename: String, data: Vec<u8>) -> Self {
        Self { filename, data }
    }
}
127
128impl<'cf> DebPackageReference<'cf> for InMemoryDebFile {
129    fn deb_size_bytes(&self) -> Result<u64> {
130        Ok(self.data.len() as u64)
131    }
132
133    fn deb_digest(&self, checksum: ChecksumType) -> Result<ContentDigest> {
134        let mut h = checksum.new_hasher();
135        h.update(&self.data);
136        let digest = h.finish().to_vec();
137
138        Ok(match checksum {
139            ChecksumType::Md5 => ContentDigest::Md5(digest),
140            ChecksumType::Sha1 => ContentDigest::Sha1(digest),
141            ChecksumType::Sha256 => ContentDigest::Sha256(digest),
142        })
143    }
144
145    fn deb_filename(&self) -> Result<String> {
146        Ok(self.filename.clone())
147    }
148
149    fn control_file_for_packages_index(&self) -> Result<BinaryPackageControlFile<'cf>> {
150        resolve_control_file(std::io::Cursor::new(&self.data))
151    }
152}
153
/// Describes an index file to write.
///
/// Pairs a reader producing the index content with metadata describing where
/// (and with which compression) the file is written in the repository.
pub struct IndexFileReader<'a> {
    /// Provides the content of the file as it will be written.
    ///
    /// NOTE(review): `binary_packages_index_readers()` assigns an already-compressed
    /// stream here, so this content is final — it is not compressed again on write.
    pub reader: Pin<Box<dyn AsyncRead + Send + 'a>>,
    /// The compression to apply to the written file.
    pub compression: Compression,
    /// The directory the index file is based in.
    pub directory: String,
    /// The filename of the index file (without the compression suffix).
    pub filename: String,
}
165
166impl<'a> IndexFileReader<'a> {
167    /// Obtain the canonical path of this entry as it would appear in an `[In]Release` file.
168    pub fn canonical_path(&self) -> String {
169        format!(
170            "{}/{}{}",
171            self.directory,
172            self.filename,
173            self.compression.extension()
174        )
175    }
176
177    /// Obtain the `by-hash` path given a [ContentDigest].
178    pub fn by_hash_path(&self, digest: &ContentDigest) -> String {
179        format!(
180            "{}/by-hash/{}/{}",
181            self.directory,
182            digest.release_field_name(),
183            digest.digest_hex()
184        )
185    }
186}
187
// An index file fully buffered in memory, ready to be written out.
struct ExpandedIndexFile {
    // Path as listed in the `[In]Release` file.
    canonical_path: String,
    // Path the content is actually written to (a `by-hash` path when
    // `Acquire-By-Hash` is enabled; otherwise the canonical path).
    write_path: String,
    // Digests of `data` in every supported checksum flavor.
    digests: MultiContentDigest,
    // The index file content, byte-for-byte as written.
    data: Vec<u8>,
}
194
/// Describes a file in the *pool* to support a binary package.
///
/// Values are reconstructed from the `Filename`, `Size`, and checksum fields recorded
/// on each `Packages` paragraph, and drive verification/copying during publishing.
#[derive(Debug)]
pub struct BinaryPackagePoolArtifact<'a> {
    /// The file path relative to the repository root.
    pub path: &'a str,
    /// The expected size of the file.
    pub size: u64,
    /// The expected digest of the file.
    ///
    /// This is the strongest checksum flavor configured on the builder.
    pub digest: ContentDigest,
}
205
// Binary packages indexed by (Package, Version) -> `Packages` paragraph.
type IndexedBinaryPackages<'a> = BTreeMap<(String, String), ControlParagraph<'a>>;

// Indexed packages grouped by (component, architecture).
type ComponentBinaryPackages<'a> = BTreeMap<(String, String), IndexedBinaryPackages<'a>>;
211
212/// Build Debian repositories from scratch.
213///
214/// Instances of this type are used to iteratively construct a Debian repository.
215///
216/// A Debian repository consists of named *components* holding binary packages, sources,
217/// installer packages, and metadata gluing it all together.
218///
219/// # Usage
220///
221/// Instances are constructed, preferably via [Self::new_recommended()].
222///
223/// Additional metadata about the repository is then registered using the following functions
224/// (as needed):
225///
226/// * [Self::add_architecture()]
227/// * [Self::add_component()]
228/// * [Self::add_checksum()]
229/// * [Self::set_suite()]
230/// * [Self::set_codename()]
231/// * [Self::set_date()]
232/// * [Self::set_valid_until()]
233/// * [Self::set_description()]
234/// * [Self::set_origin()]
235/// * [Self::set_label()]
236/// * [Self::set_version()]
237/// * [Self::set_acquire_by_hash()]
238///
239/// See <https://wiki.debian.org/DebianRepository/Format> for a description of what these various
240/// fields are used for.
241///
242/// After basic metadata is in place, `.deb` packages are registered against the builder via
243/// [Self::add_binary_deb()].
244///
245/// Once everything is registered against the builder, it is time to *publish* (read: write)
246/// the repository content.
247///
248/// Publishing works by first writing *pool* content. The *pool* is an area of the repository
249/// where blobs (like `.deb` packages) are stored. To publish the pool, call
250/// [Self::publish_pool_artifacts()]. This takes a [DataResolver] for obtaining missing pool
251/// content. Its content retrieval functions will be called for each pool path that needs to be
252/// copied to the writer. Since [crate::repository::RepositoryRootReader] must implement
253/// [DataResolver], you can pass an instance as the [DataResolver] to effectively copy
254/// artifacts from another Debian repository. Note: for this to work, the source repository
255/// must have the same [PoolLayout] as this repository. This may not always be the case!
256/// To more robustly copy files from another repository, instantiate a
257/// [crate::io::PathMappingDataResolver] and call
258/// [crate::io::PathMappingDataResolver::add_path_map()] with the result from
259/// [Self::add_binary_deb()] (and similar function) to install a path mapping.
260///
261/// After pool content is written, indices files are derived and written. To publish these
262/// files, call [Self::publish_indices()]. This step uses an optional signing key to
263/// PGP sign the indices files.
264///
265/// For convenience, the [Self::publish()] method exists to perform both pool and indices
266/// publishing. It is strongly recommended to call this method instead of the lower-level
267/// methods for writing out content.
#[derive(Debug, Default)]
pub struct RepositoryBuilder<'cf> {
    // Release file fields.
    architectures: BTreeSet<String>,
    components: BTreeSet<String>,
    suite: Option<String>,
    codename: Option<String>,
    date: Option<DateTime<Utc>>,
    valid_until: Option<DateTime<Utc>>,
    description: Option<String>,
    origin: Option<String>,
    label: Option<String>,
    version: Option<String>,
    acquire_by_hash: Option<bool>,
    // Checksum flavors to emit; BTreeSet ordering sorts weakest -> strongest.
    checksums: BTreeSet<ChecksumType>,
    // Controls where `.deb` files live under the repository root.
    pool_layout: PoolLayout,
    // Compression formats emitted for each index file.
    index_file_compressions: BTreeSet<Compression>,
    // (component, architecture) -> indexed binary packages.
    binary_packages: ComponentBinaryPackages<'cf>,
    // NOTE(review): nothing in this file populates the following three fields yet.
    installer_packages: ComponentBinaryPackages<'cf>,
    source_packages: BTreeMap<String, IndexedBinaryPackages<'cf>>,
    translations: BTreeMap<String, ()>,
}
290
291impl<'cf> RepositoryBuilder<'cf> {
292    /// Create a new instance with recommended settings.
293    ///
294    /// Files that should almost always be set (like `Architectures` and `Components`)
295    /// are empty. It is recommended to use [Self::new_recommended()] instead.
296    pub fn new_recommended_empty() -> Self {
297        Self {
298            architectures: BTreeSet::new(),
299            components: BTreeSet::new(),
300            suite: None,
301            codename: None,
302            date: Some(Utc::now()),
303            valid_until: None,
304            description: None,
305            origin: None,
306            label: None,
307            version: None,
308            acquire_by_hash: Some(true),
309            checksums: BTreeSet::from_iter([ChecksumType::Md5, ChecksumType::Sha256]),
310            pool_layout: PoolLayout::default(),
311            index_file_compressions: BTreeSet::from_iter([
312                Compression::None,
313                Compression::Gzip,
314                Compression::Xz,
315            ]),
316            binary_packages: ComponentBinaryPackages::default(),
317            installer_packages: ComponentBinaryPackages::default(),
318            source_packages: BTreeMap::default(),
319            translations: BTreeMap::default(),
320        }
321    }
322
323    /// Create a new instance with recommended settings and fields.
324    ///
325    /// The arguments to this function are those that should be defined on most Debian repositories.
326    ///
327    /// Calling this function is equivalent to calling [Self::new_recommended_empty()] then calling
328    /// various `.add_*()` methods on the returned instance.
329    pub fn new_recommended(
330        architectures: impl Iterator<Item = impl ToString>,
331        components: impl Iterator<Item = impl ToString>,
332        suite: impl ToString,
333        codename: impl ToString,
334    ) -> Self {
335        Self {
336            architectures: BTreeSet::from_iter(architectures.map(|x| x.to_string())),
337            components: BTreeSet::from_iter(components.map(|x| x.to_string())),
338            suite: Some(suite.to_string()),
339            codename: Some(codename.to_string()),
340            ..Self::new_recommended_empty()
341        }
342    }
343
    /// Register an architecture with the builder.
    ///
    /// This defines which platform architectures there will be packages for.
    ///
    /// Example architecture values are `all`, `amd64`, `arm64`, and `i386`.
    pub fn add_architecture(&mut self, arch: impl ToString) {
        self.architectures.insert(arch.to_string());
    }

    /// Register a named component with the builder.
    ///
    /// Components describe a named subset of the repository. Example names include
    /// `main`, `contrib`, `restricted`, `stable`.
    pub fn add_component(&mut self, name: impl ToString) {
        self.components.insert(name.to_string());
    }

    /// Register a checksum type to emit.
    ///
    /// [ChecksumType::Sha256] should always be used. Adding [ChecksumType::Md5] is
    /// recommended for compatibility with old clients.
    pub fn add_checksum(&mut self, value: ChecksumType) {
        self.checksums.insert(value);
    }

    /// Set the `Suite` value.
    ///
    /// This is often a value like `stable`, `bionic`, `groovy`. Some identifier that helps
    /// identify this repository.
    pub fn set_suite(&mut self, value: impl ToString) {
        self.suite = Some(value.to_string());
    }

    /// Set the `Codename` value.
    ///
    /// This is often a human friendly name to help identify the repository. Example values
    /// include `groovy`, `bullseye`, `bionic`.
    pub fn set_codename(&mut self, value: impl ToString) {
        self.codename = Some(value.to_string());
    }

    /// Set the time this repository was created/updated.
    ///
    /// If not set, the current time will be used automatically.
    ///
    /// This becomes the `Date` field in the `Release` file.
    pub fn set_date(&mut self, value: DateTime<Utc>) {
        self.date = Some(value);
    }

    /// Set the value for the `Valid-Until` field.
    ///
    /// Clients should not trust this repository after this date.
    pub fn set_valid_until(&mut self, value: DateTime<Utc>) {
        self.valid_until = Some(value);
    }

    /// Set a human friendly description text for this repository.
    pub fn set_description(&mut self, value: impl ToString) {
        self.description = Some(value.to_string());
    }

    /// Set a field indicating the origin of the repository.
    pub fn set_origin(&mut self, value: impl ToString) {
        self.origin = Some(value.to_string());
    }

    /// Set freeform text describing the repository.
    pub fn set_label(&mut self, value: impl ToString) {
        self.label = Some(value.to_string());
    }

    /// Set the version of the release.
    ///
    /// Typically `.` delimited integers.
    pub fn set_version(&mut self, value: impl ToString) {
        self.version = Some(value.to_string());
    }

    /// Set the value of `Acquire-By-Hash`.
    ///
    /// This should be enabled for new repositories.
    ///
    /// When enabled, index files are additionally written to
    /// `by-hash/<checksum>/<digest>` paths named after their content digest.
    pub fn set_acquire_by_hash(&mut self, value: bool) {
        self.acquire_by_hash = Some(value);
    }
427
428    /// Set the [PoolLayout] to use.
429    ///
430    /// The layout can only be updated before content is added. Once a package has been
431    /// indexed, this function will error.
432    pub fn set_pool_layout(&mut self, layout: PoolLayout) -> Result<()> {
433        if self.have_entries() {
434            Err(DebianError::RepositoryBuildPoolLayoutImmutable)
435        } else {
436            self.pool_layout = layout;
437            Ok(())
438        }
439    }
440
441    fn have_entries(&self) -> bool {
442        !self.binary_packages.is_empty()
443            || !self.source_packages.is_empty()
444            || !self.installer_packages.is_empty()
445            || !self.translations.is_empty()
446    }
447
    /// Add a binary package `.deb` to this repository in the given component.
    ///
    /// The package to add is specified as a trait to enable callers to represent Debian
    /// packages differently. For example, the trait members may be implemented by just-in-time
    /// parsing of an actual `.deb` file or by retrieving the data from a cache.
    ///
    /// The specified `component` name must be registered with this instance or an error will
    /// occur. The same is true for the package's `Architecture` field.
    ///
    /// Returns the pool path / `Filename` field that this binary package `.deb` will occupy
    /// in the repository.
    pub fn add_binary_deb(
        &mut self,
        component: &str,
        deb: &impl DebPackageReference<'cf>,
    ) -> Result<String> {
        if !self.components.contains(component) {
            return Err(DebianError::RepositoryBuildUnknownComponent(
                component.to_string(),
            ));
        }

        let original_control_file = deb.control_file_for_packages_index()?;

        // These accessors error if the corresponding mandatory field is missing.
        let package = original_control_file.package()?;
        let version = original_control_file.version_str()?;
        let arch = original_control_file.architecture()?;

        if !self.architectures.contains(arch) {
            return Err(DebianError::RepositoryBuildUnknownArchitecture(
                arch.to_string(),
            ));
        }

        // We iteratively build up the control paragraph for the `Packages` file from the original
        // control file.
        let mut para = ControlParagraph::default();

        // Different packages have different fields and it is effectively impossible to maintain
        // an enumeration of all known fields. So, copy over all fields and ignore the special ones,
        // which we handle later.
        for field in original_control_file.iter_fields() {
            if ![
                "Description",
                "Filename",
                "Size",
                "MD5sum",
                "SHA1",
                "SHA256",
            ]
            .contains(&field.name())
            {
                para.set_field(field.clone());
            }
        }

        // The `Description` field is a bit wonky in Packages files. Instead of capturing multiline
        // values, `Description` is just the first line and a `Description-md5` contains the md5
        // of the multiline value.
        if let Some(description) = original_control_file.field("Description") {
            let description = description.value_str();

            if let Some(index) = description.find('\n') {
                // The digest covers the full multi-line value plus a trailing newline.
                let mut h = ChecksumType::Md5.new_hasher();
                h.update(description.as_bytes());
                h.update(b"\n");
                let digest = h.finish();

                para.set_field_from_string(
                    "Description".into(),
                    (description[0..index]).to_string().into(),
                );
                para.set_field_from_string("Description-md5".into(), hex::encode(digest).into());
            } else {
                // Single-line description: emitted verbatim, no `Description-md5`.
                para.set_field_from_string("Description".into(), description.to_string().into());
            }
        }

        // The `Filename` is derived from the pool layout scheme in effect.
        // The `Source` package name is preferred for the pool directory when present.
        let filename = self.pool_layout.path(
            component,
            if let Some(name) = original_control_file.source() {
                name
            } else {
                package
            },
            &deb.deb_filename()?,
        );
        para.set_field_from_string("Filename".into(), filename.clone().into());

        // `Size` shouldn't be in the original control file, since it is a property of the
        // `.deb` in which the control file is embedded.
        para.set_field_from_string("Size".into(), format!("{}", deb.deb_size_bytes()?).into());

        // Add all configured digests for this repository.
        for checksum in &self.checksums {
            let digest = deb.deb_digest(*checksum)?;

            para.set_field_from_string(checksum.field_name().into(), digest.digest_hex().into());
        }

        // Index under (component, architecture) then (package, version); re-adding the
        // same package/version replaces the previous paragraph.
        let component_key = (component.to_string(), arch.to_string());
        let package_key = (package.to_string(), version.to_string());
        self.binary_packages
            .entry(component_key)
            .or_default()
            .insert(package_key, para);

        Ok(filename)
    }
558
559    /// Obtain all components having binary packages.
560    ///
561    /// The iterator contains 2-tuples of `(component, architecture)`.
562    pub fn binary_package_components(&self) -> impl Iterator<Item = (&str, &str)> + '_ {
563        self.binary_packages
564            .keys()
565            .map(|(a, b)| (a.as_str(), b.as_str()))
566    }
567
568    /// Obtain an iterator of [ControlParagraph] for binary packages in a given component + architecture.
569    ///
570    /// This method forms the basic building block for constructing `Packages` files. `Packages`
571    /// files can be built by serializing the [ControlParagraph] to a string/writer.
572    pub fn iter_component_binary_packages(
573        &self,
574        component: impl ToString,
575        architecture: impl ToString,
576    ) -> Box<dyn Iterator<Item = &'_ ControlParagraph> + Send + '_> {
577        if let Some(packages) = self
578            .binary_packages
579            .get(&(component.to_string(), architecture.to_string()))
580        {
581            Box::new(packages.values())
582        } else {
583            Box::new(std::iter::empty())
584        }
585    }
586
    /// Obtain an iterator of pool artifacts for binary packages that will need to exist.
    ///
    /// Values are reconstructed from the `Filename`, `Size`, and checksum fields that
    /// [Self::add_binary_deb()] recorded on each `Packages` paragraph.
    pub fn iter_component_binary_package_pool_artifacts(
        &self,
        component: impl ToString,
        architecture: impl ToString,
    ) -> impl Iterator<Item = Result<BinaryPackagePoolArtifact<'_>>> + '_ {
        self.iter_component_binary_packages(component, architecture)
            .map(|para| {
                // These fields are set unconditionally at package add time, so their
                // absence is a program bug, not a user error — hence the expects.
                let path = para
                    .field_str("Filename")
                    .expect("Filename should have been populated at package add time");
                let size = para
                    .field_u64("Size")
                    .expect("Size should have been populated at package add time")
                    .expect("Size should parse to an integer");

                // Checksums are stored in a BTreeSet and sort from weakest to strongest. So use the
                // strongest available checksum.
                let strongest_checksum = self
                    .checksums
                    .iter()
                    .last()
                    .expect("should have at least 1 checksum defined");

                let digest_hex = para
                    .field_str(strongest_checksum.field_name())
                    .expect("checksum's field should have been set");
                let digest = ContentDigest::from_hex_digest(*strongest_checksum, digest_hex)?;

                Ok(BinaryPackagePoolArtifact { path, size, digest })
            })
    }
619
620    /// Obtain an [AsyncRead] that reads contents of a `Packages` file for binary packages.
621    ///
622    /// This is a wrapper around [Self::iter_component_binary_packages()] that normalizes the
623    /// [ControlParagraph] to data and converts it to an [AsyncRead].
624    pub fn component_binary_packages_reader(
625        &self,
626        component: impl ToString,
627        architecture: impl ToString,
628    ) -> impl AsyncRead + '_ {
629        futures::stream::iter(
630            self.iter_component_binary_packages(component, architecture)
631                .map(|p| Ok(format!("{}\n", p.to_string()))),
632        )
633        .into_async_read()
634    }
635
636    /// Like [Self::component_binary_packages_reader()] except data is compressed.
637    pub fn component_binary_packages_reader_compression(
638        &self,
639        component: impl ToString,
640        architecture: impl ToString,
641        compression: Compression,
642    ) -> Pin<Box<dyn AsyncRead + Send + '_>> {
643        read_compressed(
644            futures::io::BufReader::new(
645                self.component_binary_packages_reader(
646                    component.to_string(),
647                    architecture.to_string(),
648                ),
649            ),
650            compression,
651        )
652    }
653
    /// Obtain [IndexFileReader] for each logical `Packages` file.
    ///
    /// One entry is produced per registered `(component, architecture)` pair per
    /// configured compression format.
    pub fn binary_packages_index_readers(&self) -> impl Iterator<Item = IndexFileReader<'_>> + '_ {
        self.binary_packages
            .keys()
            .flat_map(move |(component, architecture)| {
                self.index_file_compressions
                    .iter()
                    .map(move |compression| IndexFileReader {
                        // The reader already emits compressed content.
                        reader: self.component_binary_packages_reader_compression(
                            component,
                            architecture,
                            *compression,
                        ),
                        compression: *compression,
                        directory: format!("{}/binary-{}", component, architecture),
                        filename: "Packages".to_string(),
                    })
            })
    }
673
    /// Obtain all [IndexFileReader] to be published.
    ///
    /// Each item corresponds to a logical item in an `[In]Release`.
    ///
    /// Currently this consists only of the `Packages` indices.
    pub fn index_file_readers(&self) -> impl Iterator<Item = IndexFileReader<'_>> + '_ {
        self.binary_packages_index_readers()
    }
680
    /// Obtain records describing pool artifacts needed to support binary packages.
    ///
    /// This flattens [Self::iter_component_binary_package_pool_artifacts()] across
    /// every registered `(component, architecture)` pair.
    pub fn iter_binary_packages_pool_artifacts(
        &self,
    ) -> impl Iterator<Item = Result<BinaryPackagePoolArtifact<'_>>> + '_ {
        self.binary_packages
            .keys()
            .flat_map(move |(component, architecture)| {
                self.iter_component_binary_package_pool_artifacts(component, architecture)
            })
    }
691
    /// Publish artifacts to the *pool*.
    ///
    /// The *pool* is the area of a Debian repository holding files like the .deb packages.
    ///
    /// Content must be published to the pool before indices data is written, otherwise there
    /// is a race condition where the indices could refer to files not yet in the pool.
    ///
    /// `resolver` supplies the content of missing pool paths, `writer` receives the published
    /// files, `threads` bounds how many verifications/copies run concurrently, and
    /// `progress_cb` (if set) is invoked with [PublishEvent]s as work proceeds.
    pub async fn publish_pool_artifacts<F>(
        &self,
        resolver: &impl DataResolver,
        writer: &impl RepositoryWriter,
        threads: usize,
        progress_cb: &Option<F>,
    ) -> Result<()>
    where
        F: Fn(PublishEvent),
    {
        let artifacts = self
            .iter_binary_packages_pool_artifacts()
            .collect::<Result<Vec<_>>>()?;

        if let Some(ref cb) = progress_cb {
            cb(PublishEvent::ResolvedPoolArtifacts(artifacts.len()));
        }

        // Queue a verification check for each artifact.
        let mut fs = futures::stream::iter(
            artifacts
                .iter()
                .map(|a| writer.verify_path(a.path, Some((a.size, a.digest.clone())))),
        )
        .buffer_unordered(threads);

        let mut missing_paths = BTreeSet::new();

        while let Some(result) = fs.next().await {
            let result = result?;

            match result.state {
                // Already present (and valid, where integrity was checkable): nothing to do.
                RepositoryPathVerificationState::ExistsNoIntegrityCheck
                | RepositoryPathVerificationState::ExistsIntegrityVerified => {
                    if let Some(ref cb) = progress_cb {
                        cb(PublishEvent::PoolArtifactCurrent(result.path.to_string()));
                    }
                }
                // Missing or wrong content: schedule for (re-)publishing below.
                RepositoryPathVerificationState::ExistsIntegrityMismatch
                | RepositoryPathVerificationState::Missing => {
                    if let Some(ref cb) = progress_cb {
                        cb(PublishEvent::PoolArtifactMissing(result.path.to_string()));
                    }

                    missing_paths.insert(result.path);
                }
            }
        }

        if let Some(ref cb) = progress_cb {
            cb(PublishEvent::PoolArtifactsToPublish(missing_paths.len()));
        }

        // Now we need to copy files from our source.

        let mut fs = futures::stream::iter(
            artifacts
                .iter()
                .filter(|a| missing_paths.contains(a.path))
                .map(|a| get_path_and_copy(resolver, writer, a)),
        )
        .buffer_unordered(threads);

        while let Some(artifact) = fs.next().await {
            let artifact = artifact?;

            if let Some(ref cb) = progress_cb {
                cb(PublishEvent::PoolArtifactCreated(
                    artifact.path.to_string(),
                    artifact.size,
                ));
            }
        }

        Ok(())
    }
774
    // Read an index file into memory and expand it into 1 or more files to write.
    //
    // When `Acquire-By-Hash` is enabled, one entry per configured checksum flavor is
    // produced, each writing the same content to a distinct `by-hash` path. Otherwise
    // a single entry targeting the canonical path is produced.
    async fn expand_index_file_reader<'ifr, 'slf: 'ifr>(
        &'slf self,
        mut ifr: IndexFileReader<'ifr>,
    ) -> Result<Box<dyn Iterator<Item = ExpandedIndexFile> + 'ifr>> {
        // Buffer the full content so it can be digested and cloned per output path.
        let mut buf = vec![];
        ifr.reader.read_to_end(&mut buf).await?;

        // Digest the content in every supported checksum flavor at once.
        let mut digester = MultiDigester::default();
        digester.update(&buf);
        let digests = digester.finish();

        if self.acquire_by_hash == Some(true) {
            Ok(Box::new(self.checksums.iter().map(move |checksum| {
                ExpandedIndexFile {
                    canonical_path: ifr.canonical_path(),
                    write_path: ifr.by_hash_path(digests.digest_from_checksum(*checksum)),
                    digests: digests.clone(),
                    data: buf.clone(),
                }
            })))
        } else {
            Ok(Box::new(std::iter::once(ExpandedIndexFile {
                canonical_path: ifr.canonical_path(),
                write_path: ifr.canonical_path(),
                digests,
                data: buf,
            })))
        }
    }
804
805    /// Derive fields for `Release` files that aren't related to indices lists.
806    fn static_release_fields(&self) -> impl Iterator<Item = ControlField<'_>> {
807        let mut fields: BTreeMap<Cow<'_, str>, Cow<'_, str>> = BTreeMap::new();
808
809        fields.insert(
810            "Components".into(),
811            self.components
812                .iter()
813                .map(|x| x.as_str())
814                .collect::<Vec<_>>()
815                .join(" ")
816                .into(),
817        );
818
819        fields.insert(
820            "Architectures".into(),
821            self.architectures
822                .iter()
823                .map(|x| x.as_str())
824                .collect::<Vec<_>>()
825                .join(" ")
826                .into(),
827        );
828
829        if let Some(suite) = &self.suite {
830            fields.insert("Suite".into(), suite.into());
831        }
832        if let Some(codename) = &self.codename {
833            fields.insert("Codename".into(), codename.into());
834        }
835        if let Some(date) = &self.date {
836            fields.insert(
837                "Date".into(),
838                format!("{}", date.format(DATE_FORMAT)).into(),
839            );
840        }
841        if let Some(valid_until) = &self.valid_until {
842            fields.insert(
843                "Valid-Until".into(),
844                format!("{}", valid_until.format(DATE_FORMAT)).into(),
845            );
846        }
847        if let Some(description) = &self.description {
848            fields.insert("Description".into(), description.into());
849        }
850        if let Some(origin) = &self.origin {
851            fields.insert("Origin".into(), origin.into());
852        }
853        if let Some(label) = &self.label {
854            fields.insert("Label".into(), label.into());
855        }
856        if let Some(version) = &self.version {
857            fields.insert("Version".into(), version.into());
858        }
859        if let Some(acquire_by_hash) = self.acquire_by_hash {
860            fields.insert(
861                "Acquire-By-Hash".into(),
862                if acquire_by_hash { "yes" } else { "no" }.into(),
863            );
864        }
865
866        fields.into_iter().map(|(k, v)| ControlField::new(k, v))
867    }
868
869    /// Derive a [ReleaseFile] representing the content of the `Release` file.
870    ///
871    /// This takes an iterable describing indices files. This iterable is typically derived
872    /// from [Self::index_file_readers()].
873    pub fn create_release_file(
874        &self,
875        indices: impl Iterator<Item = (String, (u64, MultiContentDigest))>,
876    ) -> Result<ReleaseFile<'_>> {
877        let mut para = ControlParagraph::default();
878
879        for field in self.static_release_fields() {
880            para.set_field(field);
881        }
882
883        let mut digests_by_field = HashMap::new();
884
885        for (path, (size, digests)) in indices {
886            for digest in digests.iter_digests() {
887                digests_by_field
888                    .entry(digest.release_field_name())
889                    .or_insert_with(BTreeMap::new)
890                    .insert(path.clone(), (size, digest.digest_hex()));
891            }
892        }
893
894        for checksum in self.checksums.iter() {
895            // We can have no entries if there were no indices.
896            let default = BTreeMap::new();
897            let entries = digests_by_field
898                .get(checksum.field_name())
899                .unwrap_or(&default);
900
901            let longest_path = entries.keys().map(|x| x.len()).max().unwrap_or_default();
902            let longest_size = entries
903                .values()
904                .map(|(size, _)| format!("{}", size).len())
905                .max()
906                .unwrap_or_default();
907
908            para.set_field(ControlField::new(
909                checksum.field_name().into(),
910                std::iter::once("".to_string())
911                    .chain(entries.iter().map(|(path, (size, digest))| {
912                        format!(
913                            " {:<path_width$} {:>size_width$} {}",
914                            path,
915                            size,
916                            digest,
917                            path_width = longest_path,
918                            size_width = longest_size
919                        )
920                    }))
921                    .collect::<Vec<_>>()
922                    .join("\n")
923                    .into(),
924            ));
925        }
926
927        Ok(para.into())
928    }
929
    /// Publish index files.
    ///
    /// Repository index files describe the contents of the repository. Index files are
    /// referred to by the `InRelease` and `Release` files.
    ///
    /// Indices should only be published after pool artifacts are published. Otherwise
    /// there is a race condition where an index file could refer to a file in the pool
    /// that does not exist.
    ///
    /// `writer` performs the actual I/O. `path_prefix` (e.g. `dists/bullseye`)
    /// is prepended to every written path. `threads` bounds concurrent writes.
    /// `progress_cb` optionally receives progress events. When `signing_key`
    /// is provided, a cleartext-signed `InRelease` file is written in addition
    /// to the unsigned `Release` file; otherwise only `Release` is written.
    pub async fn publish_indices<F, PW>(
        &self,
        writer: &impl RepositoryWriter,
        path_prefix: Option<&str>,
        threads: usize,
        progress_cb: &Option<F>,
        signing_key: Option<(&impl SecretKeyTrait, PW)>,
    ) -> Result<()>
    where
        F: Fn(PublishEvent),
        PW: FnOnce() -> String,
    {
        // Canonical path -> (size, digests), consumed later by create_release_file().
        let mut index_paths = BTreeMap::new();

        // This will effectively buffer all indices files in memory. This could be avoided if
        // we want to limit memory use.

        let mut fs = futures::stream::iter(
            self.index_file_readers()
                .map(|ifr| self.expand_index_file_reader(ifr)),
        )
        .buffer_unordered(threads);

        let mut iters = vec![];

        while let Some(res) = fs.try_next().await? {
            for mut eif in res {
                if let Some(prefix) = path_prefix {
                    // Trim slashes from the prefix so joined paths never have
                    // doubled or leading separators.
                    eif.write_path = format!("{}/{}", prefix.trim_matches('/'), eif.write_path);
                }

                if let Some(cb) = progress_cb {
                    cb(PublishEvent::IndexFileToWrite(eif.write_path.clone()));
                }

                // Key by canonical path: with by-hash enabled, several write
                // paths share one canonical path, and the `Release` file
                // references the canonical one.
                index_paths.insert(
                    eif.canonical_path.clone(),
                    (eif.data.len() as u64, eif.digests.clone()),
                );

                iters.push(eif);
            }
        }

        // Write all expanded index files, up to `threads` concurrently.
        let mut fs = futures::stream::iter(iters.into_iter().map(|eif| {
            writer.write_path(
                eif.write_path.into(),
                Box::pin(futures::io::Cursor::new(eif.data)),
            )
        }))
        .buffer_unordered(threads);

        while let Some(write) = fs.try_next().await? {
            if let Some(cb) = progress_cb {
                cb(PublishEvent::IndexFileWritten(
                    write.path.to_string(),
                    write.bytes_written,
                ));
            }
        }

        // Now with all the indices files written, we can write the `[In]Release` files.

        let release = self.create_release_file(index_paths.into_iter())?;

        let (release_path, inrelease_path) = if let Some(prefix) = path_prefix {
            (
                format!("{}/Release", prefix.trim_matches('/')),
                format!("{}/InRelease", prefix.trim_matches('/')),
            )
        } else {
            ("Release".to_string(), "InRelease".to_string())
        };

        if let Some(cb) = progress_cb {
            cb(PublishEvent::IndexFileToWrite(release_path.clone()))
        }

        let release_write = writer
            .write_path(
                release_path.into(),
                Box::pin(futures::io::Cursor::new(release.to_string().into_bytes())),
            )
            .await?;

        if let Some(cb) = progress_cb {
            cb(PublishEvent::IndexFileWritten(
                release_write.path.to_string(),
                release_write.bytes_written,
            ));
        }

        // Optionally produce `InRelease`: the same `Release` content wrapped
        // in a PGP cleartext signature (SHA-256).
        if let Some((key, password)) = signing_key {
            let inrelease_content = cleartext_sign(
                key,
                password,
                HashAlgorithm::SHA2_256,
                std::io::Cursor::new(release.to_string().as_bytes()),
            )?;

            if let Some(cb) = progress_cb {
                cb(PublishEvent::IndexFileToWrite(inrelease_path.clone()));
            }

            let inrelease_write = writer
                .write_path(
                    inrelease_path.into(),
                    Box::pin(futures::io::Cursor::new(inrelease_content.into_bytes())),
                )
                .await?;

            if let Some(cb) = progress_cb {
                cb(PublishEvent::IndexFileWritten(
                    inrelease_write.path.to_string(),
                    inrelease_write.bytes_written,
                ));
            }
        }

        Ok(())
    }
1059
1060    /// Publish the repository to the given [RepositoryWriter].
1061    ///
1062    /// This is the main function for *writing out* the desired state in this builder.
1063    ///
1064    /// Publishing effectively works in 3 phases:
1065    ///
1066    /// 1. Publish missing pool artifacts.
1067    /// 2. Publish *indices* files (e.g. `Packages` lists).
1068    /// 3. Publish the `InRelease` and `Release` file.
1069    ///
1070    /// `writer` is a [RepositoryWriter] used to perform I/O for writing output files.
1071    /// `resolver` is a [DataResolver] for resolving pool paths. It will be consulted
1072    /// to obtain paths of `.deb` and other pool files.
1073    /// `distribution_path` is the relative path under `writer` to write indices files
1074    /// under. It typically begins with `dists/`. e.g. `dists/bullseye`. This value
1075    /// becomes the directory with the generated `InRelease` file.
1076    /// `threads` is the number of parallel threads to use for I/O.
1077    /// `progress_cb` provides an optional function to receive progress updates.
1078    /// `signing_key` provides a signing key for PGP signing and an optional function to
1079    /// obtain the password to unlock that key.
1080    ///
1081    /// To set `progress_cb` or `signing_key` to `None`, you'll need to use the turbofish
1082    /// operator to specify the type. e.g. `&Option<fn(PublishEvent)>::None` for `progress_cb`
1083    /// and `Option::<(&pgp::SignedSecretKey, fn() -> String)>::None` for `signing_key`.
1084    /// Alternatively, use the `NO_PROGRESS_CB` or `NO_SIGNING_KEY` module constants to avoid
1085    /// some typing.
1086    pub async fn publish<F, PW>(
1087        &self,
1088        writer: &impl RepositoryWriter,
1089        resolver: &impl DataResolver,
1090        distribution_path: &str,
1091        threads: usize,
1092        progress_cb: &Option<F>,
1093        signing_key: Option<(&impl SecretKeyTrait, PW)>,
1094    ) -> Result<()>
1095    where
1096        F: Fn(PublishEvent),
1097        PW: FnOnce() -> String,
1098    {
1099        self.publish_pool_artifacts(resolver, writer, threads, progress_cb)
1100            .await?;
1101
1102        self.publish_indices(
1103            writer,
1104            Some(distribution_path),
1105            threads,
1106            progress_cb,
1107            signing_key,
1108        )
1109        .await?;
1110
1111        Ok(())
1112    }
1113}
1114
1115async fn get_path_and_copy<'a, 'b>(
1116    resolver: &impl DataResolver,
1117    writer: &impl RepositoryWriter,
1118    artifact: &'a BinaryPackagePoolArtifact<'b>,
1119) -> Result<&'a BinaryPackagePoolArtifact<'b>> {
1120    // It would be slightly more defensive to plug in the content validator
1121    // explicitly here. However, the API contract is a contract. Let's let
1122    // implementations shoot themselves in the foot.
1123    let reader = resolver
1124        .get_path_with_digest_verification(artifact.path, artifact.size, artifact.digest.clone())
1125        .await?;
1126
1127    writer.write_path(artifact.path.into(), reader).await?;
1128
1129    Ok(artifact)
1130}
1131
#[cfg(test)]
mod test {
    #[cfg(feature = "http")]
    use crate::repository::http::HttpRepositoryClient;
    use {
        super::*,
        crate::{
            io::PathMappingDataResolver,
            repository::{
                RepositoryPathVerification, RepositoryPathVerificationState, RepositoryRootReader,
                RepositoryWrite,
            },
            signing_key::{create_self_signed_key, signing_secret_key_params_builder},
        },
        async_trait::async_trait,
        futures::AsyncReadExt,
        std::borrow::Cow,
    };

    // Snapshot URL pins the remote repository content so test expectations
    // remain stable over time.
    const BULLSEYE_URL: &str = "http://snapshot.debian.org/archive/debian/20211120T085721Z";

    /// In-memory [RepositoryWriter] that records every written path and its bytes.
    #[derive(Default)]
    struct CapturingWriter {
        // Mutex because the writer trait methods take `&self` and writes can
        // happen concurrently during publishing.
        paths: std::sync::Mutex<HashMap<String, Vec<u8>>>,
    }

    impl CapturingWriter {
        /// Return a copy of the bytes written to `path`, if any.
        fn get_path(&self, path: impl ToString) -> Option<Vec<u8>> {
            self.paths.lock().unwrap().get(&path.to_string()).cloned()
        }
    }

    #[async_trait]
    impl RepositoryWriter for CapturingWriter {
        // Always reports Missing so the publisher copies every artifact.
        async fn verify_path<'path>(
            &self,
            path: &'path str,
            _expected_content: Option<(u64, ContentDigest)>,
        ) -> Result<RepositoryPathVerification<'path>> {
            Ok(RepositoryPathVerification {
                path,
                state: RepositoryPathVerificationState::Missing,
            })
        }

        // Drains the reader into memory and stores the bytes under `path`.
        async fn write_path<'path, 'reader>(
            &self,
            path: Cow<'path, str>,
            reader: Pin<Box<dyn AsyncRead + Send + 'reader>>,
        ) -> Result<RepositoryWrite<'path>> {
            let mut writer = futures::io::Cursor::new(Vec::<u8>::new());

            let bytes_written = futures::io::copy(reader, &mut writer)
                .await
                .map_err(|e| DebianError::RepositoryIoPath(path.to_string(), e))?;

            self.paths
                .lock()
                .unwrap()
                .insert(path.to_string(), writer.into_inner());

            Ok(RepositoryWrite {
                path,
                bytes_written,
            })
        }
    }

    // Spot-checks path derivation for PoolLayout::ComponentThenNamePrefix,
    // including the `lib*` name prefixing (`libzstd` -> `libz`).
    #[test]
    fn pool_layout_paths() {
        let layout = PoolLayout::ComponentThenNamePrefix;

        assert_eq!(
            layout.path("main", "python3.9", "python3.9_3.9.9-1_arm64.deb"),
            "pool/main/p/python3.9/python3.9_3.9.9-1_arm64.deb"
        );
        assert_eq!(
            layout.path("main", "libzstd", "zstd_1.4.8+dfsg-2.1_amd64.deb"),
            "pool/main/libz/libzstd/zstd_1.4.8+dfsg-2.1_amd64.deb"
        );
    }

    // End-to-end publish against a snapshotted Debian repository (network
    // access required; gated on the `http` feature): builds a repository from
    // 10 small packages, publishes it to a CapturingWriter, then verifies the
    // signed InRelease output.
    #[tokio::test]
    #[cfg(feature = "http")]
    async fn bullseye_binary_packages_reader() -> Result<()> {
        let root = HttpRepositoryClient::new(BULLSEYE_URL).unwrap();
        let release = root.release_reader("bullseye").await.unwrap();

        let packages = release
            .resolve_packages("main", "amd64", false)
            .await
            .unwrap();

        let mut builder = RepositoryBuilder::new_recommended(
            ["all", "amd64"].iter(),
            ["main"].iter(),
            "suite",
            "codename",
        );

        // Maps destination pool paths back to the upstream `Filename` paths so
        // artifacts can be fetched from the remote repository.
        let mut mapping_resolver = PathMappingDataResolver::new(root);

        // Cap total work by limiting packages examined.
        for package in packages
            .iter()
            .filter(|cf| {
                if let Some(Ok(size)) = cf.size() {
                    size < 1000000
                } else {
                    false
                }
            })
            .take(10)
        {
            let dest_filename = builder.add_binary_deb("main", package)?;

            let source_filename = package.field_str("Filename").unwrap();

            mapping_resolver.add_path_map(dest_filename, source_filename);
        }

        let pool_artifacts = builder
            .iter_binary_packages_pool_artifacts()
            .collect::<Result<Vec<_>>>()?;
        assert_eq!(pool_artifacts.len(), 10);

        // 2 architectures (all, amd64) x 3 compression variants = 6 readers;
        // all should live under the `main/binary-*` directories.
        let mut entries = builder.binary_packages_index_readers().collect::<Vec<_>>();
        assert_eq!(entries.len(), 6);
        assert!(entries
            .iter()
            .all(|entry| entry.canonical_path().starts_with("main/binary-")));

        // Ensure each index reader can be fully drained without error.
        for entry in entries.iter_mut() {
            let mut buf = vec![];
            entry.reader.read_to_end(&mut buf).await.unwrap();
        }

        let writer = CapturingWriter::default();

        let cb = |event| {
            eprintln!("{}", event);
        };

        // Self-signed throwaway key with an empty password.
        let passwd_fn = String::new;
        let signed_secret_key = create_self_signed_key(
            signing_secret_key_params_builder("Me <someone@example.com>")
                .build()
                .unwrap(),
            passwd_fn,
        )
        .unwrap()
        .0;

        builder
            .publish(
                &writer,
                &mapping_resolver,
                "dists/mydist",
                10,
                &Some(cb),
                Some((&signed_secret_key, passwd_fn)),
            )
            .await?;

        // Both the unsigned and signed release files must have been written.
        let wanted_paths = ["dists/mydist/Release", "dists/mydist/InRelease"];

        assert!(wanted_paths.iter().all(|path| writer
            .paths
            .lock()
            .unwrap()
            .contains_key(&path.to_string())));

        // The captured InRelease must parse as a cleartext-signed ReleaseFile...
        let release = ReleaseFile::from_armored_reader(std::io::Cursor::new(
            writer.get_path("dists/mydist/InRelease").unwrap(),
        ))
        .unwrap();

        // ...carry exactly one signature from our key...
        let signatures = release
            .signatures()
            .expect("PGP signatures should have been parsed");
        assert_eq!(
            signatures
                .iter_signatures_from_key(&signed_secret_key)
                .count(),
            1
        );

        // ...and that signature must verify.
        signatures.verify(&signed_secret_key).unwrap();

        Ok(())
    }
}
1324
#[cfg(test)]
mod tests {
    use {
        super::*,
        crate::{
            repository::{filesystem::FilesystemRepositoryWriter, reader_from_str},
            signing_key::{create_self_signed_key, signing_secret_key_params_builder},
        },
        tempfile::TempDir,
    };

    /// Create a uniquely-prefixed temporary directory for test output.
    fn temp_dir() -> Result<TempDir> {
        Ok(tempfile::Builder::new()
            .prefix("debian-packaging-test-")
            .tempdir()?)
    }

    // Publishing a repository that contains no packages should still produce
    // readable `[In]Release` metadata with an empty set of indices entries.
    #[tokio::test]
    async fn publish_empty() -> Result<()> {
        let td = temp_dir()?;

        let mut builder = RepositoryBuilder::new_recommended(
            ["amd64"].into_iter(),
            ["main"].into_iter(),
            "suite",
            "codename",
        );

        builder.set_description("description");
        builder.set_version("1");

        let writer = FilesystemRepositoryWriter::new(td.path());

        // Self-signed throwaway key with an empty password.
        let key_params = signing_secret_key_params_builder("someone@example.com")
            .build()
            .unwrap();
        let key = create_self_signed_key(key_params, String::new)?.0;

        builder
            .publish_indices(
                &writer,
                Some("dists/dist"),
                1,
                &NO_PROGRESS_CB,
                Some((&key, String::new)),
            )
            .await?;

        // Read the published repository back from disk and verify it parses.
        let reader = reader_from_str(format!("file://{}", td.path().display()))?;

        let release_reader = reader.release_reader("dist").await?;

        // No packages were added, so no classified indices should be listed.
        let indices = release_reader.classified_indices_entries()?;
        assert!(indices.is_empty());

        Ok(())
    }
}