// libcoreinst/miniso.rs

1// Copyright 2021 Red Hat, Inc.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::collections::HashMap;
16use std::fs::File;
17use std::io::{copy, Read, Seek, SeekFrom, Write};
18
19use anyhow::{bail, Context, Result};
20use bincode::Options;
21use clap::crate_version;
22use serde::{Deserialize, Serialize};
23use xz2::read::XzDecoder;
24use xz2::write::XzEncoder;
25
26use crate::io::*;
27use crate::iso9660;
28
/// Magic header value for miniso data file: the ASCII string "MINISO"
/// padded to eight bytes with two NULs.
const HEADER_MAGIC: [u8; 8] = *b"MINISO\0\0";

/// Basic versioning. Used as a safety check that we're unpacking a miniso data file we understand.
/// Bump this when making changes to the format.
const HEADER_VERSION: u32 = 1;

/// Maximum size of miniso data file we'll agree to deserialize. FCOS is currently
/// at 2892 bytes, so this is generous.
const DATA_MAX_SIZE: u64 = 1024 * 1024;
39
/// Mapping between file extents in the minimal ISO and the corresponding
/// extents in the full ISO.  Serialized into the miniso data file.
#[derive(Serialize, Deserialize, Debug)]
struct Table {
    // Sorted by minimal-ISO address and validated non-overlapping; see
    // Table::new() / Table::validate().
    entries: Vec<TableEntry>,
}
44
45impl Table {
46    fn new(
47        full_files: &HashMap<String, iso9660::File>,
48        minimal_files: &HashMap<String, iso9660::File>,
49    ) -> Result<(Self, usize)> {
50        let mut entries: Vec<TableEntry> = Vec::new();
51        for (path, minimal_entry) in minimal_files {
52            let full_entry = full_files
53                .get(path)
54                .with_context(|| format!("missing minimal file {path} in full ISO"))?;
55            if full_entry.length != minimal_entry.length {
56                bail!("File {path} has different lengths in full and minimal ISOs");
57            }
58            entries.push(TableEntry {
59                minimal: minimal_entry.address,
60                full: full_entry.address,
61                length: full_entry.length,
62            });
63        }
64
65        entries.sort_by_key(|e| e.minimal.as_sector());
66        // drop zero-length files (which can overlap with other files) and
67        // duplicate entries (hardlinks), and calculate how many there were
68        // for reporting
69        let size = entries.len();
70        entries = entries.drain(..).filter(|e| e.length > 0).collect();
71        entries.dedup();
72        let extraneous = size - entries.len();
73        let table = Table { entries };
74        table.validate().context("validating table")?;
75        Ok((table, extraneous))
76    }
77
78    fn validate(&self) -> Result<()> {
79        let n = self.entries.len();
80        if n == 0 {
81            bail!("table is empty; ISOs have no files in common?");
82        }
83        for (e, next_e) in self.entries[..n - 1].iter().zip(self.entries[1..n].iter()) {
84            if e.minimal.as_offset() + e.length as u64 > next_e.minimal.as_offset() {
85                bail!(
86                    "Files at offsets {} and {} overlap",
87                    e.minimal.as_offset(),
88                    next_e.minimal.as_offset(),
89                );
90            }
91        }
92
93        Ok(())
94    }
95}
96
/// One extent mapping from the minimal ISO to the full ISO.
/// PartialEq/Eq are needed so Table::new() can dedup() hardlinked entries.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq)]
struct TableEntry {
    /// Location of the file's data in the minimal ISO.
    minimal: iso9660::Address,
    /// Location of the same file's data in the full ISO.
    full: iso9660::Address,
    /// File length in bytes; identical in both ISOs (checked in Table::new).
    length: u32,
}
103
// Version-agnostic header. Frozen.
#[derive(Serialize, Deserialize, Debug)]
struct Header {
    /// Must equal HEADER_MAGIC; checked in validate().
    magic: [u8; 8],
    /// Format version; must equal HEADER_VERSION, checked in validate().
    version: u32,
    /// For informational purposes only.
    app_version: String,
}
112
113impl Default for Header {
114    fn default() -> Self {
115        Self {
116            magic: HEADER_MAGIC,
117            version: HEADER_VERSION,
118            app_version: crate_version!().into(),
119        }
120    }
121}
122
123impl Header {
124    pub fn validate(&self) -> Result<()> {
125        if self.magic != HEADER_MAGIC {
126            bail!("not a miniso file!");
127        }
128        if self.version != HEADER_VERSION {
129            bail!(
130                "incompatible miniso file version: {} vs {} (created by {})",
131                HEADER_VERSION,
132                self.version,
133                self.app_version,
134            );
135        }
136        Ok(())
137    }
138}
139
// Version-specific payload. Evolvable.
#[derive(Serialize, Deserialize, Debug)]
pub struct Data {
    /// Extent mapping between the minimal and full ISOs.
    table: Table,
    /// SHA-256 of the complete minimal ISO; verified after unpacking.
    digest: Sha256Digest,
    /// xz-compressed bytes of the minimal ISO with the table-mapped file
    /// contents omitted (they are recovered from the full ISO on unpack).
    xzpacked: Vec<u8>,
}
147
impl Data {
    /// Pack a minimal ISO against the full ISO it was derived from.
    ///
    /// Builds the extent table, then xz-compresses only the bytes of the
    /// minimal ISO _not_ covered by the table -- file contents shared with
    /// the full ISO are skipped, since unpacking restores them from the full
    /// ISO.  Returns the packed data plus statistics: matched file count
    /// (including dropped extraneous entries), bytes skipped, bytes fed to
    /// the compressor, and compressed output size.
    pub fn xzpack(
        miniso: &mut File,
        full_files: &HashMap<String, iso9660::File>,
        minimal_files: &HashMap<String, iso9660::File>,
    ) -> Result<(Self, usize, u64, u64, u64)> {
        let (table, extraneous) = Table::new(full_files, minimal_files)?;

        // A `ReadHasher` here would let us wrap the miniso so we calculate the digest as we read.
        let digest = Sha256Digest::from_file(miniso)?;
        miniso.rewind().context("seeking back to miniso start")?;
        // current byte position in the minimal ISO
        let mut offset = 0;

        let mut xzw = XzEncoder::new(Vec::new(), 9);
        let mut buf = [0u8; BUFFER_SIZE];
        let mut skipped: u64 = 0;
        // entries are sorted by minimal address and non-overlapping
        // (Table::validate), so one forward pass suffices
        for entry in &table.entries {
            let addr: u64 = entry.minimal.as_offset();
            // invariant from table validation; a violation here is a bug
            assert!(offset <= addr);
            if addr > offset {
                // compress the gap between the previous entry and this file
                copy_exactly_n(miniso, &mut xzw, addr - offset, &mut buf).with_context(|| {
                    format!(
                        "copying {} miniso bytes at offset {}",
                        addr - offset,
                        offset
                    )
                })?;
            }
            // I tested trying to be smarter here and rounding to the nearest 2k block so we can
            // skip padding, but zeroes compress so well that it only saved a grand total of 4
            // bytes after xz. So not worth the complexity.
            offset = miniso
                .seek(SeekFrom::Current(entry.length as i64))
                .with_context(|| format!("skipping miniso file at offset {addr}"))?;
            skipped += entry.length as u64;
        }

        // compress everything after the last mapped file
        copy(miniso, &mut xzw).context("copying remaining miniso bytes")?;

        // flush the encoder so total_in()/total_out() are final before we
        // consume it with finish() below
        xzw.try_finish().context("trying to finish xz stream")?;
        let matches = table.entries.len() + extraneous;
        let written = xzw.total_in();
        let written_compressed = xzw.total_out();
        Ok((
            Self {
                table,
                digest,
                xzpacked: xzw.finish().context("finishing xz stream")?,
            },
            matches,
            skipped,
            written,
            written_compressed,
        ))
    }

    /// Write the miniso data file (header followed by this payload) to `w`,
    /// refusing to exceed DATA_MAX_SIZE bytes.
    pub fn serialize(&self, w: impl Write) -> Result<()> {
        let mut limiter = LimitWriter::new(w, DATA_MAX_SIZE, "data size limit".into());

        let header = Header::default();
        let coder = &mut bincoder();
        coder
            .serialize_into(&mut limiter, &header)
            .context("failed to serialize header")?;
        coder
            .serialize_into(&mut limiter, &self)
            .context("failed to serialize data")?;

        Ok(())
    }

    /// Read and validate a miniso data file from `r`, refusing to read more
    /// than DATA_MAX_SIZE bytes.
    ///
    /// The header is checked (magic and version) before the payload is
    /// deserialized, and the payload's table is validated before use.
    pub fn deserialize(r: impl Read) -> Result<Self> {
        let mut limiter = LimitReader::new(r, DATA_MAX_SIZE, "data size limit".into());

        let coder = &mut bincoder();
        let header: Header = coder
            .deserialize_from(&mut limiter)
            .context("failed to deserialize header")?;
        header.validate().context("validating header")?;

        let data: Self = coder
            .deserialize_from(&mut limiter)
            .context("failed to deserialize data")?;
        data.table.validate().context("validating table")?;

        Ok(data)
    }

    /// Reconstruct the minimal ISO into `w`.
    ///
    /// Interleaves decompressed bytes from the packed stream with file
    /// contents read from `fulliso` at the addresses recorded in the table,
    /// then verifies the result against the stored SHA-256 digest.
    pub fn unxzpack(&self, fulliso: &mut File, w: impl Write) -> Result<()> {
        let mut xzr = XzDecoder::new(self.xzpacked.as_slice());
        // hash the output as we write it so we can verify the digest at the end
        let mut w = WriteHasher::new_sha256(w)?;
        let mut buf = [0u8; BUFFER_SIZE];
        // current byte position in the reconstructed minimal ISO
        let mut offset = 0;
        for entry in &self.table.entries {
            let minimal_addr = entry.minimal.as_offset();
            let fulliso_addr = entry.full.as_offset();
            if minimal_addr > offset {
                // emit packed (non-file) bytes up to the start of this file
                offset += copy_exactly_n(&mut xzr, &mut w, minimal_addr - offset, &mut buf)
                    .with_context(|| {
                        format!(
                            "copying {} packed bytes at offset {}",
                            minimal_addr - offset,
                            offset
                        )
                    })?;
            }
            // splice the file contents back in from the full ISO
            fulliso
                .seek(SeekFrom::Start(fulliso_addr))
                .with_context(|| format!("seeking to full ISO file at offset {fulliso_addr}"))?;
            offset += copy_exactly_n(fulliso, &mut w, entry.length as u64, &mut buf)
                .with_context(|| format!("copying full ISO file at offset {fulliso_addr}"))?;
        }

        // emit everything after the last mapped file
        copy(&mut xzr, &mut w).context("copying remaining packed bytes")?;
        let digest = w.try_into()?;
        if self.digest != digest {
            bail!(
                "wrong final digest: expected {}, found {}",
                self.digest.to_hex_string()?,
                digest.to_hex_string()?
            );
        }

        Ok(())
    }
}
273}