Skip to main content

gix_pack/data/input/
entries_to_bytes.rs

1use std::iter::Peekable;
2
3use crate::data::input;
4
5/// An implementation of [`Iterator`] to write [encoded entries][input::Entry] to an inner implementation each time
6/// `next()` is called.
7///
8/// It is able to deal with an unknown amount of objects as it will rewrite the pack header once the entries iterator
9/// is depleted and compute the hash in one go by re-reading the whole file.
10pub struct EntriesToBytesIter<I: Iterator, W> {
11    /// An iterator for input [`input::Entry`] instances
12    pub input: Peekable<I>,
13    /// A way of writing encoded bytes.
14    output: W,
15    /// Our trailing hash when done writing all input entries
16    trailer: Option<gix_hash::ObjectId>,
17    /// The amount of objects in the iteration and the version of the packfile to be written.
18    /// Will be `None` to signal the header was written already.
19    data_version: crate::data::Version,
20    /// The amount of entries seen so far
21    num_entries: u32,
22    /// If we are done, no additional writes will occur
23    is_done: bool,
24    /// The kind of hash to use for the digest
25    object_hash: gix_hash::Kind,
26}
27
28impl<I, W> EntriesToBytesIter<I, W>
29where
30    I: Iterator<Item = Result<input::Entry, input::Error>>,
31    W: std::io::Read + std::io::Write + std::io::Seek,
32{
33    /// Create a new instance reading [entries][input::Entry] from an `input` iterator and write pack data bytes to
34    /// `output` writer, resembling a pack of `version`. The amount of entries will be dynamically determined and
35    /// the pack is completed once the last entry was written.
36    /// `object_hash` is the kind of hash to use for the pack checksum and maybe other places, depending on the version.
37    ///
38    /// # Panics
39    ///
40    /// Not all combinations of `object_hash` and `version` are supported currently triggering assertion errors.
41    pub fn new(input: I, output: W, version: crate::data::Version, object_hash: gix_hash::Kind) -> Self {
42        assert!(
43            matches!(version, crate::data::Version::V2),
44            "currently only pack version 2 can be written",
45        );
46        assert!(
47            matches!(object_hash, gix_hash::Kind::Sha1),
48            "currently only Sha1 is supported, right now we don't know how other hashes are encoded",
49        );
50        EntriesToBytesIter {
51            input: input.peekable(),
52            output,
53            object_hash,
54            num_entries: 0,
55            trailer: None,
56            data_version: version,
57            is_done: false,
58        }
59    }
60
61    /// Returns the trailing hash over all ~ entries once done.
62    /// It's `None` if we are not yet done writing.
63    pub fn digest(&self) -> Option<gix_hash::ObjectId> {
64        self.trailer
65    }
66
67    fn next_inner(&mut self, entry: input::Entry) -> Result<input::Entry, gix_hash::io::Error> {
68        if self.num_entries == 0 {
69            let header_bytes = crate::data::header::encode(self.data_version, 0);
70            self.output.write_all(&header_bytes[..])?;
71        }
72        self.num_entries += 1;
73        entry.header.write_to(entry.decompressed_size, &mut self.output)?;
74        self.output.write_all(
75            entry
76                .compressed
77                .as_deref()
78                .expect("caller must configure generator to keep compressed bytes"),
79        )?;
80        Ok(entry)
81    }
82
83    fn write_header_and_digest(&mut self, last_entry: Option<&mut input::Entry>) -> Result<(), gix_hash::io::Error> {
84        let header_bytes = crate::data::header::encode(self.data_version, self.num_entries);
85        let num_bytes_written = if last_entry.is_some() {
86            self.output.stream_position()?
87        } else {
88            header_bytes.len() as u64
89        };
90        self.output.rewind()?;
91        self.output.write_all(&header_bytes[..])?;
92        self.output.flush()?;
93
94        self.output.rewind()?;
95        let interrupt_never = std::sync::atomic::AtomicBool::new(false);
96        let digest = gix_hash::bytes(
97            &mut self.output,
98            num_bytes_written,
99            self.object_hash,
100            &mut gix_features::progress::Discard,
101            &interrupt_never,
102        )?;
103        self.output.write_all(digest.as_slice())?;
104        self.output.flush()?;
105
106        self.is_done = true;
107        if let Some(last_entry) = last_entry {
108            last_entry.trailer = Some(digest);
109        }
110        self.trailer = Some(digest);
111        Ok(())
112    }
113}
114
115impl<I, W> Iterator for EntriesToBytesIter<I, W>
116where
117    I: Iterator<Item = Result<input::Entry, input::Error>>,
118    W: std::io::Read + std::io::Write + std::io::Seek,
119{
120    /// The amount of bytes written to `out` if `Ok` or the error `E` received from the input.
121    type Item = Result<input::Entry, input::Error>;
122
123    fn next(&mut self) -> Option<Self::Item> {
124        if self.is_done {
125            return None;
126        }
127
128        match self.input.next() {
129            Some(res) => Some(match res {
130                Ok(entry) => self
131                    .next_inner(entry)
132                    .and_then(|mut entry| {
133                        if self.input.peek().is_none() {
134                            self.write_header_and_digest(Some(&mut entry)).map(|_| entry)
135                        } else {
136                            Ok(entry)
137                        }
138                    })
139                    .map_err(input::Error::from),
140                Err(err) => {
141                    self.is_done = true;
142                    Err(err)
143                }
144            }),
145            None => match self.write_header_and_digest(None) {
146                Ok(_) => None,
147                Err(err) => Some(Err(err.into())),
148            },
149        }
150    }
151
152    fn size_hint(&self) -> (usize, Option<usize>) {
153        self.input.size_hint()
154    }
155}