git_chunk/file/
write.rs

1use crate::file::{index::Entry, Index};
2
3mod write_chunk {
4    use std::collections::VecDeque;
5
6    use crate::file::index;
7
8    /// A [`Write`][std::io::Write] implementation that validates chunk sizes while allowing the user to know
9    /// which chunk is to be written next.
10    pub struct Chunk<W> {
11        chunks_to_write: VecDeque<index::Entry>,
12        inner: W,
13        next_chunk: Option<index::Entry>,
14        written_bytes: usize,
15    }
16
17    impl<W> Chunk<W>
18    where
19        W: std::io::Write,
20    {
21        pub(crate) fn new(out: W, chunks: VecDeque<index::Entry>) -> Chunk<W>
22        where
23            W: std::io::Write,
24        {
25            Chunk {
26                chunks_to_write: chunks,
27                inner: out,
28                next_chunk: None,
29                written_bytes: 0,
30            }
31        }
32    }
33
34    impl<W> std::io::Write for Chunk<W>
35    where
36        W: std::io::Write,
37    {
38        fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
39            let written = self.inner.write(buf)?;
40            self.written_bytes += written;
41            Ok(written)
42        }
43
44        fn flush(&mut self) -> std::io::Result<()> {
45            self.inner.flush()
46        }
47    }
48
49    impl<W> Chunk<W> {
50        /// Return the inner writer - should only be called once there is no more chunk to write.
51        pub fn into_inner(self) -> W {
52            self.inner
53        }
54        /// Return the next chunk-id to write, if there is one.
55        pub fn next_chunk(&mut self) -> Option<crate::Id> {
56            if let Some(entry) = self.next_chunk.take() {
57                assert_eq!(
58                    entry.offset.end,
59                    self.written_bytes as u64,
60                    "BUG: expected to write {} bytes, but only wrote {} for chunk {:?}",
61                    entry.offset.end,
62                    self.written_bytes,
63                    std::str::from_utf8(&entry.kind)
64                )
65            }
66            self.written_bytes = 0;
67            self.next_chunk = self.chunks_to_write.pop_front();
68            self.next_chunk.as_ref().map(|e| e.kind)
69        }
70    }
71}
72pub use write_chunk::Chunk;
73
74/// Writing
75impl Index {
76    /// Create a new index whose sole purpose is to be receiving chunks using [`plan_chunk()`][Index::plan_chunk()] and to be written to
77    /// an output using [`into_write()`][Index::into_write()]
78    pub fn for_writing() -> Self {
79        Index {
80            will_write: true,
81            chunks: Vec::new(),
82        }
83    }
84    /// Plan to write a new chunk as part of the index when [`into_write()`][Index::into_write()] is called.
85    pub fn plan_chunk(&mut self, chunk: crate::Id, exact_size_on_disk: u64) {
86        assert!(self.will_write, "BUG: create the index with `for_writing()`");
87        assert!(
88            !self.chunks.iter().any(|e| e.kind == chunk),
89            "BUG: must not add chunk of same kind twice: {:?}",
90            std::str::from_utf8(&chunk)
91        );
92        self.chunks.push(Entry {
93            kind: chunk,
94            offset: 0..exact_size_on_disk,
95        })
96    }
97
98    /// Return the total size of all planned chunks thus far.
99    pub fn planned_storage_size(&self) -> u64 {
100        assert!(self.will_write, "BUG: create the index with `for_writing()`");
101        self.chunks.iter().map(|e| e.offset.end).sum()
102    }
103
104    /// Return the amount of chunks we currently know.
105    pub fn num_chunks(&self) -> usize {
106        self.chunks.len()
107    }
108
109    /// After [planning all chunks][Index::plan_chunk()] call this method with the destination to write the chunks to.
110    /// Use the [Chunk] writer to write each chunk in order.
111    /// `current_offset` is the byte position at which `out` will continue writing.
112    pub fn into_write<W>(self, mut out: W, current_offset: usize) -> std::io::Result<Chunk<W>>
113    where
114        W: std::io::Write,
115    {
116        assert!(
117            self.will_write,
118            "BUG: create the index with `for_writing()`, cannot write decoded indices"
119        );
120        // First chunk starts past the table of contents
121        let mut current_offset = (current_offset + Self::size_for_entries(self.num_chunks())) as u64;
122
123        for entry in &self.chunks {
124            out.write_all(&entry.kind)?;
125            out.write_all(&current_offset.to_be_bytes())?;
126
127            current_offset += entry.offset.end;
128        }
129
130        // sentinel to mark end of chunks
131        out.write_all(&0u32.to_be_bytes())?;
132        out.write_all(&current_offset.to_be_bytes())?;
133
134        Ok(Chunk::new(out, self.chunks.into()))
135    }
136}