git_odb/store_impls/loose/
find.rs

1use std::{cmp::Ordering, collections::HashSet, fs, io::Read, path::PathBuf};
2
3use git_features::zlib;
4
5use crate::store_impls::loose::{hash_path, Store, HEADER_MAX_SIZE};
6
7/// Returned by [`Store::try_find()`]
8#[derive(thiserror::Error, Debug)]
9#[allow(missing_docs)]
10pub enum Error {
11    #[error("decompression of loose object at '{path}' failed")]
12    DecompressFile {
13        source: zlib::inflate::Error,
14        path: PathBuf,
15    },
16    #[error("file at '{path}' showed invalid size of inflated data, expected {expected}, got {actual}")]
17    SizeMismatch {
18        actual: usize,
19        expected: usize,
20        path: PathBuf,
21    },
22    #[error(transparent)]
23    Decode(#[from] git_object::decode::LooseHeaderDecodeError),
24    #[error("Could not {action} data at '{path}'")]
25    Io {
26        source: std::io::Error,
27        action: &'static str,
28        path: PathBuf,
29    },
30}
31
32/// Object lookup
33impl Store {
34    const OPEN_ACTION: &'static str = "open";
35
36    /// Returns true if the given id is contained in our repository.
37    pub fn contains(&self, id: impl AsRef<git_hash::oid>) -> bool {
38        debug_assert_eq!(self.object_hash, id.as_ref().kind());
39        hash_path(id.as_ref(), self.path.clone()).is_file()
40    }
41
42    /// Given a `prefix`, find an object that matches it uniquely within this loose object
43    /// database as `Ok(Some(Ok(<oid>)))`.
44    /// If there is more than one object matching the object `Ok(Some(Err(()))` is returned.
45    ///
46    /// Finally, if no object matches, the return value is `Ok(None)`.
47    ///
48    /// The outer `Result` is to indicate errors during file system traversal.
49    ///
50    /// Pass `candidates` to obtain the set of all object ids matching `prefix`, with the same return value as
51    /// one would have received if it remained `None`.
52    pub fn lookup_prefix(
53        &self,
54        prefix: git_hash::Prefix,
55        mut candidates: Option<&mut HashSet<git_hash::ObjectId>>,
56    ) -> Result<Option<crate::store::prefix::lookup::Outcome>, crate::loose::iter::Error> {
57        let single_directory_iter = crate::loose::Iter {
58            inner: git_features::fs::walkdir_new(
59                self.path.join(prefix.as_oid().to_hex_with_len(2).to_string()),
60                git_features::fs::walkdir::Parallelism::Serial,
61            )
62            .min_depth(1)
63            .max_depth(1)
64            .follow_links(false)
65            .into_iter(),
66            hash_hex_len: prefix.as_oid().kind().len_in_hex(),
67        };
68        let mut candidate = None;
69        for oid in single_directory_iter {
70            let oid = match oid {
71                Ok(oid) => oid,
72                Err(err) => {
73                    return match err.io_error() {
74                        Some(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(None),
75                        None | Some(_) => Err(err),
76                    }
77                }
78            };
79            if prefix.cmp_oid(&oid) == Ordering::Equal {
80                match &mut candidates {
81                    Some(candidates) => {
82                        candidates.insert(oid);
83                    }
84                    None => {
85                        if candidate.is_some() {
86                            return Ok(Some(Err(())));
87                        }
88                        candidate = Some(oid);
89                    }
90                }
91            }
92        }
93
94        match &mut candidates {
95            Some(candidates) => match candidates.len() {
96                0 => Ok(None),
97                1 => Ok(candidates.iter().next().cloned().map(Ok)),
98                _ => Ok(Some(Err(()))),
99            },
100            None => Ok(candidate.map(Ok)),
101        }
102    }
103
104    /// Return the object identified by the given [`ObjectId`][git_hash::ObjectId] if present in this database,
105    /// writing its raw data into the given `out` buffer.
106    ///
107    /// Returns `Err` if there was an error locating or reading the object. Returns `Ok<None>` if
108    /// there was no such object.
109    pub fn try_find<'a>(
110        &self,
111        id: impl AsRef<git_hash::oid>,
112        out: &'a mut Vec<u8>,
113    ) -> Result<Option<git_object::Data<'a>>, Error> {
114        debug_assert_eq!(self.object_hash, id.as_ref().kind());
115        match self.find_inner(id.as_ref(), out) {
116            Ok(obj) => Ok(Some(obj)),
117            Err(err) => match err {
118                Error::Io {
119                    source: err,
120                    action,
121                    path,
122                } => {
123                    if action == Self::OPEN_ACTION && err.kind() == std::io::ErrorKind::NotFound {
124                        Ok(None)
125                    } else {
126                        Err(Error::Io {
127                            source: err,
128                            action,
129                            path,
130                        })
131                    }
132                }
133                err => Err(err),
134            },
135        }
136    }
137
138    /// Return only the decompressed size of the object and its kind without fully reading it into memory as tuple of `(size, kind)`.
139    /// Returns `None` if `id` does not exist in the database.
140    pub fn try_header(&self, id: impl AsRef<git_hash::oid>) -> Result<Option<(usize, git_object::Kind)>, Error> {
141        const BUF_SIZE: usize = 256;
142        let mut buf = [0_u8; BUF_SIZE];
143        let path = hash_path(id.as_ref(), self.path.clone());
144
145        let mut inflate = zlib::Inflate::default();
146        let mut istream = match fs::File::open(&path) {
147            Ok(f) => f,
148            Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(None),
149            Err(err) => {
150                return Err(Error::Io {
151                    source: err,
152                    action: Self::OPEN_ACTION,
153                    path,
154                })
155            }
156        };
157
158        let (compressed_buf, _) = buf.split_at_mut(BUF_SIZE - HEADER_MAX_SIZE);
159        let bytes_read = istream.read(compressed_buf).map_err(|e| Error::Io {
160            source: e,
161            action: "read",
162            path: path.to_owned(),
163        })?;
164        let (compressed_buf, header_buf) = buf.split_at_mut(bytes_read);
165        let (status, _consumed_in, consumed_out) =
166            inflate
167                .once(compressed_buf, header_buf)
168                .map_err(|e| Error::DecompressFile {
169                    source: e,
170                    path: path.to_owned(),
171                })?;
172
173        if status == zlib::Status::BufError {
174            return Err(Error::DecompressFile {
175                source: zlib::inflate::Error::Status(status),
176                path,
177            });
178        }
179        let (kind, size, _header_size) = git_object::decode::loose_header(&header_buf[..consumed_out])?;
180        Ok(Some((size, kind)))
181    }
182
183    fn find_inner<'a>(&self, id: &git_hash::oid, buf: &'a mut Vec<u8>) -> Result<git_object::Data<'a>, Error> {
184        let path = hash_path(id, self.path.clone());
185
186        let mut inflate = zlib::Inflate::default();
187        let ((status, consumed_in, consumed_out), bytes_read) = {
188            let mut istream = fs::File::open(&path).map_err(|e| Error::Io {
189                source: e,
190                action: Self::OPEN_ACTION,
191                path: path.to_owned(),
192            })?;
193
194            buf.clear();
195            let bytes_read = istream.read_to_end(buf).map_err(|e| Error::Io {
196                source: e,
197                action: "read",
198                path: path.to_owned(),
199            })?;
200            buf.resize(bytes_read + HEADER_MAX_SIZE, 0);
201            let (input, output) = buf.split_at_mut(bytes_read);
202            (
203                inflate
204                    .once(&input[..bytes_read], output)
205                    .map_err(|e| Error::DecompressFile {
206                        source: e,
207                        path: path.to_owned(),
208                    })?,
209                bytes_read,
210            )
211        };
212        if status == zlib::Status::BufError {
213            return Err(Error::DecompressFile {
214                source: zlib::inflate::Error::Status(status),
215                path,
216            });
217        }
218
219        let decompressed_start = bytes_read;
220        let (kind, size, header_size) =
221            git_object::decode::loose_header(&buf[decompressed_start..decompressed_start + consumed_out])?;
222
223        if status == zlib::Status::StreamEnd {
224            let decompressed_body_bytes_sans_header =
225                decompressed_start + header_size..decompressed_start + consumed_out;
226
227            if consumed_out != size + header_size {
228                return Err(Error::SizeMismatch {
229                    expected: size + header_size,
230                    actual: consumed_out,
231                    path,
232                });
233            }
234            buf.copy_within(decompressed_body_bytes_sans_header, 0);
235        } else {
236            buf.resize(bytes_read + size + header_size, 0);
237            {
238                let (input, output) = buf.split_at_mut(bytes_read);
239                let num_decompressed_bytes = zlib::stream::inflate::read(
240                    &mut &input[consumed_in..],
241                    &mut inflate.state,
242                    &mut output[consumed_out..],
243                )
244                .map_err(|e| Error::Io {
245                    source: e,
246                    action: "deflate",
247                    path: path.to_owned(),
248                })?;
249                if num_decompressed_bytes + consumed_out != size + header_size {
250                    return Err(Error::SizeMismatch {
251                        expected: size + header_size,
252                        actual: num_decompressed_bytes + consumed_out,
253                        path,
254                    });
255                }
256            };
257            buf.copy_within(decompressed_start + header_size.., 0);
258        }
259        buf.resize(size, 0);
260        Ok(git_object::Data { kind, data: buf })
261    }
262}