Skip to main content

grit_lib/
odb.rs

1//! Loose object database: reading and writing zlib-compressed Git objects.
2//!
3//! Git stores objects as files under `<git-dir>/objects/<xx>/<38-hex-chars>`,
4//! where the path is derived from the SHA-1 digest. Each file is a zlib-
5//! compressed byte sequence whose decompressed form is:
6//!
7//! ```text
8//! "<type> <size>\0<data>"
9//! ```
10//!
11//! # Usage
12//!
13//! ```no_run
14//! use std::path::Path;
15//! use grit_lib::odb::Odb;
16//!
17//! let odb = Odb::new(Path::new(".git/objects"));
18//! ```
19
20use std::ffi::CString;
21use std::fs;
22use std::io::{Read, Write};
23use std::path::{Path, PathBuf};
24use std::sync::{Arc, Mutex, OnceLock};
25
26use flate2::read::ZlibDecoder;
27use flate2::write::ZlibEncoder;
28use flate2::Compression;
29use sha1::{Digest, Sha1};
30
31use crate::config::ConfigSet;
32use crate::error::{Error, Result};
33use crate::midx::{midx_oid_listed_in_tip, try_read_object_via_midx};
34use crate::objects::{Object, ObjectId, ObjectKind};
35use crate::pack;
36
37/// Decompress a zlib-wrapped loose object payload from an open file.
38///
39/// When the zlib wrapper advertises a preset dictionary (FDICT), `flate2` typically fails with a
40/// generic corrupt-stream error; map that to `"needs dictionary"` so callers match Git's messages
41/// (`t1006-cat-file` zlib-dictionary test).
42fn read_zlib_loose_payload(mut file: fs::File) -> Result<Vec<u8>> {
43    let mut hdr = [0u8; 2];
44    file.read_exact(&mut hdr).map_err(Error::Io)?;
45    let cmf_flg = u16::from(hdr[0]) << 8 | u16::from(hdr[1]);
46    let looks_like_zlib_header = cmf_flg != 0 && cmf_flg % 31 == 0;
47    let preset_dictionary = looks_like_zlib_header && (hdr[1] & 0x20) != 0;
48    let mut decoder = ZlibDecoder::new(hdr.as_slice().chain(file));
49    let mut raw = Vec::new();
50    match decoder.read_to_end(&mut raw) {
51        Ok(_) => Ok(raw),
52        Err(e) => {
53            if preset_dictionary {
54                Err(Error::Zlib("needs dictionary".to_owned()))
55            } else {
56                Err(Error::Zlib(e.to_string()))
57            }
58        }
59    }
60}
61
62/// True when `oid` is stored as a loose object or in a **non-promisor** local pack.
63fn exists_materialized_in_objects_dir(objects_dir: &Path, oid: &ObjectId) -> bool {
64    let loose = objects_dir
65        .join(oid.loose_prefix())
66        .join(oid.loose_suffix());
67    if loose.exists() {
68        return true;
69    }
70    let Ok(indexes) = pack::read_local_pack_indexes_cached(objects_dir) else {
71        return false;
72    };
73    for idx in &indexes {
74        if idx.pack_path.with_extension("promisor").is_file() {
75            continue;
76        }
77        if idx.contains(oid) {
78            return true;
79        }
80    }
81    false
82}
83
/// An object database rooted at a given `objects/` directory.
///
/// Writes always produce loose objects under `objects_dir`; reads in this file additionally
/// consult local packs, alternates and registered submodule object stores.
///
/// Cloning is cheap for the shared parts: the submodule directory list and the
/// `core.multiPackIndex` cache live behind [`Arc`] and are shared between clones.
#[derive(Clone)]
pub struct Odb {
    /// The `objects/` directory this database is rooted at.
    objects_dir: PathBuf,
    /// Work tree root for resolving relative alternate env paths.
    work_tree: Option<PathBuf>,
    /// Embedded submodule object stores registered for this read pass (Git `register_all_submodule_sources`).
    submodule_alternate_dirs: Arc<Mutex<Vec<PathBuf>>>,
    /// When set, used to read `core.multiPackIndex` (and related) for MIDX-backed object reads.
    config_git_dir: Option<PathBuf>,
    /// Cache for `core.multiPackIndex` — populated on first lookup.
    ///
    /// Reading this config requires loading the system/global/local config cascade and reparsing
    /// every file; the value cannot change for a process that has opened a single repository, so
    /// caching it here avoids re-loading the cascade for every object read.
    core_multi_pack_index_cache: Arc<OnceLock<bool>>,
}
101
102impl std::fmt::Debug for Odb {
103    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
104        f.debug_struct("Odb")
105            .field("objects_dir", &self.objects_dir)
106            .field("work_tree", &self.work_tree)
107            .field("submodule_alternate_dirs", &"<mutex>")
108            .field("config_git_dir", &self.config_git_dir)
109            .finish()
110    }
111}
112
113impl Odb {
114    /// Create an [`Odb`] pointing at the given `objects/` directory.
115    ///
116    /// The directory does not need to exist yet; it will be created on the
117    /// first write operation.
118    #[must_use]
119    pub fn new(objects_dir: &Path) -> Self {
120        Self {
121            objects_dir: objects_dir.to_path_buf(),
122            work_tree: None,
123            submodule_alternate_dirs: Arc::new(Mutex::new(Vec::new())),
124            config_git_dir: None,
125            core_multi_pack_index_cache: Arc::new(OnceLock::new()),
126        }
127    }
128
129    /// Create an [`Odb`] with a work tree for resolving relative alternate paths.
130    #[must_use]
131    pub fn with_work_tree(objects_dir: &Path, work_tree: &Path) -> Self {
132        Self {
133            objects_dir: objects_dir.to_path_buf(),
134            work_tree: Some(work_tree.to_path_buf()),
135            submodule_alternate_dirs: Arc::new(Mutex::new(Vec::new())),
136            config_git_dir: None,
137            core_multi_pack_index_cache: Arc::new(OnceLock::new()),
138        }
139    }
140
141    /// Register `<submodule-git-dir>/objects` for every stage-0 gitlink in `index` that has a
142    /// checkout under `work_tree`, so reads can resolve submodule commits stored only in the
143    /// nested repository (matches Git's `odb_add_submodule_source_by_path` / `register_all_submodule_sources`).
144    pub fn register_submodule_object_directories_from_index(
145        &self,
146        work_tree: &Path,
147        index: &crate::index::Index,
148    ) {
149        use crate::diff::submodule_embedded_git_dir;
150
151        let Ok(mut dirs) = self.submodule_alternate_dirs.lock() else {
152            return;
153        };
154        dirs.clear();
155        for e in &index.entries {
156            if e.stage() != 0 || e.mode != crate::index::MODE_GITLINK {
157                continue;
158            }
159            let path_str = String::from_utf8_lossy(&e.path);
160            let abs = work_tree.join(path_str.as_ref());
161            let Some(sub_git) = submodule_embedded_git_dir(&abs) else {
162                continue;
163            };
164            let objects = sub_git.join("objects");
165            if !objects.is_dir() {
166                continue;
167            }
168            let canon = objects.canonicalize().unwrap_or(objects);
169            if !dirs.iter().any(|p| p == &canon) {
170                dirs.push(canon);
171            }
172        }
173    }
174
175    /// Attach a git directory so [`Self::read`] can honor `core.multiPackIndex` when resolving packed objects.
176    #[must_use]
177    pub fn with_config_git_dir(mut self, git_dir: PathBuf) -> Self {
178        self.config_git_dir = Some(git_dir);
179        self
180    }
181
182    fn core_multi_pack_index_enabled(&self) -> bool {
183        // The system/global/local config cascade is expensive to load (the parser walks every
184        // file from `/etc/gitconfig` through `.git/config`); calling it once per object lookup
185        // dominated `status` runtime. Cache the result for the lifetime of this `Odb`.
186        *self.core_multi_pack_index_cache.get_or_init(|| {
187            let Some(git_dir) = &self.config_git_dir else {
188                return false;
189            };
190            let cfg = ConfigSet::load(Some(git_dir), true).unwrap_or_default();
191            match cfg.get_bool("core.multiPackIndex") {
192                Some(Ok(b)) => b,
193                Some(Err(_)) => true,
194                None => true,
195            }
196        })
197    }
198
199    /// Return the path to the `objects/` directory.
200    #[must_use]
201    pub fn objects_dir(&self) -> &Path {
202        &self.objects_dir
203    }
204
205    /// Return the filesystem path for a given object ID.
206    #[must_use]
207    pub fn object_path(&self, oid: &ObjectId) -> PathBuf {
208        self.objects_dir
209            .join(oid.loose_prefix())
210            .join(oid.loose_suffix())
211    }
212
213    /// Whether the object exists under this database directory only (loose or local packs).
214    ///
215    /// Unlike [`Self::exists`], this ignores `info/alternates` and
216    /// `GIT_ALTERNATE_OBJECT_DIRECTORIES`. Used for partial-clone bookkeeping where
217    /// objects reachable via alternates are still treated as "missing" until copied locally.
218    ///
219    /// Objects stored **only** in promisor packs (sibling `.promisor` marker next to the
220    /// `.pack`) are treated as absent: Git considers them fetchable on demand, and
221    /// `rev-list --missing=print` lists them until materialized as loose objects or a
222    /// non-promisor pack.
223    ///
224    /// The empty tree object is treated as present without a loose file (matches Git).
225    #[must_use]
226    pub fn exists_local(&self, oid: &ObjectId) -> bool {
227        const EMPTY_TREE: &str = "4b825dc642cb6eb9a060e54bf8d69288fbee4904";
228        if oid.to_hex() == EMPTY_TREE {
229            return true;
230        }
231        exists_materialized_in_objects_dir(&self.objects_dir, oid)
232    }
233
234    /// Check whether an object exists in the loose store or any pack file.
235    #[must_use]
236    pub fn exists(&self, oid: &ObjectId) -> bool {
237        // The empty tree is a well-known object (no on-disk loose file). Git's
238        // canonical SHA-1 is `...8d69288fbee4904`; some harnesses still use the
239        // legacy typo hash `...899d69f7c6948d4` — treat both as present.
240        const EMPTY_TREE_CANON: &str = "4b825dc642cb6eb9a060e54bf8d69288fbee4904";
241        const EMPTY_TREE_LEGACY: &str = "4b825dc642cb6eb9a060e54bf899d69f7c6948d4";
242        let hex = oid.to_hex();
243        if hex == EMPTY_TREE_CANON || hex == EMPTY_TREE_LEGACY {
244            return true;
245        }
246        if self.exists_in_dir(&self.objects_dir, oid) {
247            return true;
248        }
249        // Check alternates from info/alternates file.
250        if let Ok(alts) = pack::read_alternates_recursive(&self.objects_dir) {
251            for alt_dir in &alts {
252                if self.exists_in_dir(alt_dir, oid) {
253                    return true;
254                }
255            }
256        }
257        // Check GIT_ALTERNATE_OBJECT_DIRECTORIES env var.
258        for alt_dir in env_alternate_dirs(self.work_tree.as_deref()) {
259            if self.exists_in_dir(&alt_dir, oid) {
260                return true;
261            }
262        }
263        if let Ok(guard) = self.submodule_alternate_dirs.lock() {
264            for alt_dir in guard.iter() {
265                if self.exists_in_dir(alt_dir, oid) {
266                    return true;
267                }
268            }
269        }
270        false
271    }
272
273    /// Check whether an object exists in a specific objects directory.
274    fn exists_in_dir(&self, objects_dir: &Path, oid: &ObjectId) -> bool {
275        let loose = objects_dir
276            .join(oid.loose_prefix())
277            .join(oid.loose_suffix());
278        if loose.exists() {
279            return true;
280        }
281        if let Ok(indexes) = pack::read_local_pack_indexes_cached(objects_dir) {
282            for idx in &indexes {
283                if idx.contains(oid) {
284                    return true;
285                }
286            }
287        }
288        if objects_dir == self.objects_dir.as_path()
289            && self.config_git_dir.is_some()
290            && self.core_multi_pack_index_enabled()
291        {
292            match midx_oid_listed_in_tip(objects_dir, oid) {
293                Ok(Some(true)) => return true,
294                Ok(Some(false)) | Ok(None) => {}
295                Err(_) => return false,
296            }
297        }
298        false
299    }
300
301    /// Touch the loose object file or pack file containing `oid`, matching Git's
302    /// `odb_freshen_object` (updates mtime so age-based prune keeps recently re-referenced objects).
303    ///
304    /// Returns `true` if an on-disk object was found and touched.
305    #[must_use]
306    pub fn freshen_object(&self, oid: &ObjectId) -> bool {
307        const EMPTY_TREE_CANON: &str = "4b825dc642cb6eb9a060e54bf8d69288fbee4904";
308        const EMPTY_TREE_LEGACY: &str = "4b825dc642cb6eb9a060e54bf899d69f7c6948d4";
309        let hex = oid.to_hex();
310        if hex == EMPTY_TREE_CANON || hex == EMPTY_TREE_LEGACY {
311            return false;
312        }
313
314        let loose = self.object_path(oid);
315        if loose.is_file() {
316            return touch_path_mtime(&loose);
317        }
318
319        if freshen_object_in_objects_dir(&self.objects_dir, oid) {
320            return true;
321        }
322
323        if let Ok(alts) = pack::read_alternates_recursive(&self.objects_dir) {
324            for alt_dir in &alts {
325                if freshen_object_in_objects_dir(alt_dir, oid) {
326                    return true;
327                }
328            }
329        }
330
331        for alt_dir in env_alternate_dirs(self.work_tree.as_deref()) {
332            if freshen_object_in_objects_dir(&alt_dir, oid) {
333                return true;
334            }
335        }
336
337        if let Ok(guard) = self.submodule_alternate_dirs.lock() {
338            for alt_dir in guard.iter() {
339                if freshen_object_in_objects_dir(alt_dir, oid) {
340                    return true;
341                }
342            }
343        }
344
345        false
346    }
347
348    /// Read a loose object file at `path`, verifying the uncompressed payload hashes to `expected_oid`.
349    ///
350    /// Git stores loose objects under paths derived from the OID; if the file contents hash to a
351    /// different id (for example after a mistaken `mv`), this returns [`Error::LooseHashMismatch`].
352    ///
353    /// # Errors
354    ///
355    /// - [`Error::Zlib`] — decompression failed.
356    /// - [`Error::CorruptObject`] — header is malformed.
357    /// - [`Error::LooseHashMismatch`] — payload OID does not match `expected_oid`.
358    pub fn read_loose_verify_oid(path: &Path, expected_oid: &ObjectId) -> Result<Object> {
359        let file = fs::File::open(path).map_err(Error::Io)?;
360        let raw = read_zlib_loose_payload(file)?;
361        let obj = parse_object_bytes_with_oid(&raw, expected_oid)?;
362        let computed = hash_object_from_parsed(&obj);
363        if computed != *expected_oid {
364            return Err(Error::LooseHashMismatch {
365                path: path.display().to_string(),
366                real_oid: computed.to_hex(),
367            });
368        }
369        Ok(obj)
370    }
371
372    /// Read and decompress an object from the loose store.
373    ///
374    /// # Errors
375    ///
376    /// - [`Error::ObjectNotFound`] — no file at the expected path.
377    /// - [`Error::Zlib`] — decompression failed.
378    /// - [`Error::CorruptObject`] — header is malformed.
379    pub fn read(&self, oid: &ObjectId) -> Result<Object> {
380        // The empty tree is a well-known virtual object — no storage needed.
381        const EMPTY_TREE_CANON: &str = "4b825dc642cb6eb9a060e54bf8d69288fbee4904";
382        const EMPTY_TREE_LEGACY: &str = "4b825dc642cb6eb9a060e54bf899d69f7c6948d4";
383        let hex = oid.to_hex();
384        if hex == EMPTY_TREE_CANON || hex == EMPTY_TREE_LEGACY {
385            return Ok(crate::objects::Object {
386                kind: crate::objects::ObjectKind::Tree,
387                data: Vec::new(),
388            });
389        }
390
391        let path = self.object_path(oid);
392        match fs::File::open(&path) {
393            Ok(file) => {
394                let raw = read_zlib_loose_payload(file)?;
395                // Match Git: loose objects are read from the path implied by `oid` without
396                // requiring the payload to hash back to that oid (t1006 corrupt-loose / swapped files).
397                return parse_object_bytes(&raw);
398            }
399            Err(_) => {
400                // Loose object not found; try pack files.
401            }
402        }
403
404        if self.config_git_dir.is_some() && self.core_multi_pack_index_enabled() {
405            if let Some(obj) = try_read_object_via_midx(&self.objects_dir, oid)? {
406                return Ok(obj);
407            }
408        }
409
410        // Fall back to pack files.
411        if let Ok(obj) = pack::read_object_from_packs(&self.objects_dir, oid) {
412            return Ok(obj);
413        }
414
415        let midx_alt = self.config_git_dir.is_some() && self.core_multi_pack_index_enabled();
416
417        // Check alternates from info/alternates file.
418        if let Ok(alts) = pack::read_alternates_recursive(&self.objects_dir) {
419            for alt_dir in &alts {
420                if let Ok(obj) = Self::read_from_dir(alt_dir, oid, midx_alt) {
421                    return Ok(obj);
422                }
423            }
424        }
425
426        // Check GIT_ALTERNATE_OBJECT_DIRECTORIES env var.
427        for alt_dir in env_alternate_dirs(self.work_tree.as_deref()) {
428            if let Ok(obj) = Self::read_from_dir(&alt_dir, oid, midx_alt) {
429                return Ok(obj);
430            }
431        }
432
433        if let Ok(guard) = self.submodule_alternate_dirs.lock() {
434            for alt_dir in guard.iter() {
435                if let Ok(obj) = Self::read_from_dir(alt_dir, oid, false) {
436                    return Ok(obj);
437                }
438            }
439        }
440
441        Err(Error::ObjectNotFound(oid.to_hex()))
442    }
443
444    /// Try to read an object from a specific objects directory (loose or pack).
445    fn read_from_dir(objects_dir: &Path, oid: &ObjectId, use_midx: bool) -> Result<Object> {
446        let loose = objects_dir
447            .join(oid.loose_prefix())
448            .join(oid.loose_suffix());
449        if let Ok(file) = fs::File::open(&loose) {
450            let raw = read_zlib_loose_payload(file)?;
451            return parse_object_bytes(&raw);
452        }
453        if use_midx {
454            if let Some(obj) = try_read_object_via_midx(objects_dir, oid)? {
455                return Ok(obj);
456            }
457        }
458        pack::read_object_from_packs(objects_dir, oid)
459    }
460
461    /// Hash raw content of a given kind and return the [`ObjectId`].
462    ///
463    /// This does **not** write anything to disk.
464    #[must_use]
465    pub fn hash_object_data(kind: ObjectKind, data: &[u8]) -> ObjectId {
466        // Stream the hash without copying data into a contiguous buffer.
467        let header = format!("{} {}\0", kind, data.len());
468        let mut hasher = Sha1::new();
469        hasher.update(header.as_bytes());
470        hasher.update(data);
471        let digest = hasher.finalize();
472        ObjectId::from_bytes(digest.as_slice())
473            .unwrap_or_else(|_| unreachable!("SHA-1 is 20 bytes"))
474    }
475
476    /// Write an object to the loose store and return its [`ObjectId`].
477    ///
478    /// If the object already exists it is not overwritten (Git behaviour).
479    ///
480    /// # Errors
481    ///
482    /// - [`Error::Io`] — could not create the directory or write the file.
483    /// - [`Error::Zlib`] — compression failed.
484    pub fn write(&self, kind: ObjectKind, data: &[u8]) -> Result<ObjectId> {
485        let store_bytes = build_store_bytes(kind, data);
486        let oid = hash_bytes(&store_bytes);
487
488        let path = self.object_path(&oid);
489        if path.exists() {
490            let _ = self.freshen_object(&oid);
491            return Ok(oid);
492        }
493        if self.exists(&oid) {
494            let _ = self.freshen_object(&oid);
495            return Ok(oid);
496        }
497
498        let prefix_dir = path
499            .parent()
500            .ok_or_else(|| Error::PathError("object path has no parent".to_owned()))?;
501        fs::create_dir_all(prefix_dir)?;
502
503        // Write to a temp file in the same directory, then rename atomically.
504        let tmp_path = prefix_dir.join(format!("tmp_{}", oid.loose_suffix()));
505        {
506            let tmp_file = fs::File::create(&tmp_path)?;
507            let mut encoder = ZlibEncoder::new(tmp_file, Compression::default());
508            encoder
509                .write_all(&store_bytes)
510                .map_err(|e| Error::Zlib(e.to_string()))?;
511            encoder.finish().map_err(|e| Error::Zlib(e.to_string()))?;
512        }
513        fs::rename(&tmp_path, &path)?;
514        #[cfg(unix)]
515        {
516            use std::os::unix::fs::PermissionsExt;
517            let _ = fs::set_permissions(&path, fs::Permissions::from_mode(0o444));
518        }
519
520        Ok(oid)
521    }
522
523    /// Write an object as a loose file in this object directory only.
524    ///
525    /// Unlike [`Self::write`], this ignores `info/alternates` and
526    /// `GIT_ALTERNATE_OBJECT_DIRECTORIES`: if the object exists only in an
527    /// alternate store, it is still written here. That matches how Git's
528    /// `unpack-objects` materializes every packed object into the receiving
529    /// repository even when the same OID is already reachable via alternates
530    /// (see `t5519-push-alternates`).
531    ///
532    /// # Errors
533    ///
534    /// Same as [`Self::write`].
535    pub fn write_local(&self, kind: ObjectKind, data: &[u8]) -> Result<ObjectId> {
536        let store_bytes = build_store_bytes(kind, data);
537        let oid = hash_bytes(&store_bytes);
538
539        let path = self.object_path(&oid);
540        if path.exists() {
541            let _ = self.freshen_object(&oid);
542            return Ok(oid);
543        }
544        if self.exists_local(&oid) {
545            let _ = self.freshen_object(&oid);
546            return Ok(oid);
547        }
548
549        let prefix_dir = path
550            .parent()
551            .ok_or_else(|| Error::PathError("object path has no parent".to_owned()))?;
552        fs::create_dir_all(prefix_dir)?;
553
554        let tmp_path = prefix_dir.join(format!("tmp_{}", oid.loose_suffix()));
555        {
556            let tmp_file = fs::File::create(&tmp_path)?;
557            let mut encoder = ZlibEncoder::new(tmp_file, Compression::default());
558            encoder
559                .write_all(&store_bytes)
560                .map_err(|e| Error::Zlib(e.to_string()))?;
561            encoder.finish().map_err(|e| Error::Zlib(e.to_string()))?;
562        }
563        fs::rename(&tmp_path, &path)?;
564        #[cfg(unix)]
565        {
566            use std::os::unix::fs::PermissionsExt;
567            let _ = fs::set_permissions(&path, fs::Permissions::from_mode(0o444));
568        }
569
570        Ok(oid)
571    }
572
573    /// Write a loose object file when it is missing, even if [`Self::exists`] is true because
574    /// the object lives only in a pack.
575    ///
576    /// Used when materializing a partial-clone layout: objects must be duplicated as loose files
577    /// before local packs are removed. Unlike [`Self::write_local`], objects present only in a
578    /// promisor pack are still written because [`Self::exists_local`] treats those as absent.
579    pub fn write_loose_materialize(&self, kind: ObjectKind, data: &[u8]) -> Result<ObjectId> {
580        let store_bytes = build_store_bytes(kind, data);
581        let oid = hash_bytes(&store_bytes);
582        let path = self.object_path(&oid);
583        if path.exists() {
584            let _ = self.freshen_object(&oid);
585            return Ok(oid);
586        }
587
588        let prefix_dir = path
589            .parent()
590            .ok_or_else(|| Error::PathError("object path has no parent".to_owned()))?;
591        fs::create_dir_all(prefix_dir)?;
592
593        let tmp_path = prefix_dir.join(format!("tmp_{}", oid.loose_suffix()));
594        {
595            let tmp_file = fs::File::create(&tmp_path)?;
596            let mut encoder = ZlibEncoder::new(tmp_file, Compression::default());
597            encoder
598                .write_all(&store_bytes)
599                .map_err(|e| Error::Zlib(e.to_string()))?;
600            encoder.finish().map_err(|e| Error::Zlib(e.to_string()))?;
601        }
602        fs::rename(&tmp_path, &path)?;
603        #[cfg(unix)]
604        {
605            use std::os::unix::fs::PermissionsExt;
606            let _ = fs::set_permissions(&path, fs::Permissions::from_mode(0o444));
607        }
608
609        Ok(oid)
610    }
611
612    /// Write an already-serialized object (header + data) to the loose store.
613    ///
614    /// Useful when the caller has the full store bytes (e.g. from stdin with
615    /// `--literally`).
616    ///
617    /// # Errors
618    ///
619    /// - [`Error::CorruptObject`] — the provided bytes don't form a valid header.
620    /// - [`Error::Io`] / [`Error::Zlib`] — storage errors.
621    pub fn write_raw(&self, store_bytes: &[u8]) -> Result<ObjectId> {
622        // Validate the header before storing
623        parse_object_bytes(store_bytes)?;
624
625        let oid = hash_bytes(store_bytes);
626        let path = self.object_path(&oid);
627        if path.exists() {
628            let _ = self.freshen_object(&oid);
629            return Ok(oid);
630        }
631        if self.exists(&oid) {
632            let _ = self.freshen_object(&oid);
633            return Ok(oid);
634        }
635
636        let prefix_dir = path
637            .parent()
638            .ok_or_else(|| Error::PathError("object path has no parent".to_owned()))?;
639        fs::create_dir_all(prefix_dir)?;
640
641        let tmp_path = prefix_dir.join(format!("tmp_{}", oid.loose_suffix()));
642        {
643            let tmp_file = fs::File::create(&tmp_path)?;
644            let mut encoder = ZlibEncoder::new(tmp_file, Compression::default());
645            encoder
646                .write_all(store_bytes)
647                .map_err(|e| Error::Zlib(e.to_string()))?;
648            encoder.finish().map_err(|e| Error::Zlib(e.to_string()))?;
649        }
650        fs::rename(&tmp_path, &path)?;
651        #[cfg(unix)]
652        {
653            use std::os::unix::fs::PermissionsExt;
654            let _ = fs::set_permissions(&path, fs::Permissions::from_mode(0o444));
655        }
656
657        Ok(oid)
658    }
659
660    /// Like [`Self::write_raw`] but only consults this object directory, not alternates.
661    ///
662    /// See [`Self::write_local`].
663    ///
664    /// # Errors
665    ///
666    /// Same as [`Self::write_raw`].
667    pub fn write_raw_local(&self, store_bytes: &[u8]) -> Result<ObjectId> {
668        parse_object_bytes(store_bytes)?;
669
670        let oid = hash_bytes(store_bytes);
671        let path = self.object_path(&oid);
672        if path.exists() {
673            let _ = self.freshen_object(&oid);
674            return Ok(oid);
675        }
676        if self.exists_local(&oid) {
677            let _ = self.freshen_object(&oid);
678            return Ok(oid);
679        }
680
681        let prefix_dir = path
682            .parent()
683            .ok_or_else(|| Error::PathError("object path has no parent".to_owned()))?;
684        fs::create_dir_all(prefix_dir)?;
685
686        let tmp_path = prefix_dir.join(format!("tmp_{}", oid.loose_suffix()));
687        {
688            let tmp_file = fs::File::create(&tmp_path)?;
689            let mut encoder = ZlibEncoder::new(tmp_file, Compression::default());
690            encoder
691                .write_all(store_bytes)
692                .map_err(|e| Error::Zlib(e.to_string()))?;
693            encoder.finish().map_err(|e| Error::Zlib(e.to_string()))?;
694        }
695        fs::rename(&tmp_path, &path)?;
696        #[cfg(unix)]
697        {
698            use std::os::unix::fs::PermissionsExt;
699            let _ = fs::set_permissions(&path, fs::Permissions::from_mode(0o444));
700        }
701
702        Ok(oid)
703    }
704
705    /// Returns true when a loose object exists at `oid`'s path and zlib-decompresses to a
706    /// structurally valid `<type> <size>\0<payload>` object (type may be non-standard).
707    ///
708    /// Used for `git cat-file -e`, which succeeds for hand-crafted loose objects that
709    /// [`Self::read`] rejects due to [`Error::UnknownObjectType`].
710    #[must_use]
711    pub fn loose_object_plumbing_ok(&self, oid: &ObjectId) -> bool {
712        let path = self.object_path(oid);
713        let Ok(file) = fs::File::open(&path) else {
714            return false;
715        };
716        let Ok(raw) = read_zlib_loose_payload(file) else {
717            return false;
718        };
719        loose_store_bytes_header_valid(&raw)
720    }
721}
722
/// Check that `raw` has the layout `<type> <size>\0<payload>` without interpreting the type.
///
/// Requirements:
/// - a NUL byte terminates the header;
/// - the header contains a space, and the type name before it is 1 to 32 bytes long;
/// - the size is in canonical decimal form, as Git requires: ASCII digits only, no sign, and no
///   leading zeros (a lone `0` is fine);
/// - the size equals the number of payload bytes after the NUL.
///
/// Plain integer parsing alone is too lenient here: it also accepts `+3` and `03`.
fn loose_store_bytes_header_valid(raw: &[u8]) -> bool {
    let Some(nul) = raw.iter().position(|&b| b == 0) else {
        return false;
    };
    let (header, payload) = (&raw[..nul], &raw[nul + 1..]);

    let Some(sp) = header.iter().position(|&b| b == b' ') else {
        return false;
    };
    if sp == 0 || sp > 32 {
        return false;
    }

    let digits = &header[sp + 1..];
    let canonical = match digits {
        [] => false,
        [b'0'] => true,
        [b'0', ..] => false,
        _ => digits.iter().all(u8::is_ascii_digit),
    };
    if !canonical {
        return false;
    }

    // With digits only, parsing can fail solely on overflow, which is invalid as well.
    std::str::from_utf8(digits)
        .ok()
        .and_then(|text| text.parse::<usize>().ok())
        == Some(payload.len())
}
747
748/// Update `path`'s mtime to "now" (Git `utime(path, NULL)`), returning whether it succeeded.
749fn touch_path_mtime(path: &Path) -> bool {
750    #[cfg(unix)]
751    {
752        use std::os::unix::ffi::OsStrExt;
753        let Ok(c_path) = CString::new(path.as_os_str().as_bytes()) else {
754            return false;
755        };
756        // SAFETY: `utimes` with NULL `times` sets atime and mtime to the current time.
757        unsafe { libc::utimes(c_path.as_ptr(), std::ptr::null()) == 0 }
758    }
759    #[cfg(not(unix))]
760    {
761        let _ = path;
762        false
763    }
764}
765
766fn freshen_object_in_objects_dir(objects_dir: &Path, oid: &ObjectId) -> bool {
767    let Ok(indexes) = pack::read_local_pack_indexes_cached(objects_dir) else {
768        return false;
769    };
770    for idx in &indexes {
771        if idx.contains(oid) {
772            return touch_path_mtime(&idx.pack_path);
773        }
774    }
775    false
776}
777
778fn hash_object_from_parsed(obj: &Object) -> ObjectId {
779    Odb::hash_object_data(obj.kind, &obj.data)
780}
781
782/// Compute the SHA-1 of a byte slice and return it as an [`ObjectId`].
783fn hash_bytes(data: &[u8]) -> ObjectId {
784    let mut hasher = Sha1::new();
785    hasher.update(data);
786    let digest = hasher.finalize();
787    // SAFETY: SHA-1 always produces exactly 20 bytes.
788    ObjectId::from_bytes(digest.as_slice()).unwrap_or_else(|_| unreachable!("SHA-1 is 20 bytes"))
789}
790
791/// Build the canonical store byte sequence: `"<kind> <len>\0<data>"`.
792fn build_store_bytes(kind: ObjectKind, data: &[u8]) -> Vec<u8> {
793    let header = format!("{} {}\0", kind, data.len());
794    let mut out = Vec::with_capacity(header.len() + data.len());
795    out.extend_from_slice(header.as_bytes());
796    out.extend_from_slice(data);
797    out
798}
799
/// Parse decompressed object bytes (`"<type> <size>\0<data>"`) into an [`Object`].
///
/// No object id is supplied to the shared parser; if the header's type name is too long, the id
/// reported in the error is computed by hashing `raw`.
pub(crate) fn parse_object_bytes(raw: &[u8]) -> Result<Object> {
    parse_object_bytes_inner(raw, None)
}
804
/// Parse decompressed object bytes like [`parse_object_bytes`], with a known object id.
///
/// `oid` is only used for error reporting: when the header's type name is too long, the error
/// names `oid` instead of a hash computed from `raw`.
pub(crate) fn parse_object_bytes_with_oid(raw: &[u8], oid: &ObjectId) -> Result<Object> {
    parse_object_bytes_inner(raw, Some(oid))
}
808
809fn parse_object_bytes_inner(raw: &[u8], oid_hint: Option<&ObjectId>) -> Result<Object> {
810    let nul = raw
811        .iter()
812        .position(|&b| b == 0)
813        .ok_or_else(|| Error::CorruptObject("missing NUL in object header".to_owned()))?;
814
815    let header = &raw[..nul];
816    let data = raw[nul + 1..].to_vec();
817
818    let sp = header
819        .iter()
820        .position(|&b| b == b' ')
821        .ok_or_else(|| Error::CorruptObject("missing space in object header".to_owned()))?;
822
823    if sp > 32 {
824        let oid_str = oid_hint
825            .map(|o| o.to_hex())
826            .unwrap_or_else(|| hash_bytes(raw).to_hex());
827        return Err(Error::ObjectHeaderTooLong { oid: oid_str });
828    }
829
830    let kind = ObjectKind::from_bytes(&header[..sp])?;
831
832    let size_str = std::str::from_utf8(&header[sp + 1..])
833        .map_err(|_| Error::CorruptObject("non-UTF-8 object size".to_owned()))?;
834    let size: usize = size_str
835        .parse()
836        .map_err(|_| Error::CorruptObject(format!("invalid object size: {size_str}")))?;
837
838    if data.len() != size {
839        return Err(Error::CorruptObject(format!(
840            "object size mismatch: header says {size} but got {}",
841            data.len()
842        )));
843    }
844
845    Ok(Object::new(kind, data))
846}
847
848/// Parse `GIT_ALTERNATE_OBJECT_DIRECTORIES` into a list of paths.
849///
850/// The env var contains colon-separated (`:`-separated on Unix) paths
851/// to additional object directories to search. Supports double-quoted
852/// entries with octal escapes (e.g. `\057` for `/`).
853///
854/// Relative paths are resolved against `resolve_base` (typically the work tree root).
855fn env_alternate_dirs(resolve_base: Option<&Path>) -> Vec<PathBuf> {
856    match std::env::var("GIT_ALTERNATE_OBJECT_DIRECTORIES") {
857        Ok(val) if !val.is_empty() => {
858            let mut dirs = parse_alternate_env(&val);
859            if let Some(base) = resolve_base {
860                for dir in &mut dirs {
861                    if dir.is_relative() {
862                        *dir = base.join(&dir);
863                    }
864                }
865            }
866            dirs
867        }
868        _ => Vec::new(),
869    }
870}
871
/// Parse a colon-separated alternates string into a list of paths.
///
/// Empty entries (leading, trailing, or doubled `:`) are skipped. An entry that
/// starts with `"` is unquoted: it runs to the next unescaped `"` and may contain
/// backslash escapes:
///
/// - `\` followed by up to three digits is an octal byte value, appended as that
///   raw byte (so `\303\251` is the two-byte UTF-8 encoding of `é`). A digit run
///   that is not a valid octal byte (contains `8`/`9` or exceeds `\377`) is dropped.
/// - `\n`, `\t`, `\r` map to newline, tab, and carriage return.
/// - `\` followed by any other character yields that character.
///
/// If the closing quote is missing, the entry is taken literally (including the
/// leading `"`) up to the next `:`, and parsing continues after it.
fn parse_alternate_env(val: &str) -> Vec<PathBuf> {
    /// Turn unquoted bytes into a path without re-encoding them.
    fn path_from_bytes(bytes: Vec<u8>) -> PathBuf {
        #[cfg(unix)]
        {
            use std::os::unix::ffi::OsStringExt;
            PathBuf::from(std::ffi::OsString::from_vec(bytes))
        }
        #[cfg(not(unix))]
        {
            // Paths are not byte strings here; fall back to a lossy UTF-8 decoding.
            PathBuf::from(String::from_utf8_lossy(&bytes).into_owned())
        }
    }

    /// Decode the text following an opening `"`. Returns the decoded bytes and the
    /// input remaining after the closing `"`, or `None` when the quote never closes.
    fn unquote(body: &str) -> Option<(Vec<u8>, &str)> {
        let mut out = Vec::new();
        let mut utf8 = [0u8; 4];
        let mut chars = body.char_indices().peekable();
        while let Some((idx, c)) = chars.next() {
            match c {
                '"' => return Some((out, &body[idx + 1..])),
                '\\' => match chars.peek().map(|&(_, next)| next) {
                    Some(d) if d.is_ascii_digit() => {
                        let mut oct = String::with_capacity(3);
                        while oct.len() < 3 {
                            match chars.peek() {
                                Some(&(_, d)) if d.is_ascii_digit() => {
                                    oct.push(d);
                                    chars.next();
                                }
                                _ => break,
                            }
                        }
                        // Push the byte itself: going through `char` would re-encode
                        // values >= 0x80 as two UTF-8 bytes and corrupt the path.
                        if let Ok(byte) = u8::from_str_radix(&oct, 8) {
                            out.push(byte);
                        }
                    }
                    Some(escaped) => {
                        chars.next();
                        let decoded = match escaped {
                            'n' => '\n',
                            't' => '\t',
                            'r' => '\r',
                            other => other,
                        };
                        out.extend_from_slice(decoded.encode_utf8(&mut utf8).as_bytes());
                    }
                    // Trailing backslash: the loop ends and the quote is unterminated.
                    None => {}
                },
                other => out.extend_from_slice(other.encode_utf8(&mut utf8).as_bytes()),
            }
        }
        None
    }

    let mut result = Vec::new();
    let mut rest = val;
    loop {
        rest = rest.trim_start_matches(':');
        if rest.is_empty() {
            return result;
        }

        if let Some(body) = rest.strip_prefix('"') {
            if let Some((bytes, after)) = unquote(body) {
                if !bytes.is_empty() {
                    result.push(path_from_bytes(bytes));
                }
                rest = after;
                continue;
            }
            // Broken quoting: fall through and treat the entry, including its
            // leading quote, as a literal path.
        }

        // `rest` is non-empty and does not start with ':', so the entry is non-empty.
        let end = rest.find(':').unwrap_or(rest.len());
        let (literal, tail) = rest.split_at(end);
        result.push(PathBuf::from(literal));
        rest = tail;
    }
}
967
#[cfg(test)]
mod tests {
    // Tests are allowed to panic on setup failures.
    #![allow(clippy::expect_used, clippy::unwrap_used)]

    use super::*;
    use tempfile::TempDir;

    /// A blob written to a fresh object directory reads back with the same kind
    /// and payload.
    #[test]
    fn round_trip_blob() {
        let scratch = TempDir::new().unwrap();
        let store = Odb::new(scratch.path());
        let payload = b"hello world";

        let written = store.write(ObjectKind::Blob, payload).unwrap();
        let fetched = store.read(&written).unwrap();

        assert_eq!(fetched.kind, ObjectKind::Blob);
        assert_eq!(fetched.data, payload);
    }

    /// The blob id of `"hello"` matches the value Git itself produces.
    #[test]
    fn known_blob_hash() {
        // Verified: echo -n "hello" | git hash-object --stdin
        //        => b6fc4c620b67d95f953a5c1c1230aaab5db5a1b0
        const EXPECTED: &str = "b6fc4c620b67d95f953a5c1c1230aaab5db5a1b0";

        let oid = Odb::hash_object_data(ObjectKind::Blob, b"hello");

        assert_eq!(oid.to_hex(), EXPECTED);
    }
}
993}