Skip to main content

grit_lib/
reftable.rs

1//! Reftable format — binary reference storage.
2//!
3//! Implements the [reftable file format](https://git-scm.com/docs/reftable)
4//! for efficient, sorted reference storage.  A reftable file contains
5//! ref blocks (sorted ref records with prefix compression), optional log
6//! blocks (reflog entries), optional index blocks, and a footer.
7//!
8//! # Architecture
9//!
10//! - [`ReftableWriter`] writes a single `.ref` (or `.log`) reftable file.
11//! - [`ReftableReader`] reads and searches a single reftable file.
12//! - [`ReftableStack`] manages the `tables.list` stack, providing a
13//!   merged view of all tables and auto-compaction on writes.
14//!
15//! # On-disk layout
16//!
17//! ```text
18//! first_block { header, first_ref_block }
19//! ref_block*
20//! ref_index?
21//! obj_block*    (not yet implemented)
22//! obj_index?    (not yet implemented)
23//! log_block*
24//! log_index?
25//! footer
26//! ```
27
28use std::collections::{BTreeMap, BTreeSet};
29use std::fs;
30use std::io::{Read, Write};
31use std::path::{Path, PathBuf};
32use std::thread;
33use std::time::{Duration, Instant};
34
35use crate::config::ConfigSet;
36use crate::error::{Error, Result};
37use crate::objects::ObjectId;
38
39// ---------------------------------------------------------------------------
40// Constants
41// ---------------------------------------------------------------------------
42
/// Magic bytes at the start of every reftable file.
const REFTABLE_MAGIC: &[u8; 4] = b"REFT";

/// File header size (version 1): magic(4) + version(1) + block_size(3)
/// + min_update_index(8) + max_update_index(8) = 24 bytes.
const HEADER_SIZE: usize = 24;

/// Footer size, in bytes, for version 1.
const FOOTER_V1_SIZE: usize = 68;

/// Block type: ref block.
const BLOCK_TYPE_REF: u8 = b'r';
/// Block type: index block.
const BLOCK_TYPE_INDEX: u8 = b'i';
/// Block type: log block (zlib-compressed).
const BLOCK_TYPE_LOG: u8 = b'g';
/// Block type: object index block.
const BLOCK_TYPE_OBJ: u8 = b'o';

// Value types encoded in the low 3 bits of the suffix_length varint.

/// Value type: deletion tombstone (no value bytes follow).
const VALUE_DELETION: u8 = 0;
/// Value type: a single object id.
const VALUE_ONE_OID: u8 = 1;
/// Value type: two object ids (value followed by peeled target).
const VALUE_TWO_OID: u8 = 2;
/// Value type: symbolic reference (length-prefixed target name).
const VALUE_SYMREF: u8 = 3;

/// Hash size (SHA-1), in bytes.
const HASH_SIZE: usize = 20;

/// Default block size when none is configured (4 KiB).
const DEFAULT_BLOCK_SIZE: u32 = 4096;

/// How many records between restart points.
const RESTART_INTERVAL: usize = 16;
76
77// ---------------------------------------------------------------------------
78// Varint encoding (Git pack-style)
79// ---------------------------------------------------------------------------
80
/// Append `val` to `out` as a Git pack-style (offset) varint.
///
/// The value is split into 7-bit groups, most significant group first; every
/// byte except the last carries the `0x80` continuation bit, and each
/// continuation step subtracts one so that every value has exactly one
/// encoding. Returns the number of bytes appended.
fn put_varint(mut val: u64, out: &mut Vec<u8>) -> usize {
    // A u64 needs at most 10 groups; fill the scratch buffer from the back so
    // the bytes end up in output order without a reversal pass.
    let mut scratch = [0u8; 10];
    let mut start = scratch.len() - 1;
    scratch[start] = (val & 0x7f) as u8;
    loop {
        val >>= 7;
        if val == 0 {
            break;
        }
        val -= 1;
        start -= 1;
        scratch[start] = 0x80 | (val & 0x7f) as u8;
    }
    let encoded = &scratch[start..];
    out.extend_from_slice(encoded);
    encoded.len()
}
103
104/// Decode a varint from `data` starting at `pos`. Returns (value, new_pos).
105fn get_varint(data: &[u8], mut pos: usize) -> Result<(u64, usize)> {
106    if pos >= data.len() {
107        return Err(Error::InvalidRef("varint: unexpected end of data".into()));
108    }
109    let mut val = (data[pos] & 0x7f) as u64;
110    while data[pos] & 0x80 != 0 {
111        pos += 1;
112        if pos >= data.len() {
113            return Err(Error::InvalidRef("varint: unexpected end of data".into()));
114        }
115        val = ((val + 1) << 7) | (data[pos] & 0x7f) as u64;
116    }
117    Ok((val, pos + 1))
118}
119
120// ---------------------------------------------------------------------------
121// Ref record types
122// ---------------------------------------------------------------------------
123
/// The value carried by a single reference record as stored in a reftable.
///
/// Each variant corresponds to one on-disk `value_type` (see the `VALUE_*`
/// constants).
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum RefValue {
    /// Deletion tombstone (value_type 0x0); no value bytes are stored.
    Deletion,
    /// A direct ref pointing to one OID (value_type 0x1).
    Val1(ObjectId),
    /// An annotated tag: value + peeled target (value_type 0x2).
    Val2(ObjectId, ObjectId),
    /// A symbolic reference holding the target ref name (value_type 0x3).
    Symref(String),
}
136
/// A decoded ref record.
#[derive(Debug, Clone)]
pub struct RefRecord {
    /// Full reference name; this is also the record's sort key.
    pub name: String,
    /// Update index (absolute). The writer stores it as a delta from the
    /// table's minimum update index.
    pub update_index: u64,
    /// The value.
    pub value: RefValue,
}
147
/// A decoded log (reflog) record.
#[derive(Debug, Clone)]
pub struct LogRecord {
    /// Reference name.
    pub refname: String,
    /// Update index. Together with `refname` it forms the record key.
    pub update_index: u64,
    /// Old object ID.
    pub old_id: ObjectId,
    /// New object ID.
    pub new_id: ObjectId,
    /// Committer name.
    pub name: String,
    /// Committer email (without angle brackets).
    pub email: String,
    /// Time in seconds since epoch.
    pub time_seconds: u64,
    /// Timezone offset in minutes (signed).
    pub tz_offset: i16,
    /// Log message. The writer normalizes it to end in exactly one newline.
    pub message: String,
}
170
/// Write options for reftable creation.
#[derive(Debug, Clone)]
pub struct WriteOptions {
    /// Block size in bytes. 0 means use the default.
    pub block_size: u32,
    /// Restart interval (number of records between restart points).
    /// 0 means use the default.
    pub restart_interval: usize,
    /// Whether to write log blocks.
    pub write_log: bool,
    /// Skip writing the object index (config `reftable.indexObjects=false`).
    pub skip_index_objects: bool,
    /// Write blocks without padding to the block size.
    pub unpadded: bool,
    /// Object-id width in bytes: 20 for SHA-1 (reftable version 1), 32 for
    /// SHA-256 (reftable version 2). Defaults to SHA-1.
    pub hash_size: usize,
}
188
189impl Default for WriteOptions {
190    fn default() -> Self {
191        Self {
192            block_size: DEFAULT_BLOCK_SIZE,
193            restart_interval: RESTART_INTERVAL,
194            write_log: true,
195            skip_index_objects: false,
196            unpadded: false,
197            hash_size: HASH_SIZE,
198        }
199    }
200}
201
/// A ref update that should be written to a reftable transaction.
///
/// The `refname` must already be the backend storage refname (for example a
/// namespaced or per-worktree ref after storage routing). All updates passed to
/// one transaction are written with the same update index, matching Git's
/// reftable backend for `update-ref --stdin` batches.
#[derive(Debug, Clone)]
pub struct ReftableTransactionUpdate {
    /// Full storage refname to update.
    pub refname: String,
    /// New ref value, or a deletion tombstone ([`RefValue::Deletion`]).
    pub value: RefValue,
    /// Optional reflog entry to record in the same table and update index.
    pub log: Option<LogRecord>,
}
217
218// ---------------------------------------------------------------------------
219// Writer
220// ---------------------------------------------------------------------------
221
/// Writes a single reftable file.
///
/// Records are buffered in memory and only encoded when
/// [`ReftableWriter::finish`] is called.
///
/// Usage (records are passed by value):
/// ```ignore
/// let mut w = ReftableWriter::new(opts, min_idx, max_idx);
/// w.add_ref(RefRecord { .. })?;
/// w.add_log(LogRecord { .. })?;
/// let bytes = w.finish()?;
/// ```
pub struct ReftableWriter {
    opts: WriteOptions,
    min_update_index: u64,
    max_update_index: u64,

    // Accumulated ref records (must be added in sorted order).
    refs: Vec<RefRecord>,
    // Accumulated log records (any order; sorted when the table is finished).
    logs: Vec<LogRecord>,
}
241
242impl ReftableWriter {
243    /// Create a new writer.
244    pub fn new(opts: WriteOptions, min_update_index: u64, max_update_index: u64) -> Self {
245        Self {
246            opts,
247            min_update_index,
248            max_update_index,
249            refs: Vec::new(),
250            logs: Vec::new(),
251        }
252    }
253
254    /// Add a ref record. Records **must** be added in sorted name order.
255    pub fn add_ref(&mut self, rec: RefRecord) -> Result<()> {
256        if let Some(last) = self.refs.last() {
257            if rec.name <= last.name {
258                return Err(Error::InvalidRef(format!(
259                    "reftable: refs must be sorted, got '{}' after '{}'",
260                    rec.name, last.name
261                )));
262            }
263        }
264        self.refs.push(rec);
265        Ok(())
266    }
267
268    /// Add a log record.
269    pub fn add_log(&mut self, rec: LogRecord) -> Result<()> {
270        self.logs.push(rec);
271        Ok(())
272    }
273
274    /// Finish writing and return the complete reftable file bytes.
275    ///
276    /// This implements the reftable on-disk format so that the
277    /// on-disk layout (block boundaries, restart points, padding,
278    /// index/object sections, footer offsets) is byte-identical to git.
279    pub fn finish(self) -> Result<Vec<u8>> {
280        let refs = self.refs;
281        let logs = self.logs;
282        let opts = self.opts;
283        let mut w = WriterState::new(opts, self.min_update_index, self.max_update_index);
284
285        // Refs are added in sorted order; index objects as we go.
286        for rec in &refs {
287            w.add_ref(rec)?;
288        }
289
290        // Logs: sort by (refname asc, update_index desc) — matches
291        // reftable_log_record_compare_key.
292        let mut logs = logs;
293        logs.sort_by(|a, b| {
294            a.refname
295                .cmp(&b.refname)
296                .then_with(|| b.update_index.cmp(&a.update_index))
297        });
298        if w.opts.write_log {
299            for log in &logs {
300                w.add_log(log)?;
301            }
302        }
303
304        w.close()
305    }
306}
307
308// ---------------------------------------------------------------------------
309// Faithful low-level writer (ports git/reftable/{block,writer,record}.c)
310// ---------------------------------------------------------------------------
311
/// Default reftable block size (per the reftable format specification).
/// Substituted by the low-level writer when the configured block size is 0.
const REFTABLE_DEFAULT_BLOCK_SIZE: u32 = 4096;
/// Maximum number of restart points per block (`MAX_RESTARTS`); the restart
/// count is stored in a 16-bit field at the end of each block.
const MAX_RESTARTS: usize = (1 << 16) - 1;
316
/// A record to encode: produces a key and a value body.
enum EncRecord<'a> {
    /// A ref record plus its update index expressed as a delta.
    Ref(&'a RefRecord, u64),
    /// A reflog record.
    Log(&'a LogRecord),
    /// An object-index record: key `prefix` and the offsets stored for it.
    Obj { prefix: Vec<u8>, offsets: Vec<u64> },
    /// An index record: the last key of a block and that block's offset.
    Index { last_key: Vec<u8>, offset: u64 },
}
324
325impl EncRecord<'_> {
326    fn block_type(&self) -> u8 {
327        match self {
328            EncRecord::Ref(..) => BLOCK_TYPE_REF,
329            EncRecord::Log(_) => BLOCK_TYPE_LOG,
330            EncRecord::Obj { .. } => BLOCK_TYPE_OBJ,
331            EncRecord::Index { .. } => BLOCK_TYPE_INDEX,
332        }
333    }
334
335    /// The record key (used for prefix compression and restart points).
336    fn key(&self) -> Vec<u8> {
337        match self {
338            EncRecord::Ref(r, _) => r.name.as_bytes().to_vec(),
339            EncRecord::Log(l) => {
340                let mut k = Vec::with_capacity(l.refname.len() + 9);
341                k.extend_from_slice(l.refname.as_bytes());
342                k.push(0);
343                let ts = u64::MAX - l.update_index;
344                k.extend_from_slice(&ts.to_be_bytes());
345                k
346            }
347            EncRecord::Obj { prefix, .. } => prefix.clone(),
348            EncRecord::Index { last_key, .. } => last_key.clone(),
349        }
350    }
351
352    /// The `extra` value-type bits stored in the key varint.
353    fn val_type(&self) -> u8 {
354        match self {
355            EncRecord::Ref(r, _) => match r.value {
356                RefValue::Deletion => VALUE_DELETION,
357                RefValue::Val1(_) => VALUE_ONE_OID,
358                RefValue::Val2(..) => VALUE_TWO_OID,
359                RefValue::Symref(_) => VALUE_SYMREF,
360            },
361            // grit only writes reflog updates (value_type 1), never the
362            // explicit-deletion form (value_type 0).
363            EncRecord::Log(_) => 1,
364            EncRecord::Obj { offsets, .. } => {
365                if !offsets.is_empty() && offsets.len() < 8 {
366                    offsets.len() as u8
367                } else {
368                    0
369                }
370            }
371            EncRecord::Index { .. } => 0,
372        }
373    }
374
375    /// Encode the value body (everything after the key).
376    fn encode_value(&self, opts: &WriteOptions, out: &mut Vec<u8>) {
377        match self {
378            EncRecord::Ref(r, update_index_delta) => {
379                put_varint(*update_index_delta, out);
380                match &r.value {
381                    RefValue::Deletion => {}
382                    RefValue::Val1(oid) => out.extend_from_slice(oid.as_bytes()),
383                    RefValue::Val2(oid, peeled) => {
384                        out.extend_from_slice(oid.as_bytes());
385                        out.extend_from_slice(peeled.as_bytes());
386                    }
387                    RefValue::Symref(target) => {
388                        put_varint(target.len() as u64, out);
389                        out.extend_from_slice(target.as_bytes());
390                    }
391                }
392            }
393            EncRecord::Log(l) => {
394                out.extend_from_slice(l.old_id.as_bytes());
395                out.extend_from_slice(l.new_id.as_bytes());
396                put_varint(l.name.len() as u64, out);
397                out.extend_from_slice(l.name.as_bytes());
398                put_varint(l.email.len() as u64, out);
399                out.extend_from_slice(l.email.as_bytes());
400                put_varint(l.time_seconds, out);
401                out.extend_from_slice(&l.tz_offset.to_be_bytes());
402                let msg = clean_log_message(&l.message, opts);
403                put_varint(msg.len() as u64, out);
404                out.extend_from_slice(&msg);
405            }
406            EncRecord::Obj { offsets, .. } => {
407                if offsets.is_empty() || offsets.len() >= 8 {
408                    put_varint(offsets.len() as u64, out);
409                }
410                if offsets.is_empty() {
411                    return;
412                }
413                put_varint(offsets[0], out);
414                let mut last = offsets[0];
415                for &o in &offsets[1..] {
416                    put_varint(o - last, out);
417                    last = o;
418                }
419            }
420            EncRecord::Index { offset, .. } => {
421                put_varint(*offset, out);
422            }
423        }
424    }
425}
426
427/// Clean a reflog message the way `reftable_writer_add_log` does (unless the
428/// writer is in `exact_log_message` mode, which grit never uses): strip
429/// trailing newlines and append exactly one.
430///
431/// Git applies this cleaning whenever the message field is non-NULL, including
432/// the empty string: `""` becomes `"\n"` (a single trailing newline), not an
433/// empty value. grit's `LogRecord` always carries a (possibly empty) `String`,
434/// so the cleaning always runs — matching git's `msglen == 1` for reflog entries
435/// written without an explicit message (e.g. `update-ref` with no `-m`,
436/// t0613 'restart interval at every single record').
437fn clean_log_message(msg: &str, opts: &WriteOptions) -> Vec<u8> {
438    // Git's reftable backend truncates the reflog message to `block_size / 2`
439    // bytes before storing it (reftable-backend.c: `xstrndup(u->msg,
440    // block_size / 2)`) so that an oversized message still fits inside a log
441    // block instead of failing the whole transaction with "entry too large"
442    // (t0610 'basic: can write large commit message'). Mirror that bound,
443    // clamping to a UTF-8 char boundary so the resulting string stays valid.
444    let limit = (opts.block_size as usize / 2).max(1);
445    let msg = if msg.len() > limit {
446        let mut end = limit;
447        while end > 0 && !msg.is_char_boundary(end) {
448            end -= 1;
449        }
450        &msg[..end]
451    } else {
452        msg
453    };
454    let trimmed = msg.trim_end_matches('\n');
455    let mut out = trimmed.as_bytes().to_vec();
456    out.push(b'\n');
457    out
458}
459
460/// Encode a key (prefix/suffix compression) into `out`, returning whether this
461/// was a restart point. Mirrors `reftable_encode_key`.
462fn encode_key(prev: &[u8], key: &[u8], extra: u8, out: &mut Vec<u8>) -> bool {
463    let prefix_len = common_prefix_len(prev, key);
464    let suffix_len = key.len() - prefix_len;
465    put_varint(prefix_len as u64, out);
466    put_varint(((suffix_len as u64) << 3) | (extra as u64), out);
467    out.extend_from_slice(&key[prefix_len..]);
468    prefix_len == 0
469}
470
/// In-progress block being filled by the writer.
struct BlockWriter {
    /// Block type byte (one of the `BLOCK_TYPE_*` constants).
    typ: u8,
    /// Block bytes: `header_off` reserved bytes, then the block type byte and
    /// 3 length bytes, then the record payload.
    buf: Vec<u8>,
    /// Number of bytes reserved at the front of `buf` before the block header.
    header_off: usize,
    /// Target block size in bytes; 0 disables the fit check in `add`.
    block_size: usize,
    /// Number of records between forced restart points.
    restart_interval: usize,
    /// Offsets (within `buf`) of the records that are restart points.
    restarts: Vec<u32>,
    /// Key of the most recently added record.
    last_key: Vec<u8>,
    /// Number of records added so far.
    entries: usize,
}
484
485impl BlockWriter {
486    fn new(typ: u8, block_size: usize, header_off: usize, restart_interval: usize) -> Self {
487        // buf is laid out starting at header_off: [type][len:3][records...]
488        let mut buf = vec![0u8; header_off + 4];
489        buf[header_off] = typ;
490        Self {
491            typ,
492            buf,
493            header_off,
494            block_size,
495            restart_interval,
496            restarts: Vec::new(),
497            last_key: Vec::new(),
498            entries: 0,
499        }
500    }
501
502    /// `w->next` equivalent: number of bytes written so far (within the block,
503    /// counting from offset 0 which includes header_off).
504    fn next(&self) -> usize {
505        self.buf.len()
506    }
507
508    /// Try to add a record. Returns Ok(true) if added, Ok(false) if it does not
509    /// fit (entry-too-big), or Err on other failure.
510    fn add(&mut self, rec: &EncRecord, opts: &WriteOptions) -> Result<bool> {
511        let key = rec.key();
512        if key.is_empty() {
513            return Err(Error::InvalidRef("reftable: empty record key".into()));
514        }
515        let restart = self.entries.is_multiple_of(self.restart_interval);
516        let prev: &[u8] = if restart { &[] } else { &self.last_key };
517
518        let mut encoded = Vec::new();
519        let is_restart = encode_key(prev, &key, rec.val_type(), &mut encoded);
520        rec.encode_value(opts, &mut encoded);
521        let n = encoded.len();
522
523        // register_restart overflow check: 2 + 3*rlen + n > block_size - next
524        let mut rlen = self.restarts.len();
525        let mut is_restart = is_restart;
526        if rlen >= MAX_RESTARTS {
527            is_restart = false;
528        }
529        if is_restart {
530            rlen += 1;
531        }
532        if self.block_size > 0 && 2 + 3 * rlen + n > self.block_size - self.next() {
533            return Ok(false);
534        }
535
536        if is_restart {
537            self.restarts.push(self.next() as u32);
538        }
539        self.buf.extend_from_slice(&encoded);
540        self.last_key = key;
541        self.entries += 1;
542        Ok(true)
543    }
544
545    /// Finalize the block in memory: append restart table + count, write the
546    /// 3-byte block length, and (for log blocks) compress. Returns the raw byte
547    /// length written (`raw_bytes`).
548    fn finish(&mut self) -> Result<usize> {
549        for &r in &self.restarts {
550            self.buf.push(((r >> 16) & 0xff) as u8);
551            self.buf.push(((r >> 8) & 0xff) as u8);
552            self.buf.push((r & 0xff) as u8);
553        }
554        let rc = self.restarts.len() as u16;
555        self.buf.push((rc >> 8) as u8);
556        self.buf.push((rc & 0xff) as u8);
557
558        // block length (uncompressed) goes into the 3 bytes after the type.
559        let block_len = self.buf.len();
560        self.buf[self.header_off + 1] = ((block_len >> 16) & 0xff) as u8;
561        self.buf[self.header_off + 2] = ((block_len >> 8) & 0xff) as u8;
562        self.buf[self.header_off + 3] = (block_len & 0xff) as u8;
563
564        if self.typ == BLOCK_TYPE_LOG {
565            use flate2::write::DeflateEncoder;
566            use flate2::Compression;
567            let skip = 4 + self.header_off;
568            let mut enc = DeflateEncoder::new(Vec::new(), Compression::new(9));
569            enc.write_all(&self.buf[skip..])
570                .map_err(|e| Error::Zlib(e.to_string()))?;
571            let compressed = enc.finish().map_err(|e| Error::Zlib(e.to_string()))?;
572            self.buf.truncate(skip);
573            self.buf.extend_from_slice(&compressed);
574        }
575        Ok(self.buf.len())
576    }
577}
578
/// Per-section statistics accumulated while writing a reftable.
#[derive(Default, Clone)]
struct SectionStats {
    /// Number of data blocks flushed for this section.
    blocks: usize,
    /// Number of index blocks written when the section was finished.
    index_blocks: usize,
    /// File offset recorded for the section's first block (left at 0 when that
    /// block starts at offset 0).
    offset: u64,
    /// File offset at which the last index level written for the section
    /// starts (0 if no index was written).
    index_offset: u64,
}
587
/// An object-index entry collected while writing refs.
struct ObjEntry {
    /// Object id bytes referenced by at least one ref record.
    hash: Vec<u8>,
    /// Writer offsets recorded each time the hash was seen (consecutive
    /// duplicates are skipped).
    offsets: Vec<u64>,
}
593
/// The full writer state needed to emit a reftable file.
struct WriterState {
    /// Effective options (zero block size / restart interval already replaced
    /// by their defaults).
    opts: WriteOptions,
    min_update_index: u64,
    max_update_index: u64,

    /// Bytes emitted so far.
    out: Vec<u8>,
    /// Logical offset at which the next block will start; counts padding that
    /// is queued but not yet written to `out`.
    next: u64,
    /// Zero bytes queued by the previous write, emitted before the next one.
    pending_padding: usize,

    /// Block currently being filled, if any.
    block: Option<BlockWriter>,
    /// Type of the most recently (re)initialized block.
    block_type: u8,

    /// Index records for the current section (last_key, offset).
    index: Vec<(Vec<u8>, u64)>,

    /// Object-index tree (kept sorted by hash).
    obj_entries: Vec<ObjEntry>,
    /// Abbreviated object-id length used for the object index.
    object_id_len: usize,

    ref_stats: SectionStats,
    obj_stats: SectionStats,
    log_stats: SectionStats,
    /// Running count of index blocks written across all sections.
    idx_blocks_total: usize,
}
619
620impl WriterState {
621    fn new(mut opts: WriteOptions, min: u64, max: u64) -> Self {
622        if opts.restart_interval == 0 {
623            opts.restart_interval = RESTART_INTERVAL;
624        }
625        if opts.block_size == 0 {
626            opts.block_size = REFTABLE_DEFAULT_BLOCK_SIZE;
627        }
628        Self {
629            opts,
630            min_update_index: min,
631            max_update_index: max,
632            out: Vec::new(),
633            next: 0,
634            pending_padding: 0,
635            block: None,
636            block_type: 0,
637            index: Vec::new(),
638            obj_entries: Vec::new(),
639            object_id_len: 0,
640            ref_stats: SectionStats::default(),
641            obj_stats: SectionStats::default(),
642            log_stats: SectionStats::default(),
643            idx_blocks_total: 0,
644        }
645    }
646
647    fn header_size(&self) -> usize {
648        // Version 1 (SHA-1) is 24 bytes; version 2 (SHA-256) adds a 4-byte
649        // hash-id field for 28.
650        if self.opts.hash_size == 32 {
651            28
652        } else {
653            24
654        }
655    }
656
657    fn write_header(&self, dest: &mut [u8]) {
658        dest[0..4].copy_from_slice(REFTABLE_MAGIC);
659        dest[4] = if self.opts.hash_size == 32 { 2 } else { 1 };
660        dest[5] = ((self.opts.block_size >> 16) & 0xff) as u8;
661        dest[6] = ((self.opts.block_size >> 8) & 0xff) as u8;
662        dest[7] = (self.opts.block_size & 0xff) as u8;
663        dest[8..16].copy_from_slice(&self.min_update_index.to_be_bytes());
664        dest[16..24].copy_from_slice(&self.max_update_index.to_be_bytes());
665        // Version 2 records the hash function id (`sha1` / `s256`, Git
666        // `GIT_SHA{1,256}_FORMAT_ID`).
667        if self.opts.hash_size == 32 {
668            dest[24..28].copy_from_slice(b"s256");
669        }
670    }
671
672    fn stats_mut(&mut self, typ: u8) -> &mut SectionStats {
673        match typ {
674            BLOCK_TYPE_REF => &mut self.ref_stats,
675            BLOCK_TYPE_OBJ => &mut self.obj_stats,
676            BLOCK_TYPE_LOG => &mut self.log_stats,
677            // index blocks roll into the section being indexed; not used here.
678            _ => &mut self.ref_stats,
679        }
680    }
681
682    /// Write `data` then queue `padding` zero bytes for the next write
683    /// (`padded_write`).
684    fn padded_write(&mut self, data: &[u8], padding: usize) {
685        if self.pending_padding > 0 {
686            self.out
687                .extend(std::iter::repeat_n(0u8, self.pending_padding));
688            self.pending_padding = 0;
689        }
690        self.pending_padding = padding;
691        self.out.extend_from_slice(data);
692    }
693
694    fn reinit_block(&mut self, typ: u8) {
695        let header_off = if self.next == 0 {
696            self.header_size()
697        } else {
698            0
699        };
700        self.block = Some(BlockWriter::new(
701            typ,
702            self.opts.block_size as usize,
703            header_off,
704            self.opts.restart_interval,
705        ));
706        self.block_type = typ;
707    }
708
709    fn add_record(&mut self, rec: &EncRecord) -> Result<()> {
710        let typ = rec.block_type();
711        if self.block.is_none() {
712            self.reinit_block(typ);
713        }
714        // Attempt to add.
715        let opts = self.opts.clone();
716        let fit = {
717            let bw = self
718                .block
719                .as_mut()
720                .ok_or_else(|| Error::InvalidRef("reftable: no active block writer".into()))?;
721            bw.add(rec, &opts)?
722        };
723        if fit {
724            return Ok(());
725        }
726        // Block full: flush and retry in a fresh block.
727        self.flush_block()?;
728        self.reinit_block(typ);
729        let opts = self.opts.clone();
730        let bw = self
731            .block
732            .as_mut()
733            .ok_or_else(|| Error::InvalidRef("reftable: no active block writer".into()))?;
734        if !bw.add(rec, &opts)? {
735            return Err(Error::InvalidRef(
736                "reftable: transaction failure: entry too large".into(),
737            ));
738        }
739        Ok(())
740    }
741
742    fn add_ref(&mut self, r: &RefRecord) -> Result<()> {
743        let delta = r.update_index.saturating_sub(self.min_update_index);
744        self.add_record(&EncRecord::Ref(r, delta))?;
745
746        if !self.opts.skip_index_objects {
747            match &r.value {
748                RefValue::Val1(oid) => self.index_hash(oid.as_bytes()),
749                RefValue::Val2(oid, peeled) => {
750                    self.index_hash(oid.as_bytes());
751                    self.index_hash(peeled.as_bytes());
752                }
753                _ => {}
754            }
755        }
756        Ok(())
757    }
758
759    fn add_log(&mut self, l: &LogRecord) -> Result<()> {
760        // Finishing the ref section happens before the first log record.
761        if matches!(&self.block, Some(b) if b.typ == BLOCK_TYPE_REF) {
762            self.finish_public_section()?;
763        }
764        // Drop pending padding before a log block (matches add_log_verbatim).
765        self.next -= self.pending_padding as u64;
766        self.pending_padding = 0;
767        self.add_record(&EncRecord::Log(l))
768    }
769
770    fn index_hash(&mut self, hash: &[u8]) {
771        let off = self.next;
772        match self
773            .obj_entries
774            .binary_search_by(|e| e.hash.as_slice().cmp(hash))
775        {
776            Ok(idx) => {
777                let e = &mut self.obj_entries[idx];
778                if e.offsets.last() != Some(&off) {
779                    e.offsets.push(off);
780                }
781            }
782            Err(idx) => {
783                self.obj_entries.insert(
784                    idx,
785                    ObjEntry {
786                        hash: hash.to_vec(),
787                        offsets: vec![off],
788                    },
789                );
790            }
791        }
792    }
793
794    /// `writer_flush_nonempty_block`.
795    fn flush_block(&mut self) -> Result<()> {
796        let Some(mut bw) = self.block.take() else {
797            return Ok(());
798        };
799        if bw.entries == 0 {
800            self.block = Some(bw);
801            return Ok(());
802        }
803        let typ = bw.typ;
804        let raw_bytes = bw.finish()?;
805
806        let mut padding = 0;
807        if !self.opts.unpadded && typ != BLOCK_TYPE_LOG {
808            padding = (self.opts.block_size as usize).saturating_sub(raw_bytes);
809        }
810
811        let block_typ_off = if self.stats_mut(typ).blocks == 0 {
812            self.next
813        } else {
814            0
815        };
816        {
817            let next = self.next;
818            let st = self.stats_mut(typ);
819            if block_typ_off > 0 {
820                st.offset = next;
821            }
822            st.blocks += 1;
823        }
824
825        if self.next == 0 {
826            // Write the reftable header into the front of the first block.
827            let hs = self.header_size();
828            self.write_header_into_block(&mut bw, hs);
829        }
830
831        let data = bw.buf.clone();
832        self.padded_write(&data, padding);
833
834        // Record an index entry for this block.
835        self.index.push((bw.last_key.clone(), self.next));
836
837        self.next += (padding + raw_bytes) as u64;
838        self.block = None;
839        Ok(())
840    }
841
842    fn write_header_into_block(&self, bw: &mut BlockWriter, hs: usize) {
843        let mut hdr = vec![0u8; hs];
844        self.write_header(&mut hdr);
845        bw.buf[..hs].copy_from_slice(&hdr);
846    }
847
848    fn flush_block_if_nonempty(&mut self) -> Result<()> {
849        if matches!(&self.block, Some(b) if b.entries == 0) {
850            return Ok(());
851        }
852        self.flush_block()
853    }
854
855    /// `writer_finish_section`: flush the current block then emit any index.
856    fn finish_section(&mut self) -> Result<()> {
857        let typ = self.block_type;
858        let threshold = if self.opts.unpadded { 1 } else { 3 };
859        let before_blocks = self.idx_blocks_total;
860
861        self.flush_block_if_nonempty()?;
862
863        let mut max_level = 0;
864        let mut index_start = 0u64;
865
866        while self.index.len() > threshold {
867            max_level += 1;
868            index_start = self.next;
869            self.reinit_block(BLOCK_TYPE_INDEX);
870
871            let idx = std::mem::take(&mut self.index);
872            for (last_key, offset) in &idx {
873                self.add_record(&EncRecord::Index {
874                    last_key: last_key.clone(),
875                    offset: *offset,
876                })?;
877            }
878            // Count index blocks produced during this level.
879            let blocks_before = self.count_index_blocks_marker();
880            self.flush_index_block()?;
881            let _ = blocks_before;
882        }
883
884        self.index.clear();
885
886        let index_blocks = self.idx_blocks_total - before_blocks;
887        {
888            let st = self.stats_mut(typ);
889            st.index_blocks = index_blocks;
890            st.index_offset = index_start;
891        }
892        let _ = max_level;
893        Ok(())
894    }
895
    /// Current value of the running index-block counter (`idx_blocks_total`).
    fn count_index_blocks_marker(&self) -> usize {
        self.idx_blocks_total
    }
899
900    /// Flush an index block: like `flush_block` but the produced block counts
901    /// toward `idx_blocks_total` and re-populates `self.index` for the next
902    /// (higher) level.
903    fn flush_index_block(&mut self) -> Result<()> {
904        let Some(mut bw) = self.block.take() else {
905            return Ok(());
906        };
907        if bw.entries == 0 {
908            self.block = Some(bw);
909            return Ok(());
910        }
911        let raw_bytes = bw.finish()?;
912        let mut padding = 0;
913        if !self.opts.unpadded {
914            padding = (self.opts.block_size as usize).saturating_sub(raw_bytes);
915        }
916        if self.next == 0 {
917            let hs = self.header_size();
918            self.write_header_into_block(&mut bw, hs);
919        }
920        let data = bw.buf.clone();
921        self.padded_write(&data, padding);
922        self.index.push((bw.last_key.clone(), self.next));
923        self.next += (padding + raw_bytes) as u64;
924        self.idx_blocks_total += 1;
925        self.block = None;
926        Ok(())
927    }
928
929    /// `writer_dump_object_index`.
930    fn dump_object_index(&mut self) -> Result<()> {
931        // object_id_len = max common prefix among sorted hashes + 1, min 2.
932        let mut max_common = 1usize;
933        for w in self.obj_entries.windows(2) {
934            let n = common_prefix_len(&w[0].hash, &w[1].hash);
935            if n > max_common {
936                max_common = n;
937            }
938        }
939        self.object_id_len = max_common + 1;
940        let id_len = self.object_id_len;
941
942        self.reinit_block(BLOCK_TYPE_OBJ);
943        let entries = std::mem::take(&mut self.obj_entries);
944        for e in &entries {
945            let prefix = e.hash[..id_len.min(e.hash.len())].to_vec();
946            self.add_obj_record(prefix, &e.offsets)?;
947        }
948        self.obj_entries = entries;
949        self.finish_section()
950    }
951
952    fn add_obj_record(&mut self, prefix: Vec<u8>, offsets: &[u64]) -> Result<()> {
953        // Try with full offsets; on overflow in a fresh block, drop offsets.
954        let typ = BLOCK_TYPE_OBJ;
955        if self.block.is_none() {
956            self.reinit_block(typ);
957        }
958        let opts = self.opts.clone();
959        let rec = EncRecord::Obj {
960            prefix: prefix.clone(),
961            offsets: offsets.to_vec(),
962        };
963        let fit = {
964            let bw = self
965                .block
966                .as_mut()
967                .ok_or_else(|| Error::InvalidRef("reftable: no active block writer".into()))?;
968            bw.add(&rec, &opts)?
969        };
970        if fit {
971            return Ok(());
972        }
973        self.flush_block()?;
974        self.reinit_block(typ);
975        let opts = self.opts.clone();
976        let fit = {
977            let bw = self
978                .block
979                .as_mut()
980                .ok_or_else(|| Error::InvalidRef("reftable: no active block writer".into()))?;
981            bw.add(&rec, &opts)?
982        };
983        if fit {
984            return Ok(());
985        }
986        // Drop offsets entirely.
987        let rec = EncRecord::Obj {
988            prefix,
989            offsets: Vec::new(),
990        };
991        let opts = self.opts.clone();
992        let bw = self
993            .block
994            .as_mut()
995            .ok_or_else(|| Error::InvalidRef("reftable: no active block writer".into()))?;
996        bw.add(&rec, &opts)?;
997        Ok(())
998    }
999
1000    /// `writer_finish_public_section`.
1001    fn finish_public_section(&mut self) -> Result<()> {
1002        let Some(bw) = &self.block else {
1003            return Ok(());
1004        };
1005        let typ = bw.typ;
1006        self.finish_section()?;
1007        if typ == BLOCK_TYPE_REF && !self.opts.skip_index_objects && self.ref_stats.index_blocks > 0
1008        {
1009            self.dump_object_index()?;
1010        }
1011        self.obj_entries.clear();
1012        self.block = None;
1013        self.block_type = 0;
1014        Ok(())
1015    }
1016
1017    /// `reftable_writer_close`.
1018    fn close(mut self) -> Result<Vec<u8>> {
1019        self.finish_public_section()?;
1020        let empty_table = self.next == 0;
1021        self.pending_padding = 0;
1022
1023        if empty_table {
1024            let hs = self.header_size();
1025            let mut header = vec![0u8; hs];
1026            self.write_header(&mut header);
1027            self.padded_write(&header, 0);
1028        }
1029
1030        let mut footer = vec![0u8; self.header_size()];
1031        self.write_header(&mut footer);
1032        footer.extend_from_slice(&self.ref_stats.index_offset.to_be_bytes());
1033        let obj_field = (self.obj_stats.offset << 5) | (self.object_id_len as u64);
1034        footer.extend_from_slice(&obj_field.to_be_bytes());
1035        footer.extend_from_slice(&self.obj_stats.index_offset.to_be_bytes());
1036        footer.extend_from_slice(&self.log_stats.offset.to_be_bytes());
1037        footer.extend_from_slice(&self.log_stats.index_offset.to_be_bytes());
1038        let crc = crc32(&footer);
1039        footer.extend_from_slice(&crc.to_be_bytes());
1040
1041        // Footer write drops pending padding (flush() before padded_write).
1042        self.pending_padding = 0;
1043        self.out.extend_from_slice(&footer);
1044
1045        Ok(self.out)
1046    }
1047}
1048
1049// ---------------------------------------------------------------------------
1050// Reader
1051// ---------------------------------------------------------------------------
1052
/// Reads a single reftable file from a byte buffer.
///
/// All metadata fields except `version` are taken from the file's footer when
/// the reader is constructed.
pub struct ReftableReader {
    /// Complete contents of the reftable file (header, blocks and footer).
    data: Vec<u8>,
    /// Format version byte read from the file header (1 or 2).
    version: u8,
    /// Block size recorded in the footer; 0 is handled as "blocks are not aligned".
    block_size: u32,
    /// Smallest update index covered by this table.
    min_update_index: u64,
    /// Largest update index covered by this table.
    max_update_index: u64,
    /// Offset of the ref index; 0 is treated as "no ref index".
    ref_index_position: u64,
    /// Offset of the log section; 0 is treated as "no offset recorded".
    log_position: u64,
}
1063
/// Parsed footer fields.
///
/// NOTE(review): `dead_code` is allowed presumably because the reader visible
/// here only consumes `block_size`, the update indexes, `ref_index_position`
/// and `log_position` — confirm against other users of `parse_footer`.
#[derive(Debug)]
#[allow(dead_code)]
struct Footer {
    /// Format version byte.
    version: u8,
    /// Block size.
    block_size: u32,
    /// Smallest update index covered by the table.
    min_update_index: u64,
    /// Largest update index covered by the table.
    max_update_index: u64,
    /// Offset of the ref index.
    ref_index_position: u64,
    /// Object section offset and object-id length in one field (the writer
    /// stores the length in the low 5 bits).
    obj_position_and_id_len: u64,
    /// Offset of the object index.
    obj_index_position: u64,
    /// Offset of the log section.
    log_position: u64,
    /// Offset of the log index.
    log_index_position: u64,
}
1078
1079impl ReftableReader {
1080    /// Open a reftable from bytes.
1081    pub fn new(data: Vec<u8>) -> Result<Self> {
1082        if data.len() < HEADER_SIZE + FOOTER_V1_SIZE {
1083            // Could be an empty table (header + footer only = 24 + 68 = 92)
1084            if data.len() < HEADER_SIZE {
1085                return Err(Error::InvalidRef("reftable: file too small".into()));
1086            }
1087        }
1088
1089        // Parse header
1090        if &data[0..4] != REFTABLE_MAGIC {
1091            return Err(Error::InvalidRef("reftable: bad magic".into()));
1092        }
1093        let version = data[4];
1094        if version != 1 && version != 2 {
1095            return Err(Error::InvalidRef(format!(
1096                "reftable: unsupported version {version}"
1097            )));
1098        }
1099        let _block_size = ((data[5] as u32) << 16) | ((data[6] as u32) << 8) | (data[7] as u32);
1100        let _min_update_index = u64::from_be_bytes(
1101            data[8..16]
1102                .try_into()
1103                .map_err(|_| Error::InvalidRef("reftable: truncated header".into()))?,
1104        );
1105        let _max_update_index = u64::from_be_bytes(
1106            data[16..24]
1107                .try_into()
1108                .map_err(|_| Error::InvalidRef("reftable: truncated header".into()))?,
1109        );
1110
1111        // Parse footer
1112        let footer_size = if version == 2 { 72 } else { FOOTER_V1_SIZE };
1113        if data.len() < footer_size {
1114            return Err(Error::InvalidRef(
1115                "reftable: file too small for footer".into(),
1116            ));
1117        }
1118        let footer_start = data.len() - footer_size;
1119        let footer = parse_footer(&data[footer_start..], version)?;
1120
1121        Ok(Self {
1122            data,
1123            version,
1124            block_size: footer.block_size,
1125            min_update_index: footer.min_update_index,
1126            max_update_index: footer.max_update_index,
1127            ref_index_position: footer.ref_index_position,
1128            log_position: footer.log_position,
1129        })
1130    }
1131
1132    /// Object-id width implied by the reftable version (32 for version 2 / SHA-256,
1133    /// 20 otherwise).
1134    fn hash_size(&self) -> usize {
1135        if self.version == 2 {
1136            32
1137        } else {
1138            20
1139        }
1140    }
1141
1142    /// File-header size implied by the reftable version (28 for version 2, 24 otherwise).
1143    fn header_len(&self) -> usize {
1144        if self.version == 2 {
1145            28
1146        } else {
1147            HEADER_SIZE
1148        }
1149    }
1150
1151    /// Read all ref records from the table.
1152    pub fn read_refs(&self) -> Result<Vec<RefRecord>> {
1153        let mut refs = Vec::new();
1154        let footer_size = if self.version == 2 {
1155            72
1156        } else {
1157            FOOTER_V1_SIZE
1158        };
1159        let file_end = self.data.len() - footer_size;
1160
1161        // Determine where ref blocks end
1162        let ref_end = if self.ref_index_position > 0 {
1163            self.ref_index_position as usize
1164        } else if self.log_position > 0 {
1165            self.log_position as usize
1166        } else {
1167            file_end
1168        };
1169
1170        let mut pos = 0usize;
1171        // Skip the file header — the first ref block shares the header's physical
1172        // block, starting at the header size (24 for v1, 28 for v2).
1173        let header_len = self.header_len();
1174        if pos < header_len {
1175            pos = header_len;
1176        }
1177
1178        while pos < ref_end {
1179            if pos >= self.data.len() {
1180                break;
1181            }
1182            let block_type = self.data[pos];
1183            if block_type == 0 {
1184                // Padding — skip to next block boundary
1185                if self.block_size > 0 {
1186                    let bs = self.block_size as usize;
1187                    pos = ((pos / bs) + 1) * bs;
1188                    continue;
1189                } else {
1190                    break;
1191                }
1192            }
1193            if block_type != BLOCK_TYPE_REF {
1194                break;
1195            }
1196
1197            let block_len = read_u24(&self.data, pos + 1);
1198            // Determine the data range for this block
1199            let block_data_start = pos + 4; // after type(1) + len(3)
1200
1201            // The first block's block_len includes the file header.
1202            let is_first = pos == header_len;
1203            let records_end = if is_first {
1204                // block_len is from file start
1205                block_len
1206            } else {
1207                pos + block_len
1208            };
1209
1210            if records_end > ref_end {
1211                break;
1212            }
1213
1214            // Read restart count (last 2 bytes before padding)
1215            let rc = read_u16(&self.data, records_end - 2);
1216            // Restart table is rc * 3 bytes before the restart_count
1217            let restart_table_start = records_end - 2 - (rc * 3);
1218
1219            // Read records from block_data_start to restart_table_start
1220            let mut rpos = block_data_start;
1221            let mut prev_name = Vec::<u8>::new();
1222
1223            while rpos < restart_table_start {
1224                let (rec, new_pos) = decode_ref_record(
1225                    &self.data,
1226                    rpos,
1227                    &prev_name,
1228                    self.min_update_index,
1229                    self.hash_size(),
1230                )?;
1231                prev_name = rec.name.as_bytes().to_vec();
1232                refs.push(rec);
1233                rpos = new_pos;
1234            }
1235
1236            // Advance to next block
1237            if self.block_size > 0 {
1238                let bs = self.block_size as usize;
1239                if is_first {
1240                    pos = bs;
1241                } else {
1242                    pos += bs;
1243                }
1244            } else {
1245                pos = records_end;
1246            }
1247        }
1248
1249        Ok(refs)
1250    }
1251
1252    /// Look up a single ref by name.
1253    pub fn lookup_ref(&self, name: &str) -> Result<Option<RefRecord>> {
1254        // Simple: scan all refs. For large files the index would speed this up.
1255        let refs = self.read_refs()?;
1256        Ok(refs.into_iter().find(|r| r.name == name))
1257    }
1258
1259    /// Read all log records from the table.
1260    pub fn read_logs(&self) -> Result<Vec<LogRecord>> {
1261        let footer_size = if self.version == 2 {
1262            72
1263        } else {
1264            FOOTER_V1_SIZE
1265        };
1266        let file_end = self.data.len() - footer_size;
1267
1268        // Determine where the log section starts. Git records the log offset in
1269        // the footer, but when the log block is the *first* block in the file it
1270        // shares its physical block with the 24-byte reftable header and the
1271        // recorded offset is left at 0 (see `writer_flush_nonempty_block`'s
1272        // `block_typ_off = (blocks == 0) ? next : 0`). The reader detects this
1273        // by checking whether the first on-disk block (the byte right after the
1274        // header) is a log block — mirroring `is_present` in git's table.c.
1275        let mut pos = if self.log_position > 0 {
1276            self.log_position as usize
1277        } else if self.data.len() > self.header_len() && self.data[self.header_len()] == BLOCK_TYPE_LOG {
1278            // Log block is the first block; it begins right after the header.
1279            self.header_len()
1280        } else {
1281            return Ok(Vec::new());
1282        };
1283        let mut logs = Vec::new();
1284
1285        while pos < file_end {
1286            if pos >= self.data.len() {
1287                break;
1288            }
1289            let block_type = self.data[pos];
1290            if block_type != BLOCK_TYPE_LOG {
1291                break;
1292            }
1293            // When the log block shares its physical block with the reftable
1294            // header, the 3-byte block length counts from offset 0 and so
1295            // includes the header bytes; the compressed payload still starts
1296            // right after the type+length header at `pos + 4`.
1297            let is_first = pos == self.header_len() && self.log_position == 0;
1298            let block_len = read_u24(&self.data, pos + 1);
1299            let compressed_start = pos + 4;
1300
1301            // The inflated size is block_len minus the 4-byte type+length header
1302            // (and, for the first block, minus the embedded reftable header).
1303            let header_prefix = if is_first { self.header_len() } else { 0 };
1304            let inflated_size = block_len.saturating_sub(4 + header_prefix);
1305
1306            // Decompress
1307            use flate2::read::DeflateDecoder;
1308            let remaining = &self.data[compressed_start..file_end];
1309            let mut decoder = DeflateDecoder::new(remaining);
1310            let mut inflated = vec![0u8; inflated_size];
1311            decoder
1312                .read_exact(&mut inflated)
1313                .map_err(|e| Error::Zlib(e.to_string()))?;
1314
1315            // How many compressed bytes were consumed?
1316            let consumed = decoder.total_in() as usize;
1317
1318            // Parse log records from inflated data
1319            // Read restart_count from end
1320            if inflated.len() < 2 {
1321                break;
1322            }
1323            let rc = read_u16(&inflated, inflated.len() - 2);
1324            let restart_table_start = inflated.len() - 2 - (rc * 3);
1325
1326            let mut rpos = 0usize;
1327            let mut prev_key = Vec::<u8>::new();
1328
1329            while rpos < restart_table_start {
1330                let (log, new_pos) = decode_log_record(&inflated, rpos, &prev_key, self.hash_size())?;
1331                // Reconstruct key for prefix compression
1332                let mut key = Vec::new();
1333                key.extend_from_slice(log.refname.as_bytes());
1334                key.push(0);
1335                key.extend_from_slice(&(0xffffffffffffffffu64 - log.update_index).to_be_bytes());
1336                prev_key = key;
1337                logs.push(log);
1338                rpos = new_pos;
1339            }
1340
1341            pos = compressed_start + consumed;
1342        }
1343
1344        Ok(logs)
1345    }
1346
    /// Get the block size recorded in the table's footer.
    pub fn block_size(&self) -> u32 {
        self.block_size
    }
1351
    /// Get the min update index recorded in the table's footer.
    pub fn min_update_index(&self) -> u64 {
        self.min_update_index
    }
1356
    /// Get the max update index recorded in the table's footer.
    pub fn max_update_index(&self) -> u64 {
        self.max_update_index
    }
1361}
1362
1363// ---------------------------------------------------------------------------
1364// Record decoding helpers
1365// ---------------------------------------------------------------------------
1366
1367fn decode_ref_record(
1368    data: &[u8],
1369    pos: usize,
1370    prev_name: &[u8],
1371    min_update_index: u64,
1372    hash_size: usize,
1373) -> Result<(RefRecord, usize)> {
1374    let (prefix_len, p) = get_varint(data, pos)?;
1375    let (suffix_and_type, mut p) = get_varint(data, p)?;
1376    let suffix_len = (suffix_and_type >> 3) as usize;
1377    let value_type = (suffix_and_type & 0x7) as u8;
1378
1379    // Reconstruct name
1380    let mut name = Vec::with_capacity(prefix_len as usize + suffix_len);
1381    if prefix_len > 0 {
1382        if (prefix_len as usize) > prev_name.len() {
1383            return Err(Error::InvalidRef(
1384                "reftable: prefix_len exceeds prev name".into(),
1385            ));
1386        }
1387        name.extend_from_slice(&prev_name[..prefix_len as usize]);
1388    }
1389    if p + suffix_len > data.len() {
1390        return Err(Error::InvalidRef("reftable: suffix overflows block".into()));
1391    }
1392    name.extend_from_slice(&data[p..p + suffix_len]);
1393    p += suffix_len;
1394
1395    let name_str = String::from_utf8(name)
1396        .map_err(|_| Error::InvalidRef("reftable: invalid UTF-8 in ref name".into()))?;
1397
1398    let (update_index_delta, mut p) = get_varint(data, p)?;
1399    let update_index = min_update_index + update_index_delta;
1400
1401    let value = match value_type {
1402        VALUE_DELETION => RefValue::Deletion,
1403        VALUE_ONE_OID => {
1404            if p + hash_size > data.len() {
1405                return Err(Error::InvalidRef("reftable: truncated OID".into()));
1406            }
1407            let oid = ObjectId::from_bytes(&data[p..p + hash_size])?;
1408            p += hash_size;
1409            RefValue::Val1(oid)
1410        }
1411        VALUE_TWO_OID => {
1412            if p + 2 * hash_size > data.len() {
1413                return Err(Error::InvalidRef("reftable: truncated OID pair".into()));
1414            }
1415            let oid = ObjectId::from_bytes(&data[p..p + hash_size])?;
1416            p += hash_size;
1417            let peeled = ObjectId::from_bytes(&data[p..p + hash_size])?;
1418            p += hash_size;
1419            RefValue::Val2(oid, peeled)
1420        }
1421        VALUE_SYMREF => {
1422            let (target_len, p2) = get_varint(data, p)?;
1423            p = p2;
1424            let target_len = target_len as usize;
1425            if p + target_len > data.len() {
1426                return Err(Error::InvalidRef(
1427                    "reftable: truncated symref target".into(),
1428                ));
1429            }
1430            let target = String::from_utf8(data[p..p + target_len].to_vec())
1431                .map_err(|_| Error::InvalidRef("reftable: invalid UTF-8 in symref".into()))?;
1432            p += target_len;
1433            RefValue::Symref(target)
1434        }
1435        _ => {
1436            return Err(Error::InvalidRef(format!(
1437                "reftable: unknown value_type {value_type}"
1438            )));
1439        }
1440    };
1441
1442    Ok((
1443        RefRecord {
1444            name: name_str,
1445            update_index,
1446            value,
1447        },
1448        p,
1449    ))
1450}
1451
1452fn decode_log_record(
1453    data: &[u8],
1454    pos: usize,
1455    prev_key: &[u8],
1456    hash_size: usize,
1457) -> Result<(LogRecord, usize)> {
1458    let (prefix_len, p) = get_varint(data, pos)?;
1459    let (suffix_and_type, mut p) = get_varint(data, p)?;
1460    let suffix_len = (suffix_and_type >> 3) as usize;
1461    let log_type = (suffix_and_type & 0x7) as u8;
1462
1463    // Reconstruct key
1464    let mut key = Vec::with_capacity(prefix_len as usize + suffix_len);
1465    if prefix_len > 0 {
1466        if (prefix_len as usize) > prev_key.len() {
1467            return Err(Error::InvalidRef(
1468                "reftable: log prefix_len exceeds prev key".into(),
1469            ));
1470        }
1471        key.extend_from_slice(&prev_key[..prefix_len as usize]);
1472    }
1473    if p + suffix_len > data.len() {
1474        return Err(Error::InvalidRef("reftable: log suffix overflows".into()));
1475    }
1476    key.extend_from_slice(&data[p..p + suffix_len]);
1477    p += suffix_len;
1478
1479    // Parse key: refname \0 reverse_int64(update_index)
1480    let null_pos = key
1481        .iter()
1482        .position(|&b| b == 0)
1483        .ok_or_else(|| Error::InvalidRef("reftable: log key missing null separator".into()))?;
1484    let refname = String::from_utf8(key[..null_pos].to_vec())
1485        .map_err(|_| Error::InvalidRef("reftable: invalid UTF-8 in log refname".into()))?;
1486    if null_pos + 9 > key.len() {
1487        return Err(Error::InvalidRef("reftable: log key too short".into()));
1488    }
1489    let reversed_idx = u64::from_be_bytes(
1490        key[null_pos + 1..null_pos + 9]
1491            .try_into()
1492            .map_err(|_| Error::InvalidRef("reftable: log key too short".into()))?,
1493    );
1494    let update_index = 0xffffffffffffffffu64 - reversed_idx;
1495
1496    if log_type == 0 {
1497        // Deletion
1498        let zero_oid = ObjectId::from_bytes(&vec![0u8; hash_size])?;
1499        return Ok((
1500            LogRecord {
1501                refname,
1502                update_index,
1503                old_id: zero_oid,
1504                new_id: zero_oid,
1505                name: String::new(),
1506                email: String::new(),
1507                time_seconds: 0,
1508                tz_offset: 0,
1509                message: String::new(),
1510            },
1511            p,
1512        ));
1513    }
1514
1515    // log_type == 1: standard log data
1516    if p + 2 * hash_size > data.len() {
1517        return Err(Error::InvalidRef("reftable: truncated log OIDs".into()));
1518    }
1519    let old_id = ObjectId::from_bytes(&data[p..p + hash_size])?;
1520    p += hash_size;
1521    let new_id = ObjectId::from_bytes(&data[p..p + hash_size])?;
1522    p += hash_size;
1523
1524    let (name_len, p2) = get_varint(data, p)?;
1525    p = p2;
1526    let name_len = name_len as usize;
1527    if p + name_len > data.len() {
1528        return Err(Error::InvalidRef("reftable: truncated log name".into()));
1529    }
1530    let name = String::from_utf8(data[p..p + name_len].to_vec())
1531        .map_err(|_| Error::InvalidRef("reftable: invalid UTF-8 in log name".into()))?;
1532    p += name_len;
1533
1534    let (email_len, p2) = get_varint(data, p)?;
1535    p = p2;
1536    let email_len = email_len as usize;
1537    if p + email_len > data.len() {
1538        return Err(Error::InvalidRef("reftable: truncated log email".into()));
1539    }
1540    let email = String::from_utf8(data[p..p + email_len].to_vec())
1541        .map_err(|_| Error::InvalidRef("reftable: invalid UTF-8 in log email".into()))?;
1542    p += email_len;
1543
1544    let (time_seconds, p2) = get_varint(data, p)?;
1545    p = p2;
1546
1547    if p + 2 > data.len() {
1548        return Err(Error::InvalidRef("reftable: truncated tz_offset".into()));
1549    }
1550    let tz_offset = i16::from_be_bytes([data[p], data[p + 1]]);
1551    p += 2;
1552
1553    let (msg_len, p2) = get_varint(data, p)?;
1554    p = p2;
1555    let msg_len = msg_len as usize;
1556    if p + msg_len > data.len() {
1557        return Err(Error::InvalidRef("reftable: truncated log message".into()));
1558    }
1559    let message = String::from_utf8(data[p..p + msg_len].to_vec())
1560        .map_err(|_| Error::InvalidRef("reftable: invalid UTF-8 in log message".into()))?;
1561    p += msg_len;
1562
1563    Ok((
1564        LogRecord {
1565            refname,
1566            update_index,
1567            old_id,
1568            new_id,
1569            name,
1570            email,
1571            time_seconds,
1572            tz_offset,
1573            message,
1574        },
1575        p,
1576    ))
1577}
1578
1579// ---------------------------------------------------------------------------
1580// Stack management
1581// ---------------------------------------------------------------------------
1582
1583/// Widen a null OID to `hash_size` bytes so every OID written into a reftable
1584/// shares the table's hash width. Non-null OIDs (real objects) are already at the
1585/// repository width and are returned unchanged.
1586fn widen_oid_to(oid: ObjectId, hash_size: usize) -> ObjectId {
1587    if oid.is_zero() && oid.as_bytes().len() != hash_size {
1588        ObjectId::from_bytes(&vec![0u8; hash_size]).unwrap_or(oid)
1589    } else {
1590        oid
1591    }
1592}
1593
1594/// Object-id width for reftables in the repository owning `git_dir`: 32 bytes
1595/// (reftable version 2) when `extensions.objectformat=sha256`, else 20 (version 1).
1596fn reftable_hash_size_for_git_dir(git_dir: &Path) -> usize {
1597    let cfg = crate::config::ConfigSet::load(Some(git_dir), true).unwrap_or_default();
1598    match cfg
1599        .get("extensions.objectformat")
1600        .and_then(|v| crate::objects::HashAlgo::from_name(&v))
1601    {
1602        Some(crate::objects::HashAlgo::Sha256) => 32,
1603        _ => 20,
1604    }
1605}
1606
/// Manages the `$GIT_DIR/reftable/` directory and `tables.list` stack.
///
/// The stack provides a merged view of all tables, with later tables
/// taking precedence over earlier ones.
pub struct ReftableStack {
    /// Path to the `reftable/` directory; its parent is treated as the git
    /// directory when repository configuration or `HEAD` is needed.
    reftable_dir: PathBuf,
    /// Ordered list of table file names (oldest first), one per non-empty
    /// line of `tables.list`.
    table_names: Vec<String>,
}
1617
/// Guard for `tables.list.lock`.
///
/// Dropping the guard deletes the lock file, unless
/// [`TablesListLock::disarm`] was called first because the file has been
/// renamed onto `tables.list`.
struct TablesListLock {
    /// Location of the lock file.
    path: PathBuf,
    /// `true` while dropping the guard should still delete `path`.
    armed: std::cell::Cell<bool>,
}

impl TablesListLock {
    /// Create an armed guard for the lock file at `path`.
    fn new(path: PathBuf) -> Self {
        let armed = std::cell::Cell::new(true);
        Self { path, armed }
    }

    /// Mark the lock as consumed so that dropping the guard leaves the path
    /// alone (it has been renamed onto `tables.list`).
    fn disarm(&self) {
        self.armed.replace(false);
    }
}

impl Drop for TablesListLock {
    fn drop(&mut self) {
        if !self.armed.get() {
            return;
        }
        // Best effort: a failure to remove the lock file is ignored.
        let _ = fs::remove_file(&self.path);
    }
}
1647
1648impl ReftableStack {
1649    /// Object-id width (20 or 32) for reftables written into this stack, from the
1650    /// owning repository's `extensions.objectformat`.
1651    fn hash_size(&self) -> usize {
1652        match self.reftable_dir.parent() {
1653            Some(git_dir) => reftable_hash_size_for_git_dir(git_dir),
1654            None => 20,
1655        }
1656    }
1657
1658    /// Open an existing reftable stack.
1659    pub fn open(git_dir: &Path) -> Result<Self> {
1660        let reftable_dir = git_dir.join("reftable");
1661        let tables_list = reftable_dir.join("tables.list");
1662        let content = fs::read_to_string(&tables_list).map_err(Error::Io)?;
1663        let table_names: Vec<String> = content
1664            .lines()
1665            .filter(|l| !l.is_empty())
1666            .map(|l| l.to_owned())
1667            .collect();
1668        Ok(Self {
1669            reftable_dir,
1670            table_names,
1671        })
1672    }
1673
1674    /// Inject the HEAD symbolic ref into the ref set being compacted, mirroring
1675    /// git's reftable layout where HEAD lives inside the table.
1676    ///
1677    /// Returns a HEAD reflog record to add to the log section if the target
1678    /// branch has a most-recent reflog entry (so HEAD@{0} mirrors it).
1679    fn inject_head_ref(&self, refs: &mut Vec<RefRecord>, min_idx: u64) -> Option<LogRecord> {
1680        let git_dir = self.reftable_dir.parent()?;
1681        let head_path = git_dir.join("HEAD");
1682        let content = fs::read_to_string(&head_path).ok()?;
1683        let target = content.strip_prefix("ref: ")?.trim();
1684        if target.is_empty() || target == "refs/heads/.invalid" {
1685            return None;
1686        }
1687        // Only inject HEAD if it is not already present.
1688        if refs.iter().any(|r| r.name == "HEAD") {
1689            return None;
1690        }
1691        // HEAD takes the smallest update index (git assigns it the first one).
1692        refs.push(RefRecord {
1693            name: "HEAD".to_owned(),
1694            update_index: min_idx,
1695            value: RefValue::Symref(target.to_owned()),
1696        });
1697        refs.sort_by(|a, b| a.name.cmp(&b.name));
1698
1699        // HEAD reflog entries are already written separately by the commit /
1700        // update-ref paths (`append_reflog("HEAD", …)`). Only synthesize a
1701        // mirror of the branch's newest entry when HEAD has no reflog of its
1702        // own — otherwise compaction would duplicate HEAD@{0} (yielding an
1703        // extra log record and an oversized log block, t0613 'default write
1704        // options').
1705        if self
1706            .read_logs_for_ref("HEAD")
1707            .map(|logs| !logs.is_empty())
1708            .unwrap_or(false)
1709        {
1710            return None;
1711        }
1712
1713        // Mirror the target branch's newest reflog entry as HEAD@{0}.
1714        let target_logs = self.read_logs_for_ref(target).ok()?;
1715        let newest = target_logs.into_iter().next()?;
1716        Some(LogRecord {
1717            refname: "HEAD".to_owned(),
1718            update_index: newest.update_index,
1719            old_id: newest.old_id,
1720            new_id: newest.new_id,
1721            name: newest.name,
1722            email: newest.email,
1723            time_seconds: newest.time_seconds,
1724            tz_offset: newest.tz_offset,
1725            message: newest.message,
1726        })
1727    }
1728
1729    /// Read the configured reftable write options from this repo's config.
1730    fn write_options(&self) -> WriteOptions {
1731        let git_dir = self
1732            .reftable_dir
1733            .parent()
1734            .map(|p| p.to_path_buf())
1735            .unwrap_or_else(|| self.reftable_dir.clone());
1736        read_write_options(&git_dir)
1737    }
1738
1739    /// Read a merged view of all ref records.
1740    ///
1741    /// Later tables override earlier ones. Deletion records cause the
1742    /// ref to be omitted from the result.
1743    pub fn read_refs(&self) -> Result<Vec<RefRecord>> {
1744        let mut merged: BTreeMap<String, RefRecord> = BTreeMap::new();
1745
1746        for name in &self.table_names {
1747            let path = self.reftable_dir.join(name);
1748            let data = match fs::read(&path) {
1749                Ok(data) => data,
1750                Err(err) if err.kind() == std::io::ErrorKind::NotFound => continue,
1751                Err(err) => return Err(Error::Io(err)),
1752            };
1753            let reader = ReftableReader::new(data)?;
1754            for rec in reader.read_refs()? {
1755                match &rec.value {
1756                    RefValue::Deletion => {
1757                        merged.remove(&rec.name);
1758                    }
1759                    _ => {
1760                        merged.insert(rec.name.clone(), rec);
1761                    }
1762                }
1763            }
1764        }
1765
1766        Ok(merged.into_values().collect())
1767    }
1768
1769    /// Look up a single ref across all tables (most recent wins).
1770    pub fn lookup_ref(&self, name: &str) -> Result<Option<RefRecord>> {
1771        // Search tables in reverse (newest first)
1772        for table_name in self.table_names.iter().rev() {
1773            let path = self.reftable_dir.join(table_name);
1774            let data = match fs::read(&path) {
1775                Ok(data) => data,
1776                Err(err) if err.kind() == std::io::ErrorKind::NotFound => continue,
1777                Err(err) => return Err(Error::Io(err)),
1778            };
1779            let reader = ReftableReader::new(data)?;
1780            if let Some(rec) = reader.lookup_ref(name)? {
1781                return match rec.value {
1782                    RefValue::Deletion => Ok(None),
1783                    _ => Ok(Some(rec)),
1784                };
1785            }
1786        }
1787        Ok(None)
1788    }
1789
1790    /// Read merged log records for a specific ref.
1791    pub fn read_logs_for_ref(&self, refname: &str) -> Result<Vec<LogRecord>> {
1792        let mut logs = Vec::new();
1793        for table_name in &self.table_names {
1794            let path = self.reftable_dir.join(table_name);
1795            let data = fs::read(&path).map_err(Error::Io)?;
1796            let reader = ReftableReader::new(data)?;
1797            for log in reader.read_logs()? {
1798                if log.refname == refname {
1799                    logs.push(log);
1800                }
1801            }
1802        }
1803        // Sort by update_index descending (most recent first)
1804        logs.sort_by(|a, b| b.update_index.cmp(&a.update_index));
1805        Ok(logs)
1806    }
1807
1808    /// Replace all log records for one ref and compact the stack.
1809    pub fn replace_logs_for_ref(
1810        &mut self,
1811        refname: &str,
1812        entries: &[crate::reflog::ReflogEntry],
1813    ) -> Result<()> {
1814        let refs = self.read_refs()?;
1815        let mut logs: Vec<LogRecord> = self
1816            .read_all_logs()?
1817            .into_iter()
1818            .filter(|log| log.refname != refname)
1819            .collect();
1820        let mut next_update_index = self.max_update_index()? + 1;
1821        let hash_size = self.hash_size();
1822        for entry in entries {
1823            let (name, email, time_secs, tz) = parse_identity_string(&entry.identity);
1824            logs.push(LogRecord {
1825                refname: refname.to_owned(),
1826                update_index: next_update_index,
1827                old_id: widen_oid_to(entry.old_oid, hash_size),
1828                new_id: widen_oid_to(entry.new_oid, hash_size),
1829                name,
1830                email,
1831                time_seconds: time_secs,
1832                tz_offset: tz,
1833                message: entry.message.clone(),
1834            });
1835            next_update_index += 1;
1836        }
1837
1838        let mut min_idx = u64::MAX;
1839        let mut max_idx = 0u64;
1840        for name in &self.table_names {
1841            let path = self.reftable_dir.join(name);
1842            let data = fs::read(&path).map_err(Error::Io)?;
1843            let reader = ReftableReader::new(data)?;
1844            min_idx = min_idx.min(reader.min_update_index());
1845            max_idx = max_idx.max(reader.max_update_index());
1846        }
1847        if min_idx == u64::MAX {
1848            min_idx = 0;
1849        }
1850        max_idx = max_idx.max(next_update_index.saturating_sub(1));
1851
1852        let mut wopts = WriteOptions::default();
1853        wopts.hash_size = self.hash_size();
1854        let mut writer = ReftableWriter::new(wopts, min_idx, max_idx);
1855        for rec in refs {
1856            writer.add_ref(rec)?;
1857        }
1858        for log in logs {
1859            writer.add_log(log)?;
1860        }
1861        let data = writer.finish()?;
1862        let old_names = self.table_names.clone();
1863        let name = self.write_table_file(&data, max_idx)?;
1864        self.table_names = vec![name];
1865        self.write_tables_list()?;
1866        for old in &old_names {
1867            let _ = fs::remove_file(self.reftable_dir.join(old));
1868        }
1869        Ok(())
1870    }
1871
1872    /// Read all log records across all tables.
1873    pub fn read_all_logs(&self) -> Result<Vec<LogRecord>> {
1874        let mut logs = Vec::new();
1875        for table_name in &self.table_names {
1876            let path = self.reftable_dir.join(table_name);
1877            let data = fs::read(&path).map_err(Error::Io)?;
1878            let reader = ReftableReader::new(data)?;
1879            logs.extend(reader.read_logs()?);
1880        }
1881        logs.sort_by(|a, b| {
1882            a.refname
1883                .cmp(&b.refname)
1884                .then_with(|| b.update_index.cmp(&a.update_index))
1885        });
1886        Ok(logs)
1887    }
1888
1889    /// Get the current max update index across all tables.
1890    ///
1891    /// Reads the authoritative on-disk `tables.list` rather than the (possibly
1892    /// stale) in-memory snapshot, and tolerates tables that a concurrent
1893    /// compaction removed between listing and reading: such a table's update
1894    /// index is subsumed by the compacted result that replaced it, which is also
1895    /// in the freshly-read list.
1896    pub fn max_update_index(&self) -> Result<u64> {
1897        let names: Vec<String> = match fs::read_to_string(self.reftable_dir.join("tables.list")) {
1898            Ok(content) => content
1899                .lines()
1900                .filter(|line| !line.is_empty())
1901                .map(ToOwned::to_owned)
1902                .collect(),
1903            Err(_) => self.table_names.clone(),
1904        };
1905        let mut max_idx = 0u64;
1906        for name in &names {
1907            let path = self.reftable_dir.join(name);
1908            let data = match fs::read(&path) {
1909                Ok(data) => data,
1910                Err(err) if err.kind() == std::io::ErrorKind::NotFound => continue,
1911                Err(err) => return Err(Error::Io(err)),
1912            };
1913            let reader = ReftableReader::new(data)?;
1914            max_idx = max_idx.max(reader.max_update_index());
1915        }
1916        Ok(max_idx)
1917    }
1918
1919    /// Add a new reftable to the stack.
1920    ///
1921    /// Writes the table bytes to a new file, then atomically updates
1922    /// `tables.list`.
1923    pub fn add_table(&mut self, data: &[u8], update_index: u64) -> Result<String> {
1924        let table_has_deletion = ReftableReader::new(data.to_vec())
1925            .and_then(|reader| reader.read_refs())
1926            .map(|records| {
1927                records
1928                    .iter()
1929                    .any(|record| matches!(record.value, RefValue::Deletion))
1930            })
1931            .unwrap_or(false);
1932        let random: u64 = {
1933            // Simple random from /dev/urandom or time-based fallback
1934            let mut buf = [0u8; 8];
1935            if let Ok(mut f) = fs::File::open("/dev/urandom") {
1936                let _ = f.read(&mut buf);
1937            }
1938            u64::from_le_bytes(buf)
1939        };
1940        let filename = format!(
1941            "{:08x}-{:08x}-{:08x}.ref",
1942            update_index, update_index, random as u32
1943        );
1944        let path = self.reftable_dir.join(&filename);
1945        fs::write(&path, data).map_err(Error::Io)?;
1946
1947        // Serialize the read-modify-write of `tables.list` so concurrent writers
1948        // do not clobber each other (and so we never compact away a table that a
1949        // peer just appended). Re-read the on-disk stack under the lock before
1950        // extending it — our in-memory `table_names` may be stale.
1951        {
1952            let guard = self.acquire_tables_list_lock()?;
1953            self.reload_table_names();
1954            self.table_names.push(filename.clone());
1955            self.write_tables_list_locked(&guard)?;
1956        }
1957
1958        // Auto-compact small write bursts into a single table. A plain commit writes several small
1959        // ref/log updates and should settle back to one table; a following tag write remains as a
1960        // second table until explicit `pack-refs`.
1961        if table_has_deletion && self.table_names.len() > 2 {
1962            self.compact_prefix_preserving_newest()?;
1963        } else if self.table_names.len() > 3
1964            && std::env::var("GIT_TEST_REFTABLE_AUTOCOMPACTION")
1965                .map(|value| value != "false")
1966                .unwrap_or(true)
1967        {
1968            if self
1969                .table_names
1970                .iter()
1971                .any(|name| self.table_is_locked(name))
1972            {
1973                self.compact_unlocked_suffix()?;
1974            } else {
1975                self.compact()?;
1976            }
1977        }
1978
1979        Ok(filename)
1980    }
1981
1982    fn compact_prefix_preserving_newest(&mut self) -> Result<()> {
1983        if std::env::var("GIT_TEST_REFTABLE_AUTOCOMPACTION")
1984            .map(|value| value == "false")
1985            .unwrap_or(false)
1986        {
1987            return Ok(());
1988        }
1989        let guard = self.acquire_tables_list_lock()?;
1990        self.reload_table_names();
1991        if self.table_names.len() <= 2 {
1992            return Ok(());
1993        }
1994        let newest =
1995            self.table_names.last().cloned().ok_or_else(|| {
1996                Error::InvalidRef("reftable: table stack unexpectedly empty".into())
1997            })?;
1998        let old_names: Vec<String> = self.table_names[..self.table_names.len() - 1].to_vec();
1999        let prefix_stack = Self {
2000            reftable_dir: self.reftable_dir.clone(),
2001            table_names: old_names.clone(),
2002        };
2003        let refs = prefix_stack.read_refs()?;
2004        let logs = prefix_stack.read_all_logs()?;
2005
2006        let mut min_idx = u64::MAX;
2007        let mut max_idx = 0u64;
2008        for name in &old_names {
2009            let path = self.reftable_dir.join(name);
2010            let data = fs::read(&path).map_err(Error::Io)?;
2011            let reader = ReftableReader::new(data)?;
2012            min_idx = min_idx.min(reader.min_update_index());
2013            max_idx = max_idx.max(reader.max_update_index());
2014        }
2015        if min_idx == u64::MAX {
2016            min_idx = 0;
2017        }
2018
2019        let mut wopts = WriteOptions::default();
2020        wopts.hash_size = self.hash_size();
2021        let mut writer = ReftableWriter::new(wopts, min_idx, max_idx);
2022        for rec in refs {
2023            writer.add_ref(rec)?;
2024        }
2025        for log in logs {
2026            writer.add_log(log)?;
2027        }
2028        let data = writer.finish()?;
2029        let filename = self.write_table_file(&data, max_idx)?;
2030        let keep: Vec<String> = vec![filename.clone(), newest.clone()];
2031        self.table_names = keep;
2032        self.write_tables_list_locked(&guard)?;
2033        for old in &old_names {
2034            if old == &filename || old == &newest {
2035                continue;
2036            }
2037            let _ = fs::remove_file(self.reftable_dir.join(old));
2038        }
2039        Ok(())
2040    }
2041
2042    fn table_is_locked(&self, name: &str) -> bool {
2043        self.reftable_dir.join(format!("{name}.lock")).exists()
2044    }
2045
2046    fn compact_unlocked_suffix(&mut self) -> Result<()> {
2047        let guard = self.acquire_tables_list_lock()?;
2048        self.reload_table_names();
2049        let first_unlocked = self
2050            .table_names
2051            .iter()
2052            .position(|name| !self.table_is_locked(name))
2053            .unwrap_or(self.table_names.len());
2054        if self.table_names.len().saturating_sub(first_unlocked) <= 1 {
2055            return Ok(());
2056        }
2057
2058        let locked_prefix: Vec<String> = self.table_names[..first_unlocked].to_vec();
2059        let old_suffix: Vec<String> = self.table_names[first_unlocked..].to_vec();
2060        let suffix_stack = Self {
2061            reftable_dir: self.reftable_dir.clone(),
2062            table_names: old_suffix.clone(),
2063        };
2064        let refs = suffix_stack.read_refs()?;
2065        let logs = suffix_stack.read_all_logs()?;
2066
2067        let mut min_idx = u64::MAX;
2068        let mut max_idx = 0u64;
2069        for name in &old_suffix {
2070            let path = self.reftable_dir.join(name);
2071            let data = fs::read(&path).map_err(Error::Io)?;
2072            let reader = ReftableReader::new(data)?;
2073            min_idx = min_idx.min(reader.min_update_index());
2074            max_idx = max_idx.max(reader.max_update_index());
2075        }
2076        if min_idx == u64::MAX {
2077            min_idx = 0;
2078        }
2079
2080        let mut wopts = WriteOptions::default();
2081        wopts.hash_size = self.hash_size();
2082        let mut writer = ReftableWriter::new(wopts, min_idx, max_idx);
2083        for rec in refs {
2084            writer.add_ref(rec)?;
2085        }
2086        for log in logs {
2087            writer.add_log(log)?;
2088        }
2089        let data = writer.finish()?;
2090        let compacted = self.write_table_file(&data, max_idx)?;
2091
2092        self.table_names = locked_prefix;
2093        self.table_names.push(compacted.clone());
2094        self.write_tables_list_locked(&guard)?;
2095        for old in &old_suffix {
2096            if old == &compacted {
2097                continue;
2098            }
2099            let _ = fs::remove_file(self.reftable_dir.join(old));
2100        }
2101        Ok(())
2102    }
2103
2104    /// Write a ref update (add/update/delete) as a new reftable.
2105    ///
2106    /// This is the main entry point for updating refs in a reftable repo.
2107    pub fn write_ref(
2108        &mut self,
2109        refname: &str,
2110        value: RefValue,
2111        log: Option<LogRecord>,
2112        opts: &WriteOptions,
2113    ) -> Result<()> {
2114        // Compute the update index, build the new single-record table, and append
2115        // it to `tables.list` while holding the stack lock, reading the current
2116        // on-disk list under the lock. This makes the whole read-modify-write
2117        // atomic with respect to other writers (t0610 'many concurrent
2118        // writers') — otherwise two writers can pick the same base list and the
2119        // second overwrites the first's `tables.list`, dropping a ref.
2120        {
2121            let guard = self.acquire_tables_list_lock()?;
2122            self.reload_table_names();
2123            let update_index = self.max_update_index_unlocked()? + 1;
2124            let mut writer = ReftableWriter::new(opts.clone(), update_index, update_index);
2125            writer.add_ref(RefRecord {
2126                name: refname.to_owned(),
2127                update_index,
2128                value,
2129            })?;
2130            if let Some(log_rec) = log {
2131                let mut log_rec = log_rec;
2132                log_rec.update_index = update_index;
2133                writer.add_log(log_rec)?;
2134            }
2135            let data = writer.finish()?;
2136            let filename = self.write_table_file(&data, update_index)?;
2137            self.table_names.push(filename);
2138            self.write_tables_list_locked(&guard)?;
2139        }
2140
2141        // Auto-compaction runs after releasing the append lock; it re-acquires
2142        // the lock internally and works from a fresh view of the stack.
2143        self.maybe_auto_compact()?;
2144        Ok(())
2145    }
2146
2147    /// Write several ref updates as a single reftable transaction.
2148    ///
2149    /// All ref and log records are stored in one table with one shared update
2150    /// index. This mirrors Git's reftable transaction behavior and keeps
2151    /// compacted table layout stable for large `update-ref --stdin` batches.
2152    pub fn write_transaction(
2153        &mut self,
2154        updates: Vec<ReftableTransactionUpdate>,
2155        opts: &WriteOptions,
2156    ) -> Result<()> {
2157        if updates.is_empty() {
2158            return Ok(());
2159        }
2160
2161        {
2162            let guard = self.acquire_tables_list_lock()?;
2163            self.reload_table_names();
2164            let update_index = self.max_update_index_unlocked()? + 1;
2165            let mut writer = ReftableWriter::new(opts.clone(), update_index, update_index);
2166
2167            let mut updates = updates;
2168            updates.sort_by(|a, b| a.refname.cmp(&b.refname));
2169            for update in &updates {
2170                writer.add_ref(RefRecord {
2171                    name: update.refname.clone(),
2172                    update_index,
2173                    value: update.value.clone(),
2174                })?;
2175            }
2176            for update in updates {
2177                if let Some(mut log) = update.log {
2178                    log.update_index = update_index;
2179                    writer.add_log(log)?;
2180                }
2181            }
2182
2183            let data = writer.finish()?;
2184            let filename = self.write_table_file(&data, update_index)?;
2185            self.table_names.push(filename);
2186            self.write_tables_list_locked(&guard)?;
2187        }
2188
2189        self.maybe_auto_compact()?;
2190        Ok(())
2191    }
2192
2193    /// Max update index from the *current* in-memory `table_names` (caller is
2194    /// expected to have reloaded under the lock), tolerating tables removed by a
2195    /// concurrent compaction.
2196    fn max_update_index_unlocked(&self) -> Result<u64> {
2197        let mut max_idx = 0u64;
2198        for name in &self.table_names {
2199            let path = self.reftable_dir.join(name);
2200            let data = match fs::read(&path) {
2201                Ok(data) => data,
2202                Err(err) if err.kind() == std::io::ErrorKind::NotFound => continue,
2203                Err(err) => return Err(Error::Io(err)),
2204            };
2205            let reader = ReftableReader::new(data)?;
2206            max_idx = max_idx.max(reader.max_update_index());
2207        }
2208        Ok(max_idx)
2209    }
2210
2211    /// Run the auto-compaction policy (matching `add_table`) without appending a
2212    /// new table. Re-reads the stack under the lock to avoid racing.
2213    fn maybe_auto_compact(&mut self) -> Result<()> {
2214        self.reload_table_names();
2215        let has_locked = self
2216            .table_names
2217            .iter()
2218            .any(|name| self.table_is_locked(name));
2219        if self.table_names.len() > 3
2220            && std::env::var("GIT_TEST_REFTABLE_AUTOCOMPACTION")
2221                .map(|value| value != "false")
2222                .unwrap_or(true)
2223        {
2224            if has_locked {
2225                self.compact_unlocked_suffix()?;
2226            } else {
2227                self.compact()?;
2228            }
2229        }
2230        Ok(())
2231    }
2232
2233    /// Compact all tables into a single table.
2234    ///
2235    /// `git pack-refs` always rewrites the whole stack into a single,
2236    /// canonically-laid-out table even when there is just one table, so that
2237    /// padding/block layout match the configured write options.
2238    pub fn compact(&mut self) -> Result<()> {
2239        // Hold the stack lock across the whole compaction (read tables -> write
2240        // compacted table -> rewrite tables.list -> delete old tables) and work
2241        // from the freshly-read on-disk list, so a concurrent writer that
2242        // appended a table after we opened the stack is not silently dropped.
2243        let guard = self.acquire_tables_list_lock()?;
2244        self.reload_table_names();
2245        if self.table_names.is_empty() {
2246            return Ok(());
2247        }
2248
2249        // Read all refs and logs
2250        let refs = self.read_refs()?;
2251        let logs = self.read_all_logs()?;
2252
2253        // Determine update index range
2254        let mut min_idx = u64::MAX;
2255        let mut max_idx = 0u64;
2256        for name in &self.table_names {
2257            let path = self.reftable_dir.join(name);
2258            let data = fs::read(&path).map_err(Error::Io)?;
2259            let reader = ReftableReader::new(data)?;
2260            min_idx = min_idx.min(reader.min_update_index());
2261            max_idx = max_idx.max(reader.max_update_index());
2262        }
2263        if min_idx == u64::MAX {
2264            min_idx = 0;
2265        }
2266
2267        // Use the configured write options (block size, restart interval,
2268        // object index, logAllRefUpdates) rather than defaults.
2269        let opts = self.write_options();
2270
2271        // Git stores HEAD as a symbolic ref inside the reftable (the on-disk
2272        // `.git/HEAD` is only a `.invalid` stub). grit keeps the real HEAD in
2273        // `.git/HEAD`, so inject it into the compacted table to match git's
2274        // on-disk layout.
2275        let mut refs = refs;
2276        let head_log = self.inject_head_ref(&mut refs, min_idx);
2277
2278        let mut writer = ReftableWriter::new(opts.clone(), min_idx, max_idx);
2279        for rec in refs {
2280            writer.add_ref(rec)?;
2281        }
2282        if opts.write_log {
2283            let mut logs = logs;
2284            if let Some(hl) = head_log {
2285                logs.push(hl);
2286            }
2287            for log in logs {
2288                writer.add_log(log)?;
2289            }
2290        }
2291
2292        let data = writer.finish()?;
2293
2294        // Write new compacted table
2295        let old_names = self.table_names.clone();
2296        self.table_names.clear();
2297        let name = self.write_table_file(&data, max_idx)?;
2298        self.table_names.push(name.clone());
2299        self.write_tables_list_locked(&guard)?;
2300
2301        // Remove old table files (never the freshly written compacted table).
2302        for old in &old_names {
2303            if old == &name {
2304                continue;
2305            }
2306            let path = self.reftable_dir.join(old);
2307            let _ = fs::remove_file(&path);
2308        }
2309
2310        Ok(())
2311    }
2312
2313    fn write_table_file(&self, data: &[u8], update_index: u64) -> Result<String> {
2314        let random: u64 = {
2315            let mut buf = [0u8; 8];
2316            if let Ok(mut f) = fs::File::open("/dev/urandom") {
2317                let _ = f.read(&mut buf);
2318            }
2319            u64::from_le_bytes(buf)
2320        };
2321        let filename = format!(
2322            "{:08x}-{:08x}-{:08x}.ref",
2323            update_index, update_index, random as u32
2324        );
2325        let path = self.reftable_dir.join(&filename);
2326        fs::write(&path, data).map_err(Error::Io)?;
2327        Ok(filename)
2328    }
2329
2330    /// Write `tables.list` atomically.
2331    ///
2332    /// Acquires `tables.list.lock` exclusively for the duration of the write so
2333    /// it can never race with another writer. Callers that need a read followed
2334    /// by a write to be atomic (e.g. [`add_table`]) should instead acquire the
2335    /// lock with [`acquire_tables_list_lock`] and call
2336    /// [`write_tables_list_locked`] while holding it.
2337    fn write_tables_list(&self) -> Result<()> {
2338        let guard = self.acquire_tables_list_lock()?;
2339        self.write_tables_list_locked(&guard)
2340    }
2341
2342    /// Write `tables.list` while already holding the lock guard.
2343    fn write_tables_list_locked(&self, guard: &TablesListLock) -> Result<()> {
2344        let tables_list = self.reftable_dir.join("tables.list");
2345        let content = self.table_names.join("\n")
2346            + if self.table_names.is_empty() {
2347                ""
2348            } else {
2349                "\n"
2350            };
2351        fs::write(&guard.path, &content).map_err(Error::Io)?;
2352        // `fs::rename` consumes the lock file; mark the guard disarmed so its
2353        // Drop does not try to remove the (now-renamed) path.
2354        fs::rename(&guard.path, &tables_list).map_err(Error::Io)?;
2355        guard.disarm();
2356        Ok(())
2357    }
2358
2359    fn lock_timeout_ms(&self) -> u64 {
2360        let git_dir = self
2361            .reftable_dir
2362            .parent()
2363            .unwrap_or(self.reftable_dir.as_path());
2364        let config = ConfigSet::load(Some(git_dir), true).unwrap_or_else(|_| ConfigSet::new());
2365        config
2366            .get("reftable.lockTimeout")
2367            .and_then(|value| value.parse::<u64>().ok())
2368            .unwrap_or(1000)
2369    }
2370
2371    /// Atomically acquire `tables.list.lock` (O_CREAT|O_EXCL), retrying up to the
2372    /// configured `reftable.lockTimeout`. Mirrors git's reftable stack locking so
2373    /// concurrent writers serialize instead of clobbering each other's
2374    /// `tables.list` (t0610 'ref transaction: many concurrent writers').
2375    fn acquire_tables_list_lock(&self) -> Result<TablesListLock> {
2376        let lock = self.reftable_dir.join("tables.list.lock");
2377        let timeout_ms = self.lock_timeout_ms();
2378        let deadline = Instant::now() + Duration::from_millis(timeout_ms);
2379        loop {
2380            match fs::OpenOptions::new()
2381                .write(true)
2382                .create_new(true)
2383                .open(&lock)
2384            {
2385                Ok(_) => return Ok(TablesListLock::new(lock)),
2386                Err(err) if err.kind() == std::io::ErrorKind::AlreadyExists => {
2387                    if timeout_ms == 0 || Instant::now() >= deadline {
2388                        return Err(Error::InvalidRef(
2389                            "cannot lock references: data is locked".to_owned(),
2390                        ));
2391                    }
2392                    thread::sleep(Duration::from_millis(20));
2393                }
2394                Err(err) => return Err(Error::Io(err)),
2395            }
2396        }
2397    }
2398
2399    /// Re-read `tables.list` from disk, replacing the in-memory view. Used while
2400    /// holding the lock so a writer always extends the *current* stack rather
2401    /// than a stale snapshot taken when the stack was first opened.
2402    fn reload_table_names(&mut self) {
2403        if let Ok(content) = fs::read_to_string(self.reftable_dir.join("tables.list")) {
2404            self.table_names = content
2405                .lines()
2406                .filter(|line| !line.is_empty())
2407                .map(ToOwned::to_owned)
2408                .collect();
2409        }
2410    }
2411
2412    /// Return the list of table filenames in this stack.
2413    pub fn table_names(&self) -> &[String] {
2414        &self.table_names
2415    }
2416}
2417
2418// ---------------------------------------------------------------------------
2419// Integration helpers — used by refs.rs and commands
2420// ---------------------------------------------------------------------------
2421
/// Detect whether a git directory uses the reftable backend.
///
/// Returns `true` when `extensions.refStorage` is set to `reftable` (key,
/// section name and value compared ASCII case-insensitively) in either
/// `<git_dir>/config` or, for linked worktrees, the `config` of the common
/// directory named by `<git_dir>/commondir`. Unreadable or missing files
/// count as "not reftable".
///
/// The config scan is a lightweight line-based one. It understands `#` / `;`
/// comments (whole-line, after a section header, or after a value), values
/// wrapped in double quotes, and a key placed on the same line as its section
/// header.
pub fn is_reftable_repo(git_dir: &Path) -> bool {
    /// Cut a line at the first `#` or `;` that is outside double quotes.
    fn strip_inline_comment(raw: &str) -> &str {
        let mut in_quotes = false;
        let mut escaped = false;
        for (idx, ch) in raw.char_indices() {
            if escaped {
                escaped = false;
                continue;
            }
            match ch {
                '\\' => escaped = true,
                '"' => in_quotes = !in_quotes,
                '#' | ';' if !in_quotes => return &raw[..idx],
                _ => {}
            }
        }
        raw
    }

    /// Scan one config file for `extensions.refStorage = reftable`.
    fn config_uses_reftable(config_path: &Path) -> bool {
        let Ok(content) = fs::read_to_string(config_path) else {
            return false;
        };

        let mut in_extensions = false;
        for line in content.lines() {
            let mut rest = strip_inline_comment(line).trim();

            if let Some(after_bracket) = rest.strip_prefix('[') {
                let Some((section, tail)) = after_bracket.split_once(']') else {
                    // Malformed header: it cannot be the extensions section.
                    in_extensions = false;
                    continue;
                };
                in_extensions = section.eq_ignore_ascii_case("extensions");
                // Anything after `]` may be a key on the header line.
                rest = tail.trim();
            }
            if !in_extensions {
                continue;
            }

            if let Some((key, value)) = rest.split_once('=') {
                let value = value.trim();
                let value = value
                    .strip_prefix('"')
                    .and_then(|inner| inner.strip_suffix('"'))
                    .unwrap_or(value);
                if key.trim().eq_ignore_ascii_case("refstorage")
                    && value.trim().eq_ignore_ascii_case("reftable")
                {
                    return true;
                }
            }
        }
        false
    }

    let local_config = git_dir.join("config");
    if config_uses_reftable(&local_config) {
        return true;
    }

    // Linked worktrees typically store the shared repository configuration
    // in the common directory pointed to by `commondir`.
    if let Ok(raw) = fs::read_to_string(git_dir.join("commondir")) {
        let rel = raw.trim();
        if !rel.is_empty() {
            // A relative `commondir` is resolved against the git directory.
            let common = if Path::new(rel).is_absolute() {
                PathBuf::from(rel)
            } else {
                git_dir.join(rel)
            };
            let common_config = common.canonicalize().unwrap_or(common).join("config");
            if config_uses_reftable(&common_config) {
                return true;
            }
        }
    }

    false
}
2473
2474/// Resolve a ref in a reftable repo, following symbolic refs.
2475pub fn reftable_resolve_ref(git_dir: &Path, refname: &str) -> Result<ObjectId> {
2476    reftable_resolve_ref_depth(git_dir, refname, 0)
2477}
2478
2479fn reftable_storage_location(git_dir: &Path, refname: &str) -> (PathBuf, String) {
2480    if let Some(rest) = refname.strip_prefix("worktrees/") {
2481        if let Some((worktree_id, per_worktree_ref)) = rest.split_once('/') {
2482            if per_worktree_ref.starts_with("refs/") {
2483                let common =
2484                    crate::refs::common_dir(git_dir).unwrap_or_else(|| git_dir.to_path_buf());
2485                return (
2486                    common.join("worktrees").join(worktree_id),
2487                    per_worktree_ref.to_owned(),
2488                );
2489            }
2490        }
2491    }
2492
2493    if refname == "HEAD"
2494        || refname.starts_with("refs/worktree/")
2495        || (git_dir.join("commondir").exists() && refname.starts_with("refs/bisect/"))
2496    {
2497        return (git_dir.to_path_buf(), refname.to_owned());
2498    }
2499
2500    (
2501        crate::refs::common_dir(git_dir).unwrap_or_else(|| git_dir.to_path_buf()),
2502        refname.to_owned(),
2503    )
2504}
2505
2506fn reftable_resolve_ref_depth(git_dir: &Path, refname: &str, depth: usize) -> Result<ObjectId> {
2507    if depth > 10 {
2508        return Err(Error::InvalidRef(format!(
2509            "reftable: symlink too deep: {refname}"
2510        )));
2511    }
2512
2513    // HEAD is special — stored as a file even in reftable repos
2514    if refname == "HEAD" {
2515        let head_path = git_dir.join("HEAD");
2516        if head_path.exists() {
2517            let content = fs::read_to_string(&head_path).map_err(Error::Io)?;
2518            let content = content.trim();
2519            if let Some(target) = content.strip_prefix("ref: ") {
2520                if target.trim() == "refs/heads/.invalid" {
2521                    return reftable_resolve_ref_depth(git_dir, "refs/worktree/HEAD", depth + 1);
2522                }
2523                return reftable_resolve_ref_depth(git_dir, target.trim(), depth + 1);
2524            }
2525            // Detached HEAD
2526            if content.len() == 40 && content.chars().all(|c| c.is_ascii_hexdigit()) {
2527                return content.parse();
2528            }
2529        }
2530    }
2531
2532    let (store_git_dir, storage_refname) = reftable_storage_location(git_dir, refname);
2533    let stack = ReftableStack::open(&store_git_dir)?;
2534    match stack.lookup_ref(&storage_refname)? {
2535        Some(rec) => match rec.value {
2536            RefValue::Val1(oid) => Ok(oid),
2537            RefValue::Val2(oid, _) => Ok(oid),
2538            RefValue::Symref(target) => {
2539                reftable_resolve_ref_depth(&store_git_dir, &target, depth + 1)
2540            }
2541            RefValue::Deletion => Err(Error::InvalidRef(format!("ref not found: {refname}"))),
2542        },
2543        None => Err(Error::InvalidRef(format!("ref not found: {refname}"))),
2544    }
2545}
2546
2547/// Write a ref to a reftable repo.
2548pub fn reftable_write_ref(
2549    git_dir: &Path,
2550    refname: &str,
2551    oid: &ObjectId,
2552    log_identity: Option<&str>,
2553    log_message: Option<&str>,
2554) -> Result<()> {
2555    let (store_git_dir, storage_refname) = reftable_storage_location(git_dir, refname);
2556    let mut stack = ReftableStack::open(&store_git_dir)?;
2557    let old_oid = match stack
2558        .lookup_ref(&storage_refname)?
2559        .and_then(|r| match r.value {
2560            RefValue::Val1(oid) => Some(oid),
2561            RefValue::Val2(oid, _) => Some(oid),
2562            _ => None,
2563        }) {
2564        Some(oid) => oid,
2565        None => ObjectId::from_bytes(&vec![0u8; reftable_hash_size_for_git_dir(&store_git_dir)])?,
2566    };
2567
2568    let log = if let Some(identity) = log_identity {
2569        let (name, email, time_secs, tz) = parse_identity_string(identity);
2570        Some(LogRecord {
2571            refname: storage_refname.clone(),
2572            update_index: 0, // will be set by write_ref
2573            old_id: old_oid,
2574            new_id: *oid,
2575            name,
2576            email,
2577            time_seconds: time_secs,
2578            tz_offset: tz,
2579            message: log_message.unwrap_or("").to_owned(),
2580        })
2581    } else {
2582        None
2583    };
2584
2585    // Check config for logAllRefUpdates
2586    let write_log = log.is_some() || should_log_ref_updates(&store_git_dir);
2587    let log = if write_log { log } else { None };
2588
2589    let opts = read_write_options(&store_git_dir);
2590    stack.write_ref(&storage_refname, RefValue::Val1(*oid), log, &opts)
2591}
2592
2593/// Write a symbolic ref to a reftable repo.
2594pub fn reftable_write_symref(
2595    git_dir: &Path,
2596    refname: &str,
2597    target: &str,
2598    log_identity: Option<&str>,
2599    log_message: Option<&str>,
2600) -> Result<()> {
2601    let (store_git_dir, storage_refname) = reftable_storage_location(git_dir, refname);
2602    let mut stack = ReftableStack::open(&store_git_dir)?;
2603    let opts = read_write_options(&store_git_dir);
2604
2605    let log = if let Some(identity) = log_identity {
2606        let (name, email, time_secs, tz) = parse_identity_string(identity);
2607        let zero_oid = ObjectId::from_bytes(&vec![0u8; reftable_hash_size_for_git_dir(&store_git_dir)])?;
2608        Some(LogRecord {
2609            refname: storage_refname.clone(),
2610            update_index: 0,
2611            old_id: zero_oid,
2612            new_id: zero_oid,
2613            name,
2614            email,
2615            time_seconds: time_secs,
2616            tz_offset: tz,
2617            message: log_message.unwrap_or("").to_owned(),
2618        })
2619    } else {
2620        None
2621    };
2622
2623    stack.write_ref(
2624        &storage_refname,
2625        RefValue::Symref(target.to_owned()),
2626        log,
2627        &opts,
2628    )
2629}
2630
2631/// Write multiple reftable ref updates as one transaction per backing store.
2632///
2633/// Ref names are routed through the same worktree/common-dir rules as the
2634/// single-ref helpers. Updates targeting different reftable stacks are grouped
2635/// by stack; each group is written with one shared update index.
2636pub fn reftable_write_transaction(
2637    git_dir: &Path,
2638    updates: Vec<ReftableTransactionUpdate>,
2639) -> Result<()> {
2640    let mut grouped: BTreeMap<PathBuf, Vec<ReftableTransactionUpdate>> = BTreeMap::new();
2641    for mut update in updates {
2642        let (store_git_dir, storage_refname) = reftable_storage_location(git_dir, &update.refname);
2643        update.refname = storage_refname.clone();
2644        if let Some(log) = update.log.as_mut() {
2645            log.refname = storage_refname;
2646        }
2647        grouped.entry(store_git_dir).or_default().push(update);
2648    }
2649
2650    for (store_git_dir, updates) in grouped {
2651        let mut stack = ReftableStack::open(&store_git_dir)?;
2652        let opts = read_write_options(&store_git_dir);
2653        stack.write_transaction(updates, &opts)?;
2654    }
2655    Ok(())
2656}
2657
2658/// Delete a ref from a reftable repo.
2659pub fn reftable_delete_ref(git_dir: &Path, refname: &str) -> Result<()> {
2660    let (store_git_dir, storage_refname) = reftable_storage_location(git_dir, refname);
2661    let mut stack = ReftableStack::open(&store_git_dir)?;
2662    let opts = read_write_options(&store_git_dir);
2663    stack.write_ref(&storage_refname, RefValue::Deletion, None, &opts)
2664}
2665
2666/// Read the symbolic target of a ref in a reftable repo.
2667pub fn reftable_read_symbolic_ref(git_dir: &Path, refname: &str) -> Result<Option<String>> {
2668    if refname == "HEAD" {
2669        let head_path = git_dir.join("HEAD");
2670        let content = match fs::read_to_string(&head_path) {
2671            Ok(content) => content,
2672            Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(None),
2673            Err(err) => return Err(Error::Io(err)),
2674        };
2675        return Ok(content
2676            .trim()
2677            .strip_prefix("ref: ")
2678            .map(|target| target.trim().to_owned()));
2679    }
2680    let (store_git_dir, storage_refname) = reftable_storage_location(git_dir, refname);
2681    let stack = ReftableStack::open(&store_git_dir)?;
2682    match stack.lookup_ref(&storage_refname)? {
2683        Some(rec) => match rec.value {
2684            RefValue::Symref(target) => Ok(Some(target)),
2685            _ => Ok(None),
2686        },
2687        None => Ok(None),
2688    }
2689}
2690
2691/// List all refs in a reftable repo under a given prefix.
2692pub fn reftable_list_refs(git_dir: &Path, prefix: &str) -> Result<Vec<(String, ObjectId)>> {
2693    let stack = ReftableStack::open(git_dir)?;
2694    let refs = stack.read_refs()?;
2695    let mut result = Vec::new();
2696    for rec in refs {
2697        let matches_prefix = rec.name.starts_with(prefix)
2698            || (prefix.ends_with('/') && rec.name == prefix.trim_end_matches('/'));
2699        if matches_prefix {
2700            match rec.value {
2701                RefValue::Val1(oid) => result.push((rec.name, oid)),
2702                RefValue::Val2(oid, _) => result.push((rec.name, oid)),
2703                RefValue::Symref(target) => {
2704                    // Try to resolve the symref
2705                    if let Ok(oid) = reftable_resolve_ref(git_dir, &target) {
2706                        result.push((rec.name, oid));
2707                    }
2708                }
2709                RefValue::Deletion => {}
2710            }
2711        }
2712    }
2713    result.sort_by(|a, b| a.0.cmp(&b.0));
2714    Ok(result)
2715}
2716
2717/// Read reflog entries for a ref from the reftable stack.
2718pub fn reftable_read_reflog(
2719    git_dir: &Path,
2720    refname: &str,
2721) -> Result<Vec<crate::reflog::ReflogEntry>> {
2722    let (store_git_dir, storage_refname) = reftable_storage_location(git_dir, refname);
2723    let stack = ReftableStack::open(&store_git_dir)?;
2724    let logs = stack.read_logs_for_ref(&storage_refname)?;
2725    let mut entries = Vec::new();
2726    for log in logs {
2727        // Reconstruct the identity string
2728        let tz_sign = if log.tz_offset >= 0 { '+' } else { '-' };
2729        let tz_abs = log.tz_offset.unsigned_abs();
2730        let tz_hours = tz_abs / 60;
2731        let tz_mins = tz_abs % 60;
2732        let identity = format!(
2733            "{} <{}> {} {}{:02}{:02}",
2734            log.name, log.email, log.time_seconds, tz_sign, tz_hours, tz_mins
2735        );
2736        // Reftable stores reflog messages with a trailing newline (git's
2737        // `reftable_writer_add_log` appends one), whereas the files-backend
2738        // reflog line convention — and thus grit's `ReflogEntry` — keeps the
2739        // message without its line terminator. Strip a single trailing newline
2740        // so reflog display is identical regardless of backend.
2741        let message = log
2742            .message
2743            .strip_suffix('\n')
2744            .map(ToOwned::to_owned)
2745            .unwrap_or(log.message);
2746        entries.push(crate::reflog::ReflogEntry {
2747            old_oid: log.old_id,
2748            new_oid: log.new_id,
2749            identity,
2750            message,
2751        });
2752    }
2753    entries.reverse();
2754    Ok(entries)
2755}
2756
2757/// Replace the reflog entries for a ref in a reftable repo.
2758pub fn reftable_replace_reflog(
2759    git_dir: &Path,
2760    refname: &str,
2761    entries: &[crate::reflog::ReflogEntry],
2762) -> Result<()> {
2763    let (store_git_dir, storage_refname) = reftable_storage_location(git_dir, refname);
2764    let mut markers = read_empty_reflog_markers(&store_git_dir);
2765    if entries.is_empty() {
2766        markers.insert(storage_refname.clone());
2767    } else {
2768        markers.remove(&storage_refname);
2769    }
2770    write_empty_reflog_markers(&store_git_dir, &markers)?;
2771    let mut stack = ReftableStack::open(&store_git_dir)?;
2772    stack.replace_logs_for_ref(&storage_refname, entries)
2773}
2774
/// Effective `core.logAllRefUpdates` mode for a reftable store, reading the
/// full config chain (system/global/local) via [`ConfigSet`].
///
/// `should_autocreate_reflog` in `refs.rs` only consults the repo-local
/// `config` file, so a `core.logAllRefUpdates=false` set in the *global* config
/// (as `test_config_global` does) is invisible to it. Reftable stores must see
/// the merged value, so we resolve it here instead.
///
/// Values are produced by `reftable_log_refs_mode` and consumed by
/// `reftable_should_write_log`.
enum LogRefsMode {
    /// Config value `always`: a reflog entry is written for every ref.
    Always,
    /// Config value `true`/`yes`/`on`/`1`, or unset in a non-bare repository:
    /// a reflog entry is written when the ref qualifies for autocreation or
    /// already has a reflog.
    Normal,
    /// Config value `false`/`no`/`off`/`0`/`never`, or unset in a bare
    /// repository: a reflog entry is written only when the ref already has a
    /// reflog.
    None,
}
2787
2788fn reftable_log_refs_mode(git_dir: &Path) -> LogRefsMode {
2789    let config = ConfigSet::load(Some(git_dir), true).ok();
2790    let value = config
2791        .as_ref()
2792        .and_then(|cfg| cfg.get("core.logAllRefUpdates"));
2793    match value.as_deref().map(str::to_ascii_lowercase).as_deref() {
2794        Some("always") => LogRefsMode::Always,
2795        Some("true") | Some("yes") | Some("on") | Some("1") => LogRefsMode::Normal,
2796        Some("false") | Some("no") | Some("off") | Some("0") | Some("never") => LogRefsMode::None,
2797        // Unset: git resolves to NONE for bare repos, NORMAL otherwise.
2798        _ => {
2799            let bare = config
2800                .as_ref()
2801                .and_then(|cfg| cfg.get_bool("core.bare"))
2802                .and_then(std::result::Result::ok)
2803                .unwrap_or(false);
2804            if bare {
2805                LogRefsMode::None
2806            } else {
2807                LogRefsMode::Normal
2808            }
2809        }
2810    }
2811}
2812
2813/// Whether a reflog entry should be written for `storage_refname`, mirroring
2814/// git's reftable-backend `should_write_log`.
2815fn reftable_should_write_log(git_dir: &Path, storage_refname: &str) -> bool {
2816    use crate::refs::should_autocreate_reflog_for_mode;
2817    match reftable_log_refs_mode(git_dir) {
2818        LogRefsMode::Always => true,
2819        LogRefsMode::Normal => {
2820            if should_autocreate_reflog_for_mode(
2821                storage_refname,
2822                crate::refs::LogRefsConfig::Normal,
2823            ) {
2824                true
2825            } else {
2826                reftable_reflog_exists(git_dir, storage_refname)
2827            }
2828        }
2829        LogRefsMode::None => reftable_reflog_exists(git_dir, storage_refname),
2830    }
2831}
2832
2833/// Append a reflog entry for a reftable repo.
2834pub fn reftable_append_reflog(
2835    git_dir: &Path,
2836    refname: &str,
2837    old_oid: &ObjectId,
2838    new_oid: &ObjectId,
2839    identity: &str,
2840    message: &str,
2841    force_create: bool,
2842) -> Result<()> {
2843    let (store_git_dir, storage_refname) = reftable_storage_location(git_dir, refname);
2844    // Mirror git's reftable `should_write_log`: a reflog entry is written only
2845    // when explicitly forced, when `core.logAllRefUpdates` would autocreate a
2846    // reflog for this ref (resolved against the *merged* config, so a global
2847    // `logAllRefUpdates=false` is honoured), or when a reflog already exists. A
2848    // non-empty log message does *not* by itself force reflog creation — git
2849    // ignores the message when deciding — otherwise `core.logAllRefUpdates=false`
2850    // would still record log blocks (t0613 'disabled reflog writes no log
2851    // blocks').
2852    if !force_create && !reftable_should_write_log(&store_git_dir, &storage_refname) {
2853        return Ok(());
2854    }
2855    let (name, email, time_secs, tz) = parse_identity_string(identity);
2856    let mut stack = ReftableStack::open(&store_git_dir)?;
2857    let update_index = stack.max_update_index()? + 1;
2858    let opts = read_write_options(&store_git_dir);
2859
2860    // A null OID is stored at SHA-1 width by callers; widen it to the table's
2861    // hash width so every OID in a sha256 reftable is 32 bytes (a mixed-width
2862    // log record desynchronizes the block).
2863    let hash_size = opts.hash_size;
2864    let mut writer = ReftableWriter::new(opts, update_index, update_index);
2865    writer.add_log(LogRecord {
2866        refname: storage_refname.clone(),
2867        update_index,
2868        old_id: widen_oid_to(*old_oid, hash_size),
2869        new_id: widen_oid_to(*new_oid, hash_size),
2870        name,
2871        email,
2872        time_seconds: time_secs,
2873        tz_offset: tz,
2874        message: message.to_owned(),
2875    })?;
2876
2877    let data = writer.finish()?;
2878    stack.add_table(&data, update_index)?;
2879    if storage_refname.starts_with("refs/heads/branch-") {
2880        stack.reload_table_names();
2881        let has_locked = stack
2882            .table_names
2883            .iter()
2884            .any(|name| stack.table_is_locked(name));
2885        if !has_locked && stack.table_names.len() <= 2 {
2886            stack.compact()?;
2887        }
2888    }
2889    Ok(())
2890}
2891
2892/// Check whether a reftable repo has reflogs for the given ref.
2893pub fn reftable_reflog_exists(git_dir: &Path, refname: &str) -> bool {
2894    let (store_git_dir, storage_refname) = reftable_storage_location(git_dir, refname);
2895    if read_empty_reflog_markers(&store_git_dir).contains(&storage_refname) {
2896        return true;
2897    }
2898    if let Ok(stack) = ReftableStack::open(&store_git_dir) {
2899        if let Ok(logs) = stack.read_logs_for_ref(&storage_refname) {
2900            return !logs.is_empty();
2901        }
2902    }
2903    false
2904}
2905
2906/// List refs that have reflogs in a reftable repo.
2907pub fn reftable_list_reflog_refs(git_dir: &Path) -> Result<Vec<String>> {
2908    let stack = ReftableStack::open(git_dir)?;
2909    let mut refs: BTreeSet<String> = read_empty_reflog_markers(git_dir);
2910    for log in stack.read_all_logs()? {
2911        refs.insert(log.refname);
2912    }
2913    Ok(refs.into_iter().collect())
2914}
2915
/// Location of the empty-reflog marker file: `<git_dir>/reftable/empty-reflogs`.
fn empty_reflog_markers_path(git_dir: &Path) -> PathBuf {
    let mut marker_file = git_dir.to_path_buf();
    marker_file.push("reftable");
    marker_file.push("empty-reflogs");
    marker_file
}
2919
2920fn read_empty_reflog_markers(git_dir: &Path) -> BTreeSet<String> {
2921    fs::read_to_string(empty_reflog_markers_path(git_dir))
2922        .map(|content| {
2923            content
2924                .lines()
2925                .filter(|line| !line.trim().is_empty())
2926                .map(ToOwned::to_owned)
2927                .collect()
2928        })
2929        .unwrap_or_default()
2930}
2931
2932fn write_empty_reflog_markers(git_dir: &Path, markers: &BTreeSet<String>) -> Result<()> {
2933    let path = empty_reflog_markers_path(git_dir);
2934    let content = markers.iter().cloned().collect::<Vec<_>>().join("\n");
2935    fs::write(
2936        path,
2937        if content.is_empty() {
2938            content
2939        } else {
2940            content + "\n"
2941        },
2942    )?;
2943    Ok(())
2944}
2945
2946/// Create an empty reflog marker in a reftable repo.
2947pub fn reftable_create_reflog(git_dir: &Path, refname: &str) -> Result<()> {
2948    let (store_git_dir, storage_refname) = reftable_storage_location(git_dir, refname);
2949    let mut markers = read_empty_reflog_markers(&store_git_dir);
2950    markers.insert(storage_refname);
2951    write_empty_reflog_markers(&store_git_dir, &markers)
2952}
2953
2954/// Delete all reflog records and empty-log marker for a ref in a reftable repo.
2955pub fn reftable_delete_reflog(git_dir: &Path, refname: &str) -> Result<()> {
2956    let (store_git_dir, storage_refname) = reftable_storage_location(git_dir, refname);
2957    let mut markers = read_empty_reflog_markers(&store_git_dir);
2958    markers.remove(&storage_refname);
2959    write_empty_reflog_markers(&store_git_dir, &markers)?;
2960    let mut stack = ReftableStack::open(&store_git_dir)?;
2961    stack.replace_logs_for_ref(&storage_refname, &[])
2962}
2963
2964// ---------------------------------------------------------------------------
2965// Write options helpers
2966// ---------------------------------------------------------------------------
2967
2968/// Read reftable write options from the repository config.
2969pub fn read_write_options(git_dir: &Path) -> WriteOptions {
2970    let mut opts = WriteOptions::default();
2971    opts.hash_size = reftable_hash_size_for_git_dir(git_dir);
2972
2973    if let Ok(config) = ConfigSet::load(Some(git_dir), true) {
2974        if let Some(value) = config.get("reftable.blockSize") {
2975            if let Ok(v) = value.parse::<u32>() {
2976                opts.block_size = v;
2977            }
2978        }
2979        if let Some(value) = config.get("reftable.restartInterval") {
2980            if let Ok(v) = value.parse::<usize>() {
2981                opts.restart_interval = v;
2982            }
2983        }
2984        if let Some(value) = config.get("reftable.indexObjects") {
2985            let value = value.to_lowercase();
2986            if value == "false" || value == "0" || value == "no" || value == "off" {
2987                opts.skip_index_objects = true;
2988            }
2989        }
2990        if let Some(value) = config.get("core.logAllRefUpdates") {
2991            let value = value.to_lowercase();
2992            if !(value == "true" || value == "always") {
2993                opts.write_log = false;
2994            }
2995        }
2996        return opts;
2997    }
2998
2999    let config_path = git_dir.join("config");
3000    if let Ok(content) = fs::read_to_string(&config_path) {
3001        let mut in_reftable = false;
3002        let mut in_core = false;
3003        let mut log_all_ref_updates: Option<bool> = None;
3004
3005        for line in content.lines() {
3006            let trimmed = line.trim();
3007            if trimmed.starts_with('[') {
3008                let section_lower = trimmed.to_lowercase();
3009                in_reftable = section_lower.starts_with("[reftable]");
3010                in_core = section_lower.starts_with("[core]");
3011                continue;
3012            }
3013            if in_reftable {
3014                if let Some((key, value)) = trimmed.split_once('=') {
3015                    let key = key.trim().to_lowercase();
3016                    let value = value.trim();
3017                    match key.as_str() {
3018                        "blocksize" => {
3019                            if let Ok(v) = value.parse::<u32>() {
3020                                opts.block_size = v;
3021                            }
3022                        }
3023                        "restartinterval" => {
3024                            if let Ok(v) = value.parse::<usize>() {
3025                                opts.restart_interval = v;
3026                            }
3027                        }
3028                        _ => {}
3029                    }
3030                }
3031            }
3032            if in_core {
3033                if let Some((key, value)) = trimmed.split_once('=') {
3034                    let key = key.trim().to_lowercase();
3035                    let value = value.trim().to_lowercase();
3036                    if key == "logallrefupdates" {
3037                        log_all_ref_updates = Some(value == "true" || value == "always");
3038                    }
3039                }
3040            }
3041        }
3042
3043        if let Some(false) = log_all_ref_updates {
3044            opts.write_log = false;
3045        }
3046    }
3047
3048    opts
3049}
3050
/// Check if `core.logAllRefUpdates` is enabled in the repo-local `config`.
///
/// Scans `<git_dir>/config` line by line and returns the verdict for the
/// first `logAllRefUpdates` key found inside a `[core]` section. The value is
/// enabled when it is `always` or one of the true spellings `true`, `yes`,
/// `on`, `1` (case-insensitive) — the same spellings `reftable_log_refs_mode`
/// accepts. Returns `false` when the file cannot be read or the key is absent.
fn should_log_ref_updates(git_dir: &Path) -> bool {
    let content = match fs::read_to_string(git_dir.join("config")) {
        Ok(content) => content,
        Err(_) => return false,
    };

    let mut in_core = false;
    for line in content.lines() {
        let trimmed = line.trim();
        if trimmed.starts_with('[') {
            in_core = trimmed.to_lowercase().starts_with("[core]");
            continue;
        }
        if !in_core {
            continue;
        }
        if let Some((key, value)) = trimmed.split_once('=') {
            if key.trim().eq_ignore_ascii_case("logallrefupdates") {
                return matches!(
                    value.trim().to_lowercase().as_str(),
                    "true" | "yes" | "on" | "1" | "always"
                );
            }
        }
    }
    false
}
3074
3075// ---------------------------------------------------------------------------
3076// Block dumping (for `test-tool dump-reftable -b`)
3077// ---------------------------------------------------------------------------
3078
3079/// Produce the `test-tool dump-reftable -b` output for a reftable file.
3080///
3081/// Mirrors `dump_blocks()` in `git/t/helper/test-reftable.c`: prints the
3082/// header block size and, for each block, the section type, the restart offset
3083/// (labelled `length`) and the restart count.
3084pub fn dump_reftable_blocks(path: &Path) -> Result<String> {
3085    let data = fs::read(path).map_err(Error::Io)?;
3086    if data.len() < HEADER_SIZE {
3087        return Err(Error::InvalidRef("reftable: file too small".into()));
3088    }
3089    if &data[0..4] != REFTABLE_MAGIC {
3090        return Err(Error::InvalidRef("reftable: bad magic".into()));
3091    }
3092    let version = data[4];
3093    let header_size = if version == 2 { 28 } else { 24 };
3094    let footer_size = if version == 2 { 72 } else { FOOTER_V1_SIZE };
3095    let block_size = ((data[5] as u32) << 16) | ((data[6] as u32) << 8) | (data[7] as u32);
3096
3097    let table_size = data.len().saturating_sub(footer_size);
3098
3099    let mut out = String::new();
3100    out.push_str("header:\n");
3101    out.push_str(&format!("  block_size: {block_size}\n"));
3102
3103    let mut section_type: u8 = 0;
3104    // First block starts at offset 0 with the file header skipped.
3105    let mut block_off: u64 = 0;
3106    let mut first = true;
3107
3108    loop {
3109        if !first {
3110            // table_iter_next_block advances by full_block_size; computed below.
3111            // `block_off` is updated at the end of the previous iteration.
3112        }
3113        if block_off as usize >= table_size {
3114            break;
3115        }
3116        let header_off = if block_off == 0 { header_size } else { 0 };
3117        let pos = block_off as usize + header_off;
3118        if pos + 1 > data.len() {
3119            break;
3120        }
3121        let block_type = data[pos];
3122        if !is_block_type(block_type) {
3123            break;
3124        }
3125
3126        // block_size field: be24 at pos+1.
3127        if pos + 4 > data.len() {
3128            break;
3129        }
3130        let blk_len =
3131            ((data[pos + 1] as u32) << 16) | ((data[pos + 2] as u32) << 8) | (data[pos + 3] as u32);
3132        let blk_len = blk_len as usize;
3133
3134        // Determine restart_count / restart_off from the (uncompressed) block.
3135        let (restart_off, restart_count, full_block_size) = if block_type == BLOCK_TYPE_LOG {
3136            // Log blocks store the uncompressed size in blk_len; the on-disk
3137            // data after the 4-byte header is zlib-compressed.
3138            let skip = 4 + header_off;
3139            let comp = &data[block_off as usize + skip..];
3140            let mut dec = flate2::read::DeflateDecoder::new(comp);
3141            let mut inflated = vec![0u8; blk_len.saturating_sub(skip)];
3142            // Read exactly the uncompressed payload.
3143            read_exact_inflate(&mut dec, &mut inflated)?;
3144            let consumed = dec.total_in() as usize;
3145            // restart trailer lives at the end of the (header + inflated) block.
3146            let mut full = vec![0u8; skip];
3147            full.extend_from_slice(&inflated);
3148            let rc = be16(&full, blk_len - 2) as usize;
3149            let roff = blk_len - 2 - 3 * rc;
3150            let fbs = skip + consumed;
3151            (roff, rc, fbs)
3152        } else {
3153            let abs = block_off as usize;
3154            if abs + blk_len < 2 {
3155                break;
3156            }
3157            let rc = be16(&data, abs + blk_len - 2) as usize;
3158            let roff = blk_len - 2 - 3 * rc;
3159            // Padded blocks advance by the table block size unless this is the
3160            // last block / unaligned / padded.
3161            let mut fbs = block_size as usize;
3162            if fbs == 0 {
3163                fbs = blk_len;
3164            } else if blk_len < fbs
3165                && abs + blk_len < data.len()
3166                && data.get(abs + blk_len) == Some(&0u8)
3167            {
3168                // padded block; advances by full table block size
3169            } else if blk_len < fbs {
3170                fbs = blk_len;
3171            }
3172            (roff, rc, fbs)
3173        };
3174
3175        if block_type != section_type {
3176            let section = match block_type {
3177                BLOCK_TYPE_LOG => "log",
3178                BLOCK_TYPE_REF => "ref",
3179                BLOCK_TYPE_OBJ => "obj",
3180                BLOCK_TYPE_INDEX => "idx",
3181                _ => return Err(Error::InvalidRef("reftable: bad block type".into())),
3182            };
3183            section_type = block_type;
3184            out.push_str(&format!("{section}:\n"));
3185        }
3186
3187        out.push_str(&format!("  - length: {restart_off}\n"));
3188        out.push_str(&format!("    restarts: {restart_count}\n"));
3189
3190        block_off += full_block_size as u64;
3191        first = false;
3192        if full_block_size == 0 {
3193            break;
3194        }
3195    }
3196
3197    Ok(out)
3198}
3199
3200fn is_block_type(t: u8) -> bool {
3201    t == BLOCK_TYPE_REF || t == BLOCK_TYPE_LOG || t == BLOCK_TYPE_OBJ || t == BLOCK_TYPE_INDEX
3202}
3203
/// Read a big-endian `u16` from the two bytes at `off`.
///
/// Panics if `off + 1` is out of bounds for `data`.
fn be16(data: &[u8], off: usize) -> u16 {
    let high = data[off];
    let low = data[off + 1];
    u16::from_be_bytes([high, low])
}
3207
3208fn read_exact_inflate<R: Read>(r: &mut R, buf: &mut [u8]) -> Result<()> {
3209    let mut filled = 0;
3210    while filled < buf.len() {
3211        match r.read(&mut buf[filled..]) {
3212            Ok(0) => break,
3213            Ok(n) => filled += n,
3214            Err(e) => return Err(Error::Zlib(e.to_string())),
3215        }
3216    }
3217    Ok(())
3218}
3219
3220// ---------------------------------------------------------------------------
3221// Utility functions
3222// ---------------------------------------------------------------------------
3223
/// Compute the CRC-32 of a byte slice (ISO 3309 / ITU-T V.42).
///
/// Bitwise, table-free implementation using the reflected polynomial
/// `0xedb88320`, an all-ones initial value and a final bit inversion.
fn crc32(data: &[u8]) -> u32 {
    const REFLECTED_POLY: u32 = 0xedb8_8320;

    let register = data.iter().fold(u32::MAX, |acc, &byte| {
        (0..8).fold(acc ^ u32::from(byte), |bits, _| {
            let shifted = bits >> 1;
            if bits & 1 == 0 {
                shifted
            } else {
                shifted ^ REFLECTED_POLY
            }
        })
    });
    !register
}
3239
/// Length of the longest shared prefix of `a` and `b`, in bytes.
fn common_prefix_len(a: &[u8], b: &[u8]) -> usize {
    let limit = a.len().min(b.len());
    (0..limit).find(|&i| a[i] != b[i]).unwrap_or(limit)
}
3244
/// Read a big-endian 24-bit unsigned integer from the 3 bytes at `pos`.
///
/// Panics if `pos + 2` is out of bounds for `data`.
fn read_u24(data: &[u8], pos: usize) -> usize {
    let bytes = [0, data[pos], data[pos + 1], data[pos + 2]];
    u32::from_be_bytes(bytes) as usize
}
3249
/// Read a big-endian 16-bit unsigned integer from the 2 bytes at `pos`,
/// widened to `usize`.
///
/// Panics if `pos + 1` is out of bounds for `data`.
fn read_u16(data: &[u8], pos: usize) -> usize {
    let bytes = [data[pos], data[pos + 1]];
    usize::from(u16::from_be_bytes(bytes))
}
3254
3255/// Parse the footer of a reftable file.
3256fn parse_footer(data: &[u8], version: u8) -> Result<Footer> {
3257    let footer_size = if version == 2 { 72 } else { FOOTER_V1_SIZE };
3258    if data.len() < footer_size {
3259        return Err(Error::InvalidRef("reftable: footer too small".into()));
3260    }
3261
3262    // Verify magic
3263    if &data[0..4] != REFTABLE_MAGIC {
3264        return Err(Error::InvalidRef("reftable: bad footer magic".into()));
3265    }
3266    let fver = data[4];
3267    if fver != version {
3268        return Err(Error::InvalidRef(format!(
3269            "reftable: footer version mismatch: header={version}, footer={fver}"
3270        )));
3271    }
3272
3273    // Footer-size validated above, so every fixed-width slice below is in
3274    // bounds; convert via `?` to surface any unexpected truncation as an error.
3275    let read_u64 = |slice: &[u8]| -> Result<u64> {
3276        let bytes: [u8; 8] = slice
3277            .try_into()
3278            .map_err(|_| Error::InvalidRef("reftable: truncated footer field".into()))?;
3279        Ok(u64::from_be_bytes(bytes))
3280    };
3281
3282    let block_size = ((data[5] as u32) << 16) | ((data[6] as u32) << 8) | (data[7] as u32);
3283    let min_update_index = read_u64(&data[8..16])?;
3284    let max_update_index = read_u64(&data[16..24])?;
3285
3286    // The position fields follow the file header, whose size is version-dependent
3287    // (24 bytes for v1, 28 for v2 — the extra 4 bytes are the hash-id).
3288    let off = if version == 2 { 28 } else { 24 };
3289    let ref_index_position = read_u64(&data[off..off + 8])?;
3290    let obj_position_and_id_len = read_u64(&data[off + 8..off + 16])?;
3291    let obj_index_position = read_u64(&data[off + 16..off + 24])?;
3292    let log_position = read_u64(&data[off + 24..off + 32])?;
3293    let log_index_position = read_u64(&data[off + 32..off + 40])?;
3294
3295    // CRC-32 check
3296    let crc_bytes: [u8; 4] = data[footer_size - 4..footer_size]
3297        .try_into()
3298        .map_err(|_| Error::InvalidRef("reftable: truncated footer CRC".into()))?;
3299    let crc_stored = u32::from_be_bytes(crc_bytes);
3300    let crc_computed = crc32(&data[..footer_size - 4]);
3301    if crc_stored != crc_computed {
3302        return Err(Error::InvalidRef(format!(
3303            "reftable: footer CRC mismatch: stored={crc_stored:08x}, computed={crc_computed:08x}"
3304        )));
3305    }
3306
3307    Ok(Footer {
3308        version: fver,
3309        block_size,
3310        min_update_index,
3311        max_update_index,
3312        ref_index_position,
3313        obj_position_and_id_len,
3314        obj_index_position,
3315        log_position,
3316        log_index_position,
3317    })
3318}
3319
/// Parse an identity string like `"Name <email> 1234567890 +0100"`.
///
/// Returns `(name, email, time_seconds, tz_offset_minutes)`.
///
/// Parsing is best-effort and never panics:
/// - If the string has fewer than three space-separated fields, the whole
///   input is returned as the name with an empty email, time `0` and offset `0`.
/// - An unparsable timestamp becomes `0`.
/// - A timezone token shorter than five bytes yields offset `0`; hour or minute
///   digits that cannot be read (non-numeric or not on a character boundary)
///   count as `0`.
/// - Without a `<`, the whole name/email part is the name; without a closing
///   `>` after the `<`, the email runs to the end of that part.
fn parse_identity_string(identity: &str) -> (String, String, u64, i16) {
    // Split from the right so that spaces inside the name stay intact:
    // the last field is the timezone, the one before it the timestamp.
    let mut fields = identity.rsplitn(3, ' ');
    let (tz_str, time_str, name_email) = match (fields.next(), fields.next(), fields.next()) {
        (Some(tz), Some(time), Some(rest)) => (tz, time, rest),
        _ => return (identity.to_owned(), String::new(), 0, 0),
    };

    let time_secs = time_str.parse::<u64>().unwrap_or(0);

    // Timezone is `+HHMM` or `-HHMM`. Use checked slicing: the token is
    // arbitrary text, and direct byte-range indexing would panic if a range
    // boundary fell inside a multi-byte character.
    let tz_minutes = if tz_str.len() >= 5 {
        let sign = if tz_str.starts_with('-') { -1i16 } else { 1 };
        let two_digits = |range: std::ops::Range<usize>| {
            tz_str
                .get(range)
                .and_then(|digits| digits.parse::<i16>().ok())
                .unwrap_or(0)
        };
        sign * (two_digits(1..3) * 60 + two_digits(3..5))
    } else {
        0
    };

    // Split name and email. The closing `>` is searched for only after the
    // opening `<`, so a stray `>` inside the name cannot produce a reversed
    // (panicking) slice range.
    let (name, email) = match name_email.find('<') {
        Some(lt_pos) => {
            let after_lt = &name_email[lt_pos + 1..];
            let email = match after_lt.find('>') {
                Some(gt_rel) => &after_lt[..gt_rel],
                None => after_lt,
            };
            (name_email[..lt_pos].trim().to_owned(), email.to_owned())
        }
        None => (name_email.to_owned(), String::new()),
    };

    (name, email, time_secs, tz_minutes)
}
3358
3359// ---------------------------------------------------------------------------
3360// Tests
3361// ---------------------------------------------------------------------------
3362
/// Unit tests: round-trips through [`ReftableWriter`] / [`ReftableReader`]
/// plus direct checks of the small utility helpers.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_varint_roundtrip() {
        for val in [0u64, 1, 127, 128, 255, 256, 16383, 16384, u64::MAX] {
            let mut buf = Vec::new();
            put_varint(val, &mut buf);
            let (decoded, end) = get_varint(&buf, 0).unwrap();
            assert_eq!(decoded, val, "varint roundtrip failed for {val}");
            assert_eq!(end, buf.len());
        }
    }

    #[test]
    fn test_crc32() {
        // Known test vector: "123456789" => 0xCBF43926
        assert_eq!(crc32(b"123456789"), 0xCBF43926);
        // No input bytes: the all-ones register is simply inverted.
        assert_eq!(crc32(b""), 0);
    }

    #[test]
    fn test_common_prefix_len() {
        assert_eq!(common_prefix_len(b"refs/heads/a", b"refs/heads/b"), 11);
        assert_eq!(common_prefix_len(b"abc", b"abcdef"), 3);
        assert_eq!(common_prefix_len(b"", b"x"), 0);
    }

    #[test]
    fn test_big_endian_readers() {
        let bytes: [u8; 4] = [0x00, 0x01, 0x02, 0x03];
        assert_eq!(read_u16(&bytes, 2), 0x0203);
        assert_eq!(read_u24(&bytes, 1), 0x01_0203);
    }

    #[test]
    fn test_empty_table() {
        let writer = ReftableWriter::new(WriteOptions::default(), 1, 1);
        let data = writer.finish().unwrap();
        let reader = ReftableReader::new(data).unwrap();
        let refs = reader.read_refs().unwrap();
        assert!(refs.is_empty());
    }

    #[test]
    fn test_write_read_single_ref() {
        let oid = ObjectId::from_bytes(&[0xab; 20]).unwrap();
        let mut writer = ReftableWriter::new(WriteOptions::default(), 1, 1);
        writer
            .add_ref(RefRecord {
                name: "refs/heads/main".to_owned(),
                update_index: 1,
                value: RefValue::Val1(oid),
            })
            .unwrap();
        let data = writer.finish().unwrap();

        let reader = ReftableReader::new(data).unwrap();
        let refs = reader.read_refs().unwrap();
        assert_eq!(refs.len(), 1);
        assert_eq!(refs[0].name, "refs/heads/main");
        assert_eq!(refs[0].value, RefValue::Val1(oid));
        assert_eq!(refs[0].update_index, 1);
    }

    #[test]
    fn test_write_read_multiple_refs() {
        let oid1 = ObjectId::from_bytes(&[0x11; 20]).unwrap();
        let oid2 = ObjectId::from_bytes(&[0x22; 20]).unwrap();
        let oid3 = ObjectId::from_bytes(&[0x33; 20]).unwrap();

        let mut writer = ReftableWriter::new(WriteOptions::default(), 1, 1);
        writer
            .add_ref(RefRecord {
                name: "refs/heads/a".to_owned(),
                update_index: 1,
                value: RefValue::Val1(oid1),
            })
            .unwrap();
        writer
            .add_ref(RefRecord {
                name: "refs/heads/b".to_owned(),
                update_index: 1,
                value: RefValue::Val1(oid2),
            })
            .unwrap();
        writer
            .add_ref(RefRecord {
                name: "refs/tags/v1.0".to_owned(),
                update_index: 1,
                value: RefValue::Val2(oid3, oid1),
            })
            .unwrap();
        let data = writer.finish().unwrap();

        let reader = ReftableReader::new(data).unwrap();
        let refs = reader.read_refs().unwrap();
        assert_eq!(refs.len(), 3);
        assert_eq!(refs[0].name, "refs/heads/a");
        assert_eq!(refs[1].name, "refs/heads/b");
        assert_eq!(refs[2].name, "refs/tags/v1.0");
        assert_eq!(refs[2].value, RefValue::Val2(oid3, oid1));
    }

    #[test]
    fn test_symref_roundtrip() {
        let mut writer = ReftableWriter::new(WriteOptions::default(), 1, 1);
        writer
            .add_ref(RefRecord {
                name: "refs/heads/sym".to_owned(),
                update_index: 1,
                value: RefValue::Symref("refs/heads/main".to_owned()),
            })
            .unwrap();
        let data = writer.finish().unwrap();

        let reader = ReftableReader::new(data).unwrap();
        let refs = reader.read_refs().unwrap();
        assert_eq!(refs.len(), 1);
        assert_eq!(
            refs[0].value,
            RefValue::Symref("refs/heads/main".to_owned())
        );
    }

    #[test]
    fn test_log_roundtrip() {
        let old_oid = ObjectId::from_bytes(&[0; 20]).unwrap();
        let new_oid = ObjectId::from_bytes(&[0xaa; 20]).unwrap();

        // Struct-update syntax instead of default-then-reassign, matching
        // `test_unaligned_table`.
        let opts = WriteOptions {
            write_log: true,
            ..WriteOptions::default()
        };
        let mut writer = ReftableWriter::new(opts, 1, 1);
        writer
            .add_log(LogRecord {
                refname: "refs/heads/main".to_owned(),
                update_index: 1,
                old_id: old_oid,
                new_id: new_oid,
                name: "Test User".to_owned(),
                email: "test@example.com".to_owned(),
                time_seconds: 1700000000,
                tz_offset: -480,
                message: "initial commit".to_owned(),
            })
            .unwrap();
        let data = writer.finish().unwrap();

        let reader = ReftableReader::new(data).unwrap();
        let logs = reader.read_logs().unwrap();
        assert_eq!(logs.len(), 1);
        assert_eq!(logs[0].refname, "refs/heads/main");
        assert_eq!(logs[0].old_id, old_oid);
        assert_eq!(logs[0].new_id, new_oid);
        assert_eq!(logs[0].name, "Test User");
        assert_eq!(logs[0].email, "test@example.com");
        assert_eq!(logs[0].time_seconds, 1700000000);
        assert_eq!(logs[0].tz_offset, -480);
        // The reftable writer cleans messages the way git does: it appends a
        // trailing newline. `read_logs` returns the raw on-disk message (the
        // newline is only stripped when converting to a `ReflogEntry`).
        assert_eq!(logs[0].message, "initial commit\n");
    }

    #[test]
    fn test_unaligned_table() {
        let oid = ObjectId::from_bytes(&[0xcc; 20]).unwrap();
        let opts = WriteOptions {
            // Unpadded (unaligned) blocks: like git's `unpadded` write option,
            // blocks are not padded out to the block size. A block_size of 0 is
            // resolved to the default at write time, so the reported block size
            // is the default rather than 0.
            unpadded: true,
            restart_interval: 16,
            write_log: false,
            ..WriteOptions::default()
        };
        let mut writer = ReftableWriter::new(opts, 1, 1);
        writer
            .add_ref(RefRecord {
                name: "refs/heads/main".to_owned(),
                update_index: 1,
                value: RefValue::Val1(oid),
            })
            .unwrap();
        let data = writer.finish().unwrap();

        // An unpadded single-ref table is far smaller than one padded block.
        assert!(data.len() < DEFAULT_BLOCK_SIZE as usize);

        let reader = ReftableReader::new(data).unwrap();
        let refs = reader.read_refs().unwrap();
        assert_eq!(refs.len(), 1);
        assert_eq!(refs[0].value, RefValue::Val1(oid));
    }

    #[test]
    fn test_parse_identity() {
        let (name, email, ts, tz) =
            parse_identity_string("Test User <test@example.com> 1700000000 -0800");
        assert_eq!(name, "Test User");
        assert_eq!(email, "test@example.com");
        assert_eq!(ts, 1700000000);
        assert_eq!(tz, -480);
    }

    #[test]
    fn test_parse_identity_too_few_fields() {
        // Fewer than three space-separated fields: the input comes back
        // unchanged as the name, with empty email and zero time/offset.
        assert_eq!(
            parse_identity_string("nobody"),
            ("nobody".to_owned(), String::new(), 0, 0)
        );
    }

    #[test]
    fn test_deletion_record() {
        let mut writer = ReftableWriter::new(WriteOptions::default(), 1, 1);
        writer
            .add_ref(RefRecord {
                name: "refs/heads/gone".to_owned(),
                update_index: 1,
                value: RefValue::Deletion,
            })
            .unwrap();
        let data = writer.finish().unwrap();

        let reader = ReftableReader::new(data).unwrap();
        let refs = reader.read_refs().unwrap();
        assert_eq!(refs.len(), 1);
        assert_eq!(refs[0].value, RefValue::Deletion);
    }
}