Skip to main content

grit_lib/
reftable.rs

1//! Reftable format — binary reference storage.
2//!
3//! Implements the [reftable file format](https://git-scm.com/docs/reftable)
4//! for efficient, sorted reference storage.  A reftable file contains
5//! ref blocks (sorted ref records with prefix compression), optional log
6//! blocks (reflog entries), optional index blocks, and a footer.
7//!
8//! # Architecture
9//!
10//! - [`ReftableWriter`] writes a single `.ref` (or `.log`) reftable file.
11//! - [`ReftableReader`] reads and searches a single reftable file.
12//! - [`ReftableStack`] manages the `tables.list` stack, providing a
13//!   merged view of all tables and auto-compaction on writes.
14//!
15//! # On-disk layout
16//!
17//! ```text
18//! first_block { header, first_ref_block }
19//! ref_block*
20//! ref_index?
21//! obj_block*    (not yet implemented)
22//! obj_index?    (not yet implemented)
23//! log_block*
24//! log_index?
25//! footer
26//! ```
27
28use std::collections::{BTreeMap, BTreeSet};
29use std::fs;
30use std::io::{Read, Write};
31use std::path::{Path, PathBuf};
32use std::thread;
33use std::time::{Duration, Instant};
34
35use crate::config::ConfigSet;
36use crate::error::{Error, Result};
37use crate::objects::ObjectId;
38
39// ---------------------------------------------------------------------------
40// Constants
41// ---------------------------------------------------------------------------
42
/// Magic bytes at the start of every reftable file.
const REFTABLE_MAGIC: &[u8; 4] = b"REFT";

/// File header size (version 1): magic(4) + version(1) + block_size(3)
/// + min_update_index(8) + max_update_index(8) = 24 bytes.
const HEADER_SIZE: usize = 24;

/// Footer size for version 1.
const FOOTER_V1_SIZE: usize = 68;

/// Block type: ref block.
const BLOCK_TYPE_REF: u8 = b'r';
/// Block type: index block.
const BLOCK_TYPE_INDEX: u8 = b'i';
/// Block type: log block (zlib-compressed).
const BLOCK_TYPE_LOG: u8 = b'g';
/// Block type: object index block.
const BLOCK_TYPE_OBJ: u8 = b'o';

/// Value types encoded in the low 3 bits of the suffix_length varint.
///
/// Value type 0: deletion tombstone; no value bytes follow the key.
const VALUE_DELETION: u8 = 0;
/// Value type 1: a single object ID follows the key.
const VALUE_ONE_OID: u8 = 1;
/// Value type 2: two object IDs follow the key (value, then peeled target).
const VALUE_TWO_OID: u8 = 2;
/// Value type 3: symbolic ref; a length-prefixed target name follows the key.
const VALUE_SYMREF: u8 = 3;

/// Hash size (SHA-1).
const HASH_SIZE: usize = 20;

/// Default block size when none is configured (4 KiB).
const DEFAULT_BLOCK_SIZE: u32 = 4096;

/// How many records between restart points.
const RESTART_INTERVAL: usize = 16;
76
77// ---------------------------------------------------------------------------
78// Varint encoding (Git pack-style)
79// ---------------------------------------------------------------------------
80
/// Append `val` to `out` as an offset-encoded varint and return the number of
/// bytes appended.
///
/// Bytes are emitted most-significant group first. Every byte except the last
/// has its high bit set, and each continuation group stores "value minus one"
/// so that no value has two different encodings.
fn put_varint(mut val: u64, out: &mut Vec<u8>) -> usize {
    // A u64 never needs more than ten 7-bit groups in this encoding, so the
    // scratch buffer is filled from its tail towards its head.
    let mut scratch = [0u8; 10];
    let mut start = scratch.len() - 1;

    // The final (least significant) byte carries no continuation bit.
    scratch[start] = (val & 0x7f) as u8;
    loop {
        val >>= 7;
        if val == 0 {
            break;
        }
        // Offset encoding: a continuation byte implies "at least one more",
        // so that one is subtracted before storing the group.
        val -= 1;
        start -= 1;
        scratch[start] = 0x80 | (val & 0x7f) as u8;
    }

    let encoded = &scratch[start..];
    out.extend_from_slice(encoded);
    encoded.len()
}
103
104/// Decode a varint from `data` starting at `pos`. Returns (value, new_pos).
105fn get_varint(data: &[u8], mut pos: usize) -> Result<(u64, usize)> {
106    if pos >= data.len() {
107        return Err(Error::InvalidRef("varint: unexpected end of data".into()));
108    }
109    let mut val = (data[pos] & 0x7f) as u64;
110    while data[pos] & 0x80 != 0 {
111        pos += 1;
112        if pos >= data.len() {
113            return Err(Error::InvalidRef("varint: unexpected end of data".into()));
114        }
115        val = ((val + 1) << 7) | (data[pos] & 0x7f) as u64;
116    }
117    Ok((val, pos + 1))
118}
119
120// ---------------------------------------------------------------------------
121// Ref record types
122// ---------------------------------------------------------------------------
123
/// The value carried by a single ref record in a reftable.
///
/// Each variant corresponds to one of the on-disk value types (0x0–0x3).
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum RefValue {
    /// Deletion tombstone (value_type 0x0).
    Deletion,
    /// A direct ref pointing to one OID (value_type 0x1).
    Val1(ObjectId),
    /// An annotated tag: value + peeled target (value_type 0x2).
    Val2(ObjectId, ObjectId),
    /// A symbolic reference (value_type 0x3); the string is the target name.
    Symref(String),
}
136
/// A decoded ref record: a reference name, the update index it was written
/// at, and its value.
#[derive(Debug, Clone)]
pub struct RefRecord {
    /// Full reference name.
    pub name: String,
    /// Update index (absolute). The writer stores it on disk as a delta from
    /// the table's minimum update index.
    pub update_index: u64,
    /// The value.
    pub value: RefValue,
}
147
/// A decoded log (reflog) record.
///
/// Log records are keyed by `refname` and `update_index`; the remaining
/// fields make up the record's value.
#[derive(Debug, Clone)]
pub struct LogRecord {
    /// Reference name.
    pub refname: String,
    /// Update index.
    pub update_index: u64,
    /// Old object ID.
    pub old_id: ObjectId,
    /// New object ID.
    pub new_id: ObjectId,
    /// Committer name.
    pub name: String,
    /// Committer email (without angle brackets).
    pub email: String,
    /// Time in seconds since epoch.
    pub time_seconds: u64,
    /// Timezone offset in minutes (signed).
    pub tz_offset: i16,
    /// Log message. The writer normalizes it to end in exactly one newline
    /// and truncates it to half the block size before storing it.
    pub message: String,
}
170
171/// Write options for reftable creation.
172#[derive(Debug, Clone)]
173pub struct WriteOptions {
174    /// Block size in bytes. 0 means use the default.
175    pub block_size: u32,
176    /// Restart interval (number of records between restart points).
177    pub restart_interval: usize,
178    /// Whether to write log blocks.
179    pub write_log: bool,
180    /// Skip writing the object index (config `reftable.indexObjects=false`).
181    pub skip_index_objects: bool,
182    /// Write blocks without padding to the block size.
183    pub unpadded: bool,
184}
185
186impl Default for WriteOptions {
187    fn default() -> Self {
188        Self {
189            block_size: DEFAULT_BLOCK_SIZE,
190            restart_interval: RESTART_INTERVAL,
191            write_log: true,
192            skip_index_objects: false,
193            unpadded: false,
194        }
195    }
196}
197
/// A ref update that should be written to a reftable transaction.
///
/// The `refname` must already be the backend storage refname (for example a
/// namespaced or per-worktree ref after storage routing). All updates passed to
/// one transaction are written with the same update index, matching Git's
/// reftable backend for `update-ref --stdin` batches.
#[derive(Debug, Clone)]
pub struct ReftableTransactionUpdate {
    /// Full storage refname to update.
    pub refname: String,
    /// New ref value; use [`RefValue::Deletion`] to record a deletion tombstone.
    pub value: RefValue,
    /// Optional reflog entry to record in the same table and update index.
    pub log: Option<LogRecord>,
}
213
214// ---------------------------------------------------------------------------
215// Writer
216// ---------------------------------------------------------------------------
217
218/// Writes a single reftable file.
219///
220/// Usage:
221/// ```ignore
222/// let mut w = ReftableWriter::new(opts, min_idx, max_idx);
223/// w.add_ref(&RefRecord { .. })?;
224/// w.add_log(&LogRecord { .. })?;
225/// let bytes = w.finish()?;
226/// ```
227pub struct ReftableWriter {
228    opts: WriteOptions,
229    min_update_index: u64,
230    max_update_index: u64,
231
232    // Accumulated ref records (must be added in sorted order).
233    refs: Vec<RefRecord>,
234    // Accumulated log records.
235    logs: Vec<LogRecord>,
236}
237
238impl ReftableWriter {
239    /// Create a new writer.
240    pub fn new(opts: WriteOptions, min_update_index: u64, max_update_index: u64) -> Self {
241        Self {
242            opts,
243            min_update_index,
244            max_update_index,
245            refs: Vec::new(),
246            logs: Vec::new(),
247        }
248    }
249
250    /// Add a ref record. Records **must** be added in sorted name order.
251    pub fn add_ref(&mut self, rec: RefRecord) -> Result<()> {
252        if let Some(last) = self.refs.last() {
253            if rec.name <= last.name {
254                return Err(Error::InvalidRef(format!(
255                    "reftable: refs must be sorted, got '{}' after '{}'",
256                    rec.name, last.name
257                )));
258            }
259        }
260        self.refs.push(rec);
261        Ok(())
262    }
263
264    /// Add a log record.
265    pub fn add_log(&mut self, rec: LogRecord) -> Result<()> {
266        self.logs.push(rec);
267        Ok(())
268    }
269
270    /// Finish writing and return the complete reftable file bytes.
271    ///
272    /// This is a faithful port of `git/reftable/writer.c` so that the
273    /// on-disk layout (block boundaries, restart points, padding,
274    /// index/object sections, footer offsets) is byte-identical to git.
275    pub fn finish(self) -> Result<Vec<u8>> {
276        let refs = self.refs;
277        let logs = self.logs;
278        let opts = self.opts;
279        let mut w = WriterState::new(opts, self.min_update_index, self.max_update_index);
280
281        // Refs are added in sorted order; index objects as we go.
282        for rec in &refs {
283            w.add_ref(rec)?;
284        }
285
286        // Logs: sort by (refname asc, update_index desc) — matches
287        // reftable_log_record_compare_key.
288        let mut logs = logs;
289        logs.sort_by(|a, b| {
290            a.refname
291                .cmp(&b.refname)
292                .then_with(|| b.update_index.cmp(&a.update_index))
293        });
294        if w.opts.write_log {
295            for log in &logs {
296                w.add_log(log)?;
297            }
298        }
299
300        w.close()
301    }
302}
303
304// ---------------------------------------------------------------------------
305// Faithful low-level writer (ports git/reftable/{block,writer,record}.c)
306// ---------------------------------------------------------------------------
307
308/// Default block size, mirrors reftable's `DEFAULT_BLOCK_SIZE`.
309const REFTABLE_DEFAULT_BLOCK_SIZE: u32 = 4096;
310/// Maximum number of restart points per block (`MAX_RESTARTS`).
311const MAX_RESTARTS: usize = (1 << 16) - 1;
312
313/// A record to encode: produces a key and a value body.
314enum EncRecord<'a> {
315    Ref(&'a RefRecord, u64),
316    Log(&'a LogRecord),
317    Obj { prefix: Vec<u8>, offsets: Vec<u64> },
318    Index { last_key: Vec<u8>, offset: u64 },
319}
320
321impl EncRecord<'_> {
322    fn block_type(&self) -> u8 {
323        match self {
324            EncRecord::Ref(..) => BLOCK_TYPE_REF,
325            EncRecord::Log(_) => BLOCK_TYPE_LOG,
326            EncRecord::Obj { .. } => BLOCK_TYPE_OBJ,
327            EncRecord::Index { .. } => BLOCK_TYPE_INDEX,
328        }
329    }
330
331    /// The record key (used for prefix compression and restart points).
332    fn key(&self) -> Vec<u8> {
333        match self {
334            EncRecord::Ref(r, _) => r.name.as_bytes().to_vec(),
335            EncRecord::Log(l) => {
336                let mut k = Vec::with_capacity(l.refname.len() + 9);
337                k.extend_from_slice(l.refname.as_bytes());
338                k.push(0);
339                let ts = u64::MAX - l.update_index;
340                k.extend_from_slice(&ts.to_be_bytes());
341                k
342            }
343            EncRecord::Obj { prefix, .. } => prefix.clone(),
344            EncRecord::Index { last_key, .. } => last_key.clone(),
345        }
346    }
347
348    /// The `extra` value-type bits stored in the key varint.
349    fn val_type(&self) -> u8 {
350        match self {
351            EncRecord::Ref(r, _) => match r.value {
352                RefValue::Deletion => VALUE_DELETION,
353                RefValue::Val1(_) => VALUE_ONE_OID,
354                RefValue::Val2(..) => VALUE_TWO_OID,
355                RefValue::Symref(_) => VALUE_SYMREF,
356            },
357            // grit only writes reflog updates (value_type 1), never the
358            // explicit-deletion form (value_type 0).
359            EncRecord::Log(_) => 1,
360            EncRecord::Obj { offsets, .. } => {
361                if !offsets.is_empty() && offsets.len() < 8 {
362                    offsets.len() as u8
363                } else {
364                    0
365                }
366            }
367            EncRecord::Index { .. } => 0,
368        }
369    }
370
371    /// Encode the value body (everything after the key).
372    fn encode_value(&self, opts: &WriteOptions, out: &mut Vec<u8>) {
373        match self {
374            EncRecord::Ref(r, update_index_delta) => {
375                put_varint(*update_index_delta, out);
376                match &r.value {
377                    RefValue::Deletion => {}
378                    RefValue::Val1(oid) => out.extend_from_slice(oid.as_bytes()),
379                    RefValue::Val2(oid, peeled) => {
380                        out.extend_from_slice(oid.as_bytes());
381                        out.extend_from_slice(peeled.as_bytes());
382                    }
383                    RefValue::Symref(target) => {
384                        put_varint(target.len() as u64, out);
385                        out.extend_from_slice(target.as_bytes());
386                    }
387                }
388            }
389            EncRecord::Log(l) => {
390                out.extend_from_slice(l.old_id.as_bytes());
391                out.extend_from_slice(l.new_id.as_bytes());
392                put_varint(l.name.len() as u64, out);
393                out.extend_from_slice(l.name.as_bytes());
394                put_varint(l.email.len() as u64, out);
395                out.extend_from_slice(l.email.as_bytes());
396                put_varint(l.time_seconds, out);
397                out.extend_from_slice(&l.tz_offset.to_be_bytes());
398                let msg = clean_log_message(&l.message, opts);
399                put_varint(msg.len() as u64, out);
400                out.extend_from_slice(&msg);
401            }
402            EncRecord::Obj { offsets, .. } => {
403                if offsets.is_empty() || offsets.len() >= 8 {
404                    put_varint(offsets.len() as u64, out);
405                }
406                if offsets.is_empty() {
407                    return;
408                }
409                put_varint(offsets[0], out);
410                let mut last = offsets[0];
411                for &o in &offsets[1..] {
412                    put_varint(o - last, out);
413                    last = o;
414                }
415            }
416            EncRecord::Index { offset, .. } => {
417                put_varint(*offset, out);
418            }
419        }
420    }
421}
422
423/// Clean a reflog message the way `reftable_writer_add_log` does (unless the
424/// writer is in `exact_log_message` mode, which grit never uses): strip
425/// trailing newlines and append exactly one.
426///
427/// Git applies this cleaning whenever the message field is non-NULL, including
428/// the empty string: `""` becomes `"\n"` (a single trailing newline), not an
429/// empty value. grit's `LogRecord` always carries a (possibly empty) `String`,
430/// so the cleaning always runs — matching git's `msglen == 1` for reflog entries
431/// written without an explicit message (e.g. `update-ref` with no `-m`,
432/// t0613 'restart interval at every single record').
433fn clean_log_message(msg: &str, opts: &WriteOptions) -> Vec<u8> {
434    // Git's reftable backend truncates the reflog message to `block_size / 2`
435    // bytes before storing it (reftable-backend.c: `xstrndup(u->msg,
436    // block_size / 2)`) so that an oversized message still fits inside a log
437    // block instead of failing the whole transaction with "entry too large"
438    // (t0610 'basic: can write large commit message'). Mirror that bound,
439    // clamping to a UTF-8 char boundary so the resulting string stays valid.
440    let limit = (opts.block_size as usize / 2).max(1);
441    let msg = if msg.len() > limit {
442        let mut end = limit;
443        while end > 0 && !msg.is_char_boundary(end) {
444            end -= 1;
445        }
446        &msg[..end]
447    } else {
448        msg
449    };
450    let trimmed = msg.trim_end_matches('\n');
451    let mut out = trimmed.as_bytes().to_vec();
452    out.push(b'\n');
453    out
454}
455
456/// Encode a key (prefix/suffix compression) into `out`, returning whether this
457/// was a restart point. Mirrors `reftable_encode_key`.
458fn encode_key(prev: &[u8], key: &[u8], extra: u8, out: &mut Vec<u8>) -> bool {
459    let prefix_len = common_prefix_len(prev, key);
460    let suffix_len = key.len() - prefix_len;
461    put_varint(prefix_len as u64, out);
462    put_varint(((suffix_len as u64) << 3) | (extra as u64), out);
463    out.extend_from_slice(&key[prefix_len..]);
464    prefix_len == 0
465}
466
467/// In-progress block being filled by the writer.
468struct BlockWriter {
469    typ: u8,
470    /// Bytes from `header_off` onwards (block type byte + 3 reserved length
471    /// bytes are at the start; record payload follows).
472    buf: Vec<u8>,
473    header_off: usize,
474    block_size: usize,
475    restart_interval: usize,
476    restarts: Vec<u32>,
477    last_key: Vec<u8>,
478    entries: usize,
479}
480
481impl BlockWriter {
482    fn new(typ: u8, block_size: usize, header_off: usize, restart_interval: usize) -> Self {
483        // buf is laid out starting at header_off: [type][len:3][records...]
484        let mut buf = vec![0u8; header_off + 4];
485        buf[header_off] = typ;
486        Self {
487            typ,
488            buf,
489            header_off,
490            block_size,
491            restart_interval,
492            restarts: Vec::new(),
493            last_key: Vec::new(),
494            entries: 0,
495        }
496    }
497
498    /// `w->next` equivalent: number of bytes written so far (within the block,
499    /// counting from offset 0 which includes header_off).
500    fn next(&self) -> usize {
501        self.buf.len()
502    }
503
504    /// Try to add a record. Returns Ok(true) if added, Ok(false) if it does not
505    /// fit (entry-too-big), or Err on other failure.
506    fn add(&mut self, rec: &EncRecord, opts: &WriteOptions) -> Result<bool> {
507        let key = rec.key();
508        if key.is_empty() {
509            return Err(Error::InvalidRef("reftable: empty record key".into()));
510        }
511        let restart = self.entries.is_multiple_of(self.restart_interval);
512        let prev: &[u8] = if restart { &[] } else { &self.last_key };
513
514        let mut encoded = Vec::new();
515        let is_restart = encode_key(prev, &key, rec.val_type(), &mut encoded);
516        rec.encode_value(opts, &mut encoded);
517        let n = encoded.len();
518
519        // register_restart overflow check: 2 + 3*rlen + n > block_size - next
520        let mut rlen = self.restarts.len();
521        let mut is_restart = is_restart;
522        if rlen >= MAX_RESTARTS {
523            is_restart = false;
524        }
525        if is_restart {
526            rlen += 1;
527        }
528        if self.block_size > 0 && 2 + 3 * rlen + n > self.block_size - self.next() {
529            return Ok(false);
530        }
531
532        if is_restart {
533            self.restarts.push(self.next() as u32);
534        }
535        self.buf.extend_from_slice(&encoded);
536        self.last_key = key;
537        self.entries += 1;
538        Ok(true)
539    }
540
541    /// Finalize the block in memory: append restart table + count, write the
542    /// 3-byte block length, and (for log blocks) compress. Returns the raw byte
543    /// length written (`raw_bytes`).
544    fn finish(&mut self) -> Result<usize> {
545        for &r in &self.restarts {
546            self.buf.push(((r >> 16) & 0xff) as u8);
547            self.buf.push(((r >> 8) & 0xff) as u8);
548            self.buf.push((r & 0xff) as u8);
549        }
550        let rc = self.restarts.len() as u16;
551        self.buf.push((rc >> 8) as u8);
552        self.buf.push((rc & 0xff) as u8);
553
554        // block length (uncompressed) goes into the 3 bytes after the type.
555        let block_len = self.buf.len();
556        self.buf[self.header_off + 1] = ((block_len >> 16) & 0xff) as u8;
557        self.buf[self.header_off + 2] = ((block_len >> 8) & 0xff) as u8;
558        self.buf[self.header_off + 3] = (block_len & 0xff) as u8;
559
560        if self.typ == BLOCK_TYPE_LOG {
561            use flate2::write::DeflateEncoder;
562            use flate2::Compression;
563            let skip = 4 + self.header_off;
564            let mut enc = DeflateEncoder::new(Vec::new(), Compression::new(9));
565            enc.write_all(&self.buf[skip..])
566                .map_err(|e| Error::Zlib(e.to_string()))?;
567            let compressed = enc.finish().map_err(|e| Error::Zlib(e.to_string()))?;
568            self.buf.truncate(skip);
569            self.buf.extend_from_slice(&compressed);
570        }
571        Ok(self.buf.len())
572    }
573}
574
/// Per-section accumulated stats (mirrors `reftable_block_stats`).
#[derive(Default, Clone)]
struct SectionStats {
    /// Number of blocks flushed for this section.
    blocks: usize,
    /// Number of index blocks written when the section was finished.
    index_blocks: usize,
    /// File offset of the section's first block; stays 0 when that block is
    /// at the very start of the file.
    offset: u64,
    /// File offset where the last index level written for this section
    /// starts; 0 when no index was written.
    index_offset: u64,
}
583
/// An object-index entry collected while writing refs.
struct ObjEntry {
    /// Object ID bytes of the indexed object.
    hash: Vec<u8>,
    /// Writer offsets recorded each time a ref with this hash was added;
    /// consecutive duplicates are not stored.
    offsets: Vec<u64>,
}
589
/// The full writer state, ported from `struct reftable_writer`.
struct WriterState {
    /// Write options, with zero block size / restart interval already
    /// replaced by their defaults.
    opts: WriteOptions,
    /// Minimum update index, written into the file header.
    min_update_index: u64,
    /// Maximum update index, written into the file header.
    max_update_index: u64,

    /// Bytes emitted so far.
    out: Vec<u8>,
    /// Running file offset: advanced by block size plus padding on every
    /// flush, so it can be ahead of `out.len()` by `pending_padding`.
    next: u64,
    /// Zero bytes owed to `out` before the next write.
    pending_padding: usize,

    /// The block currently being filled, if any.
    block: Option<BlockWriter>,
    /// Type of the most recently (re)initialized block.
    block_type: u8,

    /// Index records for the current section (last_key, offset).
    index: Vec<(Vec<u8>, u64)>,

    /// Object-index tree (kept sorted by hash).
    obj_entries: Vec<ObjEntry>,
    /// Length of the hash prefixes used as object-index keys; set when the
    /// object index is dumped.
    object_id_len: usize,

    /// Stats for the ref section.
    ref_stats: SectionStats,
    /// Stats for the object-index section.
    obj_stats: SectionStats,
    /// Stats for the log section.
    log_stats: SectionStats,
    /// Total number of index blocks flushed so far, across all sections.
    idx_blocks_total: usize,
}
615
616impl WriterState {
617    fn new(mut opts: WriteOptions, min: u64, max: u64) -> Self {
618        if opts.restart_interval == 0 {
619            opts.restart_interval = RESTART_INTERVAL;
620        }
621        if opts.block_size == 0 {
622            opts.block_size = REFTABLE_DEFAULT_BLOCK_SIZE;
623        }
624        Self {
625            opts,
626            min_update_index: min,
627            max_update_index: max,
628            out: Vec::new(),
629            next: 0,
630            pending_padding: 0,
631            block: None,
632            block_type: 0,
633            index: Vec::new(),
634            obj_entries: Vec::new(),
635            object_id_len: 0,
636            ref_stats: SectionStats::default(),
637            obj_stats: SectionStats::default(),
638            log_stats: SectionStats::default(),
639            idx_blocks_total: 0,
640        }
641    }
642
643    fn header_size(&self) -> usize {
644        // version 1 (sha1) only — grit is sha1 in these tests.
645        24
646    }
647
648    fn write_header(&self, dest: &mut [u8]) {
649        dest[0..4].copy_from_slice(REFTABLE_MAGIC);
650        dest[4] = 1;
651        dest[5] = ((self.opts.block_size >> 16) & 0xff) as u8;
652        dest[6] = ((self.opts.block_size >> 8) & 0xff) as u8;
653        dest[7] = (self.opts.block_size & 0xff) as u8;
654        dest[8..16].copy_from_slice(&self.min_update_index.to_be_bytes());
655        dest[16..24].copy_from_slice(&self.max_update_index.to_be_bytes());
656    }
657
658    fn stats_mut(&mut self, typ: u8) -> &mut SectionStats {
659        match typ {
660            BLOCK_TYPE_REF => &mut self.ref_stats,
661            BLOCK_TYPE_OBJ => &mut self.obj_stats,
662            BLOCK_TYPE_LOG => &mut self.log_stats,
663            // index blocks roll into the section being indexed; not used here.
664            _ => &mut self.ref_stats,
665        }
666    }
667
668    /// Write `data` then queue `padding` zero bytes for the next write
669    /// (`padded_write`).
670    fn padded_write(&mut self, data: &[u8], padding: usize) {
671        if self.pending_padding > 0 {
672            self.out
673                .extend(std::iter::repeat_n(0u8, self.pending_padding));
674            self.pending_padding = 0;
675        }
676        self.pending_padding = padding;
677        self.out.extend_from_slice(data);
678    }
679
680    fn reinit_block(&mut self, typ: u8) {
681        let header_off = if self.next == 0 {
682            self.header_size()
683        } else {
684            0
685        };
686        self.block = Some(BlockWriter::new(
687            typ,
688            self.opts.block_size as usize,
689            header_off,
690            self.opts.restart_interval,
691        ));
692        self.block_type = typ;
693    }
694
695    fn add_record(&mut self, rec: &EncRecord) -> Result<()> {
696        let typ = rec.block_type();
697        if self.block.is_none() {
698            self.reinit_block(typ);
699        }
700        // Attempt to add.
701        let opts = self.opts.clone();
702        let fit = {
703            let bw = self
704                .block
705                .as_mut()
706                .ok_or_else(|| Error::InvalidRef("reftable: no active block writer".into()))?;
707            bw.add(rec, &opts)?
708        };
709        if fit {
710            return Ok(());
711        }
712        // Block full: flush and retry in a fresh block.
713        self.flush_block()?;
714        self.reinit_block(typ);
715        let opts = self.opts.clone();
716        let bw = self
717            .block
718            .as_mut()
719            .ok_or_else(|| Error::InvalidRef("reftable: no active block writer".into()))?;
720        if !bw.add(rec, &opts)? {
721            return Err(Error::InvalidRef(
722                "reftable: transaction failure: entry too large".into(),
723            ));
724        }
725        Ok(())
726    }
727
728    fn add_ref(&mut self, r: &RefRecord) -> Result<()> {
729        let delta = r.update_index.saturating_sub(self.min_update_index);
730        self.add_record(&EncRecord::Ref(r, delta))?;
731
732        if !self.opts.skip_index_objects {
733            match &r.value {
734                RefValue::Val1(oid) => self.index_hash(oid.as_bytes()),
735                RefValue::Val2(oid, peeled) => {
736                    self.index_hash(oid.as_bytes());
737                    self.index_hash(peeled.as_bytes());
738                }
739                _ => {}
740            }
741        }
742        Ok(())
743    }
744
745    fn add_log(&mut self, l: &LogRecord) -> Result<()> {
746        // Finishing the ref section happens before the first log record.
747        if matches!(&self.block, Some(b) if b.typ == BLOCK_TYPE_REF) {
748            self.finish_public_section()?;
749        }
750        // Drop pending padding before a log block (matches add_log_verbatim).
751        self.next -= self.pending_padding as u64;
752        self.pending_padding = 0;
753        self.add_record(&EncRecord::Log(l))
754    }
755
756    fn index_hash(&mut self, hash: &[u8]) {
757        let off = self.next;
758        match self
759            .obj_entries
760            .binary_search_by(|e| e.hash.as_slice().cmp(hash))
761        {
762            Ok(idx) => {
763                let e = &mut self.obj_entries[idx];
764                if e.offsets.last() != Some(&off) {
765                    e.offsets.push(off);
766                }
767            }
768            Err(idx) => {
769                self.obj_entries.insert(
770                    idx,
771                    ObjEntry {
772                        hash: hash.to_vec(),
773                        offsets: vec![off],
774                    },
775                );
776            }
777        }
778    }
779
780    /// `writer_flush_nonempty_block`.
781    fn flush_block(&mut self) -> Result<()> {
782        let Some(mut bw) = self.block.take() else {
783            return Ok(());
784        };
785        if bw.entries == 0 {
786            self.block = Some(bw);
787            return Ok(());
788        }
789        let typ = bw.typ;
790        let raw_bytes = bw.finish()?;
791
792        let mut padding = 0;
793        if !self.opts.unpadded && typ != BLOCK_TYPE_LOG {
794            padding = (self.opts.block_size as usize).saturating_sub(raw_bytes);
795        }
796
797        let block_typ_off = if self.stats_mut(typ).blocks == 0 {
798            self.next
799        } else {
800            0
801        };
802        {
803            let next = self.next;
804            let st = self.stats_mut(typ);
805            if block_typ_off > 0 {
806                st.offset = next;
807            }
808            st.blocks += 1;
809        }
810
811        if self.next == 0 {
812            // Write the reftable header into the front of the first block.
813            let hs = self.header_size();
814            self.write_header_into_block(&mut bw, hs);
815        }
816
817        let data = bw.buf.clone();
818        self.padded_write(&data, padding);
819
820        // Record an index entry for this block.
821        self.index.push((bw.last_key.clone(), self.next));
822
823        self.next += (padding + raw_bytes) as u64;
824        self.block = None;
825        Ok(())
826    }
827
828    fn write_header_into_block(&self, bw: &mut BlockWriter, hs: usize) {
829        let mut hdr = vec![0u8; hs];
830        self.write_header(&mut hdr);
831        bw.buf[..hs].copy_from_slice(&hdr);
832    }
833
834    fn flush_block_if_nonempty(&mut self) -> Result<()> {
835        if matches!(&self.block, Some(b) if b.entries == 0) {
836            return Ok(());
837        }
838        self.flush_block()
839    }
840
841    /// `writer_finish_section`: flush the current block then emit any index.
842    fn finish_section(&mut self) -> Result<()> {
843        let typ = self.block_type;
844        let threshold = if self.opts.unpadded { 1 } else { 3 };
845        let before_blocks = self.idx_blocks_total;
846
847        self.flush_block_if_nonempty()?;
848
849        let mut max_level = 0;
850        let mut index_start = 0u64;
851
852        while self.index.len() > threshold {
853            max_level += 1;
854            index_start = self.next;
855            self.reinit_block(BLOCK_TYPE_INDEX);
856
857            let idx = std::mem::take(&mut self.index);
858            for (last_key, offset) in &idx {
859                self.add_record(&EncRecord::Index {
860                    last_key: last_key.clone(),
861                    offset: *offset,
862                })?;
863            }
864            // Count index blocks produced during this level.
865            let blocks_before = self.count_index_blocks_marker();
866            self.flush_index_block()?;
867            let _ = blocks_before;
868        }
869
870        self.index.clear();
871
872        let index_blocks = self.idx_blocks_total - before_blocks;
873        {
874            let st = self.stats_mut(typ);
875            st.index_blocks = index_blocks;
876            st.index_offset = index_start;
877        }
878        let _ = max_level;
879        Ok(())
880    }
881
    /// Snapshot of `idx_blocks_total`, the running count of index blocks
    /// flushed so far.
    fn count_index_blocks_marker(&self) -> usize {
        self.idx_blocks_total
    }
885
886    /// Flush an index block: like `flush_block` but the produced block counts
887    /// toward `idx_blocks_total` and re-populates `self.index` for the next
888    /// (higher) level.
889    fn flush_index_block(&mut self) -> Result<()> {
890        let Some(mut bw) = self.block.take() else {
891            return Ok(());
892        };
893        if bw.entries == 0 {
894            self.block = Some(bw);
895            return Ok(());
896        }
897        let raw_bytes = bw.finish()?;
898        let mut padding = 0;
899        if !self.opts.unpadded {
900            padding = (self.opts.block_size as usize).saturating_sub(raw_bytes);
901        }
902        if self.next == 0 {
903            let hs = self.header_size();
904            self.write_header_into_block(&mut bw, hs);
905        }
906        let data = bw.buf.clone();
907        self.padded_write(&data, padding);
908        self.index.push((bw.last_key.clone(), self.next));
909        self.next += (padding + raw_bytes) as u64;
910        self.idx_blocks_total += 1;
911        self.block = None;
912        Ok(())
913    }
914
915    /// `writer_dump_object_index`.
916    fn dump_object_index(&mut self) -> Result<()> {
917        // object_id_len = max common prefix among sorted hashes + 1, min 2.
918        let mut max_common = 1usize;
919        for w in self.obj_entries.windows(2) {
920            let n = common_prefix_len(&w[0].hash, &w[1].hash);
921            if n > max_common {
922                max_common = n;
923            }
924        }
925        self.object_id_len = max_common + 1;
926        let id_len = self.object_id_len;
927
928        self.reinit_block(BLOCK_TYPE_OBJ);
929        let entries = std::mem::take(&mut self.obj_entries);
930        for e in &entries {
931            let prefix = e.hash[..id_len.min(e.hash.len())].to_vec();
932            self.add_obj_record(prefix, &e.offsets)?;
933        }
934        self.obj_entries = entries;
935        self.finish_section()
936    }
937
938    fn add_obj_record(&mut self, prefix: Vec<u8>, offsets: &[u64]) -> Result<()> {
939        // Try with full offsets; on overflow in a fresh block, drop offsets.
940        let typ = BLOCK_TYPE_OBJ;
941        if self.block.is_none() {
942            self.reinit_block(typ);
943        }
944        let opts = self.opts.clone();
945        let rec = EncRecord::Obj {
946            prefix: prefix.clone(),
947            offsets: offsets.to_vec(),
948        };
949        let fit = {
950            let bw = self
951                .block
952                .as_mut()
953                .ok_or_else(|| Error::InvalidRef("reftable: no active block writer".into()))?;
954            bw.add(&rec, &opts)?
955        };
956        if fit {
957            return Ok(());
958        }
959        self.flush_block()?;
960        self.reinit_block(typ);
961        let opts = self.opts.clone();
962        let fit = {
963            let bw = self
964                .block
965                .as_mut()
966                .ok_or_else(|| Error::InvalidRef("reftable: no active block writer".into()))?;
967            bw.add(&rec, &opts)?
968        };
969        if fit {
970            return Ok(());
971        }
972        // Drop offsets entirely.
973        let rec = EncRecord::Obj {
974            prefix,
975            offsets: Vec::new(),
976        };
977        let opts = self.opts.clone();
978        let bw = self
979            .block
980            .as_mut()
981            .ok_or_else(|| Error::InvalidRef("reftable: no active block writer".into()))?;
982        bw.add(&rec, &opts)?;
983        Ok(())
984    }
985
986    /// `writer_finish_public_section`.
987    fn finish_public_section(&mut self) -> Result<()> {
988        let Some(bw) = &self.block else {
989            return Ok(());
990        };
991        let typ = bw.typ;
992        self.finish_section()?;
993        if typ == BLOCK_TYPE_REF && !self.opts.skip_index_objects && self.ref_stats.index_blocks > 0
994        {
995            self.dump_object_index()?;
996        }
997        self.obj_entries.clear();
998        self.block = None;
999        self.block_type = 0;
1000        Ok(())
1001    }
1002
1003    /// `reftable_writer_close`.
1004    fn close(mut self) -> Result<Vec<u8>> {
1005        self.finish_public_section()?;
1006        let empty_table = self.next == 0;
1007        self.pending_padding = 0;
1008
1009        if empty_table {
1010            let hs = self.header_size();
1011            let mut header = vec![0u8; hs];
1012            self.write_header(&mut header);
1013            self.padded_write(&header, 0);
1014        }
1015
1016        let mut footer = vec![0u8; self.header_size()];
1017        self.write_header(&mut footer);
1018        footer.extend_from_slice(&self.ref_stats.index_offset.to_be_bytes());
1019        let obj_field = (self.obj_stats.offset << 5) | (self.object_id_len as u64);
1020        footer.extend_from_slice(&obj_field.to_be_bytes());
1021        footer.extend_from_slice(&self.obj_stats.index_offset.to_be_bytes());
1022        footer.extend_from_slice(&self.log_stats.offset.to_be_bytes());
1023        footer.extend_from_slice(&self.log_stats.index_offset.to_be_bytes());
1024        let crc = crc32(&footer);
1025        footer.extend_from_slice(&crc.to_be_bytes());
1026
1027        // Footer write drops pending padding (flush() before padded_write).
1028        self.pending_padding = 0;
1029        self.out.extend_from_slice(&footer);
1030
1031        Ok(self.out)
1032    }
1033}
1034
1035// ---------------------------------------------------------------------------
1036// Reader
1037// ---------------------------------------------------------------------------
1038
/// Reads a single reftable file from a byte buffer.
///
/// Constructed by `ReftableReader::new`, which checks the magic and version
/// bytes and copies the remaining fields out of the parsed footer.
pub struct ReftableReader {
    /// Complete file contents, including header and footer.
    data: Vec<u8>,
    /// Format version byte (offset 4 of the file); only 1 and 2 are accepted.
    version: u8,
    /// Block size recorded in the footer. `read_refs` treats 0 as "blocks
    /// follow each other without alignment".
    block_size: u32,
    /// Minimum update index from the footer; ref records store their update
    /// index as a delta on top of this value.
    min_update_index: u64,
    /// Maximum update index from the footer.
    max_update_index: u64,
    /// Ref index position from the footer; when non-zero, `read_refs` stops
    /// scanning ref blocks there.
    ref_index_position: u64,
    /// Log position from the footer; when non-zero, `read_logs` starts there.
    log_position: u64,
}
1049
/// Parsed footer fields.
///
/// NOTE(review): the per-field notes below follow the field names and the
/// footer layout the writer emits; the parser itself is not visible here —
/// confirm against `parse_footer`.
#[derive(Debug)]
// NOTE(review): presumably allowed because not every field is read yet.
#[allow(dead_code)]
struct Footer {
    /// Format version.
    version: u8,
    /// Block size of the table.
    block_size: u32,
    /// Smallest update index covered by the table.
    min_update_index: u64,
    /// Largest update index covered by the table.
    max_update_index: u64,
    /// Offset of the ref index (0 when absent — TODO confirm).
    ref_index_position: u64,
    /// Object section offset and object id length packed into one word; the
    /// writer stores it as `(offset << 5) | id_len`.
    obj_position_and_id_len: u64,
    /// Offset of the object index.
    obj_index_position: u64,
    /// Offset of the log section.
    log_position: u64,
    /// Offset of the log index.
    log_index_position: u64,
}
1064
1065impl ReftableReader {
1066    /// Open a reftable from bytes.
1067    pub fn new(data: Vec<u8>) -> Result<Self> {
1068        if data.len() < HEADER_SIZE + FOOTER_V1_SIZE {
1069            // Could be an empty table (header + footer only = 24 + 68 = 92)
1070            if data.len() < HEADER_SIZE {
1071                return Err(Error::InvalidRef("reftable: file too small".into()));
1072            }
1073        }
1074
1075        // Parse header
1076        if &data[0..4] != REFTABLE_MAGIC {
1077            return Err(Error::InvalidRef("reftable: bad magic".into()));
1078        }
1079        let version = data[4];
1080        if version != 1 && version != 2 {
1081            return Err(Error::InvalidRef(format!(
1082                "reftable: unsupported version {version}"
1083            )));
1084        }
1085        let _block_size = ((data[5] as u32) << 16) | ((data[6] as u32) << 8) | (data[7] as u32);
1086        let _min_update_index = u64::from_be_bytes(
1087            data[8..16]
1088                .try_into()
1089                .map_err(|_| Error::InvalidRef("reftable: truncated header".into()))?,
1090        );
1091        let _max_update_index = u64::from_be_bytes(
1092            data[16..24]
1093                .try_into()
1094                .map_err(|_| Error::InvalidRef("reftable: truncated header".into()))?,
1095        );
1096
1097        // Parse footer
1098        let footer_size = if version == 2 { 72 } else { FOOTER_V1_SIZE };
1099        if data.len() < footer_size {
1100            return Err(Error::InvalidRef(
1101                "reftable: file too small for footer".into(),
1102            ));
1103        }
1104        let footer_start = data.len() - footer_size;
1105        let footer = parse_footer(&data[footer_start..], version)?;
1106
1107        Ok(Self {
1108            data,
1109            version,
1110            block_size: footer.block_size,
1111            min_update_index: footer.min_update_index,
1112            max_update_index: footer.max_update_index,
1113            ref_index_position: footer.ref_index_position,
1114            log_position: footer.log_position,
1115        })
1116    }
1117
1118    /// Read all ref records from the table.
1119    pub fn read_refs(&self) -> Result<Vec<RefRecord>> {
1120        let mut refs = Vec::new();
1121        let footer_size = if self.version == 2 {
1122            72
1123        } else {
1124            FOOTER_V1_SIZE
1125        };
1126        let file_end = self.data.len() - footer_size;
1127
1128        // Determine where ref blocks end
1129        let ref_end = if self.ref_index_position > 0 {
1130            self.ref_index_position as usize
1131        } else if self.log_position > 0 {
1132            self.log_position as usize
1133        } else {
1134            file_end
1135        };
1136
1137        let mut pos = 0usize;
1138        // Skip the header — first ref block starts at offset 24 but shares
1139        // the same physical block as the header.
1140        if pos < HEADER_SIZE {
1141            pos = HEADER_SIZE;
1142        }
1143
1144        while pos < ref_end {
1145            if pos >= self.data.len() {
1146                break;
1147            }
1148            let block_type = self.data[pos];
1149            if block_type == 0 {
1150                // Padding — skip to next block boundary
1151                if self.block_size > 0 {
1152                    let bs = self.block_size as usize;
1153                    pos = ((pos / bs) + 1) * bs;
1154                    continue;
1155                } else {
1156                    break;
1157                }
1158            }
1159            if block_type != BLOCK_TYPE_REF {
1160                break;
1161            }
1162
1163            let block_len = read_u24(&self.data, pos + 1);
1164            // Determine the data range for this block
1165            let block_data_start = pos + 4; // after type(1) + len(3)
1166
1167            // The first block's block_len includes the 24-byte header
1168            let is_first = pos == HEADER_SIZE;
1169            let records_end = if is_first {
1170                // block_len is from file start
1171                block_len
1172            } else {
1173                pos + block_len
1174            };
1175
1176            if records_end > ref_end {
1177                break;
1178            }
1179
1180            // Read restart count (last 2 bytes before padding)
1181            let rc = read_u16(&self.data, records_end - 2);
1182            // Restart table is rc * 3 bytes before the restart_count
1183            let restart_table_start = records_end - 2 - (rc * 3);
1184
1185            // Read records from block_data_start to restart_table_start
1186            let mut rpos = block_data_start;
1187            let mut prev_name = Vec::<u8>::new();
1188
1189            while rpos < restart_table_start {
1190                let (rec, new_pos) =
1191                    decode_ref_record(&self.data, rpos, &prev_name, self.min_update_index)?;
1192                prev_name = rec.name.as_bytes().to_vec();
1193                refs.push(rec);
1194                rpos = new_pos;
1195            }
1196
1197            // Advance to next block
1198            if self.block_size > 0 {
1199                let bs = self.block_size as usize;
1200                if is_first {
1201                    pos = bs;
1202                } else {
1203                    pos += bs;
1204                }
1205            } else {
1206                pos = records_end;
1207            }
1208        }
1209
1210        Ok(refs)
1211    }
1212
1213    /// Look up a single ref by name.
1214    pub fn lookup_ref(&self, name: &str) -> Result<Option<RefRecord>> {
1215        // Simple: scan all refs. For large files the index would speed this up.
1216        let refs = self.read_refs()?;
1217        Ok(refs.into_iter().find(|r| r.name == name))
1218    }
1219
1220    /// Read all log records from the table.
1221    pub fn read_logs(&self) -> Result<Vec<LogRecord>> {
1222        let footer_size = if self.version == 2 {
1223            72
1224        } else {
1225            FOOTER_V1_SIZE
1226        };
1227        let file_end = self.data.len() - footer_size;
1228
1229        // Determine where the log section starts. Git records the log offset in
1230        // the footer, but when the log block is the *first* block in the file it
1231        // shares its physical block with the 24-byte reftable header and the
1232        // recorded offset is left at 0 (see `writer_flush_nonempty_block`'s
1233        // `block_typ_off = (blocks == 0) ? next : 0`). The reader detects this
1234        // by checking whether the first on-disk block (the byte right after the
1235        // header) is a log block — mirroring `is_present` in git's table.c.
1236        let mut pos = if self.log_position > 0 {
1237            self.log_position as usize
1238        } else if self.data.len() > HEADER_SIZE && self.data[HEADER_SIZE] == BLOCK_TYPE_LOG {
1239            // Log block is the first block; it begins right after the header.
1240            HEADER_SIZE
1241        } else {
1242            return Ok(Vec::new());
1243        };
1244        let mut logs = Vec::new();
1245
1246        while pos < file_end {
1247            if pos >= self.data.len() {
1248                break;
1249            }
1250            let block_type = self.data[pos];
1251            if block_type != BLOCK_TYPE_LOG {
1252                break;
1253            }
1254            // When the log block shares its physical block with the reftable
1255            // header, the 3-byte block length counts from offset 0 and so
1256            // includes the header bytes; the compressed payload still starts
1257            // right after the type+length header at `pos + 4`.
1258            let is_first = pos == HEADER_SIZE && self.log_position == 0;
1259            let block_len = read_u24(&self.data, pos + 1);
1260            let compressed_start = pos + 4;
1261
1262            // The inflated size is block_len minus the 4-byte type+length header
1263            // (and, for the first block, minus the embedded reftable header).
1264            let header_prefix = if is_first { HEADER_SIZE } else { 0 };
1265            let inflated_size = block_len.saturating_sub(4 + header_prefix);
1266
1267            // Decompress
1268            use flate2::read::DeflateDecoder;
1269            let remaining = &self.data[compressed_start..file_end];
1270            let mut decoder = DeflateDecoder::new(remaining);
1271            let mut inflated = vec![0u8; inflated_size];
1272            decoder
1273                .read_exact(&mut inflated)
1274                .map_err(|e| Error::Zlib(e.to_string()))?;
1275
1276            // How many compressed bytes were consumed?
1277            let consumed = decoder.total_in() as usize;
1278
1279            // Parse log records from inflated data
1280            // Read restart_count from end
1281            if inflated.len() < 2 {
1282                break;
1283            }
1284            let rc = read_u16(&inflated, inflated.len() - 2);
1285            let restart_table_start = inflated.len() - 2 - (rc * 3);
1286
1287            let mut rpos = 0usize;
1288            let mut prev_key = Vec::<u8>::new();
1289
1290            while rpos < restart_table_start {
1291                let (log, new_pos) = decode_log_record(&inflated, rpos, &prev_key)?;
1292                // Reconstruct key for prefix compression
1293                let mut key = Vec::new();
1294                key.extend_from_slice(log.refname.as_bytes());
1295                key.push(0);
1296                key.extend_from_slice(&(0xffffffffffffffffu64 - log.update_index).to_be_bytes());
1297                prev_key = key;
1298                logs.push(log);
1299                rpos = new_pos;
1300            }
1301
1302            pos = compressed_start + consumed;
1303        }
1304
1305        Ok(logs)
1306    }
1307
    /// Get the block size of the table, as recorded in its footer.
    pub fn block_size(&self) -> u32 {
        self.block_size
    }
1312
    /// Get the min update index, as recorded in the table's footer.
    pub fn min_update_index(&self) -> u64 {
        self.min_update_index
    }
1317
    /// Get the max update index, as recorded in the table's footer.
    pub fn max_update_index(&self) -> u64 {
        self.max_update_index
    }
1322}
1323
1324// ---------------------------------------------------------------------------
1325// Record decoding helpers
1326// ---------------------------------------------------------------------------
1327
1328fn decode_ref_record(
1329    data: &[u8],
1330    pos: usize,
1331    prev_name: &[u8],
1332    min_update_index: u64,
1333) -> Result<(RefRecord, usize)> {
1334    let (prefix_len, p) = get_varint(data, pos)?;
1335    let (suffix_and_type, mut p) = get_varint(data, p)?;
1336    let suffix_len = (suffix_and_type >> 3) as usize;
1337    let value_type = (suffix_and_type & 0x7) as u8;
1338
1339    // Reconstruct name
1340    let mut name = Vec::with_capacity(prefix_len as usize + suffix_len);
1341    if prefix_len > 0 {
1342        if (prefix_len as usize) > prev_name.len() {
1343            return Err(Error::InvalidRef(
1344                "reftable: prefix_len exceeds prev name".into(),
1345            ));
1346        }
1347        name.extend_from_slice(&prev_name[..prefix_len as usize]);
1348    }
1349    if p + suffix_len > data.len() {
1350        return Err(Error::InvalidRef("reftable: suffix overflows block".into()));
1351    }
1352    name.extend_from_slice(&data[p..p + suffix_len]);
1353    p += suffix_len;
1354
1355    let name_str = String::from_utf8(name)
1356        .map_err(|_| Error::InvalidRef("reftable: invalid UTF-8 in ref name".into()))?;
1357
1358    let (update_index_delta, mut p) = get_varint(data, p)?;
1359    let update_index = min_update_index + update_index_delta;
1360
1361    let value = match value_type {
1362        VALUE_DELETION => RefValue::Deletion,
1363        VALUE_ONE_OID => {
1364            if p + HASH_SIZE > data.len() {
1365                return Err(Error::InvalidRef("reftable: truncated OID".into()));
1366            }
1367            let oid = ObjectId::from_bytes(&data[p..p + HASH_SIZE])?;
1368            p += HASH_SIZE;
1369            RefValue::Val1(oid)
1370        }
1371        VALUE_TWO_OID => {
1372            if p + 2 * HASH_SIZE > data.len() {
1373                return Err(Error::InvalidRef("reftable: truncated OID pair".into()));
1374            }
1375            let oid = ObjectId::from_bytes(&data[p..p + HASH_SIZE])?;
1376            p += HASH_SIZE;
1377            let peeled = ObjectId::from_bytes(&data[p..p + HASH_SIZE])?;
1378            p += HASH_SIZE;
1379            RefValue::Val2(oid, peeled)
1380        }
1381        VALUE_SYMREF => {
1382            let (target_len, p2) = get_varint(data, p)?;
1383            p = p2;
1384            let target_len = target_len as usize;
1385            if p + target_len > data.len() {
1386                return Err(Error::InvalidRef(
1387                    "reftable: truncated symref target".into(),
1388                ));
1389            }
1390            let target = String::from_utf8(data[p..p + target_len].to_vec())
1391                .map_err(|_| Error::InvalidRef("reftable: invalid UTF-8 in symref".into()))?;
1392            p += target_len;
1393            RefValue::Symref(target)
1394        }
1395        _ => {
1396            return Err(Error::InvalidRef(format!(
1397                "reftable: unknown value_type {value_type}"
1398            )));
1399        }
1400    };
1401
1402    Ok((
1403        RefRecord {
1404            name: name_str,
1405            update_index,
1406            value,
1407        },
1408        p,
1409    ))
1410}
1411
1412fn decode_log_record(data: &[u8], pos: usize, prev_key: &[u8]) -> Result<(LogRecord, usize)> {
1413    let (prefix_len, p) = get_varint(data, pos)?;
1414    let (suffix_and_type, mut p) = get_varint(data, p)?;
1415    let suffix_len = (suffix_and_type >> 3) as usize;
1416    let log_type = (suffix_and_type & 0x7) as u8;
1417
1418    // Reconstruct key
1419    let mut key = Vec::with_capacity(prefix_len as usize + suffix_len);
1420    if prefix_len > 0 {
1421        if (prefix_len as usize) > prev_key.len() {
1422            return Err(Error::InvalidRef(
1423                "reftable: log prefix_len exceeds prev key".into(),
1424            ));
1425        }
1426        key.extend_from_slice(&prev_key[..prefix_len as usize]);
1427    }
1428    if p + suffix_len > data.len() {
1429        return Err(Error::InvalidRef("reftable: log suffix overflows".into()));
1430    }
1431    key.extend_from_slice(&data[p..p + suffix_len]);
1432    p += suffix_len;
1433
1434    // Parse key: refname \0 reverse_int64(update_index)
1435    let null_pos = key
1436        .iter()
1437        .position(|&b| b == 0)
1438        .ok_or_else(|| Error::InvalidRef("reftable: log key missing null separator".into()))?;
1439    let refname = String::from_utf8(key[..null_pos].to_vec())
1440        .map_err(|_| Error::InvalidRef("reftable: invalid UTF-8 in log refname".into()))?;
1441    if null_pos + 9 > key.len() {
1442        return Err(Error::InvalidRef("reftable: log key too short".into()));
1443    }
1444    let reversed_idx = u64::from_be_bytes(
1445        key[null_pos + 1..null_pos + 9]
1446            .try_into()
1447            .map_err(|_| Error::InvalidRef("reftable: log key too short".into()))?,
1448    );
1449    let update_index = 0xffffffffffffffffu64 - reversed_idx;
1450
1451    if log_type == 0 {
1452        // Deletion
1453        let zero_oid = ObjectId::from_bytes(&[0u8; 20])?;
1454        return Ok((
1455            LogRecord {
1456                refname,
1457                update_index,
1458                old_id: zero_oid,
1459                new_id: zero_oid,
1460                name: String::new(),
1461                email: String::new(),
1462                time_seconds: 0,
1463                tz_offset: 0,
1464                message: String::new(),
1465            },
1466            p,
1467        ));
1468    }
1469
1470    // log_type == 1: standard log data
1471    if p + 2 * HASH_SIZE > data.len() {
1472        return Err(Error::InvalidRef("reftable: truncated log OIDs".into()));
1473    }
1474    let old_id = ObjectId::from_bytes(&data[p..p + HASH_SIZE])?;
1475    p += HASH_SIZE;
1476    let new_id = ObjectId::from_bytes(&data[p..p + HASH_SIZE])?;
1477    p += HASH_SIZE;
1478
1479    let (name_len, p2) = get_varint(data, p)?;
1480    p = p2;
1481    let name_len = name_len as usize;
1482    if p + name_len > data.len() {
1483        return Err(Error::InvalidRef("reftable: truncated log name".into()));
1484    }
1485    let name = String::from_utf8(data[p..p + name_len].to_vec())
1486        .map_err(|_| Error::InvalidRef("reftable: invalid UTF-8 in log name".into()))?;
1487    p += name_len;
1488
1489    let (email_len, p2) = get_varint(data, p)?;
1490    p = p2;
1491    let email_len = email_len as usize;
1492    if p + email_len > data.len() {
1493        return Err(Error::InvalidRef("reftable: truncated log email".into()));
1494    }
1495    let email = String::from_utf8(data[p..p + email_len].to_vec())
1496        .map_err(|_| Error::InvalidRef("reftable: invalid UTF-8 in log email".into()))?;
1497    p += email_len;
1498
1499    let (time_seconds, p2) = get_varint(data, p)?;
1500    p = p2;
1501
1502    if p + 2 > data.len() {
1503        return Err(Error::InvalidRef("reftable: truncated tz_offset".into()));
1504    }
1505    let tz_offset = i16::from_be_bytes([data[p], data[p + 1]]);
1506    p += 2;
1507
1508    let (msg_len, p2) = get_varint(data, p)?;
1509    p = p2;
1510    let msg_len = msg_len as usize;
1511    if p + msg_len > data.len() {
1512        return Err(Error::InvalidRef("reftable: truncated log message".into()));
1513    }
1514    let message = String::from_utf8(data[p..p + msg_len].to_vec())
1515        .map_err(|_| Error::InvalidRef("reftable: invalid UTF-8 in log message".into()))?;
1516    p += msg_len;
1517
1518    Ok((
1519        LogRecord {
1520            refname,
1521            update_index,
1522            old_id,
1523            new_id,
1524            name,
1525            email,
1526            time_seconds,
1527            tz_offset,
1528            message,
1529        },
1530        p,
1531    ))
1532}
1533
1534// ---------------------------------------------------------------------------
1535// Stack management
1536// ---------------------------------------------------------------------------
1537
/// Manages the `$GIT_DIR/reftable/` directory and `tables.list` stack.
///
/// The stack provides a merged view of all tables, with later tables
/// taking precedence over earlier ones.
pub struct ReftableStack {
    /// Path to the `reftable/` directory (`<git_dir>/reftable`).
    reftable_dir: PathBuf,
    /// Ordered list of table file names (oldest first), one per non-empty
    /// line of `tables.list`; each name is resolved relative to
    /// `reftable_dir`.
    table_names: Vec<String>,
}
1548
/// RAII guard for `tables.list.lock`. Removes the lock file on drop unless it was
/// consumed (renamed onto `tables.list`) via [`TablesListLock::disarm`].
struct TablesListLock {
    /// Path of the lock file that is removed on drop while the guard is armed.
    path: PathBuf,
    /// `true` until `disarm` is called; a `Cell` so that it can be cleared
    /// through a shared reference.
    armed: std::cell::Cell<bool>,
}
1555
1556impl TablesListLock {
1557    fn new(path: PathBuf) -> Self {
1558        Self {
1559            path,
1560            armed: std::cell::Cell::new(true),
1561        }
1562    }
1563
1564    /// Mark the lock as consumed so its `Drop` does not remove the path (it has
1565    /// been renamed onto `tables.list`).
1566    fn disarm(&self) {
1567        self.armed.set(false);
1568    }
1569}
1570
1571impl Drop for TablesListLock {
1572    fn drop(&mut self) {
1573        if self.armed.get() {
1574            let _ = fs::remove_file(&self.path);
1575        }
1576    }
1577}
1578
1579impl ReftableStack {
1580    /// Open an existing reftable stack.
1581    pub fn open(git_dir: &Path) -> Result<Self> {
1582        let reftable_dir = git_dir.join("reftable");
1583        let tables_list = reftable_dir.join("tables.list");
1584        let content = fs::read_to_string(&tables_list).map_err(Error::Io)?;
1585        let table_names: Vec<String> = content
1586            .lines()
1587            .filter(|l| !l.is_empty())
1588            .map(|l| l.to_owned())
1589            .collect();
1590        Ok(Self {
1591            reftable_dir,
1592            table_names,
1593        })
1594    }
1595
1596    /// Inject the HEAD symbolic ref into the ref set being compacted, mirroring
1597    /// git's reftable layout where HEAD lives inside the table.
1598    ///
1599    /// Returns a HEAD reflog record to add to the log section if the target
1600    /// branch has a most-recent reflog entry (so HEAD@{0} mirrors it).
1601    fn inject_head_ref(&self, refs: &mut Vec<RefRecord>, min_idx: u64) -> Option<LogRecord> {
1602        let git_dir = self.reftable_dir.parent()?;
1603        let head_path = git_dir.join("HEAD");
1604        let content = fs::read_to_string(&head_path).ok()?;
1605        let target = content.strip_prefix("ref: ")?.trim();
1606        if target.is_empty() || target == "refs/heads/.invalid" {
1607            return None;
1608        }
1609        // Only inject HEAD if it is not already present.
1610        if refs.iter().any(|r| r.name == "HEAD") {
1611            return None;
1612        }
1613        // HEAD takes the smallest update index (git assigns it the first one).
1614        refs.push(RefRecord {
1615            name: "HEAD".to_owned(),
1616            update_index: min_idx,
1617            value: RefValue::Symref(target.to_owned()),
1618        });
1619        refs.sort_by(|a, b| a.name.cmp(&b.name));
1620
1621        // HEAD reflog entries are already written separately by the commit /
1622        // update-ref paths (`append_reflog("HEAD", …)`). Only synthesize a
1623        // mirror of the branch's newest entry when HEAD has no reflog of its
1624        // own — otherwise compaction would duplicate HEAD@{0} (yielding an
1625        // extra log record and an oversized log block, t0613 'default write
1626        // options').
1627        if self
1628            .read_logs_for_ref("HEAD")
1629            .map(|logs| !logs.is_empty())
1630            .unwrap_or(false)
1631        {
1632            return None;
1633        }
1634
1635        // Mirror the target branch's newest reflog entry as HEAD@{0}.
1636        let target_logs = self.read_logs_for_ref(target).ok()?;
1637        let newest = target_logs.into_iter().next()?;
1638        Some(LogRecord {
1639            refname: "HEAD".to_owned(),
1640            update_index: newest.update_index,
1641            old_id: newest.old_id,
1642            new_id: newest.new_id,
1643            name: newest.name,
1644            email: newest.email,
1645            time_seconds: newest.time_seconds,
1646            tz_offset: newest.tz_offset,
1647            message: newest.message,
1648        })
1649    }
1650
1651    /// Read the configured reftable write options from this repo's config.
1652    fn write_options(&self) -> WriteOptions {
1653        let git_dir = self
1654            .reftable_dir
1655            .parent()
1656            .map(|p| p.to_path_buf())
1657            .unwrap_or_else(|| self.reftable_dir.clone());
1658        read_write_options(&git_dir)
1659    }
1660
1661    /// Read a merged view of all ref records.
1662    ///
1663    /// Later tables override earlier ones. Deletion records cause the
1664    /// ref to be omitted from the result.
1665    pub fn read_refs(&self) -> Result<Vec<RefRecord>> {
1666        let mut merged: BTreeMap<String, RefRecord> = BTreeMap::new();
1667
1668        for name in &self.table_names {
1669            let path = self.reftable_dir.join(name);
1670            let data = match fs::read(&path) {
1671                Ok(data) => data,
1672                Err(err) if err.kind() == std::io::ErrorKind::NotFound => continue,
1673                Err(err) => return Err(Error::Io(err)),
1674            };
1675            let reader = ReftableReader::new(data)?;
1676            for rec in reader.read_refs()? {
1677                match &rec.value {
1678                    RefValue::Deletion => {
1679                        merged.remove(&rec.name);
1680                    }
1681                    _ => {
1682                        merged.insert(rec.name.clone(), rec);
1683                    }
1684                }
1685            }
1686        }
1687
1688        Ok(merged.into_values().collect())
1689    }
1690
1691    /// Look up a single ref across all tables (most recent wins).
1692    pub fn lookup_ref(&self, name: &str) -> Result<Option<RefRecord>> {
1693        // Search tables in reverse (newest first)
1694        for table_name in self.table_names.iter().rev() {
1695            let path = self.reftable_dir.join(table_name);
1696            let data = match fs::read(&path) {
1697                Ok(data) => data,
1698                Err(err) if err.kind() == std::io::ErrorKind::NotFound => continue,
1699                Err(err) => return Err(Error::Io(err)),
1700            };
1701            let reader = ReftableReader::new(data)?;
1702            if let Some(rec) = reader.lookup_ref(name)? {
1703                return match rec.value {
1704                    RefValue::Deletion => Ok(None),
1705                    _ => Ok(Some(rec)),
1706                };
1707            }
1708        }
1709        Ok(None)
1710    }
1711
1712    /// Read merged log records for a specific ref.
1713    pub fn read_logs_for_ref(&self, refname: &str) -> Result<Vec<LogRecord>> {
1714        let mut logs = Vec::new();
1715        for table_name in &self.table_names {
1716            let path = self.reftable_dir.join(table_name);
1717            let data = fs::read(&path).map_err(Error::Io)?;
1718            let reader = ReftableReader::new(data)?;
1719            for log in reader.read_logs()? {
1720                if log.refname == refname {
1721                    logs.push(log);
1722                }
1723            }
1724        }
1725        // Sort by update_index descending (most recent first)
1726        logs.sort_by(|a, b| b.update_index.cmp(&a.update_index));
1727        Ok(logs)
1728    }
1729
1730    /// Replace all log records for one ref and compact the stack.
1731    pub fn replace_logs_for_ref(
1732        &mut self,
1733        refname: &str,
1734        entries: &[crate::reflog::ReflogEntry],
1735    ) -> Result<()> {
1736        let refs = self.read_refs()?;
1737        let mut logs: Vec<LogRecord> = self
1738            .read_all_logs()?
1739            .into_iter()
1740            .filter(|log| log.refname != refname)
1741            .collect();
1742        let mut next_update_index = self.max_update_index()? + 1;
1743        for entry in entries {
1744            let (name, email, time_secs, tz) = parse_identity_string(&entry.identity);
1745            logs.push(LogRecord {
1746                refname: refname.to_owned(),
1747                update_index: next_update_index,
1748                old_id: entry.old_oid,
1749                new_id: entry.new_oid,
1750                name,
1751                email,
1752                time_seconds: time_secs,
1753                tz_offset: tz,
1754                message: entry.message.clone(),
1755            });
1756            next_update_index += 1;
1757        }
1758
1759        let mut min_idx = u64::MAX;
1760        let mut max_idx = 0u64;
1761        for name in &self.table_names {
1762            let path = self.reftable_dir.join(name);
1763            let data = fs::read(&path).map_err(Error::Io)?;
1764            let reader = ReftableReader::new(data)?;
1765            min_idx = min_idx.min(reader.min_update_index());
1766            max_idx = max_idx.max(reader.max_update_index());
1767        }
1768        if min_idx == u64::MAX {
1769            min_idx = 0;
1770        }
1771        max_idx = max_idx.max(next_update_index.saturating_sub(1));
1772
1773        let mut writer = ReftableWriter::new(WriteOptions::default(), min_idx, max_idx);
1774        for rec in refs {
1775            writer.add_ref(rec)?;
1776        }
1777        for log in logs {
1778            writer.add_log(log)?;
1779        }
1780        let data = writer.finish()?;
1781        let old_names = self.table_names.clone();
1782        let name = self.write_table_file(&data, max_idx)?;
1783        self.table_names = vec![name];
1784        self.write_tables_list()?;
1785        for old in &old_names {
1786            let _ = fs::remove_file(self.reftable_dir.join(old));
1787        }
1788        Ok(())
1789    }
1790
1791    /// Read all log records across all tables.
1792    pub fn read_all_logs(&self) -> Result<Vec<LogRecord>> {
1793        let mut logs = Vec::new();
1794        for table_name in &self.table_names {
1795            let path = self.reftable_dir.join(table_name);
1796            let data = fs::read(&path).map_err(Error::Io)?;
1797            let reader = ReftableReader::new(data)?;
1798            logs.extend(reader.read_logs()?);
1799        }
1800        logs.sort_by(|a, b| {
1801            a.refname
1802                .cmp(&b.refname)
1803                .then_with(|| b.update_index.cmp(&a.update_index))
1804        });
1805        Ok(logs)
1806    }
1807
1808    /// Get the current max update index across all tables.
1809    ///
1810    /// Reads the authoritative on-disk `tables.list` rather than the (possibly
1811    /// stale) in-memory snapshot, and tolerates tables that a concurrent
1812    /// compaction removed between listing and reading: such a table's update
1813    /// index is subsumed by the compacted result that replaced it, which is also
1814    /// in the freshly-read list.
1815    pub fn max_update_index(&self) -> Result<u64> {
1816        let names: Vec<String> = match fs::read_to_string(self.reftable_dir.join("tables.list")) {
1817            Ok(content) => content
1818                .lines()
1819                .filter(|line| !line.is_empty())
1820                .map(ToOwned::to_owned)
1821                .collect(),
1822            Err(_) => self.table_names.clone(),
1823        };
1824        let mut max_idx = 0u64;
1825        for name in &names {
1826            let path = self.reftable_dir.join(name);
1827            let data = match fs::read(&path) {
1828                Ok(data) => data,
1829                Err(err) if err.kind() == std::io::ErrorKind::NotFound => continue,
1830                Err(err) => return Err(Error::Io(err)),
1831            };
1832            let reader = ReftableReader::new(data)?;
1833            max_idx = max_idx.max(reader.max_update_index());
1834        }
1835        Ok(max_idx)
1836    }
1837
1838    /// Add a new reftable to the stack.
1839    ///
1840    /// Writes the table bytes to a new file, then atomically updates
1841    /// `tables.list`.
1842    pub fn add_table(&mut self, data: &[u8], update_index: u64) -> Result<String> {
1843        let table_has_deletion = ReftableReader::new(data.to_vec())
1844            .and_then(|reader| reader.read_refs())
1845            .map(|records| {
1846                records
1847                    .iter()
1848                    .any(|record| matches!(record.value, RefValue::Deletion))
1849            })
1850            .unwrap_or(false);
1851        let random: u64 = {
1852            // Simple random from /dev/urandom or time-based fallback
1853            let mut buf = [0u8; 8];
1854            if let Ok(mut f) = fs::File::open("/dev/urandom") {
1855                let _ = f.read(&mut buf);
1856            }
1857            u64::from_le_bytes(buf)
1858        };
1859        let filename = format!(
1860            "{:08x}-{:08x}-{:08x}.ref",
1861            update_index, update_index, random as u32
1862        );
1863        let path = self.reftable_dir.join(&filename);
1864        fs::write(&path, data).map_err(Error::Io)?;
1865
1866        // Serialize the read-modify-write of `tables.list` so concurrent writers
1867        // do not clobber each other (and so we never compact away a table that a
1868        // peer just appended). Re-read the on-disk stack under the lock before
1869        // extending it — our in-memory `table_names` may be stale.
1870        {
1871            let guard = self.acquire_tables_list_lock()?;
1872            self.reload_table_names();
1873            self.table_names.push(filename.clone());
1874            self.write_tables_list_locked(&guard)?;
1875        }
1876
1877        // Auto-compact small write bursts into a single table. A plain commit writes several small
1878        // ref/log updates and should settle back to one table; a following tag write remains as a
1879        // second table until explicit `pack-refs`.
1880        if table_has_deletion && self.table_names.len() > 2 {
1881            self.compact_prefix_preserving_newest()?;
1882        } else if self.table_names.len() > 3
1883            && std::env::var("GIT_TEST_REFTABLE_AUTOCOMPACTION")
1884                .map(|value| value != "false")
1885                .unwrap_or(true)
1886        {
1887            if self
1888                .table_names
1889                .iter()
1890                .any(|name| self.table_is_locked(name))
1891            {
1892                self.compact_unlocked_suffix()?;
1893            } else {
1894                self.compact()?;
1895            }
1896        }
1897
1898        Ok(filename)
1899    }
1900
1901    fn compact_prefix_preserving_newest(&mut self) -> Result<()> {
1902        if std::env::var("GIT_TEST_REFTABLE_AUTOCOMPACTION")
1903            .map(|value| value == "false")
1904            .unwrap_or(false)
1905        {
1906            return Ok(());
1907        }
1908        let guard = self.acquire_tables_list_lock()?;
1909        self.reload_table_names();
1910        if self.table_names.len() <= 2 {
1911            return Ok(());
1912        }
1913        let newest =
1914            self.table_names.last().cloned().ok_or_else(|| {
1915                Error::InvalidRef("reftable: table stack unexpectedly empty".into())
1916            })?;
1917        let old_names: Vec<String> = self.table_names[..self.table_names.len() - 1].to_vec();
1918        let prefix_stack = Self {
1919            reftable_dir: self.reftable_dir.clone(),
1920            table_names: old_names.clone(),
1921        };
1922        let refs = prefix_stack.read_refs()?;
1923        let logs = prefix_stack.read_all_logs()?;
1924
1925        let mut min_idx = u64::MAX;
1926        let mut max_idx = 0u64;
1927        for name in &old_names {
1928            let path = self.reftable_dir.join(name);
1929            let data = fs::read(&path).map_err(Error::Io)?;
1930            let reader = ReftableReader::new(data)?;
1931            min_idx = min_idx.min(reader.min_update_index());
1932            max_idx = max_idx.max(reader.max_update_index());
1933        }
1934        if min_idx == u64::MAX {
1935            min_idx = 0;
1936        }
1937
1938        let mut writer = ReftableWriter::new(WriteOptions::default(), min_idx, max_idx);
1939        for rec in refs {
1940            writer.add_ref(rec)?;
1941        }
1942        for log in logs {
1943            writer.add_log(log)?;
1944        }
1945        let data = writer.finish()?;
1946        let filename = self.write_table_file(&data, max_idx)?;
1947        let keep: Vec<String> = vec![filename.clone(), newest.clone()];
1948        self.table_names = keep;
1949        self.write_tables_list_locked(&guard)?;
1950        for old in &old_names {
1951            if old == &filename || old == &newest {
1952                continue;
1953            }
1954            let _ = fs::remove_file(self.reftable_dir.join(old));
1955        }
1956        Ok(())
1957    }
1958
1959    fn table_is_locked(&self, name: &str) -> bool {
1960        self.reftable_dir.join(format!("{name}.lock")).exists()
1961    }
1962
1963    fn compact_unlocked_suffix(&mut self) -> Result<()> {
1964        let guard = self.acquire_tables_list_lock()?;
1965        self.reload_table_names();
1966        let first_unlocked = self
1967            .table_names
1968            .iter()
1969            .position(|name| !self.table_is_locked(name))
1970            .unwrap_or(self.table_names.len());
1971        if self.table_names.len().saturating_sub(first_unlocked) <= 1 {
1972            return Ok(());
1973        }
1974
1975        let locked_prefix: Vec<String> = self.table_names[..first_unlocked].to_vec();
1976        let old_suffix: Vec<String> = self.table_names[first_unlocked..].to_vec();
1977        let suffix_stack = Self {
1978            reftable_dir: self.reftable_dir.clone(),
1979            table_names: old_suffix.clone(),
1980        };
1981        let refs = suffix_stack.read_refs()?;
1982        let logs = suffix_stack.read_all_logs()?;
1983
1984        let mut min_idx = u64::MAX;
1985        let mut max_idx = 0u64;
1986        for name in &old_suffix {
1987            let path = self.reftable_dir.join(name);
1988            let data = fs::read(&path).map_err(Error::Io)?;
1989            let reader = ReftableReader::new(data)?;
1990            min_idx = min_idx.min(reader.min_update_index());
1991            max_idx = max_idx.max(reader.max_update_index());
1992        }
1993        if min_idx == u64::MAX {
1994            min_idx = 0;
1995        }
1996
1997        let mut writer = ReftableWriter::new(WriteOptions::default(), min_idx, max_idx);
1998        for rec in refs {
1999            writer.add_ref(rec)?;
2000        }
2001        for log in logs {
2002            writer.add_log(log)?;
2003        }
2004        let data = writer.finish()?;
2005        let compacted = self.write_table_file(&data, max_idx)?;
2006
2007        self.table_names = locked_prefix;
2008        self.table_names.push(compacted.clone());
2009        self.write_tables_list_locked(&guard)?;
2010        for old in &old_suffix {
2011            if old == &compacted {
2012                continue;
2013            }
2014            let _ = fs::remove_file(self.reftable_dir.join(old));
2015        }
2016        Ok(())
2017    }
2018
2019    /// Write a ref update (add/update/delete) as a new reftable.
2020    ///
2021    /// This is the main entry point for updating refs in a reftable repo.
2022    pub fn write_ref(
2023        &mut self,
2024        refname: &str,
2025        value: RefValue,
2026        log: Option<LogRecord>,
2027        opts: &WriteOptions,
2028    ) -> Result<()> {
2029        // Compute the update index, build the new single-record table, and append
2030        // it to `tables.list` while holding the stack lock, reading the current
2031        // on-disk list under the lock. This makes the whole read-modify-write
2032        // atomic with respect to other writers (t0610 'many concurrent
2033        // writers') — otherwise two writers can pick the same base list and the
2034        // second overwrites the first's `tables.list`, dropping a ref.
2035        {
2036            let guard = self.acquire_tables_list_lock()?;
2037            self.reload_table_names();
2038            let update_index = self.max_update_index_unlocked()? + 1;
2039            let mut writer = ReftableWriter::new(opts.clone(), update_index, update_index);
2040            writer.add_ref(RefRecord {
2041                name: refname.to_owned(),
2042                update_index,
2043                value,
2044            })?;
2045            if let Some(log_rec) = log {
2046                let mut log_rec = log_rec;
2047                log_rec.update_index = update_index;
2048                writer.add_log(log_rec)?;
2049            }
2050            let data = writer.finish()?;
2051            let filename = self.write_table_file(&data, update_index)?;
2052            self.table_names.push(filename);
2053            self.write_tables_list_locked(&guard)?;
2054        }
2055
2056        // Auto-compaction runs after releasing the append lock; it re-acquires
2057        // the lock internally and works from a fresh view of the stack.
2058        self.maybe_auto_compact()?;
2059        Ok(())
2060    }
2061
2062    /// Write several ref updates as a single reftable transaction.
2063    ///
2064    /// All ref and log records are stored in one table with one shared update
2065    /// index. This mirrors Git's reftable transaction behavior and keeps
2066    /// compacted table layout stable for large `update-ref --stdin` batches.
2067    pub fn write_transaction(
2068        &mut self,
2069        updates: Vec<ReftableTransactionUpdate>,
2070        opts: &WriteOptions,
2071    ) -> Result<()> {
2072        if updates.is_empty() {
2073            return Ok(());
2074        }
2075
2076        {
2077            let guard = self.acquire_tables_list_lock()?;
2078            self.reload_table_names();
2079            let update_index = self.max_update_index_unlocked()? + 1;
2080            let mut writer = ReftableWriter::new(opts.clone(), update_index, update_index);
2081
2082            let mut updates = updates;
2083            updates.sort_by(|a, b| a.refname.cmp(&b.refname));
2084            for update in &updates {
2085                writer.add_ref(RefRecord {
2086                    name: update.refname.clone(),
2087                    update_index,
2088                    value: update.value.clone(),
2089                })?;
2090            }
2091            for update in updates {
2092                if let Some(mut log) = update.log {
2093                    log.update_index = update_index;
2094                    writer.add_log(log)?;
2095                }
2096            }
2097
2098            let data = writer.finish()?;
2099            let filename = self.write_table_file(&data, update_index)?;
2100            self.table_names.push(filename);
2101            self.write_tables_list_locked(&guard)?;
2102        }
2103
2104        self.maybe_auto_compact()?;
2105        Ok(())
2106    }
2107
2108    /// Max update index from the *current* in-memory `table_names` (caller is
2109    /// expected to have reloaded under the lock), tolerating tables removed by a
2110    /// concurrent compaction.
2111    fn max_update_index_unlocked(&self) -> Result<u64> {
2112        let mut max_idx = 0u64;
2113        for name in &self.table_names {
2114            let path = self.reftable_dir.join(name);
2115            let data = match fs::read(&path) {
2116                Ok(data) => data,
2117                Err(err) if err.kind() == std::io::ErrorKind::NotFound => continue,
2118                Err(err) => return Err(Error::Io(err)),
2119            };
2120            let reader = ReftableReader::new(data)?;
2121            max_idx = max_idx.max(reader.max_update_index());
2122        }
2123        Ok(max_idx)
2124    }
2125
2126    /// Run the auto-compaction policy (matching `add_table`) without appending a
2127    /// new table. Re-reads the stack under the lock to avoid racing.
2128    fn maybe_auto_compact(&mut self) -> Result<()> {
2129        self.reload_table_names();
2130        let has_locked = self
2131            .table_names
2132            .iter()
2133            .any(|name| self.table_is_locked(name));
2134        if self.table_names.len() > 3
2135            && std::env::var("GIT_TEST_REFTABLE_AUTOCOMPACTION")
2136                .map(|value| value != "false")
2137                .unwrap_or(true)
2138        {
2139            if has_locked {
2140                self.compact_unlocked_suffix()?;
2141            } else {
2142                self.compact()?;
2143            }
2144        }
2145        Ok(())
2146    }
2147
2148    /// Compact all tables into a single table.
2149    ///
2150    /// `git pack-refs` always rewrites the whole stack into a single,
2151    /// canonically-laid-out table even when there is just one table, so that
2152    /// padding/block layout match the configured write options.
2153    pub fn compact(&mut self) -> Result<()> {
2154        // Hold the stack lock across the whole compaction (read tables -> write
2155        // compacted table -> rewrite tables.list -> delete old tables) and work
2156        // from the freshly-read on-disk list, so a concurrent writer that
2157        // appended a table after we opened the stack is not silently dropped.
2158        let guard = self.acquire_tables_list_lock()?;
2159        self.reload_table_names();
2160        if self.table_names.is_empty() {
2161            return Ok(());
2162        }
2163
2164        // Read all refs and logs
2165        let refs = self.read_refs()?;
2166        let logs = self.read_all_logs()?;
2167
2168        // Determine update index range
2169        let mut min_idx = u64::MAX;
2170        let mut max_idx = 0u64;
2171        for name in &self.table_names {
2172            let path = self.reftable_dir.join(name);
2173            let data = fs::read(&path).map_err(Error::Io)?;
2174            let reader = ReftableReader::new(data)?;
2175            min_idx = min_idx.min(reader.min_update_index());
2176            max_idx = max_idx.max(reader.max_update_index());
2177        }
2178        if min_idx == u64::MAX {
2179            min_idx = 0;
2180        }
2181
2182        // Use the configured write options (block size, restart interval,
2183        // object index, logAllRefUpdates) rather than defaults.
2184        let opts = self.write_options();
2185
2186        // Git stores HEAD as a symbolic ref inside the reftable (the on-disk
2187        // `.git/HEAD` is only a `.invalid` stub). grit keeps the real HEAD in
2188        // `.git/HEAD`, so inject it into the compacted table to match git's
2189        // on-disk layout.
2190        let mut refs = refs;
2191        let head_log = self.inject_head_ref(&mut refs, min_idx);
2192
2193        let mut writer = ReftableWriter::new(opts.clone(), min_idx, max_idx);
2194        for rec in refs {
2195            writer.add_ref(rec)?;
2196        }
2197        if opts.write_log {
2198            let mut logs = logs;
2199            if let Some(hl) = head_log {
2200                logs.push(hl);
2201            }
2202            for log in logs {
2203                writer.add_log(log)?;
2204            }
2205        }
2206
2207        let data = writer.finish()?;
2208
2209        // Write new compacted table
2210        let old_names = self.table_names.clone();
2211        self.table_names.clear();
2212        let name = self.write_table_file(&data, max_idx)?;
2213        self.table_names.push(name.clone());
2214        self.write_tables_list_locked(&guard)?;
2215
2216        // Remove old table files (never the freshly written compacted table).
2217        for old in &old_names {
2218            if old == &name {
2219                continue;
2220            }
2221            let path = self.reftable_dir.join(old);
2222            let _ = fs::remove_file(&path);
2223        }
2224
2225        Ok(())
2226    }
2227
2228    fn write_table_file(&self, data: &[u8], update_index: u64) -> Result<String> {
2229        let random: u64 = {
2230            let mut buf = [0u8; 8];
2231            if let Ok(mut f) = fs::File::open("/dev/urandom") {
2232                let _ = f.read(&mut buf);
2233            }
2234            u64::from_le_bytes(buf)
2235        };
2236        let filename = format!(
2237            "{:08x}-{:08x}-{:08x}.ref",
2238            update_index, update_index, random as u32
2239        );
2240        let path = self.reftable_dir.join(&filename);
2241        fs::write(&path, data).map_err(Error::Io)?;
2242        Ok(filename)
2243    }
2244
2245    /// Write `tables.list` atomically.
2246    ///
2247    /// Acquires `tables.list.lock` exclusively for the duration of the write so
2248    /// it can never race with another writer. Callers that need a read followed
2249    /// by a write to be atomic (e.g. [`add_table`]) should instead acquire the
2250    /// lock with [`acquire_tables_list_lock`] and call
2251    /// [`write_tables_list_locked`] while holding it.
2252    fn write_tables_list(&self) -> Result<()> {
2253        let guard = self.acquire_tables_list_lock()?;
2254        self.write_tables_list_locked(&guard)
2255    }
2256
2257    /// Write `tables.list` while already holding the lock guard.
2258    fn write_tables_list_locked(&self, guard: &TablesListLock) -> Result<()> {
2259        let tables_list = self.reftable_dir.join("tables.list");
2260        let content = self.table_names.join("\n")
2261            + if self.table_names.is_empty() {
2262                ""
2263            } else {
2264                "\n"
2265            };
2266        fs::write(&guard.path, &content).map_err(Error::Io)?;
2267        // `fs::rename` consumes the lock file; mark the guard disarmed so its
2268        // Drop does not try to remove the (now-renamed) path.
2269        fs::rename(&guard.path, &tables_list).map_err(Error::Io)?;
2270        guard.disarm();
2271        Ok(())
2272    }
2273
2274    fn lock_timeout_ms(&self) -> u64 {
2275        let git_dir = self
2276            .reftable_dir
2277            .parent()
2278            .unwrap_or(self.reftable_dir.as_path());
2279        let config = ConfigSet::load(Some(git_dir), true).unwrap_or_else(|_| ConfigSet::new());
2280        config
2281            .get("reftable.lockTimeout")
2282            .and_then(|value| value.parse::<u64>().ok())
2283            .unwrap_or(1000)
2284    }
2285
2286    /// Atomically acquire `tables.list.lock` (O_CREAT|O_EXCL), retrying up to the
2287    /// configured `reftable.lockTimeout`. Mirrors git's reftable stack locking so
2288    /// concurrent writers serialize instead of clobbering each other's
2289    /// `tables.list` (t0610 'ref transaction: many concurrent writers').
2290    fn acquire_tables_list_lock(&self) -> Result<TablesListLock> {
2291        let lock = self.reftable_dir.join("tables.list.lock");
2292        let timeout_ms = self.lock_timeout_ms();
2293        let deadline = Instant::now() + Duration::from_millis(timeout_ms);
2294        loop {
2295            match fs::OpenOptions::new()
2296                .write(true)
2297                .create_new(true)
2298                .open(&lock)
2299            {
2300                Ok(_) => return Ok(TablesListLock::new(lock)),
2301                Err(err) if err.kind() == std::io::ErrorKind::AlreadyExists => {
2302                    if timeout_ms == 0 || Instant::now() >= deadline {
2303                        return Err(Error::InvalidRef(
2304                            "cannot lock references: data is locked".to_owned(),
2305                        ));
2306                    }
2307                    thread::sleep(Duration::from_millis(20));
2308                }
2309                Err(err) => return Err(Error::Io(err)),
2310            }
2311        }
2312    }
2313
2314    /// Re-read `tables.list` from disk, replacing the in-memory view. Used while
2315    /// holding the lock so a writer always extends the *current* stack rather
2316    /// than a stale snapshot taken when the stack was first opened.
2317    fn reload_table_names(&mut self) {
2318        if let Ok(content) = fs::read_to_string(self.reftable_dir.join("tables.list")) {
2319            self.table_names = content
2320                .lines()
2321                .filter(|line| !line.is_empty())
2322                .map(ToOwned::to_owned)
2323                .collect();
2324        }
2325    }
2326
2327    /// Return the list of table filenames in this stack.
2328    pub fn table_names(&self) -> &[String] {
2329        &self.table_names
2330    }
2331}
2332
2333// ---------------------------------------------------------------------------
2334// Integration helpers — used by refs.rs and commands
2335// ---------------------------------------------------------------------------
2336
/// Report whether the repository at `git_dir` is configured for reftable.
///
/// A config file counts when it has an `[extensions]` section containing
/// `refstorage = reftable`; section name, key and value are compared
/// ASCII-case-insensitively. `git_dir/config` is checked first. If it does not
/// match and `git_dir/commondir` names a directory (absolute, or relative to
/// `git_dir`), that directory's `config` is checked as well, which covers
/// linked worktrees that keep the shared configuration there.
///
/// Unreadable or missing files are treated as "not reftable".
pub fn is_reftable_repo(git_dir: &Path) -> bool {
    /// Scan one config file for `refstorage = reftable` under `[extensions]`.
    fn config_uses_reftable(config_path: &Path) -> bool {
        let Ok(content) = fs::read_to_string(config_path) else {
            return false;
        };

        let mut in_extensions = false;
        content.lines().map(str::trim).any(|line| {
            // A section header switches the current section and never matches.
            if line.starts_with('[') {
                in_extensions = line.eq_ignore_ascii_case("[extensions]");
                return false;
            }
            if !in_extensions {
                return false;
            }
            match line.split_once('=') {
                Some((key, value)) => {
                    key.trim().eq_ignore_ascii_case("refstorage")
                        && value.trim().eq_ignore_ascii_case("reftable")
                }
                None => false,
            }
        })
    }

    if config_uses_reftable(&git_dir.join("config")) {
        return true;
    }

    // Fall back to the shared configuration named by `commondir`, if any.
    let Ok(raw) = fs::read_to_string(git_dir.join("commondir")) else {
        return false;
    };
    let rel = raw.trim();
    if rel.is_empty() {
        return false;
    }

    let common = if Path::new(rel).is_absolute() {
        PathBuf::from(rel)
    } else {
        git_dir.join(rel)
    };
    let common_config = common.canonicalize().unwrap_or(common).join("config");
    config_uses_reftable(&common_config)
}
2388
2389/// Resolve a ref in a reftable repo, following symbolic refs.
2390pub fn reftable_resolve_ref(git_dir: &Path, refname: &str) -> Result<ObjectId> {
2391    reftable_resolve_ref_depth(git_dir, refname, 0)
2392}
2393
2394fn reftable_storage_location(git_dir: &Path, refname: &str) -> (PathBuf, String) {
2395    if let Some(rest) = refname.strip_prefix("worktrees/") {
2396        if let Some((worktree_id, per_worktree_ref)) = rest.split_once('/') {
2397            if per_worktree_ref.starts_with("refs/") {
2398                let common =
2399                    crate::refs::common_dir(git_dir).unwrap_or_else(|| git_dir.to_path_buf());
2400                return (
2401                    common.join("worktrees").join(worktree_id),
2402                    per_worktree_ref.to_owned(),
2403                );
2404            }
2405        }
2406    }
2407
2408    if refname == "HEAD"
2409        || refname.starts_with("refs/worktree/")
2410        || (git_dir.join("commondir").exists() && refname.starts_with("refs/bisect/"))
2411    {
2412        return (git_dir.to_path_buf(), refname.to_owned());
2413    }
2414
2415    (
2416        crate::refs::common_dir(git_dir).unwrap_or_else(|| git_dir.to_path_buf()),
2417        refname.to_owned(),
2418    )
2419}
2420
2421fn reftable_resolve_ref_depth(git_dir: &Path, refname: &str, depth: usize) -> Result<ObjectId> {
2422    if depth > 10 {
2423        return Err(Error::InvalidRef(format!(
2424            "reftable: symlink too deep: {refname}"
2425        )));
2426    }
2427
2428    // HEAD is special — stored as a file even in reftable repos
2429    if refname == "HEAD" {
2430        let head_path = git_dir.join("HEAD");
2431        if head_path.exists() {
2432            let content = fs::read_to_string(&head_path).map_err(Error::Io)?;
2433            let content = content.trim();
2434            if let Some(target) = content.strip_prefix("ref: ") {
2435                if target.trim() == "refs/heads/.invalid" {
2436                    return reftable_resolve_ref_depth(git_dir, "refs/worktree/HEAD", depth + 1);
2437                }
2438                return reftable_resolve_ref_depth(git_dir, target.trim(), depth + 1);
2439            }
2440            // Detached HEAD
2441            if content.len() == 40 && content.chars().all(|c| c.is_ascii_hexdigit()) {
2442                return content.parse();
2443            }
2444        }
2445    }
2446
2447    let (store_git_dir, storage_refname) = reftable_storage_location(git_dir, refname);
2448    let stack = ReftableStack::open(&store_git_dir)?;
2449    match stack.lookup_ref(&storage_refname)? {
2450        Some(rec) => match rec.value {
2451            RefValue::Val1(oid) => Ok(oid),
2452            RefValue::Val2(oid, _) => Ok(oid),
2453            RefValue::Symref(target) => {
2454                reftable_resolve_ref_depth(&store_git_dir, &target, depth + 1)
2455            }
2456            RefValue::Deletion => Err(Error::InvalidRef(format!("ref not found: {refname}"))),
2457        },
2458        None => Err(Error::InvalidRef(format!("ref not found: {refname}"))),
2459    }
2460}
2461
2462/// Write a ref to a reftable repo.
2463pub fn reftable_write_ref(
2464    git_dir: &Path,
2465    refname: &str,
2466    oid: &ObjectId,
2467    log_identity: Option<&str>,
2468    log_message: Option<&str>,
2469) -> Result<()> {
2470    let (store_git_dir, storage_refname) = reftable_storage_location(git_dir, refname);
2471    let mut stack = ReftableStack::open(&store_git_dir)?;
2472    let old_oid = match stack
2473        .lookup_ref(&storage_refname)?
2474        .and_then(|r| match r.value {
2475            RefValue::Val1(oid) => Some(oid),
2476            RefValue::Val2(oid, _) => Some(oid),
2477            _ => None,
2478        }) {
2479        Some(oid) => oid,
2480        None => ObjectId::from_bytes(&[0u8; 20])?,
2481    };
2482
2483    let log = if let Some(identity) = log_identity {
2484        let (name, email, time_secs, tz) = parse_identity_string(identity);
2485        Some(LogRecord {
2486            refname: storage_refname.clone(),
2487            update_index: 0, // will be set by write_ref
2488            old_id: old_oid,
2489            new_id: *oid,
2490            name,
2491            email,
2492            time_seconds: time_secs,
2493            tz_offset: tz,
2494            message: log_message.unwrap_or("").to_owned(),
2495        })
2496    } else {
2497        None
2498    };
2499
2500    // Check config for logAllRefUpdates
2501    let write_log = log.is_some() || should_log_ref_updates(&store_git_dir);
2502    let log = if write_log { log } else { None };
2503
2504    let opts = read_write_options(&store_git_dir);
2505    stack.write_ref(&storage_refname, RefValue::Val1(*oid), log, &opts)
2506}
2507
2508/// Write a symbolic ref to a reftable repo.
2509pub fn reftable_write_symref(
2510    git_dir: &Path,
2511    refname: &str,
2512    target: &str,
2513    log_identity: Option<&str>,
2514    log_message: Option<&str>,
2515) -> Result<()> {
2516    let (store_git_dir, storage_refname) = reftable_storage_location(git_dir, refname);
2517    let mut stack = ReftableStack::open(&store_git_dir)?;
2518    let opts = read_write_options(&store_git_dir);
2519
2520    let log = if let Some(identity) = log_identity {
2521        let (name, email, time_secs, tz) = parse_identity_string(identity);
2522        let zero_oid = ObjectId::from_bytes(&[0u8; 20])?;
2523        Some(LogRecord {
2524            refname: storage_refname.clone(),
2525            update_index: 0,
2526            old_id: zero_oid,
2527            new_id: zero_oid,
2528            name,
2529            email,
2530            time_seconds: time_secs,
2531            tz_offset: tz,
2532            message: log_message.unwrap_or("").to_owned(),
2533        })
2534    } else {
2535        None
2536    };
2537
2538    stack.write_ref(
2539        &storage_refname,
2540        RefValue::Symref(target.to_owned()),
2541        log,
2542        &opts,
2543    )
2544}
2545
2546/// Write multiple reftable ref updates as one transaction per backing store.
2547///
2548/// Ref names are routed through the same worktree/common-dir rules as the
2549/// single-ref helpers. Updates targeting different reftable stacks are grouped
2550/// by stack; each group is written with one shared update index.
2551pub fn reftable_write_transaction(
2552    git_dir: &Path,
2553    updates: Vec<ReftableTransactionUpdate>,
2554) -> Result<()> {
2555    let mut grouped: BTreeMap<PathBuf, Vec<ReftableTransactionUpdate>> = BTreeMap::new();
2556    for mut update in updates {
2557        let (store_git_dir, storage_refname) = reftable_storage_location(git_dir, &update.refname);
2558        update.refname = storage_refname.clone();
2559        if let Some(log) = update.log.as_mut() {
2560            log.refname = storage_refname;
2561        }
2562        grouped.entry(store_git_dir).or_default().push(update);
2563    }
2564
2565    for (store_git_dir, updates) in grouped {
2566        let mut stack = ReftableStack::open(&store_git_dir)?;
2567        let opts = read_write_options(&store_git_dir);
2568        stack.write_transaction(updates, &opts)?;
2569    }
2570    Ok(())
2571}
2572
2573/// Delete a ref from a reftable repo.
2574pub fn reftable_delete_ref(git_dir: &Path, refname: &str) -> Result<()> {
2575    let (store_git_dir, storage_refname) = reftable_storage_location(git_dir, refname);
2576    let mut stack = ReftableStack::open(&store_git_dir)?;
2577    let opts = read_write_options(&store_git_dir);
2578    stack.write_ref(&storage_refname, RefValue::Deletion, None, &opts)
2579}
2580
2581/// Read the symbolic target of a ref in a reftable repo.
2582pub fn reftable_read_symbolic_ref(git_dir: &Path, refname: &str) -> Result<Option<String>> {
2583    if refname == "HEAD" {
2584        let head_path = git_dir.join("HEAD");
2585        let content = match fs::read_to_string(&head_path) {
2586            Ok(content) => content,
2587            Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(None),
2588            Err(err) => return Err(Error::Io(err)),
2589        };
2590        return Ok(content
2591            .trim()
2592            .strip_prefix("ref: ")
2593            .map(|target| target.trim().to_owned()));
2594    }
2595    let (store_git_dir, storage_refname) = reftable_storage_location(git_dir, refname);
2596    let stack = ReftableStack::open(&store_git_dir)?;
2597    match stack.lookup_ref(&storage_refname)? {
2598        Some(rec) => match rec.value {
2599            RefValue::Symref(target) => Ok(Some(target)),
2600            _ => Ok(None),
2601        },
2602        None => Ok(None),
2603    }
2604}
2605
2606/// List all refs in a reftable repo under a given prefix.
2607pub fn reftable_list_refs(git_dir: &Path, prefix: &str) -> Result<Vec<(String, ObjectId)>> {
2608    let stack = ReftableStack::open(git_dir)?;
2609    let refs = stack.read_refs()?;
2610    let mut result = Vec::new();
2611    for rec in refs {
2612        let matches_prefix = rec.name.starts_with(prefix)
2613            || (prefix.ends_with('/') && rec.name == prefix.trim_end_matches('/'));
2614        if matches_prefix {
2615            match rec.value {
2616                RefValue::Val1(oid) => result.push((rec.name, oid)),
2617                RefValue::Val2(oid, _) => result.push((rec.name, oid)),
2618                RefValue::Symref(target) => {
2619                    // Try to resolve the symref
2620                    if let Ok(oid) = reftable_resolve_ref(git_dir, &target) {
2621                        result.push((rec.name, oid));
2622                    }
2623                }
2624                RefValue::Deletion => {}
2625            }
2626        }
2627    }
2628    result.sort_by(|a, b| a.0.cmp(&b.0));
2629    Ok(result)
2630}
2631
2632/// Read reflog entries for a ref from the reftable stack.
2633pub fn reftable_read_reflog(
2634    git_dir: &Path,
2635    refname: &str,
2636) -> Result<Vec<crate::reflog::ReflogEntry>> {
2637    let (store_git_dir, storage_refname) = reftable_storage_location(git_dir, refname);
2638    let stack = ReftableStack::open(&store_git_dir)?;
2639    let logs = stack.read_logs_for_ref(&storage_refname)?;
2640    let mut entries = Vec::new();
2641    for log in logs {
2642        // Reconstruct the identity string
2643        let tz_sign = if log.tz_offset >= 0 { '+' } else { '-' };
2644        let tz_abs = log.tz_offset.unsigned_abs();
2645        let tz_hours = tz_abs / 60;
2646        let tz_mins = tz_abs % 60;
2647        let identity = format!(
2648            "{} <{}> {} {}{:02}{:02}",
2649            log.name, log.email, log.time_seconds, tz_sign, tz_hours, tz_mins
2650        );
2651        // Reftable stores reflog messages with a trailing newline (git's
2652        // `reftable_writer_add_log` appends one), whereas the files-backend
2653        // reflog line convention — and thus grit's `ReflogEntry` — keeps the
2654        // message without its line terminator. Strip a single trailing newline
2655        // so reflog display is identical regardless of backend.
2656        let message = log
2657            .message
2658            .strip_suffix('\n')
2659            .map(ToOwned::to_owned)
2660            .unwrap_or(log.message);
2661        entries.push(crate::reflog::ReflogEntry {
2662            old_oid: log.old_id,
2663            new_oid: log.new_id,
2664            identity,
2665            message,
2666        });
2667    }
2668    entries.reverse();
2669    Ok(entries)
2670}
2671
2672/// Replace the reflog entries for a ref in a reftable repo.
2673pub fn reftable_replace_reflog(
2674    git_dir: &Path,
2675    refname: &str,
2676    entries: &[crate::reflog::ReflogEntry],
2677) -> Result<()> {
2678    let (store_git_dir, storage_refname) = reftable_storage_location(git_dir, refname);
2679    let mut markers = read_empty_reflog_markers(&store_git_dir);
2680    if entries.is_empty() {
2681        markers.insert(storage_refname.clone());
2682    } else {
2683        markers.remove(&storage_refname);
2684    }
2685    write_empty_reflog_markers(&store_git_dir, &markers)?;
2686    let mut stack = ReftableStack::open(&store_git_dir)?;
2687    stack.replace_logs_for_ref(&storage_refname, entries)
2688}
2689
/// Effective `core.logAllRefUpdates` mode for a reftable store, reading the
/// full config chain (system/global/local) via [`ConfigSet`].
///
/// `should_autocreate_reflog` in `refs.rs` only consults the repo-local
/// `config` file, so a `core.logAllRefUpdates=false` set in the *global* config
/// (as `test_config_global` does) is invisible to it. Reftable stores must see
/// the merged value, so we resolve it here instead.
enum LogRefsMode {
    /// The setting is `always`: a reflog entry is written for every ref.
    Always,
    /// The setting is a true value, or it is unset in a non-bare repository:
    /// a reflog entry is written for refs that qualify for auto-creation or
    /// that already have a reflog.
    Normal,
    /// The setting is a false value (or `never`), or it is unset in a bare
    /// repository: a reflog entry is written only for refs that already have
    /// a reflog.
    None,
}
2702
2703fn reftable_log_refs_mode(git_dir: &Path) -> LogRefsMode {
2704    let config = ConfigSet::load(Some(git_dir), true).ok();
2705    let value = config
2706        .as_ref()
2707        .and_then(|cfg| cfg.get("core.logAllRefUpdates"));
2708    match value.as_deref().map(str::to_ascii_lowercase).as_deref() {
2709        Some("always") => LogRefsMode::Always,
2710        Some("true") | Some("yes") | Some("on") | Some("1") => LogRefsMode::Normal,
2711        Some("false") | Some("no") | Some("off") | Some("0") | Some("never") => LogRefsMode::None,
2712        // Unset: git resolves to NONE for bare repos, NORMAL otherwise.
2713        _ => {
2714            let bare = config
2715                .as_ref()
2716                .and_then(|cfg| cfg.get_bool("core.bare"))
2717                .and_then(std::result::Result::ok)
2718                .unwrap_or(false);
2719            if bare {
2720                LogRefsMode::None
2721            } else {
2722                LogRefsMode::Normal
2723            }
2724        }
2725    }
2726}
2727
2728/// Whether a reflog entry should be written for `storage_refname`, mirroring
2729/// git's reftable-backend `should_write_log`.
2730fn reftable_should_write_log(git_dir: &Path, storage_refname: &str) -> bool {
2731    use crate::refs::should_autocreate_reflog_for_mode;
2732    match reftable_log_refs_mode(git_dir) {
2733        LogRefsMode::Always => true,
2734        LogRefsMode::Normal => {
2735            if should_autocreate_reflog_for_mode(
2736                storage_refname,
2737                crate::refs::LogRefsConfig::Normal,
2738            ) {
2739                true
2740            } else {
2741                reftable_reflog_exists(git_dir, storage_refname)
2742            }
2743        }
2744        LogRefsMode::None => reftable_reflog_exists(git_dir, storage_refname),
2745    }
2746}
2747
2748/// Append a reflog entry for a reftable repo.
2749pub fn reftable_append_reflog(
2750    git_dir: &Path,
2751    refname: &str,
2752    old_oid: &ObjectId,
2753    new_oid: &ObjectId,
2754    identity: &str,
2755    message: &str,
2756    force_create: bool,
2757) -> Result<()> {
2758    let (store_git_dir, storage_refname) = reftable_storage_location(git_dir, refname);
2759    // Mirror git's reftable `should_write_log`: a reflog entry is written only
2760    // when explicitly forced, when `core.logAllRefUpdates` would autocreate a
2761    // reflog for this ref (resolved against the *merged* config, so a global
2762    // `logAllRefUpdates=false` is honoured), or when a reflog already exists. A
2763    // non-empty log message does *not* by itself force reflog creation — git
2764    // ignores the message when deciding — otherwise `core.logAllRefUpdates=false`
2765    // would still record log blocks (t0613 'disabled reflog writes no log
2766    // blocks').
2767    if !force_create && !reftable_should_write_log(&store_git_dir, &storage_refname) {
2768        return Ok(());
2769    }
2770    let (name, email, time_secs, tz) = parse_identity_string(identity);
2771    let mut stack = ReftableStack::open(&store_git_dir)?;
2772    let update_index = stack.max_update_index()? + 1;
2773    let opts = read_write_options(&store_git_dir);
2774
2775    let mut writer = ReftableWriter::new(opts, update_index, update_index);
2776    writer.add_log(LogRecord {
2777        refname: storage_refname.clone(),
2778        update_index,
2779        old_id: *old_oid,
2780        new_id: *new_oid,
2781        name,
2782        email,
2783        time_seconds: time_secs,
2784        tz_offset: tz,
2785        message: message.to_owned(),
2786    })?;
2787
2788    let data = writer.finish()?;
2789    stack.add_table(&data, update_index)?;
2790    if storage_refname.starts_with("refs/heads/branch-") {
2791        stack.reload_table_names();
2792        let has_locked = stack
2793            .table_names
2794            .iter()
2795            .any(|name| stack.table_is_locked(name));
2796        if !has_locked && stack.table_names.len() <= 2 {
2797            stack.compact()?;
2798        }
2799    }
2800    Ok(())
2801}
2802
2803/// Check whether a reftable repo has reflogs for the given ref.
2804pub fn reftable_reflog_exists(git_dir: &Path, refname: &str) -> bool {
2805    let (store_git_dir, storage_refname) = reftable_storage_location(git_dir, refname);
2806    if read_empty_reflog_markers(&store_git_dir).contains(&storage_refname) {
2807        return true;
2808    }
2809    if let Ok(stack) = ReftableStack::open(&store_git_dir) {
2810        if let Ok(logs) = stack.read_logs_for_ref(&storage_refname) {
2811            return !logs.is_empty();
2812        }
2813    }
2814    false
2815}
2816
2817/// List refs that have reflogs in a reftable repo.
2818pub fn reftable_list_reflog_refs(git_dir: &Path) -> Result<Vec<String>> {
2819    let stack = ReftableStack::open(git_dir)?;
2820    let mut refs: BTreeSet<String> = read_empty_reflog_markers(git_dir);
2821    for log in stack.read_all_logs()? {
2822        refs.insert(log.refname);
2823    }
2824    Ok(refs.into_iter().collect())
2825}
2826
/// Path of the file listing refs whose reflog exists but is empty:
/// `<git_dir>/reftable/empty-reflogs`.
fn empty_reflog_markers_path(git_dir: &Path) -> PathBuf {
    let mut path = git_dir.join("reftable");
    path.push("empty-reflogs");
    path
}
2830
2831fn read_empty_reflog_markers(git_dir: &Path) -> BTreeSet<String> {
2832    fs::read_to_string(empty_reflog_markers_path(git_dir))
2833        .map(|content| {
2834            content
2835                .lines()
2836                .filter(|line| !line.trim().is_empty())
2837                .map(ToOwned::to_owned)
2838                .collect()
2839        })
2840        .unwrap_or_default()
2841}
2842
2843fn write_empty_reflog_markers(git_dir: &Path, markers: &BTreeSet<String>) -> Result<()> {
2844    let path = empty_reflog_markers_path(git_dir);
2845    let content = markers.iter().cloned().collect::<Vec<_>>().join("\n");
2846    fs::write(
2847        path,
2848        if content.is_empty() {
2849            content
2850        } else {
2851            content + "\n"
2852        },
2853    )?;
2854    Ok(())
2855}
2856
2857/// Create an empty reflog marker in a reftable repo.
2858pub fn reftable_create_reflog(git_dir: &Path, refname: &str) -> Result<()> {
2859    let (store_git_dir, storage_refname) = reftable_storage_location(git_dir, refname);
2860    let mut markers = read_empty_reflog_markers(&store_git_dir);
2861    markers.insert(storage_refname);
2862    write_empty_reflog_markers(&store_git_dir, &markers)
2863}
2864
2865/// Delete all reflog records and empty-log marker for a ref in a reftable repo.
2866pub fn reftable_delete_reflog(git_dir: &Path, refname: &str) -> Result<()> {
2867    let (store_git_dir, storage_refname) = reftable_storage_location(git_dir, refname);
2868    let mut markers = read_empty_reflog_markers(&store_git_dir);
2869    markers.remove(&storage_refname);
2870    write_empty_reflog_markers(&store_git_dir, &markers)?;
2871    let mut stack = ReftableStack::open(&store_git_dir)?;
2872    stack.replace_logs_for_ref(&storage_refname, &[])
2873}
2874
2875// ---------------------------------------------------------------------------
2876// Write options helpers
2877// ---------------------------------------------------------------------------
2878
2879/// Read reftable write options from the repository config.
2880pub fn read_write_options(git_dir: &Path) -> WriteOptions {
2881    let mut opts = WriteOptions::default();
2882
2883    if let Ok(config) = ConfigSet::load(Some(git_dir), true) {
2884        if let Some(value) = config.get("reftable.blockSize") {
2885            if let Ok(v) = value.parse::<u32>() {
2886                opts.block_size = v;
2887            }
2888        }
2889        if let Some(value) = config.get("reftable.restartInterval") {
2890            if let Ok(v) = value.parse::<usize>() {
2891                opts.restart_interval = v;
2892            }
2893        }
2894        if let Some(value) = config.get("reftable.indexObjects") {
2895            let value = value.to_lowercase();
2896            if value == "false" || value == "0" || value == "no" || value == "off" {
2897                opts.skip_index_objects = true;
2898            }
2899        }
2900        if let Some(value) = config.get("core.logAllRefUpdates") {
2901            let value = value.to_lowercase();
2902            if !(value == "true" || value == "always") {
2903                opts.write_log = false;
2904            }
2905        }
2906        return opts;
2907    }
2908
2909    let config_path = git_dir.join("config");
2910    if let Ok(content) = fs::read_to_string(&config_path) {
2911        let mut in_reftable = false;
2912        let mut in_core = false;
2913        let mut log_all_ref_updates: Option<bool> = None;
2914
2915        for line in content.lines() {
2916            let trimmed = line.trim();
2917            if trimmed.starts_with('[') {
2918                let section_lower = trimmed.to_lowercase();
2919                in_reftable = section_lower.starts_with("[reftable]");
2920                in_core = section_lower.starts_with("[core]");
2921                continue;
2922            }
2923            if in_reftable {
2924                if let Some((key, value)) = trimmed.split_once('=') {
2925                    let key = key.trim().to_lowercase();
2926                    let value = value.trim();
2927                    match key.as_str() {
2928                        "blocksize" => {
2929                            if let Ok(v) = value.parse::<u32>() {
2930                                opts.block_size = v;
2931                            }
2932                        }
2933                        "restartinterval" => {
2934                            if let Ok(v) = value.parse::<usize>() {
2935                                opts.restart_interval = v;
2936                            }
2937                        }
2938                        _ => {}
2939                    }
2940                }
2941            }
2942            if in_core {
2943                if let Some((key, value)) = trimmed.split_once('=') {
2944                    let key = key.trim().to_lowercase();
2945                    let value = value.trim().to_lowercase();
2946                    if key == "logallrefupdates" {
2947                        log_all_ref_updates = Some(value == "true" || value == "always");
2948                    }
2949                }
2950            }
2951        }
2952
2953        if let Some(false) = log_all_ref_updates {
2954            opts.write_log = false;
2955        }
2956    }
2957
2958    opts
2959}
2960
/// Check whether `core.logAllRefUpdates` is enabled in `<git_dir>/config`.
///
/// Only the repo-local config file is scanned, line by line. The key is
/// matched case-insensitively inside a `[core]` section. The value counts as
/// enabled when it is one of git's true spellings (`true`, `yes`, `on`, `1`)
/// or `always`, matching `reftable_log_refs_mode`. If the key appears more
/// than once the last occurrence wins, as in git.
///
/// Returns `false` when the file cannot be read or the key is absent.
fn should_log_ref_updates(git_dir: &Path) -> bool {
    let content = match fs::read_to_string(git_dir.join("config")) {
        Ok(content) => content,
        Err(_) => return false,
    };

    let mut in_core = false;
    let mut enabled = false;
    for line in content.lines() {
        let trimmed = line.trim();
        if trimmed.starts_with('[') {
            in_core = trimmed.to_lowercase().starts_with("[core]");
            continue;
        }
        if !in_core {
            continue;
        }
        if let Some((key, value)) = trimmed.split_once('=') {
            if key.trim().eq_ignore_ascii_case("logallrefupdates") {
                enabled = matches!(
                    value.trim().to_lowercase().as_str(),
                    "true" | "yes" | "on" | "1" | "always"
                );
            }
        }
    }
    enabled
}
2984
2985// ---------------------------------------------------------------------------
2986// Block dumping (for `test-tool dump-reftable -b`)
2987// ---------------------------------------------------------------------------
2988
2989/// Produce the `test-tool dump-reftable -b` output for a reftable file.
2990///
2991/// Mirrors `dump_blocks()` in `git/t/helper/test-reftable.c`: prints the
2992/// header block size and, for each block, the section type, the restart offset
2993/// (labelled `length`) and the restart count.
2994pub fn dump_reftable_blocks(path: &Path) -> Result<String> {
2995    let data = fs::read(path).map_err(Error::Io)?;
2996    if data.len() < HEADER_SIZE {
2997        return Err(Error::InvalidRef("reftable: file too small".into()));
2998    }
2999    if &data[0..4] != REFTABLE_MAGIC {
3000        return Err(Error::InvalidRef("reftable: bad magic".into()));
3001    }
3002    let version = data[4];
3003    let header_size = if version == 2 { 28 } else { 24 };
3004    let footer_size = if version == 2 { 72 } else { FOOTER_V1_SIZE };
3005    let block_size = ((data[5] as u32) << 16) | ((data[6] as u32) << 8) | (data[7] as u32);
3006
3007    let table_size = data.len().saturating_sub(footer_size);
3008
3009    let mut out = String::new();
3010    out.push_str("header:\n");
3011    out.push_str(&format!("  block_size: {block_size}\n"));
3012
3013    let mut section_type: u8 = 0;
3014    // First block starts at offset 0 with the file header skipped.
3015    let mut block_off: u64 = 0;
3016    let mut first = true;
3017
3018    loop {
3019        if !first {
3020            // table_iter_next_block advances by full_block_size; computed below.
3021            // `block_off` is updated at the end of the previous iteration.
3022        }
3023        if block_off as usize >= table_size {
3024            break;
3025        }
3026        let header_off = if block_off == 0 { header_size } else { 0 };
3027        let pos = block_off as usize + header_off;
3028        if pos + 1 > data.len() {
3029            break;
3030        }
3031        let block_type = data[pos];
3032        if !is_block_type(block_type) {
3033            break;
3034        }
3035
3036        // block_size field: be24 at pos+1.
3037        if pos + 4 > data.len() {
3038            break;
3039        }
3040        let blk_len =
3041            ((data[pos + 1] as u32) << 16) | ((data[pos + 2] as u32) << 8) | (data[pos + 3] as u32);
3042        let blk_len = blk_len as usize;
3043
3044        // Determine restart_count / restart_off from the (uncompressed) block.
3045        let (restart_off, restart_count, full_block_size) = if block_type == BLOCK_TYPE_LOG {
3046            // Log blocks store the uncompressed size in blk_len; the on-disk
3047            // data after the 4-byte header is zlib-compressed.
3048            let skip = 4 + header_off;
3049            let comp = &data[block_off as usize + skip..];
3050            let mut dec = flate2::read::DeflateDecoder::new(comp);
3051            let mut inflated = vec![0u8; blk_len.saturating_sub(skip)];
3052            // Read exactly the uncompressed payload.
3053            read_exact_inflate(&mut dec, &mut inflated)?;
3054            let consumed = dec.total_in() as usize;
3055            // restart trailer lives at the end of the (header + inflated) block.
3056            let mut full = vec![0u8; skip];
3057            full.extend_from_slice(&inflated);
3058            let rc = be16(&full, blk_len - 2) as usize;
3059            let roff = blk_len - 2 - 3 * rc;
3060            let fbs = skip + consumed;
3061            (roff, rc, fbs)
3062        } else {
3063            let abs = block_off as usize;
3064            if abs + blk_len < 2 {
3065                break;
3066            }
3067            let rc = be16(&data, abs + blk_len - 2) as usize;
3068            let roff = blk_len - 2 - 3 * rc;
3069            // Padded blocks advance by the table block size unless this is the
3070            // last block / unaligned / padded.
3071            let mut fbs = block_size as usize;
3072            if fbs == 0 {
3073                fbs = blk_len;
3074            } else if blk_len < fbs
3075                && abs + blk_len < data.len()
3076                && data.get(abs + blk_len) == Some(&0u8)
3077            {
3078                // padded block; advances by full table block size
3079            } else if blk_len < fbs {
3080                fbs = blk_len;
3081            }
3082            (roff, rc, fbs)
3083        };
3084
3085        if block_type != section_type {
3086            let section = match block_type {
3087                BLOCK_TYPE_LOG => "log",
3088                BLOCK_TYPE_REF => "ref",
3089                BLOCK_TYPE_OBJ => "obj",
3090                BLOCK_TYPE_INDEX => "idx",
3091                _ => return Err(Error::InvalidRef("reftable: bad block type".into())),
3092            };
3093            section_type = block_type;
3094            out.push_str(&format!("{section}:\n"));
3095        }
3096
3097        out.push_str(&format!("  - length: {restart_off}\n"));
3098        out.push_str(&format!("    restarts: {restart_count}\n"));
3099
3100        block_off += full_block_size as u64;
3101        first = false;
3102        if full_block_size == 0 {
3103            break;
3104        }
3105    }
3106
3107    Ok(out)
3108}
3109
3110fn is_block_type(t: u8) -> bool {
3111    t == BLOCK_TYPE_REF || t == BLOCK_TYPE_LOG || t == BLOCK_TYPE_OBJ || t == BLOCK_TYPE_INDEX
3112}
3113
/// Decode the big-endian `u16` stored at `data[off..off + 2]`.
///
/// Panics if either byte is out of bounds.
fn be16(data: &[u8], off: usize) -> u16 {
    u16::from_be_bytes([data[off], data[off + 1]])
}
3117
3118fn read_exact_inflate<R: Read>(r: &mut R, buf: &mut [u8]) -> Result<()> {
3119    let mut filled = 0;
3120    while filled < buf.len() {
3121        match r.read(&mut buf[filled..]) {
3122            Ok(0) => break,
3123            Ok(n) => filled += n,
3124            Err(e) => return Err(Error::Zlib(e.to_string())),
3125        }
3126    }
3127    Ok(())
3128}
3129
3130// ---------------------------------------------------------------------------
3131// Utility functions
3132// ---------------------------------------------------------------------------
3133
/// Compute the CRC-32 of a byte slice (ISO 3309 / ITU-T V.42).
///
/// Bitwise, table-free implementation using the reflected polynomial
/// `0xedb88320`, an all-ones initial value and a final bit inversion.
fn crc32(data: &[u8]) -> u32 {
    const POLY: u32 = 0xedb8_8320;

    let register = data.iter().fold(u32::MAX, |acc, &byte| {
        (0..8).fold(acc ^ u32::from(byte), |reg, _| {
            let shifted = reg >> 1;
            if reg & 1 == 1 {
                shifted ^ POLY
            } else {
                shifted
            }
        })
    });
    !register
}
3149
/// Number of leading bytes that `a` and `b` have in common.
fn common_prefix_len(a: &[u8], b: &[u8]) -> usize {
    let limit = a.len().min(b.len());
    (0..limit).find(|&i| a[i] != b[i]).unwrap_or(limit)
}
3154
/// Decode the big-endian 24-bit value stored at `data[pos..pos + 3]`.
///
/// Panics if any of the three bytes is out of bounds.
fn read_u24(data: &[u8], pos: usize) -> usize {
    let bytes = [0, data[pos], data[pos + 1], data[pos + 2]];
    u32::from_be_bytes(bytes) as usize
}
3159
/// Decode the big-endian 16-bit value stored at `data[pos..pos + 2]`.
///
/// Panics if either byte is out of bounds.
fn read_u16(data: &[u8], pos: usize) -> usize {
    usize::from(u16::from_be_bytes([data[pos], data[pos + 1]]))
}
3164
3165/// Parse the footer of a reftable file.
3166fn parse_footer(data: &[u8], version: u8) -> Result<Footer> {
3167    let footer_size = if version == 2 { 72 } else { FOOTER_V1_SIZE };
3168    if data.len() < footer_size {
3169        return Err(Error::InvalidRef("reftable: footer too small".into()));
3170    }
3171
3172    // Verify magic
3173    if &data[0..4] != REFTABLE_MAGIC {
3174        return Err(Error::InvalidRef("reftable: bad footer magic".into()));
3175    }
3176    let fver = data[4];
3177    if fver != version {
3178        return Err(Error::InvalidRef(format!(
3179            "reftable: footer version mismatch: header={version}, footer={fver}"
3180        )));
3181    }
3182
3183    // Footer-size validated above, so every fixed-width slice below is in
3184    // bounds; convert via `?` to surface any unexpected truncation as an error.
3185    let read_u64 = |slice: &[u8]| -> Result<u64> {
3186        let bytes: [u8; 8] = slice
3187            .try_into()
3188            .map_err(|_| Error::InvalidRef("reftable: truncated footer field".into()))?;
3189        Ok(u64::from_be_bytes(bytes))
3190    };
3191
3192    let block_size = ((data[5] as u32) << 16) | ((data[6] as u32) << 8) | (data[7] as u32);
3193    let min_update_index = read_u64(&data[8..16])?;
3194    let max_update_index = read_u64(&data[16..24])?;
3195
3196    let off = 24;
3197    let ref_index_position = read_u64(&data[off..off + 8])?;
3198    let obj_position_and_id_len = read_u64(&data[off + 8..off + 16])?;
3199    let obj_index_position = read_u64(&data[off + 16..off + 24])?;
3200    let log_position = read_u64(&data[off + 24..off + 32])?;
3201    let log_index_position = read_u64(&data[off + 32..off + 40])?;
3202
3203    // CRC-32 check
3204    let crc_bytes: [u8; 4] = data[footer_size - 4..footer_size]
3205        .try_into()
3206        .map_err(|_| Error::InvalidRef("reftable: truncated footer CRC".into()))?;
3207    let crc_stored = u32::from_be_bytes(crc_bytes);
3208    let crc_computed = crc32(&data[..footer_size - 4]);
3209    if crc_stored != crc_computed {
3210        return Err(Error::InvalidRef(format!(
3211            "reftable: footer CRC mismatch: stored={crc_stored:08x}, computed={crc_computed:08x}"
3212        )));
3213    }
3214
3215    Ok(Footer {
3216        version: fver,
3217        block_size,
3218        min_update_index,
3219        max_update_index,
3220        ref_index_position,
3221        obj_position_and_id_len,
3222        obj_index_position,
3223        log_position,
3224        log_index_position,
3225    })
3226}
3227
/// Parse an identity string like `"Name <email> 1234567890 +0100"`.
///
/// Returns `(name, email, seconds, tz_offset_minutes)`. The string is split
/// from the right on its last two spaces into timezone, timestamp and
/// `Name <email>`; when fewer than three fields are present the whole input is
/// returned as the name with an empty email and zero time and offset.
///
/// Parsing is lenient and never panics: an unparsable timestamp or timezone
/// component becomes `0`, a timezone token shorter than five bytes yields a
/// zero offset, and a missing `>` makes the email run to the end of the
/// `Name <email>` part. The closing `>` is searched for only after the opening
/// `<`.
fn parse_identity_string(identity: &str) -> (String, String, u64, i16) {
    // Format: "Name <email> timestamp tz", split from the right.
    let mut fields = identity.rsplitn(3, ' ');
    let (tz_str, time_str, name_email) = match (fields.next(), fields.next(), fields.next()) {
        (Some(tz), Some(time), Some(rest)) => (tz, time, rest),
        _ => return (identity.to_owned(), String::new(), 0, 0),
    };

    let time_secs = time_str.parse::<u64>().unwrap_or(0);

    // Timezone: +HHMM or -HHMM. `str::get` is used instead of slicing so a
    // multi-byte character inside the token cannot cause a panic.
    let tz_minutes = if tz_str.len() >= 5 {
        let sign: i16 = if tz_str.starts_with('-') { -1 } else { 1 };
        let component = |range: std::ops::Range<usize>| -> i16 {
            tz_str
                .get(range)
                .and_then(|digits| digits.parse::<i16>().ok())
                .unwrap_or(0)
        };
        sign * (component(1..3) * 60 + component(3..5))
    } else {
        0
    };

    // Split name and email; the closing bracket is only looked for after the
    // opening one, so a stray earlier `>` cannot produce an inverted range.
    let (name, email) = match name_email.find('<') {
        Some(lt_pos) => {
            let after_lt = &name_email[lt_pos + 1..];
            let email = match after_lt.find('>') {
                Some(gt_pos) => &after_lt[..gt_pos],
                None => after_lt,
            };
            (name_email[..lt_pos].trim().to_owned(), email.to_owned())
        }
        None => (name_email.to_owned(), String::new()),
    };

    (name, email, time_secs, tz_minutes)
}
3266
3267// ---------------------------------------------------------------------------
3268// Tests
3269// ---------------------------------------------------------------------------
3270
#[cfg(test)]
mod tests {
    use super::*;

    /// Each sampled value must decode back to itself, and decoding from
    /// offset 0 must end exactly at the end of the encoded bytes.
    #[test]
    fn test_varint_roundtrip() {
        let samples = [0u64, 1, 127, 128, 255, 256, 16383, 16384, u64::MAX];
        for val in samples {
            let mut encoded = Vec::new();
            put_varint(val, &mut encoded);
            let (decoded, next_pos) = get_varint(&encoded, 0).unwrap();
            assert_eq!(decoded, val, "varint roundtrip failed for {val}");
            assert_eq!(next_pos, encoded.len());
        }
    }

    /// Checks `crc32` against the well-known "123456789" test vector.
    #[test]
    fn test_crc32() {
        let checksum = crc32(b"123456789");
        assert_eq!(checksum, 0xCBF43926);
    }

    /// A table written without any records reads back as an empty ref list.
    #[test]
    fn test_empty_table() {
        let bytes = ReftableWriter::new(WriteOptions::default(), 1, 1)
            .finish()
            .unwrap();
        let reader = ReftableReader::new(bytes).unwrap();
        assert!(reader.read_refs().unwrap().is_empty());
    }

    /// A single direct ref keeps its name, value and update index.
    #[test]
    fn test_write_read_single_ref() {
        let oid = ObjectId::from_bytes(&[0xab; 20]).unwrap();
        let record = RefRecord {
            name: "refs/heads/main".to_owned(),
            update_index: 1,
            value: RefValue::Val1(oid),
        };

        let mut writer = ReftableWriter::new(WriteOptions::default(), 1, 1);
        writer.add_ref(record).unwrap();
        let bytes = writer.finish().unwrap();

        let reader = ReftableReader::new(bytes).unwrap();
        let refs = reader.read_refs().unwrap();
        assert_eq!(refs.len(), 1);

        let stored = &refs[0];
        assert_eq!(stored.name, "refs/heads/main");
        assert_eq!(stored.value, RefValue::Val1(oid));
        assert_eq!(stored.update_index, 1);
    }

    /// Several refs (two branches and a two-value tag record) come back in
    /// the order they were added.
    #[test]
    fn test_write_read_multiple_refs() {
        let oid1 = ObjectId::from_bytes(&[0x11; 20]).unwrap();
        let oid2 = ObjectId::from_bytes(&[0x22; 20]).unwrap();
        let oid3 = ObjectId::from_bytes(&[0x33; 20]).unwrap();

        let records = [
            RefRecord {
                name: "refs/heads/a".to_owned(),
                update_index: 1,
                value: RefValue::Val1(oid1),
            },
            RefRecord {
                name: "refs/heads/b".to_owned(),
                update_index: 1,
                value: RefValue::Val1(oid2),
            },
            RefRecord {
                name: "refs/tags/v1.0".to_owned(),
                update_index: 1,
                value: RefValue::Val2(oid3, oid1),
            },
        ];

        let mut writer = ReftableWriter::new(WriteOptions::default(), 1, 1);
        for record in records {
            writer.add_ref(record).unwrap();
        }
        let bytes = writer.finish().unwrap();

        let reader = ReftableReader::new(bytes).unwrap();
        let refs = reader.read_refs().unwrap();
        assert_eq!(refs.len(), 3);
        assert_eq!(refs[0].name, "refs/heads/a");
        assert_eq!(refs[1].name, "refs/heads/b");
        assert_eq!(refs[2].name, "refs/tags/v1.0");
        assert_eq!(refs[2].value, RefValue::Val2(oid3, oid1));
    }

    /// A symbolic ref keeps its target name through a write/read cycle.
    #[test]
    fn test_symref_roundtrip() {
        let record = RefRecord {
            name: "refs/heads/sym".to_owned(),
            update_index: 1,
            value: RefValue::Symref("refs/heads/main".to_owned()),
        };

        let mut writer = ReftableWriter::new(WriteOptions::default(), 1, 1);
        writer.add_ref(record).unwrap();
        let bytes = writer.finish().unwrap();

        let reader = ReftableReader::new(bytes).unwrap();
        let refs = reader.read_refs().unwrap();
        assert_eq!(refs.len(), 1);

        let expected = RefValue::Symref("refs/heads/main".to_owned());
        assert_eq!(refs[0].value, expected);
    }

    /// A log record written with `write_log` enabled is read back field by
    /// field.
    #[test]
    fn test_log_roundtrip() {
        let old_oid = ObjectId::from_bytes(&[0; 20]).unwrap();
        let new_oid = ObjectId::from_bytes(&[0xaa; 20]).unwrap();

        let opts = WriteOptions {
            write_log: true,
            ..WriteOptions::default()
        };
        let record = LogRecord {
            refname: "refs/heads/main".to_owned(),
            update_index: 1,
            old_id: old_oid,
            new_id: new_oid,
            name: "Test User".to_owned(),
            email: "test@example.com".to_owned(),
            time_seconds: 1700000000,
            tz_offset: -480,
            message: "initial commit".to_owned(),
        };

        let mut writer = ReftableWriter::new(opts, 1, 1);
        writer.add_log(record).unwrap();
        let bytes = writer.finish().unwrap();

        let reader = ReftableReader::new(bytes).unwrap();
        let logs = reader.read_logs().unwrap();
        assert_eq!(logs.len(), 1);

        let entry = &logs[0];
        assert_eq!(entry.refname, "refs/heads/main");
        assert_eq!(entry.old_id, old_oid);
        assert_eq!(entry.new_id, new_oid);
        assert_eq!(entry.name, "Test User");
        assert_eq!(entry.email, "test@example.com");
        assert_eq!(entry.time_seconds, 1700000000);
        assert_eq!(entry.tz_offset, -480);
        // The message comes back with a trailing newline: the writer cleans
        // messages the way git does, and `read_logs` hands back the raw
        // on-disk text (the newline is only stripped when converting to a
        // `ReflogEntry`).
        assert_eq!(entry.message, "initial commit\n");
    }

    /// With `unpadded` set, a single-ref table stays smaller than one default
    /// block and still reads back correctly.
    #[test]
    fn test_unaligned_table() {
        let oid = ObjectId::from_bytes(&[0xcc; 20]).unwrap();
        // Unpadded (unaligned) output, like git's `unpadded` write option:
        // blocks are not padded out to the block size.
        let opts = WriteOptions {
            write_log: false,
            restart_interval: 16,
            unpadded: true,
            ..WriteOptions::default()
        };
        let record = RefRecord {
            name: "refs/heads/main".to_owned(),
            update_index: 1,
            value: RefValue::Val1(oid),
        };

        let mut writer = ReftableWriter::new(opts, 1, 1);
        writer.add_ref(record).unwrap();
        let bytes = writer.finish().unwrap();

        // An unpadded single-ref table is far smaller than one padded block.
        let padded_block_len = DEFAULT_BLOCK_SIZE as usize;
        assert!(bytes.len() < padded_block_len);

        let reader = ReftableReader::new(bytes).unwrap();
        let refs = reader.read_refs().unwrap();
        assert_eq!(refs.len(), 1);
        assert_eq!(refs[0].value, RefValue::Val1(oid));
    }

    /// A well-formed identity line splits into name, email, timestamp and a
    /// timezone offset expressed in minutes.
    #[test]
    fn test_parse_identity() {
        let parsed = parse_identity_string("Test User <test@example.com> 1700000000 -0800");
        let (name, email, ts, tz) = parsed;
        assert_eq!(name, "Test User");
        assert_eq!(email, "test@example.com");
        assert_eq!(ts, 1700000000);
        assert_eq!(tz, -480);
    }

    /// A deletion (tombstone) record is stored and read back like any other
    /// ref record.
    #[test]
    fn test_deletion_record() {
        let record = RefRecord {
            name: "refs/heads/gone".to_owned(),
            update_index: 1,
            value: RefValue::Deletion,
        };

        let mut writer = ReftableWriter::new(WriteOptions::default(), 1, 1);
        writer.add_ref(record).unwrap();
        let bytes = writer.finish().unwrap();

        let reader = ReftableReader::new(bytes).unwrap();
        let refs = reader.read_refs().unwrap();
        assert_eq!(refs.len(), 1);
        assert_eq!(refs[0].value, RefValue::Deletion);
    }
}