Skip to main content

grit_lib/
reftable.rs

1//! Reftable format — binary reference storage.
2//!
3//! Implements the [reftable file format](https://git-scm.com/docs/reftable)
4//! for efficient, sorted reference storage.  A reftable file contains
5//! ref blocks (sorted ref records with prefix compression), optional log
6//! blocks (reflog entries), optional index blocks, and a footer.
7//!
8//! # Architecture
9//!
10//! - [`ReftableWriter`] writes a single `.ref` (or `.log`) reftable file.
11//! - [`ReftableReader`] reads and searches a single reftable file.
12//! - [`ReftableStack`] manages the `tables.list` stack, providing a
13//!   merged view of all tables and auto-compaction on writes.
14//!
15//! # On-disk layout
16//!
17//! ```text
18//! first_block { header, first_ref_block }
19//! ref_block*
20//! ref_index?
21//! obj_block*    (not yet implemented)
22//! obj_index?    (not yet implemented)
23//! log_block*
24//! log_index?
25//! footer
26//! ```
27
28use std::collections::BTreeMap;
29use std::fs;
30use std::io::{Read, Write};
31use std::path::{Path, PathBuf};
32
33use crate::error::{Error, Result};
34use crate::objects::ObjectId;
35
36// ---------------------------------------------------------------------------
37// Constants
38// ---------------------------------------------------------------------------
39
40/// Magic bytes at the start of every reftable file.
41const REFTABLE_MAGIC: &[u8; 4] = b"REFT";
42
43/// File header size (version 1): magic(4) + version(1) + block_size(3)
44/// + min_update_index(8) + max_update_index(8) = 24 bytes.
45const HEADER_SIZE: usize = 24;
46
47/// Footer size for version 1.
48const FOOTER_V1_SIZE: usize = 68;
49
50/// Block type: ref block.
51const BLOCK_TYPE_REF: u8 = b'r';
52/// Block type: index block.
53const BLOCK_TYPE_INDEX: u8 = b'i';
54/// Block type: log block (zlib-compressed).
55const BLOCK_TYPE_LOG: u8 = b'g';
56
57/// Value types encoded in the low 3 bits of the suffix_length varint.
58const VALUE_DELETION: u8 = 0;
59const VALUE_ONE_OID: u8 = 1;
60const VALUE_TWO_OID: u8 = 2;
61const VALUE_SYMREF: u8 = 3;
62
63/// Hash size (SHA-1).
64const HASH_SIZE: usize = 20;
65
66/// Default block size when none is configured (4 KiB).
67const DEFAULT_BLOCK_SIZE: u32 = 4096;
68
69/// How many records between restart points.
70const RESTART_INTERVAL: usize = 16;
71
72// ---------------------------------------------------------------------------
73// Varint encoding (Git pack-style)
74// ---------------------------------------------------------------------------
75
76/// Encode a u64 as a varint into `out`. Returns number of bytes written.
77fn put_varint(mut val: u64, out: &mut Vec<u8>) -> usize {
78    // First, collect 7-bit groups.
79    let mut buf = [0u8; 10];
80    let mut i = 0;
81    buf[i] = (val & 0x7f) as u8;
82    i += 1;
83    val >>= 7;
84    while val > 0 {
85        val -= 1;
86        buf[i] = (val & 0x7f) as u8;
87        i += 1;
88        val >>= 7;
89    }
90    // Write in reverse, with continuation bits.
91    let len = i;
92    for j in (1..len).rev() {
93        out.push(buf[j] | 0x80);
94    }
95    out.push(buf[0]);
96    len
97}
98
99/// Decode a varint from `data` starting at `pos`. Returns (value, new_pos).
100fn get_varint(data: &[u8], mut pos: usize) -> Result<(u64, usize)> {
101    if pos >= data.len() {
102        return Err(Error::InvalidRef("varint: unexpected end of data".into()));
103    }
104    let mut val = (data[pos] & 0x7f) as u64;
105    while data[pos] & 0x80 != 0 {
106        pos += 1;
107        if pos >= data.len() {
108            return Err(Error::InvalidRef("varint: unexpected end of data".into()));
109        }
110        val = ((val + 1) << 7) | (data[pos] & 0x7f) as u64;
111    }
112    Ok((val, pos + 1))
113}
114
115// ---------------------------------------------------------------------------
116// Ref record types
117// ---------------------------------------------------------------------------
118
119/// A single reference record as stored in a reftable.
120#[derive(Debug, Clone, PartialEq, Eq)]
121pub enum RefValue {
122    /// Deletion tombstone (value_type 0x0).
123    Deletion,
124    /// A direct ref pointing to one OID (value_type 0x1).
125    Val1(ObjectId),
126    /// An annotated tag: value + peeled target (value_type 0x2).
127    Val2(ObjectId, ObjectId),
128    /// A symbolic reference (value_type 0x3).
129    Symref(String),
130}
131
132/// A decoded ref record.
133#[derive(Debug, Clone)]
134pub struct RefRecord {
135    /// Full reference name.
136    pub name: String,
137    /// Update index (absolute).
138    pub update_index: u64,
139    /// The value.
140    pub value: RefValue,
141}
142
143/// A decoded log record.
144#[derive(Debug, Clone)]
145pub struct LogRecord {
146    /// Reference name.
147    pub refname: String,
148    /// Update index.
149    pub update_index: u64,
150    /// Old object ID.
151    pub old_id: ObjectId,
152    /// New object ID.
153    pub new_id: ObjectId,
154    /// Committer name.
155    pub name: String,
156    /// Committer email (without angle brackets).
157    pub email: String,
158    /// Time in seconds since epoch.
159    pub time_seconds: u64,
160    /// Timezone offset in minutes (signed).
161    pub tz_offset: i16,
162    /// Log message.
163    pub message: String,
164}
165
166/// Write options for reftable creation.
167#[derive(Debug, Clone)]
168pub struct WriteOptions {
169    /// Block size in bytes. 0 means unaligned (variable-sized blocks).
170    pub block_size: u32,
171    /// Restart interval (number of records between restart points).
172    pub restart_interval: usize,
173    /// Whether to write log blocks.
174    pub write_log: bool,
175}
176
177impl Default for WriteOptions {
178    fn default() -> Self {
179        Self {
180            block_size: DEFAULT_BLOCK_SIZE,
181            restart_interval: RESTART_INTERVAL,
182            write_log: true,
183        }
184    }
185}
186
187// ---------------------------------------------------------------------------
188// Writer
189// ---------------------------------------------------------------------------
190
191/// Writes a single reftable file.
192///
193/// Usage:
194/// ```ignore
195/// let mut w = ReftableWriter::new(opts, min_idx, max_idx);
196/// w.add_ref(&RefRecord { .. })?;
197/// w.add_log(&LogRecord { .. })?;
198/// let bytes = w.finish()?;
199/// ```
200pub struct ReftableWriter {
201    opts: WriteOptions,
202    min_update_index: u64,
203    max_update_index: u64,
204
205    // Accumulated ref records (must be added in sorted order).
206    refs: Vec<RefRecord>,
207    // Accumulated log records.
208    logs: Vec<LogRecord>,
209}
210
211impl ReftableWriter {
212    /// Create a new writer.
213    pub fn new(opts: WriteOptions, min_update_index: u64, max_update_index: u64) -> Self {
214        Self {
215            opts,
216            min_update_index,
217            max_update_index,
218            refs: Vec::new(),
219            logs: Vec::new(),
220        }
221    }
222
223    /// Add a ref record. Records **must** be added in sorted name order.
224    pub fn add_ref(&mut self, rec: RefRecord) -> Result<()> {
225        if let Some(last) = self.refs.last() {
226            if rec.name <= last.name {
227                return Err(Error::InvalidRef(format!(
228                    "reftable: refs must be sorted, got '{}' after '{}'",
229                    rec.name, last.name
230                )));
231            }
232        }
233        self.refs.push(rec);
234        Ok(())
235    }
236
237    /// Add a log record.
238    pub fn add_log(&mut self, rec: LogRecord) -> Result<()> {
239        self.logs.push(rec);
240        Ok(())
241    }
242
243    /// Finish writing and return the complete reftable file bytes.
244    pub fn finish(mut self) -> Result<Vec<u8>> {
245        let mut out = Vec::new();
246        let block_size = self.opts.block_size;
247
248        // --- Header (24 bytes) ---
249        out.extend_from_slice(REFTABLE_MAGIC);
250        out.push(1); // version
251        out.push(((block_size >> 16) & 0xff) as u8);
252        out.push(((block_size >> 8) & 0xff) as u8);
253        out.push((block_size & 0xff) as u8);
254        out.extend_from_slice(&self.min_update_index.to_be_bytes());
255        out.extend_from_slice(&self.max_update_index.to_be_bytes());
256
257        assert_eq!(out.len(), HEADER_SIZE);
258
259        // --- Ref blocks ---
260        let ref_block_positions = self.write_ref_blocks(&mut out)?;
261
262        // --- Ref index (if ≥ 4 ref blocks) ---
263        let ref_index_position = if ref_block_positions.len() >= 4 {
264            let pos = out.len() as u64;
265            self.write_ref_index(&mut out, &ref_block_positions)?;
266            pos
267        } else {
268            0
269        };
270
271        // --- Log blocks ---
272        let log_position = if self.opts.write_log && !self.logs.is_empty() {
273            let pos = out.len() as u64;
274            self.write_log_blocks(&mut out)?;
275            pos
276        } else {
277            0
278        };
279
280        // --- Footer ---
281        let footer_start = out.len();
282        // Repeat header
283        out.extend_from_slice(REFTABLE_MAGIC);
284        out.push(1);
285        out.push(((block_size >> 16) & 0xff) as u8);
286        out.push(((block_size >> 8) & 0xff) as u8);
287        out.push((block_size & 0xff) as u8);
288        out.extend_from_slice(&self.min_update_index.to_be_bytes());
289        out.extend_from_slice(&self.max_update_index.to_be_bytes());
290
291        // ref_index_position
292        out.extend_from_slice(&ref_index_position.to_be_bytes());
293        // (obj_position << 5) | obj_id_len — no obj blocks yet
294        out.extend_from_slice(&0u64.to_be_bytes());
295        // obj_index_position
296        out.extend_from_slice(&0u64.to_be_bytes());
297        // log_position
298        out.extend_from_slice(&log_position.to_be_bytes());
299        // log_index_position (we skip log index for simplicity)
300        out.extend_from_slice(&0u64.to_be_bytes());
301
302        // CRC-32 of footer (everything from footer_start to here)
303        let crc = crc32(&out[footer_start..]);
304        out.extend_from_slice(&crc.to_be_bytes());
305
306        Ok(out)
307    }
308
309    /// Write ref blocks, returning (block_start_position, last_refname) per block.
310    fn write_ref_blocks(&self, out: &mut Vec<u8>) -> Result<Vec<(u64, String)>> {
311        if self.refs.is_empty() {
312            return Ok(Vec::new());
313        }
314
315        let block_size = self.opts.block_size as usize;
316        let restart_interval = self.opts.restart_interval;
317        let mut block_positions: Vec<(u64, String)> = Vec::new();
318        let mut i = 0;
319
320        while i < self.refs.len() {
321            let block_start = out.len();
322            let is_first_block = block_start == HEADER_SIZE;
323
324            // We accumulate records into a buffer, then write the block.
325            let mut records_buf = Vec::new();
326            let mut restart_offsets: Vec<u32> = Vec::new();
327            let mut prev_name = String::new();
328            let mut count = 0;
329            let mut last_name = String::new();
330
331            while i < self.refs.len() {
332                let rec = &self.refs[i];
333                let is_restart = count % restart_interval == 0;
334
335                let mut rec_buf = Vec::new();
336                let prefix_len = if is_restart {
337                    0
338                } else {
339                    common_prefix_len(prev_name.as_bytes(), rec.name.as_bytes())
340                };
341                let suffix = &rec.name.as_bytes()[prefix_len..];
342                let suffix_len = suffix.len();
343
344                let value_type = match &rec.value {
345                    RefValue::Deletion => VALUE_DELETION,
346                    RefValue::Val1(_) => VALUE_ONE_OID,
347                    RefValue::Val2(_, _) => VALUE_TWO_OID,
348                    RefValue::Symref(_) => VALUE_SYMREF,
349                };
350
351                put_varint(prefix_len as u64, &mut rec_buf);
352                put_varint(((suffix_len as u64) << 3) | value_type as u64, &mut rec_buf);
353                rec_buf.extend_from_slice(suffix);
354
355                let update_index_delta = rec.update_index.saturating_sub(self.min_update_index);
356                put_varint(update_index_delta, &mut rec_buf);
357
358                match &rec.value {
359                    RefValue::Deletion => {}
360                    RefValue::Val1(oid) => {
361                        rec_buf.extend_from_slice(oid.as_bytes());
362                    }
363                    RefValue::Val2(oid, peeled) => {
364                        rec_buf.extend_from_slice(oid.as_bytes());
365                        rec_buf.extend_from_slice(peeled.as_bytes());
366                    }
367                    RefValue::Symref(target) => {
368                        put_varint(target.len() as u64, &mut rec_buf);
369                        rec_buf.extend_from_slice(target.as_bytes());
370                    }
371                }
372
373                // Check if adding this record would overflow the block.
374                // Block overhead: 4 (block header) + restart table
375                let restart_count = restart_offsets.len() + if is_restart { 1 } else { 0 };
376                let trailer_size = restart_count * 3 + 2;
377                let total = 4 + records_buf.len() + rec_buf.len() + trailer_size;
378                let effective_block_size = if is_first_block && block_size > 0 {
379                    block_size // first block includes header
380                } else if block_size > 0 {
381                    block_size
382                } else {
383                    usize::MAX // unaligned
384                };
385                // For first block, block_len includes the 24-byte header
386                let block_len = if is_first_block {
387                    HEADER_SIZE + total
388                } else {
389                    total
390                };
391
392                if block_size > 0 && block_len > effective_block_size && count > 0 {
393                    break; // Start a new block
394                }
395
396                if is_restart {
397                    let offset = if is_first_block {
398                        HEADER_SIZE + 4 + records_buf.len()
399                    } else {
400                        4 + records_buf.len()
401                    };
402                    restart_offsets.push(offset as u32);
403                }
404
405                records_buf.extend_from_slice(&rec_buf);
406                last_name = rec.name.clone();
407                prev_name = rec.name.clone();
408                count += 1;
409                i += 1;
410            }
411
412            if count == 0 {
413                return Err(Error::InvalidRef(
414                    "reftable: ref record too large for block size".into(),
415                ));
416            }
417
418            // Ensure at least one restart point
419            if restart_offsets.is_empty() {
420                restart_offsets.push(if is_first_block {
421                    HEADER_SIZE as u32 + 4
422                } else {
423                    4
424                });
425            }
426
427            // Compute block_len
428            let trailer_size = restart_offsets.len() * 3 + 2;
429            let block_len_val = if is_first_block {
430                HEADER_SIZE + 4 + records_buf.len() + trailer_size
431            } else {
432                4 + records_buf.len() + trailer_size
433            };
434
435            // Write block header: type(1) + block_len(3)
436            out.push(BLOCK_TYPE_REF);
437            out.push(((block_len_val >> 16) & 0xff) as u8);
438            out.push(((block_len_val >> 8) & 0xff) as u8);
439            out.push((block_len_val & 0xff) as u8);
440
441            // Write records
442            out.extend_from_slice(&records_buf);
443
444            // Write restart offsets (3 bytes each)
445            for &off in &restart_offsets {
446                out.push(((off >> 16) & 0xff) as u8);
447                out.push(((off >> 8) & 0xff) as u8);
448                out.push((off & 0xff) as u8);
449            }
450
451            // Write restart count (2 bytes)
452            let rc = restart_offsets.len() as u16;
453            out.push((rc >> 8) as u8);
454            out.push((rc & 0xff) as u8);
455
456            // Pad to block alignment if needed
457            if block_size > 0 {
458                let written = out.len() - block_start;
459                let target = if is_first_block {
460                    block_size
461                } else {
462                    block_size
463                };
464                if written < target {
465                    out.resize(block_start + target, 0);
466                }
467            }
468
469            block_positions.push((block_start as u64, last_name.clone()));
470        }
471
472        Ok(block_positions)
473    }
474
475    /// Write a single-level ref index block.
476    fn write_ref_index(&self, out: &mut Vec<u8>, block_positions: &[(u64, String)]) -> Result<()> {
477        let mut records_buf = Vec::new();
478        let mut restart_offsets: Vec<u32> = Vec::new();
479        let mut prev_name = String::new();
480
481        for (idx, (block_pos, last_ref)) in block_positions.iter().enumerate() {
482            let is_restart = idx % self.opts.restart_interval == 0;
483            let prefix_len = if is_restart {
484                0
485            } else {
486                common_prefix_len(prev_name.as_bytes(), last_ref.as_bytes())
487            };
488            let suffix = &last_ref.as_bytes()[prefix_len..];
489
490            if is_restart {
491                restart_offsets.push(4 + records_buf.len() as u32);
492            }
493
494            put_varint(prefix_len as u64, &mut records_buf);
495            put_varint((suffix.len() as u64) << 3, &mut records_buf);
496            records_buf.extend_from_slice(suffix);
497            put_varint(*block_pos, &mut records_buf);
498
499            prev_name = last_ref.clone();
500        }
501
502        if restart_offsets.is_empty() {
503            restart_offsets.push(4);
504        }
505
506        let trailer_size = restart_offsets.len() * 3 + 2;
507        let block_len = 4 + records_buf.len() + trailer_size;
508
509        out.push(BLOCK_TYPE_INDEX);
510        out.push(((block_len >> 16) & 0xff) as u8);
511        out.push(((block_len >> 8) & 0xff) as u8);
512        out.push((block_len & 0xff) as u8);
513
514        out.extend_from_slice(&records_buf);
515
516        for &off in &restart_offsets {
517            out.push(((off >> 16) & 0xff) as u8);
518            out.push(((off >> 8) & 0xff) as u8);
519            out.push((off & 0xff) as u8);
520        }
521        let rc = restart_offsets.len() as u16;
522        out.push((rc >> 8) as u8);
523        out.push((rc & 0xff) as u8);
524
525        Ok(())
526    }
527
528    /// Write log blocks (zlib-compressed).
529    fn write_log_blocks(&mut self, out: &mut Vec<u8>) -> Result<()> {
530        use flate2::write::DeflateEncoder;
531        use flate2::Compression;
532
533        // Sort logs by (refname, reverse update_index)
534        self.logs.sort_by(|a, b| {
535            a.refname
536                .cmp(&b.refname)
537                .then_with(|| b.update_index.cmp(&a.update_index))
538        });
539
540        // Build the uncompressed log block content
541        let mut inner = Vec::new();
542        let mut restart_offsets: Vec<u32> = Vec::new();
543        let mut prev_key = Vec::<u8>::new();
544
545        for (idx, log) in self.logs.iter().enumerate() {
546            let is_restart = idx % self.opts.restart_interval == 0;
547
548            // Log key: refname \0 reverse_int64(update_index)
549            let mut key = Vec::new();
550            key.extend_from_slice(log.refname.as_bytes());
551            key.push(0);
552            key.extend_from_slice(&(0xffffffffffffffffu64 - log.update_index).to_be_bytes());
553
554            let prefix_len = if is_restart {
555                0
556            } else {
557                common_prefix_len(&prev_key, &key)
558            };
559            let suffix = &key[prefix_len..];
560
561            if is_restart {
562                // Offset within the decompressed block (4 byte header + inner.len())
563                restart_offsets.push(4 + inner.len() as u32);
564            }
565
566            // log_type = 1 (standard reflog data)
567            let log_type: u8 = 1;
568            put_varint(prefix_len as u64, &mut inner);
569            put_varint(((suffix.len() as u64) << 3) | log_type as u64, &mut inner);
570            inner.extend_from_slice(suffix);
571
572            // log_data
573            inner.extend_from_slice(log.old_id.as_bytes());
574            inner.extend_from_slice(log.new_id.as_bytes());
575            put_varint(log.name.len() as u64, &mut inner);
576            inner.extend_from_slice(log.name.as_bytes());
577            put_varint(log.email.len() as u64, &mut inner);
578            inner.extend_from_slice(log.email.as_bytes());
579            put_varint(log.time_seconds, &mut inner);
580            inner.extend_from_slice(&log.tz_offset.to_be_bytes());
581            put_varint(log.message.len() as u64, &mut inner);
582            inner.extend_from_slice(log.message.as_bytes());
583
584            prev_key = key;
585        }
586
587        if restart_offsets.is_empty() {
588            restart_offsets.push(4);
589        }
590
591        // Append restart table
592        for &off in &restart_offsets {
593            inner.push(((off >> 16) & 0xff) as u8);
594            inner.push(((off >> 8) & 0xff) as u8);
595            inner.push((off & 0xff) as u8);
596        }
597        let rc = restart_offsets.len() as u16;
598        inner.push((rc >> 8) as u8);
599        inner.push((rc & 0xff) as u8);
600
601        // block_len is the *inflated* size including the 4-byte block header
602        let block_len = 4 + inner.len();
603
604        // Deflate the inner content
605        let mut encoder = DeflateEncoder::new(Vec::new(), Compression::default());
606        encoder
607            .write_all(&inner)
608            .map_err(|e| Error::Zlib(e.to_string()))?;
609        let compressed = encoder.finish().map_err(|e| Error::Zlib(e.to_string()))?;
610
611        // Write block header + compressed data
612        out.push(BLOCK_TYPE_LOG);
613        out.push(((block_len >> 16) & 0xff) as u8);
614        out.push(((block_len >> 8) & 0xff) as u8);
615        out.push((block_len & 0xff) as u8);
616        out.extend_from_slice(&compressed);
617
618        Ok(())
619    }
620}
621
622// ---------------------------------------------------------------------------
623// Reader
624// ---------------------------------------------------------------------------
625
626/// Reads a single reftable file from a byte buffer.
627pub struct ReftableReader {
628    data: Vec<u8>,
629    version: u8,
630    block_size: u32,
631    min_update_index: u64,
632    max_update_index: u64,
633    ref_index_position: u64,
634    log_position: u64,
635}
636
637/// Parsed footer fields.
638#[derive(Debug)]
639#[allow(dead_code)]
640struct Footer {
641    version: u8,
642    block_size: u32,
643    min_update_index: u64,
644    max_update_index: u64,
645    ref_index_position: u64,
646    obj_position_and_id_len: u64,
647    obj_index_position: u64,
648    log_position: u64,
649    log_index_position: u64,
650}
651
652impl ReftableReader {
653    /// Open a reftable from bytes.
654    pub fn new(data: Vec<u8>) -> Result<Self> {
655        if data.len() < HEADER_SIZE + FOOTER_V1_SIZE {
656            // Could be an empty table (header + footer only = 24 + 68 = 92)
657            if data.len() < HEADER_SIZE {
658                return Err(Error::InvalidRef("reftable: file too small".into()));
659            }
660        }
661
662        // Parse header
663        if &data[0..4] != REFTABLE_MAGIC {
664            return Err(Error::InvalidRef("reftable: bad magic".into()));
665        }
666        let version = data[4];
667        if version != 1 && version != 2 {
668            return Err(Error::InvalidRef(format!(
669                "reftable: unsupported version {version}"
670            )));
671        }
672        let _block_size = ((data[5] as u32) << 16) | ((data[6] as u32) << 8) | (data[7] as u32);
673        let _min_update_index = u64::from_be_bytes(data[8..16].try_into().unwrap());
674        let _max_update_index = u64::from_be_bytes(data[16..24].try_into().unwrap());
675
676        // Parse footer
677        let footer_size = if version == 2 { 72 } else { FOOTER_V1_SIZE };
678        if data.len() < footer_size {
679            return Err(Error::InvalidRef(
680                "reftable: file too small for footer".into(),
681            ));
682        }
683        let footer_start = data.len() - footer_size;
684        let footer = parse_footer(&data[footer_start..], version)?;
685
686        Ok(Self {
687            data,
688            version,
689            block_size: footer.block_size,
690            min_update_index: footer.min_update_index,
691            max_update_index: footer.max_update_index,
692            ref_index_position: footer.ref_index_position,
693            log_position: footer.log_position,
694        })
695    }
696
697    /// Read all ref records from the table.
698    pub fn read_refs(&self) -> Result<Vec<RefRecord>> {
699        let mut refs = Vec::new();
700        let footer_size = if self.version == 2 {
701            72
702        } else {
703            FOOTER_V1_SIZE
704        };
705        let file_end = self.data.len() - footer_size;
706
707        // Determine where ref blocks end
708        let ref_end = if self.ref_index_position > 0 {
709            self.ref_index_position as usize
710        } else if self.log_position > 0 {
711            self.log_position as usize
712        } else {
713            file_end
714        };
715
716        let mut pos = 0usize;
717        // Skip the header — first ref block starts at offset 24 but shares
718        // the same physical block as the header.
719        if pos < HEADER_SIZE {
720            pos = HEADER_SIZE;
721        }
722
723        while pos < ref_end {
724            if pos >= self.data.len() {
725                break;
726            }
727            let block_type = self.data[pos];
728            if block_type == 0 {
729                // Padding — skip to next block boundary
730                if self.block_size > 0 {
731                    let bs = self.block_size as usize;
732                    pos = ((pos / bs) + 1) * bs;
733                    continue;
734                } else {
735                    break;
736                }
737            }
738            if block_type != BLOCK_TYPE_REF {
739                break;
740            }
741
742            let block_len = read_u24(&self.data, pos + 1);
743            // Determine the data range for this block
744            let block_data_start = pos + 4; // after type(1) + len(3)
745
746            // The first block's block_len includes the 24-byte header
747            let is_first = pos == HEADER_SIZE;
748            let records_end = if is_first {
749                // block_len is from file start
750                block_len
751            } else {
752                pos + block_len
753            };
754
755            if records_end > ref_end {
756                break;
757            }
758
759            // Read restart count (last 2 bytes before padding)
760            let rc = read_u16(&self.data, records_end - 2);
761            // Restart table is rc * 3 bytes before the restart_count
762            let restart_table_start = records_end - 2 - (rc * 3);
763
764            // Read records from block_data_start to restart_table_start
765            let mut rpos = block_data_start;
766            let mut prev_name = Vec::<u8>::new();
767
768            while rpos < restart_table_start {
769                let (rec, new_pos) =
770                    decode_ref_record(&self.data, rpos, &prev_name, self.min_update_index)?;
771                prev_name = rec.name.as_bytes().to_vec();
772                refs.push(rec);
773                rpos = new_pos;
774            }
775
776            // Advance to next block
777            if self.block_size > 0 {
778                let bs = self.block_size as usize;
779                if is_first {
780                    pos = bs;
781                } else {
782                    pos += bs;
783                }
784            } else {
785                pos = records_end;
786            }
787        }
788
789        Ok(refs)
790    }
791
792    /// Look up a single ref by name.
793    pub fn lookup_ref(&self, name: &str) -> Result<Option<RefRecord>> {
794        // Simple: scan all refs. For large files the index would speed this up.
795        let refs = self.read_refs()?;
796        Ok(refs.into_iter().find(|r| r.name == name))
797    }
798
799    /// Read all log records from the table.
800    pub fn read_logs(&self) -> Result<Vec<LogRecord>> {
801        if self.log_position == 0 {
802            return Ok(Vec::new());
803        }
804
805        let footer_size = if self.version == 2 {
806            72
807        } else {
808            FOOTER_V1_SIZE
809        };
810        let file_end = self.data.len() - footer_size;
811        let mut pos = self.log_position as usize;
812        let mut logs = Vec::new();
813
814        while pos < file_end {
815            if pos >= self.data.len() {
816                break;
817            }
818            let block_type = self.data[pos];
819            if block_type != BLOCK_TYPE_LOG {
820                break;
821            }
822            let block_len = read_u24(&self.data, pos + 1);
823            let compressed_start = pos + 4;
824
825            // The inflated size is block_len - 4 (block_len includes the 4-byte header)
826            let inflated_size = block_len - 4;
827
828            // Decompress
829            use flate2::read::DeflateDecoder;
830            let remaining = &self.data[compressed_start..file_end];
831            let mut decoder = DeflateDecoder::new(remaining);
832            let mut inflated = vec![0u8; inflated_size];
833            decoder
834                .read_exact(&mut inflated)
835                .map_err(|e| Error::Zlib(e.to_string()))?;
836
837            // How many compressed bytes were consumed?
838            let consumed = decoder.total_in() as usize;
839
840            // Parse log records from inflated data
841            // Read restart_count from end
842            if inflated.len() < 2 {
843                break;
844            }
845            let rc = read_u16(&inflated, inflated.len() - 2);
846            let restart_table_start = inflated.len() - 2 - (rc * 3);
847
848            let mut rpos = 0usize;
849            let mut prev_key = Vec::<u8>::new();
850
851            while rpos < restart_table_start {
852                let (log, new_pos) = decode_log_record(&inflated, rpos, &prev_key)?;
853                // Reconstruct key for prefix compression
854                let mut key = Vec::new();
855                key.extend_from_slice(log.refname.as_bytes());
856                key.push(0);
857                key.extend_from_slice(&(0xffffffffffffffffu64 - log.update_index).to_be_bytes());
858                prev_key = key;
859                logs.push(log);
860                rpos = new_pos;
861            }
862
863            pos = compressed_start + consumed;
864        }
865
866        Ok(logs)
867    }
868
869    /// Get the block size from the header.
870    pub fn block_size(&self) -> u32 {
871        self.block_size
872    }
873
874    /// Get the min update index.
875    pub fn min_update_index(&self) -> u64 {
876        self.min_update_index
877    }
878
879    /// Get the max update index.
880    pub fn max_update_index(&self) -> u64 {
881        self.max_update_index
882    }
883}
884
885// ---------------------------------------------------------------------------
886// Record decoding helpers
887// ---------------------------------------------------------------------------
888
889fn decode_ref_record(
890    data: &[u8],
891    pos: usize,
892    prev_name: &[u8],
893    min_update_index: u64,
894) -> Result<(RefRecord, usize)> {
895    let (prefix_len, p) = get_varint(data, pos)?;
896    let (suffix_and_type, mut p) = get_varint(data, p)?;
897    let suffix_len = (suffix_and_type >> 3) as usize;
898    let value_type = (suffix_and_type & 0x7) as u8;
899
900    // Reconstruct name
901    let mut name = Vec::with_capacity(prefix_len as usize + suffix_len);
902    if prefix_len > 0 {
903        if (prefix_len as usize) > prev_name.len() {
904            return Err(Error::InvalidRef(
905                "reftable: prefix_len exceeds prev name".into(),
906            ));
907        }
908        name.extend_from_slice(&prev_name[..prefix_len as usize]);
909    }
910    if p + suffix_len > data.len() {
911        return Err(Error::InvalidRef("reftable: suffix overflows block".into()));
912    }
913    name.extend_from_slice(&data[p..p + suffix_len]);
914    p += suffix_len;
915
916    let name_str = String::from_utf8(name)
917        .map_err(|_| Error::InvalidRef("reftable: invalid UTF-8 in ref name".into()))?;
918
919    let (update_index_delta, mut p) = get_varint(data, p)?;
920    let update_index = min_update_index + update_index_delta;
921
922    let value = match value_type {
923        VALUE_DELETION => RefValue::Deletion,
924        VALUE_ONE_OID => {
925            if p + HASH_SIZE > data.len() {
926                return Err(Error::InvalidRef("reftable: truncated OID".into()));
927            }
928            let oid = ObjectId::from_bytes(&data[p..p + HASH_SIZE])?;
929            p += HASH_SIZE;
930            RefValue::Val1(oid)
931        }
932        VALUE_TWO_OID => {
933            if p + 2 * HASH_SIZE > data.len() {
934                return Err(Error::InvalidRef("reftable: truncated OID pair".into()));
935            }
936            let oid = ObjectId::from_bytes(&data[p..p + HASH_SIZE])?;
937            p += HASH_SIZE;
938            let peeled = ObjectId::from_bytes(&data[p..p + HASH_SIZE])?;
939            p += HASH_SIZE;
940            RefValue::Val2(oid, peeled)
941        }
942        VALUE_SYMREF => {
943            let (target_len, p2) = get_varint(data, p)?;
944            p = p2;
945            let target_len = target_len as usize;
946            if p + target_len > data.len() {
947                return Err(Error::InvalidRef(
948                    "reftable: truncated symref target".into(),
949                ));
950            }
951            let target = String::from_utf8(data[p..p + target_len].to_vec())
952                .map_err(|_| Error::InvalidRef("reftable: invalid UTF-8 in symref".into()))?;
953            p += target_len;
954            RefValue::Symref(target)
955        }
956        _ => {
957            return Err(Error::InvalidRef(format!(
958                "reftable: unknown value_type {value_type}"
959            )));
960        }
961    };
962
963    Ok((
964        RefRecord {
965            name: name_str,
966            update_index,
967            value,
968        },
969        p,
970    ))
971}
972
973fn decode_log_record(data: &[u8], pos: usize, prev_key: &[u8]) -> Result<(LogRecord, usize)> {
974    let (prefix_len, p) = get_varint(data, pos)?;
975    let (suffix_and_type, mut p) = get_varint(data, p)?;
976    let suffix_len = (suffix_and_type >> 3) as usize;
977    let log_type = (suffix_and_type & 0x7) as u8;
978
979    // Reconstruct key
980    let mut key = Vec::with_capacity(prefix_len as usize + suffix_len);
981    if prefix_len > 0 {
982        if (prefix_len as usize) > prev_key.len() {
983            return Err(Error::InvalidRef(
984                "reftable: log prefix_len exceeds prev key".into(),
985            ));
986        }
987        key.extend_from_slice(&prev_key[..prefix_len as usize]);
988    }
989    if p + suffix_len > data.len() {
990        return Err(Error::InvalidRef("reftable: log suffix overflows".into()));
991    }
992    key.extend_from_slice(&data[p..p + suffix_len]);
993    p += suffix_len;
994
995    // Parse key: refname \0 reverse_int64(update_index)
996    let null_pos = key
997        .iter()
998        .position(|&b| b == 0)
999        .ok_or_else(|| Error::InvalidRef("reftable: log key missing null separator".into()))?;
1000    let refname = String::from_utf8(key[..null_pos].to_vec())
1001        .map_err(|_| Error::InvalidRef("reftable: invalid UTF-8 in log refname".into()))?;
1002    if null_pos + 9 > key.len() {
1003        return Err(Error::InvalidRef("reftable: log key too short".into()));
1004    }
1005    let reversed_idx = u64::from_be_bytes(key[null_pos + 1..null_pos + 9].try_into().unwrap());
1006    let update_index = 0xffffffffffffffffu64 - reversed_idx;
1007
1008    if log_type == 0 {
1009        // Deletion
1010        let zero_oid = ObjectId::from_bytes(&[0u8; 20])?;
1011        return Ok((
1012            LogRecord {
1013                refname,
1014                update_index,
1015                old_id: zero_oid,
1016                new_id: zero_oid,
1017                name: String::new(),
1018                email: String::new(),
1019                time_seconds: 0,
1020                tz_offset: 0,
1021                message: String::new(),
1022            },
1023            p,
1024        ));
1025    }
1026
1027    // log_type == 1: standard log data
1028    if p + 2 * HASH_SIZE > data.len() {
1029        return Err(Error::InvalidRef("reftable: truncated log OIDs".into()));
1030    }
1031    let old_id = ObjectId::from_bytes(&data[p..p + HASH_SIZE])?;
1032    p += HASH_SIZE;
1033    let new_id = ObjectId::from_bytes(&data[p..p + HASH_SIZE])?;
1034    p += HASH_SIZE;
1035
1036    let (name_len, p2) = get_varint(data, p)?;
1037    p = p2;
1038    let name_len = name_len as usize;
1039    if p + name_len > data.len() {
1040        return Err(Error::InvalidRef("reftable: truncated log name".into()));
1041    }
1042    let name = String::from_utf8(data[p..p + name_len].to_vec())
1043        .map_err(|_| Error::InvalidRef("reftable: invalid UTF-8 in log name".into()))?;
1044    p += name_len;
1045
1046    let (email_len, p2) = get_varint(data, p)?;
1047    p = p2;
1048    let email_len = email_len as usize;
1049    if p + email_len > data.len() {
1050        return Err(Error::InvalidRef("reftable: truncated log email".into()));
1051    }
1052    let email = String::from_utf8(data[p..p + email_len].to_vec())
1053        .map_err(|_| Error::InvalidRef("reftable: invalid UTF-8 in log email".into()))?;
1054    p += email_len;
1055
1056    let (time_seconds, p2) = get_varint(data, p)?;
1057    p = p2;
1058
1059    if p + 2 > data.len() {
1060        return Err(Error::InvalidRef("reftable: truncated tz_offset".into()));
1061    }
1062    let tz_offset = i16::from_be_bytes([data[p], data[p + 1]]);
1063    p += 2;
1064
1065    let (msg_len, p2) = get_varint(data, p)?;
1066    p = p2;
1067    let msg_len = msg_len as usize;
1068    if p + msg_len > data.len() {
1069        return Err(Error::InvalidRef("reftable: truncated log message".into()));
1070    }
1071    let message = String::from_utf8(data[p..p + msg_len].to_vec())
1072        .map_err(|_| Error::InvalidRef("reftable: invalid UTF-8 in log message".into()))?;
1073    p += msg_len;
1074
1075    Ok((
1076        LogRecord {
1077            refname,
1078            update_index,
1079            old_id,
1080            new_id,
1081            name,
1082            email,
1083            time_seconds,
1084            tz_offset,
1085            message,
1086        },
1087        p,
1088    ))
1089}
1090
1091// ---------------------------------------------------------------------------
1092// Stack management
1093// ---------------------------------------------------------------------------
1094
1095/// Manages the `$GIT_DIR/reftable/` directory and `tables.list` stack.
1096///
1097/// The stack provides a merged view of all tables, with later tables
1098/// taking precedence over earlier ones.
1099pub struct ReftableStack {
1100    /// Path to the `reftable/` directory.
1101    reftable_dir: PathBuf,
1102    /// Ordered list of table file names (oldest first).
1103    table_names: Vec<String>,
1104}
1105
1106impl ReftableStack {
1107    /// Open an existing reftable stack.
1108    pub fn open(git_dir: &Path) -> Result<Self> {
1109        let reftable_dir = git_dir.join("reftable");
1110        let tables_list = reftable_dir.join("tables.list");
1111        let content = fs::read_to_string(&tables_list).map_err(Error::Io)?;
1112        let table_names: Vec<String> = content
1113            .lines()
1114            .filter(|l| !l.is_empty())
1115            .map(|l| l.to_owned())
1116            .collect();
1117        Ok(Self {
1118            reftable_dir,
1119            table_names,
1120        })
1121    }
1122
1123    /// Read a merged view of all ref records.
1124    ///
1125    /// Later tables override earlier ones. Deletion records cause the
1126    /// ref to be omitted from the result.
1127    pub fn read_refs(&self) -> Result<Vec<RefRecord>> {
1128        let mut merged: BTreeMap<String, RefRecord> = BTreeMap::new();
1129
1130        for name in &self.table_names {
1131            let path = self.reftable_dir.join(name);
1132            let data = fs::read(&path).map_err(Error::Io)?;
1133            let reader = ReftableReader::new(data)?;
1134            for rec in reader.read_refs()? {
1135                match &rec.value {
1136                    RefValue::Deletion => {
1137                        merged.remove(&rec.name);
1138                    }
1139                    _ => {
1140                        merged.insert(rec.name.clone(), rec);
1141                    }
1142                }
1143            }
1144        }
1145
1146        Ok(merged.into_values().collect())
1147    }
1148
1149    /// Look up a single ref across all tables (most recent wins).
1150    pub fn lookup_ref(&self, name: &str) -> Result<Option<RefRecord>> {
1151        // Search tables in reverse (newest first)
1152        for table_name in self.table_names.iter().rev() {
1153            let path = self.reftable_dir.join(table_name);
1154            let data = fs::read(&path).map_err(Error::Io)?;
1155            let reader = ReftableReader::new(data)?;
1156            if let Some(rec) = reader.lookup_ref(name)? {
1157                return match rec.value {
1158                    RefValue::Deletion => Ok(None),
1159                    _ => Ok(Some(rec)),
1160                };
1161            }
1162        }
1163        Ok(None)
1164    }
1165
1166    /// Read merged log records for a specific ref.
1167    pub fn read_logs_for_ref(&self, refname: &str) -> Result<Vec<LogRecord>> {
1168        let mut logs = Vec::new();
1169        for table_name in &self.table_names {
1170            let path = self.reftable_dir.join(table_name);
1171            let data = fs::read(&path).map_err(Error::Io)?;
1172            let reader = ReftableReader::new(data)?;
1173            for log in reader.read_logs()? {
1174                if log.refname == refname {
1175                    logs.push(log);
1176                }
1177            }
1178        }
1179        // Sort by update_index descending (most recent first)
1180        logs.sort_by(|a, b| b.update_index.cmp(&a.update_index));
1181        Ok(logs)
1182    }
1183
1184    /// Read all log records across all tables.
1185    pub fn read_all_logs(&self) -> Result<Vec<LogRecord>> {
1186        let mut logs = Vec::new();
1187        for table_name in &self.table_names {
1188            let path = self.reftable_dir.join(table_name);
1189            let data = fs::read(&path).map_err(Error::Io)?;
1190            let reader = ReftableReader::new(data)?;
1191            logs.extend(reader.read_logs()?);
1192        }
1193        logs.sort_by(|a, b| {
1194            a.refname
1195                .cmp(&b.refname)
1196                .then_with(|| b.update_index.cmp(&a.update_index))
1197        });
1198        Ok(logs)
1199    }
1200
1201    /// Get the current max update index across all tables.
1202    pub fn max_update_index(&self) -> Result<u64> {
1203        let mut max_idx = 0u64;
1204        for name in &self.table_names {
1205            let path = self.reftable_dir.join(name);
1206            let data = fs::read(&path).map_err(Error::Io)?;
1207            let reader = ReftableReader::new(data)?;
1208            max_idx = max_idx.max(reader.max_update_index());
1209        }
1210        Ok(max_idx)
1211    }
1212
1213    /// Add a new reftable to the stack.
1214    ///
1215    /// Writes the table bytes to a new file, then atomically updates
1216    /// `tables.list`.
1217    pub fn add_table(&mut self, data: &[u8], update_index: u64) -> Result<String> {
1218        let random: u64 = {
1219            // Simple random from /dev/urandom or time-based fallback
1220            let mut buf = [0u8; 8];
1221            if let Ok(mut f) = fs::File::open("/dev/urandom") {
1222                let _ = f.read(&mut buf);
1223            }
1224            u64::from_le_bytes(buf)
1225        };
1226        let filename = format!(
1227            "{:08x}-{:08x}-{:08x}.ref",
1228            update_index, update_index, random as u32
1229        );
1230        let path = self.reftable_dir.join(&filename);
1231        fs::write(&path, data).map_err(Error::Io)?;
1232
1233        self.table_names.push(filename.clone());
1234        self.write_tables_list()?;
1235
1236        // Auto-compact if we have too many tables
1237        if self.table_names.len() > 5 {
1238            self.compact()?;
1239        }
1240
1241        Ok(filename)
1242    }
1243
1244    /// Write a ref update (add/update/delete) as a new reftable.
1245    ///
1246    /// This is the main entry point for updating refs in a reftable repo.
1247    pub fn write_ref(
1248        &mut self,
1249        refname: &str,
1250        value: RefValue,
1251        log: Option<LogRecord>,
1252        opts: &WriteOptions,
1253    ) -> Result<()> {
1254        let update_index = self.max_update_index()? + 1;
1255        let mut writer = ReftableWriter::new(opts.clone(), update_index, update_index);
1256
1257        // For a single-ref update we need to write all existing refs + the update
1258        // into a proper sorted order, OR we can write a single-record table.
1259        // The stack handles merging, so a single-record table is fine.
1260        writer.add_ref(RefRecord {
1261            name: refname.to_owned(),
1262            update_index,
1263            value,
1264        })?;
1265
1266        if let Some(log_rec) = log {
1267            let mut log_rec = log_rec;
1268            log_rec.update_index = update_index;
1269            writer.add_log(log_rec)?;
1270        }
1271
1272        let data = writer.finish()?;
1273        self.add_table(&data, update_index)?;
1274        Ok(())
1275    }
1276
1277    /// Compact all tables into a single table.
1278    pub fn compact(&mut self) -> Result<()> {
1279        if self.table_names.len() <= 1 {
1280            return Ok(());
1281        }
1282
1283        // Read all refs and logs
1284        let refs = self.read_refs()?;
1285        let logs = self.read_all_logs()?;
1286
1287        // Determine update index range
1288        let mut min_idx = u64::MAX;
1289        let mut max_idx = 0u64;
1290        for name in &self.table_names {
1291            let path = self.reftable_dir.join(name);
1292            let data = fs::read(&path).map_err(Error::Io)?;
1293            let reader = ReftableReader::new(data)?;
1294            min_idx = min_idx.min(reader.min_update_index());
1295            max_idx = max_idx.max(reader.max_update_index());
1296        }
1297        if min_idx == u64::MAX {
1298            min_idx = 0;
1299        }
1300
1301        let mut writer = ReftableWriter::new(WriteOptions::default(), min_idx, max_idx);
1302        for rec in refs {
1303            writer.add_ref(rec)?;
1304        }
1305        for log in logs {
1306            writer.add_log(log)?;
1307        }
1308
1309        let data = writer.finish()?;
1310
1311        // Write new compacted table
1312        let old_names = self.table_names.clone();
1313        self.table_names.clear();
1314        let _name = self.add_table(&data, max_idx)?;
1315
1316        // Remove old table files
1317        for old in &old_names {
1318            let path = self.reftable_dir.join(old);
1319            let _ = fs::remove_file(&path);
1320        }
1321
1322        Ok(())
1323    }
1324
1325    /// Write `tables.list` atomically.
1326    fn write_tables_list(&self) -> Result<()> {
1327        let tables_list = self.reftable_dir.join("tables.list");
1328        let lock = self.reftable_dir.join("tables.list.lock");
1329        let content = self.table_names.join("\n")
1330            + if self.table_names.is_empty() {
1331                ""
1332            } else {
1333                "\n"
1334            };
1335        fs::write(&lock, &content).map_err(Error::Io)?;
1336        fs::rename(&lock, &tables_list).map_err(Error::Io)?;
1337        Ok(())
1338    }
1339
1340    /// Return the list of table filenames in this stack.
1341    pub fn table_names(&self) -> &[String] {
1342        &self.table_names
1343    }
1344}
1345
1346// ---------------------------------------------------------------------------
1347// Integration helpers — used by refs.rs and commands
1348// ---------------------------------------------------------------------------
1349
1350/// Detect whether a git directory uses the reftable backend.
1351pub fn is_reftable_repo(git_dir: &Path) -> bool {
1352    fn config_uses_reftable(config_path: &Path) -> bool {
1353        let Ok(content) = fs::read_to_string(config_path) else {
1354            return false;
1355        };
1356
1357        let mut in_extensions = false;
1358        for line in content.lines() {
1359            let trimmed = line.trim();
1360            if trimmed.starts_with('[') {
1361                in_extensions = trimmed.eq_ignore_ascii_case("[extensions]");
1362                continue;
1363            }
1364            if in_extensions {
1365                if let Some((key, value)) = trimmed.split_once('=') {
1366                    if key.trim().eq_ignore_ascii_case("refstorage")
1367                        && value.trim().eq_ignore_ascii_case("reftable")
1368                    {
1369                        return true;
1370                    }
1371                }
1372            }
1373        }
1374        false
1375    }
1376
1377    let local_config = git_dir.join("config");
1378    if config_uses_reftable(&local_config) {
1379        return true;
1380    }
1381
1382    // Linked worktrees typically store the shared repository configuration
1383    // in the common directory pointed to by `commondir`.
1384    if let Ok(raw) = fs::read_to_string(git_dir.join("commondir")) {
1385        let rel = raw.trim();
1386        if !rel.is_empty() {
1387            let common = if Path::new(rel).is_absolute() {
1388                PathBuf::from(rel)
1389            } else {
1390                git_dir.join(rel)
1391            };
1392            let common_config = common.canonicalize().unwrap_or(common).join("config");
1393            if config_uses_reftable(&common_config) {
1394                return true;
1395            }
1396        }
1397    }
1398
1399    false
1400}
1401
1402/// Resolve a ref in a reftable repo, following symbolic refs.
1403pub fn reftable_resolve_ref(git_dir: &Path, refname: &str) -> Result<ObjectId> {
1404    reftable_resolve_ref_depth(git_dir, refname, 0)
1405}
1406
1407fn reftable_resolve_ref_depth(git_dir: &Path, refname: &str, depth: usize) -> Result<ObjectId> {
1408    if depth > 10 {
1409        return Err(Error::InvalidRef(format!(
1410            "reftable: symlink too deep: {refname}"
1411        )));
1412    }
1413
1414    // HEAD is special — stored as a file even in reftable repos
1415    if refname == "HEAD" {
1416        let head_path = git_dir.join("HEAD");
1417        if head_path.exists() {
1418            let content = fs::read_to_string(&head_path).map_err(Error::Io)?;
1419            let content = content.trim();
1420            if let Some(target) = content.strip_prefix("ref: ") {
1421                return reftable_resolve_ref_depth(git_dir, target.trim(), depth + 1);
1422            }
1423            // Detached HEAD
1424            if content.len() == 40 && content.chars().all(|c| c.is_ascii_hexdigit()) {
1425                return content.parse();
1426            }
1427        }
1428    }
1429
1430    let stack = ReftableStack::open(git_dir)?;
1431    match stack.lookup_ref(refname)? {
1432        Some(rec) => match rec.value {
1433            RefValue::Val1(oid) => Ok(oid),
1434            RefValue::Val2(oid, _) => Ok(oid),
1435            RefValue::Symref(target) => reftable_resolve_ref_depth(git_dir, &target, depth + 1),
1436            RefValue::Deletion => Err(Error::InvalidRef(format!("ref not found: {refname}"))),
1437        },
1438        None => Err(Error::InvalidRef(format!("ref not found: {refname}"))),
1439    }
1440}
1441
1442/// Write a ref to a reftable repo.
1443pub fn reftable_write_ref(
1444    git_dir: &Path,
1445    refname: &str,
1446    oid: &ObjectId,
1447    log_identity: Option<&str>,
1448    log_message: Option<&str>,
1449) -> Result<()> {
1450    let mut stack = ReftableStack::open(git_dir)?;
1451    let old_oid = stack
1452        .lookup_ref(refname)?
1453        .and_then(|r| match r.value {
1454            RefValue::Val1(oid) => Some(oid),
1455            RefValue::Val2(oid, _) => Some(oid),
1456            _ => None,
1457        })
1458        .unwrap_or_else(|| ObjectId::from_bytes(&[0u8; 20]).unwrap());
1459
1460    let log = if let Some(identity) = log_identity {
1461        let (name, email, time_secs, tz) = parse_identity_string(identity);
1462        Some(LogRecord {
1463            refname: refname.to_owned(),
1464            update_index: 0, // will be set by write_ref
1465            old_id: old_oid,
1466            new_id: *oid,
1467            name,
1468            email,
1469            time_seconds: time_secs,
1470            tz_offset: tz,
1471            message: log_message.unwrap_or("").to_owned(),
1472        })
1473    } else {
1474        None
1475    };
1476
1477    // Check config for logAllRefUpdates
1478    let write_log = log.is_some() || should_log_ref_updates(git_dir);
1479    let log = if write_log { log } else { None };
1480
1481    let opts = read_write_options(git_dir);
1482    stack.write_ref(refname, RefValue::Val1(*oid), log, &opts)
1483}
1484
1485/// Write a symbolic ref to a reftable repo.
1486pub fn reftable_write_symref(
1487    git_dir: &Path,
1488    refname: &str,
1489    target: &str,
1490    log_identity: Option<&str>,
1491    log_message: Option<&str>,
1492) -> Result<()> {
1493    let mut stack = ReftableStack::open(git_dir)?;
1494    let opts = read_write_options(git_dir);
1495
1496    let log = if let Some(identity) = log_identity {
1497        let (name, email, time_secs, tz) = parse_identity_string(identity);
1498        let zero_oid = ObjectId::from_bytes(&[0u8; 20])?;
1499        Some(LogRecord {
1500            refname: refname.to_owned(),
1501            update_index: 0,
1502            old_id: zero_oid,
1503            new_id: zero_oid,
1504            name,
1505            email,
1506            time_seconds: time_secs,
1507            tz_offset: tz,
1508            message: log_message.unwrap_or("").to_owned(),
1509        })
1510    } else {
1511        None
1512    };
1513
1514    stack.write_ref(refname, RefValue::Symref(target.to_owned()), log, &opts)
1515}
1516
1517/// Delete a ref from a reftable repo.
1518pub fn reftable_delete_ref(git_dir: &Path, refname: &str) -> Result<()> {
1519    let mut stack = ReftableStack::open(git_dir)?;
1520    let opts = read_write_options(git_dir);
1521    stack.write_ref(refname, RefValue::Deletion, None, &opts)
1522}
1523
1524/// Read the symbolic target of a ref in a reftable repo.
1525pub fn reftable_read_symbolic_ref(git_dir: &Path, refname: &str) -> Result<Option<String>> {
1526    let stack = ReftableStack::open(git_dir)?;
1527    match stack.lookup_ref(refname)? {
1528        Some(rec) => match rec.value {
1529            RefValue::Symref(target) => Ok(Some(target)),
1530            _ => Ok(None),
1531        },
1532        None => Ok(None),
1533    }
1534}
1535
1536/// List all refs in a reftable repo under a given prefix.
1537pub fn reftable_list_refs(git_dir: &Path, prefix: &str) -> Result<Vec<(String, ObjectId)>> {
1538    let stack = ReftableStack::open(git_dir)?;
1539    let refs = stack.read_refs()?;
1540    let mut result = Vec::new();
1541    for rec in refs {
1542        if rec.name.starts_with(prefix) {
1543            match rec.value {
1544                RefValue::Val1(oid) => result.push((rec.name, oid)),
1545                RefValue::Val2(oid, _) => result.push((rec.name, oid)),
1546                RefValue::Symref(target) => {
1547                    // Try to resolve the symref
1548                    if let Ok(oid) = reftable_resolve_ref(git_dir, &target) {
1549                        result.push((rec.name, oid));
1550                    }
1551                }
1552                RefValue::Deletion => {}
1553            }
1554        }
1555    }
1556    result.sort_by(|a, b| a.0.cmp(&b.0));
1557    Ok(result)
1558}
1559
1560/// Read reflog entries for a ref from the reftable stack.
1561pub fn reftable_read_reflog(
1562    git_dir: &Path,
1563    refname: &str,
1564) -> Result<Vec<crate::reflog::ReflogEntry>> {
1565    let stack = ReftableStack::open(git_dir)?;
1566    let logs = stack.read_logs_for_ref(refname)?;
1567    let mut entries = Vec::new();
1568    for log in logs {
1569        // Reconstruct the identity string
1570        let tz_sign = if log.tz_offset >= 0 { '+' } else { '-' };
1571        let tz_abs = log.tz_offset.unsigned_abs();
1572        let tz_hours = tz_abs / 60;
1573        let tz_mins = tz_abs % 60;
1574        let identity = format!(
1575            "{} <{}> {} {}{:02}{:02}",
1576            log.name, log.email, log.time_seconds, tz_sign, tz_hours, tz_mins
1577        );
1578        entries.push(crate::reflog::ReflogEntry {
1579            old_oid: log.old_id,
1580            new_oid: log.new_id,
1581            identity,
1582            message: log.message,
1583        });
1584    }
1585    Ok(entries)
1586}
1587
1588/// Append a reflog entry for a reftable repo.
1589pub fn reftable_append_reflog(
1590    git_dir: &Path,
1591    refname: &str,
1592    old_oid: &ObjectId,
1593    new_oid: &ObjectId,
1594    identity: &str,
1595    message: &str,
1596    force_create: bool,
1597) -> Result<()> {
1598    use crate::refs::should_autocreate_reflog;
1599    if !force_create
1600        && !should_autocreate_reflog(git_dir, refname)
1601        && message.is_empty()
1602        && !reftable_reflog_exists(git_dir, refname)
1603    {
1604        return Ok(());
1605    }
1606    let (name, email, time_secs, tz) = parse_identity_string(identity);
1607    let mut stack = ReftableStack::open(git_dir)?;
1608    let update_index = stack.max_update_index()? + 1;
1609    let opts = read_write_options(git_dir);
1610
1611    let mut writer = ReftableWriter::new(opts, update_index, update_index);
1612    writer.add_log(LogRecord {
1613        refname: refname.to_owned(),
1614        update_index,
1615        old_id: *old_oid,
1616        new_id: *new_oid,
1617        name,
1618        email,
1619        time_seconds: time_secs,
1620        tz_offset: tz,
1621        message: message.to_owned(),
1622    })?;
1623
1624    let data = writer.finish()?;
1625    stack.add_table(&data, update_index)?;
1626    Ok(())
1627}
1628
1629/// Check whether a reftable repo has reflogs for the given ref.
1630pub fn reftable_reflog_exists(git_dir: &Path, refname: &str) -> bool {
1631    if let Ok(stack) = ReftableStack::open(git_dir) {
1632        if let Ok(logs) = stack.read_logs_for_ref(refname) {
1633            return !logs.is_empty();
1634        }
1635    }
1636    false
1637}
1638
1639// ---------------------------------------------------------------------------
1640// Write options helpers
1641// ---------------------------------------------------------------------------
1642
1643/// Read reftable write options from the repository config.
1644pub fn read_write_options(git_dir: &Path) -> WriteOptions {
1645    let config_path = git_dir.join("config");
1646    let mut opts = WriteOptions::default();
1647
1648    if let Ok(content) = fs::read_to_string(&config_path) {
1649        let mut in_reftable = false;
1650        let mut in_core = false;
1651        let mut log_all_ref_updates: Option<bool> = None;
1652
1653        for line in content.lines() {
1654            let trimmed = line.trim();
1655            if trimmed.starts_with('[') {
1656                let section_lower = trimmed.to_lowercase();
1657                in_reftable = section_lower.starts_with("[reftable]");
1658                in_core = section_lower.starts_with("[core]");
1659                continue;
1660            }
1661            if in_reftable {
1662                if let Some((key, value)) = trimmed.split_once('=') {
1663                    let key = key.trim().to_lowercase();
1664                    let value = value.trim();
1665                    match key.as_str() {
1666                        "blocksize" => {
1667                            if let Ok(v) = value.parse::<u32>() {
1668                                opts.block_size = v;
1669                            }
1670                        }
1671                        "restartinterval" => {
1672                            if let Ok(v) = value.parse::<usize>() {
1673                                opts.restart_interval = v;
1674                            }
1675                        }
1676                        _ => {}
1677                    }
1678                }
1679            }
1680            if in_core {
1681                if let Some((key, value)) = trimmed.split_once('=') {
1682                    let key = key.trim().to_lowercase();
1683                    let value = value.trim().to_lowercase();
1684                    if key == "logallrefupdates" {
1685                        log_all_ref_updates = Some(value == "true" || value == "always");
1686                    }
1687                }
1688            }
1689        }
1690
1691        if let Some(false) = log_all_ref_updates {
1692            opts.write_log = false;
1693        }
1694    }
1695
1696    opts
1697}
1698
1699/// Check if logAllRefUpdates is enabled.
1700fn should_log_ref_updates(git_dir: &Path) -> bool {
1701    let config_path = git_dir.join("config");
1702    if let Ok(content) = fs::read_to_string(&config_path) {
1703        let mut in_core = false;
1704        for line in content.lines() {
1705            let trimmed = line.trim();
1706            if trimmed.starts_with('[') {
1707                in_core = trimmed.to_lowercase().starts_with("[core]");
1708                continue;
1709            }
1710            if in_core {
1711                if let Some((key, value)) = trimmed.split_once('=') {
1712                    if key.trim().eq_ignore_ascii_case("logallrefupdates") {
1713                        let v = value.trim().to_lowercase();
1714                        return v == "true" || v == "always";
1715                    }
1716                }
1717            }
1718        }
1719    }
1720    false
1721}
1722
1723// ---------------------------------------------------------------------------
1724// Utility functions
1725// ---------------------------------------------------------------------------
1726
1727/// Compute the CRC-32 of a byte slice (ISO 3309 / ITU-T V.42).
1728fn crc32(data: &[u8]) -> u32 {
1729    let mut crc: u32 = 0xffffffff;
1730    for &byte in data {
1731        crc ^= byte as u32;
1732        for _ in 0..8 {
1733            if crc & 1 != 0 {
1734                crc = (crc >> 1) ^ 0xedb88320;
1735            } else {
1736                crc >>= 1;
1737            }
1738        }
1739    }
1740    !crc
1741}
1742
1743/// Compute common prefix length between two byte slices.
1744fn common_prefix_len(a: &[u8], b: &[u8]) -> usize {
1745    a.iter().zip(b.iter()).take_while(|(x, y)| x == y).count()
1746}
1747
1748/// Read a big-endian u24 from 3 bytes at `pos`.
1749fn read_u24(data: &[u8], pos: usize) -> usize {
1750    ((data[pos] as usize) << 16) | ((data[pos + 1] as usize) << 8) | (data[pos + 2] as usize)
1751}
1752
1753/// Read a big-endian u16 from 2 bytes at `pos`.
1754fn read_u16(data: &[u8], pos: usize) -> usize {
1755    ((data[pos] as usize) << 8) | (data[pos + 1] as usize)
1756}
1757
1758/// Parse the footer of a reftable file.
1759fn parse_footer(data: &[u8], version: u8) -> Result<Footer> {
1760    let footer_size = if version == 2 { 72 } else { FOOTER_V1_SIZE };
1761    if data.len() < footer_size {
1762        return Err(Error::InvalidRef("reftable: footer too small".into()));
1763    }
1764
1765    // Verify magic
1766    if &data[0..4] != REFTABLE_MAGIC {
1767        return Err(Error::InvalidRef("reftable: bad footer magic".into()));
1768    }
1769    let fver = data[4];
1770    if fver != version {
1771        return Err(Error::InvalidRef(format!(
1772            "reftable: footer version mismatch: header={version}, footer={fver}"
1773        )));
1774    }
1775
1776    let block_size = ((data[5] as u32) << 16) | ((data[6] as u32) << 8) | (data[7] as u32);
1777    let min_update_index = u64::from_be_bytes(data[8..16].try_into().unwrap());
1778    let max_update_index = u64::from_be_bytes(data[16..24].try_into().unwrap());
1779
1780    let off = 24;
1781    let ref_index_position = u64::from_be_bytes(data[off..off + 8].try_into().unwrap());
1782    let obj_position_and_id_len = u64::from_be_bytes(data[off + 8..off + 16].try_into().unwrap());
1783    let obj_index_position = u64::from_be_bytes(data[off + 16..off + 24].try_into().unwrap());
1784    let log_position = u64::from_be_bytes(data[off + 24..off + 32].try_into().unwrap());
1785    let log_index_position = u64::from_be_bytes(data[off + 32..off + 40].try_into().unwrap());
1786
1787    // CRC-32 check
1788    let crc_stored = u32::from_be_bytes(data[footer_size - 4..footer_size].try_into().unwrap());
1789    let crc_computed = crc32(&data[..footer_size - 4]);
1790    if crc_stored != crc_computed {
1791        return Err(Error::InvalidRef(format!(
1792            "reftable: footer CRC mismatch: stored={crc_stored:08x}, computed={crc_computed:08x}"
1793        )));
1794    }
1795
1796    Ok(Footer {
1797        version: fver,
1798        block_size,
1799        min_update_index,
1800        max_update_index,
1801        ref_index_position,
1802        obj_position_and_id_len,
1803        obj_index_position,
1804        log_position,
1805        log_index_position,
1806    })
1807}
1808
1809/// Parse an identity string like `"Name <email> 1234567890 +0100"`.
1810fn parse_identity_string(identity: &str) -> (String, String, u64, i16) {
1811    // Format: "Name <email> timestamp tz"
1812    let parts: Vec<&str> = identity.rsplitn(3, ' ').collect();
1813    if parts.len() < 3 {
1814        return (identity.to_owned(), String::new(), 0, 0);
1815    }
1816    let tz_str = parts[0]; // e.g. "+0100"
1817    let time_str = parts[1]; // e.g. "1234567890"
1818    let name_email = parts[2]; // e.g. "Name <email>"
1819
1820    let time_secs = time_str.parse::<u64>().unwrap_or(0);
1821
1822    // Parse timezone: +HHMM or -HHMM
1823    let tz_minutes = if tz_str.len() >= 5 {
1824        let sign = if tz_str.starts_with('-') { -1i16 } else { 1 };
1825        let hours = tz_str[1..3].parse::<i16>().unwrap_or(0);
1826        let mins = tz_str[3..5].parse::<i16>().unwrap_or(0);
1827        sign * (hours * 60 + mins)
1828    } else {
1829        0
1830    };
1831
1832    // Split name and email
1833    let (name, email) = if let Some(lt_pos) = name_email.find('<') {
1834        let name = name_email[..lt_pos].trim().to_owned();
1835        let email = if let Some(gt_pos) = name_email.find('>') {
1836            name_email[lt_pos + 1..gt_pos].to_owned()
1837        } else {
1838            name_email[lt_pos + 1..].to_owned()
1839        };
1840        (name, email)
1841    } else {
1842        (name_email.to_owned(), String::new())
1843    };
1844
1845    (name, email, time_secs, tz_minutes)
1846}
1847
1848// ---------------------------------------------------------------------------
1849// Tests
1850// ---------------------------------------------------------------------------
1851
1852#[cfg(test)]
1853mod tests {
1854    use super::*;
1855
1856    #[test]
1857    fn test_varint_roundtrip() {
1858        for val in [0u64, 1, 127, 128, 255, 256, 16383, 16384, u64::MAX] {
1859            let mut buf = Vec::new();
1860            put_varint(val, &mut buf);
1861            let (decoded, end) = get_varint(&buf, 0).unwrap();
1862            assert_eq!(decoded, val, "varint roundtrip failed for {val}");
1863            assert_eq!(end, buf.len());
1864        }
1865    }
1866
1867    #[test]
1868    fn test_crc32() {
1869        // Known test vector: "123456789" => 0xCBF43926
1870        assert_eq!(crc32(b"123456789"), 0xCBF43926);
1871    }
1872
1873    #[test]
1874    fn test_empty_table() {
1875        let writer = ReftableWriter::new(WriteOptions::default(), 1, 1);
1876        let data = writer.finish().unwrap();
1877        let reader = ReftableReader::new(data).unwrap();
1878        let refs = reader.read_refs().unwrap();
1879        assert!(refs.is_empty());
1880    }
1881
1882    #[test]
1883    fn test_write_read_single_ref() {
1884        let oid = ObjectId::from_bytes(&[0xab; 20]).unwrap();
1885        let mut writer = ReftableWriter::new(WriteOptions::default(), 1, 1);
1886        writer
1887            .add_ref(RefRecord {
1888                name: "refs/heads/main".to_owned(),
1889                update_index: 1,
1890                value: RefValue::Val1(oid),
1891            })
1892            .unwrap();
1893        let data = writer.finish().unwrap();
1894
1895        let reader = ReftableReader::new(data).unwrap();
1896        let refs = reader.read_refs().unwrap();
1897        assert_eq!(refs.len(), 1);
1898        assert_eq!(refs[0].name, "refs/heads/main");
1899        assert_eq!(refs[0].value, RefValue::Val1(oid));
1900        assert_eq!(refs[0].update_index, 1);
1901    }
1902
1903    #[test]
1904    fn test_write_read_multiple_refs() {
1905        let oid1 = ObjectId::from_bytes(&[0x11; 20]).unwrap();
1906        let oid2 = ObjectId::from_bytes(&[0x22; 20]).unwrap();
1907        let oid3 = ObjectId::from_bytes(&[0x33; 20]).unwrap();
1908
1909        let mut writer = ReftableWriter::new(WriteOptions::default(), 1, 1);
1910        writer
1911            .add_ref(RefRecord {
1912                name: "refs/heads/a".to_owned(),
1913                update_index: 1,
1914                value: RefValue::Val1(oid1),
1915            })
1916            .unwrap();
1917        writer
1918            .add_ref(RefRecord {
1919                name: "refs/heads/b".to_owned(),
1920                update_index: 1,
1921                value: RefValue::Val1(oid2),
1922            })
1923            .unwrap();
1924        writer
1925            .add_ref(RefRecord {
1926                name: "refs/tags/v1.0".to_owned(),
1927                update_index: 1,
1928                value: RefValue::Val2(oid3, oid1),
1929            })
1930            .unwrap();
1931        let data = writer.finish().unwrap();
1932
1933        let reader = ReftableReader::new(data).unwrap();
1934        let refs = reader.read_refs().unwrap();
1935        assert_eq!(refs.len(), 3);
1936        assert_eq!(refs[0].name, "refs/heads/a");
1937        assert_eq!(refs[1].name, "refs/heads/b");
1938        assert_eq!(refs[2].name, "refs/tags/v1.0");
1939        assert_eq!(refs[2].value, RefValue::Val2(oid3, oid1));
1940    }
1941
1942    #[test]
1943    fn test_symref_roundtrip() {
1944        let mut writer = ReftableWriter::new(WriteOptions::default(), 1, 1);
1945        writer
1946            .add_ref(RefRecord {
1947                name: "refs/heads/sym".to_owned(),
1948                update_index: 1,
1949                value: RefValue::Symref("refs/heads/main".to_owned()),
1950            })
1951            .unwrap();
1952        let data = writer.finish().unwrap();
1953
1954        let reader = ReftableReader::new(data).unwrap();
1955        let refs = reader.read_refs().unwrap();
1956        assert_eq!(refs.len(), 1);
1957        assert_eq!(
1958            refs[0].value,
1959            RefValue::Symref("refs/heads/main".to_owned())
1960        );
1961    }
1962
1963    #[test]
1964    fn test_log_roundtrip() {
1965        let old_oid = ObjectId::from_bytes(&[0; 20]).unwrap();
1966        let new_oid = ObjectId::from_bytes(&[0xaa; 20]).unwrap();
1967
1968        let mut opts = WriteOptions::default();
1969        opts.write_log = true;
1970        let mut writer = ReftableWriter::new(opts, 1, 1);
1971        writer
1972            .add_log(LogRecord {
1973                refname: "refs/heads/main".to_owned(),
1974                update_index: 1,
1975                old_id: old_oid,
1976                new_id: new_oid,
1977                name: "Test User".to_owned(),
1978                email: "test@example.com".to_owned(),
1979                time_seconds: 1700000000,
1980                tz_offset: -480,
1981                message: "initial commit".to_owned(),
1982            })
1983            .unwrap();
1984        let data = writer.finish().unwrap();
1985
1986        let reader = ReftableReader::new(data).unwrap();
1987        let logs = reader.read_logs().unwrap();
1988        assert_eq!(logs.len(), 1);
1989        assert_eq!(logs[0].refname, "refs/heads/main");
1990        assert_eq!(logs[0].old_id, old_oid);
1991        assert_eq!(logs[0].new_id, new_oid);
1992        assert_eq!(logs[0].name, "Test User");
1993        assert_eq!(logs[0].email, "test@example.com");
1994        assert_eq!(logs[0].time_seconds, 1700000000);
1995        assert_eq!(logs[0].tz_offset, -480);
1996        assert_eq!(logs[0].message, "initial commit");
1997    }
1998
1999    #[test]
2000    fn test_unaligned_table() {
2001        let oid = ObjectId::from_bytes(&[0xcc; 20]).unwrap();
2002        let opts = WriteOptions {
2003            block_size: 0, // unaligned
2004            restart_interval: 16,
2005            write_log: false,
2006        };
2007        let mut writer = ReftableWriter::new(opts, 1, 1);
2008        writer
2009            .add_ref(RefRecord {
2010                name: "refs/heads/main".to_owned(),
2011                update_index: 1,
2012                value: RefValue::Val1(oid),
2013            })
2014            .unwrap();
2015        let data = writer.finish().unwrap();
2016
2017        let reader = ReftableReader::new(data).unwrap();
2018        assert_eq!(reader.block_size(), 0);
2019        let refs = reader.read_refs().unwrap();
2020        assert_eq!(refs.len(), 1);
2021        assert_eq!(refs[0].value, RefValue::Val1(oid));
2022    }
2023
2024    #[test]
2025    fn test_parse_identity() {
2026        let (name, email, ts, tz) =
2027            parse_identity_string("Test User <test@example.com> 1700000000 -0800");
2028        assert_eq!(name, "Test User");
2029        assert_eq!(email, "test@example.com");
2030        assert_eq!(ts, 1700000000);
2031        assert_eq!(tz, -480);
2032    }
2033
2034    #[test]
2035    fn test_deletion_record() {
2036        let mut writer = ReftableWriter::new(WriteOptions::default(), 1, 1);
2037        writer
2038            .add_ref(RefRecord {
2039                name: "refs/heads/gone".to_owned(),
2040                update_index: 1,
2041                value: RefValue::Deletion,
2042            })
2043            .unwrap();
2044        let data = writer.finish().unwrap();
2045
2046        let reader = ReftableReader::new(data).unwrap();
2047        let refs = reader.read_refs().unwrap();
2048        assert_eq!(refs.len(), 1);
2049        assert_eq!(refs[0].value, RefValue::Deletion);
2050    }
2051}