// armdb 0.2.0 - Docs.rs

use std::fs::{self, File};
use std::path::{Path, PathBuf};
#[cfg(feature = "encryption")]
use std::sync::Arc;

use zerocopy::FromBytes;

use crate::entry::{EntryHeader, compute_crc32, entry_size};
use crate::error::DbResult;
use crate::hint;

/// A single serialized entry read from a shard's log, together with the
/// location it was read from.
///
/// Values of this type are produced by `ShardLogReader::next_entry` after the
/// entry's CRC has been verified.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RawEntry {
    /// Complete serialized entry (header + key + value + padding).
    pub data: Vec<u8>,
    /// GSN extracted from the header (sequence number, no tombstone bit).
    pub gsn: u64,
    /// ID of the data file the entry was read from.
    pub file_id: u32,
    /// Byte offset of the entry's first byte within that file.
    pub file_offset: u64,
    /// Key length that was used to locate the key/value and verify the CRC.
    pub key_len: u16,
}

/// Sequential reader over the `.data` files of one shard directory.
///
/// Entries are yielded in file order, optionally starting at the first entry
/// whose GSN is at least a requested value. Used for catch-up replication
/// (initial sync, reconnect, SPSC overflow).
pub struct ShardLogReader {
    /// Directory that holds the shard's `{id:06}.data` files.
    shard_dir: PathBuf,
    /// IDs of the data files found in `shard_dir`, in ascending order.
    file_ids: Vec<u32>,
    /// Index into `file_ids` of the file that is currently open.
    current_file_idx: usize,
    /// Handle of the open data file; `None` when nothing is open or every
    /// file has been consumed.
    current_file: Option<File>,
    /// Byte offset in the current file of the next entry to examine.
    current_offset: u64,
    /// Length of the current file, captured when it was opened.
    current_file_len: u64,
    /// Fixed key length used to compute entry sizes and locate key/value.
    key_len: u16,
    /// Read-ahead buffer holding a contiguous window of the current file.
    read_buf: Vec<u8>,
    /// File offset that corresponds to `read_buf[0]`.
    read_buf_offset: u64,
    /// Number of valid bytes at the front of `read_buf`.
    read_buf_len: usize,
    /// Optional cipher for decrypting 4096-byte pages on read.
    ///
    /// When `Some`, `open_file` also opens the matching `.tags` file into
    /// `current_tag_file`, and `fill_read_buf` takes the encrypted refill
    /// path (`fill_read_buf_encrypted`) instead of the plain seek + read one.
    #[cfg(feature = "encryption")]
    cipher: Option<Arc<crate::crypto::PageCipher>>,
    /// Tag file that belongs to the currently open data file (set by
    /// `open_file` when a cipher is configured).
    #[cfg(feature = "encryption")]
    current_tag_file: Option<crate::io::tags::TagFile>,
}

/// Capacity of the read-ahead buffer in bytes (64 KiB).
const READ_AHEAD_SIZE: usize = 1 << 16;
/// Size in bytes of the fixed header that starts every serialized entry.
const HEADER_SIZE: usize = std::mem::size_of::<EntryHeader>();

impl ShardLogReader {
    /// Build a reader over the data files in `shard_dir`, positioned at the
    /// first entry whose GSN is `>= from_gsn`.
    ///
    /// With `from_gsn == 0` the reader simply opens the first data file (if
    /// there is one) and starts at its beginning.
    ///
    /// `key_len` is the fixed key length used to size and verify entries.
    ///
    /// `cipher` — when `Some`, every `.data` file is paired with its `.tags`
    /// file and pages are decrypted on read. When `None` (or when the
    /// `encryption` feature is disabled) the plaintext read path is used.
    ///
    /// # Errors
    ///
    /// Propagates failures from listing the directory, opening files, or
    /// seeking to `from_gsn`.
    pub fn new(
        shard_dir: PathBuf,
        from_gsn: u64,
        key_len: u16,
        #[cfg(feature = "encryption")] cipher: Option<Arc<crate::crypto::PageCipher>>,
    ) -> DbResult<Self> {
        // List the directory before `shard_dir` is moved into the struct.
        let file_ids = scan_data_files(&shard_dir)?;

        let mut reader = Self {
            shard_dir,
            file_ids,
            key_len,
            // Nothing is open yet: all positional state starts at zero.
            current_file: None,
            current_file_idx: 0,
            current_file_len: 0,
            current_offset: 0,
            read_buf: vec![0u8; READ_AHEAD_SIZE],
            read_buf_len: 0,
            read_buf_offset: 0,
            #[cfg(feature = "encryption")]
            cipher,
            #[cfg(feature = "encryption")]
            current_tag_file: None,
        };

        match from_gsn {
            0 => {
                if !reader.file_ids.is_empty() {
                    reader.open_file(0)?;
                }
            }
            target => reader.seek_to_gsn(target)?,
        }

        Ok(reader)
    }

    /// Return the next CRC-verified entry, or `Ok(None)` once no more data is
    /// available (no file open, or the last file has been exhausted).
    ///
    /// A header that cannot be read or parsed ends the current file and moves
    /// on to the next one. A position whose CRC does not verify is skipped by
    /// one header size and scanning resumes from there.
    pub fn next_entry(&mut self) -> DbResult<Option<RawEntry>> {
        while self.current_file.is_some() {
            let start = self.current_offset;

            // Header unavailable (end of file) and header unparsable are
            // handled the same way: give up on this file.
            let parsed = match self.read_bytes(HEADER_SIZE)? {
                Some(raw) => EntryHeader::read_from_bytes(&raw).ok(),
                None => None,
            };
            let Some(header) = parsed else {
                if self.advance_file()? {
                    continue;
                }
                return Ok(None);
            };

            let source_file = self.file_ids[self.current_file_idx];

            // Only emit entries whose CRC checks out.
            let Some((entry_len, _)) = self.try_key_len(&header, usize::from(self.key_len))?
            else {
                // CRC mismatch — corrupted entry. Skip header and try next position.
                self.current_offset += HEADER_SIZE as u64;
                continue;
            };

            let Some(data) = self.read_bytes_from(start, entry_len)? else {
                // Partial entry at EOF — move to next file.
                if self.advance_file()? {
                    continue;
                }
                return Ok(None);
            };

            self.current_offset = start + entry_len as u64;

            return Ok(Some(RawEntry {
                data,
                gsn: header.sequence(),
                file_id: source_file,
                file_offset: start,
                key_len: self.key_len,
            }));
        }

        Ok(None)
    }

    /// Check whether the entry at `current_offset` verifies against its CRC
    /// when interpreted with a key of `k` bytes.
    ///
    /// Returns `Some((total_entry_size, k))` on a CRC match, and `None` when
    /// the entry does not fit in the file, is too short, or the CRC differs.
    /// The read position is not changed.
    fn try_key_len(&mut self, header: &EntryHeader, k: usize) -> DbResult<Option<(usize, usize)>> {
        // Key bytes start right after the 16-byte header.
        const KEY_START: usize = 16;

        let total = entry_size(k, header.value_len) as usize;
        let Some(bytes) = self.peek_bytes_from(self.current_offset, total)? else {
            return Ok(None);
        };

        let value_end = KEY_START + k + header.value_len as usize;
        if bytes.len() < value_end {
            return Ok(None);
        }

        let (key, value) = bytes[KEY_START..value_end].split_at(k);
        let computed = compute_crc32(header.gsn, header.value_len, key, value);

        Ok((computed == header.crc32).then_some((total, k)))
    }

    /// Position the reader at the first entry with GSN >= `target_gsn`.
    ///
    /// Hint files are consulted first: the GSN stored at the start of a hint
    /// file's last entry (tombstone bit masked off) is compared with the
    /// target, and the first file whose value is `>= target_gsn` is treated
    /// as containing the target.
    ///
    /// A file whose hint is missing, unreadable, empty, or not a whole number
    /// of hint entries cannot be ruled out. If such a file precedes the
    /// matching file, scanning starts at the earliest such file instead of at
    /// the match, so entries in it are not silently skipped. When no hint
    /// matches at all, scanning starts from the first file.
    ///
    /// # Errors
    ///
    /// Propagates errors from reading hint files, opening the data file, and
    /// scanning it.
    fn seek_to_gsn(&mut self, target_gsn: u64) -> DbResult<()> {
        if self.file_ids.is_empty() {
            return Ok(());
        }

        let k = self.key_len as usize;
        // Earliest file we could not verify through a hint file.
        let mut first_unverified: Option<usize> = None;
        // Fallback when no hint matches: scan from the first file.
        let mut start_idx = 0;

        for (idx, &fid) in self.file_ids.iter().enumerate() {
            let hint_path = self.shard_dir.join(format!("{fid:06}.hint"));

            // Sequence number of the hint file's last entry, when available.
            let mut last_seq: Option<u64> = None;
            if hint_path.exists() {
                if let Some(hint_data) = hint::read_hint_file(&hint_path)? {
                    let hint_entry_size = hint::hint_entry_size(k);
                    if hint_data.len() % hint_entry_size == 0 {
                        let entry_count = hint_data.len() / hint_entry_size;
                        if entry_count > 0 {
                            let last_entry_start = (entry_count - 1) * hint_entry_size;
                            let last_gsn = u64::from_ne_bytes(
                                hint_data[last_entry_start..last_entry_start + 8]
                                    .try_into()
                                    .expect("8 bytes"),
                            );
                            last_seq = Some(last_gsn & !crate::entry::TOMBSTONE_BIT);
                        }
                    }
                }
            }

            match last_seq {
                Some(seq) if seq >= target_gsn => {
                    // This file contains the target; an earlier unverified
                    // file may too, so begin at whichever comes first.
                    start_idx = first_unverified.unwrap_or(idx);
                    break;
                }
                // Hint says every entry in this file precedes the target.
                Some(_) => {}
                None => {
                    if first_unverified.is_none() {
                        first_unverified = Some(idx);
                    }
                }
            }
        }

        self.open_file(start_idx)?;
        self.skip_until_gsn(target_gsn)
    }

    /// Advance `current_offset` (and the current file, if necessary) until it
    /// points at an entry header whose sequence number is `>= target_gsn`.
    ///
    /// Headers are only peeked, so the position moves solely through the
    /// explicit updates below; crossing into the next file resets the offset
    /// via `advance_file`. Returns once a matching header is found or no
    /// files remain.
    fn skip_until_gsn(&mut self, target_gsn: u64) -> DbResult<()> {
        while self.current_file.is_some() {
            let here = self.current_offset;

            // A header that is unavailable (file exhausted) or unparsable
            // (corrupted) both mean: move on to the next file.
            let parsed = match self.peek_bytes_from(here, HEADER_SIZE)? {
                Some(raw) => EntryHeader::read_from_bytes(&raw).ok(),
                None => None,
            };
            let Some(header) = parsed else {
                if self.advance_file()? {
                    continue;
                }
                return Ok(());
            };

            if header.sequence() >= target_gsn {
                // `current_offset` already points at the matching entry.
                return Ok(());
            }

            // Step over this entry using its total on-disk size.
            let after = here + entry_size(self.key_len as usize, header.value_len);

            if after < self.current_file_len {
                self.current_offset = after;
                // Once we have moved past the buffered window, drop it so the
                // next peek refills from the new offset.
                let buffered_until = self.read_buf_offset + self.read_buf_len as u64;
                if after >= buffered_until {
                    self.read_buf_len = 0;
                }
            } else if !self.advance_file()? {
                // The entry reached or ran past the end of the last file.
                return Ok(());
            }
        }

        Ok(())
    }

    /// Open the data file at `file_ids[idx]` and reset the read position and
    /// read-ahead buffer to the start of that file.
    ///
    /// When a cipher is configured, the matching `.tags` file is opened too.
    /// Every file is opened *before* any reader state is modified, so a
    /// failure leaves the reader exactly as it was instead of pairing the new
    /// data file with the previous file's tag file.
    ///
    /// # Errors
    ///
    /// Returns the error from opening the data file, reading its metadata, or
    /// opening the tag file.
    ///
    /// # Panics
    ///
    /// Panics if `idx` is out of bounds for `file_ids`.
    fn open_file(&mut self, idx: usize) -> DbResult<()> {
        let fid = self.file_ids[idx];
        let path = self.shard_dir.join(format!("{fid:06}.data"));
        let file = File::open(&path)?;
        let file_len = file.metadata()?.len();

        // Acquire the tag file up front so that the state update below cannot
        // be interrupted half-way by an I/O error.
        #[cfg(feature = "encryption")]
        let tag_file = match self.cipher {
            Some(_) => {
                let tag_path = crate::io::tags::tags_path_for_data(&path);
                Some(crate::io::tags::TagFile::open_read(&tag_path)?)
            }
            None => None,
        };

        // Nothing below can fail: commit the new position atomically.
        self.current_file_idx = idx;
        self.current_file = Some(file);
        self.current_offset = 0;
        self.current_file_len = file_len;
        self.read_buf_len = 0;
        self.read_buf_offset = 0;

        #[cfg(feature = "encryption")]
        if let Some(tag_file) = tag_file {
            self.current_tag_file = Some(tag_file);
        }

        Ok(())
    }

    /// Move on to the data file that follows the current one.
    ///
    /// Returns `Ok(true)` when the next file was opened. When there is no
    /// further file, the current file handle is dropped and `Ok(false)` is
    /// returned.
    fn advance_file(&mut self) -> DbResult<bool> {
        let candidate = self.current_file_idx + 1;
        if candidate < self.file_ids.len() {
            self.open_file(candidate)?;
            Ok(true)
        } else {
            self.current_file = None;
            Ok(false)
        }
    }

    /// Read `len` bytes starting at `current_offset`.
    ///
    /// This forwards to `read_bytes_from` and does not modify
    /// `current_offset`; callers advance the position themselves once they
    /// have decided how many bytes to consume. Returns `Ok(None)` when the
    /// requested range is not available.
    fn read_bytes(&mut self, len: usize) -> DbResult<Option<Vec<u8>>> {
        self.read_bytes_from(self.current_offset, len)
    }

    /// Return a copy of the `len` bytes at `offset` in the current file
    /// without changing `current_offset`.
    ///
    /// Returns `Ok(None)` when the range extends past the end of the file
    /// (as recorded at open time) or when the underlying refill cannot
    /// supply the bytes.
    ///
    /// The range is served from the read-ahead buffer when possible. After a
    /// miss the buffer is refilled at `offset`. A refill may start *before*
    /// `offset` (the encrypted path aligns down to a page boundary), so the
    /// slice is always taken relative to `read_buf_offset`.
    ///
    /// Requests that do not fit in a single refill window (for example an
    /// entry larger than the read-ahead buffer, or a refill that came back
    /// short) are assembled from successive refills instead of being reported
    /// as unavailable.
    fn peek_bytes_from(&mut self, offset: u64, len: usize) -> DbResult<Option<Vec<u8>>> {
        let Some(end) = offset.checked_add(len as u64) else {
            return Ok(None);
        };
        if end > self.current_file_len {
            return Ok(None);
        }

        // Fast path: the whole range is already buffered.
        if let Some(hit) = self.buffered_slice(offset, end) {
            return Ok(Some(hit.to_vec()));
        }

        // Common miss: one refill at `offset` covers the whole range.
        self.fill_read_buf(offset)?;
        if let Some(hit) = self.buffered_slice(offset, end) {
            return Ok(Some(hit.to_vec()));
        }

        // Slow path: stitch the range together window by window.
        let mut out = Vec::with_capacity(len);
        let mut pos = offset;
        while pos < end {
            let window_end = self.read_buf_offset + self.read_buf_len as u64;
            if pos < self.read_buf_offset || pos >= window_end {
                self.fill_read_buf(pos)?;
            }

            let window_end = self.read_buf_offset + self.read_buf_len as u64;
            if pos < self.read_buf_offset || pos >= window_end {
                // The refill produced nothing at `pos`; the data is not there.
                return Ok(None);
            }

            let start = (pos - self.read_buf_offset) as usize;
            let take = (window_end - pos).min(end - pos) as usize;
            out.extend_from_slice(&self.read_buf[start..start + take]);
            pos += take as u64;
        }

        Ok(Some(out))
    }

    /// Borrow the file range `offset..end` from the read-ahead buffer, or
    /// return `None` when the buffer does not fully contain it.
    fn buffered_slice(&self, offset: u64, end: u64) -> Option<&[u8]> {
        let window_end = self.read_buf_offset + self.read_buf_len as u64;
        if offset < self.read_buf_offset || end > window_end {
            return None;
        }
        let start = (offset - self.read_buf_offset) as usize;
        let stop = (end - self.read_buf_offset) as usize;
        Some(&self.read_buf[start..stop])
    }

    /// Read `len` bytes starting at `offset` in the current file.
    ///
    /// This is a direct forward to `peek_bytes_from`: it returns a copy of the
    /// bytes (or `Ok(None)` when they are not available) and leaves
    /// `current_offset` untouched.
    fn read_bytes_from(&mut self, offset: u64, len: usize) -> DbResult<Option<Vec<u8>>> {
        self.peek_bytes_from(offset, len)
    }

    fn fill_read_buf(&mut self, offset: u64) -> DbResult<()> {
        // Encryption path: clone Arc<PageCipher> (cheap) so that the immutable
        // borrows of `self.cipher` and `self.current_tag_file` end before we
        // write to `self.read_buf`, satisfying the borrow checker.
        #[cfg(feature = "encryption")]
        if let Some(cipher) = self.cipher.clone()
            && self.current_tag_file.is_some()
        {
            return self.fill_read_buf_encrypted(offset, cipher);
        }

        self.fill_read_buf_plain(offset)
    }

    /// Plaintext read-ahead: seek to `offset` and read up to
    /// `READ_AHEAD_SIZE` bytes (bounded by the recorded file length) into
    /// `read_buf`.
    ///
    /// Does nothing when no file is open. The buffer is marked empty before
    /// any I/O happens, so an error can never leave the old window's metadata
    /// describing partially overwritten contents. Short reads are retried
    /// until the window is full or the file reports end-of-data, and
    /// `Interrupted` errors are retried as well.
    ///
    /// # Errors
    ///
    /// Returns the seek or read error; the buffer is left empty in that case.
    fn fill_read_buf_plain(&mut self, offset: u64) -> DbResult<()> {
        use std::io::{ErrorKind, Read, Seek, SeekFrom};

        let file = match self.current_file.as_mut() {
            Some(f) => f,
            None => return Ok(()),
        };

        // Invalidate first: from here on the old contents must not be served.
        self.read_buf_len = 0;

        let remaining = self.current_file_len.saturating_sub(offset) as usize;
        let to_read = remaining.min(READ_AHEAD_SIZE);
        if to_read == 0 {
            return Ok(());
        }

        file.seek(SeekFrom::Start(offset))?;

        // A single `read` may legitimately return fewer bytes than requested.
        let mut filled = 0;
        while filled < to_read {
            match file.read(&mut self.read_buf[filled..to_read]) {
                Ok(0) => break, // end of data (file shorter than recorded)
                Ok(n) => filled += n,
                Err(e) if e.kind() == ErrorKind::Interrupted => continue,
                Err(e) => return Err(e.into()),
            }
        }

        self.read_buf_offset = offset;
        self.read_buf_len = filled;
        Ok(())
    }

    /// Decrypting read-ahead: load the window that starts at the 4096-byte
    /// page boundary at or below `offset`, decrypt it through
    /// `pread_value_encrypted`, and store the plaintext in `read_buf`.
    ///
    /// The window is at most `READ_AHEAD_SIZE` bytes and never extends past
    /// the recorded file length. Because the window starts at the page
    /// boundary, `read_buf_offset` may end up smaller than `offset`.
    ///
    /// `cipher` is an already-cloned `Arc`, so no borrow of `self.cipher` is
    /// held here. Falls back to the plaintext refill when no tag file is
    /// open, and does nothing when no data file is open.
    #[cfg(feature = "encryption")]
    fn fill_read_buf_encrypted(
        &mut self,
        offset: u64,
        cipher: Arc<crate::crypto::PageCipher>,
    ) -> DbResult<()> {
        const PAGE: usize = 4096;

        // Start of the page that contains `offset`.
        let page_start = offset - offset % PAGE as u64;
        let file_id = self.file_ids[self.current_file_idx];

        let bytes_left = self.current_file_len.saturating_sub(page_start) as usize;
        if bytes_left == 0 {
            self.read_buf_len = 0;
            return Ok(());
        }

        // Whole pages up to the read-ahead size, but never more than the file
        // still holds (the tail may not be padded to a full page).
        let request = bytes_left
            .min(READ_AHEAD_SIZE)
            .next_multiple_of(PAGE)
            .min(bytes_left);

        let Some(data_file) = self.current_file.as_ref() else {
            return Ok(());
        };
        let Some(tags) = self.current_tag_file.as_ref() else {
            return self.fill_read_buf_plain(offset);
        };

        // The borrows of the data and tag files end with this call, before
        // `read_buf` is written below.
        let plaintext = crate::io::direct::pread_value_encrypted(
            data_file, tags, &cipher, file_id, page_start, request,
        )?;

        let kept = plaintext.len().min(READ_AHEAD_SIZE);
        self.read_buf[..kept].copy_from_slice(&plaintext[..kept]);
        self.read_buf_offset = page_start;
        self.read_buf_len = kept;
        Ok(())
    }
}

fn scan_data_files(dir: &Path) -> DbResult<Vec<u32>> {
    let mut file_ids: Vec<u32> = Vec::new();
    if !dir.exists() {
        return Ok(file_ids);
    }
    for entry in fs::read_dir(dir)? {
        let entry = entry?;
        let name = entry.file_name();
        let name = name.to_string_lossy();
        if name.ends_with(".data")
            && let Ok(id) = name.trim_end_matches(".data").parse::<u32>()
        {
            file_ids.push(id);
        }
    }
    file_ids.sort();
    Ok(file_ids)
}

#[cfg(test)]
mod tests {
    use std::io::Write;

    use super::*;
    use crate::entry::serialize_entry;

    /// Write a sequence of entries into a file, returning the file path.
    fn write_data_file(dir: &Path, file_id: u32, entries: &[(u64, &[u8], &[u8])]) {
        let path = dir.join(format!("{file_id:06}.data"));
        let mut f = std::fs::File::create(&path).unwrap();
        for &(gsn, key, value) in entries {
            let data = serialize_entry(gsn, key, value, false);
            f.write_all(&data).unwrap();
        }
    }

    /// Regression test for C6: seeking to a GSN that lives in a later file.
    ///
    /// File 0 holds GSN 1–2 and file 1 holds GSN 3–4. Seeking to GSN 3 makes
    /// `skip_until_gsn` run off the end of file 0 and continue in file 1. The
    /// reader must then be positioned at offset 0 of file 1 rather than at a
    /// stale offset carried over from file 0, which would lie past EOF and
    /// make `next_entry()` return `None`.
    #[test]
    fn skip_until_gsn_cross_file_boundary() {
        let tmp = tempfile::tempdir().unwrap();
        let shard_dir = tmp.path().to_path_buf();

        // Keys and values are 8 bytes each.
        let (key0, key1, value) = (b"key00000", b"key11111", b"vvvvvvvv");

        // File 0 (id=0) ends just before the target GSN.
        write_data_file(&shard_dir, 0, &[(1, key0, value), (2, key0, value)]);
        // File 1 (id=1) starts with the target, GSN 3.
        write_data_file(&shard_dir, 1, &[(3, key1, value), (4, key1, value)]);

        // The seek inside `new` has to cross from file 0 into file 1.
        let mut reader = ShardLogReader::new(
            shard_dir,
            3,
            8,
            #[cfg(feature = "encryption")]
            None,
        )
        .unwrap();

        let first = reader
            .next_entry()
            .unwrap()
            .expect("should find entry with GSN 3");
        assert_eq!(first.gsn, 3, "first entry after seek must be GSN 3");
        assert_eq!(first.file_id, 1, "entry must come from file 1");
        assert_eq!(first.file_offset, 0, "entry must be at the start of file 1");

        // The reader must keep working afterwards: GSN 4 follows.
        let second = reader
            .next_entry()
            .unwrap()
            .expect("should find entry with GSN 4");
        assert_eq!(second.gsn, 4);
    }

    /// Sanity check: seeking to a GSN inside the only data file yields that
    /// entry, then the following one, then end-of-data.
    #[test]
    fn skip_until_gsn_single_file() {
        let tmp = tempfile::tempdir().unwrap();
        let shard_dir = tmp.path().to_path_buf();

        let (key, value) = (b"thekey00", b"theval00");
        let records = [(1, key, value), (2, key, value), (3, key, value)];
        write_data_file(&shard_dir, 0, &records.map(|(gsn, k, v)| (gsn, &k[..], &v[..])));

        let mut reader = ShardLogReader::new(
            shard_dir,
            2,
            8,
            #[cfg(feature = "encryption")]
            None,
        )
        .unwrap();

        let first = reader
            .next_entry()
            .unwrap()
            .expect("should find entry with GSN 2");
        assert_eq!(first.gsn, 2);

        let second = reader
            .next_entry()
            .unwrap()
            .expect("should find entry with GSN 3");
        assert_eq!(second.gsn, 3);

        let tail = reader.next_entry().unwrap();
        assert!(tail.is_none(), "no more entries");
    }

    /// Smoke test for the plaintext path: a reader built with `cipher: None`
    /// (when the `encryption` feature is on) or without the argument (when it
    /// is off) reads every entry of a plain data file from the beginning.
    #[test]
    fn plaintext_reader_with_none_cipher_smoke() {
        let tmp = tempfile::tempdir().unwrap();
        let shard_dir = tmp.path().to_path_buf();

        let (key, value) = (b"smkeyyyy", b"smvalyyy");
        write_data_file(&shard_dir, 0, &[(10, key, value), (11, key, value)]);

        // `from_gsn == 0`: start at the beginning of the first file.
        let mut reader = ShardLogReader::new(
            shard_dir,
            0,
            8,
            #[cfg(feature = "encryption")]
            None,
        )
        .unwrap();

        let first = reader.next_entry().unwrap().expect("entry 10");
        assert_eq!(first.gsn, 10);

        let second = reader.next_entry().unwrap().expect("entry 11");
        assert_eq!(second.gsn, 11);

        let tail = reader.next_entry().unwrap();
        assert!(tail.is_none());
    }

    /// Round trip through an encrypted shard: entries appended via
    /// `append_raw_entry` and flushed must come back from a `ShardLogReader`
    /// that was given the same cipher, with identical GSNs, keys and values.
    #[cfg(all(feature = "encryption", feature = "replication"))]
    #[test]
    fn encrypted_log_reader_decrypts_entries() {
        use std::sync::{Arc, atomic::AtomicU64};

        use crate::crypto::PageCipher;
        use crate::shard::Shard;

        let key_len: u16 = 8;
        let key = b"testkey0";
        let values = [b"value001", b"value002", b"value003"];

        let tmp = tempfile::tempdir().unwrap();
        let cipher = Arc::new(PageCipher::new(&[0xAB; 32]).expect("create cipher"));

        // The write buffer (64 KiB) is larger than one 4096-byte page, so at
        // least one complete page can be flushed.
        let shard = Shard::open_encrypted(
            0,
            tmp.path(),
            1 << 20,
            64 * 1024,
            false,
            Some(Arc::clone(&cipher)),
            Arc::new(AtomicU64::new(0)),
        )
        .expect("open encrypted shard");

        // Serialize everything first, then append through the replication
        // ingestion path while holding the shard lock once.
        let serialized = [
            serialize_entry(1, key, values[0], false),
            serialize_entry(2, key, values[1], false),
            serialize_entry(3, key, values[2], false),
        ];
        let labels = ["append entry 1", "append entry 2", "append entry 3"];
        {
            let mut inner = shard.lock();
            for (record, label) in serialized.iter().zip(labels) {
                inner.append_raw_entry(0, key_len, record).expect(label);
            }
        }

        // `flush()` is used rather than a plain buffer flush so that a final,
        // partially filled page also reaches the disk.
        shard.flush().expect("flush shard to disk");

        let mut reader = ShardLogReader::new(tmp.path().to_path_buf(), 0, key_len, Some(cipher))
            .expect("create log reader");

        // Drain the reader.
        let entries: Vec<RawEntry> =
            std::iter::from_fn(|| reader.next_entry().expect("read entry")).collect();

        assert_eq!(
            entries.len(),
            3,
            "expected 3 entries, got {}",
            entries.len()
        );

        // GSNs come back in write order.
        let gsns: Vec<u64> = entries.iter().map(|e| e.gsn).collect();
        assert_eq!(gsns, [1, 2, 3]);

        // Key bytes sit right after the 16-byte header.
        let kl = key_len as usize;
        let key_range = 16..16 + kl;
        for e in &entries {
            assert_eq!(
                &e.data[key_range.clone()],
                key,
                "key mismatch in entry {}",
                e.gsn
            );
        }

        // Value bytes follow the key.
        for (i, (e, expected_val)) in entries.iter().zip(values).enumerate() {
            let val_start = 16 + kl;
            let val_end = val_start + expected_val.len();
            assert_eq!(
                &e.data[val_start..val_end],
                expected_val,
                "value mismatch in entry {}",
                i + 1
            );
        }
    }

    /// Regression test for slicing after an encrypted refill.
    ///
    /// `fill_read_buf_encrypted` sets `read_buf_offset` to the 4096-byte page
    /// boundary at or below the requested offset, so after a refill the
    /// requested bytes start at `offset - read_buf_offset` inside the buffer,
    /// not at index 0. `peek_bytes_from` must slice accordingly.
    ///
    /// The test writes more than `READ_AHEAD_SIZE` (64 KiB) of entries whose
    /// size does not divide 4096, which forces at least one further refill at
    /// an offset that is not page-aligned. Slicing from index 0 would show up
    /// as CRC failures or as wrong GSN / value bytes past the first window.
    #[cfg(all(feature = "encryption", feature = "replication"))]
    #[test]
    fn encrypted_log_reader_handles_non_page_aligned_refill() {
        use std::sync::{Arc, atomic::AtomicU64};

        use crate::crypto::PageCipher;
        use crate::shard::Shard;

        let key_len: u16 = 8;
        let key: &[u8; 8] = b"regrkey0";

        // Entry size must NOT divide 4096, so that entry offsets drift away
        // from page boundaries. With an 8-byte key and a 21-byte value the
        // payload is 16 + 8 + 21 = 45 bytes, which is padded to 48 on disk;
        // 4096 / 48 is not an integer. (An 8-byte value would give 32-byte
        // entries, which divide 4096 evenly and would hide the problem.)
        let value_template: [u8; 21] = [0x77; 21];

        // 65536 / 48 ≈ 1366 entries fill the first read-ahead window; 2000
        // entries go comfortably beyond it.
        let num_entries: u64 = 2000;

        let tmp = tempfile::tempdir().unwrap();
        let cipher = Arc::new(PageCipher::new(&[0xCD; 32]).expect("create cipher"));

        // A 16 MiB file limit keeps every entry in one data file, so the
        // reader has to refill several times from the same file.
        let shard = Shard::open_encrypted(
            0,
            tmp.path(),
            1 << 24,    // 16 MiB
            256 * 1024, // 256 KiB write buffer
            false,
            Some(Arc::clone(&cipher)),
            Arc::new(AtomicU64::new(0)),
        )
        .expect("open encrypted shard");

        {
            let mut inner = shard.lock();
            for i in 1..=num_entries {
                // The GSN is embedded in the first 8 value bytes so that a
                // shifted read is detectable from the content alone.
                let mut value = value_template;
                value[..8].copy_from_slice(&i.to_le_bytes());
                let record = serialize_entry(i, key, &value, false);
                inner
                    .append_raw_entry(0, key_len, &record)
                    .expect("append entry");
            }
        }

        shard.flush().expect("flush shard to disk");

        let mut reader = ShardLogReader::new(tmp.path().to_path_buf(), 0, key_len, Some(cipher))
            .expect("create encrypted log reader");

        let kl = key_len as usize;
        let val_start = 16 + kl;
        let mut count: u64 = 0;
        loop {
            let Some(e) = reader.next_entry().expect("read entry") else {
                break;
            };
            count += 1;

            // Entries must arrive in write order: GSN 1..=num_entries.
            assert_eq!(e.gsn, count, "entry #{count} has wrong gsn {}", e.gsn);

            // The key must be intact.
            assert_eq!(&e.data[16..16 + kl], key, "key mismatch in entry {count}");

            // The value must carry the GSN it was written with, i.e. the
            // bytes were not shifted by a page-alignment delta.
            let recovered_gsn = u64::from_le_bytes(
                e.data[val_start..val_start + 8]
                    .try_into()
                    .expect("8 bytes"),
            );
            assert_eq!(
                recovered_gsn, count,
                "value at entry #{count} carries wrong embedded gsn {recovered_gsn}"
            );
        }

        assert_eq!(
            count, num_entries,
            "expected {num_entries} entries, decoded {count}"
        );
    }
}