use std::{fs::File, io::Write, os::unix::fs::FileExt};
use tracing::{debug, error, instrument};
use crate::{Document, DocumentId, Result, Version, ZeboError, index::ProbableIndex};
// On-disk page layout (all multi-byte integers are big-endian):
//
//   [0]        format version (u8)
//   [1..5]     document count limit (u32)
//   [5..9]     current document count (u32)
//   [9..13]    next available data offset (u32)
//   [13..17]   next available index slot (u32)
//   [17..25]   starting document id (u64)
//   [25..]     index entries, 16 bytes each: doc id (u64), data offset (u32),
//              data length (u32); document payload bytes follow the index.
pub const VERSION_OFFSET: u64 = 0;
pub const DOCUMENT_COUNT_LIMIT_OFFSET: u64 = VERSION_OFFSET + 1;
pub const DOCUMENT_COUNT_OFFSET: u64 = DOCUMENT_COUNT_LIMIT_OFFSET + 4;
pub const NEXT_AVAILABLE_OFFSET: u64 = DOCUMENT_COUNT_OFFSET + 4;
pub const NEXT_AVAILABLE_HEADER_OFFSET: u64 = NEXT_AVAILABLE_OFFSET + 4;
pub const STARTING_DOCUMENT_ID_OFFSET: u64 = NEXT_AVAILABLE_HEADER_OFFSET + 4;
pub const DOCUMENT_INDEX_OFFSET: u64 = STARTING_DOCUMENT_ID_OFFSET + 8;
/// An on-disk page holding up to `document_limit` documents behind a
/// fixed-size header and index region (see the `*_OFFSET` constants).
pub struct ZeboPage {
    // Maximum number of index entries this page can hold.
    document_limit: u32,
    // Document id assigned to the first slot of this page.
    #[allow(dead_code)]
    pub(crate) starting_document_id: u64,
    // Backing file; all access is positional I/O via `FileExt`.
    pub(crate) page_file: std::fs::File,
    // In-memory mirror of the next free index slot stored on disk.
    pub(crate) next_available_header_offset: u32,
}
impl ZeboPage {
/// Initialize a brand-new page file.
///
/// Sizes the file to the fixed header plus `document_limit` index entries
/// (16 bytes each), persists the version byte, the limit, a zero document
/// count, the first free data offset (right after the index region) and
/// the starting document id, then fsyncs everything.
pub fn try_new(
    document_limit: u32,
    starting_document_id: u64,
    mut page_file: std::fs::File,
) -> Result<Self> {
    // 16 bytes per index entry: data offset (4) + data length (4) + doc id (8).
    let document_header_size = (4 + 4 + 8) * (document_limit as u64);
    // `set_len` zero-fills, which also initializes the document count,
    // the next-available index slot and the whole index region.
    page_file
        .set_len(DOCUMENT_INDEX_OFFSET + document_header_size)
        .map_err(ZeboError::OperationError)?;
    page_file
        .write_all_at(&[Version::V1.into()], VERSION_OFFSET)
        .map_err(ZeboError::OperationError)?;
    page_file
        .write_all_at(&document_limit.to_be_bytes(), DOCUMENT_COUNT_LIMIT_OFFSET)
        .map_err(ZeboError::OperationError)?;
    page_file
        .write_all_at(&[0; 4], DOCUMENT_COUNT_OFFSET)
        .map_err(ZeboError::OperationError)?;
    // Document data starts immediately after the index region.
    // NOTE(review): this cast silently truncates if the computed offset
    // exceeds u32::MAX — presumably document_limit is bounded upstream; confirm.
    let initial_available_offset = (DOCUMENT_INDEX_OFFSET + document_header_size) as u32;
    page_file
        .write_all_at(
            &initial_available_offset.to_be_bytes(),
            NEXT_AVAILABLE_OFFSET,
        )
        .map_err(ZeboError::OperationError)?;
    page_file
        .write_all_at(
            &starting_document_id.to_be_bytes(),
            STARTING_DOCUMENT_ID_OFFSET,
        )
        .map_err(ZeboError::OperationError)?;
    page_file.flush().map_err(ZeboError::OperationError)?;
    page_file.sync_all().map_err(ZeboError::OperationError)?;
    let s = Self {
        document_limit,
        starting_document_id,
        page_file,
        // The page is empty: the next free index slot is the first one.
        next_available_header_offset: 0,
    };
    Ok(s)
}
/// Open an existing page file, validating the format version and reading
/// back the persisted header fields.
pub fn try_load(page_file: std::fs::File) -> Result<Self> {
    let mut buf = [0; 1];
    page_file
        .read_exact_at(&mut buf, VERSION_OFFSET)
        .map_err(ZeboError::OperationError)?;
    let version = buf[0];
    // Only V1 pages are supported.
    if version != Version::V1.into() {
        return Err(ZeboError::UnsupportedVersion {
            version,
            wanted: Version::V1.into(),
        });
    }
    let mut buf = [0; 4];
    page_file
        .read_exact_at(&mut buf, DOCUMENT_COUNT_LIMIT_OFFSET)
        .map_err(ZeboError::OperationError)?;
    let document_limit = u32::from_be_bytes(buf);
    page_file
        .read_exact_at(&mut buf, NEXT_AVAILABLE_HEADER_OFFSET)
        .map_err(ZeboError::OperationError)?;
    let next_available_header_offset = u32::from_be_bytes(buf);
    let mut buf = [0; 8];
    page_file
        .read_exact_at(&mut buf, STARTING_DOCUMENT_ID_OFFSET)
        .map_err(ZeboError::OperationError)?;
    let starting_document_id = u64::from_be_bytes(buf);
    Ok(Self {
        page_file,
        document_limit,
        starting_document_id,
        next_available_header_offset,
    })
}
/// Read the current document count from the on-disk header.
pub fn get_document_count(&self) -> Result<u32> {
    let mut bytes = [0_u8; 4];
    self.page_file
        .read_exact_at(&mut bytes, DOCUMENT_COUNT_OFFSET)
        .map_err(ZeboError::OperationError)?;
    Ok(u32::from_be_bytes(bytes))
}
/// Read the first free byte of the data region from the on-disk header.
fn get_next_available_document_offset(&self) -> Result<u32> {
    let mut bytes = [0_u8; 4];
    self.page_file
        .read_exact_at(&mut bytes, NEXT_AVAILABLE_OFFSET)
        .map_err(ZeboError::OperationError)?;
    Ok(u32::from_be_bytes(bytes))
}
#[inline]
fn is_deleted(doc_id: u64, document_offset: u32, document_len: u32) -> bool {
if document_offset == u32::MAX && document_len == u32::MAX {
return true;
}
if doc_id == u64::MAX && document_offset == u32::MAX {
return true;
}
false
}
/// True when an index slot was never written: a zero data offset can never
/// point at a real document because the fixed header always occupies the
/// beginning of the file.
#[inline]
fn is_uninitialized_entry(document_offset: u32) -> bool {
    matches!(document_offset, 0)
}
/// Build an in-memory snapshot of the page header.
///
/// Walks the index slots collecting `(doc_id, offset, len)` for every live
/// document, skipping deleted entries and stopping at the first
/// uninitialized slot (slots are filled in order, so a zero offset means
/// nothing follows).
pub fn get_header(&self) -> Result<ZeboPageHeader> {
    let document_count = self.get_document_count()?;
    let next_available_document_offset = self.get_next_available_document_offset()?;
    let mut doc_index = Vec::with_capacity(document_count as usize);
    let mut found = 0;
    let mut i: u64 = 0;
    while found < document_count {
        // Valid slots are 0..document_limit. The previous bound (`>`)
        // also read slot `document_limit`, whose bytes belong to document
        // data rather than the index.
        if i >= (self.document_limit as u64) {
            break;
        }
        if let Some((doc_id, document_offset, document_len)) = self.get_at(i)? {
            if Self::is_uninitialized_entry(document_offset) {
                break;
            }
            if Self::is_deleted(doc_id, document_offset, document_len) {
                i += 1;
                continue;
            }
            doc_index.push((doc_id, document_offset, document_len));
            found += 1;
        }
        i += 1;
    }
    let header = ZeboPageHeader {
        document_limit: self.document_limit,
        document_count,
        next_available_document_offset,
        next_available_header_offset: self.next_available_header_offset,
        index: doc_index,
    };
    Ok(header)
}
/// Fetch documents by id, using per-id "probable index" hints.
///
/// For every `(doc_id, probable_index)` pair the hinted slot is read
/// first; if it holds a different id, a delta-corrected slot is tried and
/// finally `fallback_search_document` takes over. Ids that are absent or
/// deleted are skipped, so the returned vector may be shorter than the
/// input.
pub fn get_documents<DocId: DocumentId>(
    &self,
    doc_id_with_index: &[(u64, ProbableIndex)],
) -> Result<Vec<(DocId, Vec<u8>)>> {
    let mut r = Vec::with_capacity(doc_id_with_index.len());
    for (doc_id, probable_index) in doc_id_with_index {
        let mut probable_index = *probable_index;
        // Clamp the hint to the used portion of the index.
        if probable_index.0 >= self.next_available_header_offset as u64 {
            probable_index.0 = self.next_available_header_offset.saturating_sub(1) as u64;
        }
        let mut buff = [0; 16];
        // Propagate I/O failures instead of panicking (was `.unwrap()`).
        self.page_file
            .read_exact_at(
                &mut buff,
                DOCUMENT_INDEX_OFFSET + (probable_index.0 * (4 + 4 + 8)),
            )
            .map_err(ZeboError::OperationError)?;
        let found_id = u64::from_be_bytes([
            buff[0], buff[1], buff[2], buff[3], buff[4], buff[5], buff[6], buff[7],
        ]);
        let document_offset = u32::from_be_bytes([buff[8], buff[9], buff[10], buff[11]]);
        let document_len = u32::from_be_bytes([buff[12], buff[13], buff[14], buff[15]]);
        if &found_id == doc_id {
            if Self::is_uninitialized_entry(document_offset)
                || Self::is_deleted(found_id, document_offset, document_len)
            {
                continue;
            } else {
                let mut doc_buf = vec![0; document_len as usize];
                if document_len > 0 {
                    self.page_file
                        .read_exact_at(&mut doc_buf, document_offset as u64)
                        .map_err(ZeboError::OperationError)?;
                }
                debug!("Found with probable index");
                r.push((DocId::from_u64(*doc_id), doc_buf));
                continue;
            }
        }
        // The hinted slot held another id: re-aim by the id delta (slots
        // are mostly laid out in id order).
        let mut probable_index = if *doc_id > found_id {
            let delta = doc_id - found_id;
            ProbableIndex(probable_index.0 + delta)
        } else {
            let delta = found_id - doc_id;
            ProbableIndex(probable_index.0.saturating_sub(delta))
        };
        if probable_index.0 >= self.next_available_header_offset as u64 {
            probable_index.0 = self.next_available_header_offset.saturating_sub(1) as u64;
        }
        // Same as above: surface read errors to the caller (was `.unwrap()`).
        self.page_file
            .read_exact_at(
                &mut buff,
                DOCUMENT_INDEX_OFFSET + (probable_index.0 * (4 + 4 + 8)),
            )
            .map_err(ZeboError::OperationError)?;
        let new_found_id = u64::from_be_bytes([
            buff[0], buff[1], buff[2], buff[3], buff[4], buff[5], buff[6], buff[7],
        ]);
        let new_document_offset = u32::from_be_bytes([buff[8], buff[9], buff[10], buff[11]]);
        let new_document_len = u32::from_be_bytes([buff[12], buff[13], buff[14], buff[15]]);
        let a = self.fallback_search_document(
            *doc_id,
            Some((
                probable_index.0,
                (new_found_id, new_document_offset, new_document_len),
            )),
        );
        if let Ok(Some((_, document_offset, document_len))) = a {
            let mut doc_buf = vec![0; document_len as usize];
            if document_len > 0 {
                self.page_file
                    .read_exact_at(&mut doc_buf, document_offset as u64)
                    .map_err(ZeboError::OperationError)?;
            }
            r.push((DocId::from_u64(*doc_id), doc_buf));
            continue;
        } else if let Err(e) = a {
            // Best-effort per id: log and move on to the next request.
            error!("Error during fallback search: {:?}", e);
            continue;
        }
    }
    Ok(r)
}
/// Reserve index slots and data space for `documents`, advancing the
/// on-disk counters, and return a [`ZeboPageReservedSpace`] that performs
/// the actual payload writes.
///
/// NOTE(review): nothing here checks the reservation against
/// `document_limit` — presumably enforced by the caller; confirm.
pub fn reserve<'docs, DocId: DocumentId, Doc: Document>(
    &mut self,
    documents: &'docs [(DocId, Doc)],
) -> Result<ZeboPageReservedSpace<'docs, DocId, Doc>> {
    let mut buf = [0; 4];
    self.page_file
        .read_exact_at(&mut buf, NEXT_AVAILABLE_OFFSET)
        .map_err(ZeboError::OperationError)?;
    let next_available_offset = u32::from_be_bytes(buf);
    self.page_file
        .read_exact_at(&mut buf, NEXT_AVAILABLE_HEADER_OFFSET)
        .map_err(ZeboError::OperationError)?;
    let next_available_header_offset = u32::from_be_bytes(buf);
    let document_count = documents.len() as u32;
    let documents_size = documents.iter().map(|(_, doc)| doc.len()).sum::<usize>() as u32;
    let new_next_available_offset = next_available_offset + documents_size;
    let new_next_available_header_offset = next_available_header_offset + document_count;
    self.next_available_header_offset = new_next_available_header_offset;
    // Write each u32 field with an exactly-sized buffer via `write_all_at`.
    // The previous code pushed an 8-byte buffer through `write_at`, which
    // (a) spilled 4 zero bytes past each field — clobbering the adjacent
    // field, including the high half of the starting document id — and
    // (b) could silently short-write.
    self.page_file
        .write_all_at(&new_next_available_offset.to_be_bytes(), NEXT_AVAILABLE_OFFSET)
        .map_err(ZeboError::OperationError)?;
    self.page_file
        .write_all_at(
            &new_next_available_header_offset.to_be_bytes(),
            NEXT_AVAILABLE_HEADER_OFFSET,
        )
        .map_err(ZeboError::OperationError)?;
    self.page_file
        .read_exact_at(&mut buf, DOCUMENT_COUNT_OFFSET)
        .map_err(ZeboError::OperationError)?;
    let current_document_count = u32::from_be_bytes(buf);
    let new_document_count = current_document_count + document_count;
    self.page_file
        .write_all_at(&new_document_count.to_be_bytes(), DOCUMENT_COUNT_OFFSET)
        .map_err(ZeboError::OperationError)?;
    self.page_file.flush().map_err(ZeboError::OperationError)?;
    self.page_file
        .sync_all()
        .map_err(ZeboError::OperationError)?;
    let file = self
        .page_file
        .try_clone()
        .map_err(ZeboError::OperationError)?;
    Ok(ZeboPageReservedSpace {
        file,
        next_available_offset,
        next_available_header_offset,
        documents,
    })
}
/// Mark the given documents as deleted, returning how many were found.
///
/// When `clean_data` is true the payload bytes are zeroed first. Deletion
/// keeps the doc id in the slot and sets offset and length to `u32::MAX`
/// (see `is_deleted`), then decrements the stored document count.
pub fn delete_documents(
    &mut self,
    documents_to_delete: &[(u64, ProbableIndex)],
    clean_data: bool,
) -> Result<u32> {
    if clean_data {
        let header = self.get_header()?;
        // Reusable zero buffer, grown to the largest payload seen so far.
        let mut v: Vec<u8> = vec![];
        for (doc_id, _) in documents_to_delete {
            let found = header.index.iter().find(|(d, _, _)| d == doc_id);
            if let Some((_, document_offset, document_len)) = found {
                let len = *document_len as usize;
                if v.len() < len {
                    v.resize(len, 0);
                }
                self.page_file
                    .write_all_at(&v[0..len], *document_offset as u64)
                    .map_err(ZeboError::OperationError)?;
            }
        }
    }
    let mut found = 0_u32;
    let mut buf = [0; 16];
    for i in 0..self.document_limit {
        let (doc_id, offset, _) = match self.get_at(i as u64)? {
            Some(x) => x,
            None => continue,
        };
        if Self::is_uninitialized_entry(offset) {
            continue;
        }
        // Already deleted (either tombstone format).
        if offset == u32::MAX {
            continue;
        }
        if documents_to_delete.iter().any(|(d, _)| *d == doc_id) {
            buf[0..8].copy_from_slice(&doc_id.to_be_bytes());
            buf[8..12].copy_from_slice(&u32::MAX.to_be_bytes());
            buf[12..16].copy_from_slice(&u32::MAX.to_be_bytes());
            // Widen `i` before multiplying: `i * 16` in u32 could
            // overflow for very large document limits.
            self.page_file
                .write_all_at(&buf, DOCUMENT_INDEX_OFFSET + (i as u64) * (4 + 4 + 8))
                .map_err(ZeboError::OperationError)?;
            found += 1;
        }
    }
    if found > 0 {
        let document_count = self.get_document_count()?;
        // Saturate: an on-disk count smaller than `found` (corruption)
        // must not underflow and wrap.
        let new_document_count = document_count.saturating_sub(found);
        self.page_file
            .write_all_at(&new_document_count.to_be_bytes(), DOCUMENT_COUNT_OFFSET)
            .map_err(ZeboError::OperationError)?;
    }
    self.page_file.flush().map_err(ZeboError::OperationError)?;
    self.page_file
        .sync_all()
        .map_err(ZeboError::OperationError)?;
    Ok(found)
}
/// Read the raw index entry at `document_index`.
///
/// Returns `Ok(None)` when the index is out of range or the read hits EOF
/// (truncated file); the entry's `(doc_id, offset, len)` otherwise.
fn get_at(&self, document_index: u64) -> Result<Option<(u64, u32, u32)>> {
    // Valid slots are 0..document_limit (exclusive). The previous bound
    // (`limit < index`) let `index == limit` through, reading the first
    // bytes of document data as if they were an index entry.
    if document_index >= self.document_limit as u64 {
        return Ok(None);
    }
    let mut buf = [0; 16];
    if let Err(e) = self.page_file.read_exact_at(
        &mut buf,
        DOCUMENT_INDEX_OFFSET + (document_index * (4 + 4 + 8)),
    ) {
        if e.kind() == std::io::ErrorKind::UnexpectedEof {
            return Ok(None);
        }
        return Err(ZeboError::OperationError(e));
    }
    let doc_id = u64::from_be_bytes([
        buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6], buf[7],
    ]);
    let document_offset = u32::from_be_bytes([buf[8], buf[9], buf[10], buf[11]]);
    let document_len = u32::from_be_bytes([buf[12], buf[13], buf[14], buf[15]]);
    Ok(Some((doc_id, document_offset, document_len)))
}
/// Walk the index looking for `target_doc_id`, starting from the best
/// available hint.
///
/// `hint_data` is `(slot_index, (doc_id, offset, len))` for a slot the
/// caller already read. Strategy: try the hint, then a delta-corrected
/// slot (ids are mostly laid out in slot order, so the id difference
/// approximates the slot difference), then a directed linear walk, with
/// `find_in_range` (windowed scan) and `find_from_start` (full scan) as
/// the final fallbacks. Returns `Ok(None)` for missing or deleted ids.
#[instrument(skip(self, hint_data), fields(target_doc_id = target_doc_id))]
fn fallback_search_document(
    &self,
    target_doc_id: u64,
    hint_data: Option<(u64, (u64, u32, u32))>,
) -> Result<Option<(u64, u32, u32)>> {
    let (starting_index, starting_doc_id) = if let Some((index, (doc_id, offset, len))) =
        hint_data
    {
        if doc_id == target_doc_id {
            if Self::is_uninitialized_entry(offset) || Self::is_deleted(doc_id, offset, len) {
                return Ok(None);
            }
            debug!("Found with hint");
            return Ok(Some((doc_id, offset, len)));
        }
        let document_count = self.next_available_header_offset as u64;
        if document_count == 0 {
            return Ok(None);
        }
        // Jump by the id delta, clamped to the used slots.
        let most_probable_index = if doc_id < target_doc_id {
            (target_doc_id - doc_id + index).min(document_count - 1)
        } else {
            index.saturating_sub(doc_id - target_doc_id)
        };
        match self.get_at(most_probable_index)? {
            None => {
                // Out-of-range jump: fall back to walking from the hint slot.
                (index, doc_id)
            }
            Some((found_doc_id, document_offset, document_len)) => {
                if found_doc_id == target_doc_id {
                    if Self::is_uninitialized_entry(document_offset)
                        || Self::is_deleted(found_doc_id, document_offset, document_len)
                    {
                        return Ok(None);
                    }
                    debug!("Found with delta hint");
                    return Ok(Some((found_doc_id, document_offset, document_len)));
                }
                (most_probable_index, found_doc_id)
            }
        }
    } else {
        // No hint: start from whichever of the first/last used slot is
        // closer to the target by id distance.
        let document_count = self.next_available_header_offset;
        if document_count == 0 {
            return Ok(None);
        }
        let first_doc_id = self.starting_document_id;
        if document_count == 1 {
            (0, first_doc_id)
        } else {
            let last_index = (document_count - 1) as u64;
            match self.get_at(last_index)? {
                None => (0, first_doc_id),
                Some((last_doc_id, _, _)) => {
                    let distance_from_first = target_doc_id.abs_diff(first_doc_id);
                    let distance_from_last = target_doc_id.abs_diff(last_doc_id);
                    if distance_from_last < distance_from_first {
                        (last_index, last_doc_id)
                    } else {
                        (0, first_doc_id)
                    }
                }
            }
        }
    };
    // Walk direction: towards larger slots when the start id is below the
    // target, otherwise towards smaller slots.
    let delta: i32 = if starting_doc_id < target_doc_id {
        1
    } else {
        -1
    };
    let mut tries = 0;
    let mut current_index = starting_index;
    loop {
        tries += 1;
        match self.get_at(current_index)? {
            None => {
                // Walked off the index: scan a window around the start instead.
                return self.find_in_range(target_doc_id, starting_index, 50);
            }
            Some((doc_id, document_offset, document_len)) => {
                if Self::is_uninitialized_entry(document_offset)
                    || Self::is_deleted(doc_id, document_offset, document_len)
                {
                    // Skip holes, keeping the same direction.
                    let temp_current_index = current_index as i128 + delta as i128;
                    if temp_current_index < 0 {
                        break;
                    }
                    current_index = temp_current_index as u64;
                    continue;
                }
                if doc_id == target_doc_id {
                    debug!(tries = ?tries, "Found after some retries");
                    return Ok(Some((doc_id, document_offset, document_len)));
                }
                let current_delta = if doc_id < target_doc_id { 1 } else { -1 };
                // Direction flipped: we stepped past where the id should
                // sit, so it is either missing or nearby — window-scan.
                if current_delta != delta {
                    return self.find_in_range(target_doc_id, starting_index, 50);
                }
                let temp_current_index = current_index as i128 + current_delta as i128;
                if temp_current_index < 0 {
                    break;
                }
                current_index = temp_current_index as u64;
            }
        }
    }
    Ok(None)
}
/// Linearly scan index slots in `[index - delta, index + delta]` for
/// `target_doc_id`, falling back to a full scan when it is not there.
fn find_in_range(
    &self,
    target_doc_id: u64,
    index: u64,
    delta: u64,
) -> Result<Option<(u64, u32, u32)>> {
    let starting_index = index.saturating_sub(delta);
    // Clamp to the index region: slot numbers >= document_limit address
    // document data, and interpreting those bytes as index entries could
    // fabricate a bogus match. (The previous code scanned unclamped.)
    let last_slot = (self.document_limit as u64).saturating_sub(1);
    let ending_index = index.saturating_add(delta).min(last_slot);
    let mut buf = [0; 16];
    for i in starting_index..=ending_index {
        match self
            .page_file
            .read_exact_at(&mut buf, DOCUMENT_INDEX_OFFSET + (i * (4 + 4 + 8)))
        {
            Ok(_) => {}
            Err(e) => {
                if e.kind() == std::io::ErrorKind::UnexpectedEof {
                    break;
                }
                return Err(ZeboError::OperationError(e));
            }
        };
        let doc_id = u64::from_be_bytes([
            buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6], buf[7],
        ]);
        let document_offset = u32::from_be_bytes([buf[8], buf[9], buf[10], buf[11]]);
        let document_len = u32::from_be_bytes([buf[12], buf[13], buf[14], buf[15]]);
        if doc_id == target_doc_id {
            if Self::is_uninitialized_entry(document_offset)
                || Self::is_deleted(doc_id, document_offset, document_len)
            {
                return Ok(None);
            }
            debug!("Found in range search");
            return Ok(Some((doc_id, document_offset, document_len)));
        }
    }
    self.find_from_start(target_doc_id)
}
/// Exhaustively scan every index slot from slot 0 for `target_doc_id`.
/// Last-resort path after the hinted and windowed searches have failed.
fn find_from_start(&self, target_doc_id: u64) -> Result<Option<(u64, u32, u32)>> {
    let mut buf = [0; 16];
    // Bound the scan by the page's own slot count. The previous code used
    // a hard-coded 1_179_623, which both overshoots small pages (reading
    // document data as index entries, risking a spurious match) and stops
    // short on pages with a larger limit.
    for i in 0..self.document_limit as u64 {
        match self
            .page_file
            .read_exact_at(&mut buf, DOCUMENT_INDEX_OFFSET + (i * (4 + 4 + 8)))
        {
            Ok(_) => {
                let doc_id = u64::from_be_bytes([
                    buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6], buf[7],
                ]);
                let document_offset = u32::from_be_bytes([buf[8], buf[9], buf[10], buf[11]]);
                let document_len = u32::from_be_bytes([buf[12], buf[13], buf[14], buf[15]]);
                if doc_id == target_doc_id {
                    if Self::is_uninitialized_entry(document_offset)
                        || Self::is_deleted(doc_id, document_offset, document_len)
                    {
                        return Ok(None);
                    } else {
                        return Ok(Some((doc_id, document_offset, document_len)));
                    }
                }
            }
            Err(e) => {
                if e.kind() == std::io::ErrorKind::UnexpectedEof {
                    break;
                }
                return Err(ZeboError::OperationError(e));
            }
        }
    }
    Ok(None)
}
/// Flush buffered writes and fsync the page file to durable storage.
pub fn close(&mut self) -> Result<()> {
    self.page_file.flush().map_err(ZeboError::OperationError)?;
    // `sync_all` also persists file metadata (e.g. length), not just data.
    self.page_file
        .sync_all()
        .map_err(ZeboError::OperationError)
}
#[cfg_attr(coverage_nightly, coverage(off))]
pub fn debug_content_with_options(
&self,
writer: &mut dyn std::io::Write,
skip_content_checks: bool,
skip_document_content: bool,
skip_header_info: bool,
wanted_doc_id: Option<u64>,
starting_doc_id: Option<u64>,
) -> Result<()> {
let mut buf = [0; 1];
self.page_file
.read_exact_at(&mut buf, VERSION_OFFSET)
.unwrap();
let version = u8::from_be_bytes(buf);
writeln!(writer, "Version: {version}").unwrap();
let mut buf = [0; 4];
self.page_file
.read_exact_at(&mut buf, DOCUMENT_COUNT_LIMIT_OFFSET)
.unwrap();
let document_limit = u32::from_be_bytes(buf);
writeln!(writer, "Document Limit: {document_limit}").unwrap();
let mut buf = [0; 4];
self.page_file
.read_exact_at(&mut buf, DOCUMENT_COUNT_OFFSET)
.unwrap();
let document_count = u32::from_be_bytes(buf);
writeln!(writer, "Document Count: {document_count}").unwrap();
let mut buf = [0; 4];
self.page_file
.read_exact_at(&mut buf, NEXT_AVAILABLE_OFFSET)
.unwrap();
let next_available_offset = u32::from_be_bytes(buf);
writeln!(writer, "Next Available Offset: {next_available_offset}").unwrap();
let mut buf = [0; 4];
self.page_file
.read_exact_at(&mut buf, NEXT_AVAILABLE_HEADER_OFFSET)
.unwrap();
let next_available_header_offset = u32::from_be_bytes(buf);
writeln!(
writer,
"Next Available Header Offset: {next_available_header_offset}"
)
.unwrap();
let mut buf = [0; 8];
self.page_file
.read_exact_at(&mut buf, DOCUMENT_INDEX_OFFSET)
.unwrap();
let starting_document_id = u64::from_be_bytes(buf);
writeln!(writer, "Starting Document ID: {starting_document_id}").unwrap();
let mut offset = DOCUMENT_INDEX_OFFSET;
let mut doc_id = [0; 8];
let mut starting_offset = [0; 4];
let mut bytes_length = [0; 4];
let mut docs: Vec<u8> = vec![];
let mut i = -1_i128;
loop {
i += 1;
if i > self.next_available_header_offset as i128 {
break;
}
self.page_file.read_exact_at(&mut doc_id, offset).unwrap();
if doc_id == [0; 8] {
break;
}
match self
.page_file
.read_exact_at(&mut starting_offset, offset + 8)
{
Ok(_) => {}
Err(e) => {
if e.kind() == std::io::ErrorKind::UnexpectedEof {
break;
}
return Err(ZeboError::OperationError(e));
}
};
match self.page_file.read_exact_at(&mut bytes_length, offset + 12) {
Ok(_) => {}
Err(e) => {
if e.kind() == std::io::ErrorKind::UnexpectedEof {
break;
}
return Err(ZeboError::OperationError(e));
}
};
let doc_id = u64::from_be_bytes(doc_id);
offset += 8 + 4 + 4;
if let Some(wanted_doc_id) = wanted_doc_id
&& doc_id != wanted_doc_id
{
continue;
}
if let Some(starting_doc_id) = starting_doc_id
&& doc_id < starting_doc_id
{
continue;
}
let starting_offset = u32::from_be_bytes(starting_offset);
let bytes_length = u32::from_be_bytes(bytes_length);
if Self::is_uninitialized_entry(starting_offset) {
break; }
if !skip_header_info {
writeln!(
writer,
"# {i} - Document id: {doc_id}, starting_offset: {starting_offset}, bytes_length: {bytes_length}"
)
.unwrap();
}
if doc_id == u64::MAX {
break; }
if bytes_length == u32::MAX || starting_offset == u32::MAX {
if !skip_document_content {
writeln!(writer, "Document is deleted or uninitialized").unwrap();
}
} else {
if docs.len() < bytes_length as usize {
docs.resize(bytes_length as usize, 0);
}
let slice = &mut docs[0..bytes_length as usize];
if !skip_content_checks {
if let Err(e) = self.page_file.read_exact_at(slice, starting_offset as u64) {
if e.kind() == std::io::ErrorKind::UnexpectedEof {
writeln!(writer, "Document content: [incomplete data, expected length {bytes_length} bytes]").unwrap();
continue;
}
return Err(ZeboError::OperationError(e));
}
let probable_index = ProbableIndex(doc_id - starting_document_id);
let output = self
.get_documents::<u64>(&[(doc_id, probable_index)])
.unwrap();
assert_eq!(output.len(), 1, "Document id {doc_id} not found");
let (f_doc_id, f_content) = &output[0];
assert_eq!(*f_doc_id, doc_id, "Document id mismatch");
assert_eq!(*f_content, slice, "Document content mismatch");
}
if !skip_document_content {
self.page_file
.read_exact_at(slice, starting_offset as u64)
.unwrap();
match String::from_utf8(slice.to_vec()) {
Ok(s) => {
writeln!(writer, "{s}").unwrap();
}
Err(_) => {
writeln!(
writer,
"Document content: [binary data of {} bytes]",
slice.len()
)
.unwrap();
}
}
}
}
}
Ok(())
}
}
/// Documents whose index slots and data bytes were already accounted for
/// by [`ZeboPage::reserve`]; `write_all` performs the actual serialization.
#[cfg_attr(test, derive(Debug))]
pub struct ZeboPageReservedSpace<'docs, DocId, Doc> {
    // Clone of the page's file handle.
    file: File,
    // First free data byte at reservation time (where payloads go).
    next_available_offset: u32,
    // First free index slot at reservation time.
    next_available_header_offset: u32,
    // The documents to persist.
    documents: &'docs [(DocId, Doc)],
}
impl<'docs, DocId: DocumentId, Doc: Document> ZeboPageReservedSpace<'docs, DocId, Doc> {
    /// Serialize every reserved document: build the index entries and the
    /// concatenated payload in memory, write both into the slots/offsets
    /// that were reserved, then flush and fsync.
    pub fn write_all(mut self) -> Result<()> {
        let mut all_document_bytes = vec![];
        // 16 bytes of index entry per document: id (8) + offset (4) + len (4).
        let mut document_size_per_doc = Vec::with_capacity(self.documents.len() * 16);
        let mut document_offset = self.next_available_offset;
        for (doc_id, doc) in self.documents.iter() {
            let current_bytes_len = all_document_bytes.len();
            doc.as_bytes(&mut all_document_bytes);
            let doc_bytes_len = (all_document_bytes.len() - current_bytes_len) as u32;
            // Index entry: big-endian id, this document's data offset, length.
            document_size_per_doc.extend_from_slice(&doc_id.as_u64().to_be_bytes());
            document_size_per_doc.extend_from_slice(&document_offset.to_be_bytes());
            document_size_per_doc.extend_from_slice(&doc_bytes_len.to_be_bytes());
            document_offset += doc_bytes_len;
        }
        // Widen before multiplying: `header_offset * 16` computed in u32
        // could overflow for very large pages.
        let header_pos =
            DOCUMENT_INDEX_OFFSET + (self.next_available_header_offset as u64) * 16;
        self.file
            .write_all_at(&document_size_per_doc, header_pos)
            .map_err(ZeboError::OperationError)?;
        self.file
            .write_all_at(&all_document_bytes, self.next_available_offset as u64)
            .map_err(ZeboError::OperationError)?;
        self.file.flush().map_err(ZeboError::OperationError)?;
        self.file.sync_all().map_err(ZeboError::OperationError)?;
        Ok(())
    }
}
/// In-memory snapshot of a page header, produced by [`ZeboPage::get_header`].
#[derive(Debug, PartialEq)]
pub struct ZeboPageHeader {
    pub document_limit: u32,
    pub document_count: u32,
    // First free byte of the data region.
    pub next_available_document_offset: u32,
    // First free index slot.
    pub next_available_header_offset: u32,
    // Live documents only: (doc_id, data offset, data length).
    pub index: Vec<(u64, u32, u32)>,
}
#[cfg(test)]
mod tests {
use crate::tests::prepare_test_dir;
use super::*;
// A freshly created, empty page: verifies the in-memory accessors and then
// the raw on-disk layout (version byte, limit, count, next-available
// offset, zeroed index region).
#[test]
fn test_zebo_page_check_internals_empty() {
    let test_dir = prepare_test_dir();
    let file_path = test_dir.join("page_0.zebo");
    let zebo_page_file = std::fs::File::options()
        .create(true)
        .truncate(false)
        .read(true)
        .write(true)
        .open(&file_path)
        .unwrap();
    let page = ZeboPage::try_new(2, 0, zebo_page_file).unwrap();
    assert_eq!(page.document_limit, 2);
    assert_eq!(page.get_document_count().unwrap(), 0);
    let header = page.get_header().unwrap();
    assert_eq!(header.document_limit, 2);
    assert_eq!(header.document_count, 0);
    // 25-byte fixed header + 2 * 16-byte index entries = 57.
    assert_eq!(header.next_available_document_offset, 57);
    assert_eq!(header.index.len(), 0);
    drop(page);
    let file_content = std::fs::read(&file_path).unwrap();
    // Byte 0: format version.
    assert_eq!(file_content[0], Version::V1.into());
    // Bytes 1..5: document limit.
    assert_eq!(
        u32::from_be_bytes([
            file_content[1],
            file_content[2],
            file_content[3],
            file_content[4]
        ]),
        2
    );
    // Bytes 5..9: document count.
    assert_eq!(
        u32::from_be_bytes([
            file_content[5],
            file_content[6],
            file_content[7],
            file_content[8]
        ]),
        0
    );
    // Bytes 9..13: next available data offset.
    assert_eq!(
        u32::from_be_bytes([
            file_content[9],
            file_content[10],
            file_content[11],
            file_content[12]
        ]),
        DOCUMENT_INDEX_OFFSET as u32 + (4 + 4 + 8) * 2
    );
    // Both index entries must still be fully zeroed.
    for i in 0..2 {
        let offset = (DOCUMENT_INDEX_OFFSET + (i * (4 + 4 + 8))) as usize;
        assert_eq!(
            u64::from_be_bytes([
                file_content[offset],
                file_content[offset + 1],
                file_content[offset + 2],
                file_content[offset + 3],
                file_content[offset + 4],
                file_content[offset + 5],
                file_content[offset + 6],
                file_content[offset + 7]
            ]),
            0
        );
        assert_eq!(
            u32::from_be_bytes([
                file_content[offset + 8],
                file_content[offset + 9],
                file_content[offset + 10],
                file_content[offset + 11]
            ]),
            0
        );
        assert_eq!(
            u32::from_be_bytes([
                file_content[offset + 12],
                file_content[offset + 13],
                file_content[offset + 14],
                file_content[offset + 15]
            ]),
            0
        );
    }
}
// One document reserved and written: the count, next-available offset and
// the first index entry must all reflect it on disk.
#[test]
fn test_zebo_page_check_internals_add_doc() {
    let test_dir = prepare_test_dir();
    let file_path = test_dir.join("page_0.zebo");
    let zebo_page_file = std::fs::File::options()
        .create(true)
        .truncate(false)
        .read(true)
        .write(true)
        .open(&file_path)
        .unwrap();
    let mut page = ZeboPage::try_new(2, 0, zebo_page_file).unwrap();
    assert_eq!(page.document_limit, 2);
    assert_eq!(page.get_document_count().unwrap(), 0);
    let header = page.get_header().unwrap();
    assert_eq!(header.document_limit, 2);
    assert_eq!(header.document_count, 0);
    assert_eq!(header.next_available_document_offset, 57);
    assert_eq!(header.index.len(), 0);
    page.reserve(&[(1_u64, "ab")]).unwrap().write_all().unwrap();
    drop(page);
    let file_content = std::fs::read(&file_path).unwrap();
    assert_eq!(file_content[0], Version::V1.into());
    assert_eq!(
        u32::from_be_bytes([
            file_content[1],
            file_content[2],
            file_content[3],
            file_content[4]
        ]),
        2
    );
    // Document count is now 1.
    assert_eq!(
        u32::from_be_bytes([
            file_content[5],
            file_content[6],
            file_content[7],
            file_content[8]
        ]),
        1
    );
    // Next available data offset advanced by the 2 payload bytes.
    assert_eq!(
        u32::from_be_bytes([
            file_content[9],
            file_content[10],
            file_content[11],
            file_content[12]
        ]),
        DOCUMENT_INDEX_OFFSET as u32 + (4 + 4 + 8) * 2 + 2
    );
    let i = 0;
    let offset = (DOCUMENT_INDEX_OFFSET + (i * (4 + 4 + 8))) as usize;
    // First index entry: id 1, data at byte 57, length 2.
    assert_eq!(
        u64::from_be_bytes([
            file_content[offset],
            file_content[offset + 1],
            file_content[offset + 2],
            file_content[offset + 3],
            file_content[offset + 4],
            file_content[offset + 5],
            file_content[offset + 6],
            file_content[offset + 7]
        ]),
        1
    );
    assert_eq!(
        u32::from_be_bytes([
            file_content[offset + 8],
            file_content[offset + 9],
            file_content[offset + 10],
            file_content[offset + 11]
        ]),
        57
    );
    assert_eq!(
        u32::from_be_bytes([
            file_content[offset + 12],
            file_content[offset + 13],
            file_content[offset + 14],
            file_content[offset + 15]
        ]),
        2
    );
}
// Insert three documents, delete the middle one with data cleaning, then
// insert a fourth: checks the raw index entries (tombstone for doc 2) and
// the data region ("ab", zeroed gap, "ef", "gh").
#[test]
fn test_zebo_page_check_internals_add_remove_add_doc() {
    let test_dir = prepare_test_dir();
    let file_path = test_dir.join("page_0.zebo");
    let zebo_page_file = std::fs::File::options()
        .create(true)
        .truncate(false)
        .read(true)
        .write(true)
        .open(&file_path)
        .unwrap();
    let mut page = ZeboPage::try_new(10, 0, zebo_page_file).unwrap();
    page.reserve(&[(1_u32, "ab")]).unwrap().write_all().unwrap();
    page.reserve(&[(2_u32, "cd")]).unwrap().write_all().unwrap();
    page.reserve(&[(3_u32, "ef")]).unwrap().write_all().unwrap();
    page.delete_documents(&[(2, ProbableIndex(0))], true)
        .unwrap();
    page.reserve(&[(4_u32, "gh")]).unwrap().write_all().unwrap();
    drop(page);
    let file_content = std::fs::read(&file_path).unwrap();
    // Index entries 1..4: doc 2 tombstoned (offset/len = u32::MAX),
    // docs 3 and 4 live at data offsets 189 and 191.
    assert_eq!(
        &file_content[41..89],
        &[
            0, 0, 0, 0, 0, 0, 0, 2, 255, 255, 255, 255, 255, 255, 255, 255, 0, 0, 0, 0, 0, 0,
            0, 3, 0, 0, 0, 189, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 191, 0, 0, 0, 2
        ]
    );
    // Remaining index slots untouched.
    assert_eq!(&file_content[89..185], &[0; 185 - 89],);
    // Data region: "ab", zeroed "cd", "ef", "gh".
    assert_eq!(&file_content[185..], "ab\0\0efgh".as_bytes(),);
}
// Non-contiguous ids (1, 5, 8, 12): lookups must succeed with exact,
// wrong and out-of-range probable indexes, return nothing for missing
// ids, and keep working after one document is deleted.
#[test]
fn test_page_get_documents_with_gaps() {
    let test_dir = prepare_test_dir();
    let file_path = test_dir.join("page_0.zebo");
    let zebo_page_file = std::fs::File::options()
        .create(true)
        .truncate(false)
        .read(true)
        .write(true)
        .open(&file_path)
        .unwrap();
    let mut page = ZeboPage::try_new(10, 0, zebo_page_file).unwrap();
    page.reserve(&[(1_u32, "a")]).unwrap().write_all().unwrap();
    page.reserve(&[(5_u32, "e")]).unwrap().write_all().unwrap();
    page.reserve(&[(8_u32, "h")]).unwrap().write_all().unwrap();
    page.reserve(&[(12_u32, "l")]).unwrap().write_all().unwrap();
    // Probable indexes equal to the ids (all wrong: slots are 0..=3).
    let existing_docs = vec![
        (1, ProbableIndex(1)),
        (5, ProbableIndex(5)),
        (8, ProbableIndex(8)),
        (12, ProbableIndex(12)),
    ];
    let result = page.get_documents::<u32>(&existing_docs).unwrap();
    assert_eq!(result.len(), 4);
    let mut sorted_result = result;
    sorted_result.sort_by_key(|(id, _)| *id);
    assert_eq!(sorted_result[0], (1, b"a".to_vec()));
    assert_eq!(sorted_result[1], (5, b"e".to_vec()));
    assert_eq!(sorted_result[2], (8, b"h".to_vec()));
    assert_eq!(sorted_result[3], (12, b"l".to_vec()));
    // Ids that were never written.
    let missing_docs = vec![
        (2, ProbableIndex(2)),
        (3, ProbableIndex(3)),
        (4, ProbableIndex(4)),
        (6, ProbableIndex(6)),
        (7, ProbableIndex(7)),
        (9, ProbableIndex(9)),
        (10, ProbableIndex(10)),
        (11, ProbableIndex(11)),
        (13, ProbableIndex(13)),
        (14, ProbableIndex(14)),
    ];
    let result = page.get_documents::<u32>(&missing_docs).unwrap();
    assert_eq!(
        result.len(),
        0,
        "Should return no documents for missing IDs"
    );
    // Mix of present and absent ids.
    let mixed_docs = vec![
        (1, ProbableIndex(1)),
        (2, ProbableIndex(2)),
        (5, ProbableIndex(5)),
        (6, ProbableIndex(6)),
        (8, ProbableIndex(8)),
        (9, ProbableIndex(9)),
        (12, ProbableIndex(12)),
        (15, ProbableIndex(15)),
    ];
    let result = page.get_documents::<u32>(&mixed_docs).unwrap();
    assert_eq!(result.len(), 4, "Should return only existing documents");
    let mut sorted_result = result;
    sorted_result.sort_by_key(|(id, _)| *id);
    assert_eq!(sorted_result[0], (1, b"a".to_vec()));
    assert_eq!(sorted_result[1], (5, b"e".to_vec()));
    assert_eq!(sorted_result[2], (8, b"h".to_vec()));
    assert_eq!(sorted_result[3], (12, b"l".to_vec()));
    // Deliberately wrong hints, including one far out of range.
    let wrong_probable_docs = vec![
        (1, ProbableIndex(0)),
        (5, ProbableIndex(2)),
        (8, ProbableIndex(50)),
    ];
    let result = page.get_documents::<u32>(&wrong_probable_docs).unwrap();
    assert_eq!(
        result.len(),
        3,
        "Should find documents even with wrong ProbableIndex"
    );
    let mut sorted_result = result;
    sorted_result.sort_by_key(|(id, _)| *id);
    assert_eq!(sorted_result[0], (1, b"a".to_vec()));
    assert_eq!(sorted_result[1], (5, b"e".to_vec()));
    assert_eq!(sorted_result[2], (8, b"h".to_vec()));
    // Delete doc 5 and make sure it vanishes from lookups.
    page.delete_documents(&[(5, ProbableIndex(5))], false)
        .unwrap();
    let after_deletion_docs = vec![
        (1, ProbableIndex(1)),
        (5, ProbableIndex(5)),
        (8, ProbableIndex(8)),
        (12, ProbableIndex(12)),
    ];
    let result = page.get_documents::<u32>(&after_deletion_docs).unwrap();
    assert_eq!(
        result.len(),
        3,
        "Should return 3 documents after deleting one"
    );
    let mut sorted_result = result;
    sorted_result.sort_by_key(|(id, _)| *id);
    assert_eq!(sorted_result[0], (1, b"a".to_vec()));
    assert_eq!(sorted_result[1], (8, b"h".to_vec()));
    assert_eq!(sorted_result[2], (12, b"l".to_vec()));
    // Wrong hints + a deleted id + a missing id in one request.
    let mixed_search_docs = vec![
        (1, ProbableIndex(0)),
        (8, ProbableIndex(50)),
        (12, ProbableIndex(1)),
        (5, ProbableIndex(5)),
        (99, ProbableIndex(2)),
    ];
    let result = page.get_documents::<u32>(&mixed_search_docs).unwrap();
    assert_eq!(
        result.len(),
        3,
        "Should find 3 existing non-deleted documents"
    );
    let mut sorted_result = result;
    sorted_result.sort_by_key(|(id, _)| *id);
    assert_eq!(sorted_result[0], (1, b"a".to_vec()));
    assert_eq!(sorted_result[1], (8, b"h".to_vec()));
    assert_eq!(sorted_result[2], (12, b"l".to_vec()));
}
// Simulates a page written by an older version, where deletion overwrote
// the doc id with u64::MAX instead of keeping it: the page must still hide
// the tombstone and serve the surviving documents.
#[test]
fn test_backwards_compatible_old_deletion_format() {
    let test_dir = prepare_test_dir();
    let file_path = test_dir.join("page_0.zebo");
    let zebo_page_file = std::fs::File::options()
        .create(true)
        .truncate(false)
        .read(true)
        .write(true)
        .open(&file_path)
        .unwrap();
    let mut page = ZeboPage::try_new(10, 0, zebo_page_file).unwrap();
    page.reserve(&[(1_u32, "a")]).unwrap().write_all().unwrap();
    page.reserve(&[(2_u32, "b")]).unwrap().write_all().unwrap();
    page.reserve(&[(3_u32, "c")]).unwrap().write_all().unwrap();
    // Hand-craft an old-format tombstone over doc 2 (second index slot).
    let mut old_deletion_buf = [0u8; 16];
    old_deletion_buf[0..8].copy_from_slice(&u64::MAX.to_be_bytes());
    old_deletion_buf[8..12].copy_from_slice(&u32::MAX.to_be_bytes());
    old_deletion_buf[12..16].copy_from_slice(&u32::MAX.to_be_bytes());
    page.page_file
        .write_all_at(&old_deletion_buf, DOCUMENT_INDEX_OFFSET + 16)
        .unwrap();
    // Manually decrement the on-disk count, as the old deletion path did.
    let document_count = page.get_document_count().unwrap();
    page.page_file
        .write_all_at(&(document_count - 1).to_be_bytes(), DOCUMENT_COUNT_OFFSET)
        .unwrap();
    let header = page.get_header().unwrap();
    assert_eq!(header.document_count, 2);
    assert_eq!(header.index.len(), 2);
    let doc_ids: Vec<u64> = header.index.iter().map(|(id, _, _)| *id).collect();
    assert!(doc_ids.contains(&1));
    assert!(doc_ids.contains(&3));
    assert!(!doc_ids.contains(&2));
    assert!(!doc_ids.contains(&u64::MAX));
    let result = page
        .get_documents::<u32>(&[(1, ProbableIndex(0)), (3, ProbableIndex(2))])
        .unwrap();
    assert_eq!(result.len(), 2);
    let mut sorted_result = result;
    sorted_result.sort_by_key(|(id, _)| *id);
    assert_eq!(sorted_result[0], (1, b"a".to_vec()));
    assert_eq!(sorted_result[1], (3, b"c".to_vec()));
    // The tombstoned id must not resolve.
    let deleted_result = page.get_documents::<u32>(&[(2, ProbableIndex(1))]).unwrap();
    assert_eq!(deleted_result.len(), 0);
}
#[test]
fn test_mixed_deletion_formats() {
    let test_dir = prepare_test_dir();
    let file_path = test_dir.join("page_0.zebo");
    let zebo_page_file = std::fs::File::options()
        .create(true)
        .truncate(false)
        .read(true)
        .write(true)
        .open(&file_path)
        .unwrap();
    let mut page = ZeboPage::try_new(10, 0, zebo_page_file).unwrap();

    // Insert four documents into consecutive index slots.
    for (id, body) in [(1_u32, "a"), (2, "b"), (3, "c"), (4, "d")] {
        page.reserve(&[(id, body)]).unwrap().write_all().unwrap();
    }

    // Delete document 2 by writing a legacy-format tombstone (all fields
    // set to the all-ones sentinel) directly over its 16-byte index slot.
    let mut legacy_entry = [0u8; 16];
    legacy_entry[0..8].copy_from_slice(&u64::MAX.to_be_bytes());
    legacy_entry[8..12].copy_from_slice(&u32::MAX.to_be_bytes());
    legacy_entry[12..16].copy_from_slice(&u32::MAX.to_be_bytes());
    page.page_file
        .write_all_at(&legacy_entry, DOCUMENT_INDEX_OFFSET + 16)
        .unwrap();

    // Delete document 4 through the current deletion API.
    page.delete_documents(&[(4, ProbableIndex(3))], false)
        .unwrap();

    // The raw tombstone write above did not touch the counter, so the
    // header count only reflects the API-driven deletion; the parsed
    // index nevertheless skips both deleted entries.
    let header = page.get_header().unwrap();
    assert_eq!(header.document_count, 3);
    assert_eq!(header.index.len(), 2);
    let surviving_ids: Vec<u64> = header.index.iter().map(|(id, _, _)| *id).collect();
    assert!(surviving_ids.contains(&1));
    assert!(surviving_ids.contains(&3));
    assert!(!surviving_ids.contains(&2));
    assert!(!surviving_ids.contains(&4));

    // Requesting all four ids must return only the two survivors.
    let mut fetched = page
        .get_documents::<u32>(&[
            (1, ProbableIndex(0)),
            (2, ProbableIndex(1)),
            (3, ProbableIndex(2)),
            (4, ProbableIndex(3)),
        ])
        .unwrap();
    assert_eq!(fetched.len(), 2);
    fetched.sort_by_key(|(id, _)| *id);
    assert_eq!(fetched[0], (1, b"a".to_vec()));
    assert_eq!(fetched[1], (3, b"c".to_vec()));
}
#[test]
fn test_deletion_detection_helper() {
    // Entries whose offset and length both carry the all-ones sentinel are
    // deleted, regardless of the stored document id.
    assert!(ZeboPage::is_deleted(u64::MAX, u32::MAX, u32::MAX));
    assert!(ZeboPage::is_deleted(1, u32::MAX, u32::MAX));
    assert!(ZeboPage::is_deleted(12345, u32::MAX, u32::MAX));
    // Legacy tombstones: sentinel id combined with sentinel offset also
    // counts as deleted even when the length field differs.
    assert!(ZeboPage::is_deleted(u64::MAX, u32::MAX, 0));
    // Live entries are never mistaken for deletions…
    assert!(!ZeboPage::is_deleted(1, 100, 50));
    assert!(!ZeboPage::is_deleted(u64::MAX, 100, 50));
    assert!(!ZeboPage::is_deleted(1, 0, 0));
    // …even when only one of offset/length happens to be the sentinel.
    assert!(!ZeboPage::is_deleted(1, u32::MAX, 50));
    assert!(!ZeboPage::is_deleted(1, 100, u32::MAX));
}
#[test]
fn test_uninitialized_entry_detection() {
    // A zero offset is the one and only marker of a never-written slot.
    assert!(ZeboPage::is_uninitialized_entry(0));
    // Any non-zero offset — including the deletion sentinel — is initialized.
    for offset in [1, 57, 100, u32::MAX] {
        assert!(!ZeboPage::is_uninitialized_entry(offset));
    }
}
#[test]
fn test_fallback_search_optimization_start_from_end() {
    use crate::tests::prepare_test_dir;
    let _ = tracing_subscriber::fmt::try_init();
    let test_dir = prepare_test_dir();
    let file_path = test_dir.join("optimization_test_page.zebo");
    let page_file = std::fs::File::options()
        .create(true)
        .truncate(true)
        .read(true)
        .write(true)
        .open(&file_path)
        .expect("Failed to create page file");
    let mut page = ZeboPage::try_new(10, 1000, page_file).expect("Failed to create page");

    // Fill every slot with ids that leave large gaps between them.
    for id in [100_u32, 200, 300, 400, 500, 600, 700, 800, 900, 950] {
        page.reserve(&[(id, "xx")]).unwrap().write_all().unwrap();
    }

    // The last and first stored ids are both found by the fallback scan.
    let hit = page.fallback_search_document(950, None).expect("Search failed");
    assert_eq!(hit.map(|entry| entry.0), Some(950));
    let hit = page.fallback_search_document(100, None).expect("Search failed");
    assert_eq!(hit.map(|entry| entry.0), Some(100));

    // Ids that fall inside the gaps must come back empty.
    let miss = page.fallback_search_document(940, None).expect("Search failed");
    assert!(miss.is_none());
    let miss = page.fallback_search_document(150, None).expect("Search failed");
    assert!(miss.is_none());
}
#[test]
fn test_fallback_search_bug_with_deleted_target_id() {
    use crate::tests::prepare_test_dir;
    let test_dir = prepare_test_dir();
    let file_path = test_dir.join("bug_test_page.zebo");
    let page_file = std::fs::File::options()
        .create(true)
        .truncate(true)
        .read(true)
        .write(true)
        .open(&file_path)
        .expect("Failed to create page file");
    let mut page = ZeboPage::try_new(10, 100, page_file).expect("Failed to create page");

    // Three documents; 105 lands in slot 2.
    for (id, body) in [(101_u32, "aa"), (102, "bb"), (105, "ee")] {
        page.reserve(&[(id, body)]).unwrap().write_all().unwrap();
    }

    // Corrupt slot 1 (at byte offset 16 into the index): it claims the
    // target id 105 but carries the deletion sentinel in offset and length.
    let mut bogus_entry = [0u8; 16];
    bogus_entry[0..8].copy_from_slice(&105_u64.to_be_bytes());
    bogus_entry[8..12].copy_from_slice(&u32::MAX.to_be_bytes());
    bogus_entry[12..16].copy_from_slice(&u32::MAX.to_be_bytes());
    page.page_file
        .write_all_at(&bogus_entry, DOCUMENT_INDEX_OFFSET + 16)
        .expect("Failed to write corrupted entry");

    // Even with a wildly wrong probable index, document 105 must still be
    // resolved through its genuine entry rather than the corrupted one.
    let result = page
        .get_documents::<u64>(&[(105, ProbableIndex(999))])
        .expect("Get documents failed");
    assert_eq!(result.len(), 1);
    assert_eq!(result[0].0, 105);
    assert_eq!(result[0].1, b"ee".to_vec());
}
#[test]
fn test_fallback_search_wrong_document_order() {
    use crate::tests::prepare_test_dir;
    let _ = tracing_subscriber::fmt::try_init();
    let test_dir = prepare_test_dir();
    let file_path = test_dir.join("wrong_document_order.zebo");
    let page_file = std::fs::File::options()
        .create(true)
        .truncate(true)
        .read(true)
        .write(true)
        .open(&file_path)
        .expect("Failed to create page file");
    let mut page = ZeboPage::try_new(10, 1000, page_file).expect("Failed to create page");

    // Insert out of id order: document 4 occupies slot 2, document 3 slot 3.
    for (id, body) in [
        (1_u32, "hello"),
        (2, "world"),
        (4, "aaaaa"),
        (3, "bbbbb"),
        (5, "ccccc"),
    ] {
        page.reserve(&[(id, body)]).unwrap().write_all().unwrap();
    }

    // With correct probable indices every document resolves to its payload.
    let docs = page
        .get_documents::<u64>(&[
            (1, ProbableIndex(0)),
            (2, ProbableIndex(1)),
            (3, ProbableIndex(3)),
            (4, ProbableIndex(2)),
            (5, ProbableIndex(4)),
        ])
        .unwrap();
    assert_eq!(docs.len(), 5);
    assert_eq!(
        docs,
        vec![
            (1, b"hello".to_vec()),
            (2, b"world".to_vec()),
            (3, b"bbbbb".to_vec()),
            (4, b"aaaaa".to_vec()),
            (5, b"ccccc".to_vec()),
        ]
    );

    // Every id is also reachable through the fallback scan alone.
    for doc_id in 1..=5 {
        let found = page.fallback_search_document(doc_id, None).unwrap();
        assert!(found.is_some());
    }
}
/// Same out-of-order layout as `test_fallback_search_wrong_document_order`,
/// but exercises `find_from_start` (a scan beginning at the initial slot)
/// instead of `fallback_search_document`.
#[test]
fn test_fallback_search_start_from_initial() {
    use crate::tests::prepare_test_dir;
    let _ = tracing_subscriber::fmt::try_init();
    let test_dir = prepare_test_dir();
    // Fix: this test previously reused "wrong_document_order.zebo" — a
    // copy-paste from the test above — which could make the two tests
    // clobber each other's file if the test directory is shared.
    let file_path = test_dir.join("fallback_search_start_from_initial.zebo");
    let page_file = std::fs::File::options()
        .create(true)
        .truncate(true)
        .read(true)
        .write(true)
        .open(&file_path)
        .expect("Failed to create page file");
    let mut page = ZeboPage::try_new(10, 1000, page_file).expect("Failed to create page");

    // Insert out of id order: document 4 occupies slot 2, document 3 slot 3.
    for (id, body) in [
        (1_u32, "hello"),
        (2, "world"),
        (4, "aaaaa"),
        (3, "bbbbb"),
        (5, "ccccc"),
    ] {
        page.reserve(&[(id, body)]).unwrap().write_all().unwrap();
    }

    // Sanity check: with correct probable indices every payload resolves.
    let docs = page
        .get_documents::<u64>(&[
            (1, ProbableIndex(0)),
            (2, ProbableIndex(1)),
            (3, ProbableIndex(3)),
            (4, ProbableIndex(2)),
            (5, ProbableIndex(4)),
        ])
        .unwrap();
    assert_eq!(docs.len(), 5);
    assert_eq!(docs[0], (1, b"hello".to_vec()));
    assert_eq!(docs[1], (2, b"world".to_vec()));
    assert_eq!(docs[2], (3, b"bbbbb".to_vec()));
    assert_eq!(docs[3], (4, b"aaaaa".to_vec()));
    assert_eq!(docs[4], (5, b"ccccc".to_vec()));

    // Every id must also be discoverable by scanning from the first slot.
    for doc_id in 1..=5 {
        let found = page.find_from_start(doc_id).unwrap();
        assert!(found.is_some());
    }
}
}