qfilter 0.3.0-alpha.4

//! Approximate Membership Query Filter ([AMQ-Filter](https://en.wikipedia.org/wiki/Approximate_Membership_Query_Filter))
//! based on the [Rank Select Quotient Filter (RSQF)](https://dl.acm.org/doi/pdf/10.1145/3035918.3035963).
//!
//! This is a small and flexible general-purpose AMQ-Filter, it not only supports approximate membership testing like a bloom filter
//! but also deletions, merging, resizing and [serde](https://crates.io/crates/serde) serialization.
//!
//! ### Example
//!
//! ```rust
//! let mut f = qfilter::Filter::new(1000000, 0.01).unwrap();
//! for i in 0..1000 {
//!     f.insert(i).unwrap();
//! }
//! for i in 0..1000 {
//!     assert!(f.contains(i));
//! }
//! ```
//!
//! ### Hasher
//!
//! Methods accepting `T: Hash` are provided for convenience using
//! [foldhash-portable](https://crates.io/crates/foldhash-portable), which offers high performance
//! and stability across platforms. Note that a fixed seed is used (no DoS resistance)
//! and `#[derive(Hash)]` output is [not guaranteed stable](https://github.com/hoxxep/portable-hash#whats-wrong-with-the-stdhash-traits)
//! across Rust compiler versions.
//!
//! ### Custom hasher
//!
//! [`Filter`] supports a custom [`BuildHasher`] via its `S` type
//! parameter (similar to [`HashMap`](std::collections::HashMap)). Use
//! [`Filter::new_with_hasher()`] and related constructors. Caveats:
//!
//! - The hasher is **not serialized**. On deserialization it is reconstructed via `S::default()`.
//!   If that doesn't produce the correct hasher (e.g. random-seeded hashers), use
//!   [`Filter::with_hasher()`] to restore it.
//! - Filters being **merged** must use the same hasher and seed.
//! - The **same hasher instance** (or equivalent seed) must be used for all operations.
//!
//! ### Filter size
//!
//! For a given capacity and error probability the RSQF may require significantly less space than the equivalent bloom filter or other AMQ-Filters.
//!
//! | Bits per item | Error probability when full | Bits per item (cont.) | Error (cont.) |
//! |:---:|:---:|:---:|---|
//! | 3.125 | 0.362 | 19.125 | 6.87e-06 |
//! | 4.125 | 0.201 | 20.125 | 3.43e-06 |
//! | 5.125 | 0.106 | 21.125 | 1.72e-06 |
//! | 6.125 | 0.0547 | 22.125 | 8.58e-07 |
//! | 7.125 | 0.0277 | 23.125 | 4.29e-07 |
//! | 8.125 | 0.014 | 24.125 | 2.15e-07 |
//! | 9.125 | 0.00701 | 25.125 | 1.07e-07 |
//! | 10.125 | 0.00351 | 26.125 | 5.36e-08 |
//! | 11.125 | 0.00176 | 27.125 | 2.68e-08 |
//! | 12.125 | 0.000879 | 28.125 | 1.34e-08 |
//! | 13.125 | 0.000439 | 29.125 | 6.71e-09 |
//! | 14.125 | 0.00022 | 30.125 | 3.35e-09 |
//! | 15.125 | 0.00011 | 31.125 | 1.68e-09 |
//! | 16.125 | 5.49e-05 | 32.125 | 8.38e-10 |
//! | 17.125 | 2.75e-05 | .. | .. |
//! | 18.125 | 1.37e-05 | .. | .. |
//!
//! ### Performance
//!
//! - **Lookup/Insert/Delete**: O(1) expected case, O(n) worst case with pathological hash collisions
//! - **Memory overhead**: 2.125 bits per slot for metadata (occupieds, runends, offset)
//! - **Cache efficiency**: Block-based layout with 64-slot blocks improves cache locality
//!
//! Performance degrades gracefully as occupancy increases. The filter automatically
//! limits occupancy to 95% to maintain good performance.
//!
//! ### Legacy x86_64 CPUs support
//!
//! The implementation assumes `popcnt` and BMI2 (`pdep`, `tzcnt`) instructions are available
//! when compiling for x86_64 targets. These instructions are available on CPUS released after 2015.
//! If they are not available, the Filter constructor will panic.
//!
//! The `legacy_x86_64_support` feature enables support for older x86_64 CPUs by using
//! portable fallbacks.
#![cfg_attr(docsrs, feature(doc_cfg))]

use std::{
    cmp::Ordering,
    hash::{BuildHasher, Hash},
    num::{NonZeroU64, NonZeroU8},
    ops::{RangeBounds, RangeFrom},
};

#[cfg(feature = "jsonschema")]
use schemars::JsonSchema;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
pub use stable_hasher::StableBuildHasher;

mod portable_select;
mod stable_hasher;

/// Truncates a hash to a fingerprint of the given bit size.
///
/// Only the lower `fingerprint_bits` bits of `hash` are returned.
/// This is useful when pre-computing fingerprints for [`Builder`] sorted insertion,
/// which requires truncated fingerprints for correct sort order.
///
/// In most other cases you don't need this — the fingerprint-based APIs
/// ([`Filter::contains_fingerprint()`], [`Filter::insert_fingerprint()`], etc.) accept
/// full untruncated hashes and truncate internally.
///
/// - `fingerprint_bits == 0` returns `0`
/// - `fingerprint_bits >= 64` returns the full hash unchanged
#[inline]
pub fn truncate_to_fingerprint(hash: u64, fingerprint_bits: u8) -> u64 {
    if fingerprint_bits >= 64 {
        hash
    } else {
        hash & ((1u64 << fingerprint_bits) - 1)
    }
}

/// Hashes an item and truncates it to a fingerprint of the given bit size.
///
/// Equivalent to `truncate_to_fingerprint(build_hasher.hash_one(&item), fingerprint_bits)`.
///
/// See [`truncate_to_fingerprint()`] for details on when truncation is needed.
#[inline]
pub fn compute_fingerprint_with_hasher<T: Hash, S: BuildHasher>(
    build_hasher: &S,
    item: T,
    fingerprint_bits: u8,
) -> u64 {
    truncate_to_fingerprint(build_hasher.hash_one(&item), fingerprint_bits)
}

// Private convenience wrapper using the default hasher (test only).
#[cfg(test)]
#[inline]
fn compute_fingerprint<T: Hash>(item: T, fingerprint_bits: u8) -> u64 {
    compute_fingerprint_with_hasher(&StableBuildHasher, item, fingerprint_bits)
}

/// Specifications for a filter with given parameters.
///
/// Returned by [`filter_specs()`].
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct FilterSpecs {
    /// Actual maximum capacity (may be higher than requested due to rounding).
    pub max_capacity: u64,
    /// Maximum false positive rate when at capacity (may be lower than requested due to rounding).
    pub max_error_ratio: f64,
    /// Storage cost per item in bits when at max capacity (remainder bits + 2.125 metadata overhead).
    pub bits_per_item: f64,
    /// Memory usage in bytes at minimum capacity (smallest resizable filter).
    pub memory_bytes_min: usize,
    /// Memory usage in bytes at maximum capacity.
    pub memory_bytes_max: usize,
    /// Internal fingerprint size in bits.
    ///
    /// This is an implementation detail exposed for advanced use cases like
    /// pre-computing fingerprints with [`compute_fingerprint_with_hasher()`]. It represents
    /// the full hash width (qbits + rbits), not the storage cost per item.
    pub fingerprint_bits: u8,
}

/// Computes filter specifications for a given capacity and false positive rate.
///
/// Returns the fingerprint size and memory usage bounds, useful for planning
/// memory allocation or pre-computing fingerprints before creating a filter.
///
/// # Parameters
///
/// - `capacity`: The number of items the filter should hold.
/// - `fp_rate`: Upper bound on false positive probability (clamped to (0, 0.5]).
///   The actual rate may be lower due to internal rounding.
///
/// # Errors
///
/// - [`Error::CapacityTooLarge`] if capacity exceeds [`Filter::MAX_CAPACITY`].
/// - [`Error::NotEnoughFingerprintBits`] if the configuration requires more than 64 bits.
///
/// # Example
///
/// ```rust
/// use qfilter::{filter_specs, compute_fingerprint_with_hasher, StableBuildHasher, Filter};
///
/// let capacity = 10000;
/// let fp_rate = 0.01;
///
/// // Get filter specifications
/// let specs = filter_specs(capacity, fp_rate).unwrap();
///
/// // Pre-compute fingerprints using the fingerprint size
/// let fingerprints: Vec<u64> = (0..100)
///     .map(|i| compute_fingerprint_with_hasher(&StableBuildHasher, i, specs.fingerprint_bits))
///     .collect();
///
/// // Memory will be between min and max depending on how the filter grows
/// let filter = Filter::new(capacity, fp_rate).unwrap();
/// assert_eq!(filter.fingerprint_size(), specs.fingerprint_bits);
/// assert_eq!(filter.memory_usage(), specs.memory_bytes_max);
/// ```
pub fn filter_specs(capacity: u64, fp_rate: f64) -> Result<FilterSpecs, Error> {
    let slots = calculate_needed_slots(capacity)?;
    let qbits = slots.trailing_zeros() as u8;
    let fp_rate = fp_rate.clamp(f64::MIN_POSITIVE, 0.5);
    let rbits = (-fp_rate.log2()).ceil().max(1.0) as u8;
    let fingerprint_bits = qbits + rbits;
    if fingerprint_bits > 64 {
        return Err(Error::NotEnoughFingerprintBits);
    }

    let memory_bytes_max = usize::try_from(calculate_memory_bytes(qbits, rbits))
        .map_err(|_| Error::CapacityTooLarge)?;
    // Smallest resizable filter has 64 slots (qbits=6)
    let memory_bytes_min = usize::try_from(calculate_memory_bytes(6, fingerprint_bits - 6))
        .map_err(|_| Error::CapacityTooLarge)?;
    // Max capacity: 95% of slots
    let capacity = (slots * 19).div_ceil(20);
    // Max capacity error ratio based on rbits
    let max_error_ratio = 2f64.powi(-(rbits as i32));
    // Storage cost per item: remainder bits + metadata overhead
    let bits_per_item = rbits as f64 + 2.125;

    Ok(FilterSpecs {
        fingerprint_bits,
        memory_bytes_min,
        memory_bytes_max,
        max_capacity: capacity,
        max_error_ratio,
        bits_per_item,
    })
}

/// Calculates memory usage in bytes for a filter with given qbits and rbits.
///
/// Memory layout per block (64 slots):
/// - 1 byte: offset
/// - 8 bytes: occupieds bitmap
/// - 8 bytes: runends bitmap
/// - 8 * rbits bytes: remainders (64 slots * rbits bits / 8)
///
/// Total: num_blocks * (17 + 8 * rbits) + 8 bytes padding.
/// The +8 padding allows branchless remainder reads (always reading 2 u64s).
///
/// Returns `u64` to avoid truncation on 32-bit targets. All callers that need `usize`
/// use checked conversion (`usize::try_from`) and return an error on overflow.
fn calculate_memory_bytes(qbits: u8, rbits: u8) -> u64 {
    let num_blocks = (1u64 << qbits) / 64;
    let block_bytes = 17 + 8 * rbits as u64; // see block layout in doc comment above
    num_blocks * block_bytes + 8
}

/// Calculates the number of slots needed to fit the desired capacity with 95% max occupancy.
/// Returns the number of slots rounded to the next power of two, but always >= 64.
fn calculate_needed_slots(desired: u64) -> Result<u64, Error> {
    let mut slots = desired
        .checked_next_power_of_two()
        .ok_or(Error::CapacityTooLarge)?
        .max(64);
    loop {
        let capacity = slots
            .checked_mul(19)
            .ok_or(Error::CapacityTooLarge)?
            .div_ceil(20);
        if capacity >= desired {
            return Ok(slots);
        }
        slots = slots.checked_mul(2).ok_or(Error::CapacityTooLarge)?;
    }
}

/// Approximate Membership Query Filter (AMQ-Filter) based on the Rank Select Quotient Filter (RSQF).
///
/// This data structure is similar to a hash table that stores fingerprints in a very compact way.
/// Fingerprints are similar to a hash values, but are possibly truncated.
/// The reason for false positives is that multiple items can map to the same fingerprint.
/// For more information see the [quotient filter Wikipedia page](https://en.wikipedia.org/wiki/Quotient_filter)
/// that describes a similar but less optimized version of the data structure.
/// The actual implementation is based on the [Rank Select Quotient Filter (RSQF)](https://dl.acm.org/doi/pdf/10.1145/3035918.3035963).
///
/// The public API also exposes a fingerprint API, which can be used to succinctly store u64
/// hash values.
///
/// The type parameter `B` controls the buffer storage. Use `Filter` (defaults to `Box<[u8]>`)
/// for an owned, mutable filter or [`FilterRef`] (`Filter<&[u8]>`) for a borrowed, read-only view
/// that supports zero-copy deserialization.
#[derive(Clone)]
#[cfg_attr(
    feature = "serde",
    derive(Serialize),
    serde(bound(serialize = "B: serde_bytes::Serialize"))
)]
#[cfg_attr(feature = "jsonschema", derive(JsonSchema))]
pub struct Filter<B = Box<[u8]>, S = StableBuildHasher> {
    #[cfg_attr(
        feature = "serde",
        serde(rename = "b", serialize_with = "serde_bytes::serialize",)
    )]
    #[cfg_attr(feature = "jsonschema", schemars(with = "Vec<u8>"))]
    buffer: B,
    #[cfg_attr(feature = "serde", serde(skip))]
    #[cfg_attr(feature = "jsonschema", schemars(skip))]
    build_hasher: S,
    #[cfg_attr(feature = "serde", serde(rename = "l"))]
    len: u64,
    #[cfg_attr(feature = "serde", serde(rename = "q"))]
    qbits: NonZeroU8,
    #[cfg_attr(feature = "serde", serde(rename = "r"))]
    rbits: NonZeroU8,
    #[cfg_attr(feature = "serde", serde(rename = "m"))]
    max_qbits: Option<NonZeroU8>,
}

/// Raw deserialization target for [`Filter`] that is validated before construction.
#[cfg(feature = "serde")]
#[derive(Deserialize)]
#[serde(bound(deserialize = "B: serde_bytes::Deserialize<'de>"))]
struct FilterUnchecked<B> {
    #[serde(rename = "b", deserialize_with = "serde_bytes::deserialize")]
    buffer: B,
    #[serde(rename = "l")]
    len: u64,
    #[serde(rename = "q")]
    qbits: NonZeroU8,
    #[serde(rename = "r")]
    rbits: NonZeroU8,
    #[serde(rename = "m")]
    max_qbits: Option<NonZeroU8>,
}

#[cfg(feature = "serde")]
impl<B: AsRef<[u8]>> FilterUnchecked<B> {
    fn try_into_filter<S: Default>(self) -> Result<Filter<B, S>, &'static str> {
        let qbits = self.qbits.get();
        let rbits = self.rbits.get();
        // qbits must be at least 6 (64 slots per block), within MAX_QBITS,
        // and fingerprint must fit in 64 bits
        if !(6..=MAX_QBITS).contains(&qbits) || qbits as u16 + rbits as u16 > 64 {
            return Err("invalid qbits/rbits");
        }
        // buffer length must match the expected size for this qbits/rbits
        // Compare as u64 to avoid truncation on 32-bit targets
        let expected_bytes = calculate_memory_bytes(qbits, rbits);
        if self.buffer.as_ref().len() as u64 != expected_bytes {
            return Err("buffer length mismatch");
        }
        // len must not exceed total number of buckets
        let total_buckets = 1u64 << qbits;
        if self.len > total_buckets {
            return Err("len exceeds total buckets");
        }
        // max_qbits must be >= qbits, <= MAX_QBITS, and within fingerprint bounds
        if let Some(max_qbits) = self.max_qbits {
            let mq = max_qbits.get();
            if mq < qbits || mq > MAX_QBITS || mq as u16 > qbits as u16 + rbits as u16 - 1 {
                return Err("invalid max_qbits");
            }
        }
        Ok(Filter {
            buffer: self.buffer,
            build_hasher: S::default(),
            len: self.len,
            qbits: self.qbits,
            rbits: self.rbits,
            max_qbits: self.max_qbits,
        })
    }
}

/// The `AsRef<[u8]>` bound is required to validate the buffer length during deserialization.
///
/// The hasher is not serialized — it is reconstructed via `S::default()` on deserialization.
/// Use [`Filter::with_hasher()`] to set a specific hasher after deserialization.
#[cfg(feature = "serde")]
impl<'de, B: AsRef<[u8]> + serde_bytes::Deserialize<'de>, S: Default> Deserialize<'de>
    for Filter<B, S>
{
    fn deserialize<D: serde::Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
        FilterUnchecked::<B>::deserialize(deserializer)?
            .try_into_filter()
            .map_err(serde::de::Error::custom)
    }
}

/// A read-only, borrowed view of a [`Filter`].
///
/// This is a type alias for `Filter<&[u8], S>` with `S` defaulting to [`StableBuildHasher`].
/// It supports all read-only operations ([`contains`](Filter::contains),
/// [`count`](Filter::count), [`fingerprints`](Filter::fingerprints), etc.) and enables
/// zero-copy deserialization from binary formats like CBOR, bincode, or postcard via serde.
///
/// # Zero-copy deserialization
///
/// ```rust,ignore
/// // Deserialize without copying the buffer
/// let filter_ref: FilterRef = serde_cbor::from_slice(&bytes).unwrap();
/// assert!(filter_ref.contains("hello"));
/// ```
///
/// Use [`FilterRef::to_owned()`](Filter::to_owned) to convert to a [`Filter`] when mutation is needed.
pub type FilterRef<'a, S = StableBuildHasher> = Filter<&'a [u8], S>;

impl Copy for Filter<&[u8], StableBuildHasher> {}

/// Errors returned by [`Filter`] operations.
#[derive(Debug)]
#[non_exhaustive]
pub enum Error {
    /// The filter is full and cannot grow further.
    CapacityExceeded,
    /// Filters have incompatible fingerprint sizes (e.g., during [`Filter::merge()`]).
    IncompatibleFingerprintSize,
    /// The requested configuration requires more than 64 bits per fingerprint.
    NotEnoughFingerprintBits,
    /// The requested capacity exceeds [`Filter::MAX_CAPACITY`] or the platform's addressable range.
    CapacityTooLarge,
}

impl std::fmt::Display for Error {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "{self:?}")
    }
}

impl std::error::Error for Error {}

#[derive(Debug)]
struct Block {
    offset: u64,
    occupieds: u64,
    runends: u64,
}

trait BitExt {
    fn is_bit_set(&self, i: usize) -> bool;
    fn set_bit(&mut self, i: usize);
    fn clear_bit(&mut self, i: usize);
    fn shift_right(&self, bits: usize, b: &Self, b_start: usize, b_end: usize) -> Self;
    fn shift_left(&self, bits: usize, b: &Self, b_start: usize, b_end: usize) -> Self;
    /// Number of set bits (1s) in the range
    fn popcnt(&self, range: impl RangeBounds<u64>) -> u64;
    /// Index of nth set bits in the range
    fn select(&self, range: RangeFrom<u64>, n: u64) -> Option<u64>;

    #[inline]
    fn update_bit(&mut self, i: usize, value: bool) {
        if value {
            self.set_bit(i)
        } else {
            self.clear_bit(i)
        }
    }
}

impl BitExt for u64 {
    #[inline]
    fn is_bit_set(&self, i: usize) -> bool {
        (*self & (1 << i)) != 0
    }

    #[inline]
    fn set_bit(&mut self, i: usize) {
        *self |= 1 << i
    }

    #[inline]
    fn clear_bit(&mut self, i: usize) {
        *self &= !(1 << i)
    }

    #[inline]
    fn shift_right(&self, bits: usize, b: &Self, b_start: usize, b_end: usize) -> Self {
        let a_component = *self >> (64 - bits); // select the highest `bits` from A to become lowest
        let width = b_end - b_start;
        let b_shifted_mask = if width >= 64 {
            u64::MAX
        } else {
            (1u64 << width) - 1
        } << b_start;
        let b_shifted = ((b_shifted_mask & b) << bits) & b_shifted_mask;
        let b_mask = !b_shifted_mask;

        a_component | b_shifted | (b & b_mask)
    }

    #[inline]
    fn shift_left(&self, bits: usize, b: &Self, b_start: usize, b_end: usize) -> Self {
        let a_component = *self << (64 - bits); // select the lowest `bits` from A to become highest
        let width = b_end - b_start;
        let b_shifted_mask = if width >= 64 {
            u64::MAX
        } else {
            (1u64 << width) - 1
        } << b_start;
        let b_shifted = ((b_shifted_mask & b) >> bits) & b_shifted_mask;
        let b_mask = !b_shifted_mask;

        a_component | b_shifted | (b & b_mask)
    }

    #[inline]
    fn popcnt(&self, range: impl RangeBounds<u64>) -> u64 {
        let mut v = match range.start_bound() {
            std::ops::Bound::Included(&i) => *self & (u64::MAX << i),
            std::ops::Bound::Excluded(&i) => *self & (u64::MAX << (i + 1)),
            _ => *self,
        };
        v = match range.end_bound() {
            std::ops::Bound::Included(&i) => v & (u64::MAX >> (63 - i)),
            std::ops::Bound::Excluded(&i) => v & ((1u64 << i) - 1),
            _ => v,
        };

        #[cfg(all(
            target_arch = "x86_64",
            not(feature = "legacy_x86_64_support"),
            not(target_feature = "popcnt")
        ))]
        let result = unsafe {
            // Using intrinsics introduce a function call, and the resulting code
            // ends up slower than the inline assembly below.
            // Any calls to is_x86_feature_detected also significantly affect performance.
            // Given this is available on all x64 cpus starting 2008 we assume it's present
            // (unless legacy_x86_64_support is set) and panic elsewhere otherwise.
            let popcnt;
            std::arch::asm!(
                "popcnt {popcnt}, {v}",
                v = in(reg) v,
                popcnt = out(reg) popcnt,
                options(pure, nomem, nostack)
            );
            popcnt
        };
        #[cfg(any(
            not(target_arch = "x86_64"),
            feature = "legacy_x86_64_support",
            target_feature = "popcnt"
        ))]
        let result = v.count_ones() as u64;

        result
    }

    #[inline]
    fn select(&self, range: RangeFrom<u64>, n: u64) -> Option<u64> {
        debug_assert!(range.start < 64);
        let v = *self & (u64::MAX << range.start);

        // x86_64: use BMI2 (assumed at runtime, or guaranteed at compile time)
        #[cfg(all(
            target_arch = "x86_64",
            any(not(feature = "legacy_x86_64_support"), target_feature = "bmi2")
        ))]
        unsafe {
            let result: u64;
            std::arch::asm!(
                "mov     {tmp}, 1",
                "shlx    {tmp}, {tmp}, {n}",
                "pdep    {tmp}, {tmp}, {v}",
                "tzcnt   {tmp}, {tmp}",
                n = in(reg) n,
                v = in(reg) v,
                tmp = out(reg) result,
                options(pure, nomem, nostack)
            );
            if result != 64 {
                Some(result)
            } else {
                None
            }
        }

        // Fallback: non-x86_64 or legacy without BMI2
        #[cfg(not(all(
            target_arch = "x86_64",
            any(not(feature = "legacy_x86_64_support"), target_feature = "bmi2")
        )))]
        {
            crate::portable_select::select(v, n)
        }
    }
}

trait CastNonZeroU8 {
    fn u64(&self) -> u64;
    fn usize(&self) -> usize;
}

impl CastNonZeroU8 for NonZeroU8 {
    #[inline]
    fn u64(&self) -> u64 {
        self.get() as u64
    }

    #[inline]
    fn usize(&self) -> usize {
        self.get() as usize
    }
}

// Read-only methods available on any Filter<B> where the buffer can be read as &[u8].
// This covers both Filter (owned, B=Box<[u8]>) and FilterRef (borrowed, B=&[u8]).
impl<B: AsRef<[u8]>, S> Filter<B, S> {
    /// Replaces the hasher, returning a filter with the new hasher type.
    ///
    /// This is a zero-cost operation — only the hasher is swapped, the stored
    /// fingerprints are unchanged. Useful after deserialization to restore the
    /// hasher that was used when the filter was originally populated.
    ///
    /// **Warning:** On a non-empty filter, `T: Hash` methods will only work
    /// correctly if the new hasher produces the same hashes as the one used
    /// at insertion time. The fingerprint-based API is unaffected.
    ///
    /// ```rust,ignore
    /// // Deserialize, then restore the original hasher used at insertion time.
    /// let f: Filter = serde_cbor::from_slice(&bytes)?;
    /// let f = f.with_hasher(original_hasher);
    /// ```
    pub fn with_hasher<S2>(self, build_hasher: S2) -> Filter<B, S2> {
        Filter {
            buffer: self.buffer,
            build_hasher,
            len: self.len,
            qbits: self.qbits,
            rbits: self.rbits,
            max_qbits: self.max_qbits,
        }
    }

    /// Returns the fingerprint size in bits.
    ///
    /// Use this with [`compute_fingerprint_with_hasher()`] to create compatible fingerprints.
    #[inline]
    pub fn fingerprint_size(&self) -> u8 {
        self.qbits.get() + self.rbits.get()
    }

    /// Returns `true` if the filter contains no items.
    #[inline]
    pub fn is_empty(&self) -> bool {
        self.len == 0
    }

    /// Returns the number of items in the filter.
    #[inline]
    pub fn len(&self) -> u64 {
        self.len
    }

    /// Returns the memory usage in bytes.
    #[inline]
    pub fn memory_usage(&self) -> usize {
        self.buffer.as_ref().len()
    }

    /// Returns the maximum capacity after all possible growth.
    #[inline]
    pub fn max_capacity(&self) -> u64 {
        // Overflow is not possible here as it'd have overflowed in the constructor.
        ((1u64 << self.max_qbits.unwrap_or(self.qbits).get()) * 19).div_ceil(20)
    }

    /// Returns the current capacity (before next growth).
    #[inline]
    pub fn capacity(&self) -> u64 {
        if cfg!(fuzzing) {
            // 100% occupancy is not realistic but stresses the algorithm much more.
            // To generate real counter examples this "pessimisation" must be removed.
            self.total_buckets().get()
        } else {
            // Up to 95% occupancy
            // 19/20 == 0.95
            // Overflow is not possible here as it'd have overflowed in the constructor.
            (self.total_buckets().get() * 19).div_ceil(20)
        }
    }

    /// Returns the false positive rate when fully grown (`len == max_capacity()`).
    pub fn max_error_ratio_resizeable(&self) -> f64 {
        let extra_rbits = self.max_qbits.unwrap_or(self.qbits).get() - self.qbits.get();
        2f64.powi(-((self.rbits.get() - extra_rbits) as i32))
    }

    /// Returns the false positive rate at current capacity (`len == capacity()`).
    pub fn max_error_ratio(&self) -> f64 {
        2f64.powi(-(self.rbits.get() as i32))
    }

    /// Returns the estimated false positive rate at current occupancy.
    pub fn current_error_ratio(&self) -> f64 {
        let occupancy = self.len as f64 / self.total_buckets().get() as f64;
        1.0 - std::f64::consts::E.powf(-occupancy / 2f64.powi(self.rbits.get() as i32))
    }

    /// Returns `true` if the item is probably in the filter, `false` if definitely not.
    ///
    /// May return false positives but never false negatives.
    pub fn contains<T: Hash>(&self, item: T) -> bool
    where
        S: BuildHasher,
    {
        self.contains_fingerprint(self.hash_item(item))
    }

    /// Returns `true` if the fingerprint is probably in the filter, `false` if definitely not.
    ///
    /// Only the lower [`Self::fingerprint_size()`] bits of `hash` are used.
    pub fn contains_fingerprint(&self, hash: u64) -> bool {
        let (hash_bucket_idx, hash_remainder) = self.calc_qr(hash);
        if !self.is_occupied(hash_bucket_idx) {
            return false;
        }
        let mut runstart_idx = self.run_start(hash_bucket_idx);
        loop {
            if hash_remainder == self.get_remainder(runstart_idx) {
                return true;
            }
            if self.is_runend(runstart_idx) {
                return false;
            }
            runstart_idx += 1;
        }
    }

    /// Returns how many times the item appears in the filter (approximate).
    ///
    /// Only meaningful if duplicates were inserted via [`Filter::insert_duplicated()`].
    pub fn count<T: Hash>(&self, item: T) -> u64
    where
        S: BuildHasher,
    {
        self.count_fingerprint(self.hash_item(item))
    }

    /// Returns how many times the fingerprint appears in the filter (approximate).
    ///
    /// Only the lower [`Self::fingerprint_size()`] bits of `hash` are used.
    pub fn count_fingerprint(&self, hash: u64) -> u64 {
        let (hash_bucket_idx, hash_remainder) = self.calc_qr(hash);
        if !self.is_occupied(hash_bucket_idx) {
            return 0;
        }

        let mut count = 0u64;
        let mut runstart_idx = self.run_start(hash_bucket_idx);
        loop {
            if hash_remainder == self.get_remainder(runstart_idx) {
                count += 1;
            }
            if self.is_runend(runstart_idx) {
                return count;
            }
            runstart_idx += 1;
        }
    }

    /// Returns an iterator over the fingerprints stored in the filter.
    ///
    /// Fingerprints are yielded in ascending order. Each value has only the lower
    /// [`Self::fingerprint_size()`] bits set (upper bits are zero).
    ///
    /// This is useful for serialization, migrating data between filters, or
    /// inspecting stored values. Use [`compute_fingerprint_with_hasher()`] to compute a
    /// fingerprint compatible with this filter's size.
    pub fn fingerprints(&self) -> FingerprintIter<'_> {
        FingerprintIter::new(self)
    }

    /// Returns a borrowed, read-only view of this filter.
    ///
    /// When using a custom hasher, `S` must implement `Clone` so it can be
    /// shared with the returned view.
    #[inline]
    pub fn as_filter_ref(&self) -> FilterRef<'_, S>
    where
        S: Clone,
    {
        Filter {
            buffer: self.buffer.as_ref(),
            build_hasher: self.build_hasher.clone(),
            len: self.len,
            qbits: self.qbits,
            rbits: self.rbits,
            max_qbits: self.max_qbits,
        }
    }

    #[inline]
    fn block_byte_size(&self) -> usize {
        1 + 8 + 8 + 64 * self.rbits.usize() / 8
    }

    // SAFETY: Caller must ensure offset + 8 <= buffer.len()
    #[inline(always)]
    unsafe fn read_u64_unchecked(&self, offset: usize) -> u64 {
        debug_assert!(offset + 8 <= self.buffer.as_ref().len());
        u64::from_le_bytes(
            self.buffer
                .as_ref()
                .get_unchecked(offset..offset + 8)
                .try_into()
                .unwrap_unchecked(),
        )
    }

    #[inline]
    fn raw_block(&self, block_num: u64) -> Block {
        let block_num = block_num % self.total_blocks();
        let block_start = block_num as usize * self.block_byte_size();
        // SAFETY: block_num % total_blocks() guarantees valid block index
        unsafe {
            Block {
                offset: *self.buffer.as_ref().get_unchecked(block_start) as u64,
                occupieds: self.read_u64_unchecked(block_start + 1),
                runends: self.read_u64_unchecked(block_start + 1 + 8),
            }
        }
    }

    #[inline(always)]
    fn is_occupied(&self, hash_bucket_idx: u64) -> bool {
        let hash_bucket_idx = hash_bucket_idx % self.total_buckets();
        let block_start = (hash_bucket_idx / 64) as usize * self.block_byte_size();
        let occupieds = unsafe { self.read_u64_unchecked(block_start + 1) };
        occupieds.is_bit_set((hash_bucket_idx % 64) as usize)
    }

    #[inline(always)]
    fn is_runend(&self, hash_bucket_idx: u64) -> bool {
        let hash_bucket_idx = hash_bucket_idx % self.total_buckets();
        let block_start = (hash_bucket_idx / 64) as usize * self.block_byte_size();
        let runends = unsafe { self.read_u64_unchecked(block_start + 1 + 8) };
        runends.is_bit_set((hash_bucket_idx % 64) as usize)
    }

    #[inline(always)]
    fn get_remainder(&self, hash_bucket_idx: u64) -> u64 {
        debug_assert!(self.rbits.get() > 0 && self.rbits.get() < 64);
        let hash_bucket_idx = hash_bucket_idx % self.total_buckets();
        let remainders_start = (hash_bucket_idx / 64) as usize * self.block_byte_size() + 1 + 8 + 8;
        let start_bit_idx = self.rbits.usize() * (hash_bucket_idx % 64) as usize;
        let start_u64 = start_bit_idx / 64;
        let extra_low = start_bit_idx - start_u64 * 64;

        // SAFETY: Always safe due to 8 extra bytes padding at end of buffer
        let rem_part0 = unsafe { self.read_u64_unchecked(remainders_start + start_u64 * 8) };
        let rem_part1 = unsafe { self.read_u64_unchecked(remainders_start + (start_u64 + 1) * 8) };

        let combined = (rem_part0 as u128) | ((rem_part1 as u128) << 64);
        let remainder = (combined >> extra_low) as u64 & ((1u64 << self.rbits.get()) - 1);

        debug_assert!(remainder.leading_zeros() >= 64 - self.rbits.get() as u32);
        remainder
    }

    #[inline]
    fn is_slot_empty(&self, hash_bucket_idx: u64) -> bool {
        let bucket_block_idx = hash_bucket_idx / 64;
        let bucket_intrablock_offset = hash_bucket_idx % 64;
        let bucket_block = self.raw_block(bucket_block_idx);

        if bucket_block.offset > bucket_intrablock_offset {
            return false;
        }

        let num_occupied = bucket_block.occupieds.popcnt(..=bucket_intrablock_offset);
        let num_runends = bucket_block
            .runends
            .popcnt(bucket_block.offset..bucket_intrablock_offset);
        num_occupied == num_runends
    }

    #[inline]
    fn find_first_empty_slot(&self, mut hash_bucket_idx: u64) -> u64 {
        let mut bucket_intrablock_offset = hash_bucket_idx % 64;
        let mut bucket_block = self.raw_block(hash_bucket_idx / 64);

        loop {
            let num_occupied = bucket_block.occupieds.popcnt(..=bucket_intrablock_offset);
            let olb = if bucket_block.offset <= bucket_intrablock_offset {
                num_occupied
                    - bucket_block
                        .runends
                        .popcnt(bucket_block.offset..bucket_intrablock_offset)
            } else {
                bucket_block.offset + num_occupied - bucket_intrablock_offset
            };

            if olb == 0 {
                return hash_bucket_idx % self.total_buckets();
            }

            hash_bucket_idx += olb;
            bucket_intrablock_offset += olb;

            if bucket_intrablock_offset >= 64 {
                bucket_intrablock_offset %= 64;
                bucket_block = self.raw_block(hash_bucket_idx / 64);
            }
        }
    }

    fn find_first_not_shifted_slot(&self, mut hash_bucket_idx: u64) -> u64 {
        loop {
            let run_end = self.run_end(hash_bucket_idx);
            if run_end == hash_bucket_idx {
                return hash_bucket_idx;
            }
            hash_bucket_idx = run_end;
        }
    }

    #[cold]
    #[inline(never)]
    fn calc_offset(&self, block_num: u64) -> u64 {
        let block_start = (block_num * 64) % self.total_buckets();
        let mut run_start = self.run_start(block_start);
        if run_start < block_start {
            run_start += self.total_buckets().get();
        }
        run_start - block_start
    }

    #[inline]
    fn run_start(&self, hash_bucket_idx: u64) -> u64 {
        let prev_bucket = hash_bucket_idx.wrapping_sub(1) % self.total_buckets();
        (self.run_end(prev_bucket) + 1) % self.total_buckets()
    }

    #[cfg_attr(
        all(
            target_arch = "x86_64",
            any(not(feature = "legacy_x86_64_support"), target_feature = "bmi2")
        ),
        inline(always)
    )]
    fn run_end(&self, hash_bucket_idx: u64) -> u64 {
        let hash_bucket_idx = hash_bucket_idx % self.total_buckets();
        let bucket_block_idx = hash_bucket_idx / 64;
        let bucket_intrablock_offset = hash_bucket_idx % 64;
        let bucket_block = self.raw_block(bucket_block_idx);

        let offset = if bucket_block.offset >= u8::MAX as u64 {
            self.calc_offset(bucket_block_idx)
        } else {
            bucket_block.offset
        };
        let bucket_intrablock_rank = bucket_block.occupieds.popcnt(..=bucket_intrablock_offset);

        if bucket_intrablock_rank == 0 {
            return if offset <= bucket_intrablock_offset {
                hash_bucket_idx
            } else {
                (bucket_block_idx * 64 + offset - 1) % self.total_buckets()
            };
        }

        let mut runend_block_idx = bucket_block_idx + offset / 64;
        let mut runend_ignore_bits = offset % 64;
        let mut runend_block = self.raw_block(runend_block_idx);
        let mut runend_rank = bucket_intrablock_rank - 1;

        loop {
            if let Some(off) = runend_block
                .runends
                .select(runend_ignore_bits.., runend_rank)
            {
                let runend_idx = runend_block_idx * 64 + off;
                return runend_idx.max(hash_bucket_idx) % self.total_buckets();
            }
            runend_rank -= runend_block.runends.popcnt(runend_ignore_bits..);
            runend_block_idx += 1;
            runend_ignore_bits = 0;
            runend_block = self.raw_block(runend_block_idx);
        }
    }

    #[inline]
    fn hash_item<T: Hash>(&self, item: T) -> u64
    where
        S: BuildHasher,
    {
        self.build_hasher.hash_one(&item)
    }

    #[inline]
    fn calc_qr(&self, hash: u64) -> (u64, u64) {
        let hash_bucket_idx = (hash >> self.rbits.get()) & ((1 << self.qbits.get()) - 1);
        let remainder = hash & ((1 << self.rbits.get()) - 1);
        (hash_bucket_idx, remainder)
    }

    #[inline]
    fn total_blocks(&self) -> NonZeroU64 {
        // The way this is calculated ensures the compilers sees that the result is both != 0 and a power of 2,
        // both of which allow the optimizer to generate much faster division/remainder code.
        // Safety: qbits in 6..=63, so (1 << qbits) / 64 is in 1..=2^57
        unsafe { NonZeroU64::new_unchecked((1u64 << self.qbits.get()) / 64) }
    }

    #[inline]
    fn total_buckets(&self) -> NonZeroU64 {
        // The way this is calculated ensures the compilers sees that the result is both != 0 and a power of 2,
        // both of which allow the optimizer to generate much faster division/remainder code.
        // Safety: qbits in 6..=63, so 1 << qbits is in 64..=2^63
        unsafe { NonZeroU64::new_unchecked(1u64 << self.qbits.get()) }
    }
}

impl<B: AsRef<[u8]>, S> std::fmt::Debug for Filter<B, S> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("Filter")
            .field("buffer", &"[..]")
            .field("len", &self.len)
            .field("qbits", &self.qbits)
            .field("rbits", &self.rbits)
            .field("max_qbits", &self.max_qbits)
            .finish()
    }
}

/// Converts a borrowed filter view into an owned [`Filter`].
impl<S: Clone> Filter<&[u8], S> {
    /// Converts this borrowed filter view into an owned filter.
    pub fn to_owned(&self) -> Filter<Box<[u8]>, S> {
        Filter {
            buffer: self.buffer.into(),
            build_hasher: self.build_hasher.clone(),
            len: self.len,
            qbits: self.qbits,
            rbits: self.rbits,
            max_qbits: self.max_qbits,
        }
    }
}

impl<'a> IntoIterator for FilterRef<'a> {
    type Item = u64;
    type IntoIter = FingerprintIter<'a>;

    /// Returns an iterator over the fingerprints stored in the filter.
    #[inline]
    fn into_iter(self) -> Self::IntoIter {
        FingerprintIter::from_ref(self)
    }
}

/// An iterator over the fingerprints stored in a [`Filter`] or [`FilterRef`].
///
/// Fingerprints are yielded in ascending order. Each value is a `u64` where only
/// the lower [`Filter::fingerprint_size()`] bits are meaningful (upper bits are zero).
///
/// If duplicates were inserted, the same fingerprint value may be yielded multiple times.
///
/// Created by [`Filter::fingerprints()`] or [`FilterRef::fingerprints()`].
pub struct FingerprintIter<'a> {
    filter: FilterRef<'a>,
    q_bucket_idx: u64,
    r_bucket_idx: u64,
    remaining: u64,
    q_block_idx: u64,
    r_block_idx: u64,
    cached_occupieds: u64,
    cached_runends: u64,
}

impl<'a> FingerprintIter<'a> {
    fn new<B: AsRef<[u8]>, S>(filter: &'a Filter<B, S>) -> Self {
        Self::from_ref(FilterRef {
            buffer: filter.buffer.as_ref(),
            build_hasher: StableBuildHasher,
            len: filter.len,
            qbits: filter.qbits,
            rbits: filter.rbits,
            max_qbits: filter.max_qbits,
        })
    }

    fn from_ref(filter: FilterRef<'a>) -> Self {
        let mut q_block_idx = 0u64;
        let mut cached_occupieds = filter.raw_block(0).occupieds;
        if !filter.is_empty() {
            while cached_occupieds == 0 {
                q_block_idx += 1;
                cached_occupieds = filter.raw_block(q_block_idx).occupieds;
            }
        }
        let q_bucket_idx = q_block_idx * 64 + cached_occupieds.trailing_zeros() as u64;
        let r_bucket_idx = filter.run_start(q_bucket_idx);
        let r_block_idx = r_bucket_idx / 64;
        let cached_runends = filter.raw_block(r_block_idx).runends;

        Self {
            filter,
            q_bucket_idx,
            r_bucket_idx,
            remaining: filter.len,
            q_block_idx,
            r_block_idx,
            cached_occupieds,
            cached_runends,
        }
    }
}

impl Iterator for FingerprintIter<'_> {
    type Item = u64;

    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        self.remaining = self.remaining.checked_sub(1)?;

        let hash = (self.q_bucket_idx << self.filter.rbits.get())
            | self.filter.get_remainder(self.r_bucket_idx);

        let is_runend = self
            .cached_runends
            .is_bit_set((self.r_bucket_idx % 64) as usize);
        if is_runend {
            // Move to next occupied quotient bucket
            self.q_bucket_idx += 1;
            if self.q_bucket_idx / 64 != self.q_block_idx {
                self.q_block_idx = self.q_bucket_idx / 64;
                self.cached_occupieds = self.filter.raw_block(self.q_block_idx).occupieds;
            }

            // Find next occupied using trailing_zeros
            let mut masked = self.cached_occupieds & (u64::MAX << (self.q_bucket_idx % 64));
            while masked == 0 {
                self.q_block_idx += 1;
                self.cached_occupieds = self.filter.raw_block(self.q_block_idx).occupieds;
                masked = self.cached_occupieds;
            }
            self.q_bucket_idx = self.q_block_idx * 64 + masked.trailing_zeros() as u64;
            self.r_bucket_idx = (self.r_bucket_idx + 1).max(self.q_bucket_idx);
        } else {
            self.r_bucket_idx += 1;
        }

        // Update runends cache if needed
        if self.r_bucket_idx / 64 != self.r_block_idx {
            self.r_block_idx = self.r_bucket_idx / 64;
            self.cached_runends = self.filter.raw_block(self.r_block_idx).runends;
        }

        Some(hash)
    }

    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        (
            self.remaining.try_into().unwrap_or(usize::MAX),
            self.remaining.try_into().ok(),
        )
    }
}

#[cfg(target_pointer_width = "64")]
impl ExactSizeIterator for FingerprintIter<'_> {}

impl std::iter::FusedIterator for FingerprintIter<'_> {}

impl<'a> IntoIterator for &'a Filter {
    type Item = u64;
    type IntoIter = FingerprintIter<'a>;

    /// Returns an iterator over the fingerprints stored in the filter.
    ///
    /// Equivalent to calling [`Filter::fingerprints()`].
    #[inline]
    fn into_iter(self) -> Self::IntoIter {
        self.fingerprints()
    }
}

/// Builder for constructing a [`Filter`] from fingerprints in sorted (ascending) order.
///
/// This is significantly faster than repeated [`Filter::insert_fingerprint()`] calls
/// when fingerprints are known to be in non-decreasing order, as it avoids run boundary
/// lookups, linear scans, and element shifting — reducing each insertion to a simple
/// sequential write.
///
/// Create a builder by passing an empty [`Filter`] to [`Builder::new()`], then
/// call [`Self::into_filter()`] to retrieve the populated filter.
///
/// # Example
///
/// ```rust
/// let mut inserter = qfilter::Builder::new(qfilter::Filter::new(1000, 0.01).unwrap());
/// let fp_size = inserter.fingerprint_size();
///
/// // Compute truncated fingerprints and sort them.
/// // Builder requires fingerprints in non-decreasing order.
/// let mut fps: Vec<u64> = (0..100u64)
///     .map(|i| qfilter::compute_fingerprint_with_hasher(&qfilter::StableBuildHasher, i, fp_size))
///     .collect();
/// fps.sort();
///
/// for fp in fps {
///     inserter.insert_fingerprint(false, fp).unwrap();
/// }
/// let filter = inserter.into_filter();
///
/// for i in 0..100u64 {
///     assert!(filter.contains(i));
/// }
/// ```
#[derive(Debug)]
pub struct Builder<S = StableBuildHasher> {
    filter: Filter<Box<[u8]>, S>,
    next_slot: u64,
    last_quotient: u64,
    last_remainder: u64,
}

impl<S> Builder<S> {
    /// Creates a builder from an empty filter.
    ///
    /// Use this with [`Filter::new_with_hasher()`] or other constructors to
    /// build a filter with a custom hasher using sorted fingerprint insertion.
    ///
    /// # Panics
    ///
    /// Panics if the filter is not empty.
    pub fn new(filter: Filter<Box<[u8]>, S>) -> Self {
        assert!(filter.is_empty(), "Builder requires an empty filter");
        Self {
            filter,
            next_slot: 0,
            last_quotient: 0,
            last_remainder: 0,
        }
    }

    /// Returns the fingerprint size (in bits) of the filter being built.
    pub fn fingerprint_size(&self) -> u8 {
        self.filter.fingerprint_size()
    }

    /// Returns the current capacity of the filter being built.
    pub fn capacity(&self) -> u64 {
        self.filter.capacity()
    }

    /// Consumes the builder and returns the constructed filter.
    pub fn into_filter(self) -> Filter<Box<[u8]>, S> {
        self.filter
    }
}

impl<S: Clone> Builder<S> {
    /// Creates a builder for internal rebuild paths (grow, shrink).
    fn with_qr_and_hasher(
        qbits: NonZeroU8,
        rbits: NonZeroU8,
        max_qbits: Option<NonZeroU8>,
        build_hasher: S,
    ) -> Result<Self, Error> {
        let mut filter = Filter::with_qr_and_hasher(qbits, rbits, build_hasher)?;
        filter.max_qbits = max_qbits;
        Ok(Self::new(filter))
    }

    /// Inserts a fingerprint that must be >= all previously inserted fingerprints.
    ///
    /// # Parameters
    ///
    /// - `duplicate`: If `true`, insert even if the fingerprint equals the previous one.
    ///   If `false`, skip duplicates (return `Ok(false)`).
    /// - `hash`: The fingerprint value. Only the lower
    ///   [`Filter::fingerprint_size()`] bits are used.
    ///
    /// # Returns
    ///
    /// - `Ok(true)` if inserted successfully.
    /// - `Ok(false)` if already present and `duplicate` is `false`.
    /// - `Err(Error::CapacityExceeded)` if the filter is full and cannot grow.
    ///
    /// # Panics
    ///
    /// Panics if `hash` is not in non-decreasing order relative to the
    /// previously inserted fingerprint.
    #[inline] // see insert_impl
    pub fn insert_fingerprint(&mut self, duplicate: bool, hash: u64) -> Result<bool, Error> {
        match self.insert_impl(duplicate, hash) {
            Ok(inserted) => Ok(inserted),
            Err(Error::CapacityExceeded) => {
                *self = self.filter.rebuild_grown()?;
                self.insert_impl(duplicate, hash)
            }
            Err(e) => Err(e),
        }
    }

    /// Core sorted insertion. Returns `Ok(true)` if a new element was inserted,
    /// `Ok(false)` if it was a duplicate and `duplicate` is `false`.
    // This is optimized enough that the function call overhead is noticeable,
    // so we force inline it into the caller (insert_fingerprint) always.
    // For instance, this makes merge about 25% faster.
    #[inline(always)]
    fn insert_impl(&mut self, duplicate: bool, hash: u64) -> Result<bool, Error> {
        let (quotient, remainder) = self.filter.calc_qr(hash);

        if !self.filter.is_empty() {
            assert!(
                quotient > self.last_quotient
                    || (quotient == self.last_quotient && remainder >= self.last_remainder),
                "fingerprints must be in non-decreasing order: ({quotient}, {remainder}) < ({}, {})",
                self.last_quotient,
                self.last_remainder
            );

            // Duplicate detection — since input is sorted, duplicates are consecutive.
            if quotient == self.last_quotient && remainder == self.last_remainder && !duplicate {
                return Ok(false);
            }

            if self.filter.len >= self.filter.capacity() {
                return Err(Error::CapacityExceeded);
            }

            // When next_slot wraps past total_buckets, the simple sequential-append
            // assumption breaks down — fall back to the general insertion path.
            if self.next_slot >= self.filter.total_buckets().get() {
                self.last_quotient = quotient;
                self.last_remainder = remainder;
                let max_count = if duplicate { u64::MAX } else { 1 };
                let count = self.filter.insert_impl(max_count, hash)?;
                return Ok(count < max_count);
            }
        }

        // Place the element at the appropriate slot.
        let slot = if quotient >= self.next_slot {
            // Canonical slot is free — start a new run here.
            self.filter.set_occupied(quotient, true);
            quotient
        } else {
            if quotient == self.last_quotient {
                // Extending current run: clear previous runend.
                self.filter.set_runend(self.next_slot - 1, false);
            } else {
                // New quotient, but spilled past canonical slot.
                self.filter.set_occupied(quotient, true);
            }
            self.filter.inc_offsets(quotient, self.next_slot);
            self.next_slot
        };
        self.filter.set_runend(slot, true);
        self.filter.set_remainder(slot, remainder);
        self.next_slot = slot + 1;
        self.last_quotient = quotient;
        self.last_remainder = remainder;
        self.filter.len += 1;
        Ok(true)
    }
}

/// Maximum log2 number of slots that can be used in the filter.
/// Effectively, the largest power of 2 that can be multiplied by 19 without overflowing u64.
const MAX_QBITS: u8 = 59;

impl<B, S> Filter<B, S> {
    /// Maximum number of items that can be stored in the filter: ceil(2^59 * 19 / 20)
    pub const MAX_CAPACITY: u64 = crate::MAX_CAPACITY;
}

/// Maximum number of items that can be stored in the filter: ceil(2^59 * 19 / 20)
const MAX_CAPACITY: u64 = (2u64.pow(MAX_QBITS as u32) * 19).div_ceil(20);

impl Filter {
    /// Creates a new filter with the given capacity and false positive rate.
    ///
    /// # Parameters
    ///
    /// - `capacity`: Minimum number of items the filter can hold.
    /// - `fp_rate`: Upper bound on false positive probability (clamped to (0, 0.5]).
    ///   The actual rate may be lower due to internal rounding.
    ///
    /// # Errors
    ///
    /// - [`Error::CapacityTooLarge`] if capacity exceeds [`Self::MAX_CAPACITY`].
    /// - [`Error::NotEnoughFingerprintBits`] if the configuration isn't achievable.
    #[inline]
    pub fn new(capacity: u64, fp_rate: f64) -> Result<Self, Error> {
        Self::new_resizeable(capacity, capacity, fp_rate)
    }

    /// Creates a resizeable filter that can grow from `initial_capacity` to `max_capacity`.
    ///
    /// The filter starts small and automatically grows when full. The `fp_rate` applies
    /// at `max_capacity`; smaller sizes have proportionally lower error rates.
    ///
    /// # Parameters
    ///
    /// - `initial_capacity`: Starting capacity.
    /// - `max_capacity`: Maximum capacity after growth (must be >= `initial_capacity`).
    /// - `fp_rate`: Upper bound on false positive rate at max capacity (clamped to (0, 0.5]).
    ///   The actual rate may be lower due to internal rounding.
    ///
    /// # Errors
    ///
    /// - [`Error::CapacityTooLarge`] if `max_capacity` exceeds [`Self::MAX_CAPACITY`].
    /// - [`Error::NotEnoughFingerprintBits`] if the configuration isn't achievable.
    pub fn new_resizeable(
        initial_capacity: u64,
        max_capacity: u64,
        fp_rate: f64,
    ) -> Result<Self, Error> {
        assert!(max_capacity >= initial_capacity);
        let slots_for_capacity = calculate_needed_slots(initial_capacity)?;
        let qbits = slots_for_capacity.trailing_zeros() as u8;
        let slots_for_max_capacity = calculate_needed_slots(max_capacity)?;
        let max_qbits = slots_for_max_capacity.trailing_zeros() as u8;
        let fp_rate = fp_rate.clamp(f64::MIN_POSITIVE, 0.5);
        let rbits = (-fp_rate.log2()).ceil().max(1.0) as u8 + (max_qbits - qbits);
        let mut result = Self::with_qr(qbits.try_into().unwrap(), rbits.try_into().unwrap())?;
        if max_qbits > qbits {
            result.max_qbits = Some(max_qbits.try_into().unwrap());
        }
        Ok(result)
    }

    /// Creates a new resizeable filter with a specific fingerprint bit size.
    ///
    /// Use this when storing pre-computed fingerprints via [`Self::insert_fingerprint()`].
    /// Use [`compute_fingerprint_with_hasher()`] to compute fingerprints with a specific bit size.
    ///
    /// # Parameters
    ///
    /// - `initial_capacity`: Minimum number of items the filter can hold initially.
    /// - `fingerprint_bits`: Bits per fingerprint (7..=64). Larger values reduce false positives.
    ///
    /// # Errors
    ///
    /// Returns [`Error::NotEnoughFingerprintBits`] if `fingerprint_bits` is outside 7..=64
    /// or too small for the requested capacity.
    pub fn with_fingerprint_size(
        initial_capacity: u64,
        fingerprint_bits: u8,
    ) -> Result<Self, Error> {
        if !(7..=64).contains(&fingerprint_bits) {
            return Err(Error::NotEnoughFingerprintBits);
        }
        let slots_for_capacity = calculate_needed_slots(initial_capacity)?;
        let qbits = slots_for_capacity.trailing_zeros() as u8;
        if fingerprint_bits <= qbits {
            return Err(Error::NotEnoughFingerprintBits);
        }
        let rbits = fingerprint_bits - qbits;
        let mut result = Self::with_qr(qbits.try_into().unwrap(), rbits.try_into().unwrap())?;
        if rbits > 1 {
            result.max_qbits = Some((qbits + rbits - 1).min(MAX_QBITS).try_into().unwrap());
        }
        Ok(result)
    }

    fn with_qr(qbits: NonZeroU8, rbits: NonZeroU8) -> Result<Self, Error> {
        Self::with_qr_and_hasher(qbits, rbits, StableBuildHasher)
    }

    fn check_cpu_support() {
        #[cfg(all(
            target_arch = "x86_64",
            not(feature = "legacy_x86_64_support"),
            not(target_feature = "popcnt")
        ))]
        assert!(
            std::is_x86_feature_detected!("popcnt"),
            "CPU doesn't support the popcnt instruction"
        );
        #[cfg(all(
            target_arch = "x86_64",
            not(feature = "legacy_x86_64_support"),
            not(target_feature = "bmi2")
        ))]
        assert!(
            std::is_x86_feature_detected!("bmi2"),
            "CPU doesn't support the bmi2 instructions"
        );
    }
}

impl<S> Filter<Box<[u8]>, S> {
    fn with_qr_and_hasher(
        qbits: NonZeroU8,
        rbits: NonZeroU8,
        build_hasher: S,
    ) -> Result<Self, Error> {
        Filter::check_cpu_support();
        if qbits.get() + rbits.get() > 64 {
            return Err(Error::NotEnoughFingerprintBits);
        }
        let buffer_bytes = usize::try_from(calculate_memory_bytes(qbits.get(), rbits.get()))
            .map_err(|_| Error::CapacityTooLarge)?;
        let buffer = vec![0u8; buffer_bytes].into_boxed_slice();
        Ok(Self {
            buffer,
            build_hasher,
            qbits,
            rbits,
            len: 0,
            max_qbits: None,
        })
    }
}

impl<S: Clone> Filter<Box<[u8]>, S> {
    /// Creates a new filter with the given capacity, false positive rate, and custom hasher.
    ///
    /// The same hasher (and seed) must be used for all operations on this filter.
    /// The hasher is not serialized. On deserialization it is reconstructed via
    /// `S::default()`. If that doesn't produce the correct hasher, use
    /// [`Filter::with_hasher()`] to restore it.
    ///
    /// See [`Filter::new()`] for parameter details.
    #[inline]
    pub fn new_with_hasher(capacity: u64, fp_rate: f64, build_hasher: S) -> Result<Self, Error> {
        Self::new_resizeable_with_hasher(capacity, capacity, fp_rate, build_hasher)
    }

    /// Creates a resizeable filter with a custom hasher.
    ///
    /// See [`Filter::new_resizeable()`] and [`Filter::new_with_hasher()`] for details.
    pub fn new_resizeable_with_hasher(
        initial_capacity: u64,
        max_capacity: u64,
        fp_rate: f64,
        build_hasher: S,
    ) -> Result<Self, Error> {
        assert!(max_capacity >= initial_capacity);
        let slots_for_capacity = calculate_needed_slots(initial_capacity)?;
        let qbits = slots_for_capacity.trailing_zeros() as u8;
        let slots_for_max_capacity = calculate_needed_slots(max_capacity)?;
        let max_qbits = slots_for_max_capacity.trailing_zeros() as u8;
        let fp_rate = fp_rate.clamp(f64::MIN_POSITIVE, 0.5);
        let rbits = (-fp_rate.log2()).ceil().max(1.0) as u8 + (max_qbits - qbits);
        let mut result = Self::with_qr_and_hasher(
            qbits.try_into().unwrap(),
            rbits.try_into().unwrap(),
            build_hasher,
        )?;
        if max_qbits > qbits {
            result.max_qbits = Some(max_qbits.try_into().unwrap());
        }
        Ok(result)
    }

    /// Creates a filter with a specific fingerprint bit size and custom hasher.
    ///
    /// See [`Filter::with_fingerprint_size()`] and [`Filter::new_with_hasher()`] for details.
    pub fn with_fingerprint_size_and_hasher(
        initial_capacity: u64,
        fingerprint_bits: u8,
        build_hasher: S,
    ) -> Result<Self, Error> {
        if !(7..=64).contains(&fingerprint_bits) {
            return Err(Error::NotEnoughFingerprintBits);
        }
        let slots_for_capacity = calculate_needed_slots(initial_capacity)?;
        let qbits = slots_for_capacity.trailing_zeros() as u8;
        if fingerprint_bits <= qbits {
            return Err(Error::NotEnoughFingerprintBits);
        }
        let rbits = fingerprint_bits - qbits;
        let mut result = Self::with_qr_and_hasher(
            qbits.try_into().unwrap(),
            rbits.try_into().unwrap(),
            build_hasher,
        )?;
        if rbits > 1 {
            result.max_qbits = Some((qbits + rbits - 1).min(MAX_QBITS).try_into().unwrap());
        }
        Ok(result)
    }

    /// Removes all items from the filter.
    pub fn clear(&mut self) {
        self.buffer.fill(0);
        self.len = 0;
    }

    #[inline]
    fn set_block_runends(&mut self, block_num: u64, runends: u64) {
        let block_num = block_num % self.total_blocks();
        let block_start = block_num as usize * self.block_byte_size();
        // SAFETY: block_num % total_blocks() guarantees valid block index
        unsafe { self.write_u64_unchecked(block_start + 1 + 8, runends) };
    }

    /// Write u64 to buffer at given offset without bounds checking.
    /// SAFETY: Caller must ensure offset + 8 <= buffer.len()
    #[inline(always)]
    unsafe fn write_u64_unchecked(&mut self, offset: usize, value: u64) {
        debug_assert!(offset + 8 <= self.buffer.len());
        self.buffer
            .get_unchecked_mut(offset..offset + 8)
            .copy_from_slice(&value.to_le_bytes());
    }

    #[inline]
    fn adjust_block_offset(&mut self, block_num: u64, inc: bool) {
        let block_num = block_num % self.total_blocks();
        let block_start = block_num as usize * self.block_byte_size();
        // SAFETY: block_num % total_blocks() guarantees valid block index
        let current = unsafe { *self.buffer.get_unchecked(block_start) };
        let new_value = if inc {
            current.saturating_add(1)
        } else if current != u8::MAX {
            current - 1
        } else {
            self.calc_offset(block_num).try_into().unwrap_or(u8::MAX)
        };
        unsafe { *self.buffer.get_unchecked_mut(block_start) = new_value };
    }

    #[inline]
    fn inc_offsets(&mut self, start_bucket: u64, end_bucket: u64) {
        let original_block = start_bucket / 64;
        let mut last_affected_block = end_bucket / 64;
        if end_bucket < start_bucket {
            last_affected_block += self.total_blocks().get();
        }
        for b in original_block + 1..=last_affected_block {
            self.adjust_block_offset(b, true);
        }
    }

    #[inline]
    fn dec_offsets(&mut self, start_bucket: u64, end_bucket: u64) {
        let original_block = start_bucket / 64;
        let mut last_affected_block = end_bucket / 64;
        if end_bucket < start_bucket {
            last_affected_block += self.total_blocks().get();
        }

        // As an edge case we may decrement the offsets of 2+ blocks and the block B' offset
        // may be saturated and depend on a previous Block B" with a non saturated offset.
        // But B" offset may also(!) be affected by the decremented operation, so we must
        // decrement B" offset first before the remaining offsets.
        if last_affected_block - original_block >= 2
            && self.raw_block(original_block + 1).offset >= u8::MAX as u64
        {
            // last affected block offset is always <= 64 (BLOCK SIZE)
            // otherwise the decrement operation would be to affecting a subsequent block
            debug_assert!(self.raw_block(last_affected_block).offset <= 64);
            self.adjust_block_offset(last_affected_block, false);
            last_affected_block -= 1;
        }
        for b in original_block + 1..=last_affected_block {
            self.adjust_block_offset(b, false);
        }

        #[cfg(fuzzing)]
        self.validate_offsets(original_block, last_affected_block);
    }

    #[cfg(any(fuzzing, test))]
    fn validate_offsets(&mut self, original_block: u64, last_affected_block: u64) {
        for b in original_block..=last_affected_block {
            let raw_offset = self.raw_block(b).offset;
            let offset = self.calc_offset(b);
            debug_assert!(
                (raw_offset >= u8::MAX as u64 && offset >= u8::MAX as u64)
                    || (offset == raw_offset),
                "block {} offset {} calc {}",
                b,
                raw_offset,
                offset,
            );
        }
    }

    #[inline(always)]
    fn set_occupied(&mut self, hash_bucket_idx: u64, value: bool) {
        let hash_bucket_idx = hash_bucket_idx % self.total_buckets();
        let block_start = (hash_bucket_idx / 64) as usize * self.block_byte_size();
        // SAFETY: hash_bucket_idx % total_buckets() guarantees valid block index
        let mut occupieds = unsafe { self.read_u64_unchecked(block_start + 1) };
        occupieds.update_bit((hash_bucket_idx % 64) as usize, value);
        unsafe { self.write_u64_unchecked(block_start + 1, occupieds) };
    }

    #[inline(always)]
    fn set_runend(&mut self, hash_bucket_idx: u64, value: bool) {
        let hash_bucket_idx = hash_bucket_idx % self.total_buckets();
        let block_start = (hash_bucket_idx / 64) as usize * self.block_byte_size();
        // SAFETY: hash_bucket_idx % total_buckets() guarantees valid block index
        let mut runends = unsafe { self.read_u64_unchecked(block_start + 1 + 8) };
        runends.update_bit((hash_bucket_idx % 64) as usize, value);
        unsafe { self.write_u64_unchecked(block_start + 1 + 8, runends) };
    }

    #[inline(always)]
    fn set_remainder(&mut self, hash_bucket_idx: u64, remainder: u64) {
        debug_assert!(self.rbits.get() > 0 && self.rbits.get() < 64);
        debug_assert!(remainder.leading_zeros() >= 64 - self.rbits.get() as u32);
        let hash_bucket_idx = hash_bucket_idx % self.total_buckets();
        let remainders_start = (hash_bucket_idx / 64) as usize * self.block_byte_size() + 1 + 8 + 8;
        let start_bit_idx = self.rbits.usize() * (hash_bucket_idx % 64) as usize;
        let start_u64 = start_bit_idx / 64;
        let extra_low = start_bit_idx - start_u64 * 64;

        // SAFETY: Always safe due to 8 extra bytes padding at end of buffer
        let offset = remainders_start + start_u64 * 8;
        let rem_part0 = unsafe { self.read_u64_unchecked(offset) };
        let rem_part1 = unsafe { self.read_u64_unchecked(offset + 8) };

        // Combine as 128-bit, clear remainder region, set new remainder (branchless)
        let combined = (rem_part0 as u128) | ((rem_part1 as u128) << 64);
        let rbits_mask = ((1u128 << self.rbits.get()) - 1) << extra_low;
        let new_combined = (combined & !rbits_mask) | ((remainder as u128) << extra_low);

        // Split back and write both
        unsafe {
            self.write_u64_unchecked(offset, new_combined as u64);
            self.write_u64_unchecked(offset + 8, (new_combined >> 64) as u64);
        }
    }

    /// Converts an inclusive end index to exclusive, handling wrap-around.
    #[inline]
    fn wrap_end_exclusive(&self, start: u64, end_inclusive: u64) -> u64 {
        if end_inclusive < start {
            end_inclusive + self.total_buckets().get() + 1
        } else {
            end_inclusive + 1
        }
    }

    fn shift_remainders_by_1(&mut self, start: u64, end_inc: u64) {
        let end = self.wrap_end_exclusive(start, end_inc);
        let mut end_u64 = end * self.rbits.u64() / 64;
        let mut bend = (end * self.rbits.u64() % 64) as usize;
        let start_u64 = start * self.rbits.u64() / 64;
        let bstart = (start * self.rbits.u64() % 64) as usize;

        let rbits = self.rbits.get() as usize;
        let total_blocks = self.total_blocks().get();
        let block_byte_size = self.block_byte_size();

        // Track position as (block_idx, rem_idx) to avoid division in loop
        let mut block_idx = (end_u64 / rbits as u64) % total_blocks;
        let mut rem_idx = (end_u64 % rbits as u64) as usize;

        // Byte offset for a given (block_idx, rem_idx)
        let offset = |b: u64, r: usize| (b as usize * block_byte_size) + 17 + r * 8;

        // Shift from end to start, caching the previous value
        if end_u64 != start_u64 {
            let mut cached_rem = unsafe { self.read_u64_unchecked(offset(block_idx, rem_idx)) };

            while end_u64 != start_u64 {
                // Compute previous position
                let (prev_block, prev_rem) = if rem_idx > 0 {
                    (block_idx, rem_idx - 1)
                } else {
                    (
                        if block_idx > 0 {
                            block_idx - 1
                        } else {
                            total_blocks - 1
                        },
                        rbits - 1,
                    )
                };

                let prev = unsafe { self.read_u64_unchecked(offset(prev_block, prev_rem)) };
                let shifted = prev.shift_right(rbits, &cached_rem, 0, bend);
                unsafe { self.write_u64_unchecked(offset(block_idx, rem_idx), shifted) };

                cached_rem = prev;
                block_idx = prev_block;
                rem_idx = prev_rem;
                end_u64 -= 1;
                bend = 64;
            }
        }

        // Handle start position (block_idx/rem_idx already there after loop)
        let mut rem_val = unsafe { self.read_u64_unchecked(offset(block_idx, rem_idx)) };
        rem_val = 0u64.shift_right(rbits, &rem_val, bstart, bend);
        unsafe { self.write_u64_unchecked(offset(block_idx, rem_idx), rem_val) };
    }

    fn shift_remainders_back_by_1(&mut self, start: u64, end_inc: u64) {
        let end = self.wrap_end_exclusive(start, end_inc);
        let end_u64 = end * self.rbits.u64() / 64;
        let bend = (end * self.rbits.u64() % 64) as usize;
        let mut start_u64 = start * self.rbits.u64() / 64;
        let mut bstart = (start * self.rbits.u64() % 64) as usize;

        let rbits = self.rbits.get() as usize;
        let total_blocks = self.total_blocks().get();
        let block_byte_size = self.block_byte_size();

        // Track position as (block_idx, rem_idx) to avoid division in loop
        let mut block_idx = (start_u64 / rbits as u64) % total_blocks;
        let mut rem_idx = (start_u64 % rbits as u64) as usize;

        // Byte offset for a given (block_idx, rem_idx)
        let offset = |b: u64, r: usize| (b as usize * block_byte_size) + 17 + r * 8;

        // Shift from start to end, caching the previous value
        if end_u64 != start_u64 {
            let mut cached_rem = unsafe { self.read_u64_unchecked(offset(block_idx, rem_idx)) };

            while end_u64 != start_u64 {
                // Compute next position
                let (next_block, next_rem) = if rem_idx + 1 < rbits {
                    (block_idx, rem_idx + 1)
                } else {
                    ((block_idx + 1) % total_blocks, 0)
                };

                let next = unsafe { self.read_u64_unchecked(offset(next_block, next_rem)) };
                let shifted = next.shift_left(rbits, &cached_rem, bstart, 64);
                unsafe { self.write_u64_unchecked(offset(block_idx, rem_idx), shifted) };

                cached_rem = next;
                block_idx = next_block;
                rem_idx = next_rem;
                start_u64 += 1;
                bstart = 0;
            }
        }

        // Handle end position (block_idx/rem_idx already there after loop)
        let mut rem_val = unsafe { self.read_u64_unchecked(offset(block_idx, rem_idx)) };
        rem_val = 0u64.shift_left(rbits, &rem_val, bstart, bend);
        unsafe { self.write_u64_unchecked(offset(block_idx, rem_idx), rem_val) };
    }

    fn shift_runends_by_1(&mut self, start: u64, end_inc: u64) {
        let end = self.wrap_end_exclusive(start, end_inc);
        let mut end_block = end / 64;
        let mut bend = (end % 64) as usize;
        let start_block = start / 64;
        let bstart = (start % 64) as usize;
        while end_block != start_block {
            let prev_block_runends = self.raw_block(end_block - 1).runends;
            let mut block_runends = self.raw_block(end_block).runends;
            block_runends = prev_block_runends.shift_right(1, &block_runends, 0, bend);
            self.set_block_runends(end_block, block_runends);
            end_block -= 1;
            bend = 64;
        }
        let mut block_runends = self.raw_block(start_block).runends;
        block_runends = 0u64.shift_right(1, &block_runends, bstart, bend);
        self.set_block_runends(start_block, block_runends);
    }

    fn shift_runends_back_by_1(&mut self, start: u64, end_inc: u64) {
        let end = self.wrap_end_exclusive(start, end_inc);
        let end_block = end / 64;
        let bend = (end % 64) as usize;
        let mut start_block = start / 64;
        let mut bstart = (start % 64) as usize;
        while start_block != end_block {
            let next_block_runends = self.raw_block(start_block + 1).runends;
            let mut block_runends = self.raw_block(start_block).runends;
            block_runends = next_block_runends.shift_left(1, &block_runends, bstart, 64);
            self.set_block_runends(start_block, block_runends);
            start_block += 1;
            bstart = 0;
        }
        let mut block_runends = self.raw_block(end_block).runends;
        block_runends = 0u64.shift_left(1, &block_runends, bstart, bend);
        self.set_block_runends(end_block, block_runends);
    }

    /// Removes `item` from the filter. Returns `true` if found and removed.
    ///
    /// **Warning:** Removing an item that wasn't inserted may cause false negatives
    /// for other items with colliding fingerprints.
    pub fn remove<T: Hash>(&mut self, item: T) -> bool
    where
        S: BuildHasher,
    {
        self.remove_fingerprint(self.hash_item(item))
    }

    /// Removes the fingerprint from the filter. Returns `true` if found and removed.
    ///
    /// Only the lower [`Self::fingerprint_size()`] bits of `hash` are used.
    ///
    /// **Warning:** Removing a fingerprint that wasn't inserted may cause false negatives
    /// for other items with colliding fingerprints.
    pub fn remove_fingerprint(&mut self, hash: u64) -> bool {
        let (hash_bucket_idx, hash_remainder) = self.calc_qr(hash);
        if !self.is_occupied(hash_bucket_idx) {
            return false;
        }
        let mut run_start = self.run_start(hash_bucket_idx);
        // adjust run_start so we can have
        // hash_bucket_idx <= run_start <= found_idx <= run_end
        if run_start < hash_bucket_idx {
            run_start += self.total_buckets().get();
        }
        let mut run_end = run_start;
        let mut found_idx = None;
        let found_idx = loop {
            if hash_remainder == self.get_remainder(run_end) {
                found_idx = Some(run_end);
            }
            if self.is_runend(run_end) {
                if let Some(i) = found_idx {
                    break i;
                } else {
                    return false;
                };
            }
            run_end += 1;
        };

        let mut last_bucket_shifted_run_end = run_end;
        if last_bucket_shifted_run_end != hash_bucket_idx {
            last_bucket_shifted_run_end = self.find_first_not_shifted_slot(run_end);
            if last_bucket_shifted_run_end < run_end {
                last_bucket_shifted_run_end += self.total_buckets().get();
            }
        }

        // run_end points to the end of the run (inc) which contains the target remainder (found_idx)
        // If we had a single remainder in the run the run is no more
        if run_end == run_start {
            self.set_occupied(hash_bucket_idx, false);
        } else {
            // More than one remainder in the run.
            // If the removed rem is the last one in the run
            // the before last remainder becomes the new runend.
            if found_idx == run_end {
                self.set_runend(run_end - 1, true);
            }
        }
        if found_idx != last_bucket_shifted_run_end {
            self.set_remainder(found_idx, 0);
            self.shift_remainders_back_by_1(found_idx, last_bucket_shifted_run_end);
            self.shift_runends_back_by_1(found_idx, last_bucket_shifted_run_end);
        }
        self.set_runend(last_bucket_shifted_run_end, false);
        self.set_remainder(last_bucket_shifted_run_end, 0);
        self.dec_offsets(hash_bucket_idx, last_bucket_shifted_run_end);
        self.len -= 1;
        true
    }

    /// Inserts `item`, allowing duplicates.
    ///
    /// Use this when the filter supports removals and you're re-adding a previously
    /// removed item. See also [`Self::insert_counting()`] for bounded duplicates.
    ///
    /// # Errors
    ///
    /// Returns [`Error::CapacityExceeded`] if the filter is full.
    #[inline]
    pub fn insert_duplicated<T: Hash>(&mut self, item: T) -> Result<(), Error>
    where
        S: BuildHasher,
    {
        self.insert_counting(u64::MAX, item).map(|_| ())
    }

    /// Inserts `item` if not already present (probabilistically).
    ///
    /// # Returns
    ///
    /// - `Ok(true)` if inserted.
    /// - `Ok(false)` if already present (may be a false positive).
    /// - `Err(Error::CapacityExceeded)` if the filter is full.
    #[inline]
    pub fn insert<T: Hash>(&mut self, item: T) -> Result<bool, Error>
    where
        S: BuildHasher,
    {
        self.insert_counting(1, item).map(|count| count == 0)
    }

    /// Inserts `item` up to `max_count` times.
    ///
    /// # Returns
    ///
    /// - `Ok(prev_count)` where `prev_count` is how many times the item was already present.
    ///   A new copy is inserted only if `prev_count < max_count`.
    /// - `Err(Error::CapacityExceeded)` if the filter is full.
    pub fn insert_counting<T: Hash>(&mut self, max_count: u64, item: T) -> Result<u64, Error>
    where
        S: BuildHasher,
    {
        let hash = self.hash_item(item);
        match self.insert_impl(max_count, hash) {
            Ok(count) => Ok(count),
            Err(_) => {
                self.grow_if_possible()?;
                self.insert_impl(max_count, hash)
            }
        }
    }

    /// Inserts the fingerprint specified by `hash` in the filter.
    ///
    /// Use this instead of [`Self::insert()`] when you have pre-computed fingerprints,
    /// are migrating data between filters, or need deterministic behavior with specific
    /// hash values. Use [`compute_fingerprint_with_hasher()`] to compute compatible fingerprints.
    ///
    /// # Parameters
    ///
    /// - `duplicate`: If `true`, insert even if fingerprint already exists.
    /// - `hash`: The fingerprint value. Only the lower [`Self::fingerprint_size()`] bits are used.
    ///
    /// # Returns
    ///
    /// - `Ok(true)` if inserted successfully.
    /// - `Ok(false)` if already present and `duplicate` is `false`.
    /// - `Err(Error::CapacityExceeded)` if the filter is full.
    #[inline]
    pub fn insert_fingerprint(&mut self, duplicate: bool, hash: u64) -> Result<bool, Error> {
        let max_count = if duplicate { u64::MAX } else { 1 };
        self.insert_fingerprint_counting(max_count, hash)
            .map(|count| count < max_count)
    }

    /// Inserts the fingerprint up to `max_count` times.
    ///
    /// Only the lower [`Self::fingerprint_size()`] bits of `hash` are used.
    ///
    /// # Returns
    ///
    /// - `Ok(prev_count)` where `prev_count` is how many times the fingerprint was present.
    ///   A new copy is inserted only if `prev_count < max_count`.
    /// - `Err(Error::CapacityExceeded)` if the filter is full.
    pub fn insert_fingerprint_counting(&mut self, max_count: u64, hash: u64) -> Result<u64, Error> {
        match self.insert_impl(max_count, hash) {
            Ok(count) => Ok(count),
            Err(_) => {
                self.grow_if_possible()?;
                self.insert_impl(max_count, hash)
            }
        }
    }

    /// Inserts the fingerprint specified by `hash` in the filter.
    /// `max_count` specifies how many occurences of the fingerprint can be added to the filter.
    /// It's up to the caller to grow the filter if needed and retry the insert.
    ///
    /// Returns `Ok(count)` of how many equal fingerprints _were_ in the filter.
    /// Returns `Err(Error::CapacityExceeded)` if the filter cannot admit the new item.
    fn insert_impl(&mut self, max_count: u64, hash: u64) -> Result<u64, Error> {
        enum Operation {
            NewRun,
            BeforeRunend,
            NewRunend,
        }

        let (hash_bucket_idx, hash_remainder) = self.calc_qr(hash);
        if self.is_slot_empty(hash_bucket_idx) {
            if self.len >= self.capacity() {
                return Err(Error::CapacityExceeded);
            }
            debug_assert!(!self.is_occupied(hash_bucket_idx));
            debug_assert!(!self.is_runend(hash_bucket_idx));
            self.set_occupied(hash_bucket_idx, true);
            self.set_runend(hash_bucket_idx, true);
            self.set_remainder(hash_bucket_idx, hash_remainder);
            self.len += 1;
            return Ok(0);
        }

        let mut runstart_idx = self.run_start(hash_bucket_idx);
        let mut runend_idx = self.run_end(hash_bucket_idx);
        let mut fingerprint_count = 0;
        let insert_idx;
        let operation;
        if self.is_occupied(hash_bucket_idx) {
            // adjust runend so its >= runstart even if it wrapped around
            if runend_idx < runstart_idx {
                runend_idx += self.total_buckets().get();
            }
            while runstart_idx <= runend_idx {
                match self.get_remainder(runstart_idx).cmp(&hash_remainder) {
                    Ordering::Equal => {
                        fingerprint_count += 1;
                        if fingerprint_count >= max_count {
                            return Ok(fingerprint_count);
                        }
                    }
                    Ordering::Greater => break,
                    Ordering::Less => (),
                }

                runstart_idx += 1;
            }

            insert_idx = runstart_idx % self.total_buckets();
            if runstart_idx > runend_idx {
                /* new remainder is >= than any remainder in the run. */
                operation = Operation::NewRunend;
            } else {
                /* there are larger remainders already in the run. */
                operation = Operation::BeforeRunend; /* Inserting */
            }
        } else {
            insert_idx = (runend_idx + 1) % self.total_buckets();
            operation = Operation::NewRun; /* Insert into empty bucket */
        }

        if self.len >= self.capacity() {
            return Err(Error::CapacityExceeded);
        }
        let empty_slot_idx = self.find_first_empty_slot(runend_idx + 1);
        if insert_idx != empty_slot_idx {
            self.shift_remainders_by_1(insert_idx, empty_slot_idx);
            self.shift_runends_by_1(insert_idx, empty_slot_idx);
        }
        self.set_remainder(insert_idx, hash_remainder);
        match operation {
            Operation::NewRun => {
                /* Insert into empty bucket */
                self.set_runend(insert_idx, true);
                self.set_occupied(hash_bucket_idx, true);
            }
            Operation::NewRunend => {
                /*  new remainder it is >= than any remainder in the run. */
                self.set_runend(insert_idx.wrapping_sub(1) % self.total_buckets(), false);
                self.set_runend(insert_idx, true);
            }
            Operation::BeforeRunend => { /* there are larger remainders already in the run. */ }
        }

        self.inc_offsets(hash_bucket_idx, empty_slot_idx);
        self.len += 1;
        Ok(fingerprint_count)
    }

    /// Shrinks memory usage if occupancy is low enough.
    ///
    /// Preserves the fingerprint size and false positive guarantees.
    /// Shrinking occurs when occupancy drops below 50% of capacity.
    ///
    /// # Example
    ///
    /// ```rust
    /// let mut filter = qfilter::Filter::new(1000, 0.01).unwrap();
    /// let initial_memory = filter.memory_usage();
    ///
    /// // Fill filter to capacity
    /// for i in 0..filter.capacity() {
    ///     filter.insert(i).unwrap();
    /// }
    ///
    /// // Remove most items to reduce occupancy below 50%
    /// for i in 0..filter.capacity() * 3 / 4 {
    ///     filter.remove(i);
    /// }
    ///
    /// filter.shrink_to_fit();
    /// assert!(filter.memory_usage() < initial_memory);
    /// ```
    pub fn shrink_to_fit(&mut self) {
        if self.total_blocks().get() > 1 && self.len() <= self.capacity() / 2 {
            let mut inserter = Builder::with_qr_and_hasher(
                (self.qbits.get() - 1).try_into().unwrap(),
                (self.rbits.get() + 1).try_into().unwrap(),
                self.max_qbits,
                self.build_hasher.clone(),
            )
            .unwrap();
            // Use insert_impl directly: the shrunk filter has capacity >= len (50% occupancy).
            for hash in self.fingerprints() {
                inserter
                    .insert_impl(true, hash)
                    .expect("Shrinking should not fail");
            }
            let new = inserter.into_filter();
            debug_assert_eq!(new.len, self.len);
            debug_assert_eq!(new.fingerprint_size(), self.fingerprint_size());
            *self = new;
        }
    }

    /// Merges all fingerprints from `other` into `self`.
    ///
    /// Both filters must have been built with the same hasher (and seed) for the
    /// result to be meaningful when using `T: Hash` methods.
    ///
    /// # Parameters
    ///
    /// - `keep_duplicates`: If `true`, the result is a multiset sum — each fingerprint
    ///   appears `count_self + count_other` times. If `false`, the result is a set union —
    ///   each unique fingerprint appears exactly once, discarding any duplicates that
    ///   existed in either filter.
    /// - `other`: Source filter. Must have `fingerprint_size() >= self.fingerprint_size()`.
    ///
    /// # Errors
    ///
    /// - [`Error::IncompatibleFingerprintSize`] if `other` has smaller fingerprints.
    /// - [`Error::CapacityExceeded`] if the merged result exceeds capacity.
    ///   When fingerprint sizes match, the merge is atomic (on error, `self` is unchanged).
    ///
    /// # Example
    ///
    /// ```rust
    /// let mut filter1 = qfilter::Filter::new(100, 0.01).unwrap();
    /// let mut filter2 = qfilter::Filter::new(100, 0.01).unwrap();
    ///
    /// // Insert different items into each filter
    /// for i in 0..10 {
    ///     filter1.insert(i).unwrap();
    /// }
    /// for i in 10..20 {
    ///     filter2.insert(i).unwrap();
    /// }
    ///
    /// // Merge filter2 into filter1
    /// filter1.merge(false, &filter2).unwrap();
    ///
    /// // filter1 now contains items from both filters
    /// for i in 0..20 {
    ///     assert!(filter1.contains(i));
    /// }
    /// ```
    pub fn merge<B2: AsRef<[u8]>, S2>(
        &mut self,
        keep_duplicates: bool,
        other: &Filter<B2, S2>,
    ) -> Result<(), Error> {
        if self.fingerprint_size() == other.fingerprint_size() {
            *self = self.merge_sorted(keep_duplicates, other)?;
        } else if other.fingerprint_size() >= self.fingerprint_size() {
            // Different fingerprint sizes: truncation changes sort order,
            // so fall back to one-by-one insertion.
            let max_count = if keep_duplicates { u64::MAX } else { 1 };
            for hash in other.fingerprints() {
                self.insert_impl(max_count, hash)?;
            }
        } else {
            return Err(Error::IncompatibleFingerprintSize);
        }
        Ok(())
    }

    /// Two-iterator sorted merge into a fresh filter. O(n + m).
    ///
    /// Both iterators yield fingerprints in the same sorted order because
    /// the filters have identical fingerprint sizes (same qbits/rbits split).
    fn merge_sorted<B2: AsRef<[u8]>, S2>(
        &self,
        keep_duplicates: bool,
        other: &Filter<B2, S2>,
    ) -> Result<Self, Error> {
        debug_assert_eq!(self.fingerprint_size(), other.fingerprint_size());
        // Preserve growth headroom from self.
        // Pre-size to avoid intermediate growths, capped by self's max_qbits.
        let needed = if keep_duplicates {
            self.len.saturating_add(other.len)
        } else {
            // This may underestimate if there are no duplicates,
            // but it's a safe upper bound and avoids overallocation.
            self.len.max(other.len)
        };
        let needed_qbits = calculate_needed_slots(needed)
            .map_err(|_| Error::CapacityExceeded)?
            .trailing_zeros() as u8;
        let max_qbits_allowed = self.max_qbits.map_or(self.qbits.get(), |m| m.get());
        if needed_qbits > max_qbits_allowed {
            return Err(Error::CapacityExceeded);
        }
        let qbits = self.qbits.get().max(needed_qbits);
        let rbits = self.fingerprint_size() - qbits;
        let mut builder = Builder::with_qr_and_hasher(
            NonZeroU8::new(qbits).unwrap(),
            NonZeroU8::new(rbits).unwrap(),
            self.max_qbits,
            self.build_hasher.clone(),
        )?;
        let mut a = self.fingerprints().peekable();
        let mut b = other.fingerprints().peekable();
        loop {
            let hash = match (a.peek(), b.peek()) {
                (Some(&a_val), Some(&b_val)) if a_val <= b_val => {
                    a.next();
                    a_val
                }
                (Some(_), Some(_)) => b.next().unwrap(),
                (Some(_), None) => a.next().unwrap(),
                (None, Some(_)) => b.next().unwrap(),
                (None, None) => break,
            };
            builder.insert_fingerprint(keep_duplicates, hash)?;
        }
        Ok(builder.into_filter())
    }

    #[inline]
    fn grow_if_possible(&mut self) -> Result<(), Error> {
        *self = self.rebuild_grown()?.into_filter();
        Ok(())
    }

    /// Rebuilds this filter into a grown [`Builder`] with `qbits + 1`.
    ///
    /// Returns a builder (not a filter) so that callers can either continue
    /// the fast sequential-append path or consume it via [`Builder::into_filter()`].
    #[cold]
    fn rebuild_grown(&self) -> Result<Builder<S>, Error> {
        let max = self.max_qbits.ok_or(Error::CapacityExceeded)?;
        if max <= self.qbits {
            return Err(Error::CapacityExceeded);
        }
        let qbits = self.qbits.checked_add(1).ok_or(Error::CapacityExceeded)?;
        let rbits = NonZeroU8::new(self.rbits.get() - 1).ok_or(Error::CapacityExceeded)?;
        let mut inserter =
            Builder::with_qr_and_hasher(qbits, rbits, self.max_qbits, self.build_hasher.clone())?;
        // Use insert_impl directly: the new filter has 2x capacity so growth cannot happen.
        for hash in self.fingerprints() {
            inserter
                .insert_impl(true, hash)
                .expect("Growth should not fail");
        }
        debug_assert_eq!(self.len, inserter.filter.len);
        Ok(inserter)
    }

    #[doc(hidden)]
    #[cfg(any(fuzzing, test))]
    pub fn printout(&self) {
        eprintln!(
            "=== q {} r {} len {} cap {} ===",
            self.qbits,
            self.rbits,
            self.len(),
            self.capacity()
        );
        for b in 0..self.total_blocks().get() {
            let block = self.raw_block(b);
            eprintln!(
                "block {} offset {:?}\noccup {:064b}\nrunen {:064b}",
                b, block.offset, block.occupieds, block.runends
            );
            eprintln!(
                "      3210987654321098765432109876543210987654321098765432109876543210 {}",
                b * 64
            );
            eprint!("rem   ");
            for i in (0..64).rev() {
                let r = self.get_remainder(b * 64 + i);
                eprint!("{}", r % 100 / 10);
            }
            eprint!("\nrem   ");
            for i in (0..64).rev() {
                let r = self.get_remainder(b * 64 + i);
                eprint!("{}", r % 10);
            }
            println!();
        }
        eprintln!("===");
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_compute_fingerprint() {
        // Same input produces same hash
        assert_eq!(
            compute_fingerprint("hello", 64),
            compute_fingerprint("hello", 64)
        );
        // Different inputs produce different hashes
        assert_ne!(
            compute_fingerprint("hello", 64),
            compute_fingerprint("world", 64)
        );
        // Truncation works correctly
        let full = compute_fingerprint("test", 64);
        assert_eq!(compute_fingerprint("test", 8), full & 0xFF);
        assert_eq!(compute_fingerprint("test", 16), full & 0xFFFF);
        assert_eq!(compute_fingerprint("test", 32), full & 0xFFFF_FFFF);
        // Edge cases
        assert_eq!(compute_fingerprint("test", 0), 0);
        assert_eq!(compute_fingerprint("test", 1), full & 1);
        assert_eq!(compute_fingerprint("test", 65), full); // saturates at 64
                                                           // Matches filter behavior - confirm via fingerprints() iterator
        let mut filter = Filter::new(100, 0.01).unwrap();
        filter.insert("hello").unwrap();
        let fp = compute_fingerprint("hello", filter.fingerprint_size());
        assert_eq!(filter.fingerprints().collect::<Vec<_>>(), vec![fp]);
    }

    #[test]
    fn run_end_simple() {
        let mut f = Filter::new(50, 0.01).unwrap();
        f.set_occupied(5, true);
        f.set_runend(5, true);
        assert_eq!(f.run_end(4), 4);
        assert_eq!(f.run_end(5), 5);
        assert_eq!(f.run_end(6), 6);

        f.set_occupied(6, true);
        f.set_runend(6, true);
        assert_eq!(f.run_end(4), 4);
        assert_eq!(f.run_end(5), 5);
        assert_eq!(f.run_end(6), 6);

        f.set_runend(6, false);
        f.set_runend(7, true);
        assert_eq!(f.run_end(4), 4);
        assert_eq!(f.run_end(5), 5);
        assert_eq!(f.run_end(6), 7);

        f.set_runend(7, false);
        f.set_runend(8, true);
        assert_eq!(f.run_end(4), 4);
        assert_eq!(f.run_end(5), 5);
        assert_eq!(f.run_end(6), 8);

        f.set_occupied(10, true);
        f.set_runend(12, true);
        f.set_occupied(12, true);
        f.set_runend(13, true);
        assert_eq!(f.run_end(10), 12);
        assert_eq!(f.run_end(12), 13);

        f.set_occupied(11, true);
        f.set_runend(14, true);
        assert_eq!(f.run_end(10), 12);
        assert_eq!(f.run_end(11), 13);
        assert_eq!(f.run_end(12), 14);
    }

    #[test]
    fn run_end_eob() {
        let mut f = Filter::new(50, 0.01).unwrap();
        assert_eq!(f.total_buckets().get(), 64);
        f.set_occupied(63, true);
        f.set_runend(63, true);
        assert_eq!(f.run_end(62), 62);
        assert_eq!(f.run_end(63), 63);
        assert_eq!(f.find_first_empty_slot(62), 62);
        assert_eq!(f.find_first_empty_slot(63), 0);
    }

    #[test]
    fn run_end_crossing() {
        let mut f = Filter::new(50, 0.01).unwrap();
        f.set_occupied(0, true);
        f.set_runend(0, true);
        f.set_occupied(63, true);
        f.set_runend(63, true);
        assert_eq!(f.run_end(0), 0);
        assert_eq!(f.run_end(1), 1);
        assert_eq!(f.run_end(62), 62);
        assert_eq!(f.run_end(63), 63);

        f.set_runend(63, false);
        f.set_runend(1, true);
        f.adjust_block_offset(1, true);
        assert_eq!(f.run_end(0), 1);
        assert_eq!(f.run_end(1), 1);
        assert_eq!(f.run_end(62), 62);
        assert_eq!(f.run_end(63), 0);

        f.set_runend(1, false);
        f.set_runend(2, true);
        assert_eq!(f.run_end(63), 0);
        assert_eq!(f.run_end(0), 2);
        assert_eq!(f.run_end(1), 2);

        f.set_runend(2, false);
        f.set_runend(3, true);
        assert_eq!(f.run_end(63), 0);
        assert_eq!(f.run_end(1), 3);
        assert_eq!(f.run_end(2), 3);

        f.set_occupied(65, true);
        f.set_runend(68, true);
        assert_eq!(f.run_end(63), 0);
        assert_eq!(f.run_end(0), 3);
        assert_eq!(f.run_end(1), 4);
    }

    #[test]
    fn test_insert_duplicated() {
        for cap in [100, 200, 500, 1000] {
            let mut f = Filter::new(cap, 0.01).unwrap();
            for i in 0..f.capacity() / 2 {
                f.insert_duplicated(-1).unwrap();
                f.insert_duplicated(i).unwrap();
                assert!(f.count(-1) >= i);
                assert!(f.count(i) >= 1);
            }
        }
    }

    #[test]
    fn test_insert_duplicated_two() {
        for s in 0..10 {
            for c in [200, 800, 1500] {
                let mut f = Filter::new(c, 0.001).unwrap();
                for i in 0..f.capacity() / 2 {
                    f.insert_duplicated(-1).unwrap();
                    assert_eq!(f.count(-1), i + 1);
                    assert_eq!(f.count(s), i);
                    f.insert_duplicated(s).unwrap();
                    assert_eq!(f.count(-1), i + 1);
                    assert_eq!(f.count(s), i + 1);
                }
            }
        }
    }

    #[test]
    fn test_insert_duplicated_one() {
        for s in 0..10 {
            for cap in [100, 200, 500, 1000] {
                let mut f = Filter::new(cap, 0.01).unwrap();
                for i in 0..f.capacity() {
                    f.insert_duplicated(s).unwrap();
                    assert!(f.count(s) > i);
                }
                assert_eq!(f.count(s), f.capacity());
            }
        }
    }

    #[test]
    fn test_auto_resize_two() {
        let mut f = Filter::new_resizeable(50, 1000, 0.01).unwrap();
        for _ in 0..50 {
            f.insert_duplicated(0).unwrap();
        }
        for _ in 0..3 {
            f.insert_duplicated(1).unwrap();
        }
        f.grow_if_possible().unwrap();
        f.grow_if_possible().unwrap();
        f.grow_if_possible().unwrap();
        assert_eq!(f.count(0), 50);
        assert_eq!(f.count(1), 3);
    }

    #[test]
    fn test_new_resizeable() {
        let mut f = Filter::new_resizeable(100, 100, 0.01).unwrap();
        assert!(f.grow_if_possible().is_err());
        let mut f = Filter::new_resizeable(0, 100, 0.01).unwrap();
        assert!(f.grow_if_possible().is_ok());
    }

    #[test]
    fn test_new_resizeable_fp_rate_ceiling() {
        let f = Filter::new_resizeable(0, 1000, 0.05).unwrap();
        let ratio = f.max_error_ratio_resizeable();
        assert!(
            ratio <= 0.05,
            "max error ratio {} exceeds requested fp rate",
            ratio
        );
    }

    #[test]
    #[should_panic]
    fn test_new_capacity_overflow() {
        Filter::new_resizeable(100, u64::MAX, 0.01).unwrap();
    }

    #[test]
    #[should_panic]
    fn test_new_hash_overflow() {
        Filter::new_resizeable(100, u64::MAX / 20, 0.01).unwrap();
    }

    #[test]
    fn test_auto_resize_one() {
        let mut f = Filter::new_resizeable(100, 500, 0.01).unwrap();
        for i in 0u64.. {
            if f.insert_duplicated(i).is_err() {
                assert_eq!(f.len(), i);
                break;
            }
        }
        assert!(f.len() >= 500);
        for i in 0u64..f.len() {
            assert!(f.contains(i), "{}", i);
        }
    }

    #[test]
    fn test_remainders_and_shifts() {
        let mut f = Filter::new(200, 0.01).unwrap();
        let c = f.capacity();
        for j in 0..c {
            f.set_remainder(j, 0b1011101);
            assert_eq!(f.get_remainder(j), 0b1011101);
            f.set_runend(j, true);
            assert!(f.is_runend(j));
        }
        for j in 0..c {
            f.set_remainder(j, 0b1111111);
            assert_eq!(f.get_remainder(j), 0b1111111);
            f.set_runend(j, false);
            assert!(!f.is_runend(j));
        }
        for j in 0..c {
            f.set_remainder(j, 0b1101101);
            assert_eq!(f.get_remainder(j), 0b1101101);
            f.set_runend(j, true);
            assert!(f.is_runend(j));
        }
        f.shift_remainders_by_1(0, c);
        f.shift_runends_by_1(0, c);

        for j in 1..=c {
            assert_eq!(f.get_remainder(j), 0b1101101);
        }
        assert!(!f.is_runend(0));
        for j in 1..=c {
            assert_eq!(f.get_remainder(j), 0b1101101);
            assert!(f.is_runend(j));
        }
    }

    #[test]
    fn test_remove() {
        for fp in [0.0001, 0.00001, 0.000001] {
            for cap in [0, 100, 200, 400, 1000] {
                let mut f = Filter::new(cap, fp).unwrap();
                dbg!(f.rbits, f.capacity());
                let c = f.capacity();
                for i in 0..c {
                    assert!(f.insert(i).unwrap());
                }
                assert_eq!(f.len(), c);
                for i in 0..c {
                    for j in 0..c {
                        assert_eq!(f.count(j), (j >= i) as u64, "{}", j);
                    }
                    // f.printout();
                    assert!(f.remove(i));
                    // f.printout();
                }
                assert!(f.is_empty());
            }
        }
    }
    #[test]
    fn test_remove_dup_one() {
        for s in 0..10 {
            for cap in [0, 100, 200, 500, 1000] {
                let mut f = Filter::new(cap, 0.0001).unwrap();
                let c = f.capacity();
                for _ in 0..c {
                    f.insert_duplicated(s).unwrap();
                }
                assert_eq!(f.len(), c);
                for i in 0..c {
                    assert_eq!(f.count(s), c - i);
                    assert!(f.remove(s));
                }
                assert!(f.is_empty());
            }
        }
    }
    #[test]
    fn test_remove_dup_two() {
        for s in 0..10 {
            dbg!(s);
            for cap in [100, 200, 500, 1000] {
                let mut f = Filter::new(cap, 0.0001).unwrap();
                let c = f.capacity();
                for _ in 0..c / 2 {
                    f.insert_duplicated(-1).unwrap();
                    f.insert_duplicated(s).unwrap();
                }
                assert_eq!(f.count(-1), c / 2);
                assert_eq!(f.count(s), c / 2);
                for i in 0..c / 2 {
                    assert_eq!(f.count(-1), c / 2 - i);
                    assert_eq!(f.count(s), c / 2 - i);
                    assert!(f.remove(-1));
                    assert_eq!(f.count(-1), c / 2 - i - 1);
                    assert_eq!(f.count(s), c / 2 - i);
                    assert!(f.remove(s));
                    assert_eq!(f.count(-1), c / 2 - i - 1);
                    assert_eq!(f.count(s), c / 2 - i - 1);
                }
                assert!(f.is_empty());
            }
        }
    }

    #[test]
    fn test_it_works() {
        for fp_rate_arg in [0.01, 0.001, 0.0001] {
            let mut f = Filter::new(100_000, fp_rate_arg).unwrap();
            assert!(!f.contains(0));
            assert_eq!(f.len(), 0);
            for i in 0..f.capacity() {
                f.insert_duplicated(i).unwrap();
            }
            for i in 0..f.capacity() {
                assert!(f.contains(i));
            }
            let fp_test_start = f.capacity();
            let est_fp_rate = (fp_test_start..)
                .take(50_000)
                .filter(|i| f.contains(i))
                .count() as f64
                / 50_000.0;
            dbg!(f.max_error_ratio(), est_fp_rate);
            assert!(est_fp_rate <= f.max_error_ratio());
        }
    }

    #[test]
    fn test_with_fingerprint_size_resizes() {
        let mut f = Filter::with_fingerprint_size(0, 8).unwrap();
        assert_eq!(f.fingerprint_size(), 8);
        assert_eq!(f.max_capacity(), (128u64 * 19).div_ceil(20));
        assert_eq!(f.capacity(), (64u64 * 19).div_ceil(20));
        for i in 0..f.max_capacity() {
            f.insert_fingerprint(false, i).unwrap();
        }
        assert_eq!(f.len(), f.max_capacity());
        assert!(f.insert_fingerprint(false, f.max_capacity()).is_err());
    }

    #[test]
    fn test_with_fingerprint_size() {
        let fingerprints = [
            0u64,
            0,
            1,
            1,
            1,
            1,
            1,
            0x777777777777,
            u32::MAX as u64 - 1,
            u32::MAX as u64 - 1,
            u32::MAX as u64,
            u64::MAX - 1,
            u64::MAX - 1,
            u64::MAX,
            u64::MAX,
        ];
        for fip_size in [7, 16, 24, 31, 49, 64] {
            let mut filter = Filter::with_fingerprint_size(1, fip_size).unwrap();
            for h in fingerprints {
                filter.insert_fingerprint(true, h).unwrap();
            }
            let out: Vec<u64> = filter.fingerprints().collect::<Vec<_>>();
            let mut expect = fingerprints.map(|h| h << (64 - fip_size) >> (64 - fip_size));
            expect.sort_unstable();
            assert_eq!(out, expect);
        }
    }

    #[test]
    fn test_merge() {
        fn test(mut f1: Filter, mut f2: Filter, mut f3: Filter) {
            assert!(f1.merge(true, &f1.clone()).is_ok());
            assert!(f1.merge(true, &f2).is_ok());
            assert!(f1.merge(true, &f3).is_ok());
            assert!(f2.merge(true, &f1).is_err());
            assert!(f2.merge(true, &f2.clone()).is_ok());
            assert!(f2.merge(true, &f3).is_ok());
            assert!(f3.merge(true, &f1).is_err());
            assert!(f3.merge(true, &f2).is_err());
            assert!(f3.merge(true, &f3.clone()).is_ok());

            f1.insert_fingerprint(true, 1).unwrap();
            f2.insert_fingerprint(true, 1).unwrap();
            f2.insert_fingerprint(true, 2).unwrap();
            f3.insert_fingerprint(true, 1).unwrap();
            f3.insert_fingerprint(true, 2).unwrap();
            f3.insert_fingerprint(true, 3).unwrap();
            assert_eq!(f1.len(), 1);
            assert_eq!(f2.len(), 2);
            assert_eq!(f3.len(), 3);

            f1.merge(false, &f1.clone()).unwrap();
            assert_eq!(f1.len(), 1);
            f1.merge(true, &f2.clone()).unwrap();
            assert_eq!(f1.len(), 3);
            f1.merge(false, &f3.clone()).unwrap();
            assert_eq!(f1.len(), 4);

            for _ in f1.len()..f1.capacity() {
                f1.insert_fingerprint(true, 1).unwrap();
            }
            assert_eq!(f1.len(), f1.capacity());
            assert!(matches!(
                f1.insert_impl(u64::MAX, 1),
                Err(Error::CapacityExceeded)
            ));
            if f1.max_capacity() > f1.capacity() {
                // Resizable: merge grows to accommodate.
                let len_before = f1.len();
                f1.merge(true, &f1.clone()).unwrap();
                assert_eq!(f1.len(), len_before * 2);
            } else {
                // Non-resizable: merge fails when capacity exceeded.
                assert!(matches!(
                    f1.merge(true, &f1.clone()),
                    Err(Error::CapacityExceeded)
                ));
            }
            assert!(matches!(f1.insert_fingerprint(false, 1), Ok(false)));
            assert!(matches!(f1.merge(false, &f1.clone()), Ok(())));
        }
        test(
            Filter::with_fingerprint_size(1, 10).unwrap(),
            Filter::with_fingerprint_size(1, 11).unwrap(),
            Filter::with_fingerprint_size(1, 12).unwrap(),
        );
        test(
            Filter::new(1, 0.01).unwrap(),
            Filter::new(1, 0.001).unwrap(),
            Filter::new(1, 0.0001).unwrap(),
        );
    }

    #[test]
    fn test_merge_correctness() {
        let max_cap = 10000u64;
        for (cap, fp_rate) in [(100, 0.01), (500, 0.001), (1000, 0.01)] {
            let new = |c: u64| Filter::new_resizeable(c, max_cap, fp_rate).unwrap();
            let n = cap as u64;

            // Build two filters with 50% overlap:
            // f1 has [0..n), f2 has [n/2..n*3/2)
            let mut f1 = new(n);
            let mut f2 = new(n);
            for i in 0..n {
                f1.insert_duplicated(i).unwrap();
            }
            for i in n / 2..n + n / 2 {
                f2.insert_duplicated(i).unwrap();
            }

            // Dedup merge: result should contain all unique items
            let mut merged_dedup = f1.clone();
            merged_dedup.merge(false, &f2).unwrap();
            for i in 0..n + n / 2 {
                assert!(merged_dedup.contains(i), "missing {i} after dedup merge");
            }

            // Self-merge with dedup shouldn't change len
            let mut self_merge = f1.clone();
            self_merge.merge(false, &f1).unwrap();
            assert_eq!(self_merge.len(), f1.len());

            // Duplicate merge: len should be sum of both
            let mut merged_dup = f1.clone();
            merged_dup.merge(true, &f2).unwrap();
            assert_eq!(merged_dup.len(), f1.len() + f2.len());
            for i in 0..n + n / 2 {
                assert!(merged_dup.contains(i), "missing {i} after dup merge");
            }

            // Merge two disjoint full filters (exercises pre-sizing growth)
            let mut r1 = new(n);
            let mut r2 = new(n);
            for i in 0..n {
                r1.insert_duplicated(i).unwrap();
            }
            for i in n..n * 2 {
                r2.insert_duplicated(i).unwrap();
            }
            let r1_len = r1.len();
            let r2_len = r2.len();
            r1.merge(true, &r2).unwrap();
            assert_eq!(r1.len(), r1_len + r2_len);

            // Fingerprints must remain sorted after every merge
            for f in [&merged_dedup, &self_merge, &merged_dup, &r1] {
                let fps: Vec<u64> = f.fingerprints().collect();
                for w in fps.windows(2) {
                    assert!(w[0] <= w[1], "unsorted fingerprints: {} > {}", w[0], w[1]);
                }
            }
        }
    }

    #[cfg(feature = "serde")]
    #[test]
    fn test_serde() {
        for capacity in [100, 1000, 10000] {
            for fp_ratio in [0.2, 0.1, 0.01, 0.001, 0.0001] {
                let mut f = Filter::new(capacity, fp_ratio).unwrap();
                for i in 0..f.capacity() {
                    f.insert(i).unwrap();
                }

                let ser = serde_cbor::to_vec(&f).unwrap();
                f = serde_cbor::from_slice(&ser).unwrap();
                for i in 0..f.capacity() {
                    f.contains(i);
                }
                dbg!(
                    f.current_error_ratio(),
                    f.max_error_ratio(),
                    f.capacity(),
                    f.len(),
                    ser.len()
                );
            }
        }
    }

    #[cfg(feature = "serde")]
    #[test]
    fn test_filter_ref_zero_copy() {
        for capacity in [100, 1000, 10000] {
            for fp_ratio in [0.2, 0.1, 0.01, 0.001] {
                let mut f = Filter::new(capacity, fp_ratio).unwrap();
                for i in 0..f.capacity() {
                    f.insert(i).unwrap();
                }

                let ser = serde_cbor::to_vec(&f).unwrap();
                let fr: FilterRef<'_> = serde_cbor::from_slice(&ser).unwrap();

                // All read-only accessors match
                assert_eq!(fr.len(), f.len());
                assert_eq!(fr.is_empty(), f.is_empty());
                assert_eq!(fr.fingerprint_size(), f.fingerprint_size());
                assert_eq!(fr.memory_usage(), f.memory_usage());
                assert_eq!(fr.capacity(), f.capacity());
                assert_eq!(fr.max_capacity(), f.max_capacity());
                assert_eq!(fr.max_error_ratio(), f.max_error_ratio());
                assert_eq!(fr.current_error_ratio(), f.current_error_ratio());

                // contains and count match
                for i in 0..f.capacity() {
                    assert_eq!(fr.contains(i), f.contains(i));
                    assert_eq!(fr.count(i), f.count(i));
                }

                // fingerprint-based queries match
                let fp = compute_fingerprint(42u64, f.fingerprint_size());
                assert_eq!(fr.contains_fingerprint(fp), f.contains_fingerprint(fp));
                assert_eq!(fr.count_fingerprint(fp), f.count_fingerprint(fp));

                // fingerprints iterator matches
                let owned_fps: Vec<u64> = f.fingerprints().collect();
                let ref_fps: Vec<u64> = fr.fingerprints().collect();
                assert_eq!(owned_fps, ref_fps);

                // IntoIterator matches
                let ref_iter_fps: Vec<u64> = fr.into_iter().collect();
                assert_eq!(owned_fps, ref_iter_fps);

                // to_owned roundtrip
                let f2 = fr.to_owned();
                assert_eq!(f2.len(), f.len());
                assert_eq!(f2.fingerprint_size(), f.fingerprint_size());
                for i in 0..f.capacity() {
                    assert_eq!(f2.contains(i), f.contains(i));
                }
            }
        }
    }

    #[cfg(feature = "serde")]
    #[test]
    fn test_filter_ref_as_ref() {
        let mut f = Filter::new(100, 0.01).unwrap();
        for i in 0..50u64 {
            f.insert(i).unwrap();
        }

        let fr = f.as_filter_ref();
        assert_eq!(fr.len(), f.len());
        assert_eq!(fr.fingerprint_size(), f.fingerprint_size());
        for i in 0..50u64 {
            assert!(fr.contains(i));
        }

        let fps: Vec<u64> = f.fingerprints().collect();
        let ref_fps: Vec<u64> = fr.fingerprints().collect();
        assert_eq!(fps, ref_fps);
    }

    #[cfg(feature = "serde")]
    #[test]
    fn test_serde_rejects_invalid() {
        // Valid filter to use as a base for tampering
        let f = Filter::new(100, 0.01).unwrap();
        let ser = serde_cbor::to_vec(&f).unwrap();

        // Valid deserialization works
        let _: Filter = serde_cbor::from_slice(&ser).unwrap();

        // Tamper with qbits to an invalid value (too small)
        let mut tampered: serde_cbor::Value = serde_cbor::from_slice(&ser).unwrap();
        if let serde_cbor::Value::Map(ref mut map) = tampered {
            map.insert(
                serde_cbor::Value::Text("q".into()),
                serde_cbor::Value::Integer(1), // qbits=1, way too small
            );
        }
        let tampered_bytes = serde_cbor::to_vec(&tampered).unwrap();
        assert!(serde_cbor::from_slice::<Filter>(&tampered_bytes).is_err());

        // Tamper with buffer length (truncate)
        let mut tampered: serde_cbor::Value = serde_cbor::from_slice(&ser).unwrap();
        if let serde_cbor::Value::Map(ref mut map) = tampered {
            map.insert(
                serde_cbor::Value::Text("b".into()),
                serde_cbor::Value::Bytes(vec![0; 8]),
            );
        }
        let tampered_bytes = serde_cbor::to_vec(&tampered).unwrap();
        assert!(serde_cbor::from_slice::<Filter>(&tampered_bytes).is_err());

        // Tamper with len to exceed total buckets
        let mut tampered: serde_cbor::Value = serde_cbor::from_slice(&ser).unwrap();
        if let serde_cbor::Value::Map(ref mut map) = tampered {
            map.insert(
                serde_cbor::Value::Text("l".into()),
                serde_cbor::Value::Integer(i128::from(u64::MAX)),
            );
        }
        let tampered_bytes = serde_cbor::to_vec(&tampered).unwrap();
        assert!(serde_cbor::from_slice::<Filter>(&tampered_bytes).is_err());

        // Tamper with qbits to exceed MAX_QBITS
        let mut tampered: serde_cbor::Value = serde_cbor::from_slice(&ser).unwrap();
        if let serde_cbor::Value::Map(ref mut map) = tampered {
            map.insert(
                serde_cbor::Value::Text("q".into()),
                serde_cbor::Value::Integer(63),
            );
        }
        let tampered_bytes = serde_cbor::to_vec(&tampered).unwrap();
        assert!(serde_cbor::from_slice::<Filter>(&tampered_bytes).is_err());

        // Tamper with max_qbits to exceed qbits + rbits - 1
        let mut tampered: serde_cbor::Value = serde_cbor::from_slice(&ser).unwrap();
        if let serde_cbor::Value::Map(ref mut map) = tampered {
            map.insert(
                serde_cbor::Value::Text("m".into()),
                serde_cbor::Value::Integer(60),
            );
        }
        let tampered_bytes = serde_cbor::to_vec(&tampered).unwrap();
        assert!(serde_cbor::from_slice::<Filter>(&tampered_bytes).is_err());
    }

    #[test]
    fn test_dec_offset_edge_case() {
        // case found in fuzz testing, exercises offset decrement across blocks
        #[rustfmt::skip]
        let sample = [(0u16, 287), (2u16, 1), (9u16, 2), (10u16, 1), (53u16, 5), (61u16, 5), (127u16, 2), (232u16, 1), (255u16, 21), (314u16, 2), (317u16, 2), (384u16, 2), (511u16, 3), (512u16, 2), (1599u16, 2), (2303u16, 5), (2559u16, 2), (2568u16, 3), (2815u16, 2), (6400u16, 2), (9211u16, 2), (9728u16, 2), (10790u16, 1), (10794u16, 94), (10797u16, 2), (10999u16, 2), (11007u16, 2), (11520u16, 1), (12800u16, 4), (12842u16, 2), (13823u16, 1), (14984u16, 2), (15617u16, 2), (15871u16, 4), (16128u16, 3), (16383u16, 2), (16394u16, 1), (18167u16, 2), (23807u16, 1), (32759u16, 2)];
        let template = Filter::new(400, 0.1).unwrap();
        let fingerprint_bits = template.qbits.get() + 3;
        let mut f = Filter::with_fingerprint_size(400, fingerprint_bits).unwrap();
        for (i, c) in sample {
            for _ in 0..c {
                f.insert_duplicated(i).unwrap();
            }
        }
        f.validate_offsets(0, f.total_buckets().get());
        f.remove(0u16);
        f.validate_offsets(0, f.total_buckets().get());
    }

    #[test]
    fn test_capacity_edge_cases() {
        // Cap at 28 to avoid buffer allocation overflow on 32-bit platforms
        let max_n = if cfg!(target_pointer_width = "32") {
            28
        } else {
            32
        };
        for n in 1..max_n {
            let base = (1u64 << n) * 19 / 20;
            // Test numbers around the edge
            for i in [base - 1, base, base + 1] {
                let filter = Filter::new(i, 0.01).unwrap();
                assert!(
                    filter.capacity() >= i,
                    "Requested capacity {} but got {}",
                    i,
                    filter.capacity()
                );
                assert_eq!(filter.capacity(), filter.max_capacity());
            }
        }
    }

    #[test]
    fn test_max_capacity() {
        for i in 7..=64 {
            let f = Filter::with_fingerprint_size(0, i).unwrap();
            assert!(f.capacity() <= f.max_capacity());
            assert_eq!(
                f.max_capacity(),
                ((1u64 << (i - 1).min(MAX_QBITS)) * 19).div_ceil(20)
            );
        }
        for i in 1..MAX_QBITS {
            let f = Filter::new_resizeable(0, 2u64.pow(i as u32), 0.5).unwrap();
            assert_eq!(f.capacity(), 61);
            assert!(f.capacity() <= f.max_capacity());
        }
        // Test the maximum capacity
        let f = Filter::new_resizeable(0, MAX_CAPACITY, 0.5).unwrap();
        assert_eq!(f.capacity(), 61);
        assert_eq!(f.max_capacity(), MAX_CAPACITY);
        // Test the maximum capacity + 1, which should fail
        Filter::new_resizeable(0, MAX_CAPACITY + 1, 0.5).unwrap_err();
    }

    #[test]
    fn test_builder_matches_regular() {
        for fp_rate in [0.01, 0.001, 0.0001] {
            for cap in [100, 500, 1000, 5000] {
                let mut regular = Filter::new(cap, fp_rate).unwrap();
                let mut fingerprints: Vec<u64> = (0..regular.capacity())
                    .map(|i| compute_fingerprint(i, regular.fingerprint_size()))
                    .collect();
                fingerprints.sort_unstable();

                for &h in &fingerprints {
                    regular.insert_fingerprint(true, h).unwrap();
                }

                let mut inserter = Builder::new(Filter::new(cap, fp_rate).unwrap());
                for &h in &fingerprints {
                    inserter.insert_fingerprint(true, h).unwrap();
                }
                let sorted = inserter.into_filter();

                assert_eq!(regular.len(), sorted.len());
                let reg_fps: Vec<u64> = regular.fingerprints().collect();
                let sort_fps: Vec<u64> = sorted.fingerprints().collect();
                assert_eq!(reg_fps, sort_fps);
            }
        }
    }

    #[test]
    fn test_builder_no_duplicates() {
        let mut inserter = Builder::new(Filter::new(1000, 0.01).unwrap());
        let fp_size = inserter.fingerprint_size();
        let mut fingerprints: Vec<u64> = (0..500u64)
            .map(|i| compute_fingerprint(i, fp_size))
            .collect();
        fingerprints.sort_unstable();
        let mut inserted = 0;
        for &h in &fingerprints {
            if inserter.insert_fingerprint(false, h).unwrap() {
                inserted += 1;
            }
        }
        let f = inserter.into_filter();

        // With duplicate=false, count of inserts should equal the number of distinct fingerprints
        let distinct: std::collections::HashSet<u64> = fingerprints.iter().copied().collect();
        assert_eq!(inserted, distinct.len());
        assert_eq!(f.len(), distinct.len() as u64);
    }

    #[test]
    fn test_builder_with_duplicates() {
        let mut inserter = Builder::new(Filter::new(1000, 0.01).unwrap());
        let fp_size = inserter.fingerprint_size();

        // Insert the same fingerprint multiple times
        let hash = compute_fingerprint(42u64, fp_size);
        for _ in 0..10 {
            inserter.insert_fingerprint(true, hash).unwrap();
        }
        let f = inserter.into_filter();

        assert_eq!(f.len(), 10);
        assert_eq!(f.count_fingerprint(hash), 10);
    }

    #[test]
    fn test_builder_auto_growth() {
        let mut inserter = Builder::new(Filter::new_resizeable(100, 5000, 0.01).unwrap());
        let initial_cap = inserter.capacity();
        let fp_size = inserter.fingerprint_size();

        let mut fingerprints: Vec<u64> = (0..3000u64)
            .map(|i| compute_fingerprint(i, fp_size))
            .collect();
        fingerprints.sort_unstable();
        for &h in &fingerprints {
            inserter.insert_fingerprint(true, h).unwrap();
        }
        let f = inserter.into_filter();

        assert!(f.capacity() > initial_cap, "filter should have grown");
        assert_eq!(f.len(), 3000);

        // Verify all items are present
        for i in 0..3000u64 {
            assert!(f.contains(i), "missing item {}", i);
        }
    }

    #[test]
    fn test_builder_empty_filter() {
        let inserter = Builder::new(Filter::new(100, 0.01).unwrap());
        let f = inserter.into_filter();
        assert!(f.is_empty());
    }

    #[test]
    fn test_builder_various_fingerprint_sizes() {
        for fp_size in [14, 16, 20, 24, 32] {
            let mut inserter = Builder::new(Filter::with_fingerprint_size(500, fp_size).unwrap());
            let cap = inserter.capacity();
            let mut fingerprints: Vec<u64> =
                (0..cap).map(|i| compute_fingerprint(i, fp_size)).collect();
            fingerprints.sort_unstable();
            for &h in &fingerprints {
                inserter.insert_fingerprint(true, h).unwrap();
            }
            let f = inserter.into_filter();

            assert_eq!(f.len(), cap);

            // Verify via fingerprints() iteration
            let stored: Vec<u64> = f.fingerprints().collect();
            assert_eq!(stored.len(), cap as usize);
            // Should be sorted
            for w in stored.windows(2) {
                assert!(w[0] <= w[1]);
            }
        }
    }

    #[test]
    fn test_builder_single_block() {
        // Small filter that fits in a single block (64 slots)
        let mut inserter = Builder::new(Filter::new(50, 0.01).unwrap());
        let fp_size = inserter.fingerprint_size();
        let cap = inserter.capacity();
        let mut fingerprints: Vec<u64> =
            (0..cap).map(|i| compute_fingerprint(i, fp_size)).collect();
        fingerprints.sort_unstable();
        for &h in &fingerprints {
            inserter.insert_fingerprint(true, h).unwrap();
        }
        let f = inserter.into_filter();

        assert_eq!(f.total_buckets().get(), 64);
        assert_eq!(f.len(), cap);
        for i in 0..cap {
            assert!(f.contains(i));
        }
    }

    #[test]
    fn test_builder_multi_block_spillover() {
        // Use a filter with multiple blocks and a small fingerprint size to force spillover
        let mut inserter = Builder::new(Filter::with_fingerprint_size(500, 14).unwrap());
        let cap = inserter.capacity();
        let fp_size = inserter.fingerprint_size();
        let mask = (1u64 << fp_size) - 1;

        // Create fingerprints that will have many collisions in the same quotient
        // to force runs that spill across block boundaries
        let mut fingerprints: Vec<u64> = (0..cap).map(|i| i & mask).collect();
        fingerprints.sort_unstable();

        let mut regular = Filter::with_fingerprint_size(500, 14).unwrap();
        for &h in &fingerprints {
            regular.insert_fingerprint(true, h).unwrap();
        }
        for &h in &fingerprints {
            inserter.insert_fingerprint(true, h).unwrap();
        }
        let sorted = inserter.into_filter();

        assert_eq!(sorted.len(), regular.len());
        assert_eq!(
            sorted.fingerprints().collect::<Vec<_>>(),
            regular.fingerprints().collect::<Vec<_>>()
        );
    }

    #[test]
    fn test_builder_identical_state() {
        // Verifies that sorted insertion produces byte-identical filter state
        // compared to regular insertion, including at high occupancy where
        // the wrapping slow path is triggered.
        for (cap, fp_rate) in [(500, 0.001), (100, 0.01), (1000, 0.0001)] {
            let mut inserter = Builder::new(Filter::new(cap, fp_rate).unwrap());
            let fp_size = inserter.fingerprint_size();
            let capacity = inserter.capacity();

            let mut fingerprints: Vec<u64> = (0..capacity)
                .map(|i| compute_fingerprint(i, fp_size))
                .collect();
            fingerprints.sort_unstable();

            let mut regular = Filter::new(cap, fp_rate).unwrap();
            for &h in &fingerprints {
                regular.insert_fingerprint(true, h).unwrap();
            }
            for &h in &fingerprints {
                inserter.insert_fingerprint(true, h).unwrap();
            }
            let sorted = inserter.into_filter();

            assert_eq!(
                regular.buffer, sorted.buffer,
                "buffer mismatch for cap={cap}, fp_rate={fp_rate}"
            );
        }
    }

    #[test]
    fn test_builder_hash_collision_dedup() {
        // Regression test: two different hash values that truncate to the same
        // (quotient, remainder) must be treated as duplicates when duplicate=false.
        // See: fuzz_sorted_insert/minimized-from-c401dba0cd67a0a3c34c19ce181d23a0dbd37e8e
        let fp_size = 7u8;
        let cap = 5u64;

        let mut fingerprints: Vec<u64> = vec![0, 12032];
        fingerprints.sort_unstable();

        // Regular insertion
        let mut regular = Filter::with_fingerprint_size(cap, fp_size).unwrap();
        for &h in &fingerprints {
            let _ = regular.insert_fingerprint(false, h);
        }

        // Sorted insertion
        let mut inserter = Builder::new(Filter::with_fingerprint_size(cap, fp_size).unwrap());
        for &h in &fingerprints {
            let _ = inserter.insert_fingerprint(false, h);
        }
        let sorted_f = inserter.into_filter();

        assert_eq!(regular.len(), sorted_f.len());
        let reg_fps: Vec<u64> = regular.fingerprints().collect();
        let sort_fps: Vec<u64> = sorted_f.fingerprints().collect();
        assert_eq!(reg_fps, sort_fps);
    }

    #[test]
    fn test_custom_hasher() {
        use std::collections::hash_map::RandomState;

        let hasher = RandomState::new();
        let mut f = Filter::new_with_hasher(1000, 0.01, hasher).unwrap();
        for i in 0u64..100 {
            f.insert(i).unwrap();
        }
        for i in 0u64..100 {
            assert!(f.contains(i));
        }
        // Remove and verify
        assert!(f.remove(50u64));
        assert!(!f.contains(50u64));
    }

    #[test]
    fn test_random_build_hasher() {
        let hasher = foldhash_portable::quality::RandomState::default();
        let mut f = Filter::new_with_hasher(1000, 0.01, hasher).unwrap();
        for i in 0u64..100 {
            f.insert(i).unwrap();
        }
        for i in 0u64..100 {
            assert!(f.contains(i));
        }
    }

    #[test]
    fn test_stable_build_hasher_deterministic() {
        // Two filters with the default StableBuildHasher produce identical fingerprints
        let mut f1 = Filter::new(1000, 0.01).unwrap();
        let mut f2 = Filter::new(1000, 0.01).unwrap();
        for i in 0u64..100 {
            f1.insert(i).unwrap();
            f2.insert(i).unwrap();
        }
        let fps1: Vec<u64> = f1.fingerprints().collect();
        let fps2: Vec<u64> = f2.fingerprints().collect();
        assert_eq!(fps1, fps2);
    }

    #[cfg(feature = "serde")]
    #[test]
    fn test_hash_stability() {
        // Build filters with known inputs and verify serialized bytes match
        // hardcoded values. This catches any change in the hash algorithm or
        // StableHasher normalization, and ensures cross-platform compatibility
        // (the same bytes must be produced on 32-bit BE and 64-bit LE).

        fn build_filter(items: &[usize], capacity: u64, fp_rate: f64) -> Vec<u8> {
            let mut f = Filter::new(capacity, fp_rate).unwrap();
            for &i in items {
                f.insert(i).unwrap();
            }
            serde_cbor::to_vec(&f).unwrap()
        }

        // Small filter with a few usize items (exercises write_usize → write_u64 normalization)
        let small = build_filter(&[1, 2, 3, 42, 100], 100, 0.01);
        // Larger filter with sequential usize items
        let seq = build_filter(&(0..50).collect::<Vec<_>>(), 100, 0.01);
        // Filter with string items
        let mut f = Filter::new(100, 0.01).unwrap();
        for s in ["hello", "world", "foo", "bar"] {
            f.insert(s).unwrap();
        }
        let strings = serde_cbor::to_vec(&f).unwrap();

        // Hardcoded expected bytes — if these change, the hash algorithm or
        // StableHasher normalization changed, breaking cross-platform compatibility.
        // usize items exercise write_usize → write_u64 normalization (32-bit vs 64-bit).
        assert_eq!(
            small,
            [
                165, 97, 98, 88, 154, 0, 0, 17, 0, 136, 0, 0, 0, 0, 0, 17, 0, 136, 0, 0, 0, 0, 0,
                0, 0, 0, 0, 0, 0, 93, 0, 0, 160, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 224, 14, 0, 0,
                158, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                0, 0, 0, 0, 0, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                97, 108, 5, 97, 113, 7, 97, 114, 7, 97, 109, 246
            ]
        );
        assert_eq!(
            seq,
            [
                165, 97, 98, 88, 154, 0, 6, 249, 12, 228, 11, 2, 36, 134, 6, 121, 13, 228, 11, 2,
                68, 134, 0, 203, 30, 0, 0, 0, 0, 93, 0, 32, 162, 45, 167, 48, 91, 0, 189, 11, 0, 0,
                0, 0, 192, 5, 0, 184, 14, 159, 164, 22, 224, 2, 0, 0, 0, 128, 35, 0, 0, 0, 0, 0, 0,
                128, 0, 0, 184, 113, 1, 0, 11, 22, 0, 0, 0, 226, 0, 14, 74, 209, 193, 40, 72, 117,
                144, 14, 74, 145, 195, 40, 72, 117, 144, 128, 110, 44, 15, 0, 0, 0, 0, 40, 224, 7,
                0, 132, 1, 32, 0, 0, 0, 6, 220, 242, 126, 42, 0, 0, 0, 140, 1, 0, 0, 160, 11, 120,
                1, 0, 0, 0, 0, 2, 0, 4, 1, 59, 128, 21, 224, 137, 121, 0, 0, 0, 0, 80, 6, 0, 120,
                0, 0, 0, 0, 0, 0, 0, 0, 97, 108, 24, 50, 97, 113, 7, 97, 114, 7, 97, 109, 246
            ]
        );
        assert_eq!(
            strings,
            [
                165, 97, 98, 88, 154, 0, 32, 0, 0, 0, 0, 8, 0, 32, 32, 0, 0, 0, 0, 8, 0, 32, 0, 0,
                0, 0, 72, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                0, 0, 0, 0, 0, 0, 0, 0, 160, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 216, 0,
                0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                64, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                97, 108, 4, 97, 113, 7, 97, 114, 7, 97, 109, 246
            ]
        );
    }
}