// fastars 0.1.0 - Docs.rs

//! Object pool for FASTQ records to enable memory reuse.
//!
//! This module provides `ReadPool`, a simple object pool that allows
//! reusing `OwnedRecord` instances instead of allocating new ones for
//! every read. This significantly reduces heap allocations when processing
//! large FASTQ files.
//!
//! ## Usage
//!
//! ```ignore
//! use fastars::io::pool::ReadPool;
//!
//! let mut pool = ReadPool::new(256); // Hint for typical read length
//!
//! // Acquire a record (from pool or newly allocated)
//! let mut record = pool.acquire();
//! record.set_from(b"read1", b"ACGT", b"IIII");
//!
//! // Use the record...
//!
//! // Return it to the pool for reuse
//! pool.release(record);
//! ```
//!
//! ## Thread Safety
//!
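//! `ReadPool` performs no internal synchronization: every operation that
//! changes the pool takes `&mut self`, so a single pool cannot be used from
//! several threads at once without external locking. Give each worker thread
//! its own pool instance (for example, as thread-local storage).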

use super::OwnedRecord;

/// Default pool capacity (number of records to keep in the pool).
///
/// `ReadPool::new` uses this value both as the retention limit of the pool
/// and as the initial capacity of the vector that stores idle records.
const DEFAULT_POOL_SIZE: usize = 1024;

/// A pool of reusable `OwnedRecord` instances.
///
/// Idle records are kept on a stack: `acquire()` pops the most recently
/// released record, or creates a new one sized by `capacity_hint` when the
/// stack is empty. `release()` clears a record and pushes it back, unless the
/// pool already holds `max_pool_size` records, in which case the record is
/// simply dropped.
#[derive(Debug)]
pub struct ReadPool {
    /// Idle records available for reuse; the last element is handed out first.
    pool: Vec<OwnedRecord>,
    /// Capacity passed to `OwnedRecord::with_capacity` when a new record is created.
    capacity_hint: usize,
    /// Upper bound on `pool.len()`; records released beyond it are dropped.
    max_pool_size: usize,
}

impl ReadPool {
    /// Create a new pool with the given capacity hint.
    ///
    /// The pool retains at most `DEFAULT_POOL_SIZE` idle records. The capacity
    /// hint is used when creating new records to pre-allocate their vectors,
    /// reducing the need for reallocations as data is added.
    ///
    /// # Arguments
    ///
    /// * `capacity_hint` - Typical length of reads (e.g., 150 for Illumina short reads)
    #[inline]
    pub fn new(capacity_hint: usize) -> Self {
        Self::with_max_size(capacity_hint, DEFAULT_POOL_SIZE)
    }

    /// Create a new pool with custom size limits.
    ///
    /// Storage for at most `DEFAULT_POOL_SIZE` idle records is reserved up
    /// front; a larger `max_pool_size` only bounds how far the pool may grow.
    /// This keeps very large limits (including `usize::MAX` used as
    /// "unbounded") from requesting an enormous allocation, which would panic
    /// or abort before a single record is pooled.
    ///
    /// # Arguments
    ///
    /// * `capacity_hint` - Typical length of reads
    /// * `max_pool_size` - Maximum records to keep in the pool
    #[inline]
    pub fn with_max_size(capacity_hint: usize, max_pool_size: usize) -> Self {
        // The limit is a ceiling, not a promise that the pool will fill up,
        // so cap the eager reservation and let the vector grow on demand.
        let initial_slots = max_pool_size.min(DEFAULT_POOL_SIZE);
        Self {
            pool: Vec::with_capacity(initial_slots),
            capacity_hint,
            max_pool_size,
        }
    }

    /// Acquire a record from the pool.
    ///
    /// If the pool has available records, the most recently released one is
    /// returned (already cleared). Otherwise, a new record is created with
    /// `capacity_hint` as its pre-allocated capacity.
    #[inline]
    pub fn acquire(&mut self) -> OwnedRecord {
        match self.pool.pop() {
            Some(recycled) => recycled,
            None => OwnedRecord::with_capacity(self.capacity_hint),
        }
    }

    /// Release a record back to the pool.
    ///
    /// The record is cleared and added to the pool for future reuse. If the
    /// pool already holds `max_pool_size` records, the record is dropped
    /// instead and its memory is freed.
    #[inline]
    pub fn release(&mut self, mut record: OwnedRecord) {
        if self.pool.len() >= self.max_pool_size {
            // Pool is full: `record` goes out of scope here and is freed.
            return;
        }
        record.clear();
        self.pool.push(record);
    }

    /// Release multiple records back to the pool.
    ///
    /// Records are taken from the front of `records` until the pool reaches
    /// `max_pool_size`; each kept record is cleared first. Any records that do
    /// not fit are dropped.
    #[inline]
    pub fn release_batch(&mut self, records: Vec<OwnedRecord>) {
        let available_space = self.max_pool_size.saturating_sub(self.pool.len());

        // `extend` reserves once from the iterator's size hint; whatever is
        // left in `records` past `available_space` is dropped with the iterator.
        self.pool
            .extend(records.into_iter().take(available_space).map(|mut record| {
                record.clear();
                record
            }));
    }

    /// Get the current number of idle records in the pool.
    #[inline]
    pub fn len(&self) -> usize {
        self.pool.len()
    }

    /// Check if the pool currently holds no idle records.
    #[inline]
    pub fn is_empty(&self) -> bool {
        self.pool.is_empty()
    }

    /// Get the capacity hint used for new records.
    #[inline]
    pub fn capacity_hint(&self) -> usize {
        self.capacity_hint
    }

    /// Clear the pool, dropping all stored records.
    #[inline]
    pub fn clear(&mut self) {
        self.pool.clear();
    }

    /// Pre-populate the pool with empty records.
    ///
    /// This can be useful to avoid allocation pauses during processing.
    /// The pool never grows past `max_pool_size`, so fewer than `count`
    /// records may be added.
    ///
    /// # Arguments
    ///
    /// * `count` - Number of records to pre-allocate
    pub fn prefill(&mut self, count: usize) {
        let free_slots = self.max_pool_size.saturating_sub(self.pool.len());
        let to_add = count.min(free_slots);
        // Copy the hint out so the closure does not borrow `self` while
        // `self.pool` is being extended.
        let hint = self.capacity_hint;
        self.pool
            .extend(std::iter::repeat_with(|| OwnedRecord::with_capacity(hint)).take(to_add));
    }
}

impl Default for ReadPool {
    fn default() -> Self {
        Self::new(256) // Default for typical short reads
    }
}

/// A fixed-size batch of records that can be reused.
///
/// Unlike `Vec<OwnedRecord>`, this batch pre-allocates all records upfront
/// and reuses them across iterations, avoiding per-batch allocations.
///
/// Only the first `len` records are considered valid. A batch is filled in
/// place through `get_mut`, after which `set_len` records how many slots
/// were written.
#[derive(Debug)]
pub struct FixedBatch {
    /// Every pre-allocated record slot; the length of this vector is the
    /// batch capacity.
    records: Vec<OwnedRecord>,
    /// Number of valid records (`records[..len]` are valid).
    len: usize,
}

impl FixedBatch {
    /// Create a new fixed batch with the given capacity.
    ///
    /// All `capacity` records are allocated immediately; the batch starts
    /// with zero valid records.
    ///
    /// # Arguments
    /// * `capacity` - Maximum number of records this batch can hold
    /// * `read_capacity` - Capacity passed to `OwnedRecord::with_capacity` for each record
    pub fn new(capacity: usize, read_capacity: usize) -> Self {
        let records: Vec<OwnedRecord> =
            std::iter::repeat_with(|| OwnedRecord::with_capacity(read_capacity))
                .take(capacity)
                .collect();
        Self { records, len: 0 }
    }

    /// Get a mutable reference to the record slot at the given index.
    ///
    /// Any slot up to the batch capacity may be accessed, including slots
    /// beyond the current `len`; this is how a batch is filled before
    /// `set_len` is called.
    ///
    /// # Panics
    /// Panics if `idx >= capacity`.
    #[inline]
    pub fn get_mut(&mut self, idx: usize) -> &mut OwnedRecord {
        &mut self.records[idx]
    }

    /// Get a reference to the valid record at the given index.
    ///
    /// # Panics
    /// Panics if `idx >= len`, in every build profile.
    #[inline]
    pub fn get(&self, idx: usize) -> &OwnedRecord {
        // Index through the valid prefix so that slots in `len..capacity`,
        // which may hold stale data, are rejected in release builds too
        // rather than only under `debug_assertions`.
        &self.as_slice()[idx]
    }

    /// Set the number of valid records in this batch.
    ///
    /// # Panics
    /// Panics if `len` exceeds the batch capacity, in every build profile.
    #[inline]
    pub fn set_len(&mut self, len: usize) {
        let capacity = self.records.len();
        // Checked unconditionally: an oversized `len` would otherwise only
        // surface later as a slice panic in `iter`, `as_slice` or `clear`.
        assert!(
            len <= capacity,
            "len exceeds capacity ({} > {})",
            len,
            capacity
        );
        self.len = len;
    }

    /// Get the number of valid records in this batch.
    #[inline]
    pub fn len(&self) -> usize {
        self.len
    }

    /// Check if the batch holds no valid records.
    #[inline]
    pub fn is_empty(&self) -> bool {
        self.len == 0
    }

    /// Get the capacity of this batch (the number of pre-allocated records).
    #[inline]
    pub fn capacity(&self) -> usize {
        self.records.len()
    }

    /// Clear the batch, marking all records as invalid but retaining capacity.
    ///
    /// Only the currently valid records (`0..len`) are cleared; slots beyond
    /// `len` are left untouched.
    #[inline]
    pub fn clear(&mut self) {
        let valid = self.len;
        for record in &mut self.records[..valid] {
            record.clear();
        }
        self.len = 0;
    }

    /// Get an iterator over valid records.
    #[inline]
    pub fn iter(&self) -> impl Iterator<Item = &OwnedRecord> {
        self.as_slice().iter()
    }

    /// Get a mutable iterator over valid records.
    #[inline]
    pub fn iter_mut(&mut self) -> impl Iterator<Item = &mut OwnedRecord> {
        let valid = self.len;
        self.records[..valid].iter_mut()
    }

    /// Convert to a Vec, consuming the batch.
    ///
    /// The returned vector contains only the valid records; the unused
    /// pre-allocated slots are dropped.
    /// This is mainly for compatibility with existing code.
    pub fn into_vec(self) -> Vec<OwnedRecord> {
        let Self { mut records, len } = self;
        records.truncate(len);
        records
    }

    /// Get a slice of valid records.
    #[inline]
    pub fn as_slice(&self) -> &[OwnedRecord] {
        &self.records[..self.len]
    }
}

/// Pool of reusable fixed batches.
///
/// Idle batches are kept on a stack and handed out most-recently-released
/// first. When the stack is empty, a new batch is built from
/// `batch_capacity` and `read_capacity`. No limit on the number of retained
/// batches is enforced: every released batch is kept.
#[derive(Debug)]
pub struct BatchPool {
    /// Idle batches available for reuse.
    batches: Vec<FixedBatch>,
    /// Number of records in each newly created batch.
    batch_capacity: usize,
    /// Per-record capacity passed to `FixedBatch::new` for new batches.
    read_capacity: usize,
}

impl BatchPool {
    /// Build an empty batch pool.
    ///
    /// No batch is allocated here; batches are created on demand by
    /// `acquire`.
    ///
    /// # Arguments
    /// * `batch_capacity` - Number of records per batch
    /// * `read_capacity` - Initial capacity for each record's vectors
    pub fn new(batch_capacity: usize, read_capacity: usize) -> Self {
        let batches = Vec::new();
        Self {
            batches,
            batch_capacity,
            read_capacity,
        }
    }

    /// Hand out a batch: the most recently released one if any is idle,
    /// otherwise a freshly allocated batch.
    pub fn acquire(&mut self) -> FixedBatch {
        match self.batches.pop() {
            Some(recycled) => recycled,
            None => FixedBatch::new(self.batch_capacity, self.read_capacity),
        }
    }

    /// Return a batch to the pool; it is cleared before being stored.
    pub fn release(&mut self, batch: FixedBatch) {
        let mut returned = batch;
        returned.clear();
        self.batches.push(returned);
    }

    /// Number of idle batches currently held by the pool.
    pub fn len(&self) -> usize {
        let idle = &self.batches;
        idle.len()
    }

    /// Whether the pool currently holds no idle batches.
    pub fn is_empty(&self) -> bool {
        let idle = &self.batches;
        idle.is_empty()
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// A freshly built pool is empty and remembers its capacity hint.
    #[test]
    fn test_pool_new() {
        let fresh = ReadPool::new(150);
        assert_eq!(fresh.capacity_hint(), 150);
        assert!(fresh.is_empty());
    }

    /// Acquiring from an empty pool yields a new, empty, pre-sized record.
    #[test]
    fn test_pool_acquire_creates_new() {
        let mut pool = ReadPool::new(150);
        let fresh = pool.acquire();

        assert!(fresh.is_empty());
        // Both buffers must honour the capacity hint.
        assert!(fresh.seq.capacity() >= 150);
        assert!(fresh.qual.capacity() >= 150);
    }

    /// A released record comes back cleared but with its buffers intact.
    #[test]
    fn test_pool_release_and_acquire() {
        let mut pool = ReadPool::new(150);

        // Populate a record, then hand it back.
        let mut used = pool.acquire();
        used.set_from(b"read1", b"ACGT", b"IIII");
        pool.release(used);
        assert_eq!(pool.len(), 1);

        // The pooled record is handed out again.
        let recycled = pool.acquire();
        assert!(pool.is_empty());
        assert!(recycled.is_empty()); // contents were cleared
        assert!(recycled.seq.capacity() >= 4); // capacity was kept
    }

    /// Records released into a full pool are dropped.
    #[test]
    fn test_pool_max_size() {
        let mut pool = ReadPool::with_max_size(100, 2);

        // Fill the pool up to its limit.
        for _ in 0..2 {
            pool.release(OwnedRecord::with_capacity(100));
        }
        assert_eq!(pool.len(), 2);

        // One more release must not grow the pool.
        pool.release(OwnedRecord::with_capacity(100));
        assert_eq!(pool.len(), 2);
    }

    /// A batch release keeps only as many records as the pool has room for.
    #[test]
    fn test_pool_release_batch() {
        let mut pool = ReadPool::with_max_size(100, 5);

        let mut batch = Vec::with_capacity(10);
        for _ in 0..10 {
            batch.push(OwnedRecord::with_capacity(100));
        }

        pool.release_batch(batch);
        assert_eq!(pool.len(), 5); // capped at max_pool_size
    }

    /// Prefilling never pushes the pool past its maximum size.
    #[test]
    fn test_pool_prefill() {
        let mut pool = ReadPool::with_max_size(100, 10);

        pool.prefill(5);
        assert_eq!(pool.len(), 5);

        // Asking for more than the remaining room stops at the limit.
        pool.prefill(10);
        assert_eq!(pool.len(), 10);
    }

    /// Clearing drops every pooled record.
    #[test]
    fn test_pool_clear() {
        let mut pool = ReadPool::new(100);

        pool.prefill(5);
        assert_eq!(pool.len(), 5);

        pool.clear();
        assert!(pool.is_empty());
    }

    /// The default pool uses a capacity hint of 256.
    #[test]
    fn test_pool_default() {
        let default_pool = ReadPool::default();
        assert_eq!(default_pool.capacity_hint(), 256);
    }

    /// Buffers that grew past the hint keep their larger capacity after reuse.
    #[test]
    fn test_record_reuse_preserves_capacity() {
        let mut pool = ReadPool::new(256);

        // Fill a record with data longer than the hint.
        let long_seq = vec![b'A'; 500];
        let long_qual = vec![b'I'; 500];
        let mut grown = pool.acquire();
        grown.set_from(b"read1", &long_seq, &long_qual);

        // Capacities after the buffers have grown.
        let seq_cap = grown.seq.capacity();
        let qual_cap = grown.qual.capacity();

        // Round-trip through the pool.
        pool.release(grown);
        let recycled = pool.acquire();

        assert!(recycled.is_empty()); // data cleared
        assert!(recycled.seq.capacity() >= seq_cap);
        assert!(recycled.qual.capacity() >= qual_cap);
    }
}

#[cfg(test)]
mod fixed_batch_tests {
    use super::*;

    /// A new batch has the requested capacity and no valid records.
    #[test]
    fn test_fixed_batch_creation() {
        let fresh = FixedBatch::new(10, 256);
        assert!(fresh.is_empty());
        assert_eq!(fresh.len(), 0);
        assert_eq!(fresh.capacity(), 10);
    }

    /// Slots written through `get_mut` are readable once `set_len` covers them.
    #[test]
    fn test_fixed_batch_usage() {
        let mut batch = FixedBatch::new(3, 64);

        // Write two of the three slots, then publish them.
        batch.get_mut(0).set_from(b"read1", b"ACGT", b"IIII");
        batch.get_mut(1).set_from(b"read2", b"TGCA", b"HHHH");
        batch.set_len(2);

        assert_eq!(batch.len(), 2);
        assert_eq!(batch.get(0).name, b"read1");
        assert_eq!(batch.get(1).seq, b"TGCA");
    }

    /// Clearing resets the valid count while the records keep their buffers.
    #[test]
    fn test_fixed_batch_clear() {
        let mut batch = FixedBatch::new(2, 64);
        batch.get_mut(0).set_from(b"read1", b"ACGT", b"IIII");
        batch.set_len(1);

        batch.clear();

        assert!(batch.is_empty());
        // The slot still owns its pre-allocated buffer.
        assert!(batch.get_mut(0).seq.capacity() >= 64);
    }

    /// Iteration visits only the valid records.
    #[test]
    fn test_fixed_batch_iter() {
        let mut batch = FixedBatch::new(3, 64);
        batch.get_mut(0).set_from(b"a", b"A", b"I");
        batch.get_mut(1).set_from(b"b", b"C", b"H");
        batch.set_len(2);

        let visited: Vec<_> = batch.iter().map(|record| &record.name).collect();
        assert_eq!(visited.len(), 2);
    }

    /// A released batch is handed out again by the next `acquire`.
    #[test]
    fn test_batch_pool() {
        let mut pool = BatchPool::new(10, 256);
        assert!(pool.is_empty());

        let first = pool.acquire();
        assert_eq!(first.capacity(), 10);

        pool.release(first);
        assert_eq!(pool.len(), 1);

        let second = pool.acquire();
        assert!(pool.is_empty());
        assert_eq!(second.capacity(), 10);
    }
}