// dryice-bio 0.2.0
//
// High-throughput transient container for read-like genomic records
// Documentation
//! High-throughput transient container for read-like genomic records.
//!
//! `dryice` is a block-oriented temporary storage format optimized for
//! workflows where sequencing records need to move to disk and back
//! quickly, especially external sorting, partitioning, and other
//! out-of-core genomics pipelines.
//!
//! The crate is parser-agnostic: any type implementing [`SeqRecordLike`]
//! can be written into a `dryice` file, and records are read back as
//! borrowed slices with no per-record allocation. Sequence, quality, and
//! name encodings are selected via trait-based codec type parameters,
//! and users can implement their own codecs.
//!
//! # Writing records (default codecs)
//!
//! ```
//! use dryice::{DryIceWriter, SeqRecord};
//!
//! # fn example() -> Result<(), dryice::DryIceError> {
//! let mut buf = Vec::new();
//! let mut writer = DryIceWriter::builder()
//!     .inner(&mut buf)
//!     .build();
//!
//! let record = SeqRecord::new(
//!     b"read1".to_vec(),
//!     b"ACGTACGT".to_vec(),
//!     b"!!!!!!!!".to_vec(),
//! )?;
//! writer.write_record(&record)?;
//! writer.finish()?;
//! # Ok(())
//! # }
//! ```
//!
//! # Writing with compact codecs
//!
//! ```
//! use dryice::{DryIceWriter, SeqRecord};
//!
//! # fn example() -> Result<(), dryice::DryIceError> {
//! let mut buf = Vec::new();
//! let mut writer = DryIceWriter::builder()
//!     .inner(&mut buf)
//!     .two_bit_exact()
//!     .binned_quality()
//!     .split_names()
//!     .target_block_records(4096)
//!     .build();
//!
//! let record = SeqRecord::new(
//!     b"instrument:run:flowcell 1:N:0:ATCACG".to_vec(),
//!     b"ACGTACGT".to_vec(),
//!     b"!!!!!!!!".to_vec(),
//! )?;
//! writer.write_record(&record)?;
//! writer.finish()?;
//! # Ok(())
//! # }
//! ```
//!
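//! # Writing with record keys
//!
//! Select a key type on the builder (`bytes8_key()` here) and pass a key
//! alongside each record with `write_record_with_key`.
//!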
//! ```
//! use dryice::{Bytes8Key, DryIceWriter, SeqRecord};
//!
//! # fn example() -> Result<(), dryice::DryIceError> {
//! let mut buf = Vec::new();
//! let mut writer = DryIceWriter::builder()
//!     .inner(&mut buf)
//!     .bytes8_key()
//!     .build();
//!
//! let record = SeqRecord::new(
//!     b"read1".to_vec(),
//!     b"ACGTACGT".to_vec(),
//!     b"!!!!!!!!".to_vec(),
//! )?;
//! let key = Bytes8Key(*b"sortkey!");
//! writer.write_record_with_key(&record, &key)?;
//! writer.finish()?;
//! # Ok(())
//! # }
//! ```
//!
//! # Reading records (zero-copy)
//!
//! ```
//! use dryice::{DryIceReader, DryIceWriter, SeqRecord, SeqRecordLike};
//!
//! # fn example() -> Result<(), dryice::DryIceError> {
//! let mut buf = Vec::new();
//! let mut writer = DryIceWriter::builder().inner(&mut buf).build();
//! let record = SeqRecord::new(
//!     b"r1".to_vec(), b"ACGT".to_vec(), b"!!!!".to_vec()
//! )?;
//! writer.write_record(&record)?;
//! writer.finish()?;
//!
//! let mut reader = DryIceReader::new(buf.as_slice())?;
//! while reader.next_record()? {
//!     let _name = reader.name();
//!     let _seq = reader.sequence();
//!     let _qual = reader.quality();
//! }
//! # Ok(())
//! # }
//! ```
//!
//! # Reading records (convenience iterator)
//!
//! ```
//! use dryice::{DryIceReader, DryIceWriter, SeqRecord};
//!
//! # fn example() -> Result<(), dryice::DryIceError> {
//! let mut buf = Vec::new();
//! let mut writer = DryIceWriter::builder().inner(&mut buf).build();
//! let record = SeqRecord::new(
//!     b"r1".to_vec(), b"ACGT".to_vec(), b"!!!!".to_vec()
//! )?;
//! writer.write_record(&record)?;
//! writer.finish()?;
//!
//! let reader = DryIceReader::new(buf.as_slice())?;
//! for record in reader.into_records() {
//!     let record = record?;
//!     println!("{}", record);
//! }
//! # Ok(())
//! # }
//! ```
//!
//! # Zero-copy reader-to-writer piping
//!
//! ```
//! use dryice::{DryIceReader, DryIceWriter, SeqRecord, SeqRecordLike};
//!
//! # fn example() -> Result<(), dryice::DryIceError> {
//! let mut buf1 = Vec::new();
//! let mut writer1 = DryIceWriter::builder().inner(&mut buf1).build();
//! let record = SeqRecord::new(
//!     b"r1".to_vec(), b"ACGT".to_vec(), b"!!!!".to_vec()
//! )?;
//! writer1.write_record(&record)?;
//! writer1.finish()?;
//!
//! let mut buf2 = Vec::new();
//! let mut reader = DryIceReader::new(buf1.as_slice())?;
//! let mut writer2 = DryIceWriter::builder().inner(&mut buf2).build();
//! while reader.next_record()? {
//!     writer2.write_record(&reader)?;
//! }
//! writer2.finish()?;
//! # Ok(())
//! # }
//! ```
//!
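//! # Reading with non-default codecs
//!
//! The example below writes with non-default codecs and reads the file
//! back through `DryIceReader::with_codecs`, naming the same three codec
//! types that were selected on the writer builder.
//!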
//! ```
//! use dryice::{
//!     BinnedQualityCodec, DryIceReader, DryIceWriter, SeqRecord,
//!     SeqRecordLike, SplitNameCodec, TwoBitExactCodec,
//! };
//!
//! # fn example() -> Result<(), dryice::DryIceError> {
//! let mut buf = Vec::new();
//! let mut writer = DryIceWriter::builder()
//!     .inner(&mut buf)
//!     .two_bit_exact()
//!     .binned_quality()
//!     .split_names()
//!     .build();
//! let record = SeqRecord::new(
//!     b"instrument:run 1:N:0".to_vec(),
//!     b"ACGT".to_vec(),
//!     b"!!!!".to_vec(),
//! )?;
//! writer.write_record(&record)?;
//! writer.finish()?;
//!
//! let mut reader = DryIceReader::with_codecs::<
//!     TwoBitExactCodec,
//!     BinnedQualityCodec,
//!     SplitNameCodec,
//! >(buf.as_slice())?;
//! while reader.next_record()? {
//!     let _seq = reader.sequence();
//! }
//! # Ok(())
//! # }
//! ```
//!
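//! # Custom codec implementation
//!
//! Implement [`SequenceCodec`] to plug in your own sequence encoding. The
//! demo codec below upper-cases every byte on encode and copies the bytes
//! back unchanged on decode, so the original letter case is not restored;
//! it therefore declares `LOSSY = true`.
//!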
//! ```
//! use dryice::{DryIceError, SequenceCodec};
//!
//! struct UppercaseCodec;
//!
//! impl SequenceCodec for UppercaseCodec {
//!     const TYPE_TAG: [u8; 16] = *b"demo:seq:upper!!";
//!     const LOSSY: bool = true;
//!
//!     fn encode_into(sequence: &[u8], output: &mut Vec<u8>) -> Result<(), DryIceError> {
//!         output.extend(sequence.iter().map(u8::to_ascii_uppercase));
//!         Ok(())
//!     }
//!
//!     fn decode_into(
//!         encoded: &[u8],
//!         _original_len: usize,
//!         output: &mut Vec<u8>,
//!     ) -> Result<(), DryIceError> {
//!         output.extend_from_slice(encoded);
//!         Ok(())
//!     }
//! }
//! ```

// Module layout. `pub mod` modules (`async_io`, `config`, `fields`, `key`,
// `mmap_io`) are reachable by path from outside the crate. The plain `mod`
// modules are private: `block`, `error`, `io`, and `record` have selected
// items re-exported at the crate root via `pub use`, while no re-export
// from `format` appears among those `pub use` lines.
//
// `async_io` is compiled only when the `async` cargo feature is enabled.
#[cfg(feature = "async")]
pub mod async_io;
mod block;
pub mod config;
mod error;
pub mod fields;
mod format;
mod io;
pub mod key;
// `mmap_io` is compiled only when the `mmap` cargo feature is enabled.
#[cfg(feature = "mmap")]
pub mod mmap_io;
mod record;

// Crate-root re-exports, so callers can name these items directly under
// the crate root (as the crate-level examples do) instead of through the
// defining module.
//
// Async reader/writer; present only when the `async` feature is enabled.
#[cfg(feature = "async")]
pub use async_io::{AsyncDryIceReader, AsyncDryIceWriter};
// Codec items from the private `block` module, grouped by submodule:
// `name`, `quality`, and `sequence`.
pub use block::{
    name::{NameCodec, OmittedNameCodec, RawNameCodec, SplitNameCodec},
    quality::{BinnedQualityCodec, OmittedQualityCodec, QualityCodec, RawQualityCodec},
    sequence::{RawAsciiCodec, SequenceCodec, TwoBitExactCodec, TwoBitLossyNCodec},
};
// Writer configuration types; `config` is also a public module, so these
// are reachable both at the root and under `config::`.
pub use config::{BlockLayoutOptions, BlockSizePolicy, DryIceWriterOptions};
// The error type used in the `Result`s of the crate-level examples.
pub use error::DryIceError;
// Reader, writer, and record-iteration types from the private `io` module.
pub use io::{DryIceReader, DryIceRecords, DryIceWriter, SelectedDryIceReader, SelectedRecord};
// Record-key types; `key` is also a public module.
pub use key::{Bytes8Key, Bytes16Key, NoRecordKey, RecordKey};
// Memory-mapped reader; present only when the `mmap` feature is enabled.
#[cfg(feature = "mmap")]
pub use mmap_io::MmapDryIceReader;
// Owned record type and the record traits from the private `record` module.
pub use record::{SeqRecord, SeqRecordExt, SeqRecordLike};