dryice-bio 0.2.0

High-throughput transient container for read-like genomic records
Documentation

High-throughput transient container for read-like genomic records.

dryice is a block-oriented temporary storage format optimized for workflows where sequencing records need to move to disk and back quickly, especially external sorting, partitioning, and other out-of-core genomics pipelines.

The crate is parser-agnostic: any type implementing [`SeqRecordLike`] can be written into a dryice file, and records are read back as borrowed slices with no per-record allocation. Sequence, quality, and name encodings are selected via trait-based codec type parameters, and users can implement their own codecs.

Writing records (default codecs)

use dryice::{DryIceWriter, SeqRecord, SeqRecordLike};

# fn example() -> Result<(), dryice::DryIceError> {
// In-memory buffer that receives the encoded output.
let mut buf = Vec::new();
// No codec selectors are chained on the builder, so its default codecs apply.
let mut writer = DryIceWriter::builder()
    .inner(&mut buf)
    .build();

// `SeqRecord::new` is fallible, hence the `?`. The three byte vectors look like
// name, sequence, and quality in that order — confirm against `SeqRecord::new`.
let record = SeqRecord::new(
    b"read1".to_vec(),
    b"ACGTACGT".to_vec(),
    b"!!!!!!!!".to_vec(),
)?;
writer.write_record(&record)?;
// Finish the writer after the last record; presumably this flushes any
// buffered block to `buf` — confirm against `DryIceWriter::finish`.
writer.finish()?;
# Ok(())
# }

Writing with compact codecs

use dryice::{DryIceWriter, SeqRecord};

# fn example() -> Result<(), dryice::DryIceError> {
// In-memory buffer that receives the encoded output.
let mut buf = Vec::new();
// Each selector below swaps one default codec for a compact one. Judging by
// the names, they cover sequence, quality, and name encoding respectively.
let mut writer = DryIceWriter::builder()
    .inner(&mut buf)
    .two_bit_exact()
    .binned_quality()
    .split_names()
    // NOTE(review): presumably the target number of records per block — confirm.
    .target_block_records(4096)
    .build();

// The name here is a structured, Illumina-style header rather than a short id.
let record = SeqRecord::new(
    b"instrument:run:flowcell 1:N:0:ATCACG".to_vec(),
    b"ACGTACGT".to_vec(),
    b"!!!!!!!!".to_vec(),
)?;
writer.write_record(&record)?;
// Finish the writer after the last record; errors are propagated with `?`.
writer.finish()?;
# Ok(())
# }

Writing with record keys

use dryice::{Bytes8Key, DryIceWriter, SeqRecord};

# fn example() -> Result<(), dryice::DryIceError> {
// In-memory buffer that receives the encoded output.
let mut buf = Vec::new();
// `.bytes8_key()` configures the writer for keyed records; the key type used
// below is `Bytes8Key`.
let mut writer = DryIceWriter::builder()
    .inner(&mut buf)
    .bytes8_key()
    .build();

let record = SeqRecord::new(
    b"read1".to_vec(),
    b"ACGTACGT".to_vec(),
    b"!!!!!!!!".to_vec(),
)?;
// `*b"sortkey!"` is a `[u8; 8]`, so the key wraps exactly eight bytes.
let key = Bytes8Key(*b"sortkey!");
// Keyed writers take the record and its key together.
writer.write_record_with_key(&record, &key)?;
writer.finish()?;
# Ok(())
# }

Reading records (zero-copy)

use dryice::{DryIceReader, DryIceWriter, SeqRecord, SeqRecordLike};

# fn example() -> Result<(), dryice::DryIceError> {
// Setup: write a single record into an in-memory buffer so there is
// something to read back.
let mut buf = Vec::new();
let mut writer = DryIceWriter::builder().inner(&mut buf).build();
let record = SeqRecord::new(
    b"r1".to_vec(), b"ACGT".to_vec(), b"!!!!".to_vec()
)?;
writer.write_record(&record)?;
writer.finish()?;

// Open a reader over the written bytes; construction is fallible.
let mut reader = DryIceReader::new(buf.as_slice())?;
// `next_record` yields `true` while a record is available; errors propagate.
while reader.next_record()? {
    // The accessors expose the fields of the current record through the
    // reader itself, so no owned record is built per iteration.
    let _name = reader.name();
    let _seq = reader.sequence();
    let _qual = reader.quality();
}
# Ok(())
# }

Reading records (convenience iterator)

use dryice::{DryIceReader, DryIceWriter, SeqRecord};

# fn example() -> Result<(), dryice::DryIceError> {
// Setup: write a single record into an in-memory buffer so there is
// something to read back.
let mut buf = Vec::new();
let mut writer = DryIceWriter::builder().inner(&mut buf).build();
let record = SeqRecord::new(
    b"r1".to_vec(), b"ACGT".to_vec(), b"!!!!".to_vec()
)?;
writer.write_record(&record)?;
writer.finish()?;

let reader = DryIceReader::new(buf.as_slice())?;
// `into_records` takes the reader by value and yields one `Result` per record,
// so each item is unwrapped with `?` before use.
for record in reader.into_records() {
    let record = record?;
    // Printing with `{}` relies on the yielded record type implementing `Display`.
    println!("{}", record);
}
# Ok(())
# }

Zero-copy reader-to-writer piping

use dryice::{DryIceReader, DryIceWriter, SeqRecord, SeqRecordLike};

# fn example() -> Result<(), dryice::DryIceError> {
// Setup: produce a first dryice stream in `buf1` containing one record.
let mut buf1 = Vec::new();
let mut writer1 = DryIceWriter::builder().inner(&mut buf1).build();
let record = SeqRecord::new(
    b"r1".to_vec(), b"ACGT".to_vec(), b"!!!!".to_vec()
)?;
writer1.write_record(&record)?;
writer1.finish()?;

// Second stream: read from `buf1` and write into `buf2`.
let mut buf2 = Vec::new();
let mut reader = DryIceReader::new(buf1.as_slice())?;
let mut writer2 = DryIceWriter::builder().inner(&mut buf2).build();
while reader.next_record()? {
    // The reader itself is passed as the record, so no intermediate
    // `SeqRecord` is constructed. Presumably the reader implements
    // `SeqRecordLike` for its current record — confirm.
    writer2.write_record(&reader)?;
}
writer2.finish()?;
# Ok(())
# }

Reading with non-default codecs

use dryice::{
    BinnedQualityCodec, DryIceReader, DryIceWriter, SeqRecord,
    SeqRecordLike, SplitNameCodec, TwoBitExactCodec,
};

# fn example() -> Result<(), dryice::DryIceError> {
// Setup: write one record using the compact sequence, quality, and name codecs.
let mut buf = Vec::new();
let mut writer = DryIceWriter::builder()
    .inner(&mut buf)
    .two_bit_exact()
    .binned_quality()
    .split_names()
    .build();
let record = SeqRecord::new(
    b"instrument:run 1:N:0".to_vec(),
    b"ACGT".to_vec(),
    b"!!!!".to_vec(),
)?;
writer.write_record(&record)?;
writer.finish()?;

// The reader names its codecs explicitly as type parameters, mirroring the
// selectors chained on the writer above.
// NOTE(review): presumably these must match the codecs the file was written
// with, and a mismatch is reported as an error — confirm.
let mut reader = DryIceReader::with_codecs::<
    TwoBitExactCodec,
    BinnedQualityCodec,
    SplitNameCodec,
>(buf.as_slice())?;
while reader.next_record()? {
    let _seq = reader.sequence();
}
# Ok(())
# }

Custom codec implementation

use dryice::{DryIceError, SequenceCodec};

/// Demonstration sequence codec that stores every byte as ASCII uppercase.
struct UppercaseCodec;

impl SequenceCodec for UppercaseCodec {
    /// Sixteen-byte tag naming this codec.
    const TYPE_TAG: [u8; 16] = *b"demo:seq:upper!!";
    /// Encoding discards the original letter case, so it cannot be undone.
    const LOSSY: bool = true;

    /// Appends the ASCII-uppercased bytes of `sequence` to `output`.
    ///
    /// Non-alphabetic bytes pass through unchanged. Existing contents of
    /// `output` are preserved.
    fn encode_into(sequence: &[u8], output: &mut Vec<u8>) -> Result<(), DryIceError> {
        let uppercased = sequence.iter().copied().map(|base| base.to_ascii_uppercase());
        output.extend(uppercased);
        Ok(())
    }

    /// Appends `encoded` to `output` verbatim.
    ///
    /// The stored bytes are already the decoded form, so the original length
    /// is not consulted.
    fn decode_into(
        encoded: &[u8],
        _original_len: usize,
        output: &mut Vec<u8>,
    ) -> Result<(), DryIceError> {
        output.extend(encoded.iter().copied());
        Ok(())
    }
}