Skip to main content

dryice/
lib.rs

1//! High-throughput transient container for read-like genomic records.
2//!
3//! `dryice` is a block-oriented temporary storage format optimized for
4//! workflows where sequencing records need to move to disk and back
5//! quickly, especially external sorting, partitioning, and other
6//! out-of-core genomics pipelines.
7//!
8//! The crate is parser-agnostic: any type implementing [`SeqRecordLike`]
9//! can be written into a `dryice` file, and records are read back as
10//! borrowed slices with no per-record allocation. Sequence, quality, and
11//! name encodings are selected via trait-based codec type parameters,
12//! and users can implement their own codecs.
13//!
14//! # Writing records (default codecs)
15//!
16//! ```
17//! use dryice::{DryIceWriter, SeqRecord, SeqRecordLike};
18//!
19//! # fn example() -> Result<(), dryice::DryIceError> {
20//! let mut buf = Vec::new();
21//! let mut writer = DryIceWriter::builder()
22//!     .inner(&mut buf)
23//!     .build();
24//!
25//! let record = SeqRecord::new(
26//!     b"read1".to_vec(),
27//!     b"ACGTACGT".to_vec(),
28//!     b"!!!!!!!!".to_vec(),
29//! )?;
30//! writer.write_record(&record)?;
31//! writer.finish()?;
32//! # Ok(())
33//! # }
34//! ```
35//!
36//! # Writing with compact codecs
37//!
38//! ```
39//! use dryice::{DryIceWriter, SeqRecord};
40//!
41//! # fn example() -> Result<(), dryice::DryIceError> {
42//! let mut buf = Vec::new();
43//! let mut writer = DryIceWriter::builder()
44//!     .inner(&mut buf)
45//!     .two_bit_exact()
46//!     .binned_quality()
47//!     .split_names()
48//!     .target_block_records(4096)
49//!     .build();
50//!
51//! let record = SeqRecord::new(
52//!     b"instrument:run:flowcell 1:N:0:ATCACG".to_vec(),
53//!     b"ACGTACGT".to_vec(),
54//!     b"!!!!!!!!".to_vec(),
55//! )?;
56//! writer.write_record(&record)?;
57//! writer.finish()?;
58//! # Ok(())
59//! # }
60//! ```
61//!
62//! # Writing with record keys
63//!
64//! ```
65//! use dryice::{Bytes8Key, DryIceWriter, SeqRecord};
66//!
67//! # fn example() -> Result<(), dryice::DryIceError> {
68//! let mut buf = Vec::new();
69//! let mut writer = DryIceWriter::builder()
70//!     .inner(&mut buf)
71//!     .bytes8_key()
72//!     .build();
73//!
74//! let record = SeqRecord::new(
75//!     b"read1".to_vec(),
76//!     b"ACGTACGT".to_vec(),
77//!     b"!!!!!!!!".to_vec(),
78//! )?;
79//! let key = Bytes8Key(*b"sortkey!");
80//! writer.write_record_with_key(&record, &key)?;
81//! writer.finish()?;
82//! # Ok(())
83//! # }
84//! ```
85//!
86//! # Writing key-only files with empty payload
87//!
88//! ```
89//! use dryice::{Bytes16Key, DryIceWriter};
90//!
91//! # fn example() -> Result<(), dryice::DryIceError> {
92//! let mut buf = Vec::new();
93//! let mut writer = DryIceWriter::builder()
94//!     .inner(&mut buf)
95//!     .bytes16_key()
96//!     .empty_payload()
97//!     .build();
98//!
99//! writer.write_key_only(&Bytes16Key(*b"0000000000000001"))?;
100//! writer.write_key_only(&Bytes16Key(*b"0000000000000002"))?;
101//! writer.finish()?;
102//! # Ok(())
103//! # }
104//! ```
105//!
106//! # Writing minimizer keys with the builder conveniences
107//!
108//! ```
109//! use dryice::{DefaultMinimizer64, DryIceWriter, SeqRecord};
110//!
111//! # fn example() -> Result<(), dryice::DryIceError> {
112//! let mut buf = Vec::new();
113//! let mut writer = DryIceWriter::builder()
114//!     .inner(&mut buf)
115//!     .minimizers_with_sequences()
116//!     .build();
117//!
118//! let record = SeqRecord::new(
119//!     b"read1".to_vec(),
120//!     b"ACGTGCTCAGAGACTCAGAGGATTACAGTTTACGTGCTCAGAGACTCAGAGGA".to_vec(),
121//!     vec![b'!'; 53],
122//! )?;
123//!
124//! if let Some(key) = DefaultMinimizer64::try_from_sequence(record.sequence())? {
125//!     writer.write_record_with_key(&record, &key)?;
126//! }
127//!
128//! writer.finish()?;
129//! # Ok(())
130//! # }
131//! ```
132//!
133//! # Reading records (zero-copy)
134//!
135//! ```
136//! use dryice::{DryIceReader, DryIceWriter, SeqRecord, SeqRecordLike};
137//!
138//! # fn example() -> Result<(), dryice::DryIceError> {
139//! let mut buf = Vec::new();
140//! let mut writer = DryIceWriter::builder().inner(&mut buf).build();
141//! let record = SeqRecord::new(
142//!     b"r1".to_vec(), b"ACGT".to_vec(), b"!!!!".to_vec()
143//! )?;
144//! writer.write_record(&record)?;
145//! writer.finish()?;
146//!
147//! let mut reader = DryIceReader::new(buf.as_slice())?;
148//! while reader.next_record()? {
149//!     let _name = reader.name();
150//!     let _seq = reader.sequence();
151//!     let _qual = reader.quality();
152//! }
153//! # Ok(())
154//! # }
155//! ```
156//!
157//! # Reading keys directly
158//!
159//! ```
160//! use dryice::{
161//!     Bytes16Key, DryIceReader, DryIceWriter, OmittedNameCodec, OmittedQualityCodec,
162//!     OmittedSequenceCodec,
163//! };
164//!
165//! # fn example() -> Result<(), dryice::DryIceError> {
166//! let mut buf = Vec::new();
167//! let mut writer = DryIceWriter::builder()
168//!     .inner(&mut buf)
169//!     .bytes16_key()
170//!     .empty_payload()
171//!     .build();
172//! writer.write_key_only(&Bytes16Key(*b"0000000000000001"))?;
173//! writer.finish()?;
174//!
175//! let mut reader = DryIceReader::builder()
176//!     .inner(buf.as_slice())
177//!     .sequence_codec::<OmittedSequenceCodec>()
178//!     .quality_codec::<OmittedQualityCodec>()
179//!     .name_codec::<OmittedNameCodec>()
180//!     .record_key::<Bytes16Key>()
181//!     .build()?;
182//!
183//! while let Some(key) = reader.next_key()? {
184//!     let _ = key;
185//! }
186//! # Ok(())
187//! # }
188//! ```
189//!
190//! # Reading records (convenience iterator)
191//!
192//! ```
193//! use dryice::{DryIceReader, DryIceWriter, SeqRecord};
194//!
195//! # fn example() -> Result<(), dryice::DryIceError> {
196//! let mut buf = Vec::new();
197//! let mut writer = DryIceWriter::builder().inner(&mut buf).build();
198//! let record = SeqRecord::new(
199//!     b"r1".to_vec(), b"ACGT".to_vec(), b"!!!!".to_vec()
200//! )?;
201//! writer.write_record(&record)?;
202//! writer.finish()?;
203//!
204//! let reader = DryIceReader::new(buf.as_slice())?;
205//! for record in reader.into_records() {
206//!     let record = record?;
207//!     println!("{}", record);
208//! }
209//! # Ok(())
210//! # }
211//! ```
212//!
213//! # Zero-copy reader-to-writer piping
214//!
215//! ```
216//! use dryice::{DryIceReader, DryIceWriter, SeqRecord, SeqRecordLike};
217//!
218//! # fn example() -> Result<(), dryice::DryIceError> {
219//! let mut buf1 = Vec::new();
220//! let mut writer1 = DryIceWriter::builder().inner(&mut buf1).build();
221//! let record = SeqRecord::new(
222//!     b"r1".to_vec(), b"ACGT".to_vec(), b"!!!!".to_vec()
223//! )?;
224//! writer1.write_record(&record)?;
225//! writer1.finish()?;
226//!
227//! let mut buf2 = Vec::new();
228//! let mut reader = DryIceReader::new(buf1.as_slice())?;
229//! let mut writer2 = DryIceWriter::builder().inner(&mut buf2).build();
230//! while reader.next_record()? {
231//!     writer2.write_record(&reader)?;
232//! }
233//! writer2.finish()?;
234//! # Ok(())
235//! # }
236//! ```
237//!
238//! # Temporary file lifecycle
239//!
240//! For filesystem-backed intermediate data, prefer letting `dryice` create and
241//! own the temporary file. [`TempDryIceFile`] composes with the normal
242//! stream-oriented reader and writer APIs, but removes the backing file by
243//! default when the guard is cleaned up or dropped.
244//!
245//! ```
246//! use std::io::{Seek, SeekFrom};
247//!
248//! use dryice::{DryIceReader, DryIceWriter, SeqRecord, TempDryIceFile};
249//!
250//! # fn example() -> Result<(), dryice::DryIceError> {
251//! let temp = TempDryIceFile::new()?;
252//!
253//! let mut file = {
254//!     let file = temp.open()?;
255//!     let mut writer = DryIceWriter::builder().inner(file).build();
256//!     let record = SeqRecord::new(b"r1".to_vec(), b"ACGT".to_vec(), b"!!!!".to_vec())?;
257//!     writer.write_record(&record)?;
258//!     writer.finish()?
259//! };
260//!
261//! file.seek(SeekFrom::Start(0))?;
262//! let mut reader = DryIceReader::new(file)?;
263//! while reader.next_record()? {
264//!     // use the current record
265//! }
266//!
267//! temp.cleanup()?;
268//! # Ok(())
269//! # }
270//! ```
271//!
272//! # Reading with non-default codecs
273//!
274//! ```
275//! use dryice::{
276//!     BinnedQualityCodec, DryIceReader, DryIceWriter, SeqRecord,
277//!     SeqRecordLike, SplitNameCodec, TwoBitExactCodec,
278//! };
279//!
280//! # fn example() -> Result<(), dryice::DryIceError> {
281//! let mut buf = Vec::new();
282//! let mut writer = DryIceWriter::builder()
283//!     .inner(&mut buf)
284//!     .two_bit_exact()
285//!     .binned_quality()
286//!     .split_names()
287//!     .build();
288//! let record = SeqRecord::new(
289//!     b"instrument:run 1:N:0".to_vec(),
290//!     b"ACGT".to_vec(),
291//!     b"!!!!".to_vec(),
292//! )?;
293//! writer.write_record(&record)?;
294//! writer.finish()?;
295//!
296//! let mut reader = DryIceReader::with_codecs::<
297//!     TwoBitExactCodec,
298//!     BinnedQualityCodec,
299//!     SplitNameCodec,
300//! >(buf.as_slice())?;
301//! while reader.next_record()? {
302//!     let _seq = reader.sequence();
303//! }
304//! # Ok(())
305//! # }
306//! ```
307//!
308//! # Custom codec implementation
309//!
310//! ```
311//! use dryice::{DryIceError, SequenceCodec};
312//!
313//! struct UppercaseCodec;
314//!
315//! impl SequenceCodec for UppercaseCodec {
316//!     const TYPE_TAG: [u8; 16] = *b"demo:seq:upper!!";
317//!     const LOSSY: bool = true;
318//!
319//!     fn encode_into(sequence: &[u8], output: &mut Vec<u8>) -> Result<(), DryIceError> {
320//!         output.extend(sequence.iter().map(u8::to_ascii_uppercase));
321//!         Ok(())
322//!     }
323//!
324//!     fn decode_into(
325//!         encoded: &[u8],
326//!         _original_len: usize,
327//!         output: &mut Vec<u8>,
328//!     ) -> Result<(), DryIceError> {
329//!         output.extend_from_slice(encoded);
330//!         Ok(())
331//!     }
332//! }
333//! ```
334
// Crate module tree.
//
// `pub mod` entries are reachable by path from outside the crate. The private
// modules `block`, `error`, `io`, and `record` are surfaced through the
// `pub use` re-exports at the crate root instead.
//
// `async_io` is compiled only when the `async` cargo feature is enabled.
335#[cfg(feature = "async")]
336pub mod async_io;
337mod block;
338pub mod config;
339mod error;
340pub mod fields;
341mod format;
342mod io;
343pub mod key;
// `mmap_io` is compiled only when the `mmap` cargo feature is enabled.
344#[cfg(feature = "mmap")]
345pub mod mmap_io;
346mod record;
347pub mod temp;
348
349#[cfg(feature = "async")]
350pub use async_io::{AsyncDryIceReader, AsyncDryIceWriter};
351pub use block::{
352    name::{NameCodec, OmittedNameCodec, RawNameCodec, SplitNameCodec},
353    quality::{BinnedQualityCodec, OmittedQualityCodec, QualityCodec, RawQualityCodec},
354    sequence::{
355        OmittedSequenceCodec, RawAsciiCodec, SequenceCodec, TwoBitExactCodec, TwoBitLossyNCodec,
356    },
357};
358pub use config::{BlockLayoutOptions, BlockSizePolicy, DryIceWriterOptions};
359pub use error::DryIceError;
360pub use io::{DryIceReader, DryIceRecords, DryIceWriter, SelectedDryIceReader, SelectedRecord};
361pub use key::{Bytes8Key, Bytes16Key, KmerKey, Minimizer64, NoRecordKey, PrefixKmer64, RecordKey};
/// Convenience alias for [`PrefixKmer64`] with its const parameter fixed to `31`.
///
/// NOTE(review): `31` is presumably the k-mer length — confirm against the
/// [`key`] module before relying on it.
362pub type DefaultPrefixKmer64 = PrefixKmer64<31>;
/// Convenience alias for [`Minimizer64`] with its const parameters fixed to `31, 15`.
///
/// The crate-level example "Writing minimizer keys with the builder
/// conveniences" obtains an optional key from
/// `DefaultMinimizer64::try_from_sequence` and passes it to
/// `write_record_with_key` on a writer built with `.minimizers_with_sequences()`.
///
/// NOTE(review): the two parameters are presumably the k-mer length and the
/// minimizer/window length — confirm their order and meaning against the
/// [`key`] module.
363pub type DefaultMinimizer64 = Minimizer64<31, 15>;
364#[cfg(feature = "mmap")]
365pub use mmap_io::MmapDryIceReader;
366pub use record::{EMPTY_RECORD, EmptyRecord, SeqRecord, SeqRecordExt, SeqRecordLike};
367pub use temp::TempDryIceFile;