dryice/lib.rs
1//! High-throughput transient container for read-like genomic records.
2//!
3//! `dryice` is a block-oriented temporary storage format optimized for
4//! workflows where sequencing records need to move to disk and back
5//! quickly, especially external sorting, partitioning, and other
6//! out-of-core genomics pipelines.
7//!
8//! The crate is parser-agnostic: any type implementing [`SeqRecordLike`]
9//! can be written into a `dryice` file, and records are read back as
10//! borrowed slices with no per-record allocation. Sequence, quality, and
11//! name encodings are selected via trait-based codec type parameters,
12//! and users can implement their own codecs.
13//!
14//! # Writing records (default codecs)
15//!
16//! ```
17//! use dryice::{DryIceWriter, SeqRecord, SeqRecordLike};
18//!
19//! # fn example() -> Result<(), dryice::DryIceError> {
20//! let mut buf = Vec::new();
21//! let mut writer = DryIceWriter::builder()
22//! .inner(&mut buf)
23//! .build();
24//!
25//! let record = SeqRecord::new(
26//! b"read1".to_vec(),
27//! b"ACGTACGT".to_vec(),
28//! b"!!!!!!!!".to_vec(),
29//! )?;
30//! writer.write_record(&record)?;
31//! writer.finish()?;
32//! # Ok(())
33//! # }
34//! ```
35//!
36//! # Writing with compact codecs
37//!
38//! ```
39//! use dryice::{DryIceWriter, SeqRecord};
40//!
41//! # fn example() -> Result<(), dryice::DryIceError> {
42//! let mut buf = Vec::new();
43//! let mut writer = DryIceWriter::builder()
44//! .inner(&mut buf)
45//! .two_bit_exact()
46//! .binned_quality()
47//! .split_names()
48//! .target_block_records(4096)
49//! .build();
50//!
51//! let record = SeqRecord::new(
52//! b"instrument:run:flowcell 1:N:0:ATCACG".to_vec(),
53//! b"ACGTACGT".to_vec(),
54//! b"!!!!!!!!".to_vec(),
55//! )?;
56//! writer.write_record(&record)?;
57//! writer.finish()?;
58//! # Ok(())
59//! # }
60//! ```
61//!
62//! # Writing with record keys
63//!
64//! ```
65//! use dryice::{Bytes8Key, DryIceWriter, SeqRecord};
66//!
67//! # fn example() -> Result<(), dryice::DryIceError> {
68//! let mut buf = Vec::new();
69//! let mut writer = DryIceWriter::builder()
70//! .inner(&mut buf)
71//! .bytes8_key()
72//! .build();
73//!
74//! let record = SeqRecord::new(
75//! b"read1".to_vec(),
76//! b"ACGTACGT".to_vec(),
77//! b"!!!!!!!!".to_vec(),
78//! )?;
79//! let key = Bytes8Key(*b"sortkey!");
80//! writer.write_record_with_key(&record, &key)?;
81//! writer.finish()?;
82//! # Ok(())
83//! # }
84//! ```
85//!
86//! # Writing key-only files with empty payload
87//!
88//! ```
89//! use dryice::{Bytes16Key, DryIceWriter};
90//!
91//! # fn example() -> Result<(), dryice::DryIceError> {
92//! let mut buf = Vec::new();
93//! let mut writer = DryIceWriter::builder()
94//! .inner(&mut buf)
95//! .bytes16_key()
96//! .empty_payload()
97//! .build();
98//!
99//! writer.write_key_only(&Bytes16Key(*b"0000000000000001"))?;
100//! writer.write_key_only(&Bytes16Key(*b"0000000000000002"))?;
101//! writer.finish()?;
102//! # Ok(())
103//! # }
104//! ```
105//!
106//! # Writing minimizer keys with the builder conveniences
107//!
108//! ```
109//! use dryice::{DefaultMinimizer64, DryIceWriter, SeqRecord};
110//!
111//! # fn example() -> Result<(), dryice::DryIceError> {
112//! let mut buf = Vec::new();
113//! let mut writer = DryIceWriter::builder()
114//! .inner(&mut buf)
115//! .minimizers_with_sequences()
116//! .build();
117//!
118//! let record = SeqRecord::new(
119//! b"read1".to_vec(),
120//! b"ACGTGCTCAGAGACTCAGAGGATTACAGTTTACGTGCTCAGAGACTCAGAGGA".to_vec(),
121//! vec![b'!'; 53],
122//! )?;
123//!
124//! if let Some(key) = DefaultMinimizer64::try_from_sequence(record.sequence())? {
125//! writer.write_record_with_key(&record, &key)?;
126//! }
127//!
128//! writer.finish()?;
129//! # Ok(())
130//! # }
131//! ```
132//!
133//! # Reading records (zero-copy)
134//!
135//! ```
136//! use dryice::{DryIceReader, DryIceWriter, SeqRecord, SeqRecordLike};
137//!
138//! # fn example() -> Result<(), dryice::DryIceError> {
139//! let mut buf = Vec::new();
140//! let mut writer = DryIceWriter::builder().inner(&mut buf).build();
141//! let record = SeqRecord::new(
142//! b"r1".to_vec(), b"ACGT".to_vec(), b"!!!!".to_vec()
143//! )?;
144//! writer.write_record(&record)?;
145//! writer.finish()?;
146//!
147//! let mut reader = DryIceReader::new(buf.as_slice())?;
148//! while reader.next_record()? {
149//! let _name = reader.name();
150//! let _seq = reader.sequence();
151//! let _qual = reader.quality();
152//! }
153//! # Ok(())
154//! # }
155//! ```
156//!
157//! # Reading keys directly
158//!
159//! ```
160//! use dryice::{
161//! Bytes16Key, DryIceReader, DryIceWriter, OmittedNameCodec, OmittedQualityCodec,
162//! OmittedSequenceCodec,
163//! };
164//!
165//! # fn example() -> Result<(), dryice::DryIceError> {
166//! let mut buf = Vec::new();
167//! let mut writer = DryIceWriter::builder()
168//! .inner(&mut buf)
169//! .bytes16_key()
170//! .empty_payload()
171//! .build();
172//! writer.write_key_only(&Bytes16Key(*b"0000000000000001"))?;
173//! writer.finish()?;
174//!
175//! let mut reader = DryIceReader::builder()
176//! .inner(buf.as_slice())
177//! .sequence_codec::<OmittedSequenceCodec>()
178//! .quality_codec::<OmittedQualityCodec>()
179//! .name_codec::<OmittedNameCodec>()
180//! .record_key::<Bytes16Key>()
181//! .build()?;
182//!
183//! while let Some(key) = reader.next_key()? {
184//! let _ = key;
185//! }
186//! # Ok(())
187//! # }
188//! ```
189//!
190//! # Reading records (convenience iterator)
191//!
192//! ```
193//! use dryice::{DryIceReader, DryIceWriter, SeqRecord};
194//!
195//! # fn example() -> Result<(), dryice::DryIceError> {
196//! let mut buf = Vec::new();
197//! let mut writer = DryIceWriter::builder().inner(&mut buf).build();
198//! let record = SeqRecord::new(
199//! b"r1".to_vec(), b"ACGT".to_vec(), b"!!!!".to_vec()
200//! )?;
201//! writer.write_record(&record)?;
202//! writer.finish()?;
203//!
204//! let reader = DryIceReader::new(buf.as_slice())?;
205//! for record in reader.into_records() {
206//! let record = record?;
207//! println!("{}", record);
208//! }
209//! # Ok(())
210//! # }
211//! ```
212//!
213//! # Zero-copy reader-to-writer piping
214//!
215//! ```
216//! use dryice::{DryIceReader, DryIceWriter, SeqRecord, SeqRecordLike};
217//!
218//! # fn example() -> Result<(), dryice::DryIceError> {
219//! let mut buf1 = Vec::new();
220//! let mut writer1 = DryIceWriter::builder().inner(&mut buf1).build();
221//! let record = SeqRecord::new(
222//! b"r1".to_vec(), b"ACGT".to_vec(), b"!!!!".to_vec()
223//! )?;
224//! writer1.write_record(&record)?;
225//! writer1.finish()?;
226//!
227//! let mut buf2 = Vec::new();
228//! let mut reader = DryIceReader::new(buf1.as_slice())?;
229//! let mut writer2 = DryIceWriter::builder().inner(&mut buf2).build();
230//! while reader.next_record()? {
231//! writer2.write_record(&reader)?;
232//! }
233//! writer2.finish()?;
234//! # Ok(())
235//! # }
236//! ```
237//!
238//! # Temporary file lifecycle
239//!
240//! For filesystem-backed intermediate data, prefer letting `dryice` create and
241//! own the temporary file. [`TempDryIceFile`] composes with the normal
242//! stream-oriented reader and writer APIs, but removes the backing file by
243//! default when the guard is cleaned up or dropped.
244//!
245//! ```
246//! use std::io::{Seek, SeekFrom};
247//!
248//! use dryice::{DryIceReader, DryIceWriter, SeqRecord, TempDryIceFile};
249//!
250//! # fn example() -> Result<(), dryice::DryIceError> {
251//! let temp = TempDryIceFile::new()?;
252//!
253//! let mut file = {
254//! let file = temp.open()?;
255//! let mut writer = DryIceWriter::builder().inner(file).build();
256//! let record = SeqRecord::new(b"r1".to_vec(), b"ACGT".to_vec(), b"!!!!".to_vec())?;
257//! writer.write_record(&record)?;
258//! writer.finish()?
259//! };
260//!
261//! file.seek(SeekFrom::Start(0))?;
262//! let mut reader = DryIceReader::new(file)?;
263//! while reader.next_record()? {
264//! // use the current record
265//! }
266//!
267//! temp.cleanup()?;
268//! # Ok(())
269//! # }
270//! ```
271//!
272//! # Reading with non-default codecs
273//!
274//! ```
275//! use dryice::{
276//! BinnedQualityCodec, DryIceReader, DryIceWriter, SeqRecord,
277//! SeqRecordLike, SplitNameCodec, TwoBitExactCodec,
278//! };
279//!
280//! # fn example() -> Result<(), dryice::DryIceError> {
281//! let mut buf = Vec::new();
282//! let mut writer = DryIceWriter::builder()
283//! .inner(&mut buf)
284//! .two_bit_exact()
285//! .binned_quality()
286//! .split_names()
287//! .build();
288//! let record = SeqRecord::new(
289//! b"instrument:run 1:N:0".to_vec(),
290//! b"ACGT".to_vec(),
291//! b"!!!!".to_vec(),
292//! )?;
293//! writer.write_record(&record)?;
294//! writer.finish()?;
295//!
296//! let mut reader = DryIceReader::with_codecs::<
297//! TwoBitExactCodec,
298//! BinnedQualityCodec,
299//! SplitNameCodec,
300//! >(buf.as_slice())?;
301//! while reader.next_record()? {
302//! let _seq = reader.sequence();
303//! }
304//! # Ok(())
305//! # }
306//! ```
307//!
308//! # Custom codec implementation
309//!
310//! ```
311//! use dryice::{DryIceError, SequenceCodec};
312//!
313//! struct UppercaseCodec;
314//!
315//! impl SequenceCodec for UppercaseCodec {
316//! const TYPE_TAG: [u8; 16] = *b"demo:seq:upper!!";
317//! const LOSSY: bool = true;
318//!
319//! fn encode_into(sequence: &[u8], output: &mut Vec<u8>) -> Result<(), DryIceError> {
320//! output.extend(sequence.iter().map(u8::to_ascii_uppercase));
321//! Ok(())
322//! }
323//!
324//! fn decode_into(
325//! encoded: &[u8],
326//! _original_len: usize,
327//! output: &mut Vec<u8>,
328//! ) -> Result<(), DryIceError> {
329//! output.extend_from_slice(encoded);
330//! Ok(())
331//! }
332//! }
333//! ```
334
// Crate module tree.
//
// `pub mod` modules are reachable by path from outside the crate. The private
// modules `block`, `error`, `io`, and `record` are surfaced only through the
// `pub use` re-exports further down in this file; `format` is private and has
// no re-export visible here.

// Compiled only when the `async` Cargo feature is enabled; provides
// `AsyncDryIceReader` / `AsyncDryIceWriter` (re-exported below under the same gate).
335#[cfg(feature = "async")]
336pub mod async_io;
// Private; hosts the `name`, `quality`, and `sequence` codec submodules whose
// codec traits and types are re-exported at the crate root.
337mod block;
338pub mod config;
// Private; `DryIceError` is re-exported at the crate root.
339mod error;
340pub mod fields;
341mod format;
// Private; the synchronous reader/writer types are re-exported at the crate root.
342mod io;
343pub mod key;
// Compiled only when the `mmap` Cargo feature is enabled; provides
// `MmapDryIceReader` (re-exported below under the same gate).
344#[cfg(feature = "mmap")]
345pub mod mmap_io;
// Private; `SeqRecord`, `SeqRecordLike`, and related items are re-exported at the crate root.
346mod record;
347pub mod temp;
348
349#[cfg(feature = "async")]
350pub use async_io::{AsyncDryIceReader, AsyncDryIceWriter};
351pub use block::{
352 name::{NameCodec, OmittedNameCodec, RawNameCodec, SplitNameCodec},
353 quality::{BinnedQualityCodec, OmittedQualityCodec, QualityCodec, RawQualityCodec},
354 sequence::{
355 OmittedSequenceCodec, RawAsciiCodec, SequenceCodec, TwoBitExactCodec, TwoBitLossyNCodec,
356 },
357};
358pub use config::{BlockLayoutOptions, BlockSizePolicy, DryIceWriterOptions};
359pub use error::DryIceError;
360pub use io::{DryIceReader, DryIceRecords, DryIceWriter, SelectedDryIceReader, SelectedRecord};
361pub use key::{Bytes8Key, Bytes16Key, KmerKey, Minimizer64, NoRecordKey, PrefixKmer64, RecordKey};
/// Convenience alias for [`PrefixKmer64`] with its const parameter fixed to `31`.
///
/// NOTE(review): the parameter is presumably the k-mer length (k = 31) —
/// confirm against the definition in [`key`].
362pub type DefaultPrefixKmer64 = PrefixKmer64<31>;
/// Convenience alias for [`Minimizer64`] with its const parameters fixed to `31` and `15`.
///
/// This is the key type used by the minimizer example in the crate-level
/// documentation, via `DefaultMinimizer64::try_from_sequence`.
///
/// NOTE(review): the two parameters presumably select the k-mer length and the
/// minimizer/window width — their meaning and order are not visible here;
/// confirm against the definition in [`key`].
363pub type DefaultMinimizer64 = Minimizer64<31, 15>;
364#[cfg(feature = "mmap")]
365pub use mmap_io::MmapDryIceReader;
366pub use record::{EMPTY_RECORD, EmptyRecord, SeqRecord, SeqRecordExt, SeqRecordLike};
367pub use temp::TempDryIceFile;