// dryice/io/writer.rs

1//! Writer for the `dryice` format.
2
3use std::{io::Write, marker::PhantomData};
4
5use crate::{
6    block::{
7        BlockBuilder, BlockBuilderConfig,
8        name::{NameCodec, OmittedNameCodec, RawNameCodec, SplitNameCodec},
9        quality::{BinnedQualityCodec, OmittedQualityCodec, QualityCodec, RawQualityCodec},
10        sequence::{
11            OmittedSequenceCodec, RawAsciiCodec, SequenceCodec, TwoBitExactCodec, TwoBitLossyNCodec,
12        },
13    },
14    config::{BlockLayoutOptions, BlockSizePolicy, DryIceWriterOptions},
15    error::DryIceError,
16    format,
17    key::{Bytes8Key, Bytes16Key, Minimizer64, NoRecordKey, PrefixKmer64, RecordKey},
18    record::{EMPTY_RECORD, SeqRecordLike},
19};
20
/// Marker type recording that a [`DryIceWriterBuilder`] has no output target yet.
///
/// It is the default for the builder's `W` parameter and is replaced by the
/// real target when `inner()` is called. The type is `pub` because it appears
/// in public signatures such as the return type of `DryIceWriter::builder()`;
/// it carries no data.
#[derive(Debug, Clone, Copy)]
pub struct MissingInner;
23
24/// Builder for [`DryIceWriter`].
25pub struct DryIceWriterBuilder<
26    W = MissingInner,
27    S = RawAsciiCodec,
28    Q = RawQualityCodec,
29    N = RawNameCodec,
30    K = NoRecordKey,
31> {
32    inner: W,
33    target_block_records: usize,
34    _codec: PhantomData<S>,
35    _quality: PhantomData<Q>,
36    _name: PhantomData<N>,
37    _key: PhantomData<K>,
38}
39
40impl DryIceWriterBuilder<MissingInner, RawAsciiCodec, RawQualityCodec, RawNameCodec, NoRecordKey> {
41    fn new() -> Self {
42        Self {
43            inner: MissingInner,
44            target_block_records: 8192,
45            _codec: PhantomData,
46            _quality: PhantomData,
47            _name: PhantomData,
48            _key: PhantomData,
49        }
50    }
51}
52
53impl<W, S, Q, N, K> DryIceWriterBuilder<W, S, Q, N, K> {
54    /// Set the block size policy in records.
55    #[must_use]
56    pub fn target_block_records(mut self, n: usize) -> Self {
57        self.target_block_records = n;
58        self
59    }
60}
61
62impl<S, Q, N, K> DryIceWriterBuilder<MissingInner, S, Q, N, K> {
63    /// Set the writer's output target.
64    #[must_use]
65    pub fn inner<W>(self, inner: W) -> DryIceWriterBuilder<W, S, Q, N, K> {
66        DryIceWriterBuilder {
67            inner,
68            target_block_records: self.target_block_records,
69            _codec: PhantomData,
70            _quality: PhantomData,
71            _name: PhantomData,
72            _key: PhantomData,
73        }
74    }
75}
76
77impl<W, Q, N, K> DryIceWriterBuilder<W, RawAsciiCodec, Q, N, K> {
78    /// Configure the writer to use a user-defined sequence codec.
79    #[must_use]
80    pub fn sequence_codec<S: SequenceCodec>(self) -> DryIceWriterBuilder<W, S, Q, N, K> {
81        DryIceWriterBuilder {
82            inner: self.inner,
83            target_block_records: self.target_block_records,
84            _codec: PhantomData,
85            _quality: PhantomData,
86            _name: PhantomData,
87            _key: PhantomData,
88        }
89    }
90
91    /// Configure the writer to use the built-in 2-bit exact codec.
92    #[must_use]
93    pub fn two_bit_exact(self) -> DryIceWriterBuilder<W, TwoBitExactCodec, Q, N, K> {
94        self.sequence_codec::<TwoBitExactCodec>()
95    }
96
97    /// Configure the writer to use the built-in lossy 2-bit codec
98    /// that collapses all ambiguous bases to N.
99    #[must_use]
100    pub fn two_bit_lossy_n(self) -> DryIceWriterBuilder<W, TwoBitLossyNCodec, Q, N, K> {
101        self.sequence_codec::<TwoBitLossyNCodec>()
102    }
103
104    /// Configure the writer to omit sequence data entirely.
105    ///
106    /// Records written through this builder must still satisfy the normal
107    /// sequence/quality length invariant, which for omitted sequence payloads
108    /// means writing empty sequence and quality fields.
109    #[must_use]
110    pub fn omit_sequence(self) -> DryIceWriterBuilder<W, OmittedSequenceCodec, Q, N, K> {
111        self.sequence_codec::<OmittedSequenceCodec>()
112    }
113}
114
115impl<W, K> DryIceWriterBuilder<W, RawAsciiCodec, RawQualityCodec, RawNameCodec, K> {
116    /// Configure the writer to omit names, sequences, and qualities.
117    ///
118    /// This is convenience sugar over the three orthogonal omission controls:
119    /// `omit_sequence()`, `omit_quality()`, and `omit_names()`.
120    #[must_use]
121    pub fn empty_payload(
122        self,
123    ) -> DryIceWriterBuilder<W, OmittedSequenceCodec, OmittedQualityCodec, OmittedNameCodec, K>
124    {
125        DryIceWriterBuilder {
126            inner: self.inner,
127            target_block_records: self.target_block_records,
128            _codec: PhantomData,
129            _quality: PhantomData,
130            _name: PhantomData,
131            _key: PhantomData,
132        }
133    }
134}
135
136impl<W, S, N, K> DryIceWriterBuilder<W, S, RawQualityCodec, N, K> {
137    /// Configure the writer to use a user-defined quality codec.
138    #[must_use]
139    pub fn quality_codec<Q: QualityCodec>(self) -> DryIceWriterBuilder<W, S, Q, N, K> {
140        DryIceWriterBuilder {
141            inner: self.inner,
142            target_block_records: self.target_block_records,
143            _codec: PhantomData,
144            _quality: PhantomData,
145            _name: PhantomData,
146            _key: PhantomData,
147        }
148    }
149
150    /// Configure the writer to use the built-in binned quality codec.
151    #[must_use]
152    pub fn binned_quality(self) -> DryIceWriterBuilder<W, S, BinnedQualityCodec, N, K> {
153        self.quality_codec::<BinnedQualityCodec>()
154    }
155
156    /// Configure the writer to omit quality scores entirely.
157    #[must_use]
158    pub fn omit_quality(self) -> DryIceWriterBuilder<W, S, OmittedQualityCodec, N, K> {
159        self.quality_codec::<OmittedQualityCodec>()
160    }
161}
162
163impl<W, S, Q, K> DryIceWriterBuilder<W, S, Q, RawNameCodec, K> {
164    /// Configure the writer to use a user-defined name codec.
165    #[must_use]
166    pub fn name_codec<N: NameCodec>(self) -> DryIceWriterBuilder<W, S, Q, N, K> {
167        DryIceWriterBuilder {
168            inner: self.inner,
169            target_block_records: self.target_block_records,
170            _codec: PhantomData,
171            _quality: PhantomData,
172            _name: PhantomData,
173            _key: PhantomData,
174        }
175    }
176
177    /// Configure the writer to omit names entirely.
178    #[must_use]
179    pub fn omit_names(self) -> DryIceWriterBuilder<W, S, Q, OmittedNameCodec, K> {
180        self.name_codec::<OmittedNameCodec>()
181    }
182
183    /// Configure the writer to split names on the first space.
184    #[must_use]
185    pub fn split_names(self) -> DryIceWriterBuilder<W, S, Q, SplitNameCodec, K> {
186        self.name_codec::<SplitNameCodec>()
187    }
188}
189
190impl<W, S, Q, N> DryIceWriterBuilder<W, S, Q, N, NoRecordKey> {
191    /// Configure the writer to store a user-defined record-key type.
192    #[must_use]
193    pub fn record_key<K: RecordKey>(self) -> DryIceWriterBuilder<W, S, Q, N, K> {
194        DryIceWriterBuilder {
195            inner: self.inner,
196            target_block_records: self.target_block_records,
197            _codec: PhantomData,
198            _quality: PhantomData,
199            _name: PhantomData,
200            _key: PhantomData,
201        }
202    }
203
204    /// Configure the writer to store the built-in 8-byte key type.
205    #[must_use]
206    pub fn bytes8_key(self) -> DryIceWriterBuilder<W, S, Q, N, Bytes8Key> {
207        self.record_key::<Bytes8Key>()
208    }
209
210    /// Configure the writer to store the built-in 16-byte key type.
211    #[must_use]
212    pub fn bytes16_key(self) -> DryIceWriterBuilder<W, S, Q, N, Bytes16Key> {
213        self.record_key::<Bytes16Key>()
214    }
215
216    /// Configure the writer to store packed canonical prefix kmer keys.
217    ///
218    /// This is convenience sugar for `record_key::<PrefixKmer64<K>>()` and does
219    /// not otherwise change payload layout or writing semantics.
220    #[must_use]
221    pub fn prefix_kmer_key<const K: u8>(self) -> DryIceWriterBuilder<W, S, Q, N, PrefixKmer64<K>> {
222        self.record_key::<PrefixKmer64<K>>()
223    }
224
225    /// Configure the writer to store packed canonical minimizer keys.
226    ///
227    /// This is convenience sugar for `record_key::<Minimizer64<K, WN>>()` and
228    /// does not otherwise change payload layout or writing semantics.
229    #[must_use]
230    pub fn minimizer_key<const K: u8, const WN: u8>(
231        self,
232    ) -> DryIceWriterBuilder<W, S, Q, N, Minimizer64<K, WN>> {
233        self.record_key::<Minimizer64<K, WN>>()
234    }
235
236    /// Configure the writer to store prefix kmer keys using the library's default parameters.
237    ///
238    /// The current default is `PrefixKmer64<31>`.
239    #[must_use]
240    pub fn prefix_kmer_key_default(self) -> DryIceWriterBuilder<W, S, Q, N, PrefixKmer64<31>> {
241        self.prefix_kmer_key::<31>()
242    }
243
244    /// Configure the writer to store minimizer keys using the library's default parameters.
245    ///
246    /// The current default is `Minimizer64<31, 15>`.
247    #[must_use]
248    pub fn minimizer_key_default(self) -> DryIceWriterBuilder<W, S, Q, N, Minimizer64<31, 15>> {
249        self.minimizer_key::<31, 15>()
250    }
251}
252
253impl<W> DryIceWriterBuilder<W, RawAsciiCodec, RawQualityCodec, RawNameCodec, NoRecordKey> {
254    /// Configure the writer for key-only prefix-kmer output using the default parameters.
255    ///
256    /// This preset selects `PrefixKmer64<31>` and applies `empty_payload()`. It
257    /// does not derive keys automatically; callers still write through the keyed
258    /// writer APIs such as `write_key_only()` or `write_record_with_key()`.
259    #[must_use]
260    pub fn prefix_kmers(
261        self,
262    ) -> DryIceWriterBuilder<
263        W,
264        OmittedSequenceCodec,
265        OmittedQualityCodec,
266        OmittedNameCodec,
267        PrefixKmer64<31>,
268    > {
269        self.prefix_kmer_key_default().empty_payload()
270    }
271
272    /// Configure the writer for prefix-kmer output while retaining sequences.
273    ///
274    /// This preset selects `PrefixKmer64<31>`, retains sequences, and omits
275    /// names and qualities. Callers still supply or derive the key explicitly.
276    #[must_use]
277    pub fn prefix_kmers_with_sequences(
278        self,
279    ) -> DryIceWriterBuilder<
280        W,
281        RawAsciiCodec,
282        OmittedQualityCodec,
283        OmittedNameCodec,
284        PrefixKmer64<31>,
285    > {
286        self.prefix_kmer_key_default().omit_quality().omit_names()
287    }
288
289    /// Configure the writer for prefix-kmer output while retaining names.
290    ///
291    /// This preset selects `PrefixKmer64<31>`, retains names, and omits
292    /// sequences and qualities. Callers still supply or derive the key
293    /// explicitly.
294    #[must_use]
295    pub fn prefix_kmers_with_names(
296        self,
297    ) -> DryIceWriterBuilder<
298        W,
299        OmittedSequenceCodec,
300        OmittedQualityCodec,
301        RawNameCodec,
302        PrefixKmer64<31>,
303    > {
304        self.prefix_kmer_key_default()
305            .omit_sequence()
306            .omit_quality()
307    }
308
309    /// Configure the writer for key-only minimizer output using the default parameters.
310    ///
311    /// This preset selects `Minimizer64<31, 15>` and applies `empty_payload()`.
312    /// It does not derive keys automatically; callers still write through the
313    /// keyed writer APIs such as `write_key_only()` or `write_record_with_key()`.
314    #[must_use]
315    pub fn minimizers(
316        self,
317    ) -> DryIceWriterBuilder<
318        W,
319        OmittedSequenceCodec,
320        OmittedQualityCodec,
321        OmittedNameCodec,
322        Minimizer64<31, 15>,
323    > {
324        self.minimizer_key_default().empty_payload()
325    }
326
327    /// Configure the writer for minimizer output while retaining sequences.
328    ///
329    /// This preset selects `Minimizer64<31, 15>`, retains sequences, and omits
330    /// names and qualities. Callers still supply or derive the key explicitly.
331    #[must_use]
332    pub fn minimizers_with_sequences(
333        self,
334    ) -> DryIceWriterBuilder<
335        W,
336        RawAsciiCodec,
337        OmittedQualityCodec,
338        OmittedNameCodec,
339        Minimizer64<31, 15>,
340    > {
341        self.minimizer_key_default().omit_quality().omit_names()
342    }
343
344    /// Configure the writer for minimizer output while retaining names.
345    ///
346    /// This preset selects `Minimizer64<31, 15>`, retains names, and omits
347    /// sequences and qualities. Callers still supply or derive the key
348    /// explicitly.
349    #[must_use]
350    pub fn minimizers_with_names(
351        self,
352    ) -> DryIceWriterBuilder<
353        W,
354        OmittedSequenceCodec,
355        OmittedQualityCodec,
356        RawNameCodec,
357        Minimizer64<31, 15>,
358    > {
359        self.minimizer_key_default().omit_sequence().omit_quality()
360    }
361}
362
363impl<W: Write, S: SequenceCodec, Q: QualityCodec, N: NameCodec>
364    DryIceWriterBuilder<W, S, Q, N, NoRecordKey>
365{
366    /// Build an unkeyed writer.
367    #[must_use]
368    pub fn build(self) -> DryIceWriter<W, S, Q, N, NoRecordKey> {
369        DryIceWriter::new_unkeyed(self.inner, self.target_block_records)
370    }
371}
372
373impl<W: Write, S: SequenceCodec, Q: QualityCodec, N: NameCodec, K: RecordKey>
374    DryIceWriterBuilder<W, S, Q, N, K>
375{
376    /// Build a keyed writer.
377    #[must_use]
378    pub fn build(self) -> DryIceWriter<W, S, Q, N, K> {
379        DryIceWriter::new_keyed(self.inner, self.target_block_records)
380    }
381}
382
#[cfg(feature = "async")]
impl<W, S: SequenceCodec, Q: QualityCodec, N: NameCodec>
    DryIceWriterBuilder<W, S, Q, N, NoRecordKey>
{
    /// Consume the builder and produce an unkeyed async writer.
    ///
    /// Only compiled when the `async` feature is enabled.
    #[must_use]
    pub fn build_async(self) -> crate::async_io::AsyncDryIceWriter<W, S, Q, N, NoRecordKey> {
        let Self {
            inner,
            target_block_records,
            ..
        } = self;

        crate::async_io::AsyncDryIceWriter::new_unkeyed(inner, target_block_records)
    }
}
393
#[cfg(feature = "async")]
impl<W, S: SequenceCodec, Q: QualityCodec, N: NameCodec, K: RecordKey>
    DryIceWriterBuilder<W, S, Q, N, K>
{
    /// Consume the builder and produce an async writer keyed by `K`.
    ///
    /// Only compiled when the `async` feature is enabled.
    #[must_use]
    pub fn build_async(self) -> crate::async_io::AsyncDryIceWriter<W, S, Q, N, K> {
        let Self {
            inner,
            target_block_records,
            ..
        } = self;

        crate::async_io::AsyncDryIceWriter::new_keyed(inner, target_block_records)
    }
}
404
405/// Writes sequencing records into the `dryice` block-oriented format.
406pub struct DryIceWriter<
407    W,
408    S: SequenceCodec = RawAsciiCodec,
409    Q: QualityCodec = RawQualityCodec,
410    N: NameCodec = RawNameCodec,
411    K = NoRecordKey,
412> {
413    inner: W,
414    target_block_records: usize,
415    block_builder: BlockBuilder<S, Q, N>,
416    header_written: bool,
417    _key: PhantomData<K>,
418}
419
420impl DryIceWriter<MissingInner, RawAsciiCodec, RawQualityCodec, RawNameCodec, NoRecordKey> {
421    /// Start building a new writer.
422    #[must_use]
423    pub fn builder()
424    -> DryIceWriterBuilder<MissingInner, RawAsciiCodec, RawQualityCodec, RawNameCodec, NoRecordKey>
425    {
426        DryIceWriterBuilder::new()
427    }
428}
429
430impl<W, S: SequenceCodec, Q: QualityCodec, N: NameCodec> DryIceWriter<W, S, Q, N, NoRecordKey> {
431    fn new_unkeyed(inner: W, target_block_records: usize) -> Self {
432        let block_builder = BlockBuilder::new(&BlockBuilderConfig {
433            record_key_width: None,
434            record_key_tag: None,
435            target_records: target_block_records,
436        });
437
438        Self {
439            inner,
440            target_block_records,
441            block_builder,
442            header_written: false,
443            _key: PhantomData,
444        }
445    }
446}
447
448impl<W, S: SequenceCodec, Q: QualityCodec, N: NameCodec, K: RecordKey> DryIceWriter<W, S, Q, N, K> {
449    fn new_keyed(inner: W, target_block_records: usize) -> Self {
450        let block_builder = BlockBuilder::new(&BlockBuilderConfig {
451            record_key_width: Some(K::WIDTH),
452            record_key_tag: Some(K::TYPE_TAG),
453            target_records: target_block_records,
454        });
455
456        Self {
457            inner,
458            target_block_records,
459            block_builder,
460            header_written: false,
461            _key: PhantomData,
462        }
463    }
464}
465
466impl<W, S: SequenceCodec, Q: QualityCodec, N: NameCodec, K> DryIceWriter<W, S, Q, N, K> {
467    fn ensure_header_written(&mut self) -> Result<(), DryIceError>
468    where
469        W: Write,
470    {
471        if !self.header_written {
472            format::write_file_header(&mut self.inner)?;
473            self.header_written = true;
474        }
475        Ok(())
476    }
477}
478
479impl<W: Write, S: SequenceCodec, Q: QualityCodec, N: NameCodec>
480    DryIceWriter<W, S, Q, N, NoRecordKey>
481{
482    /// Create an unkeyed writer from a pre-built options struct.
483    ///
484    /// # Errors
485    ///
486    /// Returns an error if the options request an unsupported block-size policy.
487    pub fn from_options(inner: W, options: &DryIceWriterOptions) -> Result<Self, DryIceError> {
488        let target_block_records = match options.layout.block_size {
489            BlockSizePolicy::TargetRecords(n) => n,
490            BlockSizePolicy::TargetBytes(_) => {
491                return Err(DryIceError::InvalidWriterConfiguration(
492                    "TargetBytes block size policy is not yet supported",
493                ));
494            },
495        };
496
497        Ok(Self::new_unkeyed(inner, target_block_records))
498    }
499
500    /// Assemble the current configuration into an options struct.
501    #[must_use]
502    pub fn options(&self) -> DryIceWriterOptions {
503        DryIceWriterOptions {
504            layout: BlockLayoutOptions {
505                block_size: BlockSizePolicy::TargetRecords(self.target_block_records),
506            },
507        }
508    }
509
510    /// Write a single sequencing record to an unkeyed writer.
511    ///
512    /// # Errors
513    ///
514    /// Returns an error if the record fails validation, if the file header cannot
515    /// be written, or if flushing the current block fails.
516    pub fn write_record<R: SeqRecordLike>(&mut self, record: &R) -> Result<(), DryIceError> {
517        self.ensure_header_written()?;
518        self.block_builder.push_record(record)?;
519
520        if self.block_builder.should_flush() {
521            self.flush_block()?;
522        }
523
524        Ok(())
525    }
526}
527
528impl<W: Write, S: SequenceCodec, Q: QualityCodec, N: NameCodec, K: RecordKey>
529    DryIceWriter<W, S, Q, N, K>
530{
531    /// Write a single sequencing record together with its accelerator key.
532    ///
533    /// # Errors
534    ///
535    /// Returns an error if the record fails validation, if the key cannot be
536    /// encoded, if the file header cannot be written, or if flushing the current
537    /// block fails.
538    pub fn write_record_with_key<R: SeqRecordLike>(
539        &mut self,
540        record: &R,
541        key: &K,
542    ) -> Result<(), DryIceError> {
543        self.ensure_header_written()?;
544        self.block_builder.push_record_with_key(record, key)?;
545
546        if self.block_builder.should_flush() {
547            self.flush_block()?;
548        }
549
550        Ok(())
551    }
552
553    /// Write a key-only record with empty payload fields.
554    ///
555    /// # Errors
556    ///
557    /// Returns an error if the file header cannot be written, if the key cannot
558    /// be encoded, or if flushing the current block fails.
559    pub fn write_key_only(&mut self, key: &K) -> Result<(), DryIceError> {
560        self.write_record_with_key(&EMPTY_RECORD, key)
561    }
562}
563
564impl<W: Write, S: SequenceCodec, Q: QualityCodec, N: NameCodec, K> DryIceWriter<W, S, Q, N, K> {
565    /// Flush any remaining buffered records and finalize the file.
566    ///
567    /// # Errors
568    ///
569    /// Returns an error if writing the file header or flushing the final block
570    /// fails.
571    pub fn finish(mut self) -> Result<W, DryIceError> {
572        self.ensure_header_written()?;
573
574        if !self.block_builder.is_empty() {
575            self.flush_block()?;
576        }
577
578        Ok(self.inner)
579    }
580
581    fn flush_block(&mut self) -> Result<(), DryIceError> {
582        self.block_builder.write_block(&mut self.inner)?;
583        Ok(())
584    }
585}