1use std::{io::Write, marker::PhantomData};
4
5use crate::{
6 block::{
7 BlockBuilder, BlockBuilderConfig,
8 name::{NameCodec, OmittedNameCodec, RawNameCodec, SplitNameCodec},
9 quality::{BinnedQualityCodec, OmittedQualityCodec, QualityCodec, RawQualityCodec},
10 sequence::{
11 OmittedSequenceCodec, RawAsciiCodec, SequenceCodec, TwoBitExactCodec, TwoBitLossyNCodec,
12 },
13 },
14 config::{BlockLayoutOptions, BlockSizePolicy, DryIceWriterOptions},
15 error::DryIceError,
16 format,
17 key::{Bytes8Key, Bytes16Key, Minimizer64, NoRecordKey, PrefixKmer64, RecordKey},
18 record::{EMPTY_RECORD, SeqRecordLike},
19};
20
/// Placeholder occupying the `W` slot of a [`DryIceWriterBuilder`] until
/// [`DryIceWriterBuilder::inner`] replaces it with a real value.
///
/// It carries no data. The derives let a freshly created builder state be
/// debug-printed, copied, and default-constructed like any other marker type.
#[derive(Debug, Clone, Copy, Default)]
pub struct MissingInner;
23
/// Type-state builder for [`DryIceWriter`].
///
/// The type parameters record the configuration selected so far:
///
/// - `W`: the inner value handed to the writer ([`MissingInner`] until set),
/// - `S`, `Q`, `N`: the sequence, quality, and name codec types,
/// - `K`: the record key type ([`NoRecordKey`] by default).
pub struct DryIceWriterBuilder<
    W = MissingInner,
    S = RawAsciiCodec,
    Q = RawQualityCodec,
    N = RawNameCodec,
    K = NoRecordKey,
> {
    /// Value passed to the writer constructor when the builder is consumed.
    inner: W,
    /// Target record count forwarded to the writer's block builder.
    target_block_records: usize,
    // The remaining fields are zero-sized markers that only carry the
    // selected codec and key types.
    _codec: PhantomData<S>,
    _quality: PhantomData<Q>,
    _name: PhantomData<N>,
    _key: PhantomData<K>,
}
39
40impl DryIceWriterBuilder<MissingInner, RawAsciiCodec, RawQualityCodec, RawNameCodec, NoRecordKey> {
41 fn new() -> Self {
42 Self {
43 inner: MissingInner,
44 target_block_records: 8192,
45 _codec: PhantomData,
46 _quality: PhantomData,
47 _name: PhantomData,
48 _key: PhantomData,
49 }
50 }
51}
52
53impl<W, S, Q, N, K> DryIceWriterBuilder<W, S, Q, N, K> {
54 #[must_use]
56 pub fn target_block_records(mut self, n: usize) -> Self {
57 self.target_block_records = n;
58 self
59 }
60}
61
62impl<S, Q, N, K> DryIceWriterBuilder<MissingInner, S, Q, N, K> {
63 #[must_use]
65 pub fn inner<W>(self, inner: W) -> DryIceWriterBuilder<W, S, Q, N, K> {
66 DryIceWriterBuilder {
67 inner,
68 target_block_records: self.target_block_records,
69 _codec: PhantomData,
70 _quality: PhantomData,
71 _name: PhantomData,
72 _key: PhantomData,
73 }
74 }
75}
76
77impl<W, Q, N, K> DryIceWriterBuilder<W, RawAsciiCodec, Q, N, K> {
78 #[must_use]
80 pub fn sequence_codec<S: SequenceCodec>(self) -> DryIceWriterBuilder<W, S, Q, N, K> {
81 DryIceWriterBuilder {
82 inner: self.inner,
83 target_block_records: self.target_block_records,
84 _codec: PhantomData,
85 _quality: PhantomData,
86 _name: PhantomData,
87 _key: PhantomData,
88 }
89 }
90
91 #[must_use]
93 pub fn two_bit_exact(self) -> DryIceWriterBuilder<W, TwoBitExactCodec, Q, N, K> {
94 self.sequence_codec::<TwoBitExactCodec>()
95 }
96
97 #[must_use]
100 pub fn two_bit_lossy_n(self) -> DryIceWriterBuilder<W, TwoBitLossyNCodec, Q, N, K> {
101 self.sequence_codec::<TwoBitLossyNCodec>()
102 }
103
104 #[must_use]
110 pub fn omit_sequence(self) -> DryIceWriterBuilder<W, OmittedSequenceCodec, Q, N, K> {
111 self.sequence_codec::<OmittedSequenceCodec>()
112 }
113}
114
115impl<W, K> DryIceWriterBuilder<W, RawAsciiCodec, RawQualityCodec, RawNameCodec, K> {
116 #[must_use]
121 pub fn empty_payload(
122 self,
123 ) -> DryIceWriterBuilder<W, OmittedSequenceCodec, OmittedQualityCodec, OmittedNameCodec, K>
124 {
125 DryIceWriterBuilder {
126 inner: self.inner,
127 target_block_records: self.target_block_records,
128 _codec: PhantomData,
129 _quality: PhantomData,
130 _name: PhantomData,
131 _key: PhantomData,
132 }
133 }
134}
135
136impl<W, S, N, K> DryIceWriterBuilder<W, S, RawQualityCodec, N, K> {
137 #[must_use]
139 pub fn quality_codec<Q: QualityCodec>(self) -> DryIceWriterBuilder<W, S, Q, N, K> {
140 DryIceWriterBuilder {
141 inner: self.inner,
142 target_block_records: self.target_block_records,
143 _codec: PhantomData,
144 _quality: PhantomData,
145 _name: PhantomData,
146 _key: PhantomData,
147 }
148 }
149
150 #[must_use]
152 pub fn binned_quality(self) -> DryIceWriterBuilder<W, S, BinnedQualityCodec, N, K> {
153 self.quality_codec::<BinnedQualityCodec>()
154 }
155
156 #[must_use]
158 pub fn omit_quality(self) -> DryIceWriterBuilder<W, S, OmittedQualityCodec, N, K> {
159 self.quality_codec::<OmittedQualityCodec>()
160 }
161}
162
163impl<W, S, Q, K> DryIceWriterBuilder<W, S, Q, RawNameCodec, K> {
164 #[must_use]
166 pub fn name_codec<N: NameCodec>(self) -> DryIceWriterBuilder<W, S, Q, N, K> {
167 DryIceWriterBuilder {
168 inner: self.inner,
169 target_block_records: self.target_block_records,
170 _codec: PhantomData,
171 _quality: PhantomData,
172 _name: PhantomData,
173 _key: PhantomData,
174 }
175 }
176
177 #[must_use]
179 pub fn omit_names(self) -> DryIceWriterBuilder<W, S, Q, OmittedNameCodec, K> {
180 self.name_codec::<OmittedNameCodec>()
181 }
182
183 #[must_use]
185 pub fn split_names(self) -> DryIceWriterBuilder<W, S, Q, SplitNameCodec, K> {
186 self.name_codec::<SplitNameCodec>()
187 }
188}
189
190impl<W, S, Q, N> DryIceWriterBuilder<W, S, Q, N, NoRecordKey> {
191 #[must_use]
193 pub fn record_key<K: RecordKey>(self) -> DryIceWriterBuilder<W, S, Q, N, K> {
194 DryIceWriterBuilder {
195 inner: self.inner,
196 target_block_records: self.target_block_records,
197 _codec: PhantomData,
198 _quality: PhantomData,
199 _name: PhantomData,
200 _key: PhantomData,
201 }
202 }
203
204 #[must_use]
206 pub fn bytes8_key(self) -> DryIceWriterBuilder<W, S, Q, N, Bytes8Key> {
207 self.record_key::<Bytes8Key>()
208 }
209
210 #[must_use]
212 pub fn bytes16_key(self) -> DryIceWriterBuilder<W, S, Q, N, Bytes16Key> {
213 self.record_key::<Bytes16Key>()
214 }
215
216 #[must_use]
221 pub fn prefix_kmer_key<const K: u8>(self) -> DryIceWriterBuilder<W, S, Q, N, PrefixKmer64<K>> {
222 self.record_key::<PrefixKmer64<K>>()
223 }
224
225 #[must_use]
230 pub fn minimizer_key<const K: u8, const WN: u8>(
231 self,
232 ) -> DryIceWriterBuilder<W, S, Q, N, Minimizer64<K, WN>> {
233 self.record_key::<Minimizer64<K, WN>>()
234 }
235
236 #[must_use]
240 pub fn prefix_kmer_key_default(self) -> DryIceWriterBuilder<W, S, Q, N, PrefixKmer64<31>> {
241 self.prefix_kmer_key::<31>()
242 }
243
244 #[must_use]
248 pub fn minimizer_key_default(self) -> DryIceWriterBuilder<W, S, Q, N, Minimizer64<31, 15>> {
249 self.minimizer_key::<31, 15>()
250 }
251}
252
253impl<W> DryIceWriterBuilder<W, RawAsciiCodec, RawQualityCodec, RawNameCodec, NoRecordKey> {
254 #[must_use]
260 pub fn prefix_kmers(
261 self,
262 ) -> DryIceWriterBuilder<
263 W,
264 OmittedSequenceCodec,
265 OmittedQualityCodec,
266 OmittedNameCodec,
267 PrefixKmer64<31>,
268 > {
269 self.prefix_kmer_key_default().empty_payload()
270 }
271
272 #[must_use]
277 pub fn prefix_kmers_with_sequences(
278 self,
279 ) -> DryIceWriterBuilder<
280 W,
281 RawAsciiCodec,
282 OmittedQualityCodec,
283 OmittedNameCodec,
284 PrefixKmer64<31>,
285 > {
286 self.prefix_kmer_key_default().omit_quality().omit_names()
287 }
288
289 #[must_use]
295 pub fn prefix_kmers_with_names(
296 self,
297 ) -> DryIceWriterBuilder<
298 W,
299 OmittedSequenceCodec,
300 OmittedQualityCodec,
301 RawNameCodec,
302 PrefixKmer64<31>,
303 > {
304 self.prefix_kmer_key_default()
305 .omit_sequence()
306 .omit_quality()
307 }
308
309 #[must_use]
315 pub fn minimizers(
316 self,
317 ) -> DryIceWriterBuilder<
318 W,
319 OmittedSequenceCodec,
320 OmittedQualityCodec,
321 OmittedNameCodec,
322 Minimizer64<31, 15>,
323 > {
324 self.minimizer_key_default().empty_payload()
325 }
326
327 #[must_use]
332 pub fn minimizers_with_sequences(
333 self,
334 ) -> DryIceWriterBuilder<
335 W,
336 RawAsciiCodec,
337 OmittedQualityCodec,
338 OmittedNameCodec,
339 Minimizer64<31, 15>,
340 > {
341 self.minimizer_key_default().omit_quality().omit_names()
342 }
343
344 #[must_use]
350 pub fn minimizers_with_names(
351 self,
352 ) -> DryIceWriterBuilder<
353 W,
354 OmittedSequenceCodec,
355 OmittedQualityCodec,
356 RawNameCodec,
357 Minimizer64<31, 15>,
358 > {
359 self.minimizer_key_default().omit_sequence().omit_quality()
360 }
361}
362
363impl<W: Write, S: SequenceCodec, Q: QualityCodec, N: NameCodec>
364 DryIceWriterBuilder<W, S, Q, N, NoRecordKey>
365{
366 #[must_use]
368 pub fn build(self) -> DryIceWriter<W, S, Q, N, NoRecordKey> {
369 DryIceWriter::new_unkeyed(self.inner, self.target_block_records)
370 }
371}
372
373impl<W: Write, S: SequenceCodec, Q: QualityCodec, N: NameCodec, K: RecordKey>
374 DryIceWriterBuilder<W, S, Q, N, K>
375{
376 #[must_use]
378 pub fn build(self) -> DryIceWriter<W, S, Q, N, K> {
379 DryIceWriter::new_keyed(self.inner, self.target_block_records)
380 }
381}
382
#[cfg(feature = "async")]
impl<W, S, Q, N> DryIceWriterBuilder<W, S, Q, N, NoRecordKey>
where
    S: SequenceCodec,
    Q: QualityCodec,
    N: NameCodec,
{
    /// Consumes the builder and creates an unkeyed
    /// [`AsyncDryIceWriter`](crate::async_io::AsyncDryIceWriter).
    ///
    /// Only compiled when the `async` feature is enabled.
    #[must_use]
    pub fn build_async(self) -> crate::async_io::AsyncDryIceWriter<W, S, Q, N, NoRecordKey> {
        let Self {
            inner,
            target_block_records,
            ..
        } = self;

        crate::async_io::AsyncDryIceWriter::new_unkeyed(inner, target_block_records)
    }
}
393
#[cfg(feature = "async")]
impl<W, S, Q, N, K> DryIceWriterBuilder<W, S, Q, N, K>
where
    S: SequenceCodec,
    Q: QualityCodec,
    N: NameCodec,
    K: RecordKey,
{
    /// Consumes the builder and creates a keyed
    /// [`AsyncDryIceWriter`](crate::async_io::AsyncDryIceWriter).
    ///
    /// Only compiled when the `async` feature is enabled.
    #[must_use]
    pub fn build_async(self) -> crate::async_io::AsyncDryIceWriter<W, S, Q, N, K> {
        let Self {
            inner,
            target_block_records,
            ..
        } = self;

        crate::async_io::AsyncDryIceWriter::new_keyed(inner, target_block_records)
    }
}
404
405pub struct DryIceWriter<
407 W,
408 S: SequenceCodec = RawAsciiCodec,
409 Q: QualityCodec = RawQualityCodec,
410 N: NameCodec = RawNameCodec,
411 K = NoRecordKey,
412> {
413 inner: W,
414 target_block_records: usize,
415 block_builder: BlockBuilder<S, Q, N>,
416 header_written: bool,
417 _key: PhantomData<K>,
418}
419
420impl DryIceWriter<MissingInner, RawAsciiCodec, RawQualityCodec, RawNameCodec, NoRecordKey> {
421 #[must_use]
423 pub fn builder()
424 -> DryIceWriterBuilder<MissingInner, RawAsciiCodec, RawQualityCodec, RawNameCodec, NoRecordKey>
425 {
426 DryIceWriterBuilder::new()
427 }
428}
429
430impl<W, S: SequenceCodec, Q: QualityCodec, N: NameCodec> DryIceWriter<W, S, Q, N, NoRecordKey> {
431 fn new_unkeyed(inner: W, target_block_records: usize) -> Self {
432 let block_builder = BlockBuilder::new(&BlockBuilderConfig {
433 record_key_width: None,
434 record_key_tag: None,
435 target_records: target_block_records,
436 });
437
438 Self {
439 inner,
440 target_block_records,
441 block_builder,
442 header_written: false,
443 _key: PhantomData,
444 }
445 }
446}
447
448impl<W, S: SequenceCodec, Q: QualityCodec, N: NameCodec, K: RecordKey> DryIceWriter<W, S, Q, N, K> {
449 fn new_keyed(inner: W, target_block_records: usize) -> Self {
450 let block_builder = BlockBuilder::new(&BlockBuilderConfig {
451 record_key_width: Some(K::WIDTH),
452 record_key_tag: Some(K::TYPE_TAG),
453 target_records: target_block_records,
454 });
455
456 Self {
457 inner,
458 target_block_records,
459 block_builder,
460 header_written: false,
461 _key: PhantomData,
462 }
463 }
464}
465
466impl<W, S: SequenceCodec, Q: QualityCodec, N: NameCodec, K> DryIceWriter<W, S, Q, N, K> {
467 fn ensure_header_written(&mut self) -> Result<(), DryIceError>
468 where
469 W: Write,
470 {
471 if !self.header_written {
472 format::write_file_header(&mut self.inner)?;
473 self.header_written = true;
474 }
475 Ok(())
476 }
477}
478
479impl<W: Write, S: SequenceCodec, Q: QualityCodec, N: NameCodec>
480 DryIceWriter<W, S, Q, N, NoRecordKey>
481{
482 pub fn from_options(inner: W, options: &DryIceWriterOptions) -> Result<Self, DryIceError> {
488 let target_block_records = match options.layout.block_size {
489 BlockSizePolicy::TargetRecords(n) => n,
490 BlockSizePolicy::TargetBytes(_) => {
491 return Err(DryIceError::InvalidWriterConfiguration(
492 "TargetBytes block size policy is not yet supported",
493 ));
494 },
495 };
496
497 Ok(Self::new_unkeyed(inner, target_block_records))
498 }
499
500 #[must_use]
502 pub fn options(&self) -> DryIceWriterOptions {
503 DryIceWriterOptions {
504 layout: BlockLayoutOptions {
505 block_size: BlockSizePolicy::TargetRecords(self.target_block_records),
506 },
507 }
508 }
509
510 pub fn write_record<R: SeqRecordLike>(&mut self, record: &R) -> Result<(), DryIceError> {
517 self.ensure_header_written()?;
518 self.block_builder.push_record(record)?;
519
520 if self.block_builder.should_flush() {
521 self.flush_block()?;
522 }
523
524 Ok(())
525 }
526}
527
528impl<W: Write, S: SequenceCodec, Q: QualityCodec, N: NameCodec, K: RecordKey>
529 DryIceWriter<W, S, Q, N, K>
530{
531 pub fn write_record_with_key<R: SeqRecordLike>(
539 &mut self,
540 record: &R,
541 key: &K,
542 ) -> Result<(), DryIceError> {
543 self.ensure_header_written()?;
544 self.block_builder.push_record_with_key(record, key)?;
545
546 if self.block_builder.should_flush() {
547 self.flush_block()?;
548 }
549
550 Ok(())
551 }
552
553 pub fn write_key_only(&mut self, key: &K) -> Result<(), DryIceError> {
560 self.write_record_with_key(&EMPTY_RECORD, key)
561 }
562}
563
564impl<W: Write, S: SequenceCodec, Q: QualityCodec, N: NameCodec, K> DryIceWriter<W, S, Q, N, K> {
565 pub fn finish(mut self) -> Result<W, DryIceError> {
572 self.ensure_header_written()?;
573
574 if !self.block_builder.is_empty() {
575 self.flush_block()?;
576 }
577
578 Ok(self.inner)
579 }
580
581 fn flush_block(&mut self) -> Result<(), DryIceError> {
582 self.block_builder.write_block(&mut self.inner)?;
583 Ok(())
584 }
585}