1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
//! Helper utilities for FASTQ loading and saving.

use std::io::prelude::*;

use traits::*;
use util::*;
use super::re::*;
use super::record::Record;
use super::record_list::RecordList;

// FASTQ ITERATOR

/// Iterator to parse individual FASTQ entries from a document.
///
/// Converts a buffered stream into a lazy reader that yields the raw text
/// of one FASTQ entry per call. The actual splitting is done by
/// `text_next_skip_whitespace`, which is handed the `"@"` marker in the
/// `Iterator` implementation.
pub struct FastqIter<T: BufRead> {
    /// Buffered reader the entries are pulled from.
    reader: T,
    /// Scratch buffer lent to `text_next_skip_whitespace` on every call.
    buf: BufferType,
    /// Scratch line storage lent to `text_next_skip_whitespace` on every call.
    line: String,
}

impl<T: BufRead> FastqIter<T> {
    /// Wrap a buffered reader in a new `FastqIter`.
    ///
    /// Both scratch buffers are allocated up front with room for at least
    /// 8000 bytes each.
    #[inline]
    pub fn new(reader: T) -> Self {
        // Initial capacity reserved for each scratch buffer.
        const INITIAL_CAPACITY: usize = 8000;

        let buf = Vec::with_capacity(INITIAL_CAPACITY);
        let line = String::with_capacity(INITIAL_CAPACITY);
        FastqIter { reader, buf, line }
    }
}

/// Yields the text of each FASTQ entry found in the wrapped reader.
impl<T: BufRead> Iterator for FastqIter<T> {
    /// Text of a single entry, or the error raised while fetching it.
    type Item = ResultType<String>;

    /// Fetch the text of the next entry.
    ///
    /// All of the work is delegated to `text_next_skip_whitespace`, which
    /// receives the `"@"` record marker, the reader and both scratch buffers.
    // NOTE(review): the helper presumably starts a new entry at each line
    // beginning with the marker and skips blank input -- confirm in `util`.
    fn next(&mut self) -> Option<Self::Item> {
        text_next_skip_whitespace("@", &mut self.reader, &mut self.buf, &mut self.line)
    }
}

// SIZE

/// Estimate the size of a FASTQ record.
///
/// Used to prevent reallocations during record exportation to string,
/// to minimize costly library calls.
///
/// The result is an upper bound on the number of bytes `record_to_fastq`
/// emits for `record`: the identifier and description are counted twice
/// because they are written on both the `@` and the `+` header lines.
#[inline]
fn estimate_record_size(record: &Record) -> usize {
    // Upper bound on the fixed bytes of an entry: '@', '+', the line
    // breaks between the four lines and the two optional ' ' separators.
    const FASTQ_VOCABULARY_SIZE: usize = 7;

    // Identifier plus description, as they appear on one header line.
    let header_size = record.seq_id.len() + record.description.len();

    FASTQ_VOCABULARY_SIZE +
        2 * header_size +
        record.sequence.len() +
        record.quality.len()
}

/// Estimate the size of a FASTQ record list.
///
/// Adds up the per-record estimate of every record in `list`; an empty
/// list yields 0.
#[inline]
fn estimate_list_size(list: &RecordList) -> usize {
    let mut total = 0;
    for record in list.iter() {
        total = total + estimate_record_size(record);
    }
    total
}

// WRITER

/// Free-function adapter around `record_to_fastq`.
///
/// Exists so the serializer can be passed by reference as a callback
/// (see `export_cb`, which hands `&to_fastq` to the writer state).
#[inline(always)]
fn to_fastq<T: Write>(writer: &mut T, record: &Record) -> ResultType<()> {
    record_to_fastq(writer, record)
}

/// Export record to FASTQ.
///
/// Writes the four-line FASTQ layout to `writer`:
///
/// ```text
/// @<seq_id>[ <description>]
/// <sequence>
/// +<seq_id>[ <description>]
/// <quality>
/// ```
///
/// The description and its leading space are omitted from both header
/// lines when the description is empty. No line break is written after
/// the quality line.
///
/// # Errors
///
/// Propagates the first error produced while writing to `writer`.
pub fn record_to_fastq<T: Write>(writer: &mut T, record: &Record)
    -> ResultType<()>
{
    // First header line: '@' followed by the identifier.
    write_alls!(writer, b"@", record.seq_id.as_bytes())?;

    if !record.description.is_empty() {
        write_alls!(writer, b" ", record.description.as_bytes())?;
    }

    // Sequence line, then the '+' header line repeating the identifier.
    write_alls!(
        writer,
        b"\n", record.sequence.as_slice(),
        b"\n+", record.seq_id.as_bytes()
    )?;

    if !record.description.is_empty() {
        write_alls!(writer, b" ", record.description.as_bytes())?;
    }

    // The quality scores must start on their own line; without this line
    // break they would be glued onto the '+' header and the entry could
    // not be read back.
    write_alls!(writer, b"\n", record.quality.as_slice())?;

    Ok(())
}

// WRITER -- DEFAULT

/// Initialization callback for the generic text exporters.
///
/// Wraps `writer` and the record `delimiter` in a `TextWriterState`;
/// this step itself always succeeds.
#[inline(always)]
fn init_cb<T: Write>(writer: &mut T, delimiter: u8)
    -> ResultType<TextWriterState<T>>
{
    Ok(TextWriterState::new(writer, delimiter))
}

/// Per-record callback for the generic text exporters.
///
/// Asks the writer state to export `record`, using `to_fastq` as the
/// serializer.
// NOTE(review): `TextWriterState::export` presumably handles emitting the
// delimiter between records -- confirm in `util`.
#[inline(always)]
fn export_cb<'a, T: Write>(writer: &mut TextWriterState<T>, record: &'a Record)
    -> ResultType<()>
{
    writer.export(record, &to_fastq)
}

/// Finalization callback for the generic text exporters.
///
/// FASTQ has no trailer, so this ignores the writer state and returns `Ok(())`.
#[inline(always)]
fn dest_cb<T: Write>(_: &mut TextWriterState<T>)
    -> ResultType<()>
{
    Ok(())
}

/// Default exporter from a non-owning iterator to FASTQ.
///
/// Forwards to `reference_iterator_export`, passing `b'\n'` as the record
/// delimiter together with the FASTQ `init_cb`, `export_cb` and `dest_cb`
/// callbacks.
#[inline(always)]
pub fn reference_iterator_to_fastq<'a, Iter, T>(writer: &mut T, iter: Iter)
    -> ResultType<()>
    where T: Write,
          Iter: Iterator<Item = &'a Record>
{
    reference_iterator_export(writer, iter, b'\n', &init_cb, &export_cb, &dest_cb)
}


/// Default exporter from an owning iterator to FASTQ.
///
/// The iterator yields `ResultType<Record>` items. Forwards to
/// `value_iterator_export`, passing `b'\n'` as the record delimiter
/// together with the FASTQ `init_cb`, `export_cb` and `dest_cb` callbacks.
#[inline(always)]
pub fn value_iterator_to_fastq<Iter, T>(writer: &mut T, iter: Iter)
    -> ResultType<()>
    where T: Write,
          Iter: Iterator<Item = ResultType<Record>>
{
    value_iterator_export(writer, iter, b'\n', &init_cb, &export_cb, &dest_cb)
}

// WRITER -- STRICT

/// Strict exporter from a non-owning iterator to FASTQ.
///
/// Forwards to `reference_iterator_export_strict`, passing `b'\n'` as the
/// record delimiter together with the FASTQ `init_cb`, `export_cb` and
/// `dest_cb` callbacks.
// NOTE(review): what "strict" rejects is decided by the `util` helper and
// is not visible here -- presumably invalid records abort the export.
#[inline(always)]
pub fn reference_iterator_to_fastq_strict<'a, Iter, T>(writer: &mut T, iter: Iter)
    -> ResultType<()>
    where T: Write,
          Iter: Iterator<Item = &'a Record>
{
    reference_iterator_export_strict(writer, iter, b'\n', &init_cb, &export_cb, &dest_cb)
}

/// Strict exporter from an owning iterator to FASTQ.
///
/// The iterator yields `ResultType<Record>` items. Forwards to
/// `value_iterator_export_strict`, passing `b'\n'` as the record delimiter
/// together with the FASTQ `init_cb`, `export_cb` and `dest_cb` callbacks.
// NOTE(review): what "strict" rejects is decided by the `util` helper and
// is not visible here -- presumably invalid records abort the export.
#[inline(always)]
pub fn value_iterator_to_fastq_strict<Iter, T>(writer: &mut T, iter: Iter)
    -> ResultType<()>
    where T: Write,
          Iter: Iterator<Item = ResultType<Record>>
{
    value_iterator_export_strict(writer, iter, b'\n', &init_cb, &export_cb, &dest_cb)
}

// WRITER -- LENIENT

/// Lenient exporter from a non-owning iterator to FASTQ.
///
/// Forwards to `reference_iterator_export_lenient`, passing `b'\n'` as the
/// record delimiter together with the FASTQ `init_cb`, `export_cb` and
/// `dest_cb` callbacks.
// NOTE(review): what "lenient" tolerates is decided by the `util` helper
// and is not visible here -- presumably invalid records are skipped.
#[inline(always)]
pub fn reference_iterator_to_fastq_lenient<'a, Iter, T>(writer: &mut T, iter: Iter)
    -> ResultType<()>
    where T: Write,
          Iter: Iterator<Item = &'a Record>
{
    reference_iterator_export_lenient(writer, iter, b'\n', &init_cb, &export_cb, &dest_cb)
}

/// Lenient exporter from an owning iterator to FASTQ.
///
/// The iterator yields `ResultType<Record>` items. Forwards to
/// `value_iterator_export_lenient`, passing `b'\n'` as the record delimiter
/// together with the FASTQ `init_cb`, `export_cb` and `dest_cb` callbacks.
// NOTE(review): what "lenient" tolerates is decided by the `util` helper
// and is not visible here -- presumably invalid records are skipped.
#[inline(always)]
pub fn value_iterator_to_fastq_lenient<Iter, T>(writer: &mut T, iter: Iter)
    -> ResultType<()>
    where T: Write,
          Iter: Iterator<Item = ResultType<Record>>
{
    value_iterator_export_lenient(writer, iter, b'\n', &init_cb, &export_cb, &dest_cb)
}

// READER

/// Import record from FASTQ.
///
/// Consumes up to four lines from `reader`: the `@` header, the sequence,
/// the `+` header and the quality scores. The identifier and description
/// are taken from the first header via `FastqHeaderRegex`; the content of
/// the `+` line after the `+` is not inspected.
///
/// # Errors
///
/// - `InvalidInput` when one of the four lines is missing, when the first
///   line does not match `FastqHeaderRegex`, or when the third line does
///   not start with `+`.
/// - `InvalidRecord` when the quality line and the sequence line differ
///   in length.
/// - Any error produced by the reader while fetching a line.
// NOTE(review): no binding below looks unused; the lint guard is presumably
// there for the macro expansions -- confirm before removing it.
#[allow(unused_variables)]
pub fn record_from_fastq<T: BufRead>(reader: &mut T)
    -> ResultType<Record>
{
    // Split along lines.
    // The first line is the `@` header; fail early when there is none.
    let mut lines = reader.lines();
    let header = none_to_error!(lines.next(), InvalidInput)?;

    // Match the header against the FASTQ header pattern.
    let captures = none_to_error!(FastqHeaderRegex::extract().captures(&header), InvalidInput);

    // Create the record from the header metadata.
    let mut record = Record {
        seq_id: capture_as_string(&captures, FastqHeaderRegex::SEQID_INDEX),
        description: capture_as_string(&captures, FastqHeaderRegex::DESCRIPTION_INDEX),
        length: 0,
        sequence: vec![],
        quality: vec![]
    };

    // Get the FASTQ sequence.
    let sequence = none_to_error!(lines.next(), InvalidInput)?;
    record.sequence = sequence.into_bytes();
    // `length` is a `u32` field, so the cast truncates for sequences longer
    // than `u32::MAX` bytes.
    record.length = record.sequence.len() as u32;

    // Get the quality header line, which must start with '+'.
    let header = none_to_error!(lines.next(), InvalidInput)?;
    bool_to_error!(header.starts_with('+'), InvalidInput);

    // Get the FASTQ quality scores.
    let quality = none_to_error!(lines.next(), InvalidInput)?;
    record.quality = quality.into_bytes();
    // Compare the untruncated lengths: comparing after an `as u32` cast
    // would treat lengths that differ by a multiple of 2^32 as equal.
    bool_to_error!(record.quality.len() == record.sequence.len(), InvalidRecord);

    Ok(record)
}

// READER -- DEFAULT

/// Iterator to lazily load `Record`s from a document.
///
/// Wraps `FastqIter` and converts the text of each entry to a record.
pub struct FastqRecordIter<T: BufRead> {
    /// Text iterator supplying the raw entries to parse.
    iter: FastqIter<T>
}

impl<T: BufRead> FastqRecordIter<T> {
    /// Build a `FastqRecordIter` on top of a buffered reader.
    ///
    /// The reader is handed to a freshly created `FastqIter`.
    #[inline]
    pub fn new(reader: T) -> Self {
        let iter = FastqIter::new(reader);
        FastqRecordIter { iter }
    }
}

impl<T: BufRead> Iterator for FastqRecordIter<T> {
    type Item = ResultType<Record>;

    /// Fetch the text of the next entry and parse it into a `Record`.
    ///
    /// Returns `None` once the wrapped `FastqIter` is exhausted. An error
    /// from the text iterator is passed through without attempting a parse.
    fn next(&mut self) -> Option<Self::Item> {
        self.iter
            .next()
            .map(|entry| entry.and_then(|text| Record::from_fastq_string(&text)))
    }
}

/// Create default record iterator from reader.
///
/// Shorthand for `FastqRecordIter::new(reader)`; every item is the
/// `ResultType<Record>` produced for one entry.
#[inline(always)]
pub fn iterator_from_fastq<T: BufRead>(reader: T) -> FastqRecordIter<T> {
    FastqRecordIter::new(reader)
}

// READER -- STRICT

/// Iterator to lazily load `Record`s from a document.
///
/// A `StrictIter` adaptor over `FastqRecordIter`, which in turn wraps
/// `FastqIter` and converts the text to records.
// NOTE(review): the strict filtering rules live in `StrictIter` and are not
// visible in this file.
pub type FastqRecordStrictIter<T> = StrictIter<Record, FastqRecordIter<T>>;

/// Create strict record iterator from reader.
///
/// Wraps the default iterator from `iterator_from_fastq` in a
/// `FastqRecordStrictIter`.
#[inline(always)]
pub fn iterator_from_fastq_strict<T: BufRead>(reader: T) -> FastqRecordStrictIter<T> {
    FastqRecordStrictIter::new(iterator_from_fastq(reader))
}

// READER -- LENIENT

/// Iterator to lazily load `Record`s from a document.
///
/// A `LenientIter` adaptor over `FastqRecordIter`, which in turn wraps
/// `FastqIter` and converts the text to records.
// NOTE(review): the lenient filtering rules live in `LenientIter` and are
// not visible in this file.
pub type FastqRecordLenientIter<T> = LenientIter<Record, FastqRecordIter<T>>;

/// Create lenient record iterator from reader.
///
/// Wraps the default iterator from `iterator_from_fastq` in a
/// `FastqRecordLenientIter`.
#[inline(always)]
pub fn iterator_from_fastq_lenient<T: BufRead>(reader: T) -> FastqRecordLenientIter<T> {
    FastqRecordLenientIter::new(iterator_from_fastq(reader))
}

// TRAITS

/// FASTQ support for a single `Record`, implemented by forwarding to the
/// free functions of this module.
impl Fastq for Record {
    /// Estimated number of bytes needed to serialize this record.
    #[inline]
    fn estimate_fastq_size(&self) -> usize {
        estimate_record_size(self)
    }

    /// Write this record to `writer` via `record_to_fastq`.
    #[inline(always)]
    fn to_fastq<T: Write>(&self, writer: &mut T) -> ResultType<()> {
        record_to_fastq(writer, self)
    }

    /// Read a single record from `reader` via `record_from_fastq`.
    fn from_fastq<T: BufRead>(reader: &mut T) -> ResultType<Self> {
        record_from_fastq(reader)
    }
}

/// FASTQ support for a list of records, using the default (neither strict
/// nor lenient) exporter and importer.
impl Fastq for RecordList {
    /// Sum of the estimated sizes of the contained records.
    #[inline]
    fn estimate_fastq_size(&self) -> usize {
        estimate_list_size(self)
    }

    /// Write every record in the list to `writer` with the default exporter.
    #[inline(always)]
    fn to_fastq<T: Write>(&self, writer: &mut T) -> ResultType<()> {
        reference_iterator_to_fastq(writer, self.iter())
    }

    /// Read all records from `reader` with the default iterator.
    ///
    /// The items are collected into a `ResultType<RecordList>`, so the
    /// first record that fails to load becomes the result of the call.
    #[inline(always)]
    fn from_fastq<T: BufRead>(reader: &mut T) -> ResultType<RecordList> {
        iterator_from_fastq(reader).collect()
    }
}

/// Strict and lenient FASTQ import/export for a list of records.
impl FastqCollection for RecordList {
    /// Write the list to `writer` with the strict exporter.
    #[inline(always)]
    fn to_fastq_strict<T: Write>(&self, writer: &mut T) -> ResultType<()> {
        reference_iterator_to_fastq_strict(writer, self.iter())
    }

    /// Write the list to `writer` with the lenient exporter.
    #[inline(always)]
    fn to_fastq_lenient<T: Write>(&self, writer: &mut T) -> ResultType<()> {
        reference_iterator_to_fastq_lenient(writer, self.iter())
    }

    /// Read all records from `reader` with the strict iterator.
    ///
    /// The items are collected into a `ResultType<RecordList>`, so the
    /// first error yielded by the iterator becomes the result of the call.
    #[inline(always)]
    fn from_fastq_strict<T: BufRead>(reader: &mut T) -> ResultType<RecordList> {
        iterator_from_fastq_strict(reader).collect()
    }

    /// Read all records from `reader` with the lenient iterator.
    ///
    /// Items that are errors are discarded by `filter_map(Result::ok)`, so
    /// this always returns `Ok` with whatever records loaded successfully.
    #[inline(always)]
    fn from_fastq_lenient<T: BufRead>(reader: &mut T) -> ResultType<RecordList> {
        Ok(iterator_from_fastq_lenient(reader).filter_map(Result::ok).collect())
    }
}

// TESTS
// -----

/// Unit tests for the FASTQ text iterator.
#[cfg(test)]
mod tests {
    use std::io::{Cursor};
    use super::*;
    //use super::super::test::*;

    /// `FastqIter` splits a document into one string per entry and yields
    /// nothing for empty input.
    #[test]
    fn fastq_iter_test() {
        // Check iterator over data.

        // Two entries; only the first keeps a trailing line break because
        // the document itself does not end with one.
        let s = "@tag desc\nCATTAG\n+tag desc\n;;;;;;\n@tag1 desc1\nTAGCAT\n+tag1 desc1\n;;;;;;";
        let i = FastqIter::new(Cursor::new(s));
        let r: ResultType<Vec<String>> = i.collect();
        assert_eq!(r.unwrap(), &["@tag desc\nCATTAG\n+tag desc\n;;;;;;\n", "@tag1 desc1\nTAGCAT\n+tag1 desc1\n;;;;;;"]);

        // Check iterator over empty string.
        let s = "";
        let i = FastqIter::new(Cursor::new(s));
        let r: ResultType<Vec<String>> = i.collect();
        assert_eq!(r.unwrap(), Vec::<String>::new());
    }

    // TODO(ahuszagh)
    //  Implement the unittests.
}