1#![allow(dead_code)]
39#![allow(missing_docs)]
40
41use crate::error::{IoError, Result};
42use std::fs::File;
43use std::io::{BufRead, BufReader, BufWriter, Write};
44use std::path::Path;
45
46#[derive(Debug, Clone, PartialEq)]
48pub struct FastaRecord {
49 id: String,
51 description: Option<String>,
53 sequence: String,
55}
56
57impl FastaRecord {
58 pub fn new(id: String, sequence: String) -> Self {
60 Self {
61 id,
62 description: None,
63 sequence,
64 }
65 }
66
67 pub fn with_description(id: String, description: String, sequence: String) -> Self {
69 Self {
70 id,
71 description: Some(description),
72 sequence,
73 }
74 }
75
76 pub fn id(&self) -> &str {
78 &self.id
79 }
80
81 pub fn description(&self) -> Option<&str> {
83 self.description.as_deref()
84 }
85
86 pub fn sequence(&self) -> &str {
88 &self.sequence
89 }
90
91 pub fn len(&self) -> usize {
93 self.sequence.len()
94 }
95
96 pub fn is_empty(&self) -> bool {
98 self.sequence.is_empty()
99 }
100
101 pub fn header(&self) -> String {
103 match &self.description {
104 Some(desc) => format!("{} {}", self.id, desc),
105 None => self.id.clone(),
106 }
107 }
108}
109
110pub struct FastaReader {
112 reader: BufReader<File>,
113 line_buffer: String,
114 lookahead: Option<String>,
115}
116
117impl FastaReader {
118 pub fn open<P: AsRef<Path>>(path: P) -> Result<Self> {
120 let file = File::open(path.as_ref())
121 .map_err(|_e| IoError::FileNotFound(path.as_ref().to_string_lossy().to_string()))?;
122 Ok(Self {
123 reader: BufReader::new(file),
124 line_buffer: String::new(),
125 lookahead: None,
126 })
127 }
128
129 pub fn records(&mut self) -> FastaRecordIterator<'_> {
131 FastaRecordIterator { reader: self }
132 }
133
134 fn read_record(&mut self) -> Result<Option<FastaRecord>> {
136 self.line_buffer.clear();
137
138 if let Some(lookahead) = self.lookahead.take() {
140 self.line_buffer = lookahead;
141 } else {
142 loop {
144 if self
145 .reader
146 .read_line(&mut self.line_buffer)
147 .map_err(|e| IoError::ParseError(format!("Failed to read line: {e}")))?
148 == 0
149 {
150 return Ok(None); }
152
153 if self.line_buffer.starts_with('>') {
154 break;
155 }
156 self.line_buffer.clear();
157 }
158 }
159
160 let header = self.line_buffer[1..].trim().to_string();
162 let (id, description) = if let Some(space_pos) = header.find(' ') {
163 let (id_part, desc_part) = header.split_at(space_pos);
164 (id_part.to_string(), Some(desc_part[1..].to_string()))
165 } else {
166 (header, None)
167 };
168
169 let mut sequence = String::new();
171 self.line_buffer.clear();
172
173 loop {
174 let bytes_read = self
175 .reader
176 .read_line(&mut self.line_buffer)
177 .map_err(|e| IoError::ParseError(format!("Failed to read line: {e}")))?;
178
179 if bytes_read == 0 || self.line_buffer.starts_with('>') {
180 if self.line_buffer.starts_with('>') {
182 self.lookahead = Some(self.line_buffer.clone());
184 }
185 break;
186 }
187
188 sequence.push_str(self.line_buffer.trim());
189 if !self.line_buffer.starts_with('>') {
190 self.line_buffer.clear();
191 }
192 }
193
194 Ok(Some(FastaRecord {
195 id,
196 description,
197 sequence,
198 }))
199 }
200}
201
202pub struct FastaRecordIterator<'a> {
204 reader: &'a mut FastaReader,
205}
206
207impl Iterator for FastaRecordIterator<'_> {
208 type Item = Result<FastaRecord>;
209
210 fn next(&mut self) -> Option<Self::Item> {
211 match self.reader.read_record() {
212 Ok(Some(record)) => Some(Ok(record)),
213 Ok(None) => None,
214 Err(e) => Some(Err(e)),
215 }
216 }
217}
218
219pub struct FastaWriter {
221 writer: BufWriter<File>,
222 line_width: usize,
223}
224
225impl FastaWriter {
226 pub fn create<P: AsRef<Path>>(path: P) -> Result<Self> {
228 let file = File::create(path.as_ref())
229 .map_err(|e| IoError::FileError(format!("Failed to create file: {e}")))?;
230 Ok(Self {
231 writer: BufWriter::new(file),
232 line_width: 80, })
234 }
235
236 pub fn set_line_width(&mut self, width: usize) {
238 self.line_width = width;
239 }
240
241 pub fn write_record(&mut self, record: &FastaRecord) -> Result<()> {
243 write!(self.writer, ">{}", record.header())
245 .map_err(|e| IoError::FileError(format!("Failed to write header: {e}")))?;
246 writeln!(self.writer)
247 .map_err(|e| IoError::FileError(format!("Failed to write newline: {e}")))?;
248
249 let sequence = record.sequence();
251 for chunk in sequence.as_bytes().chunks(self.line_width) {
252 self.writer
253 .write_all(chunk)
254 .map_err(|e| IoError::FileError(format!("Failed to write sequence: {e}")))?;
255 writeln!(self.writer)
256 .map_err(|e| IoError::FileError(format!("Failed to write newline: {e}")))?;
257 }
258
259 Ok(())
260 }
261
262 pub fn flush(&mut self) -> Result<()> {
264 self.writer
265 .flush()
266 .map_err(|e| IoError::FileError(format!("Failed to flush: {e}")))
267 }
268}
269
270#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
272pub enum QualityEncoding {
273 #[default]
275 Sanger,
276 Illumina,
278}
279
280#[derive(Debug, Clone, PartialEq)]
282pub struct FastqRecord {
283 id: String,
285 description: Option<String>,
287 sequence: String,
289 quality: String,
291}
292
293impl FastqRecord {
294 pub fn new(id: String, sequence: String, quality: String) -> Result<Self> {
296 if sequence.len() != quality.len() {
297 return Err(IoError::ParseError(format!(
298 "Sequence and quality lengths don't match: {} vs {}",
299 sequence.len(),
300 quality.len()
301 )));
302 }
303
304 Ok(Self {
305 id,
306 description: None,
307 sequence,
308 quality,
309 })
310 }
311
312 pub fn with_description(
314 id: String,
315 description: String,
316 sequence: String,
317 quality: String,
318 ) -> Result<Self> {
319 if sequence.len() != quality.len() {
320 return Err(IoError::ParseError(format!(
321 "Sequence and quality lengths don't match: {} vs {}",
322 sequence.len(),
323 quality.len()
324 )));
325 }
326
327 Ok(Self {
328 id,
329 description: Some(description),
330 sequence,
331 quality,
332 })
333 }
334
335 pub fn id(&self) -> &str {
337 &self.id
338 }
339
340 pub fn description(&self) -> Option<&str> {
342 self.description.as_deref()
343 }
344
345 pub fn sequence(&self) -> &str {
347 &self.sequence
348 }
349
350 pub fn quality(&self) -> &str {
352 &self.quality
353 }
354
355 pub fn len(&self) -> usize {
357 self.sequence.len()
358 }
359
360 pub fn is_empty(&self) -> bool {
362 self.sequence.is_empty()
363 }
364
365 pub fn quality_scores(&self, encoding: QualityEncoding) -> Vec<u8> {
367 let offset = match encoding {
368 QualityEncoding::Sanger => 33,
369 QualityEncoding::Illumina => 64,
370 };
371
372 self.quality
373 .bytes()
374 .map(|b| b.saturating_sub(offset))
375 .collect()
376 }
377
378 pub fn header(&self) -> String {
380 match &self.description {
381 Some(desc) => format!("{} {}", self.id, desc),
382 None => self.id.clone(),
383 }
384 }
385}
386
387pub struct FastqReader {
389 reader: BufReader<File>,
390 #[allow(dead_code)]
391 encoding: QualityEncoding,
392 line_buffer: String,
393}
394
395impl FastqReader {
396 pub fn open<P: AsRef<Path>>(path: P) -> Result<Self> {
398 let file = File::open(path.as_ref())
399 .map_err(|_e| IoError::FileNotFound(path.as_ref().to_string_lossy().to_string()))?;
400 Ok(Self {
401 reader: BufReader::new(file),
402 encoding: QualityEncoding::default(),
403 line_buffer: String::new(),
404 })
405 }
406
407 pub fn open_with_encoding<P: AsRef<Path>>(path: P, encoding: QualityEncoding) -> Result<Self> {
409 let file = File::open(path.as_ref())
410 .map_err(|_e| IoError::FileNotFound(path.as_ref().to_string_lossy().to_string()))?;
411 Ok(Self {
412 reader: BufReader::new(file),
413 encoding,
414 line_buffer: String::new(),
415 })
416 }
417
418 pub fn records(&mut self) -> FastqRecordIterator<'_> {
420 FastqRecordIterator { reader: self }
421 }
422
423 fn read_record(&mut self) -> Result<Option<FastqRecord>> {
425 self.line_buffer.clear();
427 if self
428 .reader
429 .read_line(&mut self.line_buffer)
430 .map_err(|e| IoError::ParseError(format!("Failed to read header: {e}")))?
431 == 0
432 {
433 return Ok(None); }
435
436 if !self.line_buffer.starts_with('@') {
437 return Err(IoError::ParseError(format!(
438 "Expected '@' at start of header, found: {}",
439 self.line_buffer.trim()
440 )));
441 }
442
443 let header = self.line_buffer[1..].trim().to_string();
445 let (id, description) = if let Some(space_pos) = header.find(' ') {
446 let (id_part, desc_part) = header.split_at(space_pos);
447 (id_part.to_string(), Some(desc_part[1..].to_string()))
448 } else {
449 (header, None)
450 };
451
452 self.line_buffer.clear();
454 self.reader
455 .read_line(&mut self.line_buffer)
456 .map_err(|e| IoError::ParseError(format!("Failed to read sequence: {e}")))?;
457 let sequence = self.line_buffer.trim().to_string();
458
459 self.line_buffer.clear();
461 self.reader
462 .read_line(&mut self.line_buffer)
463 .map_err(|e| IoError::ParseError(format!("Failed to read separator: {e}")))?;
464 if !self.line_buffer.starts_with('+') {
465 return Err(IoError::ParseError(format!(
466 "Expected '+' separator, found: {}",
467 self.line_buffer.trim()
468 )));
469 }
470
471 self.line_buffer.clear();
473 self.reader
474 .read_line(&mut self.line_buffer)
475 .map_err(|e| IoError::ParseError(format!("Failed to read quality: {e}")))?;
476 let quality = self.line_buffer.trim().to_string();
477
478 FastqRecord::new(id.clone(), sequence.clone(), quality.clone())
479 .or_else(|_| {
480 FastqRecord::with_description(
481 id,
482 description.unwrap_or_default(),
483 sequence,
484 quality,
485 )
486 })
487 .map(Some)
488 }
489}
490
491pub struct FastqRecordIterator<'a> {
493 reader: &'a mut FastqReader,
494}
495
496impl Iterator for FastqRecordIterator<'_> {
497 type Item = Result<FastqRecord>;
498
499 fn next(&mut self) -> Option<Self::Item> {
500 match self.reader.read_record() {
501 Ok(Some(record)) => Some(Ok(record)),
502 Ok(None) => None,
503 Err(e) => Some(Err(e)),
504 }
505 }
506}
507
508pub struct FastqWriter {
510 writer: BufWriter<File>,
511 encoding: QualityEncoding,
512}
513
514impl FastqWriter {
515 pub fn create<P: AsRef<Path>>(path: P) -> Result<Self> {
517 let file = File::create(path.as_ref())
518 .map_err(|e| IoError::FileError(format!("Failed to create file: {e}")))?;
519 Ok(Self {
520 writer: BufWriter::new(file),
521 encoding: QualityEncoding::default(),
522 })
523 }
524
525 pub fn create_with_encoding<P: AsRef<Path>>(
527 path: P,
528 encoding: QualityEncoding,
529 ) -> Result<Self> {
530 let file = File::create(path.as_ref())
531 .map_err(|e| IoError::FileError(format!("Failed to create file: {e}")))?;
532 Ok(Self {
533 writer: BufWriter::new(file),
534 encoding,
535 })
536 }
537
538 pub fn write_record(&mut self, record: &FastqRecord) -> Result<()> {
540 writeln!(self.writer, "@{}", record.header())
542 .map_err(|e| IoError::FileError(format!("Failed to write header: {e}")))?;
543
544 writeln!(self.writer, "{}", record.sequence())
546 .map_err(|e| IoError::FileError(format!("Failed to write sequence: {e}")))?;
547
548 writeln!(self.writer, "+")
550 .map_err(|e| IoError::FileError(format!("Failed to write separator: {e}")))?;
551
552 writeln!(self.writer, "{}", record.quality())
554 .map_err(|e| IoError::FileError(format!("Failed to write quality: {e}")))?;
555
556 Ok(())
557 }
558
559 pub fn write_record_with_scores(
561 &mut self,
562 id: &str,
563 sequence: &str,
564 quality_scores: &[u8],
565 ) -> Result<()> {
566 if sequence.len() != quality_scores.len() {
567 return Err(IoError::FileError(format!(
568 "Sequence and quality lengths don't match: {} vs {}",
569 sequence.len(),
570 quality_scores.len()
571 )));
572 }
573
574 let offset = match self.encoding {
575 QualityEncoding::Sanger => 33,
576 QualityEncoding::Illumina => 64,
577 };
578
579 let quality_string: String = quality_scores
580 .iter()
581 .map(|&score| (score.saturating_add(offset)) as char)
582 .collect();
583
584 let record = FastqRecord::new(id.to_string(), sequence.to_string(), quality_string)?;
585 self.write_record(&record)
586 }
587
588 pub fn flush(&mut self) -> Result<()> {
590 self.writer
591 .flush()
592 .map_err(|e| IoError::FileError(format!("Failed to flush: {e}")))
593 }
594}
595
596#[allow(dead_code)]
598pub fn count_fastasequences<P: AsRef<Path>>(path: P) -> Result<usize> {
599 let file = File::open(path.as_ref())
600 .map_err(|_e| IoError::FileNotFound(path.as_ref().to_string_lossy().to_string()))?;
601 let reader = BufReader::new(file);
602
603 let count = reader
604 .lines()
605 .map_while(|result| result.ok())
606 .filter(|line| line.starts_with('>'))
607 .count();
608
609 Ok(count)
610}
611
612#[allow(dead_code)]
614pub fn count_fastqsequences<P: AsRef<Path>>(path: P) -> Result<usize> {
615 let file = File::open(path.as_ref())
616 .map_err(|_e| IoError::FileNotFound(path.as_ref().to_string_lossy().to_string()))?;
617 let reader = BufReader::new(file);
618
619 let line_count = reader.lines().count();
620 if line_count % 4 != 0 {
621 return Err(IoError::ParseError(format!(
622 "Invalid FASTQ file: line count {line_count} is not divisible by 4"
623 )));
624 }
625
626 Ok(line_count / 4)
627}
628
629pub mod analysis {
631 use super::*;
632 use std::collections::HashMap;
633
634 pub fn gc_content(sequence: &str) -> f64 {
636 if sequence.is_empty() {
637 return 0.0;
638 }
639
640 let gc_count = sequence
641 .chars()
642 .filter(|&c| c == 'G' || c == 'C' || c == 'g' || c == 'c')
643 .count();
644
645 gc_count as f64 / sequence.len() as f64
646 }
647
648 pub fn nucleotide_composition(sequence: &str) -> HashMap<char, usize> {
650 let mut composition = HashMap::new();
651
652 for nucleotide in sequence.chars() {
653 *composition
654 .entry(nucleotide.to_ascii_uppercase())
655 .or_insert(0) += 1;
656 }
657
658 composition
659 }
660
661 pub fn reverse_complement(sequence: &str) -> String {
663 sequence
664 .chars()
665 .rev()
666 .map(|c| match c.to_ascii_uppercase() {
667 'A' => 'T',
668 'T' => 'A',
669 'G' => 'C',
670 'C' => 'G',
671 'U' => 'A', 'N' => 'N', _ => c,
674 })
675 .collect()
676 }
677
678 pub fn translate_dna(sequence: &str) -> String {
680 let codon_table = get_standard_genetic_code();
681
682 sequence
683 .chars()
684 .collect::<Vec<_>>()
685 .chunks(3)
686 .filter_map(|codon| {
687 if codon.len() == 3 {
688 let codon_str: String = codon.iter().collect::<String>().to_uppercase();
689 Some(codon_table.get(&codon_str).unwrap_or(&'X').to_owned())
690 } else {
691 None
692 }
693 })
694 .collect()
695 }
696
697 fn get_standard_genetic_code() -> HashMap<String, char> {
699 let mut code = HashMap::new();
700
701 let codons = [
703 ("TTT", 'F'),
704 ("TTC", 'F'),
705 ("TTA", 'L'),
706 ("TTG", 'L'),
707 ("TCT", 'S'),
708 ("TCC", 'S'),
709 ("TCA", 'S'),
710 ("TCG", 'S'),
711 ("TAT", 'Y'),
712 ("TAC", 'Y'),
713 ("TAA", '*'),
714 ("TAG", '*'),
715 ("TGT", 'C'),
716 ("TGC", 'C'),
717 ("TGA", '*'),
718 ("TGG", 'W'),
719 ("CTT", 'L'),
720 ("CTC", 'L'),
721 ("CTA", 'L'),
722 ("CTG", 'L'),
723 ("CCT", 'P'),
724 ("CCC", 'P'),
725 ("CCA", 'P'),
726 ("CCG", 'P'),
727 ("CAT", 'H'),
728 ("CAC", 'H'),
729 ("CAA", 'Q'),
730 ("CAG", 'Q'),
731 ("CGT", 'R'),
732 ("CGC", 'R'),
733 ("CGA", 'R'),
734 ("CGG", 'R'),
735 ("ATT", 'I'),
736 ("ATC", 'I'),
737 ("ATA", 'I'),
738 ("ATG", 'M'),
739 ("ACT", 'T'),
740 ("ACC", 'T'),
741 ("ACA", 'T'),
742 ("ACG", 'T'),
743 ("AAT", 'N'),
744 ("AAC", 'N'),
745 ("AAA", 'K'),
746 ("AAG", 'K'),
747 ("AGT", 'S'),
748 ("AGC", 'S'),
749 ("AGA", 'R'),
750 ("AGG", 'R'),
751 ("GTT", 'V'),
752 ("GTC", 'V'),
753 ("GTA", 'V'),
754 ("GTG", 'V'),
755 ("GCT", 'A'),
756 ("GCC", 'A'),
757 ("GCA", 'A'),
758 ("GCG", 'A'),
759 ("GAT", 'D'),
760 ("GAC", 'D'),
761 ("GAA", 'E'),
762 ("GAG", 'E'),
763 ("GGT", 'G'),
764 ("GGC", 'G'),
765 ("GGA", 'G'),
766 ("GGG", 'G'),
767 ];
768
769 for (codon, amino_acid) in &codons {
770 code.insert(codon.to_string(), *amino_acid);
771 }
772
773 code
774 }
775
776 pub fn find_orfs(sequence: &str, minlength: usize) -> Vec<Orf> {
778 let mut orfs = Vec::new();
779 let seq_upper = sequence.to_uppercase();
780
781 for frame in 0..3 {
783 let frame_seq = &seq_upper[frame..];
784 let mut start_pos = None;
785
786 for (pos, codon) in frame_seq.chars().collect::<Vec<_>>().chunks(3).enumerate() {
787 if codon.len() < 3 {
788 break;
789 }
790
791 let codon_str: String = codon.iter().collect();
792
793 if codon_str == "ATG" && start_pos.is_none() {
795 start_pos = Some(frame + pos * 3);
796 }
797
798 if matches!(codon_str.as_str(), "TAA" | "TAG" | "TGA") {
800 if let Some(start) = start_pos {
801 let length = frame + pos * 3 + 3 - start;
802 if length >= minlength {
803 let orf_seq = &sequence[start..start + length];
804 orfs.push(Orf {
805 start_pos: start,
806 end_pos: start + length,
807 frame: frame as i8,
808 sequence: orf_seq.to_string(),
809 protein: translate_dna(orf_seq),
810 });
811 }
812 start_pos = None;
813 }
814 }
815 }
816 }
817
818 orfs
819 }
820
821 #[derive(Debug, Clone, PartialEq)]
823 pub struct Orf {
824 pub start_pos: usize,
825 pub end_pos: usize,
826 pub frame: i8,
827 pub sequence: String,
828 pub protein: String,
829 }
830
831 impl Orf {
832 pub fn length(&self) -> usize {
833 self.sequence.len()
834 }
835 }
836
837 pub fn sequence_stats(records: &[FastaRecord]) -> SequenceStats {
839 if records.is_empty() {
840 return SequenceStats::default();
841 }
842
843 let lengths: Vec<usize> = records.iter().map(|r| r.len()).collect();
844 let totallength: usize = lengths.iter().sum();
845 let minlength = *lengths.iter().min().unwrap();
846 let maxlength = *lengths.iter().max().unwrap();
847 let meanlength = totallength as f64 / records.len() as f64;
848
849 let mut sortedlengths = lengths.clone();
851 sortedlengths.sort_by(|a, b| b.cmp(a)); let mut cumulative = 0;
853 let half_total = totallength / 2;
854 let mut n50 = 0;
855
856 for length in sortedlengths {
857 cumulative += length;
858 if cumulative >= half_total {
859 n50 = length;
860 break;
861 }
862 }
863
864 let total_gc: usize = records
866 .iter()
867 .map(|r| {
868 r.sequence()
869 .chars()
870 .filter(|&c| c == 'G' || c == 'C' || c == 'g' || c == 'c')
871 .count()
872 })
873 .sum();
874 let gc_content = total_gc as f64 / totallength as f64;
875
876 SequenceStats {
877 numsequences: records.len(),
878 totallength,
879 minlength,
880 maxlength,
881 meanlength,
882 n50,
883 gc_content,
884 }
885 }
886
887 #[derive(Debug, Clone, PartialEq)]
889 pub struct SequenceStats {
890 pub numsequences: usize,
891 pub totallength: usize,
892 pub minlength: usize,
893 pub maxlength: usize,
894 pub meanlength: f64,
895 pub n50: usize,
896 pub gc_content: f64,
897 }
898
899 impl Default for SequenceStats {
900 fn default() -> Self {
901 Self {
902 numsequences: 0,
903 totallength: 0,
904 minlength: 0,
905 maxlength: 0,
906 meanlength: 0.0,
907 n50: 0,
908 gc_content: 0.0,
909 }
910 }
911 }
912
913 pub fn quality_stats(records: &[FastqRecord], encoding: QualityEncoding) -> QualityStats {
915 if records.is_empty() {
916 return QualityStats::default();
917 }
918
919 let mut all_scores = Vec::new();
920 let mut position_scores: HashMap<usize, Vec<u8>> = HashMap::new();
921
922 for record in records {
923 let scores = record.quality_scores(encoding);
924 all_scores.extend_from_slice(&scores);
925
926 for (pos, &score) in scores.iter().enumerate() {
927 position_scores.entry(pos).or_default().push(score);
928 }
929 }
930
931 let mean_quality = all_scores.iter().sum::<u8>() as f64 / all_scores.len() as f64;
932 let min_quality = *all_scores.iter().min().unwrap_or(&0);
933 let max_quality = *all_scores.iter().max().unwrap_or(&0);
934
935 let mut per_position_mean = Vec::new();
937 let max_pos = position_scores.keys().max().unwrap_or(&0);
938
939 for pos in 0..=*max_pos {
940 if let Some(scores) = position_scores.get(&pos) {
941 let mean = scores.iter().sum::<u8>() as f64 / scores.len() as f64;
942 per_position_mean.push(mean);
943 } else {
944 per_position_mean.push(0.0);
945 }
946 }
947
948 QualityStats {
949 mean_quality,
950 min_quality,
951 max_quality,
952 per_position_mean,
953 }
954 }
955
956 #[derive(Debug, Clone, PartialEq)]
958 pub struct QualityStats {
959 pub mean_quality: f64,
960 pub min_quality: u8,
961 pub max_quality: u8,
962 pub per_position_mean: Vec<f64>,
963 }
964
965 impl Default for QualityStats {
966 fn default() -> Self {
967 Self {
968 mean_quality: 0.0,
969 min_quality: 0,
970 max_quality: 0,
971 per_position_mean: Vec::new(),
972 }
973 }
974 }
975}
976
977#[cfg(test)]
978mod tests {
979 use super::*;
980 use tempfile::NamedTempFile;
981
982 #[test]
983 fn test_fasta_read_write() -> Result<()> {
984 let temp_file = NamedTempFile::new().unwrap();
985 let path = temp_file.path();
986
987 {
989 let mut writer = FastaWriter::create(path)?;
990 writer.write_record(&FastaRecord::new(
991 "seq1".to_string(),
992 "ATCGATCGATCG".to_string(),
993 ))?;
994 writer.write_record(&FastaRecord::with_description(
995 "seq2".to_string(),
996 "test sequence".to_string(),
997 "GCTAGCTAGCTA".to_string(),
998 ))?;
999 writer.flush()?;
1000 }
1001
1002 {
1004 let mut reader = FastaReader::open(path)?;
1005 let records: Vec<_> = reader.records().collect::<Result<Vec<_>>>()?;
1006
1007 assert_eq!(records.len(), 2);
1008 assert_eq!(records[0].id(), "seq1");
1009 assert_eq!(records[0].sequence(), "ATCGATCGATCG");
1010 assert_eq!(records[1].id(), "seq2");
1011 assert_eq!(records[1].description(), Some("test sequence"));
1012 assert_eq!(records[1].sequence(), "GCTAGCTAGCTA");
1013 }
1014
1015 Ok(())
1016 }
1017
1018 #[test]
1019 fn test_fastq_read_write() -> Result<()> {
1020 let temp_file = NamedTempFile::new().unwrap();
1021 let path = temp_file.path();
1022
1023 {
1025 let mut writer = FastqWriter::create(path)?;
1026 writer.write_record(&FastqRecord::new(
1027 "read1".to_string(),
1028 "ATCG".to_string(),
1029 "IIII".to_string(),
1030 )?)?;
1031 writer.write_record_with_scores("read2", "GCTA", &[30, 35, 40, 35])?;
1032 writer.flush()?;
1033 }
1034
1035 {
1037 let mut reader = FastqReader::open(path)?;
1038 let records: Vec<_> = reader.records().collect::<Result<Vec<_>>>()?;
1039
1040 assert_eq!(records.len(), 2);
1041 assert_eq!(records[0].id(), "read1");
1042 assert_eq!(records[0].sequence(), "ATCG");
1043 assert_eq!(records[0].quality(), "IIII");
1044
1045 assert_eq!(records[1].id(), "read2");
1046 assert_eq!(records[1].sequence(), "GCTA");
1047 let scores = records[1].quality_scores(QualityEncoding::Sanger);
1048 assert_eq!(scores, vec![30, 35, 40, 35]);
1049 }
1050
1051 Ok(())
1052 }
1053
1054 #[test]
1055 fn testsequence_counting() -> Result<()> {
1056 let fasta_file = NamedTempFile::new().unwrap();
1057 let fastq_file = NamedTempFile::new().unwrap();
1058
1059 {
1061 let mut writer = FastaWriter::create(fasta_file.path())?;
1062 for i in 0..5 {
1063 writer.write_record(&FastaRecord::new(format!("seq{}", i), "ATCG".to_string()))?;
1064 }
1065 writer.flush()?;
1066 }
1067
1068 {
1070 let mut writer = FastqWriter::create(fastq_file.path())?;
1071 for i in 0..3 {
1072 writer.write_record(&FastqRecord::new(
1073 format!("read{}", i),
1074 "ATCG".to_string(),
1075 "IIII".to_string(),
1076 )?)?;
1077 }
1078 writer.flush()?;
1079 }
1080
1081 assert_eq!(count_fastasequences(fasta_file.path())?, 5);
1082 assert_eq!(count_fastqsequences(fastq_file.path())?, 3);
1083
1084 Ok(())
1085 }
1086}