Skip to main content

scirs2_core/bioinformatics/
sequence.rs

1//! Sequence analysis for bioinformatics
2//!
3//! Provides types and functions for working with biological sequences:
4//! DNA, RNA, and protein sequences with validation, transformation,
5//! and analysis operations.
6
7use std::collections::HashMap;
8use std::fmt;
9
10use crate::error::{CoreError, CoreResult};
11
12/// Represents a single amino acid using the standard one-letter code.
13#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
14pub enum AminoAcid {
15    /// Alanine (A)
16    Ala,
17    /// Arginine (R)
18    Arg,
19    /// Asparagine (N)
20    Asn,
21    /// Aspartic acid (D)
22    Asp,
23    /// Cysteine (C)
24    Cys,
25    /// Glutamine (Q)
26    Gln,
27    /// Glutamic acid (E)
28    Glu,
29    /// Glycine (G)
30    Gly,
31    /// Histidine (H)
32    His,
33    /// Isoleucine (I)
34    Ile,
35    /// Leucine (L)
36    Leu,
37    /// Lysine (K)
38    Lys,
39    /// Methionine (M) — also start codon
40    Met,
41    /// Phenylalanine (F)
42    Phe,
43    /// Proline (P)
44    Pro,
45    /// Serine (S)
46    Ser,
47    /// Threonine (T)
48    Thr,
49    /// Tryptophan (W)
50    Trp,
51    /// Tyrosine (Y)
52    Tyr,
53    /// Valine (V)
54    Val,
55    /// Stop codon (*)
56    Stop,
57}
58
59impl AminoAcid {
60    /// Returns the one-letter code for this amino acid.
61    ///
62    /// Stop codons are represented as `*`.
63    #[must_use]
64    pub fn one_letter(&self) -> char {
65        match self {
66            AminoAcid::Ala => 'A',
67            AminoAcid::Arg => 'R',
68            AminoAcid::Asn => 'N',
69            AminoAcid::Asp => 'D',
70            AminoAcid::Cys => 'C',
71            AminoAcid::Gln => 'Q',
72            AminoAcid::Glu => 'E',
73            AminoAcid::Gly => 'G',
74            AminoAcid::His => 'H',
75            AminoAcid::Ile => 'I',
76            AminoAcid::Leu => 'L',
77            AminoAcid::Lys => 'K',
78            AminoAcid::Met => 'M',
79            AminoAcid::Phe => 'F',
80            AminoAcid::Pro => 'P',
81            AminoAcid::Ser => 'S',
82            AminoAcid::Thr => 'T',
83            AminoAcid::Trp => 'W',
84            AminoAcid::Tyr => 'Y',
85            AminoAcid::Val => 'V',
86            AminoAcid::Stop => '*',
87        }
88    }
89
90    /// Returns the three-letter code for this amino acid.
91    #[must_use]
92    pub fn three_letter(&self) -> &'static str {
93        match self {
94            AminoAcid::Ala => "Ala",
95            AminoAcid::Arg => "Arg",
96            AminoAcid::Asn => "Asn",
97            AminoAcid::Asp => "Asp",
98            AminoAcid::Cys => "Cys",
99            AminoAcid::Gln => "Gln",
100            AminoAcid::Glu => "Glu",
101            AminoAcid::Gly => "Gly",
102            AminoAcid::His => "His",
103            AminoAcid::Ile => "Ile",
104            AminoAcid::Leu => "Leu",
105            AminoAcid::Lys => "Lys",
106            AminoAcid::Met => "Met",
107            AminoAcid::Phe => "Phe",
108            AminoAcid::Pro => "Pro",
109            AminoAcid::Ser => "Ser",
110            AminoAcid::Thr => "Thr",
111            AminoAcid::Trp => "Trp",
112            AminoAcid::Tyr => "Tyr",
113            AminoAcid::Val => "Val",
114            AminoAcid::Stop => "Stop",
115        }
116    }
117
118    /// Returns `true` if this amino acid is a stop codon.
119    #[must_use]
120    pub fn is_stop(&self) -> bool {
121        matches!(self, AminoAcid::Stop)
122    }
123}
124
125impl fmt::Display for AminoAcid {
126    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
127        write!(f, "{}", self.one_letter())
128    }
129}
130
131/// The type of nucleotide sequence.
132#[derive(Debug, Clone, Copy, PartialEq, Eq)]
133pub enum SequenceType {
134    /// DNA: A, T, G, C (and N for ambiguous)
135    Dna,
136    /// RNA: A, U, G, C (and N for ambiguous)
137    Rna,
138}
139
140/// A validated nucleotide sequence (DNA or RNA).
141///
142/// The sequence is stored as uppercase ASCII bytes. Valid bases are:
143/// - DNA: A, T, G, C, N (ambiguous)
144/// - RNA: A, U, G, C, N (ambiguous)
145///
146/// # Examples
147///
148/// ```rust
149/// use scirs2_core::bioinformatics::sequence::{NucleotideSequence, SequenceType};
150///
151/// let dna = NucleotideSequence::new(b"ATGCATGC", SequenceType::Dna).expect("should succeed");
152/// assert_eq!(dna.len(), 8);
153/// assert_eq!(dna.gc_content(), 0.5);
154/// ```
155#[derive(Debug, Clone, PartialEq, Eq)]
156pub struct NucleotideSequence {
157    data: Vec<u8>,
158    seq_type: SequenceType,
159}
160
161impl NucleotideSequence {
162    /// Creates a new validated nucleotide sequence.
163    ///
164    /// Accepts both upper and lowercase input; internally stores uppercase.
165    ///
166    /// # Errors
167    ///
168    /// Returns `CoreError::ValueError` if the sequence contains invalid characters
169    /// for the given sequence type.
170    pub fn new(seq: &[u8], seq_type: SequenceType) -> CoreResult<Self> {
171        let data: Vec<u8> = seq.iter().map(|&b| b.to_ascii_uppercase()).collect();
172        for (i, &base) in data.iter().enumerate() {
173            if !is_valid_base(base, seq_type) {
174                return Err(CoreError::ValueError(crate::error_context!(format!(
175                    "Invalid {} base '{}' at position {}",
176                    match seq_type {
177                        SequenceType::Dna => "DNA",
178                        SequenceType::Rna => "RNA",
179                    },
180                    base as char,
181                    i
182                ))));
183            }
184        }
185        Ok(Self { data, seq_type })
186    }
187
188    /// Returns the underlying byte slice.
189    #[must_use]
190    pub fn as_bytes(&self) -> &[u8] {
191        &self.data
192    }
193
194    /// Returns the length of the sequence.
195    #[must_use]
196    pub fn len(&self) -> usize {
197        self.data.len()
198    }
199
200    /// Returns `true` if the sequence is empty.
201    #[must_use]
202    pub fn is_empty(&self) -> bool {
203        self.data.is_empty()
204    }
205
206    /// Returns the sequence type (DNA or RNA).
207    #[must_use]
208    pub fn seq_type(&self) -> SequenceType {
209        self.seq_type
210    }
211
212    /// Computes the GC content (fraction of G+C bases) of this sequence.
213    ///
214    /// Returns 0.0 if the sequence is empty.
215    #[must_use]
216    pub fn gc_content(&self) -> f64 {
217        gc_content(&self.data)
218    }
219
220    /// Returns the reverse complement of this DNA sequence.
221    ///
222    /// # Errors
223    ///
224    /// Returns `CoreError::ValueError` if called on an RNA sequence.
225    pub fn reverse_complement(&self) -> CoreResult<NucleotideSequence> {
226        if self.seq_type != SequenceType::Dna {
227            return Err(CoreError::ValueError(crate::error_context!(
228                "reverse_complement is only defined for DNA sequences"
229            )));
230        }
231        let rc = reverse_complement(&self.data);
232        Ok(NucleotideSequence {
233            data: rc,
234            seq_type: SequenceType::Dna,
235        })
236    }
237
238    /// Translates this DNA sequence into a protein sequence.
239    ///
240    /// Reads codons starting from position 0.  Translation stops at the first
241    /// stop codon or when fewer than 3 bases remain.
242    ///
243    /// # Errors
244    ///
245    /// Returns `CoreError::ValueError` if this is an RNA sequence.
246    pub fn translate(&self) -> CoreResult<Vec<AminoAcid>> {
247        if self.seq_type != SequenceType::Dna {
248            return Err(CoreError::ValueError(crate::error_context!(
249                "translate is only defined for DNA sequences"
250            )));
251        }
252        translate(&self.data)
253    }
254
255    /// Counts all k-mers (substrings of length `k`) in the sequence.
256    ///
257    /// # Errors
258    ///
259    /// Returns `CoreError::ValueError` if `k` is 0 or exceeds the sequence length.
260    pub fn kmer_count(&self, k: usize) -> CoreResult<HashMap<Vec<u8>, usize>> {
261        kmer_count(&self.data, k)
262    }
263}
264
265impl fmt::Display for NucleotideSequence {
266    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
267        // SAFETY: data contains only valid ASCII because new() validated it.
268        let s = std::str::from_utf8(&self.data).map_err(|_| fmt::Error)?;
269        write!(f, "{s}")
270    }
271}
272
273// ─── Free-standing functions ──────────────────────────────────────────────────
274
275/// Returns `true` if `base` is a valid nucleotide for the given sequence type.
276///
277/// Accepts uppercase letters only (callers are responsible for normalisation).
278#[must_use]
279pub fn is_valid_base(base: u8, seq_type: SequenceType) -> bool {
280    match seq_type {
281        SequenceType::Dna => matches!(base, b'A' | b'T' | b'G' | b'C' | b'N'),
282        SequenceType::Rna => matches!(base, b'A' | b'U' | b'G' | b'C' | b'N'),
283    }
284}
285
286/// Returns the Watson-Crick complement of a DNA base.
287///
288/// Non-standard bases (including `N`) are left unchanged.
289#[must_use]
290pub fn complement_base(base: u8) -> u8 {
291    match base.to_ascii_uppercase() {
292        b'A' => b'T',
293        b'T' => b'A',
294        b'G' => b'C',
295        b'C' => b'G',
296        other => other,
297    }
298}
299
300/// Returns the complement of a DNA sequence.
301///
302/// Each base is complemented in-place without reversing.
303///
304/// # Examples
305///
306/// ```rust
307/// use scirs2_core::bioinformatics::sequence::complement;
308///
309/// let seq = b"ATGC";
310/// let comp = complement(seq);
311/// assert_eq!(comp, b"TACG");
312/// ```
313#[must_use]
314pub fn complement(seq: &[u8]) -> Vec<u8> {
315    seq.iter().map(|&b| complement_base(b)).collect()
316}
317
318/// Returns the reverse complement of a DNA sequence.
319///
320/// Equivalent to `complement(seq)` reversed.
321///
322/// # Examples
323///
324/// ```rust
325/// use scirs2_core::bioinformatics::sequence::reverse_complement;
326///
327/// let seq = b"ATGCTT";
328/// let rc = reverse_complement(seq);
329/// assert_eq!(rc, b"AAGCAT");
330/// ```
331#[must_use]
332pub fn reverse_complement(seq: &[u8]) -> Vec<u8> {
333    seq.iter().rev().map(|&b| complement_base(b)).collect()
334}
335
336/// Returns the GC content (fraction of G+C bases) in `seq`.
337///
338/// Returns `0.0` for empty sequences.
339///
340/// # Examples
341///
342/// ```rust
343/// use scirs2_core::bioinformatics::sequence::gc_content;
344///
345/// let seq = b"ATGCATGC";
346/// let gc = gc_content(seq);
347/// assert!((gc - 0.5).abs() < 1e-10);
348/// ```
349#[must_use]
350pub fn gc_content(seq: &[u8]) -> f64 {
351    if seq.is_empty() {
352        return 0.0;
353    }
354    let gc = seq
355        .iter()
356        .filter(|&&b| {
357            let ub = b.to_ascii_uppercase();
358            ub == b'G' || ub == b'C'
359        })
360        .count();
361    gc as f64 / seq.len() as f64
362}
363
364/// Translates a DNA sequence (5'→3') into a protein sequence using the
365/// standard genetic code (NCBI translation table 1).
366///
367/// Translation begins at position 0 and reads complete codons.  The resulting
368/// vector includes `AminoAcid::Stop` for stop codons.  After the first stop
369/// codon no further codons are translated.
370///
371/// # Errors
372///
373/// Returns `CoreError::ValueError` for unknown codons (e.g. sequences
374/// containing `N`).
375///
376/// # Examples
377///
378/// ```rust
379/// use scirs2_core::bioinformatics::sequence::{translate, AminoAcid};
380///
381/// let protein = translate(b"ATGAAATAA").expect("should succeed");
382/// assert_eq!(protein, vec![AminoAcid::Met, AminoAcid::Lys, AminoAcid::Stop]);
383/// ```
384pub fn translate(dna: &[u8]) -> CoreResult<Vec<AminoAcid>> {
385    let codon_table = build_codon_table();
386    let mut protein = Vec::new();
387
388    let upper: Vec<u8> = dna.iter().map(|&b| b.to_ascii_uppercase()).collect();
389
390    let mut i = 0;
391    while i + 3 <= upper.len() {
392        let codon: [u8; 3] = [upper[i], upper[i + 1], upper[i + 2]];
393        let aa = codon_table.get(&codon).copied().ok_or_else(|| {
394            CoreError::ValueError(crate::error_context!(format!(
395                "Unknown codon: {}{}{}",
396                codon[0] as char, codon[1] as char, codon[2] as char
397            )))
398        })?;
399        protein.push(aa);
400        if aa == AminoAcid::Stop {
401            break;
402        }
403        i += 3;
404    }
405    Ok(protein)
406}
407
408/// Counts all k-mers (substrings of length `k`) in `seq`.
409///
410/// # Errors
411///
412/// Returns `CoreError::ValueError` if `k` is zero or greater than the sequence length.
413///
414/// # Examples
415///
416/// ```rust
417/// use scirs2_core::bioinformatics::sequence::kmer_count;
418///
419/// let counts = kmer_count(b"ATAT", 2).expect("should succeed");
420/// assert_eq!(*counts.get(b"AT".as_ref()).expect("should succeed"), 2);
421/// assert_eq!(*counts.get(b"TA".as_ref()).expect("should succeed"), 1);
422/// ```
423pub fn kmer_count(seq: &[u8], k: usize) -> CoreResult<HashMap<Vec<u8>, usize>> {
424    if k == 0 {
425        return Err(CoreError::ValueError(crate::error_context!(
426            "k must be at least 1"
427        )));
428    }
429    if k > seq.len() {
430        return Err(CoreError::ValueError(crate::error_context!(format!(
431            "k ({k}) must not exceed sequence length ({})",
432            seq.len()
433        ))));
434    }
435
436    let upper: Vec<u8> = seq.iter().map(|&b| b.to_ascii_uppercase()).collect();
437    let mut counts: HashMap<Vec<u8>, usize> = HashMap::new();
438
439    for window in upper.windows(k) {
440        *counts.entry(window.to_vec()).or_insert(0) += 1;
441    }
442    Ok(counts)
443}
444
445// ─── Standard codon table ─────────────────────────────────────────────────────
446
447/// Builds the standard genetic code (NCBI table 1) mapping `[u8; 3]` codons
448/// to `AminoAcid` values.
449#[must_use]
450pub fn build_codon_table() -> HashMap<[u8; 3], AminoAcid> {
451    let mut t = HashMap::new();
452
453    // TTx
454    t.insert(*b"TTT", AminoAcid::Phe);
455    t.insert(*b"TTC", AminoAcid::Phe);
456    t.insert(*b"TTA", AminoAcid::Leu);
457    t.insert(*b"TTG", AminoAcid::Leu);
458
459    // TCx
460    t.insert(*b"TCT", AminoAcid::Ser);
461    t.insert(*b"TCC", AminoAcid::Ser);
462    t.insert(*b"TCA", AminoAcid::Ser);
463    t.insert(*b"TCG", AminoAcid::Ser);
464
465    // TAx
466    t.insert(*b"TAT", AminoAcid::Tyr);
467    t.insert(*b"TAC", AminoAcid::Tyr);
468    t.insert(*b"TAA", AminoAcid::Stop);
469    t.insert(*b"TAG", AminoAcid::Stop);
470
471    // TGx
472    t.insert(*b"TGT", AminoAcid::Cys);
473    t.insert(*b"TGC", AminoAcid::Cys);
474    t.insert(*b"TGA", AminoAcid::Stop);
475    t.insert(*b"TGG", AminoAcid::Trp);
476
477    // CTx
478    t.insert(*b"CTT", AminoAcid::Leu);
479    t.insert(*b"CTC", AminoAcid::Leu);
480    t.insert(*b"CTA", AminoAcid::Leu);
481    t.insert(*b"CTG", AminoAcid::Leu);
482
483    // CCx
484    t.insert(*b"CCT", AminoAcid::Pro);
485    t.insert(*b"CCC", AminoAcid::Pro);
486    t.insert(*b"CCA", AminoAcid::Pro);
487    t.insert(*b"CCG", AminoAcid::Pro);
488
489    // CAx
490    t.insert(*b"CAT", AminoAcid::His);
491    t.insert(*b"CAC", AminoAcid::His);
492    t.insert(*b"CAA", AminoAcid::Gln);
493    t.insert(*b"CAG", AminoAcid::Gln);
494
495    // CGx
496    t.insert(*b"CGT", AminoAcid::Arg);
497    t.insert(*b"CGC", AminoAcid::Arg);
498    t.insert(*b"CGA", AminoAcid::Arg);
499    t.insert(*b"CGG", AminoAcid::Arg);
500
501    // ATx
502    t.insert(*b"ATT", AminoAcid::Ile);
503    t.insert(*b"ATC", AminoAcid::Ile);
504    t.insert(*b"ATA", AminoAcid::Ile);
505    t.insert(*b"ATG", AminoAcid::Met);
506
507    // ACx
508    t.insert(*b"ACT", AminoAcid::Thr);
509    t.insert(*b"ACC", AminoAcid::Thr);
510    t.insert(*b"ACA", AminoAcid::Thr);
511    t.insert(*b"ACG", AminoAcid::Thr);
512
513    // AAx
514    t.insert(*b"AAT", AminoAcid::Asn);
515    t.insert(*b"AAC", AminoAcid::Asn);
516    t.insert(*b"AAA", AminoAcid::Lys);
517    t.insert(*b"AAG", AminoAcid::Lys);
518
519    // AGx
520    t.insert(*b"AGT", AminoAcid::Ser);
521    t.insert(*b"AGC", AminoAcid::Ser);
522    t.insert(*b"AGA", AminoAcid::Arg);
523    t.insert(*b"AGG", AminoAcid::Arg);
524
525    // GTx
526    t.insert(*b"GTT", AminoAcid::Val);
527    t.insert(*b"GTC", AminoAcid::Val);
528    t.insert(*b"GTA", AminoAcid::Val);
529    t.insert(*b"GTG", AminoAcid::Val);
530
531    // GCx
532    t.insert(*b"GCT", AminoAcid::Ala);
533    t.insert(*b"GCC", AminoAcid::Ala);
534    t.insert(*b"GCA", AminoAcid::Ala);
535    t.insert(*b"GCG", AminoAcid::Ala);
536
537    // GAx
538    t.insert(*b"GAT", AminoAcid::Asp);
539    t.insert(*b"GAC", AminoAcid::Asp);
540    t.insert(*b"GAA", AminoAcid::Glu);
541    t.insert(*b"GAG", AminoAcid::Glu);
542
543    // GGx
544    t.insert(*b"GGT", AminoAcid::Gly);
545    t.insert(*b"GGC", AminoAcid::Gly);
546    t.insert(*b"GGA", AminoAcid::Gly);
547    t.insert(*b"GGG", AminoAcid::Gly);
548
549    t
550}
551
552#[cfg(test)]
553mod tests {
554    use super::*;
555
556    #[test]
557    fn test_nucleotide_sequence_valid_dna() {
558        let seq = NucleotideSequence::new(b"ATGCATGC", SequenceType::Dna);
559        assert!(seq.is_ok());
560        let s = seq.expect("sequence creation failed");
561        assert_eq!(s.len(), 8);
562        assert_eq!(s.seq_type(), SequenceType::Dna);
563    }
564
565    #[test]
566    fn test_nucleotide_sequence_invalid_dna() {
567        // U is not valid in DNA
568        let result = NucleotideSequence::new(b"AUGC", SequenceType::Dna);
569        assert!(result.is_err());
570    }
571
572    #[test]
573    fn test_nucleotide_sequence_valid_rna() {
574        let seq = NucleotideSequence::new(b"AUGCAUGC", SequenceType::Rna);
575        assert!(seq.is_ok());
576    }
577
578    #[test]
579    fn test_complement_basic() {
580        assert_eq!(complement(b"ATGC"), b"TACG");
581        assert_eq!(complement(b"AAAA"), b"TTTT");
582        assert_eq!(complement(b"CCCC"), b"GGGG");
583    }
584
585    #[test]
586    fn test_reverse_complement() {
587        // 5'-ATGCTT-3' → complement TACGAA → reverse AAGCAT
588        let rc = reverse_complement(b"ATGCTT");
589        assert_eq!(rc, b"AAGCAT");
590    }
591
592    #[test]
593    fn test_reverse_complement_palindrome() {
594        // 5'-GAATTC-3' is a palindrome (EcoRI site)
595        let rc = reverse_complement(b"GAATTC");
596        assert_eq!(rc, b"GAATTC");
597    }
598
599    #[test]
600    fn test_gc_content_half() {
601        let gc = gc_content(b"ATGCATGC");
602        assert!((gc - 0.5).abs() < 1e-10);
603    }
604
605    #[test]
606    fn test_gc_content_zero() {
607        let gc = gc_content(b"AAAA");
608        assert!((gc - 0.0).abs() < 1e-10);
609    }
610
611    #[test]
612    fn test_gc_content_one() {
613        let gc = gc_content(b"GCGC");
614        assert!((gc - 1.0).abs() < 1e-10);
615    }
616
617    #[test]
618    fn test_gc_content_empty() {
619        assert_eq!(gc_content(b""), 0.0);
620    }
621
622    #[test]
623    fn test_translate_met_stop() {
624        let protein = translate(b"ATGAAATAA").expect("translation failed");
625        assert_eq!(
626            protein,
627            vec![AminoAcid::Met, AminoAcid::Lys, AminoAcid::Stop]
628        );
629    }
630
631    #[test]
632    fn test_translate_stops_at_first_stop() {
633        // ATG = Met, TAA = Stop, AAA = Lys (should not appear)
634        let protein = translate(b"ATGTAAAAAA").expect("translation failed");
635        assert_eq!(protein, vec![AminoAcid::Met, AminoAcid::Stop]);
636    }
637
638    #[test]
639    fn test_translate_incomplete_codon_ignored() {
640        // 7 bases → 2 complete codons + 1 leftover base (ignored)
641        let protein = translate(b"ATGAAAT").expect("translation failed");
642        assert_eq!(protein, vec![AminoAcid::Met, AminoAcid::Lys]);
643    }
644
645    #[test]
646    fn test_kmer_count_basic() {
647        let counts = kmer_count(b"ATAT", 2).expect("kmer_count failed");
648        assert_eq!(*counts.get(b"AT".as_ref()).expect("AT not found"), 2);
649        assert_eq!(*counts.get(b"TA".as_ref()).expect("TA not found"), 1);
650    }
651
652    #[test]
653    fn test_kmer_count_k_equals_length() {
654        let counts = kmer_count(b"ATGC", 4).expect("kmer_count failed");
655        assert_eq!(counts.len(), 1);
656        assert_eq!(*counts.get(b"ATGC".as_ref()).expect("ATGC not found"), 1);
657    }
658
659    #[test]
660    fn test_kmer_count_k_zero_errors() {
661        let result = kmer_count(b"ATGC", 0);
662        assert!(result.is_err());
663    }
664
665    #[test]
666    fn test_kmer_count_k_too_large_errors() {
667        let result = kmer_count(b"AT", 5);
668        assert!(result.is_err());
669    }
670
671    #[test]
672    fn test_amino_acid_one_letter() {
673        assert_eq!(AminoAcid::Met.one_letter(), 'M');
674        assert_eq!(AminoAcid::Stop.one_letter(), '*');
675    }
676
677    #[test]
678    fn test_codon_table_size() {
679        let table = build_codon_table();
680        // 64 codons in standard code
681        assert_eq!(table.len(), 64);
682    }
683
684    #[test]
685    fn test_lowercase_input_normalised() {
686        // Lower-case input should be accepted and normalised
687        let seq = NucleotideSequence::new(b"atgc", SequenceType::Dna).expect("should succeed");
688        assert_eq!(seq.as_bytes(), b"ATGC");
689    }
690}