1use std::{io, io::ErrorKind};
2
3use bincode::{Decode, Encode};
4
5use crate::Nucleotide::*;
6pub use crate::{
7 amino_acids::{AaIdent, AminoAcid, CodingResult},
8 element::{AtomTypeInRes, Element},
9 nucleotide::{Nucleotide, NucleotideGeneral},
10 restriction_enzyme::RestrictionEnzyme,
11};
12
13pub mod amino_acids;
14pub mod element;
15pub mod ligation;
16pub mod nucleotide;
17pub mod re_lib;
18pub mod restriction_enzyme;
19
20pub type Seq = Vec<Nucleotide>;
22
23pub struct IndexError {}
24
25pub fn seq_complement(seq: &[Nucleotide]) -> Seq {
27 let mut result = seq.to_vec();
28 result.reverse();
29
30 for nt in &mut result {
31 *nt = nt.complement();
32 }
33
34 result
35}
36
37pub fn seq_from_str(str: &str) -> Seq {
39 let mut result = Vec::new();
40
41 for char in str.to_lowercase().chars() {
42 match char {
43 'a' => result.push(A),
44 't' => result.push(T),
45 'c' => result.push(C),
46 'g' => result.push(G),
47 _ => (),
48 };
49 }
50
51 result
52}
53
54pub fn seq_aa_from_str(str: &str) -> Vec<AminoAcid> {
56 let mut result = Vec::new();
57
58 for char in str.chars() {
59 let letter = char.to_string(); if let Ok(aa) = letter.parse::<AminoAcid>() {
61 result.push(aa);
62 }
63 }
64
65 result
66}
67
68pub fn seq_to_str_lower(seq: &[Nucleotide]) -> String {
70 let mut result = String::new();
71
72 for nt in seq {
73 result.push_str(&nt.to_str_lower());
74 }
75
76 result
77}
78
79pub fn seq_to_str_upper(seq: &[Nucleotide]) -> String {
81 let mut result = String::new();
82
83 for nt in seq {
84 result.push_str(&nt.to_str_upper());
85 }
86
87 result
88}
89
90pub fn seq_aa_to_str(seq: &[AminoAcid]) -> String {
92 let mut result = String::new();
93
94 for aa in seq {
95 result.push_str(&aa.to_str(AaIdent::OneLetter));
96 }
97
98 result
99}
100
101pub fn seq_to_u8_upper(seq: &[Nucleotide]) -> Vec<u8> {
103 seq.iter().map(|nt| nt.to_u8_upper()).collect()
104}
105
106pub fn seq_to_u8_lower(seq: &[Nucleotide]) -> Vec<u8> {
108 seq.iter().map(|nt| nt.to_u8_lower()).collect()
109}
110
111pub fn seq_aa_to_u8_upper(seq: &[AminoAcid]) -> Vec<u8> {
113 seq.iter().map(|aa| aa.to_u8_upper()).collect()
114}
115
116pub fn seq_aa_to_u8_lower(seq: &[AminoAcid]) -> Vec<u8> {
118 seq.iter().map(|aa| aa.to_u8_lower()).collect()
119}
120
121pub fn seq_weight(seq: &[Nucleotide]) -> f32 {
123 let mut result = 0.;
124
125 for nt in seq {
126 result += nt.weight();
127 }
128
129 result -= 61.96;
130
131 result
132}
133
134pub fn calc_gc(seq: &[Nucleotide]) -> f32 {
136 let num_gc = seq.iter().filter(|&&nt| nt == C || nt == G).count();
137 num_gc as f32 / seq.len() as f32
138}
139
140pub fn serialize_seq_bin(seq: &[Nucleotide]) -> Vec<u8> {
147 let mut result = Vec::new();
148 result.extend(&(seq.len() as u32).to_be_bytes());
149
150 for i in 0..seq.len() / 4 + 1 {
151 let mut val = 0;
152 for j in 0..4 {
153 let ind = i * 4 + j;
154 if ind + 1 > seq.len() {
155 break;
156 }
157 let nt = seq[ind];
158 val |= (nt as u8) << (j * 2);
159 }
160 result.push(val);
161 }
162 result
163}
164
165pub fn deser_seq_bin(data: &[u8]) -> io::Result<Seq> {
169 let mut result = Vec::new();
170
171 if data.len() < 4 {
172 return Err(io::Error::new(
173 ErrorKind::InvalidData,
174 "Bin nucleotide sequence is too short.",
175 ));
176 }
177
178 let seq_len = u32::from_be_bytes(data[0..4].try_into().unwrap()) as usize;
179
180 for byte in &data[4..] {
181 for i in 0..4 {
182 if result.len() >= seq_len {
184 break;
185 }
186
187 let bits = (byte >> (2 * i)) & 0b11;
188 result.push(Nucleotide::try_from(bits).map_err(|_| {
189 io::Error::new(
190 ErrorKind::InvalidData,
191 format!("Invalid NT serialization: {}, {}", byte, bits),
192 )
193 })?);
194 }
195 }
196
197 Ok(result)
198}
199
200#[derive(Clone, Copy, PartialEq, Debug, Encode, Decode)]
201pub enum SeqTopology {
202 Linear,
203 Circular,
204}
205
206impl Default for SeqTopology {
207 fn default() -> Self {
208 Self::Circular
209 }
210}
211
212pub fn insert_into_seq(
215 seq_vector: &mut Seq,
216 insert: &[Nucleotide],
217 insert_loc: usize,
218) -> Result<(), IndexError> {
219 if insert_loc == 0 || insert_loc > seq_vector.len() {
220 eprintln!("Error: Insert location out of bounds: {insert_loc}");
221 return Err(IndexError {});
222 }
223
224 let insert_i = insert_loc - 1; seq_vector.splice(insert_i..insert_i, insert.iter().cloned());
226
227 Ok(())
228}