1use std::{io, io::ErrorKind};
2
3use bincode::{Decode, Encode};
4
5use crate::Nucleotide::*;
6pub use crate::{
7 amino_acids::{
8 AaIdent, AminoAcid, AminoAcidGeneral, AminoAcidProtenationVariant, CodingResult,
9 },
10 element::{AtomTypeInRes, Element},
11 nucleotide::{Nucleotide, NucleotideGeneral},
12 restriction_enzyme::RestrictionEnzyme,
13};
14
15pub mod amino_acids;
16pub mod element;
17pub mod ligation;
18pub mod nucleotide;
19pub mod re_lib;
20pub mod restriction_enzyme;
21
22pub type Seq = Vec<Nucleotide>;
24
25pub struct IndexError {}
26
27pub fn seq_complement(seq: &[Nucleotide]) -> Seq {
29 let mut result = seq.to_vec();
30 result.reverse();
31
32 for nt in &mut result {
33 *nt = nt.complement();
34 }
35
36 result
37}
38
39pub fn seq_from_str(str: &str) -> Seq {
41 let mut result = Vec::new();
42
43 for char in str.to_lowercase().chars() {
44 match char {
45 'a' => result.push(A),
46 't' => result.push(T),
47 'c' => result.push(C),
48 'g' => result.push(G),
49 _ => (),
50 };
51 }
52
53 result
54}
55
56pub fn seq_aa_from_str(str: &str) -> Vec<AminoAcid> {
58 let mut result = Vec::new();
59
60 for char in str.chars() {
61 let letter = char.to_string(); if let Ok(aa) = letter.parse::<AminoAcid>() {
63 result.push(aa);
64 }
65 }
66
67 result
68}
69
70pub fn seq_to_str_lower(seq: &[Nucleotide]) -> String {
72 let mut result = String::new();
73
74 for nt in seq {
75 result.push_str(&nt.to_str_lower());
76 }
77
78 result
79}
80
81pub fn seq_to_str_upper(seq: &[Nucleotide]) -> String {
83 let mut result = String::new();
84
85 for nt in seq {
86 result.push_str(&nt.to_str_upper());
87 }
88
89 result
90}
91
92pub fn seq_aa_to_str(seq: &[AminoAcid]) -> String {
94 let mut result = String::new();
95
96 for aa in seq {
97 result.push_str(&aa.to_str(AaIdent::OneLetter));
98 }
99
100 result
101}
102
103pub fn seq_to_u8_upper(seq: &[Nucleotide]) -> Vec<u8> {
105 seq.iter().map(|nt| nt.to_u8_upper()).collect()
106}
107
108pub fn seq_to_u8_lower(seq: &[Nucleotide]) -> Vec<u8> {
110 seq.iter().map(|nt| nt.to_u8_lower()).collect()
111}
112
113pub fn seq_aa_to_u8_upper(seq: &[AminoAcid]) -> Vec<u8> {
115 seq.iter().map(|aa| aa.to_u8_upper()).collect()
116}
117
118pub fn seq_aa_to_u8_lower(seq: &[AminoAcid]) -> Vec<u8> {
120 seq.iter().map(|aa| aa.to_u8_lower()).collect()
121}
122
123pub fn seq_weight(seq: &[Nucleotide]) -> f32 {
125 let mut result = 0.;
126
127 for nt in seq {
128 result += nt.weight();
129 }
130
131 result -= 61.96;
132
133 result
134}
135
136pub fn calc_gc(seq: &[Nucleotide]) -> f32 {
138 let num_gc = seq.iter().filter(|&&nt| nt == C || nt == G).count();
139 num_gc as f32 / seq.len() as f32
140}
141
142pub fn serialize_seq_bin(seq: &[Nucleotide]) -> Vec<u8> {
149 let mut result = Vec::new();
150 result.extend(&(seq.len() as u32).to_be_bytes());
151
152 for i in 0..seq.len() / 4 + 1 {
153 let mut val = 0;
154 for j in 0..4 {
155 let ind = i * 4 + j;
156 if ind + 1 > seq.len() {
157 break;
158 }
159 let nt = seq[ind];
160 val |= (nt as u8) << (j * 2);
161 }
162 result.push(val);
163 }
164 result
165}
166
167pub fn deser_seq_bin(data: &[u8]) -> io::Result<Seq> {
171 let mut result = Vec::new();
172
173 if data.len() < 4 {
174 return Err(io::Error::new(
175 ErrorKind::InvalidData,
176 "Bin nucleotide sequence is too short.",
177 ));
178 }
179
180 let seq_len = u32::from_be_bytes(data[0..4].try_into().unwrap()) as usize;
181
182 for byte in &data[4..] {
183 for i in 0..4 {
184 if result.len() >= seq_len {
186 break;
187 }
188
189 let bits = (byte >> (2 * i)) & 0b11;
190 result.push(Nucleotide::try_from(bits).map_err(|_| {
191 io::Error::new(
192 ErrorKind::InvalidData,
193 format!("Invalid NT serialization: {}, {}", byte, bits),
194 )
195 })?);
196 }
197 }
198
199 Ok(result)
200}
201
202#[derive(Clone, Copy, PartialEq, Debug, Encode, Decode)]
203pub enum SeqTopology {
204 Linear,
205 Circular,
206}
207
208impl Default for SeqTopology {
209 fn default() -> Self {
210 Self::Circular
211 }
212}
213
214pub fn insert_into_seq(
217 seq_vector: &mut Seq,
218 insert: &[Nucleotide],
219 insert_loc: usize,
220) -> Result<(), IndexError> {
221 if insert_loc == 0 || insert_loc > seq_vector.len() {
222 eprintln!("Error: Insert location out of bounds: {insert_loc}");
223 return Err(IndexError {});
224 }
225
226 let insert_i = insert_loc - 1; seq_vector.splice(insert_i..insert_i, insert.iter().cloned());
228
229 Ok(())
230}