use bincode::{Decode, Encode};
use bio_apis::rcsb::PdbData;
use na_seq::{
Nucleotide,
amino_acids::{AminoAcid, CodingResult},
};
use crate::{
misc_types::{Feature, FeatureType},
reading_frame::{ReadingFrame, ReadingFrameMatch, find_orf_matches},
state::State,
};
pub const WATER_WEIGHT: f32 = 18.015;
pub const HYDROPATHY_WINDOW_SIZE: usize = 9;
#[derive(Encode, Decode)]
pub struct Protein {
pub feature: Feature, pub reading_frame_match: ReadingFrameMatch,
pub aa_seq: Vec<AminoAcid>,
pub aa_seq_precoding: Vec<AminoAcid>,
pub aa_seq_postcoding: Vec<AminoAcid>,
pub weight: f32,
pub weight_with_prepost: f32,
pub hydropath_data: Vec<(usize, f32)>,
pub pdb_data: Vec<PdbData>,
pub show_hydropath: bool,
}
pub fn proteins_from_seq(
seq: &[Nucleotide],
features: &[Feature],
cr_orf_matches: &[(usize, ReadingFrameMatch)],
) -> Vec<Protein> {
let mut result = Vec::new();
for (i, feature) in features.iter().enumerate() {
if feature.feature_type != FeatureType::CodingRegion {
continue;
}
for (j, om) in cr_orf_matches {
if *j == i {
if let Some(seq_orf_match_dna) = om.range.index_seq(seq) {
let len = seq_orf_match_dna.len();
let mut aa_seq = Vec::new();
let mut aa_seq_precoding = Vec::new();
let mut aa_seq_postcoding = Vec::new();
for i_ in 0..len / 3 {
let i = i_ * 3; let i_actual = i + om.range.start;
let nts = &seq_orf_match_dna[i..i + 3];
if let CodingResult::AminoAcid(aa) =
AminoAcid::from_codons(nts.try_into().unwrap())
{
if i_actual < feature.range.start {
aa_seq_precoding.push(aa);
} else if i_actual > feature.range.end {
aa_seq_postcoding.push(aa);
} else {
aa_seq.push(aa);
}
}
}
let weight = protein_weight(&aa_seq);
let weight_with_prepost = weight
+ protein_weight(&aa_seq_precoding)
+ protein_weight(&aa_seq_postcoding);
let hydropath_data = hydropathy_doolittle(&aa_seq, HYDROPATHY_WINDOW_SIZE);
result.push(Protein {
feature: feature.clone(),
reading_frame_match: om.clone(),
aa_seq,
aa_seq_precoding,
aa_seq_postcoding,
hydropath_data,
weight,
weight_with_prepost,
show_hydropath: true,
pdb_data: Vec::new(),
})
}
}
}
}
result
}
pub fn hydropathy_doolittle(seq: &[AminoAcid], window_size: usize) -> Vec<(usize, f32)> {
if window_size % 2 == 0 {
eprintln!("Window size for KD must be odd");
return Vec::new();
}
let mut result = Vec::new();
let win_div_2 = window_size / 2;
if win_div_2 - 1 >= seq.len() {
eprintln!("Error with window size for hydropathy");
return result;
}
for i in win_div_2..seq.len() - win_div_2 - 1 {
let aas = &seq[i - win_div_2..i + win_div_2 + 1];
let mut val_this_window = 0.;
for aa in aas {
val_this_window += aa.hydropathicity();
}
result.push((i, val_this_window / window_size as f32));
}
result
}
pub fn tango_aggregation(seq: &[AminoAcid]) {}
pub fn aggrescan(seq: &[AminoAcid]) {}
pub fn sync_cr_orf_matches(state: &mut State) {
state.volatile[state.active].cr_orf_matches = Vec::new();
let mut region_matches = Vec::new();
for orf in [
ReadingFrame::Fwd0,
ReadingFrame::Fwd1,
ReadingFrame::Fwd2,
ReadingFrame::Rev0,
ReadingFrame::Rev1,
ReadingFrame::Rev2,
] {
let mut regions = find_orf_matches(state.get_seq(), orf);
region_matches.append(&mut regions);
}
for (i, feature) in state.generic[state.active].features.iter().enumerate() {
if feature.feature_type != FeatureType::CodingRegion {
continue;
}
let label_lower = feature.label.to_lowercase().replace(' ', "");
if label_lower.contains("xhis") || label_lower.contains("Ă—his") {
continue;
}
let mut orf_match = None;
let mut smallest_match = usize::MAX;
for rm in ®ion_matches {
if !rm.range.contains(feature.range.start) || !rm.range.contains(feature.range.end) {
continue;
}
if rm.range.len() < smallest_match {
smallest_match = rm.range.len();
orf_match = Some(rm.clone());
}
}
if let Some(m) = orf_match {
state.volatile[state.active]
.cr_orf_matches
.push((i, m.clone()));
}
}
}
pub fn protein_weight(seq: &[AminoAcid]) -> f32 {
if seq.is_empty() {
return 0.; }
let mut result = 0.;
for aa in seq {
result += aa.weight();
}
(result - WATER_WEIGHT * (seq.len() - 1) as f32) / 1_000.
}