use std::{
collections::{HashMap, hash_map::Entry},
hash::{Hash, Hasher},
};
use crate::{Nucleotide, NucleotideGeneral, Seq};
/// A pair of strands together with an alignment value.
///
/// NOTE(review): presumably the result of joining two fragments; the exact meaning of
/// `alignment` is not visible from this file — confirm against the ligation code.
pub struct LigationProduct {
    /// Top strand of the product.
    pub strand_top: Seq,
    /// Bottom strand of the product.
    pub strand_bottom: Seq,
    /// Alignment value for the two strands (presumably an offset — TODO confirm).
    pub alignment: usize,
}
/// One occurrence of a restriction enzyme's recognition site within a sequence.
#[derive(Clone, Debug)]
pub struct ReMatch {
    /// Index of the matching enzyme in the library slice that was searched.
    pub lib_index: usize,
    /// 1-based position in the sequence of the first nucleotide of the matched site.
    pub seq_index: usize,
    /// Total number of sites found in the sequence for this same enzyme.
    pub match_count: usize,
}
/// A restriction enzyme: its name, recognition sequence, and where it cuts the top strand.
///
/// Equality and hashing for this type are implemented by hand elsewhere in this file and
/// consider only `name`.
#[derive(Clone, Eq)]
pub struct RestrictionEnzyme {
    /// Enzyme name; the sole key used for equality and hashing.
    pub name: String,
    /// Recognition sequence, written for the top strand.
    pub cut_seq: Vec<NucleotideGeneral>,
    /// 0-based index into `cut_seq` of the last nucleotide before the top-strand cut;
    /// `0` means the cut falls between the first and second nucleotides.
    pub cut_after: u8,
}
// Only the name is fed to the hasher, which keeps hashing in step with the name-only
// equality implemented for this type.
impl Hash for RestrictionEnzyme {
    fn hash<H: Hasher>(&self, state: &mut H) {
        // A `String` hashes exactly like the `str` it holds.
        Hash::hash(self.name.as_str(), state);
    }
}
// Two enzymes compare equal when their names are equal; the recognition sequence and the
// cut position are deliberately left out of the comparison.
impl PartialEq for RestrictionEnzyme {
    fn eq(&self, other: &Self) -> bool {
        let (lhs, rhs) = (self.name.as_str(), other.name.as_str());
        lhs == rhs
    }
}
impl RestrictionEnzyme {
    /// Creates an enzyme from its name, recognition sequence and cut position.
    ///
    /// `cut_after` is the 0-based index of the recognition-site nucleotide after which the
    /// top strand is cut, so `0` places the cut between the first and second nucleotides.
    pub fn new(name: &str, cut_seq: Vec<NucleotideGeneral>, cut_after: u8) -> Self {
        Self {
            name: name.to_owned(),
            cut_seq,
            cut_after,
        }
    }

    /// Number of recognition-site nucleotides that lie to the left of the top-strand cut.
    fn cut_pos(&self) -> usize {
        usize::from(self.cut_after) + 1
    }

    /// Copies `segment[start..end]`, or logs to stderr and returns an empty vector when the
    /// segment is too short to contain that range.
    fn slice_or_empty<T: Clone>(segment: &[T], start: usize, end: usize) -> Vec<T> {
        match segment.get(start..end) {
            Some(overhang) => overhang.to_vec(),
            None => {
                eprintln!(
                    "Sequence segment too short for overhang: need {end} nucleotides, got {}",
                    segment.len()
                );
                Vec::new()
            }
        }
    }

    /// Returns `true` when the top-strand cut falls exactly in the middle of the
    /// recognition site, so that neither overhang method returns any nucleotides.
    ///
    /// A site with an odd number of nucleotides has no exact middle and is never blunt.
    pub fn makes_blunt_ends(&self) -> bool {
        2 * self.cut_pos() == self.cut_seq.len()
    }

    /// Renders the recognition sequence in upper case with `" | "` inserted after the
    /// nucleotide at index `cut_after`.
    pub fn cut_depiction(&self) -> String {
        // Compare as `usize`: narrowing the index to `u8` would wrap on long sequences.
        let cut_after = usize::from(self.cut_after);
        let nt_chars = seq_general_to_str(&self.cut_seq);

        let mut result = String::with_capacity(nt_chars.len() + 3);
        for (i, nt_char) in nt_chars.chars().enumerate() {
            result.push(nt_char);
            if i == cut_after {
                result.push_str(" | ");
            }
        }
        result
    }

    /// Top-strand nucleotides between the top-strand cut and its mirrored position, for an
    /// enzyme that cuts before the middle of its recognition site.
    ///
    /// `seq_segment` is indexed relative to the first nucleotide of the recognition site.
    /// The arithmetic assumes the bottom strand is cut at the mirrored position
    /// (`len - cut`). Returns an empty vector when the cut is at or past the middle, when the
    /// cut lies beyond the site, or when `seq_segment` is too short.
    pub fn overhang_top_left(&self, seq_segment: &[Nucleotide]) -> Vec<Nucleotide> {
        let cut = self.cut_pos();
        // `checked_sub` guards against a cut position beyond the end of the site.
        match self.cut_seq.len().checked_sub(cut) {
            Some(mirror) if cut < mirror => Self::slice_or_empty(seq_segment, cut, mirror),
            _ => Vec::new(),
        }
    }

    /// Top-strand nucleotides between the mirrored position and the top-strand cut, for an
    /// enzyme that cuts past the middle of its recognition site.
    ///
    /// `seq_segment` is indexed relative to the first nucleotide of the recognition site.
    /// The arithmetic assumes the bottom strand is cut at the mirrored position
    /// (`len - cut`). Returns an empty vector when the cut is at or before the middle, when
    /// the cut lies beyond the site, or when `seq_segment` is too short.
    pub fn overhang_top_right(&self, seq_segment: &[Nucleotide]) -> Vec<Nucleotide> {
        let cut = self.cut_pos();
        match self.cut_seq.len().checked_sub(cut) {
            Some(mirror) if mirror < cut => Self::slice_or_empty(seq_segment, mirror, cut),
            _ => Vec::new(),
        }
    }

    /// Complement of [`Self::overhang_top_right`], nucleotide by nucleotide and in the same
    /// order.
    pub fn overhang_bottom_left(&self, seq_segment: &[Nucleotide]) -> Vec<Nucleotide> {
        let mut result = self.overhang_top_right(seq_segment);
        for nt in &mut result {
            *nt = nt.complement();
        }
        result
    }

    /// Complement of [`Self::overhang_top_left`], nucleotide by nucleotide and in the same
    /// order.
    pub fn overhang_bottom_right(&self, seq_segment: &[Nucleotide]) -> Vec<Nucleotide> {
        let mut result = self.overhang_top_left(seq_segment);
        for nt in &mut result {
            *nt = nt.complement();
        }
        result
    }
}
/// Finds every occurrence of each library enzyme's recognition site in `seq`.
///
/// Every window of `seq` that is as long as the recognition site is tested, including the
/// one ending on the final nucleotide. Matches are returned grouped by enzyme in library
/// order and, within an enzyme, in order of position. Each [`ReMatch`] carries the enzyme's
/// index in `lib`, the 1-based position of the first matched nucleotide, and the total
/// number of sites found for that enzyme.
///
/// Enzymes with an empty recognition sequence are skipped, and enzymes whose site is longer
/// than `seq` produce no matches. The sequence is treated as linear: sites that would wrap
/// around the end are not searched for.
pub fn find_re_matches(seq: &[Nucleotide], lib: &[RestrictionEnzyme]) -> Vec<ReMatch> {
    let mut result = Vec::new();
    // Number of sites found so far, keyed by library index.
    let mut match_counts: HashMap<usize, usize> = HashMap::new();

    for (lib_index, re) in lib.iter().enumerate() {
        let site_len = re.cut_seq.len();
        // `windows` panics on a zero size, and an empty site would match everywhere.
        if site_len == 0 {
            continue;
        }

        for (i, window) in seq.windows(site_len).enumerate() {
            let is_match = re
                .cut_seq
                .iter()
                .zip(window)
                .all(|(pattern, nt)| pattern.matches(*nt));
            if !is_match {
                continue;
            }

            result.push(ReMatch {
                lib_index,
                // Reported positions are 1-based.
                seq_index: i + 1,
                // Filled in below, once the per-enzyme totals are known.
                match_count: 0,
            });

            match match_counts.entry(lib_index) {
                Entry::Occupied(mut slot) => *slot.get_mut() += 1,
                Entry::Vacant(slot) => {
                    slot.insert(1);
                }
            }
        }
    }

    // Every pushed match also bumped its enzyme's counter, so the lookup cannot miss.
    for re_match in &mut result {
        re_match.match_count = match_counts[&re_match.lib_index];
    }
    result
}
/// Concatenates the upper-case string form of every element of `seq`, in order.
pub fn seq_general_to_str(seq: &[NucleotideGeneral]) -> String {
    seq.iter()
        .fold(String::with_capacity(seq.len()), |mut text, nt| {
            text.push_str(&nt.to_str_upper());
            text
        })
}