use std::{cmp::min, collections::HashSet, fmt, io, io::ErrorKind, path::Path};
use bincode::{Decode, Encode};
use eframe::egui::{Pos2, pos2};
use na_seq::{
Nucleotide,
ligation::{filter_multiple_seqs, filter_unique_cutters, find_common_res},
restriction_enzyme::RestrictionEnzyme,
seq_complement,
};
use crate::{
Color, ReUi,
file_io::save::QUICKSAVE_FILE,
gui::{
WINDOW_TITLE,
sequence::seq_view::{NT_WIDTH_PX, SEQ_ROW_SPACING_PX, TEXT_X_START, TEXT_Y_START},
},
misc_types::Feature,
state::{State, StateVolatile},
};
const FEATURE_ANNOTATION_MATCH_THRESH: f32 = 0.95;
#[derive(Clone, Copy, Debug, PartialEq, Encode, Decode)]
pub struct RangeIncl {
pub start: usize,
pub end: usize,
}
impl RangeIncl {
pub fn new(start: usize, end: usize) -> Self {
Self { start, end }
}
pub fn contains(&self, val: usize) -> bool {
val >= self.start && val <= self.end
}
pub fn index_seq<'a, T: Clone>(&self, seq: &'a [T]) -> Option<&'a [T]> {
if self.start < 1 || self.end > seq.len() || self.start > self.end {
return None;
}
Some(&seq[self.start - 1..=self.end - 1])
}
pub fn index_seq_wrap<T: Clone>(&self, seq: &[T]) -> Option<Vec<T>> {
if self.start < 1 || self.end > seq.len() || self.start <= self.end {
return None;
}
let part1 = seq[self.start - 1..].to_vec();
let part2 = seq[..self.end].to_vec();
let mut result = part1;
result.extend(part2);
Some(result)
}
pub fn len(&self) -> usize {
if self.end < self.start {
return 0;
}
self.end - self.start + 1
}
}
impl fmt::Display for RangeIncl {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "{}..{} {}bp", self.start, self.end, self.len())
}
}
pub fn get_row_ranges(len: usize, chars_per_row: usize) -> Vec<RangeIncl> {
let num_rows = len / chars_per_row + 1;
let mut result = Vec::with_capacity(num_rows);
for row_i in 0..num_rows {
let end = row_i * chars_per_row + chars_per_row;
let end = min(end, len);
result.push(RangeIncl::new(row_i * chars_per_row + 1, end));
}
result
}
fn seq_i_to_col_row(seq_i: usize, row_ranges: &[RangeIncl]) -> (usize, usize) {
let mut row = 0;
let mut row_range = RangeIncl::new(0, 10);
for (row_, range) in row_ranges.iter().enumerate() {
if range.contains(seq_i) {
row = row_;
row_range = *range;
break;
}
}
let col = seq_i - row_range.start;
(col, row)
}
pub fn seq_i_to_pixel(seq_i: usize, row_ranges: &[RangeIncl]) -> Pos2 {
let (col, row) = seq_i_to_col_row(seq_i, row_ranges);
let col = if seq_i == 0 { -1. } else { col as f32 };
pos2(
TEXT_X_START + col * NT_WIDTH_PX,
TEXT_Y_START + row as f32 * SEQ_ROW_SPACING_PX,
)
}
pub fn pixel_to_seq_i(pixel: Pos2, row_ranges: &[RangeIncl]) -> Option<usize> {
let row = ((pixel.y - TEXT_Y_START) / SEQ_ROW_SPACING_PX) as usize;
let col = ((pixel.x - TEXT_X_START) / NT_WIDTH_PX) as usize;
for (row_, range) in row_ranges.iter().enumerate() {
if row_ == row {
return Some(range.start + col);
}
}
None
}
pub fn remove_duplicates<T: Eq + std::hash::Hash>(vec: Vec<T>) -> Vec<T> {
let set: HashSet<_> = vec.into_iter().collect();
let vec: Vec<_> = set.into_iter().collect();
vec
}
pub fn get_feature_ranges(
feature_rng: &RangeIncl,
all_ranges: &[RangeIncl],
seq_len: usize,
) -> Vec<RangeIncl> {
let mut result = Vec::new();
let feature_ranges = if feature_rng.end < feature_rng.start {
vec![
RangeIncl::new(1, feature_rng.end),
RangeIncl::new(feature_rng.start, seq_len),
]
} else {
vec![*feature_rng]
};
for ft_rng in &feature_ranges {
for range in all_ranges {
if range.end < range.start || range.end == 0 {
eprintln!("Error with all ranges when finding a feature range: {range}");
return result;
}
if range.contains(ft_rng.start) {
let end = min(range.end, ft_rng.end);
result.push(RangeIncl::new(ft_rng.start, end));
} else if range.contains(ft_rng.end) {
result.push(RangeIncl::new(range.start, ft_rng.end));
} else if ft_rng.start < range.start && ft_rng.end > range.end {
result.push(RangeIncl::new(range.start, range.end));
}
}
}
result
}
pub fn color_from_hex(hex: &str) -> io::Result<Color> {
let hex = &hex[1..];
if hex.len() < 6 {
return Err(io::Error::new(ErrorKind::InvalidData, "Invalid color"));
}
let r = u8::from_str_radix(&hex[0..2], 16)
.map_err(|_| io::Error::new(ErrorKind::InvalidData, "Invalid color radix"))?;
let g = u8::from_str_radix(&hex[2..4], 16)
.map_err(|_| io::Error::new(ErrorKind::InvalidData, "Invalid color radix"))?;
let b = u8::from_str_radix(&hex[4..6], 16)
.map_err(|_| io::Error::new(ErrorKind::InvalidData, "Invalid color radix"))?;
Ok((r, g, b))
}
pub fn color_to_hex(color: Color) -> String {
format!("#{:x}{:x}{:x}", color.0, color.1, color.2)
}
pub fn change_origin(state: &mut State) {
let origin = &state.ui.new_origin;
if *origin < 1 || *origin > state.get_seq().len() {
return;
}
state.generic[state.active].seq.rotate_left(origin - 1);
let seq_len = state.get_seq().len();
for feature in &mut state.generic[state.active].features {
feature.range = RangeIncl::new(
(feature.range.start as i32 + 1 - *origin as i32).rem_euclid(seq_len as i32) as usize,
(feature.range.end as i32 + 1 - *origin as i32).rem_euclid(seq_len as i32) as usize,
)
}
state.sync_seq_related(None);
}
pub fn match_subseq(subseq: &[Nucleotide], seq: &[Nucleotide]) -> (Vec<RangeIncl>, Vec<RangeIncl>) {
let mut result = (Vec::new(), Vec::new());
let seq_len = seq.len();
let subseq_len = subseq.len();
let complement = seq_complement(seq);
for seq_start in 0..seq_len {
let seq_iter = seq.iter().cycle().skip(seq_start).take(subseq_len);
if subseq.iter().eq(seq_iter) {
let seq_end = (seq_start + subseq_len) % seq_len;
result.0.push(RangeIncl::new(seq_start + 1, seq_end));
}
}
for seq_start in 0..seq_len {
let seq_iter = complement.iter().cycle().skip(seq_start).take(subseq_len);
if subseq.iter().eq(seq_iter) {
let seq_end = (seq_start + subseq_len) % seq_len;
if seq_end < 1 {
continue;
}
result
.1
.push(RangeIncl::new(seq_len - seq_end + 1, seq_len - seq_start));
}
}
result
}
pub fn _seq_similarity(seq_a: &[Nucleotide], seq_b: &[Nucleotide]) -> f32 {
let mut matches = 0;
for i in 0..seq_a.len() {
if seq_a[i] == seq_b[i] {
matches += 1;
}
}
matches as f32 / seq_a.len() as f32
}
pub fn merge_feature_sets(existing: &mut Vec<Feature>, new: &[Feature]) {
for feature_new in new {
let mut exists = false;
for feature_existing in &mut *existing {
if feature_new.range == feature_existing.range {
exists = true;
}
}
if !exists {
existing.push(feature_new.clone());
}
}
}
pub fn get_window_title(path: &Path) -> String {
let filename = path
.file_name()
.and_then(|name| name.to_str())
.map(|name_str| name_str.to_string())
.unwrap();
if filename == QUICKSAVE_FILE {
WINDOW_TITLE.to_owned()
} else {
filename
}
}
pub fn filter_res<'a>(
data: &ReUi,
volatile: &[StateVolatile],
lib: &'a [RestrictionEnzyme],
) -> Vec<&'a RestrictionEnzyme> {
let mut re_match_set = Vec::new(); for active in &data.tabs_selected {
re_match_set.push(&volatile[*active].restriction_enzyme_matches);
}
let mut result = find_common_res(&re_match_set, lib, data.sticky_ends_only);
if data.multiple_seqs {
filter_multiple_seqs(&mut result, &re_match_set, lib);
}
if data.unique_cutters_only {
filter_unique_cutters(&mut result, &re_match_set, lib);
}
result
}