use crate::annotate::searcher::BarbellMatch;
use colored::*;
use sassy::Strand;
use std::collections::HashMap;
use std::error::Error;
use std::fs::File;
use std::io::{BufWriter, Write};
/// Returns the lower bound of the `bucket_size`-wide bucket containing `pos`.
///
/// `pos` is first shifted down by one (saturating at zero) and then rounded down
/// to the nearest multiple of `bucket_size`, so with a bucket size of 10 the
/// positions 0 through 10 all map to 0 and 11 through 20 map to 10.
///
/// # Panics
///
/// Panics if `bucket_size` is zero.
fn bucket_position(pos: usize, bucket_size: usize) -> usize {
    let shifted = pos.saturating_sub(1);
    // Dropping the remainder is the same as dividing and multiplying back.
    shifted - shifted % bucket_size
}
/// Builds a textual pattern label describing every match in `group`.
///
/// Each match contributes one element of the form
/// `<match_type>[<fw|rc>, *<cut>, <position tag>]`, and the elements are joined
/// with `__` in the order they appear in `group`. An empty `group` yields an
/// empty string.
///
/// The position tag is chosen as follows:
/// * first match in the group with `rel_dist_to_end > 0`:
///   `@left(a..b)`, bucketed on the match start;
/// * first match otherwise: `@right(a..b)`, bucketed on the distances from the
///   match end and match start to `read_len`;
/// * later matches: `@prev_left(a..b)`, bucketed on the gap to the previous
///   match end, when that gap is not larger than the distance from the match
///   end to `read_len`; otherwise `@right(a..b)`.
///
/// The cut marker is `, <<` (forward strand) or `, >>` (reverse complement) when
/// the match carries a non-empty `cuts` value, and empty otherwise.
///
/// # Panics
///
/// Panics if `bucket_size` is zero and `group` is not empty.
pub fn get_group_structure(group: &[BarbellMatch], bucket_size: usize) -> String {
    // Tag for a match positioned relative to the read end: the window opens at
    // the bucket of (read_len - end) and closes one bucket past (read_len - start).
    let right_tag = |read_len: usize, start: usize, end: usize| -> String {
        let lo = bucket_position(read_len.saturating_sub(end), bucket_size);
        let hi = bucket_position(read_len.saturating_sub(start), bucket_size) + bucket_size;
        format!("@right({lo}..{hi})")
    };

    let mut elements: Vec<String> = Vec::with_capacity(group.len());
    let mut previous_end: Option<usize> = None;

    for m in group {
        let (start, end) = (m.read_start_bar, m.read_end_bar);

        let position_tag = match previous_end {
            Some(prev_end) => {
                let gap = start.saturating_sub(prev_end);
                if gap <= m.read_len.saturating_sub(end) {
                    let lo = bucket_position(gap, bucket_size);
                    format!("@prev_left({lo}..{})", lo + bucket_size)
                } else {
                    right_tag(m.read_len, start, end)
                }
            }
            // NOTE(review): a positive `rel_dist_to_end` presumably marks a match
            // measured from the left end of the read; confirm against the annotator.
            None if m.rel_dist_to_end > 0 => {
                let lo = bucket_position(start, bucket_size);
                format!("@left({lo}..{})", lo + bucket_size)
            }
            None => right_tag(m.read_len, start, end),
        };

        let cut_marker = match &m.cuts {
            Some(cuts) if !cuts.is_empty() => match m.strand {
                Strand::Fwd => ", <<",
                Strand::Rc => ", >>",
            },
            _ => "",
        };

        let orientation = match m.strand {
            Strand::Fwd => "fw",
            Strand::Rc => "rc",
        };

        elements.push(format!(
            "{}[{}, *{}, {}]",
            m.match_type.as_str(),
            orientation,
            cut_marker,
            position_tag
        ));
        previous_end = Some(end);
    }

    elements.join("__")
}
fn colorize_pattern(input: &str) -> String {
let light_pink: CustomColor = CustomColor::new(255, 182, 193);
let dark_pink: CustomColor = CustomColor::new(231, 84, 128);
let light_blue: CustomColor = CustomColor::new(173, 216, 230);
let dark_blue: CustomColor = CustomColor::new(0, 0, 139);
input
.replace("Fflank", &"Fflank".custom_color(light_pink).to_string())
.replace("Ftag", &"Ftag".custom_color(dark_pink).to_string())
.replace("Rflank", &"Rflank".custom_color(light_blue).to_string())
.replace("Rtag", &"Rtag".custom_color(dark_blue).to_string())
}
/// Records the pattern of one read: optionally writes it out and bumps its tally.
///
/// Builds the label for `group` with `get_group_structure`, appends a
/// `read_id<TAB>label` line to `out` when a writer is supplied, and increments
/// the label's entry in `counts`.
fn record_group_pattern(
    read_id: &str,
    group: &[BarbellMatch],
    bucket_size: usize,
    out: Option<&mut BufWriter<File>>,
    counts: &mut HashMap<String, usize>,
) -> Result<(), Box<dyn Error>> {
    let label = get_group_structure(group, bucket_size);
    if let Some(writer) = out {
        writeln!(writer, "{read_id}\t{label}")
            .map_err(|e| format!("Failed to write read pattern to file: {e}"))?;
    }
    *counts.entry(label).or_insert(0) += 1;
    Ok(())
}

/// Summarizes the match patterns found in a tab-separated annotation file.
///
/// Records are deserialized as `BarbellMatch` rows and grouped by `read_id`;
/// a new group starts whenever the `read_id` differs from the previous row, so
/// rows belonging to the same read must be consecutive. Each group is turned
/// into a pattern label, the labels are counted, and the `top_n` most frequent
/// patterns are printed to stdout (ties are ordered by pattern text so the
/// output is reproducible).
///
/// # Arguments
///
/// * `annotated_file` - path of the tab-delimited annotation file to read.
/// * `top_n` - maximum number of patterns to print.
/// * `read_pattern_out` - optional path; when given, one `read_id<TAB>pattern`
///   line per read is written there.
/// * `bucket_size` - bucket width used when building the position tags.
///
/// # Errors
///
/// Returns an error if the annotation file cannot be opened, a row fails to
/// deserialize, or the per-read output file cannot be created, written, or
/// flushed.
///
/// # Panics
///
/// Panics if `bucket_size` is zero and the input contains at least one record.
pub fn inspect(
    annotated_file: &str,
    top_n: usize,
    read_pattern_out: Option<String>,
    bucket_size: usize,
) -> Result<(), Box<dyn Error>> {
    let mut reader = csv::ReaderBuilder::new()
        .delimiter(b'\t')
        .from_path(annotated_file)
        .map_err(|e| format!("Failed to open annotated file '{annotated_file}': {e}"))?;

    let mut pattern_writer: Option<BufWriter<File>> = match read_pattern_out {
        Some(path) => {
            let file = File::create(&path).map_err(|e| {
                format!("Failed to create read pattern output file '{path}': {e}")
            })?;
            Some(BufWriter::new(file))
        }
        None => None,
    };

    let mut current_read_id: Option<String> = None;
    let mut current_group: Vec<BarbellMatch> = Vec::new();
    let mut pattern_count: HashMap<String, usize> = HashMap::new();

    for result in reader.deserialize() {
        let record: BarbellMatch = result?;

        // A change of read id closes the group collected so far.
        if current_read_id.as_deref() != Some(record.read_id.as_str()) {
            if let Some(prev_read_id) = current_read_id.take() {
                record_group_pattern(
                    &prev_read_id,
                    &current_group,
                    bucket_size,
                    pattern_writer.as_mut(),
                    &mut pattern_count,
                )?;
                current_group.clear();
            }
            current_read_id = Some(record.read_id.clone());
        }
        current_group.push(record);
    }

    // The last read has no following record to trigger its flush. The id is only
    // set together with a push, so the group is non-empty whenever it is `Some`.
    if let Some(last_read_id) = current_read_id {
        record_group_pattern(
            &last_read_id,
            &current_group,
            bucket_size,
            pattern_writer.as_mut(),
            &mut pattern_count,
        )?;
    }

    // Flush explicitly: dropping a `BufWriter` discards any flush error.
    if let Some(writer) = pattern_writer.as_mut() {
        writer
            .flush()
            .map_err(|e| format!("Failed to write read pattern to file: {e}"))?;
    }

    println!("Found {} unique patterns", pattern_count.len());

    let mut pattern_count_vec: Vec<(String, usize)> = pattern_count.into_iter().collect();
    // Most frequent first; break ties on the pattern text for a stable listing.
    pattern_count_vec.sort_by(|a, b| b.1.cmp(&a.1).then_with(|| a.0.cmp(&b.0)));

    // Never report more patterns than actually exist.
    let shown = top_n.min(pattern_count_vec.len());
    for (i, (pattern, count)) in pattern_count_vec.iter().take(shown).enumerate() {
        let colored = colorize_pattern(pattern);
        println!("\tPattern {}: {} occurrences", i + 1, count);
        println!("\t\t{colored}");
    }
    println!("Showed {} / {} patterns", shown, pattern_count_vec.len());

    Ok(())
}