use crate::annotate::searcher::BarbellMatch;
use crate::filter::pattern::*;
use crate::pattern_from_str;
use indicatif::{MultiProgress, ProgressBar, ProgressStyle};
use std::error::Error;
use std::fs::File;
use std::time::Duration;
/// Builds the three spinner bars used to report filtering progress.
///
/// Returns `(total, kept, dropped)` bars registered on one [`MultiProgress`].
/// Each bar shows a coloured spinner, a left-aligned prefix label, the current
/// message and the elapsed time, and ticks on its own steady interval.
fn create_progress_bar() -> (ProgressBar, ProgressBar, ProgressBar) {
    // Braille animation frames shared by all three spinners.
    const SPINNER_FRAMES: &str = "⠋⠙⠹⠸⠼⠴⠦⠧⠇⠏";

    let bars = MultiProgress::new();
    let total_bar = bars.add(ProgressBar::new_spinner());
    let kept_bar = bars.add(ProgressBar::new_spinner());
    let dropped_bar = bars.add(ProgressBar::new_spinner());

    // The templates are compile-time literals, so a parse failure here would be
    // a programming error rather than a runtime condition.
    let spinner_style = |template: &str| {
        ProgressStyle::with_template(template)
            .unwrap()
            .tick_chars(SPINNER_FRAMES)
    };
    total_bar.set_style(spinner_style(
        "{spinner:.cyan} {prefix:.bold.white:<8} {msg:.bold.cyan:>6} {elapsed:.dim}",
    ));
    kept_bar.set_style(spinner_style(
        "{spinner:.green} {prefix:.bold.white:<8} {msg:.bold.green:>6} {elapsed:.dim}",
    ));
    dropped_bar.set_style(spinner_style(
        "{spinner:.red} {prefix:.bold.white:<8} {msg:.bold.red:>6} {elapsed:.dim}",
    ));

    // Each bar gets a different tick interval.
    for (bar, tick_ms) in [(&total_bar, 100), (&kept_bar, 120), (&dropped_bar, 140)] {
        bar.enable_steady_tick(Duration::from_millis(tick_ms));
    }

    for (bar, label) in [
        (&total_bar, "Total:"),
        (&kept_bar, "Kept:"),
        (&dropped_bar, "Dropped:"),
    ] {
        bar.set_prefix(label);
    }

    (total_bar, kept_bar, dropped_bar)
}
pub fn filter(
annotated_file: &str,
output_file: &str,
dropped_out_file: Option<&str>,
filters: &[Pattern],
) -> Result<(), Box<dyn Error>> {
let (total_bar, kept_bar, dropped_bar) = create_progress_bar();
let mut reader = csv::ReaderBuilder::new()
.delimiter(b'\t')
.has_headers(true)
.from_path(annotated_file)?;
let mut writer = csv::WriterBuilder::new()
.delimiter(b'\t')
.from_path(output_file)
.unwrap();
let mut dropped_writer: Option<csv::Writer<File>> = None;
if let Some(dropped_out_file) = dropped_out_file {
dropped_writer = Some(
csv::WriterBuilder::new()
.delimiter(b'\t')
.from_path(dropped_out_file)
.unwrap(),
);
}
let mut current_read_id: Option<String> = None;
let mut current_group: Vec<BarbellMatch> = Vec::new();
for result in reader.deserialize() {
let record: BarbellMatch = result?;
if let Some(read_id) = ¤t_read_id {
if read_id != &record.read_id {
total_bar.inc(1);
if check_filter_pass(&mut current_group, filters) {
kept_bar.inc(1);
for annotation in ¤t_group {
writer.serialize(annotation)?;
}
} else {
dropped_bar.inc(1);
if let Some(dropped_writer) = &mut dropped_writer {
for annotation in ¤t_group {
dropped_writer.serialize(annotation)?;
}
}
}
current_group.clear();
current_read_id = Some(record.read_id.clone());
total_bar.set_message(total_bar.position().to_string());
kept_bar.set_message(kept_bar.position().to_string());
dropped_bar.set_message(dropped_bar.position().to_string());
}
} else {
current_read_id = Some(record.read_id.clone());
}
current_group.push(record);
}
if !current_group.is_empty() {
total_bar.inc(1);
if check_filter_pass(&mut current_group, filters) {
kept_bar.inc(1);
for annotation in ¤t_group {
writer.serialize(annotation)?;
}
} else {
dropped_bar.inc(1);
if let Some(dropped_writer) = &mut dropped_writer {
for annotation in ¤t_group {
dropped_writer.serialize(annotation)?;
}
}
}
total_bar.set_message(total_bar.position().to_string());
kept_bar.set_message(kept_bar.position().to_string());
dropped_bar.set_message(dropped_bar.position().to_string());
}
writer.flush()?;
if let Some(mut dropped_writer) = dropped_writer {
dropped_writer.flush()?;
}
let total_count = total_bar.position();
let kept_count = kept_bar.position();
let dropped_count = dropped_bar.position();
total_bar.finish_with_message(format!("{total_count} reads"));
kept_bar.finish_with_message(format!("{kept_count} reads"));
dropped_bar.finish_with_message(format!("{dropped_count} reads"));
Ok(())
}
/// Runs [`filter`] with a single pattern parsed from `pattern_str`.
///
/// The string is converted with the `pattern_from_str!` macro and the resulting
/// pattern is the only filter applied. `annotated_file`, `output_file` and
/// `dropped_out_file` are forwarded to [`filter`] unchanged.
///
/// # Errors
///
/// Returns whatever error [`filter`] returns.
// NOTE(review): how `pattern_from_str!` reacts to an invalid pattern string is
// not visible from this file — confirm whether it can panic.
pub fn filter_from_pattern_str(
    annotated_file: &str,
    pattern_str: &str,
    output_file: &str,
    dropped_out_file: Option<&str>,
) -> Result<(), Box<dyn Error>> {
    let single_filter = [pattern_from_str!(pattern_str)];
    filter(annotated_file, output_file, dropped_out_file, &single_filter)
}
/// Runs [`filter`] with the patterns listed in `text_file`, one per line.
///
/// Every line is trimmed of surrounding whitespace; lines that are empty after
/// trimming are skipped. Each remaining line is converted with the
/// `pattern_from_str!` macro, and the patterns are passed to [`filter`] in file
/// order.
///
/// # Errors
///
/// Returns an error if `text_file` cannot be read, or whatever error [`filter`]
/// returns.
pub fn filter_from_text_file(
    annotated_file: &str,
    text_file: &str,
    output_file: &str,
    dropped_out_file: Option<&str>,
) -> Result<(), Box<dyn Error>> {
    let contents = std::fs::read_to_string(text_file)?;

    let mut parsed: Vec<Pattern> = Vec::new();
    for raw_line in contents.split('\n') {
        let line = raw_line.trim();
        if line.is_empty() {
            continue;
        }
        parsed.push(pattern_from_str!(line));
    }

    filter(annotated_file, output_file, dropped_out_file, &parsed)
}
/// Decides whether one read's annotations satisfy any of `patterns`.
///
/// Every pattern is tried with `match_pattern`. Among the patterns that match,
/// the one with the most elements wins (the earliest one on ties; patterns with
/// zero elements never win). The winner's cut positions are appended to the
/// `cuts` field of the annotations they index.
///
/// Returns `true` when the winning pattern's element count equals the number of
/// annotations. With no winning pattern the count is zero, so the result is
/// `true` only for an empty `annotations` slice. Note that cuts are recorded
/// whenever a pattern wins, even if the function then returns `false`.
fn check_filter_pass(annotations: &mut [BarbellMatch], patterns: &[Pattern]) -> bool {
    // Element count and cut positions of the best matching pattern so far.
    let mut best: Option<(usize, Vec<(usize, Cut)>)> = None;

    for pattern in patterns {
        let (matched, cuts) = match_pattern(annotations, pattern);
        if !matched {
            continue;
        }
        let element_count = pattern.elements.len();
        let best_so_far = best.as_ref().map_or(0, |(count, _)| *count);
        // Strictly greater: an earlier pattern of equal length keeps its place.
        if element_count > best_so_far {
            best = Some((element_count, cuts));
        }
    }

    let Some((longest, cuts)) = best else {
        return annotations.is_empty();
    };

    for (match_idx, cut) in cuts {
        annotations[match_idx]
            .cuts
            .get_or_insert_with(Vec::new)
            .push((cut, match_idx));
    }

    longest == annotations.len()
}