use std::collections::HashMap;
/// Conversion of a value into an optional `f64` offset.
///
/// Used as the item bound of `BrickPlot::with_x_offsets`, so callers can pass
/// either plain `f64` values or `Option<f64>` values.
pub trait IntoRowOffset {
    /// Consumes `self` and returns the offset, or `None` when there is none.
    fn into_row_offset(self) -> Option<f64>;
}

/// A bare number is always a present offset.
impl IntoRowOffset for f64 {
    fn into_row_offset(self) -> Option<f64> {
        Option::from(self)
    }
}

/// An optional number is passed through unchanged.
impl IntoRowOffset for Option<f64> {
    fn into_row_offset(self) -> Option<f64> {
        self
    }
}
/// Returns the lexicographically smallest rotation of `s`.
///
/// Two strings that are rotations of each other (e.g. `"CAG"`, `"AGC"` and
/// `"GCA"`) map to the same result, so the return value can serve as a shared
/// key for all of them. An empty input yields an empty string.
///
/// Rotation is performed on `char`s rather than bytes, so input containing
/// multi-byte UTF-8 characters is handled instead of panicking on a slice that
/// falls inside a character. For ASCII input the result is the same as a
/// byte-wise rotation.
fn canonical_rotation(s: &str) -> String {
    let chars: Vec<char> = s.chars().collect();
    // Lazily walks the rotation that starts at `start`, without allocating.
    let rotation = |start: usize| chars[start..].iter().chain(&chars[..start]);
    (0..chars.len())
        // `char` ordering matches UTF-8 byte ordering, so comparing the char
        // sequences gives the same winner as comparing the rotated strings.
        .min_by(|&a, &b| rotation(a).cmp(rotation(b)))
        .map(|start| rotation(start).collect::<String>())
        // No rotations exist only when `s` is empty.
        .unwrap_or_default()
}
/// Builder for a symbol-to-colour lookup table.
///
/// Start from [`BrickTemplate::new`] and chain [`dna`](BrickTemplate::dna)
/// and/or [`rna`](BrickTemplate::rna) to fill in the stock palettes; the
/// resulting map is exposed through the public `template` field.
#[derive(Debug, Clone, Default)]
pub struct BrickTemplate {
    /// Colour string stored for each symbol.
    pub template: HashMap<char, String>,
}

impl BrickTemplate {
    /// Creates a template with no entries.
    pub fn new() -> Self {
        Self::default()
    }

    /// Inserts `rgb(...)` colours for `A`, `C`, `G` and `T`, overwriting any
    /// existing entries for those symbols.
    pub fn dna(mut self) -> Self {
        const PALETTE: [(char, &str); 4] = [
            ('A', "rgb(0,150,0)"),
            ('C', "rgb(0,0,255)"),
            ('G', "rgb(209,113,5)"),
            ('T', "rgb(255,0,0)"),
        ];
        self.template
            .extend(PALETTE.iter().map(|&(base, colour)| (base, colour.to_owned())));
        self
    }

    /// Inserts named colours for `A`, `C`, `G` and `U`, overwriting any
    /// existing entries for those symbols.
    pub fn rna(mut self) -> Self {
        const PALETTE: [(char, &str); 4] = [
            ('A', "green"),
            ('C', "blue"),
            ('G', "orange"),
            ('U', "red"),
        ];
        self.template
            .extend(PALETTE.iter().map(|&(base, colour)| (base, colour.to_owned())));
        self
    }
}
#[derive(Debug, Clone)]
pub struct BrickPlot {
pub sequences: Vec<String>,
pub names: Vec<String>,
pub strigars: Option<Vec<(String, String)>>,
pub motifs: Option<HashMap<char, String>>,
pub strigar_exp: Option<Vec<String>>,
pub template: Option<HashMap<char, String>>,
pub x_offset: f64,
pub x_offsets: Option<Vec<Option<f64>>>,
pub motif_lengths: Option<HashMap<char, usize>>,
pub show_values: bool,
}
impl Default for BrickPlot {
fn default() -> Self { Self::new() }
}
impl BrickPlot {
pub fn new() -> Self {
Self {
sequences: vec![],
names: vec![],
strigars: None,
motifs: None,
strigar_exp: None,
template: Some(HashMap::new()),
motif_lengths: None,
x_offset: 0.0,
x_offsets: None,
show_values: false,
}
}
pub fn with_sequences<T, I>(mut self, sequences: I) -> Self
where
I: IntoIterator<Item = T>,
T: Into<String>,
{
self.sequences = sequences.into_iter().map(|x| x.into()).collect();
self
}
pub fn with_names<T, I>(mut self, names: I) -> Self
where
I: IntoIterator<Item = T>,
T: Into<String>,
{
self.names = names.into_iter().map(|x| x.into()).collect();
self
}
pub fn with_strigars<T, U, I>(mut self, strigars: I) -> Self
where
I: IntoIterator<Item = (T, U)>,
T: Into<String>,
U: Into<String>,
{
self.strigars = Some(strigars.into_iter()
.map(|(motif, strigar)| (motif.into(), strigar.into()))
.collect());
let per_read_maps: Vec<HashMap<char, String>> = self.strigars.as_ref()
.expect("process_strigars called without strigars data")
.iter()
.map(|(motif_str, _)| {
motif_str.split(',')
.map(|pair| {
let parts: Vec<&str> = pair.split(':').collect();
(parts[1].chars().next().expect("STRIGAR motif character is non-empty"), parts[0].to_string())
})
.collect()
})
.collect();
let mut canonical_freq: HashMap<String, usize> = HashMap::new();
let mut rotation_freq: HashMap<String, HashMap<String, usize>> = HashMap::new();
for read_map in &per_read_maps {
for kmer in read_map.values() {
let canon = canonical_rotation(kmer);
*canonical_freq.entry(canon.clone()).or_insert(0) += 1;
*rotation_freq.entry(canon).or_default().entry(kmer.clone()).or_insert(0) += 1;
}
}
let mut sorted_canonicals: Vec<(String, usize)> = canonical_freq.into_iter().collect();
sorted_canonicals.sort_by(|a, b| b.1.cmp(&a.1));
let mut canonical_to_global: HashMap<String, char> = HashMap::new();
let mut global_to_display: HashMap<char, String> = HashMap::new();
let mut global_to_length: HashMap<char, usize> = HashMap::new();
for (idx, (canon, _freq)) in sorted_canonicals.iter().enumerate() {
let global_letter = (b'A' + idx as u8) as char;
canonical_to_global.insert(canon.clone(), global_letter);
let rotations = rotation_freq.get(canon).expect("canon derived from rotation_freq keys");
let display = rotations.iter().max_by_key(|(_, count)| *count).expect("rotation_freq entry is non-empty").0.clone();
global_to_display.insert(global_letter, display.clone());
global_to_length.insert(global_letter, display.len());
}
let mut expanded_strigars: Vec<String> = vec![];
for (i, (_motif_str, strigar_str)) in self.strigars.as_ref().expect("process_strigars called without strigars data").iter().enumerate() {
let read_map = &per_read_maps[i];
let mut local_to_global: HashMap<char, char> = HashMap::new();
for (local_letter, kmer) in read_map {
let canon = canonical_rotation(kmer);
let global = canonical_to_global[&canon];
local_to_global.insert(*local_letter, global);
}
let expanded: String = strigar_str.split(char::is_alphabetic)
.zip(strigar_str.matches(char::is_alphabetic))
.map(|(num, ch)| {
let local = ch.chars().next().expect("STRIGAR letter character is non-empty");
let global = local_to_global[&local];
global.to_string().repeat(num.parse::<usize>().expect("STRIGAR repeat count is a valid integer"))
})
.collect();
expanded_strigars.push(expanded);
}
let motif_colors: &[&str] = &[
"rgb(31,119,180)", "rgb(255,127,14)", "rgb(44,160,44)", "rgb(214,39,40)", "rgb(148,103,189)", "rgb(140,86,75)", "rgb(227,119,194)", "rgb(127,127,127)", "rgb(188,189,34)", "rgb(23,190,207)", ];
let mut auto_template: HashMap<char, String> = HashMap::new();
for (idx, (canon, _)) in sorted_canonicals.iter().enumerate() {
let global_letter = canonical_to_global[canon];
auto_template.insert(global_letter, motif_colors[idx % motif_colors.len()].to_string());
}
self.template = Some(auto_template);
self.motifs = Some(global_to_display);
self.strigar_exp = Some(expanded_strigars);
self.motif_lengths = Some(global_to_length);
self
}
pub fn with_template(mut self, template: HashMap<char, String>) -> Self {
self.template = Some(template);
self
}
pub fn with_x_offset(mut self, x_offset: f64) -> Self {
self.x_offset = x_offset;
self
}
pub fn with_x_offsets<T, I>(mut self, offsets: I) -> Self
where
I: IntoIterator<Item = T>,
T: IntoRowOffset,
{
self.x_offsets = Some(offsets.into_iter().map(|x| x.into_row_offset()).collect());
self
}
pub fn with_values(mut self) -> Self {
self.show_values = true;
self
}
}