use std::collections::{HashMap, HashSet};
use rustbustools::{io::BusRecord, disjoint::DisjointSubsets, consistent_genes::Ec2GeneMapper};
use crate::phantompurger::{Fingerprint, groupby_gene_across_samples, make_fingerprint_simple};
/// Collapses every entry of `record_dict` into a single number: the sum of
/// the `COUNT` fields of all records stored under that key.
///
/// The resulting `(key, total)` pairs are collected into a [`Fingerprint`].
/// Keys whose record list is empty end up with a total of `0`.
fn make_fingerprint(record_dict: &HashMap<String,Vec<BusRecord>>) -> Fingerprint{
    record_dict
        .iter()
        .map(|(name, records)| {
            // Total count over all records stored under this key.
            let total: u32 = records.iter().map(|rec| rec.COUNT).sum();
            (name.clone(), total)
        })
        .collect()
}
/// Builds one count vector per group returned by
/// `groupby_gene_across_samples(record_dict, ecmapper_dict)`.
///
/// Each group is turned into a fingerprint via `make_fingerprint_simple`, and
/// that fingerprint is then read out in the order given by `order`. Names in
/// `order` that are absent from a group's fingerprint contribute `0`.
///
/// Returns the vectors in the order the groups were yielded; every inner
/// vector has `order.len()` entries.
fn create_fingerprint2(order: &[String], record_dict: &HashMap<String, Vec<BusRecord>>, ecmapper_dict:&HashMap<String, &Ec2GeneMapper>) -> Vec<Vec<u32>>{
    groupby_gene_across_samples(record_dict, ecmapper_dict)
        .into_iter()
        .map(|group| {
            let counts = make_fingerprint_simple(&group);
            // Lay the counts out in the caller-specified order, defaulting to 0.
            let row: Vec<u32> = order
                .iter()
                .map(|name| counts.get(name).copied().unwrap_or(0))
                .collect();
            row
        })
        .collect()
}
/// Builds one count vector per gene group, considering only the entries of
/// `record_dict` that hold exactly one record.
///
/// Entries with zero or with several records are dropped before grouping.
/// The remaining single records are grouped by `groupby_gene_even_simpler`,
/// each group is turned into a fingerprint via `make_fingerprint_simple`, and
/// the fingerprint is read out in the order given by `order` (names missing
/// from a group contribute `0`).
///
/// Every inner vector of the result has `order.len()` entries.
fn create_fingerprint(order: &[String], record_dict: &HashMap<String, Vec<BusRecord>>, ecmapper_dict:&HashMap<String, &Ec2GeneMapper>) -> Vec<Vec<u32>>{
    // Keep only the keys that map to exactly one record.
    let singletons: HashMap<String, BusRecord> = record_dict
        .iter()
        .filter_map(|(name, records)| match records.as_slice() {
            [only] => Some((name.clone(), only.clone())),
            _ => None,
        })
        .collect();

    groupby_gene_even_simpler(singletons, ecmapper_dict)
        .into_iter()
        .map(|group| {
            let counts = make_fingerprint_simple(&group);
            // Lay the counts out in the caller-specified order, defaulting to 0.
            let row: Vec<u32> = order
                .iter()
                .map(|name| counts.get(name).copied().unwrap_or(0))
                .collect();
            row
        })
        .collect()
}
/// Splits `record_dict` (sample name -> its single record) into groups of
/// samples, as decided by `DisjointSubsets` on the records' gene sets.
///
/// For every record the gene set of its `EC` is looked up in the mapper
/// registered for its sample in `ecmapper_dict`. Each record gets a synthetic
/// string id (its enumeration index), the id/gene-set pairs are handed to a
/// `DisjointSubsets`, and every id list it reports becomes one output map.
/// NOTE(review): presumably ids end up in the same list when their gene sets
/// overlap (directly or transitively) - confirm against `DisjointSubsets`.
///
/// A `record_dict` with exactly one entry is returned as-is as the only
/// group, without consulting `ecmapper_dict`.
///
/// # Panics
///
/// * if a sample name in `record_dict` has no entry in `ecmapper_dict`,
/// * if `DisjointSubsets` reports an id that is not (or no longer) indexed,
/// * if the same sample name shows up twice within one group.
fn groupby_gene_even_simpler(record_dict: HashMap<String,BusRecord>, ecmapper_dict: &HashMap<String, &Ec2GeneMapper>) -> Vec<HashMap<String, BusRecord>> {
    // A single record trivially forms its own group.
    if record_dict.len() == 1 {
        return vec![record_dict];
    }

    // Index every record under a synthetic id, together with its sample name
    // and a borrowed view of the genes its EC maps to.
    let mut indexed: HashMap<String, (BusRecord, String, &HashSet<u32>)> = record_dict
        .into_iter()
        .enumerate()
        .map(|(idx, (sample, record))| {
            let mapper = ecmapper_dict.get(&sample).unwrap();
            let genes = mapper.get_genes(record.EC);
            (idx.to_string(), (record, sample, genes))
        })
        .collect();

    // Register each id with an owned copy of its gene set.
    let mut disjoint_set = DisjointSubsets::new();
    for (id, (_, _, genes)) in &indexed {
        disjoint_set.add(id.clone(), (*genes).clone());
    }

    // Turn every reported id list into a `sample -> record` map, moving the
    // records out of the index as they are claimed.
    disjoint_set
        .get_disjoint_set_ids()
        .into_iter()
        .map(|member_ids| {
            let mut group: HashMap<String, BusRecord> = HashMap::new();
            for member_id in member_ids {
                let (record, sample, _) = indexed.remove(&member_id).unwrap();
                if group.insert(sample, record).is_some() {
                    panic!("cant happen, each sample only has one record")
                }
            }
            group
        })
        .collect()
}
#[cfg(test)]
pub mod tests {
    use super::groupby_gene_even_simpler;
    use crate::phantompurger::create_dummy_ec;
    use rustbustools::{consistent_genes::Ec2GeneMapper, io::BusRecord};
    use std::collections::HashMap;

    /// Exercises `groupby_gene_even_simpler` with two and three samples that
    /// all use the dummy EC mapping from `create_dummy_ec`.
    ///
    /// NOTE(review): the expected group counts rely on the dummy mapping
    /// behaving such that EC 0 and EC 1 share no gene while EC 2 shares genes
    /// with both - confirm against `create_dummy_ec`.
    #[test]
    fn test_groupby_gene_simple() {
        let es1 = create_dummy_ec();
        let es2 = create_dummy_ec();
        let es3 = create_dummy_ec();

        let mut es_dict: HashMap<String, &Ec2GeneMapper> = HashMap::new();
        es_dict.insert("s1".to_string(), &es1);
        es_dict.insert("s2".to_string(), &es2);
        es_dict.insert("s3".to_string(), &es3);

        // Builds the `sample -> record` map from the given pairs and runs the
        // grouping against the shared mapper dictionary.
        let group = |entries: Vec<(&str, BusRecord)>| {
            let record_dict = entries
                .into_iter()
                .map(|(sample, rec)| (sample.to_string(), rec))
                .collect();
            groupby_gene_even_simpler(record_dict, &es_dict)
        };

        // Both samples carry EC 0: expected to land in one group.
        let res = group(vec![
            ("s1", BusRecord { CB: 0, UMI: 1, EC: 0, COUNT: 2, FLAG: 0 }),
            ("s2", BusRecord { CB: 0, UMI: 1, EC: 0, COUNT: 3, FLAG: 0 }),
        ]);
        println!("{:?}", res);
        assert_eq!(res.len(), 1);
        assert_eq!(res[0].len(), 2);

        // EC 0 vs EC 2: expected to land in one group.
        let res = group(vec![
            ("s1", BusRecord { CB: 0, UMI: 1, EC: 0, COUNT: 1, FLAG: 0 }),
            ("s2", BusRecord { CB: 0, UMI: 1, EC: 2, COUNT: 1, FLAG: 0 }),
        ]);
        println!("{:?}", res);
        assert_eq!(res.len(), 1);
        assert_eq!(res[0].len(), 2);

        // EC 0 vs EC 1: expected to be split into two single-sample groups.
        let res = group(vec![
            ("s1", BusRecord { CB: 0, UMI: 1, EC: 0, COUNT: 1, FLAG: 0 }),
            ("s2", BusRecord { CB: 0, UMI: 1, EC: 1, COUNT: 1, FLAG: 0 }),
        ]);
        println!("{:?}", res);
        assert_eq!(res.len(), 2);
        assert_eq!(res[0].len(), 1);
        assert_eq!(res[1].len(), 1);

        // EC 0, EC 1 and EC 2 together: expected to collapse into one group
        // of three samples.
        let res = group(vec![
            ("s1", BusRecord { CB: 0, UMI: 1, EC: 0, COUNT: 1, FLAG: 0 }),
            ("s2", BusRecord { CB: 0, UMI: 1, EC: 1, COUNT: 1, FLAG: 0 }),
            ("s3", BusRecord { CB: 0, UMI: 1, EC: 2, COUNT: 1, FLAG: 0 }),
        ]);
        println!("{:?}", res);
        assert_eq!(res.len(), 1);
        assert_eq!(res[0].len(), 3);
    }
}