use std::collections::{HashMap, HashSet};
use crate::{consistent_genes::{update_intersection_via_retain, EC}, io::{BusFolder, BusRecord}};
/// Outcome of reconciling the transcript sets of a group of bus records,
/// as produced by `find_consistent_transcripts`.
#[derive(PartialEq, Debug)]
pub enum MappingResultTranscript {
    /// Exactly one transcript is compatible with the records.
    SingleTranscript(TranscriptId),

    /// The candidate set did not collapse to a single transcript; the
    /// remaining candidates are carried along.
    Multimapped(HashSet<TranscriptId>),

    /// Intersecting the transcript sets of several records left nothing.
    Inconsistent,
}
/// Numeric identifier of a transcript (newtype around `u32`).
///
/// Cheap to copy; usable as a hash-map key and totally ordered by its
/// wrapped integer.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct TranscriptId(
    /// The raw integer id.
    pub u32,
);
/// Name of a transcript (newtype around `String`).
///
/// Hashable and totally ordered by the wrapped string's ordering.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Transcriptname(
    /// The transcript's name as text.
    pub String,
);
/// Lookup tables from equivalence classes to transcript ids, and from
/// transcript ids back to transcript names.
#[derive(Clone, Debug)]
pub struct Ec2TranscriptMapper {
    /// For each equivalence class, the ids of the transcripts it contains.
    ec2tid: HashMap<EC, HashSet<TranscriptId>>,

    /// Reverse table: transcript id -> transcript name.
    int_to_transcript: HashMap<TranscriptId, Transcriptname>,
}
impl Ec2TranscriptMapper {
pub fn new(ec2transcript: HashMap<EC, HashSet<Transcriptname>>) -> Self {
let mut transcript_list: HashSet<Transcriptname> = HashSet::new();
for transcripts in ec2transcript.values() {
for t in transcripts {
transcript_list.insert(t.clone());
}
}
let mut transcript_vector: Vec<Transcriptname> = transcript_list.into_iter().collect();
transcript_vector.sort();
let transcript_to_int: HashMap<Transcriptname, TranscriptId> = transcript_vector
.into_iter()
.enumerate()
.map(|(i, g)| (g, TranscriptId(i as u32)))
.collect();
let int_to_transcript: HashMap<TranscriptId, Transcriptname> =
transcript_to_int.iter().map(|(g, i)| (*i, g.clone())).collect();
let mut ec2tid: HashMap<EC, HashSet<TranscriptId>> = HashMap::new();
for (ec, genes) in ec2transcript.iter() {
let geneids: HashSet<TranscriptId> = genes
.iter()
.map(|gname| *transcript_to_int.get(gname).unwrap())
.collect();
ec2tid.insert(*ec, geneids);
}
Ec2TranscriptMapper { ec2tid, int_to_transcript }
}
pub fn get_transcripts(&self, ec: EC) -> &HashSet<TranscriptId> {
self.ec2tid.get(&ec).unwrap()
}
pub fn get_genenames(&self, ec: EC) -> HashSet<Transcriptname> {
let tids = self.get_transcripts(ec);
let genenames = tids
.iter()
.map(|gid| self.resolve_tid(*gid))
.collect();
genenames
}
pub fn resolve_tid(&self, tid: TranscriptId) -> Transcriptname {
let r = self.int_to_transcript.get(&tid).unwrap();
r.clone()
}
pub fn get_transcript_list(&self) -> Vec<Transcriptname> {
let ntrans = self.int_to_transcript.len();
let transcriptlist_vector: Vec<Transcriptname> = (0..ntrans)
.map(|k| self.resolve_tid(TranscriptId(k as u32)))
.collect();
transcriptlist_vector
}
}
pub fn find_consistent_transcripts(records: &[BusRecord], ec2gene: &Ec2TranscriptMapper) -> MappingResultTranscript {
let mut setlist = records.iter().map(|r| ec2gene.get_transcripts(EC(r.EC)));
let s1 = setlist.next().unwrap();
let mut shared_transcripts = s1.clone();
if records.len() == 1 {
if shared_transcripts.len() == 1 {
let elem = *shared_transcripts.iter().next().unwrap();
return MappingResultTranscript::SingleTranscript(elem);
} else {
return MappingResultTranscript::Multimapped(shared_transcripts);
}
}
for current_set in setlist {
update_intersection_via_retain(&mut shared_transcripts, current_set);
if shared_transcripts.is_empty() {
break;
}
}
match shared_transcripts.len() {
0 => MappingResultTranscript::Inconsistent,
1 => {
let elem = *shared_transcripts.iter().next().unwrap();
MappingResultTranscript::SingleTranscript(elem)
}
_ => MappingResultTranscript::Multimapped(shared_transcripts),
}
}
/// Converts an EC -> transcript-id table into an EC -> transcript-name table.
///
/// Each id listed for an EC is looked up in `transcript_dict`; repeated ids
/// collapse because the names are gathered into a set.
///
/// # Panics
///
/// Panics if an id in `ec_dict` has no entry in `transcript_dict`.
fn build_ec2transcript(
    ec_dict: &HashMap<EC, Vec<TranscriptId>>,
    transcript_dict: &HashMap<TranscriptId, Transcriptname>,
) -> HashMap<EC, HashSet<Transcriptname>> {
    ec_dict
        .iter()
        .map(|(ec, ids)| {
            let names: HashSet<Transcriptname> = ids
                .iter()
                .map(|id| transcript_dict.get(id).unwrap().clone())
                .collect();
            (*ec, names)
        })
        .collect()
}
/// Creates an [`Ec2TranscriptMapper`] for the given bus folder.
///
/// The EC table comes from `busfolder.parse_ecmatrix()` and the id -> name
/// table from `busfolder.parse_transcript()`; the two are combined into an
/// EC -> transcript-name table which is handed to `Ec2TranscriptMapper::new`.
pub(crate) fn make_mapper_transcript(busfolder: &BusFolder) -> Ec2TranscriptMapper {
    let ec_dict = busfolder.parse_ecmatrix();
    let transcript_dict = busfolder.parse_transcript();
    let ec2transcript = build_ec2transcript(&ec_dict, &transcript_dict);
    Ec2TranscriptMapper::new(ec2transcript)
}