use super::chain::ChainFile;
use crate::coords::{hgvs_pos_to_index, index_to_hgvs_pos};
use crate::error::FerroError;
use crate::reference::transcript::GenomeBuild;
use std::path::Path;
#[derive(Debug, Clone)]
pub struct LiftoverResult {
pub source_build: GenomeBuild,
pub source_contig: String,
pub source_pos: u64,
pub target_build: GenomeBuild,
pub target_contig: String,
pub target_pos: u64,
pub chain_id: u64,
pub chain_score: u64,
pub in_gap: bool,
pub multiple_mappings: bool,
}
#[derive(Debug, Clone)]
pub struct IntervalLiftoverResult {
pub source_interval: (u64, u64),
pub target_interval: Option<(u64, u64)>,
pub status: LiftoverStatus,
pub chain_id: Option<u64>,
}
#[derive(Debug, Clone, PartialEq)]
pub enum LiftoverStatus {
Success,
PartiallyMapped {
mapped_fraction: f64,
},
SplitAcrossChains,
Unmappable,
ContigNotFound,
}
#[derive(Debug)]
pub struct Liftover {
forward: ChainFile,
reverse: ChainFile,
}
impl Liftover {
pub fn new(forward: ChainFile, reverse: ChainFile) -> Self {
Self { forward, reverse }
}
pub fn from_files<P: AsRef<Path>>(
grch37_to_38: P,
grch38_to_37: P,
) -> Result<Self, FerroError> {
let forward = ChainFile::from_file(grch37_to_38)?;
let reverse = ChainFile::from_file(grch38_to_37)?;
Ok(Self::new(forward, reverse))
}
pub fn one_way(chain_file: ChainFile) -> Self {
Self {
forward: chain_file,
reverse: ChainFile::new(),
}
}
fn get_chain_file(&self, from: GenomeBuild, to: GenomeBuild) -> Result<&ChainFile, FerroError> {
match (from, to) {
(GenomeBuild::GRCh37, GenomeBuild::GRCh38) => Ok(&self.forward),
(GenomeBuild::GRCh38, GenomeBuild::GRCh37) => Ok(&self.reverse),
_ => Err(FerroError::InvalidCoordinates {
msg: format!("Unsupported liftover direction: {:?} -> {:?}", from, to),
}),
}
}
pub fn lift(
&self,
from: GenomeBuild,
to: GenomeBuild,
contig: &str,
pos: u64,
) -> Result<LiftoverResult, FerroError> {
let chain_file = self.get_chain_file(from, to)?;
let pos_0based = hgvs_pos_to_index(pos) as u64;
let chains = chain_file.find_chains(contig, pos_0based);
if chains.is_empty() {
let suggestion = if from == GenomeBuild::GRCh37 {
"Position may not exist in GRCh38 (deleted region) or contig name may differ."
} else {
"Position may not exist in GRCh37 (new sequence in GRCh38) or contig name may differ."
};
return Err(FerroError::InvalidCoordinates {
msg: format!(
"No chain found for {}:{} in {:?} -> {:?} liftover. {}",
contig, pos, from, to, suggestion
),
});
}
let multiple_mappings = chains.len() > 1;
let chain = chains.into_iter().max_by_key(|c| c.score).unwrap();
match chain.lift_position(pos_0based) {
Some(lifted_0based) => Ok(LiftoverResult {
source_build: from,
source_contig: contig.to_string(),
source_pos: pos,
target_build: to,
target_contig: chain.query_name.clone(),
target_pos: index_to_hgvs_pos(lifted_0based as usize), chain_id: chain.id,
chain_score: chain.score,
in_gap: false,
multiple_mappings,
}),
None => {
Err(FerroError::InvalidCoordinates {
msg: format!("Position {}:{} falls in an alignment gap", contig, pos),
})
}
}
}
pub fn lift_37_to_38(&self, contig: &str, pos: u64) -> Result<LiftoverResult, FerroError> {
self.lift(GenomeBuild::GRCh37, GenomeBuild::GRCh38, contig, pos)
}
pub fn lift_38_to_37(&self, contig: &str, pos: u64) -> Result<LiftoverResult, FerroError> {
self.lift(GenomeBuild::GRCh38, GenomeBuild::GRCh37, contig, pos)
}
pub fn lift_interval(
&self,
from: GenomeBuild,
to: GenomeBuild,
contig: &str,
start: u64,
end: u64,
) -> Result<IntervalLiftoverResult, FerroError> {
let chain_file = self.get_chain_file(from, to)?;
let start_0based = hgvs_pos_to_index(start) as u64;
let end_0based = hgvs_pos_to_index(end) as u64;
let start_chains = chain_file.find_chains(contig, start_0based);
let end_chains = chain_file.find_chains(contig, end_0based);
if start_chains.is_empty() || end_chains.is_empty() {
return Ok(IntervalLiftoverResult {
source_interval: (start, end),
target_interval: None,
status: if start_chains.is_empty() && end_chains.is_empty() {
LiftoverStatus::ContigNotFound
} else {
LiftoverStatus::Unmappable
},
chain_id: None,
});
}
let common_chain = start_chains
.iter()
.find(|sc| end_chains.iter().any(|ec| ec.id == sc.id));
match common_chain {
Some(chain) => {
let lifted_start = chain.lift_position(start_0based);
let lifted_end = chain.lift_position(end_0based);
match (lifted_start, lifted_end) {
(Some(ls), Some(le)) => {
let (target_start, target_end) = if ls <= le {
(ls + 1, le + 1)
} else {
(le + 1, ls + 1) };
Ok(IntervalLiftoverResult {
source_interval: (start, end),
target_interval: Some((target_start, target_end)),
status: LiftoverStatus::Success,
chain_id: Some(chain.id),
})
}
(Some(ls), None) | (None, Some(ls)) => {
Ok(IntervalLiftoverResult {
source_interval: (start, end),
target_interval: Some((ls + 1, ls + 1)),
status: LiftoverStatus::PartiallyMapped {
mapped_fraction: 0.5,
},
chain_id: Some(chain.id),
})
}
(None, None) => {
Ok(IntervalLiftoverResult {
source_interval: (start, end),
target_interval: None,
status: LiftoverStatus::Unmappable,
chain_id: Some(chain.id),
})
}
}
}
None => {
Ok(IntervalLiftoverResult {
source_interval: (start, end),
target_interval: None,
status: LiftoverStatus::SplitAcrossChains,
chain_id: None,
})
}
}
}
pub fn forward_chains(&self) -> &ChainFile {
&self.forward
}
pub fn reverse_chains(&self) -> &ChainFile {
&self.reverse
}
}
#[cfg(test)]
mod tests {
use super::*;
fn test_chain_file() -> ChainFile {
let chain_data = r#"chain 1000 chr1 1000000 + 10000 20000 chr1 1000100 + 10050 20150 1
5000
5000
"#;
ChainFile::parse(chain_data.as_bytes()).unwrap()
}
#[test]
fn test_lift_position() {
let chain_file = test_chain_file();
let liftover = Liftover::one_way(chain_file);
let result = liftover
.lift(GenomeBuild::GRCh37, GenomeBuild::GRCh38, "chr1", 10001)
.unwrap();
assert_eq!(result.source_pos, 10001);
assert_eq!(result.target_contig, "chr1");
assert!(!result.in_gap);
}
#[test]
fn test_lift_position_not_found() {
let chain_file = test_chain_file();
let liftover = Liftover::one_way(chain_file);
let result = liftover.lift(GenomeBuild::GRCh37, GenomeBuild::GRCh38, "chr1", 1);
assert!(result.is_err());
}
#[test]
fn test_lift_interval() {
let chain_file = test_chain_file();
let liftover = Liftover::one_way(chain_file);
let result = liftover
.lift_interval(
GenomeBuild::GRCh37,
GenomeBuild::GRCh38,
"chr1",
10001,
10100,
)
.unwrap();
assert_eq!(result.status, LiftoverStatus::Success);
assert!(result.target_interval.is_some());
}
#[test]
fn test_lift_unknown_contig() {
let chain_file = test_chain_file();
let liftover = Liftover::one_way(chain_file);
let result = liftover.lift(GenomeBuild::GRCh37, GenomeBuild::GRCh38, "chrZ", 1000);
assert!(result.is_err());
}
#[test]
fn test_liftover_result_fields() {
let result = LiftoverResult {
source_build: GenomeBuild::GRCh37,
source_contig: "chr1".to_string(),
source_pos: 12345,
target_build: GenomeBuild::GRCh38,
target_contig: "chr1".to_string(),
target_pos: 12445,
chain_id: 1,
chain_score: 1000,
in_gap: false,
multiple_mappings: false,
};
assert_eq!(result.source_pos, 12345);
assert_eq!(result.target_pos, 12445);
assert_eq!(result.chain_score, 1000);
assert!(!result.in_gap);
assert!(!result.multiple_mappings);
}
#[test]
fn test_liftover_result_clone() {
let result = LiftoverResult {
source_build: GenomeBuild::GRCh37,
source_contig: "chr1".to_string(),
source_pos: 100,
target_build: GenomeBuild::GRCh38,
target_contig: "chr1".to_string(),
target_pos: 200,
chain_id: 1,
chain_score: 500,
in_gap: true,
multiple_mappings: true,
};
let cloned = result.clone();
assert_eq!(cloned.source_pos, result.source_pos);
assert_eq!(cloned.target_pos, result.target_pos);
assert_eq!(cloned.in_gap, result.in_gap);
}
#[test]
fn test_interval_liftover_result_success() {
let result = IntervalLiftoverResult {
source_interval: (1000, 2000),
target_interval: Some((1100, 2100)),
status: LiftoverStatus::Success,
chain_id: Some(1),
};
assert_eq!(result.source_interval, (1000, 2000));
assert_eq!(result.target_interval, Some((1100, 2100)));
assert_eq!(result.status, LiftoverStatus::Success);
assert_eq!(result.chain_id, Some(1));
}
#[test]
fn test_interval_liftover_result_unmappable() {
let result = IntervalLiftoverResult {
source_interval: (1000, 2000),
target_interval: None,
status: LiftoverStatus::Unmappable,
chain_id: None,
};
assert!(result.target_interval.is_none());
assert_eq!(result.status, LiftoverStatus::Unmappable);
}
#[test]
fn test_liftover_status_partially_mapped() {
let status = LiftoverStatus::PartiallyMapped {
mapped_fraction: 0.75,
};
if let LiftoverStatus::PartiallyMapped { mapped_fraction } = status {
assert!((mapped_fraction - 0.75).abs() < 0.01);
} else {
panic!("Expected PartiallyMapped status");
}
}
#[test]
fn test_liftover_status_equality() {
assert_eq!(LiftoverStatus::Success, LiftoverStatus::Success);
assert_eq!(LiftoverStatus::Unmappable, LiftoverStatus::Unmappable);
assert_eq!(
LiftoverStatus::ContigNotFound,
LiftoverStatus::ContigNotFound
);
assert_ne!(LiftoverStatus::Success, LiftoverStatus::Unmappable);
}
#[test]
fn test_liftover_status_split_across_chains() {
let status = LiftoverStatus::SplitAcrossChains;
assert_eq!(status, LiftoverStatus::SplitAcrossChains);
}
#[test]
fn test_liftover_new() {
let forward = ChainFile::new();
let reverse = ChainFile::new();
let liftover = Liftover::new(forward, reverse);
let _ = liftover.forward_chains();
let _ = liftover.reverse_chains();
}
#[test]
fn test_liftover_one_way() {
let chain_file = test_chain_file();
let liftover = Liftover::one_way(chain_file);
let result = liftover.lift(GenomeBuild::GRCh37, GenomeBuild::GRCh38, "chr1", 10001);
assert!(result.is_ok());
let reverse_result = liftover.lift(GenomeBuild::GRCh38, GenomeBuild::GRCh37, "chr1", 10001);
assert!(reverse_result.is_err());
}
#[test]
fn test_lift_37_to_38_convenience() {
let chain_file = test_chain_file();
let liftover = Liftover::one_way(chain_file);
let result = liftover.lift_37_to_38("chr1", 10001);
assert!(result.is_ok());
let r = result.unwrap();
assert_eq!(r.source_build, GenomeBuild::GRCh37);
assert_eq!(r.target_build, GenomeBuild::GRCh38);
}
#[test]
fn test_lift_38_to_37_no_reverse_chain() {
let chain_file = test_chain_file();
let liftover = Liftover::one_way(chain_file);
let result = liftover.lift_38_to_37("chr1", 10001);
assert!(result.is_err());
}
#[test]
fn test_lift_interval_contig_not_found() {
let chain_file = test_chain_file();
let liftover = Liftover::one_way(chain_file);
let result = liftover
.lift_interval(
GenomeBuild::GRCh37,
GenomeBuild::GRCh38,
"chrUnknown",
1000,
2000,
)
.unwrap();
assert_eq!(result.status, LiftoverStatus::ContigNotFound);
assert!(result.target_interval.is_none());
}
#[test]
fn test_lift_interval_partial_unmappable() {
let chain_file = test_chain_file();
let liftover = Liftover::one_way(chain_file);
let result = liftover
.lift_interval(
GenomeBuild::GRCh37,
GenomeBuild::GRCh38,
"chr1",
10001,
99999, )
.unwrap();
assert_ne!(result.status, LiftoverStatus::Success);
}
#[test]
fn test_lift_position_boundary() {
let chain_file = test_chain_file();
let liftover = Liftover::one_way(chain_file);
let result = liftover.lift(GenomeBuild::GRCh37, GenomeBuild::GRCh38, "chr1", 10001);
assert!(result.is_ok());
}
#[test]
fn test_get_chain_file_unsupported_direction() {
let liftover = Liftover::new(ChainFile::new(), ChainFile::new());
let result = liftover.lift(GenomeBuild::GRCh37, GenomeBuild::GRCh37, "chr1", 1000);
assert!(result.is_err());
}
#[test]
fn test_liftover_debug() {
let result = LiftoverResult {
source_build: GenomeBuild::GRCh37,
source_contig: "chr1".to_string(),
source_pos: 100,
target_build: GenomeBuild::GRCh38,
target_contig: "chr1".to_string(),
target_pos: 200,
chain_id: 1,
chain_score: 500,
in_gap: false,
multiple_mappings: false,
};
let debug_str = format!("{:?}", result);
assert!(debug_str.contains("LiftoverResult"));
assert!(debug_str.contains("source_pos"));
}
#[test]
fn test_interval_result_debug() {
let result = IntervalLiftoverResult {
source_interval: (100, 200),
target_interval: Some((150, 250)),
status: LiftoverStatus::Success,
chain_id: Some(1),
};
let debug_str = format!("{:?}", result);
assert!(debug_str.contains("IntervalLiftoverResult"));
}
#[test]
fn test_liftover_status_debug() {
let status = LiftoverStatus::PartiallyMapped {
mapped_fraction: 0.5,
};
let debug_str = format!("{:?}", status);
assert!(debug_str.contains("PartiallyMapped"));
assert!(debug_str.contains("0.5"));
}
}