use crate::model::error::ChainError;
const BIGNUM: i32 = 0x3fff_ffff;
const DEFAULT_GAP_COSTS: &str = "tablesize 11
smallSize 111
position 1 2 3 11 111 2111 12111 32111 72111 152111 252111
qGap 325 360 400 450 600 1100 3600 7600 15600 31600 56600
tGap 325 360 400 450 600 1100 3600 7600 15600 31600 56600
bothGap 625 660 700 750 900 1400 4000 8000 16000 32000 57000
";
const ORIGINAL_GAP_COSTS: &str = "tableSize 11
smallSize 111
position 1 2 3 11 111 2111 12111 32111 72111 152111 252111
qGap 350 425 450 600 900 2900 22900 57900 117900 217900 317900
tGap 350 425 450 600 900 2900 22900 57900 117900 217900 317900
bothGap 750 825 850 1000 1300 3300 23300 58300 118300 218300 318300
";
const CHEAP_GAP_COSTS: &str = "tableSize 3
smallSize 100
position 1 100 1000
qGap 0 30 300
tGap 0 30 300
bothGap 0 30 300
";
const RNA_DNA_GAP_COSTS: &str = "tablesize 12
smallSize 111
position 1 2 3 11 31 111 2111 12111 32111 72111 152111 252111
qGap 325 360 400 450 600 800 1100 3600 7600 15600 31600 56600
tGap 200 210 220 250 300 400 500 600 800 1200 2000 4000
bothGap 625 660 700 750 900 1100 1400 4000 8000 16000 32000 57000
";
#[derive(Debug, Clone)]
pub struct GapCalc {
small_size: i32,
q_small: Vec<i32>,
t_small: Vec<i32>,
b_small: Vec<i32>,
long_pos: Vec<i32>,
q_long: Vec<f64>,
t_long: Vec<f64>,
b_long: Vec<f64>,
q_last_pos: i32,
t_last_pos: i32,
b_last_pos: i32,
q_last_val: f64,
t_last_val: f64,
b_last_val: f64,
q_last_slope: f64,
t_last_slope: f64,
b_last_slope: f64,
}
impl GapCalc {
pub fn default_costs() -> GapCalc {
Self::from_spec_str(DEFAULT_GAP_COSTS).expect("built-in default gap costs are valid")
}
pub fn from_linear_gap(arg: &str) -> Result<GapCalc, ChainError> {
match arg {
"loose" => Self::from_spec_str(DEFAULT_GAP_COSTS),
"medium" | "original" => Self::from_spec_str(ORIGINAL_GAP_COSTS),
"cheap" => Self::from_spec_str(CHEAP_GAP_COSTS),
"rnaDna" => Self::from_spec_str(RNA_DNA_GAP_COSTS),
path => {
let contents = std::fs::read_to_string(path).map_err(|err| {
gap_error(format!("cannot read linearGap file {path}: {err}"))
})?;
Self::from_spec_str(&contents)
}
}
}
pub fn from_spec_str(spec: &str) -> Result<GapCalc, ChainError> {
let mut table_size: Option<usize> = None;
let mut small_size: Option<i32> = None;
let mut position: Option<Vec<i32>> = None;
let mut q_gap: Option<Vec<f64>> = None;
let mut t_gap: Option<Vec<f64>> = None;
let mut both_gap: Option<Vec<f64>> = None;
for raw in spec.lines() {
let line = raw.trim();
if line.is_empty() || line.starts_with('#') {
continue;
}
let mut words = line.split_whitespace();
let tag = words.next().unwrap_or("");
let rest: Vec<&str> = words.collect();
match tag.to_ascii_lowercase().as_str() {
"tablesize" => table_size = Some(parse_one_usize(&rest, "tableSize")?),
"smallsize" => small_size = Some(parse_one_i32(&rest, "smallSize")?),
"position" => position = Some(parse_i32_list(&rest, "position")?),
"qgap" => q_gap = Some(parse_f64_list(&rest, "qGap")?),
"tgap" => t_gap = Some(parse_f64_list(&rest, "tGap")?),
"bothgap" => both_gap = Some(parse_f64_list(&rest, "bothGap")?),
other => {
return Err(gap_error(format!(
"unknown gap specification tag '{other}'"
)));
}
}
}
let small_size =
small_size.ok_or_else(|| gap_error("gap specification missing smallSize"))?;
let position = position.ok_or_else(|| gap_error("gap specification missing position"))?;
let q_gap = q_gap.ok_or_else(|| gap_error("gap specification missing qGap"))?;
let t_gap = t_gap.ok_or_else(|| gap_error("gap specification missing tGap"))?;
let both_gap = both_gap.ok_or_else(|| gap_error("gap specification missing bothGap"))?;
let n = position.len();
if let Some(declared) = table_size
&& declared != n
{
return Err(gap_error(format!(
"tableSize {declared} does not match {n} position values"
)));
}
if q_gap.len() != n || t_gap.len() != n || both_gap.len() != n {
return Err(gap_error(
"position/qGap/tGap/bothGap must all have the same length",
));
}
if n < 2 {
return Err(gap_error("gap specification needs at least two knots"));
}
if small_size < 1 {
return Err(gap_error("smallSize must be positive"));
}
Self::build(small_size, position, q_gap, t_gap, both_gap)
}
fn build(
small_size: i32,
position: Vec<i32>,
q_gap: Vec<f64>,
t_gap: Vec<f64>,
both_gap: Vec<f64>,
) -> Result<GapCalc, ChainError> {
let small_len = small_size as usize;
let mut q_small = vec![0i32; small_len];
let mut t_small = vec![0i32; small_len];
let mut b_small = vec![0i32; small_len];
for i in 1..small_size {
let idx = i as usize;
q_small[idx] = interpolate(i, &position, &q_gap);
t_small[idx] = interpolate(i, &position, &t_gap);
b_small[idx] = interpolate(i, &position, &both_gap);
}
let start = position
.iter()
.position(|&p| p == small_size)
.ok_or_else(|| gap_error("smallSize must appear in the position list"))?;
let long_pos: Vec<i32> = position[start..].to_vec();
let q_long: Vec<f64> = q_gap[start..].to_vec();
let t_long: Vec<f64> = t_gap[start..].to_vec();
let b_long: Vec<f64> = both_gap[start..].to_vec();
let m = long_pos.len();
if m < 2 {
return Err(gap_error(
"gap specification needs at least two long knots past smallSize",
));
}
let last = m - 1;
let span = (long_pos[last] - long_pos[last - 1]) as f64;
let q_last_slope = (q_long[last] - q_long[last - 1]) / span;
let t_last_slope = (t_long[last] - t_long[last - 1]) / span;
let b_last_slope = (b_long[last] - b_long[last - 1]) / span;
Ok(GapCalc {
small_size,
q_small,
t_small,
b_small,
q_last_pos: long_pos[last],
t_last_pos: long_pos[last],
b_last_pos: long_pos[last],
q_last_val: q_long[last],
t_last_val: t_long[last],
b_last_val: b_long[last],
q_last_slope,
t_last_slope,
b_last_slope,
long_pos,
q_long,
t_long,
b_long,
})
}
#[inline]
pub fn cost(&self, dq: i32, dt: i32) -> i32 {
let dt = dt.max(0);
let dq = dq.max(0);
if dt == 0 {
if dq < self.small_size {
self.q_small[dq as usize]
} else if dq >= self.q_last_pos {
let cost =
(self.q_last_val + self.q_last_slope * f64::from(dq - self.q_last_pos)) as i32;
if cost < 0 { BIGNUM } else { cost }
} else {
interpolate(dq, &self.long_pos, &self.q_long)
}
} else if dq == 0 {
if dt < self.small_size {
self.t_small[dt as usize]
} else if dt >= self.t_last_pos {
let cost =
(self.t_last_val + self.t_last_slope * f64::from(dt - self.t_last_pos)) as i32;
if cost < 0 { BIGNUM } else { cost }
} else {
interpolate(dt, &self.long_pos, &self.t_long)
}
} else {
let both = dq.wrapping_add(dt);
if both < self.small_size {
self.b_small[both as usize]
} else if both >= self.b_last_pos {
let cost = (self.b_last_val + self.b_last_slope * f64::from(both - self.b_last_pos))
as i32;
if cost < 0 { BIGNUM } else { cost }
} else {
interpolate(both, &self.long_pos, &self.b_long)
}
}
}
}
fn interpolate(x: i32, s: &[i32], v: &[f64]) -> i32 {
let n = s.len();
for i in 0..n {
let ss = s[i];
if x == ss {
return v[i] as i32;
} else if x < ss {
if i == 0 {
return v[0] as i32;
}
let ds = f64::from(ss - s[i - 1]);
let dv = v[i] - v[i - 1];
return (v[i - 1] + dv * f64::from(x - s[i - 1]) / ds) as i32;
}
}
let i = n - 1;
let ds = f64::from(s[i] - s[i - 1]);
let dv = v[i] - v[i - 1];
(v[i - 1] + dv * f64::from(x - s[i - 1]) / ds) as i32
}
fn parse_one_usize(rest: &[&str], tag: &str) -> Result<usize, ChainError> {
let first = rest
.first()
.ok_or_else(|| gap_error(format!("{tag} is missing a value")))?;
first.parse::<usize>().map_err(|_| {
gap_error(format!(
"{tag} value '{first}' is not a non-negative integer"
))
})
}
fn parse_one_i32(rest: &[&str], tag: &str) -> Result<i32, ChainError> {
let first = rest
.first()
.ok_or_else(|| gap_error(format!("{tag} is missing a value")))?;
first
.parse::<i32>()
.map_err(|_| gap_error(format!("{tag} value '{first}' is not an integer")))
}
fn parse_i32_list(rest: &[&str], tag: &str) -> Result<Vec<i32>, ChainError> {
if rest.is_empty() {
return Err(gap_error(format!("{tag} has no values")));
}
rest.iter()
.map(|word| {
word.parse::<i32>()
.map_err(|_| gap_error(format!("{tag} value '{word}' is not an integer")))
})
.collect()
}
fn parse_f64_list(rest: &[&str], tag: &str) -> Result<Vec<f64>, ChainError> {
if rest.is_empty() {
return Err(gap_error(format!("{tag} has no values")));
}
rest.iter()
.map(|word| {
word.parse::<f64>()
.map_err(|_| gap_error(format!("{tag} value '{word}' is not a number")))
})
.collect()
}
fn gap_error(message: impl Into<String>) -> ChainError {
ChainError::Unsupported {
msg: message.into().into(),
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn default_acceptance_vectors() {
let g = GapCalc::default_costs();
let cases = [
(0, 0, 0),
(1, 0, 325),
(0, 1, 325),
(1, 1, 660),
(5, 0, 412),
(2, 3, 712),
(50, 60, 898),
(110, 0, 598),
(111, 0, 600),
(0, 111, 600),
(200, 0, 622),
(2111, 0, 1100),
(1000, 1000, 1372),
(252111, 0, 56600),
(300000, 0, 68572),
(150000, 150000, 68972),
];
for (dq, dt, expected) in cases {
assert_eq!(g.cost(dq, dt), expected, "cost({dq}, {dt})");
}
}
#[test]
fn small_table_spot_values() {
let g = GapCalc::default_costs();
let q_expected = [
(1, 325),
(2, 360),
(3, 400),
(5, 412),
(10, 443),
(50, 508),
(110, 598),
];
for (dq, expected) in q_expected {
assert_eq!(g.cost(dq, 0), expected, "qSmall[{dq}]");
}
let b_expected = [(2, 660), (5, 712), (110, 898)];
for (both, expected) in b_expected {
assert_eq!(g.cost(1, both - 1), expected, "bSmall[{both}]");
}
}
#[test]
fn last_slopes_are_quarter() {
let g = GapCalc::default_costs();
assert!((g.q_last_slope - 0.25).abs() < 1e-12);
assert!((g.t_last_slope - 0.25).abs() < 1e-12);
assert!((g.b_last_slope - 0.25).abs() < 1e-12);
}
#[test]
fn negative_gaps_are_clamped_to_zero() {
let g = GapCalc::default_costs();
assert_eq!(g.cost(-5, -5), 0);
assert_eq!(g.cost(-5, 0), 0);
assert_eq!(g.cost(0, -5), 0);
assert_eq!(g.cost(-5, 1), 325);
}
#[test]
fn medium_alias_parses() {
let g = GapCalc::from_linear_gap("medium").expect("medium parses");
assert_eq!(g.cost(1, 0), 350);
assert_eq!(g.cost(0, 1), 350);
assert_eq!(g.cost(1, 1), 825); }
#[test]
fn rna_dna_is_asymmetric() {
let g = GapCalc::from_linear_gap("rnaDna").expect("rnaDna parses");
assert_eq!(g.cost(1, 0), 325);
assert_eq!(g.cost(0, 1), 200);
}
#[test]
fn cheap_table_parses() {
let g = GapCalc::from_linear_gap("cheap").expect("cheap parses");
assert_eq!(g.cost(1, 0), 0);
assert_eq!(g.cost(100, 0), 30);
assert_eq!(g.cost(1000, 0), 300);
}
#[test]
fn unknown_tag_is_rejected() {
let err = GapCalc::from_spec_str("bogus 1\n").unwrap_err();
assert!(err.to_string().contains("unknown gap specification tag"));
}
#[test]
fn whitespace_and_case_tolerant() {
let spec = " TABLESIZE 3\n\tsmallsize 100\nPOSITION 1 100 1000\nQGAP 0 30 300\nTGAP 0 30 300\nBOTHGAP 0 30 300\n";
let g = GapCalc::from_spec_str(spec).expect("tolerant parse");
assert_eq!(g.cost(100, 0), 30);
}
}