Skip to main content

salmon_core/
transcript.rs

1//! Transcript record: identity, lengths, accumulated mass/counts, and the
2//! effective-length computation.
3//!
4//! Mirrors the modeling-relevant parts of salmon's `Transcript`
5//! (`include/.../model/Transcript.hpp`). Mutable count/mass state is stored in
6//! atomics so worker threads can update it concurrently during a quantification
7//! round, matching the C++ design. Per-transcript sequence storage and bias
8//! state are added in a later phase (bias models).
9
10use crate::atomic::AtomicF64;
11use crate::math::{log_add, LOG_0};
12use std::sync::atomic::{AtomicU64, Ordering};
13
14/// A reference transcript and its mutable quantification state.
15#[derive(Debug)]
16pub struct Transcript {
17    /// dense transcript id (index into the experiment's transcript vector)
18    pub id: u32,
19    /// reference name as it appears in the index / output
20    pub name: String,
21    /// length used for modeling (may be poly-A clipped)
22    pub ref_length: u32,
23    /// full reference length (reported in `quant.sf` `Length` column)
24    pub complete_length: u32,
25    /// true if this is a decoy sequence
26    pub is_decoy: bool,
27
28    /// cached log effective length
29    log_eff_length: AtomicF64,
30    /// fragments mapping uniquely to this transcript
31    unique_count: AtomicU64,
32    /// total fragments touching this transcript (unique + shared)
33    total_count: AtomicU64,
34    /// accumulated (fractional) mass assigned to this transcript, log space
35    mass: AtomicF64,
36}
37
38impl Transcript {
39    pub fn new(id: u32, name: impl Into<String>, complete_length: u32) -> Self {
40        Self {
41            id,
42            name: name.into(),
43            ref_length: complete_length,
44            complete_length,
45            is_decoy: false,
46            log_eff_length: AtomicF64::new(LOG_0),
47            unique_count: AtomicU64::new(0),
48            total_count: AtomicU64::new(0),
49            mass: AtomicF64::new(LOG_0),
50        }
51    }
52
53    pub fn with_ref_length(mut self, ref_length: u32) -> Self {
54        self.ref_length = ref_length;
55        self
56    }
57
58    pub fn as_decoy(mut self) -> Self {
59        self.is_decoy = true;
60        self
61    }
62
63    // --- count / mass accessors ------------------------------------------------
64
65    pub fn unique_count(&self) -> u64 {
66        self.unique_count.load(Ordering::Relaxed)
67    }
68    pub fn total_count(&self) -> u64 {
69        self.total_count.load(Ordering::Relaxed)
70    }
71    pub fn add_unique(&self, n: u64) {
72        self.unique_count.fetch_add(n, Ordering::Relaxed);
73    }
74    pub fn add_total(&self, n: u64) {
75        self.total_count.fetch_add(n, Ordering::Relaxed);
76    }
77
78    /// Current accumulated mass (log space).
79    pub fn mass(&self) -> f64 {
80        self.mass.load()
81    }
82    pub fn set_mass(&self, log_mass: f64) {
83        self.mass.store(log_mass);
84    }
85    /// Atomically add `log_inc` (a log-space value) to the accumulated mass.
86    pub fn add_mass(&self, log_inc: f64) {
87        self.mass.log_add_assign(log_inc);
88    }
89
90    // --- effective length ------------------------------------------------------
91
92    pub fn log_effective_length(&self) -> f64 {
93        self.log_eff_length.load()
94    }
95    pub fn set_log_effective_length(&self, v: f64) {
96        self.log_eff_length.store(v);
97    }
98    pub fn effective_length(&self) -> f64 {
99        self.log_eff_length.load().exp()
100    }
101
102    /// Compute the log effective length given a log-space fragment-length PMF
103    /// covering lengths `[min_val, min_val + log_pmf.len())`.
104    ///
105    /// Mirrors salmon's `Transcript::computeLogEffectiveLength`:
106    /// `effLen = sum_{l=min}^{min(refLen,max)} pmf(l) * (refLen - l + 1)` in
107    /// log space. If the transcript is shorter than the minimum fragment
108    /// length, it falls back to `log(refLen)`.
109    pub fn compute_log_effective_length(&self, log_pmf: &[f64], min_val: usize) -> f64 {
110        let ref_len = self.ref_length as usize;
111        let max_val = min_val + log_pmf.len().saturating_sub(1);
112        let upper = ref_len.min(max_val);
113
114        let mut eff = LOG_0;
115        if ref_len >= min_val {
116            let mut l = min_val;
117            while l <= upper {
118                let pmf = log_pmf[l - min_val];
119                // log(refLen - l + 1)
120                let span = ((ref_len - l + 1) as f64).ln();
121                eff = log_add(eff, pmf + span);
122                l += 1;
123            }
124        }
125
126        // Guard: a too-short transcript or empty support falls back to log(refLen).
127        if !(eff.is_finite()) || eff < 1.0_f64.ln() {
128            (ref_len.max(1) as f64).ln()
129        } else {
130            eff
131        }
132    }
133}
134
135#[cfg(test)]
136mod tests {
137    use super::*;
138    use crate::math::LOG_1;
139
140    #[test]
141    fn mass_accumulates_in_log_space() {
142        let t = Transcript::new(0, "t0", 1000);
143        assert_eq!(t.mass(), LOG_0);
144        t.add_mass(LOG_1); // add 1.0
145        t.add_mass(LOG_1); // add 1.0 -> log(2)
146        assert!((t.mass() - 2.0_f64.ln()).abs() < 1e-9);
147    }
148
149    #[test]
150    fn counts_accumulate() {
151        let t = Transcript::new(1, "t1", 500);
152        t.add_unique(3);
153        t.add_total(5);
154        assert_eq!(t.unique_count(), 3);
155        assert_eq!(t.total_count(), 5);
156    }
157
158    #[test]
159    fn eff_length_point_mass_is_intuitive() {
160        // A point-mass FLD at length 100 on a 1000nt transcript gives
161        // effLen = 1000 - 100 + 1 = 901.
162        let t = Transcript::new(0, "t", 1000);
163        let min_val = 100usize;
164        let log_pmf = vec![LOG_1]; // pmf(100) = 1
165        let eff = t.compute_log_effective_length(&log_pmf, min_val).exp();
166        assert!((eff - 901.0).abs() < 1e-6, "got {eff}");
167    }
168
169    #[test]
170    fn eff_length_short_transcript_falls_back() {
171        // transcript shorter than the minimum fragment length -> log(refLen)
172        let t = Transcript::new(0, "t", 50);
173        let log_pmf = vec![LOG_1];
174        let eff = t.compute_log_effective_length(&log_pmf, 100).exp();
175        assert!((eff - 50.0).abs() < 1e-6, "got {eff}");
176    }
177}