Skip to main content

fibertools_rs/subcommands/
pileup.rs

1/// This module is used to extract the fire calls as well as nucs and msps from a bam file
2/// for every position in the bam file and output the results to a bed file.
3/// all calculations are done in total as well as for haplotype 1 and haplotype 2.
4use crate::cli::PileupOptions;
5use crate::fiber::FiberseqData;
6use crate::utils::bamannotations;
7use crate::utils::bio_io;
8use crate::*;
9use anyhow::{anyhow, Ok};
10use std::collections::HashMap;
11use std::io::BufRead;
12//use polars::prelude::*;
13use ordered_float::NotNan;
14use rust_htslib::bam::ext::BamRecordExtensions;
15use rust_htslib::bam::{FetchDefinition, IndexedReader};
16
17const MIN_FIRE_COVERAGE: i32 = 4;
18const MIN_FIRE_QUAL: u8 = 229; // floor(255*0.9)
19static WINDOW_SIZE: usize = 1_000_000;
20
21#[derive(Debug)]
22pub struct FireRow<'a> {
23    pub coverage: &'a i32,
24    pub fire_coverage: &'a i32,
25    pub score: &'a f32,
26    pub nuc_coverage: &'a i32,
27    pub msp_coverage: &'a i32,
28    pub cpg_coverage: &'a i32,
29    pub m6a_coverage: &'a i32,
30    pileup_opts: &'a PileupOptions,
31}
32
33impl PartialEq for FireRow<'_> {
34    fn eq(&self, other: &Self) -> bool {
35        let m6a = if self.pileup_opts.m6a {
36            self.m6a_coverage == other.m6a_coverage
37        } else {
38            true
39        };
40        let cpg = if self.pileup_opts.cpg {
41            self.cpg_coverage == other.cpg_coverage
42        } else {
43            true
44        };
45
46        self.coverage == other.coverage
47            && self.fire_coverage == other.fire_coverage
48            && self.score == other.score
49            && self.nuc_coverage == other.nuc_coverage
50            && self.msp_coverage == other.msp_coverage
51            && cpg
52            && m6a
53    }
54}
55
56impl std::fmt::Display for FireRow<'_> {
57    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
58        let mut rtn = format!(
59            "\t{}\t{}\t{}",
60            self.coverage, self.fire_coverage, self.score
61        );
62        if !self.pileup_opts.no_nuc {
63            rtn += &format!("\t{}", self.nuc_coverage);
64        }
65        if !self.pileup_opts.no_msp {
66            rtn += &format!("\t{}", self.msp_coverage);
67        }
68        if self.pileup_opts.m6a {
69            rtn += &format!("\t{}", self.m6a_coverage);
70        }
71        if self.pileup_opts.cpg {
72            rtn += &format!("\t{}", self.cpg_coverage);
73        }
74        write!(f, "{rtn}")
75    }
76}
77
78pub struct ShuffledFibers {
79    pub shuffled_fiber_starts: HashMap<(String, String, i64), i64>,
80}
81
82impl ShuffledFibers {
83    pub fn new(file_path: &str) -> Result<Self> {
84        let buffer = bio_io::buffer_from(file_path)?;
85        let mut shuffled_fiber_starts = HashMap::new();
86        for line in buffer.lines() {
87            let line = line?;
88            if line.starts_with('#') {
89                continue;
90            }
91            let mut parts = line.split('\t');
92            // error if there are not at least 4 parts
93            let chrom = parts.next().ok_or(anyhow!("missing chrom"))?;
94            let start = parts
95                .next()
96                .ok_or(anyhow!("missing fiber start"))?
97                .parse::<i64>()?;
98            let _end = parts
99                .next()
100                .ok_or(anyhow!("missing fiber end"))?
101                .parse::<i64>()?;
102            let fiber_name = parts.next().ok_or(anyhow!("missing fiber name"))?;
103            let original_start = parts
104                .next()
105                .ok_or(anyhow!("missing original start"))?
106                .parse::<i64>()?;
107            shuffled_fiber_starts.insert(
108                (chrom.to_string(), fiber_name.to_string(), original_start),
109                start,
110            );
111        }
112        log::info!("Read {} shuffled fibers", shuffled_fiber_starts.len());
113        Ok(Self {
114            shuffled_fiber_starts,
115        })
116    }
117
118    pub fn get_shuffled_start(&self, fiber: &FiberseqData) -> Option<i64> {
119        let target_name = fiber.target_name.clone();
120        let fiber_name = fiber.get_qname();
121        self.shuffled_fiber_starts
122            .get(&(target_name, fiber_name, fiber.record.reference_start()))
123            .copied()
124    }
125
126    pub fn has_fiber(&self, fiber: &FiberseqData) -> bool {
127        let target_name = fiber.target_name.clone();
128        let fiber_name = fiber.get_qname();
129        self.shuffled_fiber_starts.contains_key(&(
130            target_name,
131            fiber_name,
132            fiber.record.reference_start(),
133        ))
134    }
135
136    /// to get the shuffle offset which we ADD(+) to features of the fiber
137    /// to create a shuffled version of the fiber
138    pub fn get_shuffle_offset(&self, fiber: &FiberseqData) -> Option<i64> {
139        let start = fiber.record.reference_start();
140        let shuffled_start = self.get_shuffled_start(fiber)?;
141        Some(shuffled_start - start)
142    }
143}
144
145pub struct FireTrack<'a> {
146    pub chrom_start: usize,
147    pub chrom_end: usize,
148    pub track_len: usize,
149    pub raw_scores: Vec<f32>,
150    pub scores: Vec<f32>,
151    pub coverage: Vec<i32>,
152    pub fire_coverage: Vec<i32>,
153    pub msp_coverage: Vec<i32>,
154    pub nuc_coverage: Vec<i32>,
155    pub cpg_coverage: Vec<i32>,
156    pub m6a_coverage: Vec<i32>,
157    pileup_opts: &'a PileupOptions,
158    shuffled_fibers: &'a Option<ShuffledFibers>,
159    cur_offset: i64,
160}
161
162impl<'a> FireTrack<'a> {
163    pub fn new(
164        chrom_start: usize,
165        chrom_end: usize,
166        pileup_opts: &'a PileupOptions,
167        shuffled_fibers: &'a Option<ShuffledFibers>,
168    ) -> Self {
169        let track_len = chrom_end - chrom_start + 1;
170        let raw_scores = vec![-1.0; track_len];
171        let scores = vec![-1.0; track_len];
172        Self {
173            chrom_start,
174            chrom_end,
175            track_len,
176            raw_scores,
177            scores,
178            coverage: vec![0; track_len],
179            fire_coverage: vec![0; track_len],
180            msp_coverage: vec![0; track_len],
181            nuc_coverage: vec![0; track_len],
182            cpg_coverage: vec![0; track_len],
183            m6a_coverage: vec![0; track_len],
184            pileup_opts,
185            shuffled_fibers,
186            cur_offset: 0,
187        }
188    }
189
190    #[inline]
191    fn add_range_set(
192        array: &mut [i32],
193        ranges: &bamannotations::Ranges,
194        cur_offset: i64,
195        chrom_start: usize,
196    ) {
197        for annotation in ranges {
198            match (
199                annotation.reference_start,
200                annotation.reference_end,
201                annotation.reference_length,
202            ) {
203                (Some(rs), Some(re), Some(_)) => {
204                    let re = if rs == re { re + 1 } else { re };
205                    for i in rs..re {
206                        let pos = i + cur_offset - chrom_start as i64;
207                        // skip if pos is before the start of the track
208                        if pos < 0 || pos >= array.len() as i64 {
209                            continue;
210                        }
211                        array[pos as usize] += 1;
212                    }
213                }
214                _ => continue,
215            }
216        }
217    }
218
219    fn fiber_start_and_end(&self, fiber: &FiberseqData) -> (i64, i64) {
220        if !self.pileup_opts.fiber_coverage {
221            return (
222                fiber.record.reference_start() + self.cur_offset,
223                fiber.record.reference_end() + self.cur_offset,
224            );
225        }
226        let mut start = i64::MAX;
227        let mut end = i64::MIN;
228        for annotation in &fiber.msp {
229            match (
230                annotation.reference_start,
231                annotation.reference_end,
232                annotation.reference_length,
233            ) {
234                (Some(rs), Some(re), Some(_)) => {
235                    start = std::cmp::min(start, rs);
236                    end = std::cmp::max(end, re);
237                }
238                _ => continue,
239            }
240        }
241        for annotation in &fiber.nuc {
242            match (
243                annotation.reference_start,
244                annotation.reference_end,
245                annotation.reference_length,
246            ) {
247                (Some(rs), Some(re), Some(_)) => {
248                    start = std::cmp::min(start, rs);
249                    end = std::cmp::max(end, re);
250                }
251                _ => continue,
252            }
253        }
254        if start == i64::MAX {
255            start = fiber.record.reference_start();
256        }
257        if end == i64::MIN {
258            end = fiber.record.reference_end();
259        }
260        (start + self.cur_offset, end + self.cur_offset)
261    }
262
263    /// inline this function
264    #[inline]
265    fn update_with_fiber(&mut self, fiber: &FiberseqData) {
266        // skip this fiber if it has no MSP/NUC information
267        // and we are looking at fiber_coverage
268        if self.pileup_opts.fiber_coverage
269            && fiber.msp.reference_starts().is_empty()
270            && fiber.nuc.reference_starts().is_empty()
271        {
272            return;
273        }
274
275        // find the offset if we are shuffling data
276        self.cur_offset = match self.shuffled_fibers {
277            Some(shuffled_fibers) => match shuffled_fibers.get_shuffle_offset(fiber) {
278                Some(offset) => offset,
279                None => return, // skip missing fiber if it is not in the shuffle
280            },
281            None => 0,
282        };
283
284        if self.cur_offset != 0 && self.chrom_start != 0 {
285            panic!("Cannot apply shuffling unless the entire chromosome is being read at once.");
286        }
287
288        let (start, end) = self.fiber_start_and_end(fiber);
289        // calculate the coverage
290        for i in start..end {
291            let pos = i - self.chrom_start as i64;
292            if pos < 0 || pos >= self.track_len as i64 {
293                continue;
294            }
295            self.coverage[pos as usize] += 1;
296        }
297
298        // calculate the fire coverage and fire score
299        for annotation in &fiber.msp {
300            match (
301                annotation.reference_start,
302                annotation.reference_end,
303                annotation.reference_length,
304            ) {
305                (Some(rs), Some(re), Some(_)) => {
306                    if annotation.qual < MIN_FIRE_QUAL {
307                        continue;
308                    }
309                    let score_update = (1.0 - annotation.qual as f32 / 255.0).log10() * -50.0;
310                    for i in rs..re {
311                        let pos = i + self.cur_offset - self.chrom_start as i64;
312                        if pos < 0 || pos >= self.track_len as i64 {
313                            continue;
314                        }
315                        self.fire_coverage[pos as usize] += 1;
316                        self.raw_scores[pos as usize] += score_update;
317                    }
318                }
319                _ => continue,
320            }
321        }
322
323        // add other sets of data to the FireTrack depending on CLI opts
324        let mut pairs = vec![];
325        if !self.pileup_opts.no_nuc {
326            pairs.push((&mut self.nuc_coverage, &fiber.nuc));
327        }
328        if !self.pileup_opts.no_msp {
329            pairs.push((&mut self.msp_coverage, &fiber.msp));
330        }
331        if self.pileup_opts.m6a {
332            pairs.push((&mut self.m6a_coverage, &fiber.m6a));
333        }
334        if self.pileup_opts.cpg {
335            pairs.push((&mut self.cpg_coverage, &fiber.cpg));
336        }
337
338        for (array, ranges) in pairs {
339            Self::add_range_set(array, ranges, self.cur_offset, self.chrom_start);
340        }
341    }
342
343    pub fn calculate_scores(&mut self) {
344        for i in 0..self.track_len {
345            if self.fire_coverage[i] <= 0 {
346                self.scores[i] = -1.0;
347            } else if self.fire_coverage[i] < MIN_FIRE_COVERAGE
348                && self.pileup_opts.shuffle.is_none()
349            {
350                // there is no minimum fire coverage if we are shuffling
351                self.scores[i] = -1.0;
352            } else {
353                self.scores[i] = self.raw_scores[i] / self.coverage[i] as f32;
354            }
355        }
356    }
357
358    pub fn calculate_rolling_max_score(&mut self) -> Vec<f32> {
359        let mut rolling_max = vec![-1.0; self.track_len];
360        let window_size = self.pileup_opts.rolling_max.unwrap();
361        let look_back = window_size / 2;
362        for (i, cur_roll_max) in rolling_max.iter_mut().enumerate().take(self.track_len) {
363            let start = i.saturating_sub(look_back);
364            let mut end = i + look_back;
365            if end > self.track_len {
366                end = self.track_len;
367            }
368            let relevant_scores = &self.scores[start..end];
369            *cur_roll_max = *relevant_scores
370                .iter()
371                .max_by_key(|x| NotNan::new(**x).unwrap())
372                .unwrap_or(&-1.0);
373        }
374        rolling_max
375    }
376
377    pub fn row(&self, i: usize) -> FireRow<'_> {
378        FireRow {
379            score: &self.scores[i],
380            coverage: &self.coverage[i],
381            fire_coverage: &self.fire_coverage[i],
382            msp_coverage: &self.msp_coverage[i],
383            nuc_coverage: &self.nuc_coverage[i],
384            cpg_coverage: &self.cpg_coverage[i],
385            m6a_coverage: &self.m6a_coverage[i],
386            pileup_opts: self.pileup_opts,
387        }
388    }
389}
390
391pub struct FiberseqPileup<'a> {
392    pub all_data: FireTrack<'a>,
393    pub hap1_data: Option<FireTrack<'a>>,
394    pub hap2_data: Option<FireTrack<'a>>,
395    pub shuffled_data: Option<FireTrack<'a>>,
396    pub chrom: String,
397    pub chrom_start: usize,
398    pub chrom_end: usize,
399    pub track_len: usize,
400    has_data: bool,
401    pileup_opts: &'a PileupOptions,
402    shuffled_fibers: &'a Option<ShuffledFibers>,
403    rolling_max: Option<Vec<f32>>,
404}
405
406impl<'a> FiberseqPileup<'a> {
407    pub fn new(
408        chrom: &str,
409        chrom_start: usize,
410        chrom_end: usize,
411        pileup_opts: &'a PileupOptions,
412        shuffled_fibers: &'a Option<ShuffledFibers>,
413    ) -> Self {
414        let track_len = chrom_end - chrom_start + 1;
415        let all_data = FireTrack::new(chrom_start, chrom_end, pileup_opts, &None);
416        let (hap1_data, hap2_data) = if pileup_opts.haps {
417            (
418                Some(FireTrack::new(chrom_start, chrom_end, pileup_opts, &None)),
419                Some(FireTrack::new(chrom_start, chrom_end, pileup_opts, &None)),
420            )
421        } else {
422            (None, None)
423        };
424
425        let shuffled_data = if shuffled_fibers.is_some() {
426            Some(FireTrack::new(
427                chrom_start,
428                chrom_end,
429                pileup_opts,
430                shuffled_fibers,
431            ))
432        } else {
433            None
434        };
435
436        Self {
437            all_data,
438            hap1_data,
439            hap2_data,
440            shuffled_data,
441            chrom: chrom.to_string(),
442            chrom_start,
443            chrom_end,
444            track_len,
445            has_data: false,
446            pileup_opts,
447            shuffled_fibers,
448            rolling_max: None,
449        }
450    }
451
452    pub fn has_data(&self) -> bool {
453        self.has_data
454    }
455
456    pub fn add_records(
457        &mut self,
458        records: bam::Records<'a, IndexedReader>,
459    ) -> Result<(), anyhow::Error> {
460        self.pileup_opts
461            .input
462            .filters
463            .filter_on_bit_flags(records)
464            .chunks(1000)
465            .into_iter()
466            .map(|r| r.collect::<Vec<_>>())
467            .for_each(|r| {
468                let fibers: Vec<FiberseqData> = FiberseqData::from_records(
469                    r,
470                    &self.pileup_opts.input.header_view(),
471                    &self.pileup_opts.input.filters,
472                );
473                if !fibers.is_empty() {
474                    self.has_data = true;
475                }
476                for fiber in fibers {
477                    // skip if the fiber was unable to be shuffled
478                    if self.shuffled_fibers.is_some()
479                        && !self.shuffled_fibers.as_ref().unwrap().has_fiber(&fiber)
480                    {
481                        continue;
482                    }
483
484                    self.all_data.update_with_fiber(&fiber);
485                    // add hap1 data
486                    if let Some(hap1_data) = &mut self.hap1_data {
487                        if fiber.get_hp() == "H1" {
488                            hap1_data.update_with_fiber(&fiber);
489                        }
490                    }
491                    // add hap2 data
492                    if let Some(hap2_data) = &mut self.hap2_data {
493                        if fiber.get_hp() == "H2" {
494                            hap2_data.update_with_fiber(&fiber);
495                        }
496                    }
497                    // add shuffled data
498                    if let Some(shuffled_data) = &mut self.shuffled_data {
499                        shuffled_data.update_with_fiber(&fiber);
500                    }
501                }
502            });
503        self.calculate_scores();
504        Ok(())
505    }
506
507    pub fn header(pileup_opts: &PileupOptions) -> String {
508        let mut header = format!("{}\t{}\t{}", "#chrom", "start", "end");
509
510        let mut suffixes = vec![""];
511        if pileup_opts.haps {
512            suffixes.push("_H1");
513            suffixes.push("_H2");
514        }
515        if pileup_opts.shuffle.is_some() {
516            suffixes.push("_shuffled");
517        }
518
519        for suffix in suffixes {
520            header += &format!(
521                "\t{}{suffix}\t{}{suffix}\t{}{suffix}",
522                "coverage", "fire_coverage", "score",
523            );
524            if !pileup_opts.no_nuc {
525                header += &format!("\t{}{suffix}", "nuc_coverage");
526            }
527            if !pileup_opts.no_msp {
528                header += &format!("\t{}{suffix}", "msp_coverage");
529            }
530            if pileup_opts.m6a {
531                header += &format!("\t{}{suffix}", "m6a_coverage");
532            }
533            if pileup_opts.cpg {
534                header += &format!("\t{}{suffix}", "cpg_coverage");
535            }
536        }
537        if pileup_opts.rolling_max.is_some() {
538            header += "\trolling_max";
539        }
540        header += "\n";
541        header
542    }
543
544    fn calculate_scores(&mut self) {
545        self.all_data.calculate_scores();
546        // calculate rolling max
547        if self.pileup_opts.rolling_max.is_some() {
548            self.rolling_max = Some(self.all_data.calculate_rolling_max_score());
549        }
550        // scores for other tracks
551        if let Some(hap1_data) = &mut self.hap1_data {
552            hap1_data.calculate_scores();
553        }
554        if let Some(hap2_data) = &mut self.hap2_data {
555            hap2_data.calculate_scores();
556        }
557        if let Some(shuffled_data) = &mut self.shuffled_data {
558            shuffled_data.calculate_scores();
559        }
560    }
561
562    /// check if the ith row has the same data as the previous row
563    /// if it does, return true, otherwise return false
564    /// this is used to determine if the data should be written to the output
565    pub fn is_same_as_previous(&self, i: usize) -> bool {
566        if i == 0 {
567            true
568        } else {
569            let total_same = self.all_data.row(i) == self.all_data.row(i - 1);
570            let haps_same = if self.pileup_opts.haps {
571                let h1 = self.hap1_data.as_ref().unwrap();
572                let h2 = self.hap2_data.as_ref().unwrap();
573                let hap1_same = h1.row(i) == h1.row(i - 1);
574                let hap2_same = h2.row(i) == h2.row(i - 1);
575                hap1_same && hap2_same
576            } else {
577                true
578            };
579            let shuffled_same = if self.shuffled_data.is_some() {
580                self.shuffled_data.as_ref().unwrap().row(i)
581                    == self.shuffled_data.as_ref().unwrap().row(i - 1)
582            } else {
583                true
584            };
585            total_same && haps_same && shuffled_same
586        }
587    }
588
589    fn wait_to_write(&self, i: usize) -> bool {
590        // write every row
591        if self.pileup_opts.per_base {
592            return false;
593        }
594        self.is_same_as_previous(i) && i != self.track_len - 1
595    }
596
597    pub fn log_stats(&self) {
598        let mut data_tracks = vec![&self.all_data];
599        if self.pileup_opts.haps {
600            data_tracks.push(self.hap1_data.as_ref().unwrap());
601            data_tracks.push(self.hap2_data.as_ref().unwrap());
602        }
603        if self.shuffled_data.is_some() {
604            data_tracks.push(self.shuffled_data.as_ref().unwrap());
605        }
606        for data in data_tracks {
607            let total_coverage: i64 = data.coverage.iter().map(|x| *x as i64).sum();
608            let total_fire_coverage: i64 = data.fire_coverage.iter().map(|x| *x as i64).sum();
609            let total_score: f64 = data.scores.iter().map(|x| *x as f64).sum();
610            log::info!(
611                "Total coverage: {total_coverage}, Total fire coverage: {total_fire_coverage}, Total score: {total_score}"
612            );
613        }
614    }
615
616    pub fn write(&self, out: &mut Box<dyn Write>) -> Result<(), anyhow::Error> {
617        if !self.has_data {
618            return Ok(());
619        }
620        if self.shuffled_data.is_some() {
621            self.log_stats();
622        }
623        let mut write_start_index = 0;
624        let mut write_end_index = 1;
625        for i in 1..self.track_len {
626            // do we have the same data as the previous row?
627            if self.wait_to_write(i) {
628                write_end_index = i + 1;
629            } else {
630                let mut line = format!(
631                    "{}\t{}\t{}",
632                    self.chrom,
633                    write_start_index + self.chrom_start,
634                    write_end_index + self.chrom_start
635                );
636
637                let mut data_tracks = vec![&self.all_data];
638                if self.pileup_opts.haps {
639                    data_tracks.push(self.hap1_data.as_ref().unwrap());
640                    data_tracks.push(self.hap2_data.as_ref().unwrap());
641                }
642                if self.shuffled_data.is_some() {
643                    data_tracks.push(self.shuffled_data.as_ref().unwrap());
644                }
645
646                for data in data_tracks {
647                    line += data.row(write_start_index).to_string().as_str();
648                }
649                if self.pileup_opts.rolling_max.is_some() {
650                    line += &format!(
651                        "\t{}",
652                        self.rolling_max.as_ref().unwrap()[write_start_index]
653                    );
654                }
655                // don't write empty lines unless keep_zeros is set
656                let mut cov = self.all_data.coverage[write_start_index];
657                if let Some(shuffled_data) = &self.shuffled_data {
658                    cov += shuffled_data.coverage[write_start_index];
659                }
660                if self.pileup_opts.keep_zeros || cov > 0 {
661                    line += "\n";
662                    bio_io::write_to_file(&line, out);
663                }
664                // reset the write indexes
665                write_start_index = i;
666                write_end_index = i + 1;
667            }
668        }
669        out.flush()?;
670        Ok(())
671    }
672}
673
674/// split up a FetchDefinition into multiple regions of a certain size
675/// TODO set up run_rgn to take a list of regions and multithread it
676pub fn split_fetch_definition(
677    rgn: &FetchDefinition,
678    chrom_len: usize,
679    window_size: usize,
680) -> Vec<(i64, i64)> {
681    let (start, end) = match rgn {
682        FetchDefinition::RegionString(_chrom, start, end) => (*start, *end),
683        _ => (0, chrom_len as i64),
684    };
685    let mut rgns = vec![];
686    let mut cur_start = start;
687    while cur_start < end {
688        let cur_end = std::cmp::min(cur_start + window_size as i64, end);
689        rgns.push((cur_start, cur_end));
690        cur_start = cur_end;
691    }
692    rgns
693}
694
695fn run_rgn(
696    chrom: &str,
697    rgn: FetchDefinition,
698    bam: &mut IndexedReader,
699    out: &mut Box<dyn Write>,
700    pileup_opts: &PileupOptions,
701    shuffled_fibers: &Option<ShuffledFibers>,
702) -> Result<(), anyhow::Error> {
703    let tid = bam.header().tid(chrom.as_bytes()).unwrap();
704    let chrom_len = bam.header().target_len(tid).unwrap() as i64;
705
706    let window_size = if shuffled_fibers.is_some() {
707        (chrom_len + 1) as usize
708    } else {
709        WINDOW_SIZE
710    };
711    log::info!("Window size on {chrom}: {window_size}");
712
713    let windows = split_fetch_definition(&rgn, chrom_len as usize, window_size);
714    log::debug!("Splitting {} into {} windows", chrom, windows.len());
715    for (chrom_start, mut chrom_end) in windows {
716        if chrom_start >= chrom_len {
717            continue;
718        } else if chrom_end > chrom_len {
719            chrom_end = chrom_len;
720        }
721
722        // check if region has data
723        bam.fetch((chrom, chrom_start, chrom_end))?;
724        let mut tmp_records = bam.records();
725        if tmp_records.next().is_none() {
726            continue;
727        }
728        // fetch the data
729        bam.fetch((chrom, chrom_start, chrom_end))?;
730        let records = bam.records();
731        // make the pileup
732        log::debug!("Initializing pileup for {chrom}:{chrom_start}-{chrom_end}");
733        let mut pileup = FiberseqPileup::new(
734            chrom,
735            chrom_start as usize,
736            chrom_end as usize,
737            pileup_opts,
738            shuffled_fibers,
739        );
740        pileup.add_records(records)?;
741        pileup.write(out)?;
742    }
743
744    Ok(())
745}
746
747/// extract existing fire calls into a bed9+ like file
748pub fn pileup_track(pileup_opts: &mut PileupOptions) -> Result<(), anyhow::Error> {
749    // read in the bam from stdin or from a file
750    let mut bam = pileup_opts.input.indexed_bam_reader();
751    let header = pileup_opts.input.header_view();
752
753    let mut out = bio_io::writer(&pileup_opts.out)?;
754    // add the header
755    out.write_all(FiberseqPileup::header(pileup_opts).as_bytes())?;
756
757    let shuffled_fibers = match &pileup_opts.shuffle {
758        Some(file_path) => Some(ShuffledFibers::new(file_path)?),
759        None => None,
760    };
761
762    match &pileup_opts.rgn {
763        // if a region is specified, only process that region
764        Some(rgn) => {
765            let (rgn, chrom) = region_parser(rgn);
766            run_rgn(
767                &chrom,
768                rgn,
769                &mut bam,
770                &mut out,
771                pileup_opts,
772                &shuffled_fibers,
773            )?;
774        }
775        // if no region is specified, process all regions
776        None => {
777            for chrom in header.target_names() {
778                let rgn = FetchDefinition::String(chrom);
779                run_rgn(
780                    &String::from_utf8_lossy(chrom),
781                    rgn,
782                    &mut bam,
783                    &mut out,
784                    pileup_opts,
785                    &shuffled_fibers,
786                )?;
787            }
788        }
789    }
790    Ok(())
791}