[][src]Crate perbase_lib

A library of funcionality for perbase analysis of genomic regions.

This library is a work in progress and subject to changes as needed by perbase.

Currently, the key module of this crate is the par_granges module, which allows for parallel iteration and operation over genomic intervals.

The position module provides data-structures and methods for accumulating position related information.

Example

use anyhow::Result;
use perbase_lib::{
   par_granges::{self, RegionProcessor},
   position::{Position, ReadFilter},
};
use rust_htslib::bam::{self, record::Record, Read};
use std::path::PathBuf;

// To use ParGranges you will need to implement a [par_granges::RegionProcessor],
// which requires a single method [par_granges::RegionProcessor::process_region]
// and an associated type P, which is type of the values returned in the Vec by
// `process_region`. The returned `P` objects will be kept in order and serialized
// to the specified output.
struct BasicProcessor<F: ReadFilter> {
   // An indexed bamfile to query for the region we were passed
   bamfile: PathBuf,
   // This is an object that implements [position::ReadFilter] and will be applied to
   // each read
   read_filter: F,
}

// A struct that will hold or filter info and impl ReadFilter
struct BasicReadFilter {
   include_flags: u16,
   exclude_flags: u16,
   min_mapq: u8,
}

// The actual implementation of a read filter
impl ReadFilter for BasicReadFilter {
   // Filter reads based SAM flags and mapping quality, true means pass
   #[inline]
   fn filter_read(&self, read: &Record) -> bool {
       let flags = read.flags();
       (!flags) & &self.include_flags == 0
           && flags & &self.exclude_flags == 0
           && &read.mapq() >= &self.min_mapq
   }
}

impl<F: ReadFilter> RegionProcessor for BasicProcessor<F> {
   type P = Position;

   // This function receives an interval to examine.
   fn process_region(&self, tid: u32, start: u64, stop: u64) -> Vec<Self::P> {
       let mut reader = bam::IndexedReader::from_path(&self.bamfile).expect("Indexed reader");
       let header = reader.header().to_owned();
       // fetch the region
       reader.fetch(tid, start, stop).expect("Fetched ROI");
       // Walk over pileups
       let result: Vec<Position> = reader
           .pileup()
           .flat_map(|p| {
               let pileup = p.expect("Extracted a pileup");
               // Verify that we are within the bounds of the chunk we are iterating on
               // Since pileup will pull reads that overhang edges.
               if (pileup.pos() as u64) >= start && (pileup.pos() as u64) < stop {
                   Some(Position::from_pileup(pileup, &header, &self.read_filter))
               } else {
                   None
               }
           })
           .collect();
       result
   }
}

fn main() -> Result<()> {
   // Create the read filter
   let read_filter = BasicReadFilter {
       include_flags: 0,
       exclude_flags: 3848,
       min_mapq: 20,
   };

   // Create the region processor
   let basic_processor = BasicProcessor {
       bamfile: PathBuf::from("test/test.bam"),
       read_filter: read_filter,
   };

   // Create a par_granges runner
   let par_granges_runner = par_granges::ParGranges::new(
       PathBuf::from("test/test.bam"),       // pass in bam
       None,                                 // optional ref fasta
       Some(PathBuf::from("test/test.bed")), // bedfile to narrow regions
       None,                                 // optional output file, will use stdout outherwise
       None,                                 // optional allowed number of threads, defaults to max
       None,                                 // optional chunksize modification
       basic_processor,
   );

   // Run the processor
   par_granges_runner.process()?;

   Ok(())
}

Modules

par_granges

ParGranges

position

Position

utils

General utility methods.