perbase_lib/mod.rs
1//! A library of funcionality for perbase analysis of genomic regions.
2//!
3//! This library is a work in progress and subject to changes as needed by
4//! [`perbase`](https://github.com/sstadick/perbase).
5//!
6//! Currently, the key module of this crate is the `par_granges` module, which allows for
7//! parallel iteration and operation over genomic intervals.
8//!
9//! The `position` module provides data-structures and methods for accumulating position
10//! related information.
11//!
12//! # Example
13//! ```no_run
14//! use anyhow::Result;
15//! use perbase_lib::{
16//! par_granges::{self, RegionProcessor},
17//! position::pileup_position::PileupPosition,
18//! read_filter::ReadFilter,
19//! };
20//! use rust_htslib::bam::{self, record::Record, Read, pileup::Alignment};
21//! use std::path::PathBuf;
22//!
23//! // To use ParGranges you will need to implement a [`RegionProcessor`](par_granges::RegionProcessor),
24//! // which requires a single method [`RegionProcessor::process_region`](par_granges::RegionProcessor::process_region)
25//! // and an associated type P, which is the type of the values returned in the Vec by
26//! // `process_region`. The returned `P` objects will be kept in order and accessible on the
27//! // receiver channel returned by the `[ParGranges::process`](par_granges::ParGranges::process) method.
28//! struct BasicProcessor<F: ReadFilter> {
29//! // An indexed bamfile to query for the region we were passed
30//! bamfile: PathBuf,
31//! // This is an object that implements `position::ReadFilter` and will be applied to
32//! // each read
33//! read_filter: F,
34//! }
35//!
36//! // A struct that holds the filter values that will be used to implement `ReadFilter`
37//! struct BasicReadFilter {
38//! include_flags: u16,
39//! exclude_flags: u16,
40//! min_mapq: u8,
41//! }
42//!
43//! // The actual implementation of `ReadFilter`
44//! impl ReadFilter for BasicReadFilter {
45//! // Filter reads based SAM flags and mapping quality, true means pass
46//! #[inline]
47//! fn filter_read(&self, read: &Record, _: Option<&Alignment>) -> bool {
48//! let flags = read.flags();
49//! (!flags) & &self.include_flags == 0
50//! && flags & &self.exclude_flags == 0
51//! && &read.mapq() >= &self.min_mapq
52//! }
53//! }
54//!
55//! // Implementation of the `RegionProcessor` trait to process each region
56//! impl<F: ReadFilter> RegionProcessor for BasicProcessor<F> {
57//! type P = PileupPosition;
58//!
59//! // This function receives an interval to examine.
60//! fn process_region(&self, tid: u32, start: u32, stop: u32) -> Vec<Self::P> {
61//! let mut reader = bam::IndexedReader::from_path(&self.bamfile).expect("Indexed reader");
62//! let header = reader.header().to_owned();
63//! // fetch the region
64//! reader.fetch((tid, start, stop)).expect("Fetched ROI");
65//! // Walk over pileups
66//! let result: Vec<PileupPosition> = reader
67//! .pileup()
68//! .flat_map(|p| {
69//! let pileup = p.expect("Extracted a pileup");
70//! // Verify that we are within the bounds of the chunk we are iterating on
71//! // Since pileup will pull reads that overhang edges.
72//! if pileup.pos() >= start && pileup.pos() < stop {
73//! Some(PileupPosition::from_pileup(pileup, &header, &self.read_filter, None))
74//! } else {
75//! None
76//! }
77//! })
78//! .collect();
79//! result
80//! }
81//! }
82//!
83//! fn main() -> Result<()> {
84//! // Create the read filter
85//! let read_filter = BasicReadFilter {
86//! include_flags: 0,
87//! exclude_flags: 3848,
88//! min_mapq: 20,
89//! };
90//!
91//! // Create the region processor
92//! let basic_processor = BasicProcessor {
93//! bamfile: PathBuf::from("test/test.bam"),
94//! read_filter: read_filter,
95//! };
96//!
97//! // Create a par_granges runner
98//! let par_granges_runner = par_granges::ParGranges::new(
99//! PathBuf::from("test/test.bam"), // pass in bam
100//! None, // optional ref fasta
101//! None, // optional bcf/vcf file specifying positions of interest
102//! Some(PathBuf::from("test/test.bed")), // bedfile to narrow regions
103//! true, // merge any overlapping intervals in the BED file
104//! None, // optional allowed number of threads, defaults to max
105//! None, // optional chunksize modification
106//! None, // optional channel size modification
107//! basic_processor,
108//! );
109//!
110//! // Run the processor
111//! let receiver = par_granges_runner.process()?;
112//! // Pull the in-order results from the receiver channel
113//! receiver.into_iter().for_each(|p: PileupPosition| {
114//! // Note that the returned values are required to be `serde::Serialize`, so more fancy things
115//! // than just debug printing are doable.
116//! println!("{:?}", p);
117//! });
118//!
119//! Ok(())
120//! }
121//!```
122#![warn(missing_docs)]
123pub mod par_granges;
124pub mod position;
125pub mod read_filter;
126pub mod reference;
127pub mod utils;