1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171
//! *bam* is a crate, written completely in Rust, that allows reading BAM files. Currently, it //! allows reading BAM files in *indexed* and *consecutive* modes ([bam::IndexedReader](bam_reader/struct.IndexedReader.html) //! and [bam::Reader](bam_reader/struct.Reader.html)). Future versions will support writing BAM files. //! //! ## Why? //! //! Having a crate written completely in Rust reduces the number of dependencies and compilation time. //! Additionally, it removes the need to install additional C libraries. //! //! Errors produced by this crate are more readable and easier to catch and fix on-the-fly. //! //! ## Usage //! //! Currently, there are two readers: //! * [bam::IndexedReader](bam_reader/struct.IndexedReader.html), which allows fetching records from //! random genomic regions, //! * [bam::Reader](bam_reader/struct.Reader.html), which allows reading the BAM file consecutively. //! //! ### IndexedReader //! //! The following code would load the BAM file `test.bam` and its index `test.bam.bai`, take all records //! from `2:100001-200000` and print them to stdout. //! //! ```rust //! extern crate bam; //! //! fn main() { //! let mut reader = bam::IndexedReader::from_path("test.bam").unwrap(); //! //! // We need to clone the header to have access to reference names as the //! // reader will be blocked during fetch. //! let header = reader.header().clone(); //! let mut stdout = std::io::BufWriter::new(std::io::stdout()); //! //! for record in reader.fetch(1, 100_000, 200_000) { //! record.unwrap().write_sam(&mut stdout, &header).unwrap(); //! } //! } //! ``` //! //! Additionally, you can use `read_into(&mut record)` to save time on record allocation: //! ```rust //! extern crate bam; //! //! // You need to import the BamReader trait //! use bam::BamReader; //! //! fn main() { //! let mut reader = bam::IndexedReader::from_path("test.bam").unwrap(); //! //! let header = reader.header().clone(); //! 
let mut stdout = std::io::BufWriter::new(std::io::stdout()); //! //! let mut viewer = reader.fetch(1, 100_000, 200_000); //! let mut record = bam::Record::new(); //! loop { //! match viewer.read_into(&mut record) { //! Ok(()) => {}, //! Err(bam::Error::NoMoreRecords) => break, //! Err(e) => panic!("{}", e), //! } //! record.write_sam(&mut stdout, &header).unwrap(); //! } //! } //! ``` //! //! Note that currently printing the read is much slower than loading it. Without printing, it //! takes almost the same time to load records using the *bam* crate and `samtools view`. //! //! If only records with specific MAPQ or FLAGs are needed, you can use `fetch_by`. For example, //! ```rust //! reader.fetch_by(1, 100_000, 200_000, //! |record| record.mapq() >= 30 && !record.is_secondary()) //! ``` //! to load only records with MAPQ at least 30 and skip all secondary alignments. In some cases it //! helps to save time by not calculating the right-most aligned read position, as well as //! to remove additional allocations. //! //! You can also use [IndexedReaderBuilder](bam_reader/struct.IndexedReaderBuilder.html), //! which gives more control over loading //! [IndexedReader](bam_reader/struct.IndexedReader.html). //! For example, you can create a reader using a different BAI path and a different cache capacity: //! ```rust //! let mut reader = bam::IndexedReader::build() //! .bai_path("other_dir/test.bai") //! .cache_capacity(10000) //! .from_path("test.bam").unwrap(); //! ``` //! //! ### Reader //! //! [Reader](bam_reader/struct.Reader.html) allows reading all records from the BAM file //! consecutively. [Reader](bam_reader/struct.Reader.html) itself is an iterator //! and implements the same trait [BamReader](bam_reader/trait.BamReader.html), which allows //! loading records similarly: //! ```rust //! extern crate bam; //! //! fn main() { //! let reader = bam::Reader::from_path("test.bam").unwrap(); //! //! let header = reader.header().clone(); //! 
let mut stdout = std::io::BufWriter::new(std::io::stdout()); //! //! for record in reader { //! record.unwrap().write_sam(&mut stdout, &header).unwrap(); //! } //! } //! ``` //! //! Similarly, you can skip record allocation by using `read_into`: //! ```rust //! extern crate bam; //! //! // You need to import the BamReader trait //! use bam::BamReader; //! //! fn main() { //! let mut reader = bam::Reader::from_path("test.bam").unwrap(); //! //! let header = reader.header().clone(); //! let mut stdout = std::io::BufWriter::new(std::io::stdout()); //! //! let mut record = bam::Record::new(); //! loop { //! match reader.read_into(&mut record) { //! Ok(()) => {}, //! Err(bam::Error::NoMoreRecords) => break, //! Err(e) => panic!("{}", e), //! } //! record.write_sam(&mut stdout, &header).unwrap(); //! } //! } //! ``` //! //! However, there is no way to skip records using a predicate like `fetch_by`. //! //! ## CRC32 //! //! Each bgzip block contains a CRC32 checksum. By default, to save time, the *bam* crate does not compare //! checksums. //! However, you can compare checksums by adding the following line to your Cargo.toml: //! ```toml //! bam = { version = "*", features = ["check_crc"] } //! ``` //! extern crate byteorder; extern crate inflate; extern crate lru_cache; #[cfg(feature = "check_crc")] extern crate crc; /// A module that works with the BAI index. pub mod index; /// A module that works with Bgzip files (BGZF) and bgzip blocks. pub mod bgzip; /// A module that supports Cigar and operations on it. pub mod cigar; /// A module with BAM records and operations on them. pub mod record; /// A module with various BAM readers. pub mod bam_reader; pub use bam_reader::Header; pub use bam_reader::IndexedReader; pub use bam_reader::Reader; pub use bam_reader::BamReader; pub use record::Record; pub use record::Error;