1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
//! *bam* is a crate, written completely in Rust, that allows reading BAM files. Currently, it
//! supports reading BAM files in *indexed* and *consecutive* modes ([bam::IndexedReader](bam_reader/struct.IndexedReader.html)
//! and [bam::Reader](bam_reader/struct.Reader.html)). Future versions will support writing BAM files.
//!
//! ## Why?
//!
//! Having a crate written completely in Rust reduces the number of dependencies and compilation time.
//! Additionally, it removes the need to install additional C libraries.
//!
//! Errors produced by this crate are more readable and easier to catch and fix on-the-fly.
//!
//! ## Usage
//!
//! Currently, there are two readers:
//! * [bam::IndexedReader](bam_reader/struct.IndexedReader.html), which allows fetching records from
//!   arbitrary genomic regions,
//! * [bam::Reader](bam_reader/struct.Reader.html), which allows reading the BAM file consecutively.
//!
//! ### IndexedReader
//!
//! The following code loads the BAM file `test.bam` and its index `test.bam.bai`, takes all records
//! from the region `2:100001-200000` and prints them to stdout.
//!
//! ```rust
//! extern crate bam;
//!
//! fn main() {
//!     let mut reader = bam::IndexedReader::from_path("test.bam").unwrap();
//!
//!     // We need to clone the header to have access to reference names as the
//!     // reader will be blocked during fetch.
//!     let header = reader.header().clone();
//!     let mut stdout = std::io::BufWriter::new(std::io::stdout());
//!
//!     for record in reader.fetch(1, 100_000, 200_000) {
//!         record.unwrap().write_sam(&mut stdout, &header).unwrap();
//!     }
//! }
//! ```
//!
//! Additionally, you can use `read_into(&mut record)` to save time on record allocation:
//! ```rust
//! extern crate bam;
//!
//! // You need to import BamReader trait
//! use bam::BamReader;
//!
//! fn main() {
//!     let mut reader = bam::IndexedReader::from_path("test.bam").unwrap();
//!
//!     let header = reader.header().clone();
//!     let mut stdout = std::io::BufWriter::new(std::io::stdout());
//!
//!     let mut viewer = reader.fetch(1, 100_000, 200_000);
//!     let mut record = bam::Record::new();
//!     loop {
//!         match viewer.read_into(&mut record) {
//!             Ok(()) => {},
//!             Err(bam::Error::NoMoreRecords) => break,
//!             Err(e) => panic!("{}", e),
//!         }
//!         record.write_sam(&mut stdout, &header).unwrap();
//!     }
//! }
//! ```
//!
//! Note that printing a record is currently much slower than loading it. Without printing, loading
//! records with the *bam* crate takes almost the same time as with `samtools view`.
//!
//! If only records with specific MAPQ or FLAGs are needed, you can use `fetch_by`. For example, use
//! ```rust,ignore
//! reader.fetch_by(1, 100_000, 200_000,
//!     |record| record.mapq() >= 30 && !record.is_secondary())
//! ```
//! to load only records with MAPQ of at least 30 and to skip all secondary alignments. In some cases
//! this saves time by not calculating the right-most aligned read position, and it also
//! removes additional allocations.
//!
//! You can also use [IndexedReaderBuilder](bam_reader/struct.IndexedReaderBuilder.html),
//! which gives more control over loading
//! [IndexedReader](bam_reader/struct.IndexedReader.html).
//! For example, you can create a reader using a different BAI path and a different cache capacity:
//! ```rust
//! let mut reader = bam::IndexedReader::build()
//!     .bai_path("other_dir/test.bai")
//!     .cache_capacity(10000)
//!     .from_path("test.bam").unwrap();
//! ```
//!
//! ### Reader
//!
//! [Reader](bam_reader/struct.Reader.html) allows reading all records from the BAM file
//! consecutively. [Reader](bam_reader/struct.Reader.html) itself is an iterator
//! and implements the same trait [BamReader](bam_reader/trait.BamReader.html), which allows
//! loading records in a similar way:
//! ```rust
//! extern crate bam;
//!
//! fn main() {
//!     let reader = bam::Reader::from_path("test.bam").unwrap();
//!
//!     let header = reader.header().clone();
//!     let mut stdout = std::io::BufWriter::new(std::io::stdout());
//!
//!     for record in reader {
//!         record.unwrap().write_sam(&mut stdout, &header).unwrap();
//!     }
//! }
//! ```
//!
//! Similarly, you can skip record allocation by using `read_into`:
//! ```rust
//! extern crate bam;
//!
//! // You need to import BamReader trait
//! use bam::BamReader;
//!
//! fn main() {
//!     let mut reader = bam::Reader::from_path("test.bam").unwrap();
//!
//!     let header = reader.header().clone();
//!     let mut stdout = std::io::BufWriter::new(std::io::stdout());
//!
//!     let mut record = bam::Record::new();
//!     loop {
//!         match reader.read_into(&mut record) {
//!             Ok(()) => {},
//!             Err(bam::Error::NoMoreRecords) => break,
//!             Err(e) => panic!("{}", e),
//!         }
//!         record.write_sam(&mut stdout, &header).unwrap();
//!     }
//! }
//! ```
//!
//! However, [Reader](bam_reader/struct.Reader.html) has no counterpart of `fetch_by`: there is
//! no way to skip records using a predicate.
//!
//! ## CRC32
//!
//! Each bgzip block contains a CRC32 checksum. By default, the *bam* crate does not compare
//! checksums to save time.
//! However, you can enable checksum comparison by adding the following line to your Cargo.toml:
//! ```toml
//! bam = { version = "*", features = ["check_crc"] }
//! ```
//!

extern crate byteorder;
extern crate inflate;
extern crate lru_cache;
#[cfg(feature = "check_crc")]
extern crate crc;

/// Support for the BAI index.
pub mod index;
/// Bgzip files (BGZF) and individual bgzip blocks.
pub mod bgzip;
/// Cigar and the operations defined on it.
pub mod cigar;
/// BAM records and the operations defined on them.
pub mod record;
/// The various BAM readers.
pub mod bam_reader;

// Re-exported at the crate root, so that these items can be used as
// `bam::Reader`, `bam::Record`, `bam::Error`, etc.
pub use bam_reader::{BamReader, Header, IndexedReader, Reader};
pub use record::{Error, Record};