Expand description
Structs and trait to read and write files in FASTA format.
§Example
§Read
In this example, we parse a FASTA file from stdin and compute some statistics.
use bio::io::fasta;
use std::io;
let mut reader = fasta::Reader::new(io::stdin());
let mut nb_reads = 0;
let mut nb_bases = 0;
for result in reader.records() {
let record = result.expect("Error during fasta record parsing");
println!("{}", record.id());
nb_reads += 1;
nb_bases += record.seq().len();
}
println!("Number of reads: {}", nb_reads);
println!("Number of bases: {}", nb_bases);
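We can also use a `while` loop to iterate over records.
This is slightly faster than the `for` loop. Note that, as written below, the `while let Some(Ok(record))` pattern also ends the loop silently at the first record that fails to parse, whereas the `for` loop above panics with an error message.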
use bio::io::fasta;
use std::io;
let mut records = fasta::Reader::new(io::stdin()).records();
let mut nb_reads = 0;
let mut nb_bases = 0;
while let Some(Ok(record)) = records.next() {
nb_reads += 1;
nb_bases += record.seq().len();
}
println!("Number of reads: {}", nb_reads);
println!("Number of bases: {}", nb_bases);
§Write
In this example we generate 10 random sequences with length 100 and write them to stdout.
use std::io;
use bio::io::fasta;
let mut seed = 42;
let nucleotides = [b'A', b'C', b'G', b'T'];
let mut writer = fasta::Writer::new(io::stdout());
for _ in 0..10 {
let seq = (0..100).map(|_| {
seed = ((seed ^ seed << 13) ^ seed >> 7) ^ seed << 17; // don't use this random generator
nucleotides[seed % 4]
}).collect::<Vec<u8>>();
writer.write("random", None, seq.as_slice()).expect("Error writing record.");
}
§Read and Write
In this example, we filter reads from stdin by sequence length and write them to stdout.
use bio::io::fasta;
use bio::io::fasta::FastaRead;
use std::io;
let mut reader = fasta::Reader::new(io::stdin());
let mut writer = fasta::Writer::new(io::stdout());
let mut record = fasta::Record::new();
while let Ok(()) = reader.read(&mut record) {
if record.is_empty() {
break;
}
if record.seq().len() > 100 {
writer
.write_record(&record)
.ok()
.expect("Error writing record.");
}
}
§Index
Random access to FASTA files is facilitated by `Index` and `IndexedReader`. The FASTA files
must already be indexed with `samtools faidx`.
In this example, we read in the first 10 bases of the sequence named “chr1”.
use bio::io::fasta::IndexedReader;
// create dummy files
const FASTA_FILE: &[u8] = b">chr1\nGTAGGCTGAAAA\nCCCC";
const FAI_FILE: &[u8] = b"chr1\t16\t6\t12\t13";
let seq_name = "chr1";
let start: u64 = 0; // start is 0-based, inclusive
let stop: u64 = 10; // stop is 0-based, exclusive
// load the index
let mut faidx = IndexedReader::new(std::io::Cursor::new(FASTA_FILE), FAI_FILE).unwrap();
// move the pointer in the index to the desired sequence and interval
faidx
.fetch(seq_name, start, stop)
.expect("Couldn't fetch interval");
// read the subsequence defined by the interval into a vector
let mut seq = Vec::new();
faidx.read(&mut seq).expect("Couldn't read the interval");
assert_eq!(seq, b"GTAGGCTGAA");
Structs§
- A FASTA index as created by SAMtools (.fai).
- A FASTA reader with an index as created by SAMtools (.fai).
- A FASTA reader.
- A FASTA record.
- An iterator over the records of a FASTA file.
- A sequence record returned by the FASTA index.
- A FASTA writer.
Traits§
- Trait for FASTA readers.