Module bio::io::fasta

source ·
Expand description

Structs and trait to read and write files in FASTA format.

§Example

§Read

In this example, we parse a FASTA file from stdin and compute some statistics.

use bio::io::fasta;
use std::io;

// Parse FASTA records as they arrive on standard input.
let mut reader = fasta::Reader::new(io::stdin());

// Running totals: (records seen, bases seen).
let (mut read_count, mut base_count) = (0, 0);

for parsed in reader.records() {
    // A record that fails to parse aborts the program with this message.
    let rec = parsed.expect("Error during fasta record parsing");
    println!("{}", rec.id());

    read_count += 1;
    base_count += rec.seq().len();
}

println!("Number of reads: {}", read_count);
println!("Number of bases: {}", base_count);

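We can also use a while loop to iterate over records. This is slightly faster than the for loop. Note that the loop below ends silently at the first record that fails to parse, because a parsing error does not match the Some(Ok(record)) pattern.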

use bio::io::fasta;
use std::io;
// Iterator over the records read from standard input.
let mut records = fasta::Reader::new(io::stdin()).records();

// Running totals: (records seen, bases seen).
let (mut read_count, mut base_count) = (0, 0);

// The pattern only matches successfully parsed records, so the loop ends
// both at the end of the input and at the first parsing error.
while let Some(Ok(rec)) = records.next() {
    read_count += 1;
    base_count += rec.seq().len();
}

println!("Number of reads: {}", read_count);
println!("Number of bases: {}", base_count);

§Write

In this example, we generate 10 pseudo-random sequences of length 100 and write them to stdout.

use std::io;
use bio::io::fasta;

// Alphabet the generated sequences are drawn from.
let nucleotides = [b'A', b'C', b'G', b'T'];

// State of the toy generator below; it is advanced once per generated base.
let mut seed = 42;

let mut writer = fasta::Writer::new(io::stdout());

for _ in 0..10 {
    let mut seq: Vec<u8> = Vec::with_capacity(100);
    for _ in 0..100 {
        // don't use this random generator
        seed = ((seed ^ (seed << 13)) ^ (seed >> 7)) ^ (seed << 17);
        seq.push(nucleotides[seed % 4]);
    }

    writer.write("random", None, &seq[..]).expect("Error writing record.");
}

§Read and Write

In this example, we filter reads from stdin by sequence length, keeping only those longer than 100 bases, and write them to stdout.

use bio::io::fasta;
use bio::io::fasta::FastaRead;
use std::io;

let mut reader = fasta::Reader::new(io::stdin());
let mut writer = fasta::Writer::new(io::stdout());
// A single record, passed to every `read` call instead of allocating a new one each time.
let mut record = fasta::Record::new();

// The loop ends when `read` returns an error or when the record comes back empty.
while let Ok(()) = reader.read(&mut record) {
    // NOTE(review): an empty record presumably signals the end of the input — confirm
    // against the `FastaRead::read` documentation.
    if record.is_empty() {
        break;
    }

    // Keep only reads longer than 100 bases.
    if record.seq().len() > 100 {
        // Call `expect` on the `Result` directly so the underlying error is part of the
        // panic message; going through `.ok()` first would discard it.
        writer
            .write_record(&record)
            .expect("Error writing record.");
    }
}

§Index

Random access to FASTA files is facilitated by Index and IndexedReader. The FASTA files must already be indexed with samtools faidx.

In this example, we read in the first 10 bases of the sequence named “chr1”.

use bio::io::fasta::IndexedReader;
// In-memory stand-ins for a FASTA file and its index file.
const FASTA_FILE: &[u8] = b">chr1\nGTAGGCTGAAAA\nCCCC";
const FAI_FILE: &[u8] = b"chr1\t16\t6\t12\t13";

// Interval to fetch: start is 0-based and inclusive, stop is 0-based and exclusive.
let (start, stop): (u64, u64) = (0, 10);
let seq_name = "chr1";

// Load the index and pair it with the FASTA data.
let fasta_cursor = std::io::Cursor::new(FASTA_FILE);
let mut faidx = IndexedReader::new(fasta_cursor, FAI_FILE).unwrap();

// Move the pointer in the index to the desired sequence and interval.
faidx.fetch(seq_name, start, stop).expect("Couldn't fetch interval");

// Read the subsequence defined by the interval into a vector.
let mut seq = Vec::new();
faidx.read(&mut seq).expect("Couldn't read the interval");
assert_eq!(seq, b"GTAGGCTGAA");

Structs§

Traits§