1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
#![feature(plugin,type_macros,question_mark)]
#![plugin(phf_macros)]
extern crate phf;

#[macro_use]
extern crate nom;
extern crate scoped_threadpool;
extern crate monster;

mod codons;
mod translator;

use std::str;
use std::io;
use std::io::prelude::*;
use nom::{not_line_ending,line_ending};
use nom::IResult;
use std::sync::Arc;
use scoped_threadpool::Pool as ThreadPool;
use std::sync::mpsc::channel;
use std::fmt;

/// A single parsed FASTA record, as built by `fasta_deserialize`.
///
/// Both fields borrow string slices from the byte buffer that was parsed.
#[derive(Debug)]
struct Fasta<'a> {
    /// Header text that follows the leading `>`, as matched by nom's `not_line_ending`.
    pub id: &'a str,
    /// The lines that follow the header, one entry per line, without the terminating `\n`.
    pub sequence: Vec<&'a str>
}

pub fn start_parse<Output>(input: &[u8], mut output: Output, n_threads: u32) -> Result where
    Output: Write
{
    let mut threadpool = ThreadPool::new(n_threads);
    let (tx, rx) = channel();
    match fasta_deserialize(input){
        IResult::Done(_,o) => {
            threadpool.scoped(|threadpool| {
                for fasta in o {
                    let tx = tx.clone();
                    let amino_seq: Arc<Vec<u8>> = Arc::new(fasta.sequence
                        .iter()
                        .fold(Vec::new(), |mut acc, item| {
                                acc.extend(item.as_bytes());
                                acc
                        }));
                    let fasta_id = fasta.id;
                    let dispatch_decoding = |window, decoder: fn(&[u8]) -> String| {
                        let amino_seq = amino_seq.clone();
                        let tx = tx.clone();
                        threadpool.execute(move || {
                            tx.send(FastaComplete::new(window, fasta_id, decoder(&amino_seq))).expect("send");
                        });
                    };
                    dispatch_decoding("> No Move|", translator::no_move);
                    dispatch_decoding("> Shift Left One|", translator::nucleotide_shift_left_one);
                    dispatch_decoding("> Shift Left Two|", translator::nucleotide_shift_left_two);
                    dispatch_decoding("> Rev. No Move|", translator::rev_no_move);
                    dispatch_decoding("> Rev. Shift Left One|", translator::rev_nucleotide_shift_left_one);
                    dispatch_decoding("> Rev. Shift Left Two|", translator::rev_nucleotide_shift_left_two);
                }
            });

            drop(tx);

            for result in rx {
                output.write(result.window.as_bytes())?;
                output.write(result.id.as_bytes())?;
                output.write(result.sequence.as_bytes())?;
            }

            Ok(())
        }
        _ => Err(Error::Parsing)
    }
}

/// One decoded result for a FASTA record, ready to be written out.
///
/// `start_parse` writes the three fields in declaration order: `window`, `id`, `sequence`.
#[derive(Debug)]
struct FastaComplete<'a> {
    /// Label emitted first, e.g. `"> No Move|"`.
    window: &'a str,
    /// Identifier of the record this result belongs to.
    id: &'a str,
    /// Owned string produced by the decoder for this record.
    sequence: String,
}

impl<'a> FastaComplete<'a> {
    /// Bundles a label, a record id and a decoded sequence into a `FastaComplete`.
    fn new(window: &'a str, id: &'a str, sequence: String) -> Self {
        FastaComplete {
            sequence: sequence,
            id: id,
            window: window,
        }
    }
}

// The `FastaComplete.window` labels are hardcoded: each one carries the leading '>'
// for the id line followed by the name of its reading frame.
// There is probably substantial room for improvement, and the code could be deduplicated
// after memory-mapping the file. I personally prefer laying it all out, even if it does
// get a bit lengthy.
/// Parses `input` into zero or more `Fasta` records using nom parser macros.
///
/// Each record consists of, in order:
/// a literal `>`, an id (matched by `not_line_ending`, must be valid UTF-8),
/// a line ending, and then zero or more sequence lines. A sequence line is matched by
/// `is_not!(">\n")`, must be valid UTF-8, and must be followed by a literal `\n`.
///
/// Returns nom's `IResult`; on `Done` it carries the unconsumed input and the parsed records.
///
/// NOTE(review): the id line is terminated with `line_ending`, but sequence lines are
/// terminated with a literal `"\n"`. If `line_ending` also accepts `"\r\n"`, CRLF input
/// would presumably leave a trailing `\r` inside every sequence line — confirm against
/// the nom documentation and the expected input files.
fn fasta_deserialize(input:&[u8]) -> IResult<&[u8], Vec<Fasta>>  {
    // Repeat the record parser as long as it keeps matching.
    many0!(input,
      chain!(
        // Header line: '>' followed by the id text and the line ending.
        tag!(">") ~
        id: map_res!(not_line_ending, str::from_utf8) ~ line_ending ~
        // Body: every following "\n"-terminated line, stopping at the next '>'.
        sequence: many0!(terminated!(map_res!( is_not!(">\n"), str::from_utf8), tag!("\n"))),
        ||{
            Fasta {
                id: id,
                sequence: sequence
            }
        }
      )
   )
}

/// Result type returned by `start_parse`: `Ok(())` on success, otherwise an `Error`.
pub type Result = std::result::Result<(), Error>;

/// Errors reported by `start_parse`.
#[derive(Debug)]
pub enum Error {
    /// An I/O operation failed; wraps the underlying `io::Error`.
    Io(io::Error),
    /// The input could not be parsed.
    Parsing,
}

impl fmt::Display for Error {
    /// Writes the wrapped I/O error's message for `Io`, or "Parsing failed" for `Parsing`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match *self {
            Error::Io(ref cause) => write!(f, "{}", cause),
            Error::Parsing => f.write_str("Parsing failed"),
        }
    }
}

// Implementing the standard error trait lets callers box this type as a trait object
// and use it with generic error-handling code.
impl std::error::Error for Error {
    /// Short static description of the error kind; `Display` carries the full message.
    fn description(&self) -> &str {
        match *self {
            Error::Io(_) => "I/O error",
            Error::Parsing => "Parsing failed",
        }
    }
}

impl From<io::Error> for Error {
    /// Wraps an `io::Error` in `Error::Io`, so `?` can be used on I/O results.
    fn from(e: io::Error) -> Self {
        Error::Io(e)
    }
}