// crast 1.0.4
//
// CRAST, Context RNA Alignment Search Tool
// Documentation
extern crate getopts;
extern crate time;
extern crate scoped_threadpool;
extern crate bio;
extern crate std;
extern crate cpr;
extern crate itertools;
extern crate fnv;

pub use self::getopts::Options;
pub use self::time::now;
pub use self::scoped_threadpool::Pool;
pub use self::std::env;
pub use self::std::path::Path;
pub use self::bio::io::fasta::Reader;
pub use self::std::io::prelude::*;
pub use self::std::io::{BufReader, BufWriter};
pub use self::std::fs::File;
pub use self::std::fs::{remove_dir_all, create_dir};
pub use self::cpr::{cpr, CntxtDstSq};
pub use self::itertools::multizip;
pub use self::std::collections::HashMap;
use self::std::hash::BuildHasherDefault;
use self::fnv::FnvHasher;

// Type aliases used by the rest of this file. The names are vowel-dropped
// abbreviations; the expansions given below are inferred from the names and
// the underlying types -- confirm against callers where it matters.
pub type Arg = String;
pub type Id = String;
// One sequence element stored as a single byte; the base constants in this
// file (`A`, `U`, `T`, ...) are ASCII letters cast to this type.
pub type BElm = u8;
// An owned sequence of byte elements.
pub type BSq = Vec<BElm>;
// Presumably a thread count -- TODO confirm.
pub type ThrdNm = u32;
// Private position type used only to build `AlgnPsPr`.
type AlgnPs = usize;
pub type AlgnPsPr = (AlgnPs, AlgnPs); // Start/end pos. pair
// Presumably a sequence length -- TODO confirm.
pub type BSqLn = usize;
// Presumably a strand flag; which value means which strand is not visible here.
pub type Strnd = bool;
// Presumably a probability value.
pub type Prb = f32;
// Fixed-size array of `PRB_DST_DM` values of type `Prb`.
pub type PrbDst = [Prb; PRB_DST_DM];
// One `PrbDst` per element of some sequence (presumably per position -- confirm).
pub type PrbDstSq = Vec<PrbDst>;
// Alignment scores share the representation of `Prb`.
pub type AlgnScr = Prb;
// Vector of `usize`; `gt_sfx_ar_indx_pr` uses such values as start offsets
// into a sequence.
pub type SfxAr = Vec<usize>;
// Pair of indices into a suffix array; `gt_sfx_ar_indx_pr` treats it as a
// half-open range `(start, end)`.
pub type SfxArIndxPr = (usize, usize);
// Hasher builder that plugs the FNV hasher into `HashMap`.
pub type Hshr = BuildHasherDefault<FnvHasher>;
// Map from a byte sequence to a suffix-array index pair, hashed with FNV.
pub type PtrnHshMp = HashMap<BSq, SfxArIndxPr, Hshr>;
// Borrowed string form of an identifier.
pub type IdStr<'a> = &'a str;

// Presumably the maximum alignment score -- TODO confirm how callers use it.
pub const MX_SCR: AlgnScr = 1.;
// Length of every `PrbDst` array.
pub const PRB_DST_DM: usize = 6;
// Presumably the default maximum span -- TODO confirm which option it backs.
pub const DFLT_MX_SPN: usize = 200;
// File names used by the tool. The `.bz2` suffix suggests bzip2-compressed
// payloads, and the `RVRS_` variants presumably hold the data for the reverse
// strand -- neither is established by this file; confirm at the read/write sites.
pub const CNFG_FL: &'static str = "cnfg.dat";
pub const DB_SZ_FL: &'static str = "db_sz.dat";
pub const ID_FL: &'static str = "sq_ids.dat.bz2";
pub const SFX_AR_FL: &'static str = "sfx_ars.dat.bz2";
pub const RVRS_SFX_AR_FL: &'static str = "rvrs_sfx_ars.dat.bz2";
pub const PTRN_HSH_MP_FL: &'static str = "ptrn_hsh_mps.dat.bz2";
pub const RVRS_PTRN_HSH_MP_FL: &'static str = "rvrs_ptrn_hsh_mps.dat.bz2";
pub const PRB_DST_SQ_FL: &'static str = "prb_dst_sqs.dat.bz2";
pub const RVRS_PRB_DST_SQ_FL: &'static str = "rvrs_prb_dst_sqs.dat.bz2";
pub const SQ_FL: &'static str = "sqs.dat.bz2";
// Presumably the maximum k-mer size -- TODO confirm.
pub const MX_K_MR_SZ: usize = 5;
// ASCII codes of the nucleotide letters, upper case first, then lower case.
// `gt_cmplmnt_bs` matches on these and returns `N` for any other byte.
pub const A: BElm = 'A' as BElm;
pub const U: BElm = 'U' as BElm;
pub const T: BElm = 'T' as BElm;
pub const G: BElm = 'G' as BElm;
pub const C: BElm = 'C' as BElm;
pub const LWR_A: BElm = 'a' as BElm;
pub const LWR_U: BElm = 'u' as BElm;
pub const LWR_T: BElm = 't' as BElm;
pub const LWR_G: BElm = 'g' as BElm;
pub const LWR_C: BElm = 'c' as BElm;
pub const N: BElm = 'N' as BElm;

#[inline]
pub fn gt_cmplmnt_bs(bs: BElm) -> BElm {
  match bs {
    A => T,
    T => A,
    U => A,
    C => G,
    G => C,
    LWR_A => LWR_T,
    LWR_T => LWR_A,
    LWR_U => LWR_A,
    LWR_C => LWR_G,
    LWR_G => LWR_C,
    _ => N,
  }
}

/// Prints the usage text for `prgrm` to standard output.
///
/// The text is whatever `opts.usage` renders for the brief line
/// `Usage: <prgrm> [options]`.
#[inline]
pub fn prnt_usg(prgrm: &str, opts: &Options) {
  let usg = opts.usage(&format!("Usage: {} [options]", prgrm));
  print!("{}", usg);
}

/// Formats the value returned by `now()` as `YYYY-M-D-h:m:s`.
///
/// The fields are not zero-padded. `tm_year` is offset by 1900 and `tm_mon`
/// by 1 before printing.
#[inline]
pub fn gt_tm_stmp() -> String {
  let nw = now();
  format!(
    "{}-{}-{}-{}:{}:{}",
    nw.tm_year + 1900,
    nw.tm_mon + 1,
    nw.tm_mday,
    nw.tm_hour,
    nw.tm_min,
    nw.tm_sec
  )
}

/// Writes `cntxt_dst_sq` to the file `<tmp_dr>/<id>.dat`.
///
/// Layout of the file: a `>` header line carrying `id`, then one line per
/// element of `cntxt_dst_sq` with its values in `{:e}` notation separated by
/// single spaces, then one empty line.
///
/// # Panics
///
/// Panics if the output file cannot be created. Errors from the individual
/// writes are ignored.
#[inline]
pub fn wrt_cntxt_dst_sq(id: IdStr, cntxt_dst_sq: &CntxtDstSq, tmp_dr: &Path) {
  let otpt_fl_pth = tmp_dr.join(format!("{}.dat", id));
  let otpt_fl = File::create(otpt_fl_pth).expect("Failed to create temp. output file");
  let mut wrtr = BufWriter::new(otpt_fl);
  let _ = wrtr.write_all(format!(">{}\n", id).as_bytes());
  for cntxt_dst in cntxt_dst_sq {
    // Joining the formatted values yields the same text as appending
    // "<value> " for each one and then dropping the trailing space.
    let flds: Vec<String> = cntxt_dst.into_iter().map(|prb| format!("{:e}", prb)).collect();
    let ln = flds.join(" ") + "\n";
    let _ = wrtr.write_all(ln.as_bytes());
  }
  let _ = wrtr.write_all(b"\n");
}

/// Compares the pattern character `ptrn_chr` with the character at offset
/// `ofst` of the suffix of `sq` that starts at `sfx_indx`.
///
/// Returns `true` when `ptrn_chr` is strictly greater than that character
/// (`is_gt == true`) or strictly smaller (`is_gt == false`). Returns `false`
/// when the characters are equal or when the position `sfx_indx + ofst` is at
/// or beyond `sq_ln`, i.e. when the suffix is too short to have a character
/// at that offset.
///
/// # Panics
///
/// Panics if the position is below `sq_ln` but outside `sq`, which can only
/// happen when `sq_ln` is larger than `sq.len()`.
#[inline]
fn cmprs(ptrn_chr: BElm, sq: &[BElm], sfx_indx: usize, ofst: usize, sq_ln: usize, is_gt: bool) -> bool {
  let ps = sfx_indx + ofst;
  // The range check has to come before the read: indexing first would panic
  // for exactly the positions this guard is meant to reject.
  if ps >= sq_ln {
    return false;
  }
  let sq_chr = sq[ps];
  // Equal characters make both strict comparisons false, so no separate
  // equality branch is needed.
  if is_gt {ptrn_chr > sq_chr} else {ptrn_chr < sq_chr}
}

#[inline]
pub fn gt_sfx_ar_indx_pr(ptrn: &[BElm], sq: &[BElm], sfx_ar: &[usize], srch_rng: &SfxArIndxPr, ofst: usize, sq_ln: usize) -> SfxArIndxPr {
  let ptrn_chr = ptrn[ofst];
  let (mut lft, mut rght) = *srch_rng;
  while lft < rght {
    let md = (lft + rght) / 2;
    let sfx_indx = sfx_ar[md];
    let cmprs = cmprs(ptrn_chr, sq, sfx_indx, ofst, sq_ln, true);
    if cmprs {
      lft = md + 1;
    } else {
      rght = md;
    }
  }
  let (tmp, mut rght) = (lft, srch_rng.1);
  while lft < rght {
    let md = (lft + rght) / 2;
    let sfx_indx = sfx_ar[md];
    let cmprs = cmprs(ptrn_chr, sq, sfx_indx, ofst, sq_ln, false);
    if cmprs {
      rght = md;
    } else {
      lft = md + 1;
    }
  }
  (tmp, rght)
}