#![allow(non_snake_case)]
#[macro_use]
extern crate error_chain;
extern crate rand;
extern crate rayon;
#[cfg(test)]
#[macro_use]
extern crate assert_approx_eq;
pub mod io;
pub mod histogram;
pub mod error_analysis;
pub mod correlation_analysis;
pub mod statistics;
use histogram::Dataset;
use std::f64;
use std::fmt;
use std::io::prelude::*;
use rayon::prelude::*;
pub mod errors { error_chain!{} }
use errors::*;
#[allow(non_upper_case_globals)]
static k_B: f64 = 0.008_314_462_1; #[derive(Debug)]
pub struct Config {
pub metadata_file: String,
pub hist_min: Vec<f64>,
pub hist_max: Vec<f64>,
pub num_bins: Vec<usize>,
pub dimens: usize,
pub verbose: bool,
pub tolerance: f64,
pub max_iterations: usize,
pub temperature: f64,
pub cyclic: bool,
pub output: String,
pub bootstrap: usize,
pub bootstrap_seed: u64,
pub start: f64,
pub end: f64,
pub uncorr: bool,
pub convdt: f64,
pub ignore_empty: bool
}
impl fmt::Display for Config {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "Metadata={}, hist_min={:?}, hist_max={:?}, bins={:?},
verbose={}, tolerance={}, iterations={}, temperature={},
cyclic={:?}, uncorr={:?}, bootstrap={:?}, seed={:?},
uncorr={:?}, start={:?}, end={:?}, convdt={:?}, ignore_empty={:?}",
self.metadata_file, self.hist_min, self.hist_max, self.num_bins,
self.verbose, self.tolerance, self.max_iterations, self.temperature,
self.cyclic, self.uncorr, self.bootstrap, self.bootstrap_seed,
self.uncorr, self.start, self.end, self.convdt, self.ignore_empty)
}
}
fn is_converged(old_F: &[f64], new_F: &[f64], tolerance: f64) -> bool {
!new_F.iter()
.zip(old_F.iter())
.map(|x| { (x.0-x.1).abs() })
.any(|diff| { diff > tolerance })
}
fn calc_bin_probability(bin: usize, dataset: &Dataset, F: &[f64]) -> f64 {
let mut denom_sum: f64 = 0.0;
let bin_count: f64 = dataset.get_weighted_bin_count(bin);
for (window, h) in dataset.histograms.iter().enumerate() {
let bias = dataset.get_bias(bin, window);
denom_sum += (dataset.weights[window] * h.num_points as f64)
* bias * F[window];
}
bin_count / denom_sum
}
fn calc_window_F(window: usize, dataset: &Dataset, P: &[f64]) -> f64 {
let f: f64 = (0..dataset.num_bins).zip(P.iter()) .map(|bin_and_prob: (usize, &f64)| {
let bias = dataset.get_bias(bin_and_prob.0, window);
bin_and_prob.1 * bias
}).sum();
1.0/f
}
fn perform_wham_iteration(dataset: &Dataset, F_prev: &[f64], F: &mut Vec<f64>, P: &mut Vec<f64>) {
(0..dataset.num_bins).into_par_iter()
.map(|bin| { calc_bin_probability(bin, dataset, F_prev) })
.collect_into_vec(P);
(0..dataset.num_windows).into_par_iter()
.map(|window| {calc_window_F(window, dataset, P)} )
.collect_into_vec(F);
}
pub fn perform_wham(cfg: &Config, dataset: &Dataset)
-> Result<(Vec<f64>, Vec<f64>, Vec<f64>)> {
let mut P: Vec<f64> = vec![f64::NAN; dataset.num_bins];
let mut F: Vec<f64> = vec![1.0; dataset.num_windows];
let mut F_prev: Vec<f64> = vec![f64::NAN; dataset.num_windows];
let mut F_tmp: Vec<f64> = vec![f64::NAN; dataset.num_windows];
let mut iteration = 0;
let mut converged = false;
while !converged && iteration < cfg.max_iterations {
iteration += 1;
F_prev.copy_from_slice(&F);
perform_wham_iteration(&dataset, &F_prev, &mut F, &mut P);
if iteration % 10 == 0 {
F_tmp.copy_from_slice(&F);
for f in F.iter_mut() { *f = -dataset.kT * f.ln() }
for f in F_prev.iter_mut() { *f = -dataset.kT * f.ln() }
converged = is_converged(&F_prev, &F, cfg.tolerance);
if cfg.verbose {
println!("Iteration {}: dF={}", &iteration, &diff_avg(&F_prev, &F));
}
F.copy_from_slice(&F_tmp);
}
}
let P_sum: f64 = P.iter().sum();
for p in P.iter_mut() {
*p /= P_sum;
}
if iteration == cfg.max_iterations {
bail!("WHAM not converged! (max iterations reached)");
}
Ok((P, F, F_prev))
}
pub fn run(cfg: &Config) -> Result<()>{
println!("Supplied WHAM options: {}", &cfg);
println!("Reading input files.");
let datasets = io::read_data(&cfg).chain_err(|| "Failed to read data.")?;
for (idx, dataset) in datasets.iter().enumerate() {
if datasets.len() > 1 {
println!("Dataset {}/{}: {}", idx+1, datasets.len(), &dataset);
}
else {
println!("{}", &dataset);
}
let (P, F, F_prev) = perform_wham(&cfg, &dataset)?;
println!("WHAM converged.");
let (P_std, free_energy_std) = if cfg.bootstrap > 0 {
println!("Bootstrapping..");
error_analysis::run_bootstrap(&cfg, dataset.clone(), cfg.bootstrap)
} else {
(vec![0.0; P.len()], vec![0.0; P.len()])
};
println!("Finished. Dumping PMF");
let free_energy = calc_free_energy(&dataset, &P);
dump_state(&dataset, &F, &F_prev, &P, &P_std, &free_energy, &free_energy_std);
let append = idx > 0 && datasets.len() > 1;
let index = if datasets.len() > 1 {
Some(idx)
} else {
None
};
io::write_results(&cfg.output, append, &dataset, &free_energy, &free_energy_std, &P, &P_std, index)
.chain_err(|| "Could not write results to output file")?;
}
Ok(())
}
fn diff_avg(F: &[f64], F_prev: &[f64]) -> f64 {
let mut F_sum: f64 = 0.0;
for i in 0..F.len() {
F_sum += (F[i]-F_prev[i]).abs()
}
F_sum / F.len() as f64
}
fn calc_free_energy(dataset: &Dataset, P: &[f64]) -> Vec<f64> {
let mut minimum = f64::MAX;
let mut free_energy: Vec<f64> = P.iter()
.map(|p| {
-dataset.kT * p.ln()
})
.inspect(|free_e| {
if free_e < &minimum {
minimum = *free_e;
}
})
.collect();
for e in free_energy.iter_mut() {
*e -= minimum
}
free_energy
}
fn dump_state(dataset: &Dataset, F: &[f64], F_prev: &[f64], P: &[f64],
P_std: &[f64], A: &[f64], A_std: &[f64]) {
let out = std::io::stdout();
let mut lock = out.lock();
writeln!(lock, "# PMF").unwrap();
writeln!(lock, "#bin\t\tFree Energy\t\t+/-\t\tP(x)\t\t+/-").unwrap();
for bin in 0..dataset.num_bins {
writeln!(lock, "{:9.5}\t{:9.5}\t{:9.5}\t{:9.5}\t{:9.5}",
bin, A[bin], A_std[bin], P[bin], P_std[bin]).unwrap();
}
writeln!(lock, "# Bias offsets").unwrap();
writeln!(lock, "#Window\t\tF\t\tF_prev").unwrap();
for window in 0..dataset.num_windows {
writeln!(lock, "{}\t{:9.5}\t{:8.8}",
window, F[window], (F[window]-F_prev[window]).abs()).unwrap();
}
}
#[cfg(test)]
mod tests {
use super::histogram::{Dataset,Histogram};
use std::f64;
use super::k_B;
macro_rules! assert_delta {
($x:expr, $y:expr, $d:expr) => {
assert!(($x-$y).abs() < $d, "{} != {}", $x, $y)
}
}
fn create_test_dataset() -> Dataset {
let h1 = Histogram::new(10, vec![0.0, 1.0, 1.0, 8.0, 0.0]);
let h2 = Histogram::new(10, vec![0.0, 0.0, 8.0, 1.0, 1.0]);
Dataset::new(5, vec![5], vec![1.0], vec![0.0], vec![4.0],
vec![1.0, 1.0], vec![10.0, 10.0], 300.0*k_B, vec![h1, h2], false)
}
#[test]
fn is_converged() {
let new = vec![1.0,1.0];
let old = vec![0.95, 1.0];
let tolerance = 0.1;
let converged = super::is_converged(&old, &new, tolerance);
assert!(converged);
let old = vec![0.8, 1.0];
let converged = super::is_converged(&old, &new, tolerance);
assert!(!converged);
}
#[test]
fn calc_bin_probability() {
let dataset = create_test_dataset();
let F = vec![1.0; dataset.num_bins] ;
let expected = vec!(0.0, 0.082_529_668_703_131_6, 40.923_558_470_974_93,
124_226.700_033_77, 2_308_526_035.528_374_7);
expected.iter().enumerate().for_each(|(i, exp)| {
let p = super::calc_bin_probability(i, &dataset, &F);
assert_delta!(exp, p, 0.000_000_1);
})
}
#[test]
fn calc_bias_offset() {
let dataset = create_test_dataset();
let probability = vec!(0.0, 0.1, 0.2, 0.3, 0.4);
let expected = vec!(15.927_477_169_990_633, 15.927_477_169_990_633);
expected.iter().enumerate().for_each(|(i, exp)| {
let F = super::calc_window_F(i, &dataset, &probability);
assert_delta!(exp, F, 0.000_000_1);
})
}
#[test]
fn perform_wham_iteration() {
let dataset = create_test_dataset();
let prev_F = vec![1.0; dataset.num_windows];
let mut F = vec![f64::NAN; dataset.num_windows];
let mut P = vec![f64::NAN; dataset.num_bins];
super::perform_wham_iteration(&dataset, &prev_F, &mut F, &mut P);
let expected_F = vec!(1.0, 1.0);
let expected_P = vec!(0.0, 0.082_529_668_703_131_6, 40.923_558_470_974_93,
124_226.700_033_77, 2_308_526_035.528_374_7);
for bin in 0..dataset.num_bins {
assert_delta!(expected_P[bin], P[bin], 0.01)
}
for window in 0..dataset.num_windows {
assert_delta!(expected_F[window], F[window], 0.01)
}
}
}