use crate::error::{StatsError, StatsResult as Result};
use scirs2_core::ndarray::{Array1, Array2};
use scirs2_core::random::{Distribution, Normal};
use scirs2_core::validation::*;
use scirs2_core::{Rng, RngExt};
use std::fmt::Debug;
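/// A target distribution that exposes its log-density and gradient, as
/// required by gradient-based samplers such as HMC and NUTS. Implementations
/// must be `Send + Sync` so samplers can be shared across threads.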
pub trait DifferentiableTarget: Send + Sync {
fn log_density(&self, x: &Array1<f64>) -> f64;
fn gradient(&self, x: &Array1<f64>) -> Array1<f64>;
fn dim(&self) -> usize;
fn log_density_and_gradient(&self, x: &Array1<f64>) -> (f64, Array1<f64>) {
(self.log_density(x), self.gradient(x))
}
}
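/// Hamiltonian Monte Carlo sampler using a leapfrog integrator and a
/// Metropolis correction. The mass matrix defaults to the identity and can
/// be replaced via [`with_mass_matrix`](Self::with_mass_matrix).
///
/// Illustrative usage (a sketch; constructing the RNG depends on the
/// `scirs2_core::random` API and is elided here):
///
/// ```ignore
/// let target = MultivariateNormalHMC::new(mean, covariance)?;
/// let mut hmc = HamiltonianMonteCarlo::new(target, Array1::zeros(2), 0.1, 20)?;
/// let samples = hmc.sample_with_burnin(1_000, 500, &mut rng)?;
/// println!("acceptance rate: {}", hmc.acceptance_rate());
/// ```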
pub struct HamiltonianMonteCarlo<T: DifferentiableTarget> {
pub target: T,
pub position: Array1<f64>,
pub current_log_density: f64,
pub stepsize: f64,
pub n_steps: usize,
pub mass_matrix: Array2<f64>,
pub mass_inv: Array2<f64>,
pub n_accepted: usize,
pub n_proposed: usize,
}
impl<T: DifferentiableTarget> HamiltonianMonteCarlo<T> {
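    /// Creates a sampler with an identity mass matrix, after validating that
    /// the initial position is finite and matches the target dimension.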
    pub fn new(target: T, initial: Array1<f64>, stepsize: f64, n_steps: usize) -> Result<Self> {
        checkarray_finite(&initial, "initial")?;
        check_positive(stepsize, "stepsize")?;
        check_positive(n_steps, "n_steps")?;
if initial.len() != target.dim() {
return Err(StatsError::DimensionMismatch(format!(
"initial dimension ({}) must match target dimension ({})",
initial.len(),
target.dim()
)));
}
let dim = initial.len();
let mass_matrix = Array2::eye(dim);
let mass_inv = Array2::eye(dim);
let current_log_density = target.log_density(&initial);
Ok(Self {
target,
position: initial,
current_log_density,
stepsize,
            n_steps,
mass_matrix,
mass_inv,
n_accepted: 0,
n_proposed: 0,
})
}
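    /// Replaces the mass matrix, caching its inverse for use in the leapfrog
    /// position updates and the kinetic-energy evaluations.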
    pub fn with_mass_matrix(mut self, mass_matrix: Array2<f64>) -> Result<Self> {
        checkarray_finite(&mass_matrix, "mass_matrix")?;
        if mass_matrix.nrows() != self.position.len() || mass_matrix.ncols() != self.position.len()
        {
            return Err(StatsError::DimensionMismatch(format!(
                "mass_matrix shape ({}, {}) must be ({}, {})",
                mass_matrix.nrows(),
                mass_matrix.ncols(),
                self.position.len(),
                self.position.len()
            )));
        }
        let mass_inv = scirs2_linalg::inv(&mass_matrix.view(), None).map_err(|e| {
            StatsError::ComputationError(format!("Failed to invert mass matrix: {}", e))
        })?;
        self.mass_matrix = mass_matrix;
self.mass_inv = mass_inv;
Ok(self)
}
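    /// Performs one HMC transition: samples a momentum, simulates the
    /// Hamiltonian dynamics with leapfrog, and accepts or rejects the
    /// endpoint with a Metropolis test on the change in total energy.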
pub fn step<R: Rng + ?Sized>(&mut self, rng: &mut R) -> Result<Array1<f64>> {
let momentum = self.sample_momentum(rng)?;
let initial_position = self.position.clone();
let initial_momentum = momentum.clone();
let initial_log_density = self.current_log_density;
let (final_position, final_momentum) = self.leapfrog(initial_position.clone(), momentum)?;
        // Total energy H(x, p) = -log pi(x) + K(p); `kinetic_energy` already
        // includes the 1/2 factor, so it must not be applied again here.
        let initial_hamiltonian = -initial_log_density + self.kinetic_energy(&initial_momentum);
        let final_log_density = self.target.log_density(&final_position);
        let final_hamiltonian = -final_log_density + self.kinetic_energy(&final_momentum);
        // Metropolis accept/reject on the energy difference.
        let log_alpha = -(final_hamiltonian - initial_hamiltonian);
        let u: f64 = rng.random();
        self.n_proposed += 1;
        if u.ln() < log_alpha {
self.position = final_position;
self.current_log_density = final_log_density;
self.n_accepted += 1;
}
Ok(self.position.clone())
}
fn sample_momentum<R: Rng + ?Sized>(&self, rng: &mut R) -> Result<Array1<f64>> {
let dim = self.position.len();
let normal = Normal::new(0.0, 1.0).map_err(|e| {
StatsError::ComputationError(format!("Failed to create normal distribution: {}", e))
})?;
let z = Array1::from_shape_fn(dim, |_| normal.sample(rng));
let mut momentum = Array1::zeros(dim);
for i in 0..dim {
momentum[i] = z[i] * self.mass_matrix[[i, i]].sqrt();
}
Ok(momentum)
}
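    /// Kinetic energy K(p) = 0.5 * p^T M^{-1} p, using the full inverse mass
    /// matrix so that dense matrices set via `with_mass_matrix` are handled.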
    fn kinetic_energy(&self, momentum: &Array1<f64>) -> f64 {
        0.5 * momentum.dot(&self.mass_inv.dot(momentum))
    }
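    /// Leapfrog (Störmer-Verlet) integration of Hamilton's equations: a half
    /// step for the momentum, alternating full steps for the position and
    /// momentum, and a closing half step for the momentum.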
fn leapfrog(
&self,
mut position: Array1<f64>,
mut momentum: Array1<f64>,
) -> Result<(Array1<f64>, Array1<f64>)> {
        // Initial half step for the momentum.
        let gradient = self.target.gradient(&position);
        momentum = momentum + 0.5 * self.stepsize * gradient;
        for step in 0..self.n_steps {
            // Full step for the position using the velocity M^{-1} p.
            let velocity = self.mass_inv.dot(&momentum);
            position = position + self.stepsize * velocity;
            // Full step for the momentum, except after the final position
            // update, which is completed by the closing half step below.
            if step + 1 < self.n_steps {
                let gradient = self.target.gradient(&position);
                momentum = momentum + self.stepsize * gradient;
            }
        }
        // Closing half step for the momentum.
        let gradient = self.target.gradient(&position);
        momentum = momentum + 0.5 * self.stepsize * gradient;
        // Negate the momentum so the proposal is reversible; this does not
        // change the kinetic energy, which is quadratic in p.
        momentum = -momentum;
Ok((position, momentum))
}
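    /// Runs `n_samples` transitions and returns them as the rows of an
    /// `n_samples` x `dim` matrix.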
pub fn sample<R: Rng + ?Sized>(
&mut self,
        n_samples: usize,
rng: &mut R,
) -> Result<Array2<f64>> {
let dim = self.position.len();
        let mut samples = Array2::zeros((n_samples, dim));
        for i in 0..n_samples {
let sample = self.step(rng)?;
samples.row_mut(i).assign(&sample);
}
Ok(samples)
}
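    /// Discards `burnin` transitions (which must be positive) and resets the
    /// acceptance counters before collecting `n_samples` draws.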
pub fn sample_with_burnin<R: Rng + ?Sized>(
&mut self,
        n_samples: usize,
burnin: usize,
rng: &mut R,
) -> Result<Array2<f64>> {
check_positive(burnin, "burnin")?;
for _ in 0..burnin {
self.step(rng)?;
}
self.reset_counters();
        self.sample(n_samples, rng)
}
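    /// Fraction of proposals accepted since the last counter reset, or 0.0
    /// if nothing has been proposed yet.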
pub fn acceptance_rate(&self) -> f64 {
if self.n_proposed == 0 {
0.0
} else {
self.n_accepted as f64 / self.n_proposed as f64
}
}
pub fn reset_counters(&mut self) {
self.n_accepted = 0;
self.n_proposed = 0;
}
}
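/// A simplified No-U-Turn-style sampler: it combines single-trajectory HMC
/// proposals with dual-averaging step-size adaptation. Unlike full NUTS
/// (Hoffman & Gelman, 2014), it does not recursively double the trajectory
/// until a U-turn is detected; `max_tree_depth` only bounds `build_tree`.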
pub struct NoUTurnSampler<T: DifferentiableTarget> {
pub hmc: HamiltonianMonteCarlo<T>,
pub max_tree_depth: usize,
pub target_accept_prob: f64,
pub stepsize_adaptation: DualAveragingAdaptation,
}
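/// Dual-averaging step-size adaptation following Hoffman & Gelman (2014),
/// Algorithm 5: the log step size is shrunk towards `mu` in proportion to
/// the running average of `target - alpha`, while `log_step_avg` keeps the
/// Polyak-style average to use once adaptation ends.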
#[derive(Debug, Clone)]
pub struct DualAveragingAdaptation {
    pub target: f64,
    /// Fixed shrinkage point for the log step size, set to ln(10 * eps0).
    pub mu: f64,
    pub gamma: f64,
    pub t0: f64,
    pub kappa: f64,
    pub iteration: usize,
    pub log_step_avg: f64,
    pub h_avg: f64,
}
impl DualAveragingAdaptation {
pub fn new(target: f64, initial_logstep: f64) -> Self {
        Self {
            target,
            // mu = ln(10 * eps0), the shrinkage point recommended by
            // Hoffman & Gelman (2014).
            mu: initial_logstep + 10.0f64.ln(),
            gamma: 0.05,
            t0: 10.0,
            kappa: 0.75,
            iteration: 0,
            log_step_avg: initial_logstep,
            h_avg: 0.0,
        }
}
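    /// One adaptation update given the latest acceptance probability
    /// `alpha`; returns the step size to use for the next iteration. After
    /// adaptation, `log_step_avg.exp()` is the recommended fixed step size.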
    pub fn update(&mut self, alpha: f64) -> f64 {
        self.iteration += 1;
        let m = self.iteration as f64;
        // Running average of (target - alpha), damped early on by t0.
        self.h_avg =
            (1.0 - 1.0 / (m + self.t0)) * self.h_avg + (self.target - alpha) / (m + self.t0);
        // Shrink the log step size towards mu, then fold it into the Polyak
        // average with weight m^{-kappa}.
        let log_step = self.mu - m.sqrt() / self.gamma * self.h_avg;
        let weight = m.powf(-self.kappa);
        self.log_step_avg = weight * log_step + (1.0 - weight) * self.log_step_avg;
        log_step.exp()
    }
}
impl<T: DifferentiableTarget> NoUTurnSampler<T> {
pub fn new(target: T, initial: Array1<f64>, initial_stepsize: f64) -> Result<Self> {
let hmc = HamiltonianMonteCarlo::new(target, initial, initial_stepsize, 1)?;
let stepsize_adaptation = DualAveragingAdaptation::new(0.8, initial_stepsize.ln());
Ok(Self {
hmc,
max_tree_depth: 10,
target_accept_prob: 0.8,
stepsize_adaptation,
})
}
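    /// One adaptive transition: proposes via a single leapfrog trajectory,
    /// applies a Metropolis test inside `build_tree`, and feeds the
    /// acceptance probability back into the dual-averaging adaptation.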
    pub fn step<R: Rng + ?Sized>(&mut self, rng: &mut R) -> Result<Array1<f64>> {
        let momentum = self.hmc.sample_momentum(rng)?;
        // Draw the Metropolis threshold up front so that `build_tree` can
        // accept or reject the trajectory it generates.
        let u: f64 = rng.random();
        let (new_position, alpha) =
            self.build_tree(self.hmc.position.clone(), momentum, u.ln(), 1, rng)?;
        let new_stepsize = self.stepsize_adaptation.update(alpha);
        self.hmc.stepsize = new_stepsize;
        // Acceptance is detected by comparing positions, which miscounts
        // only if an accepted proposal coincides with the current state.
        if !new_position
.iter()
.zip(self.hmc.position.iter())
.all(|(a, b)| (a - b).abs() < f64::EPSILON)
{
self.hmc.position = new_position;
self.hmc.current_log_density = self.hmc.target.log_density(&self.hmc.position);
self.hmc.n_accepted += 1;
}
self.hmc.n_proposed += 1;
Ok(self.hmc.position.clone())
}
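    /// Simplified stand-in for the recursive NUTS tree construction: runs a
    /// single leapfrog trajectory and accepts it when `log_u` falls below
    /// the log acceptance ratio. Returns the (possibly unchanged) position
    /// together with the acceptance probability used for adaptation.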
fn build_tree<R: Rng + ?Sized>(
&self,
position: Array1<f64>,
momentum: Array1<f64>,
log_u: f64,
depth: usize,
rng: &mut R,
) -> Result<(Array1<f64>, f64)> {
if depth >= self.max_tree_depth {
return Ok((position, 0.0));
}
let (new_position, new_momentum) = self.hmc.leapfrog(position.clone(), momentum.clone())?;
let new_log_density = self.hmc.target.log_density(&new_position);
        let new_hamiltonian = -new_log_density + self.hmc.kinetic_energy(&new_momentum);
        let current_hamiltonian =
            -self.hmc.current_log_density + self.hmc.kinetic_energy(&momentum);
        let log_alpha = -(new_hamiltonian - current_hamiltonian);
let alpha = log_alpha.exp().min(1.0);
if log_u <= log_alpha {
Ok((new_position, alpha))
} else {
Ok((position, alpha))
}
}
pub fn sample_adaptive<R: Rng + ?Sized>(
&mut self,
        n_samples: usize,
n_adapt: usize,
rng: &mut R,
) -> Result<Array2<f64>> {
for _ in 0..n_adapt {
self.step(rng)?;
}
self.hmc.reset_counters();
let dim = self.hmc.position.len();
        let mut samples = Array2::zeros((n_samples, dim));
        for i in 0..n_samples {
let sample = self.step(rng)?;
samples.row_mut(i).assign(&sample);
}
Ok(samples)
}
}
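/// Multivariate normal target N(mean, covariance), stored via its precision
/// matrix so that log-density and gradient evaluations are a single
/// matrix-vector product.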
#[derive(Debug, Clone)]
pub struct MultivariateNormalHMC {
pub mean: Array1<f64>,
pub precision: Array2<f64>,
pub log_norm_const: f64,
}
impl MultivariateNormalHMC {
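    /// Precomputes the precision matrix and the log normalizing constant
    /// -0.5 * (d * ln(2*pi) + ln det(covariance)). The determinant check
    /// rejects singular and negative-determinant inputs but is not a full
    /// positive-definiteness test; a Cholesky factorization would be.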
pub fn new(mean: Array1<f64>, covariance: Array2<f64>) -> Result<Self> {
checkarray_finite(&mean, "mean")?;
checkarray_finite(&covariance, "covariance")?;
if covariance.nrows() != mean.len() || covariance.ncols() != mean.len() {
return Err(StatsError::DimensionMismatch(format!(
"covariance shape ({}, {}) must be ({}, {})",
covariance.nrows(),
covariance.ncols(),
mean.len(),
mean.len()
)));
}
let precision = scirs2_linalg::inv(&covariance.view(), None).map_err(|e| {
StatsError::ComputationError(format!("Failed to invert covariance: {}", e))
})?;
let det = scirs2_linalg::det(&covariance.view(), None).map_err(|e| {
StatsError::ComputationError(format!("Failed to compute determinant: {}", e))
})?;
if det <= 0.0 {
return Err(StatsError::InvalidArgument(
"Covariance must be positive definite".to_string(),
));
}
let d = mean.len() as f64;
let log_norm_const = -0.5 * (d * (2.0 * std::f64::consts::PI).ln() + det.ln());
Ok(Self {
mean,
precision,
log_norm_const,
})
}
}
impl DifferentiableTarget for MultivariateNormalHMC {
fn log_density(&self, x: &Array1<f64>) -> f64 {
let diff = x - &self.mean;
let quad_form = diff.dot(&self.precision.dot(&diff));
self.log_norm_const - 0.5 * quad_form
}
fn gradient(&self, x: &Array1<f64>) -> Array1<f64> {
let diff = x - &self.mean;
-self.precision.dot(&diff)
}
fn dim(&self) -> usize {
self.mean.len()
}
fn log_density_and_gradient(&self, x: &Array1<f64>) -> (f64, Array1<f64>) {
let diff = x - &self.mean;
let quad_form = diff.dot(&self.precision.dot(&diff));
let log_density = self.log_norm_const - 0.5 * quad_form;
let gradient = -self.precision.dot(&diff);
(log_density, gradient)
}
}
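/// Adapter that turns a pair of closures (log-density and gradient) into a
/// [`DifferentiableTarget`], for targets defined ad hoc by the caller.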
pub struct CustomDifferentiableTarget<F, G> {
pub log_density_fn: F,
pub gradient_fn: G,
pub dim: usize,
}
impl<F, G> CustomDifferentiableTarget<F, G> {
    pub fn new(dim: usize, log_density_fn: F, gradient_fn: G) -> Result<Self> {
        check_positive(dim, "dim")?;
        Ok(Self {
            log_density_fn,
            gradient_fn,
            dim,
        })
}
}
impl<F, G> DifferentiableTarget for CustomDifferentiableTarget<F, G>
where
F: Fn(&Array1<f64>) -> f64 + Send + Sync,
G: Fn(&Array1<f64>) -> Array1<f64> + Send + Sync,
{
fn log_density(&self, x: &Array1<f64>) -> f64 {
(self.log_density_fn)(x)
}
fn gradient(&self, x: &Array1<f64>) -> Array1<f64> {
(self.gradient_fn)(x)
}
fn dim(&self) -> usize {
self.dim
}
}
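// Illustrative sketch (not part of the public API): sampling a standard
// normal through `CustomDifferentiableTarget`. The RNG is supplied by the
// caller, since its construction depends on the `scirs2_core::random` API.
#[allow(dead_code)]
fn example_standard_normal_hmc<R: Rng + ?Sized>(rng: &mut R) -> Result<Array2<f64>> {
    // log pi(x) = -0.5 * ||x||^2 up to a constant, so the gradient is -x.
    let target = CustomDifferentiableTarget::new(
        2,
        |x: &Array1<f64>| -0.5 * x.dot(x),
        |x: &Array1<f64>| -x.clone(),
    )?;
    let mut hmc = HamiltonianMonteCarlo::new(target, Array1::zeros(2), 0.1, 20)?;
    hmc.sample_with_burnin(500, 100, rng)
}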