use crate::booster::core::PerpetualBooster;
use crate::constraints::ConstraintMap;
use crate::data::Matrix;
use crate::errors::PerpetualError;
use crate::objective::Objective;
use serde::{Deserialize, Serialize};
use std::collections::HashSet;
/// Build a `PerpetualBooster` with the configuration shared by every
/// meta-learner in this module.
///
/// This is a pure forwarding wrapper around `PerpetualBooster::new`; all
/// parameters are passed through unchanged. Two values are hard-coded at the
/// call site: the positional `0.0` immediately after `budget`, and the
/// trailing `false` — their meanings are not visible from this file; confirm
/// against the `PerpetualBooster::new` signature before changing either.
#[allow(clippy::too_many_arguments)]
fn create_booster(
budget: f32,
objective: Objective,
max_bin: u16,
num_threads: Option<usize>,
monotone_constraints: Option<ConstraintMap>,
interaction_constraints: Option<Vec<Vec<usize>>>,
force_children_to_bound_parent: bool,
missing: f64,
allow_missing_splits: bool,
create_missing_branch: bool,
terminate_missing_features: HashSet<usize>,
missing_node_treatment: crate::booster::config::MissingNodeTreatment,
log_iterations: usize,
seed: u64,
reset: Option<bool>,
categorical_features: Option<HashSet<usize>>,
timeout: Option<f32>,
iteration_limit: Option<usize>,
memory_limit: Option<f32>,
stopping_rounds: Option<usize>,
) -> Result<PerpetualBooster, PerpetualError> {
PerpetualBooster::new(
objective,
budget,
// NOTE(review): hard-coded positional value (presumably a base score or
// quantile parameter) — TODO confirm against `PerpetualBooster::new`.
0.0, max_bin,
num_threads,
monotone_constraints,
interaction_constraints,
force_children_to_bound_parent,
missing,
allow_missing_splits,
create_missing_branch,
terminate_missing_features,
missing_node_treatment,
log_iterations,
seed,
reset,
categorical_features,
timeout,
iteration_limit,
memory_limit,
stopping_rounds,
// NOTE(review): hard-coded flag — meaning not visible here; verify.
false,
)
}
/// Split `(x, y)` into control (`w == 0.0`) and treated (`w == 1.0`) subsets.
///
/// `x` is treated as column-major with `x.rows` entries per column. Rows whose
/// treatment indicator is neither exactly `0.0` nor `1.0` are dropped from
/// both subsets. Returns `(x0_data, y0, n0, x1_data, y1, n1)`, where each
/// `x*_data` is a column-major feature buffer with `n*` rows.
fn extract_subsets(x: &Matrix<f64>, w: &[f64], y: &[f64]) -> (Vec<f64>, Vec<f64>, usize, Vec<f64>, Vec<f64>, usize) {
    let rows = x.rows;
    // Row positions belonging to one arm, selected by exact indicator match.
    let select = |target: f64| -> Vec<usize> {
        w.iter()
            .enumerate()
            .filter_map(|(i, &v)| if v == target { Some(i) } else { None })
            .collect()
    };
    let idx0 = select(0.0);
    let idx1 = select(1.0);
    // Gather a column-major feature buffer and outcome vector for `indices`.
    let gather = |indices: &[usize]| -> (Vec<f64>, Vec<f64>) {
        let mut features = Vec::with_capacity(indices.len() * x.cols);
        for col in 0..x.cols {
            let column = &x.data[col * rows..(col + 1) * rows];
            features.extend(indices.iter().map(|&i| column[i]));
        }
        let outcomes: Vec<f64> = indices.iter().map(|&i| y[i]).collect();
        (features, outcomes)
    };
    let (x0_data, y0) = gather(&idx0);
    let (x1_data, y1) = gather(&idx1);
    let (n0, n1) = (idx0.len(), idx1.len());
    (x0_data, y0, n0, x1_data, y1, n1)
}
/// S-learner: estimates treatment effects with a single outcome model fit on
/// the features augmented with the treatment indicator as an extra column;
/// the effect is the difference of predictions at indicator 1 vs. 0.
#[derive(Serialize, Deserialize)]
pub struct SLearner {
// The single booster fit on `[X | w]`.
pub model: PerpetualBooster,
}
impl SLearner {
    /// Construct an S-learner backed by a single squared-loss booster.
    ///
    /// All arguments are forwarded unchanged to the underlying booster
    /// configuration; see `PerpetualBooster::new` for their meaning.
    #[allow(clippy::too_many_arguments)]
    pub fn new(
        budget: f32,
        max_bin: u16,
        num_threads: Option<usize>,
        monotone_constraints: Option<ConstraintMap>,
        interaction_constraints: Option<Vec<Vec<usize>>>,
        force_children_to_bound_parent: bool,
        missing: f64,
        allow_missing_splits: bool,
        create_missing_branch: bool,
        terminate_missing_features: HashSet<usize>,
        missing_node_treatment: crate::booster::config::MissingNodeTreatment,
        log_iterations: usize,
        seed: u64,
        reset: Option<bool>,
        categorical_features: Option<HashSet<usize>>,
        timeout: Option<f32>,
        iteration_limit: Option<usize>,
        memory_limit: Option<f32>,
        stopping_rounds: Option<usize>,
    ) -> Result<Self, PerpetualError> {
        create_booster(
            budget,
            Objective::SquaredLoss,
            max_bin,
            num_threads,
            monotone_constraints,
            interaction_constraints,
            force_children_to_bound_parent,
            missing,
            allow_missing_splits,
            create_missing_branch,
            terminate_missing_features,
            missing_node_treatment,
            log_iterations,
            seed,
            reset,
            categorical_features,
            timeout,
            iteration_limit,
            memory_limit,
            stopping_rounds,
        )
        .map(|model| Self { model })
    }

    /// Fit the single model on features augmented with the treatment
    /// indicator appended as the last (column-major) column.
    pub fn fit(&mut self, x: &Matrix<f64>, w: &[f64], y: &[f64]) -> Result<(), PerpetualError> {
        let mut augmented = Vec::with_capacity(x.data.len() + x.rows);
        augmented.extend_from_slice(x.data);
        augmented.extend_from_slice(w);
        let with_treatment = Matrix::new(&augmented, x.rows, x.cols + 1);
        self.model.fit(&with_treatment, y, None, None)
    }

    /// Predict the per-row treatment effect as `f(x, w=1) - f(x, w=0)`.
    pub fn predict(&self, x: &Matrix<f64>) -> Vec<f64> {
        // Score the model with the treatment column forced to one constant.
        let counterfactual = |indicator: f64| -> Vec<f64> {
            let mut data = Vec::with_capacity(x.data.len() + x.rows);
            data.extend_from_slice(x.data);
            data.resize(data.len() + x.rows, indicator);
            let matrix = Matrix::new(&data, x.rows, x.cols + 1);
            self.model.predict(&matrix, true)
        };
        let mu1 = counterfactual(1.0);
        let mu0 = counterfactual(0.0);
        mu1.into_iter().zip(mu0).map(|(a, b)| a - b).collect()
    }
}
/// T-learner: two independent outcome models, one fit on control rows and one
/// on treated rows; the effect is the difference of their predictions.
#[derive(Serialize, Deserialize)]
pub struct TLearner {
// Outcome model for the control arm (`w == 0`).
pub mu0: PerpetualBooster,
// Outcome model for the treated arm (`w == 1`).
pub mu1: PerpetualBooster,
}
impl TLearner {
    /// Construct a T-learner: two independently configured squared-loss
    /// boosters, one per treatment arm.
    ///
    /// All arguments are forwarded unchanged to both boosters; see
    /// `PerpetualBooster::new` for their meaning.
    #[allow(clippy::too_many_arguments)]
    pub fn new(
        budget: f32,
        max_bin: u16,
        num_threads: Option<usize>,
        monotone_constraints: Option<ConstraintMap>,
        interaction_constraints: Option<Vec<Vec<usize>>>,
        force_children_to_bound_parent: bool,
        missing: f64,
        allow_missing_splits: bool,
        create_missing_branch: bool,
        terminate_missing_features: HashSet<usize>,
        missing_node_treatment: crate::booster::config::MissingNodeTreatment,
        log_iterations: usize,
        seed: u64,
        reset: Option<bool>,
        categorical_features: Option<HashSet<usize>>,
        timeout: Option<f32>,
        iteration_limit: Option<usize>,
        memory_limit: Option<f32>,
        stopping_rounds: Option<usize>,
    ) -> Result<Self, PerpetualError> {
        // Build each component through one closure (mirroring XLearner and
        // DRLearner) so the shared configuration cannot drift between models.
        let make = |s: u64| {
            create_booster(
                budget,
                Objective::SquaredLoss,
                max_bin,
                num_threads,
                monotone_constraints.clone(),
                interaction_constraints.clone(),
                force_children_to_bound_parent,
                missing,
                allow_missing_splits,
                create_missing_branch,
                terminate_missing_features.clone(),
                missing_node_treatment,
                log_iterations,
                s,
                reset,
                categorical_features.clone(),
                timeout,
                iteration_limit,
                memory_limit,
                stopping_rounds,
            )
        };
        // Fix: both models were previously seeded with `seed + 1`, giving the
        // two arms identical RNG streams. Use distinct consecutive seeds,
        // matching the per-component offset convention in XLearner/DRLearner.
        Ok(Self {
            mu0: make(seed)?,
            mu1: make(seed + 1)?,
        })
    }

    /// Fit `mu0` on control rows (`w == 0.0`) and `mu1` on treated rows
    /// (`w == 1.0`); rows with any other indicator value are dropped by
    /// `extract_subsets`.
    pub fn fit(&mut self, x: &Matrix<f64>, w: &[f64], y: &[f64]) -> Result<(), PerpetualError> {
        let (x0_data, y0, n0, x1_data, y1, n1) = extract_subsets(x, w, y);
        let matrix0 = Matrix::new(&x0_data, n0, x.cols);
        let matrix1 = Matrix::new(&x1_data, n1, x.cols);
        self.mu0.fit(&matrix0, &y0, None, None)?;
        self.mu1.fit(&matrix1, &y1, None, None)?;
        Ok(())
    }

    /// Predict the per-row treatment effect as `mu1(x) - mu0(x)`.
    pub fn predict(&self, x: &Matrix<f64>) -> Vec<f64> {
        let p1 = self.mu1.predict(x, true);
        let p0 = self.mu0.predict(x, true);
        p1.iter().zip(p0.iter()).map(|(a, b)| a - b).collect()
    }
}
/// X-learner: per-arm outcome models, per-arm imputed-effect models, and a
/// propensity model that blends the two effect estimates at prediction time.
#[derive(Serialize, Deserialize)]
pub struct XLearner {
// Outcome model for the control arm (`w == 0`).
pub mu0: PerpetualBooster,
// Outcome model for the treated arm (`w == 1`).
pub mu1: PerpetualBooster,
// Effect model fit on imputed effects for control rows.
pub tau0: PerpetualBooster,
// Effect model fit on imputed effects for treated rows.
pub tau1: PerpetualBooster,
// Propensity model (log-loss) predicting treatment assignment.
pub propensity: PerpetualBooster,
}
impl XLearner {
    /// Construct an X-learner: four squared-loss boosters (outcome and effect
    /// models per arm) plus a log-loss propensity booster. The propensity
    /// model uses `propensity_budget` when given, otherwise `budget`; each
    /// component gets a distinct consecutive seed.
    #[allow(clippy::too_many_arguments)]
    pub fn new(
        budget: f32,
        propensity_budget: Option<f32>,
        max_bin: u16,
        num_threads: Option<usize>,
        monotone_constraints: Option<ConstraintMap>,
        interaction_constraints: Option<Vec<Vec<usize>>>,
        force_children_to_bound_parent: bool,
        missing: f64,
        allow_missing_splits: bool,
        create_missing_branch: bool,
        terminate_missing_features: HashSet<usize>,
        missing_node_treatment: crate::booster::config::MissingNodeTreatment,
        log_iterations: usize,
        seed: u64,
        reset: Option<bool>,
        categorical_features: Option<HashSet<usize>>,
        timeout: Option<f32>,
        iteration_limit: Option<usize>,
        memory_limit: Option<f32>,
        stopping_rounds: Option<usize>,
    ) -> Result<Self, PerpetualError> {
        let p_budget = propensity_budget.unwrap_or(budget);
        // One builder closure so every component shares identical settings.
        let build = |objective: Objective, model_budget: f32, model_seed: u64| {
            create_booster(
                model_budget,
                objective,
                max_bin,
                num_threads,
                monotone_constraints.clone(),
                interaction_constraints.clone(),
                force_children_to_bound_parent,
                missing,
                allow_missing_splits,
                create_missing_branch,
                terminate_missing_features.clone(),
                missing_node_treatment,
                log_iterations,
                model_seed,
                reset,
                categorical_features.clone(),
                timeout,
                iteration_limit,
                memory_limit,
                stopping_rounds,
            )
        };
        Ok(Self {
            mu0: build(Objective::SquaredLoss, budget, seed)?,
            mu1: build(Objective::SquaredLoss, budget, seed + 1)?,
            tau0: build(Objective::SquaredLoss, budget, seed + 2)?,
            tau1: build(Objective::SquaredLoss, budget, seed + 3)?,
            propensity: build(Objective::LogLoss, p_budget, seed + 4)?,
        })
    }

    /// Fit stage 1 (per-arm outcome models), stage 2 (per-arm models on
    /// imputed individual effects), and the propensity model.
    pub fn fit(&mut self, x: &Matrix<f64>, w: &[f64], y: &[f64]) -> Result<(), PerpetualError> {
        let (x0_data, y0, n0, x1_data, y1, n1) = extract_subsets(x, w, y);
        let matrix0 = Matrix::new(&x0_data, n0, x.cols);
        let matrix1 = Matrix::new(&x1_data, n1, x.cols);
        // Stage 1: outcome models per arm.
        self.mu0.fit(&matrix0, &y0, None, None)?;
        self.mu1.fit(&matrix1, &y1, None, None)?;
        // Stage 2: imputed effects. Treated rows use the observed outcome
        // minus the predicted control outcome; control rows use the predicted
        // treated outcome minus the observed outcome.
        let d1: Vec<f64> = self
            .mu0
            .predict(&matrix1, true)
            .into_iter()
            .zip(y1.iter())
            .map(|(m, yi)| yi - m)
            .collect();
        let d0: Vec<f64> = self
            .mu1
            .predict(&matrix0, true)
            .into_iter()
            .zip(y0.iter())
            .map(|(m, yi)| m - yi)
            .collect();
        self.tau1.fit(&matrix1, &d1, None, None)?;
        self.tau0.fit(&matrix0, &d0, None, None)?;
        // Propensity model on the full sample.
        self.propensity.fit(x, w, None, None)?;
        Ok(())
    }

    /// Predict the effect as `g(x) * tau0(x) + (1 - g(x)) * tau1(x)`, where
    /// `g(x)` is the propensity score recovered from the model's log-odds via
    /// the logistic transform.
    pub fn predict(&self, x: &Matrix<f64>) -> Vec<f64> {
        let t0 = self.tau0.predict(x, true);
        let t1 = self.tau1.predict(x, true);
        let log_odds = self.propensity.predict(x, true);
        t0.into_iter()
            .zip(t1)
            .zip(log_odds)
            .map(|((effect0, effect1), lo)| {
                let g = 1.0 / (1.0 + (-lo).exp());
                g * effect0 + (1.0 - g) * effect1
            })
            .collect()
    }
}
/// DR-learner (doubly robust): nuisance outcome and propensity models feed an
/// AIPW pseudo-outcome, on which a final effect model is fit.
#[derive(Serialize, Deserialize)]
pub struct DRLearner {
// Outcome model for the control arm (`w == 0`).
pub mu0: PerpetualBooster,
// Outcome model for the treated arm (`w == 1`).
pub mu1: PerpetualBooster,
// Propensity model (log-loss) predicting treatment assignment.
pub propensity: PerpetualBooster,
// Final-stage model fit on the doubly robust pseudo-outcome.
pub effect: PerpetualBooster,
}
impl DRLearner {
#[allow(clippy::too_many_arguments)]
pub fn new(
budget: f32,
propensity_budget: Option<f32>,
max_bin: u16,
num_threads: Option<usize>,
monotone_constraints: Option<ConstraintMap>,
interaction_constraints: Option<Vec<Vec<usize>>>,
force_children_to_bound_parent: bool,
missing: f64,
allow_missing_splits: bool,
create_missing_branch: bool,
terminate_missing_features: HashSet<usize>,
missing_node_treatment: crate::booster::config::MissingNodeTreatment,
log_iterations: usize,
seed: u64,
reset: Option<bool>,
categorical_features: Option<HashSet<usize>>,
timeout: Option<f32>,
iteration_limit: Option<usize>,
memory_limit: Option<f32>,
stopping_rounds: Option<usize>,
) -> Result<Self, PerpetualError> {
let p_budget = propensity_budget.unwrap_or(budget);
let make = |obj: Objective, b: f32, s: u64| {
create_booster(
b,
obj,
max_bin,
num_threads,
monotone_constraints.clone(),
interaction_constraints.clone(),
force_children_to_bound_parent,
missing,
allow_missing_splits,
create_missing_branch,
terminate_missing_features.clone(),
missing_node_treatment,
log_iterations,
s,
reset,
categorical_features.clone(),
timeout,
iteration_limit,
memory_limit,
stopping_rounds,
)
};
Ok(Self {
mu0: make(Objective::SquaredLoss, budget, seed)?,
mu1: make(Objective::SquaredLoss, budget, seed + 1)?,
propensity: make(Objective::LogLoss, p_budget, seed + 2)?,
effect: make(Objective::SquaredLoss, budget, seed + 3)?,
})
}
pub fn fit(&mut self, x: &Matrix<f64>, w: &[f64], y: &[f64]) -> Result<(), PerpetualError> {
let n = x.rows;
let (x0_data, y0, n0, x1_data, y1, n1) = extract_subsets(x, w, y);
let matrix0 = Matrix::new(&x0_data, n0, x.cols);
let matrix1 = Matrix::new(&x1_data, n1, x.cols);
self.mu0.fit(&matrix0, &y0, None, None)?;
self.mu1.fit(&matrix1, &y1, None, None)?;
self.propensity.fit(x, w, None, None)?;
let mu0_hat = self.mu0.predict(x, true);
let mu1_hat = self.mu1.predict(x, true);
let log_odds = self.propensity.predict(x, true);
let p_hat: Vec<f64> = log_odds.iter().map(|lo| 1.0 / (1.0 + (-lo).exp())).collect();
let mut gamma = Vec::with_capacity(n);
for i in 0..n {
let m1 = mu1_hat[i];
let m0 = mu0_hat[i];
let p = p_hat[i].clamp(1e-3, 1.0 - 1e-3);
let wi = w[i];
let yi = y[i];
let term1 = m1 - m0;
let term2 = wi * (yi - m1) / p;
let term3 = (1.0 - wi) * (yi - m0) / (1.0 - p);
gamma.push(term1 + term2 - term3);
}
self.effect.fit(x, &gamma, None, None)?;
Ok(())
}
pub fn predict(&self, x: &Matrix<f64>) -> Vec<f64> {
self.effect.predict(x, true)
}
}