gbrt_rs/objective/binary_classification.rs
//! Binary Classification Objectives for Gradient Boosting
//!
//! This module provides objective functions for binary classification tasks,
//! where the target variable is in {0, 1}. The primary implementation is
//! [`LogLossObjective`], which optimizes the binary cross-entropy (logistic loss).
//!
//! # Mathematical Background
//!
//! Binary classification in gradient boosting works by:
//! 1. Computing raw predictions (log-odds)
//! 2. Applying the sigmoid transformation to get probabilities
//! 3. Using log loss to measure prediction quality
//! 4. Computing gradients and Hessians for boosting iterations
//!
//! # Log Loss Formula
//!
//! For true label `y ∈ {0, 1}` and predicted probability `p ∈ (0, 1)`:
//!
//! ```text
//! L(y, p) = -[y·log(p) + (1-y)·log(1-p)]
//! ```
//!
//! The raw prediction (before sigmoid) represents log-odds:
//!
//! ```text
//! log(p/(1-p)) = raw_prediction
//! p = 1/(1 + exp(-raw_prediction))   // sigmoid function
//! ```
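//!
//! # Example
//!
//! An illustrative sketch of the formulas above using plain `f64` arithmetic
//! (no crate API involved):
//!
//! ```
//! let raw = 0.8_f64;                          // raw prediction (log-odds)
//! let p = 1.0 / (1.0 + (-raw).exp());         // sigmoid: p ≈ 0.69
//! let y = 1.0_f64;                            // true label
//! let loss = -(y * p.ln() + (1.0 - y) * (1.0 - p).ln());
//! assert!((loss - (-p.ln())).abs() < 1e-12);  // for y = 1, loss = -ln(p)
//! ```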
use super::{Objective, ObjectiveError, ObjectiveResult};
use serde::{Deserialize, Serialize};
use std::fmt;

/// Extended trait for binary classification objectives.
///
/// This trait extends the base [`Objective`] trait with methods specific to
/// binary classification, such as probability-to-class conversion and accuracy
/// metrics.
///
/// # Type Conversions
///
/// - Raw predictions are log-odds (any real number)
/// - Transformed predictions are probabilities in (0, 1)
/// - Class predictions are 0.0 or 1.0 based on a threshold
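///
/// # Example
///
/// A minimal sketch; the import path is assumed from this file's location in
/// the crate and may differ in the actual crate layout:
///
/// ```ignore
/// // Assumed path; adjust to the crate's actual re-exports.
/// use gbrt_rs::objective::binary_classification::{
///     BinaryClassificationObjective, LogLossObjective,
/// };
///
/// let objective = LogLossObjective::new();
/// let probabilities = [0.2, 0.7, 0.9];
/// let classes = objective.predict_classes(&probabilities, 0.5);
/// assert_eq!(classes, vec![0.0, 1.0, 1.0]);
/// ```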
pub trait BinaryClassificationObjective: Objective {
    /// Converts predicted probabilities to class labels (0.0 or 1.0).
    ///
    /// # Parameters
    /// - `probabilities`: Slice of probability values in [0, 1]
    /// - `threshold`: Decision threshold (typically 0.5)
    ///
    /// # Returns
    /// Vector of class predictions (0.0 or 1.0)
    ///
    /// # Panics
    /// Panics if `threshold` is not in [0, 1].
    fn predict_classes(&self, probabilities: &[f64], threshold: f64) -> Vec<f64>;

    /// Computes classification accuracy.
    ///
    /// # Parameters
    /// - `y_true`: True binary labels (0.0 or 1.0)
    /// - `y_pred_proba`: Predicted probabilities
    /// - `threshold`: Decision threshold for classification
    ///
    /// # Returns
    /// Accuracy score in [0, 1]
    fn accuracy(&self, y_true: &[f64], y_pred_proba: &[f64], threshold: f64) -> ObjectiveResult<f64>;

    /// Computes binary cross-entropy loss.
    ///
    /// This mirrors [`Objective::loss`] but operates on probabilities
    /// instead of raw predictions.
    fn binary_cross_entropy(&self, y_true: &[f64], y_pred_proba: &[f64]) -> ObjectiveResult<f64>;
}

/// Binary logistic loss objective for binary classification.
///
/// This objective implements the log loss (binary cross-entropy) function with
/// numerical stability improvements. It uses epsilon-clipping to avoid log(0)
/// and provides sigmoid transformations for probability calculations.
///
/// # Mathematical Details
///
/// - **Sigmoid**: `σ(x) = 1/(1 + exp(-x))`
/// - **Loss**: `L(y, p) = -[y·log(p) + (1-y)·log(1-p)]` where `p = σ(raw)`
/// - **Gradient**: `∂L/∂raw = p - y`
/// - **Hessian**: `∂²L/∂raw² = p·(1-p)`
/// - **Initial Prediction**: log-odds of the positive class proportion
///
/// # Numerical Stability
///
/// Probabilities are clipped to `[epsilon, 1-epsilon]` where `epsilon = 1e-15`
/// by default. This prevents infinite loss values while maintaining precision.
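///
/// # Example
///
/// A construction sketch; the import path is assumed from this file's
/// location and may differ in the actual crate layout:
///
/// ```ignore
/// use gbrt_rs::objective::binary_classification::LogLossObjective;
///
/// // Default epsilon (1e-15), or a custom clipping threshold.
/// let default_obj = LogLossObjective::new();
/// let custom_obj = LogLossObjective::new().with_epsilon(1e-12);
/// ```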
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LogLossObjective {
    epsilon: f64,
}

impl LogLossObjective {
    /// Creates a new `LogLossObjective` with the default epsilon (1e-15).
    pub fn new() -> Self {
        Self { epsilon: 1e-15 }
    }

    /// Sets a custom epsilon for numerical stability.
    ///
    /// # Parameters
    /// - `epsilon`: Clipping threshold (default: 1e-15)
    ///
    /// # Note
    /// This method never panics, but very small epsilon values (e.g., ≤ 1e-20)
    /// weaken the clipping, and training may then produce infinite or NaN loss
    /// values.
    pub fn with_epsilon(mut self, epsilon: f64) -> Self {
        self.epsilon = epsilon;
        self
    }

    /// Sigmoid activation function.
    ///
    /// Maps log-odds to a probability in (0, 1).
    fn sigmoid(&self, x: f64) -> f64 {
        1.0 / (1.0 + (-x).exp())
    }

    /// Clips a probability to avoid numerical instability in log calculations.
    fn clip_probability(&self, p: f64) -> f64 {
        p.clamp(self.epsilon, 1.0 - self.epsilon)
    }

    /// Converts log-odds to a probability using the sigmoid.
    fn log_odds_to_probability(&self, log_odds: f64) -> f64 {
        self.sigmoid(log_odds)
    }
}

impl Default for LogLossObjective {
    fn default() -> Self {
        Self::new()
    }
}

impl Objective for LogLossObjective {
    /// Computes binary cross-entropy loss on raw (log-odds) predictions.
    fn loss(&self, y_true: &[f64], y_pred: &[f64]) -> ObjectiveResult<f64> {
        // Raw predictions are log-odds; convert them to probabilities first,
        // then evaluate the cross-entropy on the probability scale.
        self.binary_cross_entropy(y_true, &self.transform(y_pred))
    }

    /// Computes the gradient of the loss with respect to raw predictions.
    ///
    /// Gradient = σ(raw) - y, where y ∈ {0, 1}
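    ///
    /// For example, at `raw = 0` the sigmoid gives `p = 0.5`, so the gradient
    /// is `-0.5` for a positive example (`y = 1`) and `+0.5` for a negative
    /// one (`y = 0`).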
    fn gradient(&self, y_true: &[f64], y_pred: &[f64]) -> ObjectiveResult<Vec<f64>> {
        if y_true.len() != y_pred.len() {
            return Err(ObjectiveError::InvalidInput(
                "Targets and predictions must have the same length".to_string()
            ));
        }

        Ok(y_true.iter()
            .zip(y_pred.iter())
            .map(|(&y, &y_hat)| {
                let p = self.sigmoid(y_hat);
                p - y
            })
            .collect())
    }

    /// Computes the Hessian (second derivative) of the loss.
    ///
    /// Hessian = p·(1-p), where p = σ(raw)
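    ///
    /// The Hessian peaks at `p = 0.5` (value `0.25`) and approaches `0` as
    /// predictions become confident in either direction.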
    fn hessian(&self, y_true: &[f64], y_pred: &[f64]) -> ObjectiveResult<Vec<f64>> {
        // Validate lengths for consistency with `gradient`, even though the
        // Hessian itself does not depend on the targets.
        if y_true.len() != y_pred.len() {
            return Err(ObjectiveError::InvalidInput(
                "Targets and predictions must have the same length".to_string()
            ));
        }

        Ok(y_pred.iter()
            .map(|&y_hat| {
                let p = self.sigmoid(y_hat);
                p * (1.0 - p)
            })
            .collect())
    }

    /// Computes the gradient and Hessian in a single pass for efficiency.
    fn gradient_hessian(&self, y_true: &[f64], y_pred: &[f64]) -> ObjectiveResult<(Vec<f64>, Vec<f64>)> {
        if y_true.len() != y_pred.len() {
            return Err(ObjectiveError::InvalidInput(
                "Targets and predictions must have the same length".to_string()
            ));
        }

        // Evaluate the sigmoid once per sample and derive both quantities
        // from it, rather than delegating to `gradient` and recomputing.
        let mut gradient = Vec::with_capacity(y_pred.len());
        let mut hessian = Vec::with_capacity(y_pred.len());
        for (&y, &y_hat) in y_true.iter().zip(y_pred.iter()) {
            let p = self.sigmoid(y_hat);
            gradient.push(p - y);
            hessian.push(p * (1.0 - p));
        }

        Ok((gradient, hessian))
    }

    /// Transforms raw predictions into probabilities using the sigmoid.
    fn transform(&self, y_pred: &[f64]) -> Vec<f64> {
        y_pred.iter()
            .map(|&x| self.log_odds_to_probability(x))
            .collect()
    }

    fn name(&self) -> &str {
        "log_loss"
    }

    fn is_regression(&self) -> bool {
        false
    }

    fn is_classification(&self) -> bool {
        true
    }

    /// Computes the optimal initial prediction as the log-odds of the
    /// positive class proportion.
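    ///
    /// For example, with 25% positive labels the initial prediction is
    /// `ln(0.25 / 0.75) ≈ -1.0986`.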
    fn initial_prediction(&self, y_true: &[f64]) -> ObjectiveResult<f64> {
        if y_true.is_empty() {
            return Err(ObjectiveError::InvalidInput("Empty targets".to_string()));
        }

        // For classification, use the log-odds of the positive class proportion.
        let positive_count = y_true.iter().filter(|&&y| y == 1.0).count() as f64;
        let positive_ratio = positive_count / y_true.len() as f64;
        let positive_ratio_clamped = positive_ratio.clamp(self.epsilon, 1.0 - self.epsilon);

        Ok((positive_ratio_clamped / (1.0 - positive_ratio_clamped)).ln())
    }

    /// Validates that targets are binary (0.0 or 1.0).
    fn validate_targets(&self, y_true: &[f64]) -> ObjectiveResult<()> {
        if y_true.is_empty() {
            return Err(ObjectiveError::InvalidInput("Empty targets".to_string()));
        }

        // Check that every target is exactly 0 or 1.
        for &y in y_true {
            if y != 0.0 && y != 1.0 {
                return Err(ObjectiveError::InvalidInput(
                    format!("Binary classification targets must be 0 or 1, got {}", y)
                ));
            }
        }

        Ok(())
    }
}

impl BinaryClassificationObjective for LogLossObjective {
    /// Converts probabilities to binary class predictions using a threshold.
    fn predict_classes(&self, probabilities: &[f64], threshold: f64) -> Vec<f64> {
        assert!(
            (0.0..=1.0).contains(&threshold),
            "Threshold must be between 0 and 1"
        );

        probabilities.iter()
            .map(|&p| if p >= threshold { 1.0 } else { 0.0 })
            .collect()
    }

    /// Computes classification accuracy.
    fn accuracy(&self, y_true: &[f64], y_pred_proba: &[f64], threshold: f64) -> ObjectiveResult<f64> {
        if y_true.len() != y_pred_proba.len() {
            return Err(ObjectiveError::InvalidInput(
                "Targets and predictions must have the same length".to_string()
            ));
        }

        self.validate_targets(y_true)?;

        let predictions = self.predict_classes(y_pred_proba, threshold);
        let correct = y_true.iter()
            .zip(predictions.iter())
            .filter(|&(&true_val, &pred_val)| (true_val - pred_val).abs() < 1e-10)
            .count();

        Ok(correct as f64 / y_true.len() as f64)
    }

    /// Computes binary cross-entropy loss with epsilon-clipping for stability.
    fn binary_cross_entropy(&self, y_true: &[f64], y_pred_proba: &[f64]) -> ObjectiveResult<f64> {
        if y_true.len() != y_pred_proba.len() {
            return Err(ObjectiveError::InvalidInput(
                "Targets and predictions must have the same length".to_string()
            ));
        }

        self.validate_targets(y_true)?;

        let loss: f64 = y_true.iter()
            .zip(y_pred_proba.iter())
            .map(|(&y, &p)| {
                let p_clipped = self.clip_probability(p);
                -(y * p_clipped.ln() + (1.0 - y) * (1.0 - p_clipped).ln())
            })
            .sum();

        Ok(loss / y_true.len() as f64)
    }
}

impl fmt::Display for LogLossObjective {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "LogLoss(epsilon={})", self.epsilon)
    }
}
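
// A small illustrative test module sketching the behavior documented above.
// The expected values are derived from the formulas in this file; this is an
// assumed addition, not an existing test suite.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn gradient_and_hessian_at_zero_log_odds() {
        let obj = LogLossObjective::new();
        // At raw = 0, the sigmoid gives p = 0.5.
        let grad = obj.gradient(&[1.0, 0.0], &[0.0, 0.0]).unwrap();
        assert!((grad[0] + 0.5).abs() < 1e-12); // y = 1 => p - y = -0.5
        assert!((grad[1] - 0.5).abs() < 1e-12); // y = 0 => p - y = +0.5

        // p·(1-p) peaks at 0.25 when p = 0.5.
        let hess = obj.hessian(&[1.0, 0.0], &[0.0, 0.0]).unwrap();
        assert!((hess[0] - 0.25).abs() < 1e-12);
    }

    #[test]
    fn initial_prediction_is_log_odds_of_base_rate() {
        let obj = LogLossObjective::new();
        // One positive out of four => ln(0.25 / 0.75) ≈ -1.0986.
        let init = obj.initial_prediction(&[1.0, 0.0, 0.0, 0.0]).unwrap();
        assert!((init - (0.25_f64 / 0.75).ln()).abs() < 1e-12);
    }

    #[test]
    fn accuracy_with_default_threshold() {
        let obj = LogLossObjective::new();
        // Predictions at threshold 0.5: [1, 0, 0] against truth [1, 0, 1].
        let acc = obj
            .accuracy(&[1.0, 0.0, 1.0], &[0.9, 0.2, 0.4], 0.5)
            .unwrap();
        assert!((acc - 2.0 / 3.0).abs() < 1e-12);
    }
}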