// gbrt_rs/objective/binary_classification.rs

//! Binary Classification Objectives for Gradient Boosting
//!
//! This module provides objective functions for binary classification tasks,
//! where the target variable is in {0, 1}. The primary implementation is
//! [`LogLossObjective`], which optimizes the binary cross-entropy (logistic loss).
//!
//! # Mathematical Background
//!
//! Binary classification in gradient boosting works by:
//! 1. Computing raw predictions (log-odds)
//! 2. Applying sigmoid transformation to get probabilities
//! 3. Using log loss to measure prediction quality
//! 4. Computing gradients and Hessians for boosting iterations
//!
//! # Log Loss Formula
//!
//! For true label `y ∈ {0, 1}` and predicted probability `p ∈ (0, 1)`:
//!
//! ```text
//! L(y, p) = -[y·log(p) + (1-y)·log(1-p)]
//! ```
//!
//! The raw prediction (before sigmoid) represents log-odds:
//!
//! ```text
//! log(p/(1-p)) = raw_prediction
//! p = 1/(1 + exp(-raw_prediction))  // sigmoid function
//! ```

use super::{Objective, ObjectiveError, ObjectiveResult};
use serde::{Deserialize, Serialize};
use std::fmt;

/// Extended trait for binary classification objectives.
///
/// This trait extends the base [`Objective`] trait with methods specific to
/// binary classification, such as probability-to-class conversion and accuracy
/// metrics.
///
/// # Type Conversions
///
/// - Raw predictions are log-odds (any real number)
/// - Transformed predictions are probabilities in (0, 1)
/// - Class predictions are 0.0 or 1.0 based on threshold
pub trait BinaryClassificationObjective: Objective {
    /// Converts predicted probabilities to class labels (0.0 or 1.0).
    ///
    /// # Parameters
    /// - `probabilities`: Slice of probability values in [0, 1]
    /// - `threshold`: Decision threshold (default: 0.5); a probability
    ///   greater than or equal to the threshold maps to class 1.0
    ///
    /// # Returns
    /// Vector of class predictions (0.0 or 1.0), same length as input
    ///
    /// # Panics
    /// Panics if threshold is not in [0, 1]
    fn predict_classes(&self, probabilities: &[f64], threshold: f64) -> Vec<f64>;

    /// Computes classification accuracy.
    ///
    /// # Parameters
    /// - `y_true`: True binary labels (0.0 or 1.0)
    /// - `y_pred_proba`: Predicted probabilities
    /// - `threshold`: Decision threshold for classification
    ///
    /// # Returns
    /// Accuracy score in [0, 1] (fraction of correctly classified samples)
    ///
    /// # Errors
    /// Returns an error if the slices have different lengths or if `y_true`
    /// contains values other than 0.0 or 1.0.
    fn accuracy(&self, y_true: &[f64], y_pred_proba: &[f64], threshold: f64) -> ObjectiveResult<f64>;

    /// Computes binary cross-entropy loss.
    ///
    /// This is the same as [`Objective::loss`] but operates on probabilities
    /// instead of raw predictions.
    ///
    /// # Errors
    /// Returns an error if the slices have different lengths or if `y_true`
    /// contains values other than 0.0 or 1.0.
    fn binary_cross_entropy(&self, y_true: &[f64], y_pred_proba: &[f64]) -> ObjectiveResult<f64>;
}

/// Binary logistic loss objective for binary classification.
///
/// This objective implements the log loss (binary cross-entropy) function with
/// numerical stability improvements. It uses epsilon-clipping to avoid log(0)
/// and provides sigmoid transformations for probability calculations.
///
/// # Mathematical Details
///
/// - **Sigmoid**: `σ(x) = 1/(1 + exp(-x))`
/// - **Loss**: `L(y, p) = -[y·log(p) + (1-y)·log(1-p)]` where p = σ(raw)
/// - **Gradient**: `∂L/∂raw = p - y`
/// - **Hessian**: `∂²L/∂raw² = p·(1-p)`
/// - **Initial Prediction**: log-odds of positive class proportion
///
/// # Numerical Stability
///
/// Probabilities are clipped to `[epsilon, 1-epsilon]` where `epsilon = 1e-15`
/// by default. This prevents infinite loss values while maintaining precision.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LogLossObjective {
    // Clipping threshold: probabilities are constrained to
    // [epsilon, 1 - epsilon] before any ln() call to avoid log(0).
    epsilon: f64,
}

100impl LogLossObjective {
101    /// Creates a new LogLossObjective with default epsilon (1e-15).
102    pub fn new() -> Self {
103        Self { epsilon: 1e-15 }
104    }
105    
106    /// Sets custom epsilon for numerical stability.
107    ///
108    /// # Parameters
109    /// - `epsilon`: Clipping threshold (default: 1e-15)
110    ///
111    /// # Panics
112    /// Model training may produce NaN if epsilon is too small (≤ 1e-20)
113    pub fn with_epsilon(mut self, epsilon: f64) -> Self {
114        self.epsilon = epsilon;
115        self
116    }
117    
118    /// Sigmoid activation function.
119    /// 
120    /// Maps log-odds to probability in (0, 1).
121    fn sigmoid(&self, x: f64) -> f64 {
122        1.0 / (1.0 + (-x).exp())
123    }
124    
125    /// Clips probability to avoid numerical instability in log calculations.
126    fn clip_probability(&self, p: f64) -> f64 {
127        p.max(self.epsilon).min(1.0 - self.epsilon)
128    }
129    
130    /// Converts log-odds to probability using sigmoid.
131    fn log_odds_to_probability(&self, log_odds: f64) -> f64 {
132        self.sigmoid(log_odds)
133    }
134}
135
136impl Default for LogLossObjective {
137    fn default() -> Self {
138        Self::new()
139    }
140}
141
142impl Objective for LogLossObjective {
143    /// Computes binary cross-entropy loss.
144    fn loss(&self, y_true: &[f64], y_pred: &[f64]) -> ObjectiveResult<f64> {
145        self.binary_cross_entropy(y_true, y_pred)
146    }
147
148    /// Computes gradient of loss with respect to raw predictions.
149    /// 
150    /// Gradient = σ(raw) - y, where y ∈ {0, 1}
151    fn gradient(&self, y_true: &[f64], y_pred: &[f64]) -> ObjectiveResult<Vec<f64>> {
152        if y_true.len() != y_pred.len() {
153            return Err(ObjectiveError::InvalidInput(
154                "Targets and predictions must have the same length".to_string()
155            ));
156        }
157        
158        Ok(y_true.iter()
159            .zip(y_pred.iter())
160            .map(|(&y, &y_hat)| {
161                let p = self.sigmoid(y_hat);
162                p - y
163            })
164            .collect())
165    }
166    
167    /// Computes Hessian (second derivative) of loss.
168    /// 
169    /// Hessian = p·(1-p) where p = σ(raw)
170    fn hessian(&self, _y_true: &[f64], y_pred: &[f64]) -> ObjectiveResult<Vec<f64>> {
171        Ok(y_pred.iter()
172            .map(|&y_hat| {
173                let p = self.sigmoid(y_hat);
174                p * (1.0 - p)
175            })
176            .collect())
177    }
178    
179    /// Computes gradient and Hessian in a single pass for efficiency.
180    fn gradient_hessian(&self, y_true: &[f64], y_pred: &[f64]) -> ObjectiveResult<(Vec<f64>, Vec<f64>)> {
181        let gradient = self.gradient(y_true, y_pred)?;
182        
183        let hessian = y_pred.iter()
184            .map(|&y_hat| {
185                let p = self.sigmoid(y_hat);
186                p * (1.0 - p)
187            })
188            .collect();
189        
190        Ok((gradient, hessian))
191    }
192    
193    /// Transforms raw predictions to probabilities using sigmoid.
194    fn transform(&self, y_pred: &[f64]) -> Vec<f64> {
195        y_pred.iter()
196            .map(|&x| self.log_odds_to_probability(x))
197            .collect()
198    }
199    
200    fn name(&self) -> &str {
201        "log_loss"
202    }
203    
204    fn is_regression(&self) -> bool {
205        false
206    }
207    
208    fn is_classification(&self) -> bool {
209        true
210    }
211    
212    /// Computes optimal initial prediction as log-odds of positive class proportion.
213    fn initial_prediction(&self, y_true: &[f64]) -> ObjectiveResult<f64> {
214        if y_true.is_empty() {
215            return Err(ObjectiveError::InvalidInput("Empty targets".to_string()));
216        }
217        
218        // For classification, use log-odds of positive class proportion
219        let positive_count = y_true.iter().filter(|&&y| y == 1.0).count() as f64;
220        let positive_ratio = positive_count / y_true.len() as f64;
221        let positive_ratio_clamped = positive_ratio.max(self.epsilon).min(1.0 - self.epsilon);
222        
223        Ok((positive_ratio_clamped / (1.0 - positive_ratio_clamped)).ln())
224    }
225    
226    /// Validates that targets are binary (0.0 or 1.0).
227    fn validate_targets(&self, y_true: &[f64]) -> ObjectiveResult<()> {
228        if y_true.is_empty() {
229            return Err(ObjectiveError::InvalidInput("Empty targets".to_string()));
230        }
231        
232        // Check that targets are either 0 or 1
233        for &y in y_true {
234            if y != 0.0 && y != 1.0 {
235                return Err(ObjectiveError::InvalidInput(
236                    format!("Binary classification targets must be 0 or 1, got {}", y)
237                ));
238            }
239        }
240        
241        Ok(())
242    }
243}
244
245impl BinaryClassificationObjective for LogLossObjective {
246    /// Converts probabilities to binary class predictions using threshold.
247    fn predict_classes(&self, probabilities: &[f64], threshold: f64) -> Vec<f64> {
248        if !(0.0..=1.0).contains(&threshold) {
249            panic!("Threshold must be between 0 and 1");
250        }
251        
252        probabilities.iter()
253            .map(|&p| if p >= threshold { 1.0 } else { 0.0 })
254            .collect()
255    }
256   
257    /// Computes classification accuracy.
258    fn accuracy(&self, y_true: &[f64], y_pred_proba: &[f64], threshold: f64) -> ObjectiveResult<f64> {
259        if y_true.len() != y_pred_proba.len() {
260            return Err(ObjectiveError::InvalidInput(
261                "Targets and predictions must have the same length".to_string()
262            ));
263        }
264        
265        self.validate_targets(y_true)?;
266        
267        let predictions = self.predict_classes(y_pred_proba, threshold);
268        let correct = y_true.iter()
269            .zip(predictions.iter())
270            .filter(|&(&true_val, &pred_val)| (true_val - pred_val).abs() < 1e-10)
271            .count();
272        
273        Ok(correct as f64 / y_true.len() as f64)
274    }
275    
276    /// Computes binary cross-entropy loss with epsilon-clipping for stability.
277    fn binary_cross_entropy(&self, y_true: &[f64], y_pred_proba: &[f64]) -> ObjectiveResult<f64> {
278        if y_true.len() != y_pred_proba.len() {
279            return Err(ObjectiveError::InvalidInput(
280                "Targets and predictions must have the same length".to_string()
281            ));
282        }
283        
284        self.validate_targets(y_true)?;
285        
286        let loss: f64 = y_true.iter()
287            .zip(y_pred_proba.iter())
288            .map(|(&y, &p)| {
289                let p_clipped = self.clip_probability(p);
290                - (y * p_clipped.ln() + (1.0 - y) * (1.0 - p_clipped).ln())
291            })
292            .sum();
293        
294        Ok(loss / y_true.len() as f64)
295    }
296}
297
298impl fmt::Display for LogLossObjective {
299    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
300        write!(f, "LogLoss(epsilon={})", self.epsilon)
301    }
302}
303