use scirs2_core::ndarray::{Array1, Array2};
use sklears_core::{
    error::{Result, SklearsError},
    traits::{Fit, Predict},
    types::Float,
};

/// Loss functions supported by the gradient boosting estimators.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum LossFunction {
    /// Squared (L2) loss.
    SquaredLoss,
    /// Absolute (L1) loss.
    AbsoluteLoss,
    /// Huber loss: quadratic near zero, linear in the tails.
    HuberLoss,
    /// Quantile loss.
    QuantileLoss,
    /// Logistic loss for binary classification.
    LogisticLoss,
    /// Deviance loss.
    DevianceLoss,
    /// Exponential loss.
    ExponentialLoss,
    /// Modified Huber loss.
    ModifiedHuberLoss,
    /// Pseudo-Huber loss, a smooth approximation of the Huber loss.
    PseudoHuber,
    /// Fair loss.
    Fair,
    /// Log-cosh loss.
    LogCosh,
    /// Epsilon-insensitive loss.
    EpsilonInsensitive,
    /// Tukey (biweight) loss.
    Tukey,
    /// Cauchy loss.
    Cauchy,
    /// Welsch loss.
    Welsch,
}

impl LossFunction {
    /// Evaluate the loss for a single (true, predicted) pair.
    pub fn loss(&self, y_true: Float, y_pred: Float) -> Float {
        match self {
            LossFunction::SquaredLoss => 0.5 * (y_true - y_pred).powi(2),
            LossFunction::AbsoluteLoss => (y_true - y_pred).abs(),
            LossFunction::HuberLoss => {
                // Transition point between the quadratic and linear regions.
                let delta = 1.0;
                let residual = y_true - y_pred;
                if residual.abs() <= delta {
                    0.5 * residual.powi(2)
                } else {
                    delta * (residual.abs() - 0.5 * delta)
                }
            }
            LossFunction::LogisticLoss => {
                // Numerically stable evaluation of ln(1 + exp(-z)).
                let z = y_true * y_pred;
                if z > 0.0 {
                    (1.0 + (-z).exp()).ln()
                } else {
                    -z + (1.0 + z.exp()).ln()
                }
            }
            LossFunction::PseudoHuber => {
                let delta: Float = 1.0;
                let residual = y_true - y_pred;
                delta.powi(2) * ((1.0 + (residual / delta).powi(2)).sqrt() - 1.0)
            }
            LossFunction::Fair => {
                // Scale parameter fixed at 1.0.
                let c = 1.0;
                let residual = y_true - y_pred;
                c * (residual.abs() / c - (1.0 + residual.abs() / c).ln())
            }
            LossFunction::LogCosh => {
                let residual = y_true - y_pred;
                residual.cosh().ln()
            }
            // Fallback: squared loss, matching the default gradient and hessian below.
            _ => 0.5 * (y_true - y_pred).powi(2),
        }
    }

    /// First derivative of the loss with respect to the prediction.
    pub fn gradient(&self, y_true: Float, y_pred: Float) -> Float {
        match self {
            LossFunction::SquaredLoss => y_pred - y_true,
            LossFunction::AbsoluteLoss => {
                if y_pred > y_true {
                    1.0
                } else if y_pred < y_true {
                    -1.0
                } else {
                    0.0
                }
            }
            LossFunction::HuberLoss => {
                let delta = 1.0;
                let residual = y_pred - y_true;
                if residual.abs() <= delta {
                    residual
                } else {
                    delta * residual.signum()
                }
            }
            LossFunction::LogisticLoss => {
                let z = y_true * y_pred;
                -y_true / (1.0 + z.exp())
            }
            LossFunction::PseudoHuber => {
                let delta = 1.0;
                let residual = y_pred - y_true;
                residual / (1.0 + (residual / delta).powi(2)).sqrt()
            }
            LossFunction::Fair => {
                let c = 1.0;
                let residual = y_pred - y_true;
                residual / (1.0 + residual.abs() / c)
            }
            LossFunction::LogCosh => {
                let residual = y_pred - y_true;
                residual.tanh()
            }
            // Fallback: gradient of the squared loss.
            _ => y_pred - y_true,
        }
    }

    /// Second derivative of the loss with respect to the prediction.
    pub fn hessian(&self, y_true: Float, y_pred: Float) -> Float {
        match self {
            LossFunction::SquaredLoss => 1.0,
            LossFunction::AbsoluteLoss => 0.0,
            LossFunction::HuberLoss => {
                let delta = 1.0;
                let residual = y_pred - y_true;
                if residual.abs() <= delta {
                    1.0
                } else {
                    0.0
                }
            }
            LossFunction::LogisticLoss => {
                let z = y_true * y_pred;
                let exp_z = z.exp();
                y_true.powi(2) * exp_z / (1.0 + exp_z).powi(2)
            }
            LossFunction::PseudoHuber => {
                let delta = 1.0;
                let residual = y_pred - y_true;
                1.0 / (1.0 + (residual / delta).powi(2)).powf(1.5)
            }
            LossFunction::Fair => {
                let c = 1.0;
                let residual = y_pred - y_true;
                c / (c + residual.abs()).powi(2)
            }
            LossFunction::LogCosh => {
                let residual = y_pred - y_true;
                1.0 - residual.tanh().powi(2)
            }
            // Fallback: hessian of the squared loss.
            _ => 1.0,
        }
    }

    /// Whether this loss function is robust to outliers.
    pub fn is_robust(&self) -> bool {
        matches!(
            self,
            LossFunction::AbsoluteLoss
                | LossFunction::HuberLoss
                | LossFunction::PseudoHuber
                | LossFunction::Fair
                | LossFunction::Tukey
                | LossFunction::Cauchy
                | LossFunction::Welsch
        )
    }
}
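
// Sanity-check sketch for the loss functions above: verifies that the analytic
// gradients agree with a central finite-difference approximation. The step size
// (1e-5), tolerance (1e-4), and evaluation point are arbitrary choices, and
// `Float` is assumed to be an f64-like scalar.
#[cfg(test)]
mod loss_gradient_checks {
    use super::*;

    #[test]
    fn analytic_gradients_match_finite_differences() {
        let losses = [
            LossFunction::SquaredLoss,
            LossFunction::HuberLoss,
            LossFunction::PseudoHuber,
            LossFunction::LogCosh,
        ];
        let (y_true, y_pred) = (1.0, 0.3);
        let eps = 1e-5;
        for loss in losses {
            // Central difference: (L(y, f + eps) - L(y, f - eps)) / (2 * eps).
            let numeric =
                (loss.loss(y_true, y_pred + eps) - loss.loss(y_true, y_pred - eps)) / (2.0 * eps);
            let analytic = loss.gradient(y_true, y_pred);
            assert!((numeric - analytic).abs() < 1e-4);
        }
    }
}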

/// Weak learner used at each boosting stage.
#[derive(Debug, Clone)]
pub enum GradientBoostingTree {
    /// Standard decision tree.
    DecisionTree,
    /// Histogram-based decision tree.
    HistogramTree,
    /// Neural network weak learner.
    NeuralNetwork,
}

/// Configuration shared by the gradient boosting classifier and regressor.
#[derive(Debug, Clone)]
pub struct GradientBoostingConfig {
    pub n_estimators: usize,
    pub learning_rate: Float,
    pub max_depth: usize,
    pub min_samples_split: usize,
    pub min_samples_leaf: usize,
    pub subsample: Float,
    pub loss_function: LossFunction,
    pub random_state: Option<u64>,
    pub tree_type: GradientBoostingTree,
    pub early_stopping: Option<usize>,
    pub validation_fraction: Float,
}

impl Default for GradientBoostingConfig {
    fn default() -> Self {
        Self {
            n_estimators: 100,
            learning_rate: 0.1,
            max_depth: 3,
            min_samples_split: 2,
            min_samples_leaf: 1,
            subsample: 1.0,
            loss_function: LossFunction::SquaredLoss,
            random_state: None,
            tree_type: GradientBoostingTree::DecisionTree,
            early_stopping: None,
            validation_fraction: 0.1,
        }
    }
}
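
// Example sketch: deriving a configuration from the defaults via struct-update
// syntax. The function name and the specific overrides are illustrative only.
#[allow(dead_code)]
fn example_robust_config() -> GradientBoostingConfig {
    GradientBoostingConfig {
        n_estimators: 200,
        learning_rate: 0.05,
        loss_function: LossFunction::HuberLoss,
        ..Default::default()
    }
}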

/// Per-feature importance metrics (gain, split frequency, and cover).
#[derive(Debug, Clone)]
pub struct FeatureImportanceMetrics {
    pub gain: Array1<Float>,
    pub frequency: Array1<Float>,
    pub cover: Array1<Float>,
}

impl FeatureImportanceMetrics {
    /// Create zero-initialized importance metrics for `n_features` features.
    pub fn new(n_features: usize) -> Self {
        Self {
            gain: Array1::zeros(n_features),
            frequency: Array1::zeros(n_features),
            cover: Array1::zeros(n_features),
        }
    }
}

/// Gradient boosting classifier (unfitted estimator holding its configuration).
#[derive(Debug, Clone)]
pub struct GradientBoostingClassifier {
    config: GradientBoostingConfig,
}

impl GradientBoostingClassifier {
    pub fn new(config: GradientBoostingConfig) -> Self {
        Self { config }
    }

    pub fn builder() -> GradientBoostingClassifierBuilder {
        GradientBoostingClassifierBuilder::default()
    }
}

/// Gradient boosting classifier after fitting.
#[derive(Debug, Clone)]
pub struct TrainedGradientBoostingClassifier {
    config: GradientBoostingConfig,
    feature_importance: FeatureImportanceMetrics,
    n_features: usize,
    classes: Array1<Float>,
}

impl Fit<Array2<Float>, Array1<Float>> for GradientBoostingClassifier {
    type Fitted = TrainedGradientBoostingClassifier;

    fn fit(self, X: &Array2<Float>, _y: &Array1<Float>) -> Result<Self::Fitted> {
        let n_features = X.ncols();
        // Training is not yet implemented: the labels are ignored and the class
        // set is hard-coded to binary {0, 1}.
        let classes = Array1::from_vec(vec![0.0, 1.0]);

        Ok(TrainedGradientBoostingClassifier {
            config: self.config,
            feature_importance: FeatureImportanceMetrics::new(n_features),
            n_features,
            classes,
        })
    }
}

impl Predict<Array2<Float>, Array1<Float>> for TrainedGradientBoostingClassifier {
    fn predict(&self, X: &Array2<Float>) -> Result<Array1<Float>> {
        if X.ncols() != self.n_features {
            return Err(SklearsError::FeatureMismatch {
                expected: self.n_features,
                actual: X.ncols(),
            });
        }

        // Placeholder prediction: returns the zero class for every sample.
        Ok(Array1::zeros(X.nrows()))
    }
}

impl TrainedGradientBoostingClassifier {
    pub fn feature_importances_gain(&self) -> &Array1<Float> {
        &self.feature_importance.gain
    }

    pub fn feature_importances_frequency(&self) -> &Array1<Float> {
        &self.feature_importance.frequency
    }

    pub fn feature_importances_cover(&self) -> &Array1<Float> {
        &self.feature_importance.cover
    }
}

/// Gradient boosting regressor (unfitted estimator holding its configuration).
#[derive(Debug, Clone)]
pub struct GradientBoostingRegressor {
    config: GradientBoostingConfig,
}

impl GradientBoostingRegressor {
    pub fn new(config: GradientBoostingConfig) -> Self {
        Self { config }
    }

    pub fn builder() -> GradientBoostingRegressorBuilder {
        GradientBoostingRegressorBuilder::default()
    }
}

/// Gradient boosting regressor after fitting.
#[derive(Debug, Clone)]
pub struct TrainedGradientBoostingRegressor {
    config: GradientBoostingConfig,
    feature_importance: FeatureImportanceMetrics,
    n_features: usize,
}

impl Fit<Array2<Float>, Array1<Float>> for GradientBoostingRegressor {
    type Fitted = TrainedGradientBoostingRegressor;

    fn fit(self, X: &Array2<Float>, _y: &Array1<Float>) -> Result<Self::Fitted> {
        // Training is not yet implemented: only the feature count is recorded.
        let n_features = X.ncols();

        Ok(TrainedGradientBoostingRegressor {
            config: self.config,
            feature_importance: FeatureImportanceMetrics::new(n_features),
            n_features,
        })
    }
}

impl Predict<Array2<Float>, Array1<Float>> for TrainedGradientBoostingRegressor {
    fn predict(&self, X: &Array2<Float>) -> Result<Array1<Float>> {
        if X.ncols() != self.n_features {
            return Err(SklearsError::FeatureMismatch {
                expected: self.n_features,
                actual: X.ncols(),
            });
        }

        // Placeholder prediction: returns zero for every sample.
        Ok(Array1::zeros(X.nrows()))
    }
}

impl TrainedGradientBoostingRegressor {
    pub fn feature_importances_gain(&self) -> &Array1<Float> {
        &self.feature_importance.gain
    }

    pub fn feature_importances_frequency(&self) -> &Array1<Float> {
        &self.feature_importance.frequency
    }

    pub fn feature_importances_cover(&self) -> &Array1<Float> {
        &self.feature_importance.cover
    }
}

/// Builder for [`GradientBoostingClassifier`].
#[derive(Debug, Default)]
pub struct GradientBoostingClassifierBuilder {
    config: GradientBoostingConfig,
}

impl GradientBoostingClassifierBuilder {
    pub fn n_estimators(mut self, n_estimators: usize) -> Self {
        self.config.n_estimators = n_estimators;
        self
    }

    pub fn learning_rate(mut self, learning_rate: Float) -> Self {
        self.config.learning_rate = learning_rate;
        self
    }

    pub fn max_depth(mut self, max_depth: usize) -> Self {
        self.config.max_depth = max_depth;
        self
    }

    pub fn loss_function(mut self, loss_function: LossFunction) -> Self {
        self.config.loss_function = loss_function;
        self
    }

    pub fn tree_type(mut self, tree_type: GradientBoostingTree) -> Self {
        self.config.tree_type = tree_type;
        self
    }

    pub fn build(self) -> GradientBoostingClassifier {
        GradientBoostingClassifier::new(self.config)
    }
}

/// Builder for [`GradientBoostingRegressor`].
#[derive(Debug, Default)]
pub struct GradientBoostingRegressorBuilder {
    config: GradientBoostingConfig,
}

impl GradientBoostingRegressorBuilder {
    pub fn n_estimators(mut self, n_estimators: usize) -> Self {
        self.config.n_estimators = n_estimators;
        self
    }

    pub fn learning_rate(mut self, learning_rate: Float) -> Self {
        self.config.learning_rate = learning_rate;
        self
    }

    pub fn max_depth(mut self, max_depth: usize) -> Self {
        self.config.max_depth = max_depth;
        self
    }

    pub fn loss_function(mut self, loss_function: LossFunction) -> Self {
        self.config.loss_function = loss_function;
        self
    }

    pub fn tree_type(mut self, tree_type: GradientBoostingTree) -> Self {
        self.config.tree_type = tree_type;
        self
    }

    pub fn build(self) -> GradientBoostingRegressor {
        GradientBoostingRegressor::new(self.config)
    }
}
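
// Usage sketch: the intended builder -> fit -> predict flow. Because `fit` and
// `predict` above are still stubs, the predictions are all zeros; the data values
// and hyperparameters here are illustrative only.
#[cfg(test)]
mod usage_sketch {
    use super::*;

    #[test]
    fn classifier_builder_fit_predict() {
        // Four samples with two features each.
        let x = Array2::from_shape_vec((4, 2), vec![0.0, 1.0, 1.0, 0.0, 0.5, 0.5, 1.0, 1.0])
            .expect("shape matches data length");
        let y = Array1::from_vec(vec![0.0, 1.0, 0.0, 1.0]);

        let model = GradientBoostingClassifier::builder()
            .n_estimators(50)
            .learning_rate(0.1)
            .loss_function(LossFunction::LogisticLoss)
            .build();

        let trained = model.fit(&x, &y).expect("fit should accept the training data");
        let predictions = trained.predict(&x).expect("feature count matches training data");
        assert_eq!(predictions.len(), 4);
    }
}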