sklears_ensemble/adaboost/logit_classifier.rs

use super::helpers::*;
use super::types::*;
use scirs2_core::ndarray::{Array1, Array2};
use sklears_core::{
    error::{Result, SklearsError},
    prelude::{Fit, Predict},
    traits::{Trained, Untrained},
    types::Float,
};
use std::marker::PhantomData;

use super::types::LogitBoostClassifier;
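
// LogitBoost (Friedman, Hastie & Tibshirani, "Additive Logistic Regression:
// A Statistical View of Boosting", Annals of Statistics, 2000) builds an
// additive model by approximate Newton steps on the binomial log-likelihood:
// each round converts the current probability estimates into a working
// response and per-sample weights, fits a regression tree to that response,
// and adds the tree's shrunken predictions to the decision function.
// This implementation supports the binary case only.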
impl LogitBoostClassifier<Untrained> {
    /// Creates an unfitted LogitBoost classifier with the default configuration.
    pub fn new() -> Self {
        Self {
            config: LogitBoostConfig::default(),
            state: PhantomData,
            estimators_: None,
            estimator_weights_: None,
            classes_: None,
            n_classes_: None,
            n_features_in_: None,
            intercept_: None,
        }
    }

    /// Sets the maximum number of boosting rounds.
    pub fn n_estimators(mut self, n_estimators: usize) -> Self {
        self.config.n_estimators = n_estimators;
        self
    }

    /// Sets the shrinkage factor applied to each tree's contribution.
    pub fn learning_rate(mut self, learning_rate: Float) -> Self {
        self.config.learning_rate = learning_rate;
        self
    }

    /// Sets the random seed stored in the configuration.
    pub fn random_state(mut self, random_state: u64) -> Self {
        self.config.random_state = Some(random_state);
        self
    }

    /// Sets the maximum depth of the base trees; `None` falls back to a depth of 3.
    pub fn max_depth(mut self, max_depth: Option<usize>) -> Self {
        self.config.max_depth = max_depth;
        self
    }

    /// Sets the convergence tolerance used for early stopping during fitting.
    pub fn tolerance(mut self, tolerance: Float) -> Self {
        self.config.tolerance = tolerance;
        self
    }

    /// Sets the maximum number of iterations stored in the configuration.
    pub fn max_iter(mut self, max_iter: usize) -> Self {
        self.config.max_iter = max_iter;
        self
    }

    /// Logistic function: sigma(x) = 1 / (1 + e^{-x}).
    fn sigmoid(x: Float) -> Float {
        1.0 / (1.0 + (-x).exp())
    }

    /// Computes the Newton working response `z` and weights `w` for the
    /// current probability estimates: z_i = (y_i - p_i) / (p_i (1 - p_i))
    /// and w_i = p_i (1 - p_i).
    fn calculate_working_response_and_weights(
        &self,
        y: &Array1<Float>,
        p: &Array1<Float>,
    ) -> (Array1<Float>, Array1<Float>) {
        let n_samples = y.len();
        let mut z = Array1::<Float>::zeros(n_samples);
        let mut w = Array1::<Float>::zeros(n_samples);
        for i in 0..n_samples {
            // Clamp probabilities away from 0 and 1 so the division below
            // stays finite.
            let p_i = p[i].clamp(1e-15, 1.0 - 1e-15);
            z[i] = (y[i] - p_i) / (p_i * (1.0 - p_i));
            w[i] = p_i * (1.0 - p_i);
        }

        (z, w)
    }

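    // Derivation note: for the binomial log-likelihood with p = sigmoid(f),
    // the per-sample gradient in f is (y - p) and the curvature is p(1 - p),
    // so one Newton step targets z = (y - p) / (p(1 - p)) with weights
    // w = p(1 - p), which is exactly what the function above returns.
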
    /// Fits a regression tree to the working response `z`.
    ///
    /// Note: the weight vector is currently unused. The `fit` call below
    /// takes no sample weights, so the tree is fit to `z` unweighted; the
    /// parameter is kept (as `_w`) so callers can pass the Newton weights
    /// unchanged.
    fn fit_weighted_tree(
        &self,
        x: &Array2<Float>,
        z: &Array1<Float>,
        _w: &Array1<Float>,
    ) -> Result<DecisionTreeRegressor<Trained>> {
        let base_estimator =
            DecisionTreeRegressor::new().max_depth(self.config.max_depth.unwrap_or(3));

        base_estimator.fit(x, z)
    }
}

impl Default for LogitBoostClassifier<Untrained> {
    fn default() -> Self {
        Self::new()
    }
}

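// A minimal usage sketch (assumes `x: Array2<Float>` with shape
// (n_samples, n_features) and binary labels in `y: Array1<Float>`):
//
//     let model = LogitBoostClassifier::new()
//         .n_estimators(50)
//         .learning_rate(0.1)
//         .fit(&x, &y)?;
//     let proba = model.predict_proba(&x)?; // shape (n_samples, 2)
//     let labels = model.predict(&x)?;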
impl Fit<Array2<Float>, Array1<Float>> for LogitBoostClassifier<Untrained> {
    type Fitted = LogitBoostClassifier<Trained>;

    fn fit(self, x: &Array2<Float>, y: &Array1<Float>) -> Result<Self::Fitted> {
        let (n_samples, n_features) = x.dim();
        if n_samples == 0 {
            return Err(SklearsError::InvalidInput(
                "Cannot fit LogitBoost on empty dataset".to_string(),
            ));
        }
        if self.config.n_estimators == 0 {
            return Err(SklearsError::InvalidParameter {
                name: "n_estimators".to_string(),
                reason: "Number of estimators must be positive".to_string(),
            });
        }

        let classes = AdaBoostClassifier::<Untrained>::find_classes(y);
        let n_classes = classes.len();
        if n_classes != 2 {
            return Err(SklearsError::InvalidInput(
                "LogitBoost currently supports only binary classification".to_string(),
            ));
        }

        // Encode the labels as 0/1 against the two discovered classes.
        let mut y_binary = Array1::<Float>::zeros(n_samples);
        for i in 0..n_samples {
            y_binary[i] = if y[i] == classes[0] { 0.0 } else { 1.0 };
        }

        // Initialize the decision function with the log-odds of the class prior.
        let class_1_count = y_binary.sum();
        let class_0_count = n_samples as Float - class_1_count;
        let initial_logit = if class_1_count > 0.0 && class_0_count > 0.0 {
            (class_1_count / class_0_count).ln()
        } else {
            0.0
        };
        let mut f = Array1::<Float>::from_elem(n_samples, initial_logit);

        let mut estimators = Vec::new();
        let mut estimator_weights = Vec::new();
        for _iteration in 0..self.config.n_estimators {
            // Current probability estimates under the logistic link.
            let mut p = Array1::<Float>::zeros(n_samples);
            for i in 0..n_samples {
                p[i] = Self::sigmoid(f[i]);
            }

            let (z, w) = self.calculate_working_response_and_weights(&y_binary, &p);

            // Early stopping: sqrt(sum_i w_i * z_i^2) is the w-weighted norm
            // of the working response; stop once it falls below the tolerance.
            let gradient_norm: Float = z
                .iter()
                .zip(w.iter())
                .map(|(&z_i, &w_i)| z_i * z_i * w_i)
                .sum::<Float>()
                .sqrt();
            if gradient_norm < self.config.tolerance {
                break;
            }

            // Fit a tree to the working response and take a shrunken step.
            let fitted_estimator = self.fit_weighted_tree(x, &z, &w)?;
            let tree_predictions = fitted_estimator.predict(x)?;
            for i in 0..n_samples {
                f[i] += self.config.learning_rate * tree_predictions[i];
            }
            estimators.push(fitted_estimator);
            estimator_weights.push(self.config.learning_rate);
        }

        if estimators.is_empty() {
            return Err(SklearsError::InvalidInput(
                "LogitBoost failed to fit any estimators".to_string(),
            ));
        }

        Ok(LogitBoostClassifier {
            config: self.config,
            state: PhantomData,
            estimators_: Some(estimators),
            estimator_weights_: Some(Array1::from_vec(estimator_weights)),
            classes_: Some(classes),
            n_classes_: Some(n_classes),
            n_features_in_: Some(n_features),
            intercept_: Some(initial_logit),
        })
    }
}
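
// Given the fitted decision function f, the probability of the second class
// is p = sigmoid(f) = 1 / (1 + e^{-f}); predict_proba below returns
// [1 - p, p] per row, and predict picks the column with the larger value.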

impl LogitBoostClassifier<Trained> {
    /// Returns the fitted base regression trees.
    pub fn estimators(&self) -> &[DecisionTreeRegressor<Trained>] {
        self.estimators_
            .as_ref()
            .expect("LogitBoost should be fitted")
    }

    /// Returns the weight (learning rate) applied to each tree.
    pub fn estimator_weights(&self) -> &Array1<Float> {
        self.estimator_weights_
            .as_ref()
            .expect("LogitBoost should be fitted")
    }

    /// Returns the class labels seen during fitting.
    pub fn classes(&self) -> &Array1<Float> {
        self.classes_.as_ref().expect("LogitBoost should be fitted")
    }

    /// Returns the number of classes (always 2 for this implementation).
    pub fn n_classes(&self) -> usize {
        self.n_classes_.expect("LogitBoost should be fitted")
    }

    /// Returns the number of features seen during fitting.
    pub fn n_features_in(&self) -> usize {
        self.n_features_in_.expect("LogitBoost should be fitted")
    }

    /// Returns the initial log-odds intercept.
    pub fn intercept(&self) -> Float {
        self.intercept_.expect("LogitBoost should be fitted")
    }

    /// Predicts class probabilities. Column 0 holds the probability of
    /// `classes()[0]` and column 1 that of `classes()[1]`.
    pub fn predict_proba(&self, x: &Array2<Float>) -> Result<Array2<Float>> {
        // Input validation and accumulation of the additive model are shared
        // with decision_function below.
        let f = self.decision_function(x)?;
        let n_samples = f.len();

        let mut probabilities = Array2::<Float>::zeros((n_samples, 2));
        for i in 0..n_samples {
            let p1 = LogitBoostClassifier::<Untrained>::sigmoid(f[i]);
            probabilities[[i, 0]] = 1.0 - p1;
            probabilities[[i, 1]] = p1;
        }

        Ok(probabilities)
    }

    /// Computes the additive decision function
    /// f(x) = intercept + sum_m weight_m * tree_m(x).
    pub fn decision_function(&self, x: &Array2<Float>) -> Result<Array1<Float>> {
        let (n_samples, n_features) = x.dim();

        if n_features != self.n_features_in() {
            return Err(SklearsError::FeatureMismatch {
                expected: self.n_features_in(),
                actual: n_features,
            });
        }

        let estimators = self.estimators();
        let weights = self.estimator_weights();

        // Start from the intercept and accumulate each tree's shrunken
        // contribution.
        let mut f = Array1::<Float>::from_elem(n_samples, self.intercept());

        for (estimator, &weight) in estimators.iter().zip(weights.iter()) {
            let tree_predictions = estimator.predict(x)?;
            for i in 0..n_samples {
                f[i] += weight * tree_predictions[i];
            }
        }

        Ok(f)
    }
}

impl Predict<Array2<Float>, Array1<Float>> for LogitBoostClassifier<Trained> {
    fn predict(&self, x: &Array2<Float>) -> Result<Array1<Float>> {
        let probas = self.predict_proba(x)?;
        let classes = self.classes();
        let mut predictions = Array1::<Float>::zeros(probas.nrows());
        for (i, row) in probas.rows().into_iter().enumerate() {
            // Pick the class whose column holds the larger probability.
            let max_idx = row
                .iter()
                .enumerate()
                .max_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap())
                .map(|(idx, _)| idx)
                .unwrap_or(0);
            predictions[i] = classes[max_idx];
        }
        Ok(predictions)
    }
}
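
#[cfg(test)]
mod tests {
    use super::*;

    // Smoke-test sketch: checks shapes and the [1 - p, p] row structure of
    // predict_proba. Exact prediction values are deliberately not asserted,
    // since they depend on the base tree implementation.
    #[test]
    fn logit_boost_binary_smoke() {
        let x = Array2::from_shape_vec((4, 1), vec![0.0, 0.1, 0.9, 1.0]).unwrap();
        let y = Array1::from_vec(vec![0.0, 0.0, 1.0, 1.0]);

        let model = LogitBoostClassifier::new()
            .n_estimators(10)
            .learning_rate(0.5)
            .fit(&x, &y)
            .expect("fit should succeed on a non-degenerate binary dataset");

        let proba = model.predict_proba(&x).unwrap();
        assert_eq!(proba.dim(), (4, 2));
        // Rows are [1 - p, p] by construction, so they sum to 1.
        for row in proba.rows() {
            assert!((row.sum() - 1.0).abs() < 1e-6);
        }

        let predictions = model.predict(&x).unwrap();
        assert_eq!(predictions.len(), 4);
    }
}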