aprender-core 0.29.1

Next-generation machine learning library in pure Rust
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431

#[test]
fn test_gradient_boosting_n_estimators_effect() {
    // Two well-separated 2-D clusters, three samples each (row-major layout).
    let features = Matrix::from_vec(
        6,
        2,
        vec![
            0.0, 0.0, 0.1, 0.1, 0.0, 0.2, // class 0
            1.0, 1.0, 0.9, 0.9, 1.0, 0.8, // class 1
        ],
    )
    .expect("Matrix creation should succeed in tests");
    let labels = vec![0, 0, 0, 1, 1, 1];

    // Fit a small ensemble and a large ensemble with the same learning rate.
    let mut small_ensemble = GradientBoostingClassifier::new()
        .with_learning_rate(0.1)
        .with_n_estimators(5);
    small_ensemble
        .fit(&features, &labels)
        .expect("fit should succeed");

    let mut large_ensemble = GradientBoostingClassifier::new()
        .with_learning_rate(0.1)
        .with_n_estimators(50);
    large_ensemble
        .fit(&features, &labels)
        .expect("fit should succeed");

    // Requesting more estimators must never produce fewer fitted trees.
    assert!(large_ensemble.n_estimators() >= small_ensemble.n_estimators());
}

#[test]
fn test_gradient_boosting_max_depth_effect() {
    // Two well-separated 2-D clusters, three samples each (row-major layout).
    let x = Matrix::from_vec(
        6,
        2,
        vec![
            0.0, 0.0, 0.1, 0.1, 0.0, 0.2, // class 0
            1.0, 1.0, 0.9, 0.9, 1.0, 0.8, // class 1
        ],
    )
    .expect("Matrix creation should succeed in tests");
    let y = vec![0, 0, 0, 1, 1, 1];

    // Same ensemble size, two depth caps: stumps vs depth-5 trees.
    let mut stump_model = GradientBoostingClassifier::new()
        .with_max_depth(1)
        .with_n_estimators(20);
    stump_model.fit(&x, &y).expect("fit should succeed");
    let pred_shallow = stump_model.predict(&x).expect("predict should succeed");

    let mut deep_model = GradientBoostingClassifier::new()
        .with_max_depth(5)
        .with_n_estimators(20);
    deep_model.fit(&x, &y).expect("fit should succeed");
    let pred_deep = deep_model.predict(&x).expect("predict should succeed");

    // Either depth setting must still yield one prediction per sample.
    assert_eq!(pred_shallow.len(), 6);
    assert_eq!(pred_deep.len(), 6);
}

#[test]
fn test_gradient_boosting_binary_classification() {
    // A slightly larger binary problem: two tight 2-D clusters of five rows each.
    let x = Matrix::from_vec(
        10,
        2,
        vec![
            // Class 0 (bottom-left cluster)
            0.0, 0.0, 0.1, 0.1, 0.0, 0.2, 0.2, 0.0, 0.1, 0.2, // Class 1 (top-right cluster)
            1.0, 1.0, 0.9, 0.9, 1.0, 0.8, 0.8, 1.0, 0.9, 1.1,
        ],
    )
    .expect("Matrix creation should succeed in tests");
    let y = vec![0, 0, 0, 0, 0, 1, 1, 1, 1, 1];

    let mut model = GradientBoostingClassifier::new()
        .with_n_estimators(30)
        .with_learning_rate(0.1)
        .with_max_depth(3);
    model.fit(&x, &y).expect("fit should succeed");

    // Count training-set hits by pairing predictions with ground truth.
    let predictions = model.predict(&x).expect("predict should succeed");
    let correct = y
        .iter()
        .zip(predictions.iter())
        .filter(|(truth, pred)| truth == pred)
        .count();

    // Should get at least 7 out of 10 correct for well-separated clusters
    assert!(
        correct >= 7,
        "Expected at least 7/10 correct, got {correct}/10"
    );
}

#[test]
fn test_gradient_boosting_default() {
    // `Default` must agree with `new()` on every hyperparameter.
    let from_new = GradientBoostingClassifier::new();
    let from_default = GradientBoostingClassifier::default();

    assert_eq!(
        from_new.configured_n_estimators(),
        from_default.configured_n_estimators()
    );
    let lr_gap = (from_new.learning_rate() - from_default.learning_rate()).abs();
    assert!(lr_gap < 1e-6);
    assert_eq!(from_new.max_depth(), from_default.max_depth());
}

// ========================================================================
// Decision Tree Regression Tests (RED Phase - Issue #29)
// ========================================================================

#[test]
fn test_regression_tree_creation() {
    // A freshly constructed regressor is unfitted and has no depth cap.
    let regressor = DecisionTreeRegressor::new();
    assert!(regressor.max_depth.is_none());
    assert!(regressor.tree.is_none());
}

#[test]
fn test_regression_tree_with_max_depth() {
    // The builder method should record the requested depth cap.
    let regressor = DecisionTreeRegressor::new().with_max_depth(5);
    assert_eq!(regressor.max_depth, Some(5));
}

#[test]
fn test_regression_tree_fit_simple_linear() {
    // Linear target y = 2x + 1; a depth-3 tree should track it closely.
    let x = Matrix::from_vec(5, 1, vec![1.0, 2.0, 3.0, 4.0, 5.0])
        .expect("Matrix creation should succeed in tests");
    let y = Vector::from_slice(&[3.0, 5.0, 7.0, 9.0, 11.0]);

    let mut model = DecisionTreeRegressor::new().with_max_depth(3);
    model.fit(&x, &y).expect("fit should succeed");
    let predictions = model.predict(&x);

    // Each prediction must land within 2.0 of its true target.
    for (&got, &want) in predictions.as_slice().iter().zip(y.as_slice()) {
        assert!(
            (got - want).abs() < 2.0,
            "Prediction {} too far from true value {}",
            got,
            want
        );
    }
}

#[test]
fn test_regression_tree_predict_nonlinear() {
    // Quadratic target y = x^2; depth 4 gives enough splits to approximate it.
    let x = Matrix::from_vec(5, 1, vec![1.0, 2.0, 3.0, 4.0, 5.0])
        .expect("Matrix creation should succeed in tests");
    let y = Vector::from_slice(&[1.0, 4.0, 9.0, 16.0, 25.0]);

    let mut model = DecisionTreeRegressor::new().with_max_depth(4);
    model.fit(&x, &y).expect("fit should succeed");
    let predictions = model.predict(&x);

    // Mean squared error over the training points, computed via an iterator.
    let mse = predictions
        .as_slice()
        .iter()
        .zip(y.as_slice())
        .map(|(p, t)| (p - t).powi(2))
        .sum::<f32>()
        / predictions.len() as f32;

    assert!(mse < 50.0, "MSE {mse} too high for quadratic fit");
}

#[test]
fn test_regression_tree_score() {
    // y = 2x is easy to memorize, so training R² should be near-perfect.
    let x = Matrix::from_vec(4, 1, vec![1.0, 2.0, 3.0, 4.0])
        .expect("Matrix creation should succeed in tests");
    let y = Vector::from_slice(&[2.0, 4.0, 6.0, 8.0]);

    let mut model = DecisionTreeRegressor::new().with_max_depth(3);
    model.fit(&x, &y).expect("fit should succeed");
    let r2 = model.score(&x, &y);

    // High on training data, and never above the theoretical maximum of 1.0.
    assert!(r2 > 0.5, "R² score {r2} too low");
    assert!(r2 <= 1.0, "R² score {r2} exceeds maximum");
}

#[test]
fn test_regression_tree_max_depth_limits_complexity() {
    // Strongly nonlinear target (y = x^2) so extra depth has something to fit.
    let x = Matrix::from_vec(8, 1, vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0])
        .expect("Matrix creation should succeed in tests");
    let y = Vector::from_slice(&[1.0, 4.0, 9.0, 16.0, 25.0, 36.0, 49.0, 64.0]);

    // Fit a depth-1 stump and a depth-5 tree on the same data.
    let mut tree_shallow = DecisionTreeRegressor::new().with_max_depth(1);
    tree_shallow.fit(&x, &y).expect("fit should succeed");
    let mut tree_deep = DecisionTreeRegressor::new().with_max_depth(5);
    tree_deep.fit(&x, &y).expect("fit should succeed");

    // Neither fitted tree may exceed its configured depth cap.
    let depth_shallow = tree_shallow
        .tree
        .as_ref()
        .expect("tree should exist after fit")
        .depth();
    assert!(
        depth_shallow <= 1,
        "Shallow tree depth {depth_shallow} exceeds max"
    );
    let depth_deep = tree_deep
        .tree
        .as_ref()
        .expect("tree should exist after fit")
        .depth();
    assert!(depth_deep <= 5, "Deep tree depth {depth_deep} exceeds max");

    // The deeper tree has strictly more capacity, so its training fit
    // can only match or beat the stump's.
    let r2_shallow = tree_shallow.score(&x, &y);
    let r2_deep = tree_deep.score(&x, &y);
    assert!(
        r2_deep >= r2_shallow,
        "Deeper tree R²={r2_deep} should be >= shallow tree R²={r2_shallow}"
    );
}

#[test]
#[should_panic(expected = "Model not fitted")]
fn test_regression_tree_predict_before_fit_panics() {
    // Predicting with an unfitted tree must panic with a clear message.
    let x =
        Matrix::from_vec(2, 1, vec![1.0, 2.0]).expect("Matrix creation should succeed in tests");
    let unfitted = DecisionTreeRegressor::new();
    let _ = unfitted.predict(&x); // Should panic
}

#[test]
fn test_regression_tree_multidimensional_features() {
    // Two-feature target: y = x1 + 2*x2 (one sample per source row below).
    let x = Matrix::from_vec(
        6,
        2,
        vec![
            1.0, 1.0, // y = 3
            2.0, 1.0, // y = 4
            1.0, 2.0, // y = 5
            2.0, 2.0, // y = 6
            3.0, 1.0, // y = 5
            1.0, 3.0, // y = 7
        ],
    )
    .expect("Matrix creation should succeed in tests");
    let y = Vector::from_slice(&[3.0, 4.0, 5.0, 6.0, 5.0, 7.0]);

    let mut model = DecisionTreeRegressor::new().with_max_depth(4);
    model.fit(&x, &y).expect("fit should succeed");

    // The tree must capture a reasonable share of the variance.
    let r2 = model.score(&x, &y);
    assert!(r2 > 0.5, "R² score {r2} too low for 2D features");
}

#[test]
fn test_regression_tree_constant_target() {
    // With a constant target, every prediction should equal that constant.
    let x = Matrix::from_vec(4, 1, vec![1.0, 2.0, 3.0, 4.0])
        .expect("Matrix creation should succeed in tests");
    let y = Vector::from_slice(&[5.0, 5.0, 5.0, 5.0]);

    let mut model = DecisionTreeRegressor::new();
    model.fit(&x, &y).expect("fit should succeed");

    let predictions = model.predict(&x);
    predictions.as_slice().iter().for_each(|&pred| {
        assert!(
            (pred - 5.0).abs() < 1e-5,
            "Prediction {pred} should be 5.0 for constant target"
        );
    });
}

#[test]
fn test_regression_tree_single_sample() {
    // Degenerate one-row training set: the tree should echo the lone target.
    let x = Matrix::from_vec(1, 1, vec![5.0]).expect("Matrix creation should succeed in tests");
    let y = Vector::from_slice(&[10.0]);

    let mut model = DecisionTreeRegressor::new();
    model.fit(&x, &y).expect("fit should succeed");

    let predictions = model.predict(&x);
    assert_eq!(predictions.len(), 1);
    assert!((predictions[0] - 10.0).abs() < 1e-5);
}

#[test]
fn test_regression_tree_fit_validation() {
    // Three feature rows but only two targets: fit must reject the mismatch.
    let x = Matrix::from_vec(3, 1, vec![1.0, 2.0, 3.0])
        .expect("Matrix creation should succeed in tests");
    let y = Vector::from_slice(&[1.0, 2.0]); // Wrong size

    let mut model = DecisionTreeRegressor::new();
    assert!(
        model.fit(&x, &y).is_err(),
        "Should error on mismatched dimensions"
    );
}

#[test]
fn test_regression_tree_zero_samples() {
    // An empty training set is invalid input and must be rejected.
    let x = Matrix::from_vec(0, 1, vec![]).expect("Matrix creation should succeed in tests");
    let y = Vector::from_slice(&[]);

    let mut model = DecisionTreeRegressor::new();
    assert!(model.fit(&x, &y).is_err(), "Should error on zero samples");
}

#[test]
fn test_regression_tree_min_samples_split() {
    // Quadratic data; min_samples_split=4 forbids splitting nodes with
    // fewer than four samples.
    let x = Matrix::from_vec(6, 1, vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0])
        .expect("Matrix creation should succeed in tests");
    let y = Vector::from_slice(&[1.0, 4.0, 9.0, 16.0, 25.0, 36.0]);

    let mut model = DecisionTreeRegressor::new()
        .with_min_samples_split(4)
        .with_max_depth(5);
    model.fit(&x, &y).expect("fit should succeed");

    // Even with the split constraint, the tree must learn something useful.
    let r2 = model.score(&x, &y);
    assert!(r2 > 0.0, "Tree with min_samples_split should still fit");
}

#[test]
fn test_regression_tree_min_samples_leaf() {
    // min_samples_leaf=3 forces every leaf to keep at least three samples.
    let x = Matrix::from_vec(8, 1, vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0])
        .expect("Matrix creation should succeed in tests");
    let y = Vector::from_slice(&[1.0, 4.0, 9.0, 16.0, 25.0, 36.0, 49.0, 64.0]);

    let mut model = DecisionTreeRegressor::new()
        .with_min_samples_leaf(3)
        .with_max_depth(5);
    model.fit(&x, &y).expect("fit should succeed");

    // Training should complete and yield one prediction per row.
    let predictions = model.predict(&x);
    assert_eq!(predictions.len(), 8);
}

#[test]
fn test_regression_tree_default() {
    // `Default` and `new()` must produce equivalently configured regressors.
    let via_new = DecisionTreeRegressor::new();
    let via_default = DecisionTreeRegressor::default();

    assert_eq!(via_new.max_depth, via_default.max_depth);
    assert_eq!(via_new.tree.is_none(), via_default.tree.is_none());
}

#[test]
fn test_regression_tree_comparison_with_linear_regression() {
    // y = 2x is exactly linear, so both model families should excel on it.
    let x = Matrix::from_vec(5, 1, vec![1.0, 2.0, 3.0, 4.0, 5.0])
        .expect("Matrix creation should succeed in tests");
    let y = Vector::from_slice(&[2.0, 4.0, 6.0, 8.0, 10.0]);

    // Decision-tree fit.
    let mut tree = DecisionTreeRegressor::new().with_max_depth(4);
    tree.fit(&x, &y).expect("fit should succeed");
    let tree_r2 = tree.score(&x, &y);

    // Ordinary least-squares fit for comparison.
    let mut linear = crate::linear_model::LinearRegression::new();
    linear.fit(&x, &y).expect("fit should succeed");
    let lr_r2 = linear.score(&x, &y);

    // Both should achieve high R² on linear training data.
    assert!(tree_r2 > 0.9, "Tree R² {tree_r2} too low on linear data");
    assert!(lr_r2 > 0.99, "Linear regression R² {lr_r2} too low");
}

// ===================================================================
// Random Forest Regression Tests
// ===================================================================

#[test]
fn test_random_forest_regressor_creation() {
    // A new forest records its ensemble size but holds no fitted trees yet.
    let forest = RandomForestRegressor::new(10);
    assert_eq!(forest.n_estimators, 10);
    assert!(forest.max_depth.is_none());
    assert!(forest.trees.is_empty());
}

#[test]
fn test_random_forest_regressor_with_max_depth() {
    // The builder method should store the per-tree depth cap.
    let forest = RandomForestRegressor::new(5).with_max_depth(3);
    assert_eq!(forest.max_depth, Some(3));
}

#[test]
fn test_random_forest_regressor_fit_simple_linear() {
    // Linear target y = 2x + 1 over ten evenly spaced points.
    let x = Matrix::from_vec(
        10,
        1,
        vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0],
    )
    .expect("Matrix creation should succeed in tests");
    let y = Vector::from_slice(&[3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0, 17.0, 19.0, 21.0]);

    let mut forest = RandomForestRegressor::new(10).with_max_depth(5);
    forest.fit(&x, &y).expect("fit should succeed");

    // Every requested estimator should have been trained.
    assert_eq!(forest.trees.len(), 10);

    // Predicting must work, and the training fit should be strong.
    let _predictions = forest.predict(&x);
    let r2 = forest.score(&x, &y);
    assert!(r2 > 0.8, "R² should be high on training data: {r2}");
}