1use alloc::vec;
9use alloc::vec::Vec;
10
11use crate::histogram::BinEdges;
12
13#[derive(Debug, Clone)]
16pub struct FeatureHistogram {
17 pub grad_sums: Vec<f64>,
22 pub hess_sums: Vec<f64>,
24 pub counts: Vec<u64>,
26 pub edges: BinEdges,
28 decay_scale: f64,
38}
39
40impl FeatureHistogram {
41 pub fn new(edges: BinEdges) -> Self {
43 let n = edges.n_bins();
44 Self {
45 grad_sums: vec![0.0; n],
46 hess_sums: vec![0.0; n],
47 counts: vec![0; n],
48 edges,
49 decay_scale: 1.0,
50 }
51 }
52
53 #[inline]
58 pub fn accumulate(&mut self, value: f64, gradient: f64, hessian: f64) {
59 let bin = self.edges.find_bin(value);
60 self.grad_sums[bin] += gradient;
61 self.hess_sums[bin] += hessian;
62 self.counts[bin] += 1;
63 }
64
65 pub fn total_gradient(&self) -> f64 {
69 let raw = {
70 #[cfg(feature = "simd")]
71 {
72 crate::histogram::simd::sum_f64(&self.grad_sums)
73 }
74 #[cfg(not(feature = "simd"))]
75 {
76 self.grad_sums.iter().sum::<f64>()
77 }
78 };
79 raw * self.decay_scale
80 }
81
82 pub fn total_hessian(&self) -> f64 {
86 let raw = {
87 #[cfg(feature = "simd")]
88 {
89 crate::histogram::simd::sum_f64(&self.hess_sums)
90 }
91 #[cfg(not(feature = "simd"))]
92 {
93 self.hess_sums.iter().sum::<f64>()
94 }
95 };
96 raw * self.decay_scale
97 }
98
99 pub fn total_count(&self) -> u64 {
101 self.counts.iter().sum()
102 }
103
104 #[inline]
106 pub fn n_bins(&self) -> usize {
107 self.edges.n_bins()
108 }
109
110 #[inline]
124 pub fn accumulate_with_decay(&mut self, value: f64, gradient: f64, hessian: f64, alpha: f64) {
125 self.decay_scale *= alpha;
126 let inv_scale = 1.0 / self.decay_scale;
127 let bin = self.edges.find_bin(value);
128 self.grad_sums[bin] += gradient * inv_scale;
129 self.hess_sums[bin] += hessian * inv_scale;
130 self.counts[bin] += 1;
131
132 if self.decay_scale < 1e-100 {
136 self.materialize_decay();
137 }
138 }
139
140 #[inline]
146 pub fn materialize_decay(&mut self) {
147 if (self.decay_scale - 1.0).abs() > f64::EPSILON {
148 for i in 0..self.grad_sums.len() {
149 self.grad_sums[i] *= self.decay_scale;
150 self.hess_sums[i] *= self.decay_scale;
151 }
152 self.decay_scale = 1.0;
153 }
154 }
155
156 pub fn reset(&mut self) {
158 self.grad_sums.fill(0.0);
159 self.hess_sums.fill(0.0);
160 self.counts.fill(0);
161 self.decay_scale = 1.0;
162 }
163
164 pub fn subtract(&self, child: &FeatureHistogram) -> FeatureHistogram {
176 debug_assert_eq!(
177 self.n_bins(),
178 child.n_bins(),
179 "cannot subtract histograms with different bin counts"
180 );
181 let n = self.n_bins();
182 let s_self = self.decay_scale;
183 let s_child = child.decay_scale;
184
185 let scales_trivial =
187 (s_self - 1.0).abs() <= f64::EPSILON && (s_child - 1.0).abs() <= f64::EPSILON;
188
189 #[cfg(feature = "simd")]
190 {
191 if scales_trivial {
192 let mut grad_sums = vec![0.0; n];
193 let mut hess_sums = vec![0.0; n];
194 let mut counts = vec![0u64; n];
195 crate::histogram::simd::subtract_f64(
196 &self.grad_sums,
197 &child.grad_sums,
198 &mut grad_sums,
199 );
200 crate::histogram::simd::subtract_f64(
201 &self.hess_sums,
202 &child.hess_sums,
203 &mut hess_sums,
204 );
205 crate::histogram::simd::subtract_u64(&self.counts, &child.counts, &mut counts);
206 return FeatureHistogram {
207 grad_sums,
208 hess_sums,
209 counts,
210 edges: self.edges.clone(),
211 decay_scale: 1.0,
212 };
213 }
214 }
215
216 let _ = scales_trivial; let mut grad_sums = Vec::with_capacity(n);
219 let mut hess_sums = Vec::with_capacity(n);
220 let mut counts = Vec::with_capacity(n);
221
222 for i in 0..n {
223 grad_sums.push(self.grad_sums[i] * s_self - child.grad_sums[i] * s_child);
224 hess_sums.push(self.hess_sums[i] * s_self - child.hess_sums[i] * s_child);
225 counts.push(self.counts[i].saturating_sub(child.counts[i]));
226 }
227
228 FeatureHistogram {
229 grad_sums,
230 hess_sums,
231 counts,
232 edges: self.edges.clone(),
233 decay_scale: 1.0,
234 }
235 }
236}
237
238#[derive(Debug, Clone)]
243pub struct LeafHistograms {
244 pub histograms: Vec<FeatureHistogram>,
245}
246
247impl LeafHistograms {
248 pub fn new(edges_per_feature: &[BinEdges]) -> Self {
250 let histograms = edges_per_feature
251 .iter()
252 .map(|edges| FeatureHistogram::new(edges.clone()))
253 .collect();
254 Self { histograms }
255 }
256
257 pub fn accumulate(&mut self, features: &[f64], gradient: f64, hessian: f64) {
263 debug_assert_eq!(
264 features.len(),
265 self.histograms.len(),
266 "feature count mismatch: got {} features but have {} histograms",
267 features.len(),
268 self.histograms.len(),
269 );
270 for (hist, &value) in self.histograms.iter_mut().zip(features.iter()) {
271 hist.accumulate(value, gradient, hessian);
272 }
273 }
274
275 pub fn accumulate_with_decay(
279 &mut self,
280 features: &[f64],
281 gradient: f64,
282 hessian: f64,
283 alpha: f64,
284 ) {
285 debug_assert_eq!(
286 features.len(),
287 self.histograms.len(),
288 "feature count mismatch: got {} features but have {} histograms",
289 features.len(),
290 self.histograms.len(),
291 );
292 for (hist, &value) in self.histograms.iter_mut().zip(features.iter()) {
293 hist.accumulate_with_decay(value, gradient, hessian, alpha);
294 }
295 }
296
297 pub fn total_count(&self) -> u64 {
302 self.histograms.first().map_or(0, |h| h.total_count())
303 }
304
305 #[inline]
307 pub fn n_features(&self) -> usize {
308 self.histograms.len()
309 }
310
311 pub fn materialize_decay(&mut self) {
316 for hist in &mut self.histograms {
317 hist.materialize_decay();
318 }
319 }
320
321 pub fn reset(&mut self) {
323 for hist in &mut self.histograms {
324 hist.reset();
325 }
326 }
327}
328
329#[cfg(test)]
330mod tests {
331 use super::*;
332 use crate::histogram::BinEdges;
333
334 fn four_bin_edges() -> BinEdges {
337 BinEdges {
338 edges: vec![2.0, 4.0, 6.0],
339 }
340 }
341
342 #[test]
343 fn feature_histogram_new_is_zeroed() {
344 let h = FeatureHistogram::new(four_bin_edges());
345 assert_eq!(h.n_bins(), 4);
346 assert_eq!(h.total_gradient(), 0.0);
347 assert_eq!(h.total_hessian(), 0.0);
348 assert_eq!(h.total_count(), 0);
349 }
350
351 #[test]
352 fn feature_histogram_accumulate_single() {
353 let mut h = FeatureHistogram::new(four_bin_edges());
354 h.accumulate(3.0, 1.5, 0.5);
356 assert_eq!(h.counts[1], 1);
357 assert_eq!(h.grad_sums[1], 1.5);
358 assert_eq!(h.hess_sums[1], 0.5);
359 assert_eq!(h.total_count(), 1);
360 }
361
362 #[test]
363 fn feature_histogram_accumulate_multiple() {
364 let mut h = FeatureHistogram::new(four_bin_edges());
365 h.accumulate(1.0, 0.1, 0.01);
367 h.accumulate(0.5, 0.2, 0.02);
368 h.accumulate(2.0, 0.3, 0.03);
370 h.accumulate(5.0, 0.4, 0.04);
372 h.accumulate(7.0, 0.5, 0.05);
374 h.accumulate(8.0, 0.6, 0.06);
375
376 assert_eq!(h.counts, vec![2, 1, 1, 2]);
377 assert_eq!(h.total_count(), 6);
378
379 let total_grad = 0.1 + 0.2 + 0.3 + 0.4 + 0.5 + 0.6;
380 assert!((h.total_gradient() - total_grad).abs() < 1e-12);
381
382 let total_hess = 0.01 + 0.02 + 0.03 + 0.04 + 0.05 + 0.06;
383 assert!((h.total_hessian() - total_hess).abs() < 1e-12);
384 }
385
386 #[test]
387 fn feature_histogram_subtraction_trick() {
388 let edges = four_bin_edges();
389 let mut parent = FeatureHistogram::new(edges.clone());
390 let mut child = FeatureHistogram::new(edges);
391
392 parent.accumulate(1.0, 0.1, 0.01);
394 parent.accumulate(3.0, 0.2, 0.02);
395 parent.accumulate(5.0, 0.3, 0.03);
396 parent.accumulate(7.0, 0.4, 0.04);
397 parent.accumulate(1.5, 0.5, 0.05);
398 parent.accumulate(5.5, 0.6, 0.06);
399
400 child.accumulate(1.0, 0.1, 0.01);
402 child.accumulate(5.0, 0.3, 0.03);
403 child.accumulate(7.0, 0.4, 0.04);
404
405 let sibling = parent.subtract(&child);
406
407 assert_eq!(sibling.total_count(), 3);
409 assert_eq!(sibling.counts[0], 1);
411 assert!((sibling.grad_sums[0] - 0.5).abs() < 1e-12);
412 assert!((sibling.hess_sums[0] - 0.05).abs() < 1e-12);
413 assert_eq!(sibling.counts[1], 1);
415 assert!((sibling.grad_sums[1] - 0.2).abs() < 1e-12);
416 assert_eq!(sibling.counts[2], 1);
418 assert!((sibling.grad_sums[2] - 0.6).abs() < 1e-12);
419 assert_eq!(sibling.counts[3], 0);
421 assert!((sibling.grad_sums[3]).abs() < 1e-12);
422 }
423
424 #[test]
425 fn feature_histogram_reset() {
426 let mut h = FeatureHistogram::new(four_bin_edges());
427 h.accumulate(1.0, 1.0, 1.0);
428 h.accumulate(3.0, 2.0, 2.0);
429 assert_eq!(h.total_count(), 2);
430
431 h.reset();
432 assert_eq!(h.total_count(), 0);
433 assert_eq!(h.total_gradient(), 0.0);
434 assert_eq!(h.total_hessian(), 0.0);
435 assert_eq!(h.n_bins(), 4); }
437
438 #[test]
439 fn leaf_histograms_multi_feature() {
440 let edges_f0 = BinEdges { edges: vec![5.0] }; let edges_f1 = BinEdges {
442 edges: vec![2.0, 4.0, 6.0],
443 }; let mut leaf = LeafHistograms::new(&[edges_f0, edges_f1]);
446 assert_eq!(leaf.n_features(), 2);
447
448 leaf.accumulate(&[3.0, 5.0], 1.0, 0.1);
450 leaf.accumulate(&[7.0, 1.0], 2.0, 0.2);
452 leaf.accumulate(&[3.0, 3.0], 3.0, 0.3);
454
455 assert_eq!(leaf.total_count(), 3);
456
457 let h0 = &leaf.histograms[0];
459 assert_eq!(h0.counts, vec![2, 1]); assert!((h0.total_gradient() - 6.0).abs() < 1e-12);
461 assert!((h0.total_hessian() - 0.6).abs() < 1e-12);
462
463 let h1 = &leaf.histograms[1];
465 assert_eq!(h1.counts, vec![1, 1, 1, 0]);
466 assert!((h1.total_gradient() - 6.0).abs() < 1e-12);
467 }
468
469 #[test]
470 fn leaf_histograms_reset() {
471 let edges = BinEdges { edges: vec![3.0] };
472 let mut leaf = LeafHistograms::new(&[edges.clone(), edges]);
473 leaf.accumulate(&[1.0, 5.0], 1.0, 1.0);
474 assert_eq!(leaf.total_count(), 1);
475
476 leaf.reset();
477 assert_eq!(leaf.total_count(), 0);
478 assert_eq!(leaf.n_features(), 2);
479 }
480
481 #[test]
482 fn leaf_histograms_empty() {
483 let leaf = LeafHistograms::new(&[]);
484 assert_eq!(leaf.n_features(), 0);
485 assert_eq!(leaf.total_count(), 0);
486 }
487
488 #[test]
489 fn single_edge_histogram() {
490 let edges = BinEdges { edges: vec![5.0] };
492 let mut h = FeatureHistogram::new(edges);
493 assert_eq!(h.n_bins(), 2);
494
495 h.accumulate(3.0, 1.0, 0.1);
496 h.accumulate(5.0, 2.0, 0.2); h.accumulate(7.0, 3.0, 0.3);
498
499 assert_eq!(h.counts, vec![1, 2]);
500 assert!((h.grad_sums[0] - 1.0).abs() < 1e-12);
501 assert!((h.grad_sums[1] - 5.0).abs() < 1e-12);
502 }
503
504 #[test]
505 fn no_edges_single_bin() {
506 let edges = BinEdges { edges: vec![] };
508 let mut h = FeatureHistogram::new(edges);
509 assert_eq!(h.n_bins(), 1);
510
511 h.accumulate(42.0, 1.0, 0.5);
512 h.accumulate(-100.0, 2.0, 0.3);
513
514 assert_eq!(h.counts, vec![2]);
515 assert!((h.total_gradient() - 3.0).abs() < 1e-12);
516 assert!((h.total_hessian() - 0.8).abs() < 1e-12);
517 }
518
519 #[test]
520 fn accumulate_with_decay_recent_dominates() {
521 let edges = BinEdges {
523 edges: vec![3.0, 7.0],
524 };
525 let mut h = FeatureHistogram::new(edges);
526 let alpha = 0.9;
527
528 for _ in 0..100 {
530 h.accumulate_with_decay(1.0, 1.0, 1.0, alpha);
531 }
532
533 for _ in 0..50 {
535 h.accumulate_with_decay(8.0, 1.0, 1.0, alpha);
536 }
537
538 h.materialize_decay();
540
541 assert!(
543 h.grad_sums[2] > h.grad_sums[0],
544 "recent bin should dominate: bin2={} > bin0={}",
545 h.grad_sums[2],
546 h.grad_sums[0],
547 );
548 }
549
550 #[test]
551 fn lazy_decay_matches_eager() {
552 let edges = BinEdges {
555 edges: vec![3.0, 6.0],
556 }; let alpha = 0.95;
558
559 let mut lazy = FeatureHistogram::new(edges.clone());
561
562 let n = 3;
565 let mut eager_grad = vec![0.0; n];
566 let mut eager_hess = vec![0.0; n];
567
568 let samples: Vec<(f64, f64, f64)> = vec![
569 (1.0, 0.5, 1.0), (4.0, -0.3, 0.8), (1.0, 0.7, 1.2), (8.0, -1.0, 0.5), (5.0, 0.2, 0.9), (1.0, 0.1, 1.1), (8.0, 0.4, 0.6), (4.0, -0.5, 1.0), ];
578
579 let edge_vals = [3.0, 6.0];
580 for &(value, gradient, hessian) in &samples {
581 for i in 0..n {
583 eager_grad[i] *= alpha;
584 eager_hess[i] *= alpha;
585 }
586 let bin = if value <= edge_vals[0] {
587 0
588 } else if value <= edge_vals[1] {
589 1
590 } else {
591 2
592 };
593 eager_grad[bin] += gradient;
594 eager_hess[bin] += hessian;
595
596 lazy.accumulate_with_decay(value, gradient, hessian, alpha);
598 }
599
600 lazy.materialize_decay();
602
603 for i in 0..n {
604 assert!(
605 (lazy.grad_sums[i] - eager_grad[i]).abs() < 1e-10,
606 "grad_sums[{}]: lazy={}, eager={}",
607 i,
608 lazy.grad_sums[i],
609 eager_grad[i],
610 );
611 assert!(
612 (lazy.hess_sums[i] - eager_hess[i]).abs() < 1e-10,
613 "hess_sums[{}]: lazy={}, eager={}",
614 i,
615 lazy.hess_sums[i],
616 eager_hess[i],
617 );
618 }
619 }
620
621 #[test]
622 fn lazy_decay_total_gradient_without_materialize() {
623 let edges = BinEdges { edges: vec![5.0] }; let alpha = 0.9;
627 let mut h = FeatureHistogram::new(edges);
628
629 h.accumulate_with_decay(1.0, 1.0, 1.0, alpha); h.accumulate_with_decay(7.0, 1.0, 1.0, alpha); let total = h.total_gradient();
635 assert!(
636 (total - 1.9).abs() < 1e-10,
637 "total_gradient should account for decay_scale: got {}",
638 total,
639 );
640 }
641
642 #[test]
643 fn materialize_is_idempotent() {
644 let edges = BinEdges { edges: vec![5.0] };
645 let mut h = FeatureHistogram::new(edges);
646 let alpha = 0.95;
647
648 for _ in 0..50 {
649 h.accumulate_with_decay(1.0, 1.0, 1.0, alpha);
650 }
651
652 h.materialize_decay();
653 let grad_after_first = h.grad_sums.clone();
654
655 h.materialize_decay(); assert_eq!(
657 h.grad_sums, grad_after_first,
658 "second materialize should be a no-op"
659 );
660 }
661
662 #[test]
663 fn lazy_decay_renormalization() {
664 let edges = BinEdges { edges: vec![5.0] };
666 let mut h = FeatureHistogram::new(edges);
667 let alpha = 0.5; for _ in 0..500 {
670 h.accumulate_with_decay(1.0, 1.0, 1.0, alpha);
671 }
672
673 let total = h.total_gradient();
675 assert!(
676 total.is_finite(),
677 "gradient should be finite after renormalization, got {}",
678 total
679 );
680 assert!(total > 0.0, "gradient should be positive, got {}", total);
681
682 assert!(
684 (total - 2.0).abs() < 0.1,
685 "total gradient should converge to ~2.0, got {}",
686 total,
687 );
688 }
689}