1use gam_math::probability::normal_cdf;
2use gam_runtime::resource::{ByteLruCache, ResidentBytes};
3use smallvec::{SmallVec, smallvec};
4use std::hash::{Hash, Hasher};
5use std::sync::Arc;
6use std::sync::atomic::{AtomicU64, Ordering};
7
/// Errors reported by the cubic-cell kernel.
///
/// Every variant carries a free-form `reason` string describing the specific
/// condition that was detected.
#[derive(Debug, Clone)]
pub enum CubicCellKernelError {
    /// An interval was rejected; `reason` explains why.
    InvalidInterval {
        /// Human-readable description of the failure.
        reason: String,
    },
    /// A cell's shape was rejected; `reason` explains why.
    InvalidCellShape {
        /// Human-readable description of the failure.
        reason: String,
    },
    /// Not enough moments were available; `reason` explains why.
    InsufficientMoments {
        /// Human-readable description of the failure.
        reason: String,
    },
    /// A bivariate-normal computation was outside its domain; `reason` explains why.
    BivariateNormalDomain {
        /// Human-readable description of the failure.
        reason: String,
    },
}
33
// Invokes the project macro `impl_reason_error_boilerplate!` for
// `CubicCellKernelError`, listing each of its four `reason`-carrying variants.
// NOTE(review): the macro is defined outside this view; presumably it generates
// the shared `Display`/`Error` plumbing for the enum — confirm against its
// definition before adding or removing variants here.
impl_reason_error_boilerplate! {
    CubicCellKernelError {
        InvalidInterval,
        InvalidCellShape,
        InsufficientMoments,
        BivariateNormalDomain,
    }
}
42
43impl CubicCellKernelError {
44 #[inline]
45 fn invalid_interval(reason: impl Into<String>) -> Self {
46 CubicCellKernelError::InvalidInterval {
47 reason: reason.into(),
48 }
49 }
50 #[inline]
51 fn invalid_cell_shape(reason: impl Into<String>) -> Self {
52 CubicCellKernelError::InvalidCellShape {
53 reason: reason.into(),
54 }
55 }
56 #[inline]
57 fn insufficient_moments(reason: impl Into<String>) -> Self {
58 CubicCellKernelError::InsufficientMoments {
59 reason: reason.into(),
60 }
61 }
62 #[inline]
63 fn bivariate_normal_domain(reason: impl Into<String>) -> Self {
64 CubicCellKernelError::BivariateNormalDomain {
65 reason: reason.into(),
66 }
67 }
68}
69
/// Cubic polynomial attached to a span, expressed in the local offset
/// `t = x - left`:
///
/// `p(x) = c0 + c1·t + c2·t² + c3·t³`.
///
/// `right` records the other end of the span; the evaluators below do not
/// read it and perform no range check on `x`.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct LocalSpanCubic {
    /// Left end of the span and origin of the local offset.
    pub left: f64,
    /// Right end of the span.
    pub right: f64,
    /// Constant coefficient.
    pub c0: f64,
    /// Linear coefficient.
    pub c1: f64,
    /// Quadratic coefficient.
    pub c2: f64,
    /// Cubic coefficient.
    pub c3: f64,
}

impl LocalSpanCubic {
    /// Value of the cubic at `x`.
    #[inline]
    pub fn evaluate(self, x: f64) -> f64 {
        let offset = x - self.left;
        // Terms are formed and summed in the same left-to-right order as the
        // plain power expansion, so rounding is unchanged.
        let linear = self.c1 * offset;
        let quadratic = self.c2 * offset * offset;
        let cubic = self.c3 * offset * offset * offset;
        self.c0 + linear + quadratic + cubic
    }

    /// First derivative with respect to `x`: `c1 + 2·c2·t + 3·c3·t²`.
    #[inline]
    pub fn first_derivative(self, x: f64) -> f64 {
        let offset = x - self.left;
        let from_quadratic = 2.0 * self.c2 * offset;
        let from_cubic = 3.0 * self.c3 * offset * offset;
        self.c1 + from_quadratic + from_cubic
    }

    /// Second derivative with respect to `x`: `2·c2 + 6·c3·t`.
    #[inline]
    pub fn second_derivative(self, x: f64) -> f64 {
        let offset = x - self.left;
        let from_quadratic = 2.0 * self.c2;
        let from_cubic = 6.0 * self.c3 * offset;
        from_quadratic + from_cubic
    }
}
124
/// Identifier string for the anchored-deviation kernel.
pub const ANCHORED_DEVIATION_KERNEL: &str = "DenestedCubicTransport";

/// Base tolerance applied to a cell's higher-order coefficients
/// (`effective_branch_tol` scales it; `tail_cell_cache_key` uses it as is).
pub const NORMALIZED_CELL_BRANCH_TOL: f64 = 1.0e-10;

/// `1 / (2π)`, computed from `TAU = 2π`.
const INV_TWO_PI: f64 = 1.0 / std::f64::consts::TAU;
136
/// Public, Linux-only reference to the private 384-entry `GL_NODES` table.
// NOTE(review): the name suggests a GPU kernel consumes this; the consumer is
// outside this view — confirm.
#[cfg(target_os = "linux")]
pub const GL_NODES_FOR_GPU_KERNEL: &[f64; 384] = &GL_NODES;
/// Public, Linux-only reference to the private 384-entry `GL_WEIGHTS` table.
#[cfg(target_os = "linux")]
pub const GL_WEIGHTS_FOR_GPU_KERNEL: &[f64; 384] = &GL_WEIGHTS;
145
146const GL_NODES: [f64; 384] = [
147 -9.999_804_411_726_474e-1,
148 -9.998_969_471_378_596e-1,
149 -9.997_467_408_113_523e-1,
150 -9.995_297_988_558_859e-1,
151 -9.992_461_316_671_845e-1,
152 -9.988_957_572_063_257e-1,
153 -9.984_786_985_384_589e-1,
154 -9.979_949_833_727_938e-1,
155 -9.974_446_439_389_107e-1,
156 -9.968_277_169_440_913e-1,
157 -9.961_442_435_551_087e-1,
158 -9.953_942_693_885_953e-1,
159 -9.945_778_445_047_068e-1,
160 -9.936_950_234_020_883e-1,
161 -9.927_458_650_133_153e-1,
162 -9.917_304_327_004_32e-1,
163 -9.906_487_942_504_061e-1,
164 -9.895_010_218_704_087e-1,
165 -9.882_871_921_828_699e-1,
166 -9.870_073_862_202_815e-1,
167 -9.856_616_894_197_333e-1,
168 -9.842_501_916_171_713e-1,
169 -9.827_729_870_413_743e-1,
170 -9.812_301_743_076_443e-1,
171 -9.796_218_564_112_101e-1,
172 -9.779_481_407_203_411e-1,
173 -9.762_091_389_691_724e-1,
174 -9.744_049_672_502_397e-1,
175 -9.725_357_460_067_257e-1,
176 -9.706_016_000_244_151e-1,
177 -9.686_026_584_233_628e-1,
178 -9.665_390_546_492_71e-1,
179 -9.644_109_264_645_802e-1,
180 -9.622_184_159_392_698e-1,
181 -9.599_616_694_413_742e-1,
182 -9.576_408_376_272_095e-1,
183 -9.552_560_754_313_16e-1,
184 -9.528_075_420_561_144e-1,
185 -9.502_954_009_612_771e-1,
186 -9.477_198_198_528_157e-1,
187 -9.450_809_706_718_851e-1,
188 -9.423_790_295_833_044e-1,
189 -9.396_141_769_637_963e-1,
190 -9.367_865_973_899_459e-1,
191 -9.338_964_796_258_775e-1,
192 -9.309_440_166_106_54e-1,
193 -9.279_294_054_453_956e-1,
194 -9.248_528_473_801_222e-1,
195 -9.217_145_478_003_181e-1,
196 -9.185_147_162_132_208e-1,
197 -9.152_535_662_338_34e-1,
198 -9.119_313_155_706_682e-1,
199 -9.085_481_860_112_055e-1,
200 -9.051_044_034_070_944e-1,
201 -9.016_001_976_590_722e-1,
202 -8.980_358_027_016_164e-1,
203 -8.944_114_564_873_288e-1,
204 -8.907_274_009_710_492e-1,
205 -8.869_838_820_937_034e-1,
206 -8.831_811_497_658_847e-1,
207 -8.793_194_578_511_7e-1,
208 -8.753_990_641_491_725e-1,
209 -8.714_202_303_783_312e-1,
210 -8.673_832_221_584_393e-1,
211 -8.632_883_089_929_12e-1,
212 -8.591_357_642_507_945e-1,
213 -8.549_258_651_485_127e-1,
214 -8.506_588_927_313_666e-1,
215 -8.463_351_318_547_683e-1,
216 -8.419_548_711_652_254e-1,
217 -8.375_184_030_810_715e-1,
218 -8.330_260_237_729_452e-1,
219 -8.284_780_331_440_178e-1,
220 -8.238_747_348_099_726e-1,
221 -8.192_164_360_787_36e-1,
222 -8.145_034_479_299_62e-1,
223 -8.097_360_849_942_72e-1,
224 -8.049_146_655_322_506e-1,
225 -8.000_395_114_131_988e-1,
226 -7.951_109_480_936_471e-1,
227 -7.901_293_045_956_28e-1,
228 -7.850_949_134_847_117e-1,
229 -7.800_081_108_478_04e-1,
230 -7.748_692_362_707_1e-1,
231 -7.696_786_328_154_644e-1,
232 -7.644_366_469_974_285e-1,
233 -7.591_436_287_621_58e-1,
234 -7.537_999_314_620_412e-1,
235 -7.484_059_118_327_094e-1,
236 -7.429_619_299_692_227e-1,
237 -7.374_683_493_020_299e-1,
238 -7.319_255_365_727_068e-1,
239 -7.263_338_618_094_733e-1,
240 -7.206_936_983_024_912e-1,
241 -7.150_054_225_789_432e-1,
242 -7.092_694_143_778_975e-1,
243 -7.034_860_566_249_567e-1,
244 -6.976_557_354_066_943e-1,
245 -6.917_788_399_448_808e-1,
246 -6.858_557_625_704_99e-1,
247 -6.798_868_986_975_534e-1,
248 -6.738_726_467_966_731e-1,
249 -6.678_134_083_685_102e-1,
250 -6.617_095_879_169_366e-1,
251 -6.555_615_929_220_4e-1,
252 -6.493_698_338_129_212e-1,
253 -6.431_347_239_402_948e-1,
254 -6.368_566_795_488_945e-1,
255 -6.305_361_197_496_849e-1,
256 -6.241_734_664_918_837e-1,
257 -6.177_691_445_347_913e-1,
258 -6.113_235_814_194_364e-1,
259 -6.048_372_074_400_329e-1,
260 -5.983_104_556_152_549e-1,
261 -5.917_437_616_593_286e-1,
262 -5.851_375_639_529_456e-1,
263 -5.784_923_035_139_965e-1,
264 -5.718_084_239_681_3e-1,
265 -5.650_863_715_191_369e-1,
266 -5.583_265_949_191_623e-1,
267 -5.515_295_454_387_482e-1,
268 -5.446_956_768_367_068e-1,
269 -5.378_254_453_298_289e-1,
270 -5.309_193_095_624_275e-1,
271 -5.239_777_305_757_194e-1,
272 -5.170_011_717_770_473e-1,
273 -5.099_900_989_089_429e-1,
274 -5.029_449_800_180_356e-1,
275 -4.958_662_854_238_058_4e-1,
276 -4.887_544_876_871_878e-1,
277 -4.816_100_615_790_221e-1,
278 -4.744_334_840_483_605_5e-1,
279 -4.672_252_341_906_264e-1,
280 -4.599_857_932_156_304e-1,
281 -4.527_156_444_154_463_7e-1,
282 -4.454_152_731_321_473_5e-1,
283 -4.380_851_667_254_05e-1,
284 -4.307_258_145_399_544_5e-1,
285 -4.233_377_078_729_265e-1,
286 -4.159_213_399_410_494e-1,
287 -4.084_772_058_477_228e-1,
288 -4.010_058_025_499_653e-1,
289 -3.935_076_288_252_386e-1,
290 -3.859_831_852_381_500_6e-1,
291 -3.784_329_741_070_358_6e-1,
292 -3.708_574_994_704_271e-1,
293 -3.632_572_670_534_011e-1,
294 -3.556_327_842_338_202e-1,
295 -3.479_845_600_084_600_6e-1,
296 -3.403_131_049_590_297e-1,
297 -3.326_189_312_180_866e-1,
298 -3.249_025_524_348_469_5e-1,
299 -3.171_644_837_408_958_4e-1,
300 -3.094_052_417_157_978e-1,
301 -3.016_253_443_526_109e-1,
302 -2.938_253_110_233_064_5e-1,
303 -2.860_056_624_440_967_5e-1,
304 -2.781_669_206_406_729e-1,
305 -2.703_096_089_133_553e-1,
306 -2.624_342_518_021_592_4e-1,
307 -2.545_413_750_517_773e-1,
308 -2.466_315_055_764_817_5e-1,
309 -2.387_051_714_249_486_3e-1,
310 -2.307_629_017_450_062e-1,
311 -2.228_052_267_483_099_4e-1,
312 -2.148_326_776_749_466_5e-1,
313 -2.068_457_867_579_697_5e-1,
314 -1.988_450_871_878_683_4e-1,
315 -1.908_311_130_769_724_5e-1,
316 -1.828_043_994_237_965_6e-1,
317 -1.747_654_820_773_241_2e-1,
318 -1.667_148_977_012_352_4e-1,
319 -1.586_531_837_380_799_3e-1,
320 -1.505_808_783_733_995e-1,
321 -1.424_985_204_997_981_4e-1,
322 -1.344_066_496_809_674_7e-1,
323 -1.263_058_061_156_663e-1,
324 -1.181_965_306_016_578_4e-1,
325 -1.100_793_644_996_070_4e-1,
326 -1.019_548_496_969_403_7e-1,
327 -9.382_352_857_167_028e-2,
328 -8.568_594_395_618_719e-2,
329 -7.754_263_910_102_077e-2,
330 -6.939_415_763_857_37e-2,
331 -6.124_104_354_682_962e-2,
332 -5.308_384_111_303_817_6e-2,
333 -4.492_309_489_737_94e-2,
334 -3.675_934_969_660_982e-2,
335 -2.859_315_050_769_284_7e-2,
336 -2.042_504_249_141_571e-2,
337 -1.225_557_093_599_553_8e-2,
338 -4.085_281_220_676_868e-3,
339 4.085_281_220_676_868e-3,
340 1.225_557_093_599_553_8e-2,
341 2.042_504_249_141_571e-2,
342 2.859_315_050_769_284_7e-2,
343 3.675_934_969_660_982e-2,
344 4.492_309_489_737_94e-2,
345 5.308_384_111_303_817_6e-2,
346 6.124_104_354_682_962e-2,
347 6.939_415_763_857_37e-2,
348 7.754_263_910_102_077e-2,
349 8.568_594_395_618_719e-2,
350 9.382_352_857_167_028e-2,
351 1.019_548_496_969_403_7e-1,
352 1.100_793_644_996_070_4e-1,
353 1.181_965_306_016_578_4e-1,
354 1.263_058_061_156_663e-1,
355 1.344_066_496_809_674_7e-1,
356 1.424_985_204_997_981_4e-1,
357 1.505_808_783_733_995e-1,
358 1.586_531_837_380_799_3e-1,
359 1.667_148_977_012_352_4e-1,
360 1.747_654_820_773_241_2e-1,
361 1.828_043_994_237_965_6e-1,
362 1.908_311_130_769_724_5e-1,
363 1.988_450_871_878_683_4e-1,
364 2.068_457_867_579_697_5e-1,
365 2.148_326_776_749_466_5e-1,
366 2.228_052_267_483_099_4e-1,
367 2.307_629_017_450_062e-1,
368 2.387_051_714_249_486_3e-1,
369 2.466_315_055_764_817_5e-1,
370 2.545_413_750_517_773e-1,
371 2.624_342_518_021_592_4e-1,
372 2.703_096_089_133_553e-1,
373 2.781_669_206_406_729e-1,
374 2.860_056_624_440_967_5e-1,
375 2.938_253_110_233_064_5e-1,
376 3.016_253_443_526_109e-1,
377 3.094_052_417_157_978e-1,
378 3.171_644_837_408_958_4e-1,
379 3.249_025_524_348_469_5e-1,
380 3.326_189_312_180_866e-1,
381 3.403_131_049_590_297e-1,
382 3.479_845_600_084_600_6e-1,
383 3.556_327_842_338_202e-1,
384 3.632_572_670_534_011e-1,
385 3.708_574_994_704_271e-1,
386 3.784_329_741_070_358_6e-1,
387 3.859_831_852_381_500_6e-1,
388 3.935_076_288_252_386e-1,
389 4.010_058_025_499_653e-1,
390 4.084_772_058_477_228e-1,
391 4.159_213_399_410_494e-1,
392 4.233_377_078_729_265e-1,
393 4.307_258_145_399_544_5e-1,
394 4.380_851_667_254_05e-1,
395 4.454_152_731_321_473_5e-1,
396 4.527_156_444_154_463_7e-1,
397 4.599_857_932_156_304e-1,
398 4.672_252_341_906_264e-1,
399 4.744_334_840_483_605_5e-1,
400 4.816_100_615_790_221e-1,
401 4.887_544_876_871_878e-1,
402 4.958_662_854_238_058_4e-1,
403 5.029_449_800_180_356e-1,
404 5.099_900_989_089_429e-1,
405 5.170_011_717_770_473e-1,
406 5.239_777_305_757_194e-1,
407 5.309_193_095_624_275e-1,
408 5.378_254_453_298_289e-1,
409 5.446_956_768_367_068e-1,
410 5.515_295_454_387_482e-1,
411 5.583_265_949_191_623e-1,
412 5.650_863_715_191_369e-1,
413 5.718_084_239_681_3e-1,
414 5.784_923_035_139_965e-1,
415 5.851_375_639_529_456e-1,
416 5.917_437_616_593_286e-1,
417 5.983_104_556_152_549e-1,
418 6.048_372_074_400_329e-1,
419 6.113_235_814_194_364e-1,
420 6.177_691_445_347_913e-1,
421 6.241_734_664_918_837e-1,
422 6.305_361_197_496_849e-1,
423 6.368_566_795_488_945e-1,
424 6.431_347_239_402_948e-1,
425 6.493_698_338_129_212e-1,
426 6.555_615_929_220_4e-1,
427 6.617_095_879_169_366e-1,
428 6.678_134_083_685_102e-1,
429 6.738_726_467_966_731e-1,
430 6.798_868_986_975_534e-1,
431 6.858_557_625_704_99e-1,
432 6.917_788_399_448_808e-1,
433 6.976_557_354_066_943e-1,
434 7.034_860_566_249_567e-1,
435 7.092_694_143_778_975e-1,
436 7.150_054_225_789_432e-1,
437 7.206_936_983_024_912e-1,
438 7.263_338_618_094_733e-1,
439 7.319_255_365_727_068e-1,
440 7.374_683_493_020_299e-1,
441 7.429_619_299_692_227e-1,
442 7.484_059_118_327_094e-1,
443 7.537_999_314_620_412e-1,
444 7.591_436_287_621_58e-1,
445 7.644_366_469_974_285e-1,
446 7.696_786_328_154_644e-1,
447 7.748_692_362_707_1e-1,
448 7.800_081_108_478_04e-1,
449 7.850_949_134_847_117e-1,
450 7.901_293_045_956_28e-1,
451 7.951_109_480_936_471e-1,
452 8.000_395_114_131_988e-1,
453 8.049_146_655_322_506e-1,
454 8.097_360_849_942_72e-1,
455 8.145_034_479_299_62e-1,
456 8.192_164_360_787_36e-1,
457 8.238_747_348_099_726e-1,
458 8.284_780_331_440_178e-1,
459 8.330_260_237_729_452e-1,
460 8.375_184_030_810_715e-1,
461 8.419_548_711_652_254e-1,
462 8.463_351_318_547_683e-1,
463 8.506_588_927_313_666e-1,
464 8.549_258_651_485_127e-1,
465 8.591_357_642_507_945e-1,
466 8.632_883_089_929_12e-1,
467 8.673_832_221_584_393e-1,
468 8.714_202_303_783_312e-1,
469 8.753_990_641_491_725e-1,
470 8.793_194_578_511_7e-1,
471 8.831_811_497_658_847e-1,
472 8.869_838_820_937_034e-1,
473 8.907_274_009_710_492e-1,
474 8.944_114_564_873_288e-1,
475 8.980_358_027_016_164e-1,
476 9.016_001_976_590_722e-1,
477 9.051_044_034_070_944e-1,
478 9.085_481_860_112_055e-1,
479 9.119_313_155_706_682e-1,
480 9.152_535_662_338_34e-1,
481 9.185_147_162_132_208e-1,
482 9.217_145_478_003_181e-1,
483 9.248_528_473_801_222e-1,
484 9.279_294_054_453_956e-1,
485 9.309_440_166_106_54e-1,
486 9.338_964_796_258_775e-1,
487 9.367_865_973_899_459e-1,
488 9.396_141_769_637_963e-1,
489 9.423_790_295_833_044e-1,
490 9.450_809_706_718_851e-1,
491 9.477_198_198_528_157e-1,
492 9.502_954_009_612_771e-1,
493 9.528_075_420_561_144e-1,
494 9.552_560_754_313_16e-1,
495 9.576_408_376_272_095e-1,
496 9.599_616_694_413_742e-1,
497 9.622_184_159_392_698e-1,
498 9.644_109_264_645_802e-1,
499 9.665_390_546_492_71e-1,
500 9.686_026_584_233_628e-1,
501 9.706_016_000_244_151e-1,
502 9.725_357_460_067_257e-1,
503 9.744_049_672_502_397e-1,
504 9.762_091_389_691_724e-1,
505 9.779_481_407_203_411e-1,
506 9.796_218_564_112_101e-1,
507 9.812_301_743_076_443e-1,
508 9.827_729_870_413_743e-1,
509 9.842_501_916_171_713e-1,
510 9.856_616_894_197_333e-1,
511 9.870_073_862_202_815e-1,
512 9.882_871_921_828_699e-1,
513 9.895_010_218_704_087e-1,
514 9.906_487_942_504_061e-1,
515 9.917_304_327_004_32e-1,
516 9.927_458_650_133_153e-1,
517 9.936_950_234_020_883e-1,
518 9.945_778_445_047_068e-1,
519 9.953_942_693_885_953e-1,
520 9.961_442_435_551_087e-1,
521 9.968_277_169_440_913e-1,
522 9.974_446_439_389_107e-1,
523 9.979_949_833_727_938e-1,
524 9.984_786_985_384_589e-1,
525 9.988_957_572_063_257e-1,
526 9.992_461_316_671_845e-1,
527 9.995_297_988_558_859e-1,
528 9.997_467_408_113_523e-1,
529 9.998_969_471_378_596e-1,
530 9.999_804_411_726_474e-1,
531];
532const GL_WEIGHTS: [f64; 384] = [
533 5.019_410_348_676_869_6e-5,
534 1.168_390_665_730_266_3e-4,
535 1.835_749_193_551_655_8e-4,
536 2.503_070_890_844_105e-4,
537 3.170_242_698_112_815e-4,
538 3.837_208_020_912_921_4e-4,
539 4.503_919_137_716_827e-4,
540 5.170_330_453_491_649e-4,
541 5.836_397_042_630_135e-4,
542 6.502_074_240_969_948e-4,
543 7.167_317_509_947_801e-4,
544 7.832_082_385_905_168e-4,
545 8.496_324_460_039_209e-4,
546 9.159_999_370_632_641e-4,
547 9.823_062_800_663_463e-4,
548 1.048_547_047_793_689_5e-3,
549 1.114_717_817_647_310_6e-3,
550 1.180_814_171_855_922e-3,
551 1.246_831_697_715_441_5e-3,
552 1.312_765_987_850_66e-3,
553 1.378_612_640_487_646_8e-3,
554 1.444_367_259_734_736e-3,
555 1.510_025_455_865_810_3e-3,
556 1.575_582_845_607_936_8e-3,
557 1.641_035_052_429_271_5e-3,
558 1.706_377_706_828_447_1e-3,
559 1.771_606_446_623_834_7e-3,
560 1.836_716_917_243_567_5e-3,
561 1.901_704_772_014_899_2e-3,
562 1.966_565_672_453_437e-3,
563 2.031_295_288_552_398_4e-3,
564 2.095_889_299_071_020_6e-3,
565 2.160_343_391_822_734_3e-3,
566 2.224_653_263_962_713e-3,
567 2.288_814_622_274_955e-3,
568 2.352_823_183_458_769e-3,
569 2.416_674_674_414_340_5e-3,
570 2.480_364_832_528_265_6e-3,
571 2.543_889_405_957_74e-3,
572 2.607_244_153_914_452e-3,
573 2.670_424_846_947_554e-3,
574 2.733_427_267_226_093_3e-3,
575 2.796_247_208_820_428e-3,
576 2.858_880_477_983_06e-3,
577 2.921_322_893_428_515_3e-3,
578 2.983_570_286_612_554_5e-3,
579 3.045_618_502_010_327_8e-3,
580 3.107_463_397_393_755_5e-3,
581 3.169_100_844_108_32e-3,
582 3.230_526_727_348_174e-3,
583 3.291_736_946_431_361e-3,
584 3.352_727_415_073_250_3e-3,
585 3.413_494_061_659_418_4e-3,
586 3.474_032_829_517_317e-3,
587 3.534_339_677_187_348_4e-3,
588 3.594_410_578_692_452e-3,
589 3.654_241_523_806_987e-3,
590 3.713_828_518_324_312_5e-3,
591 3.773_167_584_323_583_5e-3,
592 3.832_254_760_435_171e-3,
593 3.891_086_102_105_193_4e-3,
594 3.949_657_681_858_895e-3,
595 4.007_965_589_562_678e-3,
596 4.066_005_932_685_269e-3,
597 4.123_774_836_557_6e-3,
598 4.181_268_444_631_281e-3,
599 4.238_482_918_736_289e-3,
600 4.295_414_439_336_925e-3,
601 4.352_059_205_787_275e-3,
602 4.408_413_436_584_285e-3,
603 4.464_473_369_620_78e-3,
604 4.520_235_262_436_235e-3,
605 4.575_695_392_466_791e-3,
606 4.630_850_057_293_894e-3,
607 4.685_695_574_891_041e-3,
608 4.740_228_283_870_022e-3,
609 4.794_444_543_725_102e-3,
610 4.848_340_735_076_109e-3,
611 4.901_913_259_910_197e-3,
612 4.955_158_541_821_682_4e-3,
613 5.008_073_026_251_332e-3,
614 5.060_653_180_723_101_4e-3,
615 5.112_895_495_080_397e-3,
616 5.164_796_481_720_011e-3,
617 5.216_352_675_825_451e-3,
618 5.267_560_635_597_735e-3,
619 5.318_416_942_485_385e-3,
620 5.368_918_201_412_827e-3,
621 5.419_061_041_006_627e-3,
622 5.468_842_113_820_941e-3,
623 5.518_258_096_560_71e-3,
624 5.567_305_690_303_767e-3,
625 5.615_981_620_720_803e-3,
626 5.664_282_638_294_182e-3,
627 5.712_205_518_534_655e-3,
628 5.759_747_062_196_925_5e-3,
629 5.806_904_095_492_818e-3,
630 5.853_673_470_303_617_4e-3,
631 5.900_052_064_389_824e-3,
632 5.946_036_781_599_814e-3,
633 5.991_624_552_076_468e-3,
634 6.036_812_332_462_087e-3,
635 6.081_597_106_101_673e-3,
636 6.125_975_883_244_196e-3,
637 6.169_945_701_242_237e-3,
638 6.213_503_624_749_591e-3,
639 6.256_646_745_917_723e-3,
640 6.299_372_184_589_237e-3,
641 6.341_677_088_490_664e-3,
642 6.383_558_633_422_572e-3,
643 6.425_014_023_448_273e-3,
644 6.466_040_491_080_434e-3,
645 6.506_635_297_465_724e-3,
646 6.546_795_732_567_842_5e-3,
647 6.586_519_115_348_261e-3,
648 6.625_802_793_945_317e-3,
649 6.664_644_145_851_14e-3,
650 6.703_040_578_086_941e-3,
651 6.740_989_527_375_895e-3,
652 6.778_488_460_314_126e-3,
653 6.815_534_873_540_5e-3,
654 6.852_126_293_902_878e-3,
655 6.888_260_278_623_754e-3,
656 6.923_934_415_463_31e-3,
657 6.959_146_322_880_146_5e-3,
658 6.993_893_650_190_702e-3,
659 7.028_174_077_725_734e-3,
660 7.061_985_316_985_506e-3,
661 7.095_325_110_792_439e-3,
662 7.128_191_233_441_844e-3,
663 7.160_581_490_850_321e-3,
664 7.192_493_720_702_486e-3,
665 7.223_925_792_595_309e-3,
666 7.254_875_608_179_984e-3,
667 7.285_341_101_302_512e-3,
668 7.315_320_238_141_324_5e-3,
669 7.344_811_017_343_063e-3,
670 7.373_811_470_156_258e-3,
671 7.402_319_660_562_818e-3,
672 7.430_333_685_407_178e-3,
673 7.457_851_674_523_319e-3,
674 7.484_871_790_859_79e-3,
675 7.511_392_230_602_079e-3,
676 7.537_411_223_293_362e-3,
677 7.562_927_031_952_382e-3,
678 7.587_937_953_189_561_5e-3,
679 7.612_442_317_320_796e-3,
680 7.636_438_488_478_739e-3,
681 7.659_924_864_722_064e-3,
682 7.682_899_878_142_539e-3,
683 7.705_361_994_969_524e-3,
684 7.727_309_715_672_44e-3,
685 7.748_741_575_060_914e-3,
686 7.769_656_142_382_462e-3,
687 7.790_052_021_418_226e-3,
688 7.809_927_850_575_903e-3,
689 7.829_282_302_980_82e-3,
690 7.848_114_086_564_56e-3,
691 7.866_421_944_151_094e-3,
692 7.884_204_653_540_665e-3,
693 7.901_461_027_591_6e-3,
694 7.918_189_914_299_318e-3,
695 7.934_390_196_873_448e-3,
696 7.950_060_793_812_204e-3,
697 7.965_200_658_974_709e-3,
698 7.979_808_781_650_77e-3,
699 7.993_884_186_628_266e-3,
700 8.007_425_934_258_548e-3,
701 8.020_433_120_518_866e-3,
702 8.032_904_877_072_8e-3,
703 8.044_840_371_328_26e-3,
704 8.056_238_806_493_175e-3,
705 8.067_099_421_628_42e-3,
706 8.077_421_491_698_82e-3,
707 8.087_204_327_621_594e-3,
708 8.096_447_276_312_202e-3,
709 8.105_149_720_727_933e-3,
710 8.113_311_079_909_208e-3,
711 8.120_930_809_018_415e-3,
712 8.128_008_399_376_085e-3,
713 8.134_543_378_495_033e-3,
714 8.140_535_310_111_77e-3,
715 8.145_983_794_215_77e-3,
716 8.150_888_467_075_875e-3,
717 8.155_249_001_265_092e-3,
718 8.159_065_105_681_899e-3,
719 8.162_336_525_570_1e-3,
720 8.165_063_042_535_465e-3,
721 8.167_244_474_560_707e-3,
722 8.168_880_676_017_344e-3,
723 8.169_971_537_675_47e-3,
724 8.170_516_986_711_104e-3,
725 8.170_516_986_711_104e-3,
726 8.169_971_537_675_47e-3,
727 8.168_880_676_017_344e-3,
728 8.167_244_474_560_707e-3,
729 8.165_063_042_535_465e-3,
730 8.162_336_525_570_1e-3,
731 8.159_065_105_681_899e-3,
732 8.155_249_001_265_092e-3,
733 8.150_888_467_075_875e-3,
734 8.145_983_794_215_77e-3,
735 8.140_535_310_111_77e-3,
736 8.134_543_378_495_033e-3,
737 8.128_008_399_376_085e-3,
738 8.120_930_809_018_415e-3,
739 8.113_311_079_909_208e-3,
740 8.105_149_720_727_933e-3,
741 8.096_447_276_312_202e-3,
742 8.087_204_327_621_594e-3,
743 8.077_421_491_698_82e-3,
744 8.067_099_421_628_42e-3,
745 8.056_238_806_493_175e-3,
746 8.044_840_371_328_26e-3,
747 8.032_904_877_072_8e-3,
748 8.020_433_120_518_866e-3,
749 8.007_425_934_258_548e-3,
750 7.993_884_186_628_266e-3,
751 7.979_808_781_650_77e-3,
752 7.965_200_658_974_709e-3,
753 7.950_060_793_812_204e-3,
754 7.934_390_196_873_448e-3,
755 7.918_189_914_299_318e-3,
756 7.901_461_027_591_6e-3,
757 7.884_204_653_540_665e-3,
758 7.866_421_944_151_094e-3,
759 7.848_114_086_564_56e-3,
760 7.829_282_302_980_82e-3,
761 7.809_927_850_575_903e-3,
762 7.790_052_021_418_226e-3,
763 7.769_656_142_382_462e-3,
764 7.748_741_575_060_914e-3,
765 7.727_309_715_672_44e-3,
766 7.705_361_994_969_524e-3,
767 7.682_899_878_142_539e-3,
768 7.659_924_864_722_064e-3,
769 7.636_438_488_478_739e-3,
770 7.612_442_317_320_796e-3,
771 7.587_937_953_189_561_5e-3,
772 7.562_927_031_952_382e-3,
773 7.537_411_223_293_362e-3,
774 7.511_392_230_602_079e-3,
775 7.484_871_790_859_79e-3,
776 7.457_851_674_523_319e-3,
777 7.430_333_685_407_178e-3,
778 7.402_319_660_562_818e-3,
779 7.373_811_470_156_258e-3,
780 7.344_811_017_343_063e-3,
781 7.315_320_238_141_324_5e-3,
782 7.285_341_101_302_512e-3,
783 7.254_875_608_179_984e-3,
784 7.223_925_792_595_309e-3,
785 7.192_493_720_702_486e-3,
786 7.160_581_490_850_321e-3,
787 7.128_191_233_441_844e-3,
788 7.095_325_110_792_439e-3,
789 7.061_985_316_985_506e-3,
790 7.028_174_077_725_734e-3,
791 6.993_893_650_190_702e-3,
792 6.959_146_322_880_146_5e-3,
793 6.923_934_415_463_31e-3,
794 6.888_260_278_623_754e-3,
795 6.852_126_293_902_878e-3,
796 6.815_534_873_540_5e-3,
797 6.778_488_460_314_126e-3,
798 6.740_989_527_375_895e-3,
799 6.703_040_578_086_941e-3,
800 6.664_644_145_851_14e-3,
801 6.625_802_793_945_317e-3,
802 6.586_519_115_348_261e-3,
803 6.546_795_732_567_842_5e-3,
804 6.506_635_297_465_724e-3,
805 6.466_040_491_080_434e-3,
806 6.425_014_023_448_273e-3,
807 6.383_558_633_422_572e-3,
808 6.341_677_088_490_664e-3,
809 6.299_372_184_589_237e-3,
810 6.256_646_745_917_723e-3,
811 6.213_503_624_749_591e-3,
812 6.169_945_701_242_237e-3,
813 6.125_975_883_244_196e-3,
814 6.081_597_106_101_673e-3,
815 6.036_812_332_462_087e-3,
816 5.991_624_552_076_468e-3,
817 5.946_036_781_599_814e-3,
818 5.900_052_064_389_824e-3,
819 5.853_673_470_303_617_4e-3,
820 5.806_904_095_492_818e-3,
821 5.759_747_062_196_925_5e-3,
822 5.712_205_518_534_655e-3,
823 5.664_282_638_294_182e-3,
824 5.615_981_620_720_803e-3,
825 5.567_305_690_303_767e-3,
826 5.518_258_096_560_71e-3,
827 5.468_842_113_820_941e-3,
828 5.419_061_041_006_627e-3,
829 5.368_918_201_412_827e-3,
830 5.318_416_942_485_385e-3,
831 5.267_560_635_597_735e-3,
832 5.216_352_675_825_451e-3,
833 5.164_796_481_720_011e-3,
834 5.112_895_495_080_397e-3,
835 5.060_653_180_723_101_4e-3,
836 5.008_073_026_251_332e-3,
837 4.955_158_541_821_682_4e-3,
838 4.901_913_259_910_197e-3,
839 4.848_340_735_076_109e-3,
840 4.794_444_543_725_102e-3,
841 4.740_228_283_870_022e-3,
842 4.685_695_574_891_041e-3,
843 4.630_850_057_293_894e-3,
844 4.575_695_392_466_791e-3,
845 4.520_235_262_436_235e-3,
846 4.464_473_369_620_78e-3,
847 4.408_413_436_584_285e-3,
848 4.352_059_205_787_275e-3,
849 4.295_414_439_336_925e-3,
850 4.238_482_918_736_289e-3,
851 4.181_268_444_631_281e-3,
852 4.123_774_836_557_6e-3,
853 4.066_005_932_685_269e-3,
854 4.007_965_589_562_678e-3,
855 3.949_657_681_858_895e-3,
856 3.891_086_102_105_193_4e-3,
857 3.832_254_760_435_171e-3,
858 3.773_167_584_323_583_5e-3,
859 3.713_828_518_324_312_5e-3,
860 3.654_241_523_806_987e-3,
861 3.594_410_578_692_452e-3,
862 3.534_339_677_187_348_4e-3,
863 3.474_032_829_517_317e-3,
864 3.413_494_061_659_418_4e-3,
865 3.352_727_415_073_250_3e-3,
866 3.291_736_946_431_361e-3,
867 3.230_526_727_348_174e-3,
868 3.169_100_844_108_32e-3,
869 3.107_463_397_393_755_5e-3,
870 3.045_618_502_010_327_8e-3,
871 2.983_570_286_612_554_5e-3,
872 2.921_322_893_428_515_3e-3,
873 2.858_880_477_983_06e-3,
874 2.796_247_208_820_428e-3,
875 2.733_427_267_226_093_3e-3,
876 2.670_424_846_947_554e-3,
877 2.607_244_153_914_452e-3,
878 2.543_889_405_957_74e-3,
879 2.480_364_832_528_265_6e-3,
880 2.416_674_674_414_340_5e-3,
881 2.352_823_183_458_769e-3,
882 2.288_814_622_274_955e-3,
883 2.224_653_263_962_713e-3,
884 2.160_343_391_822_734_3e-3,
885 2.095_889_299_071_020_6e-3,
886 2.031_295_288_552_398_4e-3,
887 1.966_565_672_453_437e-3,
888 1.901_704_772_014_899_2e-3,
889 1.836_716_917_243_567_5e-3,
890 1.771_606_446_623_834_7e-3,
891 1.706_377_706_828_447_1e-3,
892 1.641_035_052_429_271_5e-3,
893 1.575_582_845_607_936_8e-3,
894 1.510_025_455_865_810_3e-3,
895 1.444_367_259_734_736e-3,
896 1.378_612_640_487_646_8e-3,
897 1.312_765_987_850_66e-3,
898 1.246_831_697_715_441_5e-3,
899 1.180_814_171_855_922e-3,
900 1.114_717_817_647_310_6e-3,
901 1.048_547_047_793_689_5e-3,
902 9.823_062_800_663_463e-4,
903 9.159_999_370_632_641e-4,
904 8.496_324_460_039_209e-4,
905 7.832_082_385_905_168e-4,
906 7.167_317_509_947_801e-4,
907 6.502_074_240_969_948e-4,
908 5.836_397_042_630_135e-4,
909 5.170_330_453_491_649e-4,
910 4.503_919_137_716_827e-4,
911 3.837_208_020_912_921_4e-4,
912 3.170_242_698_112_815e-4,
913 2.503_070_890_844_105e-4,
914 1.835_749_193_551_655_8e-4,
915 1.168_390_665_730_266_3e-4,
916 5.019_410_348_676_869_6e-5,
917];
918
/// Which exact evaluation branch applies to a cell.
// NOTE(review): the names presumably track the degree of the cell's
// polynomial (`Affine` when the quadratic and cubic coefficients are
// negligible, `Quartic`/`Sextic` for the degree of `q(z)` otherwise); the
// selection logic is outside this view — confirm.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ExactCellBranch {
    /// Affine branch.
    Affine,
    /// Quartic branch.
    Quartic,
    /// Sextic branch.
    Sextic,
}
925
926#[inline]
943fn effective_branch_tol(cell: DenestedCubicCell) -> f64 {
944 let anchor_scale = cell.c0.abs().max(cell.c1.abs()).max(1.0);
945 NORMALIZED_CELL_BRANCH_TOL * anchor_scale
946}
947
/// A cell `[left, right]` carrying the cubic
/// `η(z) = c0 + c1·z + c2·z² + c3·z³`.
///
/// Unlike [`LocalSpanCubic`], the polynomial is evaluated directly in `z`,
/// with no offset by `left`.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct DenestedCubicCell {
    /// Left end of the cell.
    pub left: f64,
    /// Right end of the cell.
    pub right: f64,
    /// Constant coefficient of `η`.
    pub c0: f64,
    /// Linear coefficient of `η`.
    pub c1: f64,
    /// Quadratic coefficient of `η`.
    pub c2: f64,
    /// Cubic coefficient of `η`.
    pub c3: f64,
}

impl DenestedCubicCell {
    /// Evaluates `η(z)`; no check is made that `z` lies inside the cell.
    #[inline]
    pub fn eta(self, z: f64) -> f64 {
        // Same term order as the plain power expansion, so rounding matches.
        let linear = self.c1 * z;
        let quadratic = self.c2 * z * z;
        let cubic = self.c3 * z * z * z;
        self.c0 + linear + quadratic + cubic
    }

    /// Evaluates `q(z) = ½·(z² + η(z)²)`.
    #[inline]
    pub fn q(self, z: f64) -> f64 {
        let eta_value = self.eta(z);
        let sum_of_squares = z * z + eta_value * eta_value;
        0.5 * sum_of_squares
    }
}
970
/// Fingerprint of a cell for moment de-duplication: six 64-bit words plus a
/// hash mixed from them (see `cell_moment_fingerprint`).
///
/// Equality and hashing are derived over both fields, so two fingerprints are
/// equal only when every word matches, not merely the mixed hash.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct CellMomentFingerprint {
    /// Hash mixed from the six words in `bins`.
    pub hash: u64,
    /// One word each for `left`, `right`, `c0`, `c1`, `c2`, `c3`, in that order.
    bins: [u64; 6],
}
976
977#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
978pub struct CellMomentCacheKey {
979 pub fingerprint: CellMomentFingerprint,
980 pub max_degree: usize,
981}
982
/// Counters describing how well cell-moment de-duplication is working.
#[derive(Debug, Default, Clone, Copy, PartialEq)]
pub struct CellMomentDedupStats {
    /// Total number of lookups.
    pub lookups: u64,
    /// Lookups that were hits.
    pub hits: u64,
    /// Lookups that were misses.
    pub misses: u64,
}

impl CellMomentDedupStats {
    /// Fraction `hits / lookups`, or `0.0` when no lookup has been recorded.
    #[inline]
    pub fn hit_rate(self) -> f64 {
        match self.lookups {
            0 => 0.0,
            total => self.hits as f64 / total as f64,
        }
    }
}
1000
1001#[inline]
1002fn splitmix64(x: u64) -> u64 {
1003 gam_linalg::utils::splitmix64_hash(x)
1004}
1005
1006#[inline]
1007fn mix_fingerprint_words(words: &[u64]) -> u64 {
1008 let mut h = 0xcbf2_9ce4_8422_2325u64;
1009 for &word in words {
1010 h ^= splitmix64(word);
1011 h = h.wrapping_mul(0x100_0000_01b3);
1012 }
1013 h
1014}
1015
/// Maps `x` to the 64-bit word used as one fingerprint bin.
///
/// * When quantization is unavailable — `epsilon` is zero, negative, NaN or
///   infinite, or `x` itself is not finite — the raw bit pattern of `x` is
///   returned (exact matching).
/// * Otherwise `x` is snapped to the nearest multiple of `epsilon` and the bit
///   pattern of the bin index `round(x / epsilon)` is returned.
///
/// Two edge cases are handled so that "same bin" always means "same word" and
/// distinct inputs are never merged by accident:
///
/// * The zero bin is always encoded as `+0.0`. Without this, inputs slightly
///   below zero would round to `-0.0`, whose bit pattern differs from `+0.0`,
///   splitting one bin into two words.
/// * If `x / epsilon` overflows to ±infinity, the raw bits of `x` are used
///   instead, so unrelated large values do not collapse into a single bin.
#[inline]
fn quantized_cell_word(x: f64, epsilon: f64) -> u64 {
    let can_quantize = epsilon.is_finite() && epsilon > 0.0 && x.is_finite();
    if !can_quantize {
        return x.to_bits();
    }

    let bin = (x / epsilon).round();
    if !bin.is_finite() {
        // Quotient overflowed: fall back to exact matching for this value.
        return x.to_bits();
    }

    // `-0.0 == 0.0` is true, so this canonicalises both zeros to `+0.0`.
    let canonical_bin = if bin == 0.0 { 0.0 } else { bin };
    canonical_bin.to_bits()
}
1023
1024pub fn cell_moment_fingerprint(cell: DenestedCubicCell, epsilon: f64) -> CellMomentFingerprint {
1032 let bins = [
1033 quantized_cell_word(cell.left, epsilon),
1034 quantized_cell_word(cell.right, epsilon),
1035 quantized_cell_word(cell.c0, epsilon),
1036 quantized_cell_word(cell.c1, epsilon),
1037 quantized_cell_word(cell.c2, epsilon),
1038 quantized_cell_word(cell.c3, epsilon),
1039 ];
1040 CellMomentFingerprint {
1041 hash: mix_fingerprint_words(&bins),
1042 bins,
1043 }
1044}
1045
1046#[inline]
1047pub fn cell_moment_cache_key(
1048 cell: DenestedCubicCell,
1049 max_degree: usize,
1050 epsilon: f64,
1051) -> CellMomentCacheKey {
1052 CellMomentCacheKey {
1053 fingerprint: cell_moment_fingerprint(cell, epsilon),
1054 max_degree,
1055 }
1056}
1057
1058#[derive(Clone, Copy, Debug, PartialEq)]
1059pub struct DenestedPartitionCell {
1060 pub cell: DenestedCubicCell,
1061 pub score_span: LocalSpanCubic,
1062 pub link_span: LocalSpanCubic,
1063 pub left_edge: PartitionEdge,
1069 pub right_edge: PartitionEdge,
1070}
1071
1072impl DenestedPartitionCell {}
1073
/// Description of one edge of a partition cell.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum PartitionEdge {
    /// Edge located at a fixed `z`, independent of any parameters.
    Fixed(f64),
    /// Edge located where `a + b·z` equals `tau`, i.e. at `z = (tau - a) / b`.
    Crossing {
        /// Level that defines the crossing.
        tau: f64,
    },
}

impl PartitionEdge {
    /// Position of the edge in `z` for the parameters `a` and `b`.
    ///
    /// `Fixed` edges ignore both parameters. For `Crossing` edges the result
    /// is `(tau - a) / b`; no guard is applied for `b == 0`, so the usual
    /// IEEE result (±infinity or NaN) is returned in that case.
    #[inline]
    pub fn z_at(self, a: f64, b: f64) -> f64 {
        match self {
            PartitionEdge::Crossing { tau } => {
                let shifted = tau - a;
                shifted / b
            }
            PartitionEdge::Fixed(position) => position,
        }
    }
}
1095
/// Exact-match key for the tail-cell moment cache.
///
/// All floating-point inputs are stored as raw bit patterns so the key can be
/// hashed and compared exactly.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
struct TailCellMomentCacheKey {
    /// Bit pattern of the cell's `c0`.
    c0_bits: u64,
    /// Bit pattern of the cell's `c1`.
    c1_bits: u64,
    /// Bit pattern of the cell's single finite endpoint.
    endpoint_bits: u64,
    /// `-1` when the cell is unbounded on the left, `1` when unbounded on the right.
    side: i8,
    /// Maximum degree the moments were requested for.
    max_degree: usize,
}
1104
/// Byte budget handed to the tail-cell moment cache: 64 MiB.
const TAIL_CELL_MOMENT_CACHE_MAX_BYTES: usize = 64 << 20;
/// Entry budget handed to the tail-cell moment cache: 2^18 = 262 144 entries.
const TAIL_CELL_MOMENT_CACHE_MAX_ENTRIES: usize = 1 << 18;
1107
/// Point-in-time snapshot of the tail-cell moment cache counters.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub struct TailCellMomentCacheStats {
    /// Number of lookups served from the cache.
    pub hits: usize,
    /// Number of lookups that had to be computed.
    pub misses: usize,
    /// Number of entries held by the cache when the snapshot was taken.
    pub entries: usize,
}

impl TailCellMomentCacheStats {
    /// Total number of counted lookups, `hits + misses`.
    ///
    /// The sum saturates at `usize::MAX` instead of overflowing, so a
    /// long-running process (notably on 32-bit targets) neither panics in
    /// debug builds nor reports a wrapped-around total in release builds.
    #[inline]
    pub fn requests(self) -> usize {
        self.hits.saturating_add(self.misses)
    }

    /// Fraction of counted lookups that were hits, or `0.0` when there were none.
    #[inline]
    pub fn hit_rate(self) -> f64 {
        match self.requests() {
            0 => 0.0,
            requests => self.hits as f64 / requests as f64,
        }
    }
}
1131
1132#[derive(Debug)]
1146pub struct TailCellMomentCache {
1147 moments: ByteLruCache<TailCellMomentCacheKey, CellMomentState>,
1148 hits: std::sync::atomic::AtomicUsize,
1149 misses: std::sync::atomic::AtomicUsize,
1150}
1151
1152impl Default for TailCellMomentCache {
1153 fn default() -> Self {
1154 let shard_count = std::thread::available_parallelism()
1158 .map(|workers| workers.get().saturating_mul(8))
1159 .unwrap_or(32)
1160 .clamp(8, 256);
1161 Self {
1162 moments: ByteLruCache::with_max_entries_sharded(
1163 TAIL_CELL_MOMENT_CACHE_MAX_BYTES,
1164 TAIL_CELL_MOMENT_CACHE_MAX_ENTRIES,
1165 shard_count,
1166 ),
1167 hits: std::sync::atomic::AtomicUsize::new(0),
1168 misses: std::sync::atomic::AtomicUsize::new(0),
1169 }
1170 }
1171}
1172
1173impl TailCellMomentCache {
1174 #[inline]
1176 pub fn new() -> Self {
1177 Self::default()
1178 }
1179
1180 #[inline]
1183 pub fn clear(&self) {
1184 self.moments.clear();
1185 self.hits.store(0, std::sync::atomic::Ordering::Relaxed);
1186 self.misses.store(0, std::sync::atomic::Ordering::Relaxed);
1187 }
1188
1189 #[inline]
1191 pub fn stats(&self) -> TailCellMomentCacheStats {
1192 TailCellMomentCacheStats {
1193 hits: self.hits.load(std::sync::atomic::Ordering::Relaxed),
1194 misses: self.misses.load(std::sync::atomic::Ordering::Relaxed),
1195 entries: self.moments.len(),
1196 }
1197 }
1198
1199 pub fn evaluate(
1209 &self,
1210 cell: DenestedCubicCell,
1211 max_degree: usize,
1212 ) -> Result<CellMomentState, String> {
1213 let Some(key) = tail_cell_cache_key(cell, max_degree) else {
1214 return evaluate_cell_moments_uncached(cell, max_degree);
1215 };
1216 if let Some(state) = self.moments.get(&key) {
1217 self.hits.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
1218 return Ok(state);
1219 }
1220 let state = evaluate_cell_moments_uncached(cell, max_degree)?;
1221 self.misses
1222 .fetch_add(1, std::sync::atomic::Ordering::Relaxed);
1223 self.moments.insert(key, state.clone());
1224 Ok(state)
1225 }
1226}
1227
/// Lazily initialised process-wide `TailCellMomentCache`.
static TAIL_CELL_MOMENT_CACHE: std::sync::OnceLock<TailCellMomentCache> =
    std::sync::OnceLock::new();
/// Global on/off flag for the tail-cell moment cache; starts out `true`.
/// NOTE(review): the code that reads this flag is outside this section.
static TAIL_CELL_MOMENT_CACHE_ENABLED: std::sync::atomic::AtomicBool =
    std::sync::atomic::AtomicBool::new(true);
1232
/// Returns the process-wide cache, constructing it with
/// `TailCellMomentCache::default` on first use.
fn tail_cell_moment_cache() -> &'static TailCellMomentCache {
    TAIL_CELL_MOMENT_CACHE.get_or_init(TailCellMomentCache::default)
}
1236
1237#[inline]
1238fn tail_cell_cache_key(
1239 cell: DenestedCubicCell,
1240 max_degree: usize,
1241) -> Option<TailCellMomentCacheKey> {
1242 if cell.c2.abs() > NORMALIZED_CELL_BRANCH_TOL || cell.c3.abs() > NORMALIZED_CELL_BRANCH_TOL {
1243 return None;
1244 }
1245 match (!cell.left.is_finite(), !cell.right.is_finite()) {
1246 (true, false) if cell.right.is_finite() => Some(TailCellMomentCacheKey {
1247 c0_bits: cell.c0.to_bits(),
1248 c1_bits: cell.c1.to_bits(),
1249 endpoint_bits: cell.right.to_bits(),
1250 side: -1,
1251 max_degree,
1252 }),
1253 (false, true) if cell.left.is_finite() => Some(TailCellMomentCacheKey {
1254 c0_bits: cell.c0.to_bits(),
1255 c1_bits: cell.c1.to_bits(),
1256 endpoint_bits: cell.left.to_bits(),
1257 side: 1,
1258 max_degree,
1259 }),
1260 _ => None,
1261 }
1262}
1263
/// Stores `enabled` into the global tail-cell cache flag (relaxed ordering).
pub fn set_tail_cell_moment_cache_enabled(enabled: bool) {
    TAIL_CELL_MOMENT_CACHE_ENABLED.store(enabled, std::sync::atomic::Ordering::Relaxed);
}
1267
/// Clears the process-wide tail-cell moment cache, creating it first if it
/// has not been initialised yet.
pub fn reset_tail_cell_moment_cache() {
    tail_cell_moment_cache().clear();
}
1271
/// Returns the statistics of the process-wide tail-cell moment cache.
pub fn tail_cell_moment_cache_stats() -> TailCellMomentCacheStats {
    tail_cell_moment_cache().stats()
}
1275
1276#[derive(Clone, Copy, Debug, Eq)]
1277pub struct CellFingerprint {
1278 c0: u64,
1279 c1: u64,
1280 c2: u64,
1281 c3: u64,
1282 left: u64,
1283 right: u64,
1284}
1285
1286impl CellFingerprint {
1287 #[inline]
1288 pub fn new(cell: DenestedCubicCell) -> Self {
1289 Self {
1290 c0: cell.c0.to_bits(),
1291 c1: cell.c1.to_bits(),
1292 c2: cell.c2.to_bits(),
1293 c3: cell.c3.to_bits(),
1294 left: cell.left.to_bits(),
1295 right: cell.right.to_bits(),
1296 }
1297 }
1298}
1299
1300impl PartialEq for CellFingerprint {
1301 #[inline]
1302 fn eq(&self, other: &Self) -> bool {
1303 self.c0 == other.c0
1304 && self.c1 == other.c1
1305 && self.c2 == other.c2
1306 && self.c3 == other.c3
1307 && self.left == other.left
1308 && self.right == other.right
1309 }
1310}
1311
1312impl Hash for CellFingerprint {
1313 #[inline]
1314 fn hash<H: Hasher>(&self, state: &mut H) {
1315 self.c0.hash(state);
1316 self.c1.hash(state);
1317 self.c2.hash(state);
1318 self.c3.hash(state);
1319 self.left.hash(state);
1320 self.right.hash(state);
1321 }
1322}
1323
/// Cache entry that can hold a value-moment state, a derivative-moment state,
/// or both, each behind an `Arc`.
#[derive(Clone, Debug, Default, PartialEq)]
pub struct CachedCellMoments {
    /// Shared value-moment state, if one has been stored.
    state: Option<Arc<CellMomentState>>,
    /// Shared derivative-moment state, if one has been stored.
    derivative_state: Option<Arc<CellDerivativeMomentState>>,
}
1341
1342impl CachedCellMoments {
1343 #[inline]
1344 pub fn new(state: Arc<CellMomentState>) -> Self {
1345 Self {
1346 state: Some(state),
1347 derivative_state: None,
1348 }
1349 }
1350
1351 #[inline]
1352 pub fn new_derivative(state: Arc<CellDerivativeMomentState>) -> Self {
1353 Self {
1354 state: None,
1355 derivative_state: Some(state),
1356 }
1357 }
1358
1359 #[inline]
1360 pub fn state_for_degree(&self, max_degree: usize) -> Option<CellMomentState> {
1361 let state = self.state.as_ref()?;
1362 if state.moments.len().saturating_sub(1) < max_degree {
1363 return None;
1364 }
1365 let mut state = (**state).clone();
1370 state.moments.truncate(max_degree + 1);
1371 Some(state)
1372 }
1373
1374 #[inline]
1375 pub fn derivative_state_for_degree(
1376 &self,
1377 max_degree: usize,
1378 ) -> Option<CellDerivativeMomentState> {
1379 let state = self.derivative_state.as_ref()?;
1380 if state.moments.len().saturating_sub(1) < max_degree {
1381 return None;
1382 }
1383 let mut state = (**state).clone();
1385 state.moments.truncate(max_degree + 1);
1386 Some(state)
1387 }
1388
1389 #[inline]
1390 pub fn with_value(mut self, state: Arc<CellMomentState>) -> Self {
1391 self.state = Some(state);
1392 self
1393 }
1394
1395 #[inline]
1396 pub fn with_derivative(mut self, state: Arc<CellDerivativeMomentState>) -> Self {
1397 self.derivative_state = Some(state);
1398 self
1399 }
1400}
1401
1402impl ResidentBytes for CachedCellMoments {
1403 fn resident_bytes(&self) -> usize {
1404 let value_bytes = self
1405 .state
1406 .as_ref()
1407 .map_or(0, |state| state.resident_bytes());
1408 let derivative_bytes = self
1409 .derivative_state
1410 .as_ref()
1411 .map_or(0, |state| state.resident_bytes());
1412 std::mem::size_of::<Self>()
1413 .saturating_add(value_bytes)
1414 .saturating_add(derivative_bytes)
1415 }
1416}
1417
/// Pair of atomic hit/miss counters for a cell-moment cache.
#[derive(Debug, Default)]
pub struct CellMomentCacheStats {
    hits: AtomicU64,
    misses: AtomicU64,
}

impl CellMomentCacheStats {
    /// Reads the counters as `(hits, misses)` using relaxed loads.
    #[inline]
    pub fn snapshot(&self) -> (u64, u64) {
        let hits = self.hits.load(Ordering::Relaxed);
        let misses = self.misses.load(Ordering::Relaxed);
        (hits, misses)
    }

    /// Compares the current counters with an earlier `snapshot`.
    ///
    /// Returns `(new_hits, new_misses, hit_rate)`. The differences saturate at
    /// zero, and the rate is `0.0` when there were no new lookups.
    #[inline]
    pub fn hit_rate_delta(&self, before: (u64, u64)) -> (u64, u64, f64) {
        let (earlier_hits, earlier_misses) = before;
        let (hits, misses) = self.snapshot();
        let new_hits = hits.saturating_sub(earlier_hits);
        let new_misses = misses.saturating_sub(earlier_misses);
        let rate = match new_hits + new_misses {
            0 => 0.0,
            lookups => new_hits as f64 / lookups as f64,
        };
        (new_hits, new_misses, rate)
    }
}
1447
/// `ByteLruCache` mapping a `CellFingerprint` to its `CachedCellMoments`.
pub type CellMomentLruCache = ByteLruCache<CellFingerprint, CachedCellMoments>;

/// Number of moments a `CellMomentVec` stores inline before spilling to the heap.
pub const CELL_MOMENT_INLINE_CAPACITY: usize = 10;

/// Small vector of `f64` moments with `CELL_MOMENT_INLINE_CAPACITY` inline slots.
pub type CellMomentVec = SmallVec<[f64; CELL_MOMENT_INLINE_CAPACITY]>;
1453
/// Owned result of a cell moment evaluation.
#[derive(Clone, Debug, PartialEq)]
pub struct CellMomentState {
    /// Evaluation branch recorded for the cell.
    pub branch: ExactCellBranch,
    /// Scalar value stored alongside the moments.
    /// NOTE(review): presumably the cell's integral value; confirm at the producer.
    pub value: f64,
    /// Moment sequence; callers in this file index it by degree.
    pub moments: CellMomentVec,
}
1460
1461impl ResidentBytes for CellMomentState {
1462 fn resident_bytes(&self) -> usize {
1463 let spilled_bytes = if self.moments.spilled() {
1464 self.moments
1465 .capacity()
1466 .saturating_mul(std::mem::size_of::<f64>())
1467 } else {
1468 0
1469 };
1470 std::mem::size_of::<Self>().saturating_add(spilled_bytes)
1471 }
1472}
1473
/// Owned moment sequence stored for derivative evaluation; unlike
/// `CellMomentState` it carries no scalar `value`.
#[derive(Clone, Debug, PartialEq)]
pub struct CellDerivativeMomentState {
    /// Evaluation branch recorded for the cell.
    pub branch: ExactCellBranch,
    /// Moment sequence; callers in this file index it by degree.
    pub moments: CellMomentVec,
}
1479
1480impl ResidentBytes for CellDerivativeMomentState {
1481 fn resident_bytes(&self) -> usize {
1482 let spilled_bytes = if self.moments.spilled() {
1483 self.moments
1484 .capacity()
1485 .saturating_mul(std::mem::size_of::<f64>())
1486 } else {
1487 0
1488 };
1489 std::mem::size_of::<Self>().saturating_add(spilled_bytes)
1490 }
1491}
1492
/// Borrowed counterpart of `CellMomentState`: same fields, but the moments
/// are a slice with lifetime `'a` instead of an owned vector.
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct CellMomentStateRef<'a> {
    /// Evaluation branch recorded for the cell.
    pub branch: ExactCellBranch,
    /// Scalar value stored alongside the moments.
    pub value: f64,
    /// Borrowed moment sequence.
    pub moments: &'a [f64],
}
1499
1500#[derive(Clone, Debug)]
1501pub struct CellMomentScratch {
1502 moments: Vec<f64>,
1503}
1504
1505impl Default for CellMomentScratch {
1506 fn default() -> Self {
1507 Self {
1511 moments: Vec::with_capacity(MAX_AFFINE_ANCHOR_DEGREE + 1),
1512 }
1513 }
1514}
1515
1516impl CellMomentScratch {
1517 pub fn new() -> Self {
1518 Self::default()
1519 }
1520
1521 pub fn with_capacity(max_degree: usize) -> Self {
1522 Self {
1523 moments: Vec::with_capacity(max_degree + 1),
1524 }
1525 }
1526
1527 #[inline]
1528 fn prepare_moments(&mut self, len: usize) -> &mut [f64] {
1529 if self.moments.capacity() < len {
1530 CELL_MOMENT_REALLOCS.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
1531 self.moments.reserve(len - self.moments.capacity());
1532 }
1533 self.moments.resize(len, 0.0);
1534 self.moments.fill(0.0);
1535 &mut self.moments
1536 }
1537}
1538
/// Crate-wide counter incremented by `CellMomentScratch::prepare_moments`
/// each time a scratch buffer is found too small for the requested length.
pub(crate) static CELL_MOMENT_REALLOCS: std::sync::atomic::AtomicUsize =
    std::sync::atomic::AtomicUsize::new(0);
1544
/// Twenty quadrature nodes in ascending order, symmetric about zero.
///
/// The values agree with the standard 20-point Gauss–Legendre abscissae on
/// `[-1, 1]`; entry `i` pairs with `GL20_WEIGHTS[i]`.
pub const GL20_NODES: [f64; 20] = [
    -0.993_128_599_185_094_9,
    -0.963_971_927_277_913_8,
    -0.912_234_428_251_326,
    -0.839_116_971_822_218_8,
    -0.746_331_906_460_150_8,
    -0.636_053_680_726_515,
    -0.510_867_001_950_827_1,
    -0.373_706_088_715_419_6,
    -0.227_785_851_141_645_1,
    -0.076_526_521_133_497_33,
    0.076_526_521_133_497_33,
    0.227_785_851_141_645_1,
    0.373_706_088_715_419_6,
    0.510_867_001_950_827_1,
    0.636_053_680_726_515,
    0.746_331_906_460_150_8,
    0.839_116_971_822_218_8,
    0.912_234_428_251_326,
    0.963_971_927_277_913_8,
    0.993_128_599_185_094_9,
];
1573
/// Twenty quadrature weights, listed in the same order as `GL20_NODES` and
/// therefore symmetric about the middle of the table.
///
/// The values agree with the standard 20-point Gauss–Legendre weights on
/// `[-1, 1]`.
pub const GL20_WEIGHTS: [f64; 20] = [
    0.017_614_007_139_152_12,
    0.040_601_429_800_386_94,
    0.062_672_048_334_109_06,
    0.083_276_741_576_704_75,
    0.101_930_119_817_240_4,
    0.118_194_531_961_518_4,
    0.131_688_638_449_176_6,
    0.142_096_109_318_382_1,
    0.149_172_986_472_603_7,
    0.152_753_387_130_725_9,
    0.152_753_387_130_725_9,
    0.149_172_986_472_603_7,
    0.142_096_109_318_382_1,
    0.131_688_638_449_176_6,
    0.118_194_531_961_518_4,
    0.101_930_119_817_240_4,
    0.083_276_741_576_704_75,
    0.062_672_048_334_109_06,
    0.040_601_429_800_386_94,
    0.017_614_007_139_152_12,
];
1597
1598fn dedup_sorted_tagged_breakpoints(points: &mut Vec<(f64, PartitionEdge)>) {
1604 points.sort_by(|lhs, rhs| {
1605 lhs.0
1606 .partial_cmp(&rhs.0)
1607 .unwrap_or(std::cmp::Ordering::Equal)
1608 });
1609 points.dedup_by(|lhs, rhs| {
1610 let coincide = if lhs.0 == rhs.0 {
1611 true
1612 } else if lhs.0.is_finite() && rhs.0.is_finite() {
1613 (lhs.0 - rhs.0).abs() <= 1e-12
1614 } else {
1615 false
1616 };
1617 if coincide && matches!(lhs.1, PartitionEdge::Fixed(_)) {
1618 rhs.1 = lhs.1;
1621 }
1622 coincide
1623 });
1624}
1625
1626#[inline]
1627pub fn interval_probe_point(left: f64, right: f64) -> Result<f64, String> {
1628 if !(left < right) {
1629 return Err(CubicCellKernelError::invalid_interval(format!(
1630 "interval probe requires ordered bounds, got [{left}, {right}]"
1631 ))
1632 .into());
1633 }
1634 if left.is_finite() && right.is_finite() {
1635 Ok(0.5 * (left + right))
1636 } else if left == f64::NEG_INFINITY && right == f64::INFINITY {
1637 Ok(0.0)
1638 } else if left == f64::NEG_INFINITY && right.is_finite() {
1639 Ok(right - 1.0)
1640 } else if left.is_finite() && right == f64::INFINITY {
1641 Ok(left + 1.0)
1642 } else {
1643 Err(CubicCellKernelError::invalid_interval(format!(
1644 "interval probe requires finite bounds or full infinities, got [{left}, {right}]"
1645 ))
1646 .into())
1647 }
1648}
1649
/// Ascending-power coefficients of the cubic `z + eta(z) * eta'(z)` for
/// `eta(z) = c0 + c1 z + c2 z^2`.
///
/// NOTE(review): presumably this is `q'(z)` for the quartic exponent used by
/// `DenestedCubicCell::q`; confirm against that definition.
#[inline]
pub fn quartic_qprime_coefficients(c0: f64, c1: f64, c2: f64) -> [f64; 4] {
    let constant = c0 * c1;
    let linear = 1.0 + c1 * c1 + 2.0 * c0 * c2;
    let quadratic = 3.0 * c1 * c2;
    let cubic = 2.0 * c2 * c2;
    [constant, linear, quadratic, cubic]
}
1659
/// Ascending-power coefficients of the quintic `z + eta(z) * eta'(z)` for
/// `eta(z) = c0 + c1 z + c2 z^2 + c3 z^3`.
///
/// NOTE(review): presumably this is `q'(z)` for the sextic exponent used by
/// `DenestedCubicCell::q`; confirm against that definition.
#[inline]
pub fn sextic_qprime_coefficients(c0: f64, c1: f64, c2: f64, c3: f64) -> [f64; 6] {
    let constant = c0 * c1;
    let linear = 1.0 + c1 * c1 + 2.0 * c0 * c2;
    let quadratic = 3.0 * c0 * c3 + 3.0 * c1 * c2;
    let cubic = 4.0 * c1 * c3 + 2.0 * c2 * c2;
    let quartic = 5.0 * c2 * c3;
    let quintic = 3.0 * c3 * c3;
    [constant, linear, quadratic, cubic, quartic, quintic]
}
1671
1672#[inline]
1677fn moment_boundary_term_with_powers(
1678 cell: DenestedCubicCell,
1679 left_pow_n: f64,
1680 right_pow_n: f64,
1681) -> f64 {
1682 let left_term = if cell.left.is_infinite() {
1683 0.0
1684 } else {
1685 left_pow_n * (-cell.q(cell.left)).exp()
1686 };
1687 let right_term = if cell.right.is_infinite() {
1688 0.0
1689 } else {
1690 right_pow_n * (-cell.q(cell.right)).exp()
1691 };
1692 right_term - left_term
1693}
1694
1695pub fn reduce_quartic_moments(
1696 cell: DenestedCubicCell,
1697 base_m0_m2: [f64; 3],
1698 max_degree: usize,
1699) -> Result<Vec<f64>, String> {
1700 if max_degree <= 2 {
1701 return Ok(base_m0_m2[..=max_degree].to_vec());
1702 }
1703 let d = quartic_qprime_coefficients(cell.c0, cell.c1, cell.c2);
1704 let lead = d[3];
1705 if !lead.is_finite() || lead.abs() <= 1e-18 {
1706 return Err(CubicCellKernelError::invalid_cell_shape(format!(
1707 "quartic moment reduction requires nonzero leading coefficient, got {lead:.3e}"
1708 ))
1709 .into());
1710 }
1711 let mut moments = vec![0.0; max_degree + 1];
1712 moments[0] = base_m0_m2[0];
1713 moments[1] = base_m0_m2[1];
1714 moments[2] = base_m0_m2[2];
1715 let left_finite = cell.left.is_finite();
1720 let right_finite = cell.right.is_finite();
1721 let mut left_pow_n = if left_finite { 1.0 } else { 0.0 };
1722 let mut right_pow_n = if right_finite { 1.0 } else { 0.0 };
1723 for n in 0..=(max_degree - 3) {
1724 let b_n = moment_boundary_term_with_powers(cell, left_pow_n, right_pow_n);
1725 let mut numer = if n == 0 {
1726 0.0
1727 } else {
1728 (n as f64) * moments[n - 1]
1729 };
1730 for j in 0..=2 {
1731 numer -= d[j] * moments[n + j];
1732 }
1733 numer -= b_n;
1734 moments[n + 3] = numer / lead;
1735 if left_finite {
1736 left_pow_n *= cell.left;
1737 }
1738 if right_finite {
1739 right_pow_n *= cell.right;
1740 }
1741 }
1742 Ok(moments)
1743}
1744
1745pub fn reduce_sextic_moments(
1746 cell: DenestedCubicCell,
1747 base_m0_m4: [f64; 5],
1748 max_degree: usize,
1749) -> Result<Vec<f64>, String> {
1750 if max_degree <= 4 {
1751 return Ok(base_m0_m4[..=max_degree].to_vec());
1752 }
1753 let d = sextic_qprime_coefficients(cell.c0, cell.c1, cell.c2, cell.c3);
1754 let lead = d[5];
1755 if !lead.is_finite() {
1756 return Err(CubicCellKernelError::invalid_cell_shape(format!(
1757 "sextic moment reduction encountered non-finite leading coefficient: {lead:.3e}"
1758 ))
1759 .into());
1760 }
1761 if let Some(lower_branch) = degenerate_sextic_branch(cell, lead)? {
1762 if lower_branch == ExactCellBranch::Quartic {
1763 return evaluate_non_affine_cell_state(
1764 DenestedCubicCell { c3: 0.0, ..cell },
1765 ExactCellBranch::Quartic,
1766 max_degree,
1767 )
1768 .map(|state| state.moments.into_vec());
1769 }
1770 return evaluate_affine_cell_state(
1771 DenestedCubicCell {
1772 left: cell.left,
1773 right: cell.right,
1774 c0: cell.c0,
1775 c1: cell.c1,
1776 c2: 0.0,
1777 c3: 0.0,
1778 },
1779 max_degree,
1780 )
1781 .map(|state| state.moments.into_vec());
1782 }
1783 let mut moments = vec![0.0; max_degree + 1];
1784 for (idx, value) in base_m0_m4.into_iter().enumerate() {
1785 moments[idx] = value;
1786 }
1787 let left_finite = cell.left.is_finite();
1788 let right_finite = cell.right.is_finite();
1789 let mut left_pow_n = if left_finite { 1.0 } else { 0.0 };
1790 let mut right_pow_n = if right_finite { 1.0 } else { 0.0 };
1791 for n in 0..=(max_degree - 5) {
1792 let b_n = moment_boundary_term_with_powers(cell, left_pow_n, right_pow_n);
1793 let mut numer = if n == 0 {
1794 0.0
1795 } else {
1796 (n as f64) * moments[n - 1]
1797 };
1798 for j in 0..=4 {
1799 numer -= d[j] * moments[n + j];
1800 }
1801 numer -= b_n;
1802 moments[n + 5] = numer / lead;
1803 if left_finite {
1804 left_pow_n *= cell.left;
1805 }
1806 if right_finite {
1807 right_pow_n *= cell.right;
1808 }
1809 }
1810 Ok(moments)
1811}
1812
1813#[inline]
1814pub fn cell_first_derivative_from_moments(
1815 derivative_coefficients: &[f64],
1816 moments: &[f64],
1817) -> Result<f64, String> {
1818 let value = moment_dot_with_coefficients(derivative_coefficients, moments, "first derivative")?;
1819 Ok(value * INV_TWO_PI)
1820}
1821
/// Highest moment degree needed for a first derivative: the coefficient count
/// minus one, or `0` for an empty slice.
#[inline]
pub fn cell_first_derivative_required_max_degree(derivative_coefficients: &[f64]) -> usize {
    match derivative_coefficients.len() {
        0 => 0,
        count => count - 1,
    }
}
1833
/// Highest moment degree needed for a second derivative.
///
/// This is the larger of the degree of `second_coefficients_rs` and
/// `deg(r) + deg(s) + 3`, where an empty slice counts as degree 0.
#[inline]
pub fn cell_second_derivative_required_max_degree(
    first_coefficients_r: &[f64],
    first_coefficients_s: &[f64],
    second_coefficients_rs: &[f64],
) -> usize {
    let degree_of = |coefficients: &[f64]| coefficients.len().saturating_sub(1);
    let product_degree = degree_of(first_coefficients_r) + degree_of(first_coefficients_s) + 3;
    degree_of(second_coefficients_rs).max(product_degree)
}
1854
1855#[inline]
1856pub fn cell_polynomial_integral_from_moments(
1857 polynomial_coefficients: &[f64],
1858 moments: &[f64],
1859 label: &str,
1860) -> Result<f64, String> {
1861 let value = moment_dot_with_coefficients(polynomial_coefficients, moments, label)?;
1862 Ok(value * INV_TWO_PI)
1863}
1864
1865#[inline]
1866pub fn cell_second_derivative_from_moments(
1867 cell: DenestedCubicCell,
1868 first_coefficients_r: &[f64],
1869 first_coefficients_s: &[f64],
1870 second_coefficients_rs: &[f64],
1871 moments: &[f64],
1872) -> Result<f64, String> {
1873 let second_degree = second_coefficients_rs.len().saturating_sub(1);
1874 let product_degree = first_coefficients_r.len().saturating_sub(1)
1875 + first_coefficients_s.len().saturating_sub(1)
1876 + 3;
1877 let needed = second_degree.max(product_degree) + 1;
1878 if needed > moments.len() {
1879 return Err(CubicCellKernelError::insufficient_moments(format!(
1880 "insufficient reduced moments for second derivative: need {}, have {}",
1881 needed,
1882 moments.len()
1883 ))
1884 .into());
1885 }
1886 let second_term = moment_dot_with_coefficients_unchecked(second_coefficients_rs, moments);
1887 let cubic = [cell.c0, cell.c1, cell.c2, cell.c3];
1894 const SCRATCH: usize = 32;
1898 let mut eta_r = [0.0_f64; SCRATCH];
1899 let mut eta_rs = [0.0_f64; SCRATCH];
1900 let er_len = poly_conv_into(&cubic, first_coefficients_r, &mut eta_r);
1901 let ers_len = poly_conv_into(&eta_r[..er_len], first_coefficients_s, &mut eta_rs);
1902 let mut eta_term = 0.0;
1903 for k in 0..ers_len {
1904 eta_term = eta_rs[k].mul_add(moments[k], eta_term);
1905 }
1906 Ok((second_term - eta_term) * INV_TWO_PI)
1907}
1908
1909#[inline]
1929pub fn cell_second_derivative_boundary_integrand(
1930 cell: DenestedCubicCell,
1931 first_coefficients_r: &[f64],
1932 first_coefficients_s: &[f64],
1933 second_coefficients_rs: &[f64],
1934 z: f64,
1935) -> f64 {
1936 let eta = cell.eta(z);
1937 let c_r = poly_eval_at(first_coefficients_r, z);
1938 let c_s = poly_eval_at(first_coefficients_s, z);
1939 let c_rs = poly_eval_at(second_coefficients_rs, z);
1940 (c_rs - eta * c_r * c_s) * (-cell.q(z)).exp() * INV_TWO_PI
1941}
1942
1943pub fn cell_density_boundary_integrand(cell: DenestedCubicCell, g: &[f64], z: f64) -> f64 {
1957 poly_eval_at(g, z) * (-cell.q(z)).exp() * INV_TWO_PI
1958}
1959
/// Evaluates a polynomial given by ascending-power `coefficients` at `z`
/// using Horner's scheme with fused multiply-adds. An empty slice gives `0.0`.
#[inline]
fn poly_eval_at(coefficients: &[f64], z: f64) -> f64 {
    coefficients
        .iter()
        .rev()
        .fold(0.0_f64, |acc, &coefficient| acc.mul_add(z, coefficient))
}
1969
1970#[inline]
1971fn moment_dot_with_coefficients(
1972 coefficients: &[f64],
1973 moments: &[f64],
1974 label: &str,
1975) -> Result<f64, String> {
1976 if coefficients.len() > moments.len() {
1977 return Err(CubicCellKernelError::insufficient_moments(format!(
1978 "insufficient reduced moments for {label}: need {}, have {}",
1979 coefficients.len(),
1980 moments.len()
1981 ))
1982 .into());
1983 }
1984 Ok(moment_dot_with_coefficients_unchecked(
1985 coefficients,
1986 moments,
1987 ))
1988}
1989
/// Dot product of `coefficients` with the first `coefficients.len()` moments,
/// accumulated with fused multiply-adds in index order.
///
/// # Panics
///
/// Panics when `moments` is shorter than `coefficients`.
#[inline]
fn moment_dot_with_coefficients_unchecked(coefficients: &[f64], moments: &[f64]) -> f64 {
    let leading = &moments[..coefficients.len()];
    coefficients
        .iter()
        .zip(leading)
        .fold(0.0, |acc, (&coeff, &moment)| coeff.mul_add(moment, acc))
}
1998
/// Writes the coefficient convolution (polynomial product) of `lhs` and `rhs`
/// into the front of `out` and returns the number of coefficients written.
///
/// Returns `0` and leaves `out` untouched when either input is empty.
/// Entries of `out` beyond the returned length are not modified.
///
/// # Panics
///
/// Panics when `out` is shorter than `lhs.len() + rhs.len() - 1`.
#[inline]
fn poly_conv_into(lhs: &[f64], rhs: &[f64], out: &mut [f64]) -> usize {
    if lhs.is_empty() || rhs.is_empty() {
        return 0;
    }
    let len = lhs.len() + rhs.len() - 1;
    assert!(out.len() >= len);
    out[..len].fill(0.0);
    for (shift, &lv) in lhs.iter().enumerate() {
        // Row `shift` of the product lands on out[shift .. shift + rhs.len()].
        let window = &mut out[shift..shift + rhs.len()];
        for (slot, &rv) in window.iter_mut().zip(rhs) {
            *slot = lv.mul_add(rv, *slot);
        }
    }
    len
}
2025
2026#[inline]
2027fn require_moments_degree(
2028 required_degree: usize,
2029 moments: &[f64],
2030 label: &str,
2031) -> Result<(), String> {
2032 if required_degree >= moments.len() {
2033 return Err(CubicCellKernelError::insufficient_moments(format!(
2034 "insufficient reduced moments for {label}: need {}, have {}",
2035 required_degree + 1,
2036 moments.len()
2037 ))
2038 .into());
2039 }
2040 Ok::<(), _>(())
2041}
2042
2043#[inline]
2044fn require_scratch_capacity(
2045 required_len: usize,
2046 capacity: usize,
2047 label: &str,
2048) -> Result<(), String> {
2049 if required_len > capacity {
2050 return Err(CubicCellKernelError::insufficient_moments(format!(
2051 "{label} polynomial convolution scratch too small: need {required_len}, have {capacity}"
2052 ))
2053 .into());
2054 }
2055 Ok::<(), _>(())
2056}
2057
/// Coefficient count of the product of polynomials with the given coefficient
/// counts: the sum of the lengths minus one per multiplication.
///
/// Returns `0` for an empty list or when any factor is empty.
#[inline]
fn convolution_chain_len(lengths: &[usize]) -> usize {
    let has_empty_factor = lengths.iter().any(|&len| len == 0);
    if lengths.is_empty() || has_empty_factor {
        return 0;
    }
    let total: usize = lengths.iter().sum();
    let multiplications = lengths.len() - 1;
    total - multiplications
}
2066
/// Degree of a first-derivative coefficient slice (`len - 1`).
///
/// # Errors
///
/// Returns a message naming `label` when the slice is empty.
#[inline]
fn first_coefficients_degree(label: &str, coefficients: &[f64]) -> Result<usize, String> {
    match coefficients.len() {
        0 => Err(format!(
            "{label} first-derivative coefficients must be non-empty"
        )),
        count => Ok(count - 1),
    }
}
2074
2075#[inline]
2076pub fn cell_third_derivative_from_moments(
2077 cell: DenestedCubicCell,
2078 first_coefficients_r: &[f64],
2079 first_coefficients_s: &[f64],
2080 first_coefficients_t: &[f64],
2081 second_coefficients_rs: &[f64],
2082 second_coefficients_rt: &[f64],
2083 second_coefficients_st: &[f64],
2084 third_coefficients_rst: &[f64],
2085 moments: &[f64],
2086) -> Result<f64, String> {
2087 let eta = [cell.c0, cell.c1, cell.c2, cell.c3];
2088 let r_degree = first_coefficients_degree("r", first_coefficients_r)?;
2089 let s_degree = first_coefficients_degree("s", first_coefficients_s)?;
2090 let t_degree = first_coefficients_degree("t", first_coefficients_t)?;
2091 let second_sum_degree = [
2092 second_coefficients_rs.len() + first_coefficients_t.len(),
2093 second_coefficients_rt.len() + first_coefficients_s.len(),
2094 second_coefficients_st.len() + first_coefficients_r.len(),
2095 ]
2096 .into_iter()
2097 .max()
2098 .unwrap_or(0)
2099 .saturating_sub(1);
2100 let triple_product_degree = r_degree + s_degree + t_degree;
2101 let needed = (third_coefficients_rst.len().saturating_sub(1))
2102 .max(3 + second_sum_degree)
2103 .max(6 + triple_product_degree);
2104 require_moments_degree(needed, moments, "third derivative")?;
2105
2106 let third_term = moment_dot_with_coefficients_unchecked(third_coefficients_rst, moments);
2107
2108 const SCRATCH: usize = 32;
2112 let max_linear_conv_len = [
2113 convolution_chain_len(&[
2114 eta.len(),
2115 second_coefficients_rs.len(),
2116 first_coefficients_t.len(),
2117 ]),
2118 convolution_chain_len(&[
2119 eta.len(),
2120 second_coefficients_rt.len(),
2121 first_coefficients_s.len(),
2122 ]),
2123 convolution_chain_len(&[
2124 eta.len(),
2125 second_coefficients_st.len(),
2126 first_coefficients_r.len(),
2127 ]),
2128 ]
2129 .into_iter()
2130 .max()
2131 .unwrap_or(0);
2132 let max_cubic_conv_len = convolution_chain_len(&[
2133 7,
2134 first_coefficients_r.len(),
2135 first_coefficients_s.len(),
2136 first_coefficients_t.len(),
2137 ]);
2138 require_scratch_capacity(
2139 max_linear_conv_len.max(max_cubic_conv_len),
2140 SCRATCH,
2141 "third derivative",
2142 )?;
2143 let mut buf_a = [0.0_f64; SCRATCH];
2144 let mut buf_b = [0.0_f64; SCRATCH];
2145
2146 let mut eta_second_term = 0.0;
2149 let conv_dot = |first: &[f64],
2150 second: &[f64],
2151 buf_a: &mut [f64; SCRATCH],
2152 buf_b: &mut [f64; SCRATCH]|
2153 -> f64 {
2154 let m = poly_conv_into(first, second, buf_a);
2155 let n = poly_conv_into(&eta, &buf_a[..m], buf_b);
2156 let mut acc = 0.0;
2157 for k in 0..n {
2158 acc = buf_b[k].mul_add(moments[k], acc);
2159 }
2160 acc
2161 };
2162 eta_second_term += conv_dot(
2163 second_coefficients_rs,
2164 first_coefficients_t,
2165 &mut buf_a,
2166 &mut buf_b,
2167 );
2168 eta_second_term += conv_dot(
2169 second_coefficients_rt,
2170 first_coefficients_s,
2171 &mut buf_a,
2172 &mut buf_b,
2173 );
2174 eta_second_term += conv_dot(
2175 second_coefficients_st,
2176 first_coefficients_r,
2177 &mut buf_a,
2178 &mut buf_b,
2179 );
2180
2181 let mut eta_sq_minus_one = [0.0_f64; 7];
2184 for (i, &eta_i) in eta.iter().enumerate() {
2185 for (j, &eta_j) in eta.iter().enumerate() {
2186 eta_sq_minus_one[i + j] = eta_i.mul_add(eta_j, eta_sq_minus_one[i + j]);
2187 }
2188 }
2189 eta_sq_minus_one[0] -= 1.0;
2190
2191 let rs_len = poly_conv_into(first_coefficients_r, first_coefficients_s, &mut buf_a);
2192 let rst_len = poly_conv_into(&buf_a[..rs_len], first_coefficients_t, &mut buf_b);
2193 let final_len = poly_conv_into(&eta_sq_minus_one, &buf_b[..rst_len], &mut buf_a);
2195 let mut cubic_coeff_term = 0.0;
2196 for k in 0..final_len {
2197 cubic_coeff_term = buf_a[k].mul_add(moments[k], cubic_coeff_term);
2198 }
2199
2200 Ok((third_term - eta_second_term + cubic_coeff_term) * INV_TWO_PI)
2201}
2202
2203#[inline]
2204pub fn cell_fourth_derivative_from_moments(
2205 cell: DenestedCubicCell,
2206 first_coefficients_r: &[f64],
2207 first_coefficients_s: &[f64],
2208 first_coefficients_t: &[f64],
2209 first_coefficients_u: &[f64],
2210 second_coefficients_rs: &[f64],
2211 second_coefficients_rt: &[f64],
2212 second_coefficients_ru: &[f64],
2213 second_coefficients_st: &[f64],
2214 second_coefficients_su: &[f64],
2215 second_coefficients_tu: &[f64],
2216 third_coefficients_rst: &[f64],
2217 third_coefficients_rsu: &[f64],
2218 third_coefficients_rtu: &[f64],
2219 third_coefficients_stu: &[f64],
2220 fourth_coefficients_rstu: &[f64],
2221 moments: &[f64],
2222) -> Result<f64, String> {
2223 let eta = [cell.c0, cell.c1, cell.c2, cell.c3];
2224 let r_degree = first_coefficients_degree("r", first_coefficients_r)?;
2225 let s_degree = first_coefficients_degree("s", first_coefficients_s)?;
2226 let t_degree = first_coefficients_degree("t", first_coefficients_t)?;
2227 let u_degree = first_coefficients_degree("u", first_coefficients_u)?;
2228 let linear_sum_degree = [
2229 third_coefficients_rst.len() + first_coefficients_u.len(),
2230 third_coefficients_rsu.len() + first_coefficients_t.len(),
2231 third_coefficients_rtu.len() + first_coefficients_s.len(),
2232 third_coefficients_stu.len() + first_coefficients_r.len(),
2233 second_coefficients_rs.len() + second_coefficients_tu.len(),
2234 second_coefficients_rt.len() + second_coefficients_su.len(),
2235 second_coefficients_ru.len() + second_coefficients_st.len(),
2236 ]
2237 .into_iter()
2238 .max()
2239 .unwrap_or(0)
2240 .saturating_sub(1);
2241 let quad_sum_degree = [
2242 second_coefficients_rs.len() + first_coefficients_t.len() + first_coefficients_u.len(),
2243 second_coefficients_rt.len() + first_coefficients_s.len() + first_coefficients_u.len(),
2244 second_coefficients_ru.len() + first_coefficients_s.len() + first_coefficients_t.len(),
2245 second_coefficients_st.len() + first_coefficients_r.len() + first_coefficients_u.len(),
2246 second_coefficients_su.len() + first_coefficients_r.len() + first_coefficients_t.len(),
2247 second_coefficients_tu.len() + first_coefficients_r.len() + first_coefficients_s.len(),
2248 ]
2249 .into_iter()
2250 .max()
2251 .unwrap_or(0)
2252 .saturating_sub(2);
2253 let quartic_product_degree = r_degree + s_degree + t_degree + u_degree;
2254 let needed = (fourth_coefficients_rstu.len().saturating_sub(1))
2255 .max(3 + linear_sum_degree)
2256 .max(6 + quad_sum_degree)
2257 .max(9 + quartic_product_degree);
2258 require_moments_degree(needed, moments, "fourth derivative")?;
2259
2260 let fourth_term = moment_dot_with_coefficients_unchecked(fourth_coefficients_rstu, moments);
2261
2262 const SCRATCH: usize = 32;
2266 let max_linear_conv_len = [
2267 convolution_chain_len(&[
2268 eta.len(),
2269 third_coefficients_rst.len(),
2270 first_coefficients_u.len(),
2271 ]),
2272 convolution_chain_len(&[
2273 eta.len(),
2274 third_coefficients_rsu.len(),
2275 first_coefficients_t.len(),
2276 ]),
2277 convolution_chain_len(&[
2278 eta.len(),
2279 third_coefficients_rtu.len(),
2280 first_coefficients_s.len(),
2281 ]),
2282 convolution_chain_len(&[
2283 eta.len(),
2284 third_coefficients_stu.len(),
2285 first_coefficients_r.len(),
2286 ]),
2287 convolution_chain_len(&[
2288 eta.len(),
2289 second_coefficients_rs.len(),
2290 second_coefficients_tu.len(),
2291 ]),
2292 convolution_chain_len(&[
2293 eta.len(),
2294 second_coefficients_rt.len(),
2295 second_coefficients_su.len(),
2296 ]),
2297 convolution_chain_len(&[
2298 eta.len(),
2299 second_coefficients_ru.len(),
2300 second_coefficients_st.len(),
2301 ]),
2302 ]
2303 .into_iter()
2304 .max()
2305 .unwrap_or(0);
2306 let max_quad_conv_len = [
2307 convolution_chain_len(&[
2308 7,
2309 second_coefficients_rs.len(),
2310 first_coefficients_t.len(),
2311 first_coefficients_u.len(),
2312 ]),
2313 convolution_chain_len(&[
2314 7,
2315 second_coefficients_rt.len(),
2316 first_coefficients_s.len(),
2317 first_coefficients_u.len(),
2318 ]),
2319 convolution_chain_len(&[
2320 7,
2321 second_coefficients_ru.len(),
2322 first_coefficients_s.len(),
2323 first_coefficients_t.len(),
2324 ]),
2325 convolution_chain_len(&[
2326 7,
2327 second_coefficients_st.len(),
2328 first_coefficients_r.len(),
2329 first_coefficients_u.len(),
2330 ]),
2331 convolution_chain_len(&[
2332 7,
2333 second_coefficients_su.len(),
2334 first_coefficients_r.len(),
2335 first_coefficients_t.len(),
2336 ]),
2337 convolution_chain_len(&[
2338 7,
2339 second_coefficients_tu.len(),
2340 first_coefficients_r.len(),
2341 first_coefficients_s.len(),
2342 ]),
2343 ]
2344 .into_iter()
2345 .max()
2346 .unwrap_or(0);
2347 let max_quartic_conv_len = convolution_chain_len(&[
2348 10,
2349 first_coefficients_r.len(),
2350 first_coefficients_s.len(),
2351 first_coefficients_t.len(),
2352 first_coefficients_u.len(),
2353 ]);
2354 require_scratch_capacity(
2355 max_linear_conv_len
2356 .max(max_quad_conv_len)
2357 .max(max_quartic_conv_len),
2358 SCRATCH,
2359 "fourth derivative",
2360 )?;
2361 let mut buf_a = [0.0_f64; SCRATCH];
2362 let mut buf_b = [0.0_f64; SCRATCH];
2363
2364 let conv_eta_dot = |first: &[f64],
2368 second: &[f64],
2369 buf_a: &mut [f64; SCRATCH],
2370 buf_b: &mut [f64; SCRATCH]|
2371 -> f64 {
2372 let m = poly_conv_into(first, second, buf_a);
2373 let n = poly_conv_into(&eta, &buf_a[..m], buf_b);
2374 let mut acc = 0.0;
2375 for k in 0..n {
2376 acc = buf_b[k].mul_add(moments[k], acc);
2377 }
2378 acc
2379 };
2380 let mut eta_linear_term = 0.0;
2381 eta_linear_term += conv_eta_dot(
2382 third_coefficients_rst,
2383 first_coefficients_u,
2384 &mut buf_a,
2385 &mut buf_b,
2386 );
2387 eta_linear_term += conv_eta_dot(
2388 third_coefficients_rsu,
2389 first_coefficients_t,
2390 &mut buf_a,
2391 &mut buf_b,
2392 );
2393 eta_linear_term += conv_eta_dot(
2394 third_coefficients_rtu,
2395 first_coefficients_s,
2396 &mut buf_a,
2397 &mut buf_b,
2398 );
2399 eta_linear_term += conv_eta_dot(
2400 third_coefficients_stu,
2401 first_coefficients_r,
2402 &mut buf_a,
2403 &mut buf_b,
2404 );
2405 eta_linear_term += conv_eta_dot(
2406 second_coefficients_rs,
2407 second_coefficients_tu,
2408 &mut buf_a,
2409 &mut buf_b,
2410 );
2411 eta_linear_term += conv_eta_dot(
2412 second_coefficients_rt,
2413 second_coefficients_su,
2414 &mut buf_a,
2415 &mut buf_b,
2416 );
2417 eta_linear_term += conv_eta_dot(
2418 second_coefficients_ru,
2419 second_coefficients_st,
2420 &mut buf_a,
2421 &mut buf_b,
2422 );
2423
2424 let mut eta_sq_minus_one = [0.0_f64; 7];
2425 for (i, &eta_i) in eta.iter().enumerate() {
2426 for (j, &eta_j) in eta.iter().enumerate() {
2427 eta_sq_minus_one[i + j] = eta_i.mul_add(eta_j, eta_sq_minus_one[i + j]);
2428 }
2429 }
2430 eta_sq_minus_one[0] -= 1.0;
2431
2432 let mut buf_c = [0.0_f64; SCRATCH];
2435 let conv_weighted_triple_dot = |weight: &[f64],
2436 a: &[f64],
2437 b: &[f64],
2438 c: &[f64],
2439 buf_a: &mut [f64; SCRATCH],
2440 buf_b: &mut [f64; SCRATCH],
2441 buf_c: &mut [f64; SCRATCH]|
2442 -> f64 {
2443 let ab_len = poly_conv_into(a, b, buf_a);
2444 let abc_len = poly_conv_into(&buf_a[..ab_len], c, buf_b);
2445 let final_len = poly_conv_into(weight, &buf_b[..abc_len], buf_c);
2446 let mut acc = 0.0;
2447 for k in 0..final_len {
2448 acc = buf_c[k].mul_add(moments[k], acc);
2449 }
2450 acc
2451 };
2452 let mut quad_coeff_term = 0.0;
2453 quad_coeff_term += conv_weighted_triple_dot(
2454 &eta_sq_minus_one,
2455 second_coefficients_rs,
2456 first_coefficients_t,
2457 first_coefficients_u,
2458 &mut buf_a,
2459 &mut buf_b,
2460 &mut buf_c,
2461 );
2462 quad_coeff_term += conv_weighted_triple_dot(
2463 &eta_sq_minus_one,
2464 second_coefficients_rt,
2465 first_coefficients_s,
2466 first_coefficients_u,
2467 &mut buf_a,
2468 &mut buf_b,
2469 &mut buf_c,
2470 );
2471 quad_coeff_term += conv_weighted_triple_dot(
2472 &eta_sq_minus_one,
2473 second_coefficients_ru,
2474 first_coefficients_s,
2475 first_coefficients_t,
2476 &mut buf_a,
2477 &mut buf_b,
2478 &mut buf_c,
2479 );
2480 quad_coeff_term += conv_weighted_triple_dot(
2481 &eta_sq_minus_one,
2482 second_coefficients_st,
2483 first_coefficients_r,
2484 first_coefficients_u,
2485 &mut buf_a,
2486 &mut buf_b,
2487 &mut buf_c,
2488 );
2489 quad_coeff_term += conv_weighted_triple_dot(
2490 &eta_sq_minus_one,
2491 second_coefficients_su,
2492 first_coefficients_r,
2493 first_coefficients_t,
2494 &mut buf_a,
2495 &mut buf_b,
2496 &mut buf_c,
2497 );
2498 quad_coeff_term += conv_weighted_triple_dot(
2499 &eta_sq_minus_one,
2500 second_coefficients_tu,
2501 first_coefficients_r,
2502 first_coefficients_s,
2503 &mut buf_a,
2504 &mut buf_b,
2505 &mut buf_c,
2506 );
2507
2508 let mut eta_sq = [0.0_f64; 7];
2511 for (i, &eta_i) in eta.iter().enumerate() {
2512 for (j, &eta_j) in eta.iter().enumerate() {
2513 eta_sq[i + j] = eta_i.mul_add(eta_j, eta_sq[i + j]);
2514 }
2515 }
2516 let mut cubic_weight = [0.0_f64; 10];
2517 for (i, &eta_sq_i) in eta_sq.iter().enumerate() {
2518 for (j, &eta_j) in eta.iter().enumerate() {
2519 cubic_weight[i + j] = (-eta_sq_i).mul_add(eta_j, cubic_weight[i + j]);
2520 }
2521 }
2522 for (idx, &eta_coeff) in eta.iter().enumerate() {
2523 cubic_weight[idx] += 3.0 * eta_coeff;
2524 }
2525
2526 let rs_len = poly_conv_into(first_coefficients_r, first_coefficients_s, &mut buf_a);
2531 let rst_len = poly_conv_into(&buf_a[..rs_len], first_coefficients_t, &mut buf_b);
2532 let rstu_len = poly_conv_into(&buf_b[..rst_len], first_coefficients_u, &mut buf_a);
2533 let final_len = poly_conv_into(&cubic_weight, &buf_a[..rstu_len], &mut buf_b);
2534 let mut quartic_coeff_term = 0.0;
2535 for k in 0..final_len {
2536 quartic_coeff_term = buf_b[k].mul_add(moments[k], quartic_coeff_term);
2537 }
2538
2539 Ok((fourth_term - eta_linear_term + quad_coeff_term + quartic_coeff_term) * INV_TWO_PI)
2540}
2541
2542#[inline]
2543pub fn global_cubic_from_local(span: LocalSpanCubic) -> (f64, f64, f64, f64) {
2544 let left = span.left;
2545 let q0 = span.c0 - span.c1 * left + span.c2 * left * left - span.c3 * left * left * left;
2546 let q1 = span.c1 - 2.0 * span.c2 * left + 3.0 * span.c3 * left * left;
2547 let q2 = span.c2 - 3.0 * span.c3 * left;
2548 let q3 = span.c3;
2549 (q0, q1, q2, q3)
2550}
2551
2552#[inline]
2576pub fn transformed_link_cubic(link_span: LocalSpanCubic, a: f64, b: f64) -> (f64, f64, f64, f64) {
2577 let shift = a - link_span.left;
2578 let d0 = link_span.c0
2579 + link_span.c1 * shift
2580 + link_span.c2 * shift * shift
2581 + link_span.c3 * shift * shift * shift;
2582 let d1 = b * (link_span.c1 + 2.0 * link_span.c2 * shift + 3.0 * link_span.c3 * shift * shift);
2583 let d2 = b * b * (link_span.c2 + 3.0 * link_span.c3 * shift);
2584 let d3 = link_span.c3 * b * b * b;
2585 (d0, d1, d2, d3)
2586}
2587
2588#[inline]
2589pub fn denested_cell_coefficients(
2590 score_span: LocalSpanCubic,
2591 link_span: LocalSpanCubic,
2592 a: f64,
2593 b: f64,
2594) -> [f64; 4] {
2595 let (h0, h1, h2, h3) = global_cubic_from_local(score_span);
2596 let (d0, d1, d2, d3) = transformed_link_cubic(link_span, a, b);
2597 [a + b * h0 + d0, b + b * h1 + d1, b * h2 + d2, b * h3 + d3]
2598}
2599
2600#[inline]
2601pub fn denested_cell_coefficient_partials(
2602 score_span: LocalSpanCubic,
2603 link_span: LocalSpanCubic,
2604 a: f64,
2605 b: f64,
2606) -> ([f64; 4], [f64; 4]) {
2607 let (h0, h1, h2, h3) = global_cubic_from_local(score_span);
2608 let shift = a - link_span.left;
2609 let alpha1 = link_span.c1;
2610 let alpha2 = link_span.c2;
2611 let alpha3 = link_span.c3;
2612 let dc_da = [
2613 1.0 + alpha1 + 2.0 * alpha2 * shift + 3.0 * alpha3 * shift * shift,
2614 b * (2.0 * alpha2 + 6.0 * alpha3 * shift),
2615 3.0 * alpha3 * b * b,
2616 0.0,
2617 ];
2618 let dc_db = [
2619 h0,
2620 1.0 + h1 + alpha1 + 2.0 * alpha2 * shift + 3.0 * alpha3 * shift * shift,
2621 h2 + 2.0 * b * (alpha2 + 3.0 * alpha3 * shift),
2622 h3 + 3.0 * alpha3 * b * b,
2623 ];
2624 (dc_da, dc_db)
2625}
2626
2627#[inline]
2628fn link_cubic_second_partials(
2629 link_span: LocalSpanCubic,
2630 a: f64,
2631 b: f64,
2632) -> ([f64; 4], [f64; 4], [f64; 4]) {
2633 let shift = a - link_span.left;
2634 let alpha2 = link_span.c2;
2635 let alpha3 = link_span.c3;
2636 let dc_daa = [
2637 2.0 * alpha2 + 6.0 * alpha3 * shift,
2638 6.0 * alpha3 * b,
2639 0.0,
2640 0.0,
2641 ];
2642 let dc_dab = [
2643 0.0,
2644 2.0 * alpha2 + 6.0 * alpha3 * shift,
2645 6.0 * alpha3 * b,
2646 0.0,
2647 ];
2648 let dc_dbb = [
2649 0.0,
2650 0.0,
2651 2.0 * (alpha2 + 3.0 * alpha3 * shift),
2652 6.0 * alpha3 * b,
2653 ];
2654 (dc_daa, dc_dab, dc_dbb)
2655}
2656
2657#[inline]
2658pub fn denested_cell_second_partials(
2659 score_span: LocalSpanCubic,
2660 link_span: LocalSpanCubic,
2661 a: f64,
2662 b: f64,
2663) -> ([f64; 4], [f64; 4], [f64; 4]) {
2664 let score_left = score_span.left;
2665 if !score_left.is_finite() {
2666 return ([f64::NAN; 4], [f64::NAN; 4], [f64::NAN; 4]);
2667 }
2668 link_cubic_second_partials(link_span, a, b)
2669}
2670
2671#[inline]
2672fn link_cubic_third_partials(
2673 link_span: LocalSpanCubic,
2674) -> ([f64; 4], [f64; 4], [f64; 4], [f64; 4]) {
2675 let alpha3 = link_span.c3;
2676 (
2677 [6.0 * alpha3, 0.0, 0.0, 0.0],
2678 [0.0, 6.0 * alpha3, 0.0, 0.0],
2679 [0.0, 0.0, 6.0 * alpha3, 0.0],
2680 [0.0, 0.0, 0.0, 6.0 * alpha3],
2681 )
2682}
2683
/// Third partial derivatives of the de-nested cell coefficients with respect
/// to `(a, b)`.
///
/// Pure delegation to `link_cubic_third_partials`: only the link span takes
/// part, and of it only `c3` is read there.
// NOTE(review): the tuple is presumably ordered (aaa, aab, abb, bbb) — confirm
// against callers.
#[inline]
pub fn denested_cell_third_partials(
    link_span: LocalSpanCubic,
) -> ([f64; 4], [f64; 4], [f64; 4], [f64; 4]) {
    link_cubic_third_partials(link_span)
}
2690
2691#[inline]
2692pub fn score_basis_cell_coefficients(score_basis_span: LocalSpanCubic, b: f64) -> [f64; 4] {
2693 let (h0, h1, h2, h3) = global_cubic_from_local(score_basis_span);
2694 [b * h0, b * h1, b * h2, b * h3]
2695}
2696
2697#[inline]
2698pub fn link_basis_cell_coefficients(link_basis_span: LocalSpanCubic, a: f64, b: f64) -> [f64; 4] {
2699 let (d0, d1, d2, d3) = transformed_link_cubic(link_basis_span, a, b);
2700 [d0, d1, d2, d3]
2701}
2702
2703#[inline]
2704pub fn link_basis_cell_coefficient_partials(
2705 link_basis_span: LocalSpanCubic,
2706 a: f64,
2707 b: f64,
2708) -> ([f64; 4], [f64; 4]) {
2709 let shift = a - link_basis_span.left;
2710 let alpha1 = link_basis_span.c1;
2711 let alpha2 = link_basis_span.c2;
2712 let alpha3 = link_basis_span.c3;
2713 let dc_da = [
2714 alpha1 + 2.0 * alpha2 * shift + 3.0 * alpha3 * shift * shift,
2715 b * (2.0 * alpha2 + 6.0 * alpha3 * shift),
2716 3.0 * alpha3 * b * b,
2717 0.0,
2718 ];
2719 let dc_db = [
2720 0.0,
2721 alpha1 + 2.0 * alpha2 * shift + 3.0 * alpha3 * shift * shift,
2722 2.0 * b * (alpha2 + 3.0 * alpha3 * shift),
2723 3.0 * alpha3 * b * b,
2724 ];
2725 (dc_da, dc_db)
2726}
2727
/// Second partial derivatives `(dc_daa, dc_dab, dc_dbb)` of the link basis
/// cell coefficients with respect to `(a, b)`.
///
/// Pure delegation to `link_cubic_second_partials`.
#[inline]
pub fn link_basis_cell_second_partials(
    link_basis_span: LocalSpanCubic,
    a: f64,
    b: f64,
) -> ([f64; 4], [f64; 4], [f64; 4]) {
    link_cubic_second_partials(link_basis_span, a, b)
}
2736
/// Third partial derivatives of the link basis cell coefficients with respect
/// to `(a, b)`.
///
/// Pure delegation to `link_cubic_third_partials`, which reads only
/// `link_basis_span.c3`.
#[inline]
pub fn link_basis_cell_third_partials(
    link_basis_span: LocalSpanCubic,
) -> ([f64; 4], [f64; 4], [f64; 4], [f64; 4]) {
    link_cubic_third_partials(link_basis_span)
}
2743
/// Builds the de-nested partition cells for the affine map `a + b*z`.
///
/// This is a forwarding wrapper: all arguments are passed unchanged to
/// [`build_denested_partition_cells_with_tails`], whose result (including the
/// two unbounded tail cells) is returned as-is.
///
/// # Errors
/// Propagates any error returned by the span callbacks or by the underlying
/// builder.
pub fn build_denested_partition_cells<FS, FL>(
    a: f64,
    b: f64,
    score_breaks: &[f64],
    link_breaks: &[f64],
    score_span_at: FS,
    link_span_at: FL,
) -> Result<Vec<DenestedPartitionCell>, String>
where
    FS: FnMut(f64) -> Result<LocalSpanCubic, String>,
    FL: FnMut(f64) -> Result<LocalSpanCubic, String>,
{
    build_denested_partition_cells_with_tails(
        a,
        b,
        score_breaks,
        link_breaks,
        score_span_at,
        link_span_at,
    )
}
2765
2766pub fn build_denested_partition_cells_with_tails<FS, FL>(
2775 a: f64,
2776 b: f64,
2777 score_breaks: &[f64],
2778 link_breaks: &[f64],
2779 mut score_span_at: FS,
2780 mut link_span_at: FL,
2781) -> Result<Vec<DenestedPartitionCell>, String>
2782where
2783 FS: FnMut(f64) -> Result<LocalSpanCubic, String>,
2784 FL: FnMut(f64) -> Result<LocalSpanCubic, String>,
2785{
2786 let mut split_points: Vec<(f64, PartitionEdge)> = score_breaks
2791 .iter()
2792 .map(|&sigma| (sigma, PartitionEdge::Fixed(sigma)))
2793 .collect();
2794 if b.abs() > 1e-12 {
2795 for &tau in link_breaks {
2796 let z = (tau - a) / b;
2797 if z.is_finite() {
2798 split_points.push((z, PartitionEdge::Crossing { tau }));
2799 }
2800 }
2801 }
2802 dedup_sorted_tagged_breakpoints(&mut split_points);
2803
2804 let mut out = Vec::new();
2805
2806 if split_points.is_empty() {
2807 let score_span = score_span_at(0.0)?;
2808 let link_span = link_span_at(a)?;
2809 let coeffs = denested_cell_coefficients(score_span, link_span, a, b);
2810 return Ok(vec![DenestedPartitionCell {
2811 cell: DenestedCubicCell {
2812 left: f64::NEG_INFINITY,
2813 right: f64::INFINITY,
2814 c0: coeffs[0],
2815 c1: coeffs[1],
2816 c2: 0.0,
2817 c3: 0.0,
2818 },
2819 score_span,
2820 link_span,
2821 left_edge: PartitionEdge::Fixed(f64::NEG_INFINITY),
2822 right_edge: PartitionEdge::Fixed(f64::INFINITY),
2823 }]);
2824 }
2825
2826 let (leftmost, leftmost_edge) = split_points[0];
2828 let left_probe = interval_probe_point(f64::NEG_INFINITY, leftmost)?;
2831 let left_score_span = score_span_at(left_probe)?;
2832 let left_link_span = link_span_at(a + b * left_probe)?;
2833 let left_coeffs = denested_cell_coefficients(left_score_span, left_link_span, a, b);
2834 if left_coeffs[2].abs() > NORMALIZED_CELL_BRANCH_TOL
2835 || left_coeffs[3].abs() > NORMALIZED_CELL_BRANCH_TOL
2836 {
2837 return Err(CubicCellKernelError::invalid_cell_shape(format!(
2838 "left tail cell must be affine (deviations constant outside support), \
2839 got c2={:.3e}, c3={:.3e}",
2840 left_coeffs[2], left_coeffs[3]
2841 ))
2842 .into());
2843 }
2844 out.push(DenestedPartitionCell {
2845 cell: DenestedCubicCell {
2846 left: f64::NEG_INFINITY,
2847 right: leftmost,
2848 c0: left_coeffs[0],
2849 c1: left_coeffs[1],
2850 c2: 0.0,
2851 c3: 0.0,
2852 },
2853 score_span: left_score_span,
2854 link_span: left_link_span,
2855 left_edge: PartitionEdge::Fixed(f64::NEG_INFINITY),
2856 right_edge: leftmost_edge,
2857 });
2858
2859 for window in split_points.windows(2) {
2861 let (left, left_edge) = window[0];
2862 let (right, right_edge) = window[1];
2863 if !left.is_finite() || !right.is_finite() || right - left <= 1e-12 {
2864 continue;
2865 }
2866 let mid = interval_probe_point(left, right)?;
2867 let score_span = score_span_at(mid)?;
2868 let link_span = link_span_at(a + b * mid)?;
2869 let coeffs = denested_cell_coefficients(score_span, link_span, a, b);
2870 out.push(DenestedPartitionCell {
2871 cell: DenestedCubicCell {
2872 left,
2873 right,
2874 c0: coeffs[0],
2875 c1: coeffs[1],
2876 c2: coeffs[2],
2877 c3: coeffs[3],
2878 },
2879 score_span,
2880 link_span,
2881 left_edge,
2882 right_edge,
2883 });
2884 }
2885
2886 let (rightmost, rightmost_edge) = *split_points.last().unwrap();
2888 let right_probe = interval_probe_point(rightmost, f64::INFINITY)?;
2889 let right_score_span = score_span_at(right_probe)?;
2890 let right_link_span = link_span_at(a + b * right_probe)?;
2891 let right_coeffs = denested_cell_coefficients(right_score_span, right_link_span, a, b);
2892 if right_coeffs[2].abs() > NORMALIZED_CELL_BRANCH_TOL
2893 || right_coeffs[3].abs() > NORMALIZED_CELL_BRANCH_TOL
2894 {
2895 return Err(CubicCellKernelError::invalid_cell_shape(format!(
2896 "right tail cell must be affine (deviations constant outside support), \
2897 got c2={:.3e}, c3={:.3e}",
2898 right_coeffs[2], right_coeffs[3]
2899 ))
2900 .into());
2901 }
2902 out.push(DenestedPartitionCell {
2903 cell: DenestedCubicCell {
2904 left: rightmost,
2905 right: f64::INFINITY,
2906 c0: right_coeffs[0],
2907 c1: right_coeffs[1],
2908 c2: 0.0,
2909 c3: 0.0,
2910 },
2911 score_span: right_score_span,
2912 link_span: right_link_span,
2913 left_edge: rightmost_edge,
2914 right_edge: PartitionEdge::Fixed(f64::INFINITY),
2915 });
2916
2917 Ok(out)
2918}
2919
2920#[inline]
2921pub fn normalized_non_affine_coefficients(
2922 left: f64,
2923 right: f64,
2924 c0: f64,
2925 c1: f64,
2926 c2: f64,
2927 c3: f64,
2928) -> Result<(f64, f64), String> {
2929 let width = right - left;
2930 if !width.is_finite() || width <= 0.0 {
2931 return Err(CubicCellKernelError::invalid_cell_shape(format!(
2932 "normalized cubic coefficients require a positive finite cell width, got left={left}, right={right}"
2933 ))
2934 .into());
2935 }
2936 let anchor_scale = c0.abs() + c1.abs();
2937 if !anchor_scale.is_finite() {
2938 return Err(CubicCellKernelError::invalid_cell_shape(format!(
2939 "normalized cubic coefficients require finite affine coefficients, got c0={c0}, c1={c1}"
2940 ))
2941 .into());
2942 }
2943 let mid = 0.5 * (left + right);
2944 let half = 0.5 * width;
2945 let k2 = half * half * (c2 + 3.0 * c3 * mid);
2946 let k3 = c3 * half * half * half;
2947 Ok((k2, k3))
2948}
2949
2950#[inline]
2951pub fn branch_cell(cell: DenestedCubicCell) -> Result<ExactCellBranch, String> {
2952 let tol = effective_branch_tol(cell);
2953 if !cell.left.is_finite() || !cell.right.is_finite() {
2954 if cell.c2.abs() <= tol && cell.c3.abs() <= tol {
2955 return Ok(ExactCellBranch::Affine);
2956 }
2957 return Err(CubicCellKernelError::invalid_cell_shape(format!(
2958 "non-affine cells require finite bounds, got [{}, {}] with c2={:.6e}, c3={:.6e}",
2959 cell.left, cell.right, cell.c2, cell.c3
2960 ))
2961 .into());
2962 }
2963 let (k2, k3) = normalized_non_affine_coefficients(
2964 cell.left, cell.right, cell.c0, cell.c1, cell.c2, cell.c3,
2965 )?;
2966 if k2.abs() <= tol && k3.abs() <= tol {
2967 Ok(ExactCellBranch::Affine)
2968 } else if k3.abs() <= tol {
2969 Ok(ExactCellBranch::Quartic)
2970 } else {
2971 Ok(ExactCellBranch::Sextic)
2972 }
2973}
2974
2975#[inline]
2976fn degenerate_sextic_branch(
2977 cell: DenestedCubicCell,
2978 lead: f64,
2979) -> Result<Option<ExactCellBranch>, String> {
2980 let (normalized_k2, normalized_k3) = normalized_non_affine_coefficients(
2984 cell.left, cell.right, cell.c0, cell.c1, cell.c2, cell.c3,
2985 )?;
2986 if normalized_k3.abs() > NORMALIZED_CELL_BRANCH_TOL && lead.abs() > 1e-18 {
2987 return Ok(None);
2988 }
2989 if normalized_k2.abs() > NORMALIZED_CELL_BRANCH_TOL {
2990 Ok(Some(ExactCellBranch::Quartic))
2991 } else {
2992 Ok(Some(ExactCellBranch::Affine))
2993 }
2994}
2995
2996#[inline]
2997fn validate_bvn_args(h: f64, k: f64, rho: f64) -> Result<(), String> {
2998 if !h.is_finite() && !h.is_infinite() {
2999 return Err(CubicCellKernelError::bivariate_normal_domain(
3000 "bivariate normal cdf requires finite or infinite h",
3001 )
3002 .into());
3003 }
3004 if !k.is_finite() && !k.is_infinite() {
3005 return Err(CubicCellKernelError::bivariate_normal_domain(
3006 "bivariate normal cdf requires finite or infinite k",
3007 )
3008 .into());
3009 }
3010 if !rho.is_finite() {
3011 return Err(CubicCellKernelError::bivariate_normal_domain(format!(
3012 "bivariate normal cdf requires finite correlation, got {rho}"
3013 ))
3014 .into());
3015 }
3016 Ok::<(), _>(())
3017}
3018
3019#[inline]
3020fn bvn_gl_sum(h: f64, k: f64, rho_clamped: f64, asr: f64) -> f64 {
3021 if rho_clamped == 0.0 {
3028 return 0.0;
3029 }
3030 let hs = 0.5 * (h * h + k * k);
3031 let hk = h * k;
3032 let half_asr = 0.5 * asr;
3033 let (sin_mid, cos_mid) = half_asr.sin_cos();
3034 let mut sum = 0.0;
3035 for i in 0..10 {
3036 let node = GL20_NODES[i].abs();
3037 let weight = GL20_WEIGHTS[i];
3038 let (sin_delta, cos_delta) = (half_asr * node).sin_cos();
3039
3040 let sn_lo = sin_mid * cos_delta - cos_mid * sin_delta;
3041 let one_minus_lo = 1.0 - sn_lo * sn_lo;
3042 let expo_lo = ((sn_lo * hk) - hs) / one_minus_lo;
3043
3044 let sn_hi = sin_mid * cos_delta + cos_mid * sin_delta;
3045 let one_minus_hi = 1.0 - sn_hi * sn_hi;
3046 let expo_hi = ((sn_hi * hk) - hs) / one_minus_hi;
3047
3048 sum += weight * (expo_lo.exp() + expo_hi.exp());
3049 }
3050 sum
3051}
3052
3053pub fn bivariate_normal_cdf(h: f64, k: f64, rho: f64) -> Result<f64, String> {
3054 validate_bvn_args(h, k, rho)?;
3055 if h == f64::NEG_INFINITY || k == f64::NEG_INFINITY {
3056 return Ok(0.0);
3057 }
3058 if h == f64::INFINITY {
3059 return Ok(normal_cdf(k));
3060 }
3061 if k == f64::INFINITY {
3062 return Ok(normal_cdf(h));
3063 }
3064
3065 let rho_clamped = rho.clamp(-1.0, 1.0);
3066 if rho_clamped >= 1.0 - 1e-12 {
3067 return Ok(normal_cdf(h.min(k)));
3068 }
3069 if rho_clamped <= -1.0 + 1e-12 {
3070 return Ok((normal_cdf(h) - normal_cdf(-k)).clamp(0.0, 1.0));
3071 }
3072 if rho_clamped == 0.0 {
3073 return Ok((normal_cdf(h) * normal_cdf(k)).clamp(0.0, 1.0));
3074 }
3075 if h == 0.0 && k == 0.0 {
3076 return Ok((0.25 + rho_clamped.asin() / std::f64::consts::TAU).clamp(0.0, 1.0));
3077 }
3078
3079 let asr = rho_clamped.asin();
3080 let sum = bvn_gl_sum(h, k, rho_clamped, asr);
3081 Ok((normal_cdf(h) * normal_cdf(k) + asr * sum / (4.0 * std::f64::consts::PI)).clamp(0.0, 1.0))
3082}
3083
3084#[inline]
3085fn bvn_gl_sum_interval(h: f64, left: f64, right: f64, rho_clamped: f64, asr: f64) -> f64 {
3086 if rho_clamped == 0.0 {
3087 return 0.0;
3088 }
3089 let h2 = h * h;
3090 let right_hs = 0.5 * (h2 + right * right);
3091 let left_hs = 0.5 * (h2 + left * left);
3092 let half_asr = 0.5 * asr;
3093 let (sin_mid, cos_mid) = half_asr.sin_cos();
3094 let mut sum = 0.0;
3095 for i in 0..10 {
3096 let node = GL20_NODES[i].abs();
3097 let weight = GL20_WEIGHTS[i];
3098 let (sin_delta, cos_delta) = (half_asr * node).sin_cos();
3099
3100 let sn_lo = sin_mid * cos_delta - cos_mid * sin_delta;
3101 let one_minus_lo = 1.0 - sn_lo * sn_lo;
3102 let lo_right = (((sn_lo * h * right) - right_hs) / one_minus_lo).exp();
3103 let lo_left = (((sn_lo * h * left) - left_hs) / one_minus_lo).exp();
3104
3105 let sn_hi = sin_mid * cos_delta + cos_mid * sin_delta;
3106 let one_minus_hi = 1.0 - sn_hi * sn_hi;
3107 let hi_right = (((sn_hi * h * right) - right_hs) / one_minus_hi).exp();
3108 let hi_left = (((sn_hi * h * left) - left_hs) / one_minus_hi).exp();
3109
3110 sum += weight * ((lo_right - lo_left) + (hi_right - hi_left));
3111 }
3112 sum
3113}
3114
3115fn bivariate_normal_cdf_interval(h: f64, left: f64, right: f64, rho: f64) -> Result<f64, String> {
3116 if right <= left {
3117 return Ok(0.0);
3118 }
3119 if left == f64::NEG_INFINITY && right == f64::INFINITY {
3120 return Ok(normal_cdf(h));
3121 }
3122 if !left.is_finite() || !right.is_finite() {
3123 let upper = bivariate_normal_cdf(h, right, rho)?;
3124 let lower = bivariate_normal_cdf(h, left, rho)?;
3125 return Ok((upper - lower).clamp(0.0, 1.0));
3126 }
3127 validate_bvn_args(h, left, rho)?;
3128 validate_bvn_args(h, right, rho)?;
3129 if h == f64::NEG_INFINITY {
3130 return Ok(0.0);
3131 }
3132 if h == f64::INFINITY {
3133 return Ok((normal_cdf(right) - normal_cdf(left)).clamp(0.0, 1.0));
3134 }
3135
3136 let rho_clamped = rho.clamp(-1.0, 1.0);
3137 if rho_clamped >= 1.0 - 1e-12 || rho_clamped <= -1.0 + 1e-12 {
3138 let upper = bivariate_normal_cdf(h, right, rho_clamped)?;
3139 let lower = bivariate_normal_cdf(h, left, rho_clamped)?;
3140 return Ok((upper - lower).clamp(0.0, 1.0));
3141 }
3142
3143 let cdf_h = normal_cdf(h);
3144 let normal_part = cdf_h * (normal_cdf(right) - normal_cdf(left));
3145 if rho_clamped == 0.0 {
3146 return Ok(normal_part.clamp(0.0, 1.0));
3147 }
3148 let asr = rho_clamped.asin();
3149 let sum = bvn_gl_sum_interval(h, left, right, rho_clamped, asr);
3150 Ok((normal_part + asr * sum / (4.0 * std::f64::consts::PI)).clamp(0.0, 1.0))
3151}
3152
/// Returns `exp(-x^2 / 2)`, with an explicit `0.0` for `x = +/-inf`.
fn exp_neg_half_square(x: f64) -> f64 {
    if !x.is_infinite() {
        return (-0.5 * x * x).exp();
    }
    0.0
}
3160
3161fn truncated_gaussian_zeroth_moment(a: f64, b: f64) -> f64 {
3201 let inv_sqrt2 = 1.0 / std::f64::consts::SQRT_2;
3202 let za = a * inv_sqrt2;
3203 let zb = b * inv_sqrt2;
3204 let erf_diff = if za >= 0.0 {
3205 libm::erfc(za) - libm::erfc(zb)
3206 } else if zb <= 0.0 {
3207 libm::erfc(-zb) - libm::erfc(-za)
3208 } else {
3209 2.0 - libm::erfc(zb) - libm::erfc(-za)
3210 };
3211 (std::f64::consts::PI / 2.0).sqrt() * erf_diff
3213}
3214
3215fn fill_truncated_gaussian_moments(a: f64, b: f64, out: &mut [f64]) {
3237 if out.is_empty() {
3238 return;
3239 }
3240 out[0] = truncated_gaussian_zeroth_moment(a, b);
3241 if out.len() == 1 {
3242 return;
3243 }
3244 let ea = exp_neg_half_square(a);
3245 let eb = exp_neg_half_square(b);
3246 out[1] = ea - eb;
3247 if out.len() == 2 {
3248 return;
3249 }
3250 let a_finite = a.is_finite();
3251 let b_finite = b.is_finite();
3252 let mut a_pow_n_minus_1 = a; let mut b_pow_n_minus_1 = b;
3260 for n in 2..out.len() {
3261 let left = if a_finite { a_pow_n_minus_1 * ea } else { 0.0 };
3262 let right = if b_finite { b_pow_n_minus_1 * eb } else { 0.0 };
3263 out[n] = left - right + (n as f64 - 1.0) * out[n - 2];
3264 a_pow_n_minus_1 *= a;
3265 b_pow_n_minus_1 *= b;
3266 }
3267}
3268
/// Largest `max_degree` accepted by `affine_anchor_moment_vector_into`.
///
/// That function sizes its stack scratch arrays as
/// `MAX_AFFINE_ANCHOR_DEGREE + 1` and asserts that the requested degree does
/// not exceed this bound.
const MAX_AFFINE_ANCHOR_DEGREE: usize = 64;
3274
/// Allocates and returns the moment vector of an affine cell `alpha + beta*z`
/// on `[left, right]`, for degrees `0 ..= max_degree`.
///
/// The result has `max_degree + 1` entries and is filled by
/// `affine_anchor_moment_vector_into`.
///
/// # Panics
/// Panics (inside `affine_anchor_moment_vector_into`) when `max_degree`
/// exceeds `MAX_AFFINE_ANCHOR_DEGREE`.
pub fn affine_anchor_moment_vector(
    alpha: f64,
    beta: f64,
    left: f64,
    right: f64,
    max_degree: usize,
) -> Vec<f64> {
    let mut out = vec![0.0; max_degree + 1];
    affine_anchor_moment_vector_into(alpha, beta, left, right, max_degree, &mut out);
    out
}
3286
3287fn affine_anchor_moment_vector_into(
3288 alpha: f64,
3289 beta: f64,
3290 left: f64,
3291 right: f64,
3292 max_degree: usize,
3293 out: &mut [f64],
3294) {
3295 assert_eq!(out.len(), max_degree + 1);
3296 let s = (1.0 + beta * beta).sqrt();
3297 let mu = -alpha * beta / (1.0 + beta * beta);
3298 let y_left = if left.is_infinite() {
3299 if left.is_sign_positive() {
3300 f64::INFINITY
3301 } else {
3302 f64::NEG_INFINITY
3303 }
3304 } else {
3305 s * (left - mu)
3306 };
3307 let y_right = if right.is_infinite() {
3308 if right.is_sign_positive() {
3309 f64::INFINITY
3310 } else {
3311 f64::NEG_INFINITY
3312 }
3313 } else {
3314 s * (right - mu)
3315 };
3316 let anchor = (-alpha * alpha / (2.0 * s * s)).exp() / s;
3317 assert!(
3318 max_degree <= MAX_AFFINE_ANCHOR_DEGREE,
3319 "affine_anchor_moment_vector max_degree {} exceeds compile-time bound {}",
3320 max_degree,
3321 MAX_AFFINE_ANCHOR_DEGREE
3322 );
3323 let mut t = [0.0_f64; MAX_AFFINE_ANCHOR_DEGREE + 1];
3324 fill_truncated_gaussian_moments(y_left, y_right, &mut t[..=max_degree]);
3325 let mut mu_pow = [1.0_f64; MAX_AFFINE_ANCHOR_DEGREE + 1];
3331 for k in 1..=max_degree {
3332 mu_pow[k] = mu_pow[k - 1] * mu;
3333 }
3334 let inv_s = 1.0 / s;
3335 let mut inv_s_pow = [1.0_f64; MAX_AFFINE_ANCHOR_DEGREE + 1];
3336 for k in 1..=max_degree {
3337 inv_s_pow[k] = inv_s_pow[k - 1] * inv_s;
3338 }
3339 out.fill(0.0);
3340 for n in 0..=max_degree {
3341 let mut acc = 0.0;
3342 let mut binom = 1.0;
3344 for k in 0..=n {
3345 let term = binom * mu_pow[n - k] * inv_s_pow[k];
3346 acc = term.mul_add(t[k], acc);
3347 if k < n {
3348 binom = binom * (n - k) as f64 / (k + 1) as f64;
3349 }
3350 }
3351 out[n] = anchor * acc;
3352 }
3353}
3354
3355fn affine_value_from_moment_primitive(alpha: f64, beta: f64, left: f64, right: f64) -> f64 {
3356 let s = (1.0 + beta * beta).sqrt();
3368 let h = alpha / s;
3369 let rho = -beta / s;
3370 bivariate_normal_cdf_interval(h, left, right, rho).unwrap_or(0.0)
3371}
3372
3373pub fn evaluate_affine_cell_state(
3380 cell: DenestedCubicCell,
3381 max_degree: usize,
3382) -> Result<CellMomentState, String> {
3383 let alpha = cell.c0;
3384 let beta = cell.c1;
3385 let value = affine_value_from_moment_primitive(alpha, beta, cell.left, cell.right);
3386 let moments = affine_anchor_moment_vector(alpha, beta, cell.left, cell.right, max_degree);
3387 Ok(CellMomentState {
3388 branch: ExactCellBranch::Affine,
3389 value,
3390 moments: moments.into(),
3391 })
3392}
3393
3394fn evaluate_affine_cell_derivative_state(
3395 cell: DenestedCubicCell,
3396 max_degree: usize,
3397) -> Result<CellDerivativeMomentState, String> {
3398 let alpha = cell.c0;
3399 let beta = cell.c1;
3400 let moments = affine_anchor_moment_vector(alpha, beta, cell.left, cell.right, max_degree);
3401 Ok(CellDerivativeMomentState {
3402 branch: ExactCellBranch::Affine,
3403 moments: moments.into(),
3404 })
3405}
3406
/// Adds `mw * z^k` to `moments[k]` for every slot `k`, using a fused
/// multiply-add per slot and a running power of `z`.
#[inline]
fn accumulate_moments_unrolled4(moments: &mut [f64], mw: f64, z: f64) {
    // The fold state is the current power of `z`; the final power is unused.
    let _ = moments.iter_mut().fold(1.0_f64, |power, slot| {
        *slot = mw.mul_add(power, *slot);
        power * z
    });
}
3421
/// Integrates a non-affine cell with the supplied quadrature rule.
///
/// Each node `x` in `gl_nodes` is mapped to `z = center + half_width * x`,
/// where `center` and `half_width` are the midpoint and half-length of
/// `[cell.left, cell.right]`. With `eta(z) = c0 + c1*z + c2*z^2 + c3*z^3` the
/// function accumulates
///
/// * `moments[k] += w * exp(-(z^2 + eta^2) / 2) * z^k` for
///   `k = 0 ..= max_degree`, and
/// * when `COMPUTE_VALUE` is true,
///   `value += w * exp(-z^2 / 2) * normal_cdf(eta)`,
///
/// then scales the moments (and the value, if computed) by `half_width`.
/// When `COMPUTE_VALUE` is false the returned value is `0.0`.
///
/// Nodes are processed four at a time with `wide::f64x4`; any remainder is
/// handled by a scalar loop.
///
/// # Panics
/// Panics if `gl_nodes` and `gl_weights` differ in length.
#[inline(always)]
fn evaluate_non_affine_cell_with_rule<const COMPUTE_VALUE: bool>(
    cell: DenestedCubicCell,
    max_degree: usize,
    gl_nodes: &[f64],
    gl_weights: &[f64],
) -> (CellMomentVec, f64) {
    let mut moments: CellMomentVec = smallvec![0.0_f64; max_degree + 1];
    let mut value_integral = 0.0_f64;
    // Affine map from the reference interval of the rule onto the cell.
    let center = 0.5 * (cell.left + cell.right);
    let half_width = 0.5 * (cell.right - cell.left);
    let c0 = cell.c0;
    let c1 = cell.c1;
    let c2 = cell.c2;
    let c3 = cell.c3;
    let moments_slice: &mut [f64] = &mut moments;
    assert_eq!(gl_nodes.len(), gl_weights.len());
    use wide::f64x4;
    // Broadcast the scalars once, outside the node loop.
    let center_v = f64x4::splat(center);
    let half_width_v = f64x4::splat(half_width);
    let c0_v = f64x4::splat(c0);
    let c1_v = f64x4::splat(c1);
    let c2_v = f64x4::splat(c2);
    let c3_v = f64x4::splat(c3);
    let neg_half_v = f64x4::splat(-0.5);
    let n_total = gl_nodes.len();
    // Largest multiple of four not exceeding the node count.
    let n_simd = n_total - (n_total % 4);
    let mut i = 0;
    while i < n_simd {
        let node_v = f64x4::from([
            gl_nodes[i],
            gl_nodes[i + 1],
            gl_nodes[i + 2],
            gl_nodes[i + 3],
        ]);
        let weight_v = f64x4::from([
            gl_weights[i],
            gl_weights[i + 1],
            gl_weights[i + 2],
            gl_weights[i + 3],
        ]);
        let z_v = half_width_v.mul_add(node_v, center_v);
        // Horner evaluation of eta(z) with fused multiply-adds.
        let eta_v = c3_v
            .mul_add(z_v, c2_v)
            .mul_add(z_v, c1_v)
            .mul_add(z_v, c0_v);
        let z2_v = z_v * z_v;
        let neg_q_v = neg_half_v * (z2_v + eta_v * eta_v);
        let exp_negq_v = neg_q_v.exp();
        let moment_weight_v = weight_v * exp_negq_v;
        let z_arr = z_v.to_array();
        let mw_arr = moment_weight_v.to_array();
        if COMPUTE_VALUE {
            for lane in 0..4 {
                let z = z_arr[lane];
                let mw = mw_arr[lane];
                accumulate_moments_unrolled4(moments_slice, mw, z);
                // The value term recomputes z and eta in plain scalar
                // arithmetic instead of reusing the SIMD lanes.
                // NOTE(review): presumably to keep the value bit-identical to
                // a scalar-only evaluation — confirm.
                let node = gl_nodes[i + lane];
                let weight = gl_weights[i + lane];
                let z_ref = center + half_width * node;
                let eta_ref = c0 + c1 * z_ref + c2 * z_ref * z_ref + c3 * z_ref * z_ref * z_ref;
                value_integral += weight * (-0.5 * z_ref * z_ref).exp() * normal_cdf(eta_ref);
            }
        } else {
            for lane in 0..4 {
                let z = z_arr[lane];
                let mw = mw_arr[lane];
                accumulate_moments_unrolled4(moments_slice, mw, z);
            }
        }
        i += 4;
    }
    // Scalar remainder for node counts that are not a multiple of four.
    while i < n_total {
        let node = gl_nodes[i];
        let weight = gl_weights[i];
        let z = center + half_width * node;
        let eta = c3.mul_add(z, c2).mul_add(z, c1).mul_add(z, c0);
        let q = 0.5 * (z * z + eta * eta);
        let moment_weight = weight * (-q).exp();
        accumulate_moments_unrolled4(moments_slice, moment_weight, z);
        if COMPUTE_VALUE {
            let eta_ref = c0 + c1 * z + c2 * z * z + c3 * z * z * z;
            value_integral += weight * (-0.5 * z * z).exp() * normal_cdf(eta_ref);
        }
        i += 1;
    }
    // Jacobian of the map from the reference interval to the cell.
    for moment in moments_slice.iter_mut() {
        *moment *= half_width;
    }
    let value = if COMPUTE_VALUE {
        value_integral * half_width
    } else {
        value_integral
    };
    (moments, value)
}
3580
/// Relative tolerance used by `non_affine_ladder_converged`: two consecutive
/// ladder rungs agree when the largest absolute moment difference is at most
/// this factor times the largest absolute moment of the finer rung.
const NON_AFFINE_LADDER_RTOL: f64 = 1e-15;
3607
/// Node counts of the quadrature rules built by `non_affine_ladder_rules`, in
/// the order in which the ladder tries them.
const NON_AFFINE_LADDER_RUNGS: [usize; 5] = [12, 24, 48, 96, 192];
3611
3612fn non_affine_ladder_rules() -> &'static [(Vec<f64>, Vec<f64>)] {
3619 static RULES: std::sync::OnceLock<Vec<(Vec<f64>, Vec<f64>)>> = std::sync::OnceLock::new();
3620 RULES.get_or_init(|| {
3621 NON_AFFINE_LADDER_RUNGS
3622 .iter()
3623 .map(|&n| gauss_legendre_rule(n))
3624 .collect()
3625 })
3626}
3627
/// Computes the `n`-point Gauss-Legendre nodes and weights on `[-1, 1]`.
///
/// Each root of the Legendre polynomial `P_n` is refined by Newton iteration
/// (at most 100 steps, stopping once a step is no larger than
/// `f64::EPSILON`), starting from a cosine initial guess. Only half of the
/// roots are solved for; the rest follow by symmetry. Nodes are returned in
/// ascending order. `n == 0` yields two empty vectors.
fn gauss_legendre_rule(n: usize) -> (Vec<f64>, Vec<f64>) {
    let order = n as f64;
    let mut nodes = vec![0.0_f64; n];
    let mut weights = vec![0.0_f64; n];
    for i in 0..n.div_ceil(2) {
        let mirror = n - 1 - i;
        let mut root = (std::f64::consts::PI * (i as f64 + 0.75) / (order + 0.5)).cos();
        let mut derivative = 0.0_f64;
        for _ in 0..100 {
            // Three-term recurrence up to P_n(root); `below` ends as P_(n-1).
            let mut current = 1.0_f64;
            let mut below = 0.0_f64;
            for j in 1..=n {
                let two_below = below;
                below = current;
                current =
                    ((2 * j - 1) as f64 * root * below - (j - 1) as f64 * two_below) / j as f64;
            }
            derivative = order * (root * current - below) / (root * root - 1.0);
            let previous = root;
            root = previous - current / derivative;
            if (root - previous).abs() <= f64::EPSILON {
                break;
            }
        }
        // Write the negative node first so that, for odd `n`, the middle slot
        // ends up holding `root` itself.
        nodes[i] = -root;
        nodes[mirror] = root;
        let weight = 2.0 / ((1.0 - root * root) * derivative * derivative);
        weights[i] = weight;
        weights[mirror] = weight;
    }
    (nodes, weights)
}
3664
3665fn non_affine_ladder_converged(coarse: &CellMomentVec, fine: &CellMomentVec) -> bool {
3680 let mut scale = 0.0_f64;
3681 let mut err = 0.0_f64;
3682 for (&c, &f) in coarse.iter().zip(fine.iter()) {
3683 scale = scale.max(f.abs());
3684 err = err.max((c - f).abs());
3685 }
3686 if !(scale.is_finite() && err.is_finite()) {
3687 return false;
3688 }
3689 err <= NON_AFFINE_LADDER_RTOL * scale
3690}
3691
3692pub(crate) static NON_AFFINE_LADDER_CERT_COUNTS: [AtomicU64; NON_AFFINE_LADDER_RUNGS.len() + 1] = [
3700 AtomicU64::new(0),
3701 AtomicU64::new(0),
3702 AtomicU64::new(0),
3703 AtomicU64::new(0),
3704 AtomicU64::new(0),
3705 AtomicU64::new(0),
3706];
3707
3708pub fn non_affine_ladder_cert_histogram() -> (Vec<(usize, u64)>, u64) {
3711 let per_rung = NON_AFFINE_LADDER_RUNGS
3712 .iter()
3713 .enumerate()
3714 .map(|(i, &n)| (n, NON_AFFINE_LADDER_CERT_COUNTS[i].load(Ordering::Relaxed)))
3715 .collect();
3716 let terminal =
3717 NON_AFFINE_LADDER_CERT_COUNTS[NON_AFFINE_LADDER_RUNGS.len()].load(Ordering::Relaxed);
3718 (per_rung, terminal)
3719}
3720
3721#[inline]
3726fn evaluate_non_affine_cell_simd<const COMPUTE_VALUE: bool>(
3727 cell: DenestedCubicCell,
3728 max_degree: usize,
3729) -> (CellMomentVec, f64) {
3730 let mut prev: Option<(CellMomentVec, f64)> = None;
3731 for (i, (nodes, weights)) in non_affine_ladder_rules().iter().enumerate() {
3732 let cur =
3733 evaluate_non_affine_cell_with_rule::<COMPUTE_VALUE>(cell, max_degree, nodes, weights);
3734 if let Some(prev) = prev.as_ref()
3735 && non_affine_ladder_converged(&prev.0, &cur.0)
3736 {
3737 NON_AFFINE_LADDER_CERT_COUNTS[i].fetch_add(1, Ordering::Relaxed);
3738 return cur;
3739 }
3740 prev = Some(cur);
3741 }
3742 NON_AFFINE_LADDER_CERT_COUNTS[NON_AFFINE_LADDER_RUNGS.len()].fetch_add(1, Ordering::Relaxed);
3743 evaluate_non_affine_cell_with_rule::<COMPUTE_VALUE>(cell, max_degree, &GL_NODES, &GL_WEIGHTS)
3744}
3745
3746fn evaluate_non_affine_cell_value_terminal(cell: DenestedCubicCell) -> f64 {
3766 let center = 0.5 * (cell.left + cell.right);
3767 let half_width = 0.5 * (cell.right - cell.left);
3768 let c0 = cell.c0;
3769 let c1 = cell.c1;
3770 let c2 = cell.c2;
3771 let c3 = cell.c3;
3772 let mut value_integral = 0.0_f64;
3773 for (&node, &weight) in GL_NODES.iter().zip(GL_WEIGHTS.iter()) {
3774 let z = center + half_width * node;
3775 let eta = c0 + c1 * z + c2 * z * z + c3 * z * z * z;
3776 value_integral += weight * (-0.5 * z * z).exp() * normal_cdf(eta);
3777 }
3778 value_integral * half_width
3779}
3780
3781fn evaluate_non_affine_cell_state(
3782 cell: DenestedCubicCell,
3783 branch: ExactCellBranch,
3784 max_degree: usize,
3785) -> Result<CellMomentState, String> {
3786 let (moments, _) = evaluate_non_affine_cell_simd::<false>(cell, max_degree);
3787 let value_integral = evaluate_non_affine_cell_value_terminal(cell);
3788 Ok(CellMomentState {
3793 branch,
3794 value: value_integral / (std::f64::consts::TAU).sqrt(),
3795 moments,
3796 })
3797}
3798
3799fn evaluate_non_affine_cell_derivative_state(
3800 cell: DenestedCubicCell,
3801 branch: ExactCellBranch,
3802 max_degree: usize,
3803) -> Result<CellDerivativeMomentState, String> {
3804 let (moments, _) = evaluate_non_affine_cell_simd::<false>(cell, max_degree);
3805 Ok(CellDerivativeMomentState { branch, moments })
3806}
3807
3808pub fn evaluate_cell_moments(
3814 cell: DenestedCubicCell,
3815 max_degree: usize,
3816) -> Result<CellMomentState, String> {
3817 if !TAIL_CELL_MOMENT_CACHE_ENABLED.load(std::sync::atomic::Ordering::Relaxed) {
3818 return evaluate_cell_moments_uncached(cell, max_degree);
3819 }
3820 tail_cell_moment_cache().evaluate(cell, max_degree)
3821}
3822
3823pub fn evaluate_cell_moments_uncached(
3828 cell: DenestedCubicCell,
3829 max_degree: usize,
3830) -> Result<CellMomentState, String> {
3831 evaluate_cell_state_dispatched(
3832 cell,
3833 max_degree,
3834 evaluate_affine_cell_state,
3835 evaluate_non_affine_cell_state,
3836 )
3837}
3838
3839pub fn evaluate_cell_derivative_moments_uncached(
3846 cell: DenestedCubicCell,
3847 max_degree: usize,
3848) -> Result<CellDerivativeMomentState, String> {
3849 evaluate_cell_state_dispatched(
3850 cell,
3851 max_degree,
3852 evaluate_affine_cell_derivative_state,
3853 evaluate_non_affine_cell_derivative_state,
3854 )
3855}
3856
3857fn evaluate_cell_state_dispatched<S>(
3866 cell: DenestedCubicCell,
3867 max_degree: usize,
3868 affine: fn(DenestedCubicCell, usize) -> Result<S, String>,
3869 non_affine: fn(DenestedCubicCell, ExactCellBranch, usize) -> Result<S, String>,
3870) -> Result<S, String> {
3871 let left_inf = !cell.left.is_finite();
3872 let right_inf = !cell.right.is_finite();
3873 if left_inf || right_inf {
3874 if cell.c2.abs() > NORMALIZED_CELL_BRANCH_TOL || cell.c3.abs() > NORMALIZED_CELL_BRANCH_TOL
3878 {
3879 return Err(CubicCellKernelError::invalid_cell_shape(format!(
3880 "semi-infinite cell [{}, {}] must be affine (c2=c3=0), got c2={:.3e}, c3={:.3e}",
3881 cell.left, cell.right, cell.c2, cell.c3
3882 ))
3883 .into());
3884 }
3885 return affine(cell, max_degree);
3886 }
3887 if cell.right <= cell.left {
3888 return Err(CubicCellKernelError::invalid_cell_shape(format!(
3889 "finite cell must have left < right, got [{}, {}]",
3890 cell.left, cell.right
3891 ))
3892 .into());
3893 }
3894 let branch = branch_cell(cell)?;
3895 if branch == ExactCellBranch::Affine {
3896 return affine(cell, max_degree);
3897 }
3898 if branch == ExactCellBranch::Sextic {
3899 let lead = sextic_qprime_coefficients(cell.c0, cell.c1, cell.c2, cell.c3)[5];
3900 if !lead.is_finite() {
3901 return Err(CubicCellKernelError::invalid_cell_shape(format!(
3902 "sextic cell evaluation encountered non-finite leading coefficient: {lead:.3e}"
3903 ))
3904 .into());
3905 }
3906 if let Some(lower_branch) = degenerate_sextic_branch(cell, lead)? {
3907 return match lower_branch {
3908 ExactCellBranch::Quartic => non_affine(
3909 DenestedCubicCell { c3: 0.0, ..cell },
3910 ExactCellBranch::Quartic,
3911 max_degree,
3912 ),
3913 ExactCellBranch::Affine => affine(
3914 DenestedCubicCell {
3915 c2: 0.0,
3916 c3: 0.0,
3917 ..cell
3918 },
3919 max_degree,
3920 ),
3921 ExactCellBranch::Sextic => Err(CubicCellKernelError::invalid_cell_shape(
3922 "internal: degenerate_sextic_branch returned Sextic as a lowered branch",
3923 )
3924 .into()),
3925 };
3926 }
3927 }
3928 non_affine(cell, branch, max_degree)
3929}
3930
3931pub fn evaluate_cell_moments_cached(
3938 cell: DenestedCubicCell,
3939 max_degree: usize,
3940 cache: &CellMomentLruCache,
3941 stats: Option<&CellMomentCacheStats>,
3942) -> Result<CellMomentState, String> {
3943 if matches!(branch_cell(cell), Ok(ExactCellBranch::Affine)) {
3952 if let Some(stats) = stats {
3953 stats.misses.fetch_add(1, Ordering::Relaxed);
3954 }
3955 return evaluate_cell_moments_uncached(cell, max_degree);
3956 }
3957 let key = CellFingerprint::new(cell);
3958 let existing_derivative = match cache.get(&key) {
3959 Some(cached) => {
3960 if let Some(state) = cached.state_for_degree(max_degree) {
3961 if let Some(stats) = stats {
3962 stats.hits.fetch_add(1, Ordering::Relaxed);
3963 }
3964 return Ok(state);
3965 }
3966 cached.derivative_state.clone()
3970 }
3971 None => None,
3972 };
3973 if let Some(stats) = stats {
3974 stats.misses.fetch_add(1, Ordering::Relaxed);
3975 }
3976 let state = evaluate_cell_moments(cell, max_degree)?;
3977 let shared = Arc::new(state);
3982 let mut entry = CachedCellMoments::new(Arc::clone(&shared));
3983 if let Some(derivative) = existing_derivative {
3984 entry = entry.with_derivative(derivative);
3985 }
3986 cache.insert(key, entry);
3987 Ok(Arc::try_unwrap(shared).unwrap_or_else(|a| (*a).clone()))
3988}
3989
3990pub fn evaluate_cell_derivative_moments_cached(
3996 cell: DenestedCubicCell,
3997 max_degree: usize,
3998 cache: &CellMomentLruCache,
3999 stats: Option<&CellMomentCacheStats>,
4000) -> Result<CellDerivativeMomentState, String> {
4001 if matches!(branch_cell(cell), Ok(ExactCellBranch::Affine)) {
4005 if let Some(stats) = stats {
4006 stats.misses.fetch_add(1, Ordering::Relaxed);
4007 }
4008 return evaluate_cell_derivative_moments_uncached(cell, max_degree);
4009 }
4010 let key = CellFingerprint::new(cell);
4011 let existing_value = match cache.get(&key) {
4012 Some(cached) => {
4013 if let Some(state) = cached.derivative_state_for_degree(max_degree) {
4014 if let Some(stats) = stats {
4015 stats.hits.fetch_add(1, Ordering::Relaxed);
4016 }
4017 return Ok(state);
4018 }
4019 cached.state.clone()
4023 }
4024 None => None,
4025 };
4026 if let Some(stats) = stats {
4027 stats.misses.fetch_add(1, Ordering::Relaxed);
4028 }
4029 let state = evaluate_cell_derivative_moments_uncached(cell, max_degree)?;
4030 let shared = Arc::new(state);
4035 let mut entry = CachedCellMoments::new_derivative(Arc::clone(&shared));
4036 if let Some(value) = existing_value {
4037 entry = entry.with_value(value);
4038 }
4039 cache.insert(key, entry);
4040 Ok(Arc::try_unwrap(shared).unwrap_or_else(|a| (*a).clone()))
4041}
4042
4043pub fn evaluate_cell_moments_with_scratch<'a>(
4050 cell: DenestedCubicCell,
4051 max_degree: usize,
4052 scratch: &'a mut CellMomentScratch,
4053) -> Result<CellMomentStateRef<'a>, String> {
4054 let state = evaluate_cell_moments(cell, max_degree)?;
4055 let out = scratch.prepare_moments(max_degree + 1);
4056 out.copy_from_slice(&state.moments);
4057 Ok(CellMomentStateRef {
4058 branch: state.branch,
4059 value: state.value,
4060 moments: out,
4061 })
4062}
4063
4064#[cfg(test)]
4065mod tests {
4066 use super::*;
4067 use gam_math::probability::normal_pdf;
4068
4069 #[inline]
4077 fn cell_third_derivative_boundary_integrand(
4078 cell: DenestedCubicCell,
4079 first_coefficients_r: &[f64],
4080 first_coefficients_s: &[f64],
4081 first_coefficients_t: &[f64],
4082 second_coefficients_rs: &[f64],
4083 second_coefficients_rt: &[f64],
4084 second_coefficients_st: &[f64],
4085 third_coefficients_rst: &[f64],
4086 z: f64,
4087 ) -> f64 {
4088 let eta = cell.eta(z);
4089 let c_r = poly_eval_at(first_coefficients_r, z);
4090 let c_s = poly_eval_at(first_coefficients_s, z);
4091 let c_t = poly_eval_at(first_coefficients_t, z);
4092 let c_rs = poly_eval_at(second_coefficients_rs, z);
4093 let c_rt = poly_eval_at(second_coefficients_rt, z);
4094 let c_st = poly_eval_at(second_coefficients_st, z);
4095 let c_rst = poly_eval_at(third_coefficients_rst, z);
4096 let amplitude = c_rst - eta * (c_rs * c_t + c_rt * c_s + c_st * c_r)
4097 + (eta * eta - 1.0) * c_r * c_s * c_t;
4098 amplitude * (-cell.q(z)).exp() * INV_TWO_PI
4099 }
4100
4101 #[inline]
4102 pub(super) fn polynomial_value(coefficients: &[f64], z: f64) -> f64 {
4103 coefficients
4104 .iter()
4105 .rev()
4106 .fold(0.0, |acc, &coeff| acc * z + coeff)
4107 }
4108
4109 fn reset_cell_moment_test_reallocs() {
4110 super::CELL_MOMENT_REALLOCS.store(0, std::sync::atomic::Ordering::Relaxed);
4111 }
4112
4113 fn cell_moment_test_reallocs() -> usize {
4114 super::CELL_MOMENT_REALLOCS.load(std::sync::atomic::Ordering::Relaxed)
4115 }
4116
4117 fn assert_close_rel(label: &str, actual: f64, expected: f64, tol: f64) {
4118 let denom = expected.abs().max(1.0);
4119 let rel = (actual - expected).abs() / denom;
4120 assert!(
4121 rel <= tol,
4122 "{label}: actual={actual:.17e} expected={expected:.17e} rel={rel:.3e} tol={tol:.3e}"
4123 );
4124 }
4125
4126 #[test]
4141 fn link_basis_cell_fourth_ab_partials_vanish_third_are_nonzero() {
4142 let span = LocalSpanCubic {
4143 left: -0.4,
4144 right: 1.6,
4145 c0: 0.37,
4146 c1: -0.81,
4147 c2: 0.53,
4148 c3: -0.29,
4149 };
4150 let a0 = 0.23_f64;
4151 let b0 = 0.61_f64;
4152 let h = 1e-2_f64;
4153
4154 let stencil = |order: usize| -> &'static [(i64, f64)] {
4156 match order {
4157 0 => &[(0, 1.0)],
4158 1 => &[(-1, -0.5), (1, 0.5)],
4159 2 => &[(-1, 1.0), (0, -2.0), (1, 1.0)],
4160 3 => &[(-2, -0.5), (-1, 1.0), (1, -1.0), (2, 0.5)],
4161 4 => &[(-2, 1.0), (-1, -4.0), (0, 6.0), (1, -4.0), (2, 1.0)],
4162 _ => &[(0, 1.0)],
4163 }
4164 };
4165 let fd = |k: usize, na: usize, nb: usize| -> f64 {
4167 let mut acc = 0.0;
4168 for &(ia, wa) in stencil(na) {
4169 for &(ib, wb) in stencil(nb) {
4170 let a = a0 + (ia as f64) * h;
4171 let b = b0 + (ib as f64) * h;
4172 acc += wa * wb * link_basis_cell_coefficients(span, a, b)[k];
4173 }
4174 }
4175 acc / h.powi((na + nb) as i32)
4176 };
4177
4178 let (p3_aaa, p3_aab, p3_abb, p3_bbb) = link_basis_cell_third_partials(span);
4179
4180 let mut max_third = 0.0_f64;
4184 for k in 0..4 {
4185 for (label, (na, nb), analytic) in [
4186 ("aaa", (3usize, 0usize), p3_aaa[k]),
4187 ("aab", (2, 1), p3_aab[k]),
4188 ("abb", (1, 2), p3_abb[k]),
4189 ("bbb", (0, 3), p3_bbb[k]),
4190 ] {
4191 let got = fd(k, na, nb);
4192 assert!(
4193 (got - analytic).abs() <= 1e-4 + 1e-3 * analytic.abs(),
4194 "3rd partial {label}[{k}] analytic {analytic:+.6e} vs FD {got:+.6e}"
4195 );
4196 max_third = max_third.max(analytic.abs());
4197 }
4198 }
4199 assert!(
4200 max_third > 1e-1,
4201 "expected an appreciable nonzero 3rd (a,b)-partial; max |analytic| = {max_third:.3e}"
4202 );
4203
4204 for k in 0..4 {
4208 for (na, nb) in [(4usize, 0usize), (3, 1), (2, 2), (1, 3), (0, 4)] {
4209 let got = fd(k, na, nb);
4210 assert!(
4211 got.abs() <= 1e-2,
4212 "4th (a,b)-partial ∂^{na}_a∂^{nb}_b of cell coeff[{k}] must vanish, FD = {got:+.6e}"
4213 );
4214 }
4215 }
4216 }
4217
4218 #[test]
4219 fn non_affine_cell_state_grid_matches_public_cell_moments_reference() {
4220 let cells = [
4221 DenestedCubicCell {
4222 left: -1.25,
4223 right: -0.2,
4224 c0: -0.35,
4225 c1: 0.85,
4226 c2: 0.04,
4227 c3: -0.015,
4228 },
4229 DenestedCubicCell {
4230 left: -0.2,
4231 right: 0.55,
4232 c0: 0.12,
4233 c1: -0.65,
4234 c2: -0.025,
4235 c3: 0.02,
4236 },
4237 DenestedCubicCell {
4238 left: 0.55,
4239 right: 1.6,
4240 c0: 0.42,
4241 c1: 0.35,
4242 c2: 0.018,
4243 c3: 0.012,
4244 },
4245 ];
4246 for cell in cells {
4247 let branch = branch_cell(cell).expect("branch");
4248 assert_ne!(branch, ExactCellBranch::Affine);
4249 for max_degree in [0usize, 2, 4, 9, 16] {
4250 let direct = evaluate_non_affine_cell_state(cell, branch, max_degree)
4251 .expect("direct non-affine transport");
4252 let public = evaluate_cell_moments(cell, max_degree).expect("public evaluator");
4253 assert_eq!(direct.branch, public.branch);
4254 assert_eq!(direct.moments.len(), public.moments.len());
4255 let value_scale = direct.value.abs().max(public.value.abs()).max(1.0);
4256 assert!(
4257 (direct.value - public.value).abs() <= 1e-10 * value_scale,
4258 "value mismatch for {cell:?} degree {max_degree}: direct={} public={}",
4259 direct.value,
4260 public.value
4261 );
4262 for (degree, (lhs, rhs)) in
4263 direct.moments.iter().zip(public.moments.iter()).enumerate()
4264 {
4265 let scale = lhs.abs().max(rhs.abs()).max(1.0);
4266 assert!(
4267 (lhs - rhs).abs() <= 1e-10 * scale,
4268 "moment {degree} mismatch for {cell:?} degree {max_degree}: {lhs} vs {rhs}"
4269 );
4270 }
4271 }
4272 }
4273 }
4274
4275 #[test]
4276 fn affine_tail_cell_memo_matches_uncached_grid_and_records_hits() {
4277 let cache = TailCellMomentCache::new();
4283 let c0s = [-2.0, -0.25, 0.0, 1.5];
4284 let c1s = [-1.2, -0.05, 0.0, 0.8];
4285 let endpoints = [-4.0, -1.0, 0.0, 2.5, 6.0];
4286 let degrees = [0_usize, 4, 9, 16, 24];
4287
4288 for &c0 in &c0s {
4289 for &c1 in &c1s {
4290 for &endpoint in &endpoints {
4291 for &max_degree in °rees {
4292 for &(left, right) in
4293 &[(f64::NEG_INFINITY, endpoint), (endpoint, f64::INFINITY)]
4294 {
4295 let cell = DenestedCubicCell {
4296 left,
4297 right,
4298 c0,
4299 c1,
4300 c2: 0.0,
4301 c3: 0.0,
4302 };
4303 let expected = evaluate_cell_moments_uncached(cell, max_degree)
4304 .expect("uncached affine tail moments");
4305 let actual = cache
4306 .evaluate(cell, max_degree)
4307 .expect("cached affine tail moments miss");
4308 let repeat = cache
4309 .evaluate(cell, max_degree)
4310 .expect("cached affine tail moments hit");
4311 assert_eq!(actual.branch, expected.branch);
4312 assert_eq!(repeat.branch, expected.branch);
4313 assert_close_rel(
4314 "tail value miss",
4315 actual.value,
4316 expected.value,
4317 1e-14,
4318 );
4319 assert_close_rel("tail value hit", repeat.value, expected.value, 1e-14);
4320 assert_eq!(actual.moments.len(), expected.moments.len());
4321 assert_eq!(repeat.moments.len(), expected.moments.len());
4322 for (idx, ((a, r), e)) in actual
4323 .moments
4324 .iter()
4325 .zip(repeat.moments.iter())
4326 .zip(expected.moments.iter())
4327 .enumerate()
4328 {
4329 assert_close_rel(
4330 &format!("tail moment miss[{idx}]"),
4331 *a,
4332 *e,
4333 1e-14,
4334 );
4335 assert_close_rel(&format!("tail moment hit[{idx}]"), *r, *e, 1e-14);
4336 }
4337 }
4338 }
4339 }
4340 }
4341 }
4342
4343 let stats = cache.stats();
4344 assert_eq!(stats.misses, stats.entries);
4345 assert!(
4346 stats.hits >= stats.misses,
4347 "expected repeat hits: {stats:?}"
4348 );
4349 assert!(
4350 stats.hit_rate() >= 0.5,
4351 "unexpected low hit rate: {stats:?}"
4352 );
4353 }
4354
4355 fn reference_bivariate_normal_cdf_20(h: f64, k: f64, rho: f64) -> f64 {
4356 if h == f64::NEG_INFINITY || k == f64::NEG_INFINITY {
4357 return 0.0;
4358 }
4359 if h == f64::INFINITY {
4360 return normal_cdf(k);
4361 }
4362 if k == f64::INFINITY {
4363 return normal_cdf(h);
4364 }
4365 let rho_clamped = rho.clamp(-1.0, 1.0);
4366 if rho_clamped >= 1.0 - 1e-12 {
4367 return normal_cdf(h.min(k));
4368 }
4369 if rho_clamped <= -1.0 + 1e-12 {
4370 return (normal_cdf(h) - normal_cdf(-k)).clamp(0.0, 1.0);
4371 }
4372
4373 let hs = 0.5 * (h * h + k * k);
4374 let asr = rho_clamped.asin();
4375 let mut sum = 0.0;
4376 for (&node, &weight) in GL20_NODES.iter().zip(GL20_WEIGHTS.iter()) {
4377 let sn = (0.5 * asr * (node + 1.0)).sin();
4378 let one_minus = 1.0 - sn * sn;
4379 let expo = ((sn * h * k) - hs) / one_minus;
4380 sum += weight * expo.exp();
4381 }
4382 (normal_cdf(h) * normal_cdf(k) + asr * sum / (4.0 * std::f64::consts::PI)).clamp(0.0, 1.0)
4383 }
4384
4385 #[test]
4386 fn non_affine_cell_state_reference_grid_matches_public_moments() {
4387 let c0s = [-0.4, 0.0, 0.35];
4388 let c1s = [-0.8, 0.25, 1.1];
4389 let c2s = [-0.12, 0.08];
4390 let c3s = [-0.04, 0.03];
4391 let intervals = [(-1.25, -0.2), (-0.5, 0.75), (0.1, 1.4)];
4392 let degrees = [3usize, 6, 9, 12];
4393
4394 for &c0 in &c0s {
4395 for &c1 in &c1s {
4396 for &c2 in &c2s {
4397 for &c3 in &c3s {
4398 for &(left, right) in &intervals {
4399 let cell = DenestedCubicCell {
4400 left,
4401 right,
4402 c0,
4403 c1,
4404 c2,
4405 c3,
4406 };
4407 let branch = branch_cell(cell).expect("branch");
4408 assert_ne!(branch, ExactCellBranch::Affine);
4409 for °ree in °rees {
4410 let direct = evaluate_non_affine_cell_state(cell, branch, degree)
4411 .expect("direct non-affine state");
4412 let public = evaluate_cell_moments(cell, degree)
4413 .expect("public non-affine state");
4414 assert_eq!(direct.branch, public.branch);
4415 let value_scale =
4416 direct.value.abs().max(public.value.abs()).max(1.0);
4417 assert!(
4418 (direct.value - public.value).abs() / value_scale <= 1.0e-15,
4419 "value mismatch for {cell:?}, degree {degree}: direct={:.17e}, public={:.17e}",
4420 direct.value,
4421 public.value
4422 );
4423 assert_eq!(direct.moments.len(), public.moments.len());
4424 for (idx, (&a, &b)) in
4425 direct.moments.iter().zip(public.moments.iter()).enumerate()
4426 {
4427 let scale = a.abs().max(b.abs()).max(1.0);
4428 assert!(
4429 (a - b).abs() / scale <= 1.0e-15,
4430 "moment {idx} mismatch for {cell:?}, degree {degree}: direct={a:.17e}, public={b:.17e}"
4431 );
4432 }
4433 }
4434 }
4435 }
4436 }
4437 }
4438 }
4439 }
4440
4441 #[test]
4442 fn bivariate_normal_cdf_matches_reference_grid_to_1e_minus_10() {
4443 let hs = [-8.0, -5.0, -3.0, -1.5, -0.5, 0.0, 0.25, 1.0, 2.5, 5.0, 8.0];
4444 let ks = [-8.0, -4.0, -2.0, -0.75, 0.0, 0.4, 1.25, 3.0, 6.0, 8.0];
4445 let rhos = [
4446 -0.999_999_999_999,
4447 -0.999,
4448 -0.95,
4449 -0.7,
4450 -0.3,
4451 -1.0e-12,
4452 0.0,
4453 1.0e-12,
4454 0.3,
4455 0.7,
4456 0.95,
4457 0.999,
4458 0.999_999_999_999,
4459 ];
4460 for &h in &hs {
4461 for &k in &ks {
4462 for &rho in &rhos {
4463 let actual = bivariate_normal_cdf(h, k, rho).expect("bvn");
4464 let expected = reference_bivariate_normal_cdf_20(h, k, rho);
4465 let scale = expected.abs().max(1.0e-300);
4466 let rel = (actual - expected).abs() / scale;
4467 assert!(
4468 rel < 1.0e-10 || (actual - expected).abs() < 1.0e-14,
4469 "h={h} k={k} rho={rho} actual={actual:.17e} expected={expected:.17e} rel={rel:.3e}"
4470 );
4471 }
4472 }
4473 }
4474 }
4475
4476 #[test]
4477 fn bivariate_normal_cdf_matches_reference_lcg_property_samples() {
4478 let mut seed = 0x5eed_cafe_f00d_u64;
4479 let mut next_unit = || {
4480 seed = seed.wrapping_mul(6_364_136_223_846_793_005).wrapping_add(1);
4481 ((seed >> 11) as f64) * (1.0 / ((1_u64 << 53) as f64))
4482 };
4483 for _ in 0..4096 {
4484 let h = -8.0 + 16.0 * next_unit();
4485 let k = -8.0 + 16.0 * next_unit();
4486 let rho = -0.999 + 1.998 * next_unit();
4487 let actual = bivariate_normal_cdf(h, k, rho).expect("bvn");
4488 let expected = reference_bivariate_normal_cdf_20(h, k, rho);
4489 let scale = expected.abs().max(1.0e-300);
4490 let rel = (actual - expected).abs() / scale;
4491 assert!(
4492 rel < 1.0e-10 || (actual - expected).abs() < 1.0e-14,
4493 "h={h} k={k} rho={rho} actual={actual:.17e} expected={expected:.17e} rel={rel:.3e}"
4494 );
4495 }
4496 }
4497
4498 #[test]
4499 fn affine_bvn_interval_primitive_matches_two_cdf_difference() {
4500 let hs = [-6.0, -2.0, -0.25, 0.0, 0.8, 3.0, 6.0];
4501 let bounds = [
4502 (-5.0, -2.0),
4503 (-3.0, -0.1),
4504 (-1.0, 0.0),
4505 (-0.25, 0.75),
4506 (0.2, 3.5),
4507 (2.0, 7.0),
4508 ];
4509 let rhos = [-0.98, -0.8, -0.25, 0.0, 0.25, 0.8, 0.98];
4510 for &h in &hs {
4511 for &(left, right) in &bounds {
4512 for &rho in &rhos {
4513 let actual =
4514 bivariate_normal_cdf_interval(h, left, right, rho).expect("interval");
4515 let expected = (reference_bivariate_normal_cdf_20(h, right, rho)
4516 - reference_bivariate_normal_cdf_20(h, left, rho))
4517 .clamp(0.0, 1.0);
4518 let scale = expected.abs().max(1.0e-300);
4519 let rel = (actual - expected).abs() / scale;
4520 assert!(
4521 rel < 1.0e-10 || (actual - expected).abs() < 1.0e-12,
4522 "h={h} left={left} right={right} rho={rho} actual={actual:.17e} expected={expected:.17e} rel={rel:.3e}"
4523 );
4524 }
4525 }
4526 }
4527 }
4528
4529 fn simpson_integral<F>(left: f64, right: f64, steps: usize, f: F) -> f64
4530 where
4531 F: Fn(f64) -> f64,
4532 {
4533 let n = if steps.is_multiple_of(2) {
4534 steps
4535 } else {
4536 steps + 1
4537 };
4538 let h = (right - left) / n as f64;
4539 let mut acc = f(left) + f(right);
4540 for k in 1..n {
4541 let x = left + h * k as f64;
4542 let w = if k % 2 == 0 { 2.0 } else { 4.0 };
4543 acc += w * f(x);
4544 }
4545 acc * h / 3.0
4546 }
4547
4548 #[test]
4549 fn global_transform_preserves_local_span_polynomial() {
4550 let span = LocalSpanCubic {
4551 left: -1.2,
4552 right: 0.8,
4553 c0: 0.3,
4554 c1: -0.25,
4555 c2: 0.11,
4556 c3: -0.04,
4557 };
4558 let (g0, g1, g2, g3) = global_cubic_from_local(span);
4559 for &x in &[-1.2, -0.7, -0.1, 0.4, 0.8] {
4560 let local = span.evaluate(x);
4561 let global = g0 + g1 * x + g2 * x * x + g3 * x * x * x;
4562 assert!((local - global).abs() < 1e-12);
4563 }
4564 }
4565
4566 #[test]
4567 fn bivariate_normal_cdf_independent_factorizes() {
4568 let h = -0.35;
4569 let k = 0.8;
4570 let out = bivariate_normal_cdf(h, k, 0.0).expect("bvn");
4571 let target = normal_cdf(h) * normal_cdf(k);
4572 assert!((out - target).abs() < 1e-12);
4573 }
4574
4575 #[test]
4576 fn evaluate_affine_cell_state_matches_numeric_integrals() {
4577 let cell = DenestedCubicCell {
4578 left: -0.9,
4579 right: 0.8,
4580 c0: 0.15,
4581 c1: -0.35,
4582 c2: 0.0,
4583 c3: 0.0,
4584 };
4585 let state = evaluate_affine_cell_state(cell, 6).expect("affine cell");
4586 let value_numeric = simpson_integral(cell.left, cell.right, 4000, |z| {
4587 super::normal_cdf(cell.eta(z)) * normal_pdf(z)
4588 });
4589 assert_eq!(state.branch, ExactCellBranch::Affine);
4590 assert!((state.value - value_numeric).abs() < 1e-9);
4591 for degree in 0..=6 {
4592 let target = simpson_integral(cell.left, cell.right, 4000, |z| {
4593 z.powi(degree as i32) * (-cell.q(z)).exp()
4594 });
4595 assert!((state.moments[degree] - target).abs() < 1e-9);
4596 }
4597 }
4598
4599 #[test]
4600 fn affine_cell_value_matches_zero_moment_derivative() {
4601 let cell = DenestedCubicCell {
4602 left: -1.1,
4603 right: 0.7,
4604 c0: 0.23,
4605 c1: -0.41,
4606 c2: 0.0,
4607 c3: 0.0,
4608 };
4609 let h = 1e-6;
4610 let plus = evaluate_affine_cell_state(
4611 DenestedCubicCell {
4612 c0: cell.c0 + h,
4613 ..cell
4614 },
4615 0,
4616 )
4617 .expect("affine plus");
4618 let minus = evaluate_affine_cell_state(
4619 DenestedCubicCell {
4620 c0: cell.c0 - h,
4621 ..cell
4622 },
4623 0,
4624 )
4625 .expect("affine minus");
4626 let center = evaluate_affine_cell_state(cell, 0).expect("affine center");
4627 let d_value = (plus.value - minus.value) / (2.0 * h);
4628 let target = INV_TWO_PI * center.moments[0];
4629 assert!((d_value - target).abs() < 1e-8);
4630 }
4631
4632 #[test]
4633 fn coefficient_partials_match_exact_span_derivatives() {
4634 let score_span = LocalSpanCubic {
4635 left: -0.75,
4636 right: 0.25,
4637 c0: 0.08,
4638 c1: -0.03,
4639 c2: 0.02,
4640 c3: -0.01,
4641 };
4642 let link_span = LocalSpanCubic {
4643 left: -0.6,
4644 right: 0.9,
4645 c0: -0.05,
4646 c1: 0.04,
4647 c2: -0.02,
4648 c3: 0.015,
4649 };
4650 let a = 0.3;
4651 let b = -0.7;
4652 let (dc_da, dc_db) = denested_cell_coefficient_partials(score_span, link_span, a, b);
4653 for &z in &[-0.75, -0.4, -0.1, 0.2] {
4654 let u = a + b * z;
4655 let eta_a = 1.0 + link_span.first_derivative(u);
4656 let eta_b = z + score_span.evaluate(z) + z * link_span.first_derivative(u);
4657 assert!((polynomial_value(&dc_da, z) - eta_a).abs() < 1e-12);
4658 assert!((polynomial_value(&dc_db, z) - eta_b).abs() < 1e-12);
4659 }
4660 }
4661
4662 #[test]
4663 fn second_coefficient_partials_match_exact_span_derivatives() {
4664 let score_span = LocalSpanCubic {
4665 left: -0.75,
4666 right: 0.25,
4667 c0: 0.08,
4668 c1: -0.03,
4669 c2: 0.02,
4670 c3: -0.01,
4671 };
4672 let link_span = LocalSpanCubic {
4673 left: -0.6,
4674 right: 0.9,
4675 c0: -0.05,
4676 c1: 0.04,
4677 c2: -0.02,
4678 c3: 0.015,
4679 };
4680 let a = 0.3;
4681 let b = -0.7;
4682 let second_partials = denested_cell_second_partials(score_span, link_span, a, b);
4683 let dc_daa = second_partials.0;
4684 let dc_dab = second_partials.1;
4685 let dc_dbb = second_partials.2;
4686 for &z in &[-0.75, -0.4, -0.1, 0.2] {
4687 let u = a + b * z;
4688 let eta_aa = link_span.second_derivative(u);
4689 let eta_ab = z * link_span.second_derivative(u);
4690 let eta_bb = z * z * link_span.second_derivative(u);
4691 assert!((polynomial_value(&dc_daa, z) - eta_aa).abs() < 1e-12);
4692 assert!((polynomial_value(&dc_dab, z) - eta_ab).abs() < 1e-12);
4693 assert!((polynomial_value(&dc_dbb, z) - eta_bb).abs() < 1e-12);
4694 }
4695 }
4696
4697 #[test]
4698 fn higher_derivative_moment_helpers_reject_empty_first_coefficients() {
4699 let cell = DenestedCubicCell {
4700 left: -1.0,
4701 right: 1.0,
4702 c0: 0.0,
4703 c1: 1.0,
4704 c2: 0.0,
4705 c3: 0.0,
4706 };
4707 let moments = [1.0; 16];
4708
4709 let third_err = cell_third_derivative_from_moments(
4710 cell,
4711 &[],
4712 &[1.0],
4713 &[1.0],
4714 &[],
4715 &[],
4716 &[],
4717 &[],
4718 &moments,
4719 )
4720 .expect_err("empty first coefficients should be rejected");
4721 assert!(third_err.contains("r first-derivative coefficients must be non-empty"));
4722
4723 let fourth_err = cell_fourth_derivative_from_moments(
4724 cell,
4725 &[1.0],
4726 &[],
4727 &[1.0],
4728 &[1.0],
4729 &[],
4730 &[],
4731 &[],
4732 &[],
4733 &[],
4734 &[],
4735 &[],
4736 &[],
4737 &[],
4738 &[],
4739 &[],
4740 &moments,
4741 )
4742 .expect_err("empty first coefficients should be rejected");
4743 assert!(fourth_err.contains("s first-derivative coefficients must be non-empty"));
4744 }
4745
4746 #[test]
4747 fn fourth_derivative_rejects_overlong_scratch_convolutions() {
4748 let cell = DenestedCubicCell {
4749 left: -1.0,
4750 right: 1.0,
4751 c0: 0.0,
4752 c1: 1.0,
4753 c2: 0.0,
4754 c3: 0.0,
4755 };
4756 let long_first = [1.0; 10];
4757 let zero = [0.0; 1];
4758 let moments = [1.0; 64];
4759
4760 let err = cell_fourth_derivative_from_moments(
4761 cell,
4762 &long_first,
4763 &long_first,
4764 &long_first,
4765 &long_first,
4766 &zero,
4767 &zero,
4768 &zero,
4769 &zero,
4770 &zero,
4771 &zero,
4772 &zero,
4773 &zero,
4774 &zero,
4775 &zero,
4776 &zero,
4777 &moments,
4778 )
4779 .expect_err("oversized convolution should be rejected before writing scratch");
4780 assert!(err.contains("fourth derivative polynomial convolution scratch too small"));
4781 }
4782
4783 #[test]
4784 fn score_and_link_basis_cell_coefficients_match_direct_construction() {
4785 let score_basis_span = LocalSpanCubic {
4786 left: -0.7,
4787 right: 0.4,
4788 c0: 0.2,
4789 c1: -0.04,
4790 c2: 0.03,
4791 c3: -0.01,
4792 };
4793 let link_basis_span = LocalSpanCubic {
4794 left: -0.5,
4795 right: 1.1,
4796 c0: -0.03,
4797 c1: 0.05,
4798 c2: -0.02,
4799 c3: 0.01,
4800 };
4801 let a = 0.25;
4802 let b = -0.8;
4803 let score_coeffs = score_basis_cell_coefficients(score_basis_span, b);
4804 let link_coeffs = link_basis_cell_coefficients(link_basis_span, a, b);
4805 for &z in &[-0.7, -0.1, 0.2, 0.4] {
4806 let score_poly = polynomial_value(&score_coeffs, z);
4807 let link_poly = polynomial_value(&link_coeffs, z);
4808 assert!((score_poly - b * score_basis_span.evaluate(z)).abs() < 1e-12);
4809 assert!((link_poly - link_basis_span.evaluate(a + b * z)).abs() < 1e-12);
4810 }
4811 }
4812
4813 #[test]
4814 fn link_basis_partials_match_exact_span_derivatives() {
4815 let link_basis_span = LocalSpanCubic {
4816 left: -0.5,
4817 right: 1.1,
4818 c0: -0.03,
4819 c1: 0.05,
4820 c2: -0.02,
4821 c3: 0.01,
4822 };
4823 let a = 0.25;
4824 let b = -0.8;
4825 let (dc_da, dc_db) = link_basis_cell_coefficient_partials(link_basis_span, a, b);
4826 let (dc_daa, dc_dab, dc_dbb) = link_basis_cell_second_partials(link_basis_span, a, b);
4827 for &z in &[-0.6, -0.2, 0.15, 0.5] {
4828 let u = a + b * z;
4829 let eta_a = link_basis_span.first_derivative(u);
4830 let eta_b = z * link_basis_span.first_derivative(u);
4831 let eta_aa = link_basis_span.second_derivative(u);
4832 let eta_ab = z * link_basis_span.second_derivative(u);
4833 let eta_bb = z * z * link_basis_span.second_derivative(u);
4834 assert!((polynomial_value(&dc_da, z) - eta_a).abs() < 1e-12);
4835 assert!((polynomial_value(&dc_db, z) - eta_b).abs() < 1e-12);
4836 assert!((polynomial_value(&dc_daa, z) - eta_aa).abs() < 1e-12);
4837 assert!((polynomial_value(&dc_dab, z) - eta_ab).abs() < 1e-12);
4838 assert!((polynomial_value(&dc_dbb, z) - eta_bb).abs() < 1e-12);
4839 }
4840 }
4841
4842 #[test]
4843 fn denested_third_partials_match_exact_span_derivatives() {
4844 let link_span = LocalSpanCubic {
4845 left: -0.6,
4846 right: 0.9,
4847 c0: -0.05,
4848 c1: 0.04,
4849 c2: -0.02,
4850 c3: 0.015,
4851 };
4852 let (dc_daaa, dc_daab, dc_dabb, dc_dbbb) = denested_cell_third_partials(link_span);
4853 let link_third = 6.0 * link_span.c3;
4854 for &z in &[-0.75, -0.4, -0.1, 0.2] {
4855 let eta_aaa = link_third;
4856 let eta_aab = z * link_third;
4857 let eta_abb = z * z * link_third;
4858 let eta_bbb = z * z * z * link_third;
4859 assert!((polynomial_value(&dc_daaa, z) - eta_aaa).abs() < 1e-12);
4860 assert!((polynomial_value(&dc_daab, z) - eta_aab).abs() < 1e-12);
4861 assert!((polynomial_value(&dc_dabb, z) - eta_abb).abs() < 1e-12);
4862 assert!((polynomial_value(&dc_dbbb, z) - eta_bbb).abs() < 1e-12);
4863 }
4864 }
4865
4866 #[test]
4867 fn link_basis_third_partials_match_exact_span_derivatives() {
4868 let link_basis_span = LocalSpanCubic {
4869 left: -0.5,
4870 right: 1.1,
4871 c0: -0.03,
4872 c1: 0.05,
4873 c2: -0.02,
4874 c3: 0.01,
4875 };
4876 let (dc_daaa, dc_daab, dc_dabb, dc_dbbb) = link_basis_cell_third_partials(link_basis_span);
4877 let link_third = 6.0 * link_basis_span.c3;
4878 for &z in &[-0.6, -0.2, 0.15, 0.5] {
4879 let eta_aaa = link_third;
4880 let eta_aab = z * link_third;
4881 let eta_abb = z * z * link_third;
4882 let eta_bbb = z * z * z * link_third;
4883 assert!((polynomial_value(&dc_daaa, z) - eta_aaa).abs() < 1e-12);
4884 assert!((polynomial_value(&dc_daab, z) - eta_aab).abs() < 1e-12);
4885 assert!((polynomial_value(&dc_dabb, z) - eta_abb).abs() < 1e-12);
4886 assert!((polynomial_value(&dc_dbbb, z) - eta_bbb).abs() < 1e-12);
4887 }
4888 }
4889
4890 #[test]
4891 fn branch_selection_uses_normalized_non_affine_coefficients() {
4892 let affine = DenestedCubicCell {
4893 left: -1.0,
4894 right: 1.0,
4895 c0: 0.1,
4896 c1: -0.4,
4897 c2: 1e-13,
4898 c3: -1e-13,
4899 };
4900 let quartic = DenestedCubicCell {
4901 c2: 2e-4,
4902 c3: 1e-13,
4903 ..affine
4904 };
4905 let sextic = DenestedCubicCell {
4906 c2: 2e-4,
4907 c3: 5e-3,
4908 ..affine
4909 };
4910 assert_eq!(branch_cell(affine).unwrap(), ExactCellBranch::Affine);
4911 assert_eq!(branch_cell(quartic).unwrap(), ExactCellBranch::Quartic);
4912 assert_eq!(branch_cell(sextic).unwrap(), ExactCellBranch::Sextic);
4913 }
4914
4915 #[test]
4916 fn affine_anchor_moments_match_whole_line_closed_forms() {
4917 let out = affine_anchor_moment_vector(0.0, 0.0, f64::NEG_INFINITY, f64::INFINITY, 4);
4918 let sqrt_2pi = (2.0 * std::f64::consts::PI).sqrt();
4919 assert!((out[0] - sqrt_2pi).abs() < 1e-12);
4920 assert!(out[1].abs() < 1e-12);
4921 assert!((out[2] - sqrt_2pi).abs() < 1e-12);
4922 }
4923
4924 #[test]
4925 fn affine_anchor_moments_match_shifted_gaussian_whole_line() {
4926 let alpha = 0.7;
4927 let beta = -0.4;
4928 let out = affine_anchor_moment_vector(alpha, beta, f64::NEG_INFINITY, f64::INFINITY, 4);
4929 let s = (1.0 + beta * beta).sqrt();
4930 let mu = -alpha * beta / (1.0 + beta * beta);
4931 let scale = (-alpha * alpha / (2.0 * s * s)).exp() / s;
4932 let sqrt_2pi = (2.0 * std::f64::consts::PI).sqrt();
4933 assert!((out[0] - scale * sqrt_2pi).abs() < 1e-12);
4934 assert!((out[1] - scale * sqrt_2pi * mu).abs() < 1e-12);
4935 assert!((out[2] - scale * sqrt_2pi * (mu * mu + 1.0 / (s * s))).abs() < 1e-10);
4936 }
4937
4938 #[test]
4939 fn quartic_recurrence_reduces_higher_moments() {
4940 let cell = DenestedCubicCell {
4941 left: -1.0,
4942 right: 0.9,
4943 c0: 0.2,
4944 c1: -0.3,
4945 c2: 0.18,
4946 c3: 0.0,
4947 };
4948 let exact = |k: usize| {
4949 simpson_integral(cell.left, cell.right, 2000, |z| {
4950 z.powi(k as i32) * (-cell.q(z)).exp()
4951 })
4952 };
4953 let reduced = reduce_quartic_moments(cell, [exact(0), exact(1), exact(2)], 6)
4954 .expect("quartic reduction");
4955 for k in 0..=6 {
4956 let target = exact(k);
4957 assert!(
4958 (reduced[k] - target).abs() < 1e-7,
4959 "quartic reduced moment M{k} mismatch: {} vs {}",
4960 reduced[k],
4961 target
4962 );
4963 }
4964 }
4965
4966 #[test]
4967 fn sextic_recurrence_reduces_higher_moments() {
4968 let cell = DenestedCubicCell {
4969 left: -0.8,
4970 right: 0.7,
4971 c0: -0.1,
4972 c1: 0.25,
4973 c2: -0.14,
4974 c3: 0.22,
4975 };
4976 let exact = |k: usize| {
4977 simpson_integral(cell.left, cell.right, 3000, |z| {
4978 z.powi(k as i32) * (-cell.q(z)).exp()
4979 })
4980 };
4981 let reduced =
4982 reduce_sextic_moments(cell, [exact(0), exact(1), exact(2), exact(3), exact(4)], 9)
4983 .expect("sextic reduction");
4984 for k in 0..=9 {
4985 let target = exact(k);
4986 assert!(
4987 (reduced[k] - target).abs() < 1e-7,
4988 "sextic reduced moment M{k} mismatch: {} vs {}",
4989 reduced[k],
4990 target
4991 );
4992 }
4993 }
4994
4995 #[test]
4996 fn degenerate_sextic_branch_preserves_quadratic_coefficient() {
4997 let cell = DenestedCubicCell {
4998 left: -1.0,
4999 right: 1.0,
5000 c0: 0.0,
5001 c1: 0.0,
5002 c2: 0.1,
5003 c3: 2.0e-10,
5004 };
5005 assert_eq!(branch_cell(cell).unwrap(), ExactCellBranch::Sextic);
5006
5007 let state = evaluate_cell_moments(cell, 9).expect("degenerate sextic cell");
5008 let quartic_cell = DenestedCubicCell { c3: 0.0, ..cell };
5009 let quartic = evaluate_cell_moments(quartic_cell, 9).expect("quartic cell");
5010 let affine = evaluate_affine_cell_state(
5011 DenestedCubicCell {
5012 c2: 0.0,
5013 c3: 0.0,
5014 ..cell
5015 },
5016 9,
5017 )
5018 .expect("affine cell");
5019
5020 assert_eq!(state.branch, ExactCellBranch::Quartic);
5021 for k in 0..=9 {
5022 assert!(
5023 (state.moments[k] - quartic.moments[k]).abs() < 1e-12,
5024 "lowered moment M{k} should match the quartic cell: {} vs {}",
5025 state.moments[k],
5026 quartic.moments[k]
5027 );
5028 }
5029 assert!(
5030 (state.moments[0] - affine.moments[0]).abs() > 1e-4,
5031 "degenerate sextic handling must not drop the nonzero c2 term"
5032 );
5033 }
5034
5035 #[test]
5036 fn moment_reduced_first_and_second_derivatives_match_numeric_integrals() {
5037 let cell = DenestedCubicCell {
5038 left: -0.9,
5039 right: 0.6,
5040 c0: 0.15,
5041 c1: -0.2,
5042 c2: 0.08,
5043 c3: 0.17,
5044 };
5045 let moments = reduce_sextic_moments(
5046 cell,
5047 [
5048 simpson_integral(cell.left, cell.right, 3000, |z| (-cell.q(z)).exp()),
5049 simpson_integral(cell.left, cell.right, 3000, |z| z * (-cell.q(z)).exp()),
5050 simpson_integral(cell.left, cell.right, 3000, |z| z * z * (-cell.q(z)).exp()),
5051 simpson_integral(cell.left, cell.right, 3000, |z| {
5052 z.powi(3) * (-cell.q(z)).exp()
5053 }),
5054 simpson_integral(cell.left, cell.right, 3000, |z| {
5055 z.powi(4) * (-cell.q(z)).exp()
5056 }),
5057 ],
5058 9,
5059 )
5060 .expect("reduced moments");
5061
5062 let r = [0.7, -0.1, 0.3];
5063 let s = [0.2, 0.5];
5064 let second = [0.4, -0.2, 0.1];
5065 let exact_first = cell_first_derivative_from_moments(&r, &moments).expect("first");
5066 let exact_second =
5067 cell_second_derivative_from_moments(cell, &r, &s, &second, &moments).expect("second");
5068
5069 let numeric_first = simpson_integral(cell.left, cell.right, 3000, |z| {
5070 polynomial_value(&r, z) * (-cell.q(z)).exp() / (2.0 * std::f64::consts::PI)
5071 });
5072 let numeric_second = simpson_integral(cell.left, cell.right, 3000, |z| {
5073 let eta = cell.eta(z);
5074 (polynomial_value(&second, z) - eta * polynomial_value(&r, z) * polynomial_value(&s, z))
5075 * (-cell.q(z)).exp()
5076 / (2.0 * std::f64::consts::PI)
5077 });
5078
5079 assert!((exact_first - numeric_first).abs() < 1e-7);
5080 assert!((exact_second - numeric_second).abs() < 1e-7);
5081 }
5082
5083 #[test]
5084 fn moment_reduced_third_derivative_matches_numeric_integral() {
5085 let cell = DenestedCubicCell {
5086 left: -0.85,
5087 right: 0.7,
5088 c0: -0.12,
5089 c1: 0.18,
5090 c2: 0.09,
5091 c3: -0.11,
5092 };
5093 let moments = evaluate_cell_moments(cell, 12).expect("cell moments");
5094 let r = [0.35, -0.12, 0.08];
5095 let s = [0.17, 0.09];
5096 let t = [-0.21, 0.14, -0.04];
5097 let rs = [0.11, -0.07, 0.05];
5098 let rt = [-0.06, 0.03];
5099 let st = [0.08, -0.02, 0.01];
5100 let rst = [0.04, -0.05, 0.02];
5101
5102 let exact_third = cell_third_derivative_from_moments(
5103 cell,
5104 &r,
5105 &s,
5106 &t,
5107 &rs,
5108 &rt,
5109 &st,
5110 &rst,
5111 &moments.moments,
5112 )
5113 .expect("third derivative");
5114 let numeric_third = simpson_integral(cell.left, cell.right, 4000, |z| {
5115 let eta = cell.eta(z);
5116 let rz = polynomial_value(&r, z);
5117 let sz = polynomial_value(&s, z);
5118 let tz = polynomial_value(&t, z);
5119 let rsz = polynomial_value(&rs, z);
5120 let rtz = polynomial_value(&rt, z);
5121 let stz = polynomial_value(&st, z);
5122 let rstz = polynomial_value(&rst, z);
5123 (rstz - eta * (rsz * tz + rtz * sz + stz * rz) + (eta * eta - 1.0) * rz * sz * tz)
5124 * (-cell.q(z)).exp()
5125 / (2.0 * std::f64::consts::PI)
5126 });
5127
5128 assert!((exact_third - numeric_third).abs() < 1e-7);
5129 }
5130
5131 #[test]
5132 fn moment_reduced_fourth_derivative_matches_numeric_integral() {
5133 let cell = DenestedCubicCell {
5134 left: -0.8,
5135 right: 0.65,
5136 c0: 0.11,
5137 c1: -0.22,
5138 c2: 0.07,
5139 c3: 0.13,
5140 };
5141 let moments = evaluate_cell_moments(cell, 16).expect("cell moments");
5142 let r = [0.21, -0.13, 0.06];
5143 let s = [-0.18, 0.04];
5144 let t = [0.09, 0.07, -0.03];
5145 let u = [-0.14, 0.05];
5146 let rs = [0.08, -0.03, 0.02];
5147 let rt = [-0.05, 0.01];
5148 let ru = [0.04, -0.02, 0.01];
5149 let st = [0.03, 0.02];
5150 let su = [-0.02, 0.05, -0.01];
5151 let tu = [0.07, -0.04];
5152 let rst = [0.03, -0.01, 0.02];
5153 let rsu = [-0.02, 0.04];
5154 let rtu = [0.01, 0.02, -0.01];
5155 let stu = [-0.03, 0.02];
5156 let rstu = [0.02, -0.01, 0.01];
5157
5158 let exact_fourth = cell_fourth_derivative_from_moments(
5159 cell,
5160 &r,
5161 &s,
5162 &t,
5163 &u,
5164 &rs,
5165 &rt,
5166 &ru,
5167 &st,
5168 &su,
5169 &tu,
5170 &rst,
5171 &rsu,
5172 &rtu,
5173 &stu,
5174 &rstu,
5175 &moments.moments,
5176 )
5177 .expect("fourth derivative");
5178 let numeric_fourth = simpson_integral(cell.left, cell.right, 5000, |z| {
5179 let eta = cell.eta(z);
5180 let rz = polynomial_value(&r, z);
5181 let sz = polynomial_value(&s, z);
5182 let tz = polynomial_value(&t, z);
5183 let uz = polynomial_value(&u, z);
5184 let rsz = polynomial_value(&rs, z);
5185 let rtz = polynomial_value(&rt, z);
5186 let ruz = polynomial_value(&ru, z);
5187 let stz = polynomial_value(&st, z);
5188 let suz = polynomial_value(&su, z);
5189 let tuz = polynomial_value(&tu, z);
5190 let rstz = polynomial_value(&rst, z);
5191 let rsuz = polynomial_value(&rsu, z);
5192 let rtuz = polynomial_value(&rtu, z);
5193 let stuz = polynomial_value(&stu, z);
5194 let rstuz = polynomial_value(&rstu, z);
5195 let linear =
5196 rstz * uz + rsuz * tz + rtuz * sz + stuz * rz + rsz * tuz + rtz * suz + ruz * stz;
5197 let quadratic = rsz * tz * uz
5198 + rtz * sz * uz
5199 + ruz * sz * tz
5200 + stz * rz * uz
5201 + suz * rz * tz
5202 + tuz * rz * sz;
5203 let quartic = rz * sz * tz * uz;
5204 (rstuz - eta * linear
5205 + (eta * eta - 1.0) * quadratic
5206 + (-eta * eta * eta + 3.0 * eta) * quartic)
5207 * (-cell.q(z)).exp()
5208 / (2.0 * std::f64::consts::PI)
5209 });
5210
5211 assert!((exact_fourth - numeric_fourth).abs() < 2e-7);
5212 }
5213
5214 #[test]
5215 fn denested_cell_parameter_derivatives_match_exact_integrands() {
5216 let score_span = LocalSpanCubic {
5217 left: -0.75,
5218 right: 0.25,
5219 c0: 0.08,
5220 c1: -0.03,
5221 c2: 0.02,
5222 c3: -0.01,
5223 };
5224 let link_span = LocalSpanCubic {
5225 left: -0.6,
5226 right: 0.9,
5227 c0: -0.05,
5228 c1: 0.04,
5229 c2: -0.02,
5230 c3: 0.015,
5231 };
5232 let a = 0.3;
5233 let b = -0.7;
5234 let coeffs = denested_cell_coefficients(score_span, link_span, a, b);
5235 let cell = DenestedCubicCell {
5236 left: score_span.left,
5237 right: score_span.right,
5238 c0: coeffs[0],
5239 c1: coeffs[1],
5240 c2: coeffs[2],
5241 c3: coeffs[3],
5242 };
5243 let state = evaluate_cell_moments(cell, 24).expect("cell moments");
5244 let (dc_da, dc_db) = denested_cell_coefficient_partials(score_span, link_span, a, b);
5245 let (dc_daa, dc_dab, dc_dbb) = denested_cell_second_partials(score_span, link_span, a, b);
5246 let (dc_daaa, dc_daab, dc_dabb, dc_dbbb) = denested_cell_third_partials(link_span);
5247 let zero = [0.0; 4];
5248 let link_third = 6.0 * link_span.c3;
5249
5250 let eta_a = |z: f64| 1.0 + link_span.first_derivative(a + b * z);
5251 let eta_b = |z: f64| z + score_span.evaluate(z) + z * link_span.first_derivative(a + b * z);
5252 let eta_aa = |z: f64| link_span.second_derivative(a + b * z);
5253 let eta_ab = |z: f64| z * link_span.second_derivative(a + b * z);
5254 let eta_bb = |z: f64| z * z * link_span.second_derivative(a + b * z);
5255 let eta_aaa = |z: f64| link_third + 0.0 * z;
5256 let eta_aab = |z: f64| z * link_third;
5257 let eta_abb = |z: f64| z * z * link_third;
5258 let eta_bbb = |z: f64| z * z * z * link_third;
5259
5260 let exact_a = cell_first_derivative_from_moments(&dc_da, &state.moments).expect("a");
5261 let exact_b = cell_first_derivative_from_moments(&dc_db, &state.moments).expect("b");
5262 let exact_aa =
5263 cell_second_derivative_from_moments(cell, &dc_da, &dc_da, &dc_daa, &state.moments)
5264 .expect("aa");
5265 let exact_ab =
5266 cell_second_derivative_from_moments(cell, &dc_da, &dc_db, &dc_dab, &state.moments)
5267 .expect("ab");
5268 let exact_bb =
5269 cell_second_derivative_from_moments(cell, &dc_db, &dc_db, &dc_dbb, &state.moments)
5270 .expect("bb");
5271 let exact_aaa = cell_third_derivative_from_moments(
5272 cell,
5273 &dc_da,
5274 &dc_da,
5275 &dc_da,
5276 &dc_daa,
5277 &dc_daa,
5278 &dc_daa,
5279 &dc_daaa,
5280 &state.moments,
5281 )
5282 .expect("aaa");
5283 let exact_aab = cell_third_derivative_from_moments(
5284 cell,
5285 &dc_da,
5286 &dc_da,
5287 &dc_db,
5288 &dc_daa,
5289 &dc_dab,
5290 &dc_dab,
5291 &dc_daab,
5292 &state.moments,
5293 )
5294 .expect("aab");
5295 let exact_abb = cell_third_derivative_from_moments(
5296 cell,
5297 &dc_da,
5298 &dc_db,
5299 &dc_db,
5300 &dc_dab,
5301 &dc_dab,
5302 &dc_dbb,
5303 &dc_dabb,
5304 &state.moments,
5305 )
5306 .expect("abb");
5307 let exact_bbb = cell_third_derivative_from_moments(
5308 cell,
5309 &dc_db,
5310 &dc_db,
5311 &dc_db,
5312 &dc_dbb,
5313 &dc_dbb,
5314 &dc_dbb,
5315 &dc_dbbb,
5316 &state.moments,
5317 )
5318 .expect("bbb");
5319 let exact_aaaa = cell_fourth_derivative_from_moments(
5320 cell,
5321 &dc_da,
5322 &dc_da,
5323 &dc_da,
5324 &dc_da,
5325 &dc_daa,
5326 &dc_daa,
5327 &dc_daa,
5328 &dc_daa,
5329 &dc_daa,
5330 &dc_daa,
5331 &dc_daaa,
5332 &dc_daaa,
5333 &dc_daaa,
5334 &dc_daaa,
5335 &zero,
5336 &state.moments,
5337 )
5338 .expect("aaaa");
5339 let exact_aaab = cell_fourth_derivative_from_moments(
5340 cell,
5341 &dc_da,
5342 &dc_da,
5343 &dc_da,
5344 &dc_db,
5345 &dc_daa,
5346 &dc_daa,
5347 &dc_dab,
5348 &dc_daa,
5349 &dc_dab,
5350 &dc_dab,
5351 &dc_daaa,
5352 &dc_daab,
5353 &dc_daab,
5354 &dc_daab,
5355 &zero,
5356 &state.moments,
5357 )
5358 .expect("aaab");
5359 let exact_aabb = cell_fourth_derivative_from_moments(
5360 cell,
5361 &dc_da,
5362 &dc_da,
5363 &dc_db,
5364 &dc_db,
5365 &dc_daa,
5366 &dc_dab,
5367 &dc_dab,
5368 &dc_dab,
5369 &dc_dab,
5370 &dc_dbb,
5371 &dc_daab,
5372 &dc_daab,
5373 &dc_dabb,
5374 &dc_dabb,
5375 &zero,
5376 &state.moments,
5377 )
5378 .expect("aabb");
5379 let exact_abbb = cell_fourth_derivative_from_moments(
5380 cell,
5381 &dc_da,
5382 &dc_db,
5383 &dc_db,
5384 &dc_db,
5385 &dc_dab,
5386 &dc_dab,
5387 &dc_dab,
5388 &dc_dbb,
5389 &dc_dbb,
5390 &dc_dbb,
5391 &dc_dabb,
5392 &dc_dabb,
5393 &dc_dabb,
5394 &dc_dbbb,
5395 &zero,
5396 &state.moments,
5397 )
5398 .expect("abbb");
5399 let exact_bbbb = cell_fourth_derivative_from_moments(
5400 cell,
5401 &dc_db,
5402 &dc_db,
5403 &dc_db,
5404 &dc_db,
5405 &dc_dbb,
5406 &dc_dbb,
5407 &dc_dbb,
5408 &dc_dbb,
5409 &dc_dbb,
5410 &dc_dbb,
5411 &dc_dbbb,
5412 &dc_dbbb,
5413 &dc_dbbb,
5414 &dc_dbbb,
5415 &zero,
5416 &state.moments,
5417 )
5418 .expect("bbbb");
5419
5420 let numeric_a = simpson_integral(cell.left, cell.right, 5000, |z| {
5421 eta_a(z) * (-cell.q(z)).exp() * INV_TWO_PI
5422 });
5423 let numeric_b = simpson_integral(cell.left, cell.right, 5000, |z| {
5424 eta_b(z) * (-cell.q(z)).exp() * INV_TWO_PI
5425 });
5426 let numeric_aa = simpson_integral(cell.left, cell.right, 5000, |z| {
5427 (eta_aa(z) - cell.eta(z) * eta_a(z) * eta_a(z)) * (-cell.q(z)).exp() * INV_TWO_PI
5428 });
5429 let numeric_ab = simpson_integral(cell.left, cell.right, 5000, |z| {
5430 (eta_ab(z) - cell.eta(z) * eta_a(z) * eta_b(z)) * (-cell.q(z)).exp() * INV_TWO_PI
5431 });
5432 let numeric_bb = simpson_integral(cell.left, cell.right, 5000, |z| {
5433 (eta_bb(z) - cell.eta(z) * eta_b(z) * eta_b(z)) * (-cell.q(z)).exp() * INV_TWO_PI
5434 });
5435 let numeric_aaa = simpson_integral(cell.left, cell.right, 5000, |z| {
5436 let eta = cell.eta(z);
5437 (eta_aaa(z) - 3.0 * eta * eta_aa(z) * eta_a(z) + (eta * eta - 1.0) * eta_a(z).powi(3))
5438 * (-cell.q(z)).exp()
5439 * INV_TWO_PI
5440 });
5441 let numeric_aab = simpson_integral(cell.left, cell.right, 5000, |z| {
5442 let eta = cell.eta(z);
5443 let a_z = eta_a(z);
5444 let b_z = eta_b(z);
5445 (eta_aab(z) - eta * (eta_aa(z) * b_z + 2.0 * eta_ab(z) * a_z)
5446 + (eta * eta - 1.0) * a_z * a_z * b_z)
5447 * (-cell.q(z)).exp()
5448 * INV_TWO_PI
5449 });
5450 let numeric_abb = simpson_integral(cell.left, cell.right, 5000, |z| {
5451 let eta = cell.eta(z);
5452 let a_z = eta_a(z);
5453 let b_z = eta_b(z);
5454 (eta_abb(z) - eta * (2.0 * eta_ab(z) * b_z + eta_bb(z) * a_z)
5455 + (eta * eta - 1.0) * a_z * b_z * b_z)
5456 * (-cell.q(z)).exp()
5457 * INV_TWO_PI
5458 });
5459 let numeric_bbb = simpson_integral(cell.left, cell.right, 5000, |z| {
5460 let eta = cell.eta(z);
5461 (eta_bbb(z) - 3.0 * eta * eta_bb(z) * eta_b(z) + (eta * eta - 1.0) * eta_b(z).powi(3))
5462 * (-cell.q(z)).exp()
5463 * INV_TWO_PI
5464 });
5465 let numeric_aaaa = simpson_integral(cell.left, cell.right, 5000, |z| {
5466 let eta = cell.eta(z);
5467 let eta_a_z = eta_a(z);
5468 let eta_aa_z = eta_aa(z);
5469 let eta_aaa_z = eta_aaa(z);
5470 (-eta * (4.0 * eta_aaa_z * eta_a_z + 3.0 * eta_aa_z * eta_aa_z)
5471 + (eta * eta - 1.0) * (6.0 * eta_aa_z * eta_a_z * eta_a_z)
5472 + (-eta * eta * eta + 3.0 * eta) * eta_a_z.powi(4))
5473 * (-cell.q(z)).exp()
5474 * INV_TWO_PI
5475 });
5476 let numeric_aaab = simpson_integral(cell.left, cell.right, 5000, |z| {
5477 let eta = cell.eta(z);
5478 let a_z = eta_a(z);
5479 let b_z = eta_b(z);
5480 let aa_z = eta_aa(z);
5481 let ab_z = eta_ab(z);
5482 let aaa_z = eta_aaa(z);
5483 let aab_z = eta_aab(z);
5484 (-eta * (aaa_z * b_z + 3.0 * aab_z * a_z + 3.0 * aa_z * ab_z)
5485 + (eta * eta - 1.0) * (3.0 * aa_z * a_z * b_z + 3.0 * ab_z * a_z * a_z)
5486 + (-eta * eta * eta + 3.0 * eta) * a_z.powi(3) * b_z)
5487 * (-cell.q(z)).exp()
5488 * INV_TWO_PI
5489 });
5490 let numeric_aabb = simpson_integral(cell.left, cell.right, 5000, |z| {
5491 let eta = cell.eta(z);
5492 let a_z = eta_a(z);
5493 let b_z = eta_b(z);
5494 let aa_z = eta_aa(z);
5495 let ab_z = eta_ab(z);
5496 let bb_z = eta_bb(z);
5497 let aab_z = eta_aab(z);
5498 let abb_z = eta_abb(z);
5499 (-eta * (2.0 * aab_z * b_z + 2.0 * abb_z * a_z + aa_z * bb_z + 2.0 * ab_z * ab_z)
5500 + (eta * eta - 1.0)
5501 * (aa_z * b_z * b_z + 4.0 * ab_z * a_z * b_z + bb_z * a_z * a_z)
5502 + (-eta * eta * eta + 3.0 * eta) * a_z * a_z * b_z * b_z)
5503 * (-cell.q(z)).exp()
5504 * INV_TWO_PI
5505 });
5506 let numeric_abbb = simpson_integral(cell.left, cell.right, 5000, |z| {
5507 let eta = cell.eta(z);
5508 let a_z = eta_a(z);
5509 let b_z = eta_b(z);
5510 let ab_z = eta_ab(z);
5511 let bb_z = eta_bb(z);
5512 let abb_z = eta_abb(z);
5513 let bbb_z = eta_bbb(z);
5514 (-eta * (3.0 * abb_z * b_z + bbb_z * a_z + 3.0 * ab_z * bb_z)
5515 + (eta * eta - 1.0) * (3.0 * ab_z * b_z * b_z + 3.0 * bb_z * a_z * b_z)
5516 + (-eta * eta * eta + 3.0 * eta) * a_z * b_z.powi(3))
5517 * (-cell.q(z)).exp()
5518 * INV_TWO_PI
5519 });
5520 let numeric_bbbb = simpson_integral(cell.left, cell.right, 5000, |z| {
5521 let eta = cell.eta(z);
5522 let eta_b_z = eta_b(z);
5523 let eta_bb_z = eta_bb(z);
5524 let eta_bbb_z = eta_bbb(z);
5525 (-eta * (4.0 * eta_bbb_z * eta_b_z + 3.0 * eta_bb_z * eta_bb_z)
5526 + (eta * eta - 1.0) * (6.0 * eta_bb_z * eta_b_z * eta_b_z)
5527 + (-eta * eta * eta + 3.0 * eta) * eta_b_z.powi(4))
5528 * (-cell.q(z)).exp()
5529 * INV_TWO_PI
5530 });
5531
5532 assert!((exact_a - numeric_a).abs() < 1e-8);
5533 assert!((exact_b - numeric_b).abs() < 1e-8);
5534 assert!((exact_aa - numeric_aa).abs() < 1e-8);
5535 assert!((exact_ab - numeric_ab).abs() < 1e-8);
5536 assert!((exact_bb - numeric_bb).abs() < 1e-8);
5537 assert!((exact_aaa - numeric_aaa).abs() < 2e-7);
5538 assert!((exact_aab - numeric_aab).abs() < 2e-7);
5539 assert!((exact_abb - numeric_abb).abs() < 2e-7);
5540 assert!((exact_bbb - numeric_bbb).abs() < 2e-7);
5541 assert!((exact_aaaa - numeric_aaaa).abs() < 2e-6);
5542 assert!((exact_aaab - numeric_aaab).abs() < 2e-6);
5543 assert!((exact_aabb - numeric_aabb).abs() < 2e-6);
5544 assert!((exact_abbb - numeric_abbb).abs() < 2e-6);
5545 assert!((exact_bbbb - numeric_bbbb).abs() < 2e-6);
5546 }
5547
5548 #[test]
5549 fn link_basis_cell_derivatives_match_exact_integrands() {
5550 let score_span = LocalSpanCubic {
5551 left: -0.75,
5552 right: 0.25,
5553 c0: 0.08,
5554 c1: -0.03,
5555 c2: 0.02,
5556 c3: -0.01,
5557 };
5558 let link_span = LocalSpanCubic {
5559 left: -0.6,
5560 right: 0.9,
5561 c0: -0.05,
5562 c1: 0.04,
5563 c2: -0.02,
5564 c3: 0.015,
5565 };
5566 let link_basis_span = LocalSpanCubic {
5567 left: -0.6,
5568 right: 0.9,
5569 c0: 0.02,
5570 c1: -0.01,
5571 c2: 0.03,
5572 c3: -0.02,
5573 };
5574 let a = 0.3;
5575 let b = -0.7;
5576 let coeffs = denested_cell_coefficients(score_span, link_span, a, b);
5577 let cell = DenestedCubicCell {
5578 left: score_span.left,
5579 right: score_span.right,
5580 c0: coeffs[0],
5581 c1: coeffs[1],
5582 c2: coeffs[2],
5583 c3: coeffs[3],
5584 };
5585 let state = evaluate_cell_moments(cell, 24).expect("cell moments");
5586 let (dc_da, dc_db) = denested_cell_coefficient_partials(score_span, link_span, a, b);
5587 let second_partials = denested_cell_second_partials(score_span, link_span, a, b);
5588 let dc_daa = second_partials.0;
5589 let dc_dab = second_partials.1;
5590 let dc_dbb = second_partials.2;
5591 let denested_third = denested_cell_third_partials(link_span);
5592 let dc_daaa = denested_third.0;
5593 let dc_dbbb = denested_third.3;
5594
5595 let coeff_w = link_basis_cell_coefficients(link_basis_span, a, b);
5596 let (coeff_aw, coeff_bw) = link_basis_cell_coefficient_partials(link_basis_span, a, b);
5597 let (coeff_aaw, coeff_abw, coeff_bbw) =
5598 link_basis_cell_second_partials(link_basis_span, a, b);
5599 let link_basis_third = link_basis_cell_third_partials(link_basis_span);
5600 let coeff_aaaw = link_basis_third.0;
5601 let coeff_bbbw = link_basis_third.3;
5602 let zero = [0.0; 4];
5603 let basis_third = 6.0 * link_basis_span.c3;
5604
5605 let eta_a = |z: f64| 1.0 + link_span.first_derivative(a + b * z);
5606 let eta_b = |z: f64| z + score_span.evaluate(z) + z * link_span.first_derivative(a + b * z);
5607 let eta_aa = |z: f64| link_span.second_derivative(a + b * z);
5608 let eta_ab = |z: f64| z * link_span.second_derivative(a + b * z);
5609 let eta_bb = |z: f64| z * z * link_span.second_derivative(a + b * z);
5610 let eta_w = |z: f64| link_basis_span.evaluate(a + b * z);
5611 let eta_aw = |z: f64| link_basis_span.first_derivative(a + b * z);
5612 let eta_bw = |z: f64| z * link_basis_span.first_derivative(a + b * z);
5613 let eta_aaw = |z: f64| link_basis_span.second_derivative(a + b * z);
5614 let eta_abw = |z: f64| z * link_basis_span.second_derivative(a + b * z);
5615 let eta_bbw = |z: f64| z * z * link_basis_span.second_derivative(a + b * z);
5616 let eta_aaaw = |z: f64| basis_third + 0.0 * z;
5617 let eta_bbbw = |z: f64| z * z * z * basis_third;
5618
5619 let exact_w = cell_first_derivative_from_moments(&coeff_w, &state.moments).expect("w");
5620 let exact_aw =
5621 cell_second_derivative_from_moments(cell, &dc_da, &coeff_w, &coeff_aw, &state.moments)
5622 .expect("aw");
5623 let exact_bw =
5624 cell_second_derivative_from_moments(cell, &dc_db, &coeff_w, &coeff_bw, &state.moments)
5625 .expect("bw");
5626 let exact_ww =
5627 cell_second_derivative_from_moments(cell, &coeff_w, &coeff_w, &zero, &state.moments)
5628 .expect("ww");
5629 let exact_aaw = cell_third_derivative_from_moments(
5630 cell,
5631 &dc_da,
5632 &dc_da,
5633 &coeff_w,
5634 &dc_daa,
5635 &coeff_aw,
5636 &coeff_aw,
5637 &coeff_aaw,
5638 &state.moments,
5639 )
5640 .expect("aaw");
5641 let exact_abw = cell_third_derivative_from_moments(
5642 cell,
5643 &dc_da,
5644 &dc_db,
5645 &coeff_w,
5646 &dc_dab,
5647 &coeff_aw,
5648 &coeff_bw,
5649 &coeff_abw,
5650 &state.moments,
5651 )
5652 .expect("abw");
5653 let exact_bbw = cell_third_derivative_from_moments(
5654 cell,
5655 &dc_db,
5656 &dc_db,
5657 &coeff_w,
5658 &dc_dbb,
5659 &coeff_bw,
5660 &coeff_bw,
5661 &coeff_bbw,
5662 &state.moments,
5663 )
5664 .expect("bbw");
5665 let exact_www = cell_third_derivative_from_moments(
5666 cell,
5667 &coeff_w,
5668 &coeff_w,
5669 &coeff_w,
5670 &zero,
5671 &zero,
5672 &zero,
5673 &zero,
5674 &state.moments,
5675 )
5676 .expect("www");
5677 let exact_aaaw = cell_fourth_derivative_from_moments(
5678 cell,
5679 &dc_da,
5680 &dc_da,
5681 &dc_da,
5682 &coeff_w,
5683 &dc_daa,
5684 &dc_daa,
5685 &coeff_aw,
5686 &dc_daa,
5687 &coeff_aw,
5688 &coeff_aw,
5689 &dc_daaa,
5690 &coeff_aaw,
5691 &coeff_aaw,
5692 &coeff_aaw,
5693 &coeff_aaaw,
5694 &state.moments,
5695 )
5696 .expect("aaaw");
5697 let exact_aaww = cell_fourth_derivative_from_moments(
5698 cell,
5699 &dc_da,
5700 &dc_da,
5701 &coeff_w,
5702 &coeff_w,
5703 &dc_daa,
5704 &coeff_aw,
5705 &coeff_aw,
5706 &coeff_aw,
5707 &coeff_aw,
5708 &zero,
5709 &coeff_aaw,
5710 &coeff_aaw,
5711 &zero,
5712 &zero,
5713 &zero,
5714 &state.moments,
5715 )
5716 .expect("aaww");
5717 let exact_abww = cell_fourth_derivative_from_moments(
5718 cell,
5719 &dc_da,
5720 &dc_db,
5721 &coeff_w,
5722 &coeff_w,
5723 &dc_dab,
5724 &coeff_aw,
5725 &coeff_aw,
5726 &coeff_bw,
5727 &coeff_bw,
5728 &zero,
5729 &coeff_abw,
5730 &coeff_abw,
5731 &zero,
5732 &zero,
5733 &zero,
5734 &state.moments,
5735 )
5736 .expect("abww");
5737 let exact_bbww = cell_fourth_derivative_from_moments(
5738 cell,
5739 &dc_db,
5740 &dc_db,
5741 &coeff_w,
5742 &coeff_w,
5743 &dc_dbb,
5744 &coeff_bw,
5745 &coeff_bw,
5746 &coeff_bw,
5747 &coeff_bw,
5748 &zero,
5749 &coeff_bbw,
5750 &coeff_bbw,
5751 &zero,
5752 &zero,
5753 &zero,
5754 &state.moments,
5755 )
5756 .expect("bbww");
5757 let exact_bbbw = cell_fourth_derivative_from_moments(
5758 cell,
5759 &dc_db,
5760 &dc_db,
5761 &dc_db,
5762 &coeff_w,
5763 &dc_dbb,
5764 &dc_dbb,
5765 &coeff_bw,
5766 &dc_dbb,
5767 &coeff_bw,
5768 &coeff_bw,
5769 &dc_dbbb,
5770 &coeff_bbw,
5771 &coeff_bbw,
5772 &coeff_bbw,
5773 &coeff_bbbw,
5774 &state.moments,
5775 )
5776 .expect("bbbw");
5777 let exact_wwww = cell_fourth_derivative_from_moments(
5778 cell,
5779 &coeff_w,
5780 &coeff_w,
5781 &coeff_w,
5782 &coeff_w,
5783 &zero,
5784 &zero,
5785 &zero,
5786 &zero,
5787 &zero,
5788 &zero,
5789 &zero,
5790 &zero,
5791 &zero,
5792 &zero,
5793 &zero,
5794 &state.moments,
5795 )
5796 .expect("wwww");
5797
5798 let numeric_w = simpson_integral(cell.left, cell.right, 5000, |z| {
5799 eta_w(z) * (-cell.q(z)).exp() * INV_TWO_PI
5800 });
5801 let numeric_aw = simpson_integral(cell.left, cell.right, 5000, |z| {
5802 (eta_aw(z) - cell.eta(z) * eta_a(z) * eta_w(z)) * (-cell.q(z)).exp() * INV_TWO_PI
5803 });
5804 let numeric_bw = simpson_integral(cell.left, cell.right, 5000, |z| {
5805 (eta_bw(z) - cell.eta(z) * eta_b(z) * eta_w(z)) * (-cell.q(z)).exp() * INV_TWO_PI
5806 });
5807 let numeric_ww = simpson_integral(cell.left, cell.right, 5000, |z| {
5808 (-cell.eta(z) * eta_w(z) * eta_w(z)) * (-cell.q(z)).exp() * INV_TWO_PI
5809 });
5810 let numeric_aaw = simpson_integral(cell.left, cell.right, 5000, |z| {
5811 let eta = cell.eta(z);
5812 let w_z = eta_w(z);
5813 let a_z = eta_a(z);
5814 (eta_aaw(z) - eta * (eta_aa(z) * w_z + 2.0 * eta_aw(z) * a_z)
5815 + (eta * eta - 1.0) * a_z * a_z * w_z)
5816 * (-cell.q(z)).exp()
5817 * INV_TWO_PI
5818 });
5819 let numeric_abw = simpson_integral(cell.left, cell.right, 5000, |z| {
5820 let eta = cell.eta(z);
5821 let w_z = eta_w(z);
5822 let a_z = eta_a(z);
5823 let b_z = eta_b(z);
5824 (eta_abw(z) - eta * (eta_ab(z) * w_z + eta_aw(z) * b_z + eta_bw(z) * a_z)
5825 + (eta * eta - 1.0) * a_z * b_z * w_z)
5826 * (-cell.q(z)).exp()
5827 * INV_TWO_PI
5828 });
5829 let numeric_bbw = simpson_integral(cell.left, cell.right, 5000, |z| {
5830 let eta = cell.eta(z);
5831 let w_z = eta_w(z);
5832 let b_z = eta_b(z);
5833 (eta_bbw(z) - eta * (eta_bb(z) * w_z + 2.0 * eta_bw(z) * b_z)
5834 + (eta * eta - 1.0) * b_z * b_z * w_z)
5835 * (-cell.q(z)).exp()
5836 * INV_TWO_PI
5837 });
5838 let numeric_www = simpson_integral(cell.left, cell.right, 5000, |z| {
5839 let eta = cell.eta(z);
5840 let w_z = eta_w(z);
5841 ((eta * eta - 1.0) * w_z * w_z * w_z) * (-cell.q(z)).exp() * INV_TWO_PI
5842 });
5843 let numeric_aaaw = simpson_integral(cell.left, cell.right, 5000, |z| {
5844 let eta = cell.eta(z);
5845 let a_z = eta_a(z);
5846 let w_z = eta_w(z);
5847 let aa_z = eta_aa(z);
5848 let aw_z = eta_aw(z);
5849 (eta_aaaw(z)
5850 - eta * ((dc_daaa[0] + 0.0 * z) * w_z + 3.0 * eta_aaw(z) * a_z + 3.0 * aa_z * aw_z)
5851 + (eta * eta - 1.0) * (3.0 * aa_z * a_z * w_z + 3.0 * aw_z * a_z * a_z)
5852 + (-eta * eta * eta + 3.0 * eta) * a_z * a_z * a_z * w_z)
5853 * (-cell.q(z)).exp()
5854 * INV_TWO_PI
5855 });
5856 let numeric_aaww = simpson_integral(cell.left, cell.right, 5000, |z| {
5857 let eta = cell.eta(z);
5858 let a_z = eta_a(z);
5859 let w_z = eta_w(z);
5860 let aw_z = eta_aw(z);
5861 (-(2.0 * eta * (eta_aaw(z) * w_z + aw_z * aw_z))
5862 + (eta * eta - 1.0) * (eta_aa(z) * w_z * w_z + 4.0 * aw_z * a_z * w_z)
5863 + (-eta * eta * eta + 3.0 * eta) * a_z * a_z * w_z * w_z)
5864 * (-cell.q(z)).exp()
5865 * INV_TWO_PI
5866 });
5867 let numeric_abww = simpson_integral(cell.left, cell.right, 5000, |z| {
5868 let eta = cell.eta(z);
5869 let a_z = eta_a(z);
5870 let b_z = eta_b(z);
5871 let w_z = eta_w(z);
5872 let aw_z = eta_aw(z);
5873 let bw_z = eta_bw(z);
5874 (-(2.0 * eta * (eta_abw(z) * w_z + aw_z * bw_z))
5875 + (eta * eta - 1.0)
5876 * (eta_ab(z) * w_z * w_z + 2.0 * aw_z * b_z * w_z + 2.0 * bw_z * a_z * w_z)
5877 + (-eta * eta * eta + 3.0 * eta) * a_z * b_z * w_z * w_z)
5878 * (-cell.q(z)).exp()
5879 * INV_TWO_PI
5880 });
5881 let numeric_bbww = simpson_integral(cell.left, cell.right, 5000, |z| {
5882 let eta = cell.eta(z);
5883 let b_z = eta_b(z);
5884 let w_z = eta_w(z);
5885 let bw_z = eta_bw(z);
5886 (-(2.0 * eta * (eta_bbw(z) * w_z + bw_z * bw_z))
5887 + (eta * eta - 1.0) * (eta_bb(z) * w_z * w_z + 4.0 * bw_z * b_z * w_z)
5888 + (-eta * eta * eta + 3.0 * eta) * b_z * b_z * w_z * w_z)
5889 * (-cell.q(z)).exp()
5890 * INV_TWO_PI
5891 });
5892 let numeric_bbbw = simpson_integral(cell.left, cell.right, 5000, |z| {
5893 let eta = cell.eta(z);
5894 let b_z = eta_b(z);
5895 let w_z = eta_w(z);
5896 let bb_z = eta_bb(z);
5897 let bw_z = eta_bw(z);
5898 (eta_bbbw(z)
5899 - eta
5900 * ((dc_dbbb[3] * z * z * z) * w_z + 3.0 * eta_bbw(z) * b_z + 3.0 * bb_z * bw_z)
5901 + (eta * eta - 1.0) * (3.0 * bb_z * b_z * w_z + 3.0 * bw_z * b_z * b_z)
5902 + (-eta * eta * eta + 3.0 * eta) * b_z * b_z * b_z * w_z)
5903 * (-cell.q(z)).exp()
5904 * INV_TWO_PI
5905 });
5906 let numeric_wwww = simpson_integral(cell.left, cell.right, 5000, |z| {
5907 let eta = cell.eta(z);
5908 let w_z = eta_w(z);
5909 ((-eta * eta * eta + 3.0 * eta) * w_z * w_z * w_z * w_z)
5910 * (-cell.q(z)).exp()
5911 * INV_TWO_PI
5912 });
5913
5914 assert!((exact_w - numeric_w).abs() < 1e-8);
5915 assert!((exact_aw - numeric_aw).abs() < 1e-7);
5916 assert!((exact_bw - numeric_bw).abs() < 1e-7);
5917 assert!((exact_ww - numeric_ww).abs() < 1e-7);
5918 assert!((exact_aaw - numeric_aaw).abs() < 2e-6);
5919 assert!((exact_abw - numeric_abw).abs() < 2e-6);
5920 assert!((exact_bbw - numeric_bbw).abs() < 2e-6);
5921 assert!((exact_www - numeric_www).abs() < 2e-6);
5922 assert!((exact_aaaw - numeric_aaaw).abs() < 3e-6);
5923 assert!((exact_aaww - numeric_aaww).abs() < 3e-6);
5924 assert!((exact_abww - numeric_abww).abs() < 3e-6);
5925 assert!((exact_bbww - numeric_bbww).abs() < 3e-6);
5926 assert!((exact_bbbw - numeric_bbbw).abs() < 3e-6);
5927 assert!((exact_wwww - numeric_wwww).abs() < 3e-6);
5928 }
5929
5930 #[test]
5931 fn score_basis_cell_derivatives_match_exact_integrands() {
5932 let score_span = LocalSpanCubic {
5933 left: -0.75,
5934 right: 0.25,
5935 c0: 0.08,
5936 c1: -0.03,
5937 c2: 0.02,
5938 c3: -0.01,
5939 };
5940 let score_basis_span = LocalSpanCubic {
5941 left: -0.75,
5942 right: 0.25,
5943 c0: -0.04,
5944 c1: 0.06,
5945 c2: -0.01,
5946 c3: 0.02,
5947 };
5948 let link_span = LocalSpanCubic {
5949 left: -0.6,
5950 right: 0.9,
5951 c0: -0.05,
5952 c1: 0.04,
5953 c2: -0.02,
5954 c3: 0.015,
5955 };
5956 let a = 0.3;
5957 let b = -0.7;
5958 let coeffs = denested_cell_coefficients(score_span, link_span, a, b);
5959 let cell = DenestedCubicCell {
5960 left: score_span.left,
5961 right: score_span.right,
5962 c0: coeffs[0],
5963 c1: coeffs[1],
5964 c2: coeffs[2],
5965 c3: coeffs[3],
5966 };
5967 let state = evaluate_cell_moments(cell, 24).expect("cell moments");
5968 let (dc_da, dc_db) = denested_cell_coefficient_partials(score_span, link_span, a, b);
5969 let second_partials = denested_cell_second_partials(score_span, link_span, a, b);
5970 let dc_daa = second_partials.0;
5971 let dc_dab = second_partials.1;
5972 let dc_dbb = second_partials.2;
5973 let denested_third = denested_cell_third_partials(link_span);
5974 let dc_dbbb = denested_third.3;
5975
5976 let coeff_h = score_basis_cell_coefficients(score_basis_span, b);
5977 let coeff_bh = score_basis_cell_coefficients(score_basis_span, 1.0);
5978 let zero = [0.0; 4];
5979
5980 let eta_a = |z: f64| 1.0 + link_span.first_derivative(a + b * z);
5981 let eta_b = |z: f64| z + score_span.evaluate(z) + z * link_span.first_derivative(a + b * z);
5982 let eta_ab = |z: f64| z * link_span.second_derivative(a + b * z);
5983 let eta_bb = |z: f64| z * z * link_span.second_derivative(a + b * z);
5984 let eta_h = |z: f64| b * score_basis_span.evaluate(z);
5985 let eta_bh = |z: f64| score_basis_span.evaluate(z);
5986
5987 let exact_h = cell_first_derivative_from_moments(&coeff_h, &state.moments).expect("h");
5988 let exact_ah =
5989 cell_second_derivative_from_moments(cell, &dc_da, &coeff_h, &zero, &state.moments)
5990 .expect("ah");
5991 let exact_bh =
5992 cell_second_derivative_from_moments(cell, &dc_db, &coeff_h, &coeff_bh, &state.moments)
5993 .expect("bh");
5994 let exact_hh =
5995 cell_second_derivative_from_moments(cell, &coeff_h, &coeff_h, &zero, &state.moments)
5996 .expect("hh");
5997 let exact_abh = cell_third_derivative_from_moments(
5998 cell,
5999 &dc_da,
6000 &dc_db,
6001 &coeff_h,
6002 &dc_dab,
6003 &zero,
6004 &coeff_bh,
6005 &zero,
6006 &state.moments,
6007 )
6008 .expect("abh");
6009 let exact_bbh = cell_third_derivative_from_moments(
6010 cell,
6011 &dc_db,
6012 &dc_db,
6013 &coeff_h,
6014 &dc_dbb,
6015 &coeff_bh,
6016 &coeff_bh,
6017 &zero,
6018 &state.moments,
6019 )
6020 .expect("bbh");
6021 let exact_bhh = cell_third_derivative_from_moments(
6022 cell,
6023 &dc_db,
6024 &coeff_h,
6025 &coeff_h,
6026 &coeff_bh,
6027 &coeff_bh,
6028 &zero,
6029 &zero,
6030 &state.moments,
6031 )
6032 .expect("bhh");
6033 let exact_hhh = cell_third_derivative_from_moments(
6034 cell,
6035 &coeff_h,
6036 &coeff_h,
6037 &coeff_h,
6038 &zero,
6039 &zero,
6040 &zero,
6041 &zero,
6042 &state.moments,
6043 )
6044 .expect("hhh");
6045 let exact_bbbh = cell_fourth_derivative_from_moments(
6046 cell,
6047 &dc_db,
6048 &dc_db,
6049 &dc_db,
6050 &coeff_h,
6051 &dc_dbb,
6052 &dc_dbb,
6053 &coeff_bh,
6054 &dc_dbb,
6055 &coeff_bh,
6056 &coeff_bh,
6057 &dc_dbbb,
6058 &zero,
6059 &zero,
6060 &zero,
6061 &zero,
6062 &state.moments,
6063 )
6064 .expect("bbbh");
6065 let exact_aahh = cell_fourth_derivative_from_moments(
6066 cell,
6067 &dc_da,
6068 &dc_da,
6069 &coeff_h,
6070 &coeff_h,
6071 &dc_daa,
6072 &zero,
6073 &zero,
6074 &zero,
6075 &zero,
6076 &zero,
6077 &zero,
6078 &zero,
6079 &zero,
6080 &zero,
6081 &zero,
6082 &state.moments,
6083 )
6084 .expect("aahh");
6085 let exact_abhh = cell_fourth_derivative_from_moments(
6086 cell,
6087 &dc_da,
6088 &dc_db,
6089 &coeff_h,
6090 &coeff_h,
6091 &dc_dab,
6092 &zero,
6093 &zero,
6094 &coeff_bh,
6095 &coeff_bh,
6096 &zero,
6097 &zero,
6098 &zero,
6099 &zero,
6100 &zero,
6101 &zero,
6102 &state.moments,
6103 )
6104 .expect("abhh");
6105 let exact_bbhh = cell_fourth_derivative_from_moments(
6106 cell,
6107 &dc_db,
6108 &dc_db,
6109 &coeff_h,
6110 &coeff_h,
6111 &dc_dbb,
6112 &coeff_bh,
6113 &coeff_bh,
6114 &coeff_bh,
6115 &coeff_bh,
6116 &zero,
6117 &zero,
6118 &zero,
6119 &zero,
6120 &zero,
6121 &zero,
6122 &state.moments,
6123 )
6124 .expect("bbhh");
6125 let exact_bhhh = cell_fourth_derivative_from_moments(
6126 cell,
6127 &dc_db,
6128 &coeff_h,
6129 &coeff_h,
6130 &coeff_h,
6131 &coeff_bh,
6132 &coeff_bh,
6133 &coeff_bh,
6134 &zero,
6135 &zero,
6136 &zero,
6137 &zero,
6138 &zero,
6139 &zero,
6140 &zero,
6141 &zero,
6142 &state.moments,
6143 )
6144 .expect("bhhh");
6145 let exact_hhhh = cell_fourth_derivative_from_moments(
6146 cell,
6147 &coeff_h,
6148 &coeff_h,
6149 &coeff_h,
6150 &coeff_h,
6151 &zero,
6152 &zero,
6153 &zero,
6154 &zero,
6155 &zero,
6156 &zero,
6157 &zero,
6158 &zero,
6159 &zero,
6160 &zero,
6161 &zero,
6162 &state.moments,
6163 )
6164 .expect("hhhh");
6165
6166 let numeric_h = simpson_integral(cell.left, cell.right, 5000, |z| {
6167 eta_h(z) * (-cell.q(z)).exp() * INV_TWO_PI
6168 });
6169 let numeric_ah = simpson_integral(cell.left, cell.right, 5000, |z| {
6170 (-cell.eta(z) * eta_a(z) * eta_h(z)) * (-cell.q(z)).exp() * INV_TWO_PI
6171 });
6172 let numeric_bh = simpson_integral(cell.left, cell.right, 5000, |z| {
6173 (eta_bh(z) - cell.eta(z) * eta_b(z) * eta_h(z)) * (-cell.q(z)).exp() * INV_TWO_PI
6174 });
6175 let numeric_hh = simpson_integral(cell.left, cell.right, 5000, |z| {
6176 (-cell.eta(z) * eta_h(z) * eta_h(z)) * (-cell.q(z)).exp() * INV_TWO_PI
6177 });
6178 let numeric_abh = simpson_integral(cell.left, cell.right, 5000, |z| {
6179 let eta = cell.eta(z);
6180 (-(eta * (eta_ab(z) * eta_h(z) + eta_bh(z) * eta_a(z)))
6181 + (eta * eta - 1.0) * eta_a(z) * eta_b(z) * eta_h(z))
6182 * (-cell.q(z)).exp()
6183 * INV_TWO_PI
6184 });
6185 let numeric_bbh = simpson_integral(cell.left, cell.right, 5000, |z| {
6186 let eta = cell.eta(z);
6187 (-(eta * (eta_bb(z) * eta_h(z) + 2.0 * eta_bh(z) * eta_b(z)))
6188 + (eta * eta - 1.0) * eta_b(z) * eta_b(z) * eta_h(z))
6189 * (-cell.q(z)).exp()
6190 * INV_TWO_PI
6191 });
6192 let numeric_bhh = simpson_integral(cell.left, cell.right, 5000, |z| {
6193 let eta = cell.eta(z);
6194 (-(2.0 * eta * eta_bh(z) * eta_h(z))
6195 + (eta * eta - 1.0) * eta_b(z) * eta_h(z) * eta_h(z))
6196 * (-cell.q(z)).exp()
6197 * INV_TWO_PI
6198 });
6199 let numeric_hhh = simpson_integral(cell.left, cell.right, 5000, |z| {
6200 let eta = cell.eta(z);
6201 ((eta * eta - 1.0) * eta_h(z) * eta_h(z) * eta_h(z)) * (-cell.q(z)).exp() * INV_TWO_PI
6202 });
6203 let numeric_bbbh = simpson_integral(cell.left, cell.right, 5000, |z| {
6204 let eta = cell.eta(z);
6205 let b_z = eta_b(z);
6206 let h_z = eta_h(z);
6207 let bb_z = eta_bb(z);
6208 let bh_z = eta_bh(z);
6209 (-(eta * ((dc_dbbb[3] * z * z * z) * h_z + 3.0 * bb_z * bh_z))
6210 + (eta * eta - 1.0) * (3.0 * bb_z * b_z * h_z + 3.0 * bh_z * b_z * b_z)
6211 + (-eta * eta * eta + 3.0 * eta) * b_z * b_z * b_z * h_z)
6212 * (-cell.q(z)).exp()
6213 * INV_TWO_PI
6214 });
6215 let numeric_aahh = simpson_integral(cell.left, cell.right, 5000, |z| {
6216 let eta = cell.eta(z);
6217 let a_z = eta_a(z);
6218 let h_z = eta_h(z);
6219 ((eta * eta - 1.0) * polynomial_value(&dc_daa, z) * h_z * h_z
6220 + (-eta * eta * eta + 3.0 * eta) * a_z * a_z * h_z * h_z)
6221 * (-cell.q(z)).exp()
6222 * INV_TWO_PI
6223 });
6224 let numeric_abhh = simpson_integral(cell.left, cell.right, 5000, |z| {
6225 let eta = cell.eta(z);
6226 let a_z = eta_a(z);
6227 let b_z = eta_b(z);
6228 let h_z = eta_h(z);
6229 ((eta * eta - 1.0) * (eta_ab(z) * h_z * h_z + 2.0 * eta_bh(z) * a_z * h_z)
6230 + (-eta * eta * eta + 3.0 * eta) * a_z * b_z * h_z * h_z)
6231 * (-cell.q(z)).exp()
6232 * INV_TWO_PI
6233 });
6234 let numeric_bbhh = simpson_integral(cell.left, cell.right, 5000, |z| {
6235 let eta = cell.eta(z);
6236 let b_z = eta_b(z);
6237 let h_z = eta_h(z);
6238 let bh_z = eta_bh(z);
6239 (-(2.0 * eta * bh_z * bh_z)
6240 + (eta * eta - 1.0) * (eta_bb(z) * h_z * h_z + 4.0 * bh_z * b_z * h_z)
6241 + (-eta * eta * eta + 3.0 * eta) * b_z * b_z * h_z * h_z)
6242 * (-cell.q(z)).exp()
6243 * INV_TWO_PI
6244 });
6245 let numeric_bhhh = simpson_integral(cell.left, cell.right, 5000, |z| {
6246 let eta = cell.eta(z);
6247 let h_z = eta_h(z);
6248 (-(eta * (3.0 * eta_bh(z) * h_z * h_z))
6249 + (eta * eta - 1.0) * (3.0 * eta_bh(z) * h_z * h_z)
6250 + (-eta * eta * eta + 3.0 * eta) * eta_b(z) * h_z * h_z * h_z)
6251 * (-cell.q(z)).exp()
6252 * INV_TWO_PI
6253 });
6254 let numeric_hhhh = simpson_integral(cell.left, cell.right, 5000, |z| {
6255 let eta = cell.eta(z);
6256 let h_z = eta_h(z);
6257 ((-eta * eta * eta + 3.0 * eta) * h_z * h_z * h_z * h_z)
6258 * (-cell.q(z)).exp()
6259 * INV_TWO_PI
6260 });
6261
6262 assert!((exact_h - numeric_h).abs() < 1e-8);
6263 assert!((exact_ah - numeric_ah).abs() < 1e-7);
6264 assert!((exact_bh - numeric_bh).abs() < 1e-7);
6265 assert!((exact_hh - numeric_hh).abs() < 1e-7);
6266 assert!((exact_abh - numeric_abh).abs() < 2e-6);
6267 assert!((exact_bbh - numeric_bbh).abs() < 2e-6);
6268 assert!((exact_bhh - numeric_bhh).abs() < 2e-6);
6269 assert!((exact_hhh - numeric_hhh).abs() < 2e-6);
6270 assert!((exact_bbbh - numeric_bbbh).abs() < 3e-6);
6271 assert!((exact_aahh - numeric_aahh).abs() < 3e-6);
6272 assert!((exact_abhh - numeric_abhh).abs() < 3e-6);
6273 assert!((exact_bbhh - numeric_bbhh).abs() < 3e-6);
6274 assert!((exact_bhhh - numeric_bhhh).abs() < 3e-6);
6275 assert!((exact_hhhh - numeric_hhhh).abs() < 3e-6);
6276 }
6277
6278 #[test]
6279 fn cross_basis_cell_derivatives_match_exact_integrands() {
6280 let score_span = LocalSpanCubic {
6281 left: -0.75,
6282 right: 0.25,
6283 c0: 0.08,
6284 c1: -0.03,
6285 c2: 0.02,
6286 c3: -0.01,
6287 };
6288 let score_basis_span = LocalSpanCubic {
6289 left: -0.75,
6290 right: 0.25,
6291 c0: -0.04,
6292 c1: 0.06,
6293 c2: -0.01,
6294 c3: 0.02,
6295 };
6296 let link_span = LocalSpanCubic {
6297 left: -0.6,
6298 right: 0.9,
6299 c0: -0.05,
6300 c1: 0.04,
6301 c2: -0.02,
6302 c3: 0.015,
6303 };
6304 let link_basis_span = LocalSpanCubic {
6305 left: -0.6,
6306 right: 0.9,
6307 c0: 0.02,
6308 c1: -0.01,
6309 c2: 0.03,
6310 c3: -0.02,
6311 };
6312 let a = 0.3;
6313 let b = -0.7;
6314 let coeffs = denested_cell_coefficients(score_span, link_span, a, b);
6315 let cell = DenestedCubicCell {
6316 left: score_span.left,
6317 right: score_span.right,
6318 c0: coeffs[0],
6319 c1: coeffs[1],
6320 c2: coeffs[2],
6321 c3: coeffs[3],
6322 };
6323 let state = evaluate_cell_moments(cell, 24).expect("cell moments");
6324 let (dc_da, dc_db) = denested_cell_coefficient_partials(score_span, link_span, a, b);
6325 let (dc_daa, dc_dab, _) = denested_cell_second_partials(score_span, link_span, a, b);
6326
6327 let coeff_h = score_basis_cell_coefficients(score_basis_span, b);
6328 let coeff_bh = score_basis_cell_coefficients(score_basis_span, 1.0);
6329 let coeff_w = link_basis_cell_coefficients(link_basis_span, a, b);
6330 let (coeff_aw, coeff_bw) = link_basis_cell_coefficient_partials(link_basis_span, a, b);
6331 let (coeff_aaw, coeff_abw, _) = link_basis_cell_second_partials(link_basis_span, a, b);
6332 let zero = [0.0; 4];
6333
6334 let eta_a = |z: f64| 1.0 + link_span.first_derivative(a + b * z);
6335 let eta_b = |z: f64| z + score_span.evaluate(z) + z * link_span.first_derivative(a + b * z);
6336 let eta_h = |z: f64| b * score_basis_span.evaluate(z);
6337 let eta_bh = |z: f64| score_basis_span.evaluate(z);
6338 let eta_w = |z: f64| link_basis_span.evaluate(a + b * z);
6339 let eta_ab = |z: f64| z * link_span.second_derivative(a + b * z);
6340 let eta_aw = |z: f64| link_basis_span.first_derivative(a + b * z);
6341 let eta_bw = |z: f64| z * link_basis_span.first_derivative(a + b * z);
6342
6343 let exact_hw =
6344 cell_second_derivative_from_moments(cell, &coeff_h, &coeff_w, &zero, &state.moments)
6345 .expect("hw");
6346 let exact_ahw = cell_third_derivative_from_moments(
6347 cell,
6348 &dc_da,
6349 &coeff_h,
6350 &coeff_w,
6351 &zero,
6352 &coeff_aw,
6353 &zero,
6354 &zero,
6355 &state.moments,
6356 )
6357 .expect("ahw");
6358 let exact_bhw = cell_third_derivative_from_moments(
6359 cell,
6360 &dc_db,
6361 &coeff_h,
6362 &coeff_w,
6363 &coeff_bh,
6364 &coeff_bw,
6365 &zero,
6366 &zero,
6367 &state.moments,
6368 )
6369 .expect("bhw");
6370 let exact_hhw = cell_third_derivative_from_moments(
6371 cell,
6372 &coeff_h,
6373 &coeff_h,
6374 &coeff_w,
6375 &zero,
6376 &zero,
6377 &zero,
6378 &zero,
6379 &state.moments,
6380 )
6381 .expect("hhw");
6382 let exact_hww = cell_third_derivative_from_moments(
6383 cell,
6384 &coeff_h,
6385 &coeff_w,
6386 &coeff_w,
6387 &zero,
6388 &zero,
6389 &zero,
6390 &zero,
6391 &state.moments,
6392 )
6393 .expect("hww");
6394 let exact_aahw = cell_fourth_derivative_from_moments(
6395 cell,
6396 &dc_da,
6397 &dc_da,
6398 &coeff_h,
6399 &coeff_w,
6400 &dc_daa,
6401 &zero,
6402 &coeff_aw,
6403 &zero,
6404 &coeff_aw,
6405 &zero,
6406 &zero,
6407 &coeff_aaw,
6408 &zero,
6409 &zero,
6410 &zero,
6411 &state.moments,
6412 )
6413 .expect("aahw");
6414 let exact_hhww = cell_fourth_derivative_from_moments(
6415 cell,
6416 &coeff_h,
6417 &coeff_h,
6418 &coeff_w,
6419 &coeff_w,
6420 &zero,
6421 &zero,
6422 &zero,
6423 &zero,
6424 &zero,
6425 &zero,
6426 &zero,
6427 &zero,
6428 &zero,
6429 &zero,
6430 &zero,
6431 &state.moments,
6432 )
6433 .expect("hhww");
6434 let exact_hhhw = cell_fourth_derivative_from_moments(
6435 cell,
6436 &coeff_h,
6437 &coeff_h,
6438 &coeff_h,
6439 &coeff_w,
6440 &zero,
6441 &zero,
6442 &zero,
6443 &zero,
6444 &zero,
6445 &zero,
6446 &zero,
6447 &zero,
6448 &zero,
6449 &zero,
6450 &zero,
6451 &state.moments,
6452 )
6453 .expect("hhhw");
6454 let exact_abhw = cell_fourth_derivative_from_moments(
6455 cell,
6456 &dc_da,
6457 &dc_db,
6458 &coeff_h,
6459 &coeff_w,
6460 &dc_dab,
6461 &zero,
6462 &coeff_aw,
6463 &coeff_bh,
6464 &coeff_bw,
6465 &zero,
6466 &zero,
6467 &coeff_abw,
6468 &zero,
6469 &zero,
6470 &zero,
6471 &state.moments,
6472 )
6473 .expect("abhw");
6474 let exact_ahww = cell_fourth_derivative_from_moments(
6475 cell,
6476 &dc_da,
6477 &coeff_h,
6478 &coeff_w,
6479 &coeff_w,
6480 &zero,
6481 &coeff_aw,
6482 &coeff_aw,
6483 &zero,
6484 &zero,
6485 &zero,
6486 &zero,
6487 &zero,
6488 &zero,
6489 &zero,
6490 &zero,
6491 &state.moments,
6492 )
6493 .expect("ahww");
6494 let exact_bhww = cell_fourth_derivative_from_moments(
6495 cell,
6496 &dc_db,
6497 &coeff_h,
6498 &coeff_w,
6499 &coeff_w,
6500 &coeff_bh,
6501 &coeff_bw,
6502 &coeff_bw,
6503 &zero,
6504 &zero,
6505 &zero,
6506 &zero,
6507 &zero,
6508 &zero,
6509 &zero,
6510 &zero,
6511 &state.moments,
6512 )
6513 .expect("bhww");
6514 let exact_hwww = cell_fourth_derivative_from_moments(
6515 cell,
6516 &coeff_h,
6517 &coeff_w,
6518 &coeff_w,
6519 &coeff_w,
6520 &zero,
6521 &zero,
6522 &zero,
6523 &zero,
6524 &zero,
6525 &zero,
6526 &zero,
6527 &zero,
6528 &zero,
6529 &zero,
6530 &zero,
6531 &state.moments,
6532 )
6533 .expect("hwww");
6534
6535 let numeric_hw = simpson_integral(cell.left, cell.right, 5000, |z| {
6536 (-cell.eta(z) * eta_h(z) * eta_w(z)) * (-cell.q(z)).exp() * INV_TWO_PI
6537 });
6538 let numeric_ahw = simpson_integral(cell.left, cell.right, 5000, |z| {
6539 let eta = cell.eta(z);
6540 (-(eta * eta_aw(z) * eta_h(z)) + (eta * eta - 1.0) * eta_a(z) * eta_h(z) * eta_w(z))
6541 * (-cell.q(z)).exp()
6542 * INV_TWO_PI
6543 });
6544 let numeric_bhw = simpson_integral(cell.left, cell.right, 5000, |z| {
6545 let eta = cell.eta(z);
6546 (-(eta * (eta_bh(z) * eta_w(z) + eta_bw(z) * eta_h(z)))
6547 + (eta * eta - 1.0) * eta_b(z) * eta_h(z) * eta_w(z))
6548 * (-cell.q(z)).exp()
6549 * INV_TWO_PI
6550 });
6551 let numeric_hhw = simpson_integral(cell.left, cell.right, 5000, |z| {
6552 let eta = cell.eta(z);
6553 ((eta * eta - 1.0) * eta_h(z) * eta_h(z) * eta_w(z)) * (-cell.q(z)).exp() * INV_TWO_PI
6554 });
6555 let numeric_hww = simpson_integral(cell.left, cell.right, 5000, |z| {
6556 let eta = cell.eta(z);
6557 ((eta * eta - 1.0) * eta_h(z) * eta_w(z) * eta_w(z)) * (-cell.q(z)).exp() * INV_TWO_PI
6558 });
6559 let numeric_aahw = simpson_integral(cell.left, cell.right, 5000, |z| {
6560 let eta = cell.eta(z);
6561 (-(eta * polynomial_value(&coeff_aaw, z) * eta_h(z))
6562 + (eta * eta - 1.0)
6563 * (polynomial_value(&dc_daa, z) * eta_h(z) * eta_w(z)
6564 + 2.0 * eta_aw(z) * eta_a(z) * eta_h(z))
6565 + (-eta * eta * eta + 3.0 * eta) * eta_a(z) * eta_a(z) * eta_h(z) * eta_w(z))
6566 * (-cell.q(z)).exp()
6567 * INV_TWO_PI
6568 });
6569 let numeric_hhww = simpson_integral(cell.left, cell.right, 5000, |z| {
6570 let eta = cell.eta(z);
6571 ((-eta * eta * eta + 3.0 * eta) * eta_h(z) * eta_h(z) * eta_w(z) * eta_w(z))
6572 * (-cell.q(z)).exp()
6573 * INV_TWO_PI
6574 });
6575 let numeric_hhhw = simpson_integral(cell.left, cell.right, 5000, |z| {
6576 let eta = cell.eta(z);
6577 ((-eta * eta * eta + 3.0 * eta) * eta_h(z) * eta_h(z) * eta_h(z) * eta_w(z))
6578 * (-cell.q(z)).exp()
6579 * INV_TWO_PI
6580 });
6581 let numeric_abhw = simpson_integral(cell.left, cell.right, 5000, |z| {
6582 let eta = cell.eta(z);
6583 (-(eta * polynomial_value(&coeff_abw, z) * eta_h(z) + eta * eta_aw(z) * eta_bh(z))
6584 + (eta * eta - 1.0)
6585 * (eta_ab(z) * eta_h(z) * eta_w(z)
6586 + eta_aw(z) * eta_b(z) * eta_h(z)
6587 + eta_bh(z) * eta_a(z) * eta_w(z)
6588 + eta_bw(z) * eta_a(z) * eta_h(z))
6589 + (-eta * eta * eta + 3.0 * eta) * eta_a(z) * eta_b(z) * eta_h(z) * eta_w(z))
6590 * (-cell.q(z)).exp()
6591 * INV_TWO_PI
6592 });
6593 let numeric_ahww = simpson_integral(cell.left, cell.right, 5000, |z| {
6594 let eta = cell.eta(z);
6595 (2.0 * (eta * eta - 1.0) * eta_aw(z) * eta_h(z) * eta_w(z)
6596 + (-eta * eta * eta + 3.0 * eta) * eta_a(z) * eta_h(z) * eta_w(z) * eta_w(z))
6597 * (-cell.q(z)).exp()
6598 * INV_TWO_PI
6599 });
6600 let numeric_bhww = simpson_integral(cell.left, cell.right, 5000, |z| {
6601 let eta = cell.eta(z);
6602 let h_z = eta_h(z);
6603 let w_z = eta_w(z);
6604 ((eta * eta - 1.0) * (eta_bh(z) * w_z * w_z + 2.0 * eta_bw(z) * h_z * w_z)
6605 + (-eta * eta * eta + 3.0 * eta) * eta_b(z) * h_z * w_z * w_z)
6606 * (-cell.q(z)).exp()
6607 * INV_TWO_PI
6608 });
6609 let numeric_hwww = simpson_integral(cell.left, cell.right, 5000, |z| {
6610 let eta = cell.eta(z);
6611 ((-eta * eta * eta + 3.0 * eta) * eta_h(z) * eta_w(z) * eta_w(z) * eta_w(z))
6612 * (-cell.q(z)).exp()
6613 * INV_TWO_PI
6614 });
6615
6616 assert!((exact_hw - numeric_hw).abs() < 1e-7);
6617 assert!((exact_ahw - numeric_ahw).abs() < 2e-6);
6618 assert!((exact_bhw - numeric_bhw).abs() < 2e-6);
6619 assert!((exact_hhw - numeric_hhw).abs() < 2e-6);
6620 assert!((exact_hww - numeric_hww).abs() < 2e-6);
6621 assert!((exact_aahw - numeric_aahw).abs() < 3e-6);
6622 assert!((exact_hhww - numeric_hhww).abs() < 3e-6);
6623 assert!((exact_hhhw - numeric_hhhw).abs() < 3e-6);
6624 assert!((exact_abhw - numeric_abhw).abs() < 3e-6);
6625 assert!((exact_ahww - numeric_ahww).abs() < 3e-6);
6626 assert!((exact_bhww - numeric_bhww).abs() < 3e-6);
6627 assert!((exact_hwww - numeric_hwww).abs() < 3e-6);
6628 }
6629
6630 #[test]
6631 fn cell_moment_scratch_reuses_buffers_under_margslope_like_pressure() {
6632 let cells = [
6633 DenestedCubicCell {
6634 left: -1.2,
6635 right: -0.35,
6636 c0: 0.18,
6637 c1: 0.72,
6638 c2: -0.045,
6639 c3: 0.018,
6640 },
6641 DenestedCubicCell {
6642 left: -0.35,
6643 right: 0.48,
6644 c0: -0.08,
6645 c1: 0.91,
6646 c2: 0.038,
6647 c3: -0.014,
6648 },
6649 DenestedCubicCell {
6650 left: 0.48,
6651 right: 1.4,
6652 c0: 0.11,
6653 c1: 0.83,
6654 c2: 0.022,
6655 c3: 0.012,
6656 },
6657 ];
6658 let mut scratch = CellMomentScratch::with_capacity(MAX_AFFINE_ANCHOR_DEGREE);
6659 for cell in cells {
6660 let baseline = evaluate_cell_moments(cell, 9).expect("baseline moments");
6661 let scratch_state =
6662 evaluate_cell_moments_with_scratch(cell, 9, &mut scratch).expect("scratch moments");
6663 assert_eq!(baseline.branch, scratch_state.branch);
6664 assert!((baseline.value - scratch_state.value).abs() <= 1e-10);
6665 assert_eq!(baseline.moments.len(), scratch_state.moments.len());
6666 for (lhs, rhs) in baseline.moments.iter().zip(scratch_state.moments.iter()) {
6667 assert!((lhs - rhs).abs() <= 1e-10, "{lhs} vs {rhs}");
6668 }
6669 }
6670
6671 reset_cell_moment_test_reallocs();
6672 let mut checksum = 0.0;
6673 for i in 0..5_000 {
6674 let cell = cells[i % cells.len()];
6675 let state = evaluate_cell_moments_with_scratch(cell, 9, &mut scratch)
6676 .expect("scratch moments under repeated pressure");
6677 checksum += state.value + state.moments[0] * 1e-12;
6678 }
6679 assert!(checksum.is_finite());
6680 assert_eq!(
6681 cell_moment_test_reallocs(),
6682 0,
6683 "scratch-backed inner cell-moment calls should not grow Vec buffers"
6684 );
6685 }
6686
6687 #[test]
6688 fn evaluate_cell_moments_matches_numeric_integrals() {
6689 let cell = DenestedCubicCell {
6690 left: -0.9,
6691 right: 0.8,
6692 c0: 0.15,
6693 c1: -0.35,
6694 c2: 0.11,
6695 c3: -0.07,
6696 };
6697 let state = evaluate_cell_moments(cell, 6).expect("cell moments");
6698 let value_numeric = simpson_integral(cell.left, cell.right, 4000, |z| {
6699 super::normal_cdf(cell.eta(z)) * normal_pdf(z)
6700 });
6701 assert!((state.value - value_numeric).abs() < 1e-9);
6702 for degree in 0..=6 {
6703 let target = simpson_integral(cell.left, cell.right, 4000, |z| {
6704 z.powi(degree as i32) * (-cell.q(z)).exp()
6705 });
6706 assert!((state.moments[degree] - target).abs() < 1e-9);
6707 }
6708 }
6709
6710 #[test]
6711 fn partition_builder_moves_link_preimages_with_intercept() {
6712 let score_breaks = [-2.0, -1.0, 0.0, 1.0, 2.0];
6713 let link_breaks = [-1.5, -0.5, 0.5, 1.5];
6714 let score_span = |z: f64| {
6715 let left = if z < -1.0 {
6716 -2.0
6717 } else if z < 0.0 {
6718 -1.0
6719 } else if z < 1.0 {
6720 0.0
6721 } else {
6722 1.0
6723 };
6724 Ok(LocalSpanCubic {
6725 left,
6726 right: left + 1.0,
6727 c0: 0.1,
6728 c1: 0.2,
6729 c2: 0.0,
6730 c3: 0.0,
6731 })
6732 };
6733 let link_span = |u: f64| {
6734 let left = if u < -0.5 {
6735 -1.5
6736 } else if u < 0.5 {
6737 -0.5
6738 } else {
6739 0.5
6740 };
6741 Ok(LocalSpanCubic {
6742 left,
6743 right: left + 1.0,
6744 c0: -0.05,
6745 c1: 0.1,
6746 c2: 0.0,
6747 c3: 0.0,
6748 })
6749 };
6750 let cells_a0 = build_denested_partition_cells(
6751 0.25,
6752 0.9,
6753 &score_breaks,
6754 &link_breaks,
6755 score_span,
6756 link_span,
6757 )
6758 .expect("cells a0");
6759 let cells_a1 = build_denested_partition_cells(
6760 0.55,
6761 0.9,
6762 &score_breaks,
6763 &link_breaks,
6764 score_span,
6765 link_span,
6766 )
6767 .expect("cells a1");
6768 assert!(cells_a0.len() >= score_breaks.len() - 1);
6769 assert!(
6770 cells_a0
6771 .windows(2)
6772 .all(|w| (w[0].cell.right - w[1].cell.left).abs() <= 1e-12)
6773 );
6774 assert!(
6775 cells_a0
6776 .iter()
6777 .zip(cells_a1.iter())
6778 .any(|(lhs, rhs)| (lhs.cell.left - rhs.cell.left).abs() > 1e-10)
6779 );
6780 assert!(cells_a0.first().unwrap().cell.left.is_infinite());
6781 assert!(cells_a0.last().unwrap().cell.right.is_infinite());
6782 }
6783
6784 #[test]
6785 fn partition_builder_without_breaks_returns_single_global_cell() {
6786 let cells = build_denested_partition_cells_with_tails(
6787 0.3,
6788 -0.4,
6789 &[],
6790 &[],
6791 |z| {
6792 if z.is_nan() {
6793 return Err("probe z is NaN".to_string());
6794 }
6795 Ok(LocalSpanCubic {
6796 left: 0.0,
6797 right: 1.0,
6798 c0: 0.0,
6799 c1: 0.0,
6800 c2: 0.0,
6801 c3: 0.0,
6802 })
6803 },
6804 |u| {
6805 if u.is_nan() {
6806 return Err("probe u is NaN".to_string());
6807 }
6808 Ok(LocalSpanCubic {
6809 left: 0.0,
6810 right: 1.0,
6811 c0: 0.0,
6812 c1: 0.0,
6813 c2: 0.0,
6814 c3: 0.0,
6815 })
6816 },
6817 )
6818 .expect("global cell");
6819 assert_eq!(cells.len(), 1);
6820 assert_eq!(cells[0].cell.left, f64::NEG_INFINITY);
6821 assert_eq!(cells[0].cell.right, f64::INFINITY);
6822 assert!(cells[0].cell.c2.abs() < 1e-12);
6823 assert!(cells[0].cell.c3.abs() < 1e-12);
6824 }
6825
6826 #[test]
6827 fn polynomial_integral_helper_matches_moment_sum() {
6828 let cell = DenestedCubicCell {
6829 left: -1.5,
6830 right: 1.25,
6831 c0: 0.2,
6832 c1: -0.4,
6833 c2: 0.15,
6834 c3: 0.03,
6835 };
6836 let state = evaluate_cell_moments(cell, 8).expect("cell moments");
6837 let coeffs = [1.5, -0.25, 0.75, 0.1];
6838 let expected = INV_TWO_PI
6839 * coeffs
6840 .iter()
6841 .enumerate()
6842 .map(|(idx, coeff)| coeff * state.moments[idx])
6843 .sum::<f64>();
6844 let got = cell_polynomial_integral_from_moments(&coeffs, &state.moments, "test poly")
6845 .expect("poly integral");
6846 assert!((got - expected).abs() < 1e-14);
6847 }
6848
6849 #[test]
6850 fn batched_cell_moment_max_degree_matches_direct_non_affine_grid() {
6851 let cells = [
6852 DenestedCubicCell {
6853 left: -2.0,
6854 right: -0.25,
6855 c0: -0.7,
6856 c1: 0.8,
6857 c2: 0.015,
6858 c3: -0.004,
6859 },
6860 DenestedCubicCell {
6861 left: -0.5,
6862 right: 0.75,
6863 c0: 0.2,
6864 c1: -0.35,
6865 c2: -0.025,
6866 c3: 0.0,
6867 },
6868 DenestedCubicCell {
6869 left: 0.1,
6870 right: 1.6,
6871 c0: 0.4,
6872 c1: 0.25,
6873 c2: 0.01,
6874 c3: 0.006,
6875 },
6876 DenestedCubicCell {
6877 left: -1.25,
6878 right: 2.25,
6879 c0: -0.1,
6880 c1: 0.55,
6881 c2: -0.012,
6882 c3: 0.003,
6883 },
6884 ];
6885 for cell in cells {
6886 let branch = branch_cell(cell).expect("branch");
6887 if branch == ExactCellBranch::Affine {
6888 continue;
6889 }
6890 let batched =
6891 evaluate_non_affine_cell_state(cell, branch, 21).expect("degree-21 state");
6892 for degree in [9usize, 15, 21] {
6893 let direct =
6894 evaluate_non_affine_cell_state(cell, branch, degree).expect("direct state");
6895 assert_eq!(batched.branch, direct.branch);
6896 let denom = direct.value.abs().max(1.0);
6897 assert!(((batched.value - direct.value).abs() / denom) < 1e-10);
6898 for k in 0..=degree {
6899 let denom = direct.moments[k].abs().max(1.0);
6900 let rel = (batched.moments[k] - direct.moments[k]).abs() / denom;
6901 assert!(
6902 rel < 1e-10,
6903 "cell={cell:?} degree={degree} moment={k} rel={rel:e}"
6904 );
6905 }
6906 }
6907 }
6908 }
6909
6910 #[test]
6911 fn derivative_moment_evaluator_matches_value_evaluator_moments() {
6912 let cells = [
6913 DenestedCubicCell {
6914 left: -2.0,
6915 right: -0.4,
6916 c0: 0.15,
6917 c1: -0.8,
6918 c2: 0.0,
6919 c3: 0.0,
6920 },
6921 DenestedCubicCell {
6922 left: -0.75,
6923 right: 1.4,
6924 c0: -0.25,
6925 c1: 0.6,
6926 c2: 0.12,
6927 c3: 0.0,
6928 },
6929 DenestedCubicCell {
6930 left: -1.1,
6931 right: 0.9,
6932 c0: 0.35,
6933 c1: -0.3,
6934 c2: 0.05,
6935 c3: -0.015,
6936 },
6937 ];
6938 for cell in cells {
6939 for degree in [4usize, 9, 15, 21] {
6940 let full = evaluate_cell_moments_uncached(cell, degree).expect("full moments");
6941 let derivative = evaluate_cell_derivative_moments_uncached(cell, degree)
6942 .expect("derivative moments");
6943 assert_eq!(full.branch, derivative.branch);
6944 assert_eq!(full.moments.len(), derivative.moments.len());
6945 for k in 0..full.moments.len() {
6946 assert_eq!(full.moments[k].to_bits(), derivative.moments[k].to_bits());
6947 }
6948 }
6949 }
6950 }
6951
6952 #[test]
6953 fn cell_moment_lru_matches_uncached_non_affine_grid() {
6954 let cache = CellMomentLruCache::new(16 * 1024 * 1024);
6955 let stats = CellMomentCacheStats::default();
6956 let c0s = [-0.75, 0.0, 0.5];
6957 let c1s = [-1.2, 0.25, 1.1];
6958 let c2s = [-0.18, 0.07];
6959 let c3s = [0.0, 0.025];
6960 let bounds = [(-2.0, -0.5), (-0.25, 1.5)];
6961 let degrees = [4usize, 9, 15, 21];
6962 for &c0 in &c0s {
6963 for &c1 in &c1s {
6964 for &c2 in &c2s {
6965 for &c3 in &c3s {
6966 for &(left, right) in &bounds {
6967 for &max_degree in °rees {
6968 let cell = DenestedCubicCell {
6969 left,
6970 right,
6971 c0,
6972 c1,
6973 c2,
6974 c3,
6975 };
6976 let branch = branch_cell(cell).expect("branch");
6977 if branch == ExactCellBranch::Affine {
6978 continue;
6979 }
6980 let expected =
6981 evaluate_non_affine_cell_state(cell, branch, max_degree)
6982 .expect("uncached non-affine moments");
6983 let got = evaluate_cell_moments_cached(
6984 cell,
6985 max_degree,
6986 &cache,
6987 Some(&stats),
6988 )
6989 .expect("cached moments");
6990 assert_eq!(got.branch, expected.branch);
6991 assert_eq!(got.moments.len(), max_degree + 1);
6992 let denom = expected.value.abs().max(1.0);
6993 assert!(
6994 ((got.value - expected.value).abs() / denom) < 1e-10,
6995 "value mismatch for {cell:?} degree {max_degree}: got {} expected {}",
6996 got.value,
6997 expected.value
6998 );
6999 for (idx, (&lhs, &rhs)) in
7000 got.moments.iter().zip(expected.moments.iter()).enumerate()
7001 {
7002 let denom = rhs.abs().max(1.0);
7003 assert!(
7004 ((lhs - rhs).abs() / denom) < 1e-10,
7005 "moment {idx} mismatch for {cell:?} degree {max_degree}: got {lhs} expected {rhs}"
7006 );
7007 }
7008 let warm = evaluate_cell_moments_cached(
7009 cell,
7010 max_degree,
7011 &cache,
7012 Some(&stats),
7013 )
7014 .expect("warm cached moments");
7015 assert_eq!(warm, got);
7016 }
7017 }
7018 }
7019 }
7020 }
7021 }
7022 let (hits, misses) = stats.snapshot();
7023 assert!(hits > 0, "expected warm LRU hits");
7024 assert!(misses > 0, "expected cold LRU misses");
7025 }
7026
7027 #[test]
7028 fn cell_moment_fingerprint_exact_cache_matches_current_evaluator() {
7029 let cells = [
7030 DenestedCubicCell {
7031 left: -1.75,
7032 right: -0.25,
7033 c0: 0.15,
7034 c1: -0.35,
7035 c2: 0.08,
7036 c3: -0.015,
7037 },
7038 DenestedCubicCell {
7039 left: -0.5,
7040 right: 0.8,
7041 c0: -0.2,
7042 c1: 0.45,
7043 c2: -0.12,
7044 c3: 0.025,
7045 },
7046 DenestedCubicCell {
7047 left: 0.1,
7048 right: 1.6,
7049 c0: 0.05,
7050 c1: 0.2,
7051 c2: 0.03,
7052 c3: 0.004,
7053 },
7054 ];
7055 let mut cache = std::collections::HashMap::new();
7056 for max_degree in [0usize, 3, 4, 9, 16] {
7057 for cell in cells {
7058 let baseline = evaluate_cell_moments(cell, max_degree).expect("baseline moments");
7059 let key = cell_moment_cache_key(cell, max_degree, 0.0);
7060 let cached = cache.entry(key).or_insert_with(|| {
7061 evaluate_cell_moments(cell, max_degree).expect("cached moments")
7062 });
7063 assert_eq!(baseline.branch, cached.branch);
7064 assert_eq!(baseline.value.to_bits(), cached.value.to_bits());
7065 assert_eq!(baseline.moments.len(), cached.moments.len());
7066 for (lhs, rhs) in baseline.moments.iter().zip(cached.moments.iter()) {
7067 assert_eq!(lhs.to_bits(), rhs.to_bits());
7068 }
7069 }
7070 }
7071 }
7072
7073 #[test]
7074 fn fuzzy_cell_moment_fingerprint_error_scales_with_epsilon() {
7075 for epsilon in [1e-8, 1e-6] {
7076 let base = DenestedCubicCell {
7077 left: -1.25,
7078 right: 1.1,
7079 c0: 0.1,
7080 c1: -0.25,
7081 c2: 0.04,
7082 c3: -0.006,
7083 };
7084 let perturbed = DenestedCubicCell {
7085 left: base.left + 0.001 * epsilon,
7086 right: base.right - 0.001 * epsilon,
7087 c0: base.c0 + 0.001 * epsilon,
7088 c1: base.c1 - 0.001 * epsilon,
7089 c2: base.c2 + 0.001 * epsilon,
7090 c3: base.c3 - 0.001 * epsilon,
7091 };
7092 assert_eq!(
7093 cell_moment_cache_key(base, 9, epsilon),
7094 cell_moment_cache_key(perturbed, 9, epsilon)
7095 );
7096 let lhs = evaluate_cell_moments(base, 9).expect("base moments");
7097 let rhs = evaluate_cell_moments(perturbed, 9).expect("perturbed moments");
7098 let max_rel = lhs
7099 .moments
7100 .iter()
7101 .zip(rhs.moments.iter())
7102 .map(|(a, b)| (a - b).abs() / a.abs().max(b.abs()).max(1.0))
7103 .fold(0.0_f64, f64::max);
7104 assert!(
7105 max_rel <= 10.0 * epsilon,
7106 "epsilon={epsilon:.1e} max_rel={max_rel:.3e}"
7107 );
7108 }
7109 }
7110
    // Compares `evaluate_non_affine_cell_state` and
    // `evaluate_non_affine_cell_derivative_state` against a straightforward
    // in-test quadrature (`reference`) on several cells and degrees. The value
    // and every moment must agree to a relative 1e-13, where "relative" means
    // divided by max(|expected|, 1).
    #[test]
    fn non_affine_cell_state_matches_prefold_reference_to_1e_minus_13() {
        // Plain quadrature over the `GL_NODES` / `GL_WEIGHTS` table (presumably
        // Gauss-Legendre nodes on [-1, 1] -- confirm against the table's
        // definition), mapped onto [cell.left, cell.right].
        //
        // Returns a `CellMomentState` carrying the caller's `branch`, the
        // accumulated value integral and `max_degree + 1` moments.
        fn reference(
            cell: DenestedCubicCell,
            branch: ExactCellBranch,
            max_degree: usize,
        ) -> CellMomentState {
            let mut moments: CellMomentVec = smallvec![0.0_f64; max_degree + 1];
            let mut value_integral = 0.0_f64;
            // Affine map from a table node to the cell: z = center + half_width * node.
            let center = 0.5 * (cell.left + cell.right);
            let half_width = 0.5 * (cell.right - cell.left);
            for (&node, &weight) in GL_NODES.iter().zip(GL_WEIGHTS.iter()) {
                let z = center + half_width * node;
                let eta = cell.eta(z);
                // Shared factor of every moment at this node: weight * exp(-q(z)).
                let moment_weight = weight * (-cell.q(z)).exp();
                // Running power z^k, advanced once per moment.
                let mut z_pow = 1.0_f64;
                for moment in &mut moments {
                    // Fused multiply-add: moment += moment_weight * z^k.
                    *moment = moment_weight.mul_add(z_pow, *moment);
                    z_pow *= z;
                }
                // Value integrand: exp(-z^2 / 2) * normal_cdf(eta); the
                // 1 / sqrt(TAU) factor is applied once after the loop.
                value_integral += weight * (-0.5 * z * z).exp() * normal_cdf(eta);
            }
            // Scale by the half-width to account for the change of variable.
            for moment in &mut moments {
                *moment *= half_width;
            }
            CellMomentState {
                branch,
                value: value_integral * half_width / (std::f64::consts::TAU).sqrt(),
                moments,
            }
        }

        // Fixture cells; each one is asserted below to take a non-affine branch.
        let cells = [
            DenestedCubicCell {
                left: -1.25,
                right: -0.2,
                c0: -0.35,
                c1: 0.85,
                c2: 0.04,
                c3: -0.015,
            },
            DenestedCubicCell {
                left: -0.2,
                right: 0.55,
                c0: 0.12,
                c1: -0.65,
                c2: -0.025,
                c3: 0.02,
            },
            DenestedCubicCell {
                left: 0.55,
                right: 1.6,
                c0: 0.42,
                c1: 0.35,
                c2: 0.018,
                c3: 0.012,
            },
            DenestedCubicCell {
                left: -3.0,
                right: -1.0,
                c0: 1.7,
                c1: -0.4,
                c2: 0.11,
                c3: -0.07,
            },
        ];
        let degrees = [0_usize, 4, 9, 16, 24];
        for cell in cells {
            let branch = branch_cell(cell).expect("branch");
            // A fixture that branched as affine would not exercise the code under test.
            assert_ne!(branch, ExactCellBranch::Affine);
            for max_degree in degrees {
                let actual = evaluate_non_affine_cell_state(cell, branch, max_degree)
                    .expect("optimized non-affine");
                let expected = reference(cell, branch, max_degree);
                assert_eq!(actual.branch, expected.branch);
                assert_eq!(actual.moments.len(), expected.moments.len());
                // Value check, relative to max(|expected|, 1).
                let denom_v = expected.value.abs().max(1.0);
                let rel_v = (actual.value - expected.value).abs() / denom_v;
                // Plain locals so the values can be captured inline by the format string.
                let actual_v = actual.value;
                let expected_v = expected.value;
                assert!(
                    rel_v <= 1e-13,
                    "value rel mismatch for {cell:?} degree {max_degree}: \
                     actual={actual_v:.17e} expected={expected_v:.17e} rel={rel_v:.3e}"
                );
                // Moment-by-moment check of the value evaluator.
                for (k, (lhs, rhs)) in actual
                    .moments
                    .iter()
                    .zip(expected.moments.iter())
                    .enumerate()
                {
                    let denom = rhs.abs().max(1.0);
                    let rel = (lhs - rhs).abs() / denom;
                    assert!(
                        rel <= 1e-13,
                        "moment {k} rel mismatch for {cell:?} degree {max_degree}: \
                         actual={lhs:.17e} expected={rhs:.17e} rel={rel:.3e}"
                    );
                }

                // The derivative evaluator's moments are held to the same
                // reference and tolerance; its `value` field is not compared.
                let actual_deriv =
                    evaluate_non_affine_cell_derivative_state(cell, branch, max_degree)
                        .expect("optimized derivative");
                for (k, (lhs, rhs)) in actual_deriv
                    .moments
                    .iter()
                    .zip(expected.moments.iter())
                    .enumerate()
                {
                    let denom = rhs.abs().max(1.0);
                    let rel = (lhs - rhs).abs() / denom;
                    assert!(
                        rel <= 1e-13,
                        "deriv moment {k} rel mismatch for {cell:?} degree {max_degree}: \
                         actual={lhs:.17e} expected={rhs:.17e} rel={rel:.3e}"
                    );
                }
            }
        }
    }
7247
#[test]
// Compares `cell_third_derivative_from_moments` at the base cell against a
// central finite difference (step 1e-5) of `cell_second_derivative_from_moments`
// taken along a one-parameter path `s`:
//   * the cell coefficients move as `base + s * eta_t`,
//   * the first-order directions move as `eta_u + s * eta_ut`, `eta_v + s * eta_vt`,
//   * the second-order direction moves as `eta_uv + s * eta_uvt`.
// The two values must agree to a relative tolerance of 1e-5.
//
// NOTE(review): both kernels receive the sign-flipped cell and sign-flipped
// direction coefficients while the moments come from the unflipped cell;
// presumably this mirrors a sign convention used by the callers — confirm.
fn third_derivative_kernel_matches_fd_of_second_with_eta_perturbation() {
    const MAX_DEGREE: usize = 15;
    const STEP: f64 = 1e-5;

    let base = DenestedCubicCell {
        left: -0.6,
        right: 0.9,
        c0: 0.30,
        c1: 0.45,
        c2: -0.20,
        c3: 0.12,
    };
    let base_coeffs = [base.c0, base.c1, base.c2, base.c3];

    // Direction coefficient arrays (constant, linear, quadratic, cubic term).
    let eta_u = [0.11_f64, -0.07, 0.05, 0.02];
    let eta_v = [-0.09_f64, 0.13, -0.04, 0.03];
    let eta_t = [0.17_f64, 0.06, -0.10, 0.04];
    let eta_uv = [0.02_f64, 0.01, -0.015, 0.005];
    let eta_ut = [-0.01_f64, 0.02, 0.007, -0.003];
    let eta_vt = [0.015_f64, -0.008, 0.01, 0.004];
    let eta_uvt = [0.003_f64, -0.002, 0.001, 0.0005];

    // Element-wise sign flip of a coefficient array.
    let flipped = |coeffs: &[f64; 4]| -> [f64; 4] {
        [-coeffs[0], -coeffs[1], -coeffs[2], -coeffs[3]]
    };
    // Same interval, all four polynomial coefficients negated.
    let flipped_cell = |cell: DenestedCubicCell| -> DenestedCubicCell {
        DenestedCubicCell {
            left: cell.left,
            right: cell.right,
            c0: -cell.c0,
            c1: -cell.c1,
            c2: -cell.c2,
            c3: -cell.c3,
        }
    };
    // `start + s * rate`, element by element.
    let advance = |start: &[f64; 4], rate: &[f64; 4], s: f64| -> [f64; 4] {
        [
            start[0] + s * rate[0],
            start[1] + s * rate[1],
            start[2] + s * rate[2],
            start[3] + s * rate[3],
        ]
    };

    // Second-derivative kernel evaluated at path parameter `s`.
    let second_kernel_at = |s: f64| -> f64 {
        let [c0, c1, c2, c3] = advance(&base_coeffs, &eta_t, s);
        let shifted = DenestedCubicCell {
            left: base.left,
            right: base.right,
            c0,
            c1,
            c2,
            c3,
        };
        let state = evaluate_cell_moments(shifted, MAX_DEGREE).unwrap();
        let dir_u = flipped(&advance(&eta_u, &eta_ut, s));
        let dir_v = flipped(&advance(&eta_v, &eta_vt, s));
        let dir_uv = flipped(&advance(&eta_uv, &eta_uvt, s));
        cell_second_derivative_from_moments(
            flipped_cell(shifted),
            &dir_u,
            &dir_v,
            &dir_uv,
            &state.moments,
        )
        .unwrap()
    };

    let forward = second_kernel_at(STEP);
    let backward = second_kernel_at(-STEP);
    let fd = (forward - backward) / (2.0 * STEP);

    let base_state = evaluate_cell_moments(base, MAX_DEGREE).unwrap();
    let analytic = cell_third_derivative_from_moments(
        flipped_cell(base),
        &flipped(&eta_u),
        &flipped(&eta_v),
        &flipped(&eta_t),
        &flipped(&eta_uv),
        &flipped(&eta_ut),
        &flipped(&eta_vt),
        &flipped(&eta_uvt),
        &base_state.moments,
    )
    .unwrap();

    // Relative error, with the denominator floored at 1e-3.
    let rel = (analytic - fd).abs() / fd.abs().max(1e-3);
    assert!(
        rel <= 1e-5,
        "third kernel vs FD-of-second mismatch: analytic={analytic:.12e} fd={fd:.12e} rel={rel:.3e}"
    );
}
7363
#[test]
// Two adjacent cells share a moving edge `edge0 + edge_velocity * shift`.
// The sum of both cells' second-derivative kernels is differentiated with
// respect to `shift` by a central finite difference (step 1e-5) and compared
// with `edge_velocity * (f_left - f_right)`, where `f_left` / `f_right` are the
// boundary integrands of the left and right cell evaluated at `edge0`.
// Relative tolerance: 5e-8.
fn moving_shared_edge_second_integral_derivative_has_leibniz_jump_sign() {
    const STEP: f64 = 1e-5;

    let edge0 = 0.2_f64;
    let edge_velocity = -0.37_f64;

    // Cell polynomials and direction coefficient arrays for each side.
    let left_eta = [0.22_f64, -0.18, 0.09, 0.03];
    let right_eta = [-0.11_f64, 0.26, -0.04, 0.02];
    let left_r = [0.08_f64, -0.05, 0.03, 0.01];
    let left_s = [-0.06_f64, 0.04, 0.02, -0.015];
    let left_rs = [0.025_f64, -0.012, 0.006, 0.004];
    let right_r = [-0.03_f64, 0.07, -0.02, 0.012];
    let right_s = [0.05_f64, -0.025, 0.018, 0.007];
    let right_rs = [-0.018_f64, 0.014, -0.005, 0.003];

    // Builds the (left, right) pair of cells meeting at `edge`.
    let cells_split_at = |edge: f64| -> (DenestedCubicCell, DenestedCubicCell) {
        let [l0, l1, l2, l3] = left_eta;
        let [r0, r1, r2, r3] = right_eta;
        (
            DenestedCubicCell {
                left: -0.7,
                right: edge,
                c0: l0,
                c1: l1,
                c2: l2,
                c3: l3,
            },
            DenestedCubicCell {
                left: edge,
                right: 1.1,
                c0: r0,
                c1: r1,
                c2: r2,
                c3: r3,
            },
        )
    };

    // Sum of the two second-derivative kernels for a given edge shift.
    let integral_at = |shift: f64| -> f64 {
        let (lo, hi) = cells_split_at(edge0 + edge_velocity * shift);
        let lo_state = evaluate_cell_moments(lo, 12).expect("left moments");
        let hi_state = evaluate_cell_moments(hi, 12).expect("right moments");
        let lo_term =
            cell_second_derivative_from_moments(lo, &left_r, &left_s, &left_rs, &lo_state.moments)
                .expect("left second");
        let hi_term = cell_second_derivative_from_moments(
            hi,
            &right_r,
            &right_s,
            &right_rs,
            &hi_state.moments,
        )
        .expect("right second");
        lo_term + hi_term
    };

    let forward = integral_at(STEP);
    let backward = integral_at(-STEP);
    let fd = (forward - backward) / (2.0 * STEP);

    // Boundary integrands of both cells at the unshifted edge.
    let (lo0, hi0) = cells_split_at(edge0);
    let f_left = cell_second_derivative_boundary_integrand(lo0, &left_r, &left_s, &left_rs, edge0);
    let f_right =
        cell_second_derivative_boundary_integrand(hi0, &right_r, &right_s, &right_rs, edge0);
    let analytic = edge_velocity * (f_left - f_right);

    // Relative error, with the denominator floored at 1e-8.
    let rel = (fd - analytic).abs() / analytic.abs().max(1e-8);
    assert!(
        rel <= 5e-8,
        "moving edge sign mismatch: fd={fd:.12e} analytic={analytic:.12e} rel={rel:.3e}"
    );
}
7448
#[test]
// Two adjacent cells share an edge that depends on two parameters:
// `edge0 + edge_d1 * s1 + edge_d2 * s2 + edge_d12 * s1 * s2`.
// The mixed second derivative of the summed second-derivative kernels with
// respect to (s1, s2) is estimated with a four-point central stencil (step 2e-4)
// and compared with
//   edge_d12 * (f_left - f_right) + edge_d1 * edge_d2 * (fz_left - fz_right),
// where `f_*` are the boundary integrands at `edge0` and `fz_*` are their
// derivatives with respect to the edge coordinate, computed by hand below.
// Relative tolerance: 2e-7.
fn moving_shared_edge_second_integral_mixed_derivative_has_full_leibniz_terms() {
    const STEP: f64 = 2e-4;

    let edge0 = -0.15_f64;
    let edge_d1 = 0.31_f64;
    let edge_d2 = -0.27_f64;
    let edge_d12 = 0.19_f64;

    // Cell polynomials and direction coefficient arrays for each side.
    let left_eta = [0.16_f64, -0.21, 0.07, -0.025];
    let right_eta = [-0.09_f64, 0.18, -0.055, 0.018];
    let left_r = [0.075_f64, -0.045, 0.018, 0.009];
    let left_s = [-0.052_f64, 0.033, 0.014, -0.011];
    let left_rs = [0.021_f64, -0.009, 0.005, 0.0025];
    let right_r = [-0.028_f64, 0.063, -0.017, 0.010];
    let right_s = [0.047_f64, -0.023, 0.016, 0.006];
    let right_rs = [-0.015_f64, 0.012, -0.004, 0.002];

    // Builds the (left, right) pair of cells meeting at `edge`.
    let cells_split_at = |edge: f64| -> (DenestedCubicCell, DenestedCubicCell) {
        let [l0, l1, l2, l3] = left_eta;
        let [r0, r1, r2, r3] = right_eta;
        (
            DenestedCubicCell {
                left: -0.8,
                right: edge,
                c0: l0,
                c1: l1,
                c2: l2,
                c3: l3,
            },
            DenestedCubicCell {
                left: edge,
                right: 0.9,
                c0: r0,
                c1: r1,
                c2: r2,
                c3: r3,
            },
        )
    };

    // Sum of the two second-derivative kernels for a given parameter pair.
    let integral_at = |s1: f64, s2: f64| -> f64 {
        let (lo, hi) =
            cells_split_at(edge0 + edge_d1 * s1 + edge_d2 * s2 + edge_d12 * s1 * s2);
        let lo_state = evaluate_cell_moments(lo, 12).expect("left moments");
        let hi_state = evaluate_cell_moments(hi, 12).expect("right moments");
        let lo_term =
            cell_second_derivative_from_moments(lo, &left_r, &left_s, &left_rs, &lo_state.moments)
                .expect("left second");
        let hi_term = cell_second_derivative_from_moments(
            hi,
            &right_r,
            &right_s,
            &right_rs,
            &hi_state.moments,
        )
        .expect("right second");
        lo_term + hi_term
    };

    // Four-point central stencil for the mixed partial derivative.
    let plus_plus = integral_at(STEP, STEP);
    let plus_minus = integral_at(STEP, -STEP);
    let minus_plus = integral_at(-STEP, STEP);
    let minus_minus = integral_at(-STEP, -STEP);
    let fd = (plus_plus - plus_minus - minus_plus + minus_minus) / (4.0 * STEP * STEP);

    let (lo0, hi0) = cells_split_at(edge0);

    // Derivative at `edge0` of the polynomial with ascending coefficients `coeffs`.
    let slope_at_edge = |coeffs: &[f64]| -> f64 {
        coeffs
            .iter()
            .enumerate()
            .skip(1)
            .fold(0.0, |acc, (k, val)| {
                acc + (k as f64) * val * edge0.powi(k as i32 - 1)
            })
    };

    // Hand-differentiated boundary integrand: the edge-coordinate derivative of
    // `(crs - eta * cr * cs) * exp(-q) * INV_TWO_PI`, using `q_z = z + eta * eta_z`.
    let boundary_z_derivative =
        |cell: DenestedCubicCell, r: &[f64], s: &[f64], rs: &[f64]| -> f64 {
            let eta = cell.eta(edge0);
            let eta_z = cell.c1 + 2.0 * cell.c2 * edge0 + 3.0 * cell.c3 * edge0 * edge0;
            let cr = poly_eval_at(r, edge0);
            let cs = poly_eval_at(s, edge0);
            let crs = poly_eval_at(rs, edge0);
            let cr_z = slope_at_edge(r);
            let cs_z = slope_at_edge(s);
            let crs_z = slope_at_edge(rs);
            let amp = crs - eta * cr * cs;
            let amp_z = crs_z - eta_z * cr * cs - eta * cr_z * cs - eta * cr * cs_z;
            let q_z = edge0 + eta * eta_z;
            (amp_z - amp * q_z) * (-cell.q(edge0)).exp() * INV_TWO_PI
        };

    let f_left = cell_second_derivative_boundary_integrand(lo0, &left_r, &left_s, &left_rs, edge0);
    let f_right =
        cell_second_derivative_boundary_integrand(hi0, &right_r, &right_s, &right_rs, edge0);
    let fz_left = boundary_z_derivative(lo0, &left_r, &left_s, &left_rs);
    let fz_right = boundary_z_derivative(hi0, &right_r, &right_s, &right_rs);
    let analytic = edge_d12 * (f_left - f_right) + edge_d1 * edge_d2 * (fz_left - fz_right);

    // Relative error, with the denominator floored at 1e-8.
    let rel = (fd - analytic).abs() / analytic.abs().max(1e-8);
    assert!(
        rel <= 2e-7,
        "moving edge mixed term mismatch: fd={fd:.12e} analytic={analytic:.12e} rel={rel:.3e}"
    );
}
7562
#[test]
// Part 1 — third-derivative flux across a moving knot.
// The right cell's cubic is solved so that its value, first derivative and
// second derivative match the left cubic at `edge0`, while its cubic
// coefficient differs by 0.09. Both cells share every direction array except
// the third-order one (`6 * c3` in the constant slot, so it differs per side).
// The test asserts that
//   * the third-derivative boundary integrand jumps across the knot (> 1e-4), and
//   * a central finite difference of the summed third-derivative kernels with
//     respect to the edge shift equals `edge_velocity * jump` (rel <= 1e-5).
//
// Part 2 — link-level check.
// Two `LocalSpanCubic` pieces are built with equal second derivative at `knot`
// but different `c3`. The third output of `link_cubic_second_partials` must
// differ in its last coefficient (> 1e-3) yet evaluate to the same value at
// `edge0` (<= 1e-12).
//
// NOTE(review): the kernels receive sign-flipped cells and direction arrays
// while the boundary integrands receive the unflipped ones; presumably a sign
// convention of the kernel API — confirm.
fn third_order_self_flux_telescopes_but_third_integrand_jumps_at_c2_knot_1454() {
    const MAX_DEGREE: usize = 15;
    const STEP: f64 = 1e-5;

    let edge0 = 0.13_f64;
    let edge_velocity = -0.41_f64;

    let left_eta = [0.18_f64, -0.12, 0.07, 0.04];
    let right_c3 = 0.04_f64 + 0.09;
    let [l0, l1, l2, l3] = left_eta;

    // Left cubic's value / slope / curvature at the knot ...
    let knot_value = l0 + l1 * edge0 + l2 * edge0 * edge0 + l3 * edge0 * edge0 * edge0;
    let knot_slope = l1 + 2.0 * l2 * edge0 + 3.0 * l3 * edge0 * edge0;
    let knot_curvature = 2.0 * l2 + 6.0 * l3 * edge0;
    // ... and the right cubic's lower coefficients solved to reproduce them.
    let rc2 = (knot_curvature - 6.0 * right_c3 * edge0) / 2.0;
    let rc1 = knot_slope - 2.0 * rc2 * edge0 - 3.0 * right_c3 * edge0 * edge0;
    let rc0 = knot_value - rc1 * edge0 - rc2 * edge0 * edge0 - right_c3 * edge0 * edge0 * edge0;
    let right_eta = [rc0, rc1, rc2, right_c3];

    // Direction arrays shared by both cells.
    let common_r = [0.06_f64, -0.04, 0.02, 0.0];
    let common_s = [-0.05_f64, 0.03, 0.015, 0.0];
    let common_t = [0.08_f64, 0.05, -0.03, 0.0];
    let common_rs = [0.02_f64, -0.01, 0.005, 0.0];
    let common_rt = [-0.012_f64, 0.008, 0.004, 0.0];
    let common_st = [0.015_f64, -0.006, 0.003, 0.0];
    // Third-order direction arrays, one per side.
    let left_rst = [6.0 * l3, 0.0, 0.0, 0.0];
    let right_rst = [6.0 * right_c3, 0.0, 0.0, 0.0];

    // Element-wise sign flip of a coefficient array.
    let flipped = |coeffs: &[f64; 4]| -> [f64; 4] {
        [-coeffs[0], -coeffs[1], -coeffs[2], -coeffs[3]]
    };
    // Same interval, all four polynomial coefficients negated.
    let flipped_cell = |cell: DenestedCubicCell| -> DenestedCubicCell {
        DenestedCubicCell {
            left: cell.left,
            right: cell.right,
            c0: -cell.c0,
            c1: -cell.c1,
            c2: -cell.c2,
            c3: -cell.c3,
        }
    };
    // Builds the (left, right) pair of cells meeting at `edge`.
    let cells_split_at = |edge: f64| -> (DenestedCubicCell, DenestedCubicCell) {
        (
            DenestedCubicCell {
                left: -0.7,
                right: edge,
                c0: left_eta[0],
                c1: left_eta[1],
                c2: left_eta[2],
                c3: left_eta[3],
            },
            DenestedCubicCell {
                left: edge,
                right: 1.0,
                c0: right_eta[0],
                c1: right_eta[1],
                c2: right_eta[2],
                c3: right_eta[3],
            },
        )
    };

    // Sign-flipped direction arrays handed to the kernels.
    let neg_r = flipped(&common_r);
    let neg_s = flipped(&common_s);
    let neg_t = flipped(&common_t);
    let neg_rs = flipped(&common_rs);
    let neg_rt = flipped(&common_rt);
    let neg_st = flipped(&common_st);
    let neg_left_rst = flipped(&left_rst);
    let neg_right_rst = flipped(&right_rst);

    // Sum of both cells' third-derivative kernels for a given edge shift.
    let integral_at = |shift: f64| -> f64 {
        let (lo, hi) = cells_split_at(edge0 + edge_velocity * shift);
        let lo_state = evaluate_cell_moments(lo, MAX_DEGREE).unwrap();
        let hi_state = evaluate_cell_moments(hi, MAX_DEGREE).unwrap();
        let lo_term = cell_third_derivative_from_moments(
            flipped_cell(lo),
            &neg_r,
            &neg_s,
            &neg_t,
            &neg_rs,
            &neg_rt,
            &neg_st,
            &neg_left_rst,
            &lo_state.moments,
        )
        .unwrap();
        let hi_term = cell_third_derivative_from_moments(
            flipped_cell(hi),
            &neg_r,
            &neg_s,
            &neg_t,
            &neg_rs,
            &neg_rt,
            &neg_st,
            &neg_right_rst,
            &hi_state.moments,
        )
        .unwrap();
        lo_term + hi_term
    };

    let forward = integral_at(STEP);
    let backward = integral_at(-STEP);
    let fd = (forward - backward) / (2.0 * STEP);

    // Boundary integrands of both cells at the unshifted knot.
    let (lo0, hi0) = cells_split_at(edge0);
    let f_left = cell_third_derivative_boundary_integrand(
        lo0, &common_r, &common_s, &common_t, &common_rs, &common_rt, &common_st, &left_rst,
        edge0,
    );
    let f_right = cell_third_derivative_boundary_integrand(
        hi0, &common_r, &common_s, &common_t, &common_rs, &common_rt, &common_st, &right_rst,
        edge0,
    );

    let jump = f_left - f_right;
    assert!(
        jump.abs() > 1e-4,
        "third-derivative integrand must jump across the C² knot (α₃ discontinuity); \
         got jump={jump:.3e}"
    );

    let analytic_flux = edge_velocity * jump;
    // Relative error, with the denominator floored at 1e-6.
    let rel = (fd - analytic_flux).abs() / fd.abs().max(1e-6);
    assert!(
        rel <= 1e-5,
        "moving-edge third-derivative flux mismatch (#1454): fd={fd:.12e} \
         analytic_flux={analytic_flux:.12e} rel={rel:.3e}"
    );

    // Link-level check: the knot coordinate is `a_row + b_row * edge0`.
    let a_row = 0.21_f64;
    let b_row = 1.37_f64;
    let knot = a_row + b_row * edge0;
    let left_link = LocalSpanCubic {
        left: knot - 0.6,
        right: knot + 0.6,
        c0: 0.0,
        c1: 0.0,
        c2: 0.08,
        c3: -0.05,
    };
    let right_alpha3 = -0.05_f64 + 0.11;
    let right_left_coord = knot - 0.4;
    // Solve the right piece's quadratic coefficient so that both pieces have
    // the same second derivative at `knot`.
    let curvature_at_knot = left_link.second_derivative(knot);
    let right_alpha2 =
        (curvature_at_knot - 6.0 * right_alpha3 * (knot - right_left_coord)) / 2.0;
    let right_link = LocalSpanCubic {
        left: right_left_coord,
        right: right_left_coord + 0.8,
        c0: 0.0,
        c1: 0.0,
        c2: right_alpha2,
        c3: right_alpha3,
    };

    let (_, _, dc_dbb_left) = link_cubic_second_partials(left_link, a_row, b_row);
    let (_, _, dc_dbb_right) = link_cubic_second_partials(right_link, a_row, b_row);
    assert!(
        (dc_dbb_left[3] - dc_dbb_right[3]).abs() > 1e-3,
        "α₃ jump must make the raw dc_dbb coefficient arrays differ"
    );

    let c_bb_left = poly_eval_at(&dc_dbb_left, edge0);
    let c_bb_right = poly_eval_at(&dc_dbb_right, edge0);
    assert!(
        (c_bb_left - c_bb_right).abs() <= 1e-12,
        "second-derivative slope-slope integrand must be CONTINUOUS across the \
         C² knot (telescoping self-flux): left={c_bb_left:.15e} right={c_bb_right:.15e}"
    );
}
7807}