1use gam_math::probability::normal_cdf;
2use gam_runtime::resource::{ByteLruCache, ResidentBytes};
3use smallvec::{SmallVec, smallvec};
4use std::hash::{Hash, Hasher};
5use std::sync::Arc;
6use std::sync::atomic::{AtomicU64, Ordering};
7
/// Errors reported by the cubic cell kernel routines in this module.
///
/// Every variant carries a free-form, human-readable `reason` describing the
/// specific failure.
#[derive(Clone, Debug)]
pub enum CubicCellKernelError {
    /// An interval was invalid.
    InvalidInterval { reason: String },
    /// A cell had an invalid shape.
    InvalidCellShape { reason: String },
    /// Not enough moments were available for the requested operation.
    InsufficientMoments { reason: String },
    /// An argument fell outside the domain of a bivariate-normal computation.
    BivariateNormalDomain { reason: String },
}
33
// Invokes the project macro `impl_reason_error_boilerplate!` (defined outside this
// view) for `CubicCellKernelError`, listing every `reason`-carrying variant.
// NOTE(review): presumably generates the `Display` / `std::error::Error` impls from
// the `reason` field — confirm against the macro definition.
impl_reason_error_boilerplate! {
    CubicCellKernelError {
        InvalidInterval,
        InvalidCellShape,
        InsufficientMoments,
        BivariateNormalDomain,
    }
}
42
43impl CubicCellKernelError {
44 #[inline]
45 fn invalid_interval(reason: impl Into<String>) -> Self {
46 CubicCellKernelError::InvalidInterval {
47 reason: reason.into(),
48 }
49 }
50 #[inline]
51 fn invalid_cell_shape(reason: impl Into<String>) -> Self {
52 CubicCellKernelError::InvalidCellShape {
53 reason: reason.into(),
54 }
55 }
56 #[inline]
57 fn insufficient_moments(reason: impl Into<String>) -> Self {
58 CubicCellKernelError::InsufficientMoments {
59 reason: reason.into(),
60 }
61 }
62 #[inline]
63 fn bivariate_normal_domain(reason: impl Into<String>) -> Self {
64 CubicCellKernelError::BivariateNormalDomain {
65 reason: reason.into(),
66 }
67 }
68}
69
/// A cubic polynomial attached to the span `[left, right]` and expressed in the
/// local coordinate `t = x - left`:
///
/// `p(x) = c0 + c1 * t + c2 * t^2 + c3 * t^3`.
///
/// The evaluation methods do not check that `x` lies inside the span.
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct LocalSpanCubic {
    /// Left end of the span; also the expansion point of the polynomial.
    pub left: f64,
    /// Right end of the span (not read by the evaluation methods).
    pub right: f64,
    /// Constant coefficient, i.e. the value at `x == left`.
    pub c0: f64,
    /// Linear coefficient.
    pub c1: f64,
    /// Quadratic coefficient.
    pub c2: f64,
    /// Cubic coefficient.
    pub c3: f64,
}

impl LocalSpanCubic {
    /// Value of the cubic at `x`.
    #[inline]
    pub fn evaluate(self, x: f64) -> f64 {
        let offset = x - self.left;
        // Terms are formed and summed left to right, lowest degree first.
        let linear = self.c1 * offset;
        let quadratic = self.c2 * offset * offset;
        let cubic = self.c3 * offset * offset * offset;
        self.c0 + linear + quadratic + cubic
    }

    /// First derivative of the cubic with respect to `x`, evaluated at `x`.
    #[inline]
    pub fn first_derivative(self, x: f64) -> f64 {
        let offset = x - self.left;
        let from_quadratic = 2.0 * self.c2 * offset;
        let from_cubic = 3.0 * self.c3 * offset * offset;
        self.c1 + from_quadratic + from_cubic
    }

    /// Second derivative of the cubic with respect to `x`, evaluated at `x`.
    #[inline]
    pub fn second_derivative(self, x: f64) -> f64 {
        let offset = x - self.left;
        let constant = 2.0 * self.c2;
        let linear = 6.0 * self.c3 * offset;
        constant + linear
    }
}
124
/// String identifier for the anchored-deviation kernel.
/// NOTE(review): the consumers of this identifier are outside this view — confirm
/// how it is matched before changing the value.
pub const ANCHORED_DEVIATION_KERNEL: &str = "DenestedCubicTransport";
/// Base tolerance on the magnitude of a cell's higher-order coefficients (`c2`, `c3`).
/// `effective_branch_tol` scales it by the cell's anchor magnitude, while
/// `tail_cell_cache_key` compares `|c2|` and `|c3|` against it unscaled.
pub const NORMALIZED_CELL_BRANCH_TOL: f64 = 1e-10;

/// `1 / (2π)`, computed as the reciprocal of `TAU`.
const INV_TWO_PI: f64 = 1.0 / std::f64::consts::TAU;

/// Linux-only public reference to the private `GL_NODES` table.
/// NOTE(review): the name indicates it is exported for a GPU kernel — confirm the
/// consumer.
#[cfg(target_os = "linux")]
pub const GL_NODES_FOR_GPU_KERNEL: &[f64; 384] = &GL_NODES;
/// Linux-only public reference to the private `GL_WEIGHTS` table.
/// NOTE(review): the name indicates it is exported for a GPU kernel — confirm the
/// consumer.
#[cfg(target_os = "linux")]
pub const GL_WEIGHTS_FOR_GPU_KERNEL: &[f64; 384] = &GL_WEIGHTS;
145
/// 384 quadrature nodes inside the open interval `(-1, 1)`, stored in ascending
/// order and symmetric about zero.
/// NOTE(review): the `GL_` prefix suggests Gauss–Legendre abscissae paired
/// index-by-index with `GL_WEIGHTS` — confirm against the table generator.
const GL_NODES: [f64; 384] = [
    -9.999_804_411_726_474e-1,
    -9.998_969_471_378_596e-1,
    -9.997_467_408_113_523e-1,
    -9.995_297_988_558_859e-1,
    -9.992_461_316_671_845e-1,
    -9.988_957_572_063_257e-1,
    -9.984_786_985_384_589e-1,
    -9.979_949_833_727_938e-1,
    -9.974_446_439_389_107e-1,
    -9.968_277_169_440_913e-1,
    -9.961_442_435_551_087e-1,
    -9.953_942_693_885_953e-1,
    -9.945_778_445_047_068e-1,
    -9.936_950_234_020_883e-1,
    -9.927_458_650_133_153e-1,
    -9.917_304_327_004_32e-1,
    -9.906_487_942_504_061e-1,
    -9.895_010_218_704_087e-1,
    -9.882_871_921_828_699e-1,
    -9.870_073_862_202_815e-1,
    -9.856_616_894_197_333e-1,
    -9.842_501_916_171_713e-1,
    -9.827_729_870_413_743e-1,
    -9.812_301_743_076_443e-1,
    -9.796_218_564_112_101e-1,
    -9.779_481_407_203_411e-1,
    -9.762_091_389_691_724e-1,
    -9.744_049_672_502_397e-1,
    -9.725_357_460_067_257e-1,
    -9.706_016_000_244_151e-1,
    -9.686_026_584_233_628e-1,
    -9.665_390_546_492_71e-1,
    -9.644_109_264_645_802e-1,
    -9.622_184_159_392_698e-1,
    -9.599_616_694_413_742e-1,
    -9.576_408_376_272_095e-1,
    -9.552_560_754_313_16e-1,
    -9.528_075_420_561_144e-1,
    -9.502_954_009_612_771e-1,
    -9.477_198_198_528_157e-1,
    -9.450_809_706_718_851e-1,
    -9.423_790_295_833_044e-1,
    -9.396_141_769_637_963e-1,
    -9.367_865_973_899_459e-1,
    -9.338_964_796_258_775e-1,
    -9.309_440_166_106_54e-1,
    -9.279_294_054_453_956e-1,
    -9.248_528_473_801_222e-1,
    -9.217_145_478_003_181e-1,
    -9.185_147_162_132_208e-1,
    -9.152_535_662_338_34e-1,
    -9.119_313_155_706_682e-1,
    -9.085_481_860_112_055e-1,
    -9.051_044_034_070_944e-1,
    -9.016_001_976_590_722e-1,
    -8.980_358_027_016_164e-1,
    -8.944_114_564_873_288e-1,
    -8.907_274_009_710_492e-1,
    -8.869_838_820_937_034e-1,
    -8.831_811_497_658_847e-1,
    -8.793_194_578_511_7e-1,
    -8.753_990_641_491_725e-1,
    -8.714_202_303_783_312e-1,
    -8.673_832_221_584_393e-1,
    -8.632_883_089_929_12e-1,
    -8.591_357_642_507_945e-1,
    -8.549_258_651_485_127e-1,
    -8.506_588_927_313_666e-1,
    -8.463_351_318_547_683e-1,
    -8.419_548_711_652_254e-1,
    -8.375_184_030_810_715e-1,
    -8.330_260_237_729_452e-1,
    -8.284_780_331_440_178e-1,
    -8.238_747_348_099_726e-1,
    -8.192_164_360_787_36e-1,
    -8.145_034_479_299_62e-1,
    -8.097_360_849_942_72e-1,
    -8.049_146_655_322_506e-1,
    -8.000_395_114_131_988e-1,
    -7.951_109_480_936_471e-1,
    -7.901_293_045_956_28e-1,
    -7.850_949_134_847_117e-1,
    -7.800_081_108_478_04e-1,
    -7.748_692_362_707_1e-1,
    -7.696_786_328_154_644e-1,
    -7.644_366_469_974_285e-1,
    -7.591_436_287_621_58e-1,
    -7.537_999_314_620_412e-1,
    -7.484_059_118_327_094e-1,
    -7.429_619_299_692_227e-1,
    -7.374_683_493_020_299e-1,
    -7.319_255_365_727_068e-1,
    -7.263_338_618_094_733e-1,
    -7.206_936_983_024_912e-1,
    -7.150_054_225_789_432e-1,
    -7.092_694_143_778_975e-1,
    -7.034_860_566_249_567e-1,
    -6.976_557_354_066_943e-1,
    -6.917_788_399_448_808e-1,
    -6.858_557_625_704_99e-1,
    -6.798_868_986_975_534e-1,
    -6.738_726_467_966_731e-1,
    -6.678_134_083_685_102e-1,
    -6.617_095_879_169_366e-1,
    -6.555_615_929_220_4e-1,
    -6.493_698_338_129_212e-1,
    -6.431_347_239_402_948e-1,
    -6.368_566_795_488_945e-1,
    -6.305_361_197_496_849e-1,
    -6.241_734_664_918_837e-1,
    -6.177_691_445_347_913e-1,
    -6.113_235_814_194_364e-1,
    -6.048_372_074_400_329e-1,
    -5.983_104_556_152_549e-1,
    -5.917_437_616_593_286e-1,
    -5.851_375_639_529_456e-1,
    -5.784_923_035_139_965e-1,
    -5.718_084_239_681_3e-1,
    -5.650_863_715_191_369e-1,
    -5.583_265_949_191_623e-1,
    -5.515_295_454_387_482e-1,
    -5.446_956_768_367_068e-1,
    -5.378_254_453_298_289e-1,
    -5.309_193_095_624_275e-1,
    -5.239_777_305_757_194e-1,
    -5.170_011_717_770_473e-1,
    -5.099_900_989_089_429e-1,
    -5.029_449_800_180_356e-1,
    -4.958_662_854_238_058_4e-1,
    -4.887_544_876_871_878e-1,
    -4.816_100_615_790_221e-1,
    -4.744_334_840_483_605_5e-1,
    -4.672_252_341_906_264e-1,
    -4.599_857_932_156_304e-1,
    -4.527_156_444_154_463_7e-1,
    -4.454_152_731_321_473_5e-1,
    -4.380_851_667_254_05e-1,
    -4.307_258_145_399_544_5e-1,
    -4.233_377_078_729_265e-1,
    -4.159_213_399_410_494e-1,
    -4.084_772_058_477_228e-1,
    -4.010_058_025_499_653e-1,
    -3.935_076_288_252_386e-1,
    -3.859_831_852_381_500_6e-1,
    -3.784_329_741_070_358_6e-1,
    -3.708_574_994_704_271e-1,
    -3.632_572_670_534_011e-1,
    -3.556_327_842_338_202e-1,
    -3.479_845_600_084_600_6e-1,
    -3.403_131_049_590_297e-1,
    -3.326_189_312_180_866e-1,
    -3.249_025_524_348_469_5e-1,
    -3.171_644_837_408_958_4e-1,
    -3.094_052_417_157_978e-1,
    -3.016_253_443_526_109e-1,
    -2.938_253_110_233_064_5e-1,
    -2.860_056_624_440_967_5e-1,
    -2.781_669_206_406_729e-1,
    -2.703_096_089_133_553e-1,
    -2.624_342_518_021_592_4e-1,
    -2.545_413_750_517_773e-1,
    -2.466_315_055_764_817_5e-1,
    -2.387_051_714_249_486_3e-1,
    -2.307_629_017_450_062e-1,
    -2.228_052_267_483_099_4e-1,
    -2.148_326_776_749_466_5e-1,
    -2.068_457_867_579_697_5e-1,
    -1.988_450_871_878_683_4e-1,
    -1.908_311_130_769_724_5e-1,
    -1.828_043_994_237_965_6e-1,
    -1.747_654_820_773_241_2e-1,
    -1.667_148_977_012_352_4e-1,
    -1.586_531_837_380_799_3e-1,
    -1.505_808_783_733_995e-1,
    -1.424_985_204_997_981_4e-1,
    -1.344_066_496_809_674_7e-1,
    -1.263_058_061_156_663e-1,
    -1.181_965_306_016_578_4e-1,
    -1.100_793_644_996_070_4e-1,
    -1.019_548_496_969_403_7e-1,
    -9.382_352_857_167_028e-2,
    -8.568_594_395_618_719e-2,
    -7.754_263_910_102_077e-2,
    -6.939_415_763_857_37e-2,
    -6.124_104_354_682_962e-2,
    -5.308_384_111_303_817_6e-2,
    -4.492_309_489_737_94e-2,
    -3.675_934_969_660_982e-2,
    -2.859_315_050_769_284_7e-2,
    -2.042_504_249_141_571e-2,
    -1.225_557_093_599_553_8e-2,
    -4.085_281_220_676_868e-3,
    4.085_281_220_676_868e-3,
    1.225_557_093_599_553_8e-2,
    2.042_504_249_141_571e-2,
    2.859_315_050_769_284_7e-2,
    3.675_934_969_660_982e-2,
    4.492_309_489_737_94e-2,
    5.308_384_111_303_817_6e-2,
    6.124_104_354_682_962e-2,
    6.939_415_763_857_37e-2,
    7.754_263_910_102_077e-2,
    8.568_594_395_618_719e-2,
    9.382_352_857_167_028e-2,
    1.019_548_496_969_403_7e-1,
    1.100_793_644_996_070_4e-1,
    1.181_965_306_016_578_4e-1,
    1.263_058_061_156_663e-1,
    1.344_066_496_809_674_7e-1,
    1.424_985_204_997_981_4e-1,
    1.505_808_783_733_995e-1,
    1.586_531_837_380_799_3e-1,
    1.667_148_977_012_352_4e-1,
    1.747_654_820_773_241_2e-1,
    1.828_043_994_237_965_6e-1,
    1.908_311_130_769_724_5e-1,
    1.988_450_871_878_683_4e-1,
    2.068_457_867_579_697_5e-1,
    2.148_326_776_749_466_5e-1,
    2.228_052_267_483_099_4e-1,
    2.307_629_017_450_062e-1,
    2.387_051_714_249_486_3e-1,
    2.466_315_055_764_817_5e-1,
    2.545_413_750_517_773e-1,
    2.624_342_518_021_592_4e-1,
    2.703_096_089_133_553e-1,
    2.781_669_206_406_729e-1,
    2.860_056_624_440_967_5e-1,
    2.938_253_110_233_064_5e-1,
    3.016_253_443_526_109e-1,
    3.094_052_417_157_978e-1,
    3.171_644_837_408_958_4e-1,
    3.249_025_524_348_469_5e-1,
    3.326_189_312_180_866e-1,
    3.403_131_049_590_297e-1,
    3.479_845_600_084_600_6e-1,
    3.556_327_842_338_202e-1,
    3.632_572_670_534_011e-1,
    3.708_574_994_704_271e-1,
    3.784_329_741_070_358_6e-1,
    3.859_831_852_381_500_6e-1,
    3.935_076_288_252_386e-1,
    4.010_058_025_499_653e-1,
    4.084_772_058_477_228e-1,
    4.159_213_399_410_494e-1,
    4.233_377_078_729_265e-1,
    4.307_258_145_399_544_5e-1,
    4.380_851_667_254_05e-1,
    4.454_152_731_321_473_5e-1,
    4.527_156_444_154_463_7e-1,
    4.599_857_932_156_304e-1,
    4.672_252_341_906_264e-1,
    4.744_334_840_483_605_5e-1,
    4.816_100_615_790_221e-1,
    4.887_544_876_871_878e-1,
    4.958_662_854_238_058_4e-1,
    5.029_449_800_180_356e-1,
    5.099_900_989_089_429e-1,
    5.170_011_717_770_473e-1,
    5.239_777_305_757_194e-1,
    5.309_193_095_624_275e-1,
    5.378_254_453_298_289e-1,
    5.446_956_768_367_068e-1,
    5.515_295_454_387_482e-1,
    5.583_265_949_191_623e-1,
    5.650_863_715_191_369e-1,
    5.718_084_239_681_3e-1,
    5.784_923_035_139_965e-1,
    5.851_375_639_529_456e-1,
    5.917_437_616_593_286e-1,
    5.983_104_556_152_549e-1,
    6.048_372_074_400_329e-1,
    6.113_235_814_194_364e-1,
    6.177_691_445_347_913e-1,
    6.241_734_664_918_837e-1,
    6.305_361_197_496_849e-1,
    6.368_566_795_488_945e-1,
    6.431_347_239_402_948e-1,
    6.493_698_338_129_212e-1,
    6.555_615_929_220_4e-1,
    6.617_095_879_169_366e-1,
    6.678_134_083_685_102e-1,
    6.738_726_467_966_731e-1,
    6.798_868_986_975_534e-1,
    6.858_557_625_704_99e-1,
    6.917_788_399_448_808e-1,
    6.976_557_354_066_943e-1,
    7.034_860_566_249_567e-1,
    7.092_694_143_778_975e-1,
    7.150_054_225_789_432e-1,
    7.206_936_983_024_912e-1,
    7.263_338_618_094_733e-1,
    7.319_255_365_727_068e-1,
    7.374_683_493_020_299e-1,
    7.429_619_299_692_227e-1,
    7.484_059_118_327_094e-1,
    7.537_999_314_620_412e-1,
    7.591_436_287_621_58e-1,
    7.644_366_469_974_285e-1,
    7.696_786_328_154_644e-1,
    7.748_692_362_707_1e-1,
    7.800_081_108_478_04e-1,
    7.850_949_134_847_117e-1,
    7.901_293_045_956_28e-1,
    7.951_109_480_936_471e-1,
    8.000_395_114_131_988e-1,
    8.049_146_655_322_506e-1,
    8.097_360_849_942_72e-1,
    8.145_034_479_299_62e-1,
    8.192_164_360_787_36e-1,
    8.238_747_348_099_726e-1,
    8.284_780_331_440_178e-1,
    8.330_260_237_729_452e-1,
    8.375_184_030_810_715e-1,
    8.419_548_711_652_254e-1,
    8.463_351_318_547_683e-1,
    8.506_588_927_313_666e-1,
    8.549_258_651_485_127e-1,
    8.591_357_642_507_945e-1,
    8.632_883_089_929_12e-1,
    8.673_832_221_584_393e-1,
    8.714_202_303_783_312e-1,
    8.753_990_641_491_725e-1,
    8.793_194_578_511_7e-1,
    8.831_811_497_658_847e-1,
    8.869_838_820_937_034e-1,
    8.907_274_009_710_492e-1,
    8.944_114_564_873_288e-1,
    8.980_358_027_016_164e-1,
    9.016_001_976_590_722e-1,
    9.051_044_034_070_944e-1,
    9.085_481_860_112_055e-1,
    9.119_313_155_706_682e-1,
    9.152_535_662_338_34e-1,
    9.185_147_162_132_208e-1,
    9.217_145_478_003_181e-1,
    9.248_528_473_801_222e-1,
    9.279_294_054_453_956e-1,
    9.309_440_166_106_54e-1,
    9.338_964_796_258_775e-1,
    9.367_865_973_899_459e-1,
    9.396_141_769_637_963e-1,
    9.423_790_295_833_044e-1,
    9.450_809_706_718_851e-1,
    9.477_198_198_528_157e-1,
    9.502_954_009_612_771e-1,
    9.528_075_420_561_144e-1,
    9.552_560_754_313_16e-1,
    9.576_408_376_272_095e-1,
    9.599_616_694_413_742e-1,
    9.622_184_159_392_698e-1,
    9.644_109_264_645_802e-1,
    9.665_390_546_492_71e-1,
    9.686_026_584_233_628e-1,
    9.706_016_000_244_151e-1,
    9.725_357_460_067_257e-1,
    9.744_049_672_502_397e-1,
    9.762_091_389_691_724e-1,
    9.779_481_407_203_411e-1,
    9.796_218_564_112_101e-1,
    9.812_301_743_076_443e-1,
    9.827_729_870_413_743e-1,
    9.842_501_916_171_713e-1,
    9.856_616_894_197_333e-1,
    9.870_073_862_202_815e-1,
    9.882_871_921_828_699e-1,
    9.895_010_218_704_087e-1,
    9.906_487_942_504_061e-1,
    9.917_304_327_004_32e-1,
    9.927_458_650_133_153e-1,
    9.936_950_234_020_883e-1,
    9.945_778_445_047_068e-1,
    9.953_942_693_885_953e-1,
    9.961_442_435_551_087e-1,
    9.968_277_169_440_913e-1,
    9.974_446_439_389_107e-1,
    9.979_949_833_727_938e-1,
    9.984_786_985_384_589e-1,
    9.988_957_572_063_257e-1,
    9.992_461_316_671_845e-1,
    9.995_297_988_558_859e-1,
    9.997_467_408_113_523e-1,
    9.998_969_471_378_596e-1,
    9.999_804_411_726_474e-1,
];
/// 384 strictly positive quadrature weights; the table is symmetric about its
/// midpoint (entry `i` equals entry `383 - i`).
/// NOTE(review): presumably the Gauss–Legendre weights matching `GL_NODES`
/// index-by-index — confirm against the table generator.
const GL_WEIGHTS: [f64; 384] = [
    5.019_410_348_676_869_6e-5,
    1.168_390_665_730_266_3e-4,
    1.835_749_193_551_655_8e-4,
    2.503_070_890_844_105e-4,
    3.170_242_698_112_815e-4,
    3.837_208_020_912_921_4e-4,
    4.503_919_137_716_827e-4,
    5.170_330_453_491_649e-4,
    5.836_397_042_630_135e-4,
    6.502_074_240_969_948e-4,
    7.167_317_509_947_801e-4,
    7.832_082_385_905_168e-4,
    8.496_324_460_039_209e-4,
    9.159_999_370_632_641e-4,
    9.823_062_800_663_463e-4,
    1.048_547_047_793_689_5e-3,
    1.114_717_817_647_310_6e-3,
    1.180_814_171_855_922e-3,
    1.246_831_697_715_441_5e-3,
    1.312_765_987_850_66e-3,
    1.378_612_640_487_646_8e-3,
    1.444_367_259_734_736e-3,
    1.510_025_455_865_810_3e-3,
    1.575_582_845_607_936_8e-3,
    1.641_035_052_429_271_5e-3,
    1.706_377_706_828_447_1e-3,
    1.771_606_446_623_834_7e-3,
    1.836_716_917_243_567_5e-3,
    1.901_704_772_014_899_2e-3,
    1.966_565_672_453_437e-3,
    2.031_295_288_552_398_4e-3,
    2.095_889_299_071_020_6e-3,
    2.160_343_391_822_734_3e-3,
    2.224_653_263_962_713e-3,
    2.288_814_622_274_955e-3,
    2.352_823_183_458_769e-3,
    2.416_674_674_414_340_5e-3,
    2.480_364_832_528_265_6e-3,
    2.543_889_405_957_74e-3,
    2.607_244_153_914_452e-3,
    2.670_424_846_947_554e-3,
    2.733_427_267_226_093_3e-3,
    2.796_247_208_820_428e-3,
    2.858_880_477_983_06e-3,
    2.921_322_893_428_515_3e-3,
    2.983_570_286_612_554_5e-3,
    3.045_618_502_010_327_8e-3,
    3.107_463_397_393_755_5e-3,
    3.169_100_844_108_32e-3,
    3.230_526_727_348_174e-3,
    3.291_736_946_431_361e-3,
    3.352_727_415_073_250_3e-3,
    3.413_494_061_659_418_4e-3,
    3.474_032_829_517_317e-3,
    3.534_339_677_187_348_4e-3,
    3.594_410_578_692_452e-3,
    3.654_241_523_806_987e-3,
    3.713_828_518_324_312_5e-3,
    3.773_167_584_323_583_5e-3,
    3.832_254_760_435_171e-3,
    3.891_086_102_105_193_4e-3,
    3.949_657_681_858_895e-3,
    4.007_965_589_562_678e-3,
    4.066_005_932_685_269e-3,
    4.123_774_836_557_6e-3,
    4.181_268_444_631_281e-3,
    4.238_482_918_736_289e-3,
    4.295_414_439_336_925e-3,
    4.352_059_205_787_275e-3,
    4.408_413_436_584_285e-3,
    4.464_473_369_620_78e-3,
    4.520_235_262_436_235e-3,
    4.575_695_392_466_791e-3,
    4.630_850_057_293_894e-3,
    4.685_695_574_891_041e-3,
    4.740_228_283_870_022e-3,
    4.794_444_543_725_102e-3,
    4.848_340_735_076_109e-3,
    4.901_913_259_910_197e-3,
    4.955_158_541_821_682_4e-3,
    5.008_073_026_251_332e-3,
    5.060_653_180_723_101_4e-3,
    5.112_895_495_080_397e-3,
    5.164_796_481_720_011e-3,
    5.216_352_675_825_451e-3,
    5.267_560_635_597_735e-3,
    5.318_416_942_485_385e-3,
    5.368_918_201_412_827e-3,
    5.419_061_041_006_627e-3,
    5.468_842_113_820_941e-3,
    5.518_258_096_560_71e-3,
    5.567_305_690_303_767e-3,
    5.615_981_620_720_803e-3,
    5.664_282_638_294_182e-3,
    5.712_205_518_534_655e-3,
    5.759_747_062_196_925_5e-3,
    5.806_904_095_492_818e-3,
    5.853_673_470_303_617_4e-3,
    5.900_052_064_389_824e-3,
    5.946_036_781_599_814e-3,
    5.991_624_552_076_468e-3,
    6.036_812_332_462_087e-3,
    6.081_597_106_101_673e-3,
    6.125_975_883_244_196e-3,
    6.169_945_701_242_237e-3,
    6.213_503_624_749_591e-3,
    6.256_646_745_917_723e-3,
    6.299_372_184_589_237e-3,
    6.341_677_088_490_664e-3,
    6.383_558_633_422_572e-3,
    6.425_014_023_448_273e-3,
    6.466_040_491_080_434e-3,
    6.506_635_297_465_724e-3,
    6.546_795_732_567_842_5e-3,
    6.586_519_115_348_261e-3,
    6.625_802_793_945_317e-3,
    6.664_644_145_851_14e-3,
    6.703_040_578_086_941e-3,
    6.740_989_527_375_895e-3,
    6.778_488_460_314_126e-3,
    6.815_534_873_540_5e-3,
    6.852_126_293_902_878e-3,
    6.888_260_278_623_754e-3,
    6.923_934_415_463_31e-3,
    6.959_146_322_880_146_5e-3,
    6.993_893_650_190_702e-3,
    7.028_174_077_725_734e-3,
    7.061_985_316_985_506e-3,
    7.095_325_110_792_439e-3,
    7.128_191_233_441_844e-3,
    7.160_581_490_850_321e-3,
    7.192_493_720_702_486e-3,
    7.223_925_792_595_309e-3,
    7.254_875_608_179_984e-3,
    7.285_341_101_302_512e-3,
    7.315_320_238_141_324_5e-3,
    7.344_811_017_343_063e-3,
    7.373_811_470_156_258e-3,
    7.402_319_660_562_818e-3,
    7.430_333_685_407_178e-3,
    7.457_851_674_523_319e-3,
    7.484_871_790_859_79e-3,
    7.511_392_230_602_079e-3,
    7.537_411_223_293_362e-3,
    7.562_927_031_952_382e-3,
    7.587_937_953_189_561_5e-3,
    7.612_442_317_320_796e-3,
    7.636_438_488_478_739e-3,
    7.659_924_864_722_064e-3,
    7.682_899_878_142_539e-3,
    7.705_361_994_969_524e-3,
    7.727_309_715_672_44e-3,
    7.748_741_575_060_914e-3,
    7.769_656_142_382_462e-3,
    7.790_052_021_418_226e-3,
    7.809_927_850_575_903e-3,
    7.829_282_302_980_82e-3,
    7.848_114_086_564_56e-3,
    7.866_421_944_151_094e-3,
    7.884_204_653_540_665e-3,
    7.901_461_027_591_6e-3,
    7.918_189_914_299_318e-3,
    7.934_390_196_873_448e-3,
    7.950_060_793_812_204e-3,
    7.965_200_658_974_709e-3,
    7.979_808_781_650_77e-3,
    7.993_884_186_628_266e-3,
    8.007_425_934_258_548e-3,
    8.020_433_120_518_866e-3,
    8.032_904_877_072_8e-3,
    8.044_840_371_328_26e-3,
    8.056_238_806_493_175e-3,
    8.067_099_421_628_42e-3,
    8.077_421_491_698_82e-3,
    8.087_204_327_621_594e-3,
    8.096_447_276_312_202e-3,
    8.105_149_720_727_933e-3,
    8.113_311_079_909_208e-3,
    8.120_930_809_018_415e-3,
    8.128_008_399_376_085e-3,
    8.134_543_378_495_033e-3,
    8.140_535_310_111_77e-3,
    8.145_983_794_215_77e-3,
    8.150_888_467_075_875e-3,
    8.155_249_001_265_092e-3,
    8.159_065_105_681_899e-3,
    8.162_336_525_570_1e-3,
    8.165_063_042_535_465e-3,
    8.167_244_474_560_707e-3,
    8.168_880_676_017_344e-3,
    8.169_971_537_675_47e-3,
    8.170_516_986_711_104e-3,
    8.170_516_986_711_104e-3,
    8.169_971_537_675_47e-3,
    8.168_880_676_017_344e-3,
    8.167_244_474_560_707e-3,
    8.165_063_042_535_465e-3,
    8.162_336_525_570_1e-3,
    8.159_065_105_681_899e-3,
    8.155_249_001_265_092e-3,
    8.150_888_467_075_875e-3,
    8.145_983_794_215_77e-3,
    8.140_535_310_111_77e-3,
    8.134_543_378_495_033e-3,
    8.128_008_399_376_085e-3,
    8.120_930_809_018_415e-3,
    8.113_311_079_909_208e-3,
    8.105_149_720_727_933e-3,
    8.096_447_276_312_202e-3,
    8.087_204_327_621_594e-3,
    8.077_421_491_698_82e-3,
    8.067_099_421_628_42e-3,
    8.056_238_806_493_175e-3,
    8.044_840_371_328_26e-3,
    8.032_904_877_072_8e-3,
    8.020_433_120_518_866e-3,
    8.007_425_934_258_548e-3,
    7.993_884_186_628_266e-3,
    7.979_808_781_650_77e-3,
    7.965_200_658_974_709e-3,
    7.950_060_793_812_204e-3,
    7.934_390_196_873_448e-3,
    7.918_189_914_299_318e-3,
    7.901_461_027_591_6e-3,
    7.884_204_653_540_665e-3,
    7.866_421_944_151_094e-3,
    7.848_114_086_564_56e-3,
    7.829_282_302_980_82e-3,
    7.809_927_850_575_903e-3,
    7.790_052_021_418_226e-3,
    7.769_656_142_382_462e-3,
    7.748_741_575_060_914e-3,
    7.727_309_715_672_44e-3,
    7.705_361_994_969_524e-3,
    7.682_899_878_142_539e-3,
    7.659_924_864_722_064e-3,
    7.636_438_488_478_739e-3,
    7.612_442_317_320_796e-3,
    7.587_937_953_189_561_5e-3,
    7.562_927_031_952_382e-3,
    7.537_411_223_293_362e-3,
    7.511_392_230_602_079e-3,
    7.484_871_790_859_79e-3,
    7.457_851_674_523_319e-3,
    7.430_333_685_407_178e-3,
    7.402_319_660_562_818e-3,
    7.373_811_470_156_258e-3,
    7.344_811_017_343_063e-3,
    7.315_320_238_141_324_5e-3,
    7.285_341_101_302_512e-3,
    7.254_875_608_179_984e-3,
    7.223_925_792_595_309e-3,
    7.192_493_720_702_486e-3,
    7.160_581_490_850_321e-3,
    7.128_191_233_441_844e-3,
    7.095_325_110_792_439e-3,
    7.061_985_316_985_506e-3,
    7.028_174_077_725_734e-3,
    6.993_893_650_190_702e-3,
    6.959_146_322_880_146_5e-3,
    6.923_934_415_463_31e-3,
    6.888_260_278_623_754e-3,
    6.852_126_293_902_878e-3,
    6.815_534_873_540_5e-3,
    6.778_488_460_314_126e-3,
    6.740_989_527_375_895e-3,
    6.703_040_578_086_941e-3,
    6.664_644_145_851_14e-3,
    6.625_802_793_945_317e-3,
    6.586_519_115_348_261e-3,
    6.546_795_732_567_842_5e-3,
    6.506_635_297_465_724e-3,
    6.466_040_491_080_434e-3,
    6.425_014_023_448_273e-3,
    6.383_558_633_422_572e-3,
    6.341_677_088_490_664e-3,
    6.299_372_184_589_237e-3,
    6.256_646_745_917_723e-3,
    6.213_503_624_749_591e-3,
    6.169_945_701_242_237e-3,
    6.125_975_883_244_196e-3,
    6.081_597_106_101_673e-3,
    6.036_812_332_462_087e-3,
    5.991_624_552_076_468e-3,
    5.946_036_781_599_814e-3,
    5.900_052_064_389_824e-3,
    5.853_673_470_303_617_4e-3,
    5.806_904_095_492_818e-3,
    5.759_747_062_196_925_5e-3,
    5.712_205_518_534_655e-3,
    5.664_282_638_294_182e-3,
    5.615_981_620_720_803e-3,
    5.567_305_690_303_767e-3,
    5.518_258_096_560_71e-3,
    5.468_842_113_820_941e-3,
    5.419_061_041_006_627e-3,
    5.368_918_201_412_827e-3,
    5.318_416_942_485_385e-3,
    5.267_560_635_597_735e-3,
    5.216_352_675_825_451e-3,
    5.164_796_481_720_011e-3,
    5.112_895_495_080_397e-3,
    5.060_653_180_723_101_4e-3,
    5.008_073_026_251_332e-3,
    4.955_158_541_821_682_4e-3,
    4.901_913_259_910_197e-3,
    4.848_340_735_076_109e-3,
    4.794_444_543_725_102e-3,
    4.740_228_283_870_022e-3,
    4.685_695_574_891_041e-3,
    4.630_850_057_293_894e-3,
    4.575_695_392_466_791e-3,
    4.520_235_262_436_235e-3,
    4.464_473_369_620_78e-3,
    4.408_413_436_584_285e-3,
    4.352_059_205_787_275e-3,
    4.295_414_439_336_925e-3,
    4.238_482_918_736_289e-3,
    4.181_268_444_631_281e-3,
    4.123_774_836_557_6e-3,
    4.066_005_932_685_269e-3,
    4.007_965_589_562_678e-3,
    3.949_657_681_858_895e-3,
    3.891_086_102_105_193_4e-3,
    3.832_254_760_435_171e-3,
    3.773_167_584_323_583_5e-3,
    3.713_828_518_324_312_5e-3,
    3.654_241_523_806_987e-3,
    3.594_410_578_692_452e-3,
    3.534_339_677_187_348_4e-3,
    3.474_032_829_517_317e-3,
    3.413_494_061_659_418_4e-3,
    3.352_727_415_073_250_3e-3,
    3.291_736_946_431_361e-3,
    3.230_526_727_348_174e-3,
    3.169_100_844_108_32e-3,
    3.107_463_397_393_755_5e-3,
    3.045_618_502_010_327_8e-3,
    2.983_570_286_612_554_5e-3,
    2.921_322_893_428_515_3e-3,
    2.858_880_477_983_06e-3,
    2.796_247_208_820_428e-3,
    2.733_427_267_226_093_3e-3,
    2.670_424_846_947_554e-3,
    2.607_244_153_914_452e-3,
    2.543_889_405_957_74e-3,
    2.480_364_832_528_265_6e-3,
    2.416_674_674_414_340_5e-3,
    2.352_823_183_458_769e-3,
    2.288_814_622_274_955e-3,
    2.224_653_263_962_713e-3,
    2.160_343_391_822_734_3e-3,
    2.095_889_299_071_020_6e-3,
    2.031_295_288_552_398_4e-3,
    1.966_565_672_453_437e-3,
    1.901_704_772_014_899_2e-3,
    1.836_716_917_243_567_5e-3,
    1.771_606_446_623_834_7e-3,
    1.706_377_706_828_447_1e-3,
    1.641_035_052_429_271_5e-3,
    1.575_582_845_607_936_8e-3,
    1.510_025_455_865_810_3e-3,
    1.444_367_259_734_736e-3,
    1.378_612_640_487_646_8e-3,
    1.312_765_987_850_66e-3,
    1.246_831_697_715_441_5e-3,
    1.180_814_171_855_922e-3,
    1.114_717_817_647_310_6e-3,
    1.048_547_047_793_689_5e-3,
    9.823_062_800_663_463e-4,
    9.159_999_370_632_641e-4,
    8.496_324_460_039_209e-4,
    7.832_082_385_905_168e-4,
    7.167_317_509_947_801e-4,
    6.502_074_240_969_948e-4,
    5.836_397_042_630_135e-4,
    5.170_330_453_491_649e-4,
    4.503_919_137_716_827e-4,
    3.837_208_020_912_921_4e-4,
    3.170_242_698_112_815e-4,
    2.503_070_890_844_105e-4,
    1.835_749_193_551_655_8e-4,
    1.168_390_665_730_266_3e-4,
    5.019_410_348_676_869_6e-5,
];
918
/// Identifies which exact evaluation branch applies to a cell.
///
/// NOTE(review): the selection logic is outside this view. The names are consistent
/// with the degree of `q(z) = 0.5 * (z^2 + eta(z)^2)` when `eta` is affine,
/// quadratic (quartic `q`) or cubic (sextic `q`) — confirm against the classifier.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum ExactCellBranch {
    /// `c2` and `c3` are both treated as zero (presumed; see note above).
    Affine,
    /// `c3` is treated as zero but `c2` is not (presumed; see note above).
    Quartic,
    /// `c3` is significant (presumed; see note above).
    Sextic,
}
925
926#[inline]
943fn effective_branch_tol(cell: DenestedCubicCell) -> f64 {
944 let anchor_scale = cell.c0.abs().max(cell.c1.abs()).max(1.0);
945 NORMALIZED_CELL_BRANCH_TOL * anchor_scale
946}
947
/// A cell `[left, right]` in the variable `z` carrying the cubic
/// `eta(z) = c0 + c1 * z + c2 * z^2 + c3 * z^3` (coefficients in global `z`, not
/// relative to `left`).
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct DenestedCubicCell {
    /// Lower bound of the cell (may be non-finite for tail cells).
    pub left: f64,
    /// Upper bound of the cell (may be non-finite for tail cells).
    pub right: f64,
    /// Constant coefficient of `eta`.
    pub c0: f64,
    /// Linear coefficient of `eta`.
    pub c1: f64,
    /// Quadratic coefficient of `eta`.
    pub c2: f64,
    /// Cubic coefficient of `eta`.
    pub c3: f64,
}

impl DenestedCubicCell {
    /// Evaluates the cubic `eta` at `z`.
    #[inline]
    pub fn eta(self, z: f64) -> f64 {
        // Terms are formed and summed left to right, lowest degree first.
        let linear = self.c1 * z;
        let quadratic = self.c2 * z * z;
        let cubic = self.c3 * z * z * z;
        self.c0 + linear + quadratic + cubic
    }

    /// Evaluates `q(z) = 0.5 * (z^2 + eta(z)^2)`.
    #[inline]
    pub fn q(self, z: f64) -> f64 {
        let eta_at_z = self.eta(z);
        let squared_sum = z * z + eta_at_z * eta_at_z;
        0.5 * squared_sum
    }
}
970
/// Fingerprint of a [`DenestedCubicCell`], as built by [`cell_moment_fingerprint`].
///
/// Derived equality and hashing cover both fields, so two fingerprints are equal
/// only when every per-field word matches, not merely the mixed digest.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub struct CellMomentFingerprint {
    /// 64-bit digest mixed from `bins`.
    pub hash: u64,
    // Per-field words for `left`, `right`, `c0`, `c1`, `c2`, `c3`, in that order.
    bins: [u64; 6],
}
976
/// Cache key that pairs a cell fingerprint with the requested `max_degree`, as built
/// by [`cell_moment_cache_key`].
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub struct CellMomentCacheKey {
    /// Fingerprint of the cell geometry and coefficients.
    pub fingerprint: CellMomentFingerprint,
    /// Maximum degree requested for the cell.
    pub max_degree: usize,
}
982
/// Counters describing cell-moment deduplication lookups.
#[derive(Clone, Copy, Debug, Default, PartialEq)]
pub struct CellMomentDedupStats {
    /// Total number of lookups performed.
    pub lookups: u64,
    /// Number of lookups counted as hits.
    pub hits: u64,
    /// Number of lookups counted as misses.
    pub misses: u64,
}

impl CellMomentDedupStats {
    /// Fraction `hits / lookups`, or `0.0` when no lookup has been recorded.
    #[inline]
    pub fn hit_rate(self) -> f64 {
        match self.lookups {
            0 => 0.0,
            total => self.hits as f64 / total as f64,
        }
    }
}
1000
/// Local shorthand for `gam_linalg::utils::splitmix64_hash`, used to scramble each
/// word before it is mixed into a fingerprint.
#[inline]
fn splitmix64(x: u64) -> u64 {
    gam_linalg::utils::splitmix64_hash(x)
}
1005
1006#[inline]
1007fn mix_fingerprint_words(words: &[u64]) -> u64 {
1008 let mut h = 0xcbf2_9ce4_8422_2325u64;
1009 for &word in words {
1010 h ^= splitmix64(word);
1011 h = h.wrapping_mul(0x100_0000_01b3);
1012 }
1013 h
1014}
1015
/// Maps `x` to the 64-bit word used when fingerprinting a cell.
///
/// * When `epsilon` is a finite, strictly positive bin width and `x` is finite, the
///   result is the bit pattern of the bin index `round(x / epsilon)`.
/// * Otherwise (`epsilon` is zero, negative, NaN or infinite, or `x` is not finite)
///   the raw bit pattern of `x` is returned, i.e. exact-match mode.
///
/// Bin index zero is always reported as the bits of `+0.0`. `round` preserves the
/// sign of zero, so without this canonicalization a tiny negative `x` would produce
/// the bits of `-0.0` and fall into a different bin than a tiny positive `x`, even
/// though both lie within half an `epsilon` of zero.
#[inline]
fn quantized_cell_word(x: f64, epsilon: f64) -> u64 {
    let quantizable = epsilon.is_finite() && epsilon > 0.0 && x.is_finite();
    if !quantizable {
        return x.to_bits();
    }
    let bin = (x / epsilon).round();
    if bin == 0.0 {
        // `-0.0 == 0.0` is true, so this arm merges both signed zeros.
        0.0_f64.to_bits()
    } else {
        bin.to_bits()
    }
}
1023
1024pub fn cell_moment_fingerprint(cell: DenestedCubicCell, epsilon: f64) -> CellMomentFingerprint {
1032 let bins = [
1033 quantized_cell_word(cell.left, epsilon),
1034 quantized_cell_word(cell.right, epsilon),
1035 quantized_cell_word(cell.c0, epsilon),
1036 quantized_cell_word(cell.c1, epsilon),
1037 quantized_cell_word(cell.c2, epsilon),
1038 quantized_cell_word(cell.c3, epsilon),
1039 ];
1040 CellMomentFingerprint {
1041 hash: mix_fingerprint_words(&bins),
1042 bins,
1043 }
1044}
1045
1046#[inline]
1047pub fn cell_moment_cache_key(
1048 cell: DenestedCubicCell,
1049 max_degree: usize,
1050 epsilon: f64,
1051) -> CellMomentCacheKey {
1052 CellMomentCacheKey {
1053 fingerprint: cell_moment_fingerprint(cell, epsilon),
1054 max_degree,
1055 }
1056}
1057
/// One cell of a partition: the denested cubic together with the two local spans
/// and the two edge descriptors associated with it.
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct DenestedPartitionCell {
    /// The denested cubic on this cell.
    pub cell: DenestedCubicCell,
    /// Local cubic span on the score side.
    /// NOTE(review): presumably the span `cell` was derived from — confirm.
    pub score_span: LocalSpanCubic,
    /// Local cubic span on the link side.
    /// NOTE(review): presumably the span `cell` was derived from — confirm.
    pub link_span: LocalSpanCubic,
    /// Descriptor of the cell's left edge.
    pub left_edge: PartitionEdge,
    /// Descriptor of the cell's right edge.
    pub right_edge: PartitionEdge,
}

// Intentionally empty inherent impl; no methods are defined here.
impl DenestedPartitionCell {}
1073
/// Describes where one edge of a partition cell lies on the `z` axis.
#[derive(Clone, Copy, Debug, PartialEq)]
pub enum PartitionEdge {
    /// The edge sits at this fixed `z`, independent of `a` and `b`.
    Fixed(f64),
    /// The edge sits where the affine map `a + b * z` reaches `tau`.
    Crossing { tau: f64 },
}

impl PartitionEdge {
    /// Resolves the edge to a `z` position for the affine parameters `a` and `b`.
    ///
    /// A `Crossing` edge solves `a + b * z = tau`; with `b == 0.0` the division
    /// follows IEEE rules and the result is infinite or NaN.
    #[inline]
    pub fn z_at(self, a: f64, b: f64) -> f64 {
        match self {
            PartitionEdge::Crossing { tau } => {
                let numerator = tau - a;
                numerator / b
            }
            PartitionEdge::Fixed(position) => position,
        }
    }
}
1095
/// Key of the tail-cell moment cache, built by `tail_cell_cache_key`.
///
/// Floating-point inputs are stored as raw bit patterns, so lookups match only
/// bit-identical values.
#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash)]
struct TailCellMomentCacheKey {
    // Bit pattern of the cell's `c0`.
    c0_bits: u64,
    // Bit pattern of the cell's `c1`.
    c1_bits: u64,
    // Bit pattern of the cell's finite endpoint.
    endpoint_bits: u64,
    // `-1` when the left bound is the open (non-finite) one, `1` when the right is.
    side: i8,
    // Maximum degree requested for the cell.
    max_degree: usize,
}
1104
/// Byte budget handed to the tail-cell moment cache: 64 MiB.
const TAIL_CELL_MOMENT_CACHE_MAX_BYTES: usize = 64 * 1024 * 1024;
/// Entry cap handed to the tail-cell moment cache: 262,144 (`2^18`) entries.
const TAIL_CELL_MOMENT_CACHE_MAX_ENTRIES: usize = 262_144;
1107
/// Snapshot of the tail-cell moment cache counters, as returned by
/// `TailCellMomentCache::stats`.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub struct TailCellMomentCacheStats {
    /// Number of lookups served from the cache.
    pub hits: usize,
    /// Number of lookups that had to be computed.
    pub misses: usize,
    /// Number of entries resident in the cache when the snapshot was taken.
    pub entries: usize,
}

impl TailCellMomentCacheStats {
    /// Total number of counted lookups, `hits + misses`.
    ///
    /// The sum saturates at `usize::MAX` instead of overflowing, so a long-running
    /// process (notably on 32-bit targets) can never panic or wrap here.
    #[inline]
    pub fn requests(self) -> usize {
        self.hits.saturating_add(self.misses)
    }

    /// Fraction `hits / requests`, or `0.0` when nothing has been counted yet.
    #[inline]
    pub fn hit_rate(self) -> f64 {
        match self.requests() {
            0 => 0.0,
            requests => self.hits as f64 / requests as f64,
        }
    }
}
1131
/// Cache of `CellMomentState` values for tail cells, keyed by
/// `TailCellMomentCacheKey`, with hit and miss counters.
///
/// All methods take `&self`; the counters are atomics.
/// NOTE(review): concurrent use also relies on `ByteLruCache` being internally
/// synchronized — confirm against `gam_runtime::resource`.
#[derive(Debug)]
pub struct TailCellMomentCache {
    // Stored moment states.
    moments: ByteLruCache<TailCellMomentCacheKey, CellMomentState>,
    // Number of lookups answered from `moments`.
    hits: std::sync::atomic::AtomicUsize,
    // Number of lookups that were computed and then inserted.
    misses: std::sync::atomic::AtomicUsize,
}
1151
1152impl Default for TailCellMomentCache {
1153 fn default() -> Self {
1154 let shard_count = std::thread::available_parallelism()
1158 .map(|workers| workers.get().saturating_mul(8))
1159 .unwrap_or(32)
1160 .clamp(8, 256);
1161 Self {
1162 moments: ByteLruCache::with_max_entries_sharded(
1163 TAIL_CELL_MOMENT_CACHE_MAX_BYTES,
1164 TAIL_CELL_MOMENT_CACHE_MAX_ENTRIES,
1165 shard_count,
1166 ),
1167 hits: std::sync::atomic::AtomicUsize::new(0),
1168 misses: std::sync::atomic::AtomicUsize::new(0),
1169 }
1170 }
1171}
1172
1173impl TailCellMomentCache {
1174 #[inline]
1176 pub fn new() -> Self {
1177 Self::default()
1178 }
1179
1180 #[inline]
1183 pub fn clear(&self) {
1184 self.moments.clear();
1185 self.hits.store(0, std::sync::atomic::Ordering::Relaxed);
1186 self.misses.store(0, std::sync::atomic::Ordering::Relaxed);
1187 }
1188
1189 #[inline]
1191 pub fn stats(&self) -> TailCellMomentCacheStats {
1192 TailCellMomentCacheStats {
1193 hits: self.hits.load(std::sync::atomic::Ordering::Relaxed),
1194 misses: self.misses.load(std::sync::atomic::Ordering::Relaxed),
1195 entries: self.moments.len(),
1196 }
1197 }
1198
1199 pub fn evaluate(
1209 &self,
1210 cell: DenestedCubicCell,
1211 max_degree: usize,
1212 ) -> Result<CellMomentState, String> {
1213 let Some(key) = tail_cell_cache_key(cell, max_degree) else {
1214 return evaluate_cell_moments_uncached(cell, max_degree);
1215 };
1216 if let Some(state) = self.moments.get(&key) {
1217 self.hits.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
1218 return Ok(state);
1219 }
1220 let state = evaluate_cell_moments_uncached(cell, max_degree)?;
1221 self.misses
1222 .fetch_add(1, std::sync::atomic::Ordering::Relaxed);
1223 self.moments.insert(key, state.clone());
1224 Ok(state)
1225 }
1226}
1227
1228static TAIL_CELL_MOMENT_CACHE: std::sync::OnceLock<TailCellMomentCache> =
1229 std::sync::OnceLock::new();
1230static TAIL_CELL_MOMENT_CACHE_ENABLED: std::sync::atomic::AtomicBool =
1231 std::sync::atomic::AtomicBool::new(true);
1232
1233fn tail_cell_moment_cache() -> &'static TailCellMomentCache {
1234 TAIL_CELL_MOMENT_CACHE.get_or_init(TailCellMomentCache::default)
1235}
1236
1237#[inline]
1238fn tail_cell_cache_key(
1239 cell: DenestedCubicCell,
1240 max_degree: usize,
1241) -> Option<TailCellMomentCacheKey> {
1242 if cell.c2.abs() > NORMALIZED_CELL_BRANCH_TOL || cell.c3.abs() > NORMALIZED_CELL_BRANCH_TOL {
1243 return None;
1244 }
1245 match (!cell.left.is_finite(), !cell.right.is_finite()) {
1246 (true, false) if cell.right.is_finite() => Some(TailCellMomentCacheKey {
1247 c0_bits: cell.c0.to_bits(),
1248 c1_bits: cell.c1.to_bits(),
1249 endpoint_bits: cell.right.to_bits(),
1250 side: -1,
1251 max_degree,
1252 }),
1253 (false, true) if cell.left.is_finite() => Some(TailCellMomentCacheKey {
1254 c0_bits: cell.c0.to_bits(),
1255 c1_bits: cell.c1.to_bits(),
1256 endpoint_bits: cell.left.to_bits(),
1257 side: 1,
1258 max_degree,
1259 }),
1260 _ => None,
1261 }
1262}
1263
1264pub fn set_tail_cell_moment_cache_enabled(enabled: bool) {
1265 TAIL_CELL_MOMENT_CACHE_ENABLED.store(enabled, std::sync::atomic::Ordering::Relaxed);
1266}
1267
1268pub fn reset_tail_cell_moment_cache() {
1269 tail_cell_moment_cache().clear();
1270}
1271
1272pub fn tail_cell_moment_cache_stats() -> TailCellMomentCacheStats {
1273 tail_cell_moment_cache().stats()
1274}
1275
1276#[derive(Clone, Copy, Debug, Eq)]
1277pub struct CellFingerprint {
1278 c0: u64,
1279 c1: u64,
1280 c2: u64,
1281 c3: u64,
1282 left: u64,
1283 right: u64,
1284}
1285
1286impl CellFingerprint {
1287 #[inline]
1288 pub fn new(cell: DenestedCubicCell) -> Self {
1289 Self {
1290 c0: cell.c0.to_bits(),
1291 c1: cell.c1.to_bits(),
1292 c2: cell.c2.to_bits(),
1293 c3: cell.c3.to_bits(),
1294 left: cell.left.to_bits(),
1295 right: cell.right.to_bits(),
1296 }
1297 }
1298}
1299
1300impl PartialEq for CellFingerprint {
1301 #[inline]
1302 fn eq(&self, other: &Self) -> bool {
1303 self.c0 == other.c0
1304 && self.c1 == other.c1
1305 && self.c2 == other.c2
1306 && self.c3 == other.c3
1307 && self.left == other.left
1308 && self.right == other.right
1309 }
1310}
1311
1312impl Hash for CellFingerprint {
1313 #[inline]
1314 fn hash<H: Hasher>(&self, state: &mut H) {
1315 self.c0.hash(state);
1316 self.c1.hash(state);
1317 self.c2.hash(state);
1318 self.c3.hash(state);
1319 self.left.hash(state);
1320 self.right.hash(state);
1321 }
1322}
1323
1324#[derive(Clone, Debug, Default, PartialEq)]
1325pub struct CachedCellMoments {
1326 state: Option<Arc<CellMomentState>>,
1333 derivative_state: Option<Arc<CellDerivativeMomentState>>,
1340}
1341
1342impl CachedCellMoments {
1343 #[inline]
1344 pub fn new(state: Arc<CellMomentState>) -> Self {
1345 Self {
1346 state: Some(state),
1347 derivative_state: None,
1348 }
1349 }
1350
1351 #[inline]
1352 pub fn new_derivative(state: Arc<CellDerivativeMomentState>) -> Self {
1353 Self {
1354 state: None,
1355 derivative_state: Some(state),
1356 }
1357 }
1358
1359 #[inline]
1360 pub fn state_for_degree(&self, max_degree: usize) -> Option<CellMomentState> {
1361 let state = self.state.as_ref()?;
1362 if state.moments.len().saturating_sub(1) < max_degree {
1363 return None;
1364 }
1365 let mut state = (**state).clone();
1370 state.moments.truncate(max_degree + 1);
1371 Some(state)
1372 }
1373
1374 #[inline]
1375 pub fn derivative_state_for_degree(
1376 &self,
1377 max_degree: usize,
1378 ) -> Option<CellDerivativeMomentState> {
1379 let state = self.derivative_state.as_ref()?;
1380 if state.moments.len().saturating_sub(1) < max_degree {
1381 return None;
1382 }
1383 let mut state = (**state).clone();
1385 state.moments.truncate(max_degree + 1);
1386 Some(state)
1387 }
1388
1389 #[inline]
1390 pub fn with_value(mut self, state: Arc<CellMomentState>) -> Self {
1391 self.state = Some(state);
1392 self
1393 }
1394
1395 #[inline]
1396 pub fn with_derivative(mut self, state: Arc<CellDerivativeMomentState>) -> Self {
1397 self.derivative_state = Some(state);
1398 self
1399 }
1400}
1401
1402impl ResidentBytes for CachedCellMoments {
1403 fn resident_bytes(&self) -> usize {
1404 let value_bytes = self
1405 .state
1406 .as_ref()
1407 .map_or(0, |state| state.resident_bytes());
1408 let derivative_bytes = self
1409 .derivative_state
1410 .as_ref()
1411 .map_or(0, |state| state.resident_bytes());
1412 std::mem::size_of::<Self>()
1413 .saturating_add(value_bytes)
1414 .saturating_add(derivative_bytes)
1415 }
1416}
1417
/// Pair of monotonically read hit/miss counters for a cell-moment cache.
#[derive(Debug, Default)]
pub struct CellMomentCacheStats {
    hits: AtomicU64,
    misses: AtomicU64,
}

impl CellMomentCacheStats {
    /// Reads `(hits, misses)` with relaxed ordering.
    #[inline]
    pub fn snapshot(&self) -> (u64, u64) {
        let hits = self.hits.load(Ordering::Relaxed);
        let misses = self.misses.load(Ordering::Relaxed);
        (hits, misses)
    }

    /// Returns `(hit delta, miss delta, hit rate)` relative to an earlier
    /// [`snapshot`](Self::snapshot).
    ///
    /// Deltas saturate at zero when the counters are below `before`. The rate
    /// is `0.0` when no lookups happened in between.
    #[inline]
    pub fn hit_rate_delta(&self, before: (u64, u64)) -> (u64, u64, f64) {
        let (hits_before, misses_before) = before;
        let (hits_now, misses_now) = self.snapshot();
        let dh = hits_now.saturating_sub(hits_before);
        let dm = misses_now.saturating_sub(misses_before);
        let rate = match dh + dm {
            0 => 0.0,
            total => dh as f64 / total as f64,
        };
        (dh, dm, rate)
    }
}
1447
/// `ByteLruCache` mapping a [`CellFingerprint`] to its [`CachedCellMoments`].
pub type CellMomentLruCache = ByteLruCache<CellFingerprint, CachedCellMoments>;

/// Number of `f64` moments a [`CellMomentVec`] stores inline before it spills
/// to the heap.
pub const CELL_MOMENT_INLINE_CAPACITY: usize = 10;

/// Moment storage: a `SmallVec` with [`CELL_MOMENT_INLINE_CAPACITY`] inline
/// `f64` slots.
pub type CellMomentVec = SmallVec<[f64; CELL_MOMENT_INLINE_CAPACITY]>;
1453
1454#[derive(Clone, Debug, PartialEq)]
1455pub struct CellMomentState {
1456 pub branch: ExactCellBranch,
1457 pub value: f64,
1458 pub moments: CellMomentVec,
1459}
1460
1461impl ResidentBytes for CellMomentState {
1462 fn resident_bytes(&self) -> usize {
1463 let spilled_bytes = if self.moments.spilled() {
1464 self.moments
1465 .capacity()
1466 .saturating_mul(std::mem::size_of::<f64>())
1467 } else {
1468 0
1469 };
1470 std::mem::size_of::<Self>().saturating_add(spilled_bytes)
1471 }
1472}
1473
1474#[derive(Clone, Debug, PartialEq)]
1475pub struct CellDerivativeMomentState {
1476 pub branch: ExactCellBranch,
1477 pub moments: CellMomentVec,
1478}
1479
1480impl ResidentBytes for CellDerivativeMomentState {
1481 fn resident_bytes(&self) -> usize {
1482 let spilled_bytes = if self.moments.spilled() {
1483 self.moments
1484 .capacity()
1485 .saturating_mul(std::mem::size_of::<f64>())
1486 } else {
1487 0
1488 };
1489 std::mem::size_of::<Self>().saturating_add(spilled_bytes)
1490 }
1491}
1492
/// Borrowed counterpart of [`CellMomentState`]: same fields, but the moments
/// are a slice tied to lifetime `'a` instead of an owned vector.
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct CellMomentStateRef<'a> {
    /// Branch that produced this state.
    pub branch: ExactCellBranch,
    /// Scalar value stored alongside the moments.
    pub value: f64,
    /// Borrowed moments.
    pub moments: &'a [f64],
}
1499
1500#[derive(Clone, Debug)]
1501pub struct CellMomentScratch {
1502 moments: Vec<f64>,
1503}
1504
1505impl Default for CellMomentScratch {
1506 fn default() -> Self {
1507 Self {
1511 moments: Vec::with_capacity(MAX_AFFINE_ANCHOR_DEGREE + 1),
1512 }
1513 }
1514}
1515
1516impl CellMomentScratch {
1517 pub fn new() -> Self {
1518 Self::default()
1519 }
1520
1521 pub fn with_capacity(max_degree: usize) -> Self {
1522 Self {
1523 moments: Vec::with_capacity(max_degree + 1),
1524 }
1525 }
1526
1527 #[inline]
1528 fn prepare_moments(&mut self, len: usize) -> &mut [f64] {
1529 if self.moments.capacity() < len {
1530 CELL_MOMENT_REALLOCS.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
1531 self.moments.reserve(len - self.moments.capacity());
1532 }
1533 self.moments.resize(len, 0.0);
1534 self.moments.fill(0.0);
1535 &mut self.moments
1536 }
1537}
1538
/// Counter incremented by `CellMomentScratch::prepare_moments` each time the
/// scratch buffer's capacity is too small for the requested length.
pub(crate) static CELL_MOMENT_REALLOCS: std::sync::atomic::AtomicUsize =
    std::sync::atomic::AtomicUsize::new(0);
1544
/// Abscissae of the 20-point Gauss–Legendre quadrature rule on `[-1, 1]`,
/// listed in ascending order. The table is symmetric about zero and pairs
/// index-by-index with [`GL20_WEIGHTS`].
pub const GL20_NODES: [f64; 20] = [
    -0.993_128_599_185_094_9,
    -0.963_971_927_277_913_8,
    -0.912_234_428_251_326,
    -0.839_116_971_822_218_8,
    -0.746_331_906_460_150_8,
    -0.636_053_680_726_515,
    -0.510_867_001_950_827_1,
    -0.373_706_088_715_419_6,
    -0.227_785_851_141_645_1,
    -0.076_526_521_133_497_33,
    0.076_526_521_133_497_33,
    0.227_785_851_141_645_1,
    0.373_706_088_715_419_6,
    0.510_867_001_950_827_1,
    0.636_053_680_726_515,
    0.746_331_906_460_150_8,
    0.839_116_971_822_218_8,
    0.912_234_428_251_326,
    0.963_971_927_277_913_8,
    0.993_128_599_185_094_9,
];
1573
/// Weights of the 20-point Gauss–Legendre quadrature rule on `[-1, 1]`, in the
/// same order as [`GL20_NODES`]. The table is symmetric, mirroring the nodes.
pub const GL20_WEIGHTS: [f64; 20] = [
    0.017_614_007_139_152_12,
    0.040_601_429_800_386_94,
    0.062_672_048_334_109_06,
    0.083_276_741_576_704_75,
    0.101_930_119_817_240_4,
    0.118_194_531_961_518_4,
    0.131_688_638_449_176_6,
    0.142_096_109_318_382_1,
    0.149_172_986_472_603_7,
    0.152_753_387_130_725_9,
    0.152_753_387_130_725_9,
    0.149_172_986_472_603_7,
    0.142_096_109_318_382_1,
    0.131_688_638_449_176_6,
    0.118_194_531_961_518_4,
    0.101_930_119_817_240_4,
    0.083_276_741_576_704_75,
    0.062_672_048_334_109_06,
    0.040_601_429_800_386_94,
    0.017_614_007_139_152_12,
];
1597
1598fn dedup_sorted_tagged_breakpoints(points: &mut Vec<(f64, PartitionEdge)>) {
1604 points.sort_by(|lhs, rhs| {
1605 lhs.0
1606 .partial_cmp(&rhs.0)
1607 .unwrap_or(std::cmp::Ordering::Equal)
1608 });
1609 points.dedup_by(|lhs, rhs| {
1610 let coincide = if lhs.0 == rhs.0 {
1611 true
1612 } else if lhs.0.is_finite() && rhs.0.is_finite() {
1613 (lhs.0 - rhs.0).abs() <= 1e-12
1614 } else {
1615 false
1616 };
1617 if coincide && matches!(lhs.1, PartitionEdge::Fixed(_)) {
1618 rhs.1 = lhs.1;
1621 }
1622 coincide
1623 });
1624}
1625
1626#[inline]
1627pub fn interval_probe_point(left: f64, right: f64) -> Result<f64, String> {
1628 if !(left < right) {
1629 return Err(CubicCellKernelError::invalid_interval(format!(
1630 "interval probe requires ordered bounds, got [{left}, {right}]"
1631 ))
1632 .into());
1633 }
1634 if left.is_finite() && right.is_finite() {
1635 Ok(0.5 * (left + right))
1636 } else if left == f64::NEG_INFINITY && right == f64::INFINITY {
1637 Ok(0.0)
1638 } else if left == f64::NEG_INFINITY && right.is_finite() {
1639 Ok(right - 1.0)
1640 } else if left.is_finite() && right == f64::INFINITY {
1641 Ok(left + 1.0)
1642 } else {
1643 Err(CubicCellKernelError::invalid_interval(format!(
1644 "interval probe requires finite bounds or full infinities, got [{left}, {right}]"
1645 ))
1646 .into())
1647 }
1648}
1649
/// Ascending-power coefficients of `z + η(z)·η'(z)` for the quadratic
/// `η(z) = c0 + c1·z + c2·z²`, i.e. the derivative of `(z² + η(z)²) / 2`.
///
/// Returns `[constant, linear, quadratic, cubic]`.
#[inline]
pub fn quartic_qprime_coefficients(c0: f64, c1: f64, c2: f64) -> [f64; 4] {
    let constant = c0 * c1;
    let linear = 1.0 + c1 * c1 + 2.0 * c0 * c2;
    let quadratic = 3.0 * c1 * c2;
    let cubic = 2.0 * c2 * c2;
    [constant, linear, quadratic, cubic]
}
1659
/// Ascending-power coefficients of `z + η(z)·η'(z)` for the cubic
/// `η(z) = c0 + c1·z + c2·z² + c3·z³`, i.e. the derivative of
/// `(z² + η(z)²) / 2`.
///
/// Returns the six coefficients from the constant term up to `z⁵`.
#[inline]
pub fn sextic_qprime_coefficients(c0: f64, c1: f64, c2: f64, c3: f64) -> [f64; 6] {
    let constant = c0 * c1;
    let linear = 1.0 + c1 * c1 + 2.0 * c0 * c2;
    let quadratic = 3.0 * c0 * c3 + 3.0 * c1 * c2;
    let cubic = 4.0 * c1 * c3 + 2.0 * c2 * c2;
    let quartic = 5.0 * c2 * c3;
    let quintic = 3.0 * c3 * c3;
    [constant, linear, quadratic, cubic, quartic, quintic]
}
1671
1672#[inline]
1677fn moment_boundary_term_with_powers(
1678 cell: DenestedCubicCell,
1679 left_pow_n: f64,
1680 right_pow_n: f64,
1681) -> f64 {
1682 let left_term = if cell.left.is_infinite() {
1683 0.0
1684 } else {
1685 left_pow_n * (-cell.q(cell.left)).exp()
1686 };
1687 let right_term = if cell.right.is_infinite() {
1688 0.0
1689 } else {
1690 right_pow_n * (-cell.q(cell.right)).exp()
1691 };
1692 right_term - left_term
1693}
1694
/// Checks that the leading entries of `base` and `direct` agree.
///
/// Entries are compared pairwise over the shorter of the two slices; each
/// pair must satisfy `|a − b| <= 1e-10 · (1 + max(|a|, |b|))`. A NaN in either
/// slice makes the comparison fail.
#[inline]
fn base_moments_match_direct(base: &[f64], direct: &[f64]) -> bool {
    base.iter().zip(direct).all(|(&expected, &actual)| {
        let scale = 1.0 + expected.abs().max(actual.abs());
        (expected - actual).abs() <= 1e-10 * scale
    })
}
1701
1702#[inline]
1703fn direct_non_affine_moments_if_base_matches(
1704 cell: DenestedCubicCell,
1705 base: &[f64],
1706 max_degree: usize,
1707) -> Option<Vec<f64>> {
1708 if !cell.left.is_finite() || !cell.right.is_finite() {
1709 return None;
1710 }
1711 let (moments, _) = evaluate_non_affine_cell_simd::<false>(cell, max_degree);
1719 if base_moments_match_direct(base, &moments) {
1720 Some(moments.into_vec())
1721 } else {
1722 None
1723 }
1724}
1725
1726pub fn reduce_quartic_moments(
1727 cell: DenestedCubicCell,
1728 base_m0_m2: [f64; 3],
1729 max_degree: usize,
1730) -> Result<Vec<f64>, String> {
1731 if max_degree <= 2 {
1732 return Ok(base_m0_m2[..=max_degree].to_vec());
1733 }
1734 if let Some(moments) = direct_non_affine_moments_if_base_matches(cell, &base_m0_m2, max_degree)
1735 {
1736 return Ok(moments);
1737 }
1738 let d = quartic_qprime_coefficients(cell.c0, cell.c1, cell.c2);
1739 let lead = d[3];
1740 if !lead.is_finite() || lead.abs() <= 1e-18 {
1741 return Err(CubicCellKernelError::invalid_cell_shape(format!(
1742 "quartic moment reduction requires nonzero leading coefficient, got {lead:.3e}"
1743 ))
1744 .into());
1745 }
1746 let mut moments = vec![0.0; max_degree + 1];
1747 moments[0] = base_m0_m2[0];
1748 moments[1] = base_m0_m2[1];
1749 moments[2] = base_m0_m2[2];
1750 let left_finite = cell.left.is_finite();
1755 let right_finite = cell.right.is_finite();
1756 let mut left_pow_n = if left_finite { 1.0 } else { 0.0 };
1757 let mut right_pow_n = if right_finite { 1.0 } else { 0.0 };
1758 for n in 0..=(max_degree - 3) {
1759 let b_n = moment_boundary_term_with_powers(cell, left_pow_n, right_pow_n);
1760 let mut numer = if n == 0 {
1761 0.0
1762 } else {
1763 (n as f64) * moments[n - 1]
1764 };
1765 for j in 0..=2 {
1766 numer -= d[j] * moments[n + j];
1767 }
1768 numer -= b_n;
1769 moments[n + 3] = numer / lead;
1770 if left_finite {
1771 left_pow_n *= cell.left;
1772 }
1773 if right_finite {
1774 right_pow_n *= cell.right;
1775 }
1776 }
1777 Ok(moments)
1778}
1779
1780pub fn reduce_sextic_moments(
1781 cell: DenestedCubicCell,
1782 base_m0_m4: [f64; 5],
1783 max_degree: usize,
1784) -> Result<Vec<f64>, String> {
1785 if max_degree <= 4 {
1786 return Ok(base_m0_m4[..=max_degree].to_vec());
1787 }
1788 if let Some(moments) = direct_non_affine_moments_if_base_matches(cell, &base_m0_m4, max_degree)
1789 {
1790 return Ok(moments);
1791 }
1792 let d = sextic_qprime_coefficients(cell.c0, cell.c1, cell.c2, cell.c3);
1793 let lead = d[5];
1794 if !lead.is_finite() {
1795 return Err(CubicCellKernelError::invalid_cell_shape(format!(
1796 "sextic moment reduction encountered non-finite leading coefficient: {lead:.3e}"
1797 ))
1798 .into());
1799 }
1800 if let Some(lower_branch) = degenerate_sextic_branch(cell, lead)? {
1801 if lower_branch == ExactCellBranch::Quartic {
1802 return evaluate_non_affine_cell_state(
1803 DenestedCubicCell { c3: 0.0, ..cell },
1804 ExactCellBranch::Quartic,
1805 max_degree,
1806 )
1807 .map(|state| state.moments.into_vec());
1808 }
1809 return evaluate_affine_cell_state(
1810 DenestedCubicCell {
1811 left: cell.left,
1812 right: cell.right,
1813 c0: cell.c0,
1814 c1: cell.c1,
1815 c2: 0.0,
1816 c3: 0.0,
1817 },
1818 max_degree,
1819 )
1820 .map(|state| state.moments.into_vec());
1821 }
1822 let mut moments = vec![0.0; max_degree + 1];
1823 for (idx, value) in base_m0_m4.into_iter().enumerate() {
1824 moments[idx] = value;
1825 }
1826 let left_finite = cell.left.is_finite();
1827 let right_finite = cell.right.is_finite();
1828 let mut left_pow_n = if left_finite { 1.0 } else { 0.0 };
1829 let mut right_pow_n = if right_finite { 1.0 } else { 0.0 };
1830 for n in 0..=(max_degree - 5) {
1831 let b_n = moment_boundary_term_with_powers(cell, left_pow_n, right_pow_n);
1832 let mut numer = if n == 0 {
1833 0.0
1834 } else {
1835 (n as f64) * moments[n - 1]
1836 };
1837 for j in 0..=4 {
1838 numer -= d[j] * moments[n + j];
1839 }
1840 numer -= b_n;
1841 moments[n + 5] = numer / lead;
1842 if left_finite {
1843 left_pow_n *= cell.left;
1844 }
1845 if right_finite {
1846 right_pow_n *= cell.right;
1847 }
1848 }
1849 Ok(moments)
1850}
1851
1852#[inline]
1853pub fn cell_first_derivative_from_moments(
1854 derivative_coefficients: &[f64],
1855 moments: &[f64],
1856) -> Result<f64, String> {
1857 let value = moment_dot_with_coefficients(derivative_coefficients, moments, "first derivative")?;
1858 Ok(value * INV_TWO_PI)
1859}
1860
/// Highest moment degree needed to evaluate a first derivative with the given
/// coefficients: the polynomial degree, or `0` for an empty slice.
#[inline]
pub fn cell_first_derivative_required_max_degree(derivative_coefficients: &[f64]) -> usize {
    let term_count = derivative_coefficients.len();
    term_count.saturating_sub(1)
}
1872
/// Highest moment degree needed to evaluate a second derivative.
///
/// This is the larger of the degree of `second_coefficients_rs` and
/// `deg(r) + deg(s) + 3`, the degree of the product of the two first-order
/// polynomials with a cubic. Empty slices count as degree 0.
#[inline]
pub fn cell_second_derivative_required_max_degree(
    first_coefficients_r: &[f64],
    first_coefficients_s: &[f64],
    second_coefficients_rs: &[f64],
) -> usize {
    let degree_of = |coefficients: &[f64]| coefficients.len().saturating_sub(1);
    let product_degree = degree_of(first_coefficients_r) + degree_of(first_coefficients_s) + 3;
    degree_of(second_coefficients_rs).max(product_degree)
}
1893
1894#[inline]
1895pub fn cell_polynomial_integral_from_moments(
1896 polynomial_coefficients: &[f64],
1897 moments: &[f64],
1898 label: &str,
1899) -> Result<f64, String> {
1900 let value = moment_dot_with_coefficients(polynomial_coefficients, moments, label)?;
1901 Ok(value * INV_TWO_PI)
1902}
1903
1904#[inline]
1905pub fn cell_second_derivative_from_moments(
1906 cell: DenestedCubicCell,
1907 first_coefficients_r: &[f64],
1908 first_coefficients_s: &[f64],
1909 second_coefficients_rs: &[f64],
1910 moments: &[f64],
1911) -> Result<f64, String> {
1912 let second_degree = second_coefficients_rs.len().saturating_sub(1);
1913 let product_degree = first_coefficients_r.len().saturating_sub(1)
1914 + first_coefficients_s.len().saturating_sub(1)
1915 + 3;
1916 let needed = second_degree.max(product_degree) + 1;
1917 if needed > moments.len() {
1918 return Err(CubicCellKernelError::insufficient_moments(format!(
1919 "insufficient reduced moments for second derivative: need {}, have {}",
1920 needed,
1921 moments.len()
1922 ))
1923 .into());
1924 }
1925 let second_term = moment_dot_with_coefficients_unchecked(second_coefficients_rs, moments);
1926 let cubic = [cell.c0, cell.c1, cell.c2, cell.c3];
1933 const SCRATCH: usize = 32;
1937 let mut eta_r = [0.0_f64; SCRATCH];
1938 let mut eta_rs = [0.0_f64; SCRATCH];
1939 let er_len = poly_conv_into(&cubic, first_coefficients_r, &mut eta_r);
1940 let ers_len = poly_conv_into(&eta_r[..er_len], first_coefficients_s, &mut eta_rs);
1941 let mut eta_term = 0.0;
1942 for k in 0..ers_len {
1943 eta_term = eta_rs[k].mul_add(moments[k], eta_term);
1944 }
1945 Ok((second_term - eta_term) * INV_TWO_PI)
1946}
1947
1948#[inline]
1968pub fn cell_second_derivative_boundary_integrand(
1969 cell: DenestedCubicCell,
1970 first_coefficients_r: &[f64],
1971 first_coefficients_s: &[f64],
1972 second_coefficients_rs: &[f64],
1973 z: f64,
1974) -> f64 {
1975 let eta = cell.eta(z);
1976 let c_r = poly_eval_at(first_coefficients_r, z);
1977 let c_s = poly_eval_at(first_coefficients_s, z);
1978 let c_rs = poly_eval_at(second_coefficients_rs, z);
1979 (c_rs - eta * c_r * c_s) * (-cell.q(z)).exp() * INV_TWO_PI
1980}
1981
1982pub fn cell_density_boundary_integrand(cell: DenestedCubicCell, g: &[f64], z: f64) -> f64 {
1996 poly_eval_at(g, z) * (-cell.q(z)).exp() * INV_TWO_PI
1997}
1998
/// Evaluates the polynomial with ascending-power `coefficients` at `z` using
/// Horner's scheme with fused multiply-add. An empty slice evaluates to `0.0`.
#[inline]
fn poly_eval_at(coefficients: &[f64], z: f64) -> f64 {
    coefficients
        .iter()
        .rev()
        .fold(0.0_f64, |acc, &coefficient| acc.mul_add(z, coefficient))
}
2008
2009#[inline]
2010fn moment_dot_with_coefficients(
2011 coefficients: &[f64],
2012 moments: &[f64],
2013 label: &str,
2014) -> Result<f64, String> {
2015 if coefficients.len() > moments.len() {
2016 return Err(CubicCellKernelError::insufficient_moments(format!(
2017 "insufficient reduced moments for {label}: need {}, have {}",
2018 coefficients.len(),
2019 moments.len()
2020 ))
2021 .into());
2022 }
2023 Ok(moment_dot_with_coefficients_unchecked(
2024 coefficients,
2025 moments,
2026 ))
2027}
2028
/// Dot product `Σ_k coefficients[k] · moments[k]`, accumulated in index order
/// with fused multiply-add.
///
/// # Panics
///
/// Panics when `moments` is shorter than `coefficients`; callers are expected
/// to have validated the lengths.
#[inline]
fn moment_dot_with_coefficients_unchecked(coefficients: &[f64], moments: &[f64]) -> f64 {
    coefficients
        .iter()
        .enumerate()
        .fold(0.0, |acc, (idx, &coeff)| coeff.mul_add(moments[idx], acc))
}
2037
/// Writes the coefficients of the polynomial product `lhs · rhs` into the
/// front of `out` and returns how many were written.
///
/// The product has `lhs.len() + rhs.len() − 1` coefficients; entries of `out`
/// beyond that are left untouched. If either input is empty, nothing is
/// written and `0` is returned.
///
/// # Panics
///
/// Panics when `out` is too short to hold the product.
#[inline]
fn poly_conv_into(lhs: &[f64], rhs: &[f64], out: &mut [f64]) -> usize {
    if lhs.is_empty() || rhs.is_empty() {
        return 0;
    }
    let len = lhs.len() + rhs.len() - 1;
    assert!(out.len() >= len);
    out[..len].fill(0.0);
    for (i, &lv) in lhs.iter().enumerate() {
        // Row `i` of the product lands in `out[i..i + rhs.len()]`.
        let row = &mut out[i..i + rhs.len()];
        for (slot, &rv) in row.iter_mut().zip(rhs) {
            *slot = lv.mul_add(rv, *slot);
        }
    }
    len
}
2064
2065#[inline]
2066fn require_moments_degree(
2067 required_degree: usize,
2068 moments: &[f64],
2069 label: &str,
2070) -> Result<(), String> {
2071 if required_degree >= moments.len() {
2072 return Err(CubicCellKernelError::insufficient_moments(format!(
2073 "insufficient reduced moments for {label}: need {}, have {}",
2074 required_degree + 1,
2075 moments.len()
2076 ))
2077 .into());
2078 }
2079 Ok::<(), _>(())
2080}
2081
2082#[inline]
2083fn require_scratch_capacity(
2084 required_len: usize,
2085 capacity: usize,
2086 label: &str,
2087) -> Result<(), String> {
2088 if required_len > capacity {
2089 return Err(CubicCellKernelError::insufficient_moments(format!(
2090 "{label} polynomial convolution scratch too small: need {required_len}, have {capacity}"
2091 ))
2092 .into());
2093 }
2094 Ok::<(), _>(())
2095}
2096
/// Number of coefficients in the product of polynomials whose coefficient
/// counts are `lengths`.
///
/// Returns `0` when the list is empty or any factor has no coefficients;
/// otherwise the sum of the lengths minus one per multiplication.
#[inline]
fn convolution_chain_len(lengths: &[usize]) -> usize {
    let has_empty_factor = lengths.iter().any(|&len| len == 0);
    if lengths.is_empty() || has_empty_factor {
        return 0;
    }
    let total: usize = lengths.iter().sum();
    total - (lengths.len() - 1)
}
2105
/// Polynomial degree of a first-derivative coefficient slice.
///
/// # Errors
///
/// Returns a message naming `label` when `coefficients` is empty.
#[inline]
fn first_coefficients_degree(label: &str, coefficients: &[f64]) -> Result<usize, String> {
    match coefficients.len() {
        0 => Err(format!("{label} first-derivative coefficients must be non-empty")),
        count => Ok(count - 1),
    }
}
2113
2114#[inline]
2115pub fn cell_third_derivative_from_moments(
2116 cell: DenestedCubicCell,
2117 first_coefficients_r: &[f64],
2118 first_coefficients_s: &[f64],
2119 first_coefficients_t: &[f64],
2120 second_coefficients_rs: &[f64],
2121 second_coefficients_rt: &[f64],
2122 second_coefficients_st: &[f64],
2123 third_coefficients_rst: &[f64],
2124 moments: &[f64],
2125) -> Result<f64, String> {
2126 let eta = [cell.c0, cell.c1, cell.c2, cell.c3];
2127 let r_degree = first_coefficients_degree("r", first_coefficients_r)?;
2128 let s_degree = first_coefficients_degree("s", first_coefficients_s)?;
2129 let t_degree = first_coefficients_degree("t", first_coefficients_t)?;
2130 let second_sum_degree = [
2131 second_coefficients_rs.len() + first_coefficients_t.len(),
2132 second_coefficients_rt.len() + first_coefficients_s.len(),
2133 second_coefficients_st.len() + first_coefficients_r.len(),
2134 ]
2135 .into_iter()
2136 .max()
2137 .unwrap_or(0)
2138 .saturating_sub(1);
2139 let triple_product_degree = r_degree + s_degree + t_degree;
2140 let needed = (third_coefficients_rst.len().saturating_sub(1))
2141 .max(3 + second_sum_degree)
2142 .max(6 + triple_product_degree);
2143 require_moments_degree(needed, moments, "third derivative")?;
2144
2145 let third_term = moment_dot_with_coefficients_unchecked(third_coefficients_rst, moments);
2146
2147 const SCRATCH: usize = 32;
2151 let max_linear_conv_len = [
2152 convolution_chain_len(&[
2153 eta.len(),
2154 second_coefficients_rs.len(),
2155 first_coefficients_t.len(),
2156 ]),
2157 convolution_chain_len(&[
2158 eta.len(),
2159 second_coefficients_rt.len(),
2160 first_coefficients_s.len(),
2161 ]),
2162 convolution_chain_len(&[
2163 eta.len(),
2164 second_coefficients_st.len(),
2165 first_coefficients_r.len(),
2166 ]),
2167 ]
2168 .into_iter()
2169 .max()
2170 .unwrap_or(0);
2171 let max_cubic_conv_len = convolution_chain_len(&[
2172 7,
2173 first_coefficients_r.len(),
2174 first_coefficients_s.len(),
2175 first_coefficients_t.len(),
2176 ]);
2177 require_scratch_capacity(
2178 max_linear_conv_len.max(max_cubic_conv_len),
2179 SCRATCH,
2180 "third derivative",
2181 )?;
2182 let mut buf_a = [0.0_f64; SCRATCH];
2183 let mut buf_b = [0.0_f64; SCRATCH];
2184
2185 let mut eta_second_term = 0.0;
2188 let conv_dot = |first: &[f64],
2189 second: &[f64],
2190 buf_a: &mut [f64; SCRATCH],
2191 buf_b: &mut [f64; SCRATCH]|
2192 -> f64 {
2193 let m = poly_conv_into(first, second, buf_a);
2194 let n = poly_conv_into(&eta, &buf_a[..m], buf_b);
2195 let mut acc = 0.0;
2196 for k in 0..n {
2197 acc = buf_b[k].mul_add(moments[k], acc);
2198 }
2199 acc
2200 };
2201 eta_second_term += conv_dot(
2202 second_coefficients_rs,
2203 first_coefficients_t,
2204 &mut buf_a,
2205 &mut buf_b,
2206 );
2207 eta_second_term += conv_dot(
2208 second_coefficients_rt,
2209 first_coefficients_s,
2210 &mut buf_a,
2211 &mut buf_b,
2212 );
2213 eta_second_term += conv_dot(
2214 second_coefficients_st,
2215 first_coefficients_r,
2216 &mut buf_a,
2217 &mut buf_b,
2218 );
2219
2220 let mut eta_sq_minus_one = [0.0_f64; 7];
2223 for (i, &eta_i) in eta.iter().enumerate() {
2224 for (j, &eta_j) in eta.iter().enumerate() {
2225 eta_sq_minus_one[i + j] = eta_i.mul_add(eta_j, eta_sq_minus_one[i + j]);
2226 }
2227 }
2228 eta_sq_minus_one[0] -= 1.0;
2229
2230 let rs_len = poly_conv_into(first_coefficients_r, first_coefficients_s, &mut buf_a);
2231 let rst_len = poly_conv_into(&buf_a[..rs_len], first_coefficients_t, &mut buf_b);
2232 let final_len = poly_conv_into(&eta_sq_minus_one, &buf_b[..rst_len], &mut buf_a);
2234 let mut cubic_coeff_term = 0.0;
2235 for k in 0..final_len {
2236 cubic_coeff_term = buf_a[k].mul_add(moments[k], cubic_coeff_term);
2237 }
2238
2239 Ok((third_term - eta_second_term + cubic_coeff_term) * INV_TWO_PI)
2240}
2241
2242#[inline]
2243pub fn cell_fourth_derivative_from_moments(
2244 cell: DenestedCubicCell,
2245 first_coefficients_r: &[f64],
2246 first_coefficients_s: &[f64],
2247 first_coefficients_t: &[f64],
2248 first_coefficients_u: &[f64],
2249 second_coefficients_rs: &[f64],
2250 second_coefficients_rt: &[f64],
2251 second_coefficients_ru: &[f64],
2252 second_coefficients_st: &[f64],
2253 second_coefficients_su: &[f64],
2254 second_coefficients_tu: &[f64],
2255 third_coefficients_rst: &[f64],
2256 third_coefficients_rsu: &[f64],
2257 third_coefficients_rtu: &[f64],
2258 third_coefficients_stu: &[f64],
2259 fourth_coefficients_rstu: &[f64],
2260 moments: &[f64],
2261) -> Result<f64, String> {
2262 let eta = [cell.c0, cell.c1, cell.c2, cell.c3];
2263 let r_degree = first_coefficients_degree("r", first_coefficients_r)?;
2264 let s_degree = first_coefficients_degree("s", first_coefficients_s)?;
2265 let t_degree = first_coefficients_degree("t", first_coefficients_t)?;
2266 let u_degree = first_coefficients_degree("u", first_coefficients_u)?;
2267 let linear_sum_degree = [
2268 third_coefficients_rst.len() + first_coefficients_u.len(),
2269 third_coefficients_rsu.len() + first_coefficients_t.len(),
2270 third_coefficients_rtu.len() + first_coefficients_s.len(),
2271 third_coefficients_stu.len() + first_coefficients_r.len(),
2272 second_coefficients_rs.len() + second_coefficients_tu.len(),
2273 second_coefficients_rt.len() + second_coefficients_su.len(),
2274 second_coefficients_ru.len() + second_coefficients_st.len(),
2275 ]
2276 .into_iter()
2277 .max()
2278 .unwrap_or(0)
2279 .saturating_sub(1);
2280 let quad_sum_degree = [
2281 second_coefficients_rs.len() + first_coefficients_t.len() + first_coefficients_u.len(),
2282 second_coefficients_rt.len() + first_coefficients_s.len() + first_coefficients_u.len(),
2283 second_coefficients_ru.len() + first_coefficients_s.len() + first_coefficients_t.len(),
2284 second_coefficients_st.len() + first_coefficients_r.len() + first_coefficients_u.len(),
2285 second_coefficients_su.len() + first_coefficients_r.len() + first_coefficients_t.len(),
2286 second_coefficients_tu.len() + first_coefficients_r.len() + first_coefficients_s.len(),
2287 ]
2288 .into_iter()
2289 .max()
2290 .unwrap_or(0)
2291 .saturating_sub(2);
2292 let quartic_product_degree = r_degree + s_degree + t_degree + u_degree;
2293 let needed = (fourth_coefficients_rstu.len().saturating_sub(1))
2294 .max(3 + linear_sum_degree)
2295 .max(6 + quad_sum_degree)
2296 .max(9 + quartic_product_degree);
2297 require_moments_degree(needed, moments, "fourth derivative")?;
2298
2299 let fourth_term = moment_dot_with_coefficients_unchecked(fourth_coefficients_rstu, moments);
2300
2301 const SCRATCH: usize = 32;
2305 let max_linear_conv_len = [
2306 convolution_chain_len(&[
2307 eta.len(),
2308 third_coefficients_rst.len(),
2309 first_coefficients_u.len(),
2310 ]),
2311 convolution_chain_len(&[
2312 eta.len(),
2313 third_coefficients_rsu.len(),
2314 first_coefficients_t.len(),
2315 ]),
2316 convolution_chain_len(&[
2317 eta.len(),
2318 third_coefficients_rtu.len(),
2319 first_coefficients_s.len(),
2320 ]),
2321 convolution_chain_len(&[
2322 eta.len(),
2323 third_coefficients_stu.len(),
2324 first_coefficients_r.len(),
2325 ]),
2326 convolution_chain_len(&[
2327 eta.len(),
2328 second_coefficients_rs.len(),
2329 second_coefficients_tu.len(),
2330 ]),
2331 convolution_chain_len(&[
2332 eta.len(),
2333 second_coefficients_rt.len(),
2334 second_coefficients_su.len(),
2335 ]),
2336 convolution_chain_len(&[
2337 eta.len(),
2338 second_coefficients_ru.len(),
2339 second_coefficients_st.len(),
2340 ]),
2341 ]
2342 .into_iter()
2343 .max()
2344 .unwrap_or(0);
2345 let max_quad_conv_len = [
2346 convolution_chain_len(&[
2347 7,
2348 second_coefficients_rs.len(),
2349 first_coefficients_t.len(),
2350 first_coefficients_u.len(),
2351 ]),
2352 convolution_chain_len(&[
2353 7,
2354 second_coefficients_rt.len(),
2355 first_coefficients_s.len(),
2356 first_coefficients_u.len(),
2357 ]),
2358 convolution_chain_len(&[
2359 7,
2360 second_coefficients_ru.len(),
2361 first_coefficients_s.len(),
2362 first_coefficients_t.len(),
2363 ]),
2364 convolution_chain_len(&[
2365 7,
2366 second_coefficients_st.len(),
2367 first_coefficients_r.len(),
2368 first_coefficients_u.len(),
2369 ]),
2370 convolution_chain_len(&[
2371 7,
2372 second_coefficients_su.len(),
2373 first_coefficients_r.len(),
2374 first_coefficients_t.len(),
2375 ]),
2376 convolution_chain_len(&[
2377 7,
2378 second_coefficients_tu.len(),
2379 first_coefficients_r.len(),
2380 first_coefficients_s.len(),
2381 ]),
2382 ]
2383 .into_iter()
2384 .max()
2385 .unwrap_or(0);
2386 let max_quartic_conv_len = convolution_chain_len(&[
2387 10,
2388 first_coefficients_r.len(),
2389 first_coefficients_s.len(),
2390 first_coefficients_t.len(),
2391 first_coefficients_u.len(),
2392 ]);
2393 require_scratch_capacity(
2394 max_linear_conv_len
2395 .max(max_quad_conv_len)
2396 .max(max_quartic_conv_len),
2397 SCRATCH,
2398 "fourth derivative",
2399 )?;
2400 let mut buf_a = [0.0_f64; SCRATCH];
2401 let mut buf_b = [0.0_f64; SCRATCH];
2402
2403 let conv_eta_dot = |first: &[f64],
2407 second: &[f64],
2408 buf_a: &mut [f64; SCRATCH],
2409 buf_b: &mut [f64; SCRATCH]|
2410 -> f64 {
2411 let m = poly_conv_into(first, second, buf_a);
2412 let n = poly_conv_into(&eta, &buf_a[..m], buf_b);
2413 let mut acc = 0.0;
2414 for k in 0..n {
2415 acc = buf_b[k].mul_add(moments[k], acc);
2416 }
2417 acc
2418 };
2419 let mut eta_linear_term = 0.0;
2420 eta_linear_term += conv_eta_dot(
2421 third_coefficients_rst,
2422 first_coefficients_u,
2423 &mut buf_a,
2424 &mut buf_b,
2425 );
2426 eta_linear_term += conv_eta_dot(
2427 third_coefficients_rsu,
2428 first_coefficients_t,
2429 &mut buf_a,
2430 &mut buf_b,
2431 );
2432 eta_linear_term += conv_eta_dot(
2433 third_coefficients_rtu,
2434 first_coefficients_s,
2435 &mut buf_a,
2436 &mut buf_b,
2437 );
2438 eta_linear_term += conv_eta_dot(
2439 third_coefficients_stu,
2440 first_coefficients_r,
2441 &mut buf_a,
2442 &mut buf_b,
2443 );
2444 eta_linear_term += conv_eta_dot(
2445 second_coefficients_rs,
2446 second_coefficients_tu,
2447 &mut buf_a,
2448 &mut buf_b,
2449 );
2450 eta_linear_term += conv_eta_dot(
2451 second_coefficients_rt,
2452 second_coefficients_su,
2453 &mut buf_a,
2454 &mut buf_b,
2455 );
2456 eta_linear_term += conv_eta_dot(
2457 second_coefficients_ru,
2458 second_coefficients_st,
2459 &mut buf_a,
2460 &mut buf_b,
2461 );
2462
2463 let mut eta_sq_minus_one = [0.0_f64; 7];
2464 for (i, &eta_i) in eta.iter().enumerate() {
2465 for (j, &eta_j) in eta.iter().enumerate() {
2466 eta_sq_minus_one[i + j] = eta_i.mul_add(eta_j, eta_sq_minus_one[i + j]);
2467 }
2468 }
2469 eta_sq_minus_one[0] -= 1.0;
2470
2471 let mut buf_c = [0.0_f64; SCRATCH];
2474 let conv_weighted_triple_dot = |weight: &[f64],
2475 a: &[f64],
2476 b: &[f64],
2477 c: &[f64],
2478 buf_a: &mut [f64; SCRATCH],
2479 buf_b: &mut [f64; SCRATCH],
2480 buf_c: &mut [f64; SCRATCH]|
2481 -> f64 {
2482 let ab_len = poly_conv_into(a, b, buf_a);
2483 let abc_len = poly_conv_into(&buf_a[..ab_len], c, buf_b);
2484 let final_len = poly_conv_into(weight, &buf_b[..abc_len], buf_c);
2485 let mut acc = 0.0;
2486 for k in 0..final_len {
2487 acc = buf_c[k].mul_add(moments[k], acc);
2488 }
2489 acc
2490 };
2491 let mut quad_coeff_term = 0.0;
2492 quad_coeff_term += conv_weighted_triple_dot(
2493 &eta_sq_minus_one,
2494 second_coefficients_rs,
2495 first_coefficients_t,
2496 first_coefficients_u,
2497 &mut buf_a,
2498 &mut buf_b,
2499 &mut buf_c,
2500 );
2501 quad_coeff_term += conv_weighted_triple_dot(
2502 &eta_sq_minus_one,
2503 second_coefficients_rt,
2504 first_coefficients_s,
2505 first_coefficients_u,
2506 &mut buf_a,
2507 &mut buf_b,
2508 &mut buf_c,
2509 );
2510 quad_coeff_term += conv_weighted_triple_dot(
2511 &eta_sq_minus_one,
2512 second_coefficients_ru,
2513 first_coefficients_s,
2514 first_coefficients_t,
2515 &mut buf_a,
2516 &mut buf_b,
2517 &mut buf_c,
2518 );
2519 quad_coeff_term += conv_weighted_triple_dot(
2520 &eta_sq_minus_one,
2521 second_coefficients_st,
2522 first_coefficients_r,
2523 first_coefficients_u,
2524 &mut buf_a,
2525 &mut buf_b,
2526 &mut buf_c,
2527 );
2528 quad_coeff_term += conv_weighted_triple_dot(
2529 &eta_sq_minus_one,
2530 second_coefficients_su,
2531 first_coefficients_r,
2532 first_coefficients_t,
2533 &mut buf_a,
2534 &mut buf_b,
2535 &mut buf_c,
2536 );
2537 quad_coeff_term += conv_weighted_triple_dot(
2538 &eta_sq_minus_one,
2539 second_coefficients_tu,
2540 first_coefficients_r,
2541 first_coefficients_s,
2542 &mut buf_a,
2543 &mut buf_b,
2544 &mut buf_c,
2545 );
2546
2547 let mut eta_sq = [0.0_f64; 7];
2550 for (i, &eta_i) in eta.iter().enumerate() {
2551 for (j, &eta_j) in eta.iter().enumerate() {
2552 eta_sq[i + j] = eta_i.mul_add(eta_j, eta_sq[i + j]);
2553 }
2554 }
2555 let mut cubic_weight = [0.0_f64; 10];
2556 for (i, &eta_sq_i) in eta_sq.iter().enumerate() {
2557 for (j, &eta_j) in eta.iter().enumerate() {
2558 cubic_weight[i + j] = (-eta_sq_i).mul_add(eta_j, cubic_weight[i + j]);
2559 }
2560 }
2561 for (idx, &eta_coeff) in eta.iter().enumerate() {
2562 cubic_weight[idx] += 3.0 * eta_coeff;
2563 }
2564
2565 let rs_len = poly_conv_into(first_coefficients_r, first_coefficients_s, &mut buf_a);
2570 let rst_len = poly_conv_into(&buf_a[..rs_len], first_coefficients_t, &mut buf_b);
2571 let rstu_len = poly_conv_into(&buf_b[..rst_len], first_coefficients_u, &mut buf_a);
2572 let final_len = poly_conv_into(&cubic_weight, &buf_a[..rstu_len], &mut buf_b);
2573 let mut quartic_coeff_term = 0.0;
2574 for k in 0..final_len {
2575 quartic_coeff_term = buf_b[k].mul_add(moments[k], quartic_coeff_term);
2576 }
2577
2578 Ok((fourth_term - eta_linear_term + quad_coeff_term + quartic_coeff_term) * INV_TWO_PI)
2579}
2580
2581#[inline]
2582pub fn global_cubic_from_local(span: LocalSpanCubic) -> (f64, f64, f64, f64) {
2583 let left = span.left;
2584 let q0 = span.c0 - span.c1 * left + span.c2 * left * left - span.c3 * left * left * left;
2585 let q1 = span.c1 - 2.0 * span.c2 * left + 3.0 * span.c3 * left * left;
2586 let q2 = span.c2 - 3.0 * span.c3 * left;
2587 let q3 = span.c3;
2588 (q0, q1, q2, q3)
2589}
2590
2591#[inline]
2615pub fn transformed_link_cubic(link_span: LocalSpanCubic, a: f64, b: f64) -> (f64, f64, f64, f64) {
2616 let shift = a - link_span.left;
2617 let d0 = link_span.c0
2618 + link_span.c1 * shift
2619 + link_span.c2 * shift * shift
2620 + link_span.c3 * shift * shift * shift;
2621 let d1 = b * (link_span.c1 + 2.0 * link_span.c2 * shift + 3.0 * link_span.c3 * shift * shift);
2622 let d2 = b * b * (link_span.c2 + 3.0 * link_span.c3 * shift);
2623 let d3 = link_span.c3 * b * b * b;
2624 (d0, d1, d2, d3)
2625}
2626
2627#[inline]
2628pub fn denested_cell_coefficients(
2629 score_span: LocalSpanCubic,
2630 link_span: LocalSpanCubic,
2631 a: f64,
2632 b: f64,
2633) -> [f64; 4] {
2634 let (h0, h1, h2, h3) = global_cubic_from_local(score_span);
2635 let (d0, d1, d2, d3) = transformed_link_cubic(link_span, a, b);
2636 [a + b * h0 + d0, b + b * h1 + d1, b * h2 + d2, b * h3 + d3]
2637}
2638
2639#[inline]
2640pub fn denested_cell_coefficient_partials(
2641 score_span: LocalSpanCubic,
2642 link_span: LocalSpanCubic,
2643 a: f64,
2644 b: f64,
2645) -> ([f64; 4], [f64; 4]) {
2646 let (h0, h1, h2, h3) = global_cubic_from_local(score_span);
2647 let shift = a - link_span.left;
2648 let alpha1 = link_span.c1;
2649 let alpha2 = link_span.c2;
2650 let alpha3 = link_span.c3;
2651 let dc_da = [
2652 1.0 + alpha1 + 2.0 * alpha2 * shift + 3.0 * alpha3 * shift * shift,
2653 b * (2.0 * alpha2 + 6.0 * alpha3 * shift),
2654 3.0 * alpha3 * b * b,
2655 0.0,
2656 ];
2657 let dc_db = [
2658 h0,
2659 1.0 + h1 + alpha1 + 2.0 * alpha2 * shift + 3.0 * alpha3 * shift * shift,
2660 h2 + 2.0 * b * (alpha2 + 3.0 * alpha3 * shift),
2661 h3 + 3.0 * alpha3 * b * b,
2662 ];
2663 (dc_da, dc_db)
2664}
2665
2666#[inline]
2667fn link_cubic_second_partials(
2668 link_span: LocalSpanCubic,
2669 a: f64,
2670 b: f64,
2671) -> ([f64; 4], [f64; 4], [f64; 4]) {
2672 let shift = a - link_span.left;
2673 let alpha2 = link_span.c2;
2674 let alpha3 = link_span.c3;
2675 let dc_daa = [
2676 2.0 * alpha2 + 6.0 * alpha3 * shift,
2677 6.0 * alpha3 * b,
2678 0.0,
2679 0.0,
2680 ];
2681 let dc_dab = [
2682 0.0,
2683 2.0 * alpha2 + 6.0 * alpha3 * shift,
2684 6.0 * alpha3 * b,
2685 0.0,
2686 ];
2687 let dc_dbb = [
2688 0.0,
2689 0.0,
2690 2.0 * (alpha2 + 3.0 * alpha3 * shift),
2691 6.0 * alpha3 * b,
2692 ];
2693 (dc_daa, dc_dab, dc_dbb)
2694}
2695
2696#[inline]
2697pub fn denested_cell_second_partials(
2698 score_span: LocalSpanCubic,
2699 link_span: LocalSpanCubic,
2700 a: f64,
2701 b: f64,
2702) -> ([f64; 4], [f64; 4], [f64; 4]) {
2703 let score_left = score_span.left;
2704 if !score_left.is_finite() {
2705 return ([f64::NAN; 4], [f64::NAN; 4], [f64::NAN; 4]);
2706 }
2707 link_cubic_second_partials(link_span, a, b)
2708}
2709
2710#[inline]
2711fn link_cubic_third_partials(
2712 link_span: LocalSpanCubic,
2713) -> ([f64; 4], [f64; 4], [f64; 4], [f64; 4]) {
2714 let alpha3 = link_span.c3;
2715 (
2716 [6.0 * alpha3, 0.0, 0.0, 0.0],
2717 [0.0, 6.0 * alpha3, 0.0, 0.0],
2718 [0.0, 0.0, 6.0 * alpha3, 0.0],
2719 [0.0, 0.0, 0.0, 6.0 * alpha3],
2720 )
2721}
2722
2723#[inline]
2724pub fn denested_cell_third_partials(
2725 link_span: LocalSpanCubic,
2726) -> ([f64; 4], [f64; 4], [f64; 4], [f64; 4]) {
2727 link_cubic_third_partials(link_span)
2728}
2729
2730#[inline]
2731pub fn score_basis_cell_coefficients(score_basis_span: LocalSpanCubic, b: f64) -> [f64; 4] {
2732 let (h0, h1, h2, h3) = global_cubic_from_local(score_basis_span);
2733 [b * h0, b * h1, b * h2, b * h3]
2734}
2735
2736#[inline]
2737pub fn link_basis_cell_coefficients(link_basis_span: LocalSpanCubic, a: f64, b: f64) -> [f64; 4] {
2738 let (d0, d1, d2, d3) = transformed_link_cubic(link_basis_span, a, b);
2739 [d0, d1, d2, d3]
2740}
2741
2742#[inline]
2743pub fn link_basis_cell_coefficient_partials(
2744 link_basis_span: LocalSpanCubic,
2745 a: f64,
2746 b: f64,
2747) -> ([f64; 4], [f64; 4]) {
2748 let shift = a - link_basis_span.left;
2749 let alpha1 = link_basis_span.c1;
2750 let alpha2 = link_basis_span.c2;
2751 let alpha3 = link_basis_span.c3;
2752 let dc_da = [
2753 alpha1 + 2.0 * alpha2 * shift + 3.0 * alpha3 * shift * shift,
2754 b * (2.0 * alpha2 + 6.0 * alpha3 * shift),
2755 3.0 * alpha3 * b * b,
2756 0.0,
2757 ];
2758 let dc_db = [
2759 0.0,
2760 alpha1 + 2.0 * alpha2 * shift + 3.0 * alpha3 * shift * shift,
2761 2.0 * b * (alpha2 + 3.0 * alpha3 * shift),
2762 3.0 * alpha3 * b * b,
2763 ];
2764 (dc_da, dc_db)
2765}
2766
2767#[inline]
2768pub fn link_basis_cell_second_partials(
2769 link_basis_span: LocalSpanCubic,
2770 a: f64,
2771 b: f64,
2772) -> ([f64; 4], [f64; 4], [f64; 4]) {
2773 link_cubic_second_partials(link_basis_span, a, b)
2774}
2775
2776#[inline]
2777pub fn link_basis_cell_third_partials(
2778 link_basis_span: LocalSpanCubic,
2779) -> ([f64; 4], [f64; 4], [f64; 4], [f64; 4]) {
2780 link_cubic_third_partials(link_basis_span)
2781}
2782
2783pub fn build_denested_partition_cells<FS, FL>(
2784 a: f64,
2785 b: f64,
2786 score_breaks: &[f64],
2787 link_breaks: &[f64],
2788 score_span_at: FS,
2789 link_span_at: FL,
2790) -> Result<Vec<DenestedPartitionCell>, String>
2791where
2792 FS: FnMut(f64) -> Result<LocalSpanCubic, String>,
2793 FL: FnMut(f64) -> Result<LocalSpanCubic, String>,
2794{
2795 build_denested_partition_cells_with_tails(
2796 a,
2797 b,
2798 score_breaks,
2799 link_breaks,
2800 score_span_at,
2801 link_span_at,
2802 )
2803}
2804
2805pub fn build_denested_partition_cells_with_tails<FS, FL>(
2814 a: f64,
2815 b: f64,
2816 score_breaks: &[f64],
2817 link_breaks: &[f64],
2818 mut score_span_at: FS,
2819 mut link_span_at: FL,
2820) -> Result<Vec<DenestedPartitionCell>, String>
2821where
2822 FS: FnMut(f64) -> Result<LocalSpanCubic, String>,
2823 FL: FnMut(f64) -> Result<LocalSpanCubic, String>,
2824{
2825 let mut split_points: Vec<(f64, PartitionEdge)> = score_breaks
2830 .iter()
2831 .map(|&sigma| (sigma, PartitionEdge::Fixed(sigma)))
2832 .collect();
2833 if b.abs() > 1e-12 {
2834 for &tau in link_breaks {
2835 let z = (tau - a) / b;
2836 if z.is_finite() {
2837 split_points.push((z, PartitionEdge::Crossing { tau }));
2838 }
2839 }
2840 }
2841 dedup_sorted_tagged_breakpoints(&mut split_points);
2842
2843 let mut out = Vec::new();
2844
2845 if split_points.is_empty() {
2846 let score_span = score_span_at(0.0)?;
2847 let link_span = link_span_at(a)?;
2848 let coeffs = denested_cell_coefficients(score_span, link_span, a, b);
2849 return Ok(vec![DenestedPartitionCell {
2850 cell: DenestedCubicCell {
2851 left: f64::NEG_INFINITY,
2852 right: f64::INFINITY,
2853 c0: coeffs[0],
2854 c1: coeffs[1],
2855 c2: 0.0,
2856 c3: 0.0,
2857 },
2858 score_span,
2859 link_span,
2860 left_edge: PartitionEdge::Fixed(f64::NEG_INFINITY),
2861 right_edge: PartitionEdge::Fixed(f64::INFINITY),
2862 }]);
2863 }
2864
2865 let (leftmost, leftmost_edge) = split_points[0];
2867 let left_probe = interval_probe_point(f64::NEG_INFINITY, leftmost)?;
2870 let left_score_span = score_span_at(left_probe)?;
2871 let left_link_span = link_span_at(a + b * left_probe)?;
2872 let left_coeffs = denested_cell_coefficients(left_score_span, left_link_span, a, b);
2873 if left_coeffs[2].abs() > NORMALIZED_CELL_BRANCH_TOL
2874 || left_coeffs[3].abs() > NORMALIZED_CELL_BRANCH_TOL
2875 {
2876 return Err(CubicCellKernelError::invalid_cell_shape(format!(
2877 "left tail cell must be affine (deviations constant outside support), \
2878 got c2={:.3e}, c3={:.3e}",
2879 left_coeffs[2], left_coeffs[3]
2880 ))
2881 .into());
2882 }
2883 out.push(DenestedPartitionCell {
2884 cell: DenestedCubicCell {
2885 left: f64::NEG_INFINITY,
2886 right: leftmost,
2887 c0: left_coeffs[0],
2888 c1: left_coeffs[1],
2889 c2: 0.0,
2890 c3: 0.0,
2891 },
2892 score_span: left_score_span,
2893 link_span: left_link_span,
2894 left_edge: PartitionEdge::Fixed(f64::NEG_INFINITY),
2895 right_edge: leftmost_edge,
2896 });
2897
2898 for window in split_points.windows(2) {
2900 let (left, left_edge) = window[0];
2901 let (right, right_edge) = window[1];
2902 if !left.is_finite() || !right.is_finite() || right - left <= 1e-12 {
2903 continue;
2904 }
2905 let mid = interval_probe_point(left, right)?;
2906 let score_span = score_span_at(mid)?;
2907 let link_span = link_span_at(a + b * mid)?;
2908 let coeffs = denested_cell_coefficients(score_span, link_span, a, b);
2909 out.push(DenestedPartitionCell {
2910 cell: DenestedCubicCell {
2911 left,
2912 right,
2913 c0: coeffs[0],
2914 c1: coeffs[1],
2915 c2: coeffs[2],
2916 c3: coeffs[3],
2917 },
2918 score_span,
2919 link_span,
2920 left_edge,
2921 right_edge,
2922 });
2923 }
2924
2925 let (rightmost, rightmost_edge) = *split_points.last().unwrap();
2927 let right_probe = interval_probe_point(rightmost, f64::INFINITY)?;
2928 let right_score_span = score_span_at(right_probe)?;
2929 let right_link_span = link_span_at(a + b * right_probe)?;
2930 let right_coeffs = denested_cell_coefficients(right_score_span, right_link_span, a, b);
2931 if right_coeffs[2].abs() > NORMALIZED_CELL_BRANCH_TOL
2932 || right_coeffs[3].abs() > NORMALIZED_CELL_BRANCH_TOL
2933 {
2934 return Err(CubicCellKernelError::invalid_cell_shape(format!(
2935 "right tail cell must be affine (deviations constant outside support), \
2936 got c2={:.3e}, c3={:.3e}",
2937 right_coeffs[2], right_coeffs[3]
2938 ))
2939 .into());
2940 }
2941 out.push(DenestedPartitionCell {
2942 cell: DenestedCubicCell {
2943 left: rightmost,
2944 right: f64::INFINITY,
2945 c0: right_coeffs[0],
2946 c1: right_coeffs[1],
2947 c2: 0.0,
2948 c3: 0.0,
2949 },
2950 score_span: right_score_span,
2951 link_span: right_link_span,
2952 left_edge: rightmost_edge,
2953 right_edge: PartitionEdge::Fixed(f64::INFINITY),
2954 });
2955
2956 Ok(out)
2957}
2958
2959#[inline]
2960pub fn normalized_non_affine_coefficients(
2961 left: f64,
2962 right: f64,
2963 c0: f64,
2964 c1: f64,
2965 c2: f64,
2966 c3: f64,
2967) -> Result<(f64, f64), String> {
2968 let width = right - left;
2969 if !width.is_finite() || width <= 0.0 {
2970 return Err(CubicCellKernelError::invalid_cell_shape(format!(
2971 "normalized cubic coefficients require a positive finite cell width, got left={left}, right={right}"
2972 ))
2973 .into());
2974 }
2975 let anchor_scale = c0.abs() + c1.abs();
2976 if !anchor_scale.is_finite() {
2977 return Err(CubicCellKernelError::invalid_cell_shape(format!(
2978 "normalized cubic coefficients require finite affine coefficients, got c0={c0}, c1={c1}"
2979 ))
2980 .into());
2981 }
2982 let mid = 0.5 * (left + right);
2983 let half = 0.5 * width;
2984 let k2 = half * half * (c2 + 3.0 * c3 * mid);
2985 let k3 = c3 * half * half * half;
2986 Ok((k2, k3))
2987}
2988
2989#[inline]
2990pub fn branch_cell(cell: DenestedCubicCell) -> Result<ExactCellBranch, String> {
2991 let tol = effective_branch_tol(cell);
2992 if !cell.left.is_finite() || !cell.right.is_finite() {
2993 if cell.c2.abs() <= tol && cell.c3.abs() <= tol {
2994 return Ok(ExactCellBranch::Affine);
2995 }
2996 return Err(CubicCellKernelError::invalid_cell_shape(format!(
2997 "non-affine cells require finite bounds, got [{}, {}] with c2={:.6e}, c3={:.6e}",
2998 cell.left, cell.right, cell.c2, cell.c3
2999 ))
3000 .into());
3001 }
3002 let (k2, k3) = normalized_non_affine_coefficients(
3003 cell.left, cell.right, cell.c0, cell.c1, cell.c2, cell.c3,
3004 )?;
3005 if k2.abs() <= tol && k3.abs() <= tol {
3006 Ok(ExactCellBranch::Affine)
3007 } else if k3.abs() <= tol {
3008 Ok(ExactCellBranch::Quartic)
3009 } else {
3010 Ok(ExactCellBranch::Sextic)
3011 }
3012}
3013
3014#[inline]
3015fn degenerate_sextic_branch(
3016 cell: DenestedCubicCell,
3017 lead: f64,
3018) -> Result<Option<ExactCellBranch>, String> {
3019 let (normalized_k2, normalized_k3) = normalized_non_affine_coefficients(
3023 cell.left, cell.right, cell.c0, cell.c1, cell.c2, cell.c3,
3024 )?;
3025 if normalized_k3.abs() > NORMALIZED_CELL_BRANCH_TOL && lead.abs() > 1e-18 {
3026 return Ok(None);
3027 }
3028 if normalized_k2.abs() > NORMALIZED_CELL_BRANCH_TOL {
3029 Ok(Some(ExactCellBranch::Quartic))
3030 } else {
3031 Ok(Some(ExactCellBranch::Affine))
3032 }
3033}
3034
3035#[inline]
3036fn validate_bvn_args(h: f64, k: f64, rho: f64) -> Result<(), String> {
3037 if !h.is_finite() && !h.is_infinite() {
3038 return Err(CubicCellKernelError::bivariate_normal_domain(
3039 "bivariate normal cdf requires finite or infinite h",
3040 )
3041 .into());
3042 }
3043 if !k.is_finite() && !k.is_infinite() {
3044 return Err(CubicCellKernelError::bivariate_normal_domain(
3045 "bivariate normal cdf requires finite or infinite k",
3046 )
3047 .into());
3048 }
3049 if !rho.is_finite() {
3050 return Err(CubicCellKernelError::bivariate_normal_domain(format!(
3051 "bivariate normal cdf requires finite correlation, got {rho}"
3052 ))
3053 .into());
3054 }
3055 Ok::<(), _>(())
3056}
3057
3058#[inline]
3059fn bvn_gl_sum(h: f64, k: f64, rho_clamped: f64, asr: f64) -> f64 {
3060 if rho_clamped == 0.0 {
3067 return 0.0;
3068 }
3069 let hs = 0.5 * (h * h + k * k);
3070 let hk = h * k;
3071 let half_asr = 0.5 * asr;
3072 let (sin_mid, cos_mid) = half_asr.sin_cos();
3073 let mut sum = 0.0;
3074 for i in 0..10 {
3075 let node = GL20_NODES[i].abs();
3076 let weight = GL20_WEIGHTS[i];
3077 let (sin_delta, cos_delta) = (half_asr * node).sin_cos();
3078
3079 let sn_lo = sin_mid * cos_delta - cos_mid * sin_delta;
3080 let one_minus_lo = 1.0 - sn_lo * sn_lo;
3081 let expo_lo = ((sn_lo * hk) - hs) / one_minus_lo;
3082
3083 let sn_hi = sin_mid * cos_delta + cos_mid * sin_delta;
3084 let one_minus_hi = 1.0 - sn_hi * sn_hi;
3085 let expo_hi = ((sn_hi * hk) - hs) / one_minus_hi;
3086
3087 sum += weight * (expo_lo.exp() + expo_hi.exp());
3088 }
3089 sum
3090}
3091
3092pub fn bivariate_normal_cdf(h: f64, k: f64, rho: f64) -> Result<f64, String> {
3093 validate_bvn_args(h, k, rho)?;
3094 if h == f64::NEG_INFINITY || k == f64::NEG_INFINITY {
3095 return Ok(0.0);
3096 }
3097 if h == f64::INFINITY {
3098 return Ok(normal_cdf(k));
3099 }
3100 if k == f64::INFINITY {
3101 return Ok(normal_cdf(h));
3102 }
3103
3104 let rho_clamped = rho.clamp(-1.0, 1.0);
3105 if rho_clamped >= 1.0 - 1e-12 {
3106 return Ok(normal_cdf(h.min(k)));
3107 }
3108 if rho_clamped <= -1.0 + 1e-12 {
3109 return Ok((normal_cdf(h) - normal_cdf(-k)).clamp(0.0, 1.0));
3110 }
3111 if rho_clamped == 0.0 {
3112 return Ok((normal_cdf(h) * normal_cdf(k)).clamp(0.0, 1.0));
3113 }
3114 if h == 0.0 && k == 0.0 {
3115 return Ok((0.25 + rho_clamped.asin() / std::f64::consts::TAU).clamp(0.0, 1.0));
3116 }
3117
3118 let asr = rho_clamped.asin();
3119 let sum = bvn_gl_sum(h, k, rho_clamped, asr);
3120 Ok((normal_cdf(h) * normal_cdf(k) + asr * sum / (4.0 * std::f64::consts::PI)).clamp(0.0, 1.0))
3121}
3122
3123#[inline]
3124fn bvn_gl_sum_interval(h: f64, left: f64, right: f64, rho_clamped: f64, asr: f64) -> f64 {
3125 if rho_clamped == 0.0 {
3126 return 0.0;
3127 }
3128 let h2 = h * h;
3129 let right_hs = 0.5 * (h2 + right * right);
3130 let left_hs = 0.5 * (h2 + left * left);
3131 let half_asr = 0.5 * asr;
3132 let (sin_mid, cos_mid) = half_asr.sin_cos();
3133 let mut sum = 0.0;
3134 for i in 0..10 {
3135 let node = GL20_NODES[i].abs();
3136 let weight = GL20_WEIGHTS[i];
3137 let (sin_delta, cos_delta) = (half_asr * node).sin_cos();
3138
3139 let sn_lo = sin_mid * cos_delta - cos_mid * sin_delta;
3140 let one_minus_lo = 1.0 - sn_lo * sn_lo;
3141 let lo_right = (((sn_lo * h * right) - right_hs) / one_minus_lo).exp();
3142 let lo_left = (((sn_lo * h * left) - left_hs) / one_minus_lo).exp();
3143
3144 let sn_hi = sin_mid * cos_delta + cos_mid * sin_delta;
3145 let one_minus_hi = 1.0 - sn_hi * sn_hi;
3146 let hi_right = (((sn_hi * h * right) - right_hs) / one_minus_hi).exp();
3147 let hi_left = (((sn_hi * h * left) - left_hs) / one_minus_hi).exp();
3148
3149 sum += weight * ((lo_right - lo_left) + (hi_right - hi_left));
3150 }
3151 sum
3152}
3153
3154fn bivariate_normal_cdf_interval(h: f64, left: f64, right: f64, rho: f64) -> Result<f64, String> {
3155 if right <= left {
3156 return Ok(0.0);
3157 }
3158 if left == f64::NEG_INFINITY && right == f64::INFINITY {
3159 return Ok(normal_cdf(h));
3160 }
3161 if !left.is_finite() || !right.is_finite() {
3162 let upper = bivariate_normal_cdf(h, right, rho)?;
3163 let lower = bivariate_normal_cdf(h, left, rho)?;
3164 return Ok((upper - lower).clamp(0.0, 1.0));
3165 }
3166 validate_bvn_args(h, left, rho)?;
3167 validate_bvn_args(h, right, rho)?;
3168 if h == f64::NEG_INFINITY {
3169 return Ok(0.0);
3170 }
3171 if h == f64::INFINITY {
3172 return Ok((normal_cdf(right) - normal_cdf(left)).clamp(0.0, 1.0));
3173 }
3174
3175 let rho_clamped = rho.clamp(-1.0, 1.0);
3176 if rho_clamped >= 1.0 - 1e-12 || rho_clamped <= -1.0 + 1e-12 {
3177 let upper = bivariate_normal_cdf(h, right, rho_clamped)?;
3178 let lower = bivariate_normal_cdf(h, left, rho_clamped)?;
3179 return Ok((upper - lower).clamp(0.0, 1.0));
3180 }
3181
3182 let cdf_h = normal_cdf(h);
3183 let normal_part = cdf_h * (normal_cdf(right) - normal_cdf(left));
3184 if rho_clamped == 0.0 {
3185 return Ok(normal_part.clamp(0.0, 1.0));
3186 }
3187 let asr = rho_clamped.asin();
3188 let sum = bvn_gl_sum_interval(h, left, right, rho_clamped, asr);
3189 Ok((normal_part + asr * sum / (4.0 * std::f64::consts::PI)).clamp(0.0, 1.0))
3190}
3191
/// Returns `exp(-x^2 / 2)`, with both infinities mapped to exactly `0.0`.
fn exp_neg_half_square(x: f64) -> f64 {
    if x.is_infinite() {
        return 0.0;
    }
    let exponent = -0.5 * x * x;
    exponent.exp()
}
3199
3200fn truncated_gaussian_zeroth_moment(a: f64, b: f64) -> f64 {
3240 let inv_sqrt2 = 1.0 / std::f64::consts::SQRT_2;
3241 let za = a * inv_sqrt2;
3242 let zb = b * inv_sqrt2;
3243 let erf_diff = if za >= 0.0 {
3244 libm::erfc(za) - libm::erfc(zb)
3245 } else if zb <= 0.0 {
3246 libm::erfc(-zb) - libm::erfc(-za)
3247 } else {
3248 2.0 - libm::erfc(zb) - libm::erfc(-za)
3249 };
3250 (std::f64::consts::PI / 2.0).sqrt() * erf_diff
3252}
3253
3254fn fill_truncated_gaussian_moments(a: f64, b: f64, out: &mut [f64]) {
3276 if out.is_empty() {
3277 return;
3278 }
3279 out[0] = truncated_gaussian_zeroth_moment(a, b);
3280 if out.len() == 1 {
3281 return;
3282 }
3283 let ea = exp_neg_half_square(a);
3284 let eb = exp_neg_half_square(b);
3285 out[1] = ea - eb;
3286 if out.len() == 2 {
3287 return;
3288 }
3289 let a_finite = a.is_finite();
3290 let b_finite = b.is_finite();
3291 let mut a_pow_n_minus_1 = a; let mut b_pow_n_minus_1 = b;
3299 for n in 2..out.len() {
3300 let left = if a_finite { a_pow_n_minus_1 * ea } else { 0.0 };
3301 let right = if b_finite { b_pow_n_minus_1 * eb } else { 0.0 };
3302 out[n] = left - right + (n as f64 - 1.0) * out[n - 2];
3303 a_pow_n_minus_1 *= a;
3304 b_pow_n_minus_1 *= b;
3305 }
3306}
3307
/// Largest `max_degree` accepted by `affine_anchor_moment_vector_into`.
///
/// That function sizes its fixed scratch arrays as
/// `MAX_AFFINE_ANCHOR_DEGREE + 1` and asserts that the requested degree does
/// not exceed this bound.
const MAX_AFFINE_ANCHOR_DEGREE: usize = 64;
3313
3314pub fn affine_anchor_moment_vector(
3315 alpha: f64,
3316 beta: f64,
3317 left: f64,
3318 right: f64,
3319 max_degree: usize,
3320) -> Vec<f64> {
3321 let mut out = vec![0.0; max_degree + 1];
3322 affine_anchor_moment_vector_into(alpha, beta, left, right, max_degree, &mut out);
3323 out
3324}
3325
3326fn affine_anchor_moment_vector_into(
3327 alpha: f64,
3328 beta: f64,
3329 left: f64,
3330 right: f64,
3331 max_degree: usize,
3332 out: &mut [f64],
3333) {
3334 assert_eq!(out.len(), max_degree + 1);
3335 let s = (1.0 + beta * beta).sqrt();
3336 let mu = -alpha * beta / (1.0 + beta * beta);
3337 let y_left = if left.is_infinite() {
3338 if left.is_sign_positive() {
3339 f64::INFINITY
3340 } else {
3341 f64::NEG_INFINITY
3342 }
3343 } else {
3344 s * (left - mu)
3345 };
3346 let y_right = if right.is_infinite() {
3347 if right.is_sign_positive() {
3348 f64::INFINITY
3349 } else {
3350 f64::NEG_INFINITY
3351 }
3352 } else {
3353 s * (right - mu)
3354 };
3355 let anchor = (-alpha * alpha / (2.0 * s * s)).exp() / s;
3356 assert!(
3357 max_degree <= MAX_AFFINE_ANCHOR_DEGREE,
3358 "affine_anchor_moment_vector max_degree {} exceeds compile-time bound {}",
3359 max_degree,
3360 MAX_AFFINE_ANCHOR_DEGREE
3361 );
3362 let mut t = [0.0_f64; MAX_AFFINE_ANCHOR_DEGREE + 1];
3363 fill_truncated_gaussian_moments(y_left, y_right, &mut t[..=max_degree]);
3364 let mut mu_pow = [1.0_f64; MAX_AFFINE_ANCHOR_DEGREE + 1];
3370 for k in 1..=max_degree {
3371 mu_pow[k] = mu_pow[k - 1] * mu;
3372 }
3373 let inv_s = 1.0 / s;
3374 let mut inv_s_pow = [1.0_f64; MAX_AFFINE_ANCHOR_DEGREE + 1];
3375 for k in 1..=max_degree {
3376 inv_s_pow[k] = inv_s_pow[k - 1] * inv_s;
3377 }
3378 out.fill(0.0);
3379 for n in 0..=max_degree {
3380 let mut acc = 0.0;
3381 let mut binom = 1.0;
3383 for k in 0..=n {
3384 let term = binom * mu_pow[n - k] * inv_s_pow[k];
3385 acc = term.mul_add(t[k], acc);
3386 if k < n {
3387 binom = binom * (n - k) as f64 / (k + 1) as f64;
3388 }
3389 }
3390 out[n] = anchor * acc;
3391 }
3392}
3393
3394fn affine_value_from_moment_primitive(alpha: f64, beta: f64, left: f64, right: f64) -> f64 {
3395 let s = (1.0 + beta * beta).sqrt();
3407 let h = alpha / s;
3408 let rho = -beta / s;
3409 bivariate_normal_cdf_interval(h, left, right, rho).unwrap_or(0.0)
3410}
3411
3412pub fn evaluate_affine_cell_state(
3419 cell: DenestedCubicCell,
3420 max_degree: usize,
3421) -> Result<CellMomentState, String> {
3422 let alpha = cell.c0;
3423 let beta = cell.c1;
3424 let value = affine_value_from_moment_primitive(alpha, beta, cell.left, cell.right);
3425 let moments = affine_anchor_moment_vector(alpha, beta, cell.left, cell.right, max_degree);
3426 Ok(CellMomentState {
3427 branch: ExactCellBranch::Affine,
3428 value,
3429 moments: moments.into(),
3430 })
3431}
3432
3433fn evaluate_affine_cell_derivative_state(
3434 cell: DenestedCubicCell,
3435 max_degree: usize,
3436) -> Result<CellDerivativeMomentState, String> {
3437 let alpha = cell.c0;
3438 let beta = cell.c1;
3439 let moments = affine_anchor_moment_vector(alpha, beta, cell.left, cell.right, max_degree);
3440 Ok(CellDerivativeMomentState {
3441 branch: ExactCellBranch::Affine,
3442 moments: moments.into(),
3443 })
3444}
3445
/// Adds `mw * z^k` to `moments[k]` for every index `k`.
///
/// The power of `z` is carried along by repeated multiplication, and each
/// update is a fused multiply-add.
#[inline]
fn accumulate_moments_unrolled4(moments: &mut [f64], mw: f64, z: f64) {
    let _final_power = moments.iter_mut().fold(1.0_f64, |power, slot| {
        *slot = mw.mul_add(power, *slot);
        power * z
    });
}
3460
3461#[inline(always)]
3504fn evaluate_non_affine_cell_with_rule<const COMPUTE_VALUE: bool>(
3505 cell: DenestedCubicCell,
3506 max_degree: usize,
3507 gl_nodes: &[f64],
3508 gl_weights: &[f64],
3509) -> (CellMomentVec, f64) {
3510 let mut moments: CellMomentVec = smallvec![0.0_f64; max_degree + 1];
3511 let mut value_integral = 0.0_f64;
3512 let center = 0.5 * (cell.left + cell.right);
3513 let half_width = 0.5 * (cell.right - cell.left);
3514 let c0 = cell.c0;
3515 let c1 = cell.c1;
3516 let c2 = cell.c2;
3517 let c3 = cell.c3;
3518 let moments_slice: &mut [f64] = &mut moments;
3519 assert_eq!(gl_nodes.len(), gl_weights.len());
3520 use wide::f64x4;
3521 let center_v = f64x4::splat(center);
3522 let half_width_v = f64x4::splat(half_width);
3523 let c0_v = f64x4::splat(c0);
3524 let c1_v = f64x4::splat(c1);
3525 let c2_v = f64x4::splat(c2);
3526 let c3_v = f64x4::splat(c3);
3527 let neg_half_v = f64x4::splat(-0.5);
3528 let n_total = gl_nodes.len();
3529 let n_simd = n_total - (n_total % 4);
3530 let mut i = 0;
3531 while i < n_simd {
3532 let node_v = f64x4::from([
3533 gl_nodes[i],
3534 gl_nodes[i + 1],
3535 gl_nodes[i + 2],
3536 gl_nodes[i + 3],
3537 ]);
3538 let weight_v = f64x4::from([
3539 gl_weights[i],
3540 gl_weights[i + 1],
3541 gl_weights[i + 2],
3542 gl_weights[i + 3],
3543 ]);
3544 let z_v = half_width_v.mul_add(node_v, center_v);
3545 let eta_v = c3_v
3547 .mul_add(z_v, c2_v)
3548 .mul_add(z_v, c1_v)
3549 .mul_add(z_v, c0_v);
3550 let z2_v = z_v * z_v;
3551 let neg_q_v = neg_half_v * (z2_v + eta_v * eta_v);
3552 let exp_negq_v = neg_q_v.exp();
3553 let moment_weight_v = weight_v * exp_negq_v;
3554 let z_arr = z_v.to_array();
3555 let mw_arr = moment_weight_v.to_array();
3556 if COMPUTE_VALUE {
3557 for lane in 0..4 {
3558 let z = z_arr[lane];
3559 let mw = mw_arr[lane];
3560 accumulate_moments_unrolled4(moments_slice, mw, z);
3561 let node = gl_nodes[i + lane];
3574 let weight = gl_weights[i + lane];
3575 let z_ref = center + half_width * node;
3576 let eta_ref = c0 + c1 * z_ref + c2 * z_ref * z_ref + c3 * z_ref * z_ref * z_ref;
3577 value_integral += weight * (-0.5 * z_ref * z_ref).exp() * normal_cdf(eta_ref);
3578 }
3579 } else {
3580 for lane in 0..4 {
3581 let z = z_arr[lane];
3582 let mw = mw_arr[lane];
3583 accumulate_moments_unrolled4(moments_slice, mw, z);
3584 }
3585 }
3586 i += 4;
3587 }
3588 while i < n_total {
3589 let node = gl_nodes[i];
3590 let weight = gl_weights[i];
3591 let z = center + half_width * node;
3592 let eta = c3.mul_add(z, c2).mul_add(z, c1).mul_add(z, c0);
3593 let q = 0.5 * (z * z + eta * eta);
3594 let moment_weight = weight * (-q).exp();
3595 accumulate_moments_unrolled4(moments_slice, moment_weight, z);
3596 if COMPUTE_VALUE {
3597 let eta_ref = c0 + c1 * z + c2 * z * z + c3 * z * z * z;
3602 value_integral += weight * (-0.5 * z * z).exp() * normal_cdf(eta_ref);
3603 }
3604 i += 1;
3605 }
3606 for moment in moments_slice.iter_mut() {
3610 *moment *= half_width;
3611 }
3612 let value = if COMPUTE_VALUE {
3613 value_integral * half_width
3614 } else {
3615 value_integral
3616 };
3617 (moments, value)
3618}
3619
/// Relative tolerance for `non_affine_ladder_converged`.
///
/// Two consecutive rungs agree when the largest absolute moment difference is
/// at most this factor times the largest absolute moment of the finer rung.
const NON_AFFINE_LADDER_RTOL: f64 = 1e-15;
3646
/// Node counts of the quadrature rules tried in order by the ladder.
///
/// `non_affine_ladder_rules` builds one `gauss_legendre_rule` per entry.
const NON_AFFINE_LADDER_RUNGS: [usize; 5] = [12, 24, 48, 96, 192];
3650
3651fn non_affine_ladder_rules() -> &'static [(Vec<f64>, Vec<f64>)] {
3658 static RULES: std::sync::OnceLock<Vec<(Vec<f64>, Vec<f64>)>> = std::sync::OnceLock::new();
3659 RULES.get_or_init(|| {
3660 NON_AFFINE_LADDER_RUNGS
3661 .iter()
3662 .map(|&n| gauss_legendre_rule(n))
3663 .collect()
3664 })
3665}
3666
/// Nodes and weights of the `n`-point Gauss-Legendre rule on `[-1, 1]`.
///
/// Each root of the degree-`n` Legendre polynomial is found by Newton
/// iteration (at most 100 steps, stopping once the update is no larger than
/// `f64::EPSILON`) from a cosine starting guess. Only half of the roots are
/// solved for; the rest are filled in by symmetry. Nodes are returned in
/// ascending order and `n == 0` yields two empty vectors.
fn gauss_legendre_rule(n: usize) -> (Vec<f64>, Vec<f64>) {
    let mut nodes = vec![0.0_f64; n];
    let mut weights = vec![0.0_f64; n];
    let half_count = n.div_ceil(2);
    for i in 0..half_count {
        let mut root = (std::f64::consts::PI * (i as f64 + 0.75) / (n as f64 + 0.5)).cos();
        let mut derivative = 0.0_f64;
        for _ in 0..100 {
            // Three-term recurrence: after the loop `current` is P_n(root) and
            // `previous` is P_{n-1}(root).
            let mut current = 1.0_f64;
            let mut previous = 0.0_f64;
            for j in 1..=n {
                let older = previous;
                previous = current;
                current =
                    ((2 * j - 1) as f64 * root * previous - (j - 1) as f64 * older) / j as f64;
            }
            derivative = n as f64 * (root * current - previous) / (root * root - 1.0);
            let before = root;
            root = before - current / derivative;
            if (root - before).abs() <= f64::EPSILON {
                break;
            }
        }
        // Mirror the positive root to fill both ends of the table.
        let mirror = n - 1 - i;
        nodes[i] = -root;
        nodes[mirror] = root;
        let weight = 2.0 / ((1.0 - root * root) * derivative * derivative);
        weights[i] = weight;
        weights[mirror] = weight;
    }
    (nodes, weights)
}
3703
3704fn non_affine_ladder_converged(coarse: &CellMomentVec, fine: &CellMomentVec) -> bool {
3719 let mut scale = 0.0_f64;
3720 let mut err = 0.0_f64;
3721 for (&c, &f) in coarse.iter().zip(fine.iter()) {
3722 scale = scale.max(f.abs());
3723 err = err.max((c - f).abs());
3724 }
3725 if !(scale.is_finite() && err.is_finite()) {
3726 return false;
3727 }
3728 err <= NON_AFFINE_LADDER_RTOL * scale
3729}
3730
3731pub(crate) static NON_AFFINE_LADDER_CERT_COUNTS: [AtomicU64; NON_AFFINE_LADDER_RUNGS.len() + 1] = [
3739 AtomicU64::new(0),
3740 AtomicU64::new(0),
3741 AtomicU64::new(0),
3742 AtomicU64::new(0),
3743 AtomicU64::new(0),
3744 AtomicU64::new(0),
3745];
3746
3747pub fn non_affine_ladder_cert_histogram() -> (Vec<(usize, u64)>, u64) {
3750 let per_rung = NON_AFFINE_LADDER_RUNGS
3751 .iter()
3752 .enumerate()
3753 .map(|(i, &n)| (n, NON_AFFINE_LADDER_CERT_COUNTS[i].load(Ordering::Relaxed)))
3754 .collect();
3755 let terminal =
3756 NON_AFFINE_LADDER_CERT_COUNTS[NON_AFFINE_LADDER_RUNGS.len()].load(Ordering::Relaxed);
3757 (per_rung, terminal)
3758}
3759
3760#[inline]
3765fn evaluate_non_affine_cell_simd<const COMPUTE_VALUE: bool>(
3766 cell: DenestedCubicCell,
3767 max_degree: usize,
3768) -> (CellMomentVec, f64) {
3769 let mut prev: Option<(CellMomentVec, f64)> = None;
3770 for (i, (nodes, weights)) in non_affine_ladder_rules().iter().enumerate() {
3771 let cur =
3772 evaluate_non_affine_cell_with_rule::<COMPUTE_VALUE>(cell, max_degree, nodes, weights);
3773 if let Some(prev) = prev.as_ref()
3774 && non_affine_ladder_converged(&prev.0, &cur.0)
3775 {
3776 NON_AFFINE_LADDER_CERT_COUNTS[i].fetch_add(1, Ordering::Relaxed);
3777 return cur;
3778 }
3779 prev = Some(cur);
3780 }
3781 NON_AFFINE_LADDER_CERT_COUNTS[NON_AFFINE_LADDER_RUNGS.len()].fetch_add(1, Ordering::Relaxed);
3782 evaluate_non_affine_cell_with_rule::<COMPUTE_VALUE>(cell, max_degree, &GL_NODES, &GL_WEIGHTS)
3783}
3784
3785fn evaluate_non_affine_cell_value_terminal(cell: DenestedCubicCell) -> f64 {
3805 let center = 0.5 * (cell.left + cell.right);
3806 let half_width = 0.5 * (cell.right - cell.left);
3807 let c0 = cell.c0;
3808 let c1 = cell.c1;
3809 let c2 = cell.c2;
3810 let c3 = cell.c3;
3811 let mut value_integral = 0.0_f64;
3812 for (&node, &weight) in GL_NODES.iter().zip(GL_WEIGHTS.iter()) {
3813 let z = center + half_width * node;
3814 let eta = c0 + c1 * z + c2 * z * z + c3 * z * z * z;
3815 value_integral += weight * (-0.5 * z * z).exp() * normal_cdf(eta);
3816 }
3817 value_integral * half_width
3818}
3819
3820fn evaluate_non_affine_cell_state(
3821 cell: DenestedCubicCell,
3822 branch: ExactCellBranch,
3823 max_degree: usize,
3824) -> Result<CellMomentState, String> {
3825 let (moments, _) = evaluate_non_affine_cell_simd::<false>(cell, max_degree);
3826 let value_integral = evaluate_non_affine_cell_value_terminal(cell);
3827 Ok(CellMomentState {
3832 branch,
3833 value: value_integral / (std::f64::consts::TAU).sqrt(),
3834 moments,
3835 })
3836}
3837
3838fn evaluate_non_affine_cell_derivative_state(
3839 cell: DenestedCubicCell,
3840 branch: ExactCellBranch,
3841 max_degree: usize,
3842) -> Result<CellDerivativeMomentState, String> {
3843 let (moments, _) = evaluate_non_affine_cell_simd::<false>(cell, max_degree);
3844 Ok(CellDerivativeMomentState { branch, moments })
3845}
3846
3847pub fn evaluate_cell_moments(
3853 cell: DenestedCubicCell,
3854 max_degree: usize,
3855) -> Result<CellMomentState, String> {
3856 if !TAIL_CELL_MOMENT_CACHE_ENABLED.load(std::sync::atomic::Ordering::Relaxed) {
3857 return evaluate_cell_moments_uncached(cell, max_degree);
3858 }
3859 tail_cell_moment_cache().evaluate(cell, max_degree)
3860}
3861
3862pub fn evaluate_cell_moments_uncached(
3867 cell: DenestedCubicCell,
3868 max_degree: usize,
3869) -> Result<CellMomentState, String> {
3870 evaluate_cell_state_dispatched(
3871 cell,
3872 max_degree,
3873 evaluate_affine_cell_state,
3874 evaluate_non_affine_cell_state,
3875 )
3876}
3877
3878pub fn evaluate_cell_derivative_moments_uncached(
3885 cell: DenestedCubicCell,
3886 max_degree: usize,
3887) -> Result<CellDerivativeMomentState, String> {
3888 evaluate_cell_state_dispatched(
3889 cell,
3890 max_degree,
3891 evaluate_affine_cell_derivative_state,
3892 evaluate_non_affine_cell_derivative_state,
3893 )
3894}
3895
3896fn evaluate_cell_state_dispatched<S>(
3905 cell: DenestedCubicCell,
3906 max_degree: usize,
3907 affine: fn(DenestedCubicCell, usize) -> Result<S, String>,
3908 non_affine: fn(DenestedCubicCell, ExactCellBranch, usize) -> Result<S, String>,
3909) -> Result<S, String> {
3910 let left_inf = !cell.left.is_finite();
3911 let right_inf = !cell.right.is_finite();
3912 if left_inf || right_inf {
3913 if cell.c2.abs() > NORMALIZED_CELL_BRANCH_TOL || cell.c3.abs() > NORMALIZED_CELL_BRANCH_TOL
3917 {
3918 return Err(CubicCellKernelError::invalid_cell_shape(format!(
3919 "semi-infinite cell [{}, {}] must be affine (c2=c3=0), got c2={:.3e}, c3={:.3e}",
3920 cell.left, cell.right, cell.c2, cell.c3
3921 ))
3922 .into());
3923 }
3924 return affine(cell, max_degree);
3925 }
3926 if cell.right <= cell.left {
3927 return Err(CubicCellKernelError::invalid_cell_shape(format!(
3928 "finite cell must have left < right, got [{}, {}]",
3929 cell.left, cell.right
3930 ))
3931 .into());
3932 }
3933 let branch = branch_cell(cell)?;
3934 if branch == ExactCellBranch::Affine {
3935 return affine(cell, max_degree);
3936 }
3937 if branch == ExactCellBranch::Sextic {
3938 let lead = sextic_qprime_coefficients(cell.c0, cell.c1, cell.c2, cell.c3)[5];
3939 if !lead.is_finite() {
3940 return Err(CubicCellKernelError::invalid_cell_shape(format!(
3941 "sextic cell evaluation encountered non-finite leading coefficient: {lead:.3e}"
3942 ))
3943 .into());
3944 }
3945 if let Some(lower_branch) = degenerate_sextic_branch(cell, lead)? {
3946 return match lower_branch {
3947 ExactCellBranch::Quartic => non_affine(
3948 DenestedCubicCell { c3: 0.0, ..cell },
3949 ExactCellBranch::Quartic,
3950 max_degree,
3951 ),
3952 ExactCellBranch::Affine => affine(
3953 DenestedCubicCell {
3954 c2: 0.0,
3955 c3: 0.0,
3956 ..cell
3957 },
3958 max_degree,
3959 ),
3960 ExactCellBranch::Sextic => Err(CubicCellKernelError::invalid_cell_shape(
3961 "internal: degenerate_sextic_branch returned Sextic as a lowered branch",
3962 )
3963 .into()),
3964 };
3965 }
3966 }
3967 non_affine(cell, branch, max_degree)
3968}
3969
3970pub fn evaluate_cell_moments_cached(
3977 cell: DenestedCubicCell,
3978 max_degree: usize,
3979 cache: &CellMomentLruCache,
3980 stats: Option<&CellMomentCacheStats>,
3981) -> Result<CellMomentState, String> {
3982 if matches!(branch_cell(cell), Ok(ExactCellBranch::Affine)) {
3991 if let Some(stats) = stats {
3992 stats.misses.fetch_add(1, Ordering::Relaxed);
3993 }
3994 return evaluate_cell_moments_uncached(cell, max_degree);
3995 }
3996 let key = CellFingerprint::new(cell);
3997 let existing_derivative = match cache.get(&key) {
3998 Some(cached) => {
3999 if let Some(state) = cached.state_for_degree(max_degree) {
4000 if let Some(stats) = stats {
4001 stats.hits.fetch_add(1, Ordering::Relaxed);
4002 }
4003 return Ok(state);
4004 }
4005 cached.derivative_state.clone()
4009 }
4010 None => None,
4011 };
4012 if let Some(stats) = stats {
4013 stats.misses.fetch_add(1, Ordering::Relaxed);
4014 }
4015 let state = evaluate_cell_moments(cell, max_degree)?;
4016 let shared = Arc::new(state);
4021 let mut entry = CachedCellMoments::new(Arc::clone(&shared));
4022 if let Some(derivative) = existing_derivative {
4023 entry = entry.with_derivative(derivative);
4024 }
4025 cache.insert(key, entry);
4026 Ok(Arc::try_unwrap(shared).unwrap_or_else(|a| (*a).clone()))
4027}
4028
4029pub fn evaluate_cell_derivative_moments_cached(
4035 cell: DenestedCubicCell,
4036 max_degree: usize,
4037 cache: &CellMomentLruCache,
4038 stats: Option<&CellMomentCacheStats>,
4039) -> Result<CellDerivativeMomentState, String> {
4040 if matches!(branch_cell(cell), Ok(ExactCellBranch::Affine)) {
4044 if let Some(stats) = stats {
4045 stats.misses.fetch_add(1, Ordering::Relaxed);
4046 }
4047 return evaluate_cell_derivative_moments_uncached(cell, max_degree);
4048 }
4049 let key = CellFingerprint::new(cell);
4050 let existing_value = match cache.get(&key) {
4051 Some(cached) => {
4052 if let Some(state) = cached.derivative_state_for_degree(max_degree) {
4053 if let Some(stats) = stats {
4054 stats.hits.fetch_add(1, Ordering::Relaxed);
4055 }
4056 return Ok(state);
4057 }
4058 cached.state.clone()
4062 }
4063 None => None,
4064 };
4065 if let Some(stats) = stats {
4066 stats.misses.fetch_add(1, Ordering::Relaxed);
4067 }
4068 let state = evaluate_cell_derivative_moments_uncached(cell, max_degree)?;
4069 let shared = Arc::new(state);
4074 let mut entry = CachedCellMoments::new_derivative(Arc::clone(&shared));
4075 if let Some(value) = existing_value {
4076 entry = entry.with_value(value);
4077 }
4078 cache.insert(key, entry);
4079 Ok(Arc::try_unwrap(shared).unwrap_or_else(|a| (*a).clone()))
4080}
4081
4082pub fn evaluate_cell_moments_with_scratch<'a>(
4089 cell: DenestedCubicCell,
4090 max_degree: usize,
4091 scratch: &'a mut CellMomentScratch,
4092) -> Result<CellMomentStateRef<'a>, String> {
4093 let state = evaluate_cell_moments(cell, max_degree)?;
4094 let out = scratch.prepare_moments(max_degree + 1);
4095 out.copy_from_slice(&state.moments);
4096 Ok(CellMomentStateRef {
4097 branch: state.branch,
4098 value: state.value,
4099 moments: out,
4100 })
4101}
4102
4103#[cfg(test)]
4104mod tests {
4105 use super::*;
4106 use gam_math::probability::normal_pdf;
4107
4108 #[inline]
4116 fn cell_third_derivative_boundary_integrand(
4117 cell: DenestedCubicCell,
4118 first_coefficients_r: &[f64],
4119 first_coefficients_s: &[f64],
4120 first_coefficients_t: &[f64],
4121 second_coefficients_rs: &[f64],
4122 second_coefficients_rt: &[f64],
4123 second_coefficients_st: &[f64],
4124 third_coefficients_rst: &[f64],
4125 z: f64,
4126 ) -> f64 {
4127 let eta = cell.eta(z);
4128 let c_r = poly_eval_at(first_coefficients_r, z);
4129 let c_s = poly_eval_at(first_coefficients_s, z);
4130 let c_t = poly_eval_at(first_coefficients_t, z);
4131 let c_rs = poly_eval_at(second_coefficients_rs, z);
4132 let c_rt = poly_eval_at(second_coefficients_rt, z);
4133 let c_st = poly_eval_at(second_coefficients_st, z);
4134 let c_rst = poly_eval_at(third_coefficients_rst, z);
4135 let amplitude = c_rst - eta * (c_rs * c_t + c_rt * c_s + c_st * c_r)
4136 + (eta * eta - 1.0) * c_r * c_s * c_t;
4137 amplitude * (-cell.q(z)).exp() * INV_TWO_PI
4138 }
4139
4140 #[inline]
4141 pub(super) fn polynomial_value(coefficients: &[f64], z: f64) -> f64 {
4142 coefficients
4143 .iter()
4144 .rev()
4145 .fold(0.0, |acc, &coeff| acc * z + coeff)
4146 }
4147
4148 fn reset_cell_moment_test_reallocs() {
4149 super::CELL_MOMENT_REALLOCS.store(0, std::sync::atomic::Ordering::Relaxed);
4150 }
4151
4152 fn cell_moment_test_reallocs() -> usize {
4153 super::CELL_MOMENT_REALLOCS.load(std::sync::atomic::Ordering::Relaxed)
4154 }
4155
4156 fn assert_close_rel(label: &str, actual: f64, expected: f64, tol: f64) {
4157 let denom = expected.abs().max(1.0);
4158 let rel = (actual - expected).abs() / denom;
4159 assert!(
4160 rel <= tol,
4161 "{label}: actual={actual:.17e} expected={expected:.17e} rel={rel:.3e} tol={tol:.3e}"
4162 );
4163 }
4164
4165 #[test]
4180 fn link_basis_cell_fourth_ab_partials_vanish_third_are_nonzero() {
4181 let span = LocalSpanCubic {
4182 left: -0.4,
4183 right: 1.6,
4184 c0: 0.37,
4185 c1: -0.81,
4186 c2: 0.53,
4187 c3: -0.29,
4188 };
4189 let a0 = 0.23_f64;
4190 let b0 = 0.61_f64;
4191 let h = 1e-2_f64;
4192
4193 let stencil = |order: usize| -> &'static [(i64, f64)] {
4195 match order {
4196 0 => &[(0, 1.0)],
4197 1 => &[(-1, -0.5), (1, 0.5)],
4198 2 => &[(-1, 1.0), (0, -2.0), (1, 1.0)],
4199 3 => &[(-2, -0.5), (-1, 1.0), (1, -1.0), (2, 0.5)],
4200 4 => &[(-2, 1.0), (-1, -4.0), (0, 6.0), (1, -4.0), (2, 1.0)],
4201 _ => &[(0, 1.0)],
4202 }
4203 };
4204 let fd = |k: usize, na: usize, nb: usize| -> f64 {
4206 let mut acc = 0.0;
4207 for &(ia, wa) in stencil(na) {
4208 for &(ib, wb) in stencil(nb) {
4209 let a = a0 + (ia as f64) * h;
4210 let b = b0 + (ib as f64) * h;
4211 acc += wa * wb * link_basis_cell_coefficients(span, a, b)[k];
4212 }
4213 }
4214 acc / h.powi((na + nb) as i32)
4215 };
4216
4217 let (p3_aaa, p3_aab, p3_abb, p3_bbb) = link_basis_cell_third_partials(span);
4218
4219 let mut max_third = 0.0_f64;
4223 for k in 0..4 {
4224 for (label, (na, nb), analytic) in [
4225 ("aaa", (3usize, 0usize), p3_aaa[k]),
4226 ("aab", (2, 1), p3_aab[k]),
4227 ("abb", (1, 2), p3_abb[k]),
4228 ("bbb", (0, 3), p3_bbb[k]),
4229 ] {
4230 let got = fd(k, na, nb);
4231 assert!(
4232 (got - analytic).abs() <= 1e-4 + 1e-3 * analytic.abs(),
4233 "3rd partial {label}[{k}] analytic {analytic:+.6e} vs FD {got:+.6e}"
4234 );
4235 max_third = max_third.max(analytic.abs());
4236 }
4237 }
4238 assert!(
4239 max_third > 1e-1,
4240 "expected an appreciable nonzero 3rd (a,b)-partial; max |analytic| = {max_third:.3e}"
4241 );
4242
4243 for k in 0..4 {
4247 for (na, nb) in [(4usize, 0usize), (3, 1), (2, 2), (1, 3), (0, 4)] {
4248 let got = fd(k, na, nb);
4249 assert!(
4250 got.abs() <= 1e-2,
4251 "4th (a,b)-partial ∂^{na}_a∂^{nb}_b of cell coeff[{k}] must vanish, FD = {got:+.6e}"
4252 );
4253 }
4254 }
4255 }
4256
4257 #[test]
4258 fn non_affine_cell_state_grid_matches_public_cell_moments_reference() {
4259 let cells = [
4260 DenestedCubicCell {
4261 left: -1.25,
4262 right: -0.2,
4263 c0: -0.35,
4264 c1: 0.85,
4265 c2: 0.04,
4266 c3: -0.015,
4267 },
4268 DenestedCubicCell {
4269 left: -0.2,
4270 right: 0.55,
4271 c0: 0.12,
4272 c1: -0.65,
4273 c2: -0.025,
4274 c3: 0.02,
4275 },
4276 DenestedCubicCell {
4277 left: 0.55,
4278 right: 1.6,
4279 c0: 0.42,
4280 c1: 0.35,
4281 c2: 0.018,
4282 c3: 0.012,
4283 },
4284 ];
4285 for cell in cells {
4286 let branch = branch_cell(cell).expect("branch");
4287 assert_ne!(branch, ExactCellBranch::Affine);
4288 for max_degree in [0usize, 2, 4, 9, 16] {
4289 let direct = evaluate_non_affine_cell_state(cell, branch, max_degree)
4290 .expect("direct non-affine transport");
4291 let public = evaluate_cell_moments(cell, max_degree).expect("public evaluator");
4292 assert_eq!(direct.branch, public.branch);
4293 assert_eq!(direct.moments.len(), public.moments.len());
4294 let value_scale = direct.value.abs().max(public.value.abs()).max(1.0);
4295 assert!(
4296 (direct.value - public.value).abs() <= 1e-10 * value_scale,
4297 "value mismatch for {cell:?} degree {max_degree}: direct={} public={}",
4298 direct.value,
4299 public.value
4300 );
4301 for (degree, (lhs, rhs)) in
4302 direct.moments.iter().zip(public.moments.iter()).enumerate()
4303 {
4304 let scale = lhs.abs().max(rhs.abs()).max(1.0);
4305 assert!(
4306 (lhs - rhs).abs() <= 1e-10 * scale,
4307 "moment {degree} mismatch for {cell:?} degree {max_degree}: {lhs} vs {rhs}"
4308 );
4309 }
4310 }
4311 }
4312 }
4313
4314 #[test]
4315 fn affine_tail_cell_memo_matches_uncached_grid_and_records_hits() {
4316 let cache = TailCellMomentCache::new();
4322 let c0s = [-2.0, -0.25, 0.0, 1.5];
4323 let c1s = [-1.2, -0.05, 0.0, 0.8];
4324 let endpoints = [-4.0, -1.0, 0.0, 2.5, 6.0];
4325 let degrees = [0_usize, 4, 9, 16, 24];
4326
4327 for &c0 in &c0s {
4328 for &c1 in &c1s {
4329 for &endpoint in &endpoints {
4330 for &max_degree in °rees {
4331 for &(left, right) in
4332 &[(f64::NEG_INFINITY, endpoint), (endpoint, f64::INFINITY)]
4333 {
4334 let cell = DenestedCubicCell {
4335 left,
4336 right,
4337 c0,
4338 c1,
4339 c2: 0.0,
4340 c3: 0.0,
4341 };
4342 let expected = evaluate_cell_moments_uncached(cell, max_degree)
4343 .expect("uncached affine tail moments");
4344 let actual = cache
4345 .evaluate(cell, max_degree)
4346 .expect("cached affine tail moments miss");
4347 let repeat = cache
4348 .evaluate(cell, max_degree)
4349 .expect("cached affine tail moments hit");
4350 assert_eq!(actual.branch, expected.branch);
4351 assert_eq!(repeat.branch, expected.branch);
4352 assert_close_rel(
4353 "tail value miss",
4354 actual.value,
4355 expected.value,
4356 1e-14,
4357 );
4358 assert_close_rel("tail value hit", repeat.value, expected.value, 1e-14);
4359 assert_eq!(actual.moments.len(), expected.moments.len());
4360 assert_eq!(repeat.moments.len(), expected.moments.len());
4361 for (idx, ((a, r), e)) in actual
4362 .moments
4363 .iter()
4364 .zip(repeat.moments.iter())
4365 .zip(expected.moments.iter())
4366 .enumerate()
4367 {
4368 assert_close_rel(
4369 &format!("tail moment miss[{idx}]"),
4370 *a,
4371 *e,
4372 1e-14,
4373 );
4374 assert_close_rel(&format!("tail moment hit[{idx}]"), *r, *e, 1e-14);
4375 }
4376 }
4377 }
4378 }
4379 }
4380 }
4381
4382 let stats = cache.stats();
4383 assert_eq!(stats.misses, stats.entries);
4384 assert!(
4385 stats.hits >= stats.misses,
4386 "expected repeat hits: {stats:?}"
4387 );
4388 assert!(
4389 stats.hit_rate() >= 0.5,
4390 "unexpected low hit rate: {stats:?}"
4391 );
4392 }
4393
4394 fn reference_bivariate_normal_cdf_20(h: f64, k: f64, rho: f64) -> f64 {
4395 if h == f64::NEG_INFINITY || k == f64::NEG_INFINITY {
4396 return 0.0;
4397 }
4398 if h == f64::INFINITY {
4399 return normal_cdf(k);
4400 }
4401 if k == f64::INFINITY {
4402 return normal_cdf(h);
4403 }
4404 let rho_clamped = rho.clamp(-1.0, 1.0);
4405 if rho_clamped >= 1.0 - 1e-12 {
4406 return normal_cdf(h.min(k));
4407 }
4408 if rho_clamped <= -1.0 + 1e-12 {
4409 return (normal_cdf(h) - normal_cdf(-k)).clamp(0.0, 1.0);
4410 }
4411
4412 let hs = 0.5 * (h * h + k * k);
4413 let asr = rho_clamped.asin();
4414 let mut sum = 0.0;
4415 for (&node, &weight) in GL20_NODES.iter().zip(GL20_WEIGHTS.iter()) {
4416 let sn = (0.5 * asr * (node + 1.0)).sin();
4417 let one_minus = 1.0 - sn * sn;
4418 let expo = ((sn * h * k) - hs) / one_minus;
4419 sum += weight * expo.exp();
4420 }
4421 (normal_cdf(h) * normal_cdf(k) + asr * sum / (4.0 * std::f64::consts::PI)).clamp(0.0, 1.0)
4422 }
4423
4424 #[test]
4425 fn non_affine_cell_state_reference_grid_matches_public_moments() {
4426 let c0s = [-0.4, 0.0, 0.35];
4427 let c1s = [-0.8, 0.25, 1.1];
4428 let c2s = [-0.12, 0.08];
4429 let c3s = [-0.04, 0.03];
4430 let intervals = [(-1.25, -0.2), (-0.5, 0.75), (0.1, 1.4)];
4431 let degrees = [3usize, 6, 9, 12];
4432
4433 for &c0 in &c0s {
4434 for &c1 in &c1s {
4435 for &c2 in &c2s {
4436 for &c3 in &c3s {
4437 for &(left, right) in &intervals {
4438 let cell = DenestedCubicCell {
4439 left,
4440 right,
4441 c0,
4442 c1,
4443 c2,
4444 c3,
4445 };
4446 let branch = branch_cell(cell).expect("branch");
4447 assert_ne!(branch, ExactCellBranch::Affine);
4448 for °ree in °rees {
4449 let direct = evaluate_non_affine_cell_state(cell, branch, degree)
4450 .expect("direct non-affine state");
4451 let public = evaluate_cell_moments(cell, degree)
4452 .expect("public non-affine state");
4453 assert_eq!(direct.branch, public.branch);
4454 let value_scale =
4455 direct.value.abs().max(public.value.abs()).max(1.0);
4456 assert!(
4457 (direct.value - public.value).abs() / value_scale <= 1.0e-15,
4458 "value mismatch for {cell:?}, degree {degree}: direct={:.17e}, public={:.17e}",
4459 direct.value,
4460 public.value
4461 );
4462 assert_eq!(direct.moments.len(), public.moments.len());
4463 for (idx, (&a, &b)) in
4464 direct.moments.iter().zip(public.moments.iter()).enumerate()
4465 {
4466 let scale = a.abs().max(b.abs()).max(1.0);
4467 assert!(
4468 (a - b).abs() / scale <= 1.0e-15,
4469 "moment {idx} mismatch for {cell:?}, degree {degree}: direct={a:.17e}, public={b:.17e}"
4470 );
4471 }
4472 }
4473 }
4474 }
4475 }
4476 }
4477 }
4478 }
4479
4480 #[test]
4481 fn bivariate_normal_cdf_matches_reference_grid_to_1e_minus_10() {
4482 let hs = [-8.0, -5.0, -3.0, -1.5, -0.5, 0.0, 0.25, 1.0, 2.5, 5.0, 8.0];
4483 let ks = [-8.0, -4.0, -2.0, -0.75, 0.0, 0.4, 1.25, 3.0, 6.0, 8.0];
4484 let rhos = [
4485 -0.999_999_999_999,
4486 -0.999,
4487 -0.95,
4488 -0.7,
4489 -0.3,
4490 -1.0e-12,
4491 0.0,
4492 1.0e-12,
4493 0.3,
4494 0.7,
4495 0.95,
4496 0.999,
4497 0.999_999_999_999,
4498 ];
4499 for &h in &hs {
4500 for &k in &ks {
4501 for &rho in &rhos {
4502 let actual = bivariate_normal_cdf(h, k, rho).expect("bvn");
4503 let expected = reference_bivariate_normal_cdf_20(h, k, rho);
4504 let scale = expected.abs().max(1.0e-300);
4505 let rel = (actual - expected).abs() / scale;
4506 assert!(
4507 rel < 1.0e-10 || (actual - expected).abs() < 1.0e-14,
4508 "h={h} k={k} rho={rho} actual={actual:.17e} expected={expected:.17e} rel={rel:.3e}"
4509 );
4510 }
4511 }
4512 }
4513 }
4514
4515 #[test]
4516 fn bivariate_normal_cdf_matches_reference_lcg_property_samples() {
4517 let mut seed = 0x5eed_cafe_f00d_u64;
4518 let mut next_unit = || {
4519 seed = seed.wrapping_mul(6_364_136_223_846_793_005).wrapping_add(1);
4520 ((seed >> 11) as f64) * (1.0 / ((1_u64 << 53) as f64))
4521 };
4522 for _ in 0..4096 {
4523 let h = -8.0 + 16.0 * next_unit();
4524 let k = -8.0 + 16.0 * next_unit();
4525 let rho = -0.999 + 1.998 * next_unit();
4526 let actual = bivariate_normal_cdf(h, k, rho).expect("bvn");
4527 let expected = reference_bivariate_normal_cdf_20(h, k, rho);
4528 let scale = expected.abs().max(1.0e-300);
4529 let rel = (actual - expected).abs() / scale;
4530 assert!(
4531 rel < 1.0e-10 || (actual - expected).abs() < 1.0e-14,
4532 "h={h} k={k} rho={rho} actual={actual:.17e} expected={expected:.17e} rel={rel:.3e}"
4533 );
4534 }
4535 }
4536
4537 #[test]
4538 fn affine_bvn_interval_primitive_matches_two_cdf_difference() {
4539 let hs = [-6.0, -2.0, -0.25, 0.0, 0.8, 3.0, 6.0];
4540 let bounds = [
4541 (-5.0, -2.0),
4542 (-3.0, -0.1),
4543 (-1.0, 0.0),
4544 (-0.25, 0.75),
4545 (0.2, 3.5),
4546 (2.0, 7.0),
4547 ];
4548 let rhos = [-0.98, -0.8, -0.25, 0.0, 0.25, 0.8, 0.98];
4549 for &h in &hs {
4550 for &(left, right) in &bounds {
4551 for &rho in &rhos {
4552 let actual =
4553 bivariate_normal_cdf_interval(h, left, right, rho).expect("interval");
4554 let expected = (reference_bivariate_normal_cdf_20(h, right, rho)
4555 - reference_bivariate_normal_cdf_20(h, left, rho))
4556 .clamp(0.0, 1.0);
4557 let scale = expected.abs().max(1.0e-300);
4558 let rel = (actual - expected).abs() / scale;
4559 assert!(
4560 rel < 1.0e-10 || (actual - expected).abs() < 1.0e-12,
4561 "h={h} left={left} right={right} rho={rho} actual={actual:.17e} expected={expected:.17e} rel={rel:.3e}"
4562 );
4563 }
4564 }
4565 }
4566 }
4567
4568 fn simpson_integral<F>(left: f64, right: f64, steps: usize, f: F) -> f64
4569 where
4570 F: Fn(f64) -> f64,
4571 {
4572 let n = if steps.is_multiple_of(2) {
4573 steps
4574 } else {
4575 steps + 1
4576 };
4577 let h = (right - left) / n as f64;
4578 let mut acc = f(left) + f(right);
4579 for k in 1..n {
4580 let x = left + h * k as f64;
4581 let w = if k % 2 == 0 { 2.0 } else { 4.0 };
4582 acc += w * f(x);
4583 }
4584 acc * h / 3.0
4585 }
4586
4587 #[test]
4588 fn global_transform_preserves_local_span_polynomial() {
4589 let span = LocalSpanCubic {
4590 left: -1.2,
4591 right: 0.8,
4592 c0: 0.3,
4593 c1: -0.25,
4594 c2: 0.11,
4595 c3: -0.04,
4596 };
4597 let (g0, g1, g2, g3) = global_cubic_from_local(span);
4598 for &x in &[-1.2, -0.7, -0.1, 0.4, 0.8] {
4599 let local = span.evaluate(x);
4600 let global = g0 + g1 * x + g2 * x * x + g3 * x * x * x;
4601 assert!((local - global).abs() < 1e-12);
4602 }
4603 }
4604
4605 #[test]
4606 fn bivariate_normal_cdf_independent_factorizes() {
4607 let h = -0.35;
4608 let k = 0.8;
4609 let out = bivariate_normal_cdf(h, k, 0.0).expect("bvn");
4610 let target = normal_cdf(h) * normal_cdf(k);
4611 assert!((out - target).abs() < 1e-12);
4612 }
4613
4614 #[test]
4615 fn evaluate_affine_cell_state_matches_numeric_integrals() {
4616 let cell = DenestedCubicCell {
4617 left: -0.9,
4618 right: 0.8,
4619 c0: 0.15,
4620 c1: -0.35,
4621 c2: 0.0,
4622 c3: 0.0,
4623 };
4624 let state = evaluate_affine_cell_state(cell, 6).expect("affine cell");
4625 let value_numeric = simpson_integral(cell.left, cell.right, 4000, |z| {
4626 super::normal_cdf(cell.eta(z)) * normal_pdf(z)
4627 });
4628 assert_eq!(state.branch, ExactCellBranch::Affine);
4629 assert!((state.value - value_numeric).abs() < 1e-9);
4630 for degree in 0..=6 {
4631 let target = simpson_integral(cell.left, cell.right, 4000, |z| {
4632 z.powi(degree as i32) * (-cell.q(z)).exp()
4633 });
4634 assert!((state.moments[degree] - target).abs() < 1e-9);
4635 }
4636 }
4637
4638 #[test]
4639 fn affine_cell_value_matches_zero_moment_derivative() {
4640 let cell = DenestedCubicCell {
4641 left: -1.1,
4642 right: 0.7,
4643 c0: 0.23,
4644 c1: -0.41,
4645 c2: 0.0,
4646 c3: 0.0,
4647 };
4648 let h = 1e-6;
4649 let plus = evaluate_affine_cell_state(
4650 DenestedCubicCell {
4651 c0: cell.c0 + h,
4652 ..cell
4653 },
4654 0,
4655 )
4656 .expect("affine plus");
4657 let minus = evaluate_affine_cell_state(
4658 DenestedCubicCell {
4659 c0: cell.c0 - h,
4660 ..cell
4661 },
4662 0,
4663 )
4664 .expect("affine minus");
4665 let center = evaluate_affine_cell_state(cell, 0).expect("affine center");
4666 let d_value = (plus.value - minus.value) / (2.0 * h);
4667 let target = INV_TWO_PI * center.moments[0];
4668 assert!((d_value - target).abs() < 1e-8);
4669 }
4670
4671 #[test]
4672 fn coefficient_partials_match_exact_span_derivatives() {
4673 let score_span = LocalSpanCubic {
4674 left: -0.75,
4675 right: 0.25,
4676 c0: 0.08,
4677 c1: -0.03,
4678 c2: 0.02,
4679 c3: -0.01,
4680 };
4681 let link_span = LocalSpanCubic {
4682 left: -0.6,
4683 right: 0.9,
4684 c0: -0.05,
4685 c1: 0.04,
4686 c2: -0.02,
4687 c3: 0.015,
4688 };
4689 let a = 0.3;
4690 let b = -0.7;
4691 let (dc_da, dc_db) = denested_cell_coefficient_partials(score_span, link_span, a, b);
4692 for &z in &[-0.75, -0.4, -0.1, 0.2] {
4693 let u = a + b * z;
4694 let eta_a = 1.0 + link_span.first_derivative(u);
4695 let eta_b = z + score_span.evaluate(z) + z * link_span.first_derivative(u);
4696 assert!((polynomial_value(&dc_da, z) - eta_a).abs() < 1e-12);
4697 assert!((polynomial_value(&dc_db, z) - eta_b).abs() < 1e-12);
4698 }
4699 }
4700
4701 #[test]
4702 fn second_coefficient_partials_match_exact_span_derivatives() {
4703 let score_span = LocalSpanCubic {
4704 left: -0.75,
4705 right: 0.25,
4706 c0: 0.08,
4707 c1: -0.03,
4708 c2: 0.02,
4709 c3: -0.01,
4710 };
4711 let link_span = LocalSpanCubic {
4712 left: -0.6,
4713 right: 0.9,
4714 c0: -0.05,
4715 c1: 0.04,
4716 c2: -0.02,
4717 c3: 0.015,
4718 };
4719 let a = 0.3;
4720 let b = -0.7;
4721 let second_partials = denested_cell_second_partials(score_span, link_span, a, b);
4722 let dc_daa = second_partials.0;
4723 let dc_dab = second_partials.1;
4724 let dc_dbb = second_partials.2;
4725 for &z in &[-0.75, -0.4, -0.1, 0.2] {
4726 let u = a + b * z;
4727 let eta_aa = link_span.second_derivative(u);
4728 let eta_ab = z * link_span.second_derivative(u);
4729 let eta_bb = z * z * link_span.second_derivative(u);
4730 assert!((polynomial_value(&dc_daa, z) - eta_aa).abs() < 1e-12);
4731 assert!((polynomial_value(&dc_dab, z) - eta_ab).abs() < 1e-12);
4732 assert!((polynomial_value(&dc_dbb, z) - eta_bb).abs() < 1e-12);
4733 }
4734 }
4735
4736 #[test]
4737 fn higher_derivative_moment_helpers_reject_empty_first_coefficients() {
4738 let cell = DenestedCubicCell {
4739 left: -1.0,
4740 right: 1.0,
4741 c0: 0.0,
4742 c1: 1.0,
4743 c2: 0.0,
4744 c3: 0.0,
4745 };
4746 let moments = [1.0; 16];
4747
4748 let third_err = cell_third_derivative_from_moments(
4749 cell,
4750 &[],
4751 &[1.0],
4752 &[1.0],
4753 &[],
4754 &[],
4755 &[],
4756 &[],
4757 &moments,
4758 )
4759 .expect_err("empty first coefficients should be rejected");
4760 assert!(third_err.contains("r first-derivative coefficients must be non-empty"));
4761
4762 let fourth_err = cell_fourth_derivative_from_moments(
4763 cell,
4764 &[1.0],
4765 &[],
4766 &[1.0],
4767 &[1.0],
4768 &[],
4769 &[],
4770 &[],
4771 &[],
4772 &[],
4773 &[],
4774 &[],
4775 &[],
4776 &[],
4777 &[],
4778 &[],
4779 &moments,
4780 )
4781 .expect_err("empty first coefficients should be rejected");
4782 assert!(fourth_err.contains("s first-derivative coefficients must be non-empty"));
4783 }
4784
4785 #[test]
4786 fn fourth_derivative_rejects_overlong_scratch_convolutions() {
4787 let cell = DenestedCubicCell {
4788 left: -1.0,
4789 right: 1.0,
4790 c0: 0.0,
4791 c1: 1.0,
4792 c2: 0.0,
4793 c3: 0.0,
4794 };
4795 let long_first = [1.0; 10];
4796 let zero = [0.0; 1];
4797 let moments = [1.0; 64];
4798
4799 let err = cell_fourth_derivative_from_moments(
4800 cell,
4801 &long_first,
4802 &long_first,
4803 &long_first,
4804 &long_first,
4805 &zero,
4806 &zero,
4807 &zero,
4808 &zero,
4809 &zero,
4810 &zero,
4811 &zero,
4812 &zero,
4813 &zero,
4814 &zero,
4815 &zero,
4816 &moments,
4817 )
4818 .expect_err("oversized convolution should be rejected before writing scratch");
4819 assert!(err.contains("fourth derivative polynomial convolution scratch too small"));
4820 }
4821
4822 #[test]
4823 fn score_and_link_basis_cell_coefficients_match_direct_construction() {
4824 let score_basis_span = LocalSpanCubic {
4825 left: -0.7,
4826 right: 0.4,
4827 c0: 0.2,
4828 c1: -0.04,
4829 c2: 0.03,
4830 c3: -0.01,
4831 };
4832 let link_basis_span = LocalSpanCubic {
4833 left: -0.5,
4834 right: 1.1,
4835 c0: -0.03,
4836 c1: 0.05,
4837 c2: -0.02,
4838 c3: 0.01,
4839 };
4840 let a = 0.25;
4841 let b = -0.8;
4842 let score_coeffs = score_basis_cell_coefficients(score_basis_span, b);
4843 let link_coeffs = link_basis_cell_coefficients(link_basis_span, a, b);
4844 for &z in &[-0.7, -0.1, 0.2, 0.4] {
4845 let score_poly = polynomial_value(&score_coeffs, z);
4846 let link_poly = polynomial_value(&link_coeffs, z);
4847 assert!((score_poly - b * score_basis_span.evaluate(z)).abs() < 1e-12);
4848 assert!((link_poly - link_basis_span.evaluate(a + b * z)).abs() < 1e-12);
4849 }
4850 }
4851
4852 #[test]
4853 fn link_basis_partials_match_exact_span_derivatives() {
4854 let link_basis_span = LocalSpanCubic {
4855 left: -0.5,
4856 right: 1.1,
4857 c0: -0.03,
4858 c1: 0.05,
4859 c2: -0.02,
4860 c3: 0.01,
4861 };
4862 let a = 0.25;
4863 let b = -0.8;
4864 let (dc_da, dc_db) = link_basis_cell_coefficient_partials(link_basis_span, a, b);
4865 let (dc_daa, dc_dab, dc_dbb) = link_basis_cell_second_partials(link_basis_span, a, b);
4866 for &z in &[-0.6, -0.2, 0.15, 0.5] {
4867 let u = a + b * z;
4868 let eta_a = link_basis_span.first_derivative(u);
4869 let eta_b = z * link_basis_span.first_derivative(u);
4870 let eta_aa = link_basis_span.second_derivative(u);
4871 let eta_ab = z * link_basis_span.second_derivative(u);
4872 let eta_bb = z * z * link_basis_span.second_derivative(u);
4873 assert!((polynomial_value(&dc_da, z) - eta_a).abs() < 1e-12);
4874 assert!((polynomial_value(&dc_db, z) - eta_b).abs() < 1e-12);
4875 assert!((polynomial_value(&dc_daa, z) - eta_aa).abs() < 1e-12);
4876 assert!((polynomial_value(&dc_dab, z) - eta_ab).abs() < 1e-12);
4877 assert!((polynomial_value(&dc_dbb, z) - eta_bb).abs() < 1e-12);
4878 }
4879 }
4880
4881 #[test]
4882 fn denested_third_partials_match_exact_span_derivatives() {
4883 let link_span = LocalSpanCubic {
4884 left: -0.6,
4885 right: 0.9,
4886 c0: -0.05,
4887 c1: 0.04,
4888 c2: -0.02,
4889 c3: 0.015,
4890 };
4891 let (dc_daaa, dc_daab, dc_dabb, dc_dbbb) = denested_cell_third_partials(link_span);
4892 let link_third = 6.0 * link_span.c3;
4893 for &z in &[-0.75, -0.4, -0.1, 0.2] {
4894 let eta_aaa = link_third;
4895 let eta_aab = z * link_third;
4896 let eta_abb = z * z * link_third;
4897 let eta_bbb = z * z * z * link_third;
4898 assert!((polynomial_value(&dc_daaa, z) - eta_aaa).abs() < 1e-12);
4899 assert!((polynomial_value(&dc_daab, z) - eta_aab).abs() < 1e-12);
4900 assert!((polynomial_value(&dc_dabb, z) - eta_abb).abs() < 1e-12);
4901 assert!((polynomial_value(&dc_dbbb, z) - eta_bbb).abs() < 1e-12);
4902 }
4903 }
4904
4905 #[test]
4906 fn link_basis_third_partials_match_exact_span_derivatives() {
4907 let link_basis_span = LocalSpanCubic {
4908 left: -0.5,
4909 right: 1.1,
4910 c0: -0.03,
4911 c1: 0.05,
4912 c2: -0.02,
4913 c3: 0.01,
4914 };
4915 let (dc_daaa, dc_daab, dc_dabb, dc_dbbb) = link_basis_cell_third_partials(link_basis_span);
4916 let link_third = 6.0 * link_basis_span.c3;
4917 for &z in &[-0.6, -0.2, 0.15, 0.5] {
4918 let eta_aaa = link_third;
4919 let eta_aab = z * link_third;
4920 let eta_abb = z * z * link_third;
4921 let eta_bbb = z * z * z * link_third;
4922 assert!((polynomial_value(&dc_daaa, z) - eta_aaa).abs() < 1e-12);
4923 assert!((polynomial_value(&dc_daab, z) - eta_aab).abs() < 1e-12);
4924 assert!((polynomial_value(&dc_dabb, z) - eta_abb).abs() < 1e-12);
4925 assert!((polynomial_value(&dc_dbbb, z) - eta_bbb).abs() < 1e-12);
4926 }
4927 }
4928
4929 #[test]
4930 fn branch_selection_uses_normalized_non_affine_coefficients() {
4931 let affine = DenestedCubicCell {
4932 left: -1.0,
4933 right: 1.0,
4934 c0: 0.1,
4935 c1: -0.4,
4936 c2: 1e-13,
4937 c3: -1e-13,
4938 };
4939 let quartic = DenestedCubicCell {
4940 c2: 2e-4,
4941 c3: 1e-13,
4942 ..affine
4943 };
4944 let sextic = DenestedCubicCell {
4945 c2: 2e-4,
4946 c3: 5e-3,
4947 ..affine
4948 };
4949 assert_eq!(branch_cell(affine).unwrap(), ExactCellBranch::Affine);
4950 assert_eq!(branch_cell(quartic).unwrap(), ExactCellBranch::Quartic);
4951 assert_eq!(branch_cell(sextic).unwrap(), ExactCellBranch::Sextic);
4952 }
4953
4954 #[test]
4955 fn affine_anchor_moments_match_whole_line_closed_forms() {
4956 let out = affine_anchor_moment_vector(0.0, 0.0, f64::NEG_INFINITY, f64::INFINITY, 4);
4957 let sqrt_2pi = (2.0 * std::f64::consts::PI).sqrt();
4958 assert!((out[0] - sqrt_2pi).abs() < 1e-12);
4959 assert!(out[1].abs() < 1e-12);
4960 assert!((out[2] - sqrt_2pi).abs() < 1e-12);
4961 }
4962
4963 #[test]
4964 fn affine_anchor_moments_match_shifted_gaussian_whole_line() {
4965 let alpha = 0.7;
4966 let beta = -0.4;
4967 let out = affine_anchor_moment_vector(alpha, beta, f64::NEG_INFINITY, f64::INFINITY, 4);
4968 let s = (1.0 + beta * beta).sqrt();
4969 let mu = -alpha * beta / (1.0 + beta * beta);
4970 let scale = (-alpha * alpha / (2.0 * s * s)).exp() / s;
4971 let sqrt_2pi = (2.0 * std::f64::consts::PI).sqrt();
4972 assert!((out[0] - scale * sqrt_2pi).abs() < 1e-12);
4973 assert!((out[1] - scale * sqrt_2pi * mu).abs() < 1e-12);
4974 assert!((out[2] - scale * sqrt_2pi * (mu * mu + 1.0 / (s * s))).abs() < 1e-10);
4975 }
4976
4977 #[test]
4978 fn quartic_recurrence_reduces_higher_moments() {
4979 let cell = DenestedCubicCell {
4980 left: -1.0,
4981 right: 0.9,
4982 c0: 0.2,
4983 c1: -0.3,
4984 c2: 0.18,
4985 c3: 0.0,
4986 };
4987 let exact = |k: usize| {
4988 simpson_integral(cell.left, cell.right, 2000, |z| {
4989 z.powi(k as i32) * (-cell.q(z)).exp()
4990 })
4991 };
4992 let reduced = reduce_quartic_moments(cell, [exact(0), exact(1), exact(2)], 6)
4993 .expect("quartic reduction");
4994 for k in 0..=6 {
4995 let target = exact(k);
4996 assert!(
4997 (reduced[k] - target).abs() < 1e-7,
4998 "quartic reduced moment M{k} mismatch: {} vs {}",
4999 reduced[k],
5000 target
5001 );
5002 }
5003 }
5004
5005 #[test]
5006 fn sextic_recurrence_reduces_higher_moments() {
5007 let cell = DenestedCubicCell {
5008 left: -0.8,
5009 right: 0.7,
5010 c0: -0.1,
5011 c1: 0.25,
5012 c2: -0.14,
5013 c3: 0.22,
5014 };
5015 let exact = |k: usize| {
5016 simpson_integral(cell.left, cell.right, 3000, |z| {
5017 z.powi(k as i32) * (-cell.q(z)).exp()
5018 })
5019 };
5020 let reduced =
5021 reduce_sextic_moments(cell, [exact(0), exact(1), exact(2), exact(3), exact(4)], 9)
5022 .expect("sextic reduction");
5023 for k in 0..=9 {
5024 let target = exact(k);
5025 assert!(
5026 (reduced[k] - target).abs() < 1e-7,
5027 "sextic reduced moment M{k} mismatch: {} vs {}",
5028 reduced[k],
5029 target
5030 );
5031 }
5032 }
5033
5034 #[test]
5035 fn degenerate_sextic_branch_preserves_quadratic_coefficient() {
5036 let cell = DenestedCubicCell {
5037 left: -1.0,
5038 right: 1.0,
5039 c0: 0.0,
5040 c1: 0.0,
5041 c2: 0.1,
5042 c3: 2.0e-10,
5043 };
5044 assert_eq!(branch_cell(cell).unwrap(), ExactCellBranch::Sextic);
5045
5046 let state = evaluate_cell_moments(cell, 9).expect("degenerate sextic cell");
5047 let quartic_cell = DenestedCubicCell { c3: 0.0, ..cell };
5048 let quartic = evaluate_cell_moments(quartic_cell, 9).expect("quartic cell");
5049 let affine = evaluate_affine_cell_state(
5050 DenestedCubicCell {
5051 c2: 0.0,
5052 c3: 0.0,
5053 ..cell
5054 },
5055 9,
5056 )
5057 .expect("affine cell");
5058
5059 assert_eq!(state.branch, ExactCellBranch::Quartic);
5060 for k in 0..=9 {
5061 assert!(
5062 (state.moments[k] - quartic.moments[k]).abs() < 1e-12,
5063 "lowered moment M{k} should match the quartic cell: {} vs {}",
5064 state.moments[k],
5065 quartic.moments[k]
5066 );
5067 }
5068 assert!(
5069 (state.moments[0] - affine.moments[0]).abs() > 1e-4,
5070 "degenerate sextic handling must not drop the nonzero c2 term"
5071 );
5072 }
5073
5074 #[test]
5075 fn moment_reduced_first_and_second_derivatives_match_numeric_integrals() {
5076 let cell = DenestedCubicCell {
5077 left: -0.9,
5078 right: 0.6,
5079 c0: 0.15,
5080 c1: -0.2,
5081 c2: 0.08,
5082 c3: 0.17,
5083 };
5084 let moments = reduce_sextic_moments(
5085 cell,
5086 [
5087 simpson_integral(cell.left, cell.right, 3000, |z| (-cell.q(z)).exp()),
5088 simpson_integral(cell.left, cell.right, 3000, |z| z * (-cell.q(z)).exp()),
5089 simpson_integral(cell.left, cell.right, 3000, |z| z * z * (-cell.q(z)).exp()),
5090 simpson_integral(cell.left, cell.right, 3000, |z| {
5091 z.powi(3) * (-cell.q(z)).exp()
5092 }),
5093 simpson_integral(cell.left, cell.right, 3000, |z| {
5094 z.powi(4) * (-cell.q(z)).exp()
5095 }),
5096 ],
5097 9,
5098 )
5099 .expect("reduced moments");
5100
5101 let r = [0.7, -0.1, 0.3];
5102 let s = [0.2, 0.5];
5103 let second = [0.4, -0.2, 0.1];
5104 let exact_first = cell_first_derivative_from_moments(&r, &moments).expect("first");
5105 let exact_second =
5106 cell_second_derivative_from_moments(cell, &r, &s, &second, &moments).expect("second");
5107
5108 let numeric_first = simpson_integral(cell.left, cell.right, 3000, |z| {
5109 polynomial_value(&r, z) * (-cell.q(z)).exp() / (2.0 * std::f64::consts::PI)
5110 });
5111 let numeric_second = simpson_integral(cell.left, cell.right, 3000, |z| {
5112 let eta = cell.eta(z);
5113 (polynomial_value(&second, z) - eta * polynomial_value(&r, z) * polynomial_value(&s, z))
5114 * (-cell.q(z)).exp()
5115 / (2.0 * std::f64::consts::PI)
5116 });
5117
5118 assert!((exact_first - numeric_first).abs() < 1e-7);
5119 assert!((exact_second - numeric_second).abs() < 1e-7);
5120 }
5121
5122 #[test]
5123 fn moment_reduced_third_derivative_matches_numeric_integral() {
5124 let cell = DenestedCubicCell {
5125 left: -0.85,
5126 right: 0.7,
5127 c0: -0.12,
5128 c1: 0.18,
5129 c2: 0.09,
5130 c3: -0.11,
5131 };
5132 let moments = evaluate_cell_moments(cell, 12).expect("cell moments");
5133 let r = [0.35, -0.12, 0.08];
5134 let s = [0.17, 0.09];
5135 let t = [-0.21, 0.14, -0.04];
5136 let rs = [0.11, -0.07, 0.05];
5137 let rt = [-0.06, 0.03];
5138 let st = [0.08, -0.02, 0.01];
5139 let rst = [0.04, -0.05, 0.02];
5140
5141 let exact_third = cell_third_derivative_from_moments(
5142 cell,
5143 &r,
5144 &s,
5145 &t,
5146 &rs,
5147 &rt,
5148 &st,
5149 &rst,
5150 &moments.moments,
5151 )
5152 .expect("third derivative");
5153 let numeric_third = simpson_integral(cell.left, cell.right, 4000, |z| {
5154 let eta = cell.eta(z);
5155 let rz = polynomial_value(&r, z);
5156 let sz = polynomial_value(&s, z);
5157 let tz = polynomial_value(&t, z);
5158 let rsz = polynomial_value(&rs, z);
5159 let rtz = polynomial_value(&rt, z);
5160 let stz = polynomial_value(&st, z);
5161 let rstz = polynomial_value(&rst, z);
5162 (rstz - eta * (rsz * tz + rtz * sz + stz * rz) + (eta * eta - 1.0) * rz * sz * tz)
5163 * (-cell.q(z)).exp()
5164 / (2.0 * std::f64::consts::PI)
5165 });
5166
5167 assert!((exact_third - numeric_third).abs() < 1e-7);
5168 }
5169
5170 #[test]
5171 fn moment_reduced_fourth_derivative_matches_numeric_integral() {
5172 let cell = DenestedCubicCell {
5173 left: -0.8,
5174 right: 0.65,
5175 c0: 0.11,
5176 c1: -0.22,
5177 c2: 0.07,
5178 c3: 0.13,
5179 };
5180 let moments = evaluate_cell_moments(cell, 16).expect("cell moments");
5181 let r = [0.21, -0.13, 0.06];
5182 let s = [-0.18, 0.04];
5183 let t = [0.09, 0.07, -0.03];
5184 let u = [-0.14, 0.05];
5185 let rs = [0.08, -0.03, 0.02];
5186 let rt = [-0.05, 0.01];
5187 let ru = [0.04, -0.02, 0.01];
5188 let st = [0.03, 0.02];
5189 let su = [-0.02, 0.05, -0.01];
5190 let tu = [0.07, -0.04];
5191 let rst = [0.03, -0.01, 0.02];
5192 let rsu = [-0.02, 0.04];
5193 let rtu = [0.01, 0.02, -0.01];
5194 let stu = [-0.03, 0.02];
5195 let rstu = [0.02, -0.01, 0.01];
5196
5197 let exact_fourth = cell_fourth_derivative_from_moments(
5198 cell,
5199 &r,
5200 &s,
5201 &t,
5202 &u,
5203 &rs,
5204 &rt,
5205 &ru,
5206 &st,
5207 &su,
5208 &tu,
5209 &rst,
5210 &rsu,
5211 &rtu,
5212 &stu,
5213 &rstu,
5214 &moments.moments,
5215 )
5216 .expect("fourth derivative");
5217 let numeric_fourth = simpson_integral(cell.left, cell.right, 5000, |z| {
5218 let eta = cell.eta(z);
5219 let rz = polynomial_value(&r, z);
5220 let sz = polynomial_value(&s, z);
5221 let tz = polynomial_value(&t, z);
5222 let uz = polynomial_value(&u, z);
5223 let rsz = polynomial_value(&rs, z);
5224 let rtz = polynomial_value(&rt, z);
5225 let ruz = polynomial_value(&ru, z);
5226 let stz = polynomial_value(&st, z);
5227 let suz = polynomial_value(&su, z);
5228 let tuz = polynomial_value(&tu, z);
5229 let rstz = polynomial_value(&rst, z);
5230 let rsuz = polynomial_value(&rsu, z);
5231 let rtuz = polynomial_value(&rtu, z);
5232 let stuz = polynomial_value(&stu, z);
5233 let rstuz = polynomial_value(&rstu, z);
5234 let linear =
5235 rstz * uz + rsuz * tz + rtuz * sz + stuz * rz + rsz * tuz + rtz * suz + ruz * stz;
5236 let quadratic = rsz * tz * uz
5237 + rtz * sz * uz
5238 + ruz * sz * tz
5239 + stz * rz * uz
5240 + suz * rz * tz
5241 + tuz * rz * sz;
5242 let quartic = rz * sz * tz * uz;
5243 (rstuz - eta * linear
5244 + (eta * eta - 1.0) * quadratic
5245 + (-eta * eta * eta + 3.0 * eta) * quartic)
5246 * (-cell.q(z)).exp()
5247 / (2.0 * std::f64::consts::PI)
5248 });
5249
5250 assert!((exact_fourth - numeric_fourth).abs() < 2e-7);
5251 }
5252
    /// End-to-end check of the `(a, b)` parameter derivatives of a denested cell.
    ///
    /// Builds a `DenestedCubicCell` from a score span, a link span and the
    /// parameters `a`, `b`, then compares every first- through fourth-order
    /// value assembled by the `cell_*_derivative_from_moments` helpers with
    /// Simpson quadrature of a hand-written integrand over
    /// `[cell.left, cell.right]`. Each integrand is a combination of `eta` and
    /// its parameter partials multiplied by `exp(-q(z)) * INV_TWO_PI`.
    #[test]
    fn denested_cell_parameter_derivatives_match_exact_integrands() {
        let score_span = LocalSpanCubic {
            left: -0.75,
            right: 0.25,
            c0: 0.08,
            c1: -0.03,
            c2: 0.02,
            c3: -0.01,
        };
        let link_span = LocalSpanCubic {
            left: -0.6,
            right: 0.9,
            c0: -0.05,
            c1: 0.04,
            c2: -0.02,
            c3: 0.015,
        };
        let a = 0.3;
        let b = -0.7;
        // The cell takes its endpoints from the score span and its four
        // coefficients from `denested_cell_coefficients`.
        let coeffs = denested_cell_coefficients(score_span, link_span, a, b);
        let cell = DenestedCubicCell {
            left: score_span.left,
            right: score_span.right,
            c0: coeffs[0],
            c1: coeffs[1],
            c2: coeffs[2],
            c3: coeffs[3],
        };
        // NOTE(review): 24 is presumably the highest moment order the
        // fourth-order formulas can touch — confirm against the helpers.
        let state = evaluate_cell_moments(cell, 24).expect("cell moments");
        // Coefficient polynomials (in z) of the partials of eta w.r.t. a and b.
        let (dc_da, dc_db) = denested_cell_coefficient_partials(score_span, link_span, a, b);
        let (dc_daa, dc_dab, dc_dbb) = denested_cell_second_partials(score_span, link_span, a, b);
        let (dc_daaa, dc_daab, dc_dabb, dc_dbbb) = denested_cell_third_partials(link_span);
        // Used for the fourth-order coefficient slot: the reference integrands
        // below contain no fourth-order partial of eta.
        let zero = [0.0; 4];
        let link_third = 6.0 * link_span.c3;

        // Pointwise reference partials of eta, written directly in terms of the
        // spans and evaluated at the link argument `a + b * z`.
        let eta_a = |z: f64| 1.0 + link_span.first_derivative(a + b * z);
        let eta_b = |z: f64| z + score_span.evaluate(z) + z * link_span.first_derivative(a + b * z);
        let eta_aa = |z: f64| link_span.second_derivative(a + b * z);
        let eta_ab = |z: f64| z * link_span.second_derivative(a + b * z);
        let eta_bb = |z: f64| z * z * link_span.second_derivative(a + b * z);
        let eta_aaa = |z: f64| link_third + 0.0 * z;
        let eta_aab = |z: f64| z * link_third;
        let eta_abb = |z: f64| z * z * link_third;
        let eta_bbb = |z: f64| z * z * z * link_third;

        // Moment-based values: first and second order.
        let exact_a = cell_first_derivative_from_moments(&dc_da, &state.moments).expect("a");
        let exact_b = cell_first_derivative_from_moments(&dc_db, &state.moments).expect("b");
        let exact_aa =
            cell_second_derivative_from_moments(cell, &dc_da, &dc_da, &dc_daa, &state.moments)
                .expect("aa");
        let exact_ab =
            cell_second_derivative_from_moments(cell, &dc_da, &dc_db, &dc_dab, &state.moments)
                .expect("ab");
        let exact_bb =
            cell_second_derivative_from_moments(cell, &dc_db, &dc_db, &dc_dbb, &state.moments)
                .expect("bb");
        // Third order. As used here, the slots after `cell` are: three
        // first-order polynomials, the pairwise second-order polynomials for
        // direction pairs (1,2), (1,3), (2,3), then the third-order polynomial.
        let exact_aaa = cell_third_derivative_from_moments(
            cell,
            &dc_da,
            &dc_da,
            &dc_da,
            &dc_daa,
            &dc_daa,
            &dc_daa,
            &dc_daaa,
            &state.moments,
        )
        .expect("aaa");
        let exact_aab = cell_third_derivative_from_moments(
            cell,
            &dc_da,
            &dc_da,
            &dc_db,
            &dc_daa,
            &dc_dab,
            &dc_dab,
            &dc_daab,
            &state.moments,
        )
        .expect("aab");
        let exact_abb = cell_third_derivative_from_moments(
            cell,
            &dc_da,
            &dc_db,
            &dc_db,
            &dc_dab,
            &dc_dab,
            &dc_dbb,
            &dc_dabb,
            &state.moments,
        )
        .expect("abb");
        let exact_bbb = cell_third_derivative_from_moments(
            cell,
            &dc_db,
            &dc_db,
            &dc_db,
            &dc_dbb,
            &dc_dbb,
            &dc_dbb,
            &dc_dbbb,
            &state.moments,
        )
        .expect("bbb");
        // Fourth order. As used here, the slots after `cell` are: four
        // first-order polynomials; six pairwise second-order polynomials for
        // pairs (1,2), (1,3), (1,4), (2,3), (2,4), (3,4); four third-order
        // polynomials for triples (1,2,3), (1,2,4), (1,3,4), (2,3,4); and the
        // fourth-order polynomial (always `zero` in this test).
        let exact_aaaa = cell_fourth_derivative_from_moments(
            cell,
            &dc_da,
            &dc_da,
            &dc_da,
            &dc_da,
            &dc_daa,
            &dc_daa,
            &dc_daa,
            &dc_daa,
            &dc_daa,
            &dc_daa,
            &dc_daaa,
            &dc_daaa,
            &dc_daaa,
            &dc_daaa,
            &zero,
            &state.moments,
        )
        .expect("aaaa");
        let exact_aaab = cell_fourth_derivative_from_moments(
            cell,
            &dc_da,
            &dc_da,
            &dc_da,
            &dc_db,
            &dc_daa,
            &dc_daa,
            &dc_dab,
            &dc_daa,
            &dc_dab,
            &dc_dab,
            &dc_daaa,
            &dc_daab,
            &dc_daab,
            &dc_daab,
            &zero,
            &state.moments,
        )
        .expect("aaab");
        let exact_aabb = cell_fourth_derivative_from_moments(
            cell,
            &dc_da,
            &dc_da,
            &dc_db,
            &dc_db,
            &dc_daa,
            &dc_dab,
            &dc_dab,
            &dc_dab,
            &dc_dab,
            &dc_dbb,
            &dc_daab,
            &dc_daab,
            &dc_dabb,
            &dc_dabb,
            &zero,
            &state.moments,
        )
        .expect("aabb");
        let exact_abbb = cell_fourth_derivative_from_moments(
            cell,
            &dc_da,
            &dc_db,
            &dc_db,
            &dc_db,
            &dc_dab,
            &dc_dab,
            &dc_dab,
            &dc_dbb,
            &dc_dbb,
            &dc_dbb,
            &dc_dabb,
            &dc_dabb,
            &dc_dabb,
            &dc_dbbb,
            &zero,
            &state.moments,
        )
        .expect("abbb");
        let exact_bbbb = cell_fourth_derivative_from_moments(
            cell,
            &dc_db,
            &dc_db,
            &dc_db,
            &dc_db,
            &dc_dbb,
            &dc_dbb,
            &dc_dbb,
            &dc_dbb,
            &dc_dbb,
            &dc_dbb,
            &dc_dbbb,
            &dc_dbbb,
            &dc_dbbb,
            &dc_dbbb,
            &zero,
            &state.moments,
        )
        .expect("bbbb");

        // Quadrature references: first and second order.
        let numeric_a = simpson_integral(cell.left, cell.right, 5000, |z| {
            eta_a(z) * (-cell.q(z)).exp() * INV_TWO_PI
        });
        let numeric_b = simpson_integral(cell.left, cell.right, 5000, |z| {
            eta_b(z) * (-cell.q(z)).exp() * INV_TWO_PI
        });
        let numeric_aa = simpson_integral(cell.left, cell.right, 5000, |z| {
            (eta_aa(z) - cell.eta(z) * eta_a(z) * eta_a(z)) * (-cell.q(z)).exp() * INV_TWO_PI
        });
        let numeric_ab = simpson_integral(cell.left, cell.right, 5000, |z| {
            (eta_ab(z) - cell.eta(z) * eta_a(z) * eta_b(z)) * (-cell.q(z)).exp() * INV_TWO_PI
        });
        let numeric_bb = simpson_integral(cell.left, cell.right, 5000, |z| {
            (eta_bb(z) - cell.eta(z) * eta_b(z) * eta_b(z)) * (-cell.q(z)).exp() * INV_TWO_PI
        });
        // Third order: repeated directions collapse the three pairwise terms
        // into the factors 3 (pure) or 1 + 2 (mixed).
        let numeric_aaa = simpson_integral(cell.left, cell.right, 5000, |z| {
            let eta = cell.eta(z);
            (eta_aaa(z) - 3.0 * eta * eta_aa(z) * eta_a(z) + (eta * eta - 1.0) * eta_a(z).powi(3))
                * (-cell.q(z)).exp()
                * INV_TWO_PI
        });
        let numeric_aab = simpson_integral(cell.left, cell.right, 5000, |z| {
            let eta = cell.eta(z);
            let a_z = eta_a(z);
            let b_z = eta_b(z);
            (eta_aab(z) - eta * (eta_aa(z) * b_z + 2.0 * eta_ab(z) * a_z)
                + (eta * eta - 1.0) * a_z * a_z * b_z)
                * (-cell.q(z)).exp()
                * INV_TWO_PI
        });
        let numeric_abb = simpson_integral(cell.left, cell.right, 5000, |z| {
            let eta = cell.eta(z);
            let a_z = eta_a(z);
            let b_z = eta_b(z);
            (eta_abb(z) - eta * (2.0 * eta_ab(z) * b_z + eta_bb(z) * a_z)
                + (eta * eta - 1.0) * a_z * b_z * b_z)
                * (-cell.q(z)).exp()
                * INV_TWO_PI
        });
        let numeric_bbb = simpson_integral(cell.left, cell.right, 5000, |z| {
            let eta = cell.eta(z);
            (eta_bbb(z) - 3.0 * eta * eta_bb(z) * eta_b(z) + (eta * eta - 1.0) * eta_b(z).powi(3))
                * (-cell.q(z)).exp()
                * INV_TWO_PI
        });
        // Fourth order: no fourth-order partial of eta appears in these
        // integrands, matching the `zero` slot passed above.
        let numeric_aaaa = simpson_integral(cell.left, cell.right, 5000, |z| {
            let eta = cell.eta(z);
            let eta_a_z = eta_a(z);
            let eta_aa_z = eta_aa(z);
            let eta_aaa_z = eta_aaa(z);
            (-eta * (4.0 * eta_aaa_z * eta_a_z + 3.0 * eta_aa_z * eta_aa_z)
                + (eta * eta - 1.0) * (6.0 * eta_aa_z * eta_a_z * eta_a_z)
                + (-eta * eta * eta + 3.0 * eta) * eta_a_z.powi(4))
                * (-cell.q(z)).exp()
                * INV_TWO_PI
        });
        let numeric_aaab = simpson_integral(cell.left, cell.right, 5000, |z| {
            let eta = cell.eta(z);
            let a_z = eta_a(z);
            let b_z = eta_b(z);
            let aa_z = eta_aa(z);
            let ab_z = eta_ab(z);
            let aaa_z = eta_aaa(z);
            let aab_z = eta_aab(z);
            (-eta * (aaa_z * b_z + 3.0 * aab_z * a_z + 3.0 * aa_z * ab_z)
                + (eta * eta - 1.0) * (3.0 * aa_z * a_z * b_z + 3.0 * ab_z * a_z * a_z)
                + (-eta * eta * eta + 3.0 * eta) * a_z.powi(3) * b_z)
                * (-cell.q(z)).exp()
                * INV_TWO_PI
        });
        let numeric_aabb = simpson_integral(cell.left, cell.right, 5000, |z| {
            let eta = cell.eta(z);
            let a_z = eta_a(z);
            let b_z = eta_b(z);
            let aa_z = eta_aa(z);
            let ab_z = eta_ab(z);
            let bb_z = eta_bb(z);
            let aab_z = eta_aab(z);
            let abb_z = eta_abb(z);
            (-eta * (2.0 * aab_z * b_z + 2.0 * abb_z * a_z + aa_z * bb_z + 2.0 * ab_z * ab_z)
                + (eta * eta - 1.0)
                    * (aa_z * b_z * b_z + 4.0 * ab_z * a_z * b_z + bb_z * a_z * a_z)
                + (-eta * eta * eta + 3.0 * eta) * a_z * a_z * b_z * b_z)
                * (-cell.q(z)).exp()
                * INV_TWO_PI
        });
        let numeric_abbb = simpson_integral(cell.left, cell.right, 5000, |z| {
            let eta = cell.eta(z);
            let a_z = eta_a(z);
            let b_z = eta_b(z);
            let ab_z = eta_ab(z);
            let bb_z = eta_bb(z);
            let abb_z = eta_abb(z);
            let bbb_z = eta_bbb(z);
            (-eta * (3.0 * abb_z * b_z + bbb_z * a_z + 3.0 * ab_z * bb_z)
                + (eta * eta - 1.0) * (3.0 * ab_z * b_z * b_z + 3.0 * bb_z * a_z * b_z)
                + (-eta * eta * eta + 3.0 * eta) * a_z * b_z.powi(3))
                * (-cell.q(z)).exp()
                * INV_TWO_PI
        });
        let numeric_bbbb = simpson_integral(cell.left, cell.right, 5000, |z| {
            let eta = cell.eta(z);
            let eta_b_z = eta_b(z);
            let eta_bb_z = eta_bb(z);
            let eta_bbb_z = eta_bbb(z);
            (-eta * (4.0 * eta_bbb_z * eta_b_z + 3.0 * eta_bb_z * eta_bb_z)
                + (eta * eta - 1.0) * (6.0 * eta_bb_z * eta_b_z * eta_b_z)
                + (-eta * eta * eta + 3.0 * eta) * eta_b_z.powi(4))
                * (-cell.q(z)).exp()
                * INV_TWO_PI
        });

        // Tolerances loosen with derivative order: 1e-8 (first/second),
        // 2e-7 (third), 2e-6 (fourth).
        assert!((exact_a - numeric_a).abs() < 1e-8);
        assert!((exact_b - numeric_b).abs() < 1e-8);
        assert!((exact_aa - numeric_aa).abs() < 1e-8);
        assert!((exact_ab - numeric_ab).abs() < 1e-8);
        assert!((exact_bb - numeric_bb).abs() < 1e-8);
        assert!((exact_aaa - numeric_aaa).abs() < 2e-7);
        assert!((exact_aab - numeric_aab).abs() < 2e-7);
        assert!((exact_abb - numeric_abb).abs() < 2e-7);
        assert!((exact_bbb - numeric_bbb).abs() < 2e-7);
        assert!((exact_aaaa - numeric_aaaa).abs() < 2e-6);
        assert!((exact_aaab - numeric_aaab).abs() < 2e-6);
        assert!((exact_aabb - numeric_aabb).abs() < 2e-6);
        assert!((exact_abbb - numeric_abbb).abs() < 2e-6);
        assert!((exact_bbbb - numeric_bbbb).abs() < 2e-6);
    }
5586
    /// Checks derivatives that involve a link-basis direction `w` alongside
    /// the `(a, b)` parameters.
    ///
    /// The cell is built exactly as in the pure `(a, b)` test; `w` is
    /// represented by `link_basis_span` evaluated at `a + b * z`. Every value
    /// assembled by the `cell_*_derivative_from_moments` helpers is compared
    /// with Simpson quadrature of a hand-written integrand over
    /// `[cell.left, cell.right]`, each weighted by `exp(-q(z)) * INV_TWO_PI`.
    /// Slots that would hold second- or higher-order partials in `w` alone are
    /// filled with a zero polynomial, and the reference integrands contain no
    /// such terms.
    #[test]
    fn link_basis_cell_derivatives_match_exact_integrands() {
        let score_span = LocalSpanCubic {
            left: -0.75,
            right: 0.25,
            c0: 0.08,
            c1: -0.03,
            c2: 0.02,
            c3: -0.01,
        };
        let link_span = LocalSpanCubic {
            left: -0.6,
            right: 0.9,
            c0: -0.05,
            c1: 0.04,
            c2: -0.02,
            c3: 0.015,
        };
        // Same interval as `link_span`, different coefficients.
        let link_basis_span = LocalSpanCubic {
            left: -0.6,
            right: 0.9,
            c0: 0.02,
            c1: -0.01,
            c2: 0.03,
            c3: -0.02,
        };
        let a = 0.3;
        let b = -0.7;
        let coeffs = denested_cell_coefficients(score_span, link_span, a, b);
        let cell = DenestedCubicCell {
            left: score_span.left,
            right: score_span.right,
            c0: coeffs[0],
            c1: coeffs[1],
            c2: coeffs[2],
            c3: coeffs[3],
        };
        let state = evaluate_cell_moments(cell, 24).expect("cell moments");
        // Coefficient polynomials for the (a, b) partials of eta. Only the
        // pure-a and pure-b third partials (tuple fields 0 and 3) are needed.
        let (dc_da, dc_db) = denested_cell_coefficient_partials(score_span, link_span, a, b);
        let second_partials = denested_cell_second_partials(score_span, link_span, a, b);
        let dc_daa = second_partials.0;
        let dc_dab = second_partials.1;
        let dc_dbb = second_partials.2;
        let denested_third = denested_cell_third_partials(link_span);
        let dc_daaa = denested_third.0;
        let dc_dbbb = denested_third.3;

        // Coefficient polynomials for the w direction and its mixed partials
        // with a and b.
        let coeff_w = link_basis_cell_coefficients(link_basis_span, a, b);
        let (coeff_aw, coeff_bw) = link_basis_cell_coefficient_partials(link_basis_span, a, b);
        let (coeff_aaw, coeff_abw, coeff_bbw) =
            link_basis_cell_second_partials(link_basis_span, a, b);
        let link_basis_third = link_basis_cell_third_partials(link_basis_span);
        let coeff_aaaw = link_basis_third.0;
        let coeff_bbbw = link_basis_third.3;
        let zero = [0.0; 4];
        let basis_third = 6.0 * link_basis_span.c3;

        // Pointwise reference partials, written directly in terms of the spans
        // and evaluated at the link argument `a + b * z`.
        let eta_a = |z: f64| 1.0 + link_span.first_derivative(a + b * z);
        let eta_b = |z: f64| z + score_span.evaluate(z) + z * link_span.first_derivative(a + b * z);
        let eta_aa = |z: f64| link_span.second_derivative(a + b * z);
        let eta_ab = |z: f64| z * link_span.second_derivative(a + b * z);
        let eta_bb = |z: f64| z * z * link_span.second_derivative(a + b * z);
        let eta_w = |z: f64| link_basis_span.evaluate(a + b * z);
        let eta_aw = |z: f64| link_basis_span.first_derivative(a + b * z);
        let eta_bw = |z: f64| z * link_basis_span.first_derivative(a + b * z);
        let eta_aaw = |z: f64| link_basis_span.second_derivative(a + b * z);
        let eta_abw = |z: f64| z * link_basis_span.second_derivative(a + b * z);
        let eta_bbw = |z: f64| z * z * link_basis_span.second_derivative(a + b * z);
        let eta_aaaw = |z: f64| basis_third + 0.0 * z;
        let eta_bbbw = |z: f64| z * z * z * basis_third;

        // Moment-based values: first and second order.
        let exact_w = cell_first_derivative_from_moments(&coeff_w, &state.moments).expect("w");
        let exact_aw =
            cell_second_derivative_from_moments(cell, &dc_da, &coeff_w, &coeff_aw, &state.moments)
                .expect("aw");
        let exact_bw =
            cell_second_derivative_from_moments(cell, &dc_db, &coeff_w, &coeff_bw, &state.moments)
                .expect("bw");
        let exact_ww =
            cell_second_derivative_from_moments(cell, &coeff_w, &coeff_w, &zero, &state.moments)
                .expect("ww");
        // Third order. As used here, the slots after `cell` are: three
        // first-order polynomials, the pairwise second-order polynomials for
        // direction pairs (1,2), (1,3), (2,3), then the third-order polynomial.
        let exact_aaw = cell_third_derivative_from_moments(
            cell,
            &dc_da,
            &dc_da,
            &coeff_w,
            &dc_daa,
            &coeff_aw,
            &coeff_aw,
            &coeff_aaw,
            &state.moments,
        )
        .expect("aaw");
        let exact_abw = cell_third_derivative_from_moments(
            cell,
            &dc_da,
            &dc_db,
            &coeff_w,
            &dc_dab,
            &coeff_aw,
            &coeff_bw,
            &coeff_abw,
            &state.moments,
        )
        .expect("abw");
        let exact_bbw = cell_third_derivative_from_moments(
            cell,
            &dc_db,
            &dc_db,
            &coeff_w,
            &dc_dbb,
            &coeff_bw,
            &coeff_bw,
            &coeff_bbw,
            &state.moments,
        )
        .expect("bbw");
        let exact_www = cell_third_derivative_from_moments(
            cell,
            &coeff_w,
            &coeff_w,
            &coeff_w,
            &zero,
            &zero,
            &zero,
            &zero,
            &state.moments,
        )
        .expect("www");
        // Fourth order. As used here, the slots after `cell` are: four
        // first-order polynomials; six pairwise second-order polynomials for
        // pairs (1,2), (1,3), (1,4), (2,3), (2,4), (3,4); four third-order
        // polynomials for triples (1,2,3), (1,2,4), (1,3,4), (2,3,4); and the
        // fourth-order polynomial.
        let exact_aaaw = cell_fourth_derivative_from_moments(
            cell,
            &dc_da,
            &dc_da,
            &dc_da,
            &coeff_w,
            &dc_daa,
            &dc_daa,
            &coeff_aw,
            &dc_daa,
            &coeff_aw,
            &coeff_aw,
            &dc_daaa,
            &coeff_aaw,
            &coeff_aaw,
            &coeff_aaw,
            &coeff_aaaw,
            &state.moments,
        )
        .expect("aaaw");
        let exact_aaww = cell_fourth_derivative_from_moments(
            cell,
            &dc_da,
            &dc_da,
            &coeff_w,
            &coeff_w,
            &dc_daa,
            &coeff_aw,
            &coeff_aw,
            &coeff_aw,
            &coeff_aw,
            &zero,
            &coeff_aaw,
            &coeff_aaw,
            &zero,
            &zero,
            &zero,
            &state.moments,
        )
        .expect("aaww");
        let exact_abww = cell_fourth_derivative_from_moments(
            cell,
            &dc_da,
            &dc_db,
            &coeff_w,
            &coeff_w,
            &dc_dab,
            &coeff_aw,
            &coeff_aw,
            &coeff_bw,
            &coeff_bw,
            &zero,
            &coeff_abw,
            &coeff_abw,
            &zero,
            &zero,
            &zero,
            &state.moments,
        )
        .expect("abww");
        let exact_bbww = cell_fourth_derivative_from_moments(
            cell,
            &dc_db,
            &dc_db,
            &coeff_w,
            &coeff_w,
            &dc_dbb,
            &coeff_bw,
            &coeff_bw,
            &coeff_bw,
            &coeff_bw,
            &zero,
            &coeff_bbw,
            &coeff_bbw,
            &zero,
            &zero,
            &zero,
            &state.moments,
        )
        .expect("bbww");
        let exact_bbbw = cell_fourth_derivative_from_moments(
            cell,
            &dc_db,
            &dc_db,
            &dc_db,
            &coeff_w,
            &dc_dbb,
            &dc_dbb,
            &coeff_bw,
            &dc_dbb,
            &coeff_bw,
            &coeff_bw,
            &dc_dbbb,
            &coeff_bbw,
            &coeff_bbw,
            &coeff_bbw,
            &coeff_bbbw,
            &state.moments,
        )
        .expect("bbbw");
        let exact_wwww = cell_fourth_derivative_from_moments(
            cell,
            &coeff_w,
            &coeff_w,
            &coeff_w,
            &coeff_w,
            &zero,
            &zero,
            &zero,
            &zero,
            &zero,
            &zero,
            &zero,
            &zero,
            &zero,
            &zero,
            &zero,
            &state.moments,
        )
        .expect("wwww");

        // Quadrature references: first and second order.
        let numeric_w = simpson_integral(cell.left, cell.right, 5000, |z| {
            eta_w(z) * (-cell.q(z)).exp() * INV_TWO_PI
        });
        let numeric_aw = simpson_integral(cell.left, cell.right, 5000, |z| {
            (eta_aw(z) - cell.eta(z) * eta_a(z) * eta_w(z)) * (-cell.q(z)).exp() * INV_TWO_PI
        });
        let numeric_bw = simpson_integral(cell.left, cell.right, 5000, |z| {
            (eta_bw(z) - cell.eta(z) * eta_b(z) * eta_w(z)) * (-cell.q(z)).exp() * INV_TWO_PI
        });
        let numeric_ww = simpson_integral(cell.left, cell.right, 5000, |z| {
            (-cell.eta(z) * eta_w(z) * eta_w(z)) * (-cell.q(z)).exp() * INV_TWO_PI
        });
        // Third order.
        let numeric_aaw = simpson_integral(cell.left, cell.right, 5000, |z| {
            let eta = cell.eta(z);
            let w_z = eta_w(z);
            let a_z = eta_a(z);
            (eta_aaw(z) - eta * (eta_aa(z) * w_z + 2.0 * eta_aw(z) * a_z)
                + (eta * eta - 1.0) * a_z * a_z * w_z)
                * (-cell.q(z)).exp()
                * INV_TWO_PI
        });
        let numeric_abw = simpson_integral(cell.left, cell.right, 5000, |z| {
            let eta = cell.eta(z);
            let w_z = eta_w(z);
            let a_z = eta_a(z);
            let b_z = eta_b(z);
            (eta_abw(z) - eta * (eta_ab(z) * w_z + eta_aw(z) * b_z + eta_bw(z) * a_z)
                + (eta * eta - 1.0) * a_z * b_z * w_z)
                * (-cell.q(z)).exp()
                * INV_TWO_PI
        });
        let numeric_bbw = simpson_integral(cell.left, cell.right, 5000, |z| {
            let eta = cell.eta(z);
            let w_z = eta_w(z);
            let b_z = eta_b(z);
            (eta_bbw(z) - eta * (eta_bb(z) * w_z + 2.0 * eta_bw(z) * b_z)
                + (eta * eta - 1.0) * b_z * b_z * w_z)
                * (-cell.q(z)).exp()
                * INV_TWO_PI
        });
        let numeric_www = simpson_integral(cell.left, cell.right, 5000, |z| {
            let eta = cell.eta(z);
            let w_z = eta_w(z);
            ((eta * eta - 1.0) * w_z * w_z * w_z) * (-cell.q(z)).exp() * INV_TWO_PI
        });
        // Fourth order. The pure-a third partial of eta is read from the
        // constant coefficient of `dc_daaa`.
        let numeric_aaaw = simpson_integral(cell.left, cell.right, 5000, |z| {
            let eta = cell.eta(z);
            let a_z = eta_a(z);
            let w_z = eta_w(z);
            let aa_z = eta_aa(z);
            let aw_z = eta_aw(z);
            (eta_aaaw(z)
                - eta * ((dc_daaa[0] + 0.0 * z) * w_z + 3.0 * eta_aaw(z) * a_z + 3.0 * aa_z * aw_z)
                + (eta * eta - 1.0) * (3.0 * aa_z * a_z * w_z + 3.0 * aw_z * a_z * a_z)
                + (-eta * eta * eta + 3.0 * eta) * a_z * a_z * a_z * w_z)
                * (-cell.q(z)).exp()
                * INV_TWO_PI
        });
        let numeric_aaww = simpson_integral(cell.left, cell.right, 5000, |z| {
            let eta = cell.eta(z);
            let a_z = eta_a(z);
            let w_z = eta_w(z);
            let aw_z = eta_aw(z);
            (-(2.0 * eta * (eta_aaw(z) * w_z + aw_z * aw_z))
                + (eta * eta - 1.0) * (eta_aa(z) * w_z * w_z + 4.0 * aw_z * a_z * w_z)
                + (-eta * eta * eta + 3.0 * eta) * a_z * a_z * w_z * w_z)
                * (-cell.q(z)).exp()
                * INV_TWO_PI
        });
        let numeric_abww = simpson_integral(cell.left, cell.right, 5000, |z| {
            let eta = cell.eta(z);
            let a_z = eta_a(z);
            let b_z = eta_b(z);
            let w_z = eta_w(z);
            let aw_z = eta_aw(z);
            let bw_z = eta_bw(z);
            (-(2.0 * eta * (eta_abw(z) * w_z + aw_z * bw_z))
                + (eta * eta - 1.0)
                    * (eta_ab(z) * w_z * w_z + 2.0 * aw_z * b_z * w_z + 2.0 * bw_z * a_z * w_z)
                + (-eta * eta * eta + 3.0 * eta) * a_z * b_z * w_z * w_z)
                * (-cell.q(z)).exp()
                * INV_TWO_PI
        });
        let numeric_bbww = simpson_integral(cell.left, cell.right, 5000, |z| {
            let eta = cell.eta(z);
            let b_z = eta_b(z);
            let w_z = eta_w(z);
            let bw_z = eta_bw(z);
            (-(2.0 * eta * (eta_bbw(z) * w_z + bw_z * bw_z))
                + (eta * eta - 1.0) * (eta_bb(z) * w_z * w_z + 4.0 * bw_z * b_z * w_z)
                + (-eta * eta * eta + 3.0 * eta) * b_z * b_z * w_z * w_z)
                * (-cell.q(z)).exp()
                * INV_TWO_PI
        });
        // The pure-b third partial of eta is read from the cubic coefficient
        // of `dc_dbbb`, multiplied by z^3.
        let numeric_bbbw = simpson_integral(cell.left, cell.right, 5000, |z| {
            let eta = cell.eta(z);
            let b_z = eta_b(z);
            let w_z = eta_w(z);
            let bb_z = eta_bb(z);
            let bw_z = eta_bw(z);
            (eta_bbbw(z)
                - eta
                    * ((dc_dbbb[3] * z * z * z) * w_z + 3.0 * eta_bbw(z) * b_z + 3.0 * bb_z * bw_z)
                + (eta * eta - 1.0) * (3.0 * bb_z * b_z * w_z + 3.0 * bw_z * b_z * b_z)
                + (-eta * eta * eta + 3.0 * eta) * b_z * b_z * b_z * w_z)
                * (-cell.q(z)).exp()
                * INV_TWO_PI
        });
        let numeric_wwww = simpson_integral(cell.left, cell.right, 5000, |z| {
            let eta = cell.eta(z);
            let w_z = eta_w(z);
            ((-eta * eta * eta + 3.0 * eta) * w_z * w_z * w_z * w_z)
                * (-cell.q(z)).exp()
                * INV_TWO_PI
        });

        // Tolerances loosen with derivative order: 1e-8 (first), 1e-7 (second),
        // 2e-6 (third), 3e-6 (fourth).
        assert!((exact_w - numeric_w).abs() < 1e-8);
        assert!((exact_aw - numeric_aw).abs() < 1e-7);
        assert!((exact_bw - numeric_bw).abs() < 1e-7);
        assert!((exact_ww - numeric_ww).abs() < 1e-7);
        assert!((exact_aaw - numeric_aaw).abs() < 2e-6);
        assert!((exact_abw - numeric_abw).abs() < 2e-6);
        assert!((exact_bbw - numeric_bbw).abs() < 2e-6);
        assert!((exact_www - numeric_www).abs() < 2e-6);
        assert!((exact_aaaw - numeric_aaaw).abs() < 3e-6);
        assert!((exact_aaww - numeric_aaww).abs() < 3e-6);
        assert!((exact_abww - numeric_abww).abs() < 3e-6);
        assert!((exact_bbww - numeric_bbww).abs() < 3e-6);
        assert!((exact_bbbw - numeric_bbbw).abs() < 3e-6);
        assert!((exact_wwww - numeric_wwww).abs() < 3e-6);
    }
5968
5969 #[test]
5970 fn score_basis_cell_derivatives_match_exact_integrands() {
5971 let score_span = LocalSpanCubic {
5972 left: -0.75,
5973 right: 0.25,
5974 c0: 0.08,
5975 c1: -0.03,
5976 c2: 0.02,
5977 c3: -0.01,
5978 };
5979 let score_basis_span = LocalSpanCubic {
5980 left: -0.75,
5981 right: 0.25,
5982 c0: -0.04,
5983 c1: 0.06,
5984 c2: -0.01,
5985 c3: 0.02,
5986 };
5987 let link_span = LocalSpanCubic {
5988 left: -0.6,
5989 right: 0.9,
5990 c0: -0.05,
5991 c1: 0.04,
5992 c2: -0.02,
5993 c3: 0.015,
5994 };
5995 let a = 0.3;
5996 let b = -0.7;
5997 let coeffs = denested_cell_coefficients(score_span, link_span, a, b);
5998 let cell = DenestedCubicCell {
5999 left: score_span.left,
6000 right: score_span.right,
6001 c0: coeffs[0],
6002 c1: coeffs[1],
6003 c2: coeffs[2],
6004 c3: coeffs[3],
6005 };
6006 let state = evaluate_cell_moments(cell, 24).expect("cell moments");
6007 let (dc_da, dc_db) = denested_cell_coefficient_partials(score_span, link_span, a, b);
6008 let second_partials = denested_cell_second_partials(score_span, link_span, a, b);
6009 let dc_daa = second_partials.0;
6010 let dc_dab = second_partials.1;
6011 let dc_dbb = second_partials.2;
6012 let denested_third = denested_cell_third_partials(link_span);
6013 let dc_dbbb = denested_third.3;
6014
6015 let coeff_h = score_basis_cell_coefficients(score_basis_span, b);
6016 let coeff_bh = score_basis_cell_coefficients(score_basis_span, 1.0);
6017 let zero = [0.0; 4];
6018
6019 let eta_a = |z: f64| 1.0 + link_span.first_derivative(a + b * z);
6020 let eta_b = |z: f64| z + score_span.evaluate(z) + z * link_span.first_derivative(a + b * z);
6021 let eta_ab = |z: f64| z * link_span.second_derivative(a + b * z);
6022 let eta_bb = |z: f64| z * z * link_span.second_derivative(a + b * z);
6023 let eta_h = |z: f64| b * score_basis_span.evaluate(z);
6024 let eta_bh = |z: f64| score_basis_span.evaluate(z);
6025
6026 let exact_h = cell_first_derivative_from_moments(&coeff_h, &state.moments).expect("h");
6027 let exact_ah =
6028 cell_second_derivative_from_moments(cell, &dc_da, &coeff_h, &zero, &state.moments)
6029 .expect("ah");
6030 let exact_bh =
6031 cell_second_derivative_from_moments(cell, &dc_db, &coeff_h, &coeff_bh, &state.moments)
6032 .expect("bh");
6033 let exact_hh =
6034 cell_second_derivative_from_moments(cell, &coeff_h, &coeff_h, &zero, &state.moments)
6035 .expect("hh");
6036 let exact_abh = cell_third_derivative_from_moments(
6037 cell,
6038 &dc_da,
6039 &dc_db,
6040 &coeff_h,
6041 &dc_dab,
6042 &zero,
6043 &coeff_bh,
6044 &zero,
6045 &state.moments,
6046 )
6047 .expect("abh");
6048 let exact_bbh = cell_third_derivative_from_moments(
6049 cell,
6050 &dc_db,
6051 &dc_db,
6052 &coeff_h,
6053 &dc_dbb,
6054 &coeff_bh,
6055 &coeff_bh,
6056 &zero,
6057 &state.moments,
6058 )
6059 .expect("bbh");
6060 let exact_bhh = cell_third_derivative_from_moments(
6061 cell,
6062 &dc_db,
6063 &coeff_h,
6064 &coeff_h,
6065 &coeff_bh,
6066 &coeff_bh,
6067 &zero,
6068 &zero,
6069 &state.moments,
6070 )
6071 .expect("bhh");
6072 let exact_hhh = cell_third_derivative_from_moments(
6073 cell,
6074 &coeff_h,
6075 &coeff_h,
6076 &coeff_h,
6077 &zero,
6078 &zero,
6079 &zero,
6080 &zero,
6081 &state.moments,
6082 )
6083 .expect("hhh");
6084 let exact_bbbh = cell_fourth_derivative_from_moments(
6085 cell,
6086 &dc_db,
6087 &dc_db,
6088 &dc_db,
6089 &coeff_h,
6090 &dc_dbb,
6091 &dc_dbb,
6092 &coeff_bh,
6093 &dc_dbb,
6094 &coeff_bh,
6095 &coeff_bh,
6096 &dc_dbbb,
6097 &zero,
6098 &zero,
6099 &zero,
6100 &zero,
6101 &state.moments,
6102 )
6103 .expect("bbbh");
6104 let exact_aahh = cell_fourth_derivative_from_moments(
6105 cell,
6106 &dc_da,
6107 &dc_da,
6108 &coeff_h,
6109 &coeff_h,
6110 &dc_daa,
6111 &zero,
6112 &zero,
6113 &zero,
6114 &zero,
6115 &zero,
6116 &zero,
6117 &zero,
6118 &zero,
6119 &zero,
6120 &zero,
6121 &state.moments,
6122 )
6123 .expect("aahh");
6124 let exact_abhh = cell_fourth_derivative_from_moments(
6125 cell,
6126 &dc_da,
6127 &dc_db,
6128 &coeff_h,
6129 &coeff_h,
6130 &dc_dab,
6131 &zero,
6132 &zero,
6133 &coeff_bh,
6134 &coeff_bh,
6135 &zero,
6136 &zero,
6137 &zero,
6138 &zero,
6139 &zero,
6140 &zero,
6141 &state.moments,
6142 )
6143 .expect("abhh");
6144 let exact_bbhh = cell_fourth_derivative_from_moments(
6145 cell,
6146 &dc_db,
6147 &dc_db,
6148 &coeff_h,
6149 &coeff_h,
6150 &dc_dbb,
6151 &coeff_bh,
6152 &coeff_bh,
6153 &coeff_bh,
6154 &coeff_bh,
6155 &zero,
6156 &zero,
6157 &zero,
6158 &zero,
6159 &zero,
6160 &zero,
6161 &state.moments,
6162 )
6163 .expect("bbhh");
6164 let exact_bhhh = cell_fourth_derivative_from_moments(
6165 cell,
6166 &dc_db,
6167 &coeff_h,
6168 &coeff_h,
6169 &coeff_h,
6170 &coeff_bh,
6171 &coeff_bh,
6172 &coeff_bh,
6173 &zero,
6174 &zero,
6175 &zero,
6176 &zero,
6177 &zero,
6178 &zero,
6179 &zero,
6180 &zero,
6181 &state.moments,
6182 )
6183 .expect("bhhh");
6184 let exact_hhhh = cell_fourth_derivative_from_moments(
6185 cell,
6186 &coeff_h,
6187 &coeff_h,
6188 &coeff_h,
6189 &coeff_h,
6190 &zero,
6191 &zero,
6192 &zero,
6193 &zero,
6194 &zero,
6195 &zero,
6196 &zero,
6197 &zero,
6198 &zero,
6199 &zero,
6200 &zero,
6201 &state.moments,
6202 )
6203 .expect("hhhh");
6204
6205 let numeric_h = simpson_integral(cell.left, cell.right, 5000, |z| {
6206 eta_h(z) * (-cell.q(z)).exp() * INV_TWO_PI
6207 });
6208 let numeric_ah = simpson_integral(cell.left, cell.right, 5000, |z| {
6209 (-cell.eta(z) * eta_a(z) * eta_h(z)) * (-cell.q(z)).exp() * INV_TWO_PI
6210 });
6211 let numeric_bh = simpson_integral(cell.left, cell.right, 5000, |z| {
6212 (eta_bh(z) - cell.eta(z) * eta_b(z) * eta_h(z)) * (-cell.q(z)).exp() * INV_TWO_PI
6213 });
6214 let numeric_hh = simpson_integral(cell.left, cell.right, 5000, |z| {
6215 (-cell.eta(z) * eta_h(z) * eta_h(z)) * (-cell.q(z)).exp() * INV_TWO_PI
6216 });
6217 let numeric_abh = simpson_integral(cell.left, cell.right, 5000, |z| {
6218 let eta = cell.eta(z);
6219 (-(eta * (eta_ab(z) * eta_h(z) + eta_bh(z) * eta_a(z)))
6220 + (eta * eta - 1.0) * eta_a(z) * eta_b(z) * eta_h(z))
6221 * (-cell.q(z)).exp()
6222 * INV_TWO_PI
6223 });
6224 let numeric_bbh = simpson_integral(cell.left, cell.right, 5000, |z| {
6225 let eta = cell.eta(z);
6226 (-(eta * (eta_bb(z) * eta_h(z) + 2.0 * eta_bh(z) * eta_b(z)))
6227 + (eta * eta - 1.0) * eta_b(z) * eta_b(z) * eta_h(z))
6228 * (-cell.q(z)).exp()
6229 * INV_TWO_PI
6230 });
6231 let numeric_bhh = simpson_integral(cell.left, cell.right, 5000, |z| {
6232 let eta = cell.eta(z);
6233 (-(2.0 * eta * eta_bh(z) * eta_h(z))
6234 + (eta * eta - 1.0) * eta_b(z) * eta_h(z) * eta_h(z))
6235 * (-cell.q(z)).exp()
6236 * INV_TWO_PI
6237 });
6238 let numeric_hhh = simpson_integral(cell.left, cell.right, 5000, |z| {
6239 let eta = cell.eta(z);
6240 ((eta * eta - 1.0) * eta_h(z) * eta_h(z) * eta_h(z)) * (-cell.q(z)).exp() * INV_TWO_PI
6241 });
6242 let numeric_bbbh = simpson_integral(cell.left, cell.right, 5000, |z| {
6243 let eta = cell.eta(z);
6244 let b_z = eta_b(z);
6245 let h_z = eta_h(z);
6246 let bb_z = eta_bb(z);
6247 let bh_z = eta_bh(z);
6248 (-(eta * ((dc_dbbb[3] * z * z * z) * h_z + 3.0 * bb_z * bh_z))
6249 + (eta * eta - 1.0) * (3.0 * bb_z * b_z * h_z + 3.0 * bh_z * b_z * b_z)
6250 + (-eta * eta * eta + 3.0 * eta) * b_z * b_z * b_z * h_z)
6251 * (-cell.q(z)).exp()
6252 * INV_TWO_PI
6253 });
6254 let numeric_aahh = simpson_integral(cell.left, cell.right, 5000, |z| {
6255 let eta = cell.eta(z);
6256 let a_z = eta_a(z);
6257 let h_z = eta_h(z);
6258 ((eta * eta - 1.0) * polynomial_value(&dc_daa, z) * h_z * h_z
6259 + (-eta * eta * eta + 3.0 * eta) * a_z * a_z * h_z * h_z)
6260 * (-cell.q(z)).exp()
6261 * INV_TWO_PI
6262 });
6263 let numeric_abhh = simpson_integral(cell.left, cell.right, 5000, |z| {
6264 let eta = cell.eta(z);
6265 let a_z = eta_a(z);
6266 let b_z = eta_b(z);
6267 let h_z = eta_h(z);
6268 ((eta * eta - 1.0) * (eta_ab(z) * h_z * h_z + 2.0 * eta_bh(z) * a_z * h_z)
6269 + (-eta * eta * eta + 3.0 * eta) * a_z * b_z * h_z * h_z)
6270 * (-cell.q(z)).exp()
6271 * INV_TWO_PI
6272 });
6273 let numeric_bbhh = simpson_integral(cell.left, cell.right, 5000, |z| {
6274 let eta = cell.eta(z);
6275 let b_z = eta_b(z);
6276 let h_z = eta_h(z);
6277 let bh_z = eta_bh(z);
6278 (-(2.0 * eta * bh_z * bh_z)
6279 + (eta * eta - 1.0) * (eta_bb(z) * h_z * h_z + 4.0 * bh_z * b_z * h_z)
6280 + (-eta * eta * eta + 3.0 * eta) * b_z * b_z * h_z * h_z)
6281 * (-cell.q(z)).exp()
6282 * INV_TWO_PI
6283 });
6284 let numeric_bhhh = simpson_integral(cell.left, cell.right, 5000, |z| {
6285 let eta = cell.eta(z);
6286 let h_z = eta_h(z);
6287 (-(eta * (3.0 * eta_bh(z) * h_z * h_z))
6288 + (eta * eta - 1.0) * (3.0 * eta_bh(z) * h_z * h_z)
6289 + (-eta * eta * eta + 3.0 * eta) * eta_b(z) * h_z * h_z * h_z)
6290 * (-cell.q(z)).exp()
6291 * INV_TWO_PI
6292 });
6293 let numeric_hhhh = simpson_integral(cell.left, cell.right, 5000, |z| {
6294 let eta = cell.eta(z);
6295 let h_z = eta_h(z);
6296 ((-eta * eta * eta + 3.0 * eta) * h_z * h_z * h_z * h_z)
6297 * (-cell.q(z)).exp()
6298 * INV_TWO_PI
6299 });
6300
6301 assert!((exact_h - numeric_h).abs() < 1e-8);
6302 assert!((exact_ah - numeric_ah).abs() < 1e-7);
6303 assert!((exact_bh - numeric_bh).abs() < 1e-7);
6304 assert!((exact_hh - numeric_hh).abs() < 1e-7);
6305 assert!((exact_abh - numeric_abh).abs() < 2e-6);
6306 assert!((exact_bbh - numeric_bbh).abs() < 2e-6);
6307 assert!((exact_bhh - numeric_bhh).abs() < 2e-6);
6308 assert!((exact_hhh - numeric_hhh).abs() < 2e-6);
6309 assert!((exact_bbbh - numeric_bbbh).abs() < 3e-6);
6310 assert!((exact_aahh - numeric_aahh).abs() < 3e-6);
6311 assert!((exact_abhh - numeric_abhh).abs() < 3e-6);
6312 assert!((exact_bbhh - numeric_bbhh).abs() < 3e-6);
6313 assert!((exact_bhhh - numeric_bhhh).abs() < 3e-6);
6314 assert!((exact_hhhh - numeric_hhhh).abs() < 3e-6);
6315 }
6316
    /// Cross-checks the moment-based second-, third- and fourth-derivative
    /// kernels for directions that mix a score-basis perturbation (`h`) with a
    /// link-basis perturbation (`w`), optionally combined with the scalar
    /// parameters `a` and `b`.
    ///
    /// Each `exact_*` value comes from a `cell_*_derivative_from_moments`
    /// call; the matching `numeric_*` value integrates a hand-written
    /// integrand over `[cell.left, cell.right]` with `simpson_integral`.
    /// The two must agree within the absolute tolerances asserted at the end.
    #[test]
    fn cross_basis_cell_derivatives_match_exact_integrands() {
        // Fixture cubics. The two score spans share one interval and the two
        // link spans share another.
        let score_span = LocalSpanCubic {
            left: -0.75,
            right: 0.25,
            c0: 0.08,
            c1: -0.03,
            c2: 0.02,
            c3: -0.01,
        };
        let score_basis_span = LocalSpanCubic {
            left: -0.75,
            right: 0.25,
            c0: -0.04,
            c1: 0.06,
            c2: -0.01,
            c3: 0.02,
        };
        let link_span = LocalSpanCubic {
            left: -0.6,
            right: 0.9,
            c0: -0.05,
            c1: 0.04,
            c2: -0.02,
            c3: 0.015,
        };
        let link_basis_span = LocalSpanCubic {
            left: -0.6,
            right: 0.9,
            c0: 0.02,
            c1: -0.01,
            c2: 0.03,
            c3: -0.02,
        };
        let a = 0.3;
        let b = -0.7;
        // The cell lives on the score span's interval and takes its cubic
        // coefficients from `denested_cell_coefficients`.
        let coeffs = denested_cell_coefficients(score_span, link_span, a, b);
        let cell = DenestedCubicCell {
            left: score_span.left,
            right: score_span.right,
            c0: coeffs[0],
            c1: coeffs[1],
            c2: coeffs[2],
            c3: coeffs[3],
        };
        // Moments up to degree 24 are shared by every kernel call below.
        let state = evaluate_cell_moments(cell, 24).expect("cell moments");
        let (dc_da, dc_db) = denested_cell_coefficient_partials(score_span, link_span, a, b);
        let (dc_daa, dc_dab, _) = denested_cell_second_partials(score_span, link_span, a, b);

        // Coefficient arrays handed to the kernels for each direction.
        // `coeff_bh` is built with a factor of 1.0 where `coeff_h` uses `b`.
        let coeff_h = score_basis_cell_coefficients(score_basis_span, b);
        let coeff_bh = score_basis_cell_coefficients(score_basis_span, 1.0);
        let coeff_w = link_basis_cell_coefficients(link_basis_span, a, b);
        let (coeff_aw, coeff_bw) = link_basis_cell_coefficient_partials(link_basis_span, a, b);
        let (coeff_aaw, coeff_abw, _) = link_basis_cell_second_partials(link_basis_span, a, b);
        // Passed wherever a mixed partial contributes nothing.
        let zero = [0.0; 4];

        // Pointwise quantities used to assemble the numeric integrands. The
        // link cubics are evaluated at `a + b * z`, the score cubics at `z`.
        let eta_a = |z: f64| 1.0 + link_span.first_derivative(a + b * z);
        let eta_b = |z: f64| z + score_span.evaluate(z) + z * link_span.first_derivative(a + b * z);
        let eta_h = |z: f64| b * score_basis_span.evaluate(z);
        let eta_bh = |z: f64| score_basis_span.evaluate(z);
        let eta_w = |z: f64| link_basis_span.evaluate(a + b * z);
        let eta_ab = |z: f64| z * link_span.second_derivative(a + b * z);
        let eta_aw = |z: f64| link_basis_span.first_derivative(a + b * z);
        let eta_bw = |z: f64| z * link_basis_span.first_derivative(a + b * z);

        // NOTE(review): the argument layout assumed below is inferred from how
        // this test pairs arguments with integrand terms, not from the kernel
        // signatures -- confirm against their definitions. Third-derivative
        // calls appear to take three first-order arrays, then the pairwise
        // mixed partials in order (1,2), (1,3), (2,3), then the third-order
        // partial. Fourth-derivative calls appear to take four first-order
        // arrays, six pairwise partials in order (1,2), (1,3), (1,4), (2,3),
        // (2,4), (3,4), four triple partials in order (1,2,3), (1,2,4),
        // (1,3,4), (2,3,4), and finally the fourth-order partial.
        let exact_hw =
            cell_second_derivative_from_moments(cell, &coeff_h, &coeff_w, &zero, &state.moments)
                .expect("hw");
        let exact_ahw = cell_third_derivative_from_moments(
            cell,
            &dc_da,
            &coeff_h,
            &coeff_w,
            &zero,
            &coeff_aw,
            &zero,
            &zero,
            &state.moments,
        )
        .expect("ahw");
        let exact_bhw = cell_third_derivative_from_moments(
            cell,
            &dc_db,
            &coeff_h,
            &coeff_w,
            &coeff_bh,
            &coeff_bw,
            &zero,
            &zero,
            &state.moments,
        )
        .expect("bhw");
        let exact_hhw = cell_third_derivative_from_moments(
            cell,
            &coeff_h,
            &coeff_h,
            &coeff_w,
            &zero,
            &zero,
            &zero,
            &zero,
            &state.moments,
        )
        .expect("hhw");
        let exact_hww = cell_third_derivative_from_moments(
            cell,
            &coeff_h,
            &coeff_w,
            &coeff_w,
            &zero,
            &zero,
            &zero,
            &zero,
            &state.moments,
        )
        .expect("hww");
        let exact_aahw = cell_fourth_derivative_from_moments(
            cell,
            &dc_da,
            &dc_da,
            &coeff_h,
            &coeff_w,
            &dc_daa,
            &zero,
            &coeff_aw,
            &zero,
            &coeff_aw,
            &zero,
            &zero,
            &coeff_aaw,
            &zero,
            &zero,
            &zero,
            &state.moments,
        )
        .expect("aahw");
        let exact_hhww = cell_fourth_derivative_from_moments(
            cell,
            &coeff_h,
            &coeff_h,
            &coeff_w,
            &coeff_w,
            &zero,
            &zero,
            &zero,
            &zero,
            &zero,
            &zero,
            &zero,
            &zero,
            &zero,
            &zero,
            &zero,
            &state.moments,
        )
        .expect("hhww");
        let exact_hhhw = cell_fourth_derivative_from_moments(
            cell,
            &coeff_h,
            &coeff_h,
            &coeff_h,
            &coeff_w,
            &zero,
            &zero,
            &zero,
            &zero,
            &zero,
            &zero,
            &zero,
            &zero,
            &zero,
            &zero,
            &zero,
            &state.moments,
        )
        .expect("hhhw");
        let exact_abhw = cell_fourth_derivative_from_moments(
            cell,
            &dc_da,
            &dc_db,
            &coeff_h,
            &coeff_w,
            &dc_dab,
            &zero,
            &coeff_aw,
            &coeff_bh,
            &coeff_bw,
            &zero,
            &zero,
            &coeff_abw,
            &zero,
            &zero,
            &zero,
            &state.moments,
        )
        .expect("abhw");
        let exact_ahww = cell_fourth_derivative_from_moments(
            cell,
            &dc_da,
            &coeff_h,
            &coeff_w,
            &coeff_w,
            &zero,
            &coeff_aw,
            &coeff_aw,
            &zero,
            &zero,
            &zero,
            &zero,
            &zero,
            &zero,
            &zero,
            &zero,
            &state.moments,
        )
        .expect("ahww");
        let exact_bhww = cell_fourth_derivative_from_moments(
            cell,
            &dc_db,
            &coeff_h,
            &coeff_w,
            &coeff_w,
            &coeff_bh,
            &coeff_bw,
            &coeff_bw,
            &zero,
            &zero,
            &zero,
            &zero,
            &zero,
            &zero,
            &zero,
            &zero,
            &state.moments,
        )
        .expect("bhww");
        let exact_hwww = cell_fourth_derivative_from_moments(
            cell,
            &coeff_h,
            &coeff_w,
            &coeff_w,
            &coeff_w,
            &zero,
            &zero,
            &zero,
            &zero,
            &zero,
            &zero,
            &zero,
            &zero,
            &zero,
            &zero,
            &zero,
            &state.moments,
        )
        .expect("hwww");

        // Numeric references. Every integrand ends with the common factor
        // `(-cell.q(z)).exp() * INV_TWO_PI`. In front of it, products of
        // first-order terms are multiplied by `-eta` (two factors),
        // `eta^2 - 1` (three factors) or `-eta^3 + 3 * eta` (four factors);
        // each mixed partial that replaces a pair of first-order terms moves
        // the product one step down that list. The literal 5000 is passed
        // straight to `simpson_integral` (presumably the panel count --
        // confirm against the helper).
        let numeric_hw = simpson_integral(cell.left, cell.right, 5000, |z| {
            (-cell.eta(z) * eta_h(z) * eta_w(z)) * (-cell.q(z)).exp() * INV_TWO_PI
        });
        let numeric_ahw = simpson_integral(cell.left, cell.right, 5000, |z| {
            let eta = cell.eta(z);
            (-(eta * eta_aw(z) * eta_h(z)) + (eta * eta - 1.0) * eta_a(z) * eta_h(z) * eta_w(z))
                * (-cell.q(z)).exp()
                * INV_TWO_PI
        });
        let numeric_bhw = simpson_integral(cell.left, cell.right, 5000, |z| {
            let eta = cell.eta(z);
            (-(eta * (eta_bh(z) * eta_w(z) + eta_bw(z) * eta_h(z)))
                + (eta * eta - 1.0) * eta_b(z) * eta_h(z) * eta_w(z))
                * (-cell.q(z)).exp()
                * INV_TWO_PI
        });
        let numeric_hhw = simpson_integral(cell.left, cell.right, 5000, |z| {
            let eta = cell.eta(z);
            ((eta * eta - 1.0) * eta_h(z) * eta_h(z) * eta_w(z)) * (-cell.q(z)).exp() * INV_TWO_PI
        });
        let numeric_hww = simpson_integral(cell.left, cell.right, 5000, |z| {
            let eta = cell.eta(z);
            ((eta * eta - 1.0) * eta_h(z) * eta_w(z) * eta_w(z)) * (-cell.q(z)).exp() * INV_TWO_PI
        });
        // The `aa` and `aaw` partials have no closure above, so their
        // coefficient arrays are evaluated with `polynomial_value`.
        let numeric_aahw = simpson_integral(cell.left, cell.right, 5000, |z| {
            let eta = cell.eta(z);
            (-(eta * polynomial_value(&coeff_aaw, z) * eta_h(z))
                + (eta * eta - 1.0)
                    * (polynomial_value(&dc_daa, z) * eta_h(z) * eta_w(z)
                        + 2.0 * eta_aw(z) * eta_a(z) * eta_h(z))
                + (-eta * eta * eta + 3.0 * eta) * eta_a(z) * eta_a(z) * eta_h(z) * eta_w(z))
                * (-cell.q(z)).exp()
                * INV_TWO_PI
        });
        let numeric_hhww = simpson_integral(cell.left, cell.right, 5000, |z| {
            let eta = cell.eta(z);
            ((-eta * eta * eta + 3.0 * eta) * eta_h(z) * eta_h(z) * eta_w(z) * eta_w(z))
                * (-cell.q(z)).exp()
                * INV_TWO_PI
        });
        let numeric_hhhw = simpson_integral(cell.left, cell.right, 5000, |z| {
            let eta = cell.eta(z);
            ((-eta * eta * eta + 3.0 * eta) * eta_h(z) * eta_h(z) * eta_h(z) * eta_w(z))
                * (-cell.q(z)).exp()
                * INV_TWO_PI
        });
        let numeric_abhw = simpson_integral(cell.left, cell.right, 5000, |z| {
            let eta = cell.eta(z);
            (-(eta * polynomial_value(&coeff_abw, z) * eta_h(z) + eta * eta_aw(z) * eta_bh(z))
                + (eta * eta - 1.0)
                    * (eta_ab(z) * eta_h(z) * eta_w(z)
                        + eta_aw(z) * eta_b(z) * eta_h(z)
                        + eta_bh(z) * eta_a(z) * eta_w(z)
                        + eta_bw(z) * eta_a(z) * eta_h(z))
                + (-eta * eta * eta + 3.0 * eta) * eta_a(z) * eta_b(z) * eta_h(z) * eta_w(z))
                * (-cell.q(z)).exp()
                * INV_TWO_PI
        });
        let numeric_ahww = simpson_integral(cell.left, cell.right, 5000, |z| {
            let eta = cell.eta(z);
            (2.0 * (eta * eta - 1.0) * eta_aw(z) * eta_h(z) * eta_w(z)
                + (-eta * eta * eta + 3.0 * eta) * eta_a(z) * eta_h(z) * eta_w(z) * eta_w(z))
                * (-cell.q(z)).exp()
                * INV_TWO_PI
        });
        let numeric_bhww = simpson_integral(cell.left, cell.right, 5000, |z| {
            let eta = cell.eta(z);
            let h_z = eta_h(z);
            let w_z = eta_w(z);
            ((eta * eta - 1.0) * (eta_bh(z) * w_z * w_z + 2.0 * eta_bw(z) * h_z * w_z)
                + (-eta * eta * eta + 3.0 * eta) * eta_b(z) * h_z * w_z * w_z)
                * (-cell.q(z)).exp()
                * INV_TWO_PI
        });
        let numeric_hwww = simpson_integral(cell.left, cell.right, 5000, |z| {
            let eta = cell.eta(z);
            ((-eta * eta * eta + 3.0 * eta) * eta_h(z) * eta_w(z) * eta_w(z) * eta_w(z))
                * (-cell.q(z)).exp()
                * INV_TWO_PI
        });

        // Absolute tolerances loosen with derivative order: 1e-7 for the
        // second-order value, 2e-6 for third order, 3e-6 for fourth order.
        assert!((exact_hw - numeric_hw).abs() < 1e-7);
        assert!((exact_ahw - numeric_ahw).abs() < 2e-6);
        assert!((exact_bhw - numeric_bhw).abs() < 2e-6);
        assert!((exact_hhw - numeric_hhw).abs() < 2e-6);
        assert!((exact_hww - numeric_hww).abs() < 2e-6);
        assert!((exact_aahw - numeric_aahw).abs() < 3e-6);
        assert!((exact_hhww - numeric_hhww).abs() < 3e-6);
        assert!((exact_hhhw - numeric_hhhw).abs() < 3e-6);
        assert!((exact_abhw - numeric_abhw).abs() < 3e-6);
        assert!((exact_ahww - numeric_ahww).abs() < 3e-6);
        assert!((exact_bhww - numeric_bhww).abs() < 3e-6);
        assert!((exact_hwww - numeric_hwww).abs() < 3e-6);
    }
6668
6669 #[test]
6670 fn cell_moment_scratch_reuses_buffers_under_margslope_like_pressure() {
6671 let cells = [
6672 DenestedCubicCell {
6673 left: -1.2,
6674 right: -0.35,
6675 c0: 0.18,
6676 c1: 0.72,
6677 c2: -0.045,
6678 c3: 0.018,
6679 },
6680 DenestedCubicCell {
6681 left: -0.35,
6682 right: 0.48,
6683 c0: -0.08,
6684 c1: 0.91,
6685 c2: 0.038,
6686 c3: -0.014,
6687 },
6688 DenestedCubicCell {
6689 left: 0.48,
6690 right: 1.4,
6691 c0: 0.11,
6692 c1: 0.83,
6693 c2: 0.022,
6694 c3: 0.012,
6695 },
6696 ];
6697 let mut scratch = CellMomentScratch::with_capacity(MAX_AFFINE_ANCHOR_DEGREE);
6698 for cell in cells {
6699 let baseline = evaluate_cell_moments(cell, 9).expect("baseline moments");
6700 let scratch_state =
6701 evaluate_cell_moments_with_scratch(cell, 9, &mut scratch).expect("scratch moments");
6702 assert_eq!(baseline.branch, scratch_state.branch);
6703 assert!((baseline.value - scratch_state.value).abs() <= 1e-10);
6704 assert_eq!(baseline.moments.len(), scratch_state.moments.len());
6705 for (lhs, rhs) in baseline.moments.iter().zip(scratch_state.moments.iter()) {
6706 assert!((lhs - rhs).abs() <= 1e-10, "{lhs} vs {rhs}");
6707 }
6708 }
6709
6710 reset_cell_moment_test_reallocs();
6711 let mut checksum = 0.0;
6712 for i in 0..5_000 {
6713 let cell = cells[i % cells.len()];
6714 let state = evaluate_cell_moments_with_scratch(cell, 9, &mut scratch)
6715 .expect("scratch moments under repeated pressure");
6716 checksum += state.value + state.moments[0] * 1e-12;
6717 }
6718 assert!(checksum.is_finite());
6719 assert_eq!(
6720 cell_moment_test_reallocs(),
6721 0,
6722 "scratch-backed inner cell-moment calls should not grow Vec buffers"
6723 );
6724 }
6725
6726 #[test]
6727 fn evaluate_cell_moments_matches_numeric_integrals() {
6728 let cell = DenestedCubicCell {
6729 left: -0.9,
6730 right: 0.8,
6731 c0: 0.15,
6732 c1: -0.35,
6733 c2: 0.11,
6734 c3: -0.07,
6735 };
6736 let state = evaluate_cell_moments(cell, 6).expect("cell moments");
6737 let value_numeric = simpson_integral(cell.left, cell.right, 4000, |z| {
6738 super::normal_cdf(cell.eta(z)) * normal_pdf(z)
6739 });
6740 assert!((state.value - value_numeric).abs() < 1e-9);
6741 for degree in 0..=6 {
6742 let target = simpson_integral(cell.left, cell.right, 4000, |z| {
6743 z.powi(degree as i32) * (-cell.q(z)).exp()
6744 });
6745 assert!((state.moments[degree] - target).abs() < 1e-9);
6746 }
6747 }
6748
6749 #[test]
6750 fn partition_builder_moves_link_preimages_with_intercept() {
6751 let score_breaks = [-2.0, -1.0, 0.0, 1.0, 2.0];
6752 let link_breaks = [-1.5, -0.5, 0.5, 1.5];
6753 let score_span = |z: f64| {
6754 let left = if z < -1.0 {
6755 -2.0
6756 } else if z < 0.0 {
6757 -1.0
6758 } else if z < 1.0 {
6759 0.0
6760 } else {
6761 1.0
6762 };
6763 Ok(LocalSpanCubic {
6764 left,
6765 right: left + 1.0,
6766 c0: 0.1,
6767 c1: 0.2,
6768 c2: 0.0,
6769 c3: 0.0,
6770 })
6771 };
6772 let link_span = |u: f64| {
6773 let left = if u < -0.5 {
6774 -1.5
6775 } else if u < 0.5 {
6776 -0.5
6777 } else {
6778 0.5
6779 };
6780 Ok(LocalSpanCubic {
6781 left,
6782 right: left + 1.0,
6783 c0: -0.05,
6784 c1: 0.1,
6785 c2: 0.0,
6786 c3: 0.0,
6787 })
6788 };
6789 let cells_a0 = build_denested_partition_cells(
6790 0.25,
6791 0.9,
6792 &score_breaks,
6793 &link_breaks,
6794 score_span,
6795 link_span,
6796 )
6797 .expect("cells a0");
6798 let cells_a1 = build_denested_partition_cells(
6799 0.55,
6800 0.9,
6801 &score_breaks,
6802 &link_breaks,
6803 score_span,
6804 link_span,
6805 )
6806 .expect("cells a1");
6807 assert!(cells_a0.len() >= score_breaks.len() - 1);
6808 assert!(
6809 cells_a0
6810 .windows(2)
6811 .all(|w| (w[0].cell.right - w[1].cell.left).abs() <= 1e-12)
6812 );
6813 assert!(
6814 cells_a0
6815 .iter()
6816 .zip(cells_a1.iter())
6817 .any(|(lhs, rhs)| (lhs.cell.left - rhs.cell.left).abs() > 1e-10)
6818 );
6819 assert!(cells_a0.first().unwrap().cell.left.is_infinite());
6820 assert!(cells_a0.last().unwrap().cell.right.is_infinite());
6821 }
6822
6823 #[test]
6824 fn partition_builder_without_breaks_returns_single_global_cell() {
6825 let cells = build_denested_partition_cells_with_tails(
6826 0.3,
6827 -0.4,
6828 &[],
6829 &[],
6830 |z| {
6831 if z.is_nan() {
6832 return Err("probe z is NaN".to_string());
6833 }
6834 Ok(LocalSpanCubic {
6835 left: 0.0,
6836 right: 1.0,
6837 c0: 0.0,
6838 c1: 0.0,
6839 c2: 0.0,
6840 c3: 0.0,
6841 })
6842 },
6843 |u| {
6844 if u.is_nan() {
6845 return Err("probe u is NaN".to_string());
6846 }
6847 Ok(LocalSpanCubic {
6848 left: 0.0,
6849 right: 1.0,
6850 c0: 0.0,
6851 c1: 0.0,
6852 c2: 0.0,
6853 c3: 0.0,
6854 })
6855 },
6856 )
6857 .expect("global cell");
6858 assert_eq!(cells.len(), 1);
6859 assert_eq!(cells[0].cell.left, f64::NEG_INFINITY);
6860 assert_eq!(cells[0].cell.right, f64::INFINITY);
6861 assert!(cells[0].cell.c2.abs() < 1e-12);
6862 assert!(cells[0].cell.c3.abs() < 1e-12);
6863 }
6864
6865 #[test]
6866 fn polynomial_integral_helper_matches_moment_sum() {
6867 let cell = DenestedCubicCell {
6868 left: -1.5,
6869 right: 1.25,
6870 c0: 0.2,
6871 c1: -0.4,
6872 c2: 0.15,
6873 c3: 0.03,
6874 };
6875 let state = evaluate_cell_moments(cell, 8).expect("cell moments");
6876 let coeffs = [1.5, -0.25, 0.75, 0.1];
6877 let expected = INV_TWO_PI
6878 * coeffs
6879 .iter()
6880 .enumerate()
6881 .map(|(idx, coeff)| coeff * state.moments[idx])
6882 .sum::<f64>();
6883 let got = cell_polynomial_integral_from_moments(&coeffs, &state.moments, "test poly")
6884 .expect("poly integral");
6885 assert!((got - expected).abs() < 1e-14);
6886 }
6887
6888 #[test]
6889 fn batched_cell_moment_max_degree_matches_direct_non_affine_grid() {
6890 let cells = [
6891 DenestedCubicCell {
6892 left: -2.0,
6893 right: -0.25,
6894 c0: -0.7,
6895 c1: 0.8,
6896 c2: 0.015,
6897 c3: -0.004,
6898 },
6899 DenestedCubicCell {
6900 left: -0.5,
6901 right: 0.75,
6902 c0: 0.2,
6903 c1: -0.35,
6904 c2: -0.025,
6905 c3: 0.0,
6906 },
6907 DenestedCubicCell {
6908 left: 0.1,
6909 right: 1.6,
6910 c0: 0.4,
6911 c1: 0.25,
6912 c2: 0.01,
6913 c3: 0.006,
6914 },
6915 DenestedCubicCell {
6916 left: -1.25,
6917 right: 2.25,
6918 c0: -0.1,
6919 c1: 0.55,
6920 c2: -0.012,
6921 c3: 0.003,
6922 },
6923 ];
6924 for cell in cells {
6925 let branch = branch_cell(cell).expect("branch");
6926 if branch == ExactCellBranch::Affine {
6927 continue;
6928 }
6929 let batched =
6930 evaluate_non_affine_cell_state(cell, branch, 21).expect("degree-21 state");
6931 for degree in [9usize, 15, 21] {
6932 let direct =
6933 evaluate_non_affine_cell_state(cell, branch, degree).expect("direct state");
6934 assert_eq!(batched.branch, direct.branch);
6935 let denom = direct.value.abs().max(1.0);
6936 assert!(((batched.value - direct.value).abs() / denom) < 1e-10);
6937 for k in 0..=degree {
6938 let denom = direct.moments[k].abs().max(1.0);
6939 let rel = (batched.moments[k] - direct.moments[k]).abs() / denom;
6940 assert!(
6941 rel < 1e-10,
6942 "cell={cell:?} degree={degree} moment={k} rel={rel:e}"
6943 );
6944 }
6945 }
6946 }
6947 }
6948
6949 #[test]
6950 fn derivative_moment_evaluator_matches_value_evaluator_moments() {
6951 let cells = [
6952 DenestedCubicCell {
6953 left: -2.0,
6954 right: -0.4,
6955 c0: 0.15,
6956 c1: -0.8,
6957 c2: 0.0,
6958 c3: 0.0,
6959 },
6960 DenestedCubicCell {
6961 left: -0.75,
6962 right: 1.4,
6963 c0: -0.25,
6964 c1: 0.6,
6965 c2: 0.12,
6966 c3: 0.0,
6967 },
6968 DenestedCubicCell {
6969 left: -1.1,
6970 right: 0.9,
6971 c0: 0.35,
6972 c1: -0.3,
6973 c2: 0.05,
6974 c3: -0.015,
6975 },
6976 ];
6977 for cell in cells {
6978 for degree in [4usize, 9, 15, 21] {
6979 let full = evaluate_cell_moments_uncached(cell, degree).expect("full moments");
6980 let derivative = evaluate_cell_derivative_moments_uncached(cell, degree)
6981 .expect("derivative moments");
6982 assert_eq!(full.branch, derivative.branch);
6983 assert_eq!(full.moments.len(), derivative.moments.len());
6984 for k in 0..full.moments.len() {
6985 assert_eq!(full.moments[k].to_bits(), derivative.moments[k].to_bits());
6986 }
6987 }
6988 }
6989 }
6990
6991 #[test]
6992 fn cell_moment_lru_matches_uncached_non_affine_grid() {
6993 let cache = CellMomentLruCache::new(16 * 1024 * 1024);
6994 let stats = CellMomentCacheStats::default();
6995 let c0s = [-0.75, 0.0, 0.5];
6996 let c1s = [-1.2, 0.25, 1.1];
6997 let c2s = [-0.18, 0.07];
6998 let c3s = [0.0, 0.025];
6999 let bounds = [(-2.0, -0.5), (-0.25, 1.5)];
7000 let degrees = [4usize, 9, 15, 21];
7001 for &c0 in &c0s {
7002 for &c1 in &c1s {
7003 for &c2 in &c2s {
7004 for &c3 in &c3s {
7005 for &(left, right) in &bounds {
7006 for &max_degree in °rees {
7007 let cell = DenestedCubicCell {
7008 left,
7009 right,
7010 c0,
7011 c1,
7012 c2,
7013 c3,
7014 };
7015 let branch = branch_cell(cell).expect("branch");
7016 if branch == ExactCellBranch::Affine {
7017 continue;
7018 }
7019 let expected =
7020 evaluate_non_affine_cell_state(cell, branch, max_degree)
7021 .expect("uncached non-affine moments");
7022 let got = evaluate_cell_moments_cached(
7023 cell,
7024 max_degree,
7025 &cache,
7026 Some(&stats),
7027 )
7028 .expect("cached moments");
7029 assert_eq!(got.branch, expected.branch);
7030 assert_eq!(got.moments.len(), max_degree + 1);
7031 let denom = expected.value.abs().max(1.0);
7032 assert!(
7033 ((got.value - expected.value).abs() / denom) < 1e-10,
7034 "value mismatch for {cell:?} degree {max_degree}: got {} expected {}",
7035 got.value,
7036 expected.value
7037 );
7038 for (idx, (&lhs, &rhs)) in
7039 got.moments.iter().zip(expected.moments.iter()).enumerate()
7040 {
7041 let denom = rhs.abs().max(1.0);
7042 assert!(
7043 ((lhs - rhs).abs() / denom) < 1e-10,
7044 "moment {idx} mismatch for {cell:?} degree {max_degree}: got {lhs} expected {rhs}"
7045 );
7046 }
7047 let warm = evaluate_cell_moments_cached(
7048 cell,
7049 max_degree,
7050 &cache,
7051 Some(&stats),
7052 )
7053 .expect("warm cached moments");
7054 assert_eq!(warm, got);
7055 }
7056 }
7057 }
7058 }
7059 }
7060 }
7061 let (hits, misses) = stats.snapshot();
7062 assert!(hits > 0, "expected warm LRU hits");
7063 assert!(misses > 0, "expected cold LRU misses");
7064 }
7065
7066 #[test]
7067 fn cell_moment_fingerprint_exact_cache_matches_current_evaluator() {
7068 let cells = [
7069 DenestedCubicCell {
7070 left: -1.75,
7071 right: -0.25,
7072 c0: 0.15,
7073 c1: -0.35,
7074 c2: 0.08,
7075 c3: -0.015,
7076 },
7077 DenestedCubicCell {
7078 left: -0.5,
7079 right: 0.8,
7080 c0: -0.2,
7081 c1: 0.45,
7082 c2: -0.12,
7083 c3: 0.025,
7084 },
7085 DenestedCubicCell {
7086 left: 0.1,
7087 right: 1.6,
7088 c0: 0.05,
7089 c1: 0.2,
7090 c2: 0.03,
7091 c3: 0.004,
7092 },
7093 ];
7094 let mut cache = std::collections::HashMap::new();
7095 for max_degree in [0usize, 3, 4, 9, 16] {
7096 for cell in cells {
7097 let baseline = evaluate_cell_moments(cell, max_degree).expect("baseline moments");
7098 let key = cell_moment_cache_key(cell, max_degree, 0.0);
7099 let cached = cache.entry(key).or_insert_with(|| {
7100 evaluate_cell_moments(cell, max_degree).expect("cached moments")
7101 });
7102 assert_eq!(baseline.branch, cached.branch);
7103 assert_eq!(baseline.value.to_bits(), cached.value.to_bits());
7104 assert_eq!(baseline.moments.len(), cached.moments.len());
7105 for (lhs, rhs) in baseline.moments.iter().zip(cached.moments.iter()) {
7106 assert_eq!(lhs.to_bits(), rhs.to_bits());
7107 }
7108 }
7109 }
7110 }
7111
7112 #[test]
7113 fn fuzzy_cell_moment_fingerprint_error_scales_with_epsilon() {
7114 for epsilon in [1e-8, 1e-6] {
7115 let base = DenestedCubicCell {
7116 left: -1.25,
7117 right: 1.1,
7118 c0: 0.1,
7119 c1: -0.25,
7120 c2: 0.04,
7121 c3: -0.006,
7122 };
7123 let perturbed = DenestedCubicCell {
7124 left: base.left + 0.001 * epsilon,
7125 right: base.right - 0.001 * epsilon,
7126 c0: base.c0 + 0.001 * epsilon,
7127 c1: base.c1 - 0.001 * epsilon,
7128 c2: base.c2 + 0.001 * epsilon,
7129 c3: base.c3 - 0.001 * epsilon,
7130 };
7131 assert_eq!(
7132 cell_moment_cache_key(base, 9, epsilon),
7133 cell_moment_cache_key(perturbed, 9, epsilon)
7134 );
7135 let lhs = evaluate_cell_moments(base, 9).expect("base moments");
7136 let rhs = evaluate_cell_moments(perturbed, 9).expect("perturbed moments");
7137 let max_rel = lhs
7138 .moments
7139 .iter()
7140 .zip(rhs.moments.iter())
7141 .map(|(a, b)| (a - b).abs() / a.abs().max(b.abs()).max(1.0))
7142 .fold(0.0_f64, f64::max);
7143 assert!(
7144 max_rel <= 10.0 * epsilon,
7145 "epsilon={epsilon:.1e} max_rel={max_rel:.3e}"
7146 );
7147 }
7148 }
7149
    /// Compares `evaluate_non_affine_cell_state` and
    /// `evaluate_non_affine_cell_derivative_state` with a straightforward
    /// node-by-node quadrature written inline as `reference` (the "prefold"
    /// reference of the test name).
    ///
    /// Values and moments must agree to a relative 1e-13, with the
    /// denominator floored at 1. The tolerance is tight enough that the
    /// order of floating-point operations in `reference` matters.
    #[test]
    fn non_affine_cell_state_matches_prefold_reference_to_1e_minus_13() {
        /// Direct quadrature over `[cell.left, cell.right]` using the
        /// `GL_NODES` / `GL_WEIGHTS` tables (presumably Gauss-Legendre on
        /// [-1, 1] -- confirm against their definition).
        ///
        /// Accumulates `weight * exp(-q(z)) * z^k` into moment `k` and
        /// `weight * exp(-z^2 / 2) * normal_cdf(eta(z))` into the value, then
        /// scales both by the half-width; the value is additionally divided
        /// by `sqrt(TAU)`. `branch` is copied into the result unchanged.
        fn reference(
            cell: DenestedCubicCell,
            branch: ExactCellBranch,
            max_degree: usize,
        ) -> CellMomentState {
            let mut moments: CellMomentVec = smallvec![0.0_f64; max_degree + 1];
            let mut value_integral = 0.0_f64;
            // Map each node from the reference interval onto the cell.
            let center = 0.5 * (cell.left + cell.right);
            let half_width = 0.5 * (cell.right - cell.left);
            for (&node, &weight) in GL_NODES.iter().zip(GL_WEIGHTS.iter()) {
                let z = center + half_width * node;
                let eta = cell.eta(z);
                let moment_weight = weight * (-cell.q(z)).exp();
                // Running power of z: 1, z, z^2, ... one step per moment.
                let mut z_pow = 1.0_f64;
                for moment in &mut moments {
                    *moment = moment_weight.mul_add(z_pow, *moment);
                    z_pow *= z;
                }
                value_integral += weight * (-0.5 * z * z).exp() * normal_cdf(eta);
            }
            // Apply the interval-mapping scale once, after accumulation.
            for moment in &mut moments {
                *moment *= half_width;
            }
            CellMomentState {
                branch,
                value: value_integral * half_width / (std::f64::consts::TAU).sqrt(),
                moments,
            }
        }

        // Every cell here has nonzero c2 and c3; the loop below asserts that
        // none of them is classified as `Affine`.
        let cells = [
            DenestedCubicCell {
                left: -1.25,
                right: -0.2,
                c0: -0.35,
                c1: 0.85,
                c2: 0.04,
                c3: -0.015,
            },
            DenestedCubicCell {
                left: -0.2,
                right: 0.55,
                c0: 0.12,
                c1: -0.65,
                c2: -0.025,
                c3: 0.02,
            },
            DenestedCubicCell {
                left: 0.55,
                right: 1.6,
                c0: 0.42,
                c1: 0.35,
                c2: 0.018,
                c3: 0.012,
            },
            DenestedCubicCell {
                left: -3.0,
                right: -1.0,
                c0: 1.7,
                c1: -0.4,
                c2: 0.11,
                c3: -0.07,
            },
        ];
        let degrees = [0_usize, 4, 9, 16, 24];
        for cell in cells {
            let branch = branch_cell(cell).expect("branch");
            assert_ne!(branch, ExactCellBranch::Affine);
            for max_degree in degrees {
                let actual = evaluate_non_affine_cell_state(cell, branch, max_degree)
                    .expect("optimized non-affine");
                let expected = reference(cell, branch, max_degree);
                assert_eq!(actual.branch, expected.branch);
                assert_eq!(actual.moments.len(), expected.moments.len());
                // Relative error of the value, denominator floored at 1.
                let denom_v = expected.value.abs().max(1.0);
                let rel_v = (actual.value - expected.value).abs() / denom_v;
                // Bound to locals so the message can use inline format args.
                let actual_v = actual.value;
                let expected_v = expected.value;
                assert!(
                    rel_v <= 1e-13,
                    "value rel mismatch for {cell:?} degree {max_degree}: \
                     actual={actual_v:.17e} expected={expected_v:.17e} rel={rel_v:.3e}"
                );
                for (k, (lhs, rhs)) in actual
                    .moments
                    .iter()
                    .zip(expected.moments.iter())
                    .enumerate()
                {
                    let denom = rhs.abs().max(1.0);
                    let rel = (lhs - rhs).abs() / denom;
                    assert!(
                        rel <= 1e-13,
                        "moment {k} rel mismatch for {cell:?} degree {max_degree}: \
                         actual={lhs:.17e} expected={rhs:.17e} rel={rel:.3e}"
                    );
                }

                // The derivative-state evaluator is held to the same
                // reference moments; its value field is not compared here.
                let actual_deriv =
                    evaluate_non_affine_cell_derivative_state(cell, branch, max_degree)
                        .expect("optimized derivative");
                for (k, (lhs, rhs)) in actual_deriv
                    .moments
                    .iter()
                    .zip(expected.moments.iter())
                    .enumerate()
                {
                    let denom = rhs.abs().max(1.0);
                    let rel = (lhs - rhs).abs() / denom;
                    assert!(
                        rel <= 1e-13,
                        "deriv moment {k} rel mismatch for {cell:?} degree {max_degree}: \
                         actual={lhs:.17e} expected={rhs:.17e} rel={rel:.3e}"
                    );
                }
            }
        }
    }
7286
7287 #[test]
7293 fn third_derivative_kernel_matches_fd_of_second_with_eta_perturbation() {
7294 let base = DenestedCubicCell {
7296 left: -0.6,
7297 right: 0.9,
7298 c0: 0.30,
7299 c1: 0.45,
7300 c2: -0.20,
7301 c3: 0.12,
7302 };
7303 let eta_u = [0.11_f64, -0.07, 0.05, 0.02];
7306 let eta_v = [-0.09_f64, 0.13, -0.04, 0.03];
7307 let eta_t = [0.17_f64, 0.06, -0.10, 0.04]; let eta_uv = [0.02_f64, 0.01, -0.015, 0.005];
7310 let eta_ut = [-0.01_f64, 0.02, 0.007, -0.003];
7311 let eta_vt = [0.015_f64, -0.008, 0.01, 0.004];
7312 let eta_uvt = [0.003_f64, -0.002, 0.001, 0.0005];
7314
7315 let neg = |a: &[f64; 4]| a.map(|v| -v);
7316 let max_degree = 15usize;
7317
7318 let f_uv_at = |s: f64| -> f64 {
7325 let cell_s = DenestedCubicCell {
7326 c0: base.c0 + s * eta_t[0],
7327 c1: base.c1 + s * eta_t[1],
7328 c2: base.c2 + s * eta_t[2],
7329 c3: base.c3 + s * eta_t[3],
7330 ..base
7331 };
7332 let st = evaluate_cell_moments(cell_s, max_degree).unwrap();
7334 let neg_cell = DenestedCubicCell {
7335 c0: -cell_s.c0,
7336 c1: -cell_s.c1,
7337 c2: -cell_s.c2,
7338 c3: -cell_s.c3,
7339 ..cell_s
7340 };
7341 let u_s = [
7342 eta_u[0] + s * eta_ut[0],
7343 eta_u[1] + s * eta_ut[1],
7344 eta_u[2] + s * eta_ut[2],
7345 eta_u[3] + s * eta_ut[3],
7346 ];
7347 let v_s = [
7348 eta_v[0] + s * eta_vt[0],
7349 eta_v[1] + s * eta_vt[1],
7350 eta_v[2] + s * eta_vt[2],
7351 eta_v[3] + s * eta_vt[3],
7352 ];
7353 let uv_s = [
7354 eta_uv[0] + s * eta_uvt[0],
7355 eta_uv[1] + s * eta_uvt[1],
7356 eta_uv[2] + s * eta_uvt[2],
7357 eta_uv[3] + s * eta_uvt[3],
7358 ];
7359 cell_second_derivative_from_moments(
7360 neg_cell,
7361 &neg(&u_s),
7362 &neg(&v_s),
7363 &neg(&uv_s),
7364 &st.moments,
7365 )
7366 .unwrap()
7367 };
7368
7369 let h = 1e-5;
7370 let fd = (f_uv_at(h) - f_uv_at(-h)) / (2.0 * h);
7371
7372 let st0 = evaluate_cell_moments(base, max_degree).unwrap();
7375 let neg_cell0 = DenestedCubicCell {
7376 c0: -base.c0,
7377 c1: -base.c1,
7378 c2: -base.c2,
7379 c3: -base.c3,
7380 ..base
7381 };
7382 let analytic = cell_third_derivative_from_moments(
7383 neg_cell0,
7384 &neg(&eta_u),
7385 &neg(&eta_v),
7386 &neg(&eta_t),
7387 &neg(&eta_uv),
7388 &neg(&eta_ut),
7389 &neg(&eta_vt),
7390 &neg(&eta_uvt),
7391 &st0.moments,
7392 )
7393 .unwrap();
7394
7395 let denom = fd.abs().max(1e-3);
7396 let rel = (analytic - fd).abs() / denom;
7397 assert!(
7398 rel <= 1e-5,
7399 "third kernel vs FD-of-second mismatch: analytic={analytic:.12e} fd={fd:.12e} rel={rel:.3e}"
7400 );
7401 }
7402
7403 #[test]
7404 fn moving_shared_edge_second_integral_derivative_has_leibniz_jump_sign() {
7405 let edge0 = 0.2_f64;
7406 let edge_velocity = -0.37_f64;
7407
7408 let left_eta = [0.22_f64, -0.18, 0.09, 0.03];
7409 let right_eta = [-0.11_f64, 0.26, -0.04, 0.02];
7410 let left_r = [0.08_f64, -0.05, 0.03, 0.01];
7411 let left_s = [-0.06_f64, 0.04, 0.02, -0.015];
7412 let left_rs = [0.025_f64, -0.012, 0.006, 0.004];
7413 let right_r = [-0.03_f64, 0.07, -0.02, 0.012];
7414 let right_s = [0.05_f64, -0.025, 0.018, 0.007];
7415 let right_rs = [-0.018_f64, 0.014, -0.005, 0.003];
7416
7417 let integral_at = |shift: f64| -> f64 {
7418 let edge = edge0 + edge_velocity * shift;
7419 let left = DenestedCubicCell {
7420 left: -0.7,
7421 right: edge,
7422 c0: left_eta[0],
7423 c1: left_eta[1],
7424 c2: left_eta[2],
7425 c3: left_eta[3],
7426 };
7427 let right = DenestedCubicCell {
7428 left: edge,
7429 right: 1.1,
7430 c0: right_eta[0],
7431 c1: right_eta[1],
7432 c2: right_eta[2],
7433 c3: right_eta[3],
7434 };
7435 let left_state = evaluate_cell_moments(left, 12).expect("left moments");
7436 let right_state = evaluate_cell_moments(right, 12).expect("right moments");
7437 cell_second_derivative_from_moments(
7438 left,
7439 &left_r,
7440 &left_s,
7441 &left_rs,
7442 &left_state.moments,
7443 )
7444 .expect("left second")
7445 + cell_second_derivative_from_moments(
7446 right,
7447 &right_r,
7448 &right_s,
7449 &right_rs,
7450 &right_state.moments,
7451 )
7452 .expect("right second")
7453 };
7454
7455 let h = 1e-5;
7456 let fd = (integral_at(h) - integral_at(-h)) / (2.0 * h);
7457
7458 let left = DenestedCubicCell {
7459 left: -0.7,
7460 right: edge0,
7461 c0: left_eta[0],
7462 c1: left_eta[1],
7463 c2: left_eta[2],
7464 c3: left_eta[3],
7465 };
7466 let right = DenestedCubicCell {
7467 left: edge0,
7468 right: 1.1,
7469 c0: right_eta[0],
7470 c1: right_eta[1],
7471 c2: right_eta[2],
7472 c3: right_eta[3],
7473 };
7474 let f_left =
7475 cell_second_derivative_boundary_integrand(left, &left_r, &left_s, &left_rs, edge0);
7476 let f_right =
7477 cell_second_derivative_boundary_integrand(right, &right_r, &right_s, &right_rs, edge0);
7478 let analytic = edge_velocity * (f_left - f_right);
7479
7480 let denom = analytic.abs().max(1e-8);
7481 let rel = (fd - analytic).abs() / denom;
7482 assert!(
7483 rel <= 5e-8,
7484 "moving edge sign mismatch: fd={fd:.12e} analytic={analytic:.12e} rel={rel:.3e}"
7485 );
7486 }
7487
7488 #[test]
7489 fn moving_shared_edge_second_integral_mixed_derivative_has_full_leibniz_terms() {
7490 let edge0 = -0.15_f64;
7491 let edge_d1 = 0.31_f64;
7492 let edge_d2 = -0.27_f64;
7493 let edge_d12 = 0.19_f64;
7494
7495 let left_eta = [0.16_f64, -0.21, 0.07, -0.025];
7496 let right_eta = [-0.09_f64, 0.18, -0.055, 0.018];
7497 let left_r = [0.075_f64, -0.045, 0.018, 0.009];
7498 let left_s = [-0.052_f64, 0.033, 0.014, -0.011];
7499 let left_rs = [0.021_f64, -0.009, 0.005, 0.0025];
7500 let right_r = [-0.028_f64, 0.063, -0.017, 0.010];
7501 let right_s = [0.047_f64, -0.023, 0.016, 0.006];
7502 let right_rs = [-0.015_f64, 0.012, -0.004, 0.002];
7503
7504 let integral_at = |s1: f64, s2: f64| -> f64 {
7505 let edge = edge0 + edge_d1 * s1 + edge_d2 * s2 + edge_d12 * s1 * s2;
7506 let left = DenestedCubicCell {
7507 left: -0.8,
7508 right: edge,
7509 c0: left_eta[0],
7510 c1: left_eta[1],
7511 c2: left_eta[2],
7512 c3: left_eta[3],
7513 };
7514 let right = DenestedCubicCell {
7515 left: edge,
7516 right: 0.9,
7517 c0: right_eta[0],
7518 c1: right_eta[1],
7519 c2: right_eta[2],
7520 c3: right_eta[3],
7521 };
7522 let left_state = evaluate_cell_moments(left, 12).expect("left moments");
7523 let right_state = evaluate_cell_moments(right, 12).expect("right moments");
7524 cell_second_derivative_from_moments(
7525 left,
7526 &left_r,
7527 &left_s,
7528 &left_rs,
7529 &left_state.moments,
7530 )
7531 .expect("left second")
7532 + cell_second_derivative_from_moments(
7533 right,
7534 &right_r,
7535 &right_s,
7536 &right_rs,
7537 &right_state.moments,
7538 )
7539 .expect("right second")
7540 };
7541
7542 let h = 2e-4;
7543 let fd = (integral_at(h, h) - integral_at(h, -h) - integral_at(-h, h)
7544 + integral_at(-h, -h))
7545 / (4.0 * h * h);
7546
7547 let left = DenestedCubicCell {
7548 left: -0.8,
7549 right: edge0,
7550 c0: left_eta[0],
7551 c1: left_eta[1],
7552 c2: left_eta[2],
7553 c3: left_eta[3],
7554 };
7555 let right = DenestedCubicCell {
7556 left: edge0,
7557 right: 0.9,
7558 c0: right_eta[0],
7559 c1: right_eta[1],
7560 c2: right_eta[2],
7561 c3: right_eta[3],
7562 };
7563
7564 let boundary_z_derivative =
7565 |cell: DenestedCubicCell, r: &[f64], s: &[f64], rs: &[f64]| -> f64 {
7566 let eta = cell.eta(edge0);
7567 let eta_z = cell.c1 + 2.0 * cell.c2 * edge0 + 3.0 * cell.c3 * edge0 * edge0;
7568 let cr = poly_eval_at(r, edge0);
7569 let cs = poly_eval_at(s, edge0);
7570 let crs = poly_eval_at(rs, edge0);
7571 let cr_z = r.iter().enumerate().skip(1).fold(0.0, |acc, (k, val)| {
7572 acc + (k as f64) * val * edge0.powi(k as i32 - 1)
7573 });
7574 let cs_z = s.iter().enumerate().skip(1).fold(0.0, |acc, (k, val)| {
7575 acc + (k as f64) * val * edge0.powi(k as i32 - 1)
7576 });
7577 let crs_z = rs.iter().enumerate().skip(1).fold(0.0, |acc, (k, val)| {
7578 acc + (k as f64) * val * edge0.powi(k as i32 - 1)
7579 });
7580 let amp = crs - eta * cr * cs;
7581 let amp_z = crs_z - eta_z * cr * cs - eta * cr_z * cs - eta * cr * cs_z;
7582 let q_z = edge0 + eta * eta_z;
7583 (amp_z - amp * q_z) * (-cell.q(edge0)).exp() * INV_TWO_PI
7584 };
7585
7586 let f_left =
7587 cell_second_derivative_boundary_integrand(left, &left_r, &left_s, &left_rs, edge0);
7588 let f_right =
7589 cell_second_derivative_boundary_integrand(right, &right_r, &right_s, &right_rs, edge0);
7590 let fz_left = boundary_z_derivative(left, &left_r, &left_s, &left_rs);
7591 let fz_right = boundary_z_derivative(right, &right_r, &right_s, &right_rs);
7592 let analytic = edge_d12 * (f_left - f_right) + edge_d1 * edge_d2 * (fz_left - fz_right);
7593
7594 let denom = analytic.abs().max(1e-8);
7595 let rel = (fd - analytic).abs() / denom;
7596 assert!(
7597 rel <= 2e-7,
7598 "moving edge mixed term mismatch: fd={fd:.12e} analytic={analytic:.12e} rel={rel:.3e}"
7599 );
7600 }
7601
7602 #[test]
7627 fn third_order_self_flux_telescopes_but_third_integrand_jumps_at_c2_knot_1454() {
7628 let edge0 = 0.13_f64;
7629 let edge_velocity = -0.41_f64;
7630
7631 let left_eta = [0.18_f64, -0.12, 0.07, 0.04];
7635 let right_c3 = 0.04_f64 + 0.09; let l0 = left_eta[0];
7642 let l1 = left_eta[1];
7643 let l2 = left_eta[2];
7644 let l3 = left_eta[3];
7645 let e = edge0;
7646 let eta_val = l0 + l1 * e + l2 * e * e + l3 * e * e * e;
7647 let eta_d1 = l1 + 2.0 * l2 * e + 3.0 * l3 * e * e;
7648 let eta_d2 = 2.0 * l2 + 6.0 * l3 * e;
7649 let rc2 = (eta_d2 - 6.0 * right_c3 * e) / 2.0;
7650 let rc1 = eta_d1 - 2.0 * rc2 * e - 3.0 * right_c3 * e * e;
7651 let rc0 = eta_val - rc1 * e - rc2 * e * e - right_c3 * e * e * e;
7652 let right_eta = [rc0, rc1, rc2, right_c3];
7653
7654 let common_r = [0.06_f64, -0.04, 0.02, 0.0];
7660 let common_s = [-0.05_f64, 0.03, 0.015, 0.0];
7661 let common_t = [0.08_f64, 0.05, -0.03, 0.0];
7662 let common_rs = [0.02_f64, -0.01, 0.005, 0.0];
7663 let common_rt = [-0.012_f64, 0.008, 0.004, 0.0];
7664 let common_st = [0.015_f64, -0.006, 0.003, 0.0];
7665 let left_rst = [6.0 * l3, 0.0, 0.0, 0.0];
7667 let right_rst = [6.0 * right_c3, 0.0, 0.0, 0.0];
7668
7669 let max_degree = 15usize;
7670 let neg = |a: &[f64; 4]| a.map(|v| -v);
7671
7672 let integral_at = |shift: f64| -> f64 {
7677 let edge = edge0 + edge_velocity * shift;
7678 let left = DenestedCubicCell {
7679 left: -0.7,
7680 right: edge,
7681 c0: left_eta[0],
7682 c1: left_eta[1],
7683 c2: left_eta[2],
7684 c3: left_eta[3],
7685 };
7686 let right = DenestedCubicCell {
7687 left: edge,
7688 right: 1.0,
7689 c0: right_eta[0],
7690 c1: right_eta[1],
7691 c2: right_eta[2],
7692 c3: right_eta[3],
7693 };
7694 let lst = evaluate_cell_moments(left, max_degree).unwrap();
7695 let rst_m = evaluate_cell_moments(right, max_degree).unwrap();
7696 let neg_left = DenestedCubicCell {
7697 c0: -left.c0,
7698 c1: -left.c1,
7699 c2: -left.c2,
7700 c3: -left.c3,
7701 ..left
7702 };
7703 let neg_right = DenestedCubicCell {
7704 c0: -right.c0,
7705 c1: -right.c1,
7706 c2: -right.c2,
7707 c3: -right.c3,
7708 ..right
7709 };
7710 let li = cell_third_derivative_from_moments(
7711 neg_left,
7712 &neg(&common_r),
7713 &neg(&common_s),
7714 &neg(&common_t),
7715 &neg(&common_rs),
7716 &neg(&common_rt),
7717 &neg(&common_st),
7718 &neg(&left_rst),
7719 &lst.moments,
7720 )
7721 .unwrap();
7722 let ri = cell_third_derivative_from_moments(
7723 neg_right,
7724 &neg(&common_r),
7725 &neg(&common_s),
7726 &neg(&common_t),
7727 &neg(&common_rs),
7728 &neg(&common_rt),
7729 &neg(&common_st),
7730 &neg(&right_rst),
7731 &rst_m.moments,
7732 )
7733 .unwrap();
7734 li + ri
7735 };
7736
7737 let h = 1e-5;
7738 let fd = (integral_at(h) - integral_at(-h)) / (2.0 * h);
7739
7740 let neg_eta = |eta: &[f64; 4]| [-eta[0], -eta[1], -eta[2], -eta[3]];
7759 let left_eta_neg = neg_eta(&left_eta);
7760 let right_eta_neg = neg_eta(&right_eta);
7761 let left0 = DenestedCubicCell {
7762 left: -0.7,
7763 right: edge0,
7764 c0: left_eta_neg[0],
7765 c1: left_eta_neg[1],
7766 c2: left_eta_neg[2],
7767 c3: left_eta_neg[3],
7768 };
7769 let right0 = DenestedCubicCell {
7770 left: edge0,
7771 right: 1.0,
7772 c0: right_eta_neg[0],
7773 c1: right_eta_neg[1],
7774 c2: right_eta_neg[2],
7775 c3: right_eta_neg[3],
7776 };
7777 let f_left = cell_third_derivative_boundary_integrand(
7778 left0,
7779 &neg(&common_r),
7780 &neg(&common_s),
7781 &neg(&common_t),
7782 &neg(&common_rs),
7783 &neg(&common_rt),
7784 &neg(&common_st),
7785 &neg(&left_rst),
7786 edge0,
7787 );
7788 let f_right = cell_third_derivative_boundary_integrand(
7789 right0,
7790 &neg(&common_r),
7791 &neg(&common_s),
7792 &neg(&common_t),
7793 &neg(&common_rs),
7794 &neg(&common_rt),
7795 &neg(&common_st),
7796 &neg(&right_rst),
7797 edge0,
7798 );
7799
7800 let jump = f_left - f_right;
7804 assert!(
7805 jump.abs() > 1e-4,
7806 "third-derivative integrand must jump across the C² knot (α₃ discontinuity); \
7807 got jump={jump:.3e}"
7808 );
7809
7810 let analytic_flux = edge_velocity * jump;
7811 let denom = fd.abs().max(1e-6);
7812 let rel = (fd - analytic_flux).abs() / denom;
7813 assert!(
7814 rel <= 1e-5,
7815 "moving-edge third-derivative flux mismatch (#1454): fd={fd:.12e} \
7816 analytic_flux={analytic_flux:.12e} rel={rel:.3e}"
7817 );
7818
7819 let a_row = 0.21_f64;
7832 let b_row = 1.37_f64;
7833 let knot = a_row + b_row * edge0; let left_link = LocalSpanCubic {
7837 left: knot - 0.6,
7838 right: knot + 0.6,
7839 c0: 0.0,
7840 c1: 0.0,
7841 c2: 0.08,
7842 c3: -0.05,
7843 };
7844 let right_alpha3 = -0.05_f64 + 0.11; let right_left_coord = knot - 0.4;
7847 let lhs = 2.0 * left_link.c2 + 6.0 * left_link.c3 * (knot - left_link.left);
7848 let right_alpha2 = (lhs - 6.0 * right_alpha3 * (knot - right_left_coord)) / 2.0;
7849 let right_link = LocalSpanCubic {
7850 left: right_left_coord,
7851 right: right_left_coord + 0.8,
7852 c0: 0.0,
7853 c1: 0.0,
7854 c2: right_alpha2,
7855 c3: right_alpha3,
7856 };
7857 let (_, _, dc_dbb_left) = link_cubic_second_partials(left_link, a_row, b_row);
7858 let (_, _, dc_dbb_right) = link_cubic_second_partials(right_link, a_row, b_row);
7859 assert!(
7861 (dc_dbb_left[3] - dc_dbb_right[3]).abs() > 1e-3,
7862 "α₃ jump must make the raw dc_dbb coefficient arrays differ"
7863 );
7864 let c_bb_left = poly_eval_at(&dc_dbb_left, edge0);
7867 let c_bb_right = poly_eval_at(&dc_dbb_right, edge0);
7868 assert!(
7869 (c_bb_left - c_bb_right).abs() <= 1e-12,
7870 "second-derivative slope-slope integrand must be CONTINUOUS across the \
7871 C² knot (telescoping self-flux): left={c_bb_left:.15e} right={c_bb_right:.15e}"
7872 );
7873 }
7874}