modmath 0.1.5

Modular math implemented with traits.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
/// Compute N' using trial search method - O(R) complexity (Strict)
/// Finds N' such that modulus * N' ≡ -1 (mod R)
/// Uses reference-based operations to minimize copying of large integers
/// Returns None if N' cannot be found
pub fn strict_compute_n_prime_trial_search<T>(modulus: &T, r: &T) -> Option<T>
where
    T: num_traits::Zero
        + num_traits::One
        + PartialEq
        + PartialOrd
        + num_traits::ops::overflowing::OverflowingAdd
        + num_traits::ops::overflowing::OverflowingSub,
    for<'a> T: core::ops::RemAssign<&'a T>,
    for<'a> &'a T: core::ops::Rem<&'a T, Output = T>
        + core::ops::Mul<&'a T, Output = T>
        + core::ops::Sub<&'a T, Output = T>,
{
    // We need to find N' where modulus * N' ≡ R - 1 (mod R)
    let target = r - &T::one(); // This is -1 mod R

    // Simple trial search for N'
    // TODO: Replace with Extended Euclidean Algorithm for O(log R) complexity instead of O(R)
    // Current implementation is fine for small numbers but inefficient for large moduli
    let mut n_prime = T::one();
    loop {
        // Check if (modulus * n_prime) % r == target
        // Use references to avoid copying large integers
        let product = modulus * &n_prime;
        let remainder = &product % r;
        if remainder == target {
            return Some(n_prime);
        }

        // Increment n_prime: n_prime = n_prime + 1
        // Use overflowing_add for strict arithmetic
        let (incremented, _overflow) = n_prime.overflowing_add(&T::one());
        n_prime = incremented;

        // Safety check to avoid infinite loop
        if &n_prime >= r {
            return None; // Could not find N' - should not happen for valid inputs
        }
    }
}

/// Compute N' using Extended Euclidean Algorithm - O(log R) complexity (strict version)
/// Finds N' such that modulus * N' ≡ -1 (mod R)
/// This is equivalent to N' ≡ -modulus^(-1) (mod R)
/// Returns None if modular inverse cannot be found
fn strict_compute_n_prime_extended_euclidean<T>(modulus: &T, r: &T) -> Option<T>
where
    T: num_traits::Zero
        + num_traits::One
        + PartialEq
        + PartialOrd
        + num_traits::ops::overflowing::OverflowingAdd
        + num_traits::ops::overflowing::OverflowingSub,
    for<'a> T: core::ops::RemAssign<&'a T>
        + core::ops::Mul<&'a T, Output = T>
        + core::ops::Div<&'a T, Output = T>
        + core::ops::Sub<&'a T, Output = T>
        + core::ops::Add<&'a T, Output = T>
        + core::ops::AddAssign<&'a T>,
    for<'a> &'a T: core::ops::Rem<&'a T, Output = T>
        + core::ops::Mul<&'a T, Output = T>
        + core::ops::Sub<&'a T, Output = T>
        + core::ops::Div<&'a T, Output = T>
        + core::ops::Sub<T, Output = T>
        + core::ops::Add<&'a T, Output = T>,
{
    // We need to solve: modulus * N' ≡ -1 (mod R)
    // This is equivalent to: N' ≡ -modulus^(-1) (mod R)

    // Use strict_mod_inv to find modulus^(-1) mod R
    let modulus_clone = &T::zero() + modulus; // Clone using references
    if let Some(modulus_inv) = crate::inv::strict_mod_inv(modulus_clone, r) {
        // N' = -modulus^(-1) mod R = R - modulus^(-1) mod R
        if modulus_inv == T::zero() {
            Some(r - &T::one()) // Handle edge case where inverse is 0
        } else {
            Some(r - &modulus_inv)
        }
    } else {
        None // Could not find modular inverse - gcd(modulus, R) should be 1 for valid Montgomery parameters
    }
}

/// Compute N' using Hensel's lifting - O(log R) complexity, optimized for R = 2^k (strict version)
/// Finds N' such that modulus * N' ≡ -1 (mod R)
/// Uses Hensel lifting, 1-bit per step, from 2^1 to 2^r_bits
/// Returns None if Hensel's lifting fails
fn strict_compute_n_prime_hensels_lifting<T>(modulus: &T, r: &T, r_bits: usize) -> Option<T>
where
    T: num_traits::Zero
        + num_traits::One
        + PartialEq
        + PartialOrd
        + core::ops::Shl<usize, Output = T>
        + num_traits::ops::overflowing::OverflowingAdd
        + num_traits::ops::overflowing::OverflowingSub
        + for<'a> core::ops::Rem<&'a T, Output = T>,
    for<'a> T: core::ops::RemAssign<&'a T>,
    for<'a> &'a T: core::ops::Add<&'a T, Output = T>
        + core::ops::Sub<&'a T, Output = T>
        + core::ops::Mul<&'a T, Output = T>
        + core::ops::Rem<&'a T, Output = T>
        + core::ops::BitAnd<Output = T>,
{
    // Hensel's lifting for N' computation when R = 2^k
    // Start with base case: find N' such that modulus * N' ≡ -1 (mod 2)
    // Then iteratively lift to larger powers of 2

    // Base case: modulus * N' ≡ -1 ≡ 1 (mod 2)
    // Since modulus is odd (required for Montgomery), modulus ≡ 1 (mod 2)
    // So we need N' ≡ 1 (mod 2), hence N' starts as 1
    let mut n_prime = T::one();

    // Hensel's lifting iteration: lift precision from 2^1 to 2^r_bits, one bit at a time
    // At each step k, we ensure: modulus * n_prime ≡ -1 (mod 2^k)
    for k in 2..=r_bits {
        // Current goal: extend modulus * n_prime ≡ -1 (mod 2^(k-1)) to modulus * n_prime ≡ -1 (mod 2^k)
        // This is the core of Hensel lifting: extending solutions modulo increasing powers

        let target_mod = T::one() << k; // 2^k
        let temp_prod = modulus * &n_prime;
        let (temp_sum, _overflow) = temp_prod.overflowing_add(&T::one());
        let check_val = &temp_sum % &target_mod;

        if check_val != T::zero() {
            // The current n_prime doesn't satisfy the congruence modulo 2^k
            // Hensel lifting theorem: if f(x) ≡ 0 (mod p^k) and f'(x) ≢ 0 (mod p),
            // then there exists a unique lift x' such that f(x') ≡ 0 (mod p^(k+1))
            // Here f(x) = modulus * x + 1, so f'(x) = modulus (odd, hence invertible mod 2)
            let prev_power = T::one() << (k - 1); // 2^(k-1)

            if check_val == prev_power {
                // Need to add 2^(k-1) to n_prime to satisfy the congruence
                let (adjusted, _overflow) = n_prime.overflowing_add(&prev_power);
                n_prime = adjusted;
            }
        }
    }

    // Verification: ensure the computed N' satisfies modulus * N' ≡ -1 (mod R)
    // This check validates the mathematical correctness of our Hensel lifting
    let final_check = (modulus * &n_prime) % r;
    let target = r - &T::one(); // -1 mod R

    if final_check != target {
        // Safety check: In theory, correct Hensel lifting should always produce a valid N'
        // This branch should never execute for valid odd moduli, but provides defensive programming
        // against potential edge cases or implementation errors
        None // Hensel lifting failed to produce correct N'
    } else {
        // N' is already in [0, R) after Hensel lifting (starts at 1, accumulates powers of 2)
        Some(n_prime)
    }
}

/// Montgomery parameter computation (Strict)
/// Computes R, R^(-1) mod N, N', and R bit length for Montgomery arithmetic using default method
/// Uses reference-based operations to minimize copying of large integers
pub fn strict_compute_montgomery_params<T>(modulus: &T) -> Option<(T, T, T, usize)>
where
    T: num_traits::Zero
        + num_traits::One
        + PartialEq
        + PartialOrd
        + core::ops::Shl<usize, Output = T>
        + num_traits::ops::overflowing::OverflowingAdd
        + num_traits::ops::overflowing::OverflowingSub
        + for<'a> core::ops::Rem<&'a T, Output = T>,
    for<'a> T: core::ops::RemAssign<&'a T>
        + core::ops::Mul<&'a T, Output = T>
        + core::ops::Div<&'a T, Output = T>
        + core::ops::Sub<&'a T, Output = T>
        + core::ops::Add<&'a T, Output = T>
        + core::ops::AddAssign<&'a T>,
    for<'a> &'a T: core::ops::Rem<&'a T, Output = T>
        + core::ops::Mul<&'a T, Output = T>
        + core::ops::Sub<&'a T, Output = T>
        + core::ops::Div<&'a T, Output = T>
        + core::ops::Sub<T, Output = T>
        + core::ops::Add<&'a T, Output = T>
        + core::ops::BitAnd<Output = T>,
{
    strict_compute_montgomery_params_with_method(
        modulus,
        crate::montgomery::NPrimeMethod::default(),
    )
}

/// Montgomery parameter computation with method selection (Strict)
/// Computes R, R^(-1) mod N, N', and R bit length for Montgomery arithmetic using specified method
/// Uses reference-based operations to minimize copying of large integers
/// Returns None if parameter computation fails
pub fn strict_compute_montgomery_params_with_method<T>(
    modulus: &T,
    method: crate::montgomery::NPrimeMethod,
) -> Option<(T, T, T, usize)>
where
    T: num_traits::Zero
        + num_traits::One
        + PartialEq
        + PartialOrd
        + core::ops::Shl<usize, Output = T>
        + num_traits::ops::overflowing::OverflowingAdd
        + num_traits::ops::overflowing::OverflowingSub
        + for<'a> core::ops::Rem<&'a T, Output = T>,
    for<'a> T: core::ops::RemAssign<&'a T>
        + core::ops::Mul<&'a T, Output = T>
        + core::ops::Div<&'a T, Output = T>
        + core::ops::Sub<&'a T, Output = T>
        + core::ops::Add<&'a T, Output = T>
        + core::ops::AddAssign<&'a T>,
    for<'a> &'a T: core::ops::Rem<&'a T, Output = T>
        + core::ops::Mul<&'a T, Output = T>
        + core::ops::Sub<&'a T, Output = T>
        + core::ops::Div<&'a T, Output = T>
        + core::ops::Sub<T, Output = T>
        + core::ops::Add<&'a T, Output = T>
        + core::ops::BitAnd<Output = T>,
{
    // Step 1: Find R = 2^k where R > modulus
    let mut r = T::one();
    let mut r_bits = 0usize;

    while &r <= modulus {
        r = r << 1; // r *= 2
        r_bits += 1;
    }

    // Create a clone using reference-based addition to avoid moving r
    let r_clone = &r + &T::zero(); // Clone r using references

    // Step 2: Compute R^(-1) mod modulus using strict_mod_inv
    let r_inv = crate::inv::strict_mod_inv(r_clone, modulus)?;

    // Step 3: Compute N' such that N * N' ≡ -1 (mod R) using selected method
    let n_prime = match method {
        crate::montgomery::NPrimeMethod::TrialSearch => {
            strict_compute_n_prime_trial_search(modulus, &r)?
        }
        // Newton is mapped to ExtendedEuclidean in the strict path.
        // This is because strict/constrained paths use legacy R > N semantics
        // (R = smallest power of 2 exceeding N), not the wide-REDC R = 2^W approach.
        // Newton's method is designed for R = 2^W with wrapping arithmetic, so we
        // fall back to ExtendedEuclidean which computes the same N' = -N^{-1} mod R.
        crate::montgomery::NPrimeMethod::ExtendedEuclidean
        | crate::montgomery::NPrimeMethod::Newton => {
            strict_compute_n_prime_extended_euclidean(modulus, &r)?
        }
        crate::montgomery::NPrimeMethod::HenselsLifting => {
            strict_compute_n_prime_hensels_lifting(modulus, &r, r_bits)?
        }
    };

    Some((r, r_inv, n_prime, r_bits))
}

/// Montgomery multiplication (Strict): (a * R) * (b * R) -> (a * b * R) mod N
/// Multiplies two values already in Montgomery form and returns result in Montgomery form
pub fn strict_montgomery_mul<T>(a_mont: T, b_mont: &T, modulus: &T, n_prime: &T, r_bits: usize) -> T
where
    T: num_traits::Zero
        + num_traits::One
        + crate::parity::Parity
        + PartialOrd
        + core::ops::Sub<Output = T>
        + core::ops::Shr<usize, Output = T>
        + core::ops::Shl<usize, Output = T>
        + num_traits::ops::overflowing::OverflowingAdd
        + num_traits::ops::overflowing::OverflowingSub,
    for<'a> T: core::ops::Mul<&'a T, Output = T>
        + core::ops::RemAssign<&'a T>
        + core::ops::Sub<&'a T, Output = T>,
    for<'a> &'a T: core::ops::Rem<&'a T, Output = T>
        + core::ops::Sub<&'a T, Output = T>
        + core::ops::Mul<&'a T, Output = T>
        + core::ops::BitAnd<Output = T>,
{
    // TODO(Phase 1): Replace mod_mul + from_montgomery with proper Montgomery
    // reduction using WideningMul. Current mod_mul is O(k) double-and-add which
    // defeats the performance purpose of Montgomery, and from_montgomery's
    // m * N intermediate can overflow at key sizes. See ROADMAP.md Phase 1.
    let product = crate::mul::strict_mod_mul(a_mont, b_mont, modulus);
    strict_from_montgomery(product, modulus, n_prime, r_bits)
}

/// Convert from Montgomery form (Strict): (a * R) -> a mod N
/// Uses Montgomery reduction algorithm with reference-based operations
/// to minimize copying of large integers
pub fn strict_from_montgomery<T>(a_mont: T, modulus: &T, n_prime: &T, r_bits: usize) -> T
where
    T: num_traits::Zero
        + num_traits::One
        + PartialOrd
        + core::ops::Shl<usize, Output = T>
        + core::ops::Shr<usize, Output = T>
        + core::ops::Sub<Output = T>
        + num_traits::ops::overflowing::OverflowingAdd,
    for<'a> T: core::ops::Mul<&'a T, Output = T> + core::ops::Sub<&'a T, Output = T>,
    for<'a> &'a T: core::ops::Sub<&'a T, Output = T>
        + core::ops::Mul<&'a T, Output = T>
        + core::ops::BitAnd<&'a T, Output = T>,
{
    // Montgomery reduction algorithm:
    // Input: a_mont (Montgomery form), N (modulus), N', r_bits
    // 1. R = 2^r_bits, mask = R - 1 = (1 << r_bits) - 1
    // 2. m = ((a_mont & mask) * N') & mask  [only low bits, no expensive modulo!]
    // 3. t = (a_mont + m * N) >> r_bits     [bit shift, no division!]
    // 4. if t >= N then return t - N else return t

    // Fast path for R=1 (r_bits == 0): Montgomery reduction simplifies to conditional subtraction
    if r_bits == 0 {
        // When r_bits = 0, R = 1, so Montgomery reduction is just a_mont % modulus
        return if &a_mont >= modulus {
            a_mont - modulus
        } else {
            a_mont
        };
    }

    let mask = (T::one() << r_bits) - T::one(); // mask = 2^r_bits - 1

    // Step 1: m = ((a_mont & mask) * N') & mask
    // Only use low r_bits of a_mont, then mask result to low r_bits
    let a_low = &a_mont & &mask;
    let product = &a_low * n_prime;
    let m = &product & &mask;

    // Step 2: t = (a_mont + m * N) >> r_bits
    // TODO(Phase 1): m * N can overflow for large moduli (m < R, N < R, so
    // m*N can reach R²). Needs WideningMul for correctness at key sizes.
    let m_times_n = &m * modulus;
    let (sum, _overflow) = a_mont.overflowing_add(&m_times_n);
    let t = sum >> r_bits; // Divide by R = 2^r_bits using bit shift

    // Step 3: Final reduction
    if &t >= modulus { t - modulus } else { t }
}

/// Convert to Montgomery form (Strict): a -> (a * R) mod N
/// Uses reference-based operations to minimize copying of large integers
pub fn strict_to_montgomery<T>(a: T, modulus: &T, r: &T) -> T
where
    T: PartialOrd
        + num_traits::Zero
        + num_traits::One
        + crate::parity::Parity
        + num_traits::ops::overflowing::OverflowingAdd
        + num_traits::ops::overflowing::OverflowingSub
        + core::ops::Shr<usize, Output = T>,
    for<'a> T: core::ops::RemAssign<&'a T>,
    for<'a> &'a T: core::ops::Rem<&'a T, Output = T>,
{
    crate::mul::strict_mod_mul(a, r, modulus)
}

/// Complete Montgomery modular multiplication with method selection (Strict): A * B mod N
/// Uses reference-based operations throughout to minimize copying of large integers
/// Returns None if Montgomery parameter computation fails
pub fn strict_montgomery_mod_mul_with_method<T>(
    a: T,
    b: &T,
    modulus: &T,
    method: crate::montgomery::NPrimeMethod,
) -> Option<T>
where
    T: num_traits::Zero
        + num_traits::One
        + crate::parity::Parity
        + PartialEq
        + PartialOrd
        + core::ops::Shl<usize, Output = T>
        + core::ops::Sub<Output = T>
        + core::ops::Shr<usize, Output = T>
        + num_traits::ops::overflowing::OverflowingAdd
        + num_traits::ops::overflowing::OverflowingSub
        + for<'a> core::ops::Rem<&'a T, Output = T>,
    for<'a> T: core::ops::RemAssign<&'a T>
        + core::ops::Mul<&'a T, Output = T>
        + core::ops::Div<&'a T, Output = T>
        + core::ops::Sub<&'a T, Output = T>
        + core::ops::Add<&'a T, Output = T>
        + core::ops::AddAssign<&'a T>,
    for<'a> &'a T: core::ops::Rem<&'a T, Output = T>
        + core::ops::Mul<&'a T, Output = T>
        + core::ops::Sub<&'a T, Output = T>
        + core::ops::Div<&'a T, Output = T>
        + core::ops::Sub<T, Output = T>
        + core::ops::Add<&'a T, Output = T>
        + core::ops::BitAnd<Output = T>,
{
    // Step 1: Compute Montgomery parameters using specified method
    let (r, _r_inv, n_prime, r_bits) =
        strict_compute_montgomery_params_with_method(modulus, method)?;

    // Step 2: Convert inputs to Montgomery form
    let a_mont = strict_to_montgomery(a, modulus, &r);
    let b_clone = b + &T::zero(); // Clone b to pass by value
    let b_mont = strict_to_montgomery(b_clone, modulus, &r);

    // Step 3: Montgomery multiplication in Montgomery domain
    // a_mont * b_mont = (a*R) * (b*R) = a*b*R² mod N
    let product = crate::mul::strict_mod_mul(a_mont, &b_mont, modulus);

    // Apply Montgomery reduction once to get a*b*R mod N (still in Montgomery form)
    let result_mont = strict_from_montgomery(product, modulus, &n_prime, r_bits);

    // Step 4: Convert final result back from Montgomery form to get (a * b) mod N
    Some(strict_from_montgomery(
        result_mont,
        modulus,
        &n_prime,
        r_bits,
    ))
}

/// Complete Montgomery modular multiplication (Strict): A * B mod N
/// Uses reference-based operations throughout to minimize copying of large integers
///
/// This function uses the default NPrimeMethod (Newton) for N' computation.
/// Note: In the strict path, Newton is mapped to ExtendedEuclidean since these
/// paths use legacy R > N semantics rather than wide-REDC's R = 2^W.
/// For performance-critical applications, consider using `strict_montgomery_mod_mul_with_method`
/// to select the optimal N' computation method for your specific use case.
///
/// Returns None if Montgomery parameter computation fails
pub fn strict_montgomery_mod_mul<T>(a: T, b: &T, modulus: &T) -> Option<T>
where
    T: num_traits::Zero
        + num_traits::One
        + crate::parity::Parity
        + PartialEq
        + PartialOrd
        + core::ops::Shl<usize, Output = T>
        + core::ops::Sub<Output = T>
        + core::ops::Shr<usize, Output = T>
        + num_traits::ops::overflowing::OverflowingAdd
        + num_traits::ops::overflowing::OverflowingSub
        + for<'a> core::ops::Rem<&'a T, Output = T>,
    for<'a> T: core::ops::RemAssign<&'a T>
        + core::ops::Mul<&'a T, Output = T>
        + core::ops::Div<&'a T, Output = T>
        + core::ops::Sub<&'a T, Output = T>
        + core::ops::Add<&'a T, Output = T>
        + core::ops::AddAssign<&'a T>,
    for<'a> &'a T: core::ops::Rem<&'a T, Output = T>
        + core::ops::Mul<&'a T, Output = T>
        + core::ops::Sub<&'a T, Output = T>
        + core::ops::Div<&'a T, Output = T>
        + core::ops::Sub<T, Output = T>
        + core::ops::Add<&'a T, Output = T>
        + core::ops::BitAnd<Output = T>,
{
    strict_montgomery_mod_mul_with_method(a, b, modulus, crate::montgomery::NPrimeMethod::default())
}

/// Montgomery-based modular exponentiation with method selection (Strict): base^exponent mod modulus
/// Uses Montgomery arithmetic for efficient repeated multiplication with reference-based operations
/// to minimize copying of large integers
/// Returns None if Montgomery parameter computation fails
pub fn strict_montgomery_mod_exp_with_method<T>(
    mut base: T,
    exponent: &T,
    modulus: &T,
    method: crate::montgomery::NPrimeMethod,
) -> Option<T>
where
    T: num_traits::Zero
        + num_traits::One
        + crate::parity::Parity
        + PartialEq
        + PartialOrd
        + core::ops::Shl<usize, Output = T>
        + core::ops::Sub<Output = T>
        + core::ops::Shr<usize, Output = T>
        + core::ops::ShrAssign<usize>
        + num_traits::ops::overflowing::OverflowingAdd
        + num_traits::ops::overflowing::OverflowingSub,
    for<'a> T: core::ops::RemAssign<&'a T>
        + core::ops::Rem<&'a T, Output = T>
        + core::ops::Mul<&'a T, Output = T>
        + core::ops::Div<&'a T, Output = T>
        + core::ops::Sub<&'a T, Output = T>
        + core::ops::Add<&'a T, Output = T>
        + core::ops::AddAssign<&'a T>,
    for<'a> &'a T: core::ops::Rem<&'a T, Output = T>
        + core::ops::Mul<&'a T, Output = T>
        + core::ops::Sub<&'a T, Output = T>
        + core::ops::Div<&'a T, Output = T>
        + core::ops::Sub<T, Output = T>
        + core::ops::Add<&'a T, Output = T>
        + core::ops::BitAnd<Output = T>,
{
    // Step 1: Compute Montgomery parameters using specified method
    let (r, _r_inv, n_prime, r_bits) =
        strict_compute_montgomery_params_with_method(modulus, method)?;

    // Step 2: Reduce base and convert to Montgomery form
    base.rem_assign(modulus); // Reduce base first to ensure base < modulus
    base = strict_to_montgomery(base, modulus, &r);

    // Step 3: Initialize result to Montgomery form of 1
    let mut result = strict_to_montgomery(T::one(), modulus, &r);

    // Step 4: Clone exponent for manipulation to avoid moving the reference
    let mut exp = exponent + &T::zero(); // Clone exponent

    // Step 5: Binary exponentiation staying in Montgomery form
    // Use Montgomery multiplication to stay in Montgomery domain throughout
    while exp > T::zero() {
        // If exponent is odd, multiply result by current base power
        if exp.is_odd() {
            result = strict_montgomery_mul(result, &base, modulus, &n_prime, r_bits);
        }

        // Square the base for next iteration
        exp >>= 1;
        if exp > T::zero() {
            let base_clone = &base + &T::zero(); // Clone base using references
            base = strict_montgomery_mul(base, &base_clone, modulus, &n_prime, r_bits);
        }
    }

    // Step 6: Convert result back from Montgomery form
    Some(strict_from_montgomery(result, modulus, &n_prime, r_bits))
}

/// Montgomery-based modular exponentiation (Strict): base^exponent mod modulus
/// Uses Montgomery arithmetic for efficient repeated multiplication with reference-based operations
/// to minimize copying of large integers
///
/// This function uses the default NPrimeMethod (Newton) for N' computation.
/// Note: In the strict path, Newton is mapped to ExtendedEuclidean since these
/// paths use legacy R > N semantics rather than wide-REDC's R = 2^W.
/// For performance-critical applications or specific hardware optimization, consider using
/// `strict_montgomery_mod_exp_with_method` to select the optimal N' computation method:
/// - `Newton`: Default, mapped to ExtendedEuclidean in strict path
/// - `ExtendedEuclidean`: Balanced performance, good for most use cases
/// - `TrialSearch`: Simple but O(R) complexity, suitable for small moduli
/// - `HenselsLifting`: Optimal for R = 2^k, best performance for power-of-2 radix
///
/// Returns None if Montgomery parameter computation fails
pub fn strict_montgomery_mod_exp<T>(base: T, exponent: &T, modulus: &T) -> Option<T>
where
    T: num_traits::Zero
        + num_traits::One
        + crate::parity::Parity
        + PartialEq
        + PartialOrd
        + core::ops::Shl<usize, Output = T>
        + core::ops::Sub<Output = T>
        + core::ops::Shr<usize, Output = T>
        + core::ops::ShrAssign<usize>
        + num_traits::ops::overflowing::OverflowingAdd
        + num_traits::ops::overflowing::OverflowingSub,
    for<'a> T: core::ops::RemAssign<&'a T>
        + core::ops::Rem<&'a T, Output = T>
        + core::ops::Mul<&'a T, Output = T>
        + core::ops::Div<&'a T, Output = T>
        + core::ops::Sub<&'a T, Output = T>
        + core::ops::Add<&'a T, Output = T>
        + core::ops::AddAssign<&'a T>,
    for<'a> &'a T: core::ops::Rem<&'a T, Output = T>
        + core::ops::Mul<&'a T, Output = T>
        + core::ops::Sub<&'a T, Output = T>
        + core::ops::Div<&'a T, Output = T>
        + core::ops::Sub<T, Output = T>
        + core::ops::Add<&'a T, Output = T>
        + core::ops::BitAnd<Output = T>,
{
    strict_montgomery_mod_exp_with_method(
        base,
        exponent,
        modulus,
        crate::montgomery::NPrimeMethod::default(),
    )
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_strict_compute_n_prime_trial_search() {
        let modulus = 13u32;
        let r = 16u32;
        let n_prime = strict_compute_n_prime_trial_search(&modulus, &r).unwrap();
        assert_eq!(n_prime, 11);
    }

    #[test]
    fn test_strict_with_fixed_bigint() {
        type U256 = fixed_bigint::FixedUInt<u32, 4>;
        let modulus = U256::from(13u32);
        let r = U256::from(16u32);
        let n_prime = strict_compute_n_prime_trial_search(&modulus, &r).unwrap();
        assert_eq!(n_prime, U256::from(11u32));
    }

    #[test]
    fn test_strict_compute_montgomery_params() {
        // Test with our documented example: N = 13
        // Expected: R = 16, R^(-1) = 9, N' = 11, r_bits = 4
        let modulus = 13u32;
        let (r, r_inv, n_prime, r_bits) = strict_compute_montgomery_params(&modulus).unwrap();

        assert_eq!(r, 16);
        assert_eq!(r_inv, 9);
        assert_eq!(n_prime, 11);
        assert_eq!(r_bits, 4);

        // Verify the mathematical properties
        // 1. R * R^(-1) ≡ 1 (mod N)
        assert_eq!((r * r_inv) % 13, 1);

        // 2. N * N' ≡ -1 (mod R) which means N * N' ≡ R - 1 (mod R)
        assert_eq!((13 * n_prime) % r, r - 1);

        // 3. R should be > N and a power of 2
        assert!(r > 13);
        assert_eq!(r, 1u32 << r_bits);
    }

    #[test]
    fn test_strict_compute_montgomery_params_with_method() {
        // Test that the parametrized version produces identical results
        let modulus = 13u32;
        let default_result = strict_compute_montgomery_params(&modulus).unwrap();
        let explicit_trial_result = strict_compute_montgomery_params_with_method(
            &modulus,
            crate::montgomery::NPrimeMethod::TrialSearch,
        )
        .unwrap();

        // All methods produce identical results (N' is unique mod R)
        assert_eq!(default_result, explicit_trial_result);

        // Verify the explicit method call produces correct values
        let (r, r_inv, n_prime, r_bits) = explicit_trial_result;
        assert_eq!(r, 16);
        assert_eq!(r_inv, 9);
        assert_eq!(n_prime, 11);
        assert_eq!(r_bits, 4);

        // Test that all methods produce the same result (for now they all use trial search)
        let euclidean_result = strict_compute_montgomery_params_with_method(
            &modulus,
            crate::montgomery::NPrimeMethod::ExtendedEuclidean,
        )
        .unwrap();
        let hensels_result = strict_compute_montgomery_params_with_method(
            &modulus,
            crate::montgomery::NPrimeMethod::HenselsLifting,
        )
        .unwrap();

        assert_eq!(default_result, euclidean_result);
        assert_eq!(default_result, hensels_result);
    }

    #[test]
    fn test_strict_montgomery_params_with_fixed_bigint() {
        type U256 = fixed_bigint::FixedUInt<u32, 4>;
        let modulus = U256::from(13u32);
        let (r, r_inv, n_prime, r_bits) = strict_compute_montgomery_params(&modulus).unwrap();

        assert_eq!(r, U256::from(16u32));
        assert_eq!(r_inv, U256::from(9u32));
        assert_eq!(n_prime, U256::from(11u32));
        assert_eq!(r_bits, 4);

        // Verify mathematical properties
        let thirteen = U256::from(13u32);
        let one = U256::from(1u32);

        // R * R^(-1) ≡ 1 (mod N)
        assert_eq!((r * r_inv) % thirteen, one);

        // N * N' ≡ -1 (mod R) which means N * N' ≡ R - 1 (mod R)
        assert_eq!((thirteen * n_prime) % r, r - one);

        // R should be > N and a power of 2
        assert!(r > thirteen);
        assert_eq!(r, one << r_bits);
    }

    #[test]
    fn test_strict_from_montgomery() {
        // Test with our documented example: N = 13
        let modulus = 13u32;
        let (_r, _r_inv, n_prime, r_bits) = strict_compute_montgomery_params(&modulus).unwrap();

        // Test conversion from Montgomery form back to normal form
        // We know from the basic tests that 7 -> Montgomery form is 8
        // So 8 -> normal form should be 7
        let mont_form = 8u32; // This is 7 in Montgomery form
        let normal_form = strict_from_montgomery(mont_form, &modulus, &n_prime, r_bits);
        assert_eq!(normal_form, 7u32);

        // Test with 5 -> Montgomery (2) -> back to normal (should be 5)
        let mont_5 = 2u32; // This is 5 in Montgomery form
        let normal_5 = strict_from_montgomery(mont_5, &modulus, &n_prime, r_bits);
        assert_eq!(normal_5, 5u32);

        // Test edge cases
        let mont_0 = 0u32; // 0 in Montgomery form is still 0
        let normal_0 = strict_from_montgomery(mont_0, &modulus, &n_prime, r_bits);
        assert_eq!(normal_0, 0u32);
    }

    #[test]
    fn test_strict_from_montgomery_with_fixed_bigint() {
        type U256 = fixed_bigint::FixedUInt<u32, 4>;
        let modulus = U256::from(13u32);
        let (_r, _r_inv, n_prime, r_bits) = strict_compute_montgomery_params(&modulus).unwrap();

        // Test conversion from Montgomery form back to normal form
        let mont_form = U256::from(8u32); // This is 7 in Montgomery form
        let normal_form = strict_from_montgomery(mont_form, &modulus, &n_prime, r_bits);
        assert_eq!(normal_form, U256::from(7u32));

        let mont_5 = U256::from(2u32); // This is 5 in Montgomery form
        let normal_5 = strict_from_montgomery(mont_5, &modulus, &n_prime, r_bits);
        assert_eq!(normal_5, U256::from(5u32));

        // Test edge case
        let mont_0 = U256::from(0u32);
        let normal_0 = strict_from_montgomery(mont_0, &modulus, &n_prime, r_bits);
        assert_eq!(normal_0, U256::from(0u32));
    }

    #[test]
    fn test_strict_round_trip_conversion() {
        // Test round-trip: normal -> Montgomery -> normal using strict functions
        // We'll need to implement strict_to_montgomery first to complete this test
        // For now, we can verify that from_montgomery works with known Montgomery values

        let modulus = 13u32;
        let (r, _r_inv, n_prime, r_bits) = strict_compute_montgomery_params(&modulus).unwrap();

        // Test all values 0..13 using basic_to_montgomery to get Montgomery form,
        // then strict_from_montgomery to get back to normal
        for i in 0u32..13u32 {
            // Use basic function to convert to Montgomery form
            let mont = crate::montgomery::basic_mont::basic_to_montgomery(i, modulus, r);
            // Use strict function to convert back
            let back = strict_from_montgomery(mont, &modulus, &n_prime, r_bits);
            assert_eq!(
                back, i,
                "Round-trip failed for {}: {} -> {} -> {}",
                i, i, mont, back
            );
        }
    }

    #[test]
    fn test_strict_to_montgomery() {
        // Test with our documented example: N = 13, R = 16
        let modulus = 13u32;
        let (r, _r_inv, _n_prime, _r_bits) = strict_compute_montgomery_params(&modulus).unwrap();

        // From EXAMPLE1_COMPUTE_PARAM.md:
        // 7 -> Montgomery: 7 * 16 mod 13 = 112 mod 13 = 8
        // 5 -> Montgomery: 5 * 16 mod 13 = 80 mod 13 = 2
        assert_eq!(strict_to_montgomery(7u32, &modulus, &r), 8u32);
        assert_eq!(strict_to_montgomery(5u32, &modulus, &r), 2u32);

        // Test edge cases
        assert_eq!(strict_to_montgomery(0u32, &modulus, &r), 0u32); // 0 * R mod N = 0
        assert_eq!(strict_to_montgomery(1u32, &modulus, &r), 3u32); // 1 * 16 mod 13 = 3
    }

    #[test]
    fn test_strict_to_montgomery_with_fixed_bigint() {
        type U256 = fixed_bigint::FixedUInt<u32, 4>;
        let modulus = U256::from(13u32);
        let (r, _r_inv, _n_prime, _r_bits) = strict_compute_montgomery_params(&modulus).unwrap();

        // Test conversions
        assert_eq!(
            strict_to_montgomery(U256::from(7u32), &modulus, &r),
            U256::from(8u32)
        );
        assert_eq!(
            strict_to_montgomery(U256::from(5u32), &modulus, &r),
            U256::from(2u32)
        );
        assert_eq!(
            strict_to_montgomery(U256::from(0u32), &modulus, &r),
            U256::from(0u32)
        );
        assert_eq!(
            strict_to_montgomery(U256::from(1u32), &modulus, &r),
            U256::from(3u32)
        );
    }

    #[test]
    fn test_strict_montgomery_mod_mul() {
        // Test the complete strict Montgomery workflow end-to-end
        let modulus = 13u32;

        // Test: 7 * 5 mod 13 = 9
        let a = 7u32;
        let b = 5u32;
        let result = strict_montgomery_mod_mul(a, &b, &modulus).unwrap();
        assert_eq!(result, 9u32);

        // Verify against regular modular multiplication
        let regular_result = crate::mul::strict_mod_mul(a, &b, &modulus);
        assert_eq!(result, regular_result);

        // Test more cases to ensure correctness
        let test_cases = [
            (0u32, 5u32, 0u32),   // 0 * 5 mod 13 = 0
            (7u32, 0u32, 0u32),   // 7 * 0 mod 13 = 0
            (1u32, 7u32, 7u32),   // 1 * 7 mod 13 = 7
            (7u32, 1u32, 7u32),   // 7 * 1 mod 13 = 7
            (12u32, 12u32, 1u32), // 12 * 12 mod 13 = 144 mod 13 = 1
            (6u32, 9u32, 2u32),   // 6 * 9 mod 13 = 54 mod 13 = 2
        ];

        for (a, b, expected) in test_cases.iter() {
            let result = strict_montgomery_mod_mul(*a, b, &modulus).unwrap();
            assert_eq!(result, *expected, "Failed for {} * {} mod 13", a, b);
        }
    }

    #[test]
    fn test_strict_montgomery_mod_mul_with_fixed_bigint() {
        type U256 = fixed_bigint::FixedUInt<u32, 4>;
        let modulus = U256::from(13u32);

        // Test: 7 * 5 mod 13 = 9
        let a = U256::from(7u32);
        let b = U256::from(5u32);
        let result = strict_montgomery_mod_mul(a, &b, &modulus).unwrap();
        assert_eq!(result, U256::from(9u32));

        // Verify against regular modular multiplication
        let regular_result = crate::mul::strict_mod_mul(U256::from(7u32), &b, &modulus);
        assert_eq!(result, regular_result);
    }

    #[test]
    fn test_strict_complete_round_trip() {
        // Test the complete workflow: to_montgomery -> from_montgomery
        let modulus = 13u32;
        let (r, _r_inv, n_prime, r_bits) = strict_compute_montgomery_params(&modulus).unwrap();

        // Test all values 0..13 for complete round-trip
        for i in 0u32..13u32 {
            let mont = strict_to_montgomery(i, &modulus, &r);
            let back = strict_from_montgomery(mont, &modulus, &n_prime, r_bits);
            assert_eq!(
                back, i,
                "Complete round-trip failed for {}: {} -> {} -> {}",
                i, i, mont, back
            );
        }
    }

    #[test]
    fn test_strict_montgomery_mod_exp() {
        // Test Montgomery-based exponentiation against regular exponentiation
        let modulus = 13u32;

        // Test: 7^5 mod 13 = 16807 mod 13 = 11
        let base = 7u32;
        let exponent = 5u32;
        let montgomery_result = strict_montgomery_mod_exp(base, &exponent, &modulus).unwrap();
        let regular_result = crate::exp::strict_mod_exp(base, &exponent, &modulus);
        assert_eq!(montgomery_result, regular_result);
        assert_eq!(montgomery_result, 11u32);

        // Test edge cases
        assert_eq!(
            strict_montgomery_mod_exp(0u32, &5u32, &modulus).unwrap(),
            0u32
        ); // 0^5 = 0
        assert_eq!(
            strict_montgomery_mod_exp(7u32, &0u32, &modulus).unwrap(),
            1u32
        ); // 7^0 = 1
        assert_eq!(
            strict_montgomery_mod_exp(1u32, &100u32, &modulus).unwrap(),
            1u32
        ); // 1^100 = 1
        assert_eq!(
            strict_montgomery_mod_exp(7u32, &1u32, &modulus).unwrap(),
            7u32
        ); // 7^1 = 7

        // Test more cases
        let test_cases = [
            (2u32, 10u32, 13u32, 10u32), // 2^10 mod 13 = 1024 mod 13 = 10
            (3u32, 4u32, 13u32, 3u32),   // 3^4 mod 13 = 81 mod 13 = 3
            (12u32, 2u32, 13u32, 1u32),  // 12^2 mod 13 = 144 mod 13 = 1
            (5u32, 3u32, 13u32, 8u32),   // 5^3 mod 13 = 125 mod 13 = 8
        ];

        for (base, exp, mod_val, expected) in test_cases.iter() {
            let result = strict_montgomery_mod_exp(*base, exp, mod_val).unwrap();
            assert_eq!(
                result, *expected,
                "Failed for {}^{} mod {}",
                base, exp, mod_val
            );
        }
    }

    #[test]
    fn test_strict_montgomery_mod_exp_with_fixed_bigint() {
        type U256 = fixed_bigint::FixedUInt<u32, 4>;
        let modulus = U256::from(13u32);

        // Test: 7^5 mod 13 = 11
        let base = U256::from(7u32);
        let exponent = U256::from(5u32);
        let montgomery_result = strict_montgomery_mod_exp(base, &exponent, &modulus).unwrap();
        let regular_result = crate::exp::strict_mod_exp(U256::from(7u32), &exponent, &modulus);
        assert_eq!(montgomery_result, regular_result);
        assert_eq!(montgomery_result, U256::from(11u32));

        // Test edge cases
        assert_eq!(
            strict_montgomery_mod_exp(U256::from(0u32), &U256::from(5u32), &modulus).unwrap(),
            U256::from(0u32)
        ); // 0^5 = 0

        assert_eq!(
            strict_montgomery_mod_exp(U256::from(7u32), &U256::from(0u32), &modulus).unwrap(),
            U256::from(1u32)
        ); // 7^0 = 1

        assert_eq!(
            strict_montgomery_mod_exp(U256::from(1u32), &U256::from(100u32), &modulus).unwrap(),
            U256::from(1u32)
        ); // 1^100 = 1
    }

    #[test]
    fn test_strict_montgomery_mod_exp_comprehensive() {
        // Comprehensive test: verify Montgomery exp matches regular exp for all small values
        let modulus = 13u32;

        for base in 0u32..13u32 {
            for exponent in 0u32..10u32 {
                let montgomery_result =
                    strict_montgomery_mod_exp(base, &exponent, &modulus).unwrap();
                let regular_result = crate::exp::strict_mod_exp(base, &exponent, &modulus);
                assert_eq!(
                    montgomery_result, regular_result,
                    "Montgomery vs regular exp mismatch: {}^{} mod 13: {} != {}",
                    base, exponent, montgomery_result, regular_result
                );
            }
        }
    }

    #[test]
    fn test_strict_montgomery_mod_exp_large_exponents() {
        // Test with larger exponents to verify efficiency benefits would apply
        let modulus = 13u32;

        assert_eq!(
            strict_montgomery_mod_exp(2u32, &100u32, &modulus).unwrap(),
            crate::exp::strict_mod_exp(2u32, &100u32, &modulus)
        );

        assert_eq!(
            strict_montgomery_mod_exp(3u32, &1000u32, &modulus).unwrap(),
            crate::exp::strict_mod_exp(3u32, &1000u32, &modulus)
        );

        // Test with very large exponent
        assert_eq!(
            strict_montgomery_mod_exp(7u32, &999999u32, &modulus).unwrap(),
            crate::exp::strict_mod_exp(7u32, &999999u32, &modulus)
        );
    }

    #[test]
    fn test_strict_compute_n_prime_trial_search_failure() {
        // Test case where no N' can be found - this happens when gcd(modulus, R) != 1
        // For Montgomery arithmetic, we need gcd(N, R) = 1, but let's create a scenario
        // where this fails by using an even modulus with R = power of 2

        // Example: N = 4, R = 8 (both even, so gcd(4, 8) = 4 != 1)
        // There should be no N' such that 4 * N' ≡ -1 (mod 8) since 4 and 8 share factors
        let modulus = 4u32;
        let r = 8u32;
        let result = strict_compute_n_prime_trial_search(&modulus, &r);
        assert!(
            result.is_none(),
            "Should return None for invalid modulus-R pair"
        );
    }

    #[test]
    fn test_strict_compute_montgomery_params_failure_even_modulus() {
        // Test Montgomery parameter computation failure with even modulus
        // Montgomery arithmetic requires odd modulus, but the parameter computation
        // should handle this gracefully and return None
        let even_modulus = 4u32;
        let result = strict_compute_montgomery_params(&even_modulus);
        assert!(result.is_none(), "Should return None for even modulus");
    }

    #[test]
    fn test_strict_compute_montgomery_params_failure_with_method() {
        // Test all N' computation methods with invalid inputs
        let invalid_modulus = 4u32; // Even modulus

        // Trial search should fail
        let trial_result = strict_compute_montgomery_params_with_method(
            &invalid_modulus,
            crate::montgomery::NPrimeMethod::TrialSearch,
        );
        assert!(
            trial_result.is_none(),
            "Trial search should fail with even modulus"
        );

        // Extended Euclidean should fail
        let euclidean_result = strict_compute_montgomery_params_with_method(
            &invalid_modulus,
            crate::montgomery::NPrimeMethod::ExtendedEuclidean,
        );
        assert!(
            euclidean_result.is_none(),
            "Extended Euclidean should fail with even modulus"
        );

        // Hensel's lifting should fail
        let hensels_result = strict_compute_montgomery_params_with_method(
            &invalid_modulus,
            crate::montgomery::NPrimeMethod::HenselsLifting,
        );
        assert!(
            hensels_result.is_none(),
            "Hensel's lifting should fail with even modulus"
        );
    }

    #[test]
    fn test_strict_montgomery_mod_mul_parameter_failure() {
        // Test that montgomery_mod_mul returns None when parameter computation fails
        let invalid_modulus = 4u32;
        let a = 2u32;
        let b = 3u32;

        let result = strict_montgomery_mod_mul(a, &b, &invalid_modulus);
        assert!(
            result.is_none(),
            "Montgomery mod_mul should return None for invalid modulus"
        );
    }

    #[test]
    fn test_strict_montgomery_mod_exp_parameter_failure() {
        // Test that montgomery_mod_exp returns None when parameter computation fails
        let invalid_modulus = 4u32;
        let base = 2u32;
        let exponent = 3u32;

        let result = strict_montgomery_mod_exp(base, &exponent, &invalid_modulus);
        assert!(
            result.is_none(),
            "Montgomery mod_exp should return None for invalid modulus"
        );
    }

    #[test]
    fn test_montgomery_reduction_final_subtraction() {
        // Test case designed to trigger t >= modulus in Montgomery reduction
        // We need values where the intermediate result requires final subtraction

        // Using specific values that cause Montgomery reduction to need final subtraction
        let modulus = 15u32; // N = 15
        let (r, _r_inv, n_prime, r_bits) = strict_compute_montgomery_params(&modulus).unwrap();

        // Create Montgomery forms that when processed will result in t >= modulus
        // This requires careful construction to hit the >= modulus branch in from_montgomery
        let a_mont = strict_to_montgomery(14u32, &modulus, &r); // Near maximum value

        // The from_montgomery reduction should trigger the t >= modulus branch
        let result = strict_from_montgomery(a_mont, &modulus, &n_prime, r_bits);
        assert_eq!(result, 14u32);

        // Another case with maximum value
        let max_mont = strict_to_montgomery(14u32, &modulus, &r);
        let result_max = strict_from_montgomery(max_mont, &modulus, &n_prime, r_bits);
        assert_eq!(result_max, 14u32);
    }

    #[test]
    fn test_hensel_lifting_conditional_branches() {
        // Test Hensel's lifting with values that trigger conditional branches
        let modulus = 15u32; // Composite number that's harder for some algorithms

        // Test with different N' computation methods to hit different branches
        let hensels_result = strict_compute_montgomery_params_with_method(
            &modulus,
            crate::montgomery::NPrimeMethod::HenselsLifting,
        );

        // Should still work but may hit different code paths
        assert!(
            hensels_result.is_some(),
            "Hensel's lifting should work with modulus 15"
        );

        // Try with a larger odd composite to stress test the algorithm
        let large_modulus = 255u32; // 255 = 3 * 5 * 17, odd composite
        let result_large = strict_compute_montgomery_params_with_method(
            &large_modulus,
            crate::montgomery::NPrimeMethod::HenselsLifting,
        );
        assert!(
            result_large.is_some(),
            "Should handle larger composite moduli"
        );
    }

    #[test]
    fn test_specific_montgomery_edge_cases() {
        // Test specific mathematical edge cases that may hit uncovered branches

        // Case 1: Modulus where R^(-1) computation is at edge cases
        let edge_modulus = 9u32; // 9 = 3^2, may hit different inv computation paths
        let params = strict_compute_montgomery_params(&edge_modulus);
        assert!(params.is_some(), "Should handle modulus 9");

        if let Some((r, _r_inv, n_prime, r_bits)) = params {
            // Test Montgomery operations with edge values
            let edge_val = 8u32; // Maximum value for this modulus
            let mont_edge = strict_to_montgomery(edge_val, &edge_modulus, &r);
            let back = strict_from_montgomery(mont_edge, &edge_modulus, &n_prime, r_bits);
            assert_eq!(back, edge_val);

            // Test multiplication that might hit t >= modulus branch
            let a = 7u32;
            let b = 8u32;
            let result = strict_montgomery_mod_mul(a, &b, &edge_modulus).unwrap();
            let expected = (a * b) % edge_modulus;
            assert_eq!(result, expected);
        }

        // Case 2: Values designed to stress Montgomery multiplication
        let modulus2 = 21u32; // 21 = 3 * 7
        if let Some((_r, _r_inv, _n_prime, _r_bits)) = strict_compute_montgomery_params(&modulus2) {
            // This Montgomery multiplication may hit the final reduction branch
            let result = strict_montgomery_mod_mul(20u32, &19u32, &modulus2).unwrap();

            let expected = (20u32 * 19u32) % modulus2;
            assert_eq!(result, expected);
        }
    }

    #[test]
    fn test_montgomery_exponentiation_edge_paths() {
        // Test exponentiation with values designed to hit specific loop branches
        let modulus = 11u32; // Prime modulus

        // Test with exponent that exercises different loop paths
        let base = 10u32; // Base near modulus
        let exponent = 10u32; // Exponent that will exercise binary exponentiation loops

        let result = strict_montgomery_mod_exp(base, &exponent, &modulus).unwrap();
        let expected = crate::exp::strict_mod_exp(base, &exponent, &modulus);
        assert_eq!(result, expected);

        // Test with specific values that may hit the exp > T::zero() conditional
        let small_exp = 1u32;
        let result_small = strict_montgomery_mod_exp(base, &small_exp, &modulus).unwrap();
        assert_eq!(result_small, base);

        // Test with larger exponent to exercise more iterations
        let large_exp = 100u32;
        let result_large = strict_montgomery_mod_exp(2u32, &large_exp, &modulus).unwrap();
        let expected_large = crate::exp::strict_mod_exp(2u32, &large_exp, &modulus);
        assert_eq!(result_large, expected_large);
    }

    #[test]
    fn test_strict_montgomery_mul_basic() {
        // Test the new strict_montgomery_mul function directly
        let modulus = 13u32;
        let (r, _r_inv, n_prime, r_bits) = strict_compute_montgomery_params(&modulus).unwrap();

        // Convert test values to Montgomery form
        let a = 7u32;
        let b = 5u32;
        let a_mont = strict_to_montgomery(a, &modulus, &r);
        let b_mont = strict_to_montgomery(b, &modulus, &r);

        // Test Montgomery multiplication
        let result_mont = strict_montgomery_mul(a_mont, &b_mont, &modulus, &n_prime, r_bits);
        let result = strict_from_montgomery(result_mont, &modulus, &n_prime, r_bits);

        // Verify against regular multiplication
        let expected = (a * b) % modulus;
        assert_eq!(
            result, expected,
            "Montgomery multiplication failed: {} * {} = {} (expected {})",
            a, b, result, expected
        );
    }

    #[test]
    fn test_strict_montgomery_mul_edge_cases() {
        // Test edge cases for Montgomery multiplication
        let modulus = 17u32;
        let (r, _r_inv, n_prime, r_bits) = strict_compute_montgomery_params(&modulus).unwrap();

        // Test with zero
        let zero_mont = strict_to_montgomery(0u32, &modulus, &r);
        let a_mont = strict_to_montgomery(5u32, &modulus, &r);
        let result_mont = strict_montgomery_mul(zero_mont, &a_mont, &modulus, &n_prime, r_bits);
        let result = strict_from_montgomery(result_mont, &modulus, &n_prime, r_bits);
        assert_eq!(result, 0u32, "Montgomery multiplication with zero failed");

        // Test with one (Montgomery form)
        let one_mont = strict_to_montgomery(1u32, &modulus, &r);
        let result_mont = strict_montgomery_mul(one_mont, &a_mont, &modulus, &n_prime, r_bits);
        let result = strict_from_montgomery(result_mont, &modulus, &n_prime, r_bits);
        assert_eq!(result, 5u32, "Montgomery multiplication with one failed");

        // Test with maximum value less than modulus
        let max_val = modulus - 1;
        let max_mont = strict_to_montgomery(max_val, &modulus, &r);
        let result_mont = strict_montgomery_mul(max_mont, &max_mont, &modulus, &n_prime, r_bits);
        let result = strict_from_montgomery(result_mont, &modulus, &n_prime, r_bits);
        let expected = (max_val * max_val) % modulus;
        assert_eq!(
            result, expected,
            "Montgomery multiplication with max values failed"
        );
    }

    #[test]
    fn test_strict_montgomery_mul_large_modulus() {
        // Test Montgomery multiplication with a large prime modulus.
        // Note: strict_mod_mul prevents overflow in the a*b step, but
        // strict_from_montgomery's intermediate a_low * n_prime can still
        // overflow for some input combinations with this modulus.
        let modulus = 65521u32; // Large prime
        let (r, _r_inv, n_prime, r_bits) = strict_compute_montgomery_params(&modulus).unwrap();

        // Test with values that would cause overflow if multiplied directly
        let a = 65520u32; // Near maximum
        let b = 65519u32; // Near maximum
        let a_mont = strict_to_montgomery(a, &modulus, &r);
        let b_mont = strict_to_montgomery(b, &modulus, &r);

        // This should not panic due to our modular multiplication fix
        let result_mont = strict_montgomery_mul(a_mont, &b_mont, &modulus, &n_prime, r_bits);
        let result = strict_from_montgomery(result_mont, &modulus, &n_prime, r_bits);

        // Verify correctness
        let expected = crate::mul::strict_mod_mul(a, &b, &modulus);
        assert_eq!(result, expected, "Overflow prevention test failed");
    }

    #[test]
    fn test_strict_montgomery_mul_various_moduli() {
        // Test Montgomery multiplication with various moduli sizes
        let test_cases = [
            (7u32, 3u32, 5u32),    // Small modulus
            (11u32, 8u32, 9u32),   // Prime modulus
            (15u32, 14u32, 13u32), // Composite modulus
            (21u32, 20u32, 19u32), // Another composite
            (97u32, 96u32, 95u32), // Larger prime
        ];

        for (modulus, a, b) in test_cases.iter() {
            if let Some((r, _r_inv, n_prime, r_bits)) = strict_compute_montgomery_params(modulus) {
                let a_mont = strict_to_montgomery(*a, modulus, &r);
                let b_mont = strict_to_montgomery(*b, modulus, &r);

                let result_mont = strict_montgomery_mul(a_mont, &b_mont, modulus, &n_prime, r_bits);
                let result = strict_from_montgomery(result_mont, modulus, &n_prime, r_bits);

                let expected = (a * b) % modulus;
                assert_eq!(
                    result, expected,
                    "Montgomery multiplication failed for modulus {}: {} * {} = {} (expected {})",
                    modulus, a, b, result, expected
                );
            }
        }
    }

    #[test]
    fn test_strict_montgomery_mul_consistency() {
        // Test that Montgomery multiplication is consistent with regular Montgomery operations
        let modulus = 23u32;
        let (r, _r_inv, n_prime, r_bits) = strict_compute_montgomery_params(&modulus).unwrap();

        // Test multiple pairs to ensure consistency
        for a in 1u32..10u32 {
            for b in 1u32..10u32 {
                let a_mont = strict_to_montgomery(a, &modulus, &r);
                let b_mont = strict_to_montgomery(b, &modulus, &r);

                // Use our new function
                let result_mont =
                    strict_montgomery_mul(a_mont, &b_mont, &modulus, &n_prime, r_bits);
                let result = strict_from_montgomery(result_mont, &modulus, &n_prime, r_bits);

                // Compare with the expected result
                let expected = (a * b) % modulus;
                assert_eq!(
                    result, expected,
                    "Consistency check failed: {} * {} mod {} = {} (expected {})",
                    a, b, modulus, result, expected
                );
            }
        }
    }
}