1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
/*
Copyright (C) 2024 Fredrik Johansson
This file is part of FLINT.
FLINT is free software: you can redistribute it and/or modify it under
the terms of the GNU Lesser General Public License (LGPL) as published
by the Free Software Foundation; either version 3 of the License, or
(at your option) any later version. See <https://www.gnu.org/licenses/>.
*/
#include "thread_support.h"
#include "mpn_mod.h"
#include "gr_mat.h"
/* todo: retune this when arithmetic is more optimized */
/* cutoffs for classical -> waksman and waksman -> multi_mod */
/* equal cutoffs means waksman is not used */
static const short mat_mul_cutoff_tab[][2] = {
{88, 88}, /* bits = 80 */
{230, 230}, /* bits = 96 */
{220, 220}, /* bits = 112 */
{160, 160}, /* bits = 128 */
{10, 39}, /* bits = 144 */
{9, 57}, /* bits = 160 */
{17, 57}, /* bits = 176 */
{74, 74}, /* bits = 192 */
{8, 64}, /* bits = 208 */
{8, 64}, /* bits = 224 */
{8, 92}, /* bits = 240 */
{17, 63}, /* bits = 256 */
{8, 69}, /* bits = 272 */
{8, 65}, /* bits = 288 */
{8, 91}, /* bits = 304 */
{14, 67}, /* bits = 320 */
{8, 75}, /* bits = 336 */
{7, 77}, /* bits = 352 */
{7, 93}, /* bits = 368 */
{10, 79}, /* bits = 384 */
{6, 77}, /* bits = 400 */
{6, 91}, /* bits = 416 */
{6, 93}, /* bits = 432 */
{10, 73}, /* bits = 448 */
{6, 75}, /* bits = 464 */
{6, 87}, /* bits = 480 */
{6, 87}, /* bits = 496 */
{9, 71}, /* bits = 512 */
{6, 70}, /* bits = 528 */
{5, 77}, /* bits = 544 */
{5, 84}, /* bits = 560 */
{6, 67}, /* bits = 576 */
{5, 75}, /* bits = 592 */
{5, 74}, /* bits = 608 */
{4, 78}, /* bits = 624 */
{6, 63}, /* bits = 640 */
{4, 68}, /* bits = 656 */
{4, 68}, /* bits = 672 */
{4, 75}, /* bits = 688 */
{6, 59}, /* bits = 704 */
{4, 66}, /* bits = 720 */
{4, 66}, /* bits = 736 */
{4, 71}, /* bits = 752 */
{5, 59}, /* bits = 768 */
{4, 59}, /* bits = 784 */
{4, 66}, /* bits = 800 */
{4, 63}, /* bits = 816 */
{4, 58}, /* bits = 832 */
{4, 57}, /* bits = 848 */
{4, 61}, /* bits = 864 */
{4, 61}, /* bits = 880 */
{4, 55}, /* bits = 896 */
{4, 57}, /* bits = 912 */
{4, 64}, /* bits = 928 */
{4, 65}, /* bits = 944 */
{4, 57}, /* bits = 960 */
{4, 61}, /* bits = 976 */
{4, 62}, /* bits = 992 */
{4, 65}, /* bits = 1008 */
{4, 53}, /* bits = 1024 */
};
int
mpn_mod_mat_mul(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, gr_ctx_t ctx)
{
slong ar = A->r;
if (ar <= 3)
return gr_mat_mul_classical(C, A, B, ctx);
slong ac = A->c;
slong bc = B->c;
slong tab_i, cutoff_waksman, cutoff_multi_mod;
slong bits = MPN_MOD_CTX_MODULUS_BITS(ctx);
tab_i = (bits - FLINT_BITS - 1) / 16;
cutoff_waksman = mat_mul_cutoff_tab[tab_i][0];
cutoff_multi_mod = mat_mul_cutoff_tab[tab_i][1];
if (ar < cutoff_waksman || ac < cutoff_waksman || bc < cutoff_waksman)
return gr_mat_mul_classical(C, A, B, ctx);
if (ar < cutoff_multi_mod || ac < cutoff_multi_mod || bc < cutoff_multi_mod)
return mpn_mod_mat_mul_waksman(C, A, B, ctx);
/* special case: near the two-limb boundary, strassen beats multi_mod on a single thread */
#ifndef FLINT_USES_BLAS
if (bits >= 113 && bits <= 128 && flint_get_num_available_threads() == 1)
return gr_mat_mul_strassen(C, A, B, ctx);
#endif
return mpn_mod_mat_mul_multi_mod(C, A, B, ctx);
}