1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
/*
Copyright (C) 2008, 2009 William Hart
Copyright (C) 2010 Sebastian Pancratz
This file is part of FLINT.
FLINT is free software: you can redistribute it and/or modify it under
the terms of the GNU Lesser General Public License (LGPL) as published
by the Free Software Foundation; either version 3 of the License, or
(at your option) any later version. See <https://www.gnu.org/licenses/>.
*/
#include "fmpz.h"
#include "fmpz_vec.h"
#include "fmpz_poly.h"
#define FLINT_DIVREMLOW_DIVCONQUER_CUTOFF 16
int
_fmpz_poly_divremlow_divconquer_recursive(fmpz * Q, fmpz * QB,
const fmpz * A, const fmpz * B, slong lenB, int exact)
{
if (lenB <= FLINT_DIVREMLOW_DIVCONQUER_CUTOFF)
{
if (!_fmpz_poly_divrem_basecase(Q, QB, A, 2 * lenB - 1, B, lenB, exact))
return 0;
_fmpz_vec_sub(QB, A, QB, lenB - 1);
}
else
{
const slong n2 = lenB / 2;
const slong n1 = lenB - n2;
const fmpz * p1 = A + 2 * n2;
const fmpz * p2;
const fmpz * d1 = B + n2;
const fmpz * d2 = B;
fmpz * q1 = Q + n2;
fmpz * q2 = Q;
/*
Think of the top lenB coefficients of QB as temporary space; this
code will not depend on {QB, lenB - 1} and W being adjacent
*/
fmpz * W = QB + (lenB - 1);
fmpz *d1q1, *d2q1, *t, *d3q2, *d4q2;
/*
Set q1 to p1 div d1, a 2 n1 - 1 by n1 division, so q1 has length
at most n1; {W, n1 - 1} is d1 * q1 is truncated to length n1 - 1
*/
if (!_fmpz_poly_divremlow_divconquer_recursive(q1, W, p1, d1, n1, exact))
return 0;
/*
W is of length lenB, but we only care about the bottom n1 - 1
coeffs, which we push up by n2 + 1, to the very top; we do this
manually here instead of via _fmpz_vec_swap() because the source
and destination arrays overlap
*/
d1q1 = W + (n2 + 1);
{
slong i;
for (i = 0; i < n1 - 1; i++)
fmpz_swap(d1q1 + i, W + i);
}
/*
Compute d2q1 = d2 * q1, of length at most lenB - 1; we'll need the
top n2 coeffs for t and the bottom n1 - 1 coeffs for QB
*/
d2q1 = QB;
_fmpz_poly_mul(d2q1, q1, n1, d2, n2);
/*
Compute {t - (n2 - 1), 2 n2 - 1} to be the top 2 n2 - 1 coeffs of
A / x^n2 - (d1q1 x^n2 + d2q1).
Note that actually the bottom n2 - 1 coeffs may be arbitrary
*/
t = W + n1;
if (n1 == n2)
fmpz_zero(t);
_fmpz_vec_add(t, t, d2q1 + (n1 - 1), n2);
_fmpz_vec_neg(t, t, n2);
_fmpz_vec_add(t, t, A + (n1 + n2 - 1), n2);
p2 = t - (n2 - 1);
/*
Move {QB, n1 - 1} into the bottom coefficients of W, so that
we can use {QB, 2 n2 - 1} as space in the next division
*/
_fmpz_vec_swap(QB, W, n1 - 1);
/*
Compute q2 = t div {B + n1}, a 2 n2 - 1 by n2 division
*/
d3q2 = QB;
if (!_fmpz_poly_divremlow_divconquer_recursive(q2, d3q2, p2, B + n1, n2, exact))
return 0;
_fmpz_vec_swap(QB + n1, d3q2, n2 - 1);
if (lenB & WORD(1))
fmpz_zero(QB + n2);
_fmpz_vec_add(QB + n2, QB + n2, W, n1 - 1);
/*
Compute {d4q2, lenB - 1} as {B, n1} * {q2, n2}; then move the
bottom n2 coeffs of this into {QB, n2}, and add the top n1 - 1
coeffs to {QB + n2, n1 - 1}
*/
d4q2 = W;
_fmpz_poly_mul(d4q2, B, n1, q2, n2);
_fmpz_vec_swap(QB, d4q2, n2);
_fmpz_vec_add(QB + n2, QB + n2, d4q2 + n2, n1 - 1);
}
return 1;
}