#include "blis.h"
// File-scope thrinfo_t/thrcomm_t objects. They are empty-initialized here and
// then set up by bli_thread_init() (BLIS_SINGLE_COMM via bli_thrcomm_init(),
// the two thrinfo_t objects via their *_init_single() routines).
thrinfo_t BLIS_PACKM_SINGLE_THREADED = {};
thrinfo_t BLIS_GEMM_SINGLE_THREADED = {};
thrcomm_t BLIS_SINGLE_COMM = {};
// The global rntm_t and the mutex that this file locks around updates to it.
// Both are only declared here; their definitions live elsewhere.
extern rntm_t global_rntm;
extern bli_pthread_mutex_t global_rntm_mutex;
// Initializes the threading-related globals used by this file:
//  - BLIS_SINGLE_COMM, via bli_thrcomm_init() with a count of 1;
//  - BLIS_PACKM_SINGLE_THREADED and BLIS_GEMM_SINGLE_THREADED, via their
//    respective *_init_single() routines;
//  - global_rntm, via bli_thread_init_rntm_from_env().
void bli_thread_init( void )
{
bli_thrcomm_init( 1, &BLIS_SINGLE_COMM );
bli_packm_thrinfo_init_single( &BLIS_PACKM_SINGLE_THREADED );
bli_l3_thrinfo_init_single( &BLIS_GEMM_SINGLE_THREADED );
bli_thread_init_rntm_from_env( &global_rntm );
}
// Counterpart to bli_thread_init(). The body is currently empty, so calling
// it has no effect.
void bli_thread_finalize( void )
{
}
// Computes the index range [*start, *end) owned by the calling thread when a
// dimension of length n is divided among the n_way work ids of `thread`.
//
// The dimension is viewed as n / bf whole blocks of bf indices plus an edge
// of n % bf indices. Every thread receives the same base number of whole
// blocks; if the whole blocks do not divide evenly, (n / bf) % n_way threads
// receive one extra block each.
//
//  - handle_edge_low == FALSE: the lowest work ids receive the extra blocks
//    and the last work id additionally receives the edge.
//  - handle_edge_low == TRUE:  the highest work ids receive the extra blocks
//    and work id 0 additionally receives the edge (all later ranges are
//    shifted up by the edge size).
//
// When n_way is 1 the range is simply [0, n).
void bli_thread_range_sub
     (
       thrinfo_t* thread,
       dim_t      n,
       dim_t      bf,
       bool       handle_edge_low,
       dim_t*     start,
       dim_t*     end
     )
{
	const dim_t n_way = bli_thread_n_way( thread );

	// A single way of parallelism means the caller owns everything.
	if ( n_way == 1 )
	{
		*start = 0;
		*end   = n;
		return;
	}

	const dim_t id = bli_thread_work_id( thread );

	// Whole blocks, leftover edge, and how the whole blocks split up.
	const dim_t n_blocks = n / bf;
	const dim_t edge     = n % bf;
	const dim_t base_cnt = n_blocks / n_way;
	const dim_t n_extra  = n_blocks % n_way;

	// Size (in indices) of a chunk without / with the extra block. When no
	// thread needs an extra block the two sizes coincide.
	const dim_t small = base_cnt * bf;
	const dim_t big   = ( n_extra != 0 ? base_cnt + 1 : base_cnt ) * bf;

	if ( !handle_edge_low )
	{
		// Big chunks come first, small chunks follow; edge goes last.
		if ( id < n_extra )
		{
			*start = id * big;
			*end   = ( id + 1 ) * big;
		}
		else
		{
			const dim_t first_small = n_extra * big;
			const dim_t k           = id - n_extra;

			*start = first_small + k * small;
			*end   = first_small + ( k + 1 ) * small;

			if ( id == n_way - 1 ) *end += edge;
		}

		return;
	}

	// Small chunks come first (with the edge folded into work id 0), big
	// chunks follow.
	const dim_t n_small = n_way - n_extra;

	if ( id < n_small )
	{
		*start = id * small;
		*end   = ( id + 1 ) * small;

		if ( id == 0 )
		{
			*end += edge;
		}
		else
		{
			*start += edge;
			*end   += edge;
		}
	}
	else
	{
		const dim_t first_big = n_small * small + edge;
		const dim_t k         = id - n_small;

		*start = first_big + k * big;
		*end   = first_big + ( k + 1 ) * big;
	}
}
// Partitions the width (after transposition) of `a` among threads with
// bli_thread_range_sub(), using the default blocksize of `bmult` for a's
// datatype as the block unit and handle_edge_low == FALSE.
//
// Stores the caller's range in *start / *end and returns the length (after
// transposition) of `a` multiplied by the width of that range.
siz_t bli_thread_range_l2r
     (
       thrinfo_t* thr,
       obj_t*     a,
       blksz_t*   bmult,
       dim_t*     start,
       dim_t*     end
     )
{
	const num_t dt   = bli_obj_dt( a );
	const dim_t rows = bli_obj_length_after_trans( a );
	const dim_t cols = bli_obj_width_after_trans( a );
	const dim_t unit = bli_blksz_get_def( dt, bmult );

	bli_thread_range_sub( thr, cols, unit, FALSE, start, end );

	const dim_t my_cols = *end - *start;

	return rows * my_cols;
}
// Same as the left-to-right variant except that bli_thread_range_sub() is
// called with handle_edge_low == TRUE: the width (after transposition) of
// `a` is partitioned in units of the default blocksize of `bmult`.
//
// Stores the caller's range in *start / *end and returns the length (after
// transposition) of `a` multiplied by the width of that range.
siz_t bli_thread_range_r2l
     (
       thrinfo_t* thr,
       obj_t*     a,
       blksz_t*   bmult,
       dim_t*     start,
       dim_t*     end
     )
{
	const num_t dt   = bli_obj_dt( a );
	const dim_t rows = bli_obj_length_after_trans( a );
	const dim_t cols = bli_obj_width_after_trans( a );
	const dim_t unit = bli_blksz_get_def( dt, bmult );

	bli_thread_range_sub( thr, cols, unit, TRUE, start, end );

	const dim_t my_cols = *end - *start;

	return rows * my_cols;
}
// Partitions the length (after transposition) of `a` among threads with
// bli_thread_range_sub(), using the default blocksize of `bmult` for a's
// datatype as the block unit and handle_edge_low == FALSE.
//
// Stores the caller's range in *start / *end and returns the width (after
// transposition) of `a` multiplied by the length of that range.
siz_t bli_thread_range_t2b
     (
       thrinfo_t* thr,
       obj_t*     a,
       blksz_t*   bmult,
       dim_t*     start,
       dim_t*     end
     )
{
	const num_t dt   = bli_obj_dt( a );
	const dim_t rows = bli_obj_length_after_trans( a );
	const dim_t cols = bli_obj_width_after_trans( a );
	const dim_t unit = bli_blksz_get_def( dt, bmult );

	bli_thread_range_sub( thr, rows, unit, FALSE, start, end );

	const dim_t my_rows = *end - *start;

	return cols * my_rows;
}
// Same as the top-to-bottom variant except that bli_thread_range_sub() is
// called with handle_edge_low == TRUE: the length (after transposition) of
// `a` is partitioned in units of the default blocksize of `bmult`.
//
// Stores the caller's range in *start / *end and returns the width (after
// transposition) of `a` multiplied by the length of that range.
siz_t bli_thread_range_b2t
     (
       thrinfo_t* thr,
       obj_t*     a,
       blksz_t*   bmult,
       dim_t*     start,
       dim_t*     end
     )
{
	const num_t dt   = bli_obj_dt( a );
	const dim_t rows = bli_obj_length_after_trans( a );
	const dim_t cols = bli_obj_width_after_trans( a );
	const dim_t unit = bli_blksz_get_def( dt, bmult );

	bli_thread_range_sub( thr, rows, unit, TRUE, start, end );

	const dim_t my_rows = *end - *start;

	return cols * my_rows;
}
// Chooses how many of the n_j remaining columns to give to subpartition j
// (of n_way), targeting roughly area_per_thr elements per subpartition for a
// region with m rows and diagonal offset diagoff_j. The `_l` prune helper
// used below suggests the region is lower-stored (NOTE(review): confirm
// against the caller).
//
// bf is the blocking unit and bf_left the edge size supplied by the caller.
// When handle_edge_low is set, subpartition 0 is sized so that its width
// modulo bf equals bf_left; all other widths are rounded to a multiple of bf
// via bli_round_to_mult(). The returned width never exceeds n_j.
dim_t bli_thread_range_width_l
(
doff_t diagoff_j,
dim_t m,
dim_t n_j,
dim_t j,
dim_t n_way,
dim_t bf,
dim_t bf_left,
double area_per_thr,
bool handle_edge_low
)
{
dim_t width;
// The last subpartition simply takes every column that is left.
if ( j == n_way - 1 ) return n_j;
{
// Count the bf-sized pieces in n_j, plus one more when bf_left is nonzero.
dim_t n_j_bf = n_j / bf + ( bf_left > 0 ? 1 : 0 );
// If there are no more pieces than subpartitions still to be served
// (n_way - j), hand out a single piece and skip the area-based estimate.
if ( n_j_bf <= n_way - j )
{
// Only subpartition 0 with handle_edge_low set gets the bf_left-sized piece
// (when bf_left is nonzero); every other case gets a full bf.
if ( j == 0 && handle_edge_low )
width = ( bf_left > 0 ? bf_left : bf );
else
width = bf;
// Never hand out more columns than remain.
if ( width > n_j ) width = n_j;
return width;
}
}
{
// First estimate: treat the subpartition as an m-row rectangle, so the
// width is area_per_thr / m, rounded with bli_round().
width = ( dim_t )bli_round( ( double )area_per_thr / ( double )m );
if ( j == 0 && handle_edge_low )
{
// Adjust so that width % bf == bf_left.
if ( width % bf != bf_left ) width += bf_left - ( width % bf );
}
else {
// Adjust to a multiple of bf.
if ( width % bf != 0 ) width = bli_round_to_mult( width, bf );
}
}
// If the diagonal offset is smaller than the rectangular estimate, the
// estimate is replaced by one derived from a quadratic in the width.
if ( diagoff_j < width )
{
dim_t offm_inc, offn_inc;
// Prune from the top; this may modify the local copies of diagoff_j, m, and
// n_j, and writes offm_inc.
bli_prune_unstored_region_top_l( &diagoff_j, &m, &n_j, &offm_inc );
// Neither increment is used afterwards (offn_inc is never written in this
// function); the casts presumably exist to silence unused-variable warnings.
( void )offm_inc;
( void )offn_inc;
// Coefficients of a*x^2 + b*x + c = 0, built from the (possibly pruned) m
// and diagoff_j and from area_per_thr.
const double a = -0.5;
const double b = ( double )m + ( double )diagoff_j + 0.5;
const double c = -0.5 * ( ( double )diagoff_j *
( ( double )diagoff_j + 1.0 )
) - area_per_thr;
// Discriminant; the width is only recomputed when a real root exists.
const double r = b * b - 4.0 * a * c;
if ( r >= 0.0 )
{
// Use the ( -b + sqrt(r) ) / ( 2a ) root, rounded, and force it to be at
// least 1 when it rounds to exactly 0.
const double x = ( -b + sqrt( r ) ) / ( 2.0 * a );
width = ( dim_t )bli_round( x );
if ( width == 0 ) width = 1;
}
// Re-apply the same bf / bf_left adjustment as for the first estimate.
if ( j == 0 && handle_edge_low )
{
if ( width % bf != bf_left ) width += bf_left - ( width % bf );
}
else {
if ( width % bf != 0 ) width = bli_round_to_mult( width, bf );
}
}
// Never hand out more columns than remain.
if ( width > n_j ) width = n_j;
return width;
}
// Returns the area of an m x n region with diagonal offset diagoff, computed
// as the full rectangle minus a triangle.
//
// The region is first pruned with the top_l and right_l prune helpers (which
// may adjust the local diagoff, m, and n). If the pruned region intersects
// the diagonal, a triangle of side (n - diagoff - 1) is subtracted from
// m * n; otherwise nothing is subtracted. The result is truncated to siz_t.
siz_t bli_find_area_trap_l
     (
       dim_t  m,
       dim_t  n,
       doff_t diagoff
     )
{
	dim_t m_inc = 0;
	dim_t n_inc = 0;

	bli_prune_unstored_region_top_l( &diagoff, &m, &n, &m_inc );
	bli_prune_unstored_region_right_l( &diagoff, &m, &n, &n_inc );

	// The offset increments are not needed here.
	( void )m_inc;
	( void )n_inc;

	// Area of the triangle to exclude, if any.
	double excluded = 0.0;

	if ( bli_intersects_diag_n( diagoff, m, n ) )
	{
		const double side = ( double )( n - diagoff - 1 );

		excluded = side * ( side + 1.0 ) / 2.0;
	}

	const double area = ( double )m * ( double )n - excluded;

	return ( siz_t )area;
}
// Computes the column range [*j_start_thr, *j_end_thr) for the calling
// thread such that each of the n_way subpartitions of an m x n region
// (described by diagoff and uplo) receives roughly the same area, and
// returns the area of the caller's subpartition as reported by
// bli_find_area_trap_l().
//
// Lower case: after pruning, the total area is m * n minus a triangle of
// side (n - diagoff - 1); it is divided evenly by n_way, and widths are
// handed out one subpartition at a time with bli_thread_range_width_l()
// until the caller's work id is reached. If the work id is not in
// [0, n_way) the outputs are left untouched and 0 is returned.
//
// Any other uplo: the region is rotated 180 degrees, handle_edge_low is
// toggled, the function calls itself, and the resulting range is reversed
// with bli_reverse_index_direction().
siz_t bli_thread_range_weighted_sub
     (
       thrinfo_t* restrict thread,
       doff_t              diagoff,
       uplo_t              uplo,
       dim_t               m,
       dim_t               n,
       dim_t               bf,
       bool                handle_edge_low,
       dim_t*     restrict j_start_thr,
       dim_t*     restrict j_end_thr
     )
{
	const dim_t n_way = bli_thread_n_way( thread );
	const dim_t my_id = bli_thread_work_id( thread );

	// The edge size is taken from n before any pruning changes it.
	const dim_t bf_left = n % bf;

	if ( !bli_is_lower( uplo ) )
	{
		// Express the problem as a lower-stored one, solve that, and map the
		// answer back.
		bli_rotate180_trapezoid( &diagoff, &uplo, &m, &n );
		bli_toggle_bool( &handle_edge_low );

		const siz_t rotated_area =
		bli_thread_range_weighted_sub
		(
		  thread, diagoff, uplo, m, n, bf,
		  handle_edge_low,
		  j_start_thr, j_end_thr
		);

		bli_reverse_index_direction( n, j_start_thr, j_end_thr );

		return rotated_area;
	}

	dim_t m_inc, n_inc;

	bli_prune_unstored_region_top_l( &diagoff, &m, &n, &m_inc );
	bli_prune_unstored_region_right_l( &diagoff, &m, &n, &n_inc );

	// The offset increments are not needed here.
	( void )m_inc;
	( void )n_inc;

	// Total area = rectangle minus triangle, split evenly across n_way.
	const double tri_side     = ( double )( n - diagoff - 1 );
	const double tri_area     = tri_side * ( tri_side + 1.0 ) / 2.0;
	const double total_area   = ( double )m * ( double )n - tri_area;
	const double area_per_thr = total_area / ( double )n_way;

	// Running state while walking the subpartitions in order.
	dim_t  col_off   = 0;
	doff_t diag_cur  = diagoff;
	dim_t  cols_left = n;
	siz_t  my_area   = 0;

	for ( dim_t t = 0; t < n_way; ++t )
	{
		const dim_t w =
		bli_thread_range_width_l
		(
		  diag_cur, m, cols_left,
		  t, n_way,
		  bf, bf_left,
		  area_per_thr,
		  handle_edge_low
		);

		if ( t == my_id )
		{
			*j_start_thr = col_off;
			*j_end_thr   = col_off + w;

			my_area = bli_find_area_trap_l( m, w, diag_cur );

			break;
		}

		// Advance past subpartition t.
		col_off   += w;
		diag_cur  -= w;
		cols_left -= w;
	}

	return my_area;
}
// Selects and calls the top-to-bottom / bottom-to-top range routine for the
// operation family recorded in `cntl`.
//
//  - Blocksize id: taken from `cntl`, except for the TRSM family, where it is
//    BLIS_MR if a's root is triangular and BLIS_NR otherwise.
//  - Object partitioned: `c` for the GEMMT family, `a` for every other one.
//  - Weighted partitioning is used for the GEMMT and TRMM families only.
//  - direct == BLIS_FWD selects the t2b variant; anything else selects b2t.
//
// `b` is not referenced. Returns whatever the selected routine returns.
siz_t bli_thread_range_mdim
     (
       dir_t      direct,
       thrinfo_t* thr,
       obj_t*     a,
       obj_t*     b,
       obj_t*     c,
       cntl_t*    cntl,
       cntx_t*    cntx,
       dim_t*     start,
       dim_t*     end
     )
{
	const opid_t family = bli_cntl_family( cntl );
	bszid_t      bszid  = bli_cntl_bszid( cntl );

	if ( family == BLIS_TRSM )
	{
		bszid = ( bli_obj_root_is_triangular( a ) ? BLIS_MR : BLIS_NR );
	}

	blksz_t* bmult = bli_cntx_get_bmult( bszid, cntx );

	obj_t*     x        = ( family == BLIS_GEMMT ? c : a );
	const bool weighted = ( family == BLIS_GEMMT || family == BLIS_TRMM );

	if ( !weighted )
	{
		return ( direct == BLIS_FWD
		         ? bli_thread_range_t2b( thr, x, bmult, start, end )
		         : bli_thread_range_b2t( thr, x, bmult, start, end ) );
	}

	return ( direct == BLIS_FWD
	         ? bli_thread_range_weighted_t2b( thr, x, bmult, start, end )
	         : bli_thread_range_weighted_b2t( thr, x, bmult, start, end ) );
}
// Selects and calls the left-to-right / right-to-left range routine for the
// operation family recorded in `cntl`.
//
//  - Blocksize id: taken from `cntl`, except for the TRSM family, where it is
//    BLIS_MR if b's root is triangular and BLIS_NR otherwise.
//  - Object partitioned: `c` for the GEMMT family, `b` for every other one.
//  - Weighted partitioning is used for the GEMMT and TRMM families only.
//  - direct == BLIS_FWD selects the l2r variant; anything else selects r2l.
//
// `a` is not referenced. Returns whatever the selected routine returns.
siz_t bli_thread_range_ndim
     (
       dir_t      direct,
       thrinfo_t* thr,
       obj_t*     a,
       obj_t*     b,
       obj_t*     c,
       cntl_t*    cntl,
       cntx_t*    cntx,
       dim_t*     start,
       dim_t*     end
     )
{
	const opid_t family = bli_cntl_family( cntl );
	bszid_t      bszid  = bli_cntl_bszid( cntl );

	if ( family == BLIS_TRSM )
	{
		bszid = ( bli_obj_root_is_triangular( b ) ? BLIS_MR : BLIS_NR );
	}

	blksz_t* bmult = bli_cntx_get_bmult( bszid, cntx );

	obj_t*     x        = ( family == BLIS_GEMMT ? c : b );
	const bool weighted = ( family == BLIS_GEMMT || family == BLIS_TRMM );

	if ( !weighted )
	{
		return ( direct == BLIS_FWD
		         ? bli_thread_range_l2r( thr, x, bmult, start, end )
		         : bli_thread_range_r2l( thr, x, bmult, start, end ) );
	}

	return ( direct == BLIS_FWD
	         ? bli_thread_range_weighted_l2r( thr, x, bmult, start, end )
	         : bli_thread_range_weighted_r2l( thr, x, bmult, start, end ) );
}
// Area-weighted left-to-right partitioning of `a`.
//
// If `a` does not both intersect its diagonal and report upper-or-lower
// storage, this defers to bli_thread_range_l2r(). Otherwise the object's
// diagonal offset, uplo, and dimensions are read (and reflected about the
// diagonal when `a` carries a transposition) and passed to
// bli_thread_range_weighted_sub() with handle_edge_low == FALSE.
//
// Stores the caller's range in *start / *end and returns the area reported
// by whichever routine was used.
siz_t bli_thread_range_weighted_l2r
     (
       thrinfo_t* thr,
       obj_t*     a,
       blksz_t*   bmult,
       dim_t*     start,
       dim_t*     end
     )
{
	if ( !( bli_obj_intersects_diag( a ) &&
	        bli_obj_is_upper_or_lower( a ) ) )
	{
		return bli_thread_range_l2r( thr, a, bmult, start, end );
	}

	const num_t dt      = bli_obj_dt( a );
	const dim_t bf      = bli_blksz_get_def( dt, bmult );
	doff_t      diagoff = bli_obj_diag_offset( a );
	uplo_t      uplo    = bli_obj_uplo( a );
	dim_t       m       = bli_obj_length( a );
	dim_t       n       = bli_obj_width( a );

	// Account for an implicit transposition of the object.
	if ( bli_obj_has_trans( a ) )
		bli_reflect_about_diag( &diagoff, &uplo, &m, &n );

	return bli_thread_range_weighted_sub
	       (
	         thr, diagoff, uplo, m, n, bf,
	         FALSE, start, end
	       );
}
// Area-weighted right-to-left partitioning of `a`.
//
// If `a` does not both intersect its diagonal and report upper-or-lower
// storage, this defers to bli_thread_range_r2l(). Otherwise the object's
// diagonal offset, uplo, and dimensions are read, reflected about the
// diagonal when `a` carries a transposition, rotated 180 degrees, and passed
// to bli_thread_range_weighted_sub() with handle_edge_low == TRUE.
//
// Stores the caller's range in *start / *end and returns the area reported
// by whichever routine was used.
siz_t bli_thread_range_weighted_r2l
     (
       thrinfo_t* thr,
       obj_t*     a,
       blksz_t*   bmult,
       dim_t*     start,
       dim_t*     end
     )
{
	if ( !( bli_obj_intersects_diag( a ) &&
	        bli_obj_is_upper_or_lower( a ) ) )
	{
		return bli_thread_range_r2l( thr, a, bmult, start, end );
	}

	const num_t dt      = bli_obj_dt( a );
	const dim_t bf      = bli_blksz_get_def( dt, bmult );
	doff_t      diagoff = bli_obj_diag_offset( a );
	uplo_t      uplo    = bli_obj_uplo( a );
	dim_t       m       = bli_obj_length( a );
	dim_t       n       = bli_obj_width( a );

	// Account for an implicit transposition of the object.
	if ( bli_obj_has_trans( a ) )
		bli_reflect_about_diag( &diagoff, &uplo, &m, &n );

	// Rotate the region by 180 degrees before partitioning.
	bli_rotate180_trapezoid( &diagoff, &uplo, &m, &n );

	return bli_thread_range_weighted_sub
	       (
	         thr, diagoff, uplo, m, n, bf,
	         TRUE, start, end
	       );
}
// Area-weighted top-to-bottom partitioning of `a`.
//
// If `a` does not both intersect its diagonal and report upper-or-lower
// storage, this defers to bli_thread_range_t2b(). Otherwise the object's
// diagonal offset, uplo, and dimensions are read, reflected about the
// diagonal when `a` carries a transposition, reflected about the diagonal
// once more unconditionally, and passed to bli_thread_range_weighted_sub()
// with handle_edge_low == FALSE.
//
// Stores the caller's range in *start / *end and returns the area reported
// by whichever routine was used.
siz_t bli_thread_range_weighted_t2b
     (
       thrinfo_t* thr,
       obj_t*     a,
       blksz_t*   bmult,
       dim_t*     start,
       dim_t*     end
     )
{
	if ( !( bli_obj_intersects_diag( a ) &&
	        bli_obj_is_upper_or_lower( a ) ) )
	{
		return bli_thread_range_t2b( thr, a, bmult, start, end );
	}

	const num_t dt      = bli_obj_dt( a );
	const dim_t bf      = bli_blksz_get_def( dt, bmult );
	doff_t      diagoff = bli_obj_diag_offset( a );
	uplo_t      uplo    = bli_obj_uplo( a );
	dim_t       m       = bli_obj_length( a );
	dim_t       n       = bli_obj_width( a );

	// Account for an implicit transposition of the object.
	if ( bli_obj_has_trans( a ) )
		bli_reflect_about_diag( &diagoff, &uplo, &m, &n );

	// Swap the roles of the two dimensions before handing the region to the
	// weighted routine.
	bli_reflect_about_diag( &diagoff, &uplo, &m, &n );

	return bli_thread_range_weighted_sub
	       (
	         thr, diagoff, uplo, m, n, bf,
	         FALSE, start, end
	       );
}
// Area-weighted bottom-to-top partitioning of `a`.
//
// If `a` does not both intersect its diagonal and report upper-or-lower
// storage, this defers to bli_thread_range_b2t(). Otherwise the object's
// diagonal offset, uplo, and dimensions are read, reflected about the
// diagonal when `a` carries a transposition, reflected about the diagonal
// once more unconditionally, rotated 180 degrees, and passed to
// bli_thread_range_weighted_sub() with handle_edge_low == TRUE.
//
// Stores the caller's range in *start / *end and returns the area reported
// by whichever routine was used.
siz_t bli_thread_range_weighted_b2t
     (
       thrinfo_t* thr,
       obj_t*     a,
       blksz_t*   bmult,
       dim_t*     start,
       dim_t*     end
     )
{
	if ( !( bli_obj_intersects_diag( a ) &&
	        bli_obj_is_upper_or_lower( a ) ) )
	{
		return bli_thread_range_b2t( thr, a, bmult, start, end );
	}

	const num_t dt      = bli_obj_dt( a );
	const dim_t bf      = bli_blksz_get_def( dt, bmult );
	doff_t      diagoff = bli_obj_diag_offset( a );
	uplo_t      uplo    = bli_obj_uplo( a );
	dim_t       m       = bli_obj_length( a );
	dim_t       n       = bli_obj_width( a );

	// Account for an implicit transposition of the object.
	if ( bli_obj_has_trans( a ) )
		bli_reflect_about_diag( &diagoff, &uplo, &m, &n );

	// Swap the roles of the two dimensions, then rotate by 180 degrees.
	bli_reflect_about_diag( &diagoff, &uplo, &m, &n );
	bli_rotate180_trapezoid( &diagoff, &uplo, &m, &n );

	return bli_thread_range_weighted_sub
	       (
	         thr, diagoff, uplo, m, n, bf,
	         TRUE, start, end
	       );
}
// Prepares `factors` for a factorization of n: records n, the truncated
// square root of n (the trial-division bound used by
// bli_next_prime_factor()), and 2 as the first candidate factor.
void bli_prime_factorization( dim_t n, bli_prime_factors_t* factors )
{
	const double root = sqrt( ( double )n );

	factors->f      = 2;
	factors->n      = n;
	factors->sqrt_n = ( dim_t )root;
}
// Returns the next factor of the number tracked by `factors`.
//
// Candidates are tried by trial division for as long as the current
// candidate does not exceed factors->sqrt_n. When a candidate divides the
// remaining value, that value is divided by it and the candidate is returned
// (the candidate is kept, so repeated factors are returned on later calls).
// Candidates advance 2 -> 3 -> 5 -> 7 -> 11 and then by one at a time.
//
// Once the candidates are exhausted, the remaining value is returned and
// replaced by 1, so every call after that returns 1.
dim_t bli_next_prime_factor( bli_prime_factors_t* factors )
{
	while ( factors->f <= factors->sqrt_n )
	{
		const dim_t cand = factors->f;

		if ( factors->n % cand == 0 )
		{
			factors->n /= cand;
			return cand;
		}

		// The candidate does not divide n; move on to the next one.
		if      ( cand == 2 ) factors->f = 3;
		else if ( cand == 3 ) factors->f = 5;
		else if ( cand == 5 ) factors->f = 7;
		else if ( cand == 7 ) factors->f = 11;
		else                  factors->f = cand + 1;
	}

	// Hand back whatever is left and leave 1 behind for later calls.
	const dim_t remaining = factors->n;

	factors->n = 1;

	return remaining;
}
// Returns TRUE when the first factor produced by bli_next_prime_factor() for
// n is n itself, and FALSE otherwise.
bool bli_is_prime( dim_t n )
{
	bli_prime_factors_t state;

	bli_prime_factorization( n, &state );

	const dim_t first = bli_next_prime_factor( &state );

	return ( first == n ? TRUE : FALSE );
}
// Splits n_thread into two factors, *nt1 and *nt2, given the amount of work
// (work1, work2) associated with each of the two dimensions.
//
// For fewer than four threads no real factoring is attempted: all threads go
// to the dimension with more work (ties go to the first dimension) and the
// other dimension gets 1. Otherwise the split is delegated to
// bli_thread_partition_2x2_fast().
void bli_thread_partition_2x2
     (
       dim_t           n_thread,
       dim_t           work1,
       dim_t           work2,
       dim_t* restrict nt1,
       dim_t* restrict nt2
     )
{
	if ( n_thread < 4 )
	{
		*nt1 = ( work1 >= work2 ? n_thread : 1 );
		*nt2 = ( work1 <  work2 ? n_thread : 1 );

		return;
	}

	// The previous "#if 1 ... #else#endif" wrapper around this call was
	// malformed (the conditional was never terminated), so the call is now
	// made unconditionally, which is what the "#if 1" branch selected.
	bli_thread_partition_2x2_fast( n_thread, work1, work2, nt1, nt2 );
}
// Heuristic split of n_thread into two factors, *nt1 and *nt2.
//
// The factors of n_thread are consumed in the order produced by
// bli_next_prime_factor(). Each factor is given to the side that currently
// has more work (ties go to side 2), and that side's work is divided by the
// factor. Afterwards, if the works are still unequal and the side with less
// remaining work holds an even thread count, a factor of 2 is moved to the
// other side when doing so would shrink the gap between the two works.
//
// Defining PRINT_FACTORS enables progress output via printf().
void bli_thread_partition_2x2_fast
     (
       dim_t           n_thread,
       dim_t           work1,
       dim_t           work2,
       dim_t* restrict nt1,
       dim_t* restrict nt2
     )
{
	// Work on local copies; the outputs are written once at the very end.
	dim_t ways1 = 1;
	dim_t ways2 = 1;

	bli_prime_factors_t pf;
	bli_prime_factorization( n_thread, &pf );

#ifdef PRINT_FACTORS
	printf( "w1 w2 = %d %d (initial)\n", (int)work1, (int)work2 );
#endif

	for ( dim_t f = bli_next_prime_factor( &pf );
	      f > 1;
	      f = bli_next_prime_factor( &pf ) )
	{
#ifdef PRINT_FACTORS
		printf( "w1 w2 = %4d %4d nt1 nt2 = %d %d ... f = %d\n",
		        (int)work1, (int)work2, (int)ways1, (int)ways2, (int)f );
#endif

		if ( work1 > work2 )
		{
			work1 /= f;
			ways1 *= f;
		}
		else
		{
			work2 /= f;
			ways2 *= f;
		}
	}

#ifdef PRINT_FACTORS
	printf( "w1 w2 = %4d %4d nt1 nt2 = %d %d\n",
	        (int)work1, (int)work2, (int)ways1, (int)ways2 );
#endif

	// Final rebalancing attempt: see whether moving a factor of 2 from the
	// side with less remaining work to the other side narrows the gap.
	if ( work1 > work2 && ways2 % 2 == 0 )
	{
		const dim_t gap       = work1 - work2;
		const dim_t gap_moved = bli_abs( work1/2 - work2*2 );

		if ( gap_moved < gap )
		{
			ways1 *= 2;
			ways2 /= 2;
		}
	}
	else if ( work1 < work2 && ways1 % 2 == 0 )
	{
		const dim_t gap       = work2 - work1;
		const dim_t gap_moved = bli_abs( work2/2 - work1*2 );

		if ( gap_moved < gap )
		{
			ways1 /= 2;
			ways2 *= 2;
		}
	}

#ifdef PRINT_FACTORS
	printf( "w1 w2 = %4d %4d nt1 nt2 = %d %d (final)\n",
	        (int)work1, (int)work2, (int)ways1, (int)ways2 );
#endif

	*nt1 = ways1;
	*nt2 = ways2;
}
#include "limits.h"
// Exhaustive split of n_thread into two factors, *nt1 and *nt2.
//
// The factors of n_thread are collected (with multiplicity) from
// bli_next_prime_factor(). Every pair (x, y) with x * y equal to the product
// of the collected factors is then formed, and the pair minimizing
// |x*work2 - y*work1| is stored in *nt1 / *nt2. On ties the pair encountered
// first is kept.
//
// The factor tables hold at most eight distinct factors; n_thread values
// with more distinct prime factors than that are not supported.
void bli_thread_partition_2x2_slow
     (
       dim_t           n_thread,
       dim_t           work1,
       dim_t           work2,
       dim_t* restrict nt1,
       dim_t* restrict nt2
     )
{
	dim_t tn1 = 1;
	dim_t tn2 = 1;

	bli_prime_factors_t factors;
	bli_prime_factorization( n_thread, &factors );

	// Distinct factors and how many times each one occurs.
	dim_t fact[8];
	dim_t mult[8];

	// The first call always yields a value, so use it to seed the tables.
	dim_t nfact = 1;
	fact[0] = bli_next_prime_factor( &factors );
	mult[0] = 1;

	// Collect the remaining factors; equal consecutive factors only bump the
	// multiplicity of the current entry.
	dim_t f;
	while ( ( f = bli_next_prime_factor( &factors ) ) > 1 )
	{
		if ( f == fact[nfact-1] )
		{
			mult[nfact-1]++;
		}
		else
		{
			nfact++;
			fact[nfact-1] = f;
			mult[nfact-1] = 1;
		}
	}

	// ntake[i] is how many copies of fact[i] go into the first factor of the
	// candidate pair; the rest go into the second factor.
	dim_t ntake[8] = {0};

	// Track the best candidate seen so far. The first candidate is always
	// accepted instead of being compared against a fixed sentinel: a
	// sentinel such as INT_MAX can be smaller than every candidate's
	// difference when dim_t is wider than int, in which case no candidate
	// would ever be selected.
	dim_t min_diff  = 0;
	bool  have_best = FALSE;

	bool done = FALSE;
	while ( !done )
	{
		dim_t x = 1;
		dim_t y = 1;

		// Build the candidate pair from the current exponents.
		for ( dim_t i = 0 ; i < nfact ; i++ )
		{
			x *= bli_ipow( fact[i], ntake[i] );
			y *= bli_ipow( fact[i], mult[i]-ntake[i] );
		}

		dim_t diff = llabs( x*work2 - y*work1 );
		if ( !have_best || diff < min_diff )
		{
			have_best = TRUE;
			min_diff  = diff;
			tn1 = x;
			tn2 = y;
		}

		// Advance the exponents like an odometer: bump the lowest digit and
		// carry into the next one whenever a digit passes its multiplicity.
		// A carry out of the last digit means every pair has been visited.
		for ( dim_t i = 0 ; i < nfact ; i++ )
		{
			if ( ++ntake[i] > mult[i] )
			{
				ntake[i] = 0;
				if ( i == nfact-1 ) done = TRUE;
				else continue;
			}
			break;
		}
	}

	*nt1 = tn1;
	*nt2 = tn2;
}
#if 0
#endif
// Returns the greatest common divisor of x and y using the Euclidean
// algorithm: (x, y) is repeatedly replaced by (y, x % y) until y is 0, at
// which point x is the result. In particular bli_gcd( x, 0 ) returns x.
dim_t bli_gcd( dim_t x, dim_t y )
{
	while ( y != 0 )
	{
		const dim_t rem = x % y;

		x = y;
		y = rem;
	}

	return x;
}
// Returns the least common multiple of x and y, computed as
// ( x / gcd( x, y ) ) * y.
//
// Dividing before multiplying keeps the intermediate value no larger than
// the result itself, so the computation does not overflow when the result is
// representable. If the gcd is 0 (both arguments are 0) the function returns
// 0 rather than dividing by zero.
dim_t bli_lcm( dim_t x, dim_t y)
{
	const dim_t g = bli_gcd( x, y );

	if ( g == 0 ) return 0;

	// g divides x exactly, so this equals x * y / g.
	return ( x / g ) * y;
}
// Returns base raised to the integer power `power` using binary
// exponentiation (square-and-multiply). A power of 0 or less yields 1.
//
// The base is squared only while higher exponent bits remain to be
// processed, so no intermediate value exceeds what is needed for the result;
// in particular bli_ipow( base, 1 ) never squares base.
dim_t bli_ipow( dim_t base, dim_t power )
{
	dim_t p = 1;

	while ( power > 0 )
	{
		// Multiply in the current power of base when its bit is set.
		if ( power & 0x1 ) p *= base;

		power >>= 1;

		// Only prepare the next power of base if it will actually be used.
		if ( power > 0 ) base *= base;
	}

	return p;
}
// Calls bli_init_once() and then returns the jc ways stored in the global
// rntm_t.
dim_t bli_thread_get_jc_nt( void )
{
	bli_init_once();

	const dim_t jc_ways = bli_rntm_jc_ways( &global_rntm );

	return jc_ways;
}
// Calls bli_init_once() and then returns the pc ways stored in the global
// rntm_t.
dim_t bli_thread_get_pc_nt( void )
{
	bli_init_once();

	const dim_t pc_ways = bli_rntm_pc_ways( &global_rntm );

	return pc_ways;
}
// Calls bli_init_once() and then returns the ic ways stored in the global
// rntm_t.
dim_t bli_thread_get_ic_nt( void )
{
	bli_init_once();

	const dim_t ic_ways = bli_rntm_ic_ways( &global_rntm );

	return ic_ways;
}
// Calls bli_init_once() and then returns the jr ways stored in the global
// rntm_t.
dim_t bli_thread_get_jr_nt( void )
{
	bli_init_once();

	const dim_t jr_ways = bli_rntm_jr_ways( &global_rntm );

	return jr_ways;
}
// Calls bli_init_once() and then returns the ir ways stored in the global
// rntm_t.
dim_t bli_thread_get_ir_nt( void )
{
	bli_init_once();

	const dim_t ir_ways = bli_rntm_ir_ways( &global_rntm );

	return ir_ways;
}
// Calls bli_init_once() and then returns the number of threads stored in the
// global rntm_t.
dim_t bli_thread_get_num_threads( void )
{
	bli_init_once();

	const dim_t n_threads = bli_rntm_num_threads( &global_rntm );

	return n_threads;
}
// Stores the given jc, pc, ic, jr, and ir ways in the global rntm_t.
// bli_init_once() is called first, and the update itself is performed while
// holding global_rntm_mutex.
void bli_thread_set_ways( dim_t jc, dim_t pc, dim_t ic, dim_t jr, dim_t ir )
{
bli_init_once();
// Hold the mutex for the duration of the update to the shared rntm_t.
bli_pthread_mutex_lock( &global_rntm_mutex );
bli_rntm_set_ways_only( jc, pc, ic, jr, ir, &global_rntm );
bli_pthread_mutex_unlock( &global_rntm_mutex );
}
// Stores n_threads as the number of threads in the global rntm_t.
// bli_init_once() is called first, and the update itself is performed while
// holding global_rntm_mutex.
void bli_thread_set_num_threads( dim_t n_threads )
{
bli_init_once();
// Hold the mutex for the duration of the update to the shared rntm_t.
bli_pthread_mutex_lock( &global_rntm_mutex );
bli_rntm_set_num_threads_only( n_threads, &global_rntm );
bli_pthread_mutex_unlock( &global_rntm_mutex );
}
// Initializes the thread count, the per-loop ways, and the auto-factor flag
// of `rntm`.
//
// With BLIS_ENABLE_MULTITHREADING defined, values are read with
// bli_env_get_var() (using -1 as the "not set" default):
//  - the thread count from BLIS_NUM_THREADS, falling back to OMP_NUM_THREADS;
//  - the ways from BLIS_JC_NT, BLIS_PC_NT, BLIS_IC_NT, BLIS_JR_NT, BLIS_IR_NT.
// If any of the ways is set, the unset ones become 1 and the thread count is
// discarded (reset to -1). Auto-factoring is enabled only when a thread count
// survives, i.e. when it was set and none of the ways were.
//
// Without BLIS_ENABLE_MULTITHREADING, the thread count is -1, every way is 1,
// and auto-factoring is disabled.
void bli_thread_init_rntm_from_env
     (
       rntm_t* rntm
     )
{
	bool  auto_factor = FALSE;
	dim_t nt;
	dim_t jc, pc, ic, jr, ir;

#ifdef BLIS_ENABLE_MULTITHREADING

	// Total thread count: BLIS_NUM_THREADS takes precedence over
	// OMP_NUM_THREADS.
	nt = bli_env_get_var( "BLIS_NUM_THREADS", -1 );

	if ( nt == -1 )
		nt = bli_env_get_var( "OMP_NUM_THREADS", -1 );

	// Per-loop ways of parallelism.
	jc = bli_env_get_var( "BLIS_JC_NT", -1 );
	pc = bli_env_get_var( "BLIS_PC_NT", -1 );
	ic = bli_env_get_var( "BLIS_IC_NT", -1 );
	jr = bli_env_get_var( "BLIS_JR_NT", -1 );
	ir = bli_env_get_var( "BLIS_IR_NT", -1 );

	// Explicit ways win over a total thread count: fill in 1 for any way that
	// was left unset and drop the thread count.
	if ( jc != -1 || pc != -1 || ic != -1 || jr != -1 || ir != -1 )
	{
		if ( jc == -1 ) jc = 1;
		if ( pc == -1 ) pc = 1;
		if ( ic == -1 ) ic = 1;
		if ( jr == -1 ) jr = 1;
		if ( ir == -1 ) ir = 1;

		nt = -1;
	}

	// A surviving thread count means the ways were not given explicitly.
	if ( nt != -1 ) auto_factor = TRUE;

#else

	nt = -1;
	jc = pc = ic = jr = ir = 1;

#endif

	bli_rntm_set_auto_factor_only( auto_factor, rntm );
	bli_rntm_set_num_threads_only( nt, rntm );
	bli_rntm_set_ways_only( jc, pc, ic, jr, ir, rntm );
}