#ifndef _SKP_SILK_API_ARM_H_
#define _SKP_SILK_API_ARM_H_
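/*
 * ARM-optimized fixed-point primitives (multiplies, saturating add/sub, CLZ)
 * for the SILK fixed-point code. For EMBEDDED_ARM == 4 the operations are
 * implemented in plain C, since the core lacks the DSP multiply instructions;
 * for later architecture levels they are thin GCC inline-assembly wrappers
 * around the ARMv5E/ARMv6 DSP instructions.
 */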
#if EMBEDDED_ARM==4
extern SKP_int32 SKP_Silk_CLZ16(SKP_int16 in16);
extern SKP_int32 SKP_Silk_CLZ32(SKP_int32 in32);
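/*
 * Plain-C equivalents of the DSP multiplies. SKP_SMULWB(a32, b32) computes
 * (a32 * (SKP_int16)b32) >> 16 from the two halves of a32, so no 64-bit
 * intermediate is needed:
 *     (a * b) >> 16 == ((a >> 16) * b) + (((a & 0xFFFF) * b) >> 16)
 * The *WT variants use the top 16 bits of the second operand, the *BB / *BT
 * variants multiply 16-bit halfwords, and the SMLA* forms add the product to
 * an accumulator.
 */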
#define SKP_SMULWB(a32, b32) ((((a32) >> 16) * (SKP_int32)((SKP_int16)(b32))) + ((((a32) & 0x0000FFFF) * (SKP_int32)((SKP_int16)(b32))) >> 16))
#define SKP_SMLAWB(a32, b32, c32) ((a32) + ((((b32) >> 16) * (SKP_int32)((SKP_int16)(c32))) + ((((b32) & 0x0000FFFF) * (SKP_int32)((SKP_int16)(c32))) >> 16)))
#define SKP_SMULWT(a32, b32) (((a32) >> 16) * ((b32) >> 16) + ((((a32) & 0x0000FFFF) * ((b32) >> 16)) >> 16))
#define SKP_SMLAWT(a32, b32, c32) ((a32) + (((b32) >> 16) * ((c32) >> 16)) + ((((b32) & 0x0000FFFF) * ((c32) >> 16)) >> 16))
#define SKP_SMULBB(a32, b32) ((SKP_int32)((SKP_int16)(a32)) * (SKP_int32)((SKP_int16)(b32)))
#define SKP_SMLABB(a32, b32, c32) ((a32) + ((SKP_int32)((SKP_int16)(b32))) * (SKP_int32)((SKP_int16)(c32)))
#define SKP_SMLABB_ovflw(a32, b32, c32) ((a32) + ((SKP_int32)((SKP_int16)(b32))) * (SKP_int32)((SKP_int16)(c32)))
#define SKP_SMULBT(a32, b32) ((SKP_int32)((SKP_int16)(a32)) * ((b32) >> 16))
#define SKP_SMLABT(a32, b32, c32) ((a32) + ((SKP_int32)((SKP_int16)(b32))) * ((c32) >> 16))
SKP_INLINE SKP_int64 SKP_SMLAL(SKP_int64 a64, SKP_int32 b32, SKP_int32 c32)
{
#ifdef IPHONE
    /* Portable fallback: SMLAL must accumulate the 64-bit product into a64 */
    a64 += (SKP_int64)b32 * (SKP_int64)c32;
    return(a64);
#else
    __asm__ __volatile__ ("smlal %Q0, %R0, %2, %3" : "=r" (a64) : "0" (a64), "r" (b32), "r" (c32));
    return(a64);
#endif
}
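/*
 * Full 32 x 32-bit multiply returning approximately (a32 * b32) >> 16,
 * assembled from SMULWB on the low halfword of b32 plus an MLA with the
 * rounded high halfword.
 */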
#define SKP_SMULWW(a32, b32) SKP_MLA(SKP_SMULWB((a32), (b32)), (a32), SKP_RSHIFT_ROUND((b32), 16))
#define SKP_SMLAWW(a32, b32, c32) SKP_MLA(SKP_SMLAWB((a32), (b32), (c32)), (b32), SKP_RSHIFT_ROUND((c32), 16))
#define SKP_ADD_SAT32(a, b) ((((a) + (b)) & 0x80000000) == 0 ?                                 \
                             ((((a) & (b)) & 0x80000000) != 0 ? SKP_int32_MIN : (a)+(b)) :     \
                             ((((a) | (b)) & 0x80000000) == 0 ? SKP_int32_MAX : (a)+(b)) )
#define SKP_SUB_SAT32(a, b) ((((a)-(b)) & 0x80000000) == 0 ?                                        \
                             (( (a) & ((b)^0x80000000) & 0x80000000) ? SKP_int32_MIN : (a)-(b)) :   \
                             ((((a)^0x80000000) & (b) & 0x80000000) ? SKP_int32_MAX : (a)-(b)) )
#define SKP_SMMUL(a32, b32) (SKP_int32)SKP_RSHIFT64(SKP_SMULL((a32), (b32)), 32)
#else
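/*
 * All other EMBEDDED_ARM levels (ARMv5E and up): the same primitives mapped
 * directly onto the DSP multiply instructions. smulwb/smlawb use the bottom
 * 16 bits of the last multiplicand, smulwt/smlawt the top 16 bits; the result
 * is the high 32 bits of the 48-bit product, i.e. (a32 * b16) >> 16,
 * optionally added to an accumulator.
 */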
SKP_INLINE SKP_int32 SKP_SMULWB(SKP_int32 a32, SKP_int32 b32) {
    SKP_int32 out32;
    __asm__ __volatile__ ("smulwb %0, %1, %2" : "=r" (out32) : "r" (a32), "r" (b32));
    return(out32);
}
SKP_INLINE SKP_int32 SKP_SMLAWB(SKP_int32 a32, SKP_int32 b32, SKP_int32 c32) {
    SKP_int32 out32;
    __asm__ __volatile__ ("smlawb %0, %2, %3, %1" : "=r" (out32) : "r" (a32), "r" (b32), "r" (c32));
    return(out32);
}
SKP_INLINE SKP_int32 SKP_SMULWT(SKP_int32 a32, SKP_int32 b32)
{
    SKP_int32 out32;
    __asm__ __volatile__ ("smulwt %0, %1, %2" : "=r" (out32) : "r" (a32), "r" (b32));
    return(out32);
}
SKP_INLINE SKP_int32 SKP_SMLAWT(SKP_int32 a32, SKP_int32 b32, SKP_int32 c32)
{
    SKP_int32 out32;
    __asm__ __volatile__ ("smlawt %0, %2, %3, %1" : "=r" (out32) : "r" (a32), "r" (b32), "r" (c32));
    return(out32);
}
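/*
 * 16 x 16 -> 32-bit multiplies of the bottom halfwords (smulbb/smlabb).
 * SKP_SMLABB_ovflw uses the same instruction; the separate name marks call
 * sites where wrap-around on overflow is acceptable.
 */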
SKP_INLINE SKP_int32 SKP_SMULBB(SKP_int32 a32, SKP_int32 b32) {
    SKP_int32 out32;
    __asm__ __volatile__ ("smulbb %0, %1, %2" : "=r" (out32) : "r" (a32), "r" (b32));
    return(out32);
}
SKP_INLINE SKP_int32 SKP_SMLABB(SKP_int32 a32, SKP_int32 b32, SKP_int32 c32) {
    SKP_int32 out32;
    __asm__ __volatile__ ("smlabb %0, %2, %3, %1" : "=r" (out32) : "r" (a32), "r" (b32), "r" (c32));
    return(out32);
}
SKP_INLINE SKP_int32 SKP_SMLABB_ovflw(SKP_int32 a32, SKP_int32 b32, SKP_int32 c32) {
    SKP_int32 out32;
    __asm__ __volatile__ ("smlabb %0, %2, %3, %1" : "=r" (out32) : "r" (a32), "r" (b32), "r" (c32));
    return(out32);
}
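/* Bottom halfword of the first multiplicand times top halfword of the second (smulbt/smlabt). */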
SKP_INLINE SKP_int32 SKP_SMULBT(SKP_int32 a32, SKP_int32 b32) {
    SKP_int32 out32;
    __asm__ __volatile__ ("smulbt %0, %1, %2" : "=r" (out32) : "r" (a32), "r" (b32));
    return(out32);
}
SKP_INLINE SKP_int32 SKP_SMLABT(SKP_int32 a32, SKP_int32 b32, SKP_int32 c32) {
    SKP_int32 out32;
    __asm__ __volatile__ ("smlabt %0, %2, %3, %1" : "=r" (out32) : "r" (a32), "r" (b32), "r" (c32));
    return(out32);
}
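/*
 * 64-bit multiply-accumulate: returns a64 + (SKP_int64)b32 * c32. The smlal
 * form accumulates into the register pair holding a64; the IPHONE build uses
 * a plain 64-bit C expression instead.
 */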
SKP_INLINE SKP_int64 SKP_SMLAL(SKP_int64 a64, SKP_int32 b32, SKP_int32 c32)
{
#ifdef IPHONE
    /* Portable fallback: SMLAL must accumulate the 64-bit product into a64 */
    a64 += (SKP_int64)b32 * (SKP_int64)c32;
    return(a64);
#else
    __asm__ __volatile__ ("smlal %Q0, %R0, %2, %3" : "=r" (a64) : "0" (a64), "r" (b32), "r" (c32));
    return(a64);
#endif
}
#define SKP_SMULWW(a32, b32) SKP_MLA(SKP_SMULWB((a32), (b32)), (a32), SKP_RSHIFT_ROUND((b32), 16))
#define SKP_SMLAWW(a32, b32, c32) SKP_MLA(SKP_SMLAWB((a32), (b32), (c32)), (b32), SKP_RSHIFT_ROUND((c32), 16))
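/* Saturating 32-bit add/subtract: qadd/qsub clamp the result to SKP_int32_MIN / SKP_int32_MAX. */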
SKP_INLINE SKP_int32 SKP_ADD_SAT32(SKP_int32 a32, SKP_int32 b32) {
    SKP_int32 out32;
    __asm__ __volatile__ ("qadd %0, %1, %2" : "=r" (out32) : "r" (a32), "r" (b32));
    return(out32);
}
SKP_INLINE SKP_int32 SKP_SUB_SAT32(SKP_int32 a32, SKP_int32 b32) {
    SKP_int32 out32;
    __asm__ __volatile__ ("qsub %0, %1, %2" : "=r" (out32) : "r" (a32), "r" (b32));
    return(out32);
}
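/*
 * Count leading zeros. CLZ16 shifts the 16-bit input into the top halfword
 * before the clz and returns 16 for a zero input (the moveq fixup).
 */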
SKP_INLINE SKP_int32 SKP_Silk_CLZ16(SKP_int16 in16)
{
    SKP_int32 out32;
    __asm__ __volatile__ (
        "movs  %0, %1, lsl #16\n\t"
        "clz   %0, %0\n\t"
        "it    eq\n\t"
        "moveq %0, #16"
        : "=r" (out32) : "r" (in16) : "cc");
    return(out32);
}
SKP_INLINE SKP_int32 SKP_Silk_CLZ32(SKP_int32 in32)
{
    SKP_int32 out32;
    __asm__ __volatile__ ("clz %0, %1" : "=r" (out32) : "r" (in32));
    return(out32);
}
#if EMBEDDED_ARM < 6
#define SKP_SMMUL(a32, b32) (SKP_int32)SKP_RSHIFT64(SKP_SMULL((a32), (b32)), 32)
#endif /* EMBEDDED_ARM < 6 */
#endif /* EMBEDDED_ARM == 4 */
#if EMBEDDED_ARM>=6
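/*
 * ARMv6-only instructions: smmul returns the top 32 bits of the 64-bit
 * product; smuad/smlad compute the dual halfword products lo*lo + hi*hi of
 * the two multiplicands, optionally added to an accumulator.
 */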
SKP_INLINE SKP_int32 SKP_SMMUL(SKP_int32 a32, SKP_int32 b32){
    SKP_int32 out32;
    __asm__ __volatile__ ("smmul %0, %1, %2" : "=r" (out32) : "r" (a32), "r" (b32));
    return(out32);
}
SKP_INLINE SKP_int32 SKP_SMUAD(SKP_int32 a32, SKP_int32 b32)
{
    SKP_int32 out32;
    __asm__ __volatile__ ("smuad %0, %1, %2" : "=r" (out32) : "r" (a32), "r" (b32));
    return(out32);
}
SKP_INLINE SKP_int32 SKP_SMLAD(SKP_int32 a32, SKP_int32 b32, SKP_int32 c32)
{
    SKP_int32 out32;
    __asm__ __volatile__ ("smlad %0, %2, %3, %1" : "=r" (out32) : "r" (a32), "r" (b32), "r" (c32));
    return(out32);
}
#endif
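/*
 * Usage sketch (illustrative only; the variable names are hypothetical and
 * not part of the API). Multiplying Q16 fixed-point values with these
 * primitives:
 *
 *     SKP_int32 gain_Q16   = 98304;                                     // 1.5 in Q16
 *     SKP_int32 sample_Q16 = 131072;                                    // 2.0 in Q16
 *     SKP_int32 prod_Q16   = SKP_SMULWW( sample_Q16, gain_Q16 );        // 3.0 in Q16 (196608)
 *     SKP_int32 acc_Q16    = SKP_SMLAWB( prod_Q16, sample_Q16, 16384 ); // + 2.0 * 0.25 = 3.5 in Q16
 */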
#endif /* _SKP_SILK_API_ARM_H_ */