#include "DspSqrt.h"
#include "PdGraph.h"
/**
 * Factory entry point: heap-allocates a new DspSqrt from the given
 * initialisation message and owning graph, and hands it back to the caller.
 */
message::Object *DspSqrt::new_object(pd::Message *init_message, PdGraph *graph) {
  DspSqrt *instance = new DspSqrt(init_message, graph);
  return instance;
}
/**
 * Constructs the object and installs processSignal as its processing callback.
 * init_message is accepted but not read by this constructor.
 */
DspSqrt::DspSqrt(pd::Message *init_message, PdGraph *graph)
    : DspObject(0, 1, 0, 1, graph) {  // presumably one DSP inlet and one DSP outlet; confirm against DspObject
  // Every processing call for this object is routed to the static handler below.
  process_function = &DspSqrt::processSignal;
}
/** Nothing to release here: this class adds no cleanup of its own. */
DspSqrt::~DspSqrt() {}
/**
 * Writes the element-wise square root of the signal at DSP inlet 0 to DSP outlet 0.
 *
 * Input samples that are not strictly positive (negative values, and NaN) produce
 * 0.0f in every code path, so the result does not depend on which SIMD variant
 * the build selected.
 *
 * @param dspObject  object to process; it is cast to DspSqrt without any check.
 * @param fromIndex  not used: processing always starts at sample 0.
 * @param toIndex    number of samples to process (exclusive end index).
 */
void DspSqrt::processSignal(DspObject *dspObject, int fromIndex, int toIndex) {
  DspSqrt *d = reinterpret_cast<DspSqrt *>(dspObject);
  #if __ARM_NEON__
  float *inBuff = d->dspBufferAtInlet[0];
  float *outBuff = d->dspBufferAtOutlet[0];
  float32x4_t inVec, outVec;
  float32x4_t zeroVec = vdupq_n_f32(0.0f);
  int n4 = toIndex & 0xFFFFFFFC;  // largest multiple of four not exceeding toIndex
  while (n4) {
    inVec = vld1q_f32(inBuff);
    inVec = vmaxq_f32(inVec, zeroVec);  // clamp negative samples to zero
    // sqrt(x) is approximated as 1 / (1 / sqrt(x)) using the NEON estimate
    // instructions, so this path is less precise than sqrtf.
    outVec = vrsqrteq_f32(inVec);
    outVec = vrecpeq_f32(outVec);
    vst1q_f32((float32_t *) outBuff, outVec);
    n4 -= 4;
    inBuff += 4;
    outBuff += 4;
  }
  // Handle the remaining (toIndex % 4) samples one at a time.
  switch (toIndex & 0x3) {
    case 3: *outBuff++ = (*inBuff > 0.0f) ? sqrtf(*inBuff) : 0.0f; ++inBuff;  // fall through
    case 2: *outBuff++ = (*inBuff > 0.0f) ? sqrtf(*inBuff) : 0.0f; ++inBuff;  // fall through
    case 1: *outBuff++ = (*inBuff > 0.0f) ? sqrtf(*inBuff) : 0.0f; ++inBuff;  // fall through
    default: break;
  }
  #elif __SSE__
  float *inBuff = d->dspBufferAtInlet[0];
  float *outBuff = d->dspBufferAtOutlet[0];
  __m128 inVec, outVec;
  __m128 zeroVec = _mm_set1_ps(0.0f);
  int n4 = toIndex & 0xFFFFFFFC;  // largest multiple of four not exceeding toIndex
  while (n4) {
    // _mm_load_ps/_mm_store_ps require both buffers to be 16-byte aligned.
    inVec = _mm_load_ps(inBuff);
    inVec = _mm_max_ps(inVec, zeroVec);  // clamp negative samples to zero
    outVec = _mm_sqrt_ps(inVec);
    _mm_store_ps(outBuff, outVec);
    n4 -= 4;
    inBuff += 4;
    outBuff += 4;
  }
  // Handle the remaining (toIndex % 4) samples one at a time.
  switch (toIndex & 0x3) {
    case 3: *outBuff++ = (*inBuff > 0.0f) ? sqrtf(*inBuff) : 0.0f; ++inBuff;  // fall through
    case 2: *outBuff++ = (*inBuff > 0.0f) ? sqrtf(*inBuff) : 0.0f; ++inBuff;  // fall through
    case 1: *outBuff++ = (*inBuff > 0.0f) ? sqrtf(*inBuff) : 0.0f; ++inBuff;  // fall through
    default: break;
  }
  #else
  // Portable fallback. It applies the same clamp as the SIMD paths; calling
  // sqrtf directly on a negative sample would write NaN to the outlet.
  const float *inBuff = d->dspBufferAtInlet[0];
  float *outBuff = d->dspBufferAtOutlet[0];
  for (int i = 0; i < toIndex; i++) {
    const float sample = inBuff[i];
    outBuff[i] = (sample > 0.0f) ? sqrtf(sample) : 0.0f;
  }
  #endif
}