#include "DspOsc.h"
#include "PdGraph.h"
// Cosine lookup table shared by every DspOsc instance. It is allocated and filled by the
// constructor when still NULL, and freed by the destructor once refCount drops back to zero.
float *DspOsc::cos_table = NULL;
// Counts constructed-but-not-yet-destroyed DspOsc instances; controls the lifetime of cos_table.
int DspOsc::refCount = 0;
/**
 * Factory function: heap-allocates a DspOsc built from the given init message and graph and
 * hands the new object back to the caller.
 */
message::Object *DspOsc::new_object(pd::Message *init_message, PdGraph *graph) {
  DspOsc *oscillator = new DspOsc(init_message, graph);
  return oscillator;
}
/**
 * Constructs the oscillator.
 *
 * The initial frequency is element 0 of the init message when that element is a float, and 0
 * otherwise. The shared cosine table (65536 entries covering one full cycle) is created by the
 * first instance; every instance increments refCount so the destructor knows when to free it.
 *
 * @param init_message  message whose first element may carry the initial frequency
 * @param graph         graph passed on to DspObject and queried for the sample rate
 */
DspOsc::DspOsc(pd::Message *init_message, PdGraph *graph) : DspObject(2, 2, 0, 1, graph) {
  frequency = init_message->is_float(0) ? init_message->get_float(0) : 0.0f;
  // number of table entries to advance per output sample
  sampleStep = frequency * 65536.0f / graph->get_sample_rate();
  #if __SSE3__
  // The SSE path keeps table positions in 16-bit lanes that wrap modulo 65536. Reduce the
  // rounded step into (-65536, 65536) before narrowing: converting a float outside the range
  // of short directly to short is undefined, and that range is exceeded as soon as the
  // frequency is above half the sample rate.
  const int wrappedStep = static_cast<int>(fmodf(roundf(sampleStep), 65536.0f));
  short step = static_cast<short>(static_cast<unsigned short>(wrappedStep));
  // every lane advances by eight samples per vector iteration
  inc = _mm_set_epi16(8*step, 8*step, 8*step, 8*step, 8*step, 8*step, 8*step, 8*step);
  // lane k holds the table position of the k-th upcoming sample, starting at position 0
  indicies = _mm_set_epi16(7*step, 6*step, 5*step, 4*step, 3*step, 2*step, step, 0);
  #endif

  phase = 0.0f;
  refCount++;
  if (cos_table == NULL) {
    // first live instance: build the table shared by all oscillators
    cos_table = ALLOC_ALIGNED_BUFFER(65536 * sizeof(float));
    for (int i = 0; i < 65536; i++) {
      cos_table[i] = cosf(2.0f * M_PI * ((float) i) / 65536.0f);
    }
  }

  // the same routine is used whether or not messages are pending
  process_function = &processScalar;
  process_functionNoMessage = &processScalar;
}
/**
 * Drops this instance's reference on the shared cosine table and releases the table when no
 * other instance remains.
 */
DspOsc::~DspOsc() {
  refCount -= 1;
  if (refCount != 0) {
    // other oscillators are still using the table
    return;
  }
  FREE_ALIGNED_BUFFER(cos_table);
  cos_table = NULL; // lets the next constructed instance rebuild the table
}
// No-op: nothing is reconfigured when an inlet connection changes. The processing function is
// set once in the constructor (processScalar) and is not switched here.
void DspOsc::onInletConnectionUpdate(unsigned int inlet_index) {
  }
/**
 * Returns the textual form of this object: the object label followed by a space and the
 * current frequency formatted with %g.
 *
 * @return the formatted string, or an empty string if snprintf reports a formatting error
 */
string DspOsc::toString() {
  // First pass only measures: with a NULL buffer snprintf returns the number of characters
  // the output needs, not counting the terminating NUL.
  const int length = snprintf(NULL, 0, "%s %g", get_object_label(), frequency);
  if (length < 0) {
    // a negative result signals an encoding error; there is nothing sensible to format
    return string();
  }

  // A heap-backed string replaces the former variable-length array, which is a compiler
  // extension rather than standard C++. One extra character leaves room for the NUL.
  string text(static_cast<size_t>(length) + 1, '\0');
  snprintf(&text[0], text.size(), "%s %g", get_object_label(), frequency);
  text.resize(static_cast<size_t>(length)); // drop the terminator written by snprintf
  return text;
}
/**
 * Handles a message arriving at one of the inlets.
 *
 * Inlet 0: when element 0 of the message is a float, its absolute value becomes the new
 *          frequency and the per-sample table step is recomputed. In SSE3 builds the vector
 *          state is rebuilt so that the oscillator continues from its current table position.
 * Inlet 1: messages are accepted but currently have no effect.
 * Any other inlet is ignored.
 *
 * @param inlet_index  index of the inlet that received the message
 * @param message      the received message
 */
void DspOsc::process_message(int inlet_index, pd::Message *message) {
  switch (inlet_index) {
    case 0: {
      if (message->is_float(0)) {
        frequency = fabsf(message->get_float(0));
        // number of table entries (65536 per cycle) to advance per output sample
        sampleStep = frequency * 65536.0f / graph->get_sample_rate();

        #if __SSE3__
        // Table positions live in 16-bit lanes that wrap modulo 65536. Reduce the rounded
        // step into (-65536, 65536) before narrowing: converting a float outside the range
        // of short directly to short is undefined, and that range is exceeded as soon as
        // the frequency is above half the sample rate.
        const int wrappedStep = static_cast<int>(fmodf(roundf(sampleStep), 65536.0f));
        short step = static_cast<short>(static_cast<unsigned short>(wrappedStep));
        inc = _mm_set_epi16(8*step, 8*step, 8*step, 8*step, 8*step, 8*step, 8*step, 8*step);
        // keep the position of the next sample (lane 0) and re-space the other lanes
        unsigned short currentIndex = _mm_extract_epi16(indicies,0);
        indicies = _mm_set_epi16(7*step+currentIndex, 6*step+currentIndex, 5*step+currentIndex,
            4*step+currentIndex, 3*step+currentIndex, 2*step+currentIndex, step+currentIndex, currentIndex);
        #endif
      }
      break;
    }
    case 1: {
      // messages on the second inlet are not acted upon
      break;
    }
    default: break;
  }
}
#if __SSE3__
/**
 * Packs the eight table positions start, start+step, ..., start+7*step into 16-bit lanes
 * (lane 0 = start). Each position is reduced modulo 65536 before being stored.
 */
static inline __m128i dspOscIndexRamp(unsigned short start, unsigned short step) {
  return _mm_set_epi16(
      static_cast<short>(static_cast<unsigned short>(start + 7*step)),
      static_cast<short>(static_cast<unsigned short>(start + 6*step)),
      static_cast<short>(static_cast<unsigned short>(start + 5*step)),
      static_cast<short>(static_cast<unsigned short>(start + 4*step)),
      static_cast<short>(static_cast<unsigned short>(start + 3*step)),
      static_cast<short>(static_cast<unsigned short>(start + 2*step)),
      static_cast<short>(static_cast<unsigned short>(start + step)),
      static_cast<short>(start));
}
#endif

/**
 * Renders samples [fromIndex, toIndex) of outlet 0 by reading the shared cosine table.
 *
 * SSE3 builds work in three phases: a scalar head that advances the write position to the
 * next multiple of eight samples, a vector loop that emits eight samples per iteration with
 * aligned stores, and a scalar tail for the remainder. The table positions of the next eight
 * samples are carried between calls in d->indicies.
 * NOTE(review): the aligned stores assume dspBufferAtOutlet[0] itself is 16-byte aligned -
 * confirm against the buffer allocation.
 *
 * Other builds use a scalar loop driven by d->phase and d->sampleStep.
 *
 * @param dspObject  the DspOsc to process (passed as its base type)
 * @param fromIndex  first sample index to write
 * @param toIndex    one past the last sample index to write
 */
void DspOsc::processScalar(DspObject *dspObject, int fromIndex, int toIndex) {
  DspOsc *d = reinterpret_cast<DspOsc *>(dspObject);
  const int n = toIndex - fromIndex;
  if (n <= 0) {
    return; // empty range: nothing to write, state stays as it is
  }
  float *output = d->dspBufferAtOutlet[0] + fromIndex;

  #if __SSE3__
  const __m128i inc = d->inc;
  __m128i indicies = d->indicies;

  unsigned short currentIndex = static_cast<unsigned short>(_mm_extract_epi16(indicies, 0));
  // Neighbouring lanes are exactly one step apart, so their difference yields the step modulo
  // 65536. (Dividing a lane of inc by eight loses the high bits once 8*step exceeds 16 bits.)
  const unsigned short step =
      static_cast<unsigned short>(_mm_extract_epi16(indicies, 1) - currentIndex);

  // Scalar head: samples needed to reach the next multiple of eight, but never more than the
  // caller asked for.
  int head = (8 - (fromIndex & 0x7)) & 0x7;
  if (head > n) {
    head = n;
  }
  for (int i = 0; i < head; ++i) {
    *output++ = DspOsc::cos_table[currentIndex];
    currentIndex += step;
  }
  if (head > 0) {
    // the vector loop below must continue from where the head stopped
    indicies = dspOscIndexRamp(currentIndex, step);
  }

  // Vector body: eight samples per iteration, written as two aligned groups of four.
  const int remaining = n - head;
  for (int blocks = remaining >> 3; blocks > 0; --blocks) {
    __m128 values = _mm_set_ps(DspOsc::cos_table[(unsigned short) _mm_extract_epi16(indicies,3)],
                               DspOsc::cos_table[(unsigned short) _mm_extract_epi16(indicies,2)],
                               DspOsc::cos_table[(unsigned short) _mm_extract_epi16(indicies,1)],
                               DspOsc::cos_table[(unsigned short) _mm_extract_epi16(indicies,0)]);
    _mm_store_ps(output, values);
    output += 4;
    values = _mm_set_ps(DspOsc::cos_table[(unsigned short) _mm_extract_epi16(indicies,7)],
                        DspOsc::cos_table[(unsigned short) _mm_extract_epi16(indicies,6)],
                        DspOsc::cos_table[(unsigned short) _mm_extract_epi16(indicies,5)],
                        DspOsc::cos_table[(unsigned short) _mm_extract_epi16(indicies,4)]);
    _mm_store_ps(output, values);
    output += 4;
    indicies = _mm_add_epi16(indicies, inc);
  }

  // Scalar tail: the last (remaining mod 8) samples.
  const int tail = remaining & 0x7;
  if (tail > 0) {
    currentIndex = static_cast<unsigned short>(_mm_extract_epi16(indicies, 0));
    for (int i = 0; i < tail; ++i) {
      *output++ = DspOsc::cos_table[currentIndex];
      currentIndex += step;
    }
    indicies = dspOscIndexRamp(currentIndex, step);
  }

  // remember the positions of the next eight samples for the following call
  d->indicies = indicies;
  #else
  // Scalar fallback: step a floating-point table position and wrap it at the table length.
  float position = d->phase;
  const float step = d->sampleStep;
  for (int i = 0; i < n; ++i) {
    // the mask keeps the index inside the 65536-entry table
    output[i] = DspOsc::cos_table[static_cast<int>(position) & 0xFFFF];
    position = fmodf(position + step, 65536.0f);
  }
  d->phase = position;
  #endif
}