purezen 0.0.2

Runtime for the Pure Data (Pd) audio programming language, implemented as an extensible audio library allowing full control over signal processing, message passing, and graph manipulation. Pure Data is a graph-based programming language environment creating interactive music and multimedia works.
Documentation
/*
 *  Copyright 2009,2010,2012 Reality Jockey, Ltd.
 *                 info@rjdj.me
 *                 http://rjdj.me/
 *
 *  This file is part of ZenGarden.
 *
 *  ZenGarden is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU Lesser General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  ZenGarden is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public License
 *  along with ZenGarden.  If not, see <http://www.gnu.org/licenses/>.
 *
 */

#include "DspPhasor.h"
#include "PdGraph.h"

#define SHORT_TO_FLOAT_RATIO 0.0000152590219f // == 1/(2^16 - 1)

message::Object *DspPhasor::new_object(pd::Message *init_message, PdGraph *graph) {
  return new DspPhasor(init_message, graph);
}

DspPhasor::DspPhasor(pd::Message *init_message, PdGraph *graph) : DspObject(2, 2, 0, 1, graph) {  
  pd::Message *message = PD_MESSAGE_ON_STACK(1);
  message->from_timestamp_and_float(0.0, init_message->is_float(0) ? init_message->get_float(0) : 0.0f);
  process_message(0, message);

  process_function = &processScalar;
  process_functionNoMessage = &processScalar;
}

DspPhasor::~DspPhasor() {
  // TODO
}

string DspPhasor::toString() {
  char str[snprintf(NULL, 0, "%s %g", get_object_label(), frequency)+1];
  snprintf(str, sizeof(str), "%s %g", get_object_label(), frequency);
  return string(str);
}

void DspPhasor::onInletConnectionUpdate(unsigned int inlet_index) {
  process_function = incomingDspConnections[0].empty() ? &processScalar : &processSignal;
}

void DspPhasor::process_message(int inlet_index, pd::Message *message) {
  switch (inlet_index) {
    case 0: { // update the frequency
      if (message->is_float(0)) {
        frequency = message->get_float(0);
        #if __SSE3__
        float sampleStep = frequency * 65536.0f / graph->get_sample_rate();
        short s = (short) sampleStep; // signed as step size may be negative as well!
        inc = _mm_set1_pi16(4*s);
        #endif // __SSE3__
      }
      break;
    }
    case 1: { // update the phase
      // TODO(mhroth)
      break;
    }
    default: break;
  }
}

// NOTE(mhroth): it is assumed that the block size (toIndex) is a multiple of 4
void DspPhasor::processSignal(DspObject *dspObject, int fromIndex, int n4) {
  DspPhasor *d = reinterpret_cast<DspPhasor *>(dspObject);
  #if __SSE3__
  float *input = d->dspBufferAtInlet[0];
  float *output = d->dspBufferAtOutlet[0];
  __m64 indicies = d->indicies;
  static __m128 constVec = _mm_set1_ps(SHORT_TO_FLOAT_RATIO);
  static __m128 sampVec = _mm_set1_ps(65536.0f/d->graph->get_sample_rate());

  while (n4) {
    // convert signal input to sample increments
    // (short) (input * (65536.0f/d->graph->get_sample_rate()))
    __m64 inc = _mm_cvtps_pi16(_mm_mul_ps(_mm_load_ps(input),sampVec));
    
    // cumulative summation of increments
    inc = _mm_add_pi16(_mm_add_pi16(_mm_add_pi16(inc,_mm_slli_si64(inc,16)),
        _mm_slli_si64(inc,32)),_mm_slli_si64(inc,48));
    
    // add increments to index
    indicies = _mm_add_pi16(_mm_set1_pi16(_mm_extract_pi16(indicies,3)), inc);
    
    _mm_store_ps(output,_mm_mul_ps(_mm_cvtpu16_ps(indicies),constVec));
    input += 4;
    output += 4;
    n4 -= 4;
  }
  
  d->indicies = indicies;
  
  #else
  // TODO(mhroth):!!!
  #endif
}

// http://cache-www.intel.com/cd/00/00/34/76/347603_347603.pdf
void DspPhasor::processScalar(DspObject *dspObject, int fromIndex, int toIndex) {
  DspPhasor *d = reinterpret_cast<DspPhasor *>(dspObject);
  #if __SSE3__
  /*
   * Creates an array of unsigned short indicies (since the length of the cosine lookup table is
   * of length 2^16. These indicies are incremented by a step size based on the desired frequency.
   * As the indicies overflow during addition, they loop back around to zero.
   */
  int n = toIndex - fromIndex;
  float *output = d->dspBufferAtOutlet[0]+fromIndex;
  __m64 inc = d->inc;
  short s = _mm_extract_pi16(inc,0) >> 2; // == / 4 in order to recover original step size
  static __m128 constVec = _mm_set1_ps(SHORT_TO_FLOAT_RATIO);
  unsigned short idx;
  
  // ensure that input and output vectors are 16-byte aligned
  __m64 indicies;
  switch (fromIndex & 0x3) {
    case 1: {
      idx = _mm_extract_pi16(d->indicies,0); // get current index
      *output++ = ((float) idx) * SHORT_TO_FLOAT_RATIO; idx += s; --n;
    }
    case 2: *output++ = ((float) idx) * SHORT_TO_FLOAT_RATIO; idx += s; --n;
    case 3: {
      *output++ = ((float) idx) * SHORT_TO_FLOAT_RATIO; idx += s; --n;
      indicies = _mm_set_pi16(idx+3*s, idx+2*s, idx+s, idx);
      break;
    }
    default: indicies = d->indicies; break;
  }
  
  // compute as many 4-tuples as possible
  int n4 = n & 0xFFFFFFFC; // we can process 4 indicies at a time
  while (n4) {
    // output = ((float) indicies) * (1/(2^16-1))
    _mm_store_ps(output, _mm_mul_ps(_mm_cvtpu16_ps(indicies),constVec));
    indicies = _mm_add_pi16(indicies, inc);
    output += 4;
    n4 -= 4;
  }
  
  // finish the remaining (up to 3) samples
  switch (n & 0x3) {
    case 3: {
      idx = _mm_extract_pi16(d->indicies,3);
      *output++ = ((float) idx) * SHORT_TO_FLOAT_RATIO; idx += s;
    }
    case 2: *output++ = ((float) idx) * SHORT_TO_FLOAT_RATIO; idx += s;
    case 1: {
      *output++ = ((float) idx) * SHORT_TO_FLOAT_RATIO; idx += s;
      d->indicies = _mm_set_pi16(idx+3*s, idx+2*s, idx+s, idx);
      break;
    }
    default: d->indicies = indicies; break;
    // set the current index to the correct location, given that the step size is actually
    // a real number, not an integer
    // NOTE(mhroth): but doing this will cause clicks :-/ Osc will thus go out of phase over time
    // Will anyone complain?
    //      d->currentIndex = currentIndex + ((short) ((d->sampleStep - floorf(d->sampleStep)) * n));
  }
  #else
  // TODO(mhroth):!!!
  #endif
}