#include "edlib.h"
#include <stdint.h>
#include <cstdlib>
#include <algorithm>
#include <vector>
#include <cstring>
#include <string>
using namespace std;
typedef uint64_t Word;
static const int WORD_SIZE = sizeof(Word) * 8; static const Word WORD_1 = static_cast<Word>(1);
static const Word HIGH_BIT_MASK = WORD_1 << (WORD_SIZE - 1); static const int MAX_UCHAR = 255;
struct AlignmentData {
Word* Ps;
Word* Ms;
int* scores;
int* firstBlocks;
int* lastBlocks;
AlignmentData(int maxNumBlocks, int targetLength) {
Ps = new Word[maxNumBlocks * targetLength];
Ms = new Word[maxNumBlocks * targetLength];
scores = new int[maxNumBlocks * targetLength];
firstBlocks = new int[targetLength];
lastBlocks = new int[targetLength];
}
~AlignmentData() {
delete[] Ps;
delete[] Ms;
delete[] scores;
delete[] firstBlocks;
delete[] lastBlocks;
}
};
struct Block {
Word P; Word M; int score;
Block() {}
Block(Word p, Word m, int s) :P(p), M(m), score(s) {}
};
class EqualityDefinition {
private:
bool matrix[MAX_UCHAR + 1][MAX_UCHAR + 1];
public:
EqualityDefinition(const string& alphabet,
const EdlibEqualityPair* additionalEqualities = NULL,
const int additionalEqualitiesLength = 0) {
for (int i = 0; i < static_cast<int>(alphabet.size()); i++) {
for (int j = 0; j < static_cast<int>(alphabet.size()); j++) {
matrix[i][j] = (i == j);
}
}
if (additionalEqualities != NULL) {
for (int i = 0; i < additionalEqualitiesLength; i++) {
size_t firstTransformed = alphabet.find(additionalEqualities[i].first);
size_t secondTransformed = alphabet.find(additionalEqualities[i].second);
if (firstTransformed != string::npos && secondTransformed != string::npos) {
matrix[firstTransformed][secondTransformed] = matrix[secondTransformed][firstTransformed] = true;
}
}
}
}
bool areEqual(unsigned char a, unsigned char b) const {
return matrix[a][b];
}
};
static int myersCalcEditDistanceSemiGlobal(const Word* Peq, int W, int maxNumBlocks,
int queryLength,
const unsigned char* target, int targetLength,
int k, EdlibAlignMode mode,
int* bestScore_, int** positions_, int* numPositions_);
static int myersCalcEditDistanceNW(const Word* Peq, int W, int maxNumBlocks,
int queryLength,
const unsigned char* target, int targetLength,
int k, int* bestScore_,
int* position_, bool findAlignment,
AlignmentData** alignData, int targetStopPosition);
static int obtainAlignment(
const unsigned char* query, const unsigned char* rQuery, int queryLength,
const unsigned char* target, const unsigned char* rTarget, int targetLength,
const EqualityDefinition& equalityDefinition, int alphabetLength, int bestScore,
unsigned char** alignment, int* alignmentLength);
static int obtainAlignmentHirschberg(
const unsigned char* query, const unsigned char* rQuery, int queryLength,
const unsigned char* target, const unsigned char* rTarget, int targetLength,
const EqualityDefinition& equalityDefinition, int alphabetLength, int bestScore,
unsigned char** alignment, int* alignmentLength);
static int obtainAlignmentTraceback(int queryLength, int targetLength,
int bestScore, const AlignmentData* alignData,
unsigned char** alignment, int* alignmentLength);
static string transformSequences(const char* queryOriginal, int queryLength,
const char* targetOriginal, int targetLength,
unsigned char** queryTransformed,
unsigned char** targetTransformed);
static inline int ceilDiv(int x, int y);
static inline unsigned char* createReverseCopy(const unsigned char* seq, int length);
static inline Word* buildPeq(const int alphabetLength,
const unsigned char* query,
const int queryLength,
const EqualityDefinition& equalityDefinition);
extern "C" EdlibAlignResult edlibAlign(const char* const queryOriginal, const int queryLength,
const char* const targetOriginal, const int targetLength,
const EdlibAlignConfig config) {
EdlibAlignResult result;
result.status = EDLIB_STATUS_OK;
result.editDistance = -1;
result.endLocations = result.startLocations = NULL;
result.numLocations = 0;
result.alignment = NULL;
result.alignmentLength = 0;
result.alphabetLength = 0;
unsigned char* query, * target;
string alphabet = transformSequences(queryOriginal, queryLength, targetOriginal, targetLength,
&query, &target);
result.alphabetLength = static_cast<int>(alphabet.size());
if (queryLength == 0 || targetLength == 0) {
if (config.mode == EDLIB_MODE_NW) {
result.editDistance = std::max(queryLength, targetLength);
result.endLocations = static_cast<int *>(malloc(sizeof(int) * 1));
result.endLocations[0] = targetLength - 1;
result.numLocations = 1;
} else if (config.mode == EDLIB_MODE_SHW || config.mode == EDLIB_MODE_HW) {
result.editDistance = queryLength;
result.endLocations = static_cast<int *>(malloc(sizeof(int) * 1));
result.endLocations[0] = -1;
result.numLocations = 1;
} else {
result.status = EDLIB_STATUS_ERROR;
}
free(query);
free(target);
return result;
}
int maxNumBlocks = ceilDiv(queryLength, WORD_SIZE); int W = maxNumBlocks * WORD_SIZE - queryLength; EqualityDefinition equalityDefinition(alphabet, config.additionalEqualities, config.additionalEqualitiesLength);
Word* Peq = buildPeq(static_cast<int>(alphabet.size()), query, queryLength, equalityDefinition);
int positionNW; AlignmentData* alignData = NULL;
bool dynamicK = false;
int k = config.k;
if (k < 0) { dynamicK = true;
k = WORD_SIZE; }
do {
if (config.mode == EDLIB_MODE_HW || config.mode == EDLIB_MODE_SHW) {
myersCalcEditDistanceSemiGlobal(Peq, W, maxNumBlocks,
queryLength, target, targetLength,
k, config.mode, &(result.editDistance),
&(result.endLocations), &(result.numLocations));
} else { myersCalcEditDistanceNW(Peq, W, maxNumBlocks,
queryLength, target, targetLength,
k, &(result.editDistance), &positionNW,
false, &alignData, -1);
}
k *= 2;
} while(dynamicK && result.editDistance == -1);
if (result.editDistance >= 0) { if (config.mode == EDLIB_MODE_NW) {
result.endLocations = static_cast<int *>(malloc(sizeof(int) * 1));
result.endLocations[0] = targetLength - 1;
result.numLocations = 1;
}
if (config.task == EDLIB_TASK_LOC || config.task == EDLIB_TASK_PATH) {
result.startLocations = static_cast<int *>(malloc(result.numLocations * sizeof(int)));
if (config.mode == EDLIB_MODE_HW) { const unsigned char* rTarget = createReverseCopy(target, targetLength);
const unsigned char* rQuery = createReverseCopy(query, queryLength);
Word* rPeq = buildPeq(static_cast<int>(alphabet.size()), rQuery, queryLength, equalityDefinition);
for (int i = 0; i < result.numLocations; i++) {
int endLocation = result.endLocations[i];
if (endLocation == -1) {
result.startLocations[i] = 0; } else {
int bestScoreSHW, numPositionsSHW;
int* positionsSHW;
myersCalcEditDistanceSemiGlobal(
rPeq, W, maxNumBlocks,
queryLength, rTarget + targetLength - endLocation - 1, endLocation + 1,
result.editDistance, EDLIB_MODE_SHW,
&bestScoreSHW, &positionsSHW, &numPositionsSHW);
result.startLocations[i] = endLocation - positionsSHW[numPositionsSHW - 1];
free(positionsSHW);
}
}
delete[] rTarget;
delete[] rQuery;
delete[] rPeq;
} else { for (int i = 0; i < result.numLocations; i++) {
result.startLocations[i] = 0;
}
}
}
if (config.task == EDLIB_TASK_PATH) {
int alnStartLocation = result.startLocations[0];
int alnEndLocation = result.endLocations[0];
const unsigned char* alnTarget = target + alnStartLocation;
const int alnTargetLength = alnEndLocation - alnStartLocation + 1;
const unsigned char* rAlnTarget = createReverseCopy(alnTarget, alnTargetLength);
const unsigned char* rQuery = createReverseCopy(query, queryLength);
obtainAlignment(query, rQuery, queryLength,
alnTarget, rAlnTarget, alnTargetLength,
equalityDefinition, static_cast<int>(alphabet.size()), result.editDistance,
&(result.alignment), &(result.alignmentLength));
delete[] rAlnTarget;
delete[] rQuery;
}
}
delete[] Peq;
free(query);
free(target);
if (alignData) delete alignData;
return result;
}
extern "C" char* edlibAlignmentToCigar(const unsigned char* const alignment, const int alignmentLength,
const EdlibCigarFormat cigarFormat) {
if (cigarFormat != EDLIB_CIGAR_EXTENDED && cigarFormat != EDLIB_CIGAR_STANDARD) {
return 0;
}
char moveCodeToChar[] = {'=', 'I', 'D', 'X'};
if (cigarFormat == EDLIB_CIGAR_STANDARD) {
moveCodeToChar[0] = moveCodeToChar[3] = 'M';
}
vector<char>* cigar = new vector<char>();
char lastMove = 0; int numOfSameMoves = 0;
for (int i = 0; i <= alignmentLength; i++) {
if (i == alignmentLength || (moveCodeToChar[alignment[i]] != lastMove && lastMove != 0)) {
int numDigits = 0;
for (; numOfSameMoves; numOfSameMoves /= 10) {
cigar->push_back('0' + numOfSameMoves % 10);
numDigits++;
}
reverse(cigar->end() - numDigits, cigar->end());
cigar->push_back(lastMove);
if (i < alignmentLength) {
if (alignment[i] > 3) {
delete cigar;
return 0;
}
numOfSameMoves = 0;
}
}
if (i < alignmentLength) {
lastMove = moveCodeToChar[alignment[i]];
numOfSameMoves++;
}
}
cigar->push_back(0); char* cigar_ = static_cast<char *>(malloc(cigar->size() * sizeof(char)));
memcpy(cigar_, &(*cigar)[0], cigar->size() * sizeof(char));
delete cigar;
return cigar_;
}
static inline Word* buildPeq(const int alphabetLength,
const unsigned char* const query,
const int queryLength,
const EqualityDefinition& equalityDefinition) {
int maxNumBlocks = ceilDiv(queryLength, WORD_SIZE);
Word* Peq = new Word[(alphabetLength + 1) * maxNumBlocks];
for (unsigned char symbol = 0; symbol <= alphabetLength; symbol++) {
for (int b = 0; b < maxNumBlocks; b++) {
if (symbol < alphabetLength) {
Peq[symbol * maxNumBlocks + b] = 0;
for (int r = (b+1) * WORD_SIZE - 1; r >= b * WORD_SIZE; r--) {
Peq[symbol * maxNumBlocks + b] <<= 1;
if (r >= queryLength || equalityDefinition.areEqual(query[r], symbol))
Peq[symbol * maxNumBlocks + b] += 1;
}
} else { Peq[symbol * maxNumBlocks + b] = static_cast<Word>(-1);
}
}
}
return Peq;
}
static inline unsigned char* createReverseCopy(const unsigned char* const seq, const int length) {
unsigned char* rSeq = new unsigned char[length];
for (int i = 0; i < length; i++) {
rSeq[i] = seq[length - i - 1];
}
return rSeq;
}
static inline int calculateBlock(Word Pv, Word Mv, Word Eq, const int hin,
Word &PvOut, Word &MvOut) {
Word hinIsNeg = static_cast<Word>(hin >> 2) & WORD_1;
Word Xv = Eq | Mv;
Eq |= hinIsNeg;
Word Xh = (((Eq & Pv) + Pv) ^ Pv) | Eq;
Word Ph = Mv | ~(Xh | Pv);
Word Mh = Pv & Xh;
int hout = 0;
hout = (Ph & HIGH_BIT_MASK) >> (WORD_SIZE - 1);
hout -= (Mh & HIGH_BIT_MASK) >> (WORD_SIZE - 1);
Ph <<= 1;
Mh <<= 1;
Mh |= hinIsNeg;
Ph |= static_cast<Word>((hin + 1) >> 1);
PvOut = Mh | ~(Xv | Ph);
MvOut = Ph & Xv;
return hout;
}
static inline int ceilDiv(const int x, const int y) {
return x % y ? x / y + 1 : x / y;
}
static inline int min(const int x, const int y) {
return x < y ? x : y;
}
static inline int max(const int x, const int y) {
return x > y ? x : y;
}
static inline vector<int> getBlockCellValues(const Block block) {
vector<int> scores(WORD_SIZE);
int score = block.score;
Word mask = HIGH_BIT_MASK;
for (int i = 0; i < WORD_SIZE - 1; i++) {
scores[i] = score;
if (block.P & mask) score--;
if (block.M & mask) score++;
mask >>= 1;
}
scores[WORD_SIZE - 1] = score;
return scores;
}
static inline void readBlock(const Block block, int* const dest) {
int score = block.score;
Word mask = HIGH_BIT_MASK;
for (int i = 0; i < WORD_SIZE - 1; i++) {
dest[WORD_SIZE - 1 - i] = score;
if (block.P & mask) score--;
if (block.M & mask) score++;
mask >>= 1;
}
dest[0] = score;
}
static inline void readBlockReverse(const Block block, int* const dest) {
int score = block.score;
Word mask = HIGH_BIT_MASK;
for (int i = 0; i < WORD_SIZE - 1; i++) {
dest[i] = score;
if (block.P & mask) score--;
if (block.M & mask) score++;
mask >>= 1;
}
dest[WORD_SIZE - 1] = score;
}
static inline bool allBlockCellsLarger(const Block block, const int k) {
vector<int> scores = getBlockCellValues(block);
for (int i = 0; i < WORD_SIZE; i++) {
if (scores[i] <= k) return false;
}
return true;
}
static int myersCalcEditDistanceSemiGlobal(
const Word* const Peq, const int W, const int maxNumBlocks,
const int queryLength,
const unsigned char* const target, const int targetLength,
int k, const EdlibAlignMode mode,
int* const bestScore_, int** const positions_, int* const numPositions_) {
*positions_ = NULL;
*numPositions_ = 0;
int firstBlock = 0;
int lastBlock = min(ceilDiv(k + 1, WORD_SIZE), maxNumBlocks) - 1; Block *bl;
Block* blocks = new Block[maxNumBlocks];
if (mode == EDLIB_MODE_HW) {
k = min(queryLength, k);
}
const int STRONG_REDUCE_NUM = 2048;
bl = blocks;
for (int b = 0; b <= lastBlock; b++) {
bl->score = (b + 1) * WORD_SIZE;
bl->P = static_cast<Word>(-1); bl->M = static_cast<Word>(0);
bl++;
}
int bestScore = -1;
vector<int> positions; const int startHout = mode == EDLIB_MODE_HW ? 0 : 1; const unsigned char* targetChar = target;
for (int c = 0; c < targetLength; c++) { const Word* Peq_c = Peq + (*targetChar) * maxNumBlocks;
int hout = startHout;
bl = blocks + firstBlock;
Peq_c += firstBlock;
for (int b = firstBlock; b <= lastBlock; b++) {
hout = calculateBlock(bl->P, bl->M, *Peq_c, hout, bl->P, bl->M);
bl->score += hout;
bl++; Peq_c++;
}
bl--; Peq_c--;
if ((lastBlock < maxNumBlocks - 1) && (bl->score - hout <= k) && ((*(Peq_c + 1) & WORD_1) || hout < 0)) { lastBlock++; bl++; Peq_c++;
bl->P = static_cast<Word>(-1); bl->M = static_cast<Word>(0);
bl->score = (bl - 1)->score - hout + WORD_SIZE + calculateBlock(bl->P, bl->M, *Peq_c, hout, bl->P, bl->M);
} else {
while (lastBlock >= firstBlock && bl->score >= k + WORD_SIZE) {
lastBlock--; bl--; Peq_c--;
}
}
if (c % STRONG_REDUCE_NUM == 0) {
while (lastBlock >= 0 && lastBlock >= firstBlock && allBlockCellsLarger(*bl, k)) {
lastBlock--; bl--; Peq_c--;
}
}
if (mode == EDLIB_MODE_HW && lastBlock == -1) {
lastBlock++; bl++; Peq_c++;
}
if (mode != EDLIB_MODE_HW) {
while (firstBlock <= lastBlock && blocks[firstBlock].score >= k + WORD_SIZE) {
firstBlock++;
}
if (c % STRONG_REDUCE_NUM == 0) { while (firstBlock <= lastBlock && allBlockCellsLarger(blocks[firstBlock], k)) {
firstBlock++;
}
}
}
if (lastBlock < firstBlock) {
*bestScore_ = bestScore;
if (bestScore != -1) {
*positions_ = static_cast<int *>(malloc(sizeof(int) * static_cast<int>(positions.size())));
*numPositions_ = static_cast<int>(positions.size());
copy(positions.begin(), positions.end(), *positions_);
}
delete[] blocks;
return EDLIB_STATUS_OK;
}
if (lastBlock == maxNumBlocks - 1) {
int colScore = bl->score;
if (colScore <= k) { if (bestScore == -1 || colScore <= bestScore) {
if (colScore != bestScore) {
positions.clear();
bestScore = colScore;
k = bestScore;
}
positions.push_back(c - W);
}
}
}
targetChar++;
}
if (lastBlock == maxNumBlocks - 1) {
vector<int> blockScores = getBlockCellValues(*bl);
for (int i = 0; i < W; i++) {
int colScore = blockScores[i + 1];
if (colScore <= k && (bestScore == -1 || colScore <= bestScore)) {
if (colScore != bestScore) {
positions.clear();
k = bestScore = colScore;
}
positions.push_back(targetLength - W + i);
}
}
}
*bestScore_ = bestScore;
if (bestScore != -1) {
*positions_ = static_cast<int *>(malloc(sizeof(int) * static_cast<int>(positions.size())));
*numPositions_ = static_cast<int>(positions.size());
copy(positions.begin(), positions.end(), *positions_);
}
delete[] blocks;
return EDLIB_STATUS_OK;
}
static int myersCalcEditDistanceNW(const Word* const Peq, const int W, const int maxNumBlocks,
const int queryLength,
const unsigned char* const target, const int targetLength,
int k, int* const bestScore_,
int* const position_, const bool findAlignment,
AlignmentData** const alignData, const int targetStopPosition) {
if (targetStopPosition > -1 && findAlignment) {
return EDLIB_STATUS_ERROR;
}
const int STRONG_REDUCE_NUM = 2048;
if (k < abs(targetLength - queryLength)) {
*bestScore_ = *position_ = -1;
return EDLIB_STATUS_OK;
}
k = min(k, max(queryLength, targetLength));
int firstBlock = 0;
int lastBlock = min(maxNumBlocks, ceilDiv(min(k, (k + queryLength - targetLength) / 2) + 1, WORD_SIZE)) - 1;
Block* bl;
Block* blocks = new Block[maxNumBlocks];
bl = blocks;
for (int b = 0; b <= lastBlock; b++) {
bl->score = (b + 1) * WORD_SIZE;
bl->P = static_cast<Word>(-1); bl->M = static_cast<Word>(0);
bl++;
}
if (findAlignment)
*alignData = new AlignmentData(maxNumBlocks, targetLength);
else if (targetStopPosition > -1)
*alignData = new AlignmentData(maxNumBlocks, 1);
else
*alignData = NULL;
const unsigned char* targetChar = target;
for (int c = 0; c < targetLength; c++) { const Word* Peq_c = Peq + *targetChar * maxNumBlocks;
int hout = 1;
bl = blocks + firstBlock;
for (int b = firstBlock; b <= lastBlock; b++) {
hout = calculateBlock(bl->P, bl->M, Peq_c[b], hout, bl->P, bl->M);
bl->score += hout;
bl++;
}
bl--;
k = min(k, bl->score
+ max(targetLength - c - 1, queryLength - ((1 + lastBlock) * WORD_SIZE - 1) - 1)
+ (lastBlock == maxNumBlocks - 1 ? W : 0));
if (lastBlock + 1 < maxNumBlocks
&& !( ((lastBlock + 1) * WORD_SIZE - 1
> k - bl->score + 2 * WORD_SIZE - 2 - targetLength + c + queryLength))) {
lastBlock++; bl++;
bl->P = static_cast<Word>(-1); bl->M = static_cast<Word>(0);
int newHout = calculateBlock(bl->P, bl->M, Peq_c[lastBlock], hout, bl->P, bl->M);
bl->score = (bl - 1)->score - hout + WORD_SIZE + newHout;
hout = newHout;
}
while (lastBlock >= firstBlock
&& (bl->score >= k + WORD_SIZE
|| ((lastBlock + 1) * WORD_SIZE - 1 >
k - bl->score + 2 * WORD_SIZE - 2 - targetLength + c + queryLength + 1))) {
lastBlock--; bl--;
}
while (firstBlock <= lastBlock
&& (blocks[firstBlock].score >= k + WORD_SIZE
|| ((firstBlock + 1) * WORD_SIZE - 1 <
blocks[firstBlock].score - k - targetLength + queryLength + c))) {
firstBlock++;
}
if (c % STRONG_REDUCE_NUM == 0) { while (lastBlock >= firstBlock) {
vector<int> scores = getBlockCellValues(*bl);
int numCells = lastBlock == maxNumBlocks - 1 ? WORD_SIZE - W : WORD_SIZE;
int r = lastBlock * WORD_SIZE + numCells - 1;
bool reduce = true;
for (int i = WORD_SIZE - numCells; i < WORD_SIZE; i++) {
if (scores[i] <= k && r <= k - scores[i] - targetLength + c + queryLength + 1) {
reduce = false;
break;
}
r--;
}
if (!reduce) break;
lastBlock--; bl--;
}
while (firstBlock <= lastBlock) {
vector<int> scores = getBlockCellValues(blocks[firstBlock]);
int numCells = firstBlock == maxNumBlocks - 1 ? WORD_SIZE - W : WORD_SIZE;
int r = firstBlock * WORD_SIZE + numCells - 1;
bool reduce = true;
for (int i = WORD_SIZE - numCells; i < WORD_SIZE; i++) {
if (scores[i] <= k && r >= scores[i] - k - targetLength + c + queryLength) {
reduce = false;
break;
}
r--;
}
if (!reduce) break;
firstBlock++;
}
}
if (lastBlock < firstBlock) {
*bestScore_ = *position_ = -1;
delete[] blocks;
return EDLIB_STATUS_OK;
}
if (findAlignment && c < targetLength) {
bl = blocks + firstBlock;
for (int b = firstBlock; b <= lastBlock; b++) {
(*alignData)->Ps[maxNumBlocks * c + b] = bl->P;
(*alignData)->Ms[maxNumBlocks * c + b] = bl->M;
(*alignData)->scores[maxNumBlocks * c + b] = bl->score;
(*alignData)->firstBlocks[c] = firstBlock;
(*alignData)->lastBlocks[c] = lastBlock;
bl++;
}
}
if (c == targetStopPosition) {
for (int b = firstBlock; b <= lastBlock; b++) {
(*alignData)->Ps[b] = (blocks + b)->P;
(*alignData)->Ms[b] = (blocks + b)->M;
(*alignData)->scores[b] = (blocks + b)->score;
(*alignData)->firstBlocks[0] = firstBlock;
(*alignData)->lastBlocks[0] = lastBlock;
}
*bestScore_ = -1;
*position_ = targetStopPosition;
delete[] blocks;
return EDLIB_STATUS_OK;
}
targetChar++;
}
if (lastBlock == maxNumBlocks - 1) { int bestScore = getBlockCellValues(blocks[lastBlock])[W];
if (bestScore <= k) {
*bestScore_ = bestScore;
*position_ = targetLength - 1;
delete[] blocks;
return EDLIB_STATUS_OK;
}
}
*bestScore_ = *position_ = -1;
delete[] blocks;
return EDLIB_STATUS_OK;
}
static int obtainAlignmentTraceback(const int queryLength, const int targetLength,
const int bestScore, const AlignmentData* const alignData,
unsigned char** const alignment, int* const alignmentLength) {
const int maxNumBlocks = ceilDiv(queryLength, WORD_SIZE);
const int W = maxNumBlocks * WORD_SIZE - queryLength;
*alignment = static_cast<unsigned char*>(malloc((queryLength + targetLength - 1) * sizeof(unsigned char)));
*alignmentLength = 0;
int c = targetLength - 1; int b = maxNumBlocks - 1; int currScore = bestScore; int lScore = -1; int uScore = -1; int ulScore = -1; Word currP = alignData->Ps[c * maxNumBlocks + b]; Word currM = alignData->Ms[c * maxNumBlocks + b]; bool thereIsLeftBlock = c > 0 && b >= alignData->firstBlocks[c-1] && b <= alignData->lastBlocks[c-1];
Word lP = 0, lM = 0;
if (thereIsLeftBlock) {
lP = alignData->Ps[(c - 1) * maxNumBlocks + b]; lM = alignData->Ms[(c - 1) * maxNumBlocks + b]; }
currP <<= W;
currM <<= W;
int blockPos = WORD_SIZE - W - 1;
while (true) {
if (c == 0) {
thereIsLeftBlock = true;
lScore = b * WORD_SIZE + blockPos + 1;
ulScore = lScore - 1;
}
if (lScore == -1 && thereIsLeftBlock) {
lScore = alignData->scores[(c - 1) * maxNumBlocks + b]; for (int i = 0; i < WORD_SIZE - blockPos - 1; i++) {
if (lP & HIGH_BIT_MASK) lScore--;
if (lM & HIGH_BIT_MASK) lScore++;
lP <<= 1;
lM <<= 1;
}
}
if (ulScore == -1) {
if (lScore != -1) {
ulScore = lScore;
if (lP & HIGH_BIT_MASK) ulScore--;
if (lM & HIGH_BIT_MASK) ulScore++;
}
else if (c > 0 && b-1 >= alignData->firstBlocks[c-1] && b-1 <= alignData->lastBlocks[c-1]) {
ulScore = alignData->scores[(c - 1) * maxNumBlocks + b - 1];
}
}
if (uScore == -1) {
uScore = currScore;
if (currP & HIGH_BIT_MASK) uScore--;
if (currM & HIGH_BIT_MASK) uScore++;
currP <<= 1;
currM <<= 1;
}
if (uScore != -1 && uScore + 1 == currScore) {
currScore = uScore;
lScore = ulScore;
uScore = ulScore = -1;
if (blockPos == 0) { if (b == 0) { (*alignment)[(*alignmentLength)++] = EDLIB_EDOP_INSERT; for (int i = 0; i < c + 1; i++) (*alignment)[(*alignmentLength)++] = EDLIB_EDOP_DELETE;
break;
} else {
blockPos = WORD_SIZE - 1;
b--;
currP = alignData->Ps[c * maxNumBlocks + b];
currM = alignData->Ms[c * maxNumBlocks + b];
if (c > 0 && b >= alignData->firstBlocks[c-1] && b <= alignData->lastBlocks[c-1]) {
thereIsLeftBlock = true;
lP = alignData->Ps[(c - 1) * maxNumBlocks + b]; lM = alignData->Ms[(c - 1) * maxNumBlocks + b];
} else {
thereIsLeftBlock = false;
}
}
} else {
blockPos--;
lP <<= 1;
lM <<= 1;
}
(*alignment)[(*alignmentLength)++] = EDLIB_EDOP_INSERT;
}
else if (lScore != -1 && lScore + 1 == currScore) {
currScore = lScore;
uScore = ulScore;
lScore = ulScore = -1;
c--;
if (c == -1) { (*alignment)[(*alignmentLength)++] = EDLIB_EDOP_DELETE; int numUp = b * WORD_SIZE + blockPos + 1;
for (int i = 0; i < numUp; i++) (*alignment)[(*alignmentLength)++] = EDLIB_EDOP_INSERT;
break;
}
currP = lP;
currM = lM;
if (c > 0 && b >= alignData->firstBlocks[c-1] && b <= alignData->lastBlocks[c-1]) {
thereIsLeftBlock = true;
lP = alignData->Ps[(c - 1) * maxNumBlocks + b];
lM = alignData->Ms[(c - 1) * maxNumBlocks + b];
} else {
if (c == 0) { thereIsLeftBlock = true;
lScore = b * WORD_SIZE + blockPos + 1;
ulScore = lScore - 1;
} else {
thereIsLeftBlock = false;
}
}
(*alignment)[(*alignmentLength)++] = EDLIB_EDOP_DELETE;
}
else if (ulScore != -1) {
unsigned char moveCode = ulScore == currScore ? EDLIB_EDOP_MATCH : EDLIB_EDOP_MISMATCH;
currScore = ulScore;
uScore = lScore = ulScore = -1;
c--;
if (c == -1) { (*alignment)[(*alignmentLength)++] = moveCode; int numUp = b * WORD_SIZE + blockPos;
for (int i = 0; i < numUp; i++) (*alignment)[(*alignmentLength)++] = EDLIB_EDOP_INSERT;
break;
}
if (blockPos == 0) { if (b == 0) { (*alignment)[(*alignmentLength)++] = moveCode; for (int i = 0; i < c + 1; i++) (*alignment)[(*alignmentLength)++] = EDLIB_EDOP_DELETE;
break;
}
blockPos = WORD_SIZE - 1;
b--;
currP = alignData->Ps[c * maxNumBlocks + b];
currM = alignData->Ms[c * maxNumBlocks + b];
} else { blockPos--;
currP = lP;
currM = lM;
currP <<= 1;
currM <<= 1;
}
if (c > 0 && b >= alignData->firstBlocks[c-1] && b <= alignData->lastBlocks[c-1]) {
thereIsLeftBlock = true;
lP = alignData->Ps[(c - 1) * maxNumBlocks + b];
lM = alignData->Ms[(c - 1) * maxNumBlocks + b];
} else {
if (c == 0) { thereIsLeftBlock = true;
lScore = b * WORD_SIZE + blockPos + 1;
ulScore = lScore - 1;
} else {
thereIsLeftBlock = false;
}
}
(*alignment)[(*alignmentLength)++] = moveCode;
} else {
break;
}
}
*alignment = static_cast<unsigned char*>(realloc(*alignment, (*alignmentLength) * sizeof(unsigned char)));
reverse(*alignment, *alignment + (*alignmentLength));
return EDLIB_STATUS_OK;
}
static int obtainAlignment(
const unsigned char* const query, const unsigned char* const rQuery, const int queryLength,
const unsigned char* const target, const unsigned char* const rTarget, const int targetLength,
const EqualityDefinition& equalityDefinition, const int alphabetLength, const int bestScore,
unsigned char** const alignment, int* const alignmentLength) {
if (queryLength == 0 || targetLength == 0) {
*alignmentLength = targetLength + queryLength;
*alignment = static_cast<unsigned char*>(malloc((*alignmentLength) * sizeof(unsigned char)));
for (int i = 0; i < *alignmentLength; i++) {
(*alignment)[i] = queryLength == 0 ? EDLIB_EDOP_DELETE : EDLIB_EDOP_INSERT;
}
return EDLIB_STATUS_OK;
}
const int maxNumBlocks = ceilDiv(queryLength, WORD_SIZE);
const int W = maxNumBlocks * WORD_SIZE - queryLength;
int statusCode;
long long alignmentDataSize = (2ll * sizeof(Word) + sizeof(int)) * maxNumBlocks * targetLength
+ 2ll * sizeof(int) * targetLength;
if (alignmentDataSize < 1024 * 1024) {
int score_, endLocation_; AlignmentData* alignData = NULL;
Word* Peq = buildPeq(alphabetLength, query, queryLength, equalityDefinition);
myersCalcEditDistanceNW(Peq, W, maxNumBlocks,
queryLength,
target, targetLength,
bestScore,
&score_, &endLocation_, true, &alignData, -1);
statusCode = obtainAlignmentTraceback(queryLength, targetLength,
bestScore, alignData, alignment, alignmentLength);
delete alignData;
delete[] Peq;
} else {
statusCode = obtainAlignmentHirschberg(query, rQuery, queryLength,
target, rTarget, targetLength,
equalityDefinition, alphabetLength, bestScore,
alignment, alignmentLength);
}
return statusCode;
}
static int obtainAlignmentHirschberg(
const unsigned char* const query, const unsigned char* const rQuery, const int queryLength,
const unsigned char* const target, const unsigned char* const rTarget, const int targetLength,
const EqualityDefinition& equalityDefinition, const int alphabetLength, const int bestScore,
unsigned char** const alignment, int* const alignmentLength) {
const int maxNumBlocks = ceilDiv(queryLength, WORD_SIZE);
const int W = maxNumBlocks * WORD_SIZE - queryLength;
Word* Peq = buildPeq(alphabetLength, query, queryLength, equalityDefinition);
Word* rPeq = buildPeq(alphabetLength, rQuery, queryLength, equalityDefinition);
int score_, endLocation_;
const int leftHalfWidth = targetLength / 2;
const int rightHalfWidth = targetLength - leftHalfWidth;
AlignmentData* alignDataLeftHalf = NULL;
int leftHalfCalcStatus = myersCalcEditDistanceNW(
Peq, W, maxNumBlocks, queryLength, target, targetLength, bestScore,
&score_, &endLocation_, false, &alignDataLeftHalf, leftHalfWidth - 1);
AlignmentData* alignDataRightHalf = NULL;
int rightHalfCalcStatus = myersCalcEditDistanceNW(
rPeq, W, maxNumBlocks, queryLength, rTarget, targetLength, bestScore,
&score_, &endLocation_, false, &alignDataRightHalf, rightHalfWidth - 1);
delete[] Peq;
delete[] rPeq;
if (leftHalfCalcStatus == EDLIB_STATUS_ERROR || rightHalfCalcStatus == EDLIB_STATUS_ERROR) {
if (alignDataLeftHalf) delete alignDataLeftHalf;
if (alignDataRightHalf) delete alignDataRightHalf;
return EDLIB_STATUS_ERROR;
}
int firstBlockIdxLeft = alignDataLeftHalf->firstBlocks[0];
int lastBlockIdxLeft = alignDataLeftHalf->lastBlocks[0];
int scoresLeftLength = (lastBlockIdxLeft - firstBlockIdxLeft + 1) * WORD_SIZE;
int* scoresLeft = new int[scoresLeftLength];
for (int blockIdx = firstBlockIdxLeft; blockIdx <= lastBlockIdxLeft; blockIdx++) {
Block block(alignDataLeftHalf->Ps[blockIdx], alignDataLeftHalf->Ms[blockIdx],
alignDataLeftHalf->scores[blockIdx]);
readBlock(block, scoresLeft + (blockIdx - firstBlockIdxLeft) * WORD_SIZE);
}
int scoresLeftStartIdx = firstBlockIdxLeft * WORD_SIZE;
if (lastBlockIdxLeft == maxNumBlocks - 1) {
scoresLeftLength -= W;
}
int firstBlockIdxRight = alignDataRightHalf->firstBlocks[0];
int lastBlockIdxRight = alignDataRightHalf->lastBlocks[0];
int scoresRightLength = (lastBlockIdxRight - firstBlockIdxRight + 1) * WORD_SIZE;
int* scoresRight = new int[scoresRightLength];
int* scoresRightOriginalStart = scoresRight;
for (int blockIdx = firstBlockIdxRight; blockIdx <= lastBlockIdxRight; blockIdx++) {
Block block(alignDataRightHalf->Ps[blockIdx], alignDataRightHalf->Ms[blockIdx],
alignDataRightHalf->scores[blockIdx]);
readBlockReverse(block, scoresRight + (lastBlockIdxRight - blockIdx) * WORD_SIZE);
}
int scoresRightStartIdx = queryLength - (lastBlockIdxRight + 1) * WORD_SIZE;
if (scoresRightStartIdx < 0) {
scoresRight += W;
scoresRightStartIdx += W;
scoresRightLength -= W;
}
delete alignDataLeftHalf;
delete alignDataRightHalf;
int queryIdxLeftStart = max(scoresLeftStartIdx, scoresRightStartIdx - 1);
int queryIdxLeftEnd = min(scoresLeftStartIdx + scoresLeftLength - 1,
scoresRightStartIdx + scoresRightLength - 2);
int leftScore = -1, rightScore = -1;
int queryIdxLeftAlignment = -1; bool queryIdxLeftAlignmentFound = false;
for (int queryIdx = queryIdxLeftStart; queryIdx <= queryIdxLeftEnd; queryIdx++) {
leftScore = scoresLeft[queryIdx - scoresLeftStartIdx];
rightScore = scoresRight[queryIdx + 1 - scoresRightStartIdx];
if (leftScore + rightScore == bestScore) {
queryIdxLeftAlignment = queryIdx;
queryIdxLeftAlignmentFound = true;
break;
}
}
if (!queryIdxLeftAlignmentFound && scoresLeftStartIdx == 0 && scoresRightStartIdx == 0) {
leftScore = leftHalfWidth;
rightScore = scoresRight[0];
if (leftScore + rightScore == bestScore) {
queryIdxLeftAlignment = -1;
queryIdxLeftAlignmentFound = true;
}
}
if (!queryIdxLeftAlignmentFound && scoresLeftStartIdx + scoresLeftLength == queryLength
&& scoresRightStartIdx + scoresRightLength == queryLength) {
leftScore = scoresLeft[scoresLeftLength - 1];
rightScore = rightHalfWidth;
if (leftScore + rightScore == bestScore) {
queryIdxLeftAlignment = queryLength - 1;
queryIdxLeftAlignmentFound = true;
}
}
delete[] scoresLeft;
delete[] scoresRightOriginalStart;
if (queryIdxLeftAlignmentFound == false) {
return EDLIB_STATUS_ERROR;
}
const int ulHeight = queryIdxLeftAlignment + 1;
const int lrHeight = queryLength - ulHeight;
const int ulWidth = leftHalfWidth;
const int lrWidth = rightHalfWidth;
unsigned char* ulAlignment = NULL; int ulAlignmentLength;
int ulStatusCode = obtainAlignment(query, rQuery + lrHeight, ulHeight,
target, rTarget + lrWidth, ulWidth,
equalityDefinition, alphabetLength, leftScore,
&ulAlignment, &ulAlignmentLength);
unsigned char* lrAlignment = NULL; int lrAlignmentLength;
int lrStatusCode = obtainAlignment(query + ulHeight, rQuery, lrHeight,
target + ulWidth, rTarget, lrWidth,
equalityDefinition, alphabetLength, rightScore,
&lrAlignment, &lrAlignmentLength);
if (ulStatusCode == EDLIB_STATUS_ERROR || lrStatusCode == EDLIB_STATUS_ERROR) {
if (ulAlignment) free(ulAlignment);
if (lrAlignment) free(lrAlignment);
return EDLIB_STATUS_ERROR;
}
*alignmentLength = ulAlignmentLength + lrAlignmentLength;
*alignment = static_cast<unsigned char*>(malloc((*alignmentLength) * sizeof(unsigned char)));
memcpy(*alignment, ulAlignment, ulAlignmentLength);
memcpy(*alignment + ulAlignmentLength, lrAlignment, lrAlignmentLength);
free(ulAlignment);
free(lrAlignment);
return EDLIB_STATUS_OK;
}
static string transformSequences(const char* const queryOriginal, const int queryLength,
const char* const targetOriginal, const int targetLength,
unsigned char** const queryTransformed,
unsigned char** const targetTransformed) {
*queryTransformed = static_cast<unsigned char *>(malloc(sizeof(unsigned char) * queryLength));
*targetTransformed = static_cast<unsigned char *>(malloc(sizeof(unsigned char) * targetLength));
string alphabet = "";
unsigned char letterIdx[MAX_UCHAR + 1];
bool inAlphabet[MAX_UCHAR + 1]; for (int i = 0; i < MAX_UCHAR + 1; i++) inAlphabet[i] = false;
for (int i = 0; i < queryLength; i++) {
unsigned char c = static_cast<unsigned char>(queryOriginal[i]);
if (!inAlphabet[c]) {
inAlphabet[c] = true;
letterIdx[c] = static_cast<unsigned char>(alphabet.size());
alphabet += queryOriginal[i];
}
(*queryTransformed)[i] = letterIdx[c];
}
for (int i = 0; i < targetLength; i++) {
unsigned char c = static_cast<unsigned char>(targetOriginal[i]);
if (!inAlphabet[c]) {
inAlphabet[c] = true;
letterIdx[c] = static_cast<unsigned char>(alphabet.size());
alphabet += targetOriginal[i];
}
(*targetTransformed)[i] = letterIdx[c];
}
return alphabet;
}
extern "C" EdlibAlignConfig edlibNewAlignConfig(int k, EdlibAlignMode mode, EdlibAlignTask task,
const EdlibEqualityPair* additionalEqualities,
int additionalEqualitiesLength) {
EdlibAlignConfig config;
config.k = k;
config.mode = mode;
config.task = task;
config.additionalEqualities = additionalEqualities;
config.additionalEqualitiesLength = additionalEqualitiesLength;
return config;
}
extern "C" EdlibAlignConfig edlibDefaultAlignConfig(void) {
return edlibNewAlignConfig(-1, EDLIB_MODE_NW, EDLIB_TASK_DISTANCE, NULL, 0);
}
extern "C" void edlibFreeAlignResult(EdlibAlignResult result) {
if (result.endLocations) free(result.endLocations);
if (result.startLocations) free(result.startLocations);
if (result.alignment) free(result.alignment);
}