#pragma once
#include "Util/SymSpell/IDistance.h"
#include <vector>
class DamerauOSA : public IDistance {
public:
DamerauOSA() = default;
DamerauOSA(std::size_t expectedMaxStringLength)
: _baseChar1Costs(expectedMaxStringLength, 0),
_basePrevChar1Costs(expectedMaxStringLength, 0) {
}
int Distance(std::string_view string1, std::string_view string2) override {
if (string1.empty()) {
return static_cast<int>(string2.size());
}
if (string2.empty()) {
return static_cast<int>(string1.size());
}
if (string1.size() > string2.size()) {
std::swap(string1, string2);
}
int len1 = 0, len2 = 0, start = 0;
PrefixSuffixPrep(string1, string2, len1, len2, start);
if (len1 == 0) {
return len2;
}
if (len2 > static_cast<int>(this->_baseChar1Costs.size())) {
_baseChar1Costs.resize(len2, 0);
_basePrevChar1Costs.resize(len2, 0);
}
return Distance(string1, string2, len1, len2, start, _baseChar1Costs, _basePrevChar1Costs);
}
int Distance(std::string_view string1, std::string_view string2, std::size_t maxEditDistance) override {
if (string1.empty() || string2.empty()) {
return NullDistanceResults(string1, string2, maxEditDistance);
}
if (maxEditDistance <= 0) {
return (string1 == string2) ? 0 : -1;
}
if (string1.size() > string2.size()) {
std::swap(string1, string2);
}
if (string2.size() - string1.size() > maxEditDistance) {
return -1;
}
int len1 = 0, len2 = 0, start = 0;
PrefixSuffixPrep(string1, string2, len1, len2, start);
if (len1 == 0) {
return (len2 <= static_cast<int>(maxEditDistance)) ? len2 : -1;
}
if (len2 > static_cast<int>(_baseChar1Costs.size())) {
_baseChar1Costs.resize(len2, 0);
_basePrevChar1Costs.resize(len2, 0);
}
if (static_cast<int>(maxEditDistance) < len2) {
return Distance(string1, string2, len1, len2, start, static_cast<int>(maxEditDistance), _baseChar1Costs, _basePrevChar1Costs);
}
return Distance(string1, string2, len1, len2, start, _baseChar1Costs, _basePrevChar1Costs);
}
static int Distance(std::string_view string1, std::string_view string2, int len1, int len2, int start,
std::vector<int> char1Costs,
std::vector<int> prevChar1Costs) {
int j;
for (j = 0; j < len2; j++) char1Costs[j] = j + 1;
char char1 = ' ';
int currentCost = 0;
for (int i = 0; i < len1; ++i) {
char prevChar1 = char1;
char1 = string1[start + i];
char char2 = ' ';
int leftCharCost = i, aboveCharCost = i;
int nextTransCost = 0;
for (j = 0; j < len2; ++j) {
int thisTransCost = nextTransCost;
nextTransCost = prevChar1Costs[j];
prevChar1Costs[j] = currentCost = leftCharCost; leftCharCost = char1Costs[j]; char prevChar2 = char2;
char2 = string2[start + j];
if (char1 != char2) {
if (aboveCharCost < currentCost) currentCost = aboveCharCost; if (leftCharCost < currentCost) currentCost = leftCharCost; ++currentCost;
if ((i != 0) && (j != 0) && (char1 == prevChar2) && (prevChar1 == char2) && (thisTransCost + 1 < currentCost)) {
currentCost = thisTransCost + 1; }
}
char1Costs[j] = aboveCharCost = currentCost;
}
}
return currentCost;
}
static int Distance(std::string_view string1, std::string_view string2, int len1, int len2, int start,
int maxDistance, std::vector<int> char1Costs, std::vector<int> prevChar1Costs) {
int i, j;
for (j = 0; j < maxDistance; j++)
char1Costs[j] = j + 1;
for (; j < len2;) char1Costs[j++] = maxDistance + 1;
int lenDiff = len2 - len1;
int jStartOffset = maxDistance - lenDiff;
int jStart = 0;
int jEnd = maxDistance;
char char1 = ' ';
int currentCost = 0;
for (i = 0; i < len1; ++i) {
char prevChar1 = char1;
char1 = string1[start + i];
char char2 = ' ';
int leftCharCost, aboveCharCost;
leftCharCost = aboveCharCost = i;
int nextTransCost = 0;
jStart += (i > jStartOffset) ? 1 : 0;
jEnd += (jEnd < len2) ? 1 : 0;
for (j = jStart; j < jEnd; ++j) {
int thisTransCost = nextTransCost;
nextTransCost = prevChar1Costs[j];
prevChar1Costs[j] = currentCost = leftCharCost; leftCharCost = char1Costs[j]; char prevChar2 = char2;
char2 = string2[start + j];
if (char1 != char2) {
if (aboveCharCost < currentCost) currentCost = aboveCharCost; if (leftCharCost < currentCost) currentCost = leftCharCost; ++currentCost;
if ((i != 0) && (j != 0) && (char1 == prevChar2) && (prevChar1 == char2) && (thisTransCost + 1 < currentCost)) {
currentCost = thisTransCost + 1; }
}
char1Costs[j] = aboveCharCost = currentCost;
}
if (char1Costs[i + lenDiff] > maxDistance) return -1;
}
return (currentCost <= maxDistance) ? currentCost : -1;
}
static void PrefixSuffixPrep(std::string_view string1, std::string_view string2, int &len1, int &len2, int &start) {
len1 = static_cast<int>(string1.size()); len2 = static_cast<int>(string2.size());
while (len1 != 0 && string1[len1 - 1] == string2[len2 - 1]) {
len1--;
len2--;
}
start = 0;
while (start != len1 && string1[start] == string2[start]) {
start++;
}
if (start != 0) {
len2 -= start; len1 -= start;
}
}
static int NullDistanceResults(std::string_view string1, std::string_view string2, std::size_t maxEditDistance) {
if (string1.empty())
return (string2.empty()) ? 0 : (string2.size() <= maxEditDistance) ? static_cast<int>(string2.size())
: -1;
return (string1.size() <= maxEditDistance) ? static_cast<int>(string1.size()) : -1;
}
private:
std::vector<int> _baseChar1Costs;
std::vector<int> _basePrevChar1Costs;
};