#include <assert.h>
#include <stdbool.h>
#include <stdlib.h>
#include "sqlite3ext.h"
SQLITE_EXTENSION_INIT3
#include "fuzzy/fuzzy.h"
// Reports whether a NUL-terminated byte string is pure 7-bit ASCII.
// Returns false as soon as a byte with the high bit (0x80) set is seen,
// and true if the terminator is reached first (including for "").
static bool is_ascii(const unsigned char* str) {
    const unsigned char* cursor = str;
    while (*cursor != '\0') {
        if ((*cursor & 0x80) != 0) {
            return false;
        }
        cursor++;
    }
    return true;
}
// SQL function taking two text arguments and delegating to
// damerau_levenshtein().
//
// Sets an error result when sqlite3_value_text() yields a null pointer for
// either argument, or when either string fails the is_ascii() check.
// Otherwise the computed distance becomes the integer result.
static void fuzzy_damlev(sqlite3_context* context, int argc, sqlite3_value** argv) {
    assert(argc == 2);
    const unsigned char* lhs = sqlite3_value_text(argv[0]);
    const unsigned char* rhs = sqlite3_value_text(argv[1]);

    if (!lhs || !rhs) {
        sqlite3_result_error(context, "arguments should not be NULL", -1);
    } else if (!(is_ascii(lhs) && is_ascii(rhs))) {
        sqlite3_result_error(context, "arguments should be ASCII strings", -1);
    } else {
        unsigned dist = damerau_levenshtein((const char*)lhs, (const char*)rhs);
        sqlite3_result_int(context, dist);
    }
}
// SQL function taking two text arguments and delegating to hamming().
//
// Sets an error result when sqlite3_value_text() yields a null pointer for
// either argument, or when either string fails the is_ascii() check.
// Otherwise the value returned by hamming() becomes the integer result.
static void fuzzy_hamming(sqlite3_context* context, int argc, sqlite3_value** argv) {
    assert(argc == 2);
    const unsigned char* lhs = sqlite3_value_text(argv[0]);
    const unsigned char* rhs = sqlite3_value_text(argv[1]);

    if (!lhs || !rhs) {
        sqlite3_result_error(context, "arguments should not be NULL", -1);
    } else if (!(is_ascii(lhs) && is_ascii(rhs))) {
        sqlite3_result_error(context, "arguments should be ASCII strings", -1);
    } else {
        int dist = hamming((const char*)lhs, (const char*)rhs);
        sqlite3_result_int(context, dist);
    }
}
// SQL function taking two text arguments and delegating to jaro_winkler().
//
// Sets an error result when sqlite3_value_text() yields a null pointer for
// either argument, or when either string fails the is_ascii() check.
// Otherwise the value returned by jaro_winkler() becomes the double result.
static void fuzzy_jarowin(sqlite3_context* context, int argc, sqlite3_value** argv) {
    assert(argc == 2);
    const unsigned char* lhs = sqlite3_value_text(argv[0]);
    const unsigned char* rhs = sqlite3_value_text(argv[1]);

    if (!lhs || !rhs) {
        sqlite3_result_error(context, "arguments should not be NULL", -1);
    } else if (!(is_ascii(lhs) && is_ascii(rhs))) {
        sqlite3_result_error(context, "arguments should be ASCII strings", -1);
    } else {
        double score = jaro_winkler((const char*)lhs, (const char*)rhs);
        sqlite3_result_double(context, score);
    }
}
// SQL function taking two text arguments and delegating to levenshtein().
//
// Sets an error result when sqlite3_value_text() yields a null pointer for
// either argument, or when either string fails the is_ascii() check.
// Otherwise the computed distance becomes the integer result.
static void fuzzy_leven(sqlite3_context* context, int argc, sqlite3_value** argv) {
    assert(argc == 2);
    const unsigned char* lhs = sqlite3_value_text(argv[0]);
    const unsigned char* rhs = sqlite3_value_text(argv[1]);

    if (!lhs || !rhs) {
        sqlite3_result_error(context, "arguments should not be NULL", -1);
    } else if (!(is_ascii(lhs) && is_ascii(rhs))) {
        sqlite3_result_error(context, "arguments should be ASCII strings", -1);
    } else {
        unsigned dist = levenshtein((const char*)lhs, (const char*)rhs);
        sqlite3_result_int(context, dist);
    }
}
// SQL function taking two text arguments and delegating to
// optimal_string_alignment().
//
// Sets an error result when sqlite3_value_text() yields a null pointer for
// either argument, or when either string fails the is_ascii() check.
// Otherwise the computed distance becomes the integer result.
static void fuzzy_osadist(sqlite3_context* context, int argc, sqlite3_value** argv) {
    assert(argc == 2);
    const unsigned char* lhs = sqlite3_value_text(argv[0]);
    const unsigned char* rhs = sqlite3_value_text(argv[1]);

    if (!lhs || !rhs) {
        sqlite3_result_error(context, "arguments should not be NULL", -1);
    } else if (!(is_ascii(lhs) && is_ascii(rhs))) {
        sqlite3_result_error(context, "arguments should be ASCII strings", -1);
    } else {
        unsigned dist = optimal_string_alignment((const char*)lhs, (const char*)rhs);
        sqlite3_result_int(context, dist);
    }
}
// SQL function taking one text argument and delegating to soundex().
//
// If sqlite3_value_text() yields a null pointer the function returns without
// setting any result. A string that fails is_ascii() produces an error
// result. Otherwise the string returned by soundex() is handed to SQLite
// with free() as its destructor.
static void fuzzy_soundex(sqlite3_context* context, int argc, sqlite3_value** argv) {
    assert(argc == 1);
    const unsigned char* text = sqlite3_value_text(argv[0]);
    if (!text) {
        return;
    }

    if (is_ascii(text)) {
        char* code = soundex((const char*)text);
        sqlite3_result_text(context, code, -1, free);
    } else {
        sqlite3_result_error(context, "argument should be ASCII string", -1);
    }
}
// SQL function taking one text argument and delegating to refined_soundex().
//
// If sqlite3_value_text() yields a null pointer the function returns without
// setting any result. A string that fails is_ascii() produces an error
// result. Otherwise the string returned by refined_soundex() is handed to
// SQLite with free() as its destructor.
static void fuzzy_rsoundex(sqlite3_context* context, int argc, sqlite3_value** argv) {
    assert(argc == 1);
    const unsigned char* text = sqlite3_value_text(argv[0]);
    if (!text) {
        return;
    }

    if (is_ascii(text)) {
        char* code = refined_soundex((const char*)text);
        sqlite3_result_text(context, code, -1, free);
    } else {
        sqlite3_result_error(context, "argument should be ASCII string", -1);
    }
}
// SQL function taking one text argument and delegating to phonetic_hash().
//
// If sqlite3_value_text() yields a null pointer the function returns without
// setting any result. The text pointer and its byte length are passed to
// phonetic_hash(); a null return is reported as an out-of-memory error,
// and any other return is handed to SQLite with free() as its destructor.
static void fuzzy_phonetic(sqlite3_context* context, int argc, sqlite3_value** argv) {
    (void)argc;  // not inspected by this function
    const unsigned char* input = sqlite3_value_text(argv[0]);
    if (!input) {
        return;
    }

    // The byte length is queried after the text pointer has been obtained.
    unsigned char* hash = phonetic_hash(input, sqlite3_value_bytes(argv[0]));
    if (hash) {
        sqlite3_result_text(context, (char*)hash, -1, free);
        return;
    }
    sqlite3_result_error_nomem(context);
}
// SQL function taking two text arguments and delegating to edit_distance().
//
// The text pointers are passed straight through without a null check here;
// edit_distance() signals problems through negative return codes, which are
// mapped as follows: -3 -> out-of-memory error, -2 -> non-ASCII input error,
// any other negative value -> NULL input error. A non-negative return
// becomes the integer result.
static void fuzzy_editdist(sqlite3_context* context, int argc, sqlite3_value** argv) {
    (void)argc;  // not inspected by this function
    const char* first = (const char*)sqlite3_value_text(argv[0]);
    const char* second = (const char*)sqlite3_value_text(argv[1]);

    int rc = edit_distance(first, second, 0);
    if (rc >= 0) {
        sqlite3_result_int(context, rc);
        return;
    }

    switch (rc) {
        case -3:
            sqlite3_result_error_nomem(context);
            break;
        case -2:
            sqlite3_result_error(context, "non-ASCII input to editdist()", -1);
            break;
        default:
            sqlite3_result_error(context, "NULL input to editdist()", -1);
            break;
    }
}
// SQL function taking one text argument and delegating to transliterate().
//
// The text pointer and its byte length are passed to transliterate(); a null
// return is reported as an out-of-memory error, and any other return is
// handed to SQLite with free() as its destructor.
// NOTE(review): the input pointer is not checked for null before the call;
// presumably transliterate() tolerates that - confirm against its definition.
static void fuzzy_translit(sqlite3_context* context, int argc, sqlite3_value** argv) {
    (void)argc;  // not inspected by this function
    const unsigned char* input = sqlite3_value_text(argv[0]);
    // The byte length is queried after the text pointer has been obtained.
    int input_len = sqlite3_value_bytes(argv[0]);

    unsigned char* output = transliterate(input, input_len);
    if (output) {
        sqlite3_result_text(context, (char*)output, -1, free);
        return;
    }
    sqlite3_result_error_nomem(context);
}
// SQL function taking one text argument and delegating to script_code().
//
// The text pointer and its byte length are passed to script_code(), and the
// returned value becomes the integer result.
// NOTE(review): the input pointer is not checked for null before the call;
// presumably script_code() tolerates that - confirm against its definition.
static void fuzzy_script(sqlite3_context* context, int argc, sqlite3_value** argv) {
    (void)argc;  // not inspected by this function
    const unsigned char* input = sqlite3_value_text(argv[0]);
    // The byte length is queried after the text pointer has been obtained.
    int input_len = sqlite3_value_bytes(argv[0]);

    sqlite3_result_int(context, script_code(input, input_len));
}
// SQL function taking one text argument and delegating to caverphone().
//
// If sqlite3_value_text() yields a null pointer the function returns without
// setting any result. A string that fails is_ascii() produces an error
// result. Otherwise the string returned by caverphone() is handed to SQLite
// with free() as its destructor.
static void fuzzy_caver(sqlite3_context* context, int argc, sqlite3_value** argv) {
    assert(argc == 1);
    const unsigned char* text = sqlite3_value_text(argv[0]);
    if (!text) {
        return;
    }

    if (is_ascii(text)) {
        char* code = caverphone((const char*)text);
        sqlite3_result_text(context, code, -1, free);
    } else {
        sqlite3_result_error(context, "argument should be ASCII string", -1);
    }
}
// Registers every fuzzy-matching SQL function (and its alias) on `db`.
//
// Each function is registered with the UTF-8, innocuous and deterministic
// flags, in the order listed in the table below.
//
// Returns SQLITE_OK when every registration succeeds. If any call to
// sqlite3_create_function() fails, registration stops and that error code is
// returned, so the caller can detect a partially initialised extension
// instead of being told everything went fine. Functions registered before
// the failing one remain registered.
int fuzzy_init(sqlite3* db) {
    static const int flags = SQLITE_UTF8 | SQLITE_INNOCUOUS | SQLITE_DETERMINISTIC;

    // One row per SQL-visible name: name, argument count, implementation.
    static const struct {
        const char* name;
        int n_args;
        void (*func)(sqlite3_context*, int, sqlite3_value**);
    } functions[] = {
        {"fuzzy_damlev", 2, fuzzy_damlev},
        {"dlevenshtein", 2, fuzzy_damlev},
        {"fuzzy_hamming", 2, fuzzy_hamming},
        {"hamming", 2, fuzzy_hamming},
        {"fuzzy_jarowin", 2, fuzzy_jarowin},
        {"jaro_winkler", 2, fuzzy_jarowin},
        {"fuzzy_leven", 2, fuzzy_leven},
        {"levenshtein", 2, fuzzy_leven},
        {"fuzzy_osadist", 2, fuzzy_osadist},
        {"osa_distance", 2, fuzzy_osadist},
        {"fuzzy_soundex", 1, fuzzy_soundex},
        {"soundex", 1, fuzzy_soundex},
        {"fuzzy_rsoundex", 1, fuzzy_rsoundex},
        {"rsoundex", 1, fuzzy_rsoundex},
        {"fuzzy_editdist", 2, fuzzy_editdist},
        {"edit_distance", 2, fuzzy_editdist},
        {"fuzzy_phonetic", 1, fuzzy_phonetic},
        {"phonetic_hash", 1, fuzzy_phonetic},
        {"fuzzy_script", 1, fuzzy_script},
        {"script_code", 1, fuzzy_script},
        {"fuzzy_translit", 1, fuzzy_translit},
        {"translit", 1, fuzzy_translit},
        {"fuzzy_caver", 1, fuzzy_caver},
        {"caverphone", 1, fuzzy_caver},
    };

    for (size_t i = 0; i < sizeof(functions) / sizeof(functions[0]); i++) {
        int rc = sqlite3_create_function(db, functions[i].name, functions[i].n_args, flags, 0,
                                         functions[i].func, 0, 0);
        if (rc != SQLITE_OK) {
            // Surface the first failure rather than masking it with SQLITE_OK.
            return rc;
        }
    }
    return SQLITE_OK;
}