#include "simple_highlight.h"
#include <math.h>
#include <stdio.h>
#include <string.h>
SQLITE_EXTENSION_INIT3
/*
** Helpers that mark one or two parameters as intentionally unused by
** evaluating each as a void expression. Each macro is only defined when
** no earlier definition exists.
*/
#ifndef UNUSED_PARAM
#define UNUSED_PARAM(X) (void)(X)
#endif
#ifndef UNUSED_PARAM2
#define UNUSED_PARAM2(X, Y) (void)(X), (void)(Y)
#endif
/*
** Iterator over the phrase instances that fall in a single column of the
** current row. Each step (fts5CInstIterNext) yields one token range
** [iStart, iEnd]; instances that overlap or directly follow each other are
** merged into a single range. iStart and iEnd are -1 once no further
** instance is available.
*/
typedef struct CInstIter CInstIter;
struct CInstIter {
const Fts5ExtensionApi *pApi; /* API object used for xInst()/xPhraseSize() */
Fts5Context *pFts; /* Context handle passed back to every pApi call */
int iCol; /* Only instances in this column are reported */
int iInst; /* Index of the next instance to fetch with xInst() */
int nInst; /* Total instance count, as reported by xInstCount() */
int iStart; /* First token of the current range, or -1 */
int iEnd; /* Last token of the current range, or -1 */
};
/*
** Advance the iterator to the next range of matching tokens in column
** pIter->iCol.
**
** On return pIter->iStart/iEnd hold the first and last token of the range,
** or -1/-1 when there are no more instances in the column. Instances that
** overlap the range collected so far, or begin on the token immediately
** after it, are folded into the same range.
**
** Returns SQLITE_OK, or the error code reported by xInst().
*/
static int fts5CInstIterNext(CInstIter *pIter) {
  const Fts5ExtensionApi *api = pIter->pApi;
  int status = SQLITE_OK;

  pIter->iStart = -1;
  pIter->iEnd = -1;

  /* The instance index is only advanced once an instance has been consumed
  ** (skipped for being in another column, or merged into the range). */
  for (; pIter->iInst < pIter->nInst; pIter->iInst++) {
    int phrase;
    int col;
    int first;
    int last;

    status = api->xInst(pIter->pFts, pIter->iInst, &phrase, &col, &first);
    if (status != SQLITE_OK) break;
    if (col != pIter->iCol) continue;

    last = first - 1 + api->xPhraseSize(pIter->pFts, phrase);
    if (pIter->iStart < 0) {
      /* First instance of this range. */
      pIter->iStart = first;
      pIter->iEnd = last;
      continue;
    }

    /* A gap of at least one token ends the range; leave this instance
    ** for the next call. */
    if (first > pIter->iEnd + 1) break;

    if (last > pIter->iEnd) pIter->iEnd = last;
  }

  return status;
}
/*
** Initialise *pIter to walk the phrase instances of column iCol and
** position it on the first range (see fts5CInstIterNext).
**
** Returns SQLITE_OK, or an error code from xInstCount() or from the
** first iteration step.
*/
static int fts5CInstIterInit(const Fts5ExtensionApi *pApi, Fts5Context *pFts, int iCol, CInstIter *pIter) {
  int status;

  memset(pIter, 0, sizeof(*pIter));
  pIter->iCol = iCol;
  pIter->pFts = pFts;
  pIter->pApi = pApi;

  status = pApi->xInstCount(pFts, &pIter->nInst);
  if (status != SQLITE_OK) return status;

  return fts5CInstIterNext(pIter);
}
/*
** State shared between simple_highlight()/simple_snippet() and the
** tokenizer callback fts5HighlightCb() while building marked-up output.
*/
typedef struct HighlightContext HighlightContext;
struct HighlightContext {
CInstIter iter; /* Iterator over the token ranges to highlight */
int iPos; /* Position of the next non-colocated token */
int iRangeStart; /* First token to emit (used when iRangeEnd > 0) */
int iRangeEnd; /* Last token to emit; the range is active only if > 0 */
const char *zOpen; /* Text inserted before each highlighted range */
const char *zClose; /* Text inserted after each highlighted range */
const char *zIn; /* Input text being highlighted */
int nIn; /* Size of zIn in bytes */
int iOff; /* Byte offset in zIn up to which text has been copied */
char *zOut; /* Output built so far; released with sqlite3_free() */
};
/*
** Append the first n bytes of z to p->zOut. A negative n means "use
** strlen(z)".
**
** This is a no-op when *pRc already holds an error or when z is NULL.
** If the output buffer cannot be grown, *pRc is set to SQLITE_NOMEM.
*/
static void fts5HighlightAppend(int *pRc, HighlightContext *p, const char *z, int n) {
  int nByte = n;

  if (*pRc != SQLITE_OK || z == 0) return;

  if (nByte < 0) {
    nByte = (int)strlen(z);
  }

  /* "%z" consumes (and frees) the previous buffer. */
  p->zOut = sqlite3_mprintf("%z%.*s", p->zOut, nByte, z);
  if (!p->zOut) {
    *pRc = SQLITE_NOMEM;
  }
}
/*
** Tokenizer callback used by simple_highlight() and simple_snippet().
**
** pContext points to a HighlightContext. For every non-colocated token the
** callback copies pending input text to the output and inserts the
** open/close markers around the token ranges produced by p->iter.
**
** When p->iRangeEnd > 0, only tokens at positions
** [p->iRangeStart, p->iRangeEnd] contribute to the output.
**
** pToken and nToken are not used. iStartOff and iEndOff are the byte
** offsets of the token within p->zIn.
**
** Returns SQLITE_OK, SQLITE_NOMEM if the output could not be grown, or an
** error from advancing the instance iterator.
*/
static int fts5HighlightCb(void *pContext,
                           int tflags,
                           const char *pToken,
                           int nToken,
                           int iStartOff,
                           int iEndOff
) {
  HighlightContext *p = (HighlightContext *)pContext;
  int rc = SQLITE_OK;
  int iPos;

  /* The token text itself is never inspected; only its offsets matter. */
  (void)pToken;
  (void)nToken;

  /* Colocated tokens share a position with the previous token. */
  if (tflags & FTS5_TOKEN_COLOCATED) return SQLITE_OK;
  iPos = p->iPos++;

  if (p->iRangeEnd > 0) {
    if (iPos < p->iRangeStart || iPos > p->iRangeEnd) return SQLITE_OK;
    /* First token of a range that does not begin at token 0: start
    ** copying input from this token rather than from offset 0. */
    if (p->iRangeStart && iPos == p->iRangeStart) p->iOff = iStartOff;
  }

  if (iPos == p->iter.iStart) {
    /* Flush the text before the match, then open the highlight. */
    fts5HighlightAppend(&rc, p, &p->zIn[p->iOff], iStartOff - p->iOff);
    fts5HighlightAppend(&rc, p, p->zOpen, -1);
    p->iOff = iStartOff;
  }

  if (iPos == p->iter.iEnd) {
    /* The match began before the emitted range, so its opening marker
    ** has not been written yet. */
    if (p->iRangeEnd && p->iter.iStart < p->iRangeStart) {
      fts5HighlightAppend(&rc, p, p->zOpen, -1);
    }
    fts5HighlightAppend(&rc, p, &p->zIn[p->iOff], iEndOff - p->iOff);
    fts5HighlightAppend(&rc, p, p->zClose, -1);
    p->iOff = iEndOff;
    if (rc == SQLITE_OK) {
      rc = fts5CInstIterNext(&p->iter);
    }
  }

  if (p->iRangeEnd > 0 && iPos == p->iRangeEnd) {
    /* Last token of the range: flush remaining text and close a match
    ** that extends past the end of the range. */
    fts5HighlightAppend(&rc, p, &p->zIn[p->iOff], iEndOff - p->iOff);
    p->iOff = iEndOff;
    if (iPos >= p->iter.iStart && iPos < p->iter.iEnd) {
      fts5HighlightAppend(&rc, p, p->zClose, -1);
    }
  }

  return rc;
}
/*
** FTS5 auxiliary function taking three arguments: the column index, the
** text to insert before each match and the text to insert after it.
**
** The result is the column text with every matching token range wrapped
** in the open/close markers. No result is set when the column text is
** NULL and no error occurred. Any error code is reported through
** sqlite3_result_error_code().
*/
void simple_highlight(const Fts5ExtensionApi *pApi,
                      Fts5Context *pFts,
                      sqlite3_context *pCtx,
                      int nVal,
                      sqlite3_value **apVal
) {
  HighlightContext hl;
  int status;
  int column;

  if (nVal != 3) {
    sqlite3_result_error(pCtx, "wrong number of arguments to function highlight()", -1);
    return;
  }

  column = sqlite3_value_int(apVal[0]);
  memset(&hl, 0, sizeof(hl));
  hl.zOpen = (const char *)sqlite3_value_text(apVal[1]);
  hl.zClose = (const char *)sqlite3_value_text(apVal[2]);

  status = pApi->xColumnText(pFts, column, &hl.zIn, &hl.nIn);
  if (hl.zIn != 0) {
    if (status == SQLITE_OK) {
      status = fts5CInstIterInit(pApi, pFts, column, &hl.iter);
    }
    if (status == SQLITE_OK) {
      status = pApi->xTokenize(pFts, hl.zIn, hl.nIn, (void *)&hl, fts5HighlightCb);
    }

    /* Copy whatever follows the last highlighted token (no-op on error). */
    fts5HighlightAppend(&status, &hl, hl.zIn + hl.iOff, hl.nIn - hl.iOff);

    if (status == SQLITE_OK) {
      sqlite3_result_text(pCtx, (const char *)hl.zOut, -1, SQLITE_TRANSIENT);
    }
    sqlite3_free(hl.zOut);
  }

  if (status != SQLITE_OK) {
    sqlite3_result_error_code(pCtx, status);
  }
}
/*
** State shared between simple_highlight_pos() and the tokenizer callback
** fts5HighlightPosCb() while building the list of match offsets.
*/
typedef struct HighlightPosContext HighlightPosContext;
struct HighlightPosContext {
CInstIter iter; /* Iterator over the token ranges to report */
int iPos; /* Position of the next non-colocated token */
int iRangeStart; /* First token considered (used when iRangeEnd > 0) */
int iRangeEnd; /* Last token considered; the range is active only if > 0 */
const char *zIn; /* Input text being scanned */
int nIn; /* Size of zIn in bytes */
int iOff; /* Byte offset of the most recently recorded boundary */
char *zOut; /* Output built so far; released with sqlite3_free() */
};
/*
** Append the first n bytes of z to p->zOut. A negative n means "use
** strlen(z)". z is dereferenced without a NULL check.
**
** This is a no-op when *pRc already holds an error. If the output buffer
** cannot be grown, *pRc is set to SQLITE_NOMEM.
*/
static void fts5HighlightPosAppend(int *pRc, HighlightPosContext *p, const char *z, int n) {
  int nByte;

  if (*pRc != SQLITE_OK) return;

  nByte = (n < 0) ? (int)strlen(z) : n;

  /* "%z" consumes (and frees) the previous buffer. */
  p->zOut = sqlite3_mprintf("%z%.*s", p->zOut, nByte, z);
  if (!p->zOut) {
    *pRc = SQLITE_NOMEM;
  }
}
/*
** Append "<start>," to the output, where <start> is the decimal form of
** the given offset. Errors are reported through *pRc (see
** fts5HighlightPosAppend).
*/
static void fts5HighlightPosAppendStart(int *pRc, HighlightPosContext *p, int start) {
  char str[64];

  /* Bounded formatting: the write can never run past the buffer. */
  snprintf(str, sizeof(str), "%d", start);
  fts5HighlightPosAppend(pRc, p, str, -1);
  fts5HighlightPosAppend(pRc, p, ",", -1);
}
/*
** Append "<end>;" to the output, where <end> is the decimal form of the
** given offset. Errors are reported through *pRc (see
** fts5HighlightPosAppend).
*/
static void fts5HighlightPosAppendEnd(int *pRc, HighlightPosContext *p, int end) {
  char str[64];

  /* Bounded formatting: the write can never run past the buffer. */
  snprintf(str, sizeof(str), "%d", end);
  fts5HighlightPosAppend(pRc, p, str, -1);
  fts5HighlightPosAppend(pRc, p, ";", -1);
}
/*
** Tokenizer callback used by simple_highlight_pos().
**
** pContext points to a HighlightPosContext. For every token range
** produced by p->iter the callback appends "<start>," when the first
** token of the range is reached and "<end>;" when the last one is
** reached, using the byte offsets iStartOff / iEndOff supplied by the
** tokenizer.
**
** The p->iRangeEnd > 0 branches restrict reporting to token positions
** [p->iRangeStart, p->iRangeEnd]. simple_highlight_pos() leaves
** iRangeEnd at zero, so they are inactive for that caller.
**
** pToken and nToken are not used.
**
** Returns SQLITE_OK, SQLITE_NOMEM if the output could not be grown, or an
** error from advancing the instance iterator.
*/
static int fts5HighlightPosCb(void *pContext,
                              int tflags,
                              const char *pToken,
                              int nToken,
                              int iStartOff,
                              int iEndOff
) {
  HighlightPosContext *p = (HighlightPosContext *)pContext;
  int rc = SQLITE_OK;
  int iPos;

  /* The token text itself is never inspected; only its offsets matter. */
  (void)pToken;
  (void)nToken;

  /* Colocated tokens share a position with the previous token. */
  if (tflags & FTS5_TOKEN_COLOCATED) return SQLITE_OK;
  iPos = p->iPos++;

  if (p->iRangeEnd > 0) {
    if (iPos < p->iRangeStart || iPos > p->iRangeEnd) return SQLITE_OK;
    if (p->iRangeStart && iPos == p->iRangeStart) p->iOff = iStartOff;
  }

  if (iPos == p->iter.iStart) {
    fts5HighlightPosAppendStart(&rc, p, iStartOff);
    p->iOff = iStartOff;
  }

  if (iPos == p->iter.iEnd) {
    fts5HighlightPosAppendEnd(&rc, p, iEndOff);
    p->iOff = iEndOff;
    if (rc == SQLITE_OK) {
      rc = fts5CInstIterNext(&p->iter);
    }
  }

  if (p->iRangeEnd > 0 && iPos == p->iRangeEnd) {
    fts5HighlightPosAppendEnd(&rc, p, iEndOff);
    p->iOff = iEndOff;
  }

  return rc;
}
/*
** FTS5 auxiliary function taking one argument: the column index.
**
** The result is a string of "<start>,<end>;" entries, one per matching
** token range, built by fts5HighlightPosCb(). No result is set when the
** column text is NULL and no error occurred. Any error code is reported
** through sqlite3_result_error_code().
*/
void simple_highlight_pos(const Fts5ExtensionApi *pApi,
                          Fts5Context *pFts,
                          sqlite3_context *pCtx,
                          int nVal,
                          sqlite3_value **apVal
) {
  HighlightPosContext pos;
  int status;
  int column;

  if (nVal != 1) {
    sqlite3_result_error(pCtx, "wrong number of arguments to function highlight_pos()", -1);
    return;
  }

  column = sqlite3_value_int(apVal[0]);
  memset(&pos, 0, sizeof(pos));

  status = pApi->xColumnText(pFts, column, &pos.zIn, &pos.nIn);
  if (pos.zIn != 0) {
    if (status == SQLITE_OK) {
      status = fts5CInstIterInit(pApi, pFts, column, &pos.iter);
    }
    if (status == SQLITE_OK) {
      status = pApi->xTokenize(pFts, pos.zIn, pos.nIn, (void *)&pos, fts5HighlightPosCb);
    }
    if (status == SQLITE_OK) {
      sqlite3_result_text(pCtx, (const char *)pos.zOut, -1, SQLITE_TRANSIENT);
    }
    sqlite3_free(pos.zOut);
  }

  if (status != SQLITE_OK) {
    sqlite3_result_error_code(pCtx, status);
  }
}
/*
** Collects candidate snippet start positions while a document is being
** tokenized by fts5SnippetFinderCb(): token 0, plus every token that is
** preceded by whitespace following a '.' or ':' character.
*/
typedef struct Fts5SnippetFinder Fts5SnippetFinder;
struct Fts5SnippetFinder {
int iPos; /* Position of the next non-colocated token */
int nFirstAlloc; /* Number of entries allocated in aFirst[] */
int nFirst; /* Number of entries in use in aFirst[] */
int *aFirst; /* Candidate start token positions, in ascending order */
const char *zDoc; /* Text of the document being tokenized */
};
/*
** Append token position iAdd to p->aFirst[], growing the array when it is
** full (64 entries initially, doubling afterwards).
**
** Returns SQLITE_OK, or SQLITE_NOMEM if the array could not be grown; in
** that case the existing array is left untouched.
*/
static int fts5SnippetFinderAdd(Fts5SnippetFinder *p, int iAdd) {
  if (p->nFirst == p->nFirstAlloc) {
    int nGrown = 64;
    int *aGrown;

    if (p->nFirstAlloc != 0) {
      nGrown = p->nFirstAlloc * 2;
    }
    aGrown = (int *)sqlite3_realloc64(p->aFirst, nGrown * sizeof(int));
    if (!aGrown) return SQLITE_NOMEM;

    p->nFirstAlloc = nGrown;
    p->aFirst = aGrown;
  }

  p->aFirst[p->nFirst] = iAdd;
  p->nFirst++;
  return SQLITE_OK;
}
/*
** Tokenizer callback that records candidate snippet start positions in
** the Fts5SnippetFinder passed as pContext.
**
** Token 0 is always recorded. A later token is recorded when at least one
** whitespace character (space, tab, newline, carriage return) separates it
** from a preceding '.' or ':'. Colocated tokens are ignored.
**
** Returns SQLITE_OK, or SQLITE_NOMEM from fts5SnippetFinderAdd().
*/
static int fts5SnippetFinderCb(void *pContext,
                               int tflags,
                               const char *pToken,
                               int nToken,
                               int iStartOff,
                               int iEndOff
) {
  Fts5SnippetFinder *pFinder = (Fts5SnippetFinder *)pContext;
  int status = SQLITE_OK;

  UNUSED_PARAM2(pToken, nToken);
  UNUSED_PARAM(iEndOff);

  if (tflags & FTS5_TOKEN_COLOCATED) return SQLITE_OK;

  if (pFinder->iPos <= 0) {
    status = fts5SnippetFinderAdd(pFinder, 0);
  } else {
    int iScan = iStartOff - 1;
    char chPrev = 0;

    /* Walk backwards over the whitespace that precedes the token. */
    while (iScan >= 0) {
      chPrev = pFinder->zDoc[iScan];
      if (chPrev != ' ' && chPrev != '\t' && chPrev != '\n' && chPrev != '\r') break;
      iScan--;
    }

    /* Require at least one whitespace byte between the punctuation and
    ** the token. */
    if (iScan != iStartOff - 1 && (chPrev == '.' || chPrev == ':')) {
      status = fts5SnippetFinderAdd(pFinder, pFinder->iPos);
    }
  }

  pFinder->iPos++;
  return status;
}
/*
** Score the candidate snippet made of nToken tokens starting at token
** iPos of column iCol.
**
** Every phrase instance that starts inside the window adds 1000 points
** the first time its phrase is seen and 1 point for each repeat. aSeen[]
** is indexed by phrase number and is updated in place; the caller clears
** it before each call.
**
** *pnScore receives the score. If piPos is not NULL, *piPos receives a
** start position that centres the matched span within the window,
** clamped so the window stays within [0, nDocsize].
**
** Returns SQLITE_OK, or an error code from xInstCount()/xInst().
*/
static int fts5SnippetScore(const Fts5ExtensionApi *pApi,
                            Fts5Context *pFts,
                            int nDocsize,
                            unsigned char *aSeen,
                            int iCol,
                            int iPos,
                            int nToken,
                            int *pnScore,
                            int *piPos
) {
  int rc;
  int i;
  int ip = 0;
  int ic = 0;
  int iOff = 0;
  int iFirst = -1;
  int nInst = 0; /* Stays 0 if xInstCount() fails, so the loop never reads garbage */
  int nScore = 0;
  int iLast = 0;
  sqlite3_int64 iEnd = (sqlite3_int64)iPos + nToken;

  rc = pApi->xInstCount(pFts, &nInst);
  for (i = 0; i < nInst && rc == SQLITE_OK; i++) {
    rc = pApi->xInst(pFts, i, &ip, &ic, &iOff);
    if (rc == SQLITE_OK && ic == iCol && iOff >= iPos && iOff < iEnd) {
      nScore += (aSeen[ip] ? 1 : 1000);
      aSeen[ip] = 1;
      if (iFirst < 0) iFirst = iOff;
      iLast = iOff + pApi->xPhraseSize(pFts, ip);
    }
  }

  *pnScore = nScore;
  if (piPos) {
    /* Computed in 64 bits so an extreme nToken cannot overflow int. */
    sqlite3_int64 iAdj = iFirst - ((sqlite3_int64)nToken - (iLast - iFirst)) / 2;
    if ((iAdj + nToken) > nDocsize) iAdj = (sqlite3_int64)nDocsize - nToken;
    if (iAdj < 0) iAdj = 0;
    *piPos = (int)iAdj;
  }

  return rc;
}
/*
** Return the text of pVal, or an empty string when sqlite3_value_text()
** yields NULL.
*/
static const char *fts5ValueToText(sqlite3_value *pVal) {
  const char *zText = (const char *)sqlite3_value_text(pVal);
  if (zText == 0) {
    return "";
  }
  return zText;
}
/*
** FTS5 auxiliary function taking five arguments:
**
**   apVal[0]  column index, or a negative value to consider every column
**   apVal[1]  text inserted before each match
**   apVal[2]  text inserted after each match
**   apVal[3]  ellipsis text marking a truncated start or end
**   apVal[4]  number of tokens in the snippet
**
** Each phrase instance is scored twice with fts5SnippetScore(): once
** for a window centred on the instance, and once for a window starting at
** the closest preceding candidate start found by fts5SnippetFinderCb().
** The latter receives a bonus of 100, or 120 when it is the start of the
** column. The highest scoring window is then rendered with
** fts5HighlightCb().
**
** NOTE(review): the token count is used as supplied; it is not range
** checked here.
**
** Any error, including SQLITE_NOMEM and SQLITE_CORRUPT_VTAB, is reported
** through sqlite3_result_error_code().
*/
void simple_snippet(const Fts5ExtensionApi *pApi,
                    Fts5Context *pFts,
                    sqlite3_context *pCtx,
                    int nVal,
                    sqlite3_value **apVal
) {
  HighlightContext ctx;
  int rc = SQLITE_OK;
  int iCol;
  const char *zEllips;
  int nToken;
  int nInst = 0;
  int i;
  int nPhrase;
  unsigned char *aSeen = 0;
  int iBestCol;
  int iBestStart = 0;
  int nBestScore = 0;
  int nColSize = 0;
  Fts5SnippetFinder sFinder;
  int nCol;

  if (nVal != 5) {
    const char *zErr = "wrong number of arguments to function snippet()";
    sqlite3_result_error(pCtx, zErr, -1);
    return;
  }

  nCol = pApi->xColumnCount(pFts);
  memset(&ctx, 0, sizeof(HighlightContext));
  iCol = sqlite3_value_int(apVal[0]);
  ctx.zOpen = fts5ValueToText(apVal[1]);
  ctx.zClose = fts5ValueToText(apVal[2]);
  zEllips = fts5ValueToText(apVal[3]);
  nToken = sqlite3_value_int(apVal[4]);
  iBestCol = (iCol >= 0 ? iCol : 0);

  /* sqlite3_malloc() returns NULL for a zero-byte request, so only treat
  ** a NULL result as out-of-memory when there are phrases to track. With
  ** no phrases, nInst stays 0 and aSeen is never dereferenced. */
  nPhrase = pApi->xPhraseCount(pFts);
  if (nPhrase > 0) {
    aSeen = (unsigned char *)sqlite3_malloc(nPhrase);
    if (aSeen == 0) {
      rc = SQLITE_NOMEM;
    } else {
      rc = pApi->xInstCount(pFts, &nInst);
    }
  }

  memset(&sFinder, 0, sizeof(Fts5SnippetFinder));

  /* Stop at the first error so that it is not overwritten by the next
  ** column's xColumnText() call. */
  for (i = 0; i < nCol && rc == SQLITE_OK; i++) {
    if (iCol < 0 || iCol == i) {
      int nDoc;
      int nDocsize;
      int ii;
      sFinder.iPos = 0;
      sFinder.nFirst = 0;
      rc = pApi->xColumnText(pFts, i, &sFinder.zDoc, &nDoc);
      if (rc != SQLITE_OK) break;
      rc = pApi->xTokenize(pFts, sFinder.zDoc, nDoc, (void *)&sFinder, fts5SnippetFinderCb);
      if (rc != SQLITE_OK) break;
      rc = pApi->xColumnSize(pFts, i, &nDocsize);
      if (rc != SQLITE_OK) break;

      for (ii = 0; rc == SQLITE_OK && ii < nInst; ii++) {
        int ip, ic, io;
        int iAdj;
        int nScore;
        int jj;

        rc = pApi->xInst(pFts, ii, &ip, &ic, &io);
        /* ip/ic/io are only meaningful when xInst() succeeded. */
        if (rc != SQLITE_OK) break;
        if (ic != i) continue;
        if (io > nDocsize) {
          rc = SQLITE_CORRUPT_VTAB;
          break;
        }

        /* Candidate 1: a window centred on this instance. */
        memset(aSeen, 0, nPhrase);
        rc = fts5SnippetScore(pApi, pFts, nDocsize, aSeen, i, io, nToken, &nScore, &iAdj);
        if (rc == SQLITE_OK && nScore > nBestScore) {
          nBestScore = nScore;
          iBestCol = i;
          iBestStart = iAdj;
          nColSize = nDocsize;
        }

        /* Candidate 2: a window starting at the last recorded start
        ** position that is not after this instance. */
        if (rc == SQLITE_OK && sFinder.nFirst && nDocsize > nToken) {
          for (jj = 0; jj < (sFinder.nFirst - 1); jj++) {
            if (sFinder.aFirst[jj + 1] > io) break;
          }
          if (sFinder.aFirst[jj] < io) {
            memset(aSeen, 0, nPhrase);
            rc = fts5SnippetScore(pApi, pFts, nDocsize, aSeen, i, sFinder.aFirst[jj], nToken, &nScore, 0);
            nScore += (sFinder.aFirst[jj] == 0 ? 120 : 100);
            if (rc == SQLITE_OK && nScore > nBestScore) {
              nBestScore = nScore;
              iBestCol = i;
              iBestStart = sFinder.aFirst[jj];
              nColSize = nDocsize;
            }
          }
        }
      }
    }
  }

  if (rc == SQLITE_OK) {
    rc = pApi->xColumnText(pFts, iBestCol, &ctx.zIn, &ctx.nIn);
  }
  if (rc == SQLITE_OK && nColSize == 0) {
    rc = pApi->xColumnSize(pFts, iBestCol, &nColSize);
  }
  if (ctx.zIn) {
    if (rc == SQLITE_OK) {
      rc = fts5CInstIterInit(pApi, pFts, iBestCol, &ctx.iter);
    }
    ctx.iRangeStart = iBestStart;
    ctx.iRangeEnd = iBestStart + nToken - 1;

    if (iBestStart > 0) {
      fts5HighlightAppend(&rc, &ctx, zEllips, -1);
    }

    /* Skip the matches that lie entirely before the snippet. */
    while (ctx.iter.iStart >= 0 && ctx.iter.iStart < iBestStart && rc == SQLITE_OK) {
      rc = fts5CInstIterNext(&ctx.iter);
    }
    if (rc == SQLITE_OK) {
      rc = pApi->xTokenize(pFts, ctx.zIn, ctx.nIn, (void *)&ctx, fts5HighlightCb);
    }

    /* Either copy the tail of the column, or mark the truncation. */
    if (ctx.iRangeEnd >= (nColSize - 1)) {
      fts5HighlightAppend(&rc, &ctx, &ctx.zIn[ctx.iOff], ctx.nIn - ctx.iOff);
    } else {
      fts5HighlightAppend(&rc, &ctx, zEllips, -1);
    }
  }

  if (rc == SQLITE_OK) {
    sqlite3_result_text(pCtx, (const char *)ctx.zOut, -1, SQLITE_TRANSIENT);
  } else {
    sqlite3_result_error_code(pCtx, rc);
  }
  sqlite3_free(ctx.zOut);
  sqlite3_free(aSeen);
  sqlite3_free(sFinder.aFirst);
}