"use strict";
const exists = (val) => val !== undefined;
class MemoryWalker {
constructor(buffer, next = 0) {
this.buffer = buffer;
this.next = next;
this.dataView = new DataView(buffer);
this.uint8Array = new Uint8Array(buffer);
}
jumpTo(ptr) {
this.next = ptr;
return this;
}
forkAndJump(ptr) {
return new MemoryWalker(this.buffer, ptr);
}
skip(bytes) {
this.next += bytes;
return this;
}
readAndDereferencePointer() {
return new MemoryWalker(this.buffer, this.readUInt32LE());
}
readSlice(len) {
return this.buffer.slice(this.next, this.next += len);
}
readBoolean() {
return !!this.dataView.getUint8(this.next++);
}
readUInt8() {
return this.dataView.getUint8(this.next++);
}
readInt32LE() {
const val = this.dataView.getInt32(this.next, true);
this.next += 4;
return val;
}
readInt32BE() {
const val = this.dataView.getInt32(this.next, false);
this.next += 4;
return val;
}
readUInt32LE() {
const val = this.dataView.getUint32(this.next, true);
this.next += 4;
return val;
}
readUInt32BE() {
const val = this.dataView.getUint32(this.next, false);
this.next += 4;
return val;
}
readInt64LE() {
const val = this.dataView.getBigInt64(this.next, true);
this.next += 8;
return val;
}
readUInt64LE() {
const val = this.dataView.getBigUint64(this.next, true);
this.next += 8;
return val;
}
readDoubleLE() {
const val = this.dataView.getFloat64(this.next, true);
this.next += 8;
return val;
}
readNullTerminatedString() {
let end = this.next;
while (this.uint8Array[end]) {
end++;
}
const val = textDecoder.decode(this.uint8Array.slice(this.next, end));
this.next = end + 1;
return val;
}
writeUInt32LE(val) {
this.dataView.setUint32(this.next, val, true);
this.next += 4;
return this;
}
writeAll(src) {
this.uint8Array.set(src, this.next);
this.next += src.byteLength;
return this;
}
}
const SPECIFIER_PARSERS = [
{ length: new Set(['hh', 'h', 'l', 'z', 't', '']), type: new Set('dic'), parse: mem => mem.readInt32LE() },
{ length: new Set(['hh', 'h', 'l', 'z', 't', '']), type: new Set('uxXop'), parse: mem => mem.readUInt32LE() },
{ length: new Set(['ll', 'j']), type: new Set('di'), parse: mem => mem.readInt64LE() },
{ length: new Set(['ll', 'j']), type: new Set('uxXop'), parse: mem => mem.readUInt64LE() },
{ length: new Set(['L', '']), type: new Set('fFeEgGaA'), parse: mem => mem.readDoubleLE() },
{ length: new Set(), type: new Set('s'), parse: mem => mem.readAndDereferencePointer().readNullTerminatedString() },
{ length: new Set(), type: new Set('%'), parse: () => '%' },
];
const SPECIFIER_FORMATTERS = {
'%': () => '%',
d: (val) => val.toString(),
i: (val) => val.toString(),
u: (val) => val.toString(),
f: (val) => val.toLocaleString('fullwide', { useGrouping: false, maximumFractionDigits: 20 }),
F: (val) => val.toLocaleString('fullwide', { useGrouping: false, maximumFractionDigits: 20 }).toUpperCase(),
e: (val) => val.toExponential(2),
E: (val) => val.toExponential(2).toUpperCase(),
g: (val) => val.toString(),
G: (val) => val.toString().toUpperCase(),
x: (val) => val.toString(16),
X: (val) => val.toString(16).toUpperCase(),
o: (val) => val.toString(8),
s: (val) => val,
c: (val) => String.fromCharCode(val),
p: (val) => val.toString(16),
a: (val) => val.toString(16),
A: (val) => val.toString(16).toUpperCase(),
};
const formatFromVarargs = (mem) => mem
.readAndDereferencePointer()
.readNullTerminatedString()
.replace(/%([-+ 0'#]*)((?:[0-9]+|\*)?)((?:\.(?:[0-9]+|\*))?)((?:hh|h|l|ll|L|z|j|t|I|I32|I64|q)?)([%diufFeEgGxXoscpaA])/g, ((spec, flags, width, precision, length, type) => {
if (flags || width || precision) {
throw new Error(`Unsupported format specifier "${spec}"`);
}
const parser = SPECIFIER_PARSERS.find(p => p.length.has(length) && p.type.has(type));
if (!parser) {
throw new SyntaxError(`Invalid format specifier "${spec}"`);
}
const rawValue = parser.parse(mem);
return SPECIFIER_FORMATTERS[type](rawValue);
}));
const wasmMemory = new WebAssembly.Memory({ initial: 1024 });
const wasmInstance = new WebAssembly.Instance(QUERY_RUNNER_WASM, {
env: {
_wasm_import_log(argsPtr) {
console.log(formatFromVarargs(queryRunnerMemory.forkAndJump(argsPtr)));
},
_wasm_import_error(argsPtr) {
throw new Error(`[fprintf] ${formatFromVarargs(queryRunnerMemory.forkAndJump(argsPtr))}`);
},
memory: wasmMemory,
},
});
const queryRunner = wasmInstance.exports;
const queryRunnerMemory = new MemoryWalker(wasmMemory.buffer);
const textEncoder = new TextEncoder();
const textDecoder = new TextDecoder();
const CORS_HEADERS = {
'Access-Control-Allow-Origin': '*',
'Access-Control-Allow-Methods': 'GET, HEAD, POST, OPTIONS',
'Access-Control-Allow-Headers': 'Content-Type',
};
const responsePreflight = () => new Response(null, {
headers: CORS_HEADERS,
});
const responseError = (error, status = 400) => new Response(JSON.stringify({ error }), {
status, headers: {
'Content-Type': 'application/json',
...CORS_HEADERS,
},
});
const responseRawJson = (json, status = 200) => new Response(json, {
status, headers: {
'Content-Type': 'application/json',
...CORS_HEADERS,
},
});
const responseNoResults = () => responseRawJson(`{"results":[],"continuation":null,"total":0}`);
const allocateKey = (key) => {
if (typeof key == 'string') {
const encoded = textEncoder.encode(key);
const len = encoded.length;
const ptr = queryRunner.malloc(len);
queryRunnerMemory.forkAndJump(ptr).writeAll(encoded);
return { ptr, len };
}
else {
return key;
}
};
const findContainingChunk = (key) => {
let chunkRefPtr;
let cKey = allocateKey(key);
if (typeof cKey == 'number') {
chunkRefPtr = queryRunner.find_chunk_containing_doc(cKey);
}
else {
chunkRefPtr = queryRunner.find_chunk_containing_term(cKey.ptr, cKey.len);
}
console.log('Found containing chunk');
if (chunkRefPtr === 0) {
return undefined;
}
const chunkRef = queryRunnerMemory.forkAndJump(chunkRefPtr);
const chunkId = chunkRef.readUInt32LE();
const chunkMidPos = chunkRef.readUInt32LE();
return { id: chunkId, midPos: chunkMidPos };
};
const compareKey = (a, b) => {
return typeof a == 'number' ? a - b : a.localeCompare(b);
};
const extractKeyAtPosInBstChunkJs = (chunk, type) => {
if (type == 'string') {
const len = chunk.readUInt8();
return textDecoder.decode(chunk.readSlice(len));
}
else {
return chunk.readUInt32LE();
}
};
const searchInBstChunkJs = (chunk, targetKey) => {
while (true) {
const currentKey = extractKeyAtPosInBstChunkJs(chunk, typeof targetKey);
const leftPos = chunk.readInt32LE();
const rightPos = chunk.readInt32LE();
const valueLen = chunk.readUInt32LE();
const cmp = compareKey(targetKey, currentKey);
if (cmp < 0) {
if (leftPos == -1) {
break;
}
chunk.jumpTo(leftPos);
}
else if (cmp == 0) {
console.log('Found entry in chunk');
return chunk.readSlice(valueLen);
}
else {
if (rightPos == -1) {
break;
}
chunk.jumpTo(rightPos);
}
}
console.log('Searched failed to find entry in chunk');
return undefined;
};
const findAllInChunks = async (chunkIdPrefix, keys) => {
const results = [];
const chunks = new Map();
for (const key of keys) {
const chunkRef = findContainingChunk(key);
const resultIdx = results.push(undefined) - 1;
if (!chunkRef) {
continue;
}
if (!chunks.has(chunkRef.id)) {
chunks.set(chunkRef.id, {
keys: [],
midPos: chunkRef.midPos,
});
}
chunks.get(chunkRef.id).keys.push([key, resultIdx]);
}
for (const [chunkId, { keys, midPos }] of chunks.entries()) {
const chunkData = await fetchChunk(chunkIdPrefix, chunkId);
for (const [key, resultIdx] of keys) {
const entry = searchInBstChunkJs(new MemoryWalker(chunkData).jumpTo(midPos), key);
if (!entry) {
continue;
}
results[resultIdx] = entry;
}
}
return results;
};
const parseQuery = (termsRaw) => {
const modeTerms = [
Array(),
Array(),
Array(),
];
for (const value of termsRaw) {
const matches = /^([012])_([^&]+)(?:&|$)/.exec(value);
if (!matches) {
return;
}
const mode = Number.parseInt(matches[1], 10);
const term = decodeURIComponent(matches[2]);
modeTerms[mode].push(term);
}
return modeTerms;
};
const readResult = (result) => {
const continuation = result.readInt32LE();
const total = result.readUInt32LE();
const count = result.readUInt8();
result.skip(3);
const documents = [];
for (let resultNo = 0; resultNo < count; resultNo++) {
const docId = result.readUInt32LE();
documents.push(docId);
}
return { continuation: continuation == -1 ? null : continuation, total, documents };
};
const findSerialisedTermBitmaps = (query) =>
Promise.all(query.map(modeTerms => findAllInChunks('terms/', modeTerms)));
const buildIndexQuery = async (firstRank, modeTermBitmaps) => {
const bitmapCount = modeTermBitmaps.reduce((count, modeTerms) => count + modeTerms.length, 0);
const input = new MemoryWalker(new ArrayBuffer(4 + (bitmapCount * 2 + 3) * 4));
input.writeUInt32LE(firstRank);
for (const mode of modeTermBitmaps) {
for (const bitmap of mode) {
const ptr = queryRunner.malloc(bitmap.byteLength);
queryRunnerMemory.forkAndJump(ptr).writeAll(new Uint8Array(bitmap));
input
.writeUInt32LE(bitmap.byteLength)
.writeUInt32LE(ptr);
}
input.writeUInt32LE(0);
}
return new Uint8Array(input.buffer);
};
const executePostingsListQuery = (queryData) => {
const inputPtr = queryRunner.index_query_malloc();
queryRunnerMemory.forkAndJump(inputPtr).writeAll(queryData);
const outputPtr = queryRunner.index_query(inputPtr);
return outputPtr == 0 ? undefined : readResult(queryRunnerMemory.forkAndJump(outputPtr));
};
const getAsciiBytes = (str) => new Uint8Array(str.split('').map(c => c.charCodeAt(0)));
const COMMA = getAsciiBytes(',');
const handleSearch = async (url) => {
const query = parseQuery(url.searchParams.getAll('t'));
if (!query) {
return responseError('Malformed query');
}
const continuation = Math.max(0, Number.parseInt(url.searchParams.get('c') || '', 10) || 0);
const termCount = query.reduce((count, modeTerms) => count + modeTerms.length, 0);
if (termCount > MAX_QUERY_TERMS) {
return responseError('Too many terms', 413);
}
const modeTermBitmaps = await findSerialisedTermBitmaps(query);
console.log('Bit sets retrieved');
if (modeTermBitmaps[0].some(bm => !bm)) {
return responseNoResults();
}
modeTermBitmaps[1] = modeTermBitmaps[1].filter(bm => bm);
modeTermBitmaps[2] = modeTermBitmaps[2].filter(bm => bm);
let result;
if (modeTermBitmaps.every(modeTerms => !modeTerms.length)) {
console.log('Using default results');
const after = continuation + MAX_RESULTS;
result = {
continuation: DOCUMENT_COUNT > after ? after : null,
documents: Array.from({ length: MAX_RESULTS }, (_, i) => continuation + i).filter(docId => docId >= 0 && docId < DOCUMENT_COUNT),
total: DOCUMENT_COUNT,
};
}
else {
queryRunner.reset();
const indexQueryData = await buildIndexQuery(continuation, modeTermBitmaps);
console.log('Query built');
const maybeResult = await executePostingsListQuery(indexQueryData);
if (!maybeResult) {
throw new Error(`Failed to execute query`);
}
result = maybeResult;
console.log('Query executed');
}
const jsonResPrefix = getAsciiBytes(`{"total":${result.total},"continuation":${result.continuation},"results":[`);
const jsonResSuffix = getAsciiBytes(`]}`);
const documents = (await findAllInChunks('documents/', result.documents))
.filter(exists)
.map(d => new Uint8Array(d));
console.log('Documents fetched');
const stream = new TransformStream();
const writer = stream.writable.getWriter();
writer.write(jsonResPrefix);
for (let i = 0; i < documents.length; i++) {
if (i !== 0) {
writer.write(COMMA);
}
writer.write(documents[i]);
}
writer.write(jsonResSuffix);
writer.releaseLock();
return new Response(stream.readable, {
status: 200,
headers: {
'Content-Type': 'application/json',
...CORS_HEADERS,
},
});
};
const requestHandler = async (request) => {
if (request.method == 'OPTIONS') {
return responsePreflight();
}
const url = new URL(request.url);
return url.pathname === '/search'
? handleSearch(url)
: new Response(null, { status: 404 });
};
self.addEventListener('fetch', event => {
event.respondWith(requestHandler(event.request));
});