const sdk = require('node-appwrite');
const elid = require('../../pkg-node/elid');
module.exports = async function ({ req, res, log, error }) {
try {
const body = JSON.parse(req.bodyRaw || '{}');
const {
query,
collectionId,
searchField = 'name',
maxDistance = 10, limit = 20
} = body;
if (!query || !collectionId) {
return res.json({
success: false,
error: 'query and collectionId are required'
}, 400);
}
log(`SimHash query: "${query}" in collection ${collectionId}`);
const client = new sdk.Client()
.setEndpoint(process.env.APPWRITE_FUNCTION_ENDPOINT)
.setProject(process.env.APPWRITE_FUNCTION_PROJECT_ID)
.setKey(process.env.APPWRITE_API_KEY);
const database = new sdk.Databases(client);
const queryHash = elid.simhash(query);
log(`Query SimHash: ${queryHash}`);
const rangeSize = Math.pow(2, maxDistance); const minHash = Math.max(0, queryHash - rangeSize);
const maxHash = queryHash + rangeSize;
try {
const documents = await database.listDocuments(
process.env.DATABASE_ID,
collectionId,
[
sdk.Query.greaterThanEqual('simhash', minHash),
sdk.Query.lessThanEqual('simhash', maxHash),
sdk.Query.limit(100)
]
);
const matches = documents.documents
.map(doc => ({
document: doc,
storedHash: doc.simhash,
distance: elid.simhashDistance(queryHash, doc.simhash)
}))
.filter(item => item.distance <= maxDistance)
.sort((a, b) => a.distance - b.distance)
.slice(0, limit);
log(`Found ${matches.length} matches within distance ${maxDistance}`);
return res.json({
success: true,
query,
queryHash,
results: matches.map(m => ({
documentId: m.document.$id,
name: m.document[searchField],
simhash: m.storedHash,
distance: m.distance,
similarity: 1.0 - (m.distance / 64.0),
document: m.document
})),
metadata: {
totalCandidates: documents.documents.length,
matchesFound: matches.length,
maxDistance,
queryHash
}
});
} catch (dbError) {
log('SimHash field not found, computing hashes on the fly...');
const documents = await database.listDocuments(
process.env.DATABASE_ID,
collectionId,
[]
);
const matches = documents.documents
.map(doc => ({
document: doc,
hash: elid.simhash(doc[searchField] || ''),
distance: elid.simhashDistance(queryHash, elid.simhash(doc[searchField] || ''))
}))
.filter(item => item.distance <= maxDistance)
.sort((a, b) => a.distance - b.distance)
.slice(0, limit);
return res.json({
success: true,
query,
queryHash,
results: matches.map(m => ({
documentId: m.document.$id,
name: m.document[searchField],
computedHash: m.hash,
distance: m.distance,
similarity: 1.0 - (m.distance / 64.0),
document: m.document
})),
metadata: {
totalCandidates: documents.documents.length,
matchesFound: matches.length,
maxDistance,
queryHash,
note: 'Hashes computed on the fly - consider adding simhash field for better performance'
}
});
}
} catch (err) {
error(`Error in SimHash search: ${err.message}`);
return res.json({
success: false,
error: err.message
}, 500);
}
};
/**
 * Computes and stores the `simhash` attribute for every document in a
 * collection.
 *
 * Documents are fetched page by page (`Query.limit` + `Query.cursorAfter`) so
 * the whole collection is processed rather than only the first page the
 * server returns for an unqualified list call. Each document is updated
 * sequentially with the hash of its `searchField` value (empty string when
 * the field is missing or falsy).
 *
 * @param {object} options
 * @param {string} options.collectionId - Collection to backfill.
 * @param {string} [options.searchField='name'] - Attribute whose text is hashed.
 * @param {number} [options.pageSize=100] - Documents requested per page.
 * @returns {Promise<{success: boolean, updatedCount: number, error?: string}>}
 *   `updatedCount` is the number of documents actually updated; on failure it
 *   reports the progress made before the error and `error` holds the message.
 */
async function initializeSimHashes({ collectionId, searchField = 'name', pageSize = 100 }) {
  const client = new sdk.Client()
    .setEndpoint(process.env.APPWRITE_FUNCTION_ENDPOINT)
    .setProject(process.env.APPWRITE_FUNCTION_PROJECT_ID)
    .setKey(process.env.APPWRITE_API_KEY);
  const database = new sdk.Databases(client);

  let updatedCount = 0;
  try {
    let cursor = null;
    let fetched;
    do {
      const queries = [sdk.Query.limit(pageSize)];
      if (cursor !== null) {
        // Resume right after the last document of the previous page.
        queries.push(sdk.Query.cursorAfter(cursor));
      }
      const page = await database.listDocuments(
        process.env.DATABASE_ID,
        collectionId,
        queries
      );
      fetched = page.documents;

      for (const doc of fetched) {
        const hash = elid.simhash(doc[searchField] || '');
        await database.updateDocument(
          process.env.DATABASE_ID,
          collectionId,
          doc.$id,
          { simhash: hash }
        );
        updatedCount += 1;
      }

      if (fetched.length > 0) {
        cursor = fetched[fetched.length - 1].$id;
      }
      // A short (or empty) page means the end of the collection was reached;
      // the `> 0` guard also prevents looping forever on a zero page size.
    } while (fetched.length > 0 && fetched.length >= pageSize);

    return {
      success: true,
      updatedCount
    };
  } catch (err) {
    return {
      success: false,
      error: err.message,
      updatedCount
    };
  }
}
// Attach the backfill helper as a property of the exported handler function,
// so it is reachable as `require(...).initializeSimHashes`.
module.exports.initializeSimHashes = initializeSimHashes;