import { globalMemoryManager } from './memory-manager.js';
export class VectorizedOperations {
static UNROLL_FACTOR = 4;
static PREFETCH_DISTANCE = 64;
static dotProduct(a, b, hints) {
const n = a.length;
const unrollFactor = hints?.unroll || this.UNROLL_FACTOR;
let sum = 0;
let i = 0;
for (; i <= n - unrollFactor; i += unrollFactor) {
if (hints?.prefetch && i + this.PREFETCH_DISTANCE < n) {
const prefetchIndex = i + this.PREFETCH_DISTANCE;
void a[prefetchIndex]; void b[prefetchIndex];
}
sum += a[i] * b[i] +
a[i + 1] * b[i + 1] +
a[i + 2] * b[i + 2] +
a[i + 3] * b[i + 3];
}
for (; i < n; i++) {
sum += a[i] * b[i];
}
return sum;
}
static vectorAdd(a, b, result, hints) {
const n = a.length;
const blockSize = hints?.blocking.enabled ? hints.blocking.size : 1024;
if (hints?.blocking.enabled && n > blockSize) {
for (let blockStart = 0; blockStart < n; blockStart += blockSize) {
const blockEnd = Math.min(blockStart + blockSize, n);
this.vectorAddBlock(a, b, result, blockStart, blockEnd, hints);
}
}
else {
this.vectorAddBlock(a, b, result, 0, n, hints);
}
}
static vectorAddBlock(a, b, result, start, end, hints) {
const unrollFactor = hints?.unroll || this.UNROLL_FACTOR;
let i = start;
for (; i <= end - unrollFactor; i += unrollFactor) {
result[i] = a[i] + b[i];
result[i + 1] = a[i + 1] + b[i + 1];
result[i + 2] = a[i + 2] + b[i + 2];
result[i + 3] = a[i + 3] + b[i + 3];
}
for (; i < end; i++) {
result[i] = a[i] + b[i];
}
}
static async streamingOperation(operation, vectors, chunkSize = 10000) {
const n = vectors[0].length;
if (operation === 'dot' && vectors.length === 2) {
let sum = 0;
for (let start = 0; start < n; start += chunkSize) {
const end = Math.min(start + chunkSize, n);
const chunkA = vectors[0].slice(start, end);
const chunkB = vectors[1].slice(start, end);
sum += this.dotProduct(chunkA, chunkB);
if (start % (chunkSize * 10) === 0) {
await new Promise(resolve => setTimeout(resolve, 0));
}
}
return sum;
}
else if (operation === 'add' && vectors.length === 2) {
const result = globalMemoryManager.acquireTypedArray('float64', n);
for (let start = 0; start < n; start += chunkSize) {
const end = Math.min(start + chunkSize, n);
const chunkA = vectors[0].slice(start, end);
const chunkB = vectors[1].slice(start, end);
const chunkResult = new Array(end - start);
this.vectorAdd(chunkA, chunkB, chunkResult);
for (let i = 0; i < chunkResult.length; i++) {
result[start + i] = chunkResult[i];
}
if (start % (chunkSize * 10) === 0) {
await new Promise(resolve => setTimeout(resolve, 0));
}
}
return Array.from(result);
}
throw new Error(`Unsupported streaming operation: ${operation}`);
}
}
export class OptimizedMatrixMultiplication {
static sparseMatVec(matrix, vector, result, blockSize = 1000) {
const rows = matrix.getRows();
for (let blockStart = 0; blockStart < rows; blockStart += blockSize) {
const blockEnd = Math.min(blockStart + blockSize, rows);
for (let row = blockStart; row < blockEnd; row++) {
let sum = 0;
for (const entry of matrix.rowEntries(row)) {
sum += entry.val * vector[entry.col];
}
result[row] = sum;
}
}
}
static async parallelMatVec(matrix, vector, numWorkers = navigator.hardwareConcurrency || 4) {
const rows = matrix.getRows();
const result = new Array(rows).fill(0);
if (typeof globalThis === 'undefined' || !globalThis.Worker || rows < 1000) {
this.sparseMatVec(matrix, vector, result);
return result;
}
const chunkSize = Math.ceil(rows / numWorkers);
const promises = [];
for (let i = 0; i < numWorkers; i++) {
const startRow = i * chunkSize;
const endRow = Math.min(startRow + chunkSize, rows);
if (startRow >= rows)
break;
const workerPromise = this.createMatVecWorker(matrix, vector, startRow, endRow);
promises.push(workerPromise);
}
const results = await Promise.all(promises);
let offset = 0;
for (const chunkResult of results) {
for (let i = 0; i < chunkResult.length; i++) {
result[offset + i] = chunkResult[i];
}
offset += chunkResult.length;
}
return result;
}
static async createMatVecWorker(matrix, vector, startRow, endRow) {
return new Promise(resolve => {
setTimeout(() => {
const chunkResult = new Array(endRow - startRow).fill(0);
for (let row = startRow; row < endRow; row++) {
let sum = 0;
for (const entry of matrix.rowEntries(row)) {
sum += entry.val * vector[entry.col];
}
chunkResult[row - startRow] = sum;
}
resolve(chunkResult);
}, 0);
});
}
static selectOptimalAlgorithm(matrix, vector) {
const nnz = matrix.getNnz();
const rows = matrix.getRows();
const sparsity = nnz / (rows * matrix.getCols());
const memoryUsage = matrix.getMemoryUsage();
if (memoryUsage > 100 * 1024 * 1024) { return {
algorithm: 'streaming',
params: { chunkSize: 1000 }
};
}
else if (rows > 10000 && typeof globalThis !== 'undefined' && globalThis.Worker) {
return {
algorithm: 'parallel',
params: { numWorkers: navigator.hardwareConcurrency || 4 }
};
}
else if (sparsity < 0.1 && rows > 1000) {
return {
algorithm: 'blocked',
params: { blockSize: Math.min(1000, Math.ceil(Math.sqrt(rows))) }
};
}
else {
return {
algorithm: 'sequential',
params: {}
};
}
}
}
export class PerformanceBenchmark {
memoryManager;
constructor(memoryManager = globalMemoryManager) {
this.memoryManager = memoryManager;
}
async benchmarkMatrixOperations(matrices, vectors, iterations = 100) {
const results = [];
for (let i = 0; i < matrices.length; i++) {
const matrix = matrices[i];
const vector = vectors[i];
const result = globalMemoryManager.acquireTypedArray('float64', matrix.getRows());
const seqResult = await this.benchmarkOperation('Sequential MatVec', () => OptimizedMatrixMultiplication.sparseMatVec(matrix, vector, Array.from(result)), iterations);
results.push(seqResult);
const blockedResult = await this.benchmarkOperation('Blocked MatVec', () => OptimizedMatrixMultiplication.sparseMatVec(matrix, vector, Array.from(result), 500), iterations);
results.push(blockedResult);
const vecResult = await this.benchmarkOperation('Vectorized Dot Product', () => VectorizedOperations.dotProduct(vector, vector), iterations * 10);
results.push(vecResult);
globalMemoryManager.releaseTypedArray(result);
}
return results;
}
async benchmarkOperation(name, operation, iterations) {
for (let i = 0; i < Math.min(10, iterations); i++) {
operation();
}
const { result, profile } = await this.memoryManager.profileOperation(name, async () => {
const startTime = performance.now();
for (let i = 0; i < iterations; i++) {
operation();
}
return performance.now() - startTime;
});
const totalTime = result;
const averageTime = totalTime / iterations;
const throughput = iterations / (totalTime / 1000); return {
operation: name,
iterations,
totalTime,
averageTime,
throughput,
memoryProfile: profile,
cacheStats: {
hitRate: profile.cacheHitRate,
missRate: 1 - profile.cacheHitRate
}
};
}
generateOptimizationReport(benchmarks) {
const recommendations = [];
const bottlenecks = [];
let totalMemoryDelta = 0;
let totalCacheHitRate = 0;
for (const benchmark of benchmarks) {
totalMemoryDelta += Math.abs(benchmark.memoryProfile.memoryDelta);
totalCacheHitRate += benchmark.cacheStats.hitRate;
if (benchmark.throughput < 1000) {
bottlenecks.push(`Low throughput in ${benchmark.operation}: ${benchmark.throughput.toFixed(2)} ops/sec`);
}
if (benchmark.cacheStats.hitRate < 0.8) {
recommendations.push(`Improve cache locality for ${benchmark.operation} (hit rate: ${(benchmark.cacheStats.hitRate * 100).toFixed(1)}%)`);
}
if (benchmark.memoryProfile.memoryDelta > 1024 * 1024) {
recommendations.push(`Reduce memory allocation in ${benchmark.operation} (${(benchmark.memoryProfile.memoryDelta / 1024 / 1024).toFixed(2)}MB allocated)`);
}
if (benchmark.averageTime > 100) {
recommendations.push(`Consider parallelization for ${benchmark.operation} (avg time: ${benchmark.averageTime.toFixed(2)}ms)`);
}
}
const avgMemoryDelta = totalMemoryDelta / benchmarks.length;
const avgCacheHitRate = totalCacheHitRate / benchmarks.length;
if (avgCacheHitRate < 0.7) {
recommendations.push('Consider using blocked algorithms for better cache locality');
}
if (avgMemoryDelta > 1024 * 1024) {
recommendations.push('Implement memory pooling to reduce allocation overhead');
}
return {
recommendations,
bottlenecks,
memoryEfficiency: 1 - (avgMemoryDelta / (1024 * 1024 * 100)), cacheEfficiency: avgCacheHitRate
};
}
async autoTuneParameters(matrix, vector) {
const blockSizes = [64, 128, 256, 512, 1024];
const unrollFactors = [2, 4, 8];
let bestBlockSize = 256;
let bestUnrollFactor = 4;
let bestThroughput = 0;
for (const blockSize of blockSizes) {
const result = await this.benchmarkOperation(`Block size ${blockSize}`, () => OptimizedMatrixMultiplication.sparseMatVec(matrix, vector, new Array(matrix.getRows()).fill(0), blockSize), 50);
if (result.throughput > bestThroughput) {
bestThroughput = result.throughput;
bestBlockSize = blockSize;
}
}
bestThroughput = 0;
for (const unrollFactor of unrollFactors) {
const result = await this.benchmarkOperation(`Unroll factor ${unrollFactor}`, () => VectorizedOperations.dotProduct(vector, vector, {
vectorize: true,
unroll: unrollFactor,
prefetch: false,
blocking: { enabled: false, size: 0 },
streaming: { enabled: false, chunkSize: 0 }
}), 100);
if (result.throughput > bestThroughput) {
bestThroughput = result.throughput;
bestUnrollFactor = unrollFactor;
}
}
const algorithmSelection = OptimizedMatrixMultiplication.selectOptimalAlgorithm(matrix, vector);
return {
optimalBlockSize: bestBlockSize,
optimalUnrollFactor: bestUnrollFactor,
recommendedAlgorithm: algorithmSelection.algorithm
};
}
}
export const globalPerformanceOptimizer = new PerformanceBenchmark();