# @ruvector/attention

High-performance attention mechanisms for Node.js, powered by Rust.
## Features

- **Scaled Dot-Product Attention**: Classic attention mechanism with optional scaling
- **Multi-Head Attention**: Parallel attention heads for richer representations
- **Flash Attention**: Memory-efficient attention with block-wise computation
- **Linear Attention**: O(N) complexity attention using kernel approximations
- **Hyperbolic Attention**: Attention in hyperbolic space for hierarchical data
- **Mixture-of-Experts (MoE) Attention**: Dynamic expert routing for specialized attention
## Installation

```bash
npm install @ruvector/attention
```
## Usage
### Basic Dot-Product Attention

```js
const { DotProductAttention } = require('@ruvector/attention');

const attention = new DotProductAttention(64); // 64-dim vectors (example size)

// Example inputs: one query against two key/value pairs
const query = new Float32Array(64).fill(0.1);
const keys = [new Float32Array(64).fill(0.2), new Float32Array(64).fill(0.3)];
const values = [new Float32Array(64).fill(0.4), new Float32Array(64).fill(0.5)];

const output = attention.compute(query, keys, values);
```
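Under the hood, `compute` performs classic scaled dot-product attention: the query's scaled dot product with each key is softmaxed into weights, which then mix the values. A minimal plain-JavaScript sketch of the same computation, assuming the conventional `1/sqrt(dim)` default when `scale` is omitted (illustrative only; the package does this in Rust):

```js
// Reference sketch of scaled dot-product attention (not the Rust implementation).
function scaledDotProductAttention(query, keys, values, scale) {
  const dim = query.length;
  const s = scale ?? 1 / Math.sqrt(dim); // assumed default scale

  // Scaled dot product of the query with each key.
  const scores = keys.map((key) => {
    let dot = 0;
    for (let i = 0; i < dim; i++) dot += query[i] * key[i];
    return dot * s;
  });

  // Softmax over the scores (max-subtraction for numerical stability).
  const max = Math.max(...scores);
  const exps = scores.map((x) => Math.exp(x - max));
  const sum = exps.reduce((a, b) => a + b, 0);

  // Output is the attention-weighted sum of the value vectors.
  const output = new Float32Array(values[0].length);
  values.forEach((value, j) => {
    const w = exps[j] / sum;
    for (let i = 0; i < value.length; i++) output[i] += w * value[i];
  });
  return output;
}
```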
### Multi-Head Attention

```js
const { MultiHeadAttention } = require('@ruvector/attention');

const mha = new MultiHeadAttention(512, 8); // 512 dim, 8 heads

const output = mha.compute(query, keys, values);

// Async version for large computations
const outputAsync = await mha.computeAsync(query, keys, values);
```
### Flash Attention

```js
const { FlashAttention } = require('@ruvector/attention');

const flash = new FlashAttention(512, 64); // 512 dim, 64 block size

const output = flash.compute(query, keys, values);
```
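### Linear Attention

`LinearAttention` follows the same `compute` pattern; per the API reference below, its constructor takes the dimension and the number of kernel features. A minimal sketch with example values:

```js
const { LinearAttention } = require('@ruvector/attention');

const linear = new LinearAttention(512, 64); // 512 dim, 64 features (example values)

const output = linear.compute(query, keys, values);
```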
### Hyperbolic Attention

```js
const { HyperbolicAttention } = require('@ruvector/attention');

const hyperbolic = new HyperbolicAttention(512, -1.0); // negative curvature (-1.0 is an example)

const output = hyperbolic.compute(query, keys, values);
```
### Mixture-of-Experts Attention

```js
const { MoEAttention } = require('@ruvector/attention');

// MoEConfig fields shown here are illustrative; actual field names may differ
const moe = new MoEAttention({ dim: 512, numExperts: 4, topK: 2 });

const output = moe.compute(query, keys, values);
const expertUsage = moe.getExpertUsage(); // number[] of per-expert usage
```
## Training

```js
const { Trainer, AdamOptimizer } = require('@ruvector/attention');

// Configure training (TrainingConfig fields shown here are illustrative)
const trainer = new Trainer({ learningRate: 0.001, epochs: 10 });

// Training step (inputs and targets are Float32Array[])
const loss = trainer.trainStep(inputs, targets);

// Get metrics
const metrics = trainer.getMetrics();
console.log(metrics);

// Custom optimizer (arguments: learningRate, beta1?, beta2?, epsilon?)
const optimizer = new AdamOptimizer(0.001);
const updatedParams = optimizer.step(gradients); // gradients: Float32Array[]
```
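`Trainer` also exposes `trainStepAsync` (see the API reference below), which avoids blocking the event loop on large steps. A minimal epoch loop might look like the following sketch, where `loadBatches` is a hypothetical stand-in for your own data pipeline:

```js
// Sketch of an epoch loop using the async training step.
// `loadBatches` is hypothetical; supply your own batches of
// Float32Array[] inputs and targets.
async function train(trainer, epochs) {
  for (let epoch = 0; epoch < epochs; epoch++) {
    let lastLoss = 0;
    for (const { inputs, targets } of loadBatches()) {
      lastLoss = await trainer.trainStepAsync(inputs, targets);
    }
    console.log(`epoch ${epoch} loss=${lastLoss}`, trainer.getMetrics());
  }
}
```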
## Batch Processing

```js
const { BatchProcessor, parallelAttentionCompute } = require('@ruvector/attention');

// Batch processor for efficient batching (BatchConfig fields are illustrative)
const processor = new BatchProcessor({ batchSize: 32 });

const results = await processor.processBatch(queries, keys, values);
const throughput = processor.getThroughput();

// Parallel computation with automatic worker management
// ('multi-head' is an example attentionType string; numWorkers is optional)
const parallelResults = await parallelAttentionCompute('multi-head', queries, keys, values);
```
## API Reference

### Classes
#### DotProductAttention

```ts
constructor(dim: number, scale?: number)
compute(query: Float32Array, keys: Float32Array[], values: Float32Array[]): Float32Array
```

#### MultiHeadAttention

```ts
constructor(dim: number, numHeads: number)
compute(query: Float32Array, keys: Float32Array[], values: Float32Array[]): Float32Array
computeAsync(query: Float32Array, keys: Float32Array[], values: Float32Array[]): Promise<Float32Array>
```

#### FlashAttention

```ts
constructor(dim: number, blockSize: number)
compute(query: Float32Array, keys: Float32Array[], values: Float32Array[]): Float32Array
```

#### LinearAttention

```ts
constructor(dim: number, numFeatures: number)
compute(query: Float32Array, keys: Float32Array[], values: Float32Array[]): Float32Array
```

#### HyperbolicAttention

```ts
constructor(dim: number, curvature: number)
compute(query: Float32Array, keys: Float32Array[], values: Float32Array[]): Float32Array
```

#### MoEAttention

```ts
constructor(config: MoEConfig)
compute(query: Float32Array, keys: Float32Array[], values: Float32Array[]): Float32Array
getExpertUsage(): number[]
```

#### Trainer

```ts
constructor(config: TrainingConfig)
trainStep(inputs: Float32Array[], targets: Float32Array[]): number
trainStepAsync(inputs: Float32Array[], targets: Float32Array[]): Promise<number>
getMetrics(): TrainingMetrics
```

#### AdamOptimizer

```ts
constructor(learningRate: number, beta1?: number, beta2?: number, epsilon?: number)
step(gradients: Float32Array[]): Float32Array[]
getLearningRate(): number
setLearningRate(lr: number): void
```

#### BatchProcessor

```ts
constructor(config: BatchConfig)
processBatch(queries: Float32Array[], keys: Float32Array[][], values: Float32Array[][]): Promise<Float32Array[]>
getThroughput(): number
```
### Functions

#### parallelAttentionCompute

```ts
function parallelAttentionCompute(
  attentionType: string,
  queries: Float32Array[],
  keys: Float32Array[][],
  values: Float32Array[][],
  numWorkers?: number
): Promise<Float32Array[]>
```
#### version

Returns the package version string.
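For example (assuming `version` is exported as a function alongside the classes):

```js
const { version } = require('@ruvector/attention');

console.log(version()); // e.g. "1.0.0"
```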
## Performance

This package uses Rust under the hood for high performance:

- Zero-copy data transfer where possible
- SIMD optimizations for vector operations
- Multi-threaded batch processing (see the timing sketch below)
- Memory-efficient attention mechanisms
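To gauge the multi-threaded batching gain on your own hardware, you can time a sequential loop against a single batched call. A rough sketch, reusing `mha` and `processor` from the examples above (absolute numbers will vary by platform):

```js
// Rough timing comparison: sequential single computations vs. one batched call.
const t0 = performance.now();
for (let i = 0; i < queries.length; i++) {
  mha.compute(queries[i], keys[i], values[i]);
}
const sequentialMs = performance.now() - t0;

const t1 = performance.now();
await processor.processBatch(queries, keys, values);
const batchedMs = performance.now() - t1;

console.log({ sequentialMs, batchedMs, throughput: processor.getThroughput() });
```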
## Platform Support
Pre-built binaries are provided for:
- macOS (x64, ARM64)
- Linux (x64, ARM64, musl)
- Windows (x64, ARM64)
## License
MIT OR Apache-2.0