llama-gguf 0.14.0

A high-performance Rust implementation of llama.cpp - LLM inference engine with full GGUF support
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
#include <metal_stdlib>
using namespace metal;

/// Element-wise float addition: result[i] = a[i] + b[i].
///
/// Buffer bindings:
///   buffer(0)  a       first operand, read-only
///   buffer(1)  b       second operand, read-only
///   buffer(2)  result  destination, written by this kernel
///   buffer(3)  n       exclusive upper bound on the indices that are processed
///
/// `idx` is the thread's position in the grid; each thread handles at most
/// one element, the one at index `idx`.
kernel void add_f32(
    device const float* a [[buffer(0)]],
    device const float* b [[buffer(1)]],
    device float* result [[buffer(2)]],
    constant int& n [[buffer(3)]],
    uint idx [[thread_position_in_grid]]
) {
    // The bound is compared in unsigned arithmetic, matching the type of idx.
    // NOTE(review): a negative n would wrap to a very large unsigned value
    // here; presumably the host always passes n >= 0 - confirm at the caller.
    const uint limit = uint(n);

    // Threads whose index falls at or beyond the bound do nothing.
    if (idx >= limit) {
        return;
    }

    const float lhs = a[idx];
    const float rhs = b[idx];
    result[idx] = lhs + rhs;
}