// Declarations for building a `forward-pass`, attaching inputs and requested
// outputs to it, and reading back its results.
// NOTE(review): this summary is inferred from the declaration names only;
// confirm against the host implementation.
interface forward {
  use wasi:io/poll@0.2.4.{pollable};
  use common.{queue, pointer};

  // Handle for a single forward pass; obtained from `create-forward-pass`.
  resource forward-pass {
    // Presumably submits the pass for execution. Yields an optional
    // `forward-pass-result`; the conditions under which `none` is returned
    // are not visible here — TODO confirm.
    execute: func() -> option<forward-pass-result>;
  }

  // Handle for the outcome of a forward pass, as yielded by
  // `forward-pass.execute`.
  resource forward-pass-result {
    // Returns a pollable that can be used to check when the result is ready.
    pollable: func() -> pollable;

    // Retrieves the distributions if the result is ready; `none` while it is
    // still pending. Each tuple is (token IDs, associated probabilities).
    get-distributions: func() -> option<list<tuple<list<u32>, list<f32>>>>;

    // Retrieves the tokens as a list of u32.
    // NOTE(review): presumably `none` while still pending, like
    // `get-distributions` — confirm.
    get-tokens: func() -> option<list<u32>>;
  }

  // Creates a new `forward-pass` from a borrowed `queue`.
  create-forward-pass: func(queue: borrow<queue>) -> forward-pass;

  // Supplies `mask` (a list of u32 lists) for `pass`.
  // NOTE(review): how each inner list encodes the mask is not visible here —
  // confirm before relying on a particular layout.
  attention-mask: func(
    pass: borrow<forward-pass>,
    mask: list<list<u32>>,
  );

  // Supplies the KV page pointers and `last-kv-page-len` for `pass`.
  // NOTE(review): presumably the length refers to the final entry of
  // `kv-page-ptrs` — confirm.
  kv-cache: func(
    pass: borrow<forward-pass>,
    kv-page-ptrs: list<pointer>,
    last-kv-page-len: u32,
  );

  // Supplies input embeddings by pointer, together with `positions`.
  // NOTE(review): presumably `emb-ptrs` and `positions` are parallel lists —
  // confirm.
  input-embeddings: func(
    pass: borrow<forward-pass>,
    emb-ptrs: list<pointer>,
    positions: list<u32>,
  );

  // Supplies input token IDs, together with `positions`.
  // NOTE(review): presumably `input-tokens` and `positions` are parallel
  // lists — confirm.
  input-tokens: func(
    pass: borrow<forward-pass>,
    input-tokens: list<u32>,
    positions: list<u32>,
  );

  // Requests output embeddings for `indices`, using the given pointers.
  // NOTE(review): presumably `emb-ptrs` and `indices` are parallel lists —
  // confirm.
  output-embeddings: func(
    pass: borrow<forward-pass>,
    emb-ptrs: list<pointer>,
    indices: list<u32>,
  );

  // Requests output distributions for `indices` with a `temperature` and an
  // optional `top-k`.
  // NOTE(review): presumably read back via
  // `forward-pass-result.get-distributions` — confirm.
  output-distributions: func(
    pass: borrow<forward-pass>,
    indices: list<u32>,
    temperature: f32,
    top-k: option<u32>,
  );

  // Requests output tokens for `indices` with a `temperature`.
  // NOTE(review): presumably read back via `forward-pass-result.get-tokens`;
  // the same likely holds for the `output-tokens-*` variants below — confirm.
  output-tokens: func(
    pass: borrow<forward-pass>,
    indices: list<u32>,
    temperature: f32,
  );

  // Variant of `output-tokens` that additionally takes a `top-k` (u32).
  output-tokens-top-k: func(
    pass: borrow<forward-pass>,
    indices: list<u32>,
    temperature: f32,
    top-k: u32,
  );

  // Variant of `output-tokens` that additionally takes a `top-p` (f32).
  output-tokens-top-p: func(
    pass: borrow<forward-pass>,
    indices: list<u32>,
    temperature: f32,
    top-p: f32,
  );

  // Variant of `output-tokens` that additionally takes a `min-p` (f32).
  output-tokens-min-p: func(
    pass: borrow<forward-pass>,
    indices: list<u32>,
    temperature: f32,
    min-p: f32,
  );

  // Variant of `output-tokens` that takes both a `top-k` (u32) and a `top-p`
  // (f32).
  output-tokens-top-k-top-p: func(
    pass: borrow<forward-pass>,
    indices: list<u32>,
    temperature: f32,
    top-k: u32,
    top-p: f32,
  );
}