pie 0.2.0

Pie: A Programmable LLM Serving System
Documentation
// Interface for building and running a model forward pass.
//
// A `forward-pass` handle is created from a borrowed `queue`. Every free
// function below takes a borrow of that handle as its first parameter and
// returns nothing; results are obtained through `forward-pass.execute` and
// the `forward-pass-result` resource.
// NOTE(review): presumably the free functions attach inputs / request outputs
// on the pass before `execute` is called. Required call order, and whether a
// function may be called more than once per pass, is not visible in this
// file — confirm against the host implementation.
interface forward {

    // `pollable` comes from WASI I/O; `queue` and `pointer` are declared in
    // the sibling `common` interface (not shown here).
    use wasi:io/poll@0.2.4.{pollable};
    use common.{queue, pointer};

    // Handle representing one forward pass.
    resource forward-pass {
        // Executes the pass and optionally returns a handle to its result.
        // NOTE(review): the condition under which `none` is returned is not
        // specified here — presumably when no token/distribution output was
        // requested; confirm.
        execute: func() -> option<forward-pass-result>;
    }

    // Handle to the (possibly still pending) result of an executed pass.
    resource forward-pass-result {
        // Returns a pollable object to check when the result is ready
        pollable: func() -> pollable;

        // Retrieves the result if ready; None if still pending
        // Each tuple: (token IDs, associated probabilities)
        // NOTE(review): presumably one tuple per index requested through
        // `output-distributions` — confirm.
        get-distributions: func() -> option<list<tuple<list<u32>, list<f32>>>>;

        // Retrieves a list of token IDs, or None.
        // NOTE(review): presumably None means "still pending", mirroring
        // `get-distributions`, and the list holds one token per index
        // requested through the `output-tokens*` functions — confirm.
        get-tokens: func() -> option<list<u32>>;
    }

    // Creates a new `forward-pass` handle from a borrowed `queue`.
    create-forward-pass: func(
        queue: borrow<queue>
    ) -> forward-pass;


    // Supplies an attention mask for the pass as a list of `u32` lists.
    // NOTE(review): the encoding of each inner list (e.g. one list per input
    // position, raw vs. run-length encoded) is not visible here — confirm.
    attention-mask: func(
        pass: borrow<forward-pass>,
        mask: list<list<u32>>,
    );

    // Supplies the KV-cache pages for the pass.
    //   kv-page-ptrs:     pointers identifying the KV pages.
    //   last-kv-page-len: presumably the number of occupied slots in the final
    //                     page of `kv-page-ptrs` — TODO confirm.
    kv-cache: func(
        pass: borrow<forward-pass>,
        kv-page-ptrs: list<pointer>,
        last-kv-page-len: u32,
    );


    // Supplies input embeddings (by pointer) together with a list of positions.
    // NOTE(review): presumably `emb-ptrs` and `positions` are parallel lists of
    // equal length — confirm.
    input-embeddings: func(
        pass: borrow<forward-pass>,
        emb-ptrs: list<pointer>,
        positions: list<u32>,
    );


    // Supplies input token IDs together with a list of positions.
    // NOTE(review): presumably `input-tokens` and `positions` are parallel
    // lists of equal length — confirm.
    input-tokens: func(
        pass: borrow<forward-pass>,
        input-tokens: list<u32>,
        positions: list<u32>,
    );


    // Associates embedding pointers with a list of indices.
    // NOTE(review): presumably requests that the output embedding at each
    // index be written to the matching pointer in `emb-ptrs`; what `indices`
    // index into is not visible here — confirm.
    output-embeddings: func(
        pass: borrow<forward-pass>,
        emb-ptrs: list<pointer>,
        indices: list<u32>,
    );

    // Requests output distributions for `indices` at the given `temperature`.
    // `top-k` is optional in this function only.
    // NOTE(review): presumably `none` means the distribution is not truncated,
    // and results are read back via `get-distributions` — confirm.
    output-distributions: func(
        pass: borrow<forward-pass>,
        indices: list<u32>,
        temperature: f32,
        top-k: option<u32>,
    );

    // Requests output tokens for `indices` with only a `temperature` parameter.
    // NOTE(review): presumably results are read back via `get-tokens`; this
    // applies to every `output-tokens*` function below — confirm.
    output-tokens: func(
        pass: borrow<forward-pass>,
        indices: list<u32>,
        temperature: f32,
    );

    // Variant of `output-tokens` with an additional required `top-k` parameter.
    output-tokens-top-k: func(
        pass: borrow<forward-pass>,
        indices: list<u32>,
        temperature: f32,
        top-k: u32
    );

    // Variant of `output-tokens` with an additional `top-p` parameter.
    // NOTE(review): valid range of `top-p` is not stated here (presumably
    // (0, 1]) — confirm.
    output-tokens-top-p: func(
        pass: borrow<forward-pass>,
        indices: list<u32>,
        temperature: f32,
        top-p: f32,
    );

    // Variant of `output-tokens` with an additional `min-p` parameter.
    // NOTE(review): valid range of `min-p` is not stated here (presumably
    // [0, 1]) — confirm.
    output-tokens-min-p: func(
        pass: borrow<forward-pass>,
        indices: list<u32>,
        temperature: f32,
        min-p: f32,
    );

    // Variant of `output-tokens` taking both `top-k` and `top-p` parameters.
    // NOTE(review): the order in which the two filters are applied is not
    // visible here — confirm.
    output-tokens-top-k-top-p: func(
        pass: borrow<forward-pass>,
        indices: list<u32>,
        temperature: f32,
        top-k: u32,
        top-p: f32
    );


}