pie 0.2.0

Pie: A Programmable LLM Serving System
Documentation
interface common {

    use wasi:io/poll@0.2.4.{pollable};

    // Alias for u32. The resource functions in this interface take and return
    // lists of this type (e.g. allocate-resources returns list<pointer>).
    type pointer = u32;

    // Priority levels that can be assigned to a task
    // (consumed by queue.set-priority).
    enum priority {
        // Lowest priority
        low,

        // Default priority
        normal,

        // Highest priority
        high,
    }

    // Resource wrapping a sequence of bytes.
    resource blob {
        // Builds a blob from the byte list `init`
        // (presumably its initial contents — confirm against the host).
        constructor(init: list<u8>);

        // Reads from the blob using a starting `offset` and a count `n`,
        // returning the bytes as a list.
        // NOTE(review): behaviour when offset + n exceeds size() is not
        // specified here — confirm.
        read: func(offset: u64, n: u64) -> list<u8>;

        // Returns the size of the blob (presumably in bytes — confirm).
        size: func() -> u64;
    }

    // Outcome handle for an asynchronous receive operation
    resource blob-result {
        // Returns a pollable that can be used to check readiness
        pollable: func() -> pollable;

        // Yields the received blob when it is available;
        // none while the result is not ready yet
        get: func() -> option<blob>;
    }

    // A specific model instance
    resource model {
        // Name of the model (e.g. "llama-3.1-8b-instruct")
        get-name: func() -> string;

        // The full set of traits of the model
        get-traits: func() -> list<string>;

        // Human-readable description of the model
        get-description: func() -> string;

        // Prompt formatting template, written in Tera
        get-prompt-template: func() -> string;

        // Stop tokens of the model, as a list of strings
        get-stop-tokens: func() -> list<string>;

        // Service id associated with this model.
        // NOTE(review): what the id refers to is not shown in this
        // interface — confirm against the host implementation.
        get-service-id: func() -> u32;

        // Size of a KV page
        get-kv-page-size: func() -> u32;

        // Creates a new command queue
        create-queue: func() -> queue;
    }

    // Command queue offering synchronization and priority control
    resource queue {
        // Service id associated with this queue.
        // NOTE(review): presumably matches the id of the model that created
        // the queue — confirm.
        get-service-id: func() -> u32;

        // Starts the synchronization process; the outcome is obtained
        // through the returned synchronization-result
        synchronize: func() -> synchronization-result;

        // Changes the priority of this queue
        set-priority: func(priority: priority);

        // Sends the `query` message to the model; the response is obtained
        // through the returned debug-query-result
        debug-query: func(query: string) -> debug-query-result;
    }

    // Outcome handle for a synchronization attempt
    resource synchronization-result {
        // Pollable for asynchronous readiness checks
        pollable: func() -> pollable;

        // some(true) when the sync succeeded, some(false) when it failed,
        // none while the result is not ready
        get: func() -> option<bool>;
    }

    // Response handle for a debug query
    resource debug-query-result {
        // Returns a pollable for this result
        // (presumably signals when the response is ready — confirm).
        pollable: func() -> pollable;

        // Returns the response string wrapped in an option.
        // NOTE(review): none presumably means "not ready yet", mirroring
        // synchronization-result — confirm.
        get: func() -> option<string>;
    }

    // resources
    // Requests `count` resources of kind `resource-type` through `queue` and
    // returns a list of pointers.
    // NOTE(review): the valid `resource-type` codes are not defined in this
    // interface — confirm against the host.
    allocate-resources: func(queue: borrow<queue>, resource-type: u32, count: u32) -> list<pointer>;


    // Deallocates the resources of kind `resource-type` identified by `ptrs`
    // through `queue` (presumably pointers obtained from allocate-resources —
    // confirm). Returns nothing.
    deallocate-resources: func(queue: borrow<queue>, resource-type: u32, ptrs: list<pointer>);

    // Lists the exported resources of kind `resource-type` as (string, u32)
    // pairs.
    // NOTE(review): the tuple fields are unnamed; presumably (export name,
    // number of resources) — confirm.
    get-all-exported-resources: func(queue: borrow<queue>, resource-type: u32) -> list<tuple<string, u32>>;

    // Releases the exported resources of kind `resource-type` identified by
    // `name` (presumably the name used in export-resources — confirm).
    // Returns nothing.
    release-exported-resources: func(queue: borrow<queue>, resource-type: u32, name: string);

    // Exports the resources of kind `resource-type` identified by `ptrs`
    // under `name`. Returns nothing.
    // NOTE(review): behaviour when `name` is already in use is not shown
    // here — confirm.
    export-resources: func(queue: borrow<queue>, resource-type: u32, ptrs: list<pointer>, name: string);

    // Imports the resources of kind `resource-type` identified by `name` and
    // returns a list of pointers (presumably those previously exported under
    // that name — confirm).
    import-resources: func(queue: borrow<queue>, resource-type: u32, name: string) -> list<pointer>;


}