/// A WASI interface dedicated to performing inferencing for Large Language Models.
interface llm {
  /// A Large Language Model, identified by name.
  type inferencing-model = string;
  /// Inference request parameters
  record inferencing-params {
    /// The maximum number of tokens that should be inferred.
    ///
    /// Note: the backing implementation may return fewer tokens.
    max-tokens: u32,
    /// How strongly the model should avoid repeating tokens.
    repeat-penalty: f32,
    /// The number of most recent tokens to which the repeat penalty is applied.
    repeat-penalty-last-n-token-count: u32,
    /// The randomness with which the next token is selected.
    temperature: f32,
    /// The number of most likely next tokens the model will choose from (top-k sampling).
    top-k: u32,
    /// The cumulative probability of the next tokens the model will choose from (top-p, or nucleus, sampling).
    top-p: f32
  }
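  // An illustrative sketch (hypothetical Rust guest code, shown as a comment)
  // of constructing these params via bindings generated from this interface.
  // Field names assume the usual kebab-case to snake_case mapping; the values
  // are examples only, not defaults defined by this interface.
  //
  //     let params = InferencingParams {
  //         max_tokens: 128,
  //         repeat_penalty: 1.1,
  //         repeat_penalty_last_n_token_count: 64,
  //         temperature: 0.8,
  //         top_k: 40,
  //         top_p: 0.9,
  //     };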
  /// The set of errors which may be raised by functions in this interface
  variant error {
    /// The requested model is not supported
    model-not-supported,
    /// An error occurred in the backing implementation; the payload describes it
    runtime-error(string),
    /// The input supplied to the request was invalid; the payload describes why
    invalid-input(string)
  }
  /// An inferencing result
  record inferencing-result {
    /// The text generated by the model
    // TODO: this should be a stream
    text: string,
    /// Usage information about the inferencing request
    usage: inferencing-usage
  }
  /// Usage information related to the inferencing result
  record inferencing-usage {
    /// Number of tokens in the prompt
    prompt-token-count: u32,
    /// Number of tokens generated by the inferencing operation
    generated-token-count: u32
  }
  /// Perform inferencing using the provided model and prompt with the given optional params
  infer: func(model: inferencing-model, prompt: string, params: option<inferencing-params>) -> result<inferencing-result, error>;
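  // An illustrative sketch (hypothetical Rust guest code) of calling `infer`
  // through generated bindings. The model name is an example, and passing
  // `None` for params presumably leaves the choice of defaults to the host.
  //
  //     let result = infer("llama2-chat", "Say hello", None)?;
  //     println!(
  //         "{} ({} prompt / {} generated tokens)",
  //         result.text,
  //         result.usage.prompt_token_count,
  //         result.usage.generated_token_count,
  //     );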
  /// The model used for generating embeddings
  type embedding-model = string;
  /// Generate embeddings for the supplied list of text
  generate-embeddings: func(model: embedding-model, text: list<string>) -> result<embeddings-result, error>;
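  // An illustrative sketch (hypothetical Rust guest code) of generating
  // embeddings through generated bindings. The model name is an example;
  // presumably each inner vector is the embedding of the input string at
  // the same index.
  //
  //     let texts = vec!["first sentence".to_string(), "second sentence".to_string()];
  //     let result = generate_embeddings("all-minilm-l6-v2", &texts)?;
  //     assert_eq!(result.embeddings.len(), texts.len());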
  /// Result of generating embeddings
  record embeddings-result {
    /// The embeddings generated by the request
    embeddings: list<list<f32>>,
    /// Usage related to the embeddings generation request
    usage: embeddings-usage
  }
  /// Usage related to an embeddings generation request
  record embeddings-usage {
    /// Number of tokens in the prompt
    prompt-token-count: u32
  }
}