#[doc(hidden)]
pub mod wit {
#![allow(missing_docs)]
use crate::wit_bindgen;
// Generate Rust bindings for the `spin-sdk-llm` WIT world located in the
// `wit` directory. `runtime_path` points the generated code at this
// crate's re-exported wit-bindgen runtime, and `generate_all` also emits
// bindings for every interface the world depends on.
wit_bindgen::generate!({
runtime_path: "crate::wit_bindgen::rt",
world: "spin-sdk-llm",
path: "wit",
generate_all,
});
// Re-export the generated `fermyon:spin/llm` interface under a short path
// so the rest of this module can refer to it as `wit::llm`.
pub use fermyon::spin::llm;
}
pub use wit::llm::{Error, InferencingParams, InferencingResult, InferencingUsage};
#[doc(inline)]
pub use wit::llm;
#[doc(inline)]
pub use wit::llm::EmbeddingsResult;
pub use wit::llm::EmbeddingsUsage;
/// A Large Language Model the host may make available for inferencing.
///
/// Use [`InferencingModel::Other`] to name any model not covered by the
/// well-known variants; the string is passed to the host verbatim.
#[derive(Debug, Clone, Copy)]
pub enum InferencingModel<'a> {
    /// The Llama 2 chat model (rendered as `"llama2-chat"`).
    Llama2Chat,
    /// The CodeLlama instruct model (rendered as `"codellama-instruct"`).
    // NOTE(review): the variant name looks like a typo for `CodellamaInstruct`,
    // but renaming a public variant would break existing callers.
    Codellarunstruct,
    /// Any other model, identified by its host-side name.
    Other(&'a str),
}

impl std::fmt::Display for InferencingModel<'_> {
    // Renders the host-side model identifier string for this variant.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(match *self {
            InferencingModel::Llama2Chat => "llama2-chat",
            InferencingModel::Codellarunstruct => "codellama-instruct",
            InferencingModel::Other(name) => name,
        })
    }
}
impl Default for InferencingParams {
    /// Default inferencing parameters: up to 100 tokens, temperature 0.8,
    /// top-k 40, top-p 0.9, and a mild repeat penalty of 1.1 applied over
    /// the last 64 tokens.
    fn default() -> Self {
        Self {
            max_tokens: 100,
            temperature: 0.8,
            top_k: 40,
            top_p: 0.9,
            repeat_penalty: 1.1,
            repeat_penalty_last_n_token_count: 64,
        }
    }
}
/// Performs inferencing against `model` with the given `prompt`, using the
/// host's default inferencing parameters.
pub fn infer(model: InferencingModel, prompt: &str) -> Result<InferencingResult, Error> {
    let model_name = model.to_string();
    llm::infer(&model_name, prompt, None)
}
/// Performs inferencing against `model` with the given `prompt`, using the
/// explicitly supplied inferencing `options` instead of the host defaults.
pub fn infer_with_options(
    model: InferencingModel,
    prompt: &str,
    options: InferencingParams,
) -> Result<InferencingResult, Error> {
    let model_name = model.to_string();
    llm::infer(&model_name, prompt, Some(options))
}
/// An embedding model the host may make available.
///
/// Use [`EmbeddingModel::Other`] to name any model not covered by the
/// well-known variants; the string is passed to the host verbatim.
#[derive(Debug, Clone, Copy)]
pub enum EmbeddingModel<'a> {
    /// The all-MiniLM-L6-v2 model (rendered as `"all-minilm-l6-v2"`).
    AllMiniLmL6V2,
    /// Any other embedding model, identified by its host-side name.
    Other(&'a str),
}

impl std::fmt::Display for EmbeddingModel<'_> {
    // Renders the host-side model identifier string for this variant.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(match *self {
            EmbeddingModel::AllMiniLmL6V2 => "all-minilm-l6-v2",
            EmbeddingModel::Other(name) => name,
        })
    }
}
/// Generates an embedding vector for each entry of `text` using `model`.
pub fn generate_embeddings(
    model: EmbeddingModel,
    text: &[String],
) -> Result<llm::EmbeddingsResult, Error> {
    let model_name = model.to_string();
    llm::generate_embeddings(&model_name, text)
}