
kproc_llm/lib.rs

#![doc = include_str!("../README.MD")]
#![warn(missing_docs)]

use std::future::Future;

pub mod prelude;

#[cfg_attr(not(any(feature = "ollama", feature = "llama.cpp")), deny(warnings))]
mod error;
mod message;
mod prompts;

#[cfg(any(feature = "candle", feature = "candle-git"))]
pub mod candle;

#[cfg(feature = "llama.cpp")]
pub mod llama_cpp;

#[cfg(feature = "ollama")]
pub mod ollama;

#[cfg(feature = "simple-api")]
pub mod simple_api;

#[cfg(feature = "template")]
pub mod template;
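
// Note: the back ends above are opt-in. A downstream crate selects one through
// Cargo features; a sketch of what that might look like is below (the version
// requirement is a placeholder, the feature names come from the cfg gates above):
//
//   [dependencies]
//   kproc_llm = { version = "*", features = ["ollama"] }
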
pub use error::Error;
pub use message::{Message, Role};
pub use prompts::{ChatPrompt, Format, GenerationPrompt};

/// Result type alias used throughout the crate; the error type defaults to this crate's [`Error`].
pub type Result<T, E = Error> = std::result::Result<T, E>;

/// Boxed stream of `Result<String>` chunks, as produced by the streaming model APIs.
pub type StringStream = ccutils::streams::BoxedStream<Result<String>>;

/// Await the stream produced by `stream_maker` and concatenate every chunk
/// into a single `String`, propagating the first error encountered.
fn accumulate<T>(stream_maker: T) -> Result<impl Future<Output = Result<String>> + Send>
where
  T: Future<Output = Result<StringStream>> + Send,
{
  use futures::stream::StreamExt;
  Ok(async {
    let mut result: String = Default::default();
    let mut stream = Box::pin(stream_maker.await?);
    while let Some(next_token) = stream.next().await
    {
      if result.is_empty()
      {
        // First chunk: take ownership instead of copying into the buffer.
        result = next_token?;
      }
      else
      {
        result.push_str(&next_token?);
      }
    }
    Ok(result)
  })
}

/// Build a single-turn `ChatPrompt` from a `GenerationPrompt` and delegate to
/// the model's `chat_stream`.
#[allow(dead_code)]
pub(crate) fn generate_with_chat<LLM>(
  llm: &LLM,
  prompt: GenerationPrompt,
) -> Result<impl Future<Output = Result<StringStream>> + Send + use<'_, LLM>>
where
  LLM: LargeLanguageModel,
{
  let chat_prompt = ChatPrompt::default()
    .system_opt(prompt.system)
    .assistant_opt(prompt.assistant)
    .user(prompt.user)
    .options(prompt.options);
  llm.chat_stream(chat_prompt)
}

/// Common interface implemented by every large language model back end.
pub trait LargeLanguageModel
{
  /// Chat with a model, returning a stream of response chunks.
  fn chat_stream(
    &self,
    prompt: ChatPrompt,
  ) -> Result<impl Future<Output = Result<StringStream>> + Send>;
  /// Run chat on a model, returning once the complete answer has been computed.
  /// The default implementation calls `chat_stream` and accumulates the stream until completion.
  fn chat(&self, prompt: ChatPrompt) -> Result<impl Future<Output = Result<String>> + Send>
  {
    let stream = self.chat_stream(prompt)?;
    accumulate(stream)
  }
  /// Run inference on a model, returning a stream of response chunks.
  fn generate_stream(
    &self,
    prompt: GenerationPrompt,
  ) -> Result<impl Future<Output = Result<StringStream>> + Send>;
  /// Run inference on a model, returning once the complete answer has been computed.
  /// The default implementation calls `generate_stream` and accumulates the stream until completion.
  fn generate(
    &self,
    prompt: GenerationPrompt,
  ) -> Result<impl Future<Output = Result<String>> + Send>
  {
    let stream = self.generate_stream(prompt)?;
    accumulate(stream)
  }
}
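
// Usage sketch (comments only, not compiled): how downstream code might drive a
// back end through this trait. The argument type of the `ChatPrompt::user`
// builder is an assumption; only `ChatPrompt::default()`, `user(..)` and
// `chat(..)` are taken from this file.
//
//   async fn ask(llm: &impl LargeLanguageModel) -> Result<String>
//   {
//     let prompt = ChatPrompt::default().user("Hello!".to_string());
//     llm.chat(prompt)?.await
//   }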