kproc_llm/
lib.rs

#![doc = include_str!("../README.MD")]
#![warn(missing_docs)]
#![deny(warnings)]

#[cfg_attr(not(any(feature = "ollama", feature = "llama.cpp")), deny(warnings))]
mod error;
pub mod prelude;

#[cfg(feature = "llama.cpp")]
pub mod llama_cpp;

#[cfg(feature = "ollama")]
pub mod ollama;

#[cfg(feature = "simple-api")]
pub mod simple_api;

#[cfg(feature = "template")]
pub mod template;

use std::future::Future;

/// Export Error enum.
pub use error::Error;
use futures::Stream;

/// Result type used throughout this crate.
pub type Result<T> = std::result::Result<T, Error>;

/// Requested output format.
#[derive(Debug)]
pub enum Format
{
  /// Output text
  Text,
  /// Output JSON
  Json,
}

/// Speaker role of a message.
#[derive(Debug)]
pub enum Role
{
  /// User role
  User,
  /// System role
  System,
  /// Assistant role
  Assistant,
  /// Custom role, defined by the given string. Backends without support for custom roles often fall back to [`Role::User`].
  Custom(String),
}

/// A single message in a conversation.
#[derive(Debug)]
pub struct Message
{
  /// Role of the message
  pub role: Role,
  /// Content of the message
  pub content: String,
}

/// Chat prompt: an ordered list of messages and the requested output format.
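///
/// A minimal builder example (a sketch; the message contents are illustrative only):
///
/// ```
/// use kproc_llm::ChatPrompt;
///
/// let prompt = ChatPrompt::new()
///   .system("You are a helpful assistant.")
///   .user("What is the capital of France?");
/// assert_eq!(prompt.messages.len(), 2);
/// ```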
#[derive(Debug)]
pub struct ChatPrompt
{
  /// Messages
  pub messages: Vec<Message>,
  /// Requested output format
  pub format: Format,
}

impl ChatPrompt
{
  /// Start an empty chat prompt.
  pub fn new() -> Self
  {
    Self {
      messages: Default::default(),
      format: Format::Text,
    }
  }
  /// Append a message with the given role and content.
  pub fn message(mut self, role: Role, content: impl Into<String>) -> Self
  {
    self.messages.push(Message {
      role,
      content: content.into(),
    });
    self
  }
  /// Append a message with the given role and content. Does nothing if `content` is `None`.
  pub fn message_opt(mut self, role: Role, content: Option<String>) -> Self
  {
    if let Some(content) = content
    {
      self.messages.push(Message {
        role,
        content,
      });
    }
    self
  }
  /// Append a user message with the given content.
  pub fn user(self, content: impl Into<String>) -> Self
  {
    self.message(Role::User, content)
  }
  /// Append a system message with the given content.
  pub fn system(self, content: impl Into<String>) -> Self
  {
    self.message(Role::System, content)
  }
  /// Append a system message, if `content` is `Some`.
  pub fn system_opt(self, content: Option<String>) -> Self
  {
    self.message_opt(Role::System, content)
  }
  /// Append an assistant message with the given content.
  pub fn assistant(self, content: impl Into<String>) -> Self
  {
    self.message(Role::Assistant, content)
  }
  /// Append an assistant message, if `content` is `Some`.
  pub fn assistant_opt(self, content: Option<String>) -> Self
  {
    self.message_opt(Role::Assistant, content)
  }
  /// Set the requested output format.
  pub fn format(mut self, format: impl Into<Format>) -> Self
  {
    self.format = format.into();
    self
  }
}

/// Generation prompt: a single-turn prompt with optional system and assistant hints.
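///
/// A minimal builder example (a sketch; the prompt text is illustrative only):
///
/// ```
/// use kproc_llm::{Format, GenerationPrompt};
///
/// let prompt = GenerationPrompt::prompt("List three prime numbers.")
///   .system("Answer with a JSON array.")
///   .format(Format::Json);
/// assert!(prompt.system.is_some());
/// ```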
#[derive(Debug)]
pub struct GenerationPrompt
{
  /// User prompt
  pub user: String,
  /// Optional system hint
  pub system: Option<String>,
  /// Optional assistant hint
  pub assistant: Option<String>,
  /// Requested output format
  pub format: Format,
  /// Optional prompt image (for multi-modal models)
  #[cfg(feature = "image")]
  pub image: Option<kproc_values::Image>,
}

impl GenerationPrompt
{
  /// Start a generation prompt with the given user prompt.
  pub fn prompt(user: impl Into<String>) -> Self
  {
    Self {
      user: user.into(),
      system: Default::default(),
      assistant: Default::default(),
      format: Format::Text,
      #[cfg(feature = "image")]
      image: None,
    }
  }
  /// Set the system hint.
  pub fn system(mut self, content: impl Into<String>) -> Self
  {
    self.system = Some(content.into());
    self
  }
  /// Set the assistant hint.
  pub fn assistant(mut self, content: impl Into<String>) -> Self
  {
    self.assistant = Some(content.into());
    self
  }
  /// Set the requested output format.
  pub fn format(mut self, format: impl Into<Format>) -> Self
  {
    self.format = format.into();
    self
  }
  /// Set the image, for use in multi-modal models.
  #[cfg(feature = "image")]
  pub fn image(mut self, image: impl Into<kproc_values::Image>) -> Self
  {
    self.image = Some(image.into());
    self
  }
}

/// Stream of string tokens, as produced by the streaming APIs of this crate.
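///
/// A sketch of building one by hand, assuming `ccutils::futures::BoxedStream` is the
/// usual pinned, boxed `Stream` trait object (as `pin_stream` below suggests):
///
/// ```
/// let tokens: Vec<kproc_llm::Result<String>> =
///   vec![Ok("Hello".into()), Ok(", world".into())];
/// let _stream: kproc_llm::StringStream = Box::pin(futures::stream::iter(tokens));
/// ```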
pub type StringStream = ccutils::futures::BoxedStream<Result<String>>;

/// Convenience function for boxing and pinning a stream into a [`StringStream`].
#[allow(dead_code)]
pub(crate) fn pin_stream<T: 'static + Send + Stream<Item = Result<String>>>(t: T) -> StringStream
{
  Box::pin(t)
}

/// Accumulate a token stream into a single `String`, awaiting the stream to completion.
fn accumulate<T>(stream_maker: T) -> Result<impl Future<Output = Result<String>> + Send>
where
  T: Future<Output = Result<StringStream>> + Send,
{
  use futures::stream::StreamExt;
  Ok(async {
    let mut result: String = Default::default();
    let mut stream = Box::pin(stream_maker.await?);
    while let Some(next_token) = stream.next().await
    {
      if result.is_empty()
      {
        // Move the first token into the accumulator to avoid a copy.
        result = next_token?;
      }
      else
      {
        result.push_str(&next_token?);
      }
    }
    Ok(result)
  })
}

/// Implement generation on top of [`LargeLanguageModel::chat_stream`], by converting
/// the generation prompt into an equivalent chat prompt.
#[allow(dead_code)]
pub(crate) fn generate_with_chat<LLM>(
  llm: &LLM,
  prompt: GenerationPrompt,
) -> Result<impl Future<Output = Result<StringStream>> + Send + use<'_, LLM>>
where
  LLM: LargeLanguageModel,
{
  let chat_prompt = ChatPrompt::new()
    .system_opt(prompt.system)
    .assistant_opt(prompt.assistant)
    .user(prompt.user)
    .format(prompt.format);
  llm.chat_stream(chat_prompt)
}

/// Interface to a large language model.
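///
/// A minimal sketch of calling an implementation generically (the concrete backend is
/// assumed to be provided elsewhere, e.g. behind the `ollama` or `llama.cpp` feature):
///
/// ```no_run
/// use kproc_llm::{ChatPrompt, LargeLanguageModel, Result};
///
/// async fn ask(llm: &impl LargeLanguageModel) -> Result<String>
/// {
///   llm
///     .chat(ChatPrompt::new().system("Be concise.").user("Name a prime number."))?
///     .await
/// }
/// ```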
pub trait LargeLanguageModel
{
  /// Chat with the model, returning a stream of tokens.
  fn chat_stream(
    &self,
    prompt: ChatPrompt,
  ) -> Result<impl Future<Output = Result<StringStream>> + Send>;
  /// Run chat on the model, returning once the complete answer has been computed.
  /// The default implementation calls [`chat_stream`](Self::chat_stream) and accumulates the stream until completion.
  fn chat(&self, prompt: ChatPrompt) -> Result<impl Future<Output = Result<String>> + Send>
  {
    let stream = self.chat_stream(prompt)?;
    accumulate(stream)
  }
  /// Run inference on the model, returning a stream of tokens.
  fn generate_stream(
    &self,
    prompt: GenerationPrompt,
  ) -> Result<impl Future<Output = Result<StringStream>> + Send>;
  /// Run inference on the model, returning once the complete answer has been computed.
  /// The default implementation calls [`generate_stream`](Self::generate_stream) and accumulates the stream until completion.
  fn generate(
    &self,
    prompt: GenerationPrompt,
  ) -> Result<impl Future<Output = Result<String>> + Send>
  {
    let stream = self.generate_stream(prompt)?;
    accumulate(stream)
  }
}