// llama_runner/runner/mod.rs
mod ext;
mod gemma3;
mod gemma4;
mod msg;
mod req;
mod stream;

use std::sync::LazyLock;

pub use ext::*;
pub use gemma3::*;
pub use gemma4::*;
use llama_cpp_2::llama_backend::LlamaBackend;
pub use msg::*;
pub use req::*;
pub use stream::*;

use crate::{error::GenericRunnerError, template::ChatTemplate};
20static LLAMA_BACKEND: LazyLock<LlamaBackend> = LazyLock::new(|| {
21 llama_cpp_2::send_logs_to_tracing(llama_cpp_2::LogOptions::default());
22 LlamaBackend::init().unwrap()
23});
25pub trait TextLmRunner<'s, 'req, Tmpl>
26where
27 Tmpl: ChatTemplate,
28{
29 fn stream_lm_response(
30 &'s self,
31 request: GenericTextLmRequest<'req, Tmpl>,
32 ) -> impl Iterator<Item = Result<String, GenericRunnerError<Tmpl::Error>>>;
33}
35pub trait VisionLmRunner<'s, 'req, Tmpl>
36where
37 Tmpl: ChatTemplate,
38{
39 fn stream_vlm_response(
40 &'s self,
41 request: GenericVisionLmRequest<'req, Tmpl>,
42 ) -> impl Iterator<Item = Result<String, GenericRunnerError<Tmpl::Error>>>;
43}