Skip to main content

oxillama_py/
lib.rs

1// PyO3 generates code that triggers `useless_conversion` and related lints
2// because it wraps `PyResult<T>` at the function signature level.  This is a
3// well-known false positive with PyO3 ≥ 0.20; suppress it crate-wide.
4#![allow(clippy::useless_conversion)]
5
6//! # oxillama-py
7//!
8//! PyO3 Python bindings for the OxiLLaMa Pure-Rust LLM inference engine.
9//!
10//! ## Quick start
11//!
12//! ```python
13//! import oxillama_py
14//!
15//! config = oxillama_py.EngineConfig(model_path="model.gguf", context_size=4096)
16//! engine = oxillama_py.Engine(config)
17//! engine.load_model()
18//!
19//! text = engine.generate("Hello", max_tokens=128)
20//! emb  = engine.embed("Hello world")   # List[float]
21//! toks = engine.tokenize("Hello")      # List[int]
22//!
23//! engine.generate_streaming(
24//!     "Hello",
25//!     max_tokens=128,
26//!     callback=lambda tok: print(tok, end="", flush=True),
27//! )
28//! ```
29//!
30//! ## Module structure
31//!
32//! | Python class         | Rust source       |
33//! |----------------------|-------------------|
34//! | `EngineConfig`       | `engine.rs`       |
35//! | `Engine`             | `engine.rs`       |
36//! | `SamplerConfig`      | `sampler.rs`      |
37//! | `SpeculativeConfig`  | `speculative.rs`  |
38//! | `SpeculativeEngine`  | `speculative.rs`  |
39//! | `Lora`               | `lora.rs`         |
40
41pub mod async_support;
42pub mod callback;
43pub mod cancel;
44pub mod chat_template;
45pub mod dlpack;
46pub mod engine;
47pub mod error;
48#[cfg(feature = "hub")]
49pub mod hub;
50pub mod lora;
51pub mod sampler;
52pub mod snapshot;
53pub mod speculative;
54pub mod tokenizer;
55pub mod torch_interop;
56
57use pyo3::prelude::*;
58
59/// The `oxillama_py` Python extension module.
60///
61/// Registers all public Python classes.
62#[pymodule]
63fn oxillama_py(m: &Bound<'_, PyModule>) -> PyResult<()> {
64    m.add_class::<engine::PyEngineConfig>()?;
65    m.add_class::<engine::PyEngine>()?;
66    m.add_class::<async_support::PyAsyncEngine>()?;
67    m.add_class::<sampler::PySamplerConfig>()?;
68    m.add_class::<speculative::PySpeculativeConfig>()?;
69    m.add_class::<speculative::PySpeculativeEngine>()?;
70    m.add_class::<lora::PyLora>()?;
71    m.add_class::<tokenizer::PyTokenizer>()?;
72    m.add_class::<cancel::PyCancellationToken>()?;
73    m.add_class::<snapshot::PySnapshotInfo>()?;
74    error::register_exceptions(m)?;
75    Ok(())
76}