Skip to main content

inferd_engine/
lib.rs

1//! Backend trait and adapters for inferd.
2//!
3//! See ADR 0005 (engine consumed via FFI), ADR 0007 (routing), and
4//! `docs/ai.internals.explained.md` for the architectural framing.
5//!
6//! v0.1 ships:
7//! - `mock` — deterministic test double, always available.
8//! - `llamacpp` — FFI to vendored `libllama` (gated behind the `llamacpp`
9//!   cargo feature; lands in M2a).
10
11// `deny` rather than `forbid` so the FFI module (M2a, gated behind the
12// `llamacpp` feature) can scope an inner `#![allow(unsafe_code)]` to the
13// generated bindings. Every other module in the crate is unsafe-free; CI
14// `cargo deny`/clippy lint surfaces any regression.
15#![deny(unsafe_code)]
16#![warn(missing_docs, rust_2018_idioms)]
17
18mod backend;
19#[cfg(feature = "bedrock")]
20pub mod bedrock_invoke;
21#[cfg(feature = "llamacpp")]
22pub(crate) mod ffi;
23#[cfg(feature = "llamacpp")]
24pub mod llamacpp;
25pub mod mock;
26#[cfg(feature = "llamacpp")]
27pub(crate) mod mtmd_ffi;
28#[cfg(feature = "openai")]
29pub mod openai_compat;
30
31pub use backend::{
32    AcceleratorInfo, AcceleratorKind, Backend, BackendCapabilities, EmbedError, EmbedResult,
33    GenerateError, TokenEvent, TokenEventV2, TokenStream, TokenStreamV2,
34};
35
36/// Fallback `max_tokens` for a v2 request whose consumer did not supply
37/// one. Defined here (rather than in `inferd-proto`) because, per ADR 0015,
38/// v2 sampling defaults are backend-specific: the proto crate does not
39/// pick them, the active backend does.
40pub const DEFAULT_V2_MAX_TOKENS: u32 = 1000;