tritonserver_rs/lib.rs
//! # **Perform easy and efficient ML model inference**
//!
//! This crate is designed to run **any** Machine Learning model on **any** architecture with ease and efficiency.
//! It leverages the [Triton Inference Server](https://github.com/triton-inference-server/server)
//! (specifically the [Triton C library](https://github.com/triton-inference-server/core)) and provides a similar API with comparable advantages.
//! However, **Tritonserver-rs** allows you to build the inference server locally, offering significant performance benefits.
//! Check the [benchmark](https://github.com/3xMike/tritonserver-rs/blob/main/BENCH.md) for more details.
//!
//! ---
//!
//! # Usage
//!
//! Run inference in three simple steps:
//!
//! ## **Step 1. Prepare the model repository**
//!
//! Organize your model files in the following structure:
//!
//! ```text
//! models/
//! ├── yolov8/
//! │   ├── config.pbtxt
//! │   ├── 1/
//! │   │   └── model.onnx
//! │   ├── 2/
//! │   │   └── model.onnx
//! │   └── <other versions of yolov8>/
//! └── <other models>/
//! ```
//!
//! **Rules**:
//! - All models must be stored in the same root directory (`models/` in this example).
//! - Each model resides in its own folder containing:
//!   - A `config.pbtxt` configuration file (a minimal sketch is shown below).
//!   - One or more subdirectories, each representing a version of the model and containing the model file (e.g., `model.onnx`).
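//!
//! For the `yolov8` layout above, a `config.pbtxt` might look roughly like the following sketch. The backend name, data types, and dimensions here are illustrative assumptions; see the [model configuration guide](https://github.com/3xMike/tritonserver-rs/blob/main/MODEL_CONFIGURATION.md) for the authoritative schema.
//!
//! ```text
//! name: "yolov8"
//! backend: "onnxruntime"
//! max_batch_size: 1
//! input [
//!   {
//!     name: "IMAGE"
//!     data_type: TYPE_UINT8
//!     dims: [ 3, 640, 640 ]
//!   }
//! ]
//! output [
//!   {
//!     name: "OUTPUT"
//!     data_type: TYPE_FP32
//!     dims: [ 84, 8400 ]
//!   }
//! ]
//! ```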
//!
//! ---
//!
//! ## **Step 2. Write the code**
//!
//! Add **Tritonserver-rs** to your `Cargo.toml`:
//!
//! ```toml
//! [dependencies]
//! tritonserver-rs = "0.1"
//! ```
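//!
//! If you want to pass inputs or receive outputs directly in CUDA device memory, you will likely also need the crate's `gpu` feature (the feature name is taken from the `cfg(feature = "gpu")` gates in this crate; treat the exact dependency line as a sketch):
//!
//! ```toml
//! [dependencies]
//! tritonserver-rs = { version = "0.1", features = ["gpu"] }
//! ```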
//!
//! Then write your application code:
//!
//! ```rust
//! use tritonserver_rs::{Buffer, options::Options, Server};
//! use std::time::Duration;
//!
//! // Configure server options.
//! let mut opts = Options::new("models/")?;
//!
//! opts.exit_timeout(Duration::from_secs(5))?
//!     .backend_directory("/opt/tritonserver/backends")?;
//!
//! // Create the server.
//! let server = Server::new(opts).await?;
//!
//! // Input data.
//! let image = image::open("/data/cats.jpg")?;
//! let image = image.as_flat_samples_u8();
//!
//! // Create a request (specify the model name and version).
//! let mut request = server.create_request("yolov8", 2)?;
//!
//! // Add input data and an allocator.
//! request
//!     .add_default_allocator()
//!     .add_input("IMAGE", Buffer::from(image))?;
//!
//! // Run inference.
//! let fut = request.infer_async()?;
//!
//! // Obtain results.
//! let response = fut.await?;
//! ```
//!
//! ---
//!
//! ## **Step 3. Deploy**
//!
//! Here is an example of how to deploy using `docker-compose.yml`:
//!
//! ```yml
//! my_app:
//!   image: {DEV_IMAGE}
//!   volumes:
//!     - ./Cargo.toml:/project/Cargo.toml
//!     - ./src:/project/src
//!     - ../models:/models
//!     - ../cats.jpg:/data/cats.jpg
//!   entrypoint: ["cargo", "run", "--manifest-path=/project/Cargo.toml"]
//! ```
//!
//! We recommend using an image built from the repository's Dockerfile.dev as `{DEV_IMAGE}`. For more details on suitable images and deployment instructions, see [DEPLOY.md](https://github.com/3xMike/tritonserver-rs/blob/main/DEPLOY.md).
//!
//! ---
//!
//! # **More Information**
//!
//! For further details, check out the following resources in the [GitHub repo](https://github.com/3xMike/tritonserver-rs/blob/main):
//! - [Examples](https://github.com/3xMike/tritonserver-rs/blob/main/examples): Learn how to run various ML models using **Tritonserver-rs**, configure inference, prepare models, and deploy.
//! - [Model configuration guide](https://github.com/3xMike/tritonserver-rs/blob/main/MODEL_CONFIGURATION.md).
//! - [Build and deployment instructions](https://github.com/3xMike/tritonserver-rs/blob/main/DEPLOY.md).
//! - [Benchmark results](https://github.com/3xMike/tritonserver-rs/blob/main/BENCH.md).
//! - [Triton Inference Server guides](https://github.com/triton-inference-server/server/tree/main/docs/README.md).
//!
//! ---
//!
//! # **Advantages of the Crate**
//!
//! - **Versatility**: Extensive configuration options for models and servers.
//! - **High performance**: Optimized for maximum efficiency.
//! - **Broad backend support**: Run PyTorch, ONNX, TensorFlow, TensorRT, OpenVINO, model pipelines, and custom backends out of the box.
//! - **Compatibility**: Supports most GPUs and architectures.
//! - **Multi-model serving**: Handle multiple models simultaneously.
//! - **Prometheus integration**: Built-in support for monitoring.
//! - **CUDA optimization**: Handle model inputs and outputs directly in GPU memory.
//! - **Dynamic server management**: Advanced runtime control features.
//! - **Rust-based**: Enjoy the safety, speed, and concurrency benefits of Rust.
//!
//! # Tritonserver C-lib API version
//!
//! `1.33` (minimum TRITON_CONTAINER_VERSION: 23.07).
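//!
//! To check at runtime that the linked Triton shared library is compatible with the version this crate was built against, compare the reported API version with the re-exported constants. A minimal sketch using this crate's `api_version` helper:
//!
//! ```rust,no_run
//! use tritonserver_rs::{api_version, TRITONSERVER_API_VERSION_MAJOR, TRITONSERVER_API_VERSION_MINOR};
//!
//! let (major, minor) = api_version().expect("failed to query the Triton API version");
//! assert_eq!((major, minor), (TRITONSERVER_API_VERSION_MAJOR, TRITONSERVER_API_VERSION_MINOR));
//! ```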

#![allow(clippy::bad_bit_mask)]
#![allow(clippy::arc_with_non_send_sync)]

/// Macros to run Cuda operations within a Cuda context.
#[macro_use]
pub mod macros;

pub(crate) mod allocator;
#[cfg(feature = "gpu")]
/// Cuda context for managing device execution.
pub mod context;
/// Error types for Tritonserver-rs.
pub mod error;
/// Memory management utilities for model inference.
pub mod memory;
/// Metadata message serialization/deserialization.
pub mod message;
/// Performance metrics collection and reporting.
pub mod metrics;
/// Configuration options for the Tritonserver-rs server.
pub mod options;
/// Parameters for inference requests and the server.
pub mod parameter;
/// Request builder and utilities for Triton server inference.
pub mod request;
/// Handling and parsing of responses from the Triton server.
pub mod response;
/// Server initialization and lifecycle management.
pub mod server;
pub(crate) mod sys {
    #![allow(
        non_camel_case_types,
        non_upper_case_globals,
        non_snake_case,
        dead_code,
        unused_imports,
        rustdoc::invalid_html_tags
    )]
    include!(concat!(env!("OUT_DIR"), "/tritonserver.rs"));
}
/// Inference tracing utilities.
pub mod trace;

pub use crate::{
    error::{Error, ErrorCode},
    macros::{run_in_context, run_in_context_sync},
    memory::{Buffer, MemoryType},
    request::{Allocator, Request},
    response::Response,
    server::Server,
    sys::{TRITONSERVER_API_VERSION_MAJOR, TRITONSERVER_API_VERSION_MINOR},
};
#[cfg(feature = "gpu")]
pub use context::{get_context, init_cuda};

use std::{
    ffi::{CStr, CString},
    os::{raw::c_char, unix::ffi::OsStrExt as _},
    path::Path,
};

/// Get the TRITONSERVER API version supported by the Triton library.
///
/// This value can be compared against `TRITONSERVER_API_VERSION_MAJOR` and `TRITONSERVER_API_VERSION_MINOR` (the values used to build the client) to ensure that the Triton shared library is compatible with the client.
pub fn api_version() -> Result<(u32, u32), Error> {
    let mut major: u32 = 0;
    let mut minor: u32 = 0;

    triton_call!(
        sys::TRITONSERVER_ApiVersion(&mut major as *mut _, &mut minor as *mut _),
        (major, minor)
    )
}

pub(crate) fn to_cstring<S: AsRef<str>>(value: S) -> Result<CString, Error> {
    CString::new(value.as_ref().as_bytes())
        .map_err(|err| Error::new(ErrorCode::InvalidArg, err.to_string()))
}

pub(crate) fn path_to_cstring<P: AsRef<Path>>(value: P) -> Result<CString, Error> {
    value
        .as_ref()
        .canonicalize()
        .map_err(|err| Error::new(ErrorCode::InvalidArg, err.to_string()))
        .and_then(|path| {
            CString::new(path.as_os_str().as_bytes())
                .map_err(|err| Error::new(ErrorCode::InvalidArg, err.to_string()))
        })
}

pub(crate) fn from_char_array(value: *const c_char) -> String {
    assert!(!value.is_null());
    unsafe { CStr::from_ptr(value) }
        .to_str()
        .unwrap_or(error::CSTR_CONVERT_ERROR_PLUG)
        .to_string()
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn api() {
        let (major, minor) = api_version().unwrap();

        assert_eq!(major, TRITONSERVER_API_VERSION_MAJOR);
        assert_eq!(minor, TRITONSERVER_API_VERSION_MINOR);
    }
}