tritonserver_rs/lib.rs
//! # **Perform easy and efficient ML model inference**
//!
//! This crate is designed to run **any** Machine Learning model on **any** architecture with ease and efficiency.
//! It leverages the [Triton Inference Server](https://github.com/triton-inference-server/server)
//! (specifically the [Triton C library](https://github.com/triton-inference-server/core)) and provides a similar API with comparable advantages.
//! However, **Tritonserver-rs** allows you to build the inference server locally, offering significant performance benefits.
//! Check the [benchmark](https://github.com/3xMike/tritonserver-rs/blob/main/BENCH.md) for more details.
//!
//! ---
//!
//! # Usage
//!
//! Run inference in three simple steps:
//!
//! ## **Step 1. Prepare the model repository**
//!
//! Organize your model files in the following structure:
//!
//! ```text
//! models/
//! ├── yolov8/
//! |   ├── config.pbtxt
//! |   ├── 1/
//! |   │   └── model.onnx
//! |   ├── 2/
//! |   │   └── model.onnx
//! |   └── `<other versions of yolov8>`/
//! └── `<other models>`/
//! ```
//!
//! **Rules**:
//! - All models must be stored in the same root directory (`models/` in this example).
//! - Each model resides in its own folder containing:
//!   - A `config.pbtxt` configuration file (see the example below).
//!   - One or more subdirectories, each representing a version of the model and containing the model file (e.g., `model.onnx`).
//!
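//! A minimal `config.pbtxt` for the `yolov8` model above might look like the sketch below. The tensor names, data types, and dimensions are illustrative and must match your actual model; see the [model configuration guide](https://github.com/3xMike/tritonserver-rs/blob/main/MODEL_CONFIGURATION.md) for the full format.
//!
//! ```text
//! name: "yolov8"
//! backend: "onnxruntime"
//! max_batch_size: 1
//! input [
//!   {
//!     name: "IMAGE"
//!     data_type: TYPE_UINT8
//!     dims: [ -1, -1, 3 ]
//!   }
//! ]
//! output [
//!   {
//!     name: "OUTPUT"
//!     data_type: TYPE_FP32
//!     dims: [ -1, -1 ]
//!   }
//! ]
//! ```
//!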
//! ---
//!
//! ## **Step 2. Write the code**
//!
//! Add **Tritonserver-rs** to your `Cargo.toml`:
//!
//! ```toml
//! [dependencies]
//! tritonserver-rs = "0.1"
//! ```
//!
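//! If you plan to handle model inputs and outputs directly in GPU memory, enable the crate's `gpu` feature, which gates the CUDA-related items (the `context` module, `get_context`, and `init_cuda`). For example:
//!
//! ```toml
//! [dependencies]
//! tritonserver-rs = { version = "0.1", features = ["gpu"] }
//! ```
//!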
//! Then write your application code:
//!
//! ```no_run
//! # async fn run() -> Result<(), Box<dyn std::error::Error>> {
//! use tritonserver_rs::{Buffer, options::Options, Server};
//! use std::time::Duration;
//!
//! // Configure server options.
//! let mut opts = Options::new("models/")?;
//!
//! opts.exit_timeout(Duration::from_secs(5))?
//!     .backend_directory("/opt/tritonserver/backends")?;
//!
//! // Create the server.
//! let server = Server::new(opts).await?;
//!
//! // Input data.
//! let image = image::open("/data/cats.jpg")?;
//! let image = image.as_flat_samples_u8();
//!
//! // Create a request (specify the model name and version).
//! let mut request = server.create_request("yolov8", 2)?;
//!
//! // Add input data and an allocator.
//! request
//!     .add_default_allocator()
//!     .add_input("IMAGE", Buffer::from(image))?;
//!
//! // Run inference.
//! let fut = request.infer_async()?;
//!
//! // Obtain results.
//! let response = fut.await?;
//! # Ok(())
//! # }
//! ```
//!
//! ---
//!
//! ## **Step 3. Deploy**
//!
//! Here is an example of how to deploy using `docker-compose.yml`:
//!
//! ```yml
//! my_app:
//!   image: {DEV_IMAGE}
//!   volumes:
//!     - ./Cargo.toml:/project/Cargo.toml
//!     - ./src:/project/src
//!     - ../models:/models
//!     - ../cats.jpg:/data/cats.jpg
//!   entrypoint: ["cargo", "run", "--manifest-path=/project/Cargo.toml"]
//! ```
//!
//! We recommend using `Dockerfile.dev` as `{DEV_IMAGE}`. For more details on suitable images and deployment instructions, see [DEPLOY.md](https://github.com/3xMike/tritonserver-rs/blob/main/DEPLOY.md).
//!
//! ---
//!
//! # **More Information**
//!
//! For further details, check out the following resources (in the [GitHub repo](https://github.com/3xMike/tritonserver-rs/blob/main)):
//! - [Examples](https://github.com/3xMike/tritonserver-rs/blob/main/examples): Learn how to run various ML models using **Tritonserver-rs**, configure inference, prepare models, and deploy.
//! - [Model configuration guide](https://github.com/3xMike/tritonserver-rs/blob/main/MODEL_CONFIGURATION.md).
//! - [Build and deployment instructions](https://github.com/3xMike/tritonserver-rs/blob/main/DEPLOY.md).
//! - [Benchmark results](https://github.com/3xMike/tritonserver-rs/blob/main/BENCH.md).
//! - [Triton Inference Server guides](https://github.com/triton-inference-server/server/tree/main/docs/README.md).
//!
//! ---
//!
//! # **Advantages of the Crate**
//!
//! - **Versatility**: Extensive configuration options for models and servers.
//! - **High performance**: Optimized for maximum efficiency.
//! - **Broad backend support**: Run PyTorch, ONNX, TensorFlow, TensorRT, OpenVINO, model pipelines, and custom backends out of the box.
//! - **Compatibility**: Supports most GPUs and architectures.
//! - **Multi-model handling**: Serve multiple models simultaneously.
//! - **Prometheus integration**: Built-in support for monitoring.
//! - **CUDA-optimized**: Handle model inputs and outputs directly in GPU memory.
//! - **Dynamic server management**: Advanced runtime control features.
//! - **Rust-based**: Enjoy the safety, speed, and concurrency benefits of Rust.
//!
//! # Tritonserver C-lib API version
//!
//! `1.33` (minimal `TRITON_CONTAINER_VERSION=23.07`).
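//!
//! To check which API version the linked Triton library actually exposes at runtime, use [`api_version`]. A minimal sketch (the exact compatibility policy is up to you; a common rule is an equal major version and at least the same minor version):
//!
//! ```no_run
//! use tritonserver_rs::{api_version, TRITONSERVER_API_VERSION_MAJOR, TRITONSERVER_API_VERSION_MINOR};
//!
//! let (major, minor) = api_version().expect("failed to query the Triton API version");
//! assert_eq!(major, TRITONSERVER_API_VERSION_MAJOR);
//! assert!(minor >= TRITONSERVER_API_VERSION_MINOR);
//! ```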

#![allow(clippy::bad_bit_mask)]

/// Macros to run CUDA operations in context.
#[macro_use]
pub mod macros;

pub(crate) mod allocator;
#[cfg(feature = "gpu")]
/// CUDA context for managing device execution.
pub mod context;
/// Error types for Tritonserver-rs.
pub mod error;
/// Memory management utilities for model inference.
pub mod memory;
/// Metadata message serialization/deserialization.
pub mod message;
/// Performance metrics collection and reporting.
pub mod metrics;
/// Configuration options for the Tritonserver-rs server.
pub mod options;
/// Parameters for inference requests and the server.
pub mod parameter;
/// Request builder and utilities for Triton server inference.
pub mod request;
/// Response handling and parsing from the Triton server.
pub mod response;
/// Server initialization and lifecycle management.
pub mod server;
pub(crate) mod sys {
    #![allow(
        non_camel_case_types,
        non_upper_case_globals,
        non_snake_case,
        dead_code,
        unused_imports,
        rustdoc::invalid_html_tags
    )]
    include!(concat!(env!("OUT_DIR"), "/tritonserver.rs"));
}
/// Inference request tracing.
pub mod trace;

pub use crate::{
    error::{Error, ErrorCode},
    memory::{Buffer, MemoryType},
    request::{Allocator, Request},
    response::Response,
    server::Server,
    sys::{TRITONSERVER_API_VERSION_MAJOR, TRITONSERVER_API_VERSION_MINOR},
};
#[cfg(feature = "gpu")]
pub use context::{get_context, init_cuda};

use std::{
    ffi::{CStr, CString},
    os::{raw::c_char, unix::ffi::OsStrExt as _},
    path::Path,
};

/// Get the TRITONSERVER API version supported by the Triton shared library.
///
/// This value can be compared against the `TRITONSERVER_API_VERSION_MAJOR` and `TRITONSERVER_API_VERSION_MINOR`
/// constants used to build the client to ensure that the Triton shared library is compatible with the client.
pub fn api_version() -> Result<(u32, u32), Error> {
    let mut major: u32 = 0;
    let mut minor: u32 = 0;

    triton_call!(
        sys::TRITONSERVER_ApiVersion(&mut major as *mut _, &mut minor as *mut _),
        (major, minor)
    )
}

/// Converts a Rust string into a `CString`, returning an `InvalidArg` error
/// if the string contains interior NUL bytes.
pub(crate) fn to_cstring<S: AsRef<str>>(value: S) -> Result<CString, Error> {
    CString::new(value.as_ref().as_bytes())
        .map_err(|err| Error::new(ErrorCode::InvalidArg, err.to_string()))
}

/// Canonicalizes a filesystem path and converts it into a `CString`,
/// returning an `InvalidArg` error if either step fails.
pub(crate) fn path_to_cstring<P: AsRef<Path>>(value: P) -> Result<CString, Error> {
    value
        .as_ref()
        .canonicalize()
        .map_err(|err| Error::new(ErrorCode::InvalidArg, err.to_string()))
        .and_then(|path| {
            CString::new(path.as_os_str().as_bytes())
                .map_err(|err| Error::new(ErrorCode::InvalidArg, err.to_string()))
        })
}

/// Copies a NUL-terminated C string into an owned `String`,
/// substituting a placeholder if the bytes are not valid UTF-8.
///
/// Panics if `value` is null.
pub(crate) fn from_char_array(value: *const c_char) -> String {
    assert!(!value.is_null());
    unsafe { CStr::from_ptr(value) }
        .to_str()
        .unwrap_or(error::CSTR_CONVERT_ERROR_PLUG)
        .to_string()
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn api() {
        let (major, minor) = api_version().unwrap();

        assert_eq!(major, TRITONSERVER_API_VERSION_MAJOR);
        assert_eq!(minor, TRITONSERVER_API_VERSION_MINOR);
    }
}