Skip to main content

llama_cpp_4/rpc/
server.rs

//! RPC server for hosting backends, plus a helper for registering remote RPC devices
2
3use crate::rpc::error::RpcError;
4use llama_cpp_sys_4 as sys;
5use std::ffi::CString;
6use std::ptr::NonNull;
7
8/// RPC server for hosting a backend that can be accessed remotely
9pub struct RpcServer {
10    backend: NonNull<sys::ggml_backend>,
11    endpoint: String,
12}
13
14impl RpcServer {
15    /// Start an RPC server for the given backend
16    ///
17    /// # Arguments
18    /// * `backend` - The backend to expose via RPC
19    /// * `endpoint` - The endpoint to listen on (e.g., "0.0.0.0:50052")
20    /// * `free_mem` - Amount of free memory to advertise (0 for auto)
21    /// * `total_mem` - Total memory to advertise (0 for auto)
22    ///
23    /// # Example
24    /// ```no_run
25    /// use llama_cpp_4::rpc::RpcServer;
26    ///
27    /// // Assuming you have a backend initialized
28    /// let server = RpcServer::start(
29    ///     backend,
30    ///     "0.0.0.0:50052",
31    ///     0,
32    ///     0,
33    /// )?;
34    /// ```
35    pub fn start(
36        backend: NonNull<sys::ggml_backend>,
37        endpoint: &str,
38        free_mem: usize,
39        total_mem: usize,
40    ) -> Result<Self, RpcError> {
41        let c_endpoint = CString::new(endpoint).map_err(|e| RpcError::StringConversion(e))?;
42
43        unsafe {
44            sys::ggml_backend_rpc_start_server(
45                backend.as_ptr(),
46                c_endpoint.as_ptr(),
47                free_mem,
48                total_mem,
49            );
50        }
51
52        Ok(Self {
53            backend,
54            endpoint: endpoint.to_string(),
55        })
56    }
57
58    /// Get the endpoint this server is listening on
59    pub fn endpoint(&self) -> &str {
60        &self.endpoint
61    }
62
63    /// Get the backend this server is hosting
64    pub fn backend(&self) -> NonNull<sys::ggml_backend> {
65        self.backend
66    }
67}
68
69impl std::fmt::Debug for RpcServer {
70    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
71        f.debug_struct("RpcServer")
72            .field("endpoint", &self.endpoint)
73            .finish()
74    }
75}
76
77// Safety: RpcServer can be sent between threads
78unsafe impl Send for RpcServer {}
79// Safety: RpcServer can be shared between threads
80unsafe impl Sync for RpcServer {}
81
82/// Add a new RPC device
83///
84/// This function registers a new RPC device that can be used for inference.
85///
86/// # Arguments
87/// * `endpoint` - The RPC server endpoint to connect to
88///
89/// # Returns
90/// The device handle if successful
91pub fn add_rpc_device(endpoint: &str) -> Result<NonNull<sys::ggml_backend_device>, RpcError> {
92    let c_endpoint = CString::new(endpoint).map_err(|e| RpcError::StringConversion(e))?;
93
94    let device = unsafe { sys::ggml_backend_rpc_add_device(c_endpoint.as_ptr()) };
95
96    NonNull::new(device).ok_or_else(|| RpcError::InitializationFailed {
97        endpoint: endpoint.to_string(),
98    })
99}