llama_cpp_4/rpc/server.rs
1//! RPC server for hosting backends
2
3use crate::rpc::error::RpcError;
4use llama_cpp_sys_4 as sys;
5use std::ffi::CString;
6use std::ptr::NonNull;
7
/// RPC server for hosting a backend that can be accessed remotely
///
/// The value only records the backend pointer and the endpoint string that
/// were passed to [`RpcServer::start`].
// NOTE(review): no `Drop` impl is visible in this file, so this type does not
// appear to free the backend — confirm who owns the `ggml_backend`.
pub struct RpcServer {
    /// Non-null raw pointer to the ggml backend exposed over RPC.
    backend: NonNull<sys::ggml_backend>,
    /// Endpoint string exactly as supplied by the caller (e.g. "0.0.0.0:50052").
    endpoint: String,
}
13
14impl RpcServer {
15 /// Start an RPC server for the given backend
16 ///
17 /// # Arguments
18 /// * `backend` - The backend to expose via RPC
19 /// * `endpoint` - The endpoint to listen on (e.g., "0.0.0.0:50052")
20 /// * `free_mem` - Amount of free memory to advertise (0 for auto)
21 /// * `total_mem` - Total memory to advertise (0 for auto)
22 ///
23 /// # Example
24 /// ```no_run
25 /// use llama_cpp_4::rpc::RpcServer;
26 ///
27 /// // Assuming you have a backend initialized
28 /// let server = RpcServer::start(
29 /// backend,
30 /// "0.0.0.0:50052",
31 /// 0,
32 /// 0,
33 /// )?;
34 /// ```
35 pub fn start(
36 backend: NonNull<sys::ggml_backend>,
37 endpoint: &str,
38 free_mem: usize,
39 total_mem: usize,
40 ) -> Result<Self, RpcError> {
41 let c_endpoint = CString::new(endpoint).map_err(|e| RpcError::StringConversion(e))?;
42
43 unsafe {
44 sys::ggml_backend_rpc_start_server(
45 backend.as_ptr(),
46 c_endpoint.as_ptr(),
47 free_mem,
48 total_mem,
49 );
50 }
51
52 Ok(Self {
53 backend,
54 endpoint: endpoint.to_string(),
55 })
56 }
57
58 /// Get the endpoint this server is listening on
59 pub fn endpoint(&self) -> &str {
60 &self.endpoint
61 }
62
63 /// Get the backend this server is hosting
64 pub fn backend(&self) -> NonNull<sys::ggml_backend> {
65 self.backend
66 }
67}
68
69impl std::fmt::Debug for RpcServer {
70 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
71 f.debug_struct("RpcServer")
72 .field("endpoint", &self.endpoint)
73 .finish()
74 }
75}
76
// `RpcServer` stores a `NonNull<sys::ggml_backend>`, and `NonNull` is neither
// `Send` nor `Sync`, so both marker traits have to be asserted by hand here.
//
// SAFETY: NOTE(review) — nothing in this file demonstrates that a ggml backend
// may be moved to another thread; this relies on ggml's threading guarantees.
// Confirm before depending on it.
unsafe impl Send for RpcServer {}
// SAFETY: NOTE(review) — `&RpcServer` only exposes the endpoint string and a
// copy of the raw pointer (see `backend()`); whether concurrent use of that
// pointer is sound depends on ggml and is not established here. Confirm.
unsafe impl Sync for RpcServer {}
81
82/// Add a new RPC device
83///
84/// This function registers a new RPC device that can be used for inference.
85///
86/// # Arguments
87/// * `endpoint` - The RPC server endpoint to connect to
88///
89/// # Returns
90/// The device handle if successful
91pub fn add_rpc_device(endpoint: &str) -> Result<NonNull<sys::ggml_backend_device>, RpcError> {
92 let c_endpoint = CString::new(endpoint).map_err(|e| RpcError::StringConversion(e))?;
93
94 let device = unsafe { sys::ggml_backend_rpc_add_device(c_endpoint.as_ptr()) };
95
96 NonNull::new(device).ok_or_else(|| RpcError::InitializationFailed {
97 endpoint: endpoint.to_string(),
98 })
99}