alith_devices/devices/
mod.rs

1use cpu::CpuConfig;
2#[cfg(any(target_os = "linux", target_os = "windows"))]
3pub use cuda::CudaConfig;
4use gpu::GpuLayerAllocator;
5#[cfg(target_os = "macos")]
6pub use metal::MetalConfig;
7use ram::RamConfig;
8pub mod cpu;
9#[cfg(any(target_os = "linux", target_os = "windows"))]
10pub mod cuda;
11pub mod gpu;
12#[cfg(target_os = "macos")]
13pub mod metal;
14pub mod ram;
15
16/// Configuration for device-specific settings in LLM inference.
17#[derive(Debug, Clone)]
18pub struct DeviceConfig {
19    /// CPU configuration for thread count.
20    pub cpu_config: CpuConfig,
21
22    /// RAM configuration for non-GPU inference on Windows and Unix.
23    ///
24    /// This setting is used when GPU acceleration is not available or not enabled.
25    pub ram_config: RamConfig,
26
27    /// Indicates whether to use any available GPUs for inference.
28    ///
29    /// If true, the system will attempt to use GPU acceleration.
30    /// If false, inference will be performed on CPU only.
31    pub use_gpu: bool,
32
33    /// CUDA configuration for GPU inference on non-macOS platforms.
34    ///
35    /// This field is only available on platforms other than macOS.
36    /// If None, default CUDA settings will be used when GPU is enabled.
37    #[cfg(any(target_os = "linux", target_os = "windows"))]
38    pub cuda_config: Option<CudaConfig>,
39
40    /// Metal configuration for GPU inference on macOS.
41    ///
42    /// This field is only available on macOS.
43    /// If None, default Metal settings will be used when GPU is enabled.
44    #[cfg(target_os = "macos")]
45    pub metal_config: Option<MetalConfig>,
46
47    /// Determines error handling behavior for configuration issues.
48    ///
49    /// If true, the system will return an error when encountering configuration issues.
50    /// If false (default), issues will be logged and execution will continue if possible.
51    ///
52    /// This flag is useful for debugging purposes.
53    pub error_on_config_issue: bool,
54
55    /// The number of layers in the model.
56    ///
57    /// This is set at runtime.
58    pub layer_count: Option<u64>,
59
60    /// The average size of a layer in bytes.
61    ///
62    /// This is set at runtime.
63    pub average_layer_size_bytes: Option<u64>,
64
65    /// The file system path to the local model.
66    ///
67    /// This is set at runtime.
68    pub local_model_path: String,
69}
70
71impl Default for DeviceConfig {
72    fn default() -> Self {
73        Self {
74            cpu_config: CpuConfig::default(),
75            ram_config: RamConfig::default(),
76            use_gpu: true,
77            #[cfg(any(target_os = "linux", target_os = "windows"))]
78            cuda_config: None,
79            #[cfg(target_os = "macos")]
80            metal_config: None,
81            error_on_config_issue: false,
82            layer_count: None,
83            average_layer_size_bytes: None,
84            local_model_path: Default::default(),
85        }
86    }
87}
88
89impl DeviceConfig {
90    pub fn initialize(&mut self) -> crate::Result<()> {
91        self.cpu_config.initialize(self.error_on_config_issue)?;
92        #[cfg(any(target_os = "linux", target_os = "windows"))]
93        {
94            self.initialize_unix_windows()?;
95        }
96
97        #[cfg(target_os = "macos")]
98        {
99            self.initialize_mac()?;
100        }
101
102        #[cfg(not(any(unix, windows, target_os = "macos")))]
103        {
104            crate::bail!("Unsupported OS");
105        }
106        crate::info!("{}", self);
107        Ok(())
108    }
109
110    #[cfg(any(target_os = "linux", target_os = "windows"))]
111    fn initialize_unix_windows(&mut self) -> crate::Result<()> {
112        if self.use_gpu {
113            if self.cuda_config.is_none() {
114                let cuda_config = CudaConfig::default();
115                self.cuda_config = Some(cuda_config);
116            }
117            if let Some(cuda_config) = &mut self.cuda_config {
118                match cuda_config.initialize(self.error_on_config_issue) {
119                    Ok(_) => (),
120                    Err(e) => {
121                        if self.error_on_config_issue {
122                            crate::warn!("{}", cuda_config);
123                            crate::bail!("Failed to initialize CUDA devices: {}", e);
124                        } else {
125                            crate::warn!("{}", cuda_config);
126                            crate::warn!("Failed to initialize CUDA devices: {}", e);
127                            crate::warn!("Falling back to CPU");
128                            self.use_gpu = false;
129                        }
130                    }
131                }
132            }
133        }
134        if !self.use_gpu {
135            self.cuda_config = None;
136            self.ram_config.initialize(self.error_on_config_issue)?;
137        }
138        Ok(())
139    }
140
141    #[cfg(target_os = "macos")]
142    fn initialize_mac(&mut self) -> crate::Result<()> {
143        if self.use_gpu {
144            if self.metal_config.is_none() {
145                let metal_config = MetalConfig::default();
146                self.metal_config = Some(metal_config);
147            }
148            if let Some(metal_config) = &mut self.metal_config {
149                match metal_config.initialize(self.error_on_config_issue) {
150                    Ok(_) => {
151                        crate::info!("Successfully initialized: {}", metal_config);
152                    }
153                    Err(e) => {
154                        if self.error_on_config_issue {
155                            crate::warn!("{}", metal_config);
156                            crate::bail!("Failed to initialize Metal: {}", e);
157                        } else {
158                            crate::warn!("{}", metal_config);
159                            crate::warn!("Failed to initialize Metal: {}", e);
160                            crate::warn!("Falling back to CPU");
161                            self.use_gpu = false;
162                        }
163                    }
164                }
165            }
166        }
167        if !self.use_gpu {
168            self.metal_config = None;
169            self.ram_config.initialize(self.error_on_config_issue)?;
170        }
171        Ok(())
172    }
173
174    pub fn available_memory_bytes(&self) -> crate::Result<u64> {
175        #[cfg(any(target_os = "linux", target_os = "windows"))]
176        if let Some(cuda_config) = &self.cuda_config {
177            Ok(cuda_config.total_vram_bytes)
178        } else {
179            Ok(self.ram_config.use_ram_bytes)
180        }
181        #[cfg(target_os = "macos")]
182        if let Some(metal_config) = &self.metal_config {
183            Ok(metal_config.use_ram_bytes)
184        } else {
185            Ok(self.ram_config.use_ram_bytes)
186        }
187
188        #[cfg(not(any(unix, windows, target_os = "macos")))]
189        {
190            crate::bail!("Unsupported OS");
191        }
192    }
193
194    pub fn average_layer_size_bytes(&self) -> crate::Result<u64> {
195        match self.average_layer_size_bytes {
196            Some(size) => Ok(size),
197            None => crate::bail!("Average layer size not set"),
198        }
199    }
200
201    pub fn layer_count(&self) -> crate::Result<u64> {
202        match self.layer_count {
203            Some(count) => Ok(count),
204            None => crate::bail!("Layer count not set"),
205        }
206    }
207
208    pub fn main_gpu(&self) -> crate::Result<u32> {
209        #[cfg(any(target_os = "linux", target_os = "windows"))]
210        if let Some(cuda_config) = &self.cuda_config {
211            cuda_config.main_gpu(self.error_on_config_issue)
212        } else {
213            crate::bail!("No GPUs available")
214        }
215        #[cfg(target_os = "macos")]
216        if self.metal_config.is_some() {
217            Ok(1)
218        } else {
219            Ok(0)
220        }
221        #[cfg(not(any(unix, windows, target_os = "macos")))]
222        {
223            crate::bail!("Unsupported OS");
224        }
225    }
226
227    pub fn gpu_count(&self) -> usize {
228        #[cfg(any(target_os = "linux", target_os = "windows"))]
229        if let Some(cuda_config) = &self.cuda_config {
230            cuda_config.device_count()
231        } else {
232            0
233        }
234        #[cfg(target_os = "macos")]
235        if self.metal_config.is_some() {
236            1
237        } else {
238            0
239        }
240        #[cfg(not(any(unix, windows, target_os = "macos")))]
241        {
242            crate::bail!("Unsupported OS");
243        }
244    }
245
246    pub fn allocate_layers_to_gpus(
247        &self,
248        buffer_layer_per_gpu: u64,
249        buffer_layer_main_gpu: u64,
250    ) -> crate::Result<Vec<gpu::GpuDevice>> {
251        #[cfg(any(target_os = "linux", target_os = "windows"))]
252        let mut gpu_devices: Vec<gpu::GpuDevice> = if let Some(cuda_config) = &self.cuda_config {
253            cuda_config.to_generic_gpu_devices(self.error_on_config_issue)?
254        } else {
255            crate::bail!("No GPUs available")
256        };
257        #[cfg(target_os = "macos")]
258        let mut gpu_devices: Vec<gpu::GpuDevice> = if let Some(metal_config) = &self.metal_config {
259            vec![metal_config.to_generic_gpu_device()]
260        } else {
261            crate::bail!("No GPUs available")
262        };
263        #[cfg(not(any(unix, windows, target_os = "macos")))]
264        {
265            crate::bail!("Unsupported OS");
266        }
267        let allocator = GpuLayerAllocator::new(
268            self.average_layer_size_bytes()?,
269            self.layer_count()?,
270            buffer_layer_per_gpu,
271            buffer_layer_main_gpu,
272        );
273        allocator.allocate(&mut gpu_devices)?;
274        Ok(gpu_devices)
275    }
276}
277
278impl std::fmt::Display for DeviceConfig {
279    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
280        writeln!(f)?;
281        write!(f, "DeviceConfig:")?;
282        crate::i_ln(f, format_args!("{}", self.cpu_config))?;
283        crate::i_ln(f, format_args!("{}", self.ram_config))?;
284        crate::i_ln(f, format_args!("use_gpu: {}", self.use_gpu))?;
285
286        #[cfg(any(target_os = "linux", target_os = "windows"))]
287        if let Some(cuda_config) = &self.cuda_config {
288            crate::i_ln(f, format_args!("{}", cuda_config))?;
289        }
290        #[cfg(target_os = "macos")]
291        if let Some(metal_config) = &self.metal_config {
292            crate::i_ln(f, format_args!("{}", metal_config))?;
293        }
294        crate::i_ln(
295            f,
296            format_args!("error_on_config_issue: {}", self.error_on_config_issue),
297        )?;
298        if let Some(layer_count) = self.layer_count {
299            crate::i_ln(f, format_args!("layer_count: {}", layer_count))?;
300        }
301        if let Some(average_layer_size_bytes) = self.average_layer_size_bytes {
302            crate::i_ln(
303                f,
304                format_args!("average_layer_size_bytes: {}", average_layer_size_bytes),
305            )?;
306        }
307
308        Ok(())
309    }
310}