// all_smi/error.rs

// Copyright 2025 Lablup Inc. and Jeongkyu Shin
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14
//! Unified error types for the all-smi library.
//!
//! This module provides a comprehensive error hierarchy for library users,
//! covering platform initialization, device access, and I/O operations.
//!
//! # Example
//!
//! ```rust,no_run
//! use all_smi::{AllSmi, Error, Result};
//!
//! fn main() -> Result<()> {
//!     let smi = AllSmi::new()?;
//!     let gpus = smi.get_gpu_info();
//!     println!("Found {} GPU(s)", gpus.len());
//!     Ok(())
//! }
//! ```
32
33use thiserror::Error;
34
/// The main error type for all-smi library operations.
///
/// This enum covers all possible error conditions that can occur when
/// using the all-smi library, from initialization failures to device
/// access issues.
///
/// `Display` and `std::error::Error` are derived through `thiserror`; each
/// variant's message is the format string given in its `#[error(...)]`
/// attribute. Only the `Io` variant has a `From` conversion, so `?` converts
/// a `std::io::Error` automatically while every other variant has to be
/// constructed explicitly.
#[derive(Debug, Error)]
pub enum Error {
    /// Platform initialization failed.
    ///
    /// This error occurs when the underlying platform-specific libraries
    /// or APIs cannot be initialized (e.g., NVML, IOReport, hl-smi).
    /// The payload is a free-form description appended to the message.
    #[error("Platform initialization failed: {0}")]
    PlatformInit(String),

    /// No supported devices were found on the system.
    ///
    /// This is not necessarily an error condition - it simply indicates
    /// that no GPUs, NPUs, or other accelerators were detected.
    #[error("No supported devices found")]
    NoDevicesFound,

    /// Device access error occurred.
    ///
    /// This error occurs when a device was detected but could not be
    /// accessed or queried for metrics.
    /// The payload is a free-form description appended to the message.
    #[error("Device access error: {0}")]
    DeviceAccess(String),

    /// Permission denied when accessing device resources.
    ///
    /// Some platforms require elevated privileges to access certain
    /// metrics (e.g., AMD GPUs on Linux require sudo access).
    /// The payload is a free-form description appended to the message.
    #[error("Permission denied: {0}")]
    PermissionDenied(String),

    /// Feature not supported on this platform.
    ///
    /// This error is returned when attempting to use functionality
    /// that is not available on the current platform or hardware.
    /// The payload names the unsupported feature and is appended to the
    /// message.
    #[error("Feature not supported on this platform: {0}")]
    NotSupported(String),

    /// An I/O error occurred.
    ///
    /// This wraps standard I/O errors that may occur during file
    /// system operations or process execution.
    ///
    /// The variant is `transparent`: its `Display` output and `source()`
    /// are forwarded to the wrapped `std::io::Error` rather than adding a
    /// prefix of its own.
    #[error(transparent)]
    Io(#[from] std::io::Error),
}
84
85/// A specialized Result type for all-smi operations.
86///
87/// This type alias simplifies error handling by using the library's
88/// unified [`enum@Error`] type.
89///
90/// # Example
91///
92/// ```rust,no_run
93/// use all_smi::{AllSmi, Result};
94///
95/// fn get_gpu_count() -> Result<usize> {
96///     let smi = AllSmi::new()?;
97///     Ok(smi.get_gpu_info().len())
98/// }
99/// ```
100pub type Result<T> = std::result::Result<T, Error>;
101
#[cfg(test)]
mod tests {
    use super::*;

    /// Every variant with a fixed or prefixed message renders exactly the
    /// text declared in its `#[error(...)]` attribute.
    #[test]
    fn test_error_display() {
        let err = Error::PlatformInit("NVML not found".to_string());
        assert_eq!(
            err.to_string(),
            "Platform initialization failed: NVML not found"
        );

        let err = Error::NoDevicesFound;
        assert_eq!(err.to_string(), "No supported devices found");

        let err = Error::DeviceAccess("GPU 0 not responding".to_string());
        assert_eq!(err.to_string(), "Device access error: GPU 0 not responding");

        let err = Error::PermissionDenied("Cannot access /dev/dri".to_string());
        assert_eq!(err.to_string(), "Permission denied: Cannot access /dev/dri");

        let err = Error::NotSupported("ANE metrics".to_string());
        assert_eq!(
            err.to_string(),
            "Feature not supported on this platform: ANE metrics"
        );
    }

    /// `std::io::Error` converts into `Error::Io`, and because the variant is
    /// transparent the rendered message is the I/O error's own text.
    #[test]
    fn test_error_from_io() {
        let io_err = std::io::Error::new(std::io::ErrorKind::NotFound, "file not found");
        let err: Error = io_err.into();
        assert!(matches!(err, Error::Io(_)));
        assert_eq!(err.to_string(), "file not found");
    }

    /// The `?` operator lifts an I/O failure into the crate's `Result` alias
    /// without an explicit `map_err`.
    #[test]
    fn test_io_error_propagates_with_question_mark() {
        fn always_fails() -> Result<()> {
            let failure: std::result::Result<(), std::io::Error> = Err(std::io::Error::new(
                std::io::ErrorKind::NotFound,
                "file not found",
            ));
            failure?;
            Ok(())
        }

        assert!(matches!(always_fails(), Err(Error::Io(_))));
    }

    /// Compile-time check that `Error` can cross thread boundaries.
    #[test]
    fn test_error_is_send_sync() {
        fn assert_send_sync<T: Send + Sync>() {}
        assert_send_sync::<Error>();
    }
}