use alloc::collections::BTreeMap;
use alloc::vec::Vec;
use lazy_static::lazy_static;
use libm;
use spin::Mutex;
/// S.M.A.R.T. attributes tracked by the health monitor.
///
/// The declaration order matters: the derived `Ord` follows it, and so does
/// the iteration order of any `BTreeMap` keyed by this type.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum SmartAttribute {
    ReallocatedSectors,
    SpinRetry,
    EndToEndErrors,
    ReportedUncorrectable,
    CommandTimeout,
    CurrentPendingSectors,
    OfflineUncorrectable,
    Temperature,
    PowerOnHours,
    TotalLbasWritten,
    TotalLbasRead,
    UdmaCrcErrors,
}

impl SmartAttribute {
    /// Human-readable label for this attribute.
    pub fn name(&self) -> &'static str {
        match *self {
            Self::ReallocatedSectors => "Reallocated Sectors",
            Self::SpinRetry => "Spin Retry",
            Self::EndToEndErrors => "End-to-End Errors",
            Self::ReportedUncorrectable => "Reported Uncorrectable",
            Self::CommandTimeout => "Command Timeout",
            Self::CurrentPendingSectors => "Current Pending Sectors",
            Self::OfflineUncorrectable => "Offline Uncorrectable",
            Self::Temperature => "Temperature",
            Self::PowerOnHours => "Power-On Hours",
            Self::TotalLbasWritten => "Total LBAs Written",
            Self::TotalLbasRead => "Total LBAs Read",
            Self::UdmaCrcErrors => "UDMA CRC Errors",
        }
    }

    /// Raw value above which this attribute is treated as critical.
    ///
    /// The LBA counters use `u64::MAX`, so a strict `>` comparison against
    /// this threshold can never succeed for them.
    pub fn critical_threshold(&self) -> u64 {
        // Arms are grouped by threshold, smallest first.
        match *self {
            Self::EndToEndErrors | Self::ReportedUncorrectable | Self::OfflineUncorrectable => 1,
            Self::SpinRetry | Self::CurrentPendingSectors => 5,
            Self::ReallocatedSectors => 10,
            Self::UdmaCrcErrors => 50,
            // NOTE(review): presumably degrees Celsius — confirm against the data source.
            Self::Temperature => 60,
            Self::CommandTimeout => 100,
            Self::PowerOnHours => 50_000,
            Self::TotalLbasWritten | Self::TotalLbasRead => u64::MAX,
        }
    }

    /// Relative weight of this attribute when per-attribute findings are
    /// combined into a single failure score (1.0 = strongest signal).
    pub fn ml_weight(&self) -> f32 {
        // Arms are listed from the heaviest to the lightest weight.
        match *self {
            Self::ReallocatedSectors => 1.0,
            Self::CurrentPendingSectors => 0.9,
            Self::ReportedUncorrectable => 0.85,
            Self::OfflineUncorrectable => 0.8,
            Self::EndToEndErrors => 0.75,
            Self::SpinRetry => 0.7,
            Self::CommandTimeout => 0.6,
            Self::UdmaCrcErrors => 0.5,
            Self::Temperature => 0.3,
            Self::PowerOnHours => 0.2,
            Self::TotalLbasWritten | Self::TotalLbasRead => 0.1,
        }
    }
}
/// Overall health classification of a disk, ordered from best to worst
/// (the derived `Ord` follows the declaration order).
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum DiskHealth {
    Healthy,
    Warning,
    Critical,
    Failed,
}

impl DiskHealth {
    /// Display name of the health state.
    pub fn name(&self) -> &'static str {
        match *self {
            Self::Healthy => "Healthy",
            Self::Warning => "Warning",
            Self::Critical => "Critical",
            Self::Failed => "Failed",
        }
    }

    /// Name of the colour associated with the health state.
    pub fn color(&self) -> &'static str {
        match *self {
            Self::Healthy => "Green",
            Self::Warning => "Yellow",
            Self::Critical => "Orange",
            Self::Failed => "Red",
        }
    }
}
/// Running statistics for one S.M.A.R.T. attribute of one disk.
#[derive(Debug, Clone)]
pub struct AttributeValue {
    /// Most recently observed value.
    pub current: u64,
    /// Largest value observed so far (running maximum).
    pub worst: u64,
    /// Value that `current` must strictly exceed to count as critical.
    pub threshold: u64,
    /// Values passed to [`update`](Self::update), oldest first, capped at
    /// 100 entries. The initial value given to [`new`](Self::new) is *not*
    /// recorded here.
    pub history: Vec<u64>,
    /// Running mean over all samples (including the initial one).
    pub average: f64,
    /// Welford accumulator: the running sum of squared deviations from the
    /// mean (often called M2). Despite the field name this is not yet divided
    /// by `n - 1`; use [`stddev`](Self::stddev) for a normalised figure.
    pub variance: f64,
    /// Number of samples seen, including the initial one.
    pub sample_count: u64,
}

impl AttributeValue {
    /// Maximum number of samples retained in `history`.
    const HISTORY_LIMIT: usize = 100;

    /// Creates the statistics record from a first observation.
    ///
    /// `value` counts as sample number one for the mean/variance, but it is
    /// not pushed into `history`.
    pub fn new(value: u64, threshold: u64) -> Self {
        Self {
            current: value,
            worst: value,
            threshold,
            history: Vec::new(),
            average: value as f64,
            variance: 0.0,
            sample_count: 1,
        }
    }

    /// Records a new observation and updates every running statistic.
    pub fn update(&mut self, value: u64) {
        self.current = value;
        self.worst = self.worst.max(value);

        self.history.push(value);
        if self.history.len() > Self::HISTORY_LIMIT {
            // Drop the oldest sample; the buffer is small, so the shift is cheap.
            self.history.remove(0);
        }

        // Welford's online update of the mean and the M2 accumulator.
        self.sample_count += 1;
        let n = self.sample_count as f64;
        let delta = value as f64 - self.average;
        self.average += delta / n;
        let delta2 = value as f64 - self.average;
        self.variance += delta * delta2;
    }

    /// Sample standard deviation (Bessel-corrected); `0.0` with fewer than
    /// two samples.
    pub fn stddev(&self) -> f64 {
        if self.sample_count < 2 {
            return 0.0;
        }
        libm::sqrt(self.variance / (self.sample_count - 1) as f64)
    }

    /// Least-squares slope of `history` against the sample index, i.e. the
    /// average change per recorded sample. Returns `0.0` when fewer than two
    /// samples are in `history`.
    ///
    /// The fit is mean-centred and accumulated in `f64`. The textbook
    /// `n*Σxy − Σx*Σy` form evaluated in `f32` loses every significant digit
    /// for large counters (LBA totals, power-on hours) and can report a
    /// spurious non-zero slope for perfectly flat data.
    pub fn trend(&self) -> f32 {
        let len = self.history.len();
        if len < 2 {
            return 0.0;
        }
        let n = len as f64;
        // x runs over 0..len, so its mean has a closed form.
        let mean_x = (n - 1.0) / 2.0;
        let mean_y = self.history.iter().map(|&v| v as f64).sum::<f64>() / n;

        let mut cov_xy = 0.0f64;
        let mut var_x = 0.0f64;
        for (i, &value) in self.history.iter().enumerate() {
            let dx = i as f64 - mean_x;
            let dy = value as f64 - mean_y;
            cov_xy += dx * dy;
            var_x += dx * dx;
        }

        // Defensive guard; for len >= 2 the index spread is always >= 0.5.
        if var_x.abs() < 0.001 {
            return 0.0;
        }
        (cov_xy / var_x) as f32
    }

    /// Reports whether `current` lies more than three standard deviations
    /// from the running mean.
    ///
    /// Always `false` until at least 10 samples have been seen. The standard
    /// deviation is floored at 1.0 so that tiny jitter on an otherwise
    /// constant series is not flagged.
    pub fn is_anomalous(&self) -> bool {
        if self.sample_count < 10 {
            return false;
        }
        let sigma = self.stddev();
        let z_score = (self.current as f64 - self.average).abs() / sigma.max(1.0);
        z_score > 3.0
    }

    /// Reports whether `current` is strictly greater than `threshold`.
    pub fn exceeds_threshold(&self) -> bool {
        self.current > self.threshold
    }
}
/// S.M.A.R.T. state of a single disk: per-attribute statistics plus the
/// health verdict derived from them.
#[derive(Debug, Clone)]
pub struct SmartData {
    /// Identifier of the disk this record belongs to.
    pub disk_id: u64,
    /// Statistics for every attribute reported so far.
    pub attributes: BTreeMap<SmartAttribute, AttributeValue>,
    /// Timestamp passed to the most recent `update_attribute` call
    /// (`0` until the first update).
    pub last_update: u64,
    /// Health classification derived from `failure_probability`.
    pub health: DiskHealth,
    /// Weighted failure score in `[0.0, 1.0]`.
    pub failure_probability: f32,
}

impl SmartData {
    /// Creates an empty, healthy record for `disk_id`.
    pub fn new(disk_id: u64) -> Self {
        Self {
            disk_id,
            attributes: BTreeMap::new(),
            last_update: 0,
            health: DiskHealth::Healthy,
            failure_probability: 0.0,
        }
    }

    /// Records a new reading for `attr` and re-evaluates the disk's health.
    ///
    /// The first reading of an attribute creates its statistics record with
    /// the attribute's critical threshold; later readings update it.
    pub fn update_attribute(&mut self, attr: SmartAttribute, value: u64, timestamp: u64) {
        self.last_update = timestamp;
        // Single map lookup instead of `get_mut` followed by `insert`.
        self.attributes
            .entry(attr)
            .and_modify(|existing| existing.update(value))
            .or_insert_with(|| AttributeValue::new(value, attr.critical_threshold()));
        self.update_health();
    }

    /// Recomputes `failure_probability` and `health` from all attributes.
    ///
    /// Each attribute contributes its weight times 1.0 if it exceeds its
    /// threshold, 0.5 if its current value is anomalous and 0.3 if its trend
    /// is rising faster than 0.1 per sample. The sum is normalised by the
    /// total weight of the attributes present.
    fn update_health(&mut self) {
        let mut score = 0.0f32;
        let mut total_weight = 0.0f32;
        for (attr, value) in &self.attributes {
            let weight = attr.ml_weight();
            total_weight += weight;
            if value.exceeds_threshold() {
                score += weight * 1.0;
            }
            if value.is_anomalous() {
                score += weight * 0.5;
            }
            if value.trend() > 0.1 {
                score += weight * 0.3;
            }
        }

        self.failure_probability = if total_weight > 0.0 {
            // The three contributions can add up to 1.8x the weight, so cap
            // the ratio to keep it a valid probability (and to avoid logging
            // e.g. "180.0%"). Anything at or above 0.8 is `Failed` anyway,
            // so the classification below is unaffected by the cap.
            (score / total_weight).min(1.0)
        } else {
            0.0
        };

        self.health = if self.failure_probability >= 0.8 {
            DiskHealth::Failed
        } else if self.failure_probability >= 0.5 {
            DiskHealth::Critical
        } else if self.failure_probability >= 0.2 {
            DiskHealth::Warning
        } else {
            DiskHealth::Healthy
        };
    }

    /// Attributes whose current value exceeds their critical threshold.
    pub fn critical_attributes(&self) -> Vec<SmartAttribute> {
        self.attributes
            .iter()
            .filter(|(_, v)| v.exceeds_threshold())
            .map(|(k, _)| *k)
            .collect()
    }

    /// Attributes whose current value is a statistical outlier.
    pub fn anomalous_attributes(&self) -> Vec<SmartAttribute> {
        self.attributes
            .iter()
            .filter(|(_, v)| v.is_anomalous())
            .map(|(k, _)| *k)
            .collect()
    }
}
/// Aggregate counters maintained by `SmartMonitor`; all start at zero.
#[derive(Debug, Clone, Default)]
pub struct SmartStats {
/// Incremented by every `SmartMonitor::register_disk` call.
pub total_disks: u64,
/// Number of disks currently counted as `DiskHealth::Healthy`.
pub healthy_disks: u64,
/// Number of disks currently counted as `DiskHealth::Warning`.
pub warning_disks: u64,
/// Number of disks currently counted as `DiskHealth::Critical`.
pub critical_disks: u64,
/// Number of disks currently counted as `DiskHealth::Failed`.
pub failed_disks: u64,
/// Number of health *transitions* into `Critical` or `Failed` observed so
/// far (a disk that changes state repeatedly is counted each time).
pub predicted_failures: u64,
}
// Single shared `SmartMonitor`, constructed lazily on first access and
// guarded by a `spin::Mutex`. The `Smart` facade below locks it for every call.
lazy_static! {
static ref SMART_MONITOR: Mutex<SmartMonitor> = Mutex::new(SmartMonitor::new());
}
/// Registry of monitored disks together with aggregate health counters.
pub struct SmartMonitor {
    /// Per-disk S.M.A.R.T. state, keyed by disk id.
    disks: BTreeMap<u64, SmartData>,
    /// Aggregate counters, kept in sync with the health of `disks`.
    stats: SmartStats,
    /// Minimum spacing between two staleness scans, in timestamp units.
    monitor_interval: u64,
    /// Timestamp of the last staleness scan that actually ran.
    last_monitor: u64,
}

impl Default for SmartMonitor {
    fn default() -> Self {
        Self::new()
    }
}

impl SmartMonitor {
    /// Default spacing between staleness scans.
    const DEFAULT_MONITOR_INTERVAL: u64 = 60_000;

    /// Age after which a disk without updates is reported. The warning text
    /// calls this "10 minutes", so timestamps are presumably milliseconds.
    const STALE_AFTER: u64 = 600_000;

    /// Creates an empty monitor with the default scan interval.
    pub fn new() -> Self {
        Self {
            disks: BTreeMap::new(),
            stats: SmartStats::default(),
            monitor_interval: Self::DEFAULT_MONITOR_INTERVAL,
            last_monitor: 0,
        }
    }

    /// Starts monitoring `disk_id` with a fresh, healthy record.
    ///
    /// Registering an id that is already known replaces its record (as
    /// before) but no longer counts the disk a second time: the disk is moved
    /// from its previous health bucket to `Healthy` and `total_disks` is left
    /// untouched.
    pub fn register_disk(&mut self, disk_id: u64) {
        match self.disks.insert(disk_id, SmartData::new(disk_id)) {
            Some(previous) => {
                if previous.health != DiskHealth::Healthy {
                    self.update_health_stats(previous.health, DiskHealth::Healthy);
                }
            }
            None => {
                self.stats.total_disks += 1;
                self.stats.healthy_disks += 1;
            }
        }
        crate::lcpfs_println!("[ SMART ] Registered disk {} for monitoring", disk_id);
    }

    /// Feeds one attribute reading for `disk_id` into the monitor.
    ///
    /// Logs health transitions and bumps `predicted_failures` whenever the
    /// disk's health changes to `Critical` or `Failed`.
    ///
    /// # Errors
    ///
    /// Returns `Err("Disk not found")` if `disk_id` was never registered.
    pub fn update_attribute(
        &mut self,
        disk_id: u64,
        attr: SmartAttribute,
        value: u64,
        timestamp: u64,
    ) -> Result<(), &'static str> {
        let data = self.disks.get_mut(&disk_id).ok_or("Disk not found")?;
        let old_health = data.health;
        data.update_attribute(attr, value, timestamp);
        let new_health = data.health;
        let failure_prob = data.failure_probability;

        if old_health != new_health {
            self.update_health_stats(old_health, new_health);
            crate::lcpfs_println!(
                "[ SMART ] Disk {} health changed: {} -> {} (failure prob: {:.1}%)",
                disk_id,
                old_health.name(),
                new_health.name(),
                failure_prob * 100.0
            );
            if new_health == DiskHealth::Critical || new_health == DiskHealth::Failed {
                self.stats.predicted_failures += 1;
            }
        }
        Ok(())
    }

    /// Counter in `stats` that tracks disks in the given health state.
    fn health_counter(&mut self, health: DiskHealth) -> &mut u64 {
        match health {
            DiskHealth::Healthy => &mut self.stats.healthy_disks,
            DiskHealth::Warning => &mut self.stats.warning_disks,
            DiskHealth::Critical => &mut self.stats.critical_disks,
            DiskHealth::Failed => &mut self.stats.failed_disks,
        }
    }

    /// Moves one disk from the `old` health bucket to the `new` one.
    fn update_health_stats(&mut self, old: DiskHealth, new: DiskHealth) {
        // Saturate so that inconsistent counters can never underflow/panic.
        let leaving = self.health_counter(old);
        *leaving = leaving.saturating_sub(1);
        let entering = self.health_counter(new);
        *entering += 1;
    }

    /// Current health of `disk_id`, or `None` if it is not registered.
    pub fn get_health(&self, disk_id: u64) -> Option<DiskHealth> {
        self.disks.get(&disk_id).map(|d| d.health)
    }

    /// Ids of all disks currently in the given health state, ascending.
    pub fn disks_by_health(&self, health: DiskHealth) -> Vec<u64> {
        self.disks
            .iter()
            .filter(|(_, d)| d.health == health)
            .map(|(id, _)| *id)
            .collect()
    }

    /// Periodic staleness scan.
    ///
    /// Does nothing until `monitor_interval` has elapsed since the previous
    /// scan; otherwise logs a warning for every disk whose last update is
    /// older than [`Self::STALE_AFTER`].
    pub fn monitor(&mut self, current_time: u64) {
        // Saturating adds keep timestamps near `u64::MAX` from overflowing.
        if current_time < self.last_monitor.saturating_add(self.monitor_interval) {
            return;
        }
        self.last_monitor = current_time;

        for (disk_id, data) in &self.disks {
            if current_time > data.last_update.saturating_add(Self::STALE_AFTER) {
                crate::lcpfs_println!(
                    "[ SMART ] WARNING: No S.M.A.R.T. update for disk {} in 10 minutes",
                    disk_id
                );
            }
        }
    }

    /// Snapshot of the aggregate counters.
    pub fn stats(&self) -> SmartStats {
        self.stats.clone()
    }

    /// Copy of the full record for `disk_id`, or `None` if not registered.
    pub fn get_disk_data(&self, disk_id: u64) -> Option<SmartData> {
        self.disks.get(&disk_id).cloned()
    }
}
pub struct Smart;
impl Smart {
pub fn register_disk(disk_id: u64) {
let mut mon = SMART_MONITOR.lock();
mon.register_disk(disk_id);
}
pub fn update_attribute(
disk_id: u64,
attr: SmartAttribute,
value: u64,
timestamp: u64,
) -> Result<(), &'static str> {
let mut mon = SMART_MONITOR.lock();
mon.update_attribute(disk_id, attr, value, timestamp)
}
pub fn get_health(disk_id: u64) -> Option<DiskHealth> {
let mon = SMART_MONITOR.lock();
mon.get_health(disk_id)
}
pub fn monitor(current_time: u64) {
let mut mon = SMART_MONITOR.lock();
mon.monitor(current_time);
}
pub fn stats() -> SmartStats {
let mon = SMART_MONITOR.lock();
mon.stats()
}
pub fn get_disk_data(disk_id: u64) -> Option<SmartData> {
let mon = SMART_MONITOR.lock();
mon.get_disk_data(disk_id)
}
}
pub fn get_smart_data(disk_id: u64) -> Option<SmartData> {
Smart::get_disk_data(disk_id)
}
// Unit tests for the attribute tables, the running statistics, the per-disk
// health scoring and the monitor's bookkeeping. They use local `SmartMonitor`
// instances and never touch the global `SMART_MONITOR`.
#[cfg(test)]
mod tests {
use super::*;
// Spot-checks the static threshold table.
#[test]
fn test_attribute_thresholds() {
assert_eq!(SmartAttribute::ReallocatedSectors.critical_threshold(), 10);
assert!(SmartAttribute::Temperature.critical_threshold() < 100);
}
// Reallocated sectors must weigh more than temperature.
#[test]
fn test_attribute_weights() {
let weight1 = SmartAttribute::ReallocatedSectors.ml_weight();
let weight2 = SmartAttribute::Temperature.ml_weight();
assert!(weight1 > weight2); }
// `current` follows the latest value; `worst` keeps the running maximum.
#[test]
fn test_attribute_value_update() {
let mut attr = AttributeValue::new(0, 10);
attr.update(5);
assert_eq!(attr.current, 5);
assert_eq!(attr.worst, 5);
attr.update(2);
assert_eq!(attr.current, 2);
assert_eq!(attr.worst, 5); }
// Loose sanity bounds on the running mean and standard deviation.
#[test]
fn test_welford_statistics() {
let mut attr = AttributeValue::new(10, 100);
for &val in &[12, 11, 13, 9, 10] {
attr.update(val);
}
assert!((attr.average - 11.0).abs() < 1.0);
let stddev = attr.stddev();
assert!(stddev > 0.0 && stddev < 3.0);
}
// A strictly increasing series must yield a positive slope.
#[test]
fn test_trend_detection() {
let mut attr = AttributeValue::new(0, 100);
for i in 1..=10 {
attr.update(i * 5);
}
let trend = attr.trend();
assert!(trend > 0.0); }
// A constant series is not anomalous; a sudden jump to 100 is.
#[test]
fn test_anomaly_detection() {
let mut attr = AttributeValue::new(10, 100);
for _ in 0..20 {
attr.update(10);
}
assert!(!attr.is_anomalous());
attr.update(100);
assert!(attr.is_anomalous());
}
// The threshold comparison is strict: 15 > 10 trips it, 5 does not.
#[test]
fn test_threshold_checking() {
let attr = AttributeValue::new(15, 10);
assert!(attr.exceeds_threshold());
let attr2 = AttributeValue::new(5, 10);
assert!(!attr2.exceeds_threshold());
}
// A fresh record starts healthy with zero failure probability.
#[test]
fn test_smart_data_creation() {
let data = SmartData::new(1);
assert_eq!(data.disk_id, 1);
assert_eq!(data.health, DiskHealth::Healthy);
assert_eq!(data.failure_probability, 0.0);
}
// 20 reallocated sectors exceeds the threshold of 10, so health must degrade.
#[test]
fn test_health_status_update() {
let mut data = SmartData::new(1);
data.update_attribute(SmartAttribute::ReallocatedSectors, 20, 1000);
assert_ne!(data.health, DiskHealth::Healthy);
assert!(data.failure_probability > 0.0);
}
// Only the attribute above its threshold is reported as critical
// (temperature 45 is below its threshold of 60).
#[test]
fn test_critical_attributes() {
let mut data = SmartData::new(1);
data.update_attribute(SmartAttribute::ReallocatedSectors, 15, 1000);
data.update_attribute(SmartAttribute::Temperature, 45, 1000);
let critical = data.critical_attributes();
assert_eq!(critical.len(), 1);
assert_eq!(critical[0], SmartAttribute::ReallocatedSectors);
}
// Registering two distinct disks yields two healthy disks in the stats.
#[test]
fn test_monitor_registration() {
let mut mon = SmartMonitor::new();
mon.register_disk(1);
mon.register_disk(2);
assert_eq!(mon.stats.total_disks, 2);
assert_eq!(mon.stats.healthy_disks, 2);
}
// A below-threshold reading must leave the single disk in Healthy or Warning.
#[test]
fn test_health_state_transitions() {
let mut mon = SmartMonitor::new();
mon.register_disk(1);
mon.update_attribute(1, SmartAttribute::ReallocatedSectors, 8, 1000)
.expect("test: operation should succeed");
let stats = mon.stats();
assert!(stats.healthy_disks + stats.warning_disks == 1);
}
// Freshly registered disks are all returned by the Healthy filter.
#[test]
fn test_disks_by_health() {
let mut mon = SmartMonitor::new();
mon.register_disk(1);
mon.register_disk(2);
let healthy = mon.disks_by_health(DiskHealth::Healthy);
assert_eq!(healthy.len(), 2);
}
// Two attributes above their thresholds push the failure probability up and,
// if the disk ends up Critical/Failed, a predicted failure must be counted.
#[test]
fn test_failure_prediction() {
let mut mon = SmartMonitor::new();
mon.register_disk(1);
mon.update_attribute(1, SmartAttribute::ReallocatedSectors, 15, 1000)
.expect("test: operation should succeed");
mon.update_attribute(1, SmartAttribute::CurrentPendingSectors, 10, 2000)
.expect("test: operation should succeed");
let data = mon
.get_disk_data(1)
.expect("test: operation should succeed");
assert!(data.failure_probability > 0.3);
if data.health == DiskHealth::Critical || data.health == DiskHealth::Failed {
assert!(mon.stats.predicted_failures > 0);
}
}
// Smoke test only (no assertions): the first two calls fall inside the
// 60_000 interval and return early; the third one runs the staleness scan.
#[test]
fn test_monitoring_interval() {
let mut mon = SmartMonitor::new();
mon.register_disk(1);
mon.update_attribute(1, SmartAttribute::Temperature, 40, 1000)
.expect("test: operation should succeed");
mon.monitor(1000);
mon.monitor(2000);
mon.monitor(70_000);
}
}