lcpfs 2026.1.102

LCP File System - A ZFS-inspired copy-on-write filesystem for Rust
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
// Copyright 2025 LunaOS Contributors
// SPDX-License-Identifier: Apache-2.0
//
// VDEV Evacuation Engine
// Drive migration with safe data relocation.

// ALL thresholds are learned from observation - NO hardcoded values.
// ============================================================================

use crate::fscore::impl_::LcpfsController;
use crate::fscore::structs::Dva;
use crate::hw::smart::{SmartAttribute, get_smart_data};
use alloc::collections::VecDeque;
use alloc::string::String;
use alloc::vec::Vec;
use lazy_static::lazy_static;
use libm::{fabs, sqrt};
use spin::Mutex;

// ═══════════════════════════════════════════════════════════════════════════════
// LEARNED THRESHOLDS (Welford's algorithm - no hardcoded values)
// ═══════════════════════════════════════════════════════════════════════════════

/// Self-adjusting decision threshold.
///
/// Keeps a running mean and sample variance of observed outcomes (Welford-style
/// online update) and nudges `value` after every observation.
#[derive(Clone, Copy)]
pub struct LearnedThreshold {
    /// Threshold currently in force
    pub value: f64,
    /// Standard error of the mean outcome (`sqrt(variance / n)`); `f64::MAX` until two observations exist
    pub uncertainty: f64,
    /// How many observations have been recorded
    pub observations: u64,
    /// Step size applied to the next adjustment; shrinks as observations accumulate
    pub learning_rate: f64,
    /// Running mean of the observed outcomes
    pub mean_outcome: f64,
    /// Running sample variance of the observed outcomes; `f64::MAX` until two observations exist
    pub variance: f64,
}

impl LearnedThreshold {
    /// Builds a threshold that has seen nothing yet: `value` is the supplied
    /// guess, and both `variance` and `uncertainty` start at `f64::MAX`.
    pub const fn uninformed(initial_guess: f64) -> Self {
        Self {
            observations: 0,
            value: initial_guess,
            learning_rate: 1.0,
            mean_outcome: 0.0,
            variance: f64::MAX,
            uncertainty: f64::MAX,
        }
    }

    /// Folds one `(action_value, outcome)` pair into the running statistics
    /// and moves `value`.
    ///
    /// A negative `outcome_delta_epsilon` pulls `value` toward `action_value`
    /// at the full learning rate; any other outcome pushes `value` away from
    /// `action_value` at half the rate.
    pub fn observe(&mut self, action_value: f64, outcome_delta_epsilon: f64) {
        self.observations += 1;
        let count = self.observations as f64;

        // Online mean update; the deviation is measured before and after the
        // mean moves so the variance can be updated without storing samples.
        let dev_before = outcome_delta_epsilon - self.mean_outcome;
        self.mean_outcome += dev_before / count;
        let dev_after = outcome_delta_epsilon - self.mean_outcome;

        if self.observations > 1 {
            // Rebuild the sum of squared deviations from the previous sample
            // variance (which was divided by n - 2 one step ago).
            let sum_sq = self.variance * (count - 2.0) + dev_before * dev_after;
            self.variance = sum_sq / (count - 1.0);
            self.uncertainty = sqrt(self.variance / count);
        }

        let rate = self.learning_rate;
        let step = if outcome_delta_epsilon < 0.0 {
            (action_value - self.value) * rate
        } else {
            (self.value - action_value) * rate * 0.5
        };
        self.value += step;

        // Decay: 1 / (1 + 0.1 * sqrt(n)).
        self.learning_rate = 1.0 / (1.0 + sqrt(count) * 0.1);
    }

    /// Confidence score: `0.0` with no observations, otherwise the product of
    /// an observation-count factor and an inverse-uncertainty factor.
    pub fn confidence(&self) -> f64 {
        match self.observations {
            0 => 0.0,
            seen => {
                let from_count = 1.0 - 1.0 / (1.0 + seen as f64 * 0.01);
                let from_uncertainty = 1.0 / (1.0 + fabs(self.uncertainty));
                from_count * from_uncertainty
            }
        }
    }

    /// Returns `true` when `current_value` has reached the threshold and the
    /// estimated benefit exceeds the current uncertainty.
    pub fn should_act(&self, current_value: f64, estimated_benefit: f64) -> bool {
        // The small epsilon keeps the ratio finite when uncertainty is zero.
        let signal_to_noise = estimated_benefit / (self.uncertainty + 1e-10);
        signal_to_noise > 1.0 && current_value >= self.value
    }
}

// ═══════════════════════════════════════════════════════════════════════════════
// VDEV HEALTH OBSERVATION
// ═══════════════════════════════════════════════════════════════════════════════

/// Health metrics for a single VDEV
///
/// One sample of device health, as submitted to `EvacuationEngine::observe`.
/// `failure_risk` reads `latency_p99_us`, `read_errors`, `write_errors`,
/// `io_ops`, `reallocated_sectors`, `pending_sectors` and `temperature_c`;
/// the remaining fields are carried along but not scored by it.
#[derive(Clone, Copy, Default)]
pub struct VdevHealthObservation {
    /// Virtual device identifier
    pub vdev_id: usize,
    /// Timestamp of observation in milliseconds
    pub timestamp_ms: u64,
    /// Average I/O latency (microseconds)
    pub latency_avg_us: f64,
    /// P99 I/O latency (microseconds)
    pub latency_p99_us: f64,
    /// Read errors in last observation window
    pub read_errors: u64,
    /// Write errors in last observation window
    pub write_errors: u64,
    /// Reallocated sector count (SMART)
    pub reallocated_sectors: u64,
    /// Pending sector count (SMART)
    pub pending_sectors: u64,
    /// Current temperature (Celsius)
    pub temperature_c: f64,
    /// Power-on hours
    pub power_on_hours: u64,
    /// Uncorrectable error count
    pub uncorrectable_errors: u64,
    /// I/O operations in last window (denominator of the error rate in `failure_risk`)
    pub io_ops: u64,
}

impl VdevHealthObservation {
    /// Scores how likely this VDEV is to fail soon; higher means riskier.
    ///
    /// Each indicator that exceeds its learned threshold in `learned` adds a
    /// weighted contribution, and the sum is capped at `1.0`:
    ///
    /// - P99 latency: `0.3 * min(latency / threshold, 2.0)`
    /// - error rate (errors per I/O op): `0.4 * min(rate / threshold, 3.0)`
    /// - reallocated sectors: flat `0.2`
    /// - pending sectors: flat `0.2`
    /// - temperature: `0.1 * min((temp - threshold) / 10, 1.0)`
    pub fn failure_risk(&self, learned: &EvacuationEngine) -> f64 {
        // Latency contribution.
        let latency_limit = learned.threshold_latency.value;
        let latency_part = if self.latency_p99_us > latency_limit {
            0.3 * (self.latency_p99_us / latency_limit).min(2.0)
        } else {
            0.0
        };

        // Error-rate contribution; a window with no I/O counts as error-free.
        let error_rate = match self.io_ops {
            0 => 0.0,
            ops => (self.read_errors + self.write_errors) as f64 / ops as f64,
        };
        let error_limit = learned.threshold_error_rate.value;
        let error_part = if error_rate > error_limit {
            0.4 * (error_rate / error_limit).min(3.0)
        } else {
            0.0
        };

        // SMART sector counters contribute a fixed amount each.
        let reallocated_part =
            if self.reallocated_sectors as f64 > learned.threshold_reallocated.value {
                0.2
            } else {
                0.0
            };
        let pending_part = if self.pending_sectors as f64 > learned.threshold_pending.value {
            0.2
        } else {
            0.0
        };

        // Temperature contribution scales with the overshoot, per 10 degrees.
        let temperature_limit = learned.threshold_temperature.value;
        let temperature_part = if self.temperature_c > temperature_limit {
            0.1 * ((self.temperature_c - temperature_limit) / 10.0).min(1.0)
        } else {
            0.0
        };

        let total = latency_part + error_part + reallocated_part + pending_part + temperature_part;
        total.min(1.0)
    }
}

/// Record of one finished evacuation, kept so the engine can learn from it
#[derive(Clone, Copy)]
pub struct EvacuationOutcome {
    /// VDEV that was evacuated
    pub vdev_id: usize,
    /// Risk score recorded for the run when it began
    pub started_at_risk: f64,
    /// Count of blocks copied successfully
    pub blocks_evacuated: u64,
    /// Duration of the run in milliseconds
    pub time_taken_ms: u64,
    /// Set when the drive was judged to have failed while being evacuated
    pub drive_failed_during: bool,
    /// System epsilon sampled before the run
    pub epsilon_before: f64,
    /// System epsilon sampled after the run
    pub epsilon_after: f64,
}

impl EvacuationOutcome {
    /// Epsilon after the run minus epsilon before it (negative means epsilon fell)
    pub fn delta_epsilon(&self) -> f64 {
        let Self {
            epsilon_before,
            epsilon_after,
            ..
        } = *self;
        epsilon_after - epsilon_before
    }

    /// An evacuation counts as successful when the drive survived it and
    /// epsilon did not increase.
    pub fn was_successful(&self) -> bool {
        if self.drive_failed_during {
            return false;
        }
        self.delta_epsilon() <= 0.0
    }
}

// ═══════════════════════════════════════════════════════════════════════════════
// EVACUATION ENGINE
// ═══════════════════════════════════════════════════════════════════════════════

lazy_static! {
    /// Global evacuation engine singleton for managing data evacuation from failing drives.
    ///
    /// Built with `EvacuationEngine::new()` on first access and guarded by a
    /// `spin::Mutex`. The free functions of this module (`update_epsilon`,
    /// `observe_health`, `should_evacuate`, `start_evacuation`, `stats`) lock it
    /// for the duration of each call, and `evac_task` locks it as well.
    pub static ref EVAC_ENGINE: Mutex<EvacuationEngine> = Mutex::new(EvacuationEngine::new());
}

/// Engine for evacuating data from failing drives using learned thresholds
///
/// Holds the run state of the (at most one) active evacuation, bounded
/// histories of health observations and evacuation outcomes, and the
/// `LearnedThreshold` parameters consulted by `failure_risk` and
/// `should_evacuate`.
pub struct EvacuationEngine {
    /// Whether an evacuation is currently running
    pub is_running: bool,
    /// VDEV currently being evacuated (if any)
    pub evacuating_vdev: Option<usize>,
    /// Number of blocks migrated so far
    pub blocks_migrated: u64,
    /// Target VDEV for migration (if any)
    pub target_vdev: Option<usize>,

    // Observation history
    /// Health observations, oldest first; `observe` caps this at 1000 entries
    observations: VecDeque<VdevHealthObservation>,
    /// Finished evacuations, oldest first; `evac_task` caps this at 100 entries
    outcomes: VecDeque<EvacuationOutcome>,

    // ═══════════════════════════════════════════════════════════════════════════
    // LEARNED THRESHOLDS (seeded with initial guesses in `new`)
    // ═══════════════════════════════════════════════════════════════════════════
    /// Learned: Latency threshold (microseconds)
    threshold_latency: LearnedThreshold,

    /// Learned: Error rate threshold (errors per op)
    threshold_error_rate: LearnedThreshold,

    /// Learned: Reallocated sector threshold
    threshold_reallocated: LearnedThreshold,

    /// Learned: Pending sector threshold
    threshold_pending: LearnedThreshold,

    /// Learned: Temperature threshold (Celsius)
    threshold_temperature: LearnedThreshold,

    /// Learned: Risk level at which to trigger evacuation
    threshold_evac_risk: LearnedThreshold,

    /// Learned: Migration batch size
    batch_size: LearnedThreshold,

    /// Learned: Pause between batches (microseconds)
    // NOTE(review): in the code visible here this value is only printed by
    // `start_evacuation`; no pause is actually taken between batches.
    batch_pause_us: LearnedThreshold,

    /// Current system epsilon
    current_epsilon: f64,

    /// Last computed risk score (for outcome tracking)
    last_risk_score: f64,
}

impl Default for EvacuationEngine {
    fn default() -> Self {
        Self::new()
    }
}

impl EvacuationEngine {
    /// Builds an idle engine: no evacuation in progress, empty histories, and
    /// every learned parameter seeded with an uninformed initial guess.
    pub fn new() -> Self {
        // Each learned parameter starts as an uninformed prior around a seed value.
        let prior = LearnedThreshold::uninformed;

        Self {
            // Run state
            is_running: false,
            evacuating_vdev: None,
            target_vdev: None,
            blocks_migrated: 0,

            // Histories, pre-sized to the caps enforced elsewhere in this impl
            observations: VecDeque::with_capacity(1000),
            outcomes: VecDeque::with_capacity(100),

            // Health thresholds
            threshold_latency: prior(50_000.0), // 50ms P99
            threshold_error_rate: prior(0.001), // 0.1% error rate
            threshold_reallocated: prior(100.0), // 100 reallocated sectors
            threshold_pending: prior(10.0),     // 10 pending sectors
            threshold_temperature: prior(55.0), // 55°C
            threshold_evac_risk: prior(0.5),    // 50% risk triggers evacuation

            // Migration pacing
            batch_size: prior(1000.0),     // 1000 blocks per batch
            batch_pause_us: prior(1000.0), // 1ms between batches

            current_epsilon: 0.0,
            last_risk_score: 0.0,
        }
    }

    /// Update current system epsilon
    ///
    /// Overwrites the stored value; `evac_task` reads it at the start and end
    /// of a migration to fill `epsilon_before` / `epsilon_after` of the outcome.
    pub fn update_epsilon(&mut self, epsilon: f64) {
        self.current_epsilon = epsilon;
    }

    /// Appends a health observation to the history, discarding the oldest
    /// entries so that at most 1000 are retained.
    pub fn observe(&mut self, obs: VdevHealthObservation) {
        const HISTORY_LIMIT: usize = 1000;

        self.observations.push_back(obs);

        // Drop however many entries now exceed the cap, oldest first.
        let surplus = self.observations.len().saturating_sub(HISTORY_LIMIT);
        for _ in 0..surplus {
            self.observations.pop_front();
        }
    }

    /// Decides whether `vdev_id` should be evacuated now.
    ///
    /// Returns `false` immediately while an evacuation is running or when no
    /// observation exists for the VDEV. Otherwise the newest observation is
    /// scored, the score is remembered in `last_risk_score`, and the learned
    /// evacuation threshold must both agree to act and report a confidence
    /// above `0.1`.
    ///
    /// Because `LearnedThreshold::confidence` is `0.0` until the threshold has
    /// recorded observations, this returns `false` until outcomes have been
    /// learned.
    pub fn should_evacuate(&mut self, vdev_id: usize) -> bool {
        if self.is_running {
            return false;
        }

        // Newest observation for this VDEV: search from the back of the history.
        let risk = match self.observations.iter().rfind(|o| o.vdev_id == vdev_id) {
            Some(latest) => latest.failure_risk(self),
            None => return false,
        };

        // Remembered so the eventual outcome can record the starting risk.
        self.last_risk_score = risk;

        let evac_benefit = self.estimate_evac_benefit(vdev_id, risk);

        let gate = &self.threshold_evac_risk;
        gate.confidence() > 0.1 && gate.should_act(risk, evac_benefit)
    }

    /// Estimate epsilon reduction from evacuating a VDEV
    fn estimate_evac_benefit(&self, vdev_id: usize, current_risk: f64) -> f64 {
        // Look at past evacuations at similar risk levels
        let similar_outcomes: Vec<_> = self
            .outcomes
            .iter()
            .filter(|o| fabs(o.started_at_risk - current_risk) < 0.2)
            .collect();

        if similar_outcomes.is_empty() {
            // No prior data - assume benefit proportional to risk
            // If we evacuate successfully, we avoid potential drive failure epsilon cost
            let potential_loss = current_risk * 1_000_000.0; // Assume 1M epsilon if drive dies
            return potential_loss * 0.8; // 80% chance we avoid it
        }

        // Average epsilon improvement from similar evacuations
        let successful: Vec<_> = similar_outcomes
            .iter()
            .filter(|o| o.was_successful())
            .collect();

        if successful.is_empty() {
            return 0.0;
        }

        let avg_improvement: f64 =
            successful.iter().map(|o| -o.delta_epsilon()).sum::<f64>() / successful.len() as f64;

        avg_improvement.max(0.0)
    }

    /// Find the best spare/healthy VDEV to migrate to
    ///
    /// Each VDEV is judged by its most recent observation only, so a device
    /// whose health has degraded cannot win on the strength of an older,
    /// healthier reading. The VDEV recorded in `evacuating_vdev` is never
    /// returned. Among equally risky candidates the most recently observed
    /// one wins.
    ///
    /// Returns `None` when no other VDEV has been observed.
    ///
    /// NOTE(review): free capacity on the target is not checked here; the
    /// observation type carries no space information.
    pub fn find_target_vdev(&self) -> Option<usize> {
        // VDEV ids whose latest observation has already been handled. The
        // history is walked newest-first, so the first hit per id is the latest.
        let mut seen: Vec<usize> = Vec::new();
        let mut best_target: Option<(usize, f64)> = None;

        for obs in self.observations.iter().rev() {
            if seen.contains(&obs.vdev_id) {
                continue; // Stale reading for a VDEV already judged
            }
            seen.push(obs.vdev_id);

            // Skip if we're evacuating from this VDEV
            if Some(obs.vdev_id) == self.evacuating_vdev {
                continue;
            }

            let risk = obs.failure_risk(self);
            let is_better = match best_target {
                None => true,
                Some((_, best_risk)) => risk < best_risk,
            };
            if is_better {
                best_target = Some((obs.vdev_id, risk));
            }
        }

        best_target.map(|(id, _)| id)
    }

    /// Start evacuating a VDEV
    ///
    /// Picks a target with `find_target_vdev`, records the run state, logs the
    /// parameters and spawns `evac_task` on core 2.
    ///
    /// The source is marked in `evacuating_vdev` *before* the target search so
    /// that `find_target_vdev` excludes it; otherwise the dying VDEV could be
    /// selected as its own migration target.
    ///
    /// # Errors
    ///
    /// - `"Evacuation already in progress"` when a run is active.
    /// - `"No suitable target VDEV found"` when no other VDEV is available;
    ///   `evacuating_vdev` is restored to its previous value in that case.
    pub fn start_evacuation(&mut self, dying_vdev: usize) -> Result<(), &'static str> {
        if self.is_running {
            return Err("Evacuation already in progress");
        }

        // Mark the source first so the target search skips it.
        let previous_source = self.evacuating_vdev;
        self.evacuating_vdev = Some(dying_vdev);

        let target = match self.find_target_vdev() {
            Some(candidate) if candidate != dying_vdev => candidate,
            _ => {
                self.evacuating_vdev = previous_source;
                return Err("No suitable target VDEV found");
            }
        };

        self.is_running = true;
        self.target_vdev = Some(target);
        self.blocks_migrated = 0;

        crate::lcpfs_println!(
            "[ EVAC ] INITIATING PI-CONTROLLED EVACUATION: VDEV {} -> VDEV {}",
            dying_vdev,
            target
        );
        crate::lcpfs_println!(
            "[ EVAC ] Parameters: batch={}, pause={}μs (learned)",
            self.batch_size.value as u64,
            self.batch_pause_us.value as u64
        );

        // Spawn evacuation task
        crate::spawn_on_core(Self::evac_task, Some(2));

        Ok(())
    }

    fn evac_task() {
        let mut engine = EVAC_ENGINE.lock();

        let dying_vdev = match engine.evacuating_vdev {
            Some(v) => v,
            None => {
                engine.is_running = false;
                return;
            }
        };

        let target_vdev = match engine.target_vdev {
            Some(v) => v,
            None => {
                engine.is_running = false;
                return;
            }
        };

        let batch_size = engine.batch_size.value.max(100.0) as u64;
        let epsilon_before = engine.current_epsilon;
        let start_time = crate::get_time();

        // Real block migration using BLOCK_DEVICES
        use crate::BLOCK_DEVICES;
        use alloc::vec;

        // Get total blocks from device
        let total_blocks = {
            let devices = BLOCK_DEVICES.lock();
            if let Some(dev) = devices.get(dying_vdev) {
                dev.block_count()
            } else {
                crate::lcpfs_println!("[ EVAC ] Error: Source VDEV {} not found", dying_vdev);
                engine.is_running = false;
                return;
            }
        };

        crate::lcpfs_println!(
            "[ EVAC ] Migrating {} blocks from VDEV {} to VDEV {}",
            total_blocks,
            dying_vdev,
            target_vdev
        );

        let mut block_id = 0u64;
        let mut failed_blocks = 0u64;

        while block_id < total_blocks as u64 {
            let batch_end = (block_id + batch_size).min(total_blocks as u64);

            // Process batch
            for bid in block_id..batch_end {
                // Read from source VDEV
                let mut buffer = vec![0u8; 512];
                let read_success = {
                    let mut devices = BLOCK_DEVICES.lock();
                    if let Some(src) = devices.get_mut(dying_vdev) {
                        src.read_block(bid as usize, &mut buffer).is_ok()
                    } else {
                        false
                    }
                };

                if !read_success {
                    failed_blocks += 1;
                    continue; // Skip unreadable blocks
                }

                // Write to target VDEV
                let write_success = {
                    let mut devices = BLOCK_DEVICES.lock();
                    if let Some(dst) = devices.get_mut(target_vdev) {
                        dst.write_block(bid as usize, &buffer).is_ok()
                    } else {
                        false
                    }
                };

                if write_success {
                    engine.blocks_migrated += 1;
                } else {
                    failed_blocks += 1;
                }
            }

            block_id = batch_end;

            // Progress update
            if block_id % 1000 == 0 {
                crate::lcpfs_println!(
                    "[ EVAC ] Progress: {}/{} blocks migrated ({} failed)",
                    engine.blocks_migrated,
                    total_blocks,
                    failed_blocks
                );
            }
        }

        let time_taken_ms = (crate::get_time() - start_time) / 1_000_000; // ns to ms

        // Record outcome for learning
        let drive_failed = failed_blocks > (total_blocks as u64 / 10); // >10% failure = drive failed
        let outcome = EvacuationOutcome {
            vdev_id: dying_vdev,
            started_at_risk: engine.last_risk_score,
            blocks_evacuated: engine.blocks_migrated,
            time_taken_ms,
            drive_failed_during: drive_failed,
            epsilon_before,
            epsilon_after: engine.current_epsilon,
        };

        engine.learn_from_outcome(&outcome);
        engine.outcomes.push_back(outcome);

        while engine.outcomes.len() > 100 {
            engine.outcomes.pop_front();
        }

        engine.is_running = false;
        engine.evacuating_vdev = None;
        engine.target_vdev = None;

        crate::lcpfs_println!(
            "[ EVAC ] EVACUATION COMPLETE: {} blocks migrated from VDEV {} to VDEV {}",
            engine.blocks_migrated,
            dying_vdev,
            target_vdev
        );
    }

    /// Feeds a finished evacuation back into the learned parameters.
    ///
    /// - The evacuation-risk threshold observes the starting risk together
    ///   with the epsilon delta.
    /// - If the drive failed mid-run, the threshold additionally observes half
    ///   the starting risk with a strongly negative outcome, pulling the
    ///   trigger point lower.
    /// - When the run took measurable time, throughput (blocks per ms) is
    ///   recorded against the batch size. The action value passed is the
    ///   current batch size itself, so `LearnedThreshold::observe` computes a
    ///   zero adjustment: only the batch size's outcome statistics and
    ///   learning rate change, not its `value`.
    fn learn_from_outcome(&mut self, outcome: &EvacuationOutcome) {
        let risk_at_start = outcome.started_at_risk;

        self.threshold_evac_risk
            .observe(risk_at_start, outcome.delta_epsilon());

        if outcome.drive_failed_during {
            // We waited too long: signal strongly that a lower trigger was needed.
            self.threshold_evac_risk
                .observe(risk_at_start * 0.5, -1000.0);
        }

        match outcome.time_taken_ms {
            0 => {}
            elapsed_ms => {
                let throughput = outcome.blocks_evacuated as f64 / elapsed_ms as f64;
                let current_batch = self.batch_size.value;
                self.batch_size.observe(current_batch, -throughput);
            }
        }
    }

    /// Snapshots the run state together with the evacuation-risk and latency
    /// thresholds (value and confidence each).
    pub fn stats(&self) -> EvacStats {
        let risk = &self.threshold_evac_risk;
        let latency = &self.threshold_latency;

        EvacStats {
            // Run state
            is_running: self.is_running,
            evacuating_vdev: self.evacuating_vdev,
            target_vdev: self.target_vdev,
            blocks_migrated: self.blocks_migrated,
            // Learned thresholds
            evac_risk_threshold: risk.value,
            evac_risk_confidence: risk.confidence(),
            latency_threshold_us: latency.value as u64,
            latency_confidence: latency.confidence(),
        }
    }
}

/// Statistics about evacuation operations and learned thresholds
///
/// Plain-data snapshot produced by `EvacuationEngine::stats`; it does not
/// update after it has been returned.
#[derive(Debug, Clone, Copy)]
pub struct EvacStats {
    /// Whether an evacuation is currently running
    pub is_running: bool,
    /// VDEV currently being evacuated
    pub evacuating_vdev: Option<usize>,
    /// Target VDEV for migration
    pub target_vdev: Option<usize>,
    /// Total number of blocks migrated
    pub blocks_migrated: u64,
    /// Learned evacuation risk threshold
    pub evac_risk_threshold: f64,
    /// Confidence in evacuation risk threshold (0.0 to 1.0)
    pub evac_risk_confidence: f64,
    /// Learned latency threshold in microseconds (the `f64` threshold cast to `u64`)
    pub latency_threshold_us: u64,
    /// Confidence in latency threshold (0.0 to 1.0)
    pub latency_confidence: f64,
}

// ═══════════════════════════════════════════════════════════════════════════════
// HEALTH MONITOR (Legacy compatibility wrapper)
// ═══════════════════════════════════════════════════════════════════════════════

/// Legacy per-VDEV health bookkeeping (compatibility layer)
///
/// Both vectors are indexed by VDEV id and grow on demand, padding any new
/// slots with zero.
pub struct HealthMonitor {
    /// Most recent latency recorded for each VDEV, indexed by VDEV id
    pub latencies: Vec<u64>,
    /// Running error count for each VDEV, indexed by VDEV id
    pub error_counts: Vec<u64>,
}

impl Default for HealthMonitor {
    fn default() -> Self {
        HealthMonitor::new()
    }
}

impl HealthMonitor {
    /// Builds a monitor with no recorded data
    pub fn new() -> Self {
        let latencies = Vec::new();
        let error_counts = Vec::new();
        Self {
            latencies,
            error_counts,
        }
    }

    /// Stores `latency_us` as the latest latency for `vdev_id`, replacing any
    /// earlier value for that VDEV.
    pub fn record_latency(&mut self, vdev_id: usize, latency_us: u64) {
        // Grow with zeroed slots until the index exists.
        if vdev_id >= self.latencies.len() {
            self.latencies.resize(vdev_id + 1, 0);
        }
        self.latencies[vdev_id] = latency_us;
    }

    /// Adds one to the error count of `vdev_id`.
    pub fn record_error(&mut self, vdev_id: usize) {
        // Grow with zeroed slots until the index exists.
        if vdev_id >= self.error_counts.len() {
            self.error_counts.resize(vdev_id + 1, 0);
        }
        self.error_counts[vdev_id] += 1;
    }
}

// ═══════════════════════════════════════════════════════════════════════════════
// PUBLIC API
// ═══════════════════════════════════════════════════════════════════════════════

/// Records the current system epsilon on the global evacuation engine.
pub fn update_epsilon(epsilon: f64) {
    let mut engine = EVAC_ENGINE.lock();
    engine.update_epsilon(epsilon);
}

/// Hands a VDEV health observation to the global evacuation engine.
pub fn observe_health(obs: VdevHealthObservation) {
    let mut engine = EVAC_ENGINE.lock();
    engine.observe(obs);
}

/// Asks the global evacuation engine whether `vdev_id` should be evacuated.
pub fn should_evacuate(vdev_id: usize) -> bool {
    let mut engine = EVAC_ENGINE.lock();
    engine.should_evacuate(vdev_id)
}

/// Starts evacuating `dying_vdev` through the global evacuation engine.
///
/// Returns the engine's error string unchanged when the evacuation cannot be
/// started.
pub fn start_evacuation(dying_vdev: usize) -> Result<(), &'static str> {
    let mut engine = EVAC_ENGINE.lock();
    engine.start_evacuation(dying_vdev)
}

/// Returns a statistics snapshot from the global evacuation engine.
pub fn stats() -> EvacStats {
    let engine = EVAC_ENGINE.lock();
    engine.stats()
}

/// Legacy API: Check health using controller
///
/// Queries SMART data from the device and uses it to populate health observations.
/// Falls back to conservative defaults if SMART data is unavailable.
///
/// Note: `controller` parameter is kept for API compatibility but not currently used
/// for metrics. Future versions should add latency/error tracking to LcpfsController.
pub fn check_health(_controller: &LcpfsController) {
    let vdev_id = 0;
    let timestamp_ms = crate::time::now() * 1000;

    // Try to get SMART data from the device
    let smart_data = get_smart_data(vdev_id as u64);

    let obs = if let Some(data) = smart_data {
        // Use real SMART data
        let attrs = &data.attributes;

        let reallocated = attrs
            .get(&SmartAttribute::ReallocatedSectors)
            .map(|v| v.current)
            .unwrap_or(0);
        let pending = attrs
            .get(&SmartAttribute::CurrentPendingSectors)
            .map(|v| v.current)
            .unwrap_or(0);
        let uncorrectable = attrs
            .get(&SmartAttribute::ReportedUncorrectable)
            .map(|v| v.current)
            .unwrap_or(0);
        let temperature = attrs
            .get(&SmartAttribute::Temperature)
            .map(|v| v.current as f64)
            .unwrap_or(40.0);
        let power_on_hours = attrs
            .get(&SmartAttribute::PowerOnHours)
            .map(|v| v.current)
            .unwrap_or(0);

        VdevHealthObservation {
            vdev_id,
            timestamp_ms,
            latency_avg_us: 100.0, // Default - would come from I/O subsystem
            latency_p99_us: 200.0, // Default - would come from I/O subsystem
            read_errors: 0,
            write_errors: 0,
            reallocated_sectors: reallocated,
            pending_sectors: pending,
            temperature_c: temperature,
            power_on_hours,
            uncorrectable_errors: uncorrectable,
            io_ops: 1000,
        }
    } else {
        // SMART data unavailable - use conservative defaults
        VdevHealthObservation {
            vdev_id,
            timestamp_ms,
            latency_avg_us: 100.0,
            latency_p99_us: 200.0,
            read_errors: 0,
            write_errors: 0,
            reallocated_sectors: 0,
            pending_sectors: 0,
            temperature_c: 40.0,
            power_on_hours: 0,
            uncorrectable_errors: 0,
            io_ops: 1000,
        }
    };

    observe_health(obs);

    // Let PI decide
    if should_evacuate(vdev_id) {
        let _ = start_evacuation(vdev_id);
    }
}