use crate::error::Error;
use crate::scsi;
/// Mutable bookkeeping shared between the read loop and [`handle_read_error`].
///
/// One value tracks a single pass over the disc. All fields are public, so the
/// caller may inspect or tune them; `bisecting` in particular is only ever
/// read (never written) by the non-test code in this file.
pub struct ReadCtx {
    /// Batch size of the pass. Scales the jump distance and gates bisection
    /// (`batch > 1`). NOTE(review): presumably sectors per read — confirm
    /// against the caller.
    pub batch: u16,
    /// Successful reads since the last failure.
    pub consecutive_good: u64,
    /// Failed reads since the last success, counted whether or not bisecting.
    pub consecutive_failures: u64,
    /// Failed reads counted only while `bisecting` is false. Cleared by a
    /// non-bisecting success and after a (non-wedge) jump.
    pub consecutive_outer_failures: u64,
    /// Sliding window of recent outcomes: `true` = good read, `false` = failure.
    pub damage_window: Vec<bool>,
    /// Upper bound on the number of entries kept in `damage_window`.
    pub damage_window_max: usize,
    /// Percentage of failures in a full window that triggers a jump.
    pub damage_threshold_pct: usize,
    /// Number of consecutive outer failures that triggers a jump immediately.
    pub fast_jump_threshold: u64,
    /// Current jump-distance multiplier; doubled after each non-wedge jump.
    pub jump_multiplier: u64,
    /// NOT READY retries spent since the last success / other sense key.
    pub not_ready_retries: u32,
    /// Bridge-degradation retries spent so far (not reset within this file).
    pub bridge_degradation_count: u32,
    /// True while the caller is bisecting a failed batch.
    pub bisecting: bool,
    /// When true, marginal errors on multi-sector batches request a bisect
    /// instead of a skip, and the shorter wedge skip distance is used.
    pub bisect_on_marginal: bool,
    /// Wedge-family errors seen (outside bisection) since the last success.
    pub wedge_count: u64,
    /// Timestamp of the most recent successful read, if any.
    pub last_success_at: Option<std::time::Instant>,
    /// Timestamp of the most recent failed read, if any.
    pub last_error_at: Option<std::time::Instant>,
    /// Sense family of the most recent failure; cleared on leaving a damage zone.
    pub last_error_family: Option<SenseFamily>,
    /// Total failed reads recorded in this context.
    pub total_errors: u64,
    /// Total successful reads recorded in this context.
    pub total_reads_ok: u64,
    /// Number of times a damage zone was entered.
    pub zones_entered: u64,
    /// Number of `JumpAhead` actions issued (wedge and non-wedge).
    pub jumps_taken: u64,
    /// True from the first non-bisecting failure until enough consecutive
    /// good reads have been seen.
    pub in_damage_zone: bool,
}
/// Coarse classification of a SCSI sense key, as produced by
/// [`SenseFamily::from_sense_key`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SenseFamily {
    /// Sense key `scsi::SENSE_KEY_NOT_READY`.
    NotReady,
    /// Sense key `scsi::SENSE_KEY_MEDIUM_ERROR`.
    Medium,
    /// Sense key `scsi::SENSE_KEY_HARDWARE_ERROR`; a wedge family.
    Hardware,
    /// Sense key `scsi::SENSE_KEY_ILLEGAL_REQUEST`; a wedge family.
    IllegalRequest,
    /// Any other sense key, or an error that carries no sense data.
    Other,
}
impl SenseFamily {
    /// Maps a raw SCSI sense key to its family.
    ///
    /// Keys other than NOT READY, MEDIUM ERROR, HARDWARE ERROR and
    /// ILLEGAL REQUEST all collapse into [`SenseFamily::Other`].
    pub fn from_sense_key(sense_key: u8) -> Self {
        match sense_key {
            scsi::SENSE_KEY_NOT_READY => Self::NotReady,
            scsi::SENSE_KEY_MEDIUM_ERROR => Self::Medium,
            scsi::SENSE_KEY_HARDWARE_ERROR => Self::Hardware,
            scsi::SENSE_KEY_ILLEGAL_REQUEST => Self::IllegalRequest,
            _ => Self::Other,
        }
    }

    /// Returns `true` for the families treated as a "wedge" (hardware error
    /// or illegal request), `false` for every other family.
    pub fn is_wedge_family(self) -> bool {
        // Exhaustive on purpose: a new variant must be classified explicitly.
        match self {
            Self::Hardware | Self::IllegalRequest => true,
            Self::NotReady | Self::Medium | Self::Other => false,
        }
    }
}
impl ReadCtx {
    /// Initial bound on the damage window used by both constructors.
    const INITIAL_DAMAGE_WINDOW: usize = 16;

    /// Builds the context for a sweep pass.
    ///
    /// Sweep policy: jump after the very first outer failure
    /// (`fast_jump_threshold = 1`), never bisect on marginal errors, and use
    /// a 12 % damage threshold.
    pub fn for_sweep(batch: u16) -> Self {
        Self {
            batch,
            consecutive_good: 0,
            consecutive_failures: 0,
            consecutive_outer_failures: 0,
            damage_window: Vec::with_capacity(Self::INITIAL_DAMAGE_WINDOW),
            damage_window_max: Self::INITIAL_DAMAGE_WINDOW,
            damage_threshold_pct: 12,
            fast_jump_threshold: 1,
            jump_multiplier: 1,
            not_ready_retries: 0,
            bridge_degradation_count: 0,
            bisecting: false,
            bisect_on_marginal: false,
            wedge_count: 0,
            last_success_at: None,
            last_error_at: None,
            last_error_family: None,
            total_errors: 0,
            total_reads_ok: 0,
            zones_entered: 0,
            jumps_taken: 0,
            in_damage_zone: false,
        }
    }

    /// Builds the context for a patch pass.
    ///
    /// Identical to [`ReadCtx::for_sweep`] except for three policy fields:
    /// the damage threshold is `PATCH_DAMAGE_THRESHOLD_PCT`, the fast jump is
    /// effectively disabled (`fast_jump_threshold = u64::MAX`), and marginal
    /// errors request a bisect.
    pub fn for_patch(batch: u16) -> Self {
        // Start from the sweep defaults so the two constructors cannot drift
        // apart when a field is added or a shared default changes.
        let mut ctx = Self::for_sweep(batch);
        ctx.damage_threshold_pct = PATCH_DAMAGE_THRESHOLD_PCT;
        ctx.fast_jump_threshold = u64::MAX;
        ctx.bisect_on_marginal = true;
        ctx
    }

    /// Records one successful read.
    ///
    /// Resets the failure, NOT READY and wedge streaks, pushes a `true` into
    /// the damage window, bumps the success totals/timestamp, and leaves the
    /// damage zone once `damage_window_max` consecutive good reads have been
    /// seen. The outer-failure streak is only cleared when not bisecting.
    pub fn on_success(&mut self) {
        self.consecutive_good += 1;
        self.consecutive_failures = 0;
        self.not_ready_retries = 0;
        self.wedge_count = 0;
        if !self.bisecting {
            self.consecutive_outer_failures = 0;
        }

        self.damage_window.push(true);
        // Drop *all* surplus entries, oldest first. `damage_window_max` is a
        // public field and may have been lowered after the window filled up;
        // removing a single element per push would never shrink the window
        // back to the new bound.
        let excess = self
            .damage_window
            .len()
            .saturating_sub(self.damage_window_max);
        self.damage_window.drain(..excess);

        self.total_reads_ok += 1;
        self.last_success_at = Some(std::time::Instant::now());

        let streak_long_enough = self.consecutive_good >= self.damage_window_max as u64;
        if self.in_damage_zone && streak_long_enough {
            self.in_damage_zone = false;
            self.last_error_family = None;
        }
    }

    /// Returns a copy of the pass-wide counters.
    pub fn pass_summary(&self) -> PassSummary {
        PassSummary {
            total_reads_ok: self.total_reads_ok,
            total_errors: self.total_errors,
            zones_entered: self.zones_entered,
            jumps_taken: self.jumps_taken,
        }
    }
}
/// Snapshot of the pass-wide counters of a [`ReadCtx`], as returned by
/// `ReadCtx::pass_summary`.
#[derive(Debug, Clone, Copy)]
pub struct PassSummary {
    /// Successful reads recorded during the pass.
    pub total_reads_ok: u64,
    /// Failed reads recorded during the pass.
    pub total_errors: u64,
    /// Number of damage zones entered during the pass.
    pub zones_entered: u64,
    /// Number of jump-ahead actions issued during the pass.
    pub jumps_taken: u64,
}
/// Decision returned by [`handle_read_error`] for a failed read.
///
/// `pause_secs` carries the pause chosen for the action; how the caller
/// applies it is outside this file.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ReadAction {
    /// Issued for bridge degradation and NOT READY while retries remain.
    Retry { pause_secs: u64 },
    /// Issued for a marginal error on a multi-sector batch when the context
    /// has `bisect_on_marginal` set and is not already bisecting.
    Bisect,
    /// Issued when the failure triggers neither a retry, a bisect nor a jump.
    SkipBlock { pause_secs: u64 },
    /// Issued for wedge errors and when a fast-jump or damage-window trigger
    /// fires; `sectors` is the jump distance.
    JumpAhead { sectors: u64, pause_secs: u64 },
    /// Issued for SCSI transport failures and once the wedge budget is spent.
    AbortPass,
}
// Tuning constants for `handle_read_error`. Pauses are in seconds (per the
// `_SECS` suffix and the `pause_secs` fields they feed); distances are in
// sectors.

/// Default pause for a failed read when no other pause rule applies.
const FAIL_PAUSE_SECS: u64 = 5;
/// Pause for the first outer failure of a streak when the context is not
/// bisecting and has `bisect_on_marginal` unset.
const ZONE_ENTRY_COOLDOWN_SECS: u64 = 30;
/// Pause once `consecutive_failures` reaches
/// `CONSECUTIVE_FAIL_LONG_PAUSE_THRESHOLD`.
/// NOTE(review): currently equal to `FAIL_PAUSE_SECS`, so the "long pause"
/// branch yields the same pause as the default — confirm this is intended.
const CONSECUTIVE_FAIL_LONG_PAUSE_SECS: u64 = 5;
/// Failure-streak length at which the long pause is selected.
const CONSECUTIVE_FAIL_LONG_PAUSE_THRESHOLD: u64 = 10;
/// Extra pause added on top of the chosen pause for a non-wedge `JumpAhead`.
const POST_JUMP_EXTRA_PAUSE_SECS: u64 = 2;
/// Pause attached to a NOT READY retry.
const NOT_READY_PAUSE_SECS: u64 = 3;
/// Maximum NOT READY retries before the error falls through to the
/// remaining classification.
const NOT_READY_MAX_RETRIES: u32 = 3;
/// Pause attached to a bridge-degradation retry.
const BRIDGE_DEGRADATION_PAUSE_SECS: u64 = 15;
/// Maximum bridge-degradation retries before the error falls through.
const BRIDGE_DEGRADATION_MAX_RETRIES: u32 = 5;
/// Base jump distance; multiplied by the batch size and the jump multiplier.
const JUMP_BASE_SECTORS: u64 = 1024;
/// Jump distance for a wedge error when `bisect_on_marginal` is unset.
const WEDGE_JUMP_SECTORS: u64 = 524_288;
/// Pause attached to every wedge `JumpAhead`.
const WEDGE_PAUSE_SECS: u64 = 30;
/// `wedge_count` value at which the pass is aborted.
const WEDGE_ABORT_THRESHOLD: u64 = 16;
/// Jump distance for a wedge error when `bisect_on_marginal` is set.
const WEDGE_PASS_N_SKIP_SECTORS: u64 = 64;
/// Damage-window failure percentage used by `ReadCtx::for_patch`.
pub const PATCH_DAMAGE_THRESHOLD_PCT: usize = 6;
pub fn handle_read_error(err: &Error, ctx: &mut ReadCtx) -> ReadAction {
ctx.consecutive_failures += 1;
ctx.consecutive_good = 0;
if !ctx.bisecting {
ctx.consecutive_outer_failures += 1;
}
let now = std::time::Instant::now();
let ms_since_last_error = ctx
.last_error_at
.map(|t| now.duration_since(t).as_millis() as u64);
let ms_since_last_success = ctx
.last_success_at
.map(|t| now.duration_since(t).as_millis() as u64);
let current_family = err
.scsi_sense()
.map(|s| SenseFamily::from_sense_key(s.sense_key))
.unwrap_or(SenseFamily::Other);
if !ctx.in_damage_zone && !ctx.bisecting {
ctx.in_damage_zone = true;
ctx.zones_entered += 1;
}
ctx.total_errors += 1;
ctx.last_error_at = Some(now);
let is_wedge_transition = matches!(ctx.last_error_family, Some(prev) if !prev.is_wedge_family())
&& current_family.is_wedge_family();
ctx.last_error_family = Some(current_family);
tracing::warn!(
target: "freemkv::disc",
phase = "read_error",
consecutive_failures = ctx.consecutive_failures,
consecutive_outer_failures = ctx.consecutive_outer_failures,
ms_since_last_error,
ms_since_last_success,
total_errors = ctx.total_errors,
total_reads_ok = ctx.total_reads_ok,
batch = ctx.batch,
bisecting = ctx.bisecting,
wedge_count = ctx.wedge_count,
sense_family = ?current_family,
sense_key = err.scsi_sense().map(|s| s.sense_key),
asc = err.scsi_sense().map(|s| s.asc),
ascq = err.scsi_sense().map(|s| s.ascq),
error = %err,
"read failed; classifying"
);
if is_wedge_transition {
tracing::warn!(
target: "freemkv::disc",
phase = "wedge_transition",
errors_in_zone = ctx.total_errors,
ms_since_last_success,
new_family = ?current_family,
"drive entered wedge / fast-fail family (was returning recoverable medium errors before this)"
);
}
if err.is_scsi_transport_failure() {
return ReadAction::AbortPass;
}
if err.is_bridge_degradation() && ctx.bridge_degradation_count < BRIDGE_DEGRADATION_MAX_RETRIES
{
ctx.bridge_degradation_count += 1;
return ReadAction::Retry {
pause_secs: BRIDGE_DEGRADATION_PAUSE_SECS,
};
}
let sense_key = err.scsi_sense().map(|s| s.sense_key).unwrap_or(0);
if sense_key == scsi::SENSE_KEY_NOT_READY && ctx.not_ready_retries < NOT_READY_MAX_RETRIES {
ctx.not_ready_retries += 1;
return ReadAction::Retry {
pause_secs: NOT_READY_PAUSE_SECS,
};
}
if sense_key != scsi::SENSE_KEY_NOT_READY {
ctx.not_ready_retries = 0;
}
if sense_key == scsi::SENSE_KEY_HARDWARE_ERROR || sense_key == scsi::SENSE_KEY_ILLEGAL_REQUEST {
if !ctx.bisecting {
ctx.wedge_count += 1;
}
if ctx.wedge_count >= WEDGE_ABORT_THRESHOLD {
tracing::warn!(
target: "freemkv::disc",
phase = "wedge_abort",
wedge_count = ctx.wedge_count,
threshold = WEDGE_ABORT_THRESHOLD,
pass = if ctx.bisect_on_marginal { "N" } else { "1" },
"wedge-skip exhausted — drive appears permanently stuck"
);
return ReadAction::AbortPass;
}
let jump_sectors = if ctx.bisect_on_marginal {
WEDGE_PASS_N_SKIP_SECTORS
} else {
WEDGE_JUMP_SECTORS
};
tracing::warn!(
target: "freemkv::disc",
phase = "wedge_skip",
pass = if ctx.bisect_on_marginal { "N" } else { "1" },
wedge_count = ctx.wedge_count,
jump_sectors,
pause_secs = WEDGE_PAUSE_SECS,
"wedge detected — skipping ahead and pausing for drive cooldown"
);
ctx.jumps_taken += 1;
return ReadAction::JumpAhead {
sectors: jump_sectors,
pause_secs: WEDGE_PAUSE_SECS,
};
}
let is_marginal = matches!(
sense_key,
scsi::SENSE_KEY_MEDIUM_ERROR | scsi::SENSE_KEY_ABORTED_COMMAND
);
if is_marginal && ctx.batch > 1 && !ctx.bisecting && ctx.bisect_on_marginal {
return ReadAction::Bisect;
}
if !ctx.bisecting {
ctx.damage_window.push(false);
if ctx.damage_window.len() > ctx.damage_window_max {
ctx.damage_window.remove(0);
}
}
let bad_count = ctx.damage_window.iter().filter(|&&b| !b).count();
let bad_pct = if ctx.damage_window.is_empty() {
0
} else {
bad_count * 100 / ctx.damage_window.len()
};
let is_zone_entry =
ctx.consecutive_outer_failures == 1 && !ctx.bisecting && !ctx.bisect_on_marginal;
let pause_secs = if is_zone_entry {
ZONE_ENTRY_COOLDOWN_SECS
} else if ctx.consecutive_failures >= CONSECUTIVE_FAIL_LONG_PAUSE_THRESHOLD {
CONSECUTIVE_FAIL_LONG_PAUSE_SECS
} else {
FAIL_PAUSE_SECS
};
const MAX_JUMP_MULTIPLIER: u64 = 64;
let fast_trigger = !ctx.bisecting && ctx.consecutive_outer_failures >= ctx.fast_jump_threshold;
let window_trigger =
ctx.damage_window.len() >= ctx.damage_window_max && bad_pct >= ctx.damage_threshold_pct;
if fast_trigger || window_trigger {
let mult = ctx.jump_multiplier.min(MAX_JUMP_MULTIPLIER);
let sectors = JUMP_BASE_SECTORS
.saturating_mul(ctx.batch as u64)
.saturating_mul(mult);
ctx.jump_multiplier = (ctx.jump_multiplier.saturating_mul(2)).min(MAX_JUMP_MULTIPLIER);
ctx.consecutive_outer_failures = 0;
ctx.jumps_taken += 1;
return ReadAction::JumpAhead {
sectors,
pause_secs: pause_secs + POST_JUMP_EXTRA_PAUSE_SECS,
};
}
ReadAction::SkipBlock { pause_secs }
}
/// Unit tests for the read-error policy. "Pass 1" refers to a context built
/// with `ReadCtx::for_sweep`, "Pass N" to one built with `ReadCtx::for_patch`.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::error::Error;
    use crate::scsi::ScsiSense;

    /// A read failure carrying a MEDIUM ERROR sense key.
    fn medium_err() -> Error {
        Error::DiscRead {
            sector: 100,
            status: Some(2),
            sense: Some(ScsiSense {
                sense_key: scsi::SENSE_KEY_MEDIUM_ERROR,
                asc: 0x11,
                ascq: 0x05,
            }),
        }
    }

    /// A read failure carrying a HARDWARE ERROR sense key.
    fn hardware_err() -> Error {
        Error::DiscRead {
            sector: 100,
            status: Some(2),
            sense: Some(ScsiSense {
                sense_key: scsi::SENSE_KEY_HARDWARE_ERROR,
                asc: 0x44,
                ascq: 0x00,
            }),
        }
    }

    /// A read failure carrying an ILLEGAL REQUEST sense key.
    fn illegal_request_err() -> Error {
        Error::DiscRead {
            sector: 100,
            status: Some(2),
            sense: Some(ScsiSense {
                sense_key: scsi::SENSE_KEY_ILLEGAL_REQUEST,
                asc: 0x24,
                ascq: 0x00,
            }),
        }
    }

    #[test]
    fn pass_n_marginal_with_batch_gt_1_bisects() {
        let mut ctx = ReadCtx::for_patch(32);
        let action = handle_read_error(&medium_err(), &mut ctx);
        assert_eq!(action, ReadAction::Bisect);
    }

    #[test]
    fn pass_1_marginal_jumps_immediately_not_bisecting() {
        let mut ctx = ReadCtx::for_sweep(32);
        let action = handle_read_error(&medium_err(), &mut ctx);
        match action {
            ReadAction::JumpAhead { .. } => {}
            other => panic!("expected JumpAhead on first Pass 1 marginal error, got {other:?}"),
        }
    }

    #[test]
    fn medium_error_with_batch_1_skips() {
        let mut ctx = ReadCtx::for_patch(1);
        let action = handle_read_error(&medium_err(), &mut ctx);
        match action {
            ReadAction::SkipBlock { pause_secs } => assert!(pause_secs >= 1),
            other => panic!("expected SkipBlock, got {other:?}"),
        }
    }

    #[test]
    fn medium_error_while_bisecting_does_not_recurse() {
        let mut ctx = ReadCtx::for_patch(32);
        ctx.bisecting = true;
        let action = handle_read_error(&medium_err(), &mut ctx);
        match action {
            ReadAction::SkipBlock { .. } => {}
            other => panic!("expected SkipBlock, got {other:?}"),
        }
    }

    #[test]
    fn pass_1_jumps_immediately_on_first_outer_failure() {
        let mut ctx = ReadCtx::for_sweep(32);
        let a = handle_read_error(&medium_err(), &mut ctx);
        assert!(
            matches!(a, ReadAction::JumpAhead { .. }),
            "expected JumpAhead on first outer failure (fast_jump_threshold=1), got {a:?}"
        );
    }

    #[test]
    fn pass_n_does_not_fast_jump() {
        let mut ctx = ReadCtx::for_patch(32);
        for _ in 0..4 {
            let a = handle_read_error(&medium_err(), &mut ctx);
            assert!(
                !matches!(a, ReadAction::JumpAhead { .. }),
                "Pass N must not fast-jump; got {a:?}"
            );
        }
    }

    /// A non-bisecting success must clear the outer-failure streak.
    ///
    /// Uses a Pass N context on purpose: on Pass 1 the first failure already
    /// triggers a jump, which zeroes the counter by itself and would make the
    /// final assertion pass even if `on_success` did nothing.
    #[test]
    fn outer_success_resets_consecutive_outer_failures() {
        let mut ctx = ReadCtx::for_patch(32);
        handle_read_error(&medium_err(), &mut ctx);
        assert_eq!(
            ctx.consecutive_outer_failures, 1,
            "precondition: the failure must have been counted"
        );
        ctx.on_success();
        assert_eq!(ctx.consecutive_outer_failures, 0);
    }

    #[test]
    fn bisect_inner_success_does_not_reset_outer_counter() {
        let mut ctx = ReadCtx::for_patch(32);
        for _ in 0..3 {
            handle_read_error(&medium_err(), &mut ctx);
        }
        assert_eq!(ctx.consecutive_outer_failures, 3);
        ctx.bisecting = true;
        ctx.on_success();
        assert_eq!(
            ctx.consecutive_outer_failures, 3,
            "bisect inner success must not reset outer-failure counter"
        );
    }

    #[test]
    fn pass_1_hardware_error_jumps_ahead_not_aborts() {
        let mut ctx = ReadCtx::for_sweep(32);
        let action = handle_read_error(&hardware_err(), &mut ctx);
        match action {
            ReadAction::JumpAhead {
                sectors,
                pause_secs,
            } => {
                assert_eq!(sectors, WEDGE_JUMP_SECTORS);
                assert_eq!(pause_secs, WEDGE_PAUSE_SECS);
            }
            other => panic!("expected JumpAhead, got {other:?}"),
        }
        assert_eq!(ctx.wedge_count, 1);
    }

    #[test]
    fn pass_1_hardware_error_aborts_after_threshold() {
        let mut ctx = ReadCtx::for_sweep(32);
        for i in 0..WEDGE_ABORT_THRESHOLD - 1 {
            let action = handle_read_error(&hardware_err(), &mut ctx);
            assert!(
                matches!(action, ReadAction::JumpAhead { .. }),
                "iter {i}: expected JumpAhead, got {action:?}"
            );
        }
        let action = handle_read_error(&hardware_err(), &mut ctx);
        assert_eq!(action, ReadAction::AbortPass);
    }

    #[test]
    fn pass_1_good_read_resets_wedge_count() {
        let mut ctx = ReadCtx::for_sweep(32);
        for _ in 0..(WEDGE_ABORT_THRESHOLD - 1) {
            handle_read_error(&hardware_err(), &mut ctx);
        }
        assert_eq!(ctx.wedge_count, WEDGE_ABORT_THRESHOLD - 1);
        ctx.on_success();
        assert_eq!(ctx.wedge_count, 0);
        let action = handle_read_error(&hardware_err(), &mut ctx);
        assert!(matches!(action, ReadAction::JumpAhead { .. }));
    }

    #[test]
    fn pass_n_hardware_error_also_skips_not_aborts() {
        let mut ctx = ReadCtx::for_patch(1);
        let action = handle_read_error(&hardware_err(), &mut ctx);
        match action {
            ReadAction::JumpAhead {
                sectors,
                pause_secs,
            } => {
                assert_eq!(sectors, WEDGE_PASS_N_SKIP_SECTORS);
                assert_eq!(pause_secs, WEDGE_PAUSE_SECS);
            }
            other => panic!("expected JumpAhead, got {other:?}"),
        }
        assert_eq!(ctx.wedge_count, 1);
    }

    #[test]
    fn pass_n_hardware_error_aborts_after_threshold() {
        let mut ctx = ReadCtx::for_patch(1);
        for _ in 0..WEDGE_ABORT_THRESHOLD - 1 {
            let action = handle_read_error(&hardware_err(), &mut ctx);
            assert!(matches!(action, ReadAction::JumpAhead { .. }));
        }
        let action = handle_read_error(&hardware_err(), &mut ctx);
        assert_eq!(action, ReadAction::AbortPass);
    }

    #[test]
    fn pass_1_illegal_request_also_routes_to_wedge_skip() {
        let mut ctx = ReadCtx::for_sweep(32);
        let action = handle_read_error(&illegal_request_err(), &mut ctx);
        assert!(matches!(action, ReadAction::JumpAhead { .. }));
    }

    #[test]
    fn long_failure_streak_extends_pause_on_pass_n() {
        let mut ctx = ReadCtx::for_patch(1);
        for _ in 0..15 {
            handle_read_error(&medium_err(), &mut ctx);
        }
        let final_action = handle_read_error(&medium_err(), &mut ctx);
        match final_action {
            ReadAction::SkipBlock { pause_secs } => {
                assert!(pause_secs >= CONSECUTIVE_FAIL_LONG_PAUSE_SECS);
            }
            ReadAction::JumpAhead { pause_secs, .. } => {
                assert!(pause_secs >= CONSECUTIVE_FAIL_LONG_PAUSE_SECS);
            }
            other => panic!("expected long-pause action, got {other:?}"),
        }
    }

    #[test]
    fn pass_1_zone_entry_uses_long_cooldown() {
        let mut ctx = ReadCtx::for_sweep(32);
        let action = handle_read_error(&medium_err(), &mut ctx);
        match action {
            ReadAction::JumpAhead { pause_secs, .. } => {
                assert_eq!(
                    pause_secs,
                    ZONE_ENTRY_COOLDOWN_SECS + POST_JUMP_EXTRA_PAUSE_SECS,
                    "first-error pause should be 30 + 2 = 32 s"
                );
            }
            other => panic!("expected JumpAhead on first Pass 1 error, got {other:?}"),
        }
    }

    #[test]
    fn pass_n_pauses_uniformly_on_failed_read() {
        let mut ctx = ReadCtx::for_patch(1);
        let action = handle_read_error(&medium_err(), &mut ctx);
        match action {
            ReadAction::SkipBlock { pause_secs } => assert_eq!(pause_secs, FAIL_PAUSE_SECS),
            ReadAction::JumpAhead { pause_secs, .. } => {
                assert_eq!(pause_secs, FAIL_PAUSE_SECS + POST_JUMP_EXTRA_PAUSE_SECS)
            }
            ReadAction::Bisect => {}
            other => panic!("expected pausing action, got {other:?}"),
        }
    }

    #[test]
    fn damage_window_fills_then_jumps() {
        let mut ctx = ReadCtx::for_sweep(1);
        ctx.damage_window_max = 4;
        ctx.damage_threshold_pct = 50;
        let mut saw_jump = false;
        for _ in 0..6 {
            let a = handle_read_error(&medium_err(), &mut ctx);
            if matches!(a, ReadAction::JumpAhead { .. }) {
                saw_jump = true;
                break;
            }
        }
        assert!(
            saw_jump,
            "expected at least one JumpAhead in 6 failures with 50% threshold"
        );
    }

    #[test]
    fn on_success_resets_failure_counters_and_pushes_window() {
        let mut ctx = ReadCtx::for_sweep(32);
        for _ in 0..3 {
            handle_read_error(&medium_err(), &mut ctx);
        }
        assert!(ctx.consecutive_failures > 0);
        ctx.bisecting = false;
        ctx.on_success();
        assert_eq!(ctx.consecutive_good, 1);
        assert_eq!(ctx.consecutive_failures, 0);
        assert!(*ctx.damage_window.last().unwrap());
    }
}