#[cfg(test)]
mod tests {
use std::time::Duration;
#[derive(Debug, Clone, PartialEq)]
pub enum OtlpError {
Retryable {
status_code: u16,
retry_after: Option<Duration>,
},
NonRetryable {
message: String,
},
CompressionFallback {
status_code: u16,
},
}
impl OtlpError {
pub fn retryable(status_code: u16, retry_after: Option<Duration>) -> Self {
Self::Retryable {
status_code,
retry_after,
}
}
pub fn non_retryable(message: impl Into<String>) -> Self {
Self::NonRetryable {
message: message.into(),
}
}
pub fn compression_fallback(status_code: u16) -> Self {
Self::CompressionFallback { status_code }
}
pub fn is_retryable(&self) -> bool {
matches!(self, Self::Retryable { .. })
}
}
struct ResponseFixture {
status: u16,
headers: Vec<(String, String)>,
}
fn current_otlp_status_classifier(response: &ResponseFixture) -> Result<(), OtlpError> {
match response.status {
200..=299 => Ok(()),
429 => {
let retry_after = response
.headers
.iter()
.find(|(name, _)| name.eq_ignore_ascii_case("retry-after"))
.and_then(|(_, value)| value.parse::<u64>().ok())
.map(Duration::from_secs);
Err(OtlpError::retryable(response.status, retry_after))
}
502 | 503 | 504 => {
Err(OtlpError::retryable(response.status, None))
}
415 => {
Err(OtlpError::compression_fallback(response.status))
}
400..=499 => {
Err(OtlpError::non_retryable(format!(
"OTLP client error: {} - batch dropped",
response.status
)))
}
500..=599 => {
Err(OtlpError::non_retryable(format!(
"OTLP server error: {} - batch dropped",
response.status
)))
}
_ => Err(OtlpError::non_retryable(format!(
"Unexpected OTLP response status: {}",
response.status
))),
}
}
fn corrected_otlp_status_classifier(response: &ResponseFixture) -> Result<(), OtlpError> {
match response.status {
200..=299 => Ok(()),
429 => {
let retry_after = response
.headers
.iter()
.find(|(name, _)| name.eq_ignore_ascii_case("retry-after"))
.and_then(|(_, value)| value.parse::<u64>().ok())
.map(Duration::from_secs);
Err(OtlpError::retryable(response.status, retry_after))
}
408 => {
Err(OtlpError::retryable(response.status, None))
}
502 | 503 | 504 => {
Err(OtlpError::retryable(response.status, None))
}
415 => {
Err(OtlpError::compression_fallback(response.status))
}
400..=499 => {
Err(OtlpError::non_retryable(format!(
"OTLP client error: {} - batch dropped",
response.status
)))
}
500..=599 => {
Err(OtlpError::non_retryable(format!(
"OTLP server error: {} - batch dropped",
response.status
)))
}
_ => Err(OtlpError::non_retryable(format!(
"Unexpected OTLP response status: {}",
response.status
))),
}
}
#[test]
fn otlp_408_timeout_retry_classification_audit() {
eprintln!("\n🔍 OTLP HTTP 408 REQUEST TIMEOUT RETRY AUDIT");
eprintln!("============================================");
eprintln!("\n📋 RFC 9110 Requirements for HTTP 408:");
eprintln!(" • 408 Request Timeout indicates server-side timeout");
eprintln!(" • Should be retryable with exponential backoff");
eprintln!(" • NOT a client error requiring batch drop");
eprintln!(" • Temporary condition, not permanent failure");
let test_cases = vec![
(408, "Request Timeout", true), (429, "Too Many Requests", true), (502, "Bad Gateway", true), (503, "Service Unavailable", true), (504, "Gateway Timeout", true), (400, "Bad Request", false), (401, "Unauthorized", false), (404, "Not Found", false), ];
eprintln!("\n📊 Testing retry classification:");
for (status_code, status_name, should_be_retryable) in test_cases {
let response = ResponseFixture {
status: status_code,
headers: vec![],
};
let current_result = current_otlp_status_classifier(&response);
let current_retryable = matches!(current_result, Err(ref e) if e.is_retryable());
let corrected_result = corrected_otlp_status_classifier(&response);
let corrected_retryable = matches!(corrected_result, Err(ref e) if e.is_retryable());
eprintln!(" {} {} ({}):", status_code, status_name, if should_be_retryable { "should retry" } else { "should drop" });
eprintln!(" Current: {} {}", if current_retryable { "✓ retryable" } else { "✗ terminal" }, if current_retryable == should_be_retryable { "" } else { "❌ WRONG" });
eprintln!(" Corrected: {} {}", if corrected_retryable { "✓ retryable" } else { "✗ terminal" }, if corrected_retryable == should_be_retryable { "✅ CORRECT" } else { "❌ WRONG" });
if status_code == 408 {
assert!(!current_retryable, "CURRENT: 408 should be incorrectly classified as terminal");
assert!(corrected_retryable, "CORRECTED: 408 should be correctly classified as retryable");
eprintln!("\n🎯 HTTP 408 SPECIFIC ANALYSIS:");
eprintln!(" Current behavior: DEFECTIVE - 408 treated as terminal client error");
eprintln!(" RFC 9110 compliant: CORRECTED - 408 treated as retryable timeout");
}
}
eprintln!("\n🚨 AUDIT FINDINGS:");
eprintln!("==================");
eprintln!("❌ DEFECTIVE: Current classifier treats 408 as non-retryable");
eprintln!(" • Falls into 400-499 range → non_retryable()");
eprintln!(" • Violates RFC 9110 which classifies 408 as retryable");
eprintln!(" • Causes premature batch dropping on server timeouts");
eprintln!("");
eprintln!("✅ FIX REQUIRED: Add explicit 408 case before 400-499 range");
eprintln!(" • Handle 408 as retryable with exponential backoff");
eprintln!(" • Maintain existing terminal classification for other 4xx");
eprintln!(" • Align with OTLP best practices for server timeout handling");
}
#[test]
fn rfc_9110_408_semantics_verification() {
eprintln!("\n📖 RFC 9110 HTTP 408 SEMANTICS VERIFICATION");
eprintln!("===========================================");
eprintln!("📋 RFC 9110 Section 15.5.9 - 408 Request Timeout:");
eprintln!(" • 'The server did not receive a complete request message'");
eprintln!(" • 'Server SHOULD send a Connection: close header field'");
eprintln!(" • 'Client MAY repeat the request without modifications'");
eprintln!(" • → Indicates server-side timeout, not client error");
eprintln!("\n🔍 OTLP Context Analysis:");
eprintln!(" • Collector may timeout waiting for complete protobuf payload");
eprintln!(" • Network delays can cause partial request transmission");
eprintln!(" • Retry with exponential backoff is appropriate");
eprintln!(" • Dropping batch permanently loses telemetry data");
eprintln!("\n🎯 Correct OTLP Behavior:");
eprintln!(" ✅ Retry 408 with exponential backoff (1s, 2s, 4s...)");
eprintln!(" ✅ Close connection and establish new one");
eprintln!(" ✅ Maintain batch for retry rather than drop");
eprintln!(" ❌ Do NOT treat 408 as permanent client error");
}
#[test]
fn demonstrate_408_batch_dropping_defect() {
eprintln!("\n❌ DEMONSTRATING 408 BATCH DROPPING DEFECT");
eprintln!("==========================================");
let timeout_response = ResponseFixture {
status: 408,
headers: vec![],
};
let result = current_otlp_status_classifier(&timeout_response);
eprintln!("Collector returns: HTTP 408 Request Timeout");
match result {
Err(OtlpError::NonRetryable { message }) => {
eprintln!("Current behavior: {} ❌", message);
eprintln!("Impact: Telemetry batch permanently lost!");
eprintln!("Root cause: 408 classified in 400-499 non-retryable range");
},
Err(OtlpError::Retryable { status_code, .. }) => {
eprintln!("Current behavior: Retryable {} ✅", status_code);
panic!("Current implementation should be defective for this test");
},
_ => panic!("Unexpected result for 408 status"),
}
eprintln!("\nCorrect RFC 9110 behavior:");
let corrected_result = corrected_otlp_status_classifier(&timeout_response);
match corrected_result {
Err(OtlpError::Retryable { status_code, .. }) => {
eprintln!("Fixed behavior: Retryable {} ✅", status_code);
eprintln!("Impact: Batch preserved for retry with backoff");
},
_ => panic!("Corrected implementation should treat 408 as retryable"),
}
}
}