#![cfg(test)]
use std::collections::HashMap;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::{Arc, Mutex};
use std::time::{Duration, Instant};
/// Outcome of a simulated DNS resolution performed by the test fixtures.
#[derive(Debug, Clone, PartialEq)]
pub enum DnsResult {
    /// Resolution succeeded; carries the resolved addresses as strings.
    Success(Vec<String>),
    /// Resolution failed; carries the reason for the failure.
    Failure(DnsError),
}
/// Failure reasons a simulated DNS resolution can report.
#[derive(Debug, Clone, PartialEq)]
pub enum DnsError {
    /// Reported to callers as an NXDOMAIN resolution failure.
    NxDomain,
    /// Reported to callers as a DNS server failure.
    ServerFailure,
    /// Reported to callers as a DNS timeout.
    Timeout,
}
/// One cached resolution: the result, the instant it was stored, and its
/// time-to-live.
type CachedDnsEntry = (DnsResult, Instant, Duration);

/// Test fixture modelling a DNS resolver with an optional TTL-based cache.
#[derive(Debug)]
pub struct DnsCacheFixture {
    /// Cached resolutions keyed by hostname.
    cache: Arc<Mutex<HashMap<String, CachedDnsEntry>>>,
    /// Counter of lookups recorded by the fixture.
    lookup_count: Arc<AtomicUsize>,
    /// Whether lookups consult and populate `cache`.
    cache_enabled: bool,
}
impl DnsCacheFixture {
    /// Creates a fixture with an empty cache and a lookup counter of zero.
    ///
    /// `cache_enabled` controls whether [`lookup`](Self::lookup) consults and
    /// populates the cache.
    fn new(cache_enabled: bool) -> Self {
        Self {
            cache: Arc::new(Mutex::new(HashMap::new())),
            lookup_count: Arc::new(AtomicUsize::new(0)),
            cache_enabled,
        }
    }

    /// Resolves `hostname` against a fixed table of simulated answers.
    ///
    /// When caching is enabled and an unexpired entry exists, the cached
    /// result is returned and the lookup counter is left untouched. Otherwise
    /// the counter is incremented, the answer is resolved, and (when caching
    /// is enabled) stored with a TTL of 300 seconds for successes and 60
    /// seconds for failures. Hostnames missing from the table resolve to
    /// `DnsError::NxDomain`.
    ///
    /// # Panics
    ///
    /// Panics if the cache mutex is poisoned.
    fn lookup(&self, hostname: &str) -> DnsResult {
        if self.cache_enabled {
            // The guard is scoped to this block so the lock is released
            // before the (simulated) resolution below.
            let cache = self.cache.lock().unwrap();
            if let Some((result, cached_at, ttl)) = cache.get(hostname) {
                if cached_at.elapsed() < *ttl {
                    return result.clone();
                }
            }
        }

        // Count only real resolutions: a cache hit returned above must not
        // be reported as a DNS lookup, otherwise the counter cannot show any
        // benefit from caching.
        self.lookup_count.fetch_add(1, Ordering::Relaxed);

        let result = match hostname {
            "collector.example.com" => DnsResult::Success(vec!["203.0.113.1".to_string()]),
            "invalid.example.com" => DnsResult::Failure(DnsError::NxDomain),
            "timeout.example.com" => DnsResult::Failure(DnsError::Timeout),
            "server-fail.example.com" => DnsResult::Failure(DnsError::ServerFailure),
            _ => DnsResult::Failure(DnsError::NxDomain),
        };

        if self.cache_enabled {
            // Failures get a shorter TTL than successes.
            let ttl = match &result {
                DnsResult::Success(_) => Duration::from_secs(300),
                DnsResult::Failure(_) => Duration::from_secs(60),
            };
            let mut cache = self.cache.lock().unwrap();
            cache.insert(hostname.to_string(), (result.clone(), Instant::now(), ttl));
        }
        result
    }

    /// Returns the number of resolutions performed so far (cache hits are
    /// not counted).
    fn get_lookup_count(&self) -> usize {
        self.lookup_count.load(Ordering::Relaxed)
    }

    /// Removes every cached entry so the next lookup resolves again.
    ///
    /// # Panics
    ///
    /// Panics if the cache mutex is poisoned.
    fn clear_cache(&self) {
        let mut cache = self.cache.lock().unwrap();
        cache.clear();
    }
}
/// Test fixture for an OTLP exporter that resolves its endpoint host through
/// a [`DnsCacheFixture`] on each export.
#[derive(Debug)]
pub struct DnsResolvingOtlpExporterFixture {
    /// Endpoint URL the exporter targets.
    endpoint: String,
    /// Resolver used to look up the endpoint's hostname.
    dns_cache: DnsCacheFixture,
    /// Counter of export attempts.
    export_attempts: Arc<AtomicUsize>,
}
impl DnsResolvingOtlpExporterFixture {
    /// Creates an exporter fixture for `endpoint` whose resolver has caching
    /// switched on or off according to `cache_enabled`.
    fn new(endpoint: &str, cache_enabled: bool) -> Self {
        Self {
            endpoint: endpoint.to_string(),
            dns_cache: DnsCacheFixture::new(cache_enabled),
            export_attempts: Arc::new(AtomicUsize::new(0)),
        }
    }

    /// Extracts the host portion of the endpoint URL.
    ///
    /// Returns `None` when the endpoint has no `://` separator. The host ends
    /// at the first `:`, `/`, `?` or `#` after the separator, whichever comes
    /// first, so a colon inside the path is never mistaken for the port
    /// delimiter. Userinfo (`user@host`) and bracketed IPv6 literals are not
    /// handled.
    fn endpoint_hostname(&self) -> Option<&str> {
        let (_, after_scheme) = self.endpoint.split_once("://")?;
        let host_end = after_scheme
            .find(|c: char| matches!(c, ':' | '/' | '?' | '#'))
            .unwrap_or(after_scheme.len());
        Some(&after_scheme[..host_end])
    }

    /// Simulates one trace export: records the attempt, resolves the endpoint
    /// host, and maps the DNS outcome to the export result.
    ///
    /// # Errors
    ///
    /// Returns `"Invalid endpoint URL"` when the endpoint has no `://`
    /// separator (no DNS lookup is made in that case), or a message naming
    /// the DNS failure and hostname when resolution fails.
    fn export_traces(&self) -> Result<(), String> {
        self.export_attempts.fetch_add(1, Ordering::Relaxed);

        let hostname = self
            .endpoint_hostname()
            .ok_or_else(|| "Invalid endpoint URL".to_string())?;

        match self.dns_cache.lookup(hostname) {
            DnsResult::Success(_) => Ok(()),
            DnsResult::Failure(DnsError::NxDomain) => {
                Err(format!("DNS resolution failed: NXDOMAIN for {}", hostname))
            }
            DnsResult::Failure(DnsError::ServerFailure) => {
                Err(format!("DNS server failure for {}", hostname))
            }
            DnsResult::Failure(DnsError::Timeout) => Err(format!("DNS timeout for {}", hostname)),
        }
    }

    /// Returns how many times `export_traces` has been called.
    fn get_export_attempts(&self) -> usize {
        self.export_attempts.load(Ordering::Relaxed)
    }

    /// Returns the lookup count reported by the underlying resolver fixture.
    fn get_dns_lookups(&self) -> usize {
        self.dns_cache.get_lookup_count()
    }
}
/// Report-only audit: for each failing endpoint, runs the same number of
/// exports through an uncached and a cached exporter fixture and prints how
/// many DNS lookups each one recorded. Makes no assertions.
#[test]
fn audit_otlp_dns_failure_caching() {
    // Endpoint under test paired with a human-readable failure label.
    const SCENARIOS: [(&str, &str); 3] = [
        ("http://invalid.example.com:4318/v1/traces", "NXDOMAIN"),
        ("http://timeout.example.com:4318/v1/traces", "DNS Timeout"),
        ("http://server-fail.example.com:4318/v1/traces", "Server Failure"),
    ];
    const EXPORT_ATTEMPTS: usize = 10;

    let preamble = [
        "🔍 AUDIT: OTLP DNS failure caching under repeated export attempts",
        "📋 DNS failure caching requirements:",
        " • DNS failures should be cached (negative cache)",
        " • TTL should prevent repeated failed lookups",
        " • Exponential backoff should reduce DNS server load",
        " • Recovery after cache expiration should be possible",
        " • NOT: fresh DNS lookup on every export attempt",
        "📊 Testing DNS failure caching scenarios:",
    ];
    for line in preamble.iter() {
        println!("{}", line);
    }

    for &(endpoint, failure_type) in SCENARIOS.iter() {
        println!(" Testing: {} ({})", endpoint, failure_type);
        let uncached = DnsResolvingOtlpExporterFixture::new(endpoint, false);
        let cached = DnsResolvingOtlpExporterFixture::new(endpoint, true);
        println!(" Making {} export attempts:", EXPORT_ATTEMPTS);

        // Only the first five uncached attempts are narrated.
        for attempt in 1..=EXPORT_ATTEMPTS {
            let _ = uncached.export_traces();
            match attempt {
                1 => println!(" Attempt {}: DNS lookup performed", attempt),
                2..=5 => println!(" Attempt {}: DNS lookup repeated", attempt),
                _ => {}
            }
        }

        // Only the first two cached attempts are narrated.
        for attempt in 1..=EXPORT_ATTEMPTS {
            let _ = cached.export_traces();
            match attempt {
                1 => println!(" Cached attempt {}: DNS lookup performed", attempt),
                2 => println!(" Cached attempt {}: DNS result cached", attempt),
                _ => {}
            }
        }

        let uncached_lookups = uncached.get_dns_lookups();
        let cached_lookups = cached.get_dns_lookups();
        println!(" No cache DNS lookups: {}", uncached_lookups);
        println!(" Cached DNS lookups: {}", cached_lookups);

        let uncached_verdict = if uncached_lookups == EXPORT_ATTEMPTS {
            " ❌ NO CACHE: DNS lookup on every export (DNS storm)"
        } else {
            " ✅ NO CACHE: Unexpected behavior"
        };
        println!("{}", uncached_verdict);

        let cached_verdict = match cached_lookups {
            1 => " ✅ CACHED: DNS lookup only on first attempt",
            n if n < EXPORT_ATTEMPTS => " ⚠️ CACHED: Some DNS lookup reduction",
            _ => " ❌ CACHED: No caching benefit",
        };
        println!("{}", cached_verdict);

        // How many times more lookups the uncached exporter made.
        let storm_ratio = uncached_lookups as f64 / cached_lookups as f64;
        println!(" DNS storm factor: {:.1}x", storm_ratio);
        if storm_ratio >= 5.0 {
            println!(" 🚨 DNS STORM: Current implementation creates excessive DNS load");
        }
    }
}
/// Report-only audit: exports three times through a cached exporter, then
/// clears the cache (standing in for TTL expiry) and exports once more,
/// printing the DNS lookup count at each stage. Makes no assertions.
#[test]
fn audit_dns_cache_ttl_and_recovery() {
    let preamble = [
        "🔍 AUDIT: DNS cache TTL expiration and recovery behavior",
        "📋 DNS cache TTL requirements:",
        " • Negative cache should have reasonable TTL (1-5 minutes)",
        " • Cache expiration should allow recovery attempts",
        " • Failed hostname recovery should be possible",
        " • TTL should balance load reduction vs recovery time",
    ];
    for line in preamble.iter() {
        println!("{}", line);
    }

    let exporter =
        DnsResolvingOtlpExporterFixture::new("http://invalid.example.com:4318/v1/traces", true);
    println!("📊 Testing DNS cache TTL behavior:");

    // The first two attempts share the same reporting shape.
    for &(label, suffix) in [("First", "first"), ("Second", "second")].iter() {
        let failed = exporter.export_traces().is_err();
        println!(" {} attempt: {:?}", label, failed);
        println!(" DNS lookups after {}: {}", suffix, exporter.get_dns_lookups());
    }

    let third_failed = exporter.export_traces().is_err();
    println!(" Third attempt: {:?}", third_failed);
    let lookups_before_expiry = exporter.get_dns_lookups();
    println!(" DNS lookups before TTL expiry: {}", lookups_before_expiry);
    let ttl_verdict = match lookups_before_expiry {
        1 => " ✅ TTL BEHAVIOR: Cache prevents redundant DNS lookups",
        _ => " ❌ TTL BEHAVIOR: Cache not working properly",
    };
    println!("{}", ttl_verdict);

    // Clearing the cache stands in for waiting out the TTL.
    exporter.dns_cache.clear_cache();
    println!(" Exercising cache TTL expiration...");

    let post_expiry_failed = exporter.export_traces().is_err();
    println!(" Post-expiry attempt: {:?}", post_expiry_failed);
    let lookups_after_expiry = exporter.get_dns_lookups();
    println!(" DNS lookups after expiry: {}", lookups_after_expiry);
    let recovery_verdict = match lookups_after_expiry {
        2 => " ✅ RECOVERY: New DNS lookup after cache expiry",
        _ => " ❌ RECOVERY: DNS lookup behavior incorrect",
    };
    println!("{}", recovery_verdict);

    println!("✅ DNS CACHE TTL AUDIT COMPLETE");
    println!("📊 FINDING: TTL expiration enables recovery attempts");
}
/// Report-only audit: fires a burst of exports through an uncached exporter
/// fixture and prints the resulting DNS lookup volume, rate and
/// lookups-per-export ratio, followed by a fixed list of findings. Makes no
/// assertions.
#[test]
fn audit_current_otlp_dns_behavior() {
    let preamble = [
        "🔍 AUDIT: Current OTLP DNS resolution implementation gaps",
        "📊 Current implementation analysis:",
        " File: src/net/resolve.rs",
        " Lines 56-78: lookup_all() function",
        " Lines 97-109: resolve_socket_addrs() calls addr.to_socket_addrs()",
        " Issue: No DNS caching or failure backoff mechanism",
        "📋 Current DNS resolution behavior:",
        " • Every HTTP request triggers fresh DNS lookup",
        " • No positive DNS result caching",
        " • No negative DNS result caching (NXDOMAIN, SERVFAIL)",
        " • No exponential backoff for failed resolutions",
        " • Uses stdlib addr.to_socket_addrs() directly",
        "📊 DNS storm exercise:",
    ];
    for line in preamble.iter() {
        println!("{}", line);
    }

    let failed_exporter =
        DnsResolvingOtlpExporterFixture::new("http://invalid.example.com:4318/v1/traces", false);
    let burst_count = 50;
    println!(" Exercising {} rapid export attempts:", burst_count);

    let started = Instant::now();
    (0..burst_count).for_each(|_| {
        let _ = failed_exporter.export_traces();
    });
    let elapsed = started.elapsed();

    // Read the counters once; nothing mutates them after the burst.
    let attempts = failed_exporter.get_export_attempts();
    let lookups = failed_exporter.get_dns_lookups();

    println!(" Total export attempts: {}", attempts);
    println!(" Total DNS lookups: {}", lookups);
    println!(" Time elapsed: {:?}", elapsed);
    println!(
        " DNS lookup rate: {:.1} lookups/sec",
        lookups as f64 / elapsed.as_secs_f64()
    );

    let dns_ratio = lookups as f64 / attempts as f64;
    println!(
        " DNS lookup ratio: {:.2} (1.0 = lookup per request)",
        dns_ratio
    );
    let verdict = if dns_ratio >= 0.9 {
        " ❌ BEHAVIOR: (b) re-resolve on every export attempt (DNS storm)"
    } else if dns_ratio < 0.1 {
        " ✅ BEHAVIOR: (a) cache negative result and back off"
    } else {
        " ⚠️ BEHAVIOR: (c) fail-fast forever (no recovery)"
    };
    println!("{}", verdict);

    let findings = [
        "🚨 CURRENT IMPLEMENTATION DEFECTS:",
        " • DNS storm: fresh lookup on every export attempt",
        " • No negative DNS caching for NXDOMAIN responses",
        " • No exponential backoff to reduce DNS server load",
        " • Poor performance under sustained DNS failures",
        "📋 REQUIRED IMPROVEMENTS:",
        " 1. Add DNS result cache with TTL (positive and negative)",
        " 2. Implement exponential backoff for failed DNS resolutions",
        " 3. Add DNS cache configuration to HttpClient",
        " 4. Use cached results within TTL window",
        " 5. Recovery mechanism after backoff period expires",
        "📊 DNS resolution best practices:",
        " • Positive cache TTL: 5-30 minutes (depends on DNS TTL)",
        " • Negative cache TTL: 1-5 minutes (faster recovery)",
        " • Exponential backoff: 1s, 2s, 4s, 8s, max 60s",
        " • Cache size limit: ~1000 entries with LRU eviction",
        "✅ DNS BEHAVIOR AUDIT COMPLETE",
        "🚨 FINDING: Current implementation creates DNS storm under failure",
    ];
    for line in findings.iter() {
        println!("{}", line);
    }
}
/// Report-only audit: prints the expected exponential backoff schedule and
/// compares the estimated DNS query count with and without backoff for a
/// 300-second failure window exported every 10 seconds. Makes no assertions.
#[test]
fn audit_dns_exponential_backoff() {
    let preamble = [
        "🔍 AUDIT: DNS exponential backoff under sustained resolution failures",
        "📋 DNS backoff requirements:",
        " • Initial failure should retry immediately",
        " • Subsequent failures should use exponential backoff",
        " • Backoff should cap at reasonable maximum (60s)",
        " • Success should reset backoff to initial value",
    ];
    for line in preamble.iter() {
        println!("{}", line);
    }

    // Delay before each attempt, in seconds; the schedule caps at 60.
    let expected_backoff_sequence: Vec<Duration> = [0u64, 1, 2, 4, 8, 16, 32, 60, 60]
        .iter()
        .map(|&secs| Duration::from_secs(secs))
        .collect();

    println!("📊 Expected exponential backoff sequence:");
    for (index, delay) in expected_backoff_sequence.iter().enumerate() {
        let attempt = index + 1;
        match index {
            0 => println!(" Attempt {}: immediate", attempt),
            _ => println!(" Attempt {}: {:?} delay", attempt, delay),
        }
    }

    let comparison = [
        "📊 Current implementation (no backoff):",
        " • Every export attempt immediately retries DNS",
        " • No backoff delay between failed attempts",
        " • DNS server receives full request rate under failures",
        " • Potential for DNS server rate limiting or blocking",
        "📊 Improved implementation (with backoff):",
        " • Failed DNS lookups trigger exponential backoff",
        " • Reduced DNS query frequency under sustained failures",
        " • DNS server load decreases significantly",
        " • Recovery possible after backoff period",
    ];
    for line in comparison.iter() {
        println!("{}", line);
    }

    let sustained_failure_duration = Duration::from_secs(300);
    let export_interval = Duration::from_secs(10);
    let total_exports = sustained_failure_duration.as_secs() / export_interval.as_secs();
    println!(
        " Sustained failure scenario: {} exports over {:?}",
        total_exports, sustained_failure_duration
    );

    // Without backoff every export issues its own DNS query.
    let no_backoff_queries = total_exports;
    let with_backoff_queries = calculate_backoff_queries(total_exports, &expected_backoff_sequence);
    println!(" No backoff DNS queries: {}", no_backoff_queries);
    println!(" With backoff DNS queries: {}", with_backoff_queries);
    println!(
        " DNS load reduction: {:.1}x",
        no_backoff_queries as f64 / with_backoff_queries as f64
    );

    let verdict = if with_backoff_queries < no_backoff_queries / 2 {
        " ✅ BACKOFF: Significant DNS load reduction"
    } else {
        " ⚠️ BACKOFF: Minimal DNS load reduction"
    };
    println!("{}", verdict);

    println!("✅ DNS BACKOFF AUDIT COMPLETE");
    println!("📊 FINDING: Exponential backoff essential for DNS storm prevention");
}
/// Estimates how many DNS queries `total_exports` export attempts generate
/// when failed lookups follow `backoff_sequence`.
///
/// The model is deliberately coarse: every step of the sequence before the
/// last costs one query, and once the final (maximum) backoff is reached only
/// one export in every `max_backoff / 10s` issues a query, assuming exports
/// are spaced 10 seconds apart.
///
/// The estimate never exceeds `total_exports`. An empty sequence means no
/// backoff at all, so every export is counted as a query.
fn calculate_backoff_queries(total_exports: u64, backoff_sequence: &[Duration]) -> u64 {
    /// Assumed spacing between export attempts, in seconds.
    const EXPORT_INTERVAL_SECS: u64 = 10;

    let (max_backoff, ramp) = match backoff_sequence.split_last() {
        Some(parts) => parts,
        // No schedule: nothing throttles the lookups.
        None => return total_exports,
    };

    // One query per ramp step, but never more than the exports that actually
    // happen. `usize` to `u64` is lossless on supported targets.
    let ramp_queries = (ramp.len() as u64).min(total_exports);
    let remaining_exports = total_exports - ramp_queries;

    // At the cap, this many consecutive exports share a single query.
    let exports_per_query = (max_backoff.as_secs() / EXPORT_INTERVAL_SECS).max(1);

    ramp_queries + remaining_exports / exports_per_query
}