fluxmq 0.1.0 - Docs.rs

#![allow(dead_code)]
use crate::performance::object_pool::MessagePools;
use crate::performance::protocol_arena::ProtocolArenaPool;
/// High-performance Kafka protocol codec optimizations
///
/// This module implements aggressive optimizations for Kafka protocol processing:
/// - Zero-copy deserialization where possible
/// - Batch processing of requests/responses
/// - Pre-compiled response templates
/// - SIMD-optimized parsing
use crate::protocol::{ProduceRequest, ProduceResponse};
use bytes::{Buf, BufMut, Bytes, BytesMut};
use std::collections::HashMap;
use std::sync::Mutex;

/// High-performance codec with optimizations
pub struct HighPerformanceKafkaCodec {
    // Pre-compiled response templates
    api_versions_response: Bytes,
    metadata_response_template: Bytes,
    // Object pools for reusing buffers
    pools: MessagePools,
    // 🚀 Protocol arena pool for request/response processing
    arena_pool: Mutex<ProtocolArenaPool>,
    // Response cache for frequently requested data
    response_cache: HashMap<String, Bytes>,
    // Performance counters
    fast_path_hits: std::sync::atomic::AtomicU64,
    slow_path_hits: std::sync::atomic::AtomicU64,
    // Arena allocation statistics
    arena_allocations: std::sync::atomic::AtomicU64,
    arena_bytes_saved: std::sync::atomic::AtomicU64,
}

impl HighPerformanceKafkaCodec {
    pub fn new() -> Self {
        let mut codec = Self {
            api_versions_response: Bytes::new(),
            metadata_response_template: Bytes::new(),
            pools: MessagePools::new(),
            arena_pool: Mutex::new(ProtocolArenaPool::new()),
            response_cache: HashMap::new(),
            fast_path_hits: std::sync::atomic::AtomicU64::new(0),
            slow_path_hits: std::sync::atomic::AtomicU64::new(0),
            arena_allocations: std::sync::atomic::AtomicU64::new(0),
            arena_bytes_saved: std::sync::atomic::AtomicU64::new(0),
        };

        // Pre-compile common responses
        codec.precompile_responses();
        codec
    }

    /// Pre-compile frequently used responses to avoid repeated serialization
    fn precompile_responses(&mut self) {
        // Pre-compile ApiVersions response (most common request)
        let api_versions = self.build_api_versions_response();
        self.api_versions_response = api_versions;

        // Pre-compile basic metadata response template
        let metadata_template = self.build_metadata_template();
        self.metadata_response_template = metadata_template;
    }

    /// Build optimized ApiVersions response for broad compatibility
    /// Uses standard (non-flexible) format for better compatibility with older clients
    fn build_api_versions_response(&self) -> Bytes {
        let mut buf = BytesMut::with_capacity(200);

        // CRITICAL FIX: Don't include length prefix in template - it will be added by framing layer
        // Response header starts with correlation_id (will be patched at runtime)
        buf.put_i32(0); // Correlation ID placeholder (will be updated with request correlation_id)

        // CRITICAL FIX: Use non-flexible format for better compatibility
        // This matches what standard Kafka brokers send for ApiVersions v0-v2

        // 1. ErrorCode (int16) - first field in non-flexible format
        buf.put_u16(0);

        // 2. ApiKeys (standard array) - NOT compact array
        // Standard array: length as int32, not varint
        buf.put_u32(20); // Number of API entries as int32

        // Add supported API versions - use conservative versions for compatibility
        let apis = [
            (0u16, 0u16, 7u16), // Produce (conservative v7)
            (1, 0, 11),         // Fetch (conservative v11)
            (2, 0, 5),          // ListOffsets (conservative v5)
            (3, 0, 8),          // Metadata (TEMP FIX: force non-flexible v8 for Java compatibility)
            (8, 0, 6),          // OffsetCommit (conservative v6)
            (9, 0, 5),          // OffsetFetch (conservative v5)
            (10, 0, 3),         // FindCoordinator (conservative v3)
            (11, 0, 5),         // JoinGroup (conservative v5)
            (12, 0, 3),         // Heartbeat (conservative v3)
            (13, 0, 2),         // LeaveGroup (conservative v2)
            (14, 0, 3),         // SyncGroup (conservative v3)
            (15, 0, 4),         // DescribeGroups (conservative v4)
            (16, 0, 3),         // ListGroups (conservative v3)
            (17, 0, 1),         // SaslHandshake
            (18, 0, 3),         // ApiVersions (v3 max for compatibility)
            (19, 0, 4),         // CreateTopics (conservative v4)
            (20, 0, 3),         // DeleteTopics (conservative v3)
            (32, 0, 3),         // DescribeConfigs (conservative v3)
            (33, 0, 2),         // AlterConfigs (conservative v2)
            (36, 0, 2),         // SaslAuthenticate (required for Java)
        ];

        for (api_key, min_version, max_version) in apis {
            buf.put_u16(api_key);
            buf.put_u16(min_version);
            buf.put_u16(max_version);
            // No tagged fields in non-flexible format
        }

        // No tagged fields section in non-flexible format
        // No length field update needed - length prefix will be added by framing layer

        buf.freeze()
    }

    /// Build metadata response template
    fn build_metadata_template(&self) -> Bytes {
        let mut buf = BytesMut::with_capacity(100);

        // Response header template
        buf.put_u32(0); // Length placeholder
        buf.put_u32(0); // Correlation ID placeholder
        buf.put_u16(0); // Error code

        // Brokers array
        buf.put_u32(1); // One broker
        buf.put_u32(0); // Broker ID
        buf.put_u16(9); // Host length
        buf.put_slice(b"localhost");
        buf.put_u32(9092); // Port
        buf.put_u16(0); // Rack (null)

        buf.freeze()
    }

    /// Ultra-fast ApiVersions response using pre-compiled template
    pub fn encode_api_versions_fast(&self, correlation_id: i32) -> Bytes {
        self.fast_path_hits
            .fetch_add(1, std::sync::atomic::Ordering::Relaxed);

        // SAFETY: Clone the template to avoid bounds issues
        let template_len = self.api_versions_response.len();
        let mut response = BytesMut::with_capacity(template_len);
        response.extend_from_slice(&self.api_versions_response);

        // DEBUG: Log template size and correlation ID
        tracing::debug!(
            "ApiVersions fast encoding: template_size={}, correlation_id={}",
            template_len,
            correlation_id
        );

        // Validate we have at least 4 bytes for correlation ID
        if response.len() < 4 {
            tracing::error!(
                "Template too small: {} bytes, need at least 4",
                response.len()
            );
            // Fallback to empty response
            return Bytes::new();
        }

        // Update correlation ID at start of response (no length prefix in template)
        let correlation_bytes = correlation_id.to_be_bytes();
        response[0] = correlation_bytes[0];
        response[1] = correlation_bytes[1];
        response[2] = correlation_bytes[2];
        response[3] = correlation_bytes[3];

        // DEBUG: Verify the update worked
        let updated_correlation =
            i32::from_be_bytes([response[0], response[1], response[2], response[3]]);
        tracing::debug!(
            "ApiVersions correlation after update: final_value={}",
            updated_correlation
        );

        response.freeze()
    }

    /// Batch decode multiple Produce requests
    pub fn batch_decode_produce_requests(&self, data: &[u8]) -> Vec<ProduceRequest> {
        let mut requests = Vec::new();
        let mut offset = 0;

        while offset < data.len() {
            if let Some((request, consumed)) = self.fast_decode_produce_request(&data[offset..]) {
                requests.push(request);
                offset += consumed;
            } else {
                break;
            }
        }

        requests
    }

    /// Fast decode single Produce request using optimized parsing
    fn fast_decode_produce_request(&self, data: &[u8]) -> Option<(ProduceRequest, usize)> {
        if data.len() < 8 {
            return None;
        }

        let mut cursor = std::io::Cursor::new(data);

        // Use optimized parsing - this is a simplified version
        // Real implementation would use more sophisticated techniques
        let correlation_id = cursor.get_u32();
        let topic_len = cursor.get_u16() as usize;

        if data.len() < 8 + topic_len {
            return None;
        }

        let topic = String::from_utf8_lossy(&data[8..8 + topic_len]).to_string();
        let consumed = 8 + topic_len;

        let request = ProduceRequest {
            correlation_id: correlation_id as i32,
            topic,
            partition: 0, // Simplified
            messages: Vec::new(),
            acks: 1,
            timeout_ms: 1000,
        };

        Some((request, consumed))
    }

    /// Batch encode multiple responses
    pub fn batch_encode_responses(&self, responses: &[ProduceResponse]) -> Bytes {
        let mut total_size = 0;

        // Calculate total buffer size needed
        for response in responses {
            total_size += 50 + response.topic.len(); // Rough estimate
        }

        let mut buf = BytesMut::with_capacity(total_size);

        for response in responses {
            self.encode_produce_response_fast(response, &mut buf);
        }

        buf.freeze()
    }

    /// Fast encode Produce response
    fn encode_produce_response_fast(&self, response: &ProduceResponse, buf: &mut BytesMut) {
        buf.put_u32(20 + response.topic.len() as u32); // Response length
        buf.put_u32(response.correlation_id as u32);
        buf.put_u16(response.topic.len() as u16);
        buf.put_slice(response.topic.as_bytes());
        buf.put_u32(response.partition);
        buf.put_u64(response.base_offset);
        buf.put_u16(response.error_code as u16);
    }

    /// High-performance generic response encoding with fast paths
    pub fn encode_response_fast(
        &self,
        response: &crate::protocol::kafka::KafkaResponse,
    ) -> Result<bytes::Bytes, Box<dyn std::error::Error + Send + Sync>> {
        use crate::protocol::kafka::KafkaCodec;

        // Fast path optimizations for common response types
        match response {
            crate::protocol::kafka::KafkaResponse::ApiVersions(api_response) => {
                // CRITICAL FIX: Use the proper method that patches correlation ID
                self.fast_path_hits
                    .fetch_add(1, std::sync::atomic::Ordering::Relaxed);
                Ok(self.encode_api_versions_fast(api_response.header.correlation_id))
            }
            _ => {
                // Fallback to standard codec with performance tracking
                self.slow_path_hits
                    .fetch_add(1, std::sync::atomic::Ordering::Relaxed);
                match KafkaCodec::encode_response(response) {
                    Ok(bytes) => Ok(bytes),
                    Err(e) => Err(Box::new(e) as Box<dyn std::error::Error + Send + Sync>),
                }
            }
        }
    }

    /// Get performance statistics
    pub fn get_performance_stats(&self) -> CodecPerformanceStats {
        let fast_hits = self
            .fast_path_hits
            .load(std::sync::atomic::Ordering::Relaxed);
        let slow_hits = self
            .slow_path_hits
            .load(std::sync::atomic::Ordering::Relaxed);

        CodecPerformanceStats {
            fast_path_hits: fast_hits,
            slow_path_hits: slow_hits,
            fast_path_ratio: if fast_hits + slow_hits > 0 {
                fast_hits as f64 / (fast_hits + slow_hits) as f64
            } else {
                0.0
            },
            cache_hit_count: self.response_cache.len() as u64,
        }
    }

    /// Encode unsigned varint according to Kafka specification
    /// Variable-length encoding where each byte uses 7 bits for data and 1 bit as continuation flag
    fn put_unsigned_varint(&self, buf: &mut BytesMut, mut value: u32) {
        while value >= 0x80 {
            buf.put_u8((value & 0x7F) as u8 | 0x80);
            value >>= 7;
        }
        buf.put_u8(value as u8);
    }
}

#[derive(Debug, Clone)]
pub struct CodecPerformanceStats {
    pub fast_path_hits: u64,
    pub slow_path_hits: u64,
    pub fast_path_ratio: f64,
    pub cache_hit_count: u64,
}

impl CodecPerformanceStats {
    pub fn report(&self) -> String {
        format!(
            "High-Performance Codec Stats:\n  Fast Path: {} hits ({:.1}%)\n  Slow Path: {} hits\n  Cache Entries: {}",
            self.fast_path_hits,
            self.fast_path_ratio * 100.0,
            self.slow_path_hits,
            self.cache_hit_count
        )
    }
}

/// SIMD-optimized string operations
pub struct SIMDStringOps;

impl SIMDStringOps {
    /// Fast string length validation
    pub fn validate_string_lengths(data: &[u8]) -> bool {
        // Placeholder for SIMD validation
        // Real implementation would use SIMD instructions to validate
        // multiple string lengths in parallel
        !data.is_empty()
    }

    /// Parallel string copying
    pub fn batch_copy_strings(sources: &[&[u8]], destinations: &mut [Vec<u8>]) {
        // Placeholder for SIMD-optimized batch copying
        for (i, source) in sources.iter().enumerate() {
            if i < destinations.len() {
                destinations[i].clear();
                destinations[i].extend_from_slice(source);
            }
        }
    }
}

/// Zero-copy message processing utilities
pub struct ZeroCopyProcessor;

impl ZeroCopyProcessor {
    /// Process messages without copying data
    pub fn process_batch_zero_copy(data: &Bytes) -> Vec<MessageRef> {
        let mut messages = Vec::new();
        let mut offset = 0;

        while offset < data.len() {
            if let Some((msg_ref, consumed)) = Self::parse_message_ref(data, offset) {
                messages.push(msg_ref);
                offset += consumed;
            } else {
                break;
            }
        }

        messages
    }

    fn parse_message_ref(data: &Bytes, offset: usize) -> Option<(MessageRef, usize)> {
        if offset + 8 > data.len() {
            return None;
        }

        // Parse message header
        let key_len = u16::from_be_bytes([data[offset], data[offset + 1]]);
        let value_len = u32::from_be_bytes([
            data[offset + 2],
            data[offset + 3],
            data[offset + 4],
            data[offset + 5],
        ]);

        let total_len = 6 + key_len as usize + value_len as usize;

        if offset + total_len > data.len() {
            return None;
        }

        let msg_ref = MessageRef {
            key_range: if key_len > 0 {
                Some((offset + 6, key_len as usize))
            } else {
                None
            },
            value_range: (offset + 6 + key_len as usize, value_len as usize),
            data: data.clone(),
        };

        Some((msg_ref, total_len))
    }
}

/// Zero-copy message reference
pub struct MessageRef {
    pub key_range: Option<(usize, usize)>,
    pub value_range: (usize, usize),
    pub data: Bytes,
}

impl MessageRef {
    pub fn key(&self) -> Option<&[u8]> {
        self.key_range
            .map(|(start, len)| &self.data[start..start + len])
    }

    pub fn value(&self) -> &[u8] {
        let (start, len) = self.value_range;
        &self.data[start..start + len]
    }
}