1use crate::event;
6use oxicode::{Decode, Encode};
7use serde::{Deserialize, Serialize};
8use std::collections::HashMap;
9use std::fmt;
10
/// Newtype wrapper around a stream topic name.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize, Encode, Decode)]
pub struct TopicName(String);
14
15impl TopicName {
16 pub fn new(name: String) -> Self {
17 Self(name)
18 }
19
20 pub fn as_str(&self) -> &str {
21 &self.0
22 }
23}
24
25impl fmt::Display for TopicName {
26 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
27 write!(f, "{}", self.0)
28 }
29}
30
31impl From<&str> for TopicName {
32 fn from(s: &str) -> Self {
33 Self(s.to_string())
34 }
35}
36
37impl From<String> for TopicName {
38 fn from(s: String) -> Self {
39 Self(s)
40 }
41}
42
/// Newtype wrapper around a stream partition number.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, Encode, Decode)]
pub struct PartitionId(u32);
46
47impl PartitionId {
48 pub fn new(id: u32) -> Self {
49 Self(id)
50 }
51
52 pub fn value(&self) -> u32 {
53 self.0
54 }
55}
56
57impl fmt::Display for PartitionId {
58 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
59 write!(f, "{}", self.0)
60 }
61}
62
/// Newtype wrapper around a position (offset) within a stream partition.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, Encode, Decode)]
pub struct Offset(u64);
66
67impl Offset {
68 pub fn new(offset: u64) -> Self {
69 Self(offset)
70 }
71
72 pub fn value(&self) -> u64 {
73 self.0
74 }
75}
76
77impl fmt::Display for Offset {
78 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
79 write!(f, "{}", self.0)
80 }
81}
82
/// Where to begin consuming a stream.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Encode, Decode)]
pub enum StreamPosition {
    /// Start from the earliest available record.
    Beginning,
    /// Start from the end of the stream.
    End,
    /// Start from an explicit raw offset.
    Offset(u64),
}
93
/// Transport-level metadata attached to a streaming event.
///
/// This is a superset of [`event::EventMetadata`]; conversions in both
/// directions are defined below (and are lossy for fields the core type
/// does not carry).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EventMetadata {
    /// Logical origin of the event (defaults to "oxirs-stream").
    pub source: String,
    /// User on whose behalf the event was produced, if known.
    pub user: Option<String>,
    /// Session the event belongs to, if any.
    pub session_id: Option<String>,
    /// Distributed-tracing id, if any.
    pub trace_id: Option<String>,
    /// Token linking this event to the event that caused it.
    pub causality_token: Option<String>,
    /// Producer-supplied version; when converting to
    /// [`event::EventMetadata`] this falls back to `schema_version`.
    pub version: Option<String>,

    /// Event creation time (UTC).
    pub timestamp: chrono::DateTime<chrono::Utc>,
    /// Optional details about the operation that emitted the event.
    pub operation_context: Option<OperationContext>,
    /// Delivery priority.
    pub priority: EventPriority,
    /// Explicit partition assignment, if any.
    pub partition: Option<PartitionId>,
    /// Correlation id; used for deduplication by `processing::EventProcessor`.
    pub correlation_id: Option<String>,
    /// Optional payload checksum.
    pub checksum: Option<String>,
    /// Metadata schema version (defaults to "1.0").
    pub schema_version: String,
    /// Free-form key/value tags.
    pub tags: HashMap<String, String>,
    /// Optional time-to-live for the event, in seconds.
    pub ttl_seconds: Option<u64>,
    /// Payload compression codec, if the payload is compressed.
    pub compression: Option<CompressionType>,
    /// Encoding used for the payload (defaults to JSON).
    pub serialization_format: SerializationFormat,
    /// Serialized message size in bytes, when known.
    pub message_size: Option<usize>,
    /// Knobs controlling downstream processing.
    pub processing_hints: ProcessingHints,
}
138
139impl From<EventMetadata> for event::EventMetadata {
141 fn from(metadata: EventMetadata) -> Self {
142 Self {
143 event_id: format!(
144 "evt_{}",
145 chrono::Utc::now().timestamp_nanos_opt().unwrap_or(0)
146 ), timestamp: metadata.timestamp,
148 source: metadata.source,
149 user: metadata.user,
150 context: metadata.operation_context.map(|ctx| ctx.operation_type),
151 caused_by: metadata.causality_token,
152 version: metadata.version.unwrap_or(metadata.schema_version),
153 properties: HashMap::new(), checksum: metadata.checksum,
155 }
156 }
157}
158
159impl From<event::EventMetadata> for EventMetadata {
161 fn from(metadata: event::EventMetadata) -> Self {
162 Self {
163 source: metadata.source,
164 user: metadata.user,
165 session_id: None,
166 trace_id: None,
167 causality_token: metadata.caused_by,
168 version: Some(metadata.version),
169 timestamp: metadata.timestamp,
170 operation_context: metadata.context.map(|ctx| OperationContext {
171 operation_type: ctx,
172 request_id: None,
173 client_info: None,
174 metrics: None,
175 auth_context: None,
176 custom_fields: HashMap::new(),
177 }),
178 priority: EventPriority::Normal,
179 partition: None,
180 correlation_id: None,
181 checksum: metadata.checksum,
182 schema_version: "1.0".to_string(),
183 tags: metadata.properties,
184 ttl_seconds: None,
185 compression: None,
186 serialization_format: SerializationFormat::Json,
187 message_size: None,
188 processing_hints: ProcessingHints::default(),
189 }
190 }
191}
192
/// Context describing the operation that produced an event.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OperationContext {
    /// Kind of operation (e.g. "stream_event").
    pub operation_type: String,
    /// Request id, when the operation was request-driven.
    pub request_id: Option<String>,
    /// Details about the originating client, if known.
    pub client_info: Option<ClientInfo>,
    /// Timing/resource measurements for the operation, if collected.
    pub metrics: Option<PerformanceMetrics>,
    /// Authentication details of the caller, if available.
    pub auth_context: Option<AuthContext>,
    /// Additional schema-free key/value fields.
    pub custom_fields: HashMap<String, String>,
}
209
/// Identification of the client application that emitted an event.
#[derive(Debug, Clone, Serialize, Deserialize, Encode, Decode)]
pub struct ClientInfo {
    /// Application name.
    pub application: String,
    /// Application version string.
    pub version: String,
    /// Client IP address, if known.
    pub ip_address: Option<String>,
    /// Client user-agent string, if known.
    pub user_agent: Option<String>,
    /// Geographic location of the client, if known.
    pub location: Option<GeoLocation>,
}
224
/// Coarse geographic location of a client.
#[derive(Debug, Clone, Serialize, Deserialize, Encode, Decode)]
pub struct GeoLocation {
    /// Country name or code.
    pub country: String,
    /// Region/state, if known.
    pub region: Option<String>,
    /// City, if known.
    pub city: Option<String>,
    /// Latitude, if known.
    pub lat: Option<f64>,
    /// Longitude, if known.
    pub lon: Option<f64>,
}
239
/// Per-event timing and resource measurements; all fields are optional and
/// expressed in microseconds unless the name says otherwise.
#[derive(Debug, Clone, Serialize, Deserialize, Encode, Decode)]
pub struct PerformanceMetrics {
    /// Time spent processing the event.
    pub processing_latency_us: Option<u64>,
    /// Time the event waited in a queue.
    pub queue_wait_time_us: Option<u64>,
    /// Time spent serializing the event.
    pub serialization_time_us: Option<u64>,
    /// Network transit time.
    pub network_latency_us: Option<u64>,
    /// Memory consumed, in bytes.
    pub memory_usage_bytes: Option<u64>,
    /// CPU time consumed.
    pub cpu_time_us: Option<u64>,
}
256
/// Authentication details of the principal behind an operation.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AuthContext {
    /// Authenticated user id.
    pub user_id: String,
    /// Roles held by the user.
    pub roles: Vec<String>,
    /// Permissions granted to the user.
    pub permissions: Vec<String>,
    /// How the user authenticated (e.g. token, password).
    pub auth_method: String,
    /// Expiry of the auth token, if token-based.
    pub token_expires_at: Option<chrono::DateTime<chrono::Utc>>,
}
271
/// Relative delivery priority of an event.
///
/// Derived `Ord` plus explicit discriminants give the natural ordering
/// `Low < Normal < High < Critical`.
#[derive(
    Debug,
    Clone,
    Copy,
    PartialEq,
    Eq,
    PartialOrd,
    Ord,
    Serialize,
    Deserialize,
    Default,
    Encode,
    Decode,
)]
pub enum EventPriority {
    /// Background traffic.
    Low = 0,
    /// Everyday events (the default).
    #[default]
    Normal = 1,
    /// Elevated urgency.
    High = 2,
    /// Highest urgency.
    Critical = 3,
}
294
/// Payload compression codecs supported by `serialization::compress_data`
/// and `serialization::decompress_data`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default, Encode, Decode)]
pub enum CompressionType {
    /// No compression (the default); data is passed through untouched.
    #[default]
    None,
    Gzip,
    Lz4,
    Zstd,
    Snappy,
    Brotli,
}
306
/// Wire encodings understood by `serialization::serialize_metadata` /
/// `deserialize_metadata`.
///
/// `Protobuf` and `Avro` currently fall back to JSON in those helpers.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default, Encode, Decode)]
pub enum SerializationFormat {
    /// JSON (the default).
    #[default]
    Json,
    MessagePack,
    Protobuf,
    Avro,
    Cbor,
    Bincode,
}
318
/// Per-event knobs that influence how the stream pipeline handles it.
#[derive(Debug, Clone, Serialize, Deserialize, Encode, Decode)]
pub struct ProcessingHints {
    /// Whether the event may be delivered out of order.
    pub allow_out_of_order: bool,
    /// Whether the event may be dropped as a duplicate (gates the
    /// correlation-id check in `processing::EventProcessor`).
    pub allow_deduplication: bool,
    /// How eagerly the event should be batched.
    pub batch_preference: BatchPreference,
    /// Consistency guarantee requested for the event.
    pub consistency_level: ConsistencyLevel,
    /// Retry behavior to apply on processing failure.
    pub retry_policy: RetryPolicy,
    /// Optional per-event processing deadline, in milliseconds.
    pub processing_timeout_ms: Option<u64>,
}
335
/// How eagerly an event should be batched before emission.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Encode, Decode)]
pub enum BatchPreference {
    /// Emit right away, bypassing the batch buffer.
    Immediate,
    /// May be batched with other events.
    Batchable,
    /// Should only ever travel inside a batch.
    /// NOTE(review): `processing::EventProcessor` currently treats this
    /// identically to `Batchable`.
    RequiredBatch,
}
346
/// Consistency guarantee requested for event delivery.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Encode, Decode)]
pub enum ConsistencyLevel {
    /// Eventual consistency; no ordering guarantees.
    Eventual,
    /// Consistency maintained within a single partition.
    PerPartition,
    /// Strong consistency across the stream.
    Strong,
}
357
/// Exponential-backoff retry configuration.
#[derive(Debug, Clone, Serialize, Deserialize, Encode, Decode)]
pub struct RetryPolicy {
    /// Maximum number of retry attempts.
    pub max_retries: u32,
    /// Base retry delay, in milliseconds.
    pub base_delay_ms: u64,
    /// Upper bound on any single retry delay, in milliseconds.
    pub max_delay_ms: u64,
    /// Factor applied to the delay between successive attempts.
    pub backoff_multiplier: f64,
    /// Whether to add random jitter to computed delays.
    pub use_jitter: bool,
}
372
373impl Default for EventMetadata {
374 fn default() -> Self {
375 Self {
376 source: "oxirs-stream".to_string(),
377 user: None,
378 session_id: None,
379 trace_id: None,
380 causality_token: None,
381 version: Some("1.0".to_string()),
382 timestamp: chrono::Utc::now(),
383 operation_context: None,
384 priority: EventPriority::Normal,
385 partition: None,
386 correlation_id: None,
387 checksum: None,
388 schema_version: "1.0".to_string(),
389 tags: HashMap::new(),
390 ttl_seconds: None,
391 compression: None,
392 serialization_format: SerializationFormat::Json,
393 message_size: None,
394 processing_hints: ProcessingHints::default(),
395 }
396 }
397}
398
399impl Default for ProcessingHints {
400 fn default() -> Self {
401 Self {
402 allow_out_of_order: false,
403 allow_deduplication: true,
404 batch_preference: BatchPreference::Batchable,
405 consistency_level: ConsistencyLevel::PerPartition,
406 retry_policy: RetryPolicy::default(),
407 processing_timeout_ms: Some(30000), }
409 }
410}
411
412impl Default for RetryPolicy {
413 fn default() -> Self {
414 Self {
415 max_retries: 3,
416 base_delay_ms: 100,
417 max_delay_ms: 10000,
418 backoff_multiplier: 2.0,
419 use_jitter: true,
420 }
421 }
422}
423
424pub mod serialization {
426 use super::*;
427 use anyhow::{anyhow, Result};
428
429 pub fn serialize_metadata(
431 metadata: &EventMetadata,
432 format: SerializationFormat,
433 ) -> Result<Vec<u8>> {
434 match format {
435 SerializationFormat::Json => {
436 serde_json::to_vec(metadata).map_err(|e| anyhow!("JSON serialization failed: {e}"))
437 }
438 SerializationFormat::MessagePack => rmp_serde::to_vec(metadata)
439 .map_err(|e| anyhow!("MessagePack serialization failed: {e}")),
440 SerializationFormat::Cbor => {
441 serde_cbor::to_vec(metadata).map_err(|e| anyhow!("CBOR serialization failed: {e}"))
442 }
443 SerializationFormat::Bincode => {
444 oxicode::serde::encode_to_vec(metadata, oxicode::config::standard())
445 .map_err(|e| anyhow!("Bincode serialization failed: {e}"))
446 }
447 SerializationFormat::Protobuf | SerializationFormat::Avro => {
448 serde_json::to_vec(metadata)
451 .map_err(|e| anyhow!("Protobuf/Avro serialization fallback failed: {e}"))
452 }
453 }
454 }
455
456 pub fn deserialize_metadata(data: &[u8], format: SerializationFormat) -> Result<EventMetadata> {
458 match format {
459 SerializationFormat::Json => serde_json::from_slice(data)
460 .map_err(|e| anyhow!("JSON deserialization failed: {e}")),
461 SerializationFormat::MessagePack => rmp_serde::from_slice(data)
462 .map_err(|e| anyhow!("MessagePack deserialization failed: {e}")),
463 SerializationFormat::Cbor => serde_cbor::from_slice(data)
464 .map_err(|e| anyhow!("CBOR deserialization failed: {e}")),
465 SerializationFormat::Bincode => {
466 oxicode::serde::decode_from_slice(data, oxicode::config::standard())
467 .map(|(v, _)| v)
468 .map_err(|e| anyhow!("Bincode deserialization failed: {e}"))
469 }
470 SerializationFormat::Protobuf | SerializationFormat::Avro => {
471 serde_json::from_slice(data)
474 .map_err(|e| anyhow!("Protobuf/Avro deserialization fallback failed: {e}"))
475 }
476 }
477 }
478
479 pub fn compress_data(data: &[u8], compression: CompressionType) -> Result<Vec<u8>> {
481 match compression {
482 CompressionType::None => Ok(data.to_vec()),
483 CompressionType::Gzip => {
484 use flate2::write::GzEncoder;
485 use flate2::Compression;
486 use std::io::Write;
487
488 let mut encoder = GzEncoder::new(Vec::new(), Compression::default());
489 encoder.write_all(data)?;
490 Ok(encoder.finish()?)
491 }
492 CompressionType::Lz4 => Ok(lz4_flex::compress_prepend_size(data)),
493 CompressionType::Zstd => {
494 zstd::bulk::compress(data, 0).map_err(|e| anyhow!("Zstd compression failed: {e}"))
495 }
496 CompressionType::Snappy => Ok(snap::raw::Encoder::new().compress_vec(data)?),
497 CompressionType::Brotli => {
498 use brotli::CompressorWriter;
499 use std::io::Write;
500 let mut compressed = Vec::new();
501 {
502 let mut compressor = CompressorWriter::new(&mut compressed, 4096, 6, 22);
503 compressor.write_all(data)?;
504 } Ok(compressed)
506 }
507 }
508 }
509
510 pub fn decompress_data(data: &[u8], compression: CompressionType) -> Result<Vec<u8>> {
512 match compression {
513 CompressionType::None => Ok(data.to_vec()),
514 CompressionType::Gzip => {
515 use flate2::read::GzDecoder;
516 use std::io::Read;
517
518 let mut decoder = GzDecoder::new(data);
519 let mut decompressed = Vec::new();
520 decoder.read_to_end(&mut decompressed)?;
521 Ok(decompressed)
522 }
523 CompressionType::Lz4 => lz4_flex::decompress_size_prepended(data)
524 .map_err(|e| anyhow!("LZ4 decompression failed: {e}")),
525 CompressionType::Zstd => {
526 zstd::bulk::decompress(data, 1024 * 1024) .map_err(|e| anyhow!("Zstd decompression failed: {e}"))
528 }
529 CompressionType::Snappy => snap::raw::Decoder::new()
530 .decompress_vec(data)
531 .map_err(|e| anyhow!("Snappy decompression failed: {e}")),
532 CompressionType::Brotli => {
533 use std::io::Read;
534 let mut decompressed = Vec::new();
535 let mut decompressor = brotli::Decompressor::new(data, 4096);
536 decompressor.read_to_end(&mut decompressed)?;
537 Ok(decompressed)
538 }
539 }
540 }
541}
542
543pub mod processing {
545 use super::*;
546 use std::time::{Duration, Instant};
547
    /// Stateful helper that deduplicates, enriches, and batches stream events.
    pub struct EventProcessor {
        /// Correlation ids already seen.
        /// NOTE(review): never pruned, so this grows without bound on a
        /// long-running stream — consider TTL or LRU eviction.
        pub deduplication_cache: std::collections::HashSet<String>,
        /// Events queued for batched emission, paired with their extracted
        /// metadata.
        pub batch_buffer: Vec<(crate::event::StreamEvent, EventMetadata)>,
        /// Time of the last flush decision; compared against `flush_interval`.
        pub last_flush: Instant,
        /// Maximum batch age before `should_flush_batch` triggers.
        pub flush_interval: Duration,
    }
555
556 impl Default for EventProcessor {
557 fn default() -> Self {
558 Self::new()
559 }
560 }
561
    impl EventProcessor {
        /// Creates a processor with empty state and a 100 ms flush interval.
        pub fn new() -> Self {
            Self {
                deduplication_cache: std::collections::HashSet::new(),
                batch_buffer: Vec::new(),
                last_flush: Instant::now(),
                flush_interval: Duration::from_millis(100),
            }
        }

        /// Runs one event through the pipeline: metadata extraction and
        /// enrichment, optional deduplication, then immediate emission or
        /// batching depending on the event kind.
        ///
        /// Returns `Ok(Some(event))` when an event should be emitted now,
        /// `Ok(None)` when it was deduplicated or buffered for later.
        pub fn process_event(
            &mut self,
            mut event: crate::event::StreamEvent,
        ) -> anyhow::Result<Option<crate::event::StreamEvent>> {
            let metadata = self.extract_metadata(&event)?;
            let enhanced_metadata = self.enhance_metadata(metadata)?;

            // Drop the event if its correlation id was already seen.
            // (`enhance_metadata` guarantees a correlation id is present.)
            if enhanced_metadata.processing_hints.allow_deduplication {
                if let Some(correlation_id) = &enhanced_metadata.correlation_id {
                    if self.deduplication_cache.contains(correlation_id) {
                        return Ok(None);
                    }
                    self.deduplication_cache.insert(correlation_id.clone());
                }
            }

            // Write the enriched metadata back into the event itself.
            self.update_event_metadata(&mut event, enhanced_metadata)?;

            match self.get_batch_preference(&event) {
                BatchPreference::Immediate => Ok(Some(event)),
                BatchPreference::Batchable | BatchPreference::RequiredBatch => {
                    self.add_to_batch(event);

                    if self.should_flush_batch() {
                        // NOTE(review): a "flush" only clones and returns the
                        // most recently buffered event; the buffer is never
                        // drained and `last_flush` is never reset, so earlier
                        // buffered events accumulate indefinitely and the
                        // time-based trigger fires on every call once the
                        // interval has elapsed.
                        Ok(self.batch_buffer.last().map(|(e, _)| e.clone()))
                    } else {
                        Ok(None)
                    }
                }
            }
        }

        /// Pulls the metadata out of the event variants that carry it; all
        /// other variants get [`EventMetadata::default`].
        fn extract_metadata(
            &self,
            event: &crate::event::StreamEvent,
        ) -> anyhow::Result<EventMetadata> {
            match event {
                crate::event::StreamEvent::TripleAdded { metadata, .. } => {
                    Ok(metadata.clone().into())
                }
                crate::event::StreamEvent::TripleRemoved { metadata, .. } => {
                    Ok(metadata.clone().into())
                }
                crate::event::StreamEvent::GraphCreated { metadata, .. } => {
                    Ok(metadata.clone().into())
                }
                crate::event::StreamEvent::SparqlUpdate { metadata, .. } => {
                    Ok(metadata.clone().into())
                }
                crate::event::StreamEvent::TransactionBegin { metadata, .. } => {
                    Ok(metadata.clone().into())
                }
                crate::event::StreamEvent::Heartbeat { metadata, .. } => {
                    Ok(metadata.clone().into())
                }
                _ => Ok(EventMetadata::default()),
            }
        }

        /// Fills in gaps in `metadata`: a real timestamp when it holds the
        /// `MIN_UTC` sentinel, a generated correlation id, a non-empty schema
        /// version, and a stub operation context with zeroed metrics.
        fn enhance_metadata(&self, mut metadata: EventMetadata) -> anyhow::Result<EventMetadata> {
            // `MIN_UTC` is treated as "timestamp unset".
            if metadata.timestamp == chrono::DateTime::<chrono::Utc>::MIN_UTC {
                metadata.timestamp = chrono::Utc::now();
            }

            if metadata.correlation_id.is_none() {
                metadata.correlation_id = Some(uuid::Uuid::new_v4().to_string());
            }

            if metadata.schema_version.is_empty() {
                metadata.schema_version = "1.0".to_string();
            }

            if metadata.operation_context.is_none() {
                metadata.operation_context = Some(OperationContext {
                    operation_type: "stream_event".to_string(),
                    request_id: Some(uuid::Uuid::new_v4().to_string()),
                    client_info: None,
                    // Placeholder metrics; all measurements start at zero.
                    metrics: Some(PerformanceMetrics {
                        processing_latency_us: Some(0),
                        queue_wait_time_us: Some(0),
                        serialization_time_us: Some(0),
                        network_latency_us: Some(0),
                        memory_usage_bytes: Some(0),
                        cpu_time_us: Some(0),
                    }),
                    auth_context: None,
                    custom_fields: HashMap::new(),
                });
            }

            Ok(metadata)
        }

        /// Writes `metadata` (converted to the core form) back into the
        /// event. Variants not listed keep their existing metadata untouched.
        fn update_event_metadata(
            &self,
            event: &mut crate::event::StreamEvent,
            metadata: EventMetadata,
        ) -> anyhow::Result<()> {
            let event_metadata = event::EventMetadata::from(metadata);
            match event {
                crate::event::StreamEvent::TripleAdded { metadata: m, .. } => *m = event_metadata,
                crate::event::StreamEvent::TripleRemoved { metadata: m, .. } => *m = event_metadata,
                crate::event::StreamEvent::GraphCreated { metadata: m, .. } => *m = event_metadata,
                crate::event::StreamEvent::SparqlUpdate { metadata: m, .. } => *m = event_metadata,
                crate::event::StreamEvent::TransactionBegin { metadata: m, .. } => {
                    *m = event_metadata
                }
                crate::event::StreamEvent::Heartbeat { metadata: m, .. } => *m = event_metadata,
                _ => {}
            }
            Ok(())
        }

        /// Heartbeats and transaction control events bypass batching; all
        /// other variants are batchable.
        fn get_batch_preference(&self, event: &crate::event::StreamEvent) -> BatchPreference {
            match event {
                crate::event::StreamEvent::Heartbeat { .. } => BatchPreference::Immediate,
                crate::event::StreamEvent::TransactionBegin { .. } => BatchPreference::Immediate,
                crate::event::StreamEvent::TransactionCommit { .. } => BatchPreference::Immediate,
                crate::event::StreamEvent::TransactionAbort { .. } => BatchPreference::Immediate,
                _ => BatchPreference::Batchable,
            }
        }

        /// Buffers `event` together with freshly (re-)extracted metadata.
        /// NOTE(review): metadata was already extracted in `process_event`;
        /// extraction errors here are silently replaced with defaults.
        fn add_to_batch(&mut self, event: crate::event::StreamEvent) {
            let metadata = self.extract_metadata(&event).unwrap_or_default();
            self.batch_buffer.push((event, metadata));
        }

        /// A flush is due once 100 events are buffered or `flush_interval`
        /// has elapsed since `last_flush`.
        fn should_flush_batch(&self) -> bool {
            self.batch_buffer.len() >= 100 || self.last_flush.elapsed() >= self.flush_interval
        }
    }
717
718 #[cfg(test)]
719 mod tests {
720 use super::*;
721 use crate::types::serialization::{compress_data, decompress_data};
722
723 #[test]
724 fn test_compression_round_trip() {
725 let test_data = b"Hello, World! This is a test message for compression.";
726 let compression_types = vec![
727 CompressionType::None,
728 CompressionType::Gzip,
729 CompressionType::Lz4,
730 CompressionType::Zstd,
731 CompressionType::Snappy,
732 CompressionType::Brotli,
733 ];
734
735 for compression in compression_types {
736 let compressed = compress_data(test_data, compression).unwrap();
737 let decompressed = decompress_data(&compressed, compression).unwrap();
738 assert_eq!(
739 test_data,
740 decompressed.as_slice(),
741 "Failed round-trip for {compression:?}"
742 );
743 }
744 }
745
746 #[test]
747 fn test_compression_effectiveness() {
748 let test_data = b"AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"; let compression_types = vec![
750 CompressionType::Gzip,
751 CompressionType::Lz4,
752 CompressionType::Zstd,
753 CompressionType::Snappy,
754 CompressionType::Brotli,
755 ];
756
757 for compression in compression_types {
758 let compressed = compress_data(test_data, compression).unwrap();
759 assert!(
761 compressed.len() < test_data.len(),
762 "Compression {compression:?} did not reduce size"
763 );
764 }
765 }
766
767 #[test]
768 fn test_empty_data_compression() {
769 let test_data = b"";
770 let compression_types = vec![
771 CompressionType::None,
772 CompressionType::Gzip,
773 CompressionType::Lz4,
774 CompressionType::Zstd,
775 CompressionType::Snappy,
776 CompressionType::Brotli,
777 ];
778
779 for compression in compression_types {
780 let compressed = compress_data(test_data, compression).unwrap();
781 let decompressed = decompress_data(&compressed, compression).unwrap();
782 assert_eq!(
783 test_data,
784 decompressed.as_slice(),
785 "Failed empty data round-trip for {compression:?}"
786 );
787 }
788 }
789
790 #[test]
791 fn test_large_data_compression() {
792 let test_data = vec![42u8; 10000]; let compression_types = vec![
794 CompressionType::None,
795 CompressionType::Gzip,
796 CompressionType::Lz4,
797 CompressionType::Zstd,
798 CompressionType::Snappy,
799 CompressionType::Brotli,
800 ];
801
802 for compression in compression_types {
803 let compressed = compress_data(&test_data, compression).unwrap();
804 let decompressed = decompress_data(&compressed, compression).unwrap();
805 assert_eq!(
806 test_data, decompressed,
807 "Failed large data round-trip for {compression:?}"
808 );
809 }
810 }
811
812 #[test]
813 fn test_random_data_compression() {
814 use scirs2_core::random::{Random, Rng};
815 let mut random_gen = Random::default();
816 let test_data: Vec<u8> = (0..1000).map(|_| random_gen.random()).collect();
817 let compression_types = vec![
818 CompressionType::None,
819 CompressionType::Gzip,
820 CompressionType::Lz4,
821 CompressionType::Zstd,
822 CompressionType::Snappy,
823 CompressionType::Brotli,
824 ];
825
826 for compression in compression_types {
827 let compressed = compress_data(&test_data, compression).unwrap();
828 let decompressed = decompress_data(&compressed, compression).unwrap();
829 assert_eq!(
830 test_data, decompressed,
831 "Failed random data round-trip for {compression:?}"
832 );
833 }
834 }
835 }
836}