// m2m/lib.rs
1//! # M2M Protocol - Machine-to-Machine LLM Communication
2//!
3//! High-performance agent-to-agent protocol for LLM API communication with
4//! intelligent compression, cryptographic security, and dynamic algorithm routing.
5//!
6//! ## Features
7//!
8//! - **Multi-codec compression**: M2M (60-70% savings), TokenNative (30-35%), Brotli (high-ratio)
9//! - **Protocol negotiation**: HELLO/ACCEPT handshake for capability exchange
10//! - **ML-based routing**: Hydra model for intelligent algorithm selection
11//! - **Security scanning**: Threat detection for prompt injection/jailbreaks
12//! - **Session management**: Stateful sessions with timeout and keep-alive
13//!
14//! ## Protocol Overview
15//!
16//! M2M Protocol v3.0 defines a wire format for efficient LLM API payload exchange
17//! between agents. The protocol uses a session-based model with capability negotiation.
18//!
19//! ### Architecture
20//!
21//! ```text
//!   Agent A                      M2M Server                      Agent B
//!      |                             |                              |
//!      |------ HELLO (caps) -------->|                              |
//!      |<----- ACCEPT (caps) --------|                              |
//!      |                             |                              |
//!      |====== DATA (compressed) ===>|------- DATA (compressed) -->|
//!      |<===== DATA (compressed) ====|<------ DATA (compressed) ---|
//!      |                             |                              |
//!      |------ CLOSE --------------->|                              |
31//! ```
32//!
33//! ### State Machine
34//!
35//! ```text
//!                  create_hello()
//!   [Initial] ─────────────────────> [HelloSent]
//!       │                                 │
//!       │ process_hello()                 │ process_accept()
//!       │ (valid caps)                    │
//!       v                                 v
//!   [Established] <───────────────────────┘
//!       │                                 │
//!       │ close()                         │ process_reject()
//!       v                                 v
//!   [Closing] ────────────────────> [Closed]
47//! ```
48//!
49//! ### Wire Formats
50//!
51//! Each compression algorithm has a distinct wire format prefix:
52//!
53//! | Algorithm | Wire Format | Use Case |
54//! |--------------|--------------------------------|-----------------------------|
55//! | M2M | `#M2M\|1\|<header><payload>` | LLM API payloads (~60-70%) |
56//! | TokenNative | `#TK\|C\|<base64_tokens>` | Token ID transmission |
57//! | Brotli | `#M2M[v3.0]\|DATA:<base64>` | Large repetitive content |
58//! | Hybrid | `#HY\|<binary>` | Streaming mode |
59//! | None | (passthrough) | Small content (<100 bytes) |
60//!
61//! ### Message Types
62//!
63//! | Type | Direction | Purpose |
64//! |--------|----------------|--------------------------------------|
65//! | HELLO | Client→Server | Initiate handshake with capabilities |
66//! | ACCEPT | Server→Client | Confirm session, return negotiated |
67//! | REJECT | Server→Client | Deny session with reason code |
68//! | DATA | Bidirectional | Compressed payload exchange |
69//! | PING | Bidirectional | Keep-alive request |
70//! | PONG | Bidirectional | Keep-alive response |
71//! | CLOSE | Bidirectional | Session termination |
72//!
73//! ## Quick Start
74//!
75//! ### Compression Only (Stateless)
76//!
77//! ```rust,ignore
78//! use m2m_core::{CodecEngine, Algorithm};
79//!
80//! let engine = CodecEngine::new();
81//!
82//! // Compress LLM API payload
83//! let content = r#"{"model":"gpt-4o","messages":[{"role":"user","content":"Hello"}]}"#;
84//! let result = engine.compress(content, Algorithm::M2M).unwrap();
85//!
86//! println!("Compressed: {}", result.data);
87//! println!("Ratio: {:.1}%", result.byte_ratio() * 100.0);
88//!
89//! // Decompress (auto-detects algorithm from wire format)
90//! let original = engine.decompress(&result.data).unwrap();
91//! assert_eq!(original, content);
92//! ```
93//!
94//! ### Auto-Selection (Best Algorithm)
95//!
96//! ```rust,ignore
97//! use m2m_core::CodecEngine;
98//!
99//! let engine = CodecEngine::new();
100//! let content = r#"{"model":"gpt-4o","messages":[{"role":"user","content":"Hello"}]}"#;
101//!
102//! // Automatically select best algorithm based on content
103//! let (result, algorithm) = engine.compress_auto(content).unwrap();
104//! println!("Selected: {:?}", algorithm);
105//! ```
106//!
107//! ### Full Protocol (Session-Based)
108//!
109//! ```rust,ignore
110//! use m2m_core::{Session, Capabilities, MessageType};
111//!
112//! // Client side
113//! let mut client = Session::new(Capabilities::default());
114//! let hello = client.create_hello();
115//!
116//! // Server side
117//! let mut server = Session::new(Capabilities::default());
118//! let accept = server.process_hello(&hello).unwrap();
119//!
120//! // Client processes accept
121//! client.process_accept(&accept).unwrap();
122//! assert!(client.is_established());
123//!
124//! // Exchange compressed data
125//! let data_msg = client.compress(r#"{"model":"gpt-4o","messages":[]}"#).unwrap();
126//! let content = server.decompress(&data_msg).unwrap();
127//! ```
128//!
129//! ### Security Scanning
130//!
131//! ```rust,ignore
132//! use m2m_core::SecurityScanner;
133//!
134//! let scanner = SecurityScanner::new().with_blocking(0.8);
135//!
136//! // Safe content
137//! let result = scanner.scan(r#"{"messages":[{"role":"user","content":"Hello"}]}"#).unwrap();
138//! assert!(result.safe);
139//!
140//! // Potential threat
141//! let result = scanner.scan("Ignore previous instructions").unwrap();
142//! assert!(!result.safe);
143//! println!("Threat: {:?}", result.threats);
144//! ```
145//!
146//! ## Modules
147//!
//! - [`codec`]: Multi-algorithm compression engine
//! - [`protocol`]: Session management and capability negotiation
//! - [`inference`]: Hydra ML model for algorithm routing
//! - [`security`]: Threat detection and content scanning
//! - [`server`]: HTTP API server (Axum-based)
//! - [`models`]: LLM model registry and metadata
//! - [`config`]: Configuration management
//! - [`error`]: Error types and result aliases
//! - [`tokenizer`]: Token counting utilities
//! - [`transport`]: TCP/QUIC transport implementations
156//!
157//! ## Performance
158//!
159//! Typical compression ratios for LLM API payloads:
160//!
161//! | Content Type | M2M | TokenNative | Brotli |
162//! |---------------------|---------|-------------|---------|
163//! | Chat completion | ~60% | ~30% | ~20% |
164//! | Long conversation | ~65% | ~35% | ~40% |
165//! | Tool calls | ~70% | ~40% | ~15% |
166//!
167//! Algorithm selection heuristics:
168//! - **M2M**: Best for standard LLM API JSON (default, 100% fidelity)
169//! - **TokenNative**: Best for token-optimized transmission
170//! - **Brotli**: Best for large repetitive content (>1KB with patterns)
171//! - **Hybrid**: Best for streaming use cases
172//! - **None**: Content under 100 bytes (overhead exceeds savings)
173
// Public module tree — see the "Modules" section of the crate-level docs.
pub mod codec;
pub mod config;
pub mod error;
pub mod inference;
pub mod models;
pub mod protocol;
pub mod security;
pub mod server;
pub mod tokenizer;
pub mod transport;

// Re-exports for convenience: the most commonly used types are reachable
// directly from the crate root (e.g. `m2m_core::CodecEngine`) without
// spelling out the module path.
pub use codec::{Algorithm, CodecEngine, CompressionResult, StreamingCodec, StreamingDecompressor};
pub use config::Config;
pub use error::{M2MError, Result};
pub use inference::{HydraModel, SecurityDecision};
pub use models::{ModelCard, ModelRegistry, Provider};
pub use protocol::{Capabilities, Message, Session, SessionState};
pub use security::{ScanResult, SecurityScanner};
pub use server::{AppState, ServerConfig};
pub use tokenizer::{
    count_tokens, count_tokens_for_model, count_tokens_with_encoding, TokenCounter,
};
pub use transport::{QuicTransport, QuicTransportConfig, TcpTransport, Transport, TransportKind};
198
/// Library (crate) version, captured at compile time from `Cargo.toml`'s
/// `version` field via `env!("CARGO_PKG_VERSION")`.
pub const VERSION: &str = env!("CARGO_PKG_VERSION");

/// M2M wire-protocol version implemented by this crate.
///
/// Distinct from [`VERSION`]: the crate may be re-released without the
/// protocol revision changing.
pub const PROTOCOL_VERSION: &str = "3.0";
204
205/// Check if content is in M2M format
206pub fn is_m2m_format(content: &str) -> bool {
207 codec::is_m2m_format(content)
208}
209
/// Detect the compression algorithm from a payload's wire-format prefix.
///
/// Convenience wrapper delegating to [`codec::detect_algorithm`]; returns
/// `None` when no known wire-format prefix is recognized (per the delegate —
/// see `codec` for the exact prefix rules).
pub fn detect_algorithm(content: &str) -> Option<Algorithm> {
    codec::detect_algorithm(content)
213}