pcapsql_core/protocol/
mod.rs

1//! Protocol parsing framework.
2//!
3//! This module provides:
4//! - [`Protocol`] trait for implementing parsers
5//! - [`ProtocolRegistry`] for managing registered parsers
6//! - Built-in parsers for common protocols
7//!
8//! ## Supported Protocols
9//!
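//! | Layer | Protocols |
//! |-------|-----------|
//! | Link | Ethernet, Linux SLL, VLAN (802.1Q), MPLS |
//! | Network | IPv4, IPv6, ARP, ICMP, ICMPv6 |
//! | Transport | TCP, UDP |
//! | Tunneling | GRE, VXLAN, GTP, IPsec |
//! | Routing | BGP, OSPF |
//! | Application | DNS, DHCP, NTP, TLS, SSH, QUIC |
//! | Linux kernel interface | Netlink, rtnetlink |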
16//!
17//! Note: HTTP is parsed via TCP stream reassembly (see `stream::parsers::http`).
18//!
19//! ## Example
20//!
21//! ```rust
22//! use pcapsql_core::protocol::{default_registry, parse_packet};
23//!
24//! let registry = default_registry();
25//! // Ethernet frame with IP/TCP
26//! let packet_data: &[u8] = &[
27//!     // Ethernet header (14 bytes)
28//!     0xff, 0xff, 0xff, 0xff, 0xff, 0xff,  // dst mac
29//!     0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // src mac
30//!     0x08, 0x00,                          // ethertype (IPv4)
31//!     // Minimal IPv4 header would follow...
32//! ];
33//!
34//! let results = parse_packet(&registry, 1, packet_data); // 1 = Ethernet
35//! for (name, result) in results {
36//!     let field_names: Vec<_> = result.fields.iter().map(|(k, _)| *k).collect();
37//!     println!("Parsed {}: {:?}", name, field_names);
38//! }
39//! ```
40
41mod context;
42mod field;
43mod projection;
44mod pruning;
45mod registry;
46
47// Protocol implementations
48mod arp;
49mod bgp;
50mod dhcp;
51mod dns;
52mod ethernet;
53mod gre;
54mod gtp;
55mod icmp;
56mod icmpv6;
57mod ipsec;
58mod ipv4;
59mod ipv6;
60mod linux_sll;
61mod mpls;
62mod netlink;
63mod ntp;
64mod ospf;
65mod quic;
66mod rtnetlink;
67mod ssh;
68mod tcp;
69mod tls;
70mod udp;
71mod vlan;
72mod vxlan;
73
74// Test utilities (only compiled for tests)
75#[cfg(test)]
76pub mod test_utils;
77
78pub use context::{FieldEntry, HintEntry, ParseContext, ParseResult, TunnelLayer, TunnelType};
79pub use field::{FieldValue, OwnedFieldValue};
80pub use projection::{chain_fields_for_protocol, merge_with_chain_fields, ProjectionConfig};
81pub use pruning::{compute_required_protocols, should_continue_parsing, should_run_parser};
82pub use registry::{BuiltinProtocol, PayloadMode, Protocol, ProtocolRegistry};
83
84// Re-export protocol implementations
85pub use arp::ArpProtocol;
86pub use bgp::BgpProtocol;
87pub use dhcp::DhcpProtocol;
88pub use dns::DnsProtocol;
89pub use ethernet::EthernetProtocol;
90pub use gre::GreProtocol;
91pub use gtp::GtpProtocol;
92pub use icmp::IcmpProtocol;
93pub use icmpv6::Icmpv6Protocol;
94pub use ipsec::IpsecProtocol;
95pub use ipv4::Ipv4Protocol;
96pub use ipv6::Ipv6Protocol;
97pub use linux_sll::LinuxSllProtocol;
98pub use mpls::MplsProtocol;
99pub use netlink::NetlinkProtocol;
100pub use ntp::NtpProtocol;
101pub use ospf::OspfProtocol;
102pub use quic::QuicProtocol;
103pub use rtnetlink::RtnetlinkProtocol;
104pub use ssh::SshProtocol;
105pub use tcp::TcpProtocol;
106pub use tls::TlsProtocol;
107pub use udp::UdpProtocol;
108pub use vlan::VlanProtocol;
109pub use vxlan::VxlanProtocol;
110
111// Re-export protocol constants for use in UDFs and other crates
112pub use dns::{rcode, record_type};
113pub use ethernet::ethertype;
114pub use ipv6::next_header;
115pub use netlink::family as netlink_family;
116
117/// Create a registry with all built-in protocol parsers.
118pub fn default_registry() -> ProtocolRegistry {
119    let mut registry = ProtocolRegistry::new();
120
121    // Layer 2
122    registry.register(EthernetProtocol);
123    registry.register(LinuxSllProtocol);
124    registry.register(ArpProtocol);
125    registry.register(VlanProtocol);
126    registry.register(MplsProtocol);
127
128    // Layer 3
129    registry.register(Ipv4Protocol);
130    registry.register(Ipv6Protocol);
131
132    // Layer 4
133    registry.register(TcpProtocol);
134    registry.register(UdpProtocol);
135    registry.register(IcmpProtocol);
136    registry.register(Icmpv6Protocol);
137
138    // Tunneling protocols (higher priority than application protocols)
139    registry.register(GreProtocol);
140    registry.register(VxlanProtocol);
141    registry.register(GtpProtocol);
142    registry.register(IpsecProtocol);
143
144    // Routing protocols
145    registry.register(BgpProtocol);
146    registry.register(OspfProtocol);
147
148    // Application layer
149    // Note: HTTP is parsed via TCP stream reassembly (see stream::parsers::http)
150    registry.register(DnsProtocol);
151    registry.register(DhcpProtocol);
152    registry.register(NtpProtocol);
153    registry.register(TlsProtocol);
154    registry.register(SshProtocol);
155    registry.register(QuicProtocol);
156
157    // Netlink protocols (Linux kernel-userspace)
158    registry.register(NetlinkProtocol);
159    registry.register(RtnetlinkProtocol);
160
161    registry
162}
163
164use std::collections::{HashMap, HashSet};
165
166/// Parse a packet through all protocol layers.
167///
168/// For tunneled traffic, this function tracks encapsulation depth and tunnel context.
169/// Each ParseResult includes encap_depth, tunnel_type, and tunnel_id fields that indicate
170/// whether the protocol was parsed inside a tunnel and which tunnel it was in.
171pub fn parse_packet<'a>(
172    registry: &ProtocolRegistry,
173    link_type: u16,
174    data: &'a [u8],
175) -> Vec<(&'static str, ParseResult<'a>)> {
176    // Typical packet has 3-4 protocol layers (Eth/IP/TCP/App)
177    // Tunneled packets may have more (up to 8 layers for complex encapsulation)
178    let mut results = Vec::with_capacity(8);
179    let mut context = ParseContext::new(link_type);
180    let mut remaining = data;
181
182    while !remaining.is_empty() {
183        if let Some(parser) = registry.find_parser(&context) {
184            let mut result = parser.parse(remaining, &context);
185
186            // Set encapsulation context on the result BEFORE updating context
187            // This captures the encap state when this protocol was parsed
188            result.set_encap_context(&context);
189
190            // Check if this protocol's child hints indicate a tunnel boundary
191            // If so, update context for the next layer (inner protocols)
192            if let Some(tunnel_type_val) = result.hint("tunnel_type") {
193                let tunnel_id = result.hint("tunnel_id");
194                context.push_tunnel(TunnelType::from_u64(tunnel_type_val), tunnel_id);
195            }
196
197            // Update context for next layer
198            context.parent_protocol = Some(parser.name());
199            context.hints = result.child_hints.clone();
200            context.offset += remaining.len() - result.remaining.len();
201
202            let should_stop = result.error.is_some();
203            remaining = result.remaining;
204
205            results.push((parser.name(), result));
206
207            if should_stop {
208                break;
209            }
210        } else {
211            break;
212        }
213    }
214
215    results
216}
217
218/// Parse a packet with protocol pruning.
219///
220/// Only parses protocols in the `required` set and their dependencies.
221/// This can significantly reduce CPU usage for selective queries.
222///
223/// # Arguments
224///
225/// * `registry` - Protocol registry containing parser definitions
226/// * `link_type` - Link layer type (e.g., 1 for Ethernet)
227/// * `data` - Raw packet bytes
228/// * `required` - Set of protocol names needed for the query
229///
230/// # Returns
231///
232/// Vector of (protocol_name, parse_result) pairs for protocols in the required set.
233/// Protocols parsed but not in the required set (i.e., intermediate layers) are
234/// still included as they may be needed for correct result interpretation.
235///
236/// # Example
237///
238/// ```rust,ignore
239/// use std::collections::HashSet;
240/// use pcapsql_core::protocol::{default_registry, parse_packet_pruned};
241///
242/// let registry = default_registry();
243/// let required: HashSet<String> = ["tcp"].iter().map(|s| s.to_string()).collect();
244///
245/// let results = parse_packet_pruned(&registry, 1, &packet_data, &required);
246/// // Will parse Ethernet, IPv4/IPv6, TCP but skip DNS, HTTP, TLS, etc.
247/// ```
248pub fn parse_packet_pruned<'a>(
249    registry: &ProtocolRegistry,
250    link_type: u16,
251    data: &'a [u8],
252    required: &HashSet<String>,
253) -> Vec<(&'static str, ParseResult<'a>)> {
254    // If no required set or empty, fall back to full parsing
255    if required.is_empty() {
256        return parse_packet(registry, link_type, data);
257    }
258
259    // Typical packet has 3-4 protocol layers
260    let mut results = Vec::with_capacity(4);
261    let mut parsed_protocols: Vec<&str> = Vec::with_capacity(4);
262    let mut context = ParseContext::new(link_type);
263    let mut remaining = data;
264
265    while !remaining.is_empty() {
266        // Check if we have everything we need
267        if !should_continue_parsing(&parsed_protocols, required) {
268            break;
269        }
270
271        // Find next parser
272        let parser = match registry.find_parser(&context) {
273            Some(p) => p,
274            None => break,
275        };
276
277        let name = parser.name();
278
279        // Check if we should run this parser
280        if !should_run_parser(name, required, registry) {
281            // Skip this parser - we don't need it or anything it produces
282            break;
283        }
284
285        // Parse
286        let mut result = parser.parse(remaining, &context);
287        parsed_protocols.push(name);
288
289        // Set encapsulation context on the result BEFORE updating context
290        result.set_encap_context(&context);
291
292        // Check if this protocol's child hints indicate a tunnel boundary
293        if let Some(tunnel_type_val) = result.hint("tunnel_type") {
294            let tunnel_id = result.hint("tunnel_id");
295            context.push_tunnel(TunnelType::from_u64(tunnel_type_val), tunnel_id);
296        }
297
298        // Update context for next layer
299        context.parent_protocol = Some(name);
300        context.hints = result.child_hints.clone();
301        context.offset += remaining.len() - result.remaining.len();
302
303        let should_stop = result.error.is_some() || result.remaining.is_empty();
304        remaining = result.remaining;
305
306        // Always add to results - we may need intermediate layers for joins
307        results.push((name, result));
308
309        if should_stop {
310            break;
311        }
312    }
313
314    results
315}
316
317/// Parse a packet with field projection.
318///
319/// Uses `parse_projected()` for each protocol, only extracting the fields
320/// in the projection config. This can significantly reduce CPU usage when
321/// queries only need a subset of fields.
322///
323/// # Arguments
324///
325/// * `registry` - Protocol registry containing parser definitions
326/// * `link_type` - Link layer type (e.g., 1 for Ethernet)
327/// * `data` - Raw packet bytes
328/// * `projections` - Per-protocol field projections (protocol name -> field names)
329///
330/// # Returns
331///
332/// Vector of (protocol_name, parse_result) pairs. Parse results only contain
333/// the fields that were requested in the projection config.
334///
335/// # Example
336///
337/// ```rust,ignore
338/// use std::collections::{HashMap, HashSet};
339/// use pcapsql_core::protocol::{default_registry, parse_packet_projected};
340///
341/// let registry = default_registry();
342///
343/// // Only extract ports from TCP
344/// let mut projections = HashMap::new();
345/// projections.insert("tcp", ["src_port", "dst_port"].iter().map(|s| s.to_string()).collect());
346///
347/// let results = parse_packet_projected(&registry, 1, &packet_data, &projections);
348/// ```
349pub fn parse_packet_projected<'a>(
350    registry: &ProtocolRegistry,
351    link_type: u16,
352    data: &'a [u8],
353    projections: &HashMap<String, HashSet<String>>,
354) -> Vec<(&'static str, ParseResult<'a>)> {
355    // If no projections, fall back to full parsing
356    if projections.is_empty() {
357        return parse_packet(registry, link_type, data);
358    }
359
360    // Typical packet has 3-4 protocol layers
361    let mut results = Vec::with_capacity(4);
362    let mut context = ParseContext::new(link_type);
363    let mut remaining = data;
364
365    while !remaining.is_empty() {
366        if let Some(parser) = registry.find_parser(&context) {
367            let name = parser.name();
368
369            // Get projection for this protocol, if any
370            let projection = projections.get(name);
371
372            // Use projected parsing if projection is configured
373            let mut result = parser.parse_projected(remaining, &context, projection);
374
375            // Set encapsulation context on the result BEFORE updating context
376            result.set_encap_context(&context);
377
378            // Check if this protocol's child hints indicate a tunnel boundary
379            if let Some(tunnel_type_val) = result.hint("tunnel_type") {
380                let tunnel_id = result.hint("tunnel_id");
381                context.push_tunnel(TunnelType::from_u64(tunnel_type_val), tunnel_id);
382            }
383
384            // Update context for next layer
385            context.parent_protocol = Some(name);
386            context.hints = result.child_hints.clone();
387            context.offset += remaining.len() - result.remaining.len();
388
389            let should_stop = result.error.is_some();
390            remaining = result.remaining;
391
392            results.push((name, result));
393
394            if should_stop {
395                break;
396            }
397        } else {
398            break;
399        }
400    }
401
402    results
403}
404
405/// Parse a packet with both protocol pruning and field projection.
406///
407/// This combines the benefits of both optimizations:
408/// - Protocol pruning skips parsing protocols not needed for the query
409/// - Field projection only extracts needed fields within parsed protocols
410///
411/// # Arguments
412///
413/// * `registry` - Protocol registry containing parser definitions
414/// * `link_type` - Link layer type (e.g., 1 for Ethernet)
415/// * `data` - Raw packet bytes
416/// * `required` - Set of protocol names needed for the query (for pruning)
417/// * `projections` - Per-protocol field projections
418///
419/// # Returns
420///
421/// Vector of (protocol_name, parse_result) pairs.
422pub fn parse_packet_pruned_projected<'a>(
423    registry: &ProtocolRegistry,
424    link_type: u16,
425    data: &'a [u8],
426    required: &HashSet<String>,
427    projections: &HashMap<String, HashSet<String>>,
428) -> Vec<(&'static str, ParseResult<'a>)> {
429    // If no pruning or projection, fall back to full parsing
430    if required.is_empty() && projections.is_empty() {
431        return parse_packet(registry, link_type, data);
432    }
433
434    // If only pruning, use pruned parsing
435    if projections.is_empty() {
436        return parse_packet_pruned(registry, link_type, data, required);
437    }
438
439    // If only projection, use projected parsing
440    if required.is_empty() {
441        return parse_packet_projected(registry, link_type, data, projections);
442    }
443
444    // Combined pruning and projection
445    // Typical packet has 3-4 protocol layers
446    let mut results = Vec::with_capacity(4);
447    let mut parsed_protocols: Vec<&str> = Vec::with_capacity(4);
448    let mut context = ParseContext::new(link_type);
449    let mut remaining = data;
450
451    while !remaining.is_empty() {
452        // Check if we have everything we need (pruning)
453        if !should_continue_parsing(&parsed_protocols, required) {
454            break;
455        }
456
457        // Find next parser
458        let parser = match registry.find_parser(&context) {
459            Some(p) => p,
460            None => break,
461        };
462
463        let name = parser.name();
464
465        // Check if we should run this parser (pruning)
466        if !should_run_parser(name, required, registry) {
467            break;
468        }
469
470        // Get projection for this protocol, if any
471        let projection = projections.get(name);
472
473        // Parse with projection
474        let mut result = parser.parse_projected(remaining, &context, projection);
475        parsed_protocols.push(name);
476
477        // Set encapsulation context on the result BEFORE updating context
478        result.set_encap_context(&context);
479
480        // Check if this protocol's child hints indicate a tunnel boundary
481        if let Some(tunnel_type_val) = result.hint("tunnel_type") {
482            let tunnel_id = result.hint("tunnel_id");
483            context.push_tunnel(TunnelType::from_u64(tunnel_type_val), tunnel_id);
484        }
485
486        // Update context for next layer
487        context.parent_protocol = Some(name);
488        context.hints = result.child_hints.clone();
489        context.offset += remaining.len() - result.remaining.len();
490
491        let should_stop = result.error.is_some() || result.remaining.is_empty();
492        remaining = result.remaining;
493
494        results.push((name, result));
495
496        if should_stop {
497            break;
498        }
499    }
500
501    results
502}
503
#[cfg(test)]
mod payload_mode_tests {
    use super::*;

    /// Ethernet, IPv4 and UDP all report the `Chain` payload mode.
    #[test]
    fn test_default_payload_mode() {
        assert_eq!(EthernetProtocol.payload_mode(), PayloadMode::Chain);
        assert_eq!(Ipv4Protocol.payload_mode(), PayloadMode::Chain);
        assert_eq!(UdpProtocol.payload_mode(), PayloadMode::Chain);
    }

    /// TCP reports the `Stream` payload mode.
    #[test]
    fn test_tcp_stream_mode() {
        assert_eq!(TcpProtocol.payload_mode(), PayloadMode::Stream);
    }

    /// TCP declares no child protocols.
    #[test]
    fn test_tcp_no_child_protocols() {
        assert!(TcpProtocol.child_protocols().is_empty());
    }

    /// The three `PayloadMode` variants are pairwise distinct.
    #[test]
    fn test_payload_mode_values() {
        let distinct_pairs = [
            (PayloadMode::Chain, PayloadMode::Stream),
            (PayloadMode::Stream, PayloadMode::None),
            (PayloadMode::Chain, PayloadMode::None),
        ];

        for (lhs, rhs) in &distinct_pairs {
            assert_ne!(lhs, rhs);
        }
    }
}