pcapsql_core/protocol/
registry.rs

1//! Protocol registry for managing parsers.
2
3use std::collections::HashSet;
4
5use crate::schema::FieldDescriptor;
6
7use super::{
8    ArpProtocol, BgpProtocol, DhcpProtocol, DnsProtocol, EthernetProtocol, GreProtocol,
9    GtpProtocol, IcmpProtocol, Icmpv6Protocol, IpsecProtocol, Ipv4Protocol, Ipv6Protocol,
10    LinuxSllProtocol, MplsProtocol, NetlinkProtocol, NtpProtocol, OspfProtocol, ParseContext,
11    ParseResult, QuicProtocol, RtnetlinkProtocol, SshProtocol, TcpProtocol, TlsProtocol,
12    UdpProtocol, VlanProtocol, VxlanProtocol,
13};
14
/// Strategy for the bytes that remain after a protocol has been parsed.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum PayloadMode {
    /// Stay in the `parse_packet()` loop (the default mode).
    /// The leftover bytes are offered to child protocol parsers.
    Chain,

    /// Hand the payload to the StreamManager so it can be reassembled.
    /// TCP uses this mode: application protocols are parsed from the
    /// reassembled streams.
    Stream,

    /// Terminal protocol / nothing left to process.
    /// Parsing ends once this protocol has been handled.
    None,
}
30
31/// Core trait all protocol parsers must implement.
32pub trait Protocol: Send + Sync {
33    /// Unique identifier for this protocol (e.g., "tcp", "dns").
34    fn name(&self) -> &'static str;
35
36    /// Human-readable display name.
37    fn display_name(&self) -> &'static str {
38        self.name()
39    }
40
41    /// Check if this parser can handle the given context.
42    /// Returns a priority score (higher = more specific match).
43    /// Returns `None` if this parser cannot handle the context.
44    fn can_parse(&self, context: &ParseContext) -> Option<u32>;
45
46    /// Parse bytes into structured fields.
47    fn parse<'a>(&self, data: &'a [u8], context: &ParseContext) -> ParseResult<'a>;
48
49    /// Return the schema fields this protocol produces.
50    fn schema_fields(&self) -> Vec<FieldDescriptor>;
51
52    /// Protocols that might follow this one.
53    fn child_protocols(&self) -> &[&'static str] {
54        &[]
55    }
56
57    /// How should remaining bytes be handled after parsing?
58    ///
59    /// - `Chain`: Continue parsing with child protocols (default)
60    /// - `Stream`: Route to StreamManager for TCP reassembly
61    /// - `None`: Stop parsing (terminal protocol)
62    fn payload_mode(&self) -> PayloadMode {
63        PayloadMode::Chain
64    }
65
66    /// Protocols that must be parsed before this one can be reached.
67    ///
68    /// Used for protocol pruning optimization - when a query only needs
69    /// certain protocols, we can skip parsing protocols not in the
70    /// transitive dependency chain.
71    ///
72    /// Returns a list of protocol names that could appear in the parse
73    /// chain before this protocol (e.g., TCP depends on ipv4, ipv6).
74    fn dependencies(&self) -> &'static [&'static str] {
75        &[] // Default: no dependencies (link layer protocols)
76    }
77
78    /// Parse with field projection - only extract requested fields.
79    ///
80    /// If `fields` is None, extract all fields (default behavior).
81    /// If `fields` is Some, only extract fields in the set.
82    ///
83    /// Note: `frame_number` and `timestamp` are always available from
84    /// the packet metadata, not from parsing.
85    ///
86    /// The default implementation ignores projection and calls `parse()`.
87    /// Protocols can override this to skip expensive field extraction.
88    fn parse_projected<'a>(
89        &self,
90        data: &'a [u8],
91        context: &ParseContext,
92        _fields: Option<&HashSet<String>>,
93    ) -> ParseResult<'a> {
94        // Default: ignore projection, parse everything
95        self.parse(data, context)
96    }
97
98    /// Returns fields that are "cheap" to extract (header fields parsed anyway).
99    ///
100    /// These fields come from the basic header parse that must happen
101    /// regardless of projection. Used to decide if projection is worthwhile.
102    fn cheap_fields(&self) -> &'static [&'static str] {
103        &[] // Default: no fields are marked as cheap
104    }
105
106    /// Returns fields that are "expensive" to extract.
107    ///
108    /// These fields require additional parsing beyond the basic header,
109    /// such as variable-length options, compressed data, or string parsing.
110    fn expensive_fields(&self) -> &'static [&'static str] {
111        &[] // Default: no fields are marked as expensive
112    }
113}
114
115/// Enum of all built-in protocol parsers.
116///
117/// This enables static dispatch (no vtable overhead) for all built-in protocols.
118/// The compiler can inline match arms and optimize branch prediction.
119#[derive(Debug, Clone, Copy)]
120pub enum BuiltinProtocol {
121    Ethernet(EthernetProtocol),
122    LinuxSll(LinuxSllProtocol),
123    Arp(ArpProtocol),
124    Vlan(VlanProtocol),
125    Mpls(MplsProtocol),
126    Ipv4(Ipv4Protocol),
127    Ipv6(Ipv6Protocol),
128    Tcp(TcpProtocol),
129    Udp(UdpProtocol),
130    Icmp(IcmpProtocol),
131    Icmpv6(Icmpv6Protocol),
132    Gre(GreProtocol),
133    Vxlan(VxlanProtocol),
134    Gtp(GtpProtocol),
135    Ipsec(IpsecProtocol),
136    Bgp(BgpProtocol),
137    Ospf(OspfProtocol),
138    Dns(DnsProtocol),
139    Dhcp(DhcpProtocol),
140    Ntp(NtpProtocol),
141    Tls(TlsProtocol),
142    Ssh(SshProtocol),
143    Quic(QuicProtocol),
144    Netlink(NetlinkProtocol),
145    Rtnetlink(RtnetlinkProtocol),
146}
147
/// Forwards a method call to whichever parser a `BuiltinProtocol` wraps.
///
/// Usage: `delegate_protocol!(value, method, arg1, arg2, ...)`. The macro
/// expands to a `match` with one arm per variant, each arm calling `method`
/// with the given arguments on the wrapped parser.
macro_rules! delegate_protocol {
    // Internal rule: emit one match arm per listed variant. `$args` is the
    // already parenthesised argument list, pasted after the method name.
    (@dispatch $target:expr, $method:ident, $args:tt, [$($variant:ident),+]) => {
        match $target {
            $(BuiltinProtocol::$variant(inner) => inner.$method $args,)+
        }
    };
    // Public entry point: bundle the arguments and supply the variant list.
    ($self:expr, $method:ident $(, $arg:expr)*) => {
        delegate_protocol!(
            @dispatch $self, $method, ($($arg),*),
            [
                Ethernet, LinuxSll, Arp, Vlan, Mpls, Ipv4, Ipv6, Tcp, Udp, Icmp,
                Icmpv6, Gre, Vxlan, Gtp, Ipsec, Bgp, Ospf, Dns, Dhcp, Ntp, Tls,
                Ssh, Quic, Netlink, Rtnetlink
            ]
        )
    };
}
180
181impl Protocol for BuiltinProtocol {
182    #[inline]
183    fn name(&self) -> &'static str {
184        delegate_protocol!(self, name)
185    }
186
187    #[inline]
188    fn display_name(&self) -> &'static str {
189        delegate_protocol!(self, display_name)
190    }
191
192    #[inline]
193    fn can_parse(&self, context: &ParseContext) -> Option<u32> {
194        delegate_protocol!(self, can_parse, context)
195    }
196
197    #[inline]
198    fn parse<'a>(&self, data: &'a [u8], context: &ParseContext) -> ParseResult<'a> {
199        delegate_protocol!(self, parse, data, context)
200    }
201
202    #[inline]
203    fn schema_fields(&self) -> Vec<FieldDescriptor> {
204        delegate_protocol!(self, schema_fields)
205    }
206
207    #[inline]
208    fn child_protocols(&self) -> &[&'static str] {
209        delegate_protocol!(self, child_protocols)
210    }
211
212    #[inline]
213    fn payload_mode(&self) -> PayloadMode {
214        delegate_protocol!(self, payload_mode)
215    }
216
217    #[inline]
218    fn dependencies(&self) -> &'static [&'static str] {
219        delegate_protocol!(self, dependencies)
220    }
221
222    #[inline]
223    fn parse_projected<'a>(
224        &self,
225        data: &'a [u8],
226        context: &ParseContext,
227        fields: Option<&HashSet<String>>,
228    ) -> ParseResult<'a> {
229        delegate_protocol!(self, parse_projected, data, context, fields)
230    }
231
232    #[inline]
233    fn cheap_fields(&self) -> &'static [&'static str] {
234        delegate_protocol!(self, cheap_fields)
235    }
236
237    #[inline]
238    fn expensive_fields(&self) -> &'static [&'static str] {
239        delegate_protocol!(self, expensive_fields)
240    }
241}
242
243/// Conversion traits for ergonomic registration.
244impl From<EthernetProtocol> for BuiltinProtocol {
245    fn from(p: EthernetProtocol) -> Self {
246        BuiltinProtocol::Ethernet(p)
247    }
248}
249
250impl From<LinuxSllProtocol> for BuiltinProtocol {
251    fn from(p: LinuxSllProtocol) -> Self {
252        BuiltinProtocol::LinuxSll(p)
253    }
254}
255
256impl From<ArpProtocol> for BuiltinProtocol {
257    fn from(p: ArpProtocol) -> Self {
258        BuiltinProtocol::Arp(p)
259    }
260}
261
262impl From<VlanProtocol> for BuiltinProtocol {
263    fn from(p: VlanProtocol) -> Self {
264        BuiltinProtocol::Vlan(p)
265    }
266}
267
268impl From<MplsProtocol> for BuiltinProtocol {
269    fn from(p: MplsProtocol) -> Self {
270        BuiltinProtocol::Mpls(p)
271    }
272}
273
274impl From<Ipv4Protocol> for BuiltinProtocol {
275    fn from(p: Ipv4Protocol) -> Self {
276        BuiltinProtocol::Ipv4(p)
277    }
278}
279
280impl From<Ipv6Protocol> for BuiltinProtocol {
281    fn from(p: Ipv6Protocol) -> Self {
282        BuiltinProtocol::Ipv6(p)
283    }
284}
285
286impl From<TcpProtocol> for BuiltinProtocol {
287    fn from(p: TcpProtocol) -> Self {
288        BuiltinProtocol::Tcp(p)
289    }
290}
291
292impl From<UdpProtocol> for BuiltinProtocol {
293    fn from(p: UdpProtocol) -> Self {
294        BuiltinProtocol::Udp(p)
295    }
296}
297
298impl From<IcmpProtocol> for BuiltinProtocol {
299    fn from(p: IcmpProtocol) -> Self {
300        BuiltinProtocol::Icmp(p)
301    }
302}
303
304impl From<Icmpv6Protocol> for BuiltinProtocol {
305    fn from(p: Icmpv6Protocol) -> Self {
306        BuiltinProtocol::Icmpv6(p)
307    }
308}
309
310impl From<GreProtocol> for BuiltinProtocol {
311    fn from(p: GreProtocol) -> Self {
312        BuiltinProtocol::Gre(p)
313    }
314}
315
316impl From<VxlanProtocol> for BuiltinProtocol {
317    fn from(p: VxlanProtocol) -> Self {
318        BuiltinProtocol::Vxlan(p)
319    }
320}
321
322impl From<GtpProtocol> for BuiltinProtocol {
323    fn from(p: GtpProtocol) -> Self {
324        BuiltinProtocol::Gtp(p)
325    }
326}
327
328impl From<IpsecProtocol> for BuiltinProtocol {
329    fn from(p: IpsecProtocol) -> Self {
330        BuiltinProtocol::Ipsec(p)
331    }
332}
333
334impl From<BgpProtocol> for BuiltinProtocol {
335    fn from(p: BgpProtocol) -> Self {
336        BuiltinProtocol::Bgp(p)
337    }
338}
339
340impl From<OspfProtocol> for BuiltinProtocol {
341    fn from(p: OspfProtocol) -> Self {
342        BuiltinProtocol::Ospf(p)
343    }
344}
345
346impl From<DnsProtocol> for BuiltinProtocol {
347    fn from(p: DnsProtocol) -> Self {
348        BuiltinProtocol::Dns(p)
349    }
350}
351
352impl From<DhcpProtocol> for BuiltinProtocol {
353    fn from(p: DhcpProtocol) -> Self {
354        BuiltinProtocol::Dhcp(p)
355    }
356}
357
358impl From<NtpProtocol> for BuiltinProtocol {
359    fn from(p: NtpProtocol) -> Self {
360        BuiltinProtocol::Ntp(p)
361    }
362}
363
364impl From<TlsProtocol> for BuiltinProtocol {
365    fn from(p: TlsProtocol) -> Self {
366        BuiltinProtocol::Tls(p)
367    }
368}
369
370impl From<SshProtocol> for BuiltinProtocol {
371    fn from(p: SshProtocol) -> Self {
372        BuiltinProtocol::Ssh(p)
373    }
374}
375
376impl From<QuicProtocol> for BuiltinProtocol {
377    fn from(p: QuicProtocol) -> Self {
378        BuiltinProtocol::Quic(p)
379    }
380}
381
382impl From<NetlinkProtocol> for BuiltinProtocol {
383    fn from(p: NetlinkProtocol) -> Self {
384        BuiltinProtocol::Netlink(p)
385    }
386}
387
388impl From<RtnetlinkProtocol> for BuiltinProtocol {
389    fn from(p: RtnetlinkProtocol) -> Self {
390        BuiltinProtocol::Rtnetlink(p)
391    }
392}
393
394/// Registry for protocol parsers with priority-based selection.
395///
396/// Uses static dispatch via enum for all built-in protocols,
397/// avoiding vtable overhead and enabling compiler optimizations.
398#[derive(Debug, Clone)]
399pub struct ProtocolRegistry {
400    parsers: Vec<BuiltinProtocol>,
401}
402
403impl ProtocolRegistry {
404    /// Create a new empty registry.
405    pub fn new() -> Self {
406        Self {
407            parsers: Vec::new(),
408        }
409    }
410
411    /// Register a protocol parser.
412    pub fn register<P: Into<BuiltinProtocol>>(&mut self, parser: P) {
413        self.parsers.push(parser.into());
414    }
415
416    /// Find the best parser for the given context.
417    #[inline]
418    pub fn find_parser(&self, context: &ParseContext) -> Option<&BuiltinProtocol> {
419        self.parsers
420            .iter()
421            .filter_map(|p| p.can_parse(context).map(|priority| (p, priority)))
422            .max_by_key(|(_, priority)| *priority)
423            .map(|(parser, _)| parser)
424    }
425
426    /// Get all registered parsers.
427    pub fn all_parsers(&self) -> impl Iterator<Item = &BuiltinProtocol> {
428        self.parsers.iter()
429    }
430
431    /// Get a parser by name.
432    pub fn get_parser(&self, name: &str) -> Option<&BuiltinProtocol> {
433        self.parsers.iter().find(|p| p.name() == name)
434    }
435
436    /// Build combined schema from all parsers.
437    pub fn combined_schema(&self) -> Vec<FieldDescriptor> {
438        let mut fields = Vec::new();
439        for parser in &self.parsers {
440            fields.extend(parser.schema_fields());
441        }
442        fields
443    }
444
445    /// Get the number of registered parsers.
446    pub fn len(&self) -> usize {
447        self.parsers.len()
448    }
449
450    /// Check if the registry is empty.
451    pub fn is_empty(&self) -> bool {
452        self.parsers.is_empty()
453    }
454}
455
456impl Default for ProtocolRegistry {
457    fn default() -> Self {
458        Self::new()
459    }
460}
461
#[cfg(test)]
mod tests {
    use super::*;

    /// Guards against a large variant bloating the enum.
    #[test]
    fn test_builtin_protocol_size() {
        // The built-in parsers are meant to be zero-sized unit structs, which
        // leaves the enum at roughly the size of its discriminant.
        let bytes = std::mem::size_of::<BuiltinProtocol>();
        assert!(
            bytes <= 8,
            "BuiltinProtocol is {} bytes, expected <= 8",
            bytes
        );
    }

    /// Registers a few parsers and checks that lookup by context works.
    #[test]
    fn test_registry_static_dispatch() {
        let mut registry = ProtocolRegistry::new();
        registry.register(EthernetProtocol);
        registry.register(Ipv4Protocol);
        registry.register(TcpProtocol);
        assert_eq!(registry.len(), 3);

        // Link type 1 is Ethernet, so the Ethernet parser must be selected.
        let ctx = ParseContext::new(1);
        let selected = registry.find_parser(&ctx).map(|parser| parser.name());
        assert_eq!(selected, Some("ethernet"));
    }

    /// Lookup by name finds registered parsers and nothing else.
    #[test]
    fn test_get_parser_by_name() {
        let mut registry = ProtocolRegistry::new();
        registry.register(TcpProtocol);
        registry.register(UdpProtocol);

        for &known in &["tcp", "udp"] {
            assert!(registry.get_parser(known).is_some());
        }
        assert!(registry.get_parser("unknown").is_none());
    }
}