// pcapsql_core/format/address.rs

//! Network address formatting and detection.
//!
//! Provides functions to:
//! - Format binary address values as human-readable strings
//! - Detect which columns represent network addresses based on type and name
6
7use std::net::{Ipv4Addr, Ipv6Addr};
8
9use crate::schema::{DataKind, FieldDescriptor};
10
/// The kind of network address a column represents.
///
/// The variant determines which formatter applies to the column's raw
/// values. `Hash` is derived so a kind can be used as a map or set key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum AddressKind {
    /// IPv4 address stored as UInt32 (network byte order)
    Ipv4,
    /// IPv6 address stored as FixedSizeBinary(16)
    Ipv6,
    /// MAC address stored as FixedSizeBinary(6)
    Mac,
}
21
/// Format a UInt32 as an IPv4 address string in dotted-decimal notation.
///
/// The most significant byte of `value` becomes the first octet, so
/// `0xC0A80101` renders as `192.168.1.1`.
///
/// # Example
///
/// ```
/// use pcapsql_core::format::format_ipv4;
///
/// assert_eq!(format_ipv4(0xC0A80101), "192.168.1.1");
/// assert_eq!(format_ipv4(0x0A000001), "10.0.0.1");
/// ```
pub fn format_ipv4(value: u32) -> String {
    // `Ipv4Addr: From<u32>` maps the high byte to the first octet, which is
    // exactly what going through `to_be_bytes()` would produce.
    Ipv4Addr::from(value).to_string()
}
36
/// Format 16 bytes as an IPv6 address string.
///
/// Returns `None` if the slice is not exactly 16 bytes.
///
/// # Example
///
/// ```
/// use pcapsql_core::format::format_ipv6;
///
/// let bytes = [0x20, 0x01, 0x0d, 0xb8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1];
/// assert_eq!(format_ipv6(&bytes), Some("2001:db8::1".to_string()));
/// ```
pub fn format_ipv6(bytes: &[u8]) -> Option<String> {
    // The slice-to-array conversion fails for any length other than 16, so it
    // doubles as the length validation.
    let octets = <[u8; 16]>::try_from(bytes).ok()?;
    Some(Ipv6Addr::from(octets).to_string())
}
56
/// Format 6 bytes as a MAC address string in colon-separated hex format.
///
/// Each byte is rendered as two lowercase hex digits.
///
/// Returns `None` if the slice is not exactly 6 bytes.
///
/// # Example
///
/// ```
/// use pcapsql_core::format::format_mac;
///
/// let bytes = [0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff];
/// assert_eq!(format_mac(&bytes), Some("aa:bb:cc:dd:ee:ff".to_string()));
/// ```
pub fn format_mac(bytes: &[u8]) -> Option<String> {
    // Converting to a fixed-size array validates the length and lets the
    // octets be destructured without any panicking index operations.
    let [a, b, c, d, e, f] = <[u8; 6]>::try_from(bytes).ok()?;
    Some(format!(
        "{:02x}:{:02x}:{:02x}:{:02x}:{:02x}:{:02x}",
        a, b, c, d, e, f
    ))
}
78
79/// Detect if a column represents a network address based on its type and name.
80///
81/// This uses a hybrid approach: both the data type AND the column name
82/// must match expected patterns. This prevents false positives (e.g., a UInt32
83/// column named "count" won't be treated as an IPv4 address).
84///
85/// # Detection Rules
86///
87/// | AddressKind | DataKind | Name Patterns |
88/// |-------------|----------|---------------|
89/// | Ipv4 | UInt32 | `*_ip`, `*_ip_*`, `*addr`, `router`, `server_id`, `subnet_mask`, `ciaddr`, `yiaddr`, `siaddr`, `giaddr` |
90/// | Ipv6 | FixedBinary(16) | `*_ip`, `*_ip_*`, `*_address`, `*_prefix` |
91/// | Mac | FixedBinary(6) | `*_mac`, `chaddr`, `*_mac_*` |
92pub fn detect_address_column(field: &FieldDescriptor) -> Option<AddressKind> {
93    let name = field.name.to_lowercase();
94
95    match field.kind {
96        DataKind::UInt32 => {
97            // IPv4: must have IP-related name
98            if is_ipv4_column_name(&name) {
99                Some(AddressKind::Ipv4)
100            } else {
101                None
102            }
103        }
104        DataKind::FixedBinary(16) => {
105            // IPv6: must have IP/address-related name
106            if is_ipv6_column_name(&name) {
107                Some(AddressKind::Ipv6)
108            } else {
109                None
110            }
111        }
112        DataKind::FixedBinary(6) => {
113            // MAC: must have MAC-related name
114            if is_mac_column_name(&name) {
115                Some(AddressKind::Mac)
116            } else {
117                None
118            }
119        }
120        _ => None,
121    }
122}
123
/// Check if a column name indicates an IPv4 address.
///
/// `name` is compared as-is, so callers are expected to pass it already
/// lowercased. A name qualifies when any of the following holds:
///
/// - it is one of the known exact names (`router`, `server_id`,
///   `subnet_mask`, or the DHCP fields `ciaddr`/`yiaddr`/`siaddr`/`giaddr`)
/// - it ends with `_ip` (e.g., `src_ip`, `dst_ip`, `sender_ip`, `target_ip`)
/// - it contains `_ip_` (e.g., `src_ip_v4`)
/// - it ends with `addr` (e.g., `srcaddr`, `dstaddr`) and does not contain
///   `mac`
fn is_ipv4_column_name(name: &str) -> bool {
    let is_known_exact = matches!(
        name,
        "router" | "server_id" | "subnet_mask" | "ciaddr" | "yiaddr" | "siaddr" | "giaddr"
    );

    is_known_exact
        || name.ends_with("_ip")
        || name.contains("_ip_")
        // `*addr` is shared with MAC-style names, so those are excluded here.
        || (name.ends_with("addr") && !name.contains("mac"))
}
160
/// Check if a column name indicates an IPv6 address.
///
/// `name` is compared as-is, so callers are expected to pass it already
/// lowercased. A name qualifies when it contains `_ip_` (e.g., `src_ip_v6`)
/// or ends with one of:
///
/// - `_ip` (same pattern as IPv4, distinguished by data type)
/// - `_address` (e.g., `ndp_target_address`, `mld_multicast_address`)
/// - `_prefix` (e.g., `ndp_prefix`)
fn is_ipv6_column_name(name: &str) -> bool {
    name.ends_with("_ip")
        || name.contains("_ip_")
        || name.ends_with("_address")
        || name.ends_with("_prefix")
}
185
/// Check if a column name indicates a MAC address.
///
/// `name` is compared as-is, so callers are expected to pass it already
/// lowercased. A name qualifies when any of the following holds:
///
/// - it is exactly `chaddr` (DHCP client hardware address)
/// - it ends with `_mac` (e.g., `src_mac`, `dst_mac`, `sender_mac`, `target_mac`)
/// - it contains `_mac_` (e.g., `ndp_source_mac_address`)
fn is_mac_column_name(name: &str) -> bool {
    name == "chaddr" || name.ends_with("_mac") || name.contains("_mac_")
}
205
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a field with the given name and kind and run detection on it.
    fn detect(name: &'static str, kind: DataKind) -> Option<AddressKind> {
        let field = FieldDescriptor::new(name, kind);
        detect_address_column(&field)
    }

    // ========== format_ipv4 tests ==========

    #[test]
    fn test_format_ipv4_common() {
        assert_eq!(format_ipv4(0xC0A80101), "192.168.1.1");
        assert_eq!(format_ipv4(0x0A000001), "10.0.0.1");
        assert_eq!(format_ipv4(0x08080808), "8.8.8.8");
    }

    #[test]
    fn test_format_ipv4_edge_cases() {
        assert_eq!(format_ipv4(0x00000000), "0.0.0.0");
        assert_eq!(format_ipv4(0xFFFFFFFF), "255.255.255.255");
        assert_eq!(format_ipv4(0x7F000001), "127.0.0.1");
    }

    #[test]
    fn test_format_ipv4_private_ranges() {
        // 10.0.0.0/8
        assert_eq!(format_ipv4(0x0A123456), "10.18.52.86");
        // 172.16.0.0/12
        assert_eq!(format_ipv4(0xAC100001), "172.16.0.1");
        // 192.168.0.0/16
        assert_eq!(format_ipv4(0xC0A80001), "192.168.0.1");
    }

    // ========== format_ipv6 tests ==========

    #[test]
    fn test_format_ipv6_common() {
        let loopback = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1];
        assert_eq!(format_ipv6(&loopback), Some("::1".to_string()));

        let doc = [0x20, 0x01, 0x0d, 0xb8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1];
        assert_eq!(format_ipv6(&doc), Some("2001:db8::1".to_string()));
    }

    #[test]
    fn test_format_ipv6_link_local() {
        let link_local = [0xfe, 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1];
        assert_eq!(format_ipv6(&link_local), Some("fe80::1".to_string()));
    }

    #[test]
    fn test_format_ipv6_invalid_length() {
        assert_eq!(format_ipv6(&[0; 15]), None);
        assert_eq!(format_ipv6(&[0; 17]), None);
        assert_eq!(format_ipv6(&[]), None);
    }

    // ========== format_mac tests ==========

    #[test]
    fn test_format_mac_common() {
        let bytes = [0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff];
        assert_eq!(format_mac(&bytes), Some("aa:bb:cc:dd:ee:ff".to_string()));
    }

    #[test]
    fn test_format_mac_broadcast() {
        let broadcast = [0xff, 0xff, 0xff, 0xff, 0xff, 0xff];
        assert_eq!(
            format_mac(&broadcast),
            Some("ff:ff:ff:ff:ff:ff".to_string())
        );
    }

    #[test]
    fn test_format_mac_zeros() {
        let zeros = [0x00, 0x00, 0x00, 0x00, 0x00, 0x00];
        assert_eq!(format_mac(&zeros), Some("00:00:00:00:00:00".to_string()));
    }

    #[test]
    fn test_format_mac_invalid_length() {
        assert_eq!(format_mac(&[0; 5]), None);
        assert_eq!(format_mac(&[0; 7]), None);
        assert_eq!(format_mac(&[]), None);
    }

    // ========== detect_address_column tests ==========

    #[test]
    fn test_detect_ipv4_columns() {
        let names = [
            // Standard IP columns
            "src_ip",
            "dst_ip",
            // View-style columns
            "src_ip_v4",
            // ARP columns
            "sender_ip",
            "target_ip",
            // DHCP columns
            "ciaddr",
            "yiaddr",
            "siaddr",
            "giaddr",
            "router",
            "server_id",
            "subnet_mask",
            // Generic `*addr` suffix
            "srcaddr",
        ];
        for name in names {
            assert_eq!(
                detect(name, DataKind::UInt32),
                Some(AddressKind::Ipv4),
                "column {:?}",
                name
            );
        }
    }

    #[test]
    fn test_detect_non_ip_uint32() {
        // These UInt32 columns should NOT be detected as IPs. The last one
        // ends with `addr` but is excluded because it contains `mac`.
        for name in ["count", "xid", "seq", "flow_label", "src_mac_addr"] {
            assert_eq!(detect(name, DataKind::UInt32), None, "column {:?}", name);
        }
    }

    #[test]
    fn test_detect_ipv6_columns() {
        let names = [
            "src_ip",
            "dst_ip",
            "src_ip_v6",
            "ndp_target_address",
            "mld_multicast_address",
            "ndp_prefix",
        ];
        for name in names {
            assert_eq!(
                detect(name, DataKind::FixedBinary(16)),
                Some(AddressKind::Ipv6),
                "column {:?}",
                name
            );
        }
    }

    #[test]
    fn test_detect_non_ipv6_binary16() {
        // A FixedBinary(16) with a non-address name should not be detected
        assert_eq!(detect("some_data", DataKind::FixedBinary(16)), None);
    }

    #[test]
    fn test_detect_mac_columns() {
        let names = [
            "src_mac",
            "dst_mac",
            "chaddr",
            "sender_mac",
            "ndp_source_mac_address",
        ];
        for name in names {
            assert_eq!(
                detect(name, DataKind::FixedBinary(6)),
                Some(AddressKind::Mac),
                "column {:?}",
                name
            );
        }
    }

    #[test]
    fn test_detect_non_mac_binary6() {
        // A FixedBinary(6) with non-MAC name should not be detected
        assert_eq!(detect("some_data", DataKind::FixedBinary(6)), None);
    }

    #[test]
    fn test_detect_wrong_type() {
        // Right name but wrong type should not match
        assert_eq!(detect("src_ip", DataKind::String), None);
        assert_eq!(detect("src_mac", DataKind::String), None);
    }

    #[test]
    fn test_detect_case_insensitive() {
        assert_eq!(detect("SRC_IP", DataKind::UInt32), Some(AddressKind::Ipv4));
        assert_eq!(
            detect("Dst_Mac", DataKind::FixedBinary(6)),
            Some(AddressKind::Mac)
        );
    }
}