postfix_log_parser/utils/
common_fields.rs

1//! Postfix公共字段解析器
2//!
3//! 基于Postfix源码分析,提取高频公共字段的统一解析方法。
4//! 这个模块解决了多个组件中重复的字段解析逻辑,提供了性能优化的统一实现。
5
6use lazy_static::lazy_static;
7use regex::Regex;
8use serde::{Deserialize, Serialize};
9
10lazy_static! {
11    // === 邮件地址字段 (基于Postfix src/cleanup/cleanup_envelope.c) ===
12    /// 发件人地址提取: from=<address> 或 from=<>(空地址)
13    pub static ref FROM_EMAIL_REGEX: Regex = Regex::new(r"from=<([^>]*)>").unwrap();
14
15    /// 收件人地址提取: to=<address>
16    pub static ref TO_EMAIL_REGEX: Regex = Regex::new(r"to=<([^>]+)>").unwrap();
17
18    /// 原始收件人地址: orig_to=<address> (aliases/forwards处理前)
19    pub static ref ORIG_TO_EMAIL_REGEX: Regex = Regex::new(r"orig_to=<([^>]+)>").unwrap();
20
21    // === 客户端信息字段 (基于Postfix src/smtpd/smtpd.c) ===
22    /// 客户端连接信息: client=hostname[ip]:port 或 client=hostname[ip]
23    pub static ref CLIENT_INFO_REGEX: Regex = Regex::new(r"client=([^\[]+)\[([^\]]+)\](?::(\d+))?").unwrap();
24
25    /// 客户端信息(简化): hostname[ip]:port 格式 (用于cleanup等组件)
26    pub static ref CLIENT_SIMPLE_REGEX: Regex = Regex::new(r"([^\[]+)\[([^\]]+)\](?::(\d+))?").unwrap();
27
28    // === 中继信息字段 (基于Postfix src/smtp/smtp_deliver.c) ===
29    /// 中继主机信息: relay=hostname[ip]:port 或 relay=hostname 或 relay=none
30    pub static ref RELAY_INFO_REGEX: Regex = Regex::new(r"relay=([^,\[\]]+)(?:\[([^\]]+)\])?(?::(\d+))?").unwrap();
31
32    // === 性能和状态字段 (基于Postfix src/global/deliver_*.c) ===
33    /// 延迟时间: delay=seconds (可以是小数)
34    pub static ref DELAY_REGEX: Regex = Regex::new(r"delay=([\d.]+)").unwrap();
35
36    /// 详细延迟: delays=a/b/c/d (qmgr/smtp/connection/delivery)
37    pub static ref DELAYS_REGEX: Regex = Regex::new(r"delays=([\d./]+)").unwrap();
38
39    /// DSN状态码: dsn=x.y.z
40    pub static ref DSN_REGEX: Regex = Regex::new(r"dsn=([\d.]+)").unwrap();
41
42    /// 投递状态: status=sent/bounced/deferred/...
43    pub static ref STATUS_REGEX: Regex = Regex::new(r"status=(\w+)").unwrap();
44
45    // === 邮件属性字段 (基于Postfix src/cleanup/cleanup_message.c) ===
46    /// 邮件大小: size=bytes
47    pub static ref SIZE_REGEX: Regex = Regex::new(r"size=(\d+)").unwrap();
48
49    /// 收件人数量: nrcpt=count
50    pub static ref NRCPT_REGEX: Regex = Regex::new(r"nrcpt=(\d+)").unwrap();
51
52    /// Message-ID: message-id=<id> 或 message-id=id (带或不带尖括号)
53    pub static ref MESSAGE_ID_REGEX: Regex = Regex::new(r"message-id=(?:<([^>]+)>|([^,\s]+))").unwrap();
54
55    // === 协议和认证字段 (基于Postfix src/smtpd/smtpd_sasl_*.c) ===
56    /// 协议版本: proto=SMTP/ESMTP
57    pub static ref PROTO_REGEX: Regex = Regex::new(r"proto=(\w+)").unwrap();
58
59    /// HELO/EHLO信息: helo=<hostname>
60    pub static ref HELO_REGEX: Regex = Regex::new(r"helo=<([^>]+)>").unwrap();
61
62    /// SASL认证方法: sasl_method=PLAIN/LOGIN/...
63    pub static ref SASL_METHOD_REGEX: Regex = Regex::new(r"sasl_method=(\w+)").unwrap();
64
65    /// SASL用户名: sasl_username=user
66    pub static ref SASL_USERNAME_REGEX: Regex = Regex::new(r"sasl_username=([^,\s]+)").unwrap();
67}
68
69/// 邮件地址信息结构体
70#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
71pub struct EmailAddress {
72    /// 邮件地址
73    pub address: String,
74    /// 是否为空地址(如bounce邮件的from=<>)
75    pub is_empty: bool,
76}
77
78/// 客户端连接信息结构体
79#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
80pub struct ClientInfo {
81    /// 客户端主机名
82    pub hostname: String,
83    /// 客户端IP地址
84    pub ip: String,
85    /// 客户端端口(如果有)
86    pub port: Option<u16>,
87}
88
89/// 中继主机信息结构体
90#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
91pub struct RelayInfo {
92    /// 中继主机名
93    pub hostname: String,
94    /// 中继IP地址(如果有)
95    pub ip: Option<String>,
96    /// 中继端口(如果有)
97    pub port: Option<u16>,
98    /// 是否为"none"(本地处理)
99    pub is_none: bool,
100}
101
102/// 延迟时间详情结构体
103#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
104pub struct DelayInfo {
105    /// 总延迟(秒)
106    pub total: f64,
107    /// 详细延迟分解:[qmgr, smtp_connect, network, smtp_data]
108    pub breakdown: Option<[f64; 4]>,
109}
110
111/// 状态信息结构体
112#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
113pub struct StatusInfo {
114    /// 状态:sent, bounced, deferred等
115    pub status: String,
116    /// DSN状态码
117    pub dsn: Option<String>,
118    /// 状态描述(括号内的内容)
119    pub description: Option<String>,
120}
121
122/// 公共字段解析器
123pub struct CommonFieldsParser;
124
125impl CommonFieldsParser {
126    /// 提取发件人地址
127    pub fn extract_from_email(message: &str) -> Option<EmailAddress> {
128        FROM_EMAIL_REGEX.captures(message).map(|caps| {
129            let address = caps
130                .get(1)
131                .map_or(String::new(), |m| m.as_str().to_string());
132            EmailAddress {
133                is_empty: address.is_empty(),
134                address,
135            }
136        })
137    }
138
139    /// 提取收件人地址
140    pub fn extract_to_email(message: &str) -> Option<EmailAddress> {
141        TO_EMAIL_REGEX.captures(message).map(|caps| {
142            let address = caps
143                .get(1)
144                .map_or(String::new(), |m| m.as_str().to_string());
145            EmailAddress {
146                is_empty: address.is_empty(),
147                address,
148            }
149        })
150    }
151
152    /// 提取原始收件人地址
153    pub fn extract_orig_to_email(message: &str) -> Option<EmailAddress> {
154        ORIG_TO_EMAIL_REGEX.captures(message).map(|caps| {
155            let address = caps
156                .get(1)
157                .map_or(String::new(), |m| m.as_str().to_string());
158            EmailAddress {
159                is_empty: address.is_empty(),
160                address,
161            }
162        })
163    }
164
165    /// 提取客户端信息(完整格式)
166    pub fn extract_client_info(message: &str) -> Option<ClientInfo> {
167        CLIENT_INFO_REGEX.captures(message).map(|caps| ClientInfo {
168            hostname: caps
169                .get(1)
170                .map_or(String::new(), |m| m.as_str().to_string()),
171            ip: caps
172                .get(2)
173                .map_or(String::new(), |m| m.as_str().to_string()),
174            port: caps.get(3).and_then(|m| m.as_str().parse().ok()),
175        })
176    }
177
178    /// 提取客户端信息(简化格式,不带client=前缀)
179    pub fn extract_client_info_simple(client_str: &str) -> Option<ClientInfo> {
180        CLIENT_SIMPLE_REGEX
181            .captures(client_str)
182            .map(|caps| ClientInfo {
183                hostname: caps
184                    .get(1)
185                    .map_or(String::new(), |m| m.as_str().to_string()),
186                ip: caps
187                    .get(2)
188                    .map_or(String::new(), |m| m.as_str().to_string()),
189                port: caps.get(3).and_then(|m| m.as_str().parse().ok()),
190            })
191    }
192
193    /// 提取中继信息
194    pub fn extract_relay_info(message: &str) -> Option<RelayInfo> {
195        RELAY_INFO_REGEX.captures(message).map(|caps| {
196            let hostname = caps
197                .get(1)
198                .map_or(String::new(), |m| m.as_str().to_string());
199            let is_none = hostname == "none";
200
201            RelayInfo {
202                hostname,
203                ip: caps.get(2).map(|m| m.as_str().to_string()),
204                port: caps.get(3).and_then(|m| m.as_str().parse().ok()),
205                is_none,
206            }
207        })
208    }
209
210    /// 提取延迟信息
211    pub fn extract_delay_info(message: &str) -> Option<DelayInfo> {
212        let total = DELAY_REGEX
213            .captures(message)
214            .and_then(|caps| caps.get(1))
215            .and_then(|m| m.as_str().parse().ok())?;
216
217        let breakdown = DELAYS_REGEX
218            .captures(message)
219            .and_then(|caps| caps.get(1))
220            .and_then(|m| Self::parse_delays_breakdown(m.as_str()));
221
222        Some(DelayInfo { total, breakdown })
223    }
224
225    /// 解析延迟分解信息
226    fn parse_delays_breakdown(delays_str: &str) -> Option<[f64; 4]> {
227        let parts: Vec<&str> = delays_str.split('/').collect();
228        if parts.len() == 4 {
229            let mut breakdown = [0.0; 4];
230            for (i, part) in parts.iter().enumerate() {
231                breakdown[i] = part.parse().ok()?;
232            }
233            Some(breakdown)
234        } else {
235            None
236        }
237    }
238
239    /// 提取状态信息
240    pub fn extract_status_info(message: &str) -> Option<StatusInfo> {
241        let status = STATUS_REGEX
242            .captures(message)
243            .and_then(|caps| caps.get(1))
244            .map(|m| m.as_str().to_string())?;
245
246        let dsn = DSN_REGEX
247            .captures(message)
248            .and_then(|caps| caps.get(1))
249            .map(|m| m.as_str().to_string());
250
251        // 提取状态描述(括号内容)
252        let description = if let Some(start) = message.find('(') {
253            if let Some(end) = message.rfind(')') {
254                if end > start {
255                    Some(message[start + 1..end].to_string())
256                } else {
257                    None
258                }
259            } else {
260                None
261            }
262        } else {
263            None
264        };
265
266        Some(StatusInfo {
267            status,
268            dsn,
269            description,
270        })
271    }
272
273    /// 提取邮件大小
274    pub fn extract_size(message: &str) -> Option<u64> {
275        SIZE_REGEX
276            .captures(message)
277            .and_then(|caps| caps.get(1))
278            .and_then(|m| m.as_str().parse().ok())
279    }
280
281    /// 提取收件人数量
282    pub fn extract_nrcpt(message: &str) -> Option<u32> {
283        NRCPT_REGEX
284            .captures(message)
285            .and_then(|caps| caps.get(1))
286            .and_then(|m| m.as_str().parse().ok())
287    }
288
289    /// 提取Message-ID (支持带尖括号和不带尖括号的格式)
290    pub fn extract_message_id(message: &str) -> Option<String> {
291        MESSAGE_ID_REGEX.captures(message).and_then(|caps| {
292            // 先检查带尖括号的格式 (第1个捕获组)
293            if let Some(bracketed) = caps.get(1) {
294                Some(bracketed.as_str().to_string())
295            }
296            // 再检查不带尖括号的格式 (第2个捕获组)
297            else if let Some(unbracketed) = caps.get(2) {
298                Some(unbracketed.as_str().to_string())
299            } else {
300                None
301            }
302        })
303    }
304
305    /// 提取协议信息
306    pub fn extract_protocol(message: &str) -> Option<String> {
307        PROTO_REGEX
308            .captures(message)
309            .and_then(|caps| caps.get(1))
310            .map(|m| m.as_str().to_string())
311    }
312
313    /// 提取HELO信息
314    pub fn extract_helo(message: &str) -> Option<String> {
315        HELO_REGEX
316            .captures(message)
317            .and_then(|caps| caps.get(1))
318            .map(|m| m.as_str().to_string())
319    }
320
321    /// 提取SASL认证方法
322    pub fn extract_sasl_method(message: &str) -> Option<String> {
323        SASL_METHOD_REGEX
324            .captures(message)
325            .and_then(|caps| caps.get(1))
326            .map(|m| m.as_str().to_string())
327    }
328
329    /// 提取SASL用户名
330    pub fn extract_sasl_username(message: &str) -> Option<String> {
331        SASL_USERNAME_REGEX
332            .captures(message)
333            .and_then(|caps| caps.get(1))
334            .map(|m| m.as_str().to_string())
335    }
336}
337
#[cfg(test)]
mod tests {
    use super::*;

    /// Sender extraction for a regular address and for the empty bounce sender.
    #[test]
    fn test_extract_from_email() {
        let expected = EmailAddress {
            address: "sender@example.com".to_string(),
            is_empty: false,
        };
        let message = "4bG4VR5z: from=<sender@example.com>, size=1234";
        assert_eq!(
            CommonFieldsParser::extract_from_email(message),
            Some(expected)
        );

        // Empty sender, as used by bounce mail.
        let expected_bounce = EmailAddress {
            address: String::new(),
            is_empty: true,
        };
        let bounce_message = "4bG4VR5z: from=<>, to=<user@example.com>";
        assert_eq!(
            CommonFieldsParser::extract_from_email(bounce_message),
            Some(expected_bounce)
        );
    }

    /// Client extraction with and without a port.
    #[test]
    fn test_extract_client_info() {
        let expected = ClientInfo {
            hostname: "mail.example.com".to_string(),
            ip: "192.168.1.100".to_string(),
            port: Some(25),
        };
        let message = "4bG4VR5z: client=mail.example.com[192.168.1.100]:25";
        assert_eq!(
            CommonFieldsParser::extract_client_info(message),
            Some(expected)
        );

        // No port after the closing bracket.
        let expected_no_port = ClientInfo {
            hostname: "localhost".to_string(),
            ip: "127.0.0.1".to_string(),
            port: None,
        };
        let no_port_message = "4bG4VR5z: client=localhost[127.0.0.1]";
        assert_eq!(
            CommonFieldsParser::extract_client_info(no_port_message),
            Some(expected_no_port)
        );
    }

    /// Relay extraction for a full `host[ip]:port` and for `relay=none`.
    #[test]
    fn test_extract_relay_info() {
        let expected = RelayInfo {
            hostname: "mx.example.com".to_string(),
            ip: Some("1.2.3.4".to_string()),
            port: Some(25),
            is_none: false,
        };
        let message = "4bG4VR5z: to=<user@example.com>, relay=mx.example.com[1.2.3.4]:25";
        assert_eq!(
            CommonFieldsParser::extract_relay_info(message),
            Some(expected)
        );

        // relay=none
        let expected_none = RelayInfo {
            hostname: "none".to_string(),
            ip: None,
            port: None,
            is_none: true,
        };
        let none_message = "4bG4VR5z: to=<user@example.com>, relay=none, delay=0";
        assert_eq!(
            CommonFieldsParser::extract_relay_info(none_message),
            Some(expected_none)
        );
    }

    /// Delay extraction with and without the `delays=` breakdown.
    #[test]
    fn test_extract_delay_info() {
        let expected = DelayInfo {
            total: 5.5,
            breakdown: Some([1.0, 0.5, 3.0, 1.0]),
        };
        let message = "4bG4VR5z: delay=5.5, delays=1.0/0.5/3.0/1.0";
        assert_eq!(
            CommonFieldsParser::extract_delay_info(message),
            Some(expected)
        );

        // Only the total delay is present.
        let expected_simple = DelayInfo {
            total: 2.3,
            breakdown: None,
        };
        let simple_message = "4bG4VR5z: delay=2.3";
        assert_eq!(
            CommonFieldsParser::extract_delay_info(simple_message),
            Some(expected_simple)
        );
    }

    /// Status, DSN and parenthesised description are all picked up.
    #[test]
    fn test_extract_status_info() {
        let expected = StatusInfo {
            status: "sent".to_string(),
            dsn: Some("2.0.0".to_string()),
            description: Some("250 2.0.0 OK".to_string()),
        };
        let message = "4bG4VR5z: status=sent (250 2.0.0 OK), dsn=2.0.0";
        assert_eq!(
            CommonFieldsParser::extract_status_info(message),
            Some(expected)
        );
    }

    /// Numeric message attributes.
    #[test]
    fn test_extract_message_properties() {
        let message = "4bG4VR5z: from=<sender@example.com>, size=1234, nrcpt=2";

        let size = CommonFieldsParser::extract_size(message);
        let nrcpt = CommonFieldsParser::extract_nrcpt(message);

        assert_eq!(size, Some(1234));
        assert_eq!(nrcpt, Some(2));
    }

    /// Message-ID extraction for bracketed and bare forms.
    #[test]
    fn test_extract_message_id() {
        // Bracketed form.
        let bracketed_message = "61172636348059648: message-id=<61172636348059648@m01.localdomain>";
        assert_eq!(
            CommonFieldsParser::extract_message_id(bracketed_message),
            Some("61172636348059648@m01.localdomain".to_string())
        );

        // Bare form without angle brackets.
        let unbracketed_message = "61172641393807360: message-id=61172636348059648@m01.localdomain";
        assert_eq!(
            CommonFieldsParser::extract_message_id(unbracketed_message),
            Some("61172636348059648@m01.localdomain".to_string())
        );

        // Bracketed form followed by further fields.
        let complex_message = "4bG4VR5z: message-id=<test123@example.com>, size=456";
        assert_eq!(
            CommonFieldsParser::extract_message_id(complex_message),
            Some("test123@example.com".to_string())
        );
    }

    /// Protocol and HELO extraction from the same message.
    #[test]
    fn test_extract_protocol_info() {
        let message = "4bG4VR5z: proto=ESMTP, helo=<mail.example.com>";

        let protocol = CommonFieldsParser::extract_protocol(message);
        let helo = CommonFieldsParser::extract_helo(message);

        assert_eq!(protocol, Some("ESMTP".to_string()));
        assert_eq!(helo, Some("mail.example.com".to_string()));
    }
}
503}