postfix-log-parser 0.2.0

高性能模块化Postfix日志解析器,经3.2GB生产数据验证,SMTPD事件100%准确率
Documentation
//! Postfix组件统计工具
//!
//! 分析日志文件中各组件的出现频率,为开发优先级提供数据支持

use postfix_log_parser::{events::base::ComponentEvent, parse_log_line};
use std::collections::HashMap;
use std::env;
use std::fs::File;
use std::io::{BufRead, BufReader};
use std::path::Path;

/// Counters collected for a single Postfix component while scanning a log.
#[derive(Default, Debug)]
struct ComponentStats {
    /// Number of events attributed to this component.
    total_count: u64,
    /// Number of events that resolved to a concrete (non-`Unknown`) variant.
    parsed_count: u64,
    /// Number of events that fell back to the `Unknown` variant.
    unknown_count: u64,
    /// Occurrence count per event-type label.
    event_types: HashMap<String, u64>,
    /// Running sum of the confidence scores reported for this component's events.
    confidence_sum: f64,
    /// Number of events whose confidence was at least 0.9.
    high_confidence_count: u64,
}

/// Aggregated result of scanning one log file.
#[derive(Default, Debug)]
struct OverallStats {
    /// Per-component counters, keyed by component name.
    components: HashMap<String, ComponentStats>,
    /// Number of lines read from the file (empty lines included).
    total_lines: u64,
    /// Number of lines that were empty after trimming whitespace.
    empty_lines: u64,
    /// Number of non-empty lines for which the parser produced no event.
    parse_failed: u64,
}

impl ComponentStats {
    /// Mean confidence over every event recorded for this component.
    ///
    /// `confidence_sum` is accumulated for all events, including those that
    /// ended up as `Unknown`, so the matching denominator is `total_count`.
    /// Returns `0.0` when no event has been recorded.
    fn average_confidence(&self) -> f64 {
        if self.total_count == 0 {
            return 0.0;
        }
        self.confidence_sum / self.total_count as f64
    }

    /// Percentage (0–100) of this component's events that resolved to a
    /// concrete, non-`Unknown` variant. Returns `0.0` when there are no events.
    fn parse_success_rate(&self) -> f64 {
        Self::percentage(self.parsed_count, self.total_count)
    }

    /// Percentage (0–100) of this component's events with confidence ≥ 0.9.
    ///
    /// `high_confidence_count` is incremented for every event regardless of
    /// variant, so it is related to `total_count`; this keeps the rate within
    /// 0–100. Returns `0.0` when there are no events.
    fn high_confidence_rate(&self) -> f64 {
        Self::percentage(self.high_confidence_count, self.total_count)
    }

    /// `part / whole` expressed as a percentage, or `0.0` when `whole` is zero.
    fn percentage(part: u64, whole: u64) -> f64 {
        if whole == 0 {
            0.0
        } else {
            part as f64 / whole as f64 * 100.0
        }
    }
}

fn analyze_log_file(file_path: &Path) -> Result<OverallStats, Box<dyn std::error::Error>> {
    let file = File::open(file_path)?;
    let reader = BufReader::new(file);
    let mut stats = OverallStats::default();

    println!("🔍 正在分析日志文件: {}", file_path.display());

    for (line_number, line) in reader.lines().enumerate() {
        let line = line?;
        stats.total_lines += 1;

        if line.trim().is_empty() {
            stats.empty_lines += 1;
            continue;
        }

        // 显示进度
        if line_number % 10000 == 0 {
            print!("\r📊 已处理: {}", line_number + 1);
        }

        let result = parse_log_line(&line);

        if let Some(event) = result.event {
            let component = &event.component;
            let comp_stats = stats.components.entry(component.clone()).or_default();

            comp_stats.total_count += 1;
            comp_stats.confidence_sum += result.confidence as f64;

            if result.confidence >= 0.9 {
                comp_stats.high_confidence_count += 1;
            }

            // 分析事件类型
            let event_type = match &event.event {
                ComponentEvent::Smtpd(smtpd_event) => {
                    comp_stats.parsed_count += 1;
                    format!("smtpd:{}", smtpd_event.event_type())
                }
                ComponentEvent::Qmgr(_) => {
                    comp_stats.parsed_count += 1;
                    "qmgr:parsed".to_string()
                }
                ComponentEvent::Smtp(_) => {
                    comp_stats.parsed_count += 1;
                    "smtp:parsed".to_string()
                }
                ComponentEvent::Cleanup(_) => {
                    comp_stats.parsed_count += 1;
                    "cleanup:parsed".to_string()
                }
                ComponentEvent::Error(_) => {
                    comp_stats.parsed_count += 1;
                    "error:parsed".to_string()
                }
                ComponentEvent::Relay(_) => {
                    comp_stats.parsed_count += 1;
                    "relay:parsed".to_string()
                }
                ComponentEvent::Discard(_) => {
                    comp_stats.parsed_count += 1;
                    "discard:parsed".to_string()
                }
                ComponentEvent::Bounce(bounce_event) => {
                    comp_stats.parsed_count += 1;
                    format!("bounce:{}", bounce_event.event_type())
                }
                ComponentEvent::PostfixScript(postfix_script_event) => {
                    comp_stats.parsed_count += 1;
                    format!("postfix-script:{}", postfix_script_event.event_type())
                }
                ComponentEvent::Master(master_event) => {
                    comp_stats.parsed_count += 1;
                    format!("master:{}", master_event.event_type())
                }
                ComponentEvent::Local(_) => {
                    comp_stats.parsed_count += 1;
                    "local:parsed".to_string()
                }
                ComponentEvent::Postmap(_) => {
                    comp_stats.parsed_count += 1;
                    "postmap:parsed".to_string()
                }
                ComponentEvent::Anvil(_) => {
                    comp_stats.parsed_count += 1;
                    "anvil:parsed".to_string()
                }
                ComponentEvent::Virtual(_) => {
                    comp_stats.parsed_count += 1;
                    "virtual:parsed".to_string()
                }
                ComponentEvent::Unknown(_) => {
                    comp_stats.unknown_count += 1;
                    "unknown".to_string()
                }
                ComponentEvent::Pickup(_) => {
                    comp_stats.parsed_count += 1;
                    "pickup:parsed".to_string()
                }
                ComponentEvent::Postlogd(_) => {
                    comp_stats.parsed_count += 1;
                    "postlogd:parsed".to_string()
                }
                ComponentEvent::Proxymap(_) => {
                    comp_stats.parsed_count += 1;
                    "proxymap:parsed".to_string()
                }
                ComponentEvent::Sendmail(_) => {
                    comp_stats.parsed_count += 1;
                    "sendmail:parsed".to_string()
                }
                ComponentEvent::TrivialRewrite(_) => {
                    comp_stats.parsed_count += 1;
                    "trivial-rewrite:parsed".to_string()
                }
                ComponentEvent::Postsuper(_) => {
                    comp_stats.parsed_count += 1;
                    "postsuper:parsed".to_string()
                }
            };

            *comp_stats.event_types.entry(event_type).or_insert(0) += 1;
        } else {
            stats.parse_failed += 1;
        }
    }

    println!(
        "\r✅ 分析完成!总共处理了 {}",
        stats.total_lines
    );
    Ok(stats)
}

/// Prints the detailed analysis report to stdout.
///
/// The report has three parts: overall line counters, a per-component table
/// ordered by descending event count, and — for components with more than
/// 100 events — their ten most frequent event types with percentages.
///
/// Entries with equal counts are ordered by name so that the report (and the
/// selection of the "top ten" event types) is the same on every run; the
/// underlying `HashMap` iteration order alone would not guarantee that.
fn print_detailed_report(stats: &OverallStats) {
    // Width of the horizontal rules framing the report.
    const RULE_WIDTH: usize = 80;
    // Components at or below this many events get no per-event-type detail.
    const DETAIL_THRESHOLD: u64 = 100;
    // Number of event types listed per component.
    const TOP_EVENT_TYPES: usize = 10;

    let separator = "=".repeat(RULE_WIDTH);
    println!("\n{}", separator);
    println!("📊 Postfix组件分析报告");
    println!("{}", separator);

    // Overall counters.
    println!("\n📈 总体统计:");
    println!("  总行数: {}", stats.total_lines);
    println!("  空行数: {}", stats.empty_lines);
    println!("  解析失败: {}", stats.parse_failed);
    println!("  成功识别的组件数: {}", stats.components.len());

    // Most frequent component first; ties resolved by component name.
    let mut components: Vec<_> = stats.components.iter().collect();
    components.sort_unstable_by(|a, b| {
        b.1.total_count
            .cmp(&a.1.total_count)
            .then_with(|| a.0.cmp(b.0))
    });

    println!("\n🏆 组件出现频率排行(开发优先级参考):");
    println!(
        "{:<15} {:<10} {:<12} {:<12} {:<10} {:<10}",
        "组件", "总计", "成功解析", "Unknown", "成功率%", "高置信度%"
    );
    let dash_line = "-".repeat(RULE_WIDTH);
    println!("{}", dash_line);

    for (component, comp_stats) in &components {
        println!(
            "{:<15} {:<10} {:<12} {:<12} {:<10.1} {:<10.1}",
            component,
            comp_stats.total_count,
            comp_stats.parsed_count,
            comp_stats.unknown_count,
            comp_stats.parse_success_rate(),
            comp_stats.high_confidence_rate()
        );
    }

    println!("\n📋 详细事件类型分析:");
    println!("{}", dash_line);

    for (component, comp_stats) in &components {
        // Only high-volume components get the per-event-type breakdown.
        if comp_stats.total_count <= DETAIL_THRESHOLD {
            continue;
        }

        println!(
            "\n🔧 组件: {} (总计: {})",
            component, comp_stats.total_count
        );

        // Most frequent event type first; ties resolved by label.
        let mut event_types: Vec<_> = comp_stats.event_types.iter().collect();
        event_types.sort_unstable_by(|a, b| b.1.cmp(a.1).then_with(|| a.0.cmp(b.0)));

        for &(event_type, &count) in event_types.iter().take(TOP_EVENT_TYPES) {
            let percentage = count as f64 / comp_stats.total_count as f64 * 100.0;
            println!("  {:<25} {:<8} ({:.1}%)", event_type, count, percentage);
        }
    }
}

/// Prints a suggested development roadmap to stdout.
///
/// Lists the ten most frequent components, each with a status derived from
/// its parse success rate (> 80 %, > 50 %, otherwise) and an urgency derived
/// from its event count (> 10000, > 1000, otherwise), followed by a fixed
/// list of suggested development phases.
///
/// Components with equal counts are ordered by name so that the ten listed
/// components and their numbering are the same on every run; the underlying
/// `HashMap` iteration order alone would not guarantee that.
fn print_development_roadmap(stats: &OverallStats) {
    // Number of components included in the roadmap.
    const ROADMAP_SIZE: usize = 10;

    let separator = "=".repeat(80);
    println!("\n{}", separator);
    println!("🛣️  推荐开发路线图");
    println!("{}", separator);

    // Most frequent component first; ties resolved by component name.
    let mut components: Vec<_> = stats.components.iter().collect();
    components.sort_unstable_by(|a, b| {
        b.1.total_count
            .cmp(&a.1.total_count)
            .then_with(|| a.0.cmp(b.0))
    });

    println!("\n优先级基于以下因素:");
    println!("  1. 📊 出现频率 (数量)");
    println!("  2. 🎯 解析成功率 (当前实现质量)");
    println!("  3. 💼 业务价值 (邮件流程重要性)");

    for (index, (component, comp_stats)) in components.iter().take(ROADMAP_SIZE).enumerate() {
        let success_rate = comp_stats.parse_success_rate();

        let status = if success_rate > 80.0 {
            "✅ 已完成"
        } else if success_rate > 50.0 {
            "🚧 进行中"
        } else {
            "⭐ 待开发"
        };

        let urgency = if comp_stats.total_count > 10000 {
            "🔥 高优先级"
        } else if comp_stats.total_count > 1000 {
            "⚡ 中优先级"
        } else {
            "💡 低优先级"
        };

        // Priorities are numbered from 1 in listing order.
        println!("\n{:2}. 组件: {}", index + 1, component);
        println!(
            "    出现次数: {} | 状态: {} | 紧急度: {}",
            comp_stats.total_count, status, urgency
        );
        println!(
            "    当前解析率: {:.1}% | 平均置信度: {:.2}",
            success_rate,
            comp_stats.average_confidence()
        );
    }

    println!("\n💡 建议的开发阶段:");
    println!("  🚀 阶段1: smtpd (已完成) + qmgr + cleanup");
    println!("  🛠️  阶段2: smtp + pickup + local");
    println!("  🔧 阶段3: 其他低频组件");
}

/// Entry point: expects exactly one argument, the path of the log file to
/// analyze, then prints the detailed report, the roadmap and usage advice.
///
/// Exits with status 1 after printing a usage message when the argument
/// count is wrong, or an error message when the file does not exist.
///
/// # Errors
///
/// Propagates any error returned by `analyze_log_file`.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    // `args_os` is used instead of `args` because the latter panics on
    // arguments that are not valid Unicode, which is legal for file paths.
    let args: Vec<_> = env::args_os().collect();

    if args.len() != 2 {
        // The argument list may be empty on some platforms, so do not index it.
        let program = args
            .first()
            .map(|name| name.to_string_lossy().into_owned())
            .unwrap_or_else(|| "<program>".to_string());
        eprintln!("用法: {} <postfix_log_file>", program);
        eprintln!("\n示例:");
        eprintln!("  {} logs/test1.log", program);
        eprintln!("  {} /var/log/mail.log", program);
        std::process::exit(1);
    }

    // Exactly two elements are present here, so index 1 is in bounds.
    let log_file = Path::new(&args[1]);

    if !log_file.exists() {
        eprintln!("❌ 错误: 文件不存在: {}", log_file.display());
        std::process::exit(1);
    }

    println!("🎯 Postfix组件统计分析工具");
    println!("目标: 基于真实数据制定开发优先级\n");

    let stats = analyze_log_file(log_file)?;

    print_detailed_report(&stats);
    print_development_roadmap(&stats);

    println!("\n💡 使用建议:");
    println!("  1. 优先开发高频组件的解析器");
    println!("  2. 重点关注业务核心流程 (邮件收发)");
    println!("  3. 定期重新分析以调整优先级");
    println!("  4. 每个组件都追求 >90% 解析成功率");

    Ok(())
}