// postfix-log-parser 0.2.0
//
// 高性能模块化Postfix日志解析器,经3.2GB生产数据验证,SMTPD事件100%准确率
// Documentation
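//! Fast Postfix component statistics tool.
//!
//! Quickly tallies how often each component appears in a large log file, favoring speed over detailed parsing.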

use regex::Regex;
use std::collections::HashMap;
use std::env;
use std::fs::File;
use std::io::{BufRead, BufReader};
use std::path::Path;
use std::time::Instant;

/// Per-component tally: an occurrence counter plus a few sample log lines.
#[derive(Debug, Default)]
struct FastComponentStats {
    /// Total number of occurrences of the component.
    count: u64,
    /// Example log lines (kept to show the component's typical line format).
    examples: Vec<String>,
}

/// Aggregate results of one analysis pass over a log file.
#[derive(Debug, Default)]
struct FastOverallStats {
    /// Statistics keyed by component name.
    components: HashMap<String, FastComponentStats>,
    /// Total number of lines processed.
    total_lines: u64,
    /// Number of empty (whitespace-only) lines.
    empty_lines: u64,
    /// Number of lines in which no component could be recognized.
    unrecognized_lines: u64,
    /// Processing time, in seconds.
    processing_time: f64,
}

/// Extracts the Postfix component name from raw log lines.
struct FastAnalyzer {
    // Precompiled regular expression, so it is not recompiled for every line.
    postfix_pattern: Regex,
}

impl FastAnalyzer {
    /// Creates an analyzer whose line-matching regex is compiled once up front.
    fn new() -> Self {
        // Expected line shape: `Mon DD HH:MM:SS hostname postfix/COMPONENT[pid]: message`.
        // Capture group 1 is the component name; it ends at the first `/`, `[`
        // or whitespace character.
        let postfix_pattern = Regex::new(
            r"^\w{3}\s+\d{1,2}\s+\d{2}:\d{2}:\d{2}\s+\S+\s+postfix/([^/\[\s]+)",
        )
        .unwrap();

        Self { postfix_pattern }
    }

    /// Returns the component name found in `line`, or `None` when the line
    /// does not match the expected Postfix log format.
    fn extract_component(&self, line: &str) -> Option<String> {
        let captures = self.postfix_pattern.captures(line)?;
        let component = captures.get(1)?;
        Some(component.as_str().to_string())
    }
}

/// Streams a Postfix log file once and tallies how often each component appears.
///
/// Every line is counted as empty, attributed to the component returned by
/// `FastAnalyzer::extract_component`, or counted as unrecognized. At most three
/// raw lines per component are kept as examples. A progress line is written to
/// stdout whenever a non-empty line's zero-based index is a multiple of 50,000.
///
/// Lines are read as raw bytes and decoded lossily, so bytes that are not valid
/// UTF-8 are replaced with U+FFFD instead of aborting the whole analysis.
///
/// # Errors
///
/// Returns an error if the file cannot be opened, if reading from it fails, or
/// if stdout cannot be flushed.
fn analyze_log_file_fast(file_path: &Path) -> Result<FastOverallStats, Box<dyn std::error::Error>> {
    let start_time = Instant::now();
    let file = File::open(file_path)?;
    let mut reader = BufReader::new(file);
    let mut stats = FastOverallStats::default();
    let analyzer = FastAnalyzer::new();

    println!("🚀 快速分析日志文件: {}", file_path.display());
    println!("⚡ 模式: 仅提取组件名,跳过详细解析");

    // One byte buffer is reused for every line to avoid a fresh allocation per line.
    let mut buf: Vec<u8> = Vec::new();
    loop {
        buf.clear();
        if reader.read_until(b'\n', &mut buf)? == 0 {
            break;
        }

        // Drop the line terminator (`\n` or `\r\n`), as `BufRead::lines` would.
        if buf.last() == Some(&b'\n') {
            buf.pop();
            if buf.last() == Some(&b'\r') {
                buf.pop();
            }
        }

        // Mail logs can contain arbitrary bytes; decode lossily rather than fail.
        let line = String::from_utf8_lossy(&buf);

        // Zero-based index of the current line.
        let line_number = stats.total_lines;
        stats.total_lines += 1;

        if line.trim().is_empty() {
            stats.empty_lines += 1;
            continue;
        }

        // Show progress.
        if line_number % 50000 == 0 {
            let elapsed = start_time.elapsed().as_secs_f64();
            // Guard against a zero elapsed time so the rate is never inf/NaN.
            let rate = if elapsed > 0.0 {
                (line_number + 1) as f64 / elapsed / 1000.0
            } else {
                0.0
            };
            print!("\r⚡ 已处理: {} 行 ({:.1}K/s)", line_number + 1, rate);
            std::io::Write::flush(&mut std::io::stdout())?;
        }

        // Quickly extract the component name.
        if let Some(component) = analyzer.extract_component(&line) {
            let comp_stats = stats.components.entry(component).or_default();
            comp_stats.count += 1;

            // Keep only the first 3 examples, enough to show the line format.
            if comp_stats.examples.len() < 3 {
                comp_stats.examples.push(line.into_owned());
            }
        } else {
            stats.unrecognized_lines += 1;
        }
    }

    stats.processing_time = start_time.elapsed().as_secs_f64();

    println!(
        "\r✅ 快速分析完成!总共处理了 {} 行,耗时 {:.2}",
        stats.total_lines, stats.processing_time
    );
    Ok(stats)
}

/// Prints the summary report: performance figures followed by a table of
/// components ordered by descending occurrence count.
///
/// Components with equal counts are ordered by name so the output is the same
/// on every run; `HashMap` iteration order alone is not stable across runs.
fn print_fast_report(stats: &FastOverallStats) {
    let separator = "=".repeat(80);
    println!("\n{}", separator);
    println!("⚡ Postfix组件快速分析报告");
    println!("{}", separator);

    // Performance figures. A zero processing time would yield inf/NaN, so
    // report 0 in that case.
    let throughput = if stats.processing_time > 0.0 {
        stats.total_lines as f64 / stats.processing_time
    } else {
        0.0
    };
    println!("\n🚀 性能统计:");
    println!("  处理速度: {:.0} 行/秒", throughput);
    println!("  总耗时: {:.2}", stats.processing_time);
    println!("  总行数: {}", stats.total_lines);
    println!("  空行数: {}", stats.empty_lines);
    println!("  无法识别: {}", stats.unrecognized_lines);
    println!("  识别组件数: {}", stats.components.len());

    // Sort components by frequency (descending), then by name for determinism.
    let mut components: Vec<_> = stats.components.iter().collect();
    components.sort_by(|a, b| b.1.count.cmp(&a.1.count).then_with(|| a.0.cmp(b.0)));

    println!("\n🏆 组件出现频率排行:");
    println!(
        "{:<20} {:<15} {:<10} {:<10}",
        "组件", "出现次数", "占比%", "业务重要性"
    );
    let dash_line = "-".repeat(80);
    println!("{}", dash_line);

    // Lines attributed to some component; saturating so inconsistent counters
    // cannot cause an underflow panic.
    let total_component_lines = stats
        .total_lines
        .saturating_sub(stats.empty_lines)
        .saturating_sub(stats.unrecognized_lines);

    for (component, comp_stats) in &components {
        let percentage = if total_component_lines > 0 {
            comp_stats.count as f64 / total_component_lines as f64 * 100.0
        } else {
            0.0
        };

        let importance = match component.as_str() {
            "smtpd" | "qmgr" | "smtp" => "🔥 核心",
            "cleanup" | "pickup" | "local" => "⚡ 重要",
            _ => "💡 一般",
        };

        println!(
            "{:<20} {:<15} {:<10.1} {:<10}",
            component, comp_stats.count, percentage, importance
        );
    }
}

/// Prints a three-phase development roadmap derived from component frequencies.
///
/// Phase 1 holds `smtpd`, `qmgr`, `smtp` and `cleanup` when they occur more
/// than 1000 times; phase 2 holds those same components at lower counts plus
/// `pickup`, `local`, `virtual` and `bounce`; everything else is phase 3, of
/// which only the first eight are listed.
///
/// Components are ordered by descending count, with ties broken by name, so
/// the listing (and which phase-3 entries make the cut) is the same on every
/// run regardless of `HashMap` iteration order.
fn print_fast_roadmap(stats: &FastOverallStats) {
    let separator = "=".repeat(80);
    println!("\n{}", separator);
    println!("🛣️  数据驱动的开发路线图");
    println!("{}", separator);

    let mut components: Vec<_> = stats.components.iter().collect();
    components.sort_by(|a, b| b.1.count.cmp(&a.1.count).then_with(|| a.0.cmp(b.0)));

    println!("\n📊 基于真实数据的优先级建议:");

    let mut phase1 = Vec::new();
    let mut phase2 = Vec::new();
    let mut phase3 = Vec::new();

    for &(component, comp_stats) in &components {
        let phase = match component.as_str() {
            // Core mail-flow components: phase 1 only when seen often enough.
            "smtpd" | "qmgr" | "smtp" | "cleanup" => {
                if comp_stats.count > 1000 {
                    1
                } else {
                    2
                }
            }
            // Important but non-core components.
            "pickup" | "local" | "virtual" | "bounce" => 2,
            // Everything else.
            _ => 3,
        };

        match phase {
            1 => phase1.push((component, comp_stats)),
            2 => phase2.push((component, comp_stats)),
            _ => phase3.push((component, comp_stats)),
        }
    }

    if !phase1.is_empty() {
        println!("\n🚀 第一阶段 - 核心邮件流程 (高优先级):");
        for (component, comp_stats) in phase1 {
            // `smtpd` is reported as already done and gets no leading icon.
            let (icon, status) = if component.as_str() == "smtpd" {
                ("", "✅ 已完成")
            } else {
                ("🔥", "⭐ 待开发")
            };
            println!(
                "  {} {:<12} ({:>8} 次, {}) ",
                icon, component, comp_stats.count, status
            );
        }
    }

    if !phase2.is_empty() {
        println!("\n🛠️  第二阶段 - 重要功能组件 (中优先级):");
        for (component, comp_stats) in phase2 {
            println!(
                "  ⚡ {:<12} ({:>8} 次, ⭐ 待开发)",
                component, comp_stats.count
            );
        }
    }

    if !phase3.is_empty() {
        println!("\n🔧 第三阶段 - 其他组件 (低优先级):");
        // Show only the first 8 entries.
        for (component, comp_stats) in phase3.iter().take(8) {
            println!(
                "  💡 {:<12} ({:>8} 次, ⭐ 待开发)",
                component, comp_stats.count
            );
        }
        if phase3.len() > 8 {
            println!("  ... 还有 {} 个低频组件", phase3.len() - 8);
        }
    }

    println!("\n💡 开发建议:");
    println!("  1. 🎯 按阶段顺序开发,确保核心功能优先");
    println!("  2. 📊 定期重新分析大文件,验证优先级");
    println!("  3. 🚀 每个组件追求 >90% 解析成功率");
    println!("  4. ⚡ 使用这个快速工具监控开发进度");
}

/// Prints the stored example log lines for the eight most frequent components.
///
/// Components are ordered by descending count, with ties broken by name, so
/// the selection of eight is the same on every run regardless of `HashMap`
/// iteration order.
fn print_component_examples(stats: &FastOverallStats) {
    println!("\n📋 各组件日志格式示例 (用于开发参考):");
    let dash_line = "-".repeat(80);
    println!("{}", dash_line);

    let mut components: Vec<_> = stats.components.iter().collect();
    components.sort_by(|a, b| b.1.count.cmp(&a.1.count).then_with(|| a.0.cmp(b.0)));

    // Show only the top 8 components.
    for (component, comp_stats) in components.iter().take(8) {
        println!("\n🔧 组件: {} (出现 {} 次)", component, comp_stats.count);
        for (i, example) in comp_stats.examples.iter().enumerate() {
            println!("  {}: {}", i + 1, example);
        }
    }
}

/// Command-line entry point.
///
/// Expects the log file path as the first argument. The optional `--examples`
/// flag may appear anywhere after the path and enables the per-component
/// example listing. Exits with status 1 when the path is missing or does not
/// exist.
///
/// # Errors
///
/// Propagates any error returned by `analyze_log_file_fast`.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    let args: Vec<String> = env::args().collect();

    if args.len() < 2 {
        eprintln!("用法: {} <postfix_log_file> [options]", args[0]);
        eprintln!("\n选项:");
        eprintln!("  --examples    显示各组件的日志格式示例");
        eprintln!("\n示例:");
        eprintln!("  {} logs/test1.log", args[0]);
        eprintln!("  {} /var/log/mail.log --examples", args[0]);
        eprintln!("  {} ./logs/postfix_part_aa", args[0]);
        std::process::exit(1);
    }

    let log_file = Path::new(&args[1]);
    // Accept the flag at any position after the file path, not only directly
    // after it.
    let show_examples = args.iter().skip(2).any(|arg| arg == "--examples");

    if !log_file.exists() {
        eprintln!("❌ 错误: 文件不存在: {}", log_file.display());
        std::process::exit(1);
    }

    println!("⚡ Postfix组件快速统计分析工具");
    println!("🎯 目标: 基于真实数据快速制定开发优先级\n");

    let stats = analyze_log_file_fast(log_file)?;

    print_fast_report(&stats);
    print_fast_roadmap(&stats);

    if show_examples {
        print_component_examples(&stats);
    }

    println!("\n🚀 性能提示:");
    println!("  • 使用 --examples 查看各组件日志格式");
    println!("  • 这个快速版本专注速度,如需详细解析用原版工具");
    println!("  • 建议优先开发出现频率 >1000 的组件");

    Ok(())
}