use clap::Parser;
use memmap2::MmapOptions;
use regex::Regex;
use std::fs::File;
use std::time::Instant;
use timberjack::analyzer::LogAnalyzer;
use timberjack::cli::Args;
use timberjack::formatter::print_results;
use timberjack::parser::{LogFormat, ParserRegistry};
/// File-size cutoff in bytes (10 MiB) consulted by `should_use_parallel`:
/// files strictly larger than this are analyzed in parallel when neither
/// `args.sequential` nor `args.parallel` forces a mode.
const PARALLEL_THRESHOLD_BYTES: u64 = 10 * 1024 * 1024;
/// Collects the lines used for automatic log-format detection.
///
/// Looks at no more than the first 4 KiB of `data` and returns up to ten
/// non-empty, whitespace-trimmed lines. Lines that are not valid UTF-8 are
/// skipped. When `data` is longer than the sampling window, the piece after
/// the last newline inside the window is ignored because it may be cut off
/// mid-line. A final line without a trailing newline is returned in full.
fn collect_sample_lines(data: &[u8]) -> Vec<&str> {
    const SAMPLE_WINDOW_BYTES: usize = 4096;
    const MAX_SAMPLE_LINES: usize = 10;

    let window = &data[..data.len().min(SAMPLE_WINDOW_BYTES)];
    let complete = if window.len() < data.len() {
        // The window ends before the file does: keep only whole lines.
        match window.iter().rposition(|&byte| byte == b'\n') {
            Some(last_newline) => &window[..last_newline],
            None => &window[..0],
        }
    } else {
        window
    };

    complete
        .split(|&byte| byte == b'\n')
        .filter_map(|raw| std::str::from_utf8(raw).ok())
        .map(str::trim)
        .filter(|line| !line.is_empty())
        .take(MAX_SAMPLE_LINES)
        .collect()
}

/// Command-line entry point.
///
/// Parses the arguments, resolves the log format (sampling the file when the
/// format is `"auto"`), compiles the optional search pattern, and then either
/// prints a bare match count (`args.count`) or runs the full analysis and
/// prints the results.
///
/// # Errors
///
/// Returns any I/O error raised while opening, inspecting, or memory-mapping
/// the input file, and an `InvalidInput` error when the search pattern is not
/// a valid regular expression.
///
/// # Panics
///
/// Panics if the parser registry has no parser for the resolved format.
fn main() -> std::io::Result<()> {
    let args = Args::parse();

    // Progress chatter is only printed for human-readable runs; in JSON and
    // count modes nothing but the result itself is written to stdout.
    let verbose = !args.json && !args.count;

    if verbose {
        println!(
            "\nWaking LumberJacks...Timberjack is chopping: {}\n",
            args.file
        );
    }

    let parser_registry = ParserRegistry::new();
    let format = match args.format.to_lowercase().as_str() {
        "auto" => {
            let file = File::open(&args.file)?;
            if file.metadata()?.len() == 0 {
                // Nothing to sample; use the generic format.
                LogFormat::Generic
            } else {
                // SAFETY: the mapping is only read, and only for the duration
                // of this block. NOTE(review): as with any file mapping, this
                // relies on the file not being truncated by another process
                // while it is mapped.
                let mmap = unsafe { MmapOptions::new().map(&file)? };
                let sample_lines = collect_sample_lines(&mmap);
                let (detected_format, _) = parser_registry.detect_format(&sample_lines);
                if verbose {
                    println!("Detected format: {:?}", detected_format);
                }
                detected_format
            }
        }
        "json" => LogFormat::Json,
        "apache" => LogFormat::Apache,
        "syslog" => LogFormat::Syslog,
        // Any other format name falls back to the generic format.
        _ => LogFormat::Generic,
    };

    let pattern = match &args.chop {
        Some(raw_pattern) => {
            if verbose {
                println!("Searching for pattern: {}", raw_pattern);
            }
            // The pattern is user input, so report a bad one as an error
            // instead of panicking.
            let compiled = Regex::new(raw_pattern).map_err(|err| {
                std::io::Error::new(
                    std::io::ErrorKind::InvalidInput,
                    format!("Invalid regex pattern: {}", err),
                )
            })?;
            Some(compiled)
        }
        None => None,
    };

    let level = args.level.as_deref();
    if let Some(level_str) = level {
        if verbose {
            println!("Filtering by level: {}", level_str);
        }
    }

    // Count mode uses its own analyzer inside `count_total_logs`, so return
    // before building the analyzer and parser that only the full analysis needs.
    if args.count {
        let count = count_total_logs(&args.file, pattern.as_ref(), level)?;
        println!("{}", count);
        return Ok(());
    }

    let start_time = Instant::now();
    let mut analyzer = LogAnalyzer::new();
    if !args.field.is_empty() {
        if verbose {
            println!("Filtering by fields: {:?}", args.field);
        }
        analyzer.set_field_filters(args.field);
    }

    let parser = parser_registry
        .get_parser(format)
        .expect("Failed to get parser for format");
    analyzer.set_parser(parser);

    // Explicit flags win; otherwise decide from the file size.
    let use_parallel = if args.sequential {
        false
    } else if args.parallel {
        true
    } else {
        should_use_parallel(&args.file)
    };

    let result = process_with_mmap(
        &args.file,
        &mut analyzer,
        pattern.as_ref(),
        level,
        args.trend,
        args.stats,
        use_parallel,
    )?;
    let elapsed = start_time.elapsed();

    // Count mode has already returned, so `verbose` here means "not JSON".
    if verbose {
        println!("Analysis completed in {:.2}s", elapsed.as_secs_f32());
        println!();
    }

    print_results(
        &result,
        args.trend,
        args.stats,
        args.json,
        args.top_errors,
        args.show_unique,
    );
    Ok(())
}
fn count_total_logs(
file_path: &str,
pattern: Option<&Regex>,
level_filter: Option<&str>,
) -> std::io::Result<usize> {
let file = File::open(file_path)?;
let mmap = unsafe { MmapOptions::new().map(&file)? };
let mut total_count = 0;
let mut analyzer = LogAnalyzer::new();
if let Some(pat) = pattern {
analyzer.configure(Some(&pat.to_string()), level_filter);
} else {
analyzer.configure(None, level_filter);
}
const FAST_CHUNK_SIZE: usize = 1_048_576; let mut position = 0;
while position < mmap.len() {
let chunk_end = std::cmp::min(position + FAST_CHUNK_SIZE, mmap.len());
let chunk = &mmap[position..chunk_end];
let mut result = timberjack::analyzer::AnalysisResult::default();
analyzer.process_chunk_data(chunk, &mut result, false, false);
total_count += result.count;
position += chunk_end - position;
}
Ok(total_count)
}
/// Memory-maps `file_path` and runs `analyzer` over the mapped bytes.
///
/// `pattern`, `level_filter`, `collect_trends`, `collect_stats`, and
/// `use_parallel` are forwarded unchanged to `LogAnalyzer::analyze_mmap`.
/// An empty file produces a default `AnalysisResult` without being mapped.
///
/// # Errors
///
/// Returns a `NotFound` error when the path does not exist, and otherwise any
/// I/O error raised while opening, inspecting, or mapping the file.
fn process_with_mmap(
    file_path: &str,
    analyzer: &mut LogAnalyzer,
    pattern: Option<&Regex>,
    level_filter: Option<&str>,
    collect_trends: bool,
    collect_stats: bool,
    use_parallel: bool,
) -> std::io::Result<timberjack::analyzer::AnalysisResult> {
    let source = std::path::Path::new(file_path);
    if !source.exists() {
        let message = format!("File not found: {}", file_path);
        return Err(std::io::Error::new(
            std::io::ErrorKind::NotFound,
            message,
        ));
    }

    let handle = File::open(source)?;
    match handle.metadata()?.len() {
        // Zero bytes: return a default result instead of mapping.
        0 => Ok(timberjack::analyzer::AnalysisResult::default()),
        _ => {
            let mapped = unsafe { MmapOptions::new().map(&handle)? };
            let outcome = analyzer.analyze_mmap(
                &mapped,
                pattern,
                level_filter,
                collect_trends,
                collect_stats,
                use_parallel,
            );
            Ok(outcome)
        }
    }
}
/// Reports whether the file at `file_path` is strictly larger than
/// `PARALLEL_THRESHOLD_BYTES`.
///
/// Returns `false` when the file's metadata cannot be read.
fn should_use_parallel(file_path: &str) -> bool {
    std::fs::metadata(file_path)
        .map(|info| info.len() > PARALLEL_THRESHOLD_BYTES)
        .unwrap_or(false)
}