use clap::Parser;
use memmap2::MmapOptions;
use regex::Regex;
use std::fs::File;
use std::io;
use std::io::BufRead;
use std::io::BufReader;
use std::io::IsTerminal;
use std::time::Instant;
use timberjack::analyzer::LogAnalyzer;
use timberjack::cli::Args;
use timberjack::formatter::print_results;
use timberjack::parser::{LogFormat, ParserRegistry};
/// File size, in bytes (10 MiB), above which a file is analyzed in parallel
/// when neither `--sequential` nor `--parallel` was requested.
const PARALLEL_THRESHOLD_BYTES: u64 = 10 * 1024 * 1024;

/// Upper bound on the number of distinct matched lines retained while
/// reading from stdin.
const MAX_UNIQUE_LINES: usize = 10_000;
/// Command-line entry point.
///
/// Parses the arguments, picks the input source (a file, or stdin when it is
/// piped), resolves the log format (auto-detected from a sample of the file
/// when `--format auto` is used), configures the analyzer, and then either
/// prints a bare match count (`--count`) or runs the full analysis and prints
/// the results.
///
/// # Errors
///
/// Returns an I/O error when the input file cannot be opened, mapped or read,
/// and an `InvalidInput` error when the `--chop` pattern is not a valid
/// regular expression.
fn main() -> std::io::Result<()> {
    let args = Args::parse();

    // Stdin is only considered when no file was given and stdin is not a terminal.
    let using_stdin = args.file.is_none() && !std::io::stdin().is_terminal();
    if !using_stdin && args.file.is_none() {
        eprintln!("Error: No input source. Provide a file or pipe data to stdin.");
        std::process::exit(1);
    }

    // Progress messages are suppressed for the machine-readable output modes.
    let verbose = !args.json && !args.count;

    if verbose {
        if using_stdin {
            println!("\nWaking LumberJacks...Timberjack is chopping from stdin\n");
        } else if let Some(file) = &args.file {
            println!("\nWaking LumberJacks...Timberjack is chopping: {}\n", file);
        }
    }

    let parser_registry = ParserRegistry::new();
    let format = match args.format.to_lowercase().as_str() {
        "auto" => {
            if let Some(file_path) = &args.file {
                let file = File::open(file_path)?;
                if file.metadata()?.len() == 0 {
                    // Nothing to sample from an empty file.
                    LogFormat::Generic
                } else {
                    // SAFETY: the mapping is only read and is dropped before `file`.
                    // Nothing here guards against another process modifying the
                    // file while it is mapped.
                    let mmap = unsafe { MmapOptions::new().map(&file)? };
                    let sample_lines = sample_detection_lines(&mmap);
                    let (detected_format, _) = parser_registry.detect_format(&sample_lines);
                    if verbose {
                        println!("Detected format: {:?}", detected_format);
                    }
                    detected_format
                }
            } else {
                // Stdin cannot be sampled ahead of time, so no detection is attempted.
                LogFormat::Generic
            }
        }
        "json" => LogFormat::Json,
        "apache" => LogFormat::Apache,
        "syslog" => LogFormat::Syslog,
        _ => LogFormat::Generic,
    };

    let pattern = match &args.chop {
        Some(pattern) => {
            if verbose {
                println!("Searching for pattern: {}", pattern);
            }
            // The pattern is user input: report a bad one as an error instead of panicking.
            let compiled = Regex::new(pattern).map_err(|err| {
                std::io::Error::new(
                    std::io::ErrorKind::InvalidInput,
                    format!("Invalid regex pattern '{}': {}", pattern, err),
                )
            })?;
            Some(compiled)
        }
        None => None,
    };

    let level = args.level.as_deref();
    if let Some(level_str) = level {
        if verbose {
            println!("Filtering by level: {}", level_str);
        }
    }

    let start_time = Instant::now();
    let mut analyzer = LogAnalyzer::new();

    if !args.field.is_empty() {
        if verbose {
            println!("Filtering by fields: {:?}", args.field);
        }
        analyzer.set_field_filters(args.field);
    }

    let parser = parser_registry
        .get_parser(format)
        .expect("Failed to get parser for format");
    analyzer.set_parser(parser);

    // Explicit flags win; otherwise the decision is based on the file size.
    let use_parallel = if args.sequential {
        false
    } else if args.parallel {
        true
    } else {
        should_use_parallel(args.file.as_deref())
    };

    if args.count {
        let count = count_total_logs(args.file.as_deref(), pattern.as_ref(), level)?;
        println!("{}", count);
        return Ok(());
    }

    // When stdin is not used a file path is guaranteed by the check at the top;
    // `process_with_mmap` reports an error itself should the path be missing.
    let result = if using_stdin {
        process_from_stdin(
            &mut analyzer,
            pattern.as_ref(),
            level,
            args.trend,
            args.stats,
        )?
    } else {
        process_with_mmap(
            args.file.as_deref(),
            &mut analyzer,
            pattern.as_ref(),
            level,
            args.trend,
            args.stats,
            use_parallel,
        )?
    };

    let elapsed = start_time.elapsed();
    if !args.json {
        if using_stdin {
            println!(
                "Analysis completed in {:.2}s (source: stdin)",
                elapsed.as_secs_f32()
            );
        } else if let Some(file) = &args.file {
            println!(
                "Analysis completed in {:.2}s (source: {})",
                elapsed.as_secs_f32(),
                file
            );
        }
        println!();
    }

    print_results(
        &result,
        args.trend,
        args.stats,
        args.json,
        args.top_errors,
        args.show_unique,
    );
    Ok(())
}

/// Collects up to ten trimmed, non-blank, valid UTF-8 lines from the first
/// 4 KiB of `data`, for use as a format-detection sample.
///
/// When `data` is longer than the sampled window, the trailing partial line
/// (everything after the last newline in the window) is ignored. When the
/// whole input fits in the window, the final line is included in full even if
/// it has no terminating newline.
fn sample_detection_lines(data: &[u8]) -> Vec<&str> {
    const MAX_SAMPLE_BYTES: usize = 4096;
    const MAX_SAMPLE_LINES: usize = 10;

    let window = &data[..data.len().min(MAX_SAMPLE_BYTES)];
    let complete = if window.len() < data.len() {
        // The window ends mid-file: keep only whole lines.
        match window.iter().rposition(|&byte| byte == b'\n') {
            Some(last_newline) => &window[..last_newline],
            None => &window[..0],
        }
    } else {
        window
    };

    complete
        .split(|&byte| byte == b'\n')
        .filter_map(|raw| std::str::from_utf8(raw).ok())
        .map(str::trim)
        .filter(|line| !line.is_empty())
        .take(MAX_SAMPLE_LINES)
        .collect()
}
/// Analyzes log lines read from standard input.
///
/// Each line is passed to `analyzer.analyze_line`; every line it accepts
/// increments `count` and, depending on the flags, contributes to the
/// per-hour trends and to the level / error-type / unique-message statistics.
/// Distinct matched lines and their occurrence counts are recorded only while
/// fewer than `MAX_UNIQUE_LINES` distinct lines have been stored.
///
/// # Arguments
///
/// * `analyzer` - analyzer to use; it is (re)configured here when a pattern
///   or a level filter is supplied.
/// * `pattern` - optional regular expression handed to the analyzer.
/// * `level_filter` - optional level handed to the analyzer.
/// * `collect_trends` - whether to bucket matches by timestamp prefix.
/// * `collect_stats` - whether to gather level, error-type and message stats.
///
/// # Errors
///
/// Returns the first I/O error produced while reading a line from stdin.
fn process_from_stdin(
    analyzer: &mut LogAnalyzer,
    pattern: Option<&Regex>,
    level_filter: Option<&str>,
    collect_trends: bool,
    collect_stats: bool,
) -> std::io::Result<timberjack::analyzer::AnalysisResult> {
    if let Some(pat) = pattern {
        analyzer.configure(Some(&pat.to_string()), level_filter);
    } else if level_filter.is_some() {
        analyzer.configure(None, level_filter);
    }

    let mut result = timberjack::analyzer::AnalysisResult {
        deduplicated: true,
        ..Default::default()
    };

    let stdin = io::stdin();
    let reader = BufReader::new(stdin);

    for line_result in reader.lines() {
        let line = line_result?;
        let Some((matched_line, level, timestamp)) = analyzer.analyze_line(
            &line,
            None,
            analyzer.get_level_filter(),
            collect_trends,
            collect_stats,
        ) else {
            continue;
        };

        result.count += 1;

        if result.matched_lines.len() < MAX_UNIQUE_LINES {
            let occurrences = result.line_counts.entry(matched_line.clone()).or_insert(0);
            *occurrences += 1;
            // `line_counts` and `matched_lines` are only ever updated together
            // here, so a count of exactly one means the line is new. This avoids
            // a linear scan of `matched_lines` for every matched line.
            if *occurrences == 1 {
                result.matched_lines.push(matched_line.clone());
            }
        }

        if collect_trends {
            if let Some(ts) = timestamp {
                // Bucket by the first 13 bytes of the timestamp. `get` yields
                // `None` for short timestamps and for a cut that would land
                // inside a multi-byte character, in which case the whole
                // timestamp is used rather than panicking.
                let hour = ts.get(..13).map(str::to_string).unwrap_or(ts);
                *result.time_trends.entry(hour).or_insert(0) += 1;
            }
        }

        if collect_stats {
            *result.levels_count.entry(level.clone()).or_insert(0) += 1;

            if let Some(error_type) = analyzer.extract_error_type(&matched_line) {
                *result.error_types.entry(error_type).or_insert(0) += 1;
            }

            // The message is the text between the first and second `]`;
            // lines without a `]` are stored whole.
            if let Some(message) = matched_line.split(']').nth(1).map(|s| s.trim().to_string()) {
                result.unique_messages.insert(message);
            } else {
                result.unique_messages.insert(matched_line);
            }
        }
    }

    Ok(result)
}
/// Counts the log lines accepted by a freshly configured analyzer.
///
/// When no file path is given and stdin is not a terminal, lines are read
/// from stdin and counted one at a time. Otherwise the file is memory-mapped
/// and handed to `process_chunk_data` in chunks of roughly 1 MiB, each of
/// which ends on a line boundary.
///
/// # Arguments
///
/// * `file_path` - file to count in, or `None` to read from piped stdin.
/// * `pattern` - optional regular expression handed to the analyzer.
/// * `level_filter` - optional level handed to the analyzer.
///
/// # Errors
///
/// Returns `InvalidInput` when there is neither a file path nor piped stdin,
/// and any I/O error raised while reading stdin or opening/mapping the file.
fn count_total_logs(
    file_path: Option<&str>,
    pattern: Option<&Regex>,
    level_filter: Option<&str>,
) -> std::io::Result<usize> {
    let mut total_count = 0;
    let mut analyzer = LogAnalyzer::new();

    if let Some(pat) = pattern {
        analyzer.configure(Some(&pat.to_string()), level_filter);
    } else {
        analyzer.configure(None, level_filter);
    }

    if file_path.is_none() && !std::io::stdin().is_terminal() {
        let stdin = io::stdin();
        let reader = stdin.lock();
        for line_result in reader.lines() {
            let line = line_result?;
            if analyzer
                .analyze_line(&line, None, analyzer.get_level_filter(), false, false)
                .is_some()
            {
                total_count += 1;
            }
        }
        return Ok(total_count);
    }

    let path = file_path.ok_or_else(|| {
        std::io::Error::new(
            std::io::ErrorKind::InvalidInput,
            "No file path provided".to_string(),
        )
    })?;

    let file = File::open(path)?;
    // An empty file has nothing to count; skip mapping it, as the other
    // file-based code paths in this program do.
    if file.metadata()?.len() == 0 {
        return Ok(0);
    }

    // SAFETY: the mapping is only read and is dropped before `file`. Nothing
    // here guards against another process modifying the file while it is mapped.
    let mmap = unsafe { MmapOptions::new().map(&file)? };

    const FAST_CHUNK_SIZE: usize = 1_048_576;
    let mut position = 0;

    while position < mmap.len() {
        let tentative_end = std::cmp::min(position + FAST_CHUNK_SIZE, mmap.len());
        // Grow the chunk to just past the next newline so that a line is never
        // split across two chunks and analyzed as two separate fragments.
        let chunk_end = match mmap[tentative_end..].iter().position(|&byte| byte == b'\n') {
            Some(offset) => tentative_end + offset + 1,
            None => mmap.len(),
        };

        let chunk = &mmap[position..chunk_end];
        let mut result = timberjack::analyzer::AnalysisResult::default();
        analyzer.process_chunk_data(chunk, &mut result, false, false);
        total_count += result.count;

        position = chunk_end;
    }

    Ok(total_count)
}
/// Runs the analyzer over a memory-mapped log file.
///
/// # Arguments
///
/// * `file_path` - path of the file to analyze; `None` is rejected.
/// * `analyzer` - analyzer whose `analyze_mmap` performs the work.
/// * `pattern`, `level_filter` - optional filters forwarded unchanged.
/// * `collect_trends`, `collect_stats`, `use_parallel` - flags forwarded
///   unchanged to `analyze_mmap`.
///
/// # Errors
///
/// Returns `InvalidInput` when no path is given, `NotFound` when the path
/// does not exist, and any I/O error from opening, inspecting or mapping
/// the file. An empty file yields a default (empty) result.
fn process_with_mmap(
    file_path: Option<&str>,
    analyzer: &mut LogAnalyzer,
    pattern: Option<&Regex>,
    level_filter: Option<&str>,
    collect_trends: bool,
    collect_stats: bool,
    use_parallel: bool,
) -> std::io::Result<timberjack::analyzer::AnalysisResult> {
    let Some(path_str) = file_path else {
        return Err(std::io::Error::new(
            std::io::ErrorKind::InvalidInput,
            "No file path provided".to_string(),
        ));
    };

    let log_path = std::path::Path::new(path_str);
    if !log_path.exists() {
        let message = format!("File not found: {}", path_str);
        return Err(std::io::Error::new(std::io::ErrorKind::NotFound, message));
    }

    let handle = File::open(log_path)?;

    // An empty file is never mapped; it simply produces an empty result.
    let byte_len = handle.metadata()?.len();
    if byte_len == 0 {
        return Ok(timberjack::analyzer::AnalysisResult::default());
    }

    let mapped = unsafe { MmapOptions::new().map(&handle)? };
    let analysis = analyzer.analyze_mmap(
        &mapped,
        pattern,
        level_filter,
        collect_trends,
        collect_stats,
        use_parallel,
    );
    Ok(analysis)
}
/// Decides whether a file is large enough to be analyzed in parallel.
///
/// Returns `true` only when `file_path` is present, its metadata can be read,
/// and its size is strictly greater than `PARALLEL_THRESHOLD_BYTES`. A missing
/// path or a metadata failure yields `false`.
fn should_use_parallel(file_path: Option<&str>) -> bool {
    let Some(path) = file_path else {
        return false;
    };

    std::fs::metadata(path)
        .map(|meta| meta.len() > PARALLEL_THRESHOLD_BYTES)
        .unwrap_or(false)
}