use anyhow::{Context, Result};
use chrono::DateTime;
use matchy::schemas::{get_schema_info, is_known_database_type};
use matchy::{DataValue, DatabaseBuilder, DatabaseBuilderExt, MatchMode};
use std::collections::HashMap;
use std::fs;
use std::io::{self, BufRead};
use std::path::{Path, PathBuf};
use crate::cli_utils::json_to_data_map;
#[allow(clippy::too_many_arguments)]
pub fn cmd_build(
inputs: &[PathBuf],
output: &Path,
format: &str,
database_type: Option<&str>,
description: Option<&str>,
desc_lang: &str,
verbose: bool,
debug: bool,
case_insensitive: bool,
update_url: Option<&str>,
) -> Result<()> {
let match_mode = if case_insensitive {
MatchMode::CaseInsensitive
} else {
MatchMode::CaseSensitive
};
if debug {
println!("Building unified MMDB database (IP + patterns)...");
println!(" Input files: {}", inputs.len());
for input in inputs {
println!(" - {}", input.display());
}
println!(" Output: {}", output.display());
println!(" Format: {format}");
println!(
" Match mode: {}",
if case_insensitive {
"case-insensitive"
} else {
"case-sensitive"
}
);
println!();
}
let mut builder = DatabaseBuilder::new(match_mode);
if let Some(db_type) = database_type {
if is_known_database_type(db_type) {
builder = builder
.with_schema(db_type)
.with_context(|| format!("Failed to load schema for '{db_type}'"))?;
if verbose || debug {
let canonical_type = get_schema_info(db_type)
.map(|info| info.database_type)
.unwrap_or(db_type);
println!("Schema validation: enabled ({canonical_type})");
}
} else {
builder = builder.with_database_type(db_type.to_string());
}
}
if let Some(desc) = description {
builder = builder.with_description(desc_lang.to_string(), desc.to_string());
}
if let Some(url) = update_url {
builder = builder.with_update_url(url);
if verbose || debug {
println!("Update URL: {url}");
}
}
match format {
"text" => {
let mut total_count = 0;
for input in inputs {
if debug && inputs.len() > 1 {
println!(" Reading: {}...", input.display());
}
if let Ok(content) = fs::read_to_string(input) {
let trimmed = content.trim_start();
if trimmed.starts_with('{') || trimmed.starts_with('[') {
if trimmed.contains("\"Event\"") {
anyhow::bail!(
"File {} appears to be MISP JSON format.\n\n\
You specified --format text, but this looks like MISP JSON.\n\
Try: --format misp (or -f misp)",
input.display()
);
} else {
eprintln!(
"Warning: {} looks like JSON but you specified --format text.\n\
If this is a JSON file, use --format json instead.",
input.display()
);
}
}
let first_line = content.lines().next().unwrap_or("");
if first_line.contains(',') && first_line.split(',').count() > 3 {
eprintln!(
"Warning: {} looks like CSV but you specified --format text.\n\
If this is a CSV file, use --format csv instead.",
input.display()
);
}
}
let file = fs::File::open(input)
.with_context(|| format!("Failed to open input file: {}", input.display()))?;
let reader = io::BufReader::new(file);
let mut count = 0;
for line in reader.lines() {
let line = line?;
let entry = line.trim();
if !entry.is_empty() && !entry.starts_with('#') {
let data = HashMap::new();
builder.add_entry(entry, data).with_context(|| {
format!("Failed to add entry '{entry}'. Use a custom --database-type name if you don't want schema validation.")
})?;
count += 1;
total_count += 1;
if debug && total_count % 1000 == 0 {
println!(" Added {total_count} entries...");
}
}
}
if debug && inputs.len() > 1 {
println!(" {count} entries from this file");
}
}
if debug {
println!(" Total: {total_count} entries");
}
}
"csv" => {
let mut total_entries = 0;
for input in inputs {
if debug && inputs.len() > 1 {
println!(" Reading: {}...", input.display());
}
let file = fs::File::open(input)
.with_context(|| format!("Failed to open CSV file: {}", input.display()))?;
let mut reader = csv::Reader::from_reader(file);
let headers = reader.headers().context("Failed to read CSV headers")?;
let entry_col = headers
.iter()
.position(|h| h == "entry" || h == "key")
.ok_or_else(|| {
anyhow::anyhow!(
"CSV must have an 'entry' or 'key' column. Found headers: {}",
headers.iter().collect::<Vec<_>>().join(", ")
)
})?;
let data_cols: Vec<(usize, String)> = headers
.iter()
.enumerate()
.filter(|(i, _)| *i != entry_col)
.map(|(i, name)| (i, name.to_string()))
.collect();
for (row_num, result) in reader.records().enumerate() {
let record = result.context("Failed to read CSV record")?;
let entry = record.get(entry_col).ok_or_else(|| {
anyhow::anyhow!("Missing entry column at row {}", row_num + 2)
})?;
let mut data = HashMap::new();
for (col_idx, col_name) in &data_cols {
if let Some(value) = record.get(*col_idx) {
if !value.is_empty() {
let data_value = if let Ok(i) = value.parse::<i64>() {
DataValue::Int32(i32::try_from(i).unwrap_or(if i < 0 {
i32::MIN
} else {
i32::MAX
}))
} else if let Ok(u) = value.parse::<u64>() {
DataValue::Uint64(u)
} else if let Ok(f) = value.parse::<f64>() {
DataValue::Double(f)
} else if value == "true" || value == "false" {
DataValue::Bool(value == "true")
} else if let Ok(dt) = DateTime::parse_from_rfc3339(value) {
DataValue::Timestamp(dt.timestamp())
} else {
DataValue::String(value.to_string())
};
data.insert(col_name.clone(), data_value);
}
}
}
builder.add_entry(entry, data).with_context(|| {
format!("Failed to add entry '{}' at row {}. Use a custom --database-type name if you don't want schema validation.", entry, row_num + 2)
})?;
total_entries += 1;
if debug && total_entries % 1000 == 0 {
println!(" Added {total_entries} entries...");
}
}
if debug && inputs.len() > 1 {
println!(" {} entries from this file", reader.position().line());
}
}
if debug {
println!(" Total: {total_entries} entries");
}
}
"json" => {
let mut total_entries = 0;
for input in inputs {
if debug && inputs.len() > 1 {
println!(" Reading: {}...", input.display());
}
let content = fs::read_to_string(input)
.with_context(|| format!("Failed to read JSON file: {}", input.display()))?;
let entries: Vec<serde_json::Value> =
serde_json::from_str(&content).context("Failed to parse JSON")?;
for (i, item) in entries.iter().enumerate() {
let key = item
.get("key")
.and_then(|v| v.as_str())
.ok_or_else(|| anyhow::anyhow!("Missing 'key' field at index {i}"))?;
let data = if let Some(data_json) = item.get("data") {
json_to_data_map(data_json)?
} else {
HashMap::new()
};
builder.add_entry(key, data).with_context(|| {
format!("Failed to add entry '{key}' at index {i}. Use a custom --database-type name if you don't want schema validation.")
})?;
total_entries += 1;
if debug && total_entries % 1000 == 0 {
println!(" Added {total_entries} entries...");
}
}
if debug && inputs.len() > 1 {
println!(" {} entries from this file", entries.len());
}
}
if debug {
println!(" Total: {total_entries} entries");
}
}
"misp" => {
use matchy::misp_importer::MispImporter;
if debug {
println!(" Processing MISP JSON files (streaming mode)...");
}
let input_refs: Vec<&PathBuf> = inputs.iter().collect();
builder = MispImporter::build_from_files(
&input_refs,
MatchMode::CaseSensitive,
false, )
.context("Failed to process MISP JSON files")?;
if debug {
let stats = builder.stats();
println!(" Total indicators: {}", stats.total_entries);
}
}
_ => {
anyhow::bail!("Unknown format: {format}. Use 'text', 'csv', 'json', or 'misp'");
}
}
let stats = builder.stats();
if verbose || debug {
println!("\nBuilding database:");
println!(" Total entries: {}", stats.total_entries);
println!(" IP entries: {}", stats.ip_entries);
println!(" Literal entries: {}", stats.literal_entries);
println!(" Glob entries: {}", stats.glob_entries);
}
if debug {
println!("\nSerializing...");
}
let database_bytes = builder.build().context("Failed to build database")?;
if debug {
println!("Writing to disk...");
}
let temp_path = output.with_extension("tmp");
fs::write(&temp_path, &database_bytes)
.with_context(|| format!("Failed to write temp file: {}", temp_path.display()))?;
fs::rename(&temp_path, output)
.with_context(|| format!("Failed to rename to: {}", output.display()))?;
if verbose || debug {
println!("\n✓ Database built successfully!");
println!(" Output: {}", output.display());
println!(
" Database size: {:.2} MB ({} bytes)",
database_bytes.len() as f64 / (1024.0 * 1024.0),
database_bytes.len()
);
} else {
println!("✓ Database built: {}", output.display());
}
if debug {
println!(" Format: MMDB (extended with patterns)");
}
Ok(())
}