// CLI-specific types and structures
// This module contains the command-line interface definitions and parsing logic
use crate::config::{MultilineJoin, ScriptStageType};
use anyhow::Result;
use clap::{ArgMatches, Parser};
// CLI types - specific to command-line interface
/// Named input formats known to the command-line layer.
///
/// Deriving `clap::ValueEnum` gives every variant a kebab-case value name
/// (for example `AutoPerFile` becomes `auto-per-file`). The variants are
/// enumerated in declaration order, so keep the order stable.
#[derive(Clone, Debug, clap::ValueEnum)]
pub enum InputFormat {
    Auto,
    AutoPerFile,
    Json,
    Line,
    Raw,
    Logfmt,
    Syslog,
    Cef,
    Csv,
    Tsv,
    Csvnh,
    Tsvnh,
    Combined,
    Cols,
    Regex,
}
/// Output formatter choices accepted by `-F` / `--output-format`.
///
/// `Default` is both the `Default::default()` value of this enum and the
/// value the CLI falls back to when the flag is omitted.
#[derive(Clone, Debug, Default, clap::ValueEnum)]
pub enum OutputFormat {
    #[default]
    Default,
    Json,
    Logfmt,
    Inspect,
    Levelmap,
    Keymap,
    Tailmap,
    Csv,
    Tsv,
    Csvnh,
    Tsvnh,
}
/// File processing order selected with `--file-order` (`cli`, `name`, `mtime`).
#[derive(Clone, Debug, clap::ValueEnum)]
pub enum FileOrder {
    Cli,
    Name,
    Mtime,
}
/// Rendering choices for `-m` / `--metrics`.
///
/// The `///` comments on the variants below are picked up by clap as the
/// help text of the corresponding possible value, so editing them changes
/// what `--help` prints.
#[derive(Clone, Debug, clap::ValueEnum)]
pub enum MetricsFormat {
    Short,
    Full,
    Json,
    /// Tab-separated record stream (one `metric<TAB>key<TAB>value` row per line,
    /// sorted by count/score descending) for piping to head/tail/sort/awk.
    Tsv,
    /// Resolve at output time: the human report on a terminal, `tsv` when piped
    /// or redirected. The default for `-m` and `--freq`/`--describe`. Hidden
    /// because it is the implicit default rather than something to type.
    #[value(hide = true)]
    Auto,
}
/// Rendering choices for `-s` / `--stats` (`table` or `json`).
#[derive(Clone, Debug, clap::ValueEnum)]
pub enum StatsFormat {
    Table,
    Json,
}
/// Rendering choices for a field-discovery report (`table` or `json`).
///
/// NOTE(review): presumably the value type of `-d` / `--discover`; the field
/// that uses it is declared outside the part of the file reviewed here.
#[derive(Clone, Debug, clap::ValueEnum)]
pub enum DiscoverFieldsFormat {
    Table,
    Json,
}
/// Rendering choices for `--drain` template summaries.
///
/// `Table` is the `Default::default()` value of this enum.
#[derive(Clone, Debug, Default, clap::ValueEnum)]
pub enum DrainFormat {
    #[default]
    Table,
    Full,
    Id,
    Json,
}
/// Shell names selectable on the command line, one variant per shell.
#[derive(Clone, Copy, Debug, clap::ValueEnum)]
pub enum ShellCompletion {
    Bash,
    Zsh,
    Fish,
    // Explicit value name: the derived kebab-case spelling of `PowerShell`
    // would be `power-shell`.
    #[value(name = "powershell")]
    PowerShell,
    Elvish,
}
// CLI structure - contains all command-line arguments and options
#[derive(Parser)]
#[command(name = "kelora")]
#[command(about = "A command-line log analysis tool with embedded Rhai scripting")]
#[command(
long_about = "A command-line log analysis tool with embedded Rhai scripting\n\nProcesses logs through a streaming pipeline: parse -> filter/transform (Rhai) -> format.\nRuns sequentially by default; add --parallel for high-throughput batch analysis.\n\nRun 'kelora' in a terminal with no arguments for interactive mode - a readline-based REPL with\ncommand history, glob expansion, and proper quoting (especially helpful on Windows). When stdin\nis not a terminal (piped or redirected), kelora reads that input instead of going interactive.\n\nFor a quick reference with worked examples, run: kelora -h\n\nTo search this reference, run: kelora --help KEYWORD (e.g. 'kelora --help -j', 'kelora --help --since', or 'kelora --help time')\n\nTopic-specific help (--help-rhai, --help-functions, --help-examples, ...) is listed under Help Options below."
)]
#[command(author = "Dirk Loss <mail@dirk-loss.de>")]
#[command(version)]
#[command(args_override_self = true)]
#[command(after_long_help = "Exit Codes:\n \
0 Success — the run did its job. By default Kelora is resilient: malformed\n \
lines in an otherwise-valid stream, and best-effort --exec transform\n \
errors, are reported on stderr and counted but do NOT change the exit code\n \
(use --strict to make the first such error fatal instead).\n \
1 Error. Any of:\n \
- a named input file could not be opened\n \
- an --assert condition failed\n \
- a gate stage saw input but never once succeeded (parsing failed on every\n \
line, or a --filter errored on every event)\n \
- a forbidden operation (e.g. mutating `conf` outside --begin)\n \
- with --strict, ANY single parse/filter/exec error (also aborts early)\n \
2 Invalid command-line usage (unknown flag, bad value, conflicting options, or a\n \
malformed config file).\n \
130 Interrupted (SIGINT / Ctrl-C).\n \
134 Internal panic (SIGABRT) — a bug; please report it.\n \
141 Broken pipe (SIGPIPE).\n \
143 Terminated (SIGTERM).")]
pub struct Cli {
/// Input files (stdin if not specified, or use "-" to explicitly specify stdin)
pub files: Vec<String>,
/// Run without reading input (useful for scripts that only use --begin/--end stages)
#[arg(long = "no-input", help_heading = "Input Options")]
pub no_input: bool,
/// Input format. Available formats: auto (default), auto-per-file, json, line, raw, logfmt, syslog, cef, csv, tsv, csvnh, tsvnh, combined, cols:<spec>, regex:<pattern>.
/// With 'auto', the format is detected from the first non-empty line and applied to every line; for files that mix formats use a cascade (below) instead.
/// Use cols:<spec> for column parsing, regex:<pattern> for regex parsing with named groups, and csv/tsv with optional type annotations.
/// Built-in application-log formats: cri (Kubernetes container logs) plus glog, nginx-error, apache-error, log4j, python-logging, redis, s3, haproxy, iso8601-level (adapted from lnav). Select with -f <name>; most are also recognized by auto-detection. See --help-formats.
/// Cascade mode: pass a comma-separated list (e.g. 'json,logfmt,line') to try each parser in order; the first success wins, so put catch-all fallbacks like 'line' or 'raw' last. Adds an '_format' field to each event.
/// Repeat -f to build a cascade that includes spec-based parsers: -f json -f 'cols:ts(2) level *msg'. Each -f is tried in order; put catch-alls ('line', 'raw', 'cols:') last (regex declines non-matching lines, so it can sit earlier).
/// Examples: -f json, -f json,line, -f json -f 'cols:ts level *msg', -f 'regex:(?P<code:int>\\d+) (?P<msg>.*)', -f 'csv status:int bytes:int'.
#[arg(
short = 'f',
long = "input-format",
default_value = "auto",
action = clap::ArgAction::Append,
num_args = 1,
help_heading = "Input Options",
value_parser = parse_format_value
)]
pub format: Vec<String>,
/// Shortcut for -f json.
#[arg(short = 'j', help_heading = "Input Options", conflicts_with = "format")]
pub json_input: bool,
/// File processing order.
#[arg(
long = "file-order",
value_enum,
value_name = "ORDER",
default_value = "cli",
help_heading = "Input Options"
)]
pub file_order: FileOrder,
/// Merge already-sorted input files by timestamp; aborts on missing timestamps, parse failures, or per-file disorder.
#[arg(long = "merge-sorted", help_heading = "Input Options")]
pub merge_ts: bool,
/// Skip the first N input lines.
#[arg(long = "skip-lines", value_name = "N", help_heading = "Input Options")]
pub skip_lines: Option<usize>,
/// Read only the first N input lines (stops I/O early, complementing --take which limits output events)
#[arg(long = "head", value_name = "N", help_heading = "Input Options")]
pub head: Option<usize>,
/// Start emitting sections from the line matching this regex (inclusive start).
/// All --section-* regexes are unanchored; use ^...$ to match a whole line.
#[arg(
long = "section-from",
value_name = "REGEX",
help_heading = "Input Options",
conflicts_with = "section_after"
)]
pub section_from: Option<String>,
/// Start emitting sections after the line matching this regex (exclusive start)
#[arg(
long = "section-after",
value_name = "REGEX",
help_heading = "Input Options",
conflicts_with = "section_from"
)]
pub section_after: Option<String>,
/// Stop before the line matching this regex (exclusive end)
#[arg(
long = "section-before",
value_name = "REGEX",
help_heading = "Input Options",
conflicts_with = "section_through"
)]
pub section_before: Option<String>,
/// Stop after emitting the line matching this regex (inclusive end)
#[arg(
long = "section-through",
value_name = "REGEX",
help_heading = "Input Options",
conflicts_with = "section_before"
)]
pub section_through: Option<String>,
/// Maximum number of sections to process (default: -1 for unlimited)
#[arg(
long = "max-sections",
value_name = "N",
default_value = "-1",
help_heading = "Input Options"
)]
pub max_sections: i64,
/// Keep only input lines matching this regex pattern (applied before ignore-lines)
#[arg(
long = "keep-lines",
value_name = "REGEX",
help_heading = "Input Options"
)]
pub keep_lines: Option<String>,
/// Ignore input lines matching this regex pattern.
#[arg(
long = "ignore-lines",
value_name = "REGEX",
help_heading = "Input Options"
)]
pub ignore_lines: Option<String>,
/// Custom timestamp field name for parsing.
#[arg(
long = "ts-field",
value_name = "FIELD",
help_heading = "Input Options"
)]
pub ts_field: Option<String>,
/// Custom timestamp format for parsing (uses chrono format strings)
#[arg(
long = "ts-format",
value_name = "FORMAT",
help_heading = "Input Options"
)]
pub ts_format: Option<String>,
/// Assume timezone for input timestamps without timezone info (default: UTC).
/// Use 'local' for system local time.
/// Examples: 'Europe/Berlin', 'local', 'UTC'.
#[arg(long = "input-tz", value_name = "TZ", help_heading = "Input Options")]
pub input_tz: Option<String>,
/// Multi-line event detection strategy. Supply values like `timestamp`,
/// `timestamp:format=%Y-%m-%d %H-%M-%S`, `regex:match=^START`, or
/// `regex:match=^START:end=^END$`. See `kelora --help-multiline` for details.
#[arg(
short = 'M',
long = "multiline",
value_name = "STRATEGY",
help_heading = "Input Options"
)]
pub multiline: Option<String>,
/// Join multiline lines with: space (default), newline, or empty.
#[arg(
long = "multiline-join",
value_enum,
value_name = "JOIN",
default_value_t = MultilineJoin::Space,
help_heading = "Input Options"
)]
pub multiline_join: MultilineJoin,
/// Extract text before separator to specified field (runs before parsing)
#[arg(
long = "extract-prefix",
value_name = "FIELD",
help_heading = "Input Options"
)]
pub extract_prefix: Option<String>,
/// Separator string for prefix extraction (default: pipe '|')
#[arg(
long = "prefix-sep",
value_name = "SEP",
default_value = "|",
help_heading = "Input Options"
)]
pub prefix_sep: String,
/// Column separator for cols:<spec> format (default: whitespace)
#[arg(long = "cols-sep", value_name = "SEP", help_heading = "Input Options")]
pub cols_sep: Option<String>,
/// Pre-run a Rhai script before any other stage runs.
#[arg(
long = "begin",
value_name = "EXPR",
help_heading = "Processing Options",
help = "Pre-run a Rhai script before any other stage runs.\n\nTypical use: seed the global `conf` map with lookup tables or shared context.\n\nHelpers available only here:\n read_lines(path) -> Array<String> # UTF-8, one entry per line\n read_file(path) -> String # UTF-8, entire file contents\n\nData stored in `conf` becomes read-only afterwards. See --help-rhai for stage order."
)]
pub begin: Option<String>,
#[arg(
long = "filter",
value_name = "EXPR",
help_heading = "Processing Options",
help = "Boolean filter expression; events where it evaluates to true are kept.\n\nCan be combined with --include (-I) to call helper functions defined in an\ninclude file. Include files used with --filter must contain only function\ndefinitions — top-level statements are rejected with an error.\n\nExample:\n kelora -I helpers.rhai --filter 'is_error(e.level)' app.log\n\nSee --help-rhai for expression syntax."
)]
pub filters: Vec<String>,
/// Transform/process exec scripts evaluated on each event. See --help-rhai for stage semantics.
#[arg(
short = 'e',
long = "exec",
value_name = "EXPR",
help_heading = "Processing Options"
)]
pub execs: Vec<String>,
/// Execute script from file (contents run in the exec stage).
#[arg(
short = 'E',
long = "exec-file",
value_name = "FILE",
help_heading = "Processing Options"
)]
pub exec_files: Vec<String>,
/// Assertion expressions that must evaluate to true. Violations are reported to stderr;
/// processing continues unless --strict is enabled. See --help-rhai for expression syntax.
#[arg(
long = "assert",
value_name = "EXPR",
help_heading = "Processing Options"
)]
pub asserts: Vec<String>,
#[arg(
short = 'I',
long = "include",
value_name = "FILE",
help_heading = "Processing Options",
help = "Include a Rhai file of helper functions, loaded into the adjacent script stage.\n\nCommand-line position selects which stage the file applies to:\n --include before a --filter/--exec → that filter/exec stage\n --include before the first stage → the --begin stage (if present)\n --include after the last stage → the --end stage (if present)\n\nEach stage has its own scope: an include's functions are only visible to\nthe stage it is attached to. Repeat --include to share helpers across\nseveral stages. An include that attaches to begin/end has no effect unless\nthat --begin/--end stage exists.\n\nWhen used with --filter, the include file must contain only function\ndefinitions. Top-level statements (side effects) are rejected with an error.\n\nExample:\n kelora -I helpers.rhai --filter 'is_error(e.level)' app.log"
)]
pub includes: Vec<String>,
/// Run once after processing completes (post-processing stage). Ideal for summarising metrics or emitting reports. The global `metrics` map from track_*() calls is accessible here.
#[arg(long = "end", value_name = "EXPR", help_heading = "Processing Options")]
pub end: Option<String>,
/// Allow Rhai scripts to create directories and write files on disk (required for file helpers like append_file or mkdir).
#[arg(long = "allow-fs-writes", help_heading = "Processing Options")]
pub allow_fs_writes: bool,
/// Enable access to a sliding window of N+1 recent events (needed for window_* functions).
#[arg(long = "window", value_name = "N", help_heading = "Processing Options")]
pub window_size: Option<usize>,
/// Aggregate events into fixed-size spans (count or duration) before running a span-close hook.
#[arg(
long = "span",
value_name = "N|DURATION|FIELD",
help_heading = "Processing Options",
help = "Aggregate events into consecutive spans.\n --span <N> Close after every N events that pass filters.\n --span <DURATION> Close on aligned time windows (e.g. 5m, 1h, 30s).\n --span <FIELD> Close when the specified field value changes.\nUse with --span-close to run a Rhai snippet when each span finishes."
)]
pub span: Option<String>,
/// Close span after a period of inactivity (mutually exclusive with --span)
#[arg(
long = "span-idle",
value_name = "DURATION",
help_heading = "Processing Options",
help = "Close span after this duration of inactivity (e.g. --span-idle 5m). Requires timestamps and cannot be combined with --span."
)]
pub span_idle: Option<String>,
/// Rhai snippet executed once every time a span closes.
#[arg(
long = "span-close",
value_name = "EXPR",
help_heading = "Processing Options",
help = "Run a Rhai snippet when each span closes. Within the hook, read span.start, span.end, span.id, span.events, span.size, and span.metrics for span context. span.metrics carries per-window values only for additive aggregators (count, sum, avg, unique, bucket); non-additive ones (min, max, percentiles, cardinality, top, bottom) are omitted with a warning, so use span.events for those."
)]
pub span_close: Option<String>,
/// Exit on first error (fail-fast behavior). Use --no-strict to force resilient mode, overriding a config default.
#[arg(long = "strict", help_heading = "Error Handling")]
pub strict: bool,
/// Disable strict error handling (resilient mode)
#[arg(
long = "no-strict",
hide = true,
help_heading = "Error Handling",
overrides_with = "strict"
)]
pub no_strict: bool,
/// Abort on invalid UTF-8 instead of decoding losslessly.
#[arg(
long = "strict-utf8",
help_heading = "Error Handling",
help = "Abort on the first non-UTF-8 byte instead of the default lossy decoding.\n\nBy default, kelora tolerates non-UTF-8 input the way grep does: invalid byte sequences are replaced with U+FFFD (\u{fffd}) and a diagnostic reports how many lines were affected, so a single bad byte no longer truncates the rest of the stream. Pass --strict-utf8 to restore hard failure (exit 1) on invalid UTF-8."
)]
pub strict_utf8: bool,
/// Cap the bytes a single line may use (circuit breaker; default 64MiB, 0 disables).
#[arg(
long = "max-line-bytes",
value_name = "SIZE",
help_heading = "Input Options",
help = "Cap the number of bytes a single input line may consume in memory (default 64MiB).\n\nThis is a safety circuit breaker against runaway memory: a newline-free stream — including a tiny gzip/zstd payload that decompresses into one enormous line — would otherwise grow without bound and exhaust RAM. No real log line approaches the default, so it normally never triggers.\n\nWhen a line exceeds the cap it is truncated to the cap and a warning (\u{1f538}) reports how many lines were clipped; the run still succeeds (exit 0). With --strict an over-limit line is a hard error (exit 1) instead. Accepts a byte count or an IEC/SI suffix (64MiB, 1GiB, 1048576); 0/off/unlimited disables the cap.\n\nReading is streamed, so a large multi-line compressed file is unaffected — only a single over-long line trips this."
)]
pub max_line_bytes: Option<String>,
/// Show detailed error information (use multiple times for more verbosity: -v, -vv, -vvv)
#[arg(short = 'v', long = "verbose", action = clap::ArgAction::Count, help_heading = "Error Handling")]
pub verbose: u8,
/// Include only events with these log levels.
#[arg(
short = 'l',
long = "levels",
help_heading = "Filtering Options",
help = "Include only events with these log levels (comma-separated, case-insensitive).\n\nUse comma-separated values for OR logic: --levels ERROR,WARN\nMultiple flags create sequential AND filters (advanced)."
)]
pub levels: Vec<String>,
/// Exclude events with these log levels.
#[arg(
short = 'L',
long = "exclude-levels",
help_heading = "Filtering Options",
help = "Exclude events with these log levels (comma-separated, case-insensitive)."
)]
pub exclude_levels: Vec<String>,
/// Output only specific fields.
#[arg(
short = 'k',
long = "keys",
value_delimiter = ',',
help_heading = "Filtering Options",
help = "Output only these fields (comma-separated list).",
long_help = "Output only these fields, in the order given (comma-separated list).\n\nActs on whole top-level fields. Nested values that --discover prints as dotted or bracketed paths (e.g. api.queries, tags[]) are NOT selectable here — flatten them first, e.g. --exec 'e.val = e.get_path(\"api.queries\")' then -k val. (A top-level field whose literal name contains a dot is matched as-is.)"
)]
pub keys: Vec<String>,
/// Exclude specific fields from output.
#[arg(
short = 'K',
long = "exclude-keys",
value_delimiter = ',',
help_heading = "Filtering Options",
help = "Exclude these fields from output (comma-separated list).",
long_help = "Exclude these fields from output (comma-separated list).\n\nActs on whole top-level fields; nested values (the dotted/bracketed paths shown by --discover) cannot be dropped here — remove them in an --exec stage instead."
)]
pub exclude_keys: Vec<String>,
/// Start showing entries on or newer than the specified date.
#[arg(
long = "since",
value_name = "TIME",
help_heading = "Filtering Options",
allow_hyphen_values = true,
help = "Keep only events at or after this time.\n\nAccepts journalctl-style timestamps (e.g., 2024-01-15T12:00:00Z, '2024-01-15 12:00', '1h', '-30m', 'yesterday'). Can also use 'until+DURATION', 'until-DURATION', 'now+DURATION', or 'now-DURATION' anchors. See --help-time."
)]
pub since: Option<String>,
/// Stop showing entries on or older than the specified date.
#[arg(
long = "until",
value_name = "TIME",
help_heading = "Filtering Options",
allow_hyphen_values = true,
help = "Keep only events at or before this time.\n\nAccepts journalctl-style timestamps (e.g., 2024-01-15T12:00:00Z, '2024-01-15 12:00', '1h', '+30m', 'tomorrow'). Can also use 'since+DURATION', 'since-DURATION', 'now+DURATION', or 'now-DURATION' anchors. See --help-time."
)]
pub until: Option<String>,
/// Limit output to the first N events.
#[arg(
short = 'n',
long = "take",
value_name = "N",
help_heading = "Filtering Options"
)]
pub take: Option<usize>,
/// Show N lines before each match (requires filtering)
#[arg(
short = 'B',
long = "before-context",
value_name = "N",
help_heading = "Filtering Options"
)]
pub before_context: Option<usize>,
/// Show N lines after each match (requires filtering)
#[arg(
short = 'A',
long = "after-context",
value_name = "N",
help_heading = "Filtering Options"
)]
pub after_context: Option<usize>,
/// Show N lines before and after each match (requires filtering)
#[arg(
short = 'C',
long = "context",
value_name = "N",
help_heading = "Filtering Options"
)]
pub context: Option<usize>,
/// Output format.
#[arg(
short = 'F',
long = "output-format",
value_enum,
value_name = "FORMAT",
default_value = "default",
help = "Output format.\n\nFormats:\n default Colored key-value output\n json JSON Lines (one object per line)\n logfmt Key=value pairs on one line\n inspect Debug view with type information\n levelmap Compact level timeline\n keymap First-character map for one selected field\n tailmap Percentile map for one numeric field\n csv Comma-separated with header row\n tsv Tab-separated with header row\n csvnh CSV without header row\n tsvnh TSV without header row\n\nSee --help-formats for requirements, extracted fields, and examples.",
help_heading = "Output Options"
)]
pub output_format: OutputFormat,
/// Shortcut for -F json.
#[arg(
short = 'J',
help_heading = "Output Options",
conflicts_with = "output_format"
)]
pub json_output: bool,
/// Output only core fields.
#[arg(short = 'c', long = "core", help_heading = "Output Options")]
pub core: bool,
/// Output file for formatted events.
#[arg(
short = 'o',
long = "output-file",
value_name = "FILE",
help_heading = "Output Options"
)]
pub output_file: Option<String>,
/// Suppress events (formatter output)
#[arg(short = 'q', long = "quiet", help_heading = "Output Options")]
pub quiet: bool,
/// Show warnings (🔸) — problems that did not stop the run. Overrides a
/// KELORA_NO_WARNINGS / config default and re-enables warnings in data-only
/// modes like --metrics/--drain/--discover.
#[arg(long = "warnings", help_heading = "Output Options", overrides_with_all = ["no_warnings", "warnings"])]
pub warnings: bool,
/// Suppress warnings (🔸). Errors still print; use --silent to hide those too.
#[arg(long = "no-warnings", help_heading = "Output Options", overrides_with_all = ["warnings", "no_warnings"])]
pub no_warnings: bool,
/// Show hints (💡) — advisory suggestions (zero-result/typo/format tips).
/// Overrides a KELORA_NO_HINTS / config default and re-enables hints in
/// data-only modes like --metrics/--drain/--discover.
#[arg(long = "hints", help_heading = "Output Options", overrides_with_all = ["no_hints", "hints"])]
pub hints: bool,
/// Suppress hints (💡, zero-result hints, format/typo tips).
#[arg(long = "no-hints", help_heading = "Output Options", overrides_with_all = ["hints", "no_hints"])]
pub no_hints: bool,
/// Show both warnings and hints (shortcut for --warnings --hints), even in
/// data-only modes. Error summaries always show unless --silent.
#[arg(long = "diagnostics", help_heading = "Output Options", overrides_with_all = ["no_diagnostics", "diagnostics"])]
pub diagnostics: bool,
/// Suppress both warnings and hints (shortcut for --no-warnings --no-hints),
/// plus per-line verbose errors. Error and parse summaries still print; use
/// --silent to hide those too.
#[arg(long = "no-diagnostics", help_heading = "Output Options", overrides_with_all = ["diagnostics", "no_diagnostics"])]
pub no_diagnostics: bool,
/// Silence pipeline stdout/stderr emitters (events/diagnostics/stats/terminal metrics); script output still allowed. Metrics files still write. Use --no-silent to override a config default.
#[arg(long = "silent", help_heading = "Output Options")]
pub silent: bool,
/// Disable a silent default coming from config.
#[arg(long = "no-silent", hide = true, help_heading = "Output Options")]
pub no_silent: bool,
/// Enable Rhai print/eprint output.
#[arg(long = "script-output", help_heading = "Output Options", overrides_with_all = ["no_script_output", "script_output"])]
pub script_output: bool,
/// Suppress Rhai print/eprint and side-effect warnings (implied by data-only modes).
#[arg(long = "no-script-output", hide = true, help_heading = "Output Options", overrides_with_all = ["script_output", "no_script_output"])]
pub no_script_output: bool,
/// Append a legend to map outputs (levelmap/keymap/tailmap) even when piped.
/// By default the legend shows only when stdout is a terminal.
#[arg(long = "legend", help_heading = "Output Options", overrides_with_all = ["no_legend", "legend"])]
pub legend: bool,
/// Suppress the legend on map outputs (levelmap/keymap/tailmap).
#[arg(long = "no-legend", help_heading = "Output Options", overrides_with_all = ["legend", "no_legend"])]
pub no_legend: bool,
/// Output only field values (omit keys).
#[arg(short = 'b', long = "brief", help_heading = "Default Format Options")]
pub brief: bool,
/// Expand nested structures (maps/arrays) with indentation.
#[arg(long = "expand-nested", help_heading = "Default Format Options")]
pub expand_nested: bool,
/// Always word-wrap wide events onto indented continuation lines, even when
/// piped or redirected. By default wrapping is enabled only when stdout is a
/// terminal, so piped output stays one line per event.
#[arg(
long = "wrap",
help_heading = "Default Format Options",
overrides_with_all = ["no_wrap", "wrap"]
)]
pub wrap: bool,
/// Never word-wrap; keep each event on a single line.
#[arg(
long = "no-wrap",
help_heading = "Default Format Options",
overrides_with_all = ["wrap", "no_wrap"]
)]
pub no_wrap: bool,
/// Normalize the primary timestamp field to RFC3339 (ISO 8601 compatible).
/// Modifies event data - affects all output formats.
#[arg(long = "normalize-ts", help_heading = "Processing Options")]
pub normalize_ts: bool,
/// Display timestamps as local RFC3339 (ISO 8601 compatible).
/// Display-only - only affects default formatter output.
#[arg(
short = 'z',
long = "show-ts-local",
help_heading = "Default Format Options"
)]
pub format_timestamps_local: bool,
/// Display timestamps as UTC RFC3339 (ISO 8601 compatible).
/// Display-only - only affects default formatter output.
#[arg(
short = 'Z',
long = "show-ts-utc",
help_heading = "Default Format Options"
)]
pub format_timestamps_utc: bool,
/// Force colored output.
#[arg(long = "force-color", help_heading = "Display Options", overrides_with_all = ["no_color", "force_color"])]
pub force_color: bool,
/// Disable colored output.
#[arg(long = "no-color", help_heading = "Display Options", overrides_with_all = ["force_color", "no_color"])]
pub no_color: bool,
/// Insert a centered marker when time gaps grow large.
#[arg(
long = "mark-gaps",
value_name = "DURATION",
help_heading = "Display Options",
help = "Insert a centered marker when the time delta between events exceeds the given duration.\nExample: --mark-gaps 30s prints a divider when consecutive events are separated by >=30s."
)]
pub mark_gaps: Option<String>,
/// Force emoji prefixes (override auto-detection)
#[arg(long = "force-emoji", help_heading = "Display Options", overrides_with_all = ["no_emoji", "force_emoji"])]
pub force_emoji: bool,
/// Disable emoji prefixes.
#[arg(long = "no-emoji", help_heading = "Display Options", overrides_with_all = ["force_emoji", "no_emoji"])]
pub no_emoji: bool,
/// Enable parallel processing (default: sequential processing). Use --no-parallel to force sequential, overriding a config default.
#[arg(short = 'P', long = "parallel", help_heading = "Performance Options")]
pub parallel: bool,
/// Disable parallel processing explicitly (default mode is sequential).
#[arg(
long = "no-parallel",
hide = true,
help_heading = "Performance Options",
overrides_with = "parallel"
)]
pub no_parallel: bool,
/// Number of worker threads (0 = auto-detect, one per available core)
#[arg(
long = "threads",
value_name = "N",
default_value_t = 0,
help_heading = "Performance Options"
)]
pub threads: usize,
/// Batch size (events per batch) for parallel processing.
#[arg(
long = "batch-size",
value_name = "N",
help_heading = "Performance Options"
)]
pub batch_size: Option<usize>,
/// Batch timeout in milliseconds.
#[arg(
long = "batch-timeout",
value_name = "MS",
default_value_t = 200,
help_heading = "Performance Options",
help = "Flush partially full parallel batches after this idle period (milliseconds). Lower values reduce latency; higher values improve throughput."
)]
pub batch_timeout: u64,
/// Disable ordered output.
#[arg(long = "unordered", help_heading = "Performance Options")]
pub no_preserve_order: bool,
/// Show stats only (implies -q/--quiet). Use -s for default (table), or --stats=FORMAT for explicit format.
#[arg(
short = 's',
long = "stats",
value_enum,
value_name = "FORMAT",
require_equals = true,
num_args = 0..=1,
default_missing_value = "table",
help_heading = "Metrics and Stats",
help = "Show stats only (implies -q/--quiet).\n\nFormats: table, json\n\nExamples:\n -s Default table format\n --stats=json JSON output\n\nUse --no-stats to override a config default. Note the '=': --stats=json (a space, as in '-s json', is read as a filename)."
)]
pub stats: Option<StatsFormat>,
/// Disable processing statistics explicitly (default: off).
#[arg(
long = "no-stats",
hide = true,
help_heading = "Metrics and Stats",
overrides_with = "stats"
)]
pub no_stats: bool,
/// Show stats alongside events (rare case).
#[arg(long = "with-stats", help_heading = "Metrics and Stats")]
pub with_stats: bool,
/// Show metrics only (implies -q/--quiet). Use -m for default (table), or --metrics=FORMAT for explicit format.
#[arg(
short = 'm',
long = "metrics",
value_enum,
value_name = "FORMAT",
require_equals = true,
num_args = 0..=1,
default_missing_value = "auto",
help_heading = "Metrics and Stats",
help = "Show metrics only (implies -q/--quiet).\n\nFormats: short (first 5), full, tsv, json. Bare -m auto-selects: the\nhuman-readable table on a terminal, tsv when piped or redirected (like ls).\n\ntsv emits one tab-separated 'metric<TAB>key<TAB>value' record per line, sorted\nby count descending, so | head is top-N and | tail is bottom-N.\n\nExamples:\n -m Auto (table on a TTY, tsv when piped)\n --metrics=full Force the table even through a pipe\n --metrics=tsv Force the record stream even to a TTY\n --metrics=short Abbreviated (first 5 items)\n --metrics=json JSON output\n\nUse --no-metrics to override a config default. Note the '=': --metrics=json (a space, as in '-m json', is read as a filename)."
)]
pub metrics: Option<MetricsFormat>,
/// Disable tracked metrics explicitly (default: off).
#[arg(
long = "no-metrics",
hide = true,
help_heading = "Metrics and Stats",
overrides_with = "metrics"
)]
pub no_metrics: bool,
/// Show metrics alongside events (rare case).
#[arg(long = "with-metrics", help_heading = "Metrics and Stats")]
pub with_metrics: bool,
/// Write metrics to file (JSON format). Can combine with -m for both table and file.
#[arg(
long = "metrics-file",
value_name = "FILE",
help_heading = "Metrics and Stats",
help = "Persist the metrics map (populated by track_*()) to disk as JSON."
)]
pub metrics_file: Option<String>,
/// Frequency table: count occurrences per distinct value of FIELD. Shorthand for track_freq.
#[arg(
long = "freq",
value_name = "FIELD",
help_heading = "Metrics and Stats",
help = "Frequency table: count occurrences per distinct value of FIELD.\n\nShorthand for track_freq(\"FIELD\", e.FIELD). Runs after all filters/transforms\nand implies -m. Repeatable. Nested fields use dotted paths (e.g. user.id).\nResults are sorted by count descending, so piping the tsv output to head gives\nthe top-N and tail gives the bottom-N (no --top/--bottom flags needed).\nControl output with --metrics=short|full|tsv|json or --metrics-file.\n\nExamples:\n --freq level\n --filter 'e.status>=500' --freq url\n --freq url | head # top URLs (tsv auto-selected when piped)\n --freq url | tail # rarest URLs"
)]
pub freq: Vec<String>,
/// Summarize a numeric FIELD: count, min, max, avg, p50/p95/p99. Shorthand for track_stats.
#[arg(
long = "describe",
value_name = "FIELD",
help_heading = "Metrics and Stats",
help = "Summarize a numeric FIELD: count, min, max, avg, p50/p95/p99.\n\nShorthand for track_stats(\"FIELD\", e.FIELD). Runs after all filters/transforms\nand implies -m. Repeatable. Non-numeric/missing values are skipped.\nControl output with --metrics=short|full|tsv|json or --metrics-file.\n\nExample:\n --describe duration_ms"
)]
pub describe: Vec<String>,
/// Estimate the number of distinct values of FIELD. Shorthand for track_cardinality.
#[arg(
long = "card",
value_name = "FIELD",
help_heading = "Metrics and Stats",
help = "Estimate the number of distinct values of FIELD (HyperLogLog).\n\nShorthand for track_cardinality(\"FIELD\", e.FIELD). Runs after all\nfilters/transforms and implies -m. Repeatable. Missing values are skipped.\nThe count is approximate (~1% error) but uses constant memory, so it scales to\nhigh-cardinality fields where track_freq/track_unique would not.\nControl output with --metrics=short|full|tsv|json or --metrics-file.\n\nExamples:\n --card user.id\n --filter 'e.status>=500' --card client_ip"
)]
pub card: Vec<String>,
/// Summarize log templates using Drain (summary-only, requires --keys with exactly one field).
#[arg(
long = "drain",
value_enum,
value_name = "FORMAT",
require_equals = true,
num_args = 0..=1,
default_missing_value = "table",
help_heading = "Template Discovery",
help = "Summarize log templates using Drain (summary-only; requires --keys with exactly one field; sequential mode only).\n\nFormats:\n table (default) Clean output: count + template\n full Detailed: adds line ranges + sample + template ID\n id Stable output: template_id + template (sorted by ID)\n json JSON with all metadata\n\nExamples:\n --drain Clean table (count + template)\n --drain=full With line numbers, samples, and IDs\n --drain=id Stable ID list for diffs\n --drain=json JSON output for scripting"
)]
pub drain: Option<DrainFormat>,
/// Discover field names, types, and cardinality from the log stream.
#[arg(
short = 'd',
long = "discover",
value_enum,
value_name = "FORMAT",
require_equals = true,
num_args = 0..=1,
default_missing_value = "table",
conflicts_with = "discover_final_fields",
help_heading = "Field Discovery",
help = "Profile fields: names, inferred types, cardinality estimates, and sample values.\nNested maps and arrays are flattened to 3 levels (e.g. user.name, user.roles[]).\nImplies -q/--quiet (events suppressed). Sequential mode only.\n\nFormats: table (default), json\n\nExamples:\n -d, --discover Table summary\n -d=json, --discover=json Machine-readable JSON"
)]
pub discover_fields: Option<DiscoverFieldsFormat>,
/// Profile final emitted fields instead of parsed input fields.
#[arg(
short = 'D',
long = "discover-final",
value_enum,
value_name = "FORMAT",
require_equals = true,
num_args = 0..=1,
default_missing_value = "table",
conflicts_with = "discover_fields",
help_heading = "Field Discovery",
help = "Profile final emitted fields after scripts and filters.\nImplies -q/--quiet (events suppressed). Sequential mode only.\n\nFormats: table (default), json\n\nExamples:\n -D, --discover-final Table summary of final fields\n -D=json, --discover-final=json Machine-readable JSON of final fields"
)]
pub discover_final_fields: Option<DiscoverFieldsFormat>,
/// Maximum depth for flattening nested maps/arrays during field discovery.
#[arg(
long = "discover-depth",
value_name = "N",
help_heading = "Field Discovery",
help = "Maximum depth for flattening nested maps/arrays into dotted keys (default: 3).\nDepth counts descents from the event root: a.b.c is depth 3.\nUse a higher value to inspect deeply nested JSON; use 1 to see only top-level fields; use 0 for unlimited depth.\n\nExamples:\n --discover --discover-depth=5 Descend up to 5 levels deep\n --discover --discover-depth=1 Top-level fields only\n --discover --discover-depth=0 Unlimited depth"
)]
pub discover_depth: Option<usize>,
/// Specify custom configuration file path.
#[arg(
long = "config-file",
value_name = "FILE",
help_heading = "Configuration Options"
)]
pub config_file: Option<String>,
/// Ignore configuration file.
#[arg(long = "ignore-config", help_heading = "Configuration Options")]
pub ignore_config: bool,
/// Use alias from configuration file.
#[arg(
short = 'a',
long = "alias",
value_name = "NAME",
help_heading = "Configuration Options"
)]
pub alias: Vec<String>,
/// Save current command as alias to configuration file.
#[arg(
long = "save-alias",
value_name = "NAME",
help_heading = "Configuration Options"
)]
pub save_alias: Option<String>,
/// Show configuration file and exit.
#[arg(long = "show-config", help_heading = "Configuration Options")]
pub show_config: bool,
/// Edit configuration file in default editor and exit.
#[arg(long = "edit-config", help_heading = "Configuration Options")]
pub edit_config: bool,
/// Show Rhai scripting guide and exit.
#[arg(long = "help-rhai", help_heading = "Help Options")]
pub help_rhai: bool,
/// Show available Rhai functions and exit (optional KEYWORD filters by name/description; smartcase: lowercase matches any case, uppercase is exact)
#[arg(long = "help-functions", value_name = "KEYWORD", num_args = 0..=1, default_missing_value = "", help_heading = "Help Options")]
pub help_functions: Option<String>,
/// Show practical Rhai examples and exit.
#[arg(long = "help-examples", help_heading = "Help Options")]
pub help_examples: bool,
/// Show time format help and exit.
#[arg(long = "help-time", help_heading = "Help Options")]
pub help_time: bool,
/// Show multiline strategy help and exit.
#[arg(long = "help-multiline", help_heading = "Help Options")]
pub help_multiline: bool,
/// Show regex format help and exit.
#[arg(long = "help-regex", help_heading = "Help Options")]
pub help_regex: bool,
/// Show format reference and exit.
#[arg(long = "help-formats", help_heading = "Help Options")]
pub help_formats: bool,
/// Generate shell completion script and exit.
#[arg(long = "completions", value_enum, help_heading = "Help Options")]
pub completions: Option<ShellCompletion>,
}
impl Cli {
    /// Apply the negating `--no-*` switches to the options they cancel.
    ///
    /// Every `no_*` flag that is set clears its positive counterpart:
    /// `stats` and `metrics` are reset to `None`, `parallel` and `strict`
    /// are forced to `false`. A `no_*` flag that is not set leaves its
    /// counterpart exactly as parsed.
    pub fn resolve_boolean_flags(&mut self) {
        // Optional-valued settings: drop whatever was selected.
        if self.no_stats {
            self.stats = None;
        }
        if self.no_metrics {
            self.metrics = None;
        }

        // Plain switches: stay on only when the negation is absent.
        self.parallel = self.parallel && !self.no_parallel;
        self.strict = self.strict && !self.no_strict;
    }
}
/// Build the effective script text by placing the contents of every include
/// file in front of `script`.
///
/// Include files are read in the order given. Each one is followed by a
/// newline so that neighbouring files (and the main script) never run together.
///
/// # Errors
/// Returns an error naming the offending path when an include file cannot be read.
fn preprocess_script_with_includes(script: &str, includes: &[String]) -> Result<String> {
    let mut combined = String::new();
    for path in includes {
        match std::fs::read_to_string(path) {
            Ok(text) => {
                combined.push_str(&text);
                // Separator between this file and whatever comes next
                combined.push('\n');
            }
            Err(e) => {
                return Err(anyhow::anyhow!(
                    "Failed to read include file '{}': {}",
                    path,
                    e
                ));
            }
        }
    }
    // The main script always comes last
    combined.push_str(script);
    Ok(combined)
}
/// Read every include file into an `IncludeFile` that pairs the path with its contents.
///
/// Files are read in the order given; the first unreadable file aborts the
/// whole load with an error naming its path.
fn load_include_files(includes: &[String]) -> Result<Vec<crate::config::IncludeFile>> {
    let mut loaded = Vec::with_capacity(includes.len());
    for path in includes {
        let content = match std::fs::read_to_string(path) {
            Ok(text) => text,
            Err(e) => {
                return Err(anyhow::anyhow!(
                    "Failed to read include file '{}': {}",
                    path,
                    e
                ));
            }
        };
        loaded.push(crate::config::IncludeFile {
            path: path.clone(),
            content,
        });
    }
    Ok(loaded)
}
/// Split the `--include` files into those belonging to the begin stage and
/// those belonging to the end stage, based on command-line position.
///
/// - begin: includes that appear before the first filter/exec/exec-file argument
/// - end: includes that appear after the last filter/exec/exec-file argument
///
/// Includes positioned between script stages land in neither list. When the
/// command line has no filter/exec/exec-file argument at all, every include is
/// returned as a begin include.
fn get_begin_end_includes(matches: &ArgMatches) -> Result<(Vec<String>, Vec<String>)> {
    let include_indices = match matches.indices_of("includes") {
        Some(indices) => indices,
        None => return Ok((Vec::new(), Vec::new())),
    };
    let include_values = matches.get_many::<String>("includes").unwrap();

    // Command-line positions of every script stage considered here
    let script_positions: Vec<usize> = ["filters", "execs", "exec_files"]
        .iter()
        .filter_map(|&id| matches.indices_of(id))
        .flatten()
        .collect();
    // (first, last) script position, or None when there are no script stages
    let bounds = script_positions
        .iter()
        .min()
        .zip(script_positions.iter().max());

    let mut begin_includes = Vec::new();
    let mut end_includes = Vec::new();
    for (include_index, include_file) in include_indices.zip(include_values) {
        match bounds {
            // No script stages - all includes go to begin
            None => begin_includes.push(include_file.clone()),
            Some((&first, _)) if include_index < first => {
                begin_includes.push(include_file.clone());
            }
            Some((_, &last)) if include_index > last => {
                end_includes.push(include_file.clone());
            }
            // Includes between script stages are handled by get_ordered_script_stages
            Some(_) => {}
        }
    }
    Ok((begin_includes, end_includes))
}
impl Cli {
/// Extract filter and exec stages in the order they appeared on the command line
pub fn get_ordered_script_stages(&self, matches: &ArgMatches) -> Result<Vec<ScriptStageType>> {
use std::collections::HashMap;
let mut stages_with_indices = Vec::new();
let mut include_map: HashMap<usize, Vec<String>> = HashMap::new();
let parse_level_list = |raw: &str| -> Result<Vec<String>> {
let levels: Vec<String> = raw
.split(',')
.map(|s| s.trim())
.filter(|s| !s.is_empty())
.map(|s| s.to_string())
.collect();
if levels.is_empty() {
Err(anyhow::anyhow!(
"Level filters require at least one level (e.g. --levels error,critical)"
))
} else {
Ok(levels)
}
};
// First, collect all include arguments and map them to the next script stage
if let Some(include_indices) = matches.indices_of("includes") {
let include_values: Vec<&String> =
matches.get_many::<String>("includes").unwrap().collect();
// Collect all script stage positions
let mut script_positions = Vec::new();
if let Some(filter_indices) = matches.indices_of("filters") {
script_positions.extend(filter_indices);
}
if let Some(assert_indices) = matches.indices_of("asserts") {
script_positions.extend(assert_indices);
}
if let Some(exec_indices) = matches.indices_of("execs") {
script_positions.extend(exec_indices);
}
if let Some(exec_file_indices) = matches.indices_of("exec_files") {
script_positions.extend(exec_file_indices);
}
script_positions.sort();
// Associate each include with the next script stage
for (pos, include_index) in include_indices.enumerate() {
let include_file = include_values[pos].clone();
// Find the next script stage position after this include
if let Some(&next_script_pos) = script_positions
.iter()
.find(|&&script_pos| script_pos > include_index)
{
include_map
.entry(next_script_pos)
.or_default()
.push(include_file);
}
// If no script stage follows, the include will be ignored (could warn here in future)
}
}
// Get filter stages with their indices and apply preprocessing
if let Some(filter_indices) = matches.indices_of("filters") {
let filter_values: Vec<&String> =
matches.get_many::<String>("filters").unwrap().collect();
for (pos, index) in filter_indices.enumerate() {
let script = filter_values[pos].clone();
let empty_includes = Vec::new();
let includes = include_map.get(&index).unwrap_or(&empty_includes);
let include_files = load_include_files(includes)?;
stages_with_indices.push((
index,
ScriptStageType::Filter {
script,
includes: include_files,
},
));
}
}
// Get assert stages with their indices
if let Some(assert_indices) = matches.indices_of("asserts") {
let assert_values: Vec<&String> =
matches.get_many::<String>("asserts").unwrap().collect();
for (pos, index) in assert_indices.enumerate() {
let script = assert_values[pos].clone();
let empty_includes = Vec::new();
let includes = include_map.get(&index).unwrap_or(&empty_includes);
// Assertions don't support includes (same as filters)
if !includes.is_empty() {
eprintln!(
"{}",
crate::config::format_error_message_auto(
"--include is not supported with --assert (assertions must be pure expressions)"
)
);
std::process::exit(2);
}
stages_with_indices.push((index, ScriptStageType::Assert(script)));
}
}
// Get level filter stages (includes)
if let Some(level_indices) = matches.indices_of("levels") {
let level_values: Vec<&String> =
matches.get_many::<String>("levels").unwrap().collect();
for (pos, index) in level_indices.enumerate() {
let raw = level_values[pos];
let include_levels = parse_level_list(raw)?;
stages_with_indices.push((
index,
ScriptStageType::LevelFilter {
include: include_levels,
exclude: Vec::new(),
},
));
}
}
// Get level filter stages (exclusions)
if let Some(exclude_indices) = matches.indices_of("exclude_levels") {
let exclude_values: Vec<&String> = matches
.get_many::<String>("exclude_levels")
.unwrap()
.collect();
for (pos, index) in exclude_indices.enumerate() {
let raw = exclude_values[pos];
let exclude_levels = parse_level_list(raw)?;
stages_with_indices.push((
index,
ScriptStageType::LevelFilter {
include: Vec::new(),
exclude: exclude_levels,
},
));
}
}
// Get exec stages with their indices and apply preprocessing
if let Some(exec_indices) = matches.indices_of("execs") {
let exec_values: Vec<&String> = matches.get_many::<String>("execs").unwrap().collect();
for (pos, index) in exec_indices.enumerate() {
let script = exec_values[pos].clone();
let empty_includes = Vec::new();
let includes = include_map.get(&index).unwrap_or(&empty_includes);
let preprocessed_script = preprocess_script_with_includes(&script, includes)?;
stages_with_indices.push((index, ScriptStageType::Exec(preprocessed_script)));
}
}
// Get exec-file stages with their indices and apply preprocessing
if let Some(exec_file_indices) = matches.indices_of("exec_files") {
let exec_file_values: Vec<&String> =
matches.get_many::<String>("exec_files").unwrap().collect();
for (pos, index) in exec_file_indices.enumerate() {
let file_path = &exec_file_values[pos];
let script_content = std::fs::read_to_string(file_path).map_err(|e| {
anyhow::anyhow!("Failed to read exec file '{}': {}", file_path, e)
})?;
let empty_includes = Vec::new();
let includes = include_map.get(&index).unwrap_or(&empty_includes);
let preprocessed_script =
preprocess_script_with_includes(&script_content, includes)?;
stages_with_indices.push((index, ScriptStageType::Exec(preprocessed_script)));
}
}
// Sort by original command line position
stages_with_indices.sort_by_key(|(index, _)| *index);
// Detect consecutive --levels flags (likely user mistake)
let mut prev_was_level_include = false;
for (_, stage) in &stages_with_indices {
match stage {
ScriptStageType::LevelFilter {
include,
exclude: _,
} => {
if !include.is_empty() {
if prev_was_level_include {
// Found consecutive --levels flags
let hint = crate::config::format_hint_message_auto(
"Multiple --levels flags create sequential filters (AND). Use comma-separated for OR: --levels ERROR,WARN"
);
eprintln!("{}", hint);
break; // Only show hint once
}
prev_was_level_include = true;
} else {
// This is an exclude-only stage, reset the flag
prev_was_level_include = false;
}
}
_ => {
// Non-level stage, reset the flag
prev_was_level_include = false;
}
}
}
// Extract just the stages
let mut stages: Vec<ScriptStageType> = stages_with_indices
.into_iter()
.map(|(_, stage)| stage)
.collect();
// Metrics-sugar flags (--freq / --describe) are non-positional, so they
// always run LAST — after every --filter/-l/-e stage. That gives them the
// same post-pipeline vantage as --discover-final: they see fields
// created/renamed by earlier stages and only events that survived filtering.
for field in &self.freq {
stages.push(ScriptStageType::Exec(synthesize_freq_stage(field)?));
}
for field in &self.describe {
stages.push(ScriptStageType::Exec(synthesize_describe_stage(field)?));
}
for field in &self.card {
stages.push(ScriptStageType::Exec(synthesize_card_stage(field)?));
}
Ok(stages)
}
/// Get processed begin and end scripts with includes applied
pub fn get_processed_begin_end(
&self,
matches: &ArgMatches,
) -> Result<(Option<String>, Option<String>)> {
let (begin_includes, end_includes) = get_begin_end_includes(matches)?;
// Includes only become a begin/end script when there is an explicit
// --begin/--end to attach them to. We deliberately do NOT synthesize a
// begin/end stage out of includes alone:
// - Each stage has its own function namespace, so an include placed
// before the first filter/exec stage is already loaded into that
// stage by get_ordered_script_stages. Also materializing it as a
// phantom begin stage produced a second copy whose only observable
// effect was re-running the include's top-level statements at
// startup (double execution).
// - With no --begin/--end script, that phantom stage's function
// definitions live in a namespace no user code runs in, so it never
// served a purpose.
let processed_begin = match self.begin {
Some(ref begin_script) => Some(preprocess_script_with_includes(
begin_script,
&begin_includes,
)?),
None => None,
};
let processed_end = match self.end {
Some(ref end_script) => {
Some(preprocess_script_with_includes(end_script, &end_includes)?)
}
None => None,
};
Ok((processed_begin, processed_end))
}
}
/// Render a string as a double-quoted Rhai string literal.
///
/// `\` and `"` are escaped so the text cannot end the literal early. Line
/// feeds, carriage returns and tabs are written as the `\n`, `\r` and `\t`
/// escape sequences instead of being copied raw: a raw line break inside a
/// double-quoted literal leaves the generated script with an unterminated
/// string, while the escape denotes the same character at runtime.
/// All other characters are copied unchanged.
fn rhai_string_literal(s: &str) -> String {
    // +2 for the surrounding quotes; escapes may grow it further.
    let mut literal = String::with_capacity(s.len() + 2);
    literal.push('"');
    for ch in s.chars() {
        match ch {
            '\\' => literal.push_str("\\\\"),
            '"' => literal.push_str("\\\""),
            '\n' => literal.push_str("\\n"),
            '\r' => literal.push_str("\\r"),
            '\t' => literal.push_str("\\t"),
            other => literal.push(other),
        }
    }
    literal.push('"');
    literal
}
/// Whether `s` is a bare top-level identifier, i.e. `e.<s>` can be written directly.
///
/// The first character must be an ASCII letter or `_`; every following
/// character must be an ASCII letter, ASCII digit or `_`. The empty string and
/// anything else (dotted paths, brackets, dashes, non-ASCII text) is rejected
/// and goes through `get_path` instead.
fn is_simple_field_ident(s: &str) -> bool {
    // Only ASCII is accepted, so inspecting bytes is enough: every byte of a
    // multi-byte UTF-8 character fails both predicates.
    let is_start = |b: u8| b.is_ascii_alphabetic() || b == b'_';
    let is_continue = |b: u8| b.is_ascii_alphanumeric() || b == b'_';
    match s.as_bytes().split_first() {
        Some((&first, rest)) => is_start(first) && rest.iter().all(|&b| is_continue(b)),
        None => false,
    }
}
/// Produce the Rhai expression that reads `field` from the event `e` for the
/// metrics-sugar flags.
///
/// A simple identifier becomes plain property access (`e.field`, which yields
/// `()` for a missing field).
///
/// Any other name (dots, brackets, special characters) is ambiguous: a column
/// literally named `ip.dst` (common in CSV/TSV headers) looks the same as the
/// nested path `ip.dst`. The generated expression resolves this the way `-k`
/// does: it prefers a literal top-level key of that exact name and falls back
/// to nested `get_path` traversal only when no such key exists. Events are kept
/// as nested maps (JSON is never flattened to dotted keys), so `field in e` is
/// true *only* for a literal dotted column and nested-JSON access keeps going
/// through `get_path`. Both branches yield `()` when the value is absent, so
/// the track_* functions skip missing values either way.
fn field_value_accessor(field: &str) -> String {
    if !is_simple_field_ident(field) {
        let literal = rhai_string_literal(field);
        return format!("(if {literal} in e {{ e[{literal}] }} else {{ get_path(e, {literal}) }})");
    }
    format!("e.{field}")
}
/// Generate the exec script behind `--freq FIELD`: a `track_freq` call keyed by
/// the field name and fed with the field's value.
///
/// # Errors
/// Fails when `field` is empty.
fn synthesize_freq_stage(field: &str) -> Result<String> {
    match field {
        "" => Err(anyhow::anyhow!(
            "--freq requires a field name, e.g. --freq level"
        )),
        name => {
            let key = rhai_string_literal(name);
            let value = field_value_accessor(name);
            Ok(format!("track_freq({}, {})", key, value))
        }
    }
}
/// Generate the exec script behind `--describe FIELD`: a `track_stats` call
/// keyed by the field name and fed with the field's value.
///
/// # Errors
/// Fails when `field` is empty.
fn synthesize_describe_stage(field: &str) -> Result<String> {
    match field {
        "" => Err(anyhow::anyhow!(
            "--describe requires a field name, e.g. --describe duration_ms"
        )),
        name => {
            let key = rhai_string_literal(name);
            let value = field_value_accessor(name);
            Ok(format!("track_stats({}, {})", key, value))
        }
    }
}
/// Generate the exec script behind `--card FIELD`: a `track_cardinality` call
/// keyed by the field name and fed with the field's value.
///
/// # Errors
/// Fails when `field` is empty.
fn synthesize_card_stage(field: &str) -> Result<String> {
    match field {
        "" => Err(anyhow::anyhow!(
            "--card requires a field name, e.g. --card user.id"
        )),
        name => {
            let key = rhai_string_literal(name);
            let value = field_value_accessor(name);
            Ok(format!("track_cardinality({}, {})", key, value))
        }
    }
}
/// Validate a format value and hand it back unchanged when it is acceptable.
///
/// Accepted forms, checked in this order:
/// 1. `regex:<pattern>` with a non-blank pattern
/// 2. `cols:<spec>` with a non-blank spec
/// 3. anything starting with `csv:`, `csv `, `tsv:` or `tsv ` (field specs / type annotations)
/// 4. a comma-separated cascade whose entries are each one of json, line, raw,
///    logfmt, syslog, cef, combined or a built-in application-log format
/// 5. a single standard format name (case-insensitive) or a built-in
///    application-log format name
///
/// On success the original string `s` is returned as-is (no case folding or
/// trimming); on failure the `Err` carries a user-facing message.
fn parse_format_value(s: &str) -> Result<String, String> {
    // Simple formats that may be named inside a comma-separated cascade.
    const CASCADE_FORMATS: [&str; 7] = [
        "json", "line", "raw", "logfmt", "syslog", "cef", "combined",
    ];

    // regex:<pattern>
    if let Some(pattern) = s.strip_prefix("regex:") {
        return if pattern.trim().is_empty() {
            Err("regex format requires a pattern, e.g., 'regex:(?P<field>\\d+)'".to_string())
        } else {
            Ok(s.to_owned())
        };
    }

    // cols:<spec>
    if let Some(spec) = s.strip_prefix("cols:") {
        return if spec.trim().is_empty() {
            Err("cols format requires a specification, e.g., 'cols:ts level *msg'".to_string())
        } else {
            Ok(s.to_owned())
        };
    }

    // CSV/TSV with field specs (type annotations) are accepted without further checks
    let has_field_spec = ["csv:", "csv ", "tsv:", "tsv "]
        .iter()
        .any(|prefix| s.starts_with(*prefix));
    if has_field_spec {
        return Ok(s.to_owned());
    }

    // Cascade (comma-separated list of simple formats). Full validation
    // happens in parse_input_format_spec; only the most obvious mistakes are
    // caught here.
    if s.contains(',') {
        for part in s.split(',') {
            let entry = part.trim();
            let name = entry.to_lowercase();
            if name.is_empty() {
                return Err(format!("Empty entry in cascade format list: '{}'", s));
            }

            // Built-in application-log formats (adapted from lnav) are also valid in a cascade.
            let known = CASCADE_FORMATS.contains(&name.as_str())
                || crate::parsers::lnav_formats::by_name(&name).is_some();
            if known {
                continue;
            }

            let wants_cols_or_regex = matches!(name.as_str(), "cols" | "regex")
                || name.starts_with("cols:")
                || name.starts_with("regex:");
            let hint = if wants_cols_or_regex {
                " To use cols:/regex: in a cascade, pass repeated -f flags instead, e.g. -f json -f 'cols:ts level *msg'."
            } else {
                ""
            };
            return Err(format!(
                "Unknown or unsupported format '{}' in cascade list '{}'. \
                 Allowed in a comma list: json, line, raw, logfmt, syslog, cef, combined, and built-in application-log formats ({}).{}",
                entry,
                s,
                crate::parsers::lnav_formats::names_csv(),
                hint
            ));
        }
        return Ok(s.to_owned());
    }

    // Single standard format, or a built-in application-log format (e.g. -f log4j)
    let lowered = s.to_lowercase();
    let is_standard = matches!(
        lowered.as_str(),
        "auto"
            | "auto-per-file"
            | "json"
            | "line"
            | "raw"
            | "logfmt"
            | "syslog"
            | "cef"
            | "csv"
            | "tsv"
            | "csvnh"
            | "tsvnh"
            | "combined"
            | "cols"
    );
    if is_standard || crate::parsers::lnav_formats::by_name(lowered.as_str()).is_some() {
        return Ok(s.to_owned());
    }

    Err(format!(
        "Unknown format '{}'. Supported formats: auto, auto-per-file, json, line, raw, logfmt, syslog, cef, csv, tsv, csvnh, tsvnh, combined, cols:<spec>, regex:<pattern>, or a built-in application-log format ({})",
        s,
        crate::parsers::lnav_formats::names_csv()
    ))
}
#[cfg(test)]
mod tests {
use super::*;
use clap::CommandFactory;
use std::io::Write;
use tempfile::NamedTempFile;
/// Test helper: parse `args` twice, once into raw `ArgMatches` (needed for
/// positional index lookups) and once into the typed `Cli`.
///
/// Panics when the arguments do not form a valid command line.
fn parse_cli(args: &[String]) -> (Cli, ArgMatches) {
    let argv = args.iter().map(String::as_str);
    let matches = Cli::command()
        .try_get_matches_from(argv)
        .expect("failed to build matches");
    let cli = Cli::parse_from(args.iter().cloned());
    (cli, matches)
}
#[test]
fn ordered_script_stages_preserve_cli_sequence() {
    // -E needs a real file on disk; its contents become the last Exec stage.
    let mut script_file = NamedTempFile::new().expect("temp file");
    writeln!(script_file, "meta.count = meta.count + 1;").expect("write script");
    let script_path = script_file.path().to_str().unwrap().to_string();

    let argv: Vec<String> = [
        "kelora",
        "--filter",
        "e.status >= 400",
        "-e",
        "e.alert = true;",
        "--filter",
        "e.status < 500",
        "-E",
        script_path.as_str(),
    ]
    .iter()
    .map(|arg| arg.to_string())
    .collect();

    let (cli, matches) = parse_cli(&argv);
    let stages = cli
        .get_ordered_script_stages(&matches)
        .expect("stages should be parsed");

    // Stages must come back interleaved exactly as given: filter, exec, filter, exec-file.
    assert_eq!(stages.len(), 4);
    match &stages[0] {
        ScriptStageType::Filter { script, .. } => assert_eq!(script, "e.status >= 400"),
        _ => panic!("Expected Filter stage"),
    }
    match &stages[1] {
        ScriptStageType::Exec(script) => assert_eq!(script, "e.alert = true;"),
        _ => panic!("Expected Exec stage"),
    }
    match &stages[2] {
        ScriptStageType::Filter { script, .. } => assert_eq!(script, "e.status < 500"),
        _ => panic!("Expected Filter stage"),
    }
    match &stages[3] {
        ScriptStageType::Exec(script) => assert!(script.contains("meta.count")),
        _ => panic!("Expected Exec stage"),
    }
}
#[test]
fn ordered_script_stages_error_when_exec_file_missing() {
    use std::time::{SystemTime, UNIX_EPOCH};

    // Build a path that is unique per process and per run, and make sure
    // nothing exists there.
    let nanos = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .unwrap()
        .as_nanos();
    let file_name = format!("kelora-missing-{}-{}.rhai", std::process::id(), nanos);
    let missing_path = std::env::temp_dir().join(file_name);
    let _ = std::fs::remove_file(&missing_path);
    let missing = missing_path.to_string_lossy().to_string();

    let argv = vec![String::from("kelora"), String::from("-E"), missing.clone()];
    let (cli, matches) = parse_cli(&argv);
    let err = cli
        .get_ordered_script_stages(&matches)
        .expect_err("should report missing file");

    let expected = format!("Failed to read exec file '{}':", missing);
    assert!(err.to_string().contains(&expected));
}
#[test]
fn ordered_script_stages_empty_when_no_scripts_specified() {
    // A bare `kelora` invocation defines no stages at all.
    let argv = vec![String::from("kelora")];
    let (cli, matches) = parse_cli(&argv);
    let stages = cli
        .get_ordered_script_stages(&matches)
        .expect("empty stages should succeed");
    assert_eq!(stages.len(), 0);
}
#[test]
fn ordered_script_stages_capture_level_filters_in_order() {
    let argv: Vec<String> = [
        "kelora",
        "-l",
        "error,critical",
        "-e",
        "track_freq(\"level\", e.level)",
        "--exclude-levels",
        "debug",
    ]
    .iter()
    .map(|arg| arg.to_string())
    .collect();

    let (cli, matches) = parse_cli(&argv);
    let stages = cli
        .get_ordered_script_stages(&matches)
        .expect("level stages should parse");
    assert_eq!(stages.len(), 3);

    // -l: include-only level filter
    match &stages[0] {
        ScriptStageType::LevelFilter { include, exclude } => {
            assert_eq!(
                *include,
                vec!["error".to_string(), "critical".to_string()]
            );
            assert!(exclude.is_empty());
        }
        _ => panic!("Expected LevelFilter stage"),
    }
    // -e: exec script passed through untouched
    match &stages[1] {
        ScriptStageType::Exec(script) => {
            assert_eq!(script, "track_freq(\"level\", e.level)");
        }
        _ => panic!("Expected Exec stage"),
    }
    // --exclude-levels: exclude-only level filter
    match &stages[2] {
        ScriptStageType::LevelFilter { include, exclude } => {
            assert!(include.is_empty());
            assert_eq!(*exclude, vec!["debug".to_string()]);
        }
        _ => panic!("Expected LevelFilter stage"),
    }
}
#[test]
fn include_single_file_to_exec_stage() {
    let mut helper_file = NamedTempFile::new().expect("temp file");
    writeln!(helper_file, "fn helper() {{ return 42; }}").expect("write include");
    let helper_path = helper_file.path().to_str().unwrap().to_string();

    let argv: Vec<String> = [
        "kelora",
        "-I",
        helper_path.as_str(),
        "--exec",
        "e.result = helper();",
    ]
    .iter()
    .map(|arg| arg.to_string())
    .collect();

    let (cli, matches) = parse_cli(&argv);
    let stages = cli
        .get_ordered_script_stages(&matches)
        .expect("stages should be parsed");
    assert_eq!(stages.len(), 1);

    let script = match &stages[0] {
        ScriptStageType::Exec(script) => script,
        _ => panic!("Expected Exec stage"),
    };
    // The include text is prepended, so it must come first in the stage script.
    assert!(script.contains("fn helper() { return 42; }"));
    assert!(script.contains("e.result = helper();"));
    assert!(script.starts_with("fn helper()"));
}
#[test]
fn include_multiple_files_to_single_stage() {
    let mut first_include = NamedTempFile::new().expect("temp file");
    writeln!(first_include, "fn helper1() {{ return 1; }}").expect("write include1");
    let first_path = first_include.path().to_str().unwrap().to_string();

    let mut second_include = NamedTempFile::new().expect("temp file");
    writeln!(second_include, "fn helper2() {{ return 2; }}").expect("write include2");
    let second_path = second_include.path().to_str().unwrap().to_string();

    let argv: Vec<String> = [
        "kelora",
        "-I",
        first_path.as_str(),
        "-I",
        second_path.as_str(),
        "--exec",
        "e.result = helper1() + helper2();",
    ]
    .iter()
    .map(|arg| arg.to_string())
    .collect();

    let (cli, matches) = parse_cli(&argv);
    let stages = cli
        .get_ordered_script_stages(&matches)
        .expect("stages should be parsed");
    assert_eq!(stages.len(), 1);

    // Both includes precede the same exec stage, so both end up in its script.
    let script = match &stages[0] {
        ScriptStageType::Exec(script) => script,
        _ => panic!("Expected Exec stage"),
    };
    assert!(script.contains("fn helper1() { return 1; }"));
    assert!(script.contains("fn helper2() { return 2; }"));
    assert!(script.contains("e.result = helper1() + helper2();"));
}
#[test]
fn includes_apply_to_next_script_stage() {
    let mut first_include = NamedTempFile::new().expect("temp file");
    writeln!(first_include, "fn util1() {{ return 1; }}").expect("write include1");
    let first_path = first_include.path().to_str().unwrap().to_string();

    let mut second_include = NamedTempFile::new().expect("temp file");
    writeln!(second_include, "fn util2() {{ return 2; }}").expect("write include2");
    let second_path = second_include.path().to_str().unwrap().to_string();

    let argv: Vec<String> = [
        "kelora",
        "-I",
        first_path.as_str(),
        "--exec",
        "e.val1 = util1();",
        "-I",
        second_path.as_str(),
        "--exec",
        "e.val2 = util2();",
    ]
    .iter()
    .map(|arg| arg.to_string())
    .collect();

    let (cli, matches) = parse_cli(&argv);
    let stages = cli
        .get_ordered_script_stages(&matches)
        .expect("stages should be parsed");
    assert_eq!(stages.len(), 2);

    // The first exec stage gets only the include that precedes it.
    let first_script = match &stages[0] {
        ScriptStageType::Exec(script) => script,
        _ => panic!("Expected Exec stage"),
    };
    assert!(first_script.contains("fn util1() { return 1; }"));
    assert!(first_script.contains("e.val1 = util1();"));
    assert!(!first_script.contains("fn util2() { return 2; }"));

    // The second exec stage gets only the include placed between the two stages.
    let second_script = match &stages[1] {
        ScriptStageType::Exec(script) => script,
        _ => panic!("Expected Exec stage"),
    };
    assert!(second_script.contains("fn util2() { return 2; }"));
    assert!(second_script.contains("e.val2 = util2();"));
    assert!(!second_script.contains("fn util1() { return 1; }"));
}
#[test]
fn include_with_exec_file() {
    let mut shared_file = NamedTempFile::new().expect("temp file");
    writeln!(shared_file, "fn shared_util() {{ return 42; }}").expect("write include");
    let shared_path = shared_file.path().to_str().unwrap().to_string();

    let mut script_file = NamedTempFile::new().expect("temp file");
    writeln!(script_file, "e.value = shared_util();").expect("write exec");
    let script_path = script_file.path().to_str().unwrap().to_string();

    let argv: Vec<String> = [
        "kelora",
        "-I",
        shared_path.as_str(),
        "-E",
        script_path.as_str(),
    ]
    .iter()
    .map(|arg| arg.to_string())
    .collect();

    let (cli, matches) = parse_cli(&argv);
    let stages = cli
        .get_ordered_script_stages(&matches)
        .expect("stages should be parsed");
    assert_eq!(stages.len(), 1);

    // Include text and the exec file's contents are merged into one Exec stage.
    let script = match &stages[0] {
        ScriptStageType::Exec(script) => script,
        _ => panic!("Expected Exec stage"),
    };
    assert!(script.contains("fn shared_util() { return 42; }"));
    assert!(script.contains("e.value = shared_util();"));
}
#[test]
fn include_with_filter_stage() {
    let mut predicate_file = NamedTempFile::new().expect("temp file");
    writeln!(predicate_file, "fn is_error(level) {{ level == \"ERROR\" }}")
        .expect("write include");
    let predicate_path = predicate_file.path().to_str().unwrap().to_string();

    let argv: Vec<String> = [
        "kelora",
        "-I",
        predicate_path.as_str(),
        "--filter",
        "is_error(e.level)",
    ]
    .iter()
    .map(|arg| arg.to_string())
    .collect();

    let (cli, matches) = parse_cli(&argv);
    let stages = cli
        .get_ordered_script_stages(&matches)
        .expect("stages should be parsed");
    assert_eq!(stages.len(), 1);

    // Filters keep the script untouched and carry includes as separate files.
    match &stages[0] {
        ScriptStageType::Filter { script, includes } => {
            assert_eq!(includes.len(), 1);
            assert!(includes[0]
                .content
                .contains("fn is_error(level) { level == \"ERROR\" }"));
            assert_eq!(script, "is_error(e.level)");
        }
        _ => panic!("Expected Filter stage"),
    }
}
#[test]
fn include_error_when_file_missing() {
    let missing_path = "/non/existent/path.rhai";
    let argv: Vec<String> = ["kelora", "-I", missing_path, "--exec", "e.test = true;"]
        .iter()
        .map(|arg| arg.to_string())
        .collect();

    let (cli, matches) = parse_cli(&argv);
    let err = cli
        .get_ordered_script_stages(&matches)
        .expect_err("should report missing include file");

    // The error must name the include file that could not be read.
    let expected = format!("Failed to read include file '{}':", missing_path);
    assert!(err.to_string().contains(&expected));
}
#[test]
fn get_processed_begin_end_with_includes() {
    let mut setup_file = NamedTempFile::new().expect("temp file");
    writeln!(setup_file, "fn setup() {{ print('setup'); }}").expect("write include1");
    let setup_path = setup_file.path().to_str().unwrap().to_string();

    let mut cleanup_file = NamedTempFile::new().expect("temp file");
    writeln!(cleanup_file, "fn cleanup() {{ print('cleanup'); }}").expect("write include2");
    let cleanup_path = cleanup_file.path().to_str().unwrap().to_string();

    // One include ahead of the only exec stage, one after it.
    let argv: Vec<String> = [
        "kelora",
        "-I",
        setup_path.as_str(),
        "--begin",
        "setup();",
        "--exec",
        "e.processed = true;",
        "-I",
        cleanup_path.as_str(),
        "--end",
        "cleanup();",
    ]
    .iter()
    .map(|arg| arg.to_string())
    .collect();

    let (cli, matches) = parse_cli(&argv);
    let (begin, end) = cli
        .get_processed_begin_end(&matches)
        .expect("should process begin/end");

    let begin_script = begin.expect("begin script should be present");
    assert!(begin_script.contains("fn setup() { print('setup'); }"));
    assert!(begin_script.contains("setup();"));

    let end_script = end.expect("end script should be present");
    assert!(end_script.contains("fn cleanup() { print('cleanup'); }"));
    assert!(end_script.contains("cleanup();"));
}
#[test]
fn include_before_first_stage_without_begin_creates_no_begin_stage() {
    // With no explicit --begin, an include ahead of the first filter/exec
    // stage must NOT turn into a phantom begin stage. The include is already
    // loaded into the exec stage that follows it; a begin-from-includes copy
    // would only re-run its top-level statements at startup (the
    // double-execution bug).
    let mut startup_file = NamedTempFile::new().expect("temp file");
    writeln!(startup_file, "print('auto setup');").expect("write include");
    let startup_path = startup_file.path().to_str().unwrap().to_string();

    let argv: Vec<String> = [
        "kelora",
        "-I",
        startup_path.as_str(),
        "--exec",
        "e.processed = true;",
    ]
    .iter()
    .map(|arg| arg.to_string())
    .collect();

    let (cli, matches) = parse_cli(&argv);
    let (begin, end) = cli
        .get_processed_begin_end(&matches)
        .expect("should process begin/end");

    assert!(
        begin.is_none(),
        "include before the first stage must not create a begin stage without --begin"
    );
    assert!(end.is_none());
}
/// An include that trails every stage must not turn into an implicit end
/// stage.
#[test]
fn trailing_include_without_end_creates_no_end_stage() {
    // With no explicit --end, an include placed after all filter/exec
    // stages has nothing to attach to; a phantom end stage must not be
    // synthesized from it.
    let mut script_file = NamedTempFile::new().expect("temp file");
    writeln!(script_file, "print('auto teardown');").expect("write include");
    let script_path = String::from(script_file.path().to_str().unwrap());

    let args = vec![
        String::from("kelora"),
        String::from("--exec"),
        String::from("e.processed = true;"),
        String::from("-I"),
        script_path,
    ];

    let (cli, matches) = parse_cli(&args);
    let processed = cli.get_processed_begin_end(&matches);
    let (begin_stage, end_stage) = processed.expect("should process begin/end");

    assert!(begin_stage.is_none());
    assert!(
        end_stage.is_none(),
        "trailing include must not create an end stage without --end"
    );
}
/// Plain identifier field names are expected to map to `e.<name>` property
/// access.
#[test]
fn field_value_accessor_simple_ident_uses_property_access() {
    // (field name, expected accessor expression)
    let cases = [("level", "e.level"), ("user_id", "e.user_id")];
    for &(field, expected) in &cases {
        assert_eq!(field_value_accessor(field), expected);
    }
}
/// Dotted field names must resolve as a literal top-level key first and as
/// a nested path second.
#[test]
fn field_value_accessor_dotted_name_prefers_literal_then_nested() {
    // A dotted name can be either a flat column literally called `ip.dst`
    // (e.g. from CSV) or a nested JSON path such as `user.id`. The accessor
    // has to check for the literal top-level key before falling back to
    // nested traversal, so that `--freq`/`--describe`/`--card` agree with
    // `-k` on literal dotted columns while the documented nested-path
    // behavior for JSON is preserved.
    let expected =
        "(if \"ip.dst\" in e { e[\"ip.dst\"] } else { get_path(e, \"ip.dst\") })";
    let accessor = field_value_accessor("ip.dst");
    assert_eq!(accessor, expected);
}
/// The synthesized `track_freq` stage must embed the accessor that matches
/// the shape of the field name.
#[test]
fn synthesize_freq_stage_uses_dotted_accessor() {
    // (field name, expected synthesized stage)
    let cases = [
        // Dotted names get the literal-key-then-nested-path accessor.
        (
            "ip.dst",
            "track_freq(\"ip.dst\", (if \"ip.dst\" in e { e[\"ip.dst\"] } else { get_path(e, \"ip.dst\") }))",
        ),
        // Simple identifiers keep the cheap property accessor.
        ("level", "track_freq(\"level\", e.level)"),
    ];
    for &(field, expected) in &cases {
        let stage = synthesize_freq_stage(field).expect("freq stage");
        assert_eq!(stage, expected);
    }
}
}