// kelora 2.0.0 - Docs.rs

// CLI-specific types and structures
// This module contains the command-line interface definitions and parsing logic

use crate::config::{MultilineJoin, ScriptStageType};
use anyhow::Result;
use clap::{ArgMatches, Parser};

// CLI types - specific to command-line interface
// Names of the input formats. The variants line up one-to-one with the list
// in the `-f/--input-format` help text (auto, auto-per-file, json, line, raw,
// logfmt, syslog, cef, csv, tsv, csvnh, tsvnh, combined, cols, regex).
//
// NOTE(review): in the part of this file visible here, `-f` is collected as
// `Vec<String>` through `parse_format_value`, not typed as this enum —
// confirm where `InputFormat` is actually consumed.
//
// Variant comments are plain `//` on purpose: clap's `ValueEnum` derive turns
// `///` on a variant into that value's help text, which would change --help.
#[derive(clap::ValueEnum, Clone, Debug)]
pub enum InputFormat {
    // Per the -f help: detected from the first non-empty line, then applied
    // to every line.
    Auto,
    // NOTE(review): presumably auto-detection redone for each input file —
    // not described in the visible help text; confirm.
    AutoPerFile,
    Json,
    Line,
    Raw,
    Logfmt,
    Syslog,
    Cef,
    Csv,
    Tsv,
    // NOTE(review): presumably CSV/TSV without a header row (that is how the
    // -F help describes the same names on the output side) — confirm.
    Csvnh,
    Tsvnh,
    Combined,
    // Per the -f help: column parsing, written on the CLI as `cols:<spec>`.
    Cols,
    // Per the -f help: regex parsing with named groups, written as
    // `regex:<pattern>`.
    Regex,
}

// Event output formats, selected with `-F/--output-format` (the `output_format`
// field of `Cli`). `Default` is both the `Default::default()` value (via
// `#[default]`) and what the flag's `default_value = "default"` resolves to.
//
// The per-variant notes below restate the `-F` help text. They are plain `//`
// comments on purpose: `///` on a `ValueEnum` variant becomes clap help text.
#[derive(clap::ValueEnum, Clone, Debug, Default)]
pub enum OutputFormat {
    // Colored key-value output.
    #[default]
    Default,
    // JSON Lines (one object per line).
    Json,
    // Key=value pairs on one line.
    Logfmt,
    // Debug view with type information.
    Inspect,
    // Compact level timeline.
    Levelmap,
    // First-character map for one selected field.
    Keymap,
    // Percentile map for one numeric field.
    Tailmap,
    // Comma-separated with header row.
    Csv,
    // Tab-separated with header row.
    Tsv,
    // CSV without header row.
    Csvnh,
    // TSV without header row.
    Tsvnh,
}

// Order in which input files are processed; the value type of `--file-order`,
// whose declared default is "cli".
#[derive(clap::ValueEnum, Clone, Debug)]
pub enum FileOrder {
    // NOTE(review): presumably "as given on the command line" — confirm.
    Cli,
    // NOTE(review): presumably sorted by file name — confirm.
    Name,
    // NOTE(review): presumably sorted by modification time — confirm.
    Mtime,
}

// Rendering of tracked metrics; the value type of `-m/--metrics`
// (`Option<MetricsFormat>` on `Cli`). A bare `-m` carries no value and
// resolves to `Auto` through the flag's `default_missing_value = "auto"`.
//
// Per the `-m` help text: `short` is abbreviated (first 5 items), `full` forces
// the table even through a pipe, `json` is JSON output.
//
// The `///` comments on `Tsv` and `Auto` below are real doc comments; clap's
// `ValueEnum` derive uses variant doc comments as possible-value help, so edit
// them with the --help output in mind.
#[derive(clap::ValueEnum, Clone, Debug)]
pub enum MetricsFormat {
    Short,
    Full,
    Json,
    /// Tab-separated record stream (one `metric<TAB>key<TAB>value` row per line,
    /// sorted by count/score descending) for piping to head/tail/sort/awk.
    Tsv,
    /// Resolve at output time: the human report on a terminal, `tsv` when piped
    /// or redirected. The default for `-m` and `--freq`/`--describe`. Hidden
    /// because it is the implicit default rather than something to type.
    #[value(hide = true)]
    Auto,
}

// Rendering of processing statistics; the value type of `-s/--stats`
// (`Option<StatsFormat>` on `Cli`). A bare `-s` resolves to `Table` through
// the flag's `default_missing_value = "table"`; `--stats=json` selects `Json`.
#[derive(clap::ValueEnum, Clone, Debug)]
pub enum StatsFormat {
    Table,
    Json,
}

// Output choices (table or JSON) for field discovery.
// NOTE(review): the flag that takes this type is outside the visible part of
// the file — presumably `--discover`, which other help texts here mention;
// confirm.
#[derive(clap::ValueEnum, Clone, Debug)]
pub enum DiscoverFieldsFormat {
    Table,
    Json,
}

// Output choices for drain results; `Table` is the `Default::default()` value
// (via `#[default]`).
// NOTE(review): the flag that takes this type is outside the visible part of
// the file — presumably `--drain`, which other help texts here mention — and
// the meaning of `Full` / `Id` is not shown; confirm both.
#[derive(clap::ValueEnum, Clone, Debug, Default)]
pub enum DrainFormat {
    #[default]
    Table,
    Full,
    Id,
    Json,
}

// Shells selectable as a CLI value. Unlike the other enums in this file this
// one is also `Copy`.
// NOTE(review): the flag that takes this type is outside the visible part of
// the file — presumably a completion-script generator; confirm.
#[derive(clap::ValueEnum, Clone, Copy, Debug)]
pub enum ShellCompletion {
    Bash,
    Zsh,
    Fish,
    // Explicit value name: clap's default kebab-case renaming would spell this
    // variant `power-shell`; the attribute pins it to `powershell`.
    #[value(name = "powershell")]
    PowerShell,
    Elvish,
}

// CLI structure - contains all command-line arguments and options
#[derive(Parser)]
#[command(name = "kelora")]
#[command(about = "A command-line log analysis tool with embedded Rhai scripting")]
#[command(
    long_about = "A command-line log analysis tool with embedded Rhai scripting\n\nProcesses logs through a streaming pipeline: parse -> filter/transform (Rhai) -> format.\nRuns sequentially by default; add --parallel for high-throughput batch analysis.\n\nRun 'kelora' in a terminal with no arguments for interactive mode - a readline-based REPL with\ncommand history, glob expansion, and proper quoting (especially helpful on Windows). When stdin\nis not a terminal (piped or redirected), kelora reads that input instead of going interactive.\n\nFor a quick reference with worked examples, run:  kelora -h\n\nTo search this reference, run:  kelora --help KEYWORD  (e.g. 'kelora --help -j', 'kelora --help --since', or 'kelora --help time')\n\nTopic-specific help (--help-rhai, --help-functions, --help-examples, ...) is listed under Help Options below."
)]
#[command(author = "Dirk Loss <mail@dirk-loss.de>")]
#[command(version)]
#[command(args_override_self = true)]
#[command(after_long_help = "Exit Codes:\n  \
    0    Success — the run did its job. By default Kelora is resilient: malformed\n       \
         lines in an otherwise-valid stream, and best-effort --exec transform\n       \
         errors, are reported on stderr and counted but do NOT change the exit code\n       \
         (use --strict to make the first such error fatal instead).\n  \
    1    Error. Any of:\n         \
         - a named input file could not be opened\n         \
         - an --assert condition failed\n         \
         - a gate stage saw input but never once succeeded (parsing failed on every\n           \
           line, or a --filter errored on every event)\n         \
         - a forbidden operation (e.g. mutating `conf` outside --begin)\n         \
         - with --strict, ANY single parse/filter/exec error (also aborts early)\n  \
    2    Invalid command-line usage (unknown flag, bad value, conflicting options, or a\n         \
         malformed config file).\n  \
    130  Interrupted (SIGINT / Ctrl-C).\n  \
    134  Internal panic (SIGABRT) — a bug; please report it.\n  \
    141  Broken pipe (SIGPIPE).\n  \
    143  Terminated (SIGTERM).")]
pub struct Cli {
    /// Input files (stdin if not specified, or use "-" to explicitly specify stdin)
    pub files: Vec<String>,

    /// Run without reading input (useful for scripts that only use --begin/--end stages)
    #[arg(long = "no-input", help_heading = "Input Options")]
    pub no_input: bool,

    /// Input format. Available formats: auto (default), auto-per-file, json, line, raw, logfmt, syslog, cef, csv, tsv, csvnh, tsvnh, combined, cols:<spec>, regex:<pattern>.
    /// With 'auto', the format is detected from the first non-empty line and applied to every line; for files that mix formats use a cascade (below) instead.
    /// Use cols:<spec> for column parsing, regex:<pattern> for regex parsing with named groups, and csv/tsv with optional type annotations.
    /// Built-in application-log formats: cri (Kubernetes container logs) plus glog, nginx-error, apache-error, log4j, python-logging, redis, s3, haproxy, iso8601-level (adapted from lnav). Select with -f <name>; most are also recognized by auto-detection. See --help-formats.
    /// Cascade mode: pass a comma-separated list (e.g. 'json,logfmt,line') to try each parser in order; the first success wins, so put catch-all fallbacks like 'line' or 'raw' last. Adds an '_format' field to each event.
    /// Repeat -f to build a cascade that includes spec-based parsers: -f json -f 'cols:ts(2) level *msg'. Each -f is tried in order; put catch-alls ('line', 'raw', 'cols:') last (regex declines non-matching lines, so it can sit earlier).
    /// Examples: -f json, -f json,line, -f json -f 'cols:ts level *msg', -f 'regex:(?P<code:int>\\d+) (?P<msg>.*)', -f 'csv status:int bytes:int'.
    #[arg(
        short = 'f',
        long = "input-format",
        default_value = "auto",
        action = clap::ArgAction::Append,
        num_args = 1,
        help_heading = "Input Options",
        value_parser = parse_format_value
    )]
    pub format: Vec<String>,

    /// Shortcut for -f json.
    #[arg(short = 'j', help_heading = "Input Options", conflicts_with = "format")]
    pub json_input: bool,

    /// File processing order.
    #[arg(
        long = "file-order",
        value_enum,
        value_name = "ORDER",
        default_value = "cli",
        help_heading = "Input Options"
    )]
    pub file_order: FileOrder,

    /// Merge already-sorted input files by timestamp; aborts on missing timestamps, parse failures, or per-file disorder.
    #[arg(long = "merge-sorted", help_heading = "Input Options")]
    pub merge_ts: bool,

    /// Skip the first N input lines.
    #[arg(long = "skip-lines", value_name = "N", help_heading = "Input Options")]
    pub skip_lines: Option<usize>,

    /// Read only the first N input lines (stops I/O early, complementing --take which limits output events)
    #[arg(long = "head", value_name = "N", help_heading = "Input Options")]
    pub head: Option<usize>,

    /// Start emitting sections from the line matching this regex (inclusive start).
    /// All --section-* regexes are unanchored; use ^...$ to match a whole line.
    #[arg(
        long = "section-from",
        value_name = "REGEX",
        help_heading = "Input Options",
        conflicts_with = "section_after"
    )]
    pub section_from: Option<String>,

    /// Start emitting sections after the line matching this regex (exclusive start)
    #[arg(
        long = "section-after",
        value_name = "REGEX",
        help_heading = "Input Options",
        conflicts_with = "section_from"
    )]
    pub section_after: Option<String>,

    /// Stop before the line matching this regex (exclusive end)
    #[arg(
        long = "section-before",
        value_name = "REGEX",
        help_heading = "Input Options",
        conflicts_with = "section_through"
    )]
    pub section_before: Option<String>,

    /// Stop after emitting the line matching this regex (inclusive end)
    #[arg(
        long = "section-through",
        value_name = "REGEX",
        help_heading = "Input Options",
        conflicts_with = "section_before"
    )]
    pub section_through: Option<String>,

    /// Maximum number of sections to process (default: -1 for unlimited)
    #[arg(
        long = "max-sections",
        value_name = "N",
        default_value = "-1",
        help_heading = "Input Options"
    )]
    pub max_sections: i64,

    /// Keep only input lines matching this regex pattern (applied before ignore-lines)
    #[arg(
        long = "keep-lines",
        value_name = "REGEX",
        help_heading = "Input Options"
    )]
    pub keep_lines: Option<String>,

    /// Ignore input lines matching this regex pattern.
    #[arg(
        long = "ignore-lines",
        value_name = "REGEX",
        help_heading = "Input Options"
    )]
    pub ignore_lines: Option<String>,

    /// Custom timestamp field name for parsing.
    #[arg(
        long = "ts-field",
        value_name = "FIELD",
        help_heading = "Input Options"
    )]
    pub ts_field: Option<String>,

    /// Custom timestamp format for parsing (uses chrono format strings)
    #[arg(
        long = "ts-format",
        value_name = "FORMAT",
        help_heading = "Input Options"
    )]
    pub ts_format: Option<String>,

    /// Assume timezone for input timestamps without timezone info (default: UTC).
    /// Use 'local' for system local time.
    /// Examples: 'Europe/Berlin', 'local', 'UTC'.
    #[arg(long = "input-tz", value_name = "TZ", help_heading = "Input Options")]
    pub input_tz: Option<String>,

    /// Multi-line event detection strategy. Supply values like `timestamp`,
    /// `timestamp:format=%Y-%m-%d %H-%M-%S`, `regex:match=^START`, or
    /// `regex:match=^START:end=^END$`. See `kelora --help-multiline` for details.
    #[arg(
        short = 'M',
        long = "multiline",
        value_name = "STRATEGY",
        help_heading = "Input Options"
    )]
    pub multiline: Option<String>,

    /// Join multiline lines with: space (default), newline, or empty.
    #[arg(
        long = "multiline-join",
        value_enum,
        value_name = "JOIN",
        default_value_t = MultilineJoin::Space,
        help_heading = "Input Options"
    )]
    pub multiline_join: MultilineJoin,

    /// Extract text before separator to specified field (runs before parsing)
    #[arg(
        long = "extract-prefix",
        value_name = "FIELD",
        help_heading = "Input Options"
    )]
    pub extract_prefix: Option<String>,

    /// Separator string for prefix extraction (default: pipe '|')
    #[arg(
        long = "prefix-sep",
        value_name = "SEP",
        default_value = "|",
        help_heading = "Input Options"
    )]
    pub prefix_sep: String,

    /// Column separator for cols:<spec> format (default: whitespace)
    #[arg(long = "cols-sep", value_name = "SEP", help_heading = "Input Options")]
    pub cols_sep: Option<String>,

    /// Pre-run a Rhai script before any other stage runs.
    #[arg(
        long = "begin",
        value_name = "EXPR",
        help_heading = "Processing Options",
        help = "Pre-run a Rhai script before any other stage runs.\n\nTypical use: seed the global `conf` map with lookup tables or shared context.\n\nHelpers available only here:\n  read_lines(path) -> Array<String>  # UTF-8, one entry per line\n  read_file(path)  -> String         # UTF-8, entire file contents\n\nData stored in `conf` becomes read-only afterwards. See --help-rhai for stage order."
    )]
    pub begin: Option<String>,

    #[arg(
        long = "filter",
        value_name = "EXPR",
        help_heading = "Processing Options",
        help = "Boolean filter expression; events where it evaluates to true are kept.\n\nCan be combined with --include (-I) to call helper functions defined in an\ninclude file. Include files used with --filter must contain only function\ndefinitions — top-level statements are rejected with an error.\n\nExample:\n  kelora -I helpers.rhai --filter 'is_error(e.level)' app.log\n\nSee --help-rhai for expression syntax."
    )]
    pub filters: Vec<String>,

    /// Transform/process exec scripts evaluated on each event. See --help-rhai for stage semantics.
    #[arg(
        short = 'e',
        long = "exec",
        value_name = "EXPR",
        help_heading = "Processing Options"
    )]
    pub execs: Vec<String>,

    /// Execute script from file (contents run in the exec stage).
    #[arg(
        short = 'E',
        long = "exec-file",
        value_name = "FILE",
        help_heading = "Processing Options"
    )]
    pub exec_files: Vec<String>,

    /// Assertion expressions that must evaluate to true. Violations are reported to stderr;
    /// processing continues unless --strict is enabled. See --help-rhai for expression syntax.
    #[arg(
        long = "assert",
        value_name = "EXPR",
        help_heading = "Processing Options"
    )]
    pub asserts: Vec<String>,
    #[arg(
        short = 'I',
        long = "include",
        value_name = "FILE",
        help_heading = "Processing Options",
        help = "Include a Rhai file of helper functions, loaded into the adjacent script stage.\n\nCommand-line position selects which stage the file applies to:\n  --include before a --filter/--exec    → that filter/exec stage\n  --include before the first stage       → the --begin stage (if present)\n  --include after the last stage         → the --end stage (if present)\n\nEach stage has its own scope: an include's functions are only visible to\nthe stage it is attached to. Repeat --include to share helpers across\nseveral stages. An include that attaches to begin/end has no effect unless\nthat --begin/--end stage exists.\n\nWhen used with --filter, the include file must contain only function\ndefinitions. Top-level statements (side effects) are rejected with an error.\n\nExample:\n  kelora -I helpers.rhai --filter 'is_error(e.level)' app.log"
    )]
    pub includes: Vec<String>,

    /// Run once after processing completes (post-processing stage). Ideal for summarising metrics or emitting reports. The global `metrics` map from track_*() calls is accessible here.
    #[arg(long = "end", value_name = "EXPR", help_heading = "Processing Options")]
    pub end: Option<String>,

    /// Allow Rhai scripts to create directories and write files on disk (required for file helpers like append_file or mkdir).
    #[arg(long = "allow-fs-writes", help_heading = "Processing Options")]
    pub allow_fs_writes: bool,

    /// Enable access to a sliding window of N+1 recent events (needed for window_* functions).
    #[arg(long = "window", value_name = "N", help_heading = "Processing Options")]
    pub window_size: Option<usize>,

    /// Aggregate events into fixed-size spans (count or duration) before running a span-close hook.
    #[arg(
        long = "span",
        value_name = "N|DURATION|FIELD",
        help_heading = "Processing Options",
        help = "Aggregate events into consecutive spans.\n  --span <N>         Close after every N events that pass filters.\n  --span <DURATION>  Close on aligned time windows (e.g. 5m, 1h, 30s).\n  --span <FIELD>     Close when the specified field value changes.\nUse with --span-close to run a Rhai snippet when each span finishes."
    )]
    pub span: Option<String>,

    /// Close span after a period of inactivity (mutually exclusive with --span)
    #[arg(
        long = "span-idle",
        value_name = "DURATION",
        help_heading = "Processing Options",
        help = "Close span after this duration of inactivity (e.g. --span-idle 5m). Requires timestamps and cannot be combined with --span."
    )]
    pub span_idle: Option<String>,

    /// Rhai snippet executed once every time a span closes.
    #[arg(
        long = "span-close",
        value_name = "EXPR",
        help_heading = "Processing Options",
        help = "Run a Rhai snippet when each span closes. Within the hook, read span.start, span.end, span.id, span.events, span.size, and span.metrics for span context. span.metrics carries per-window values only for additive aggregators (count, sum, avg, unique, bucket); non-additive ones (min, max, percentiles, cardinality, top, bottom) are omitted with a warning, so use span.events for those."
    )]
    pub span_close: Option<String>,

    /// Exit on first error (fail-fast behavior). Use --no-strict to force resilient mode, overriding a config default.
    #[arg(long = "strict", help_heading = "Error Handling")]
    pub strict: bool,

    /// Disable strict error handling (resilient mode)
    #[arg(
        long = "no-strict",
        hide = true,
        help_heading = "Error Handling",
        overrides_with = "strict"
    )]
    pub no_strict: bool,

    /// Abort on invalid UTF-8 instead of decoding losslessly.
    #[arg(
        long = "strict-utf8",
        help_heading = "Error Handling",
        help = "Abort on the first non-UTF-8 byte instead of the default lossy decoding.\n\nBy default, kelora tolerates non-UTF-8 input the way grep does: invalid byte sequences are replaced with U+FFFD (\u{fffd}) and a diagnostic reports how many lines were affected, so a single bad byte no longer truncates the rest of the stream. Pass --strict-utf8 to restore hard failure (exit 1) on invalid UTF-8."
    )]
    pub strict_utf8: bool,

    /// Cap the bytes a single line may use (circuit breaker; default 64MiB, 0 disables).
    #[arg(
        long = "max-line-bytes",
        value_name = "SIZE",
        help_heading = "Input Options",
        help = "Cap the number of bytes a single input line may consume in memory (default 64MiB).\n\nThis is a safety circuit breaker against runaway memory: a newline-free stream — including a tiny gzip/zstd payload that decompresses into one enormous line — would otherwise grow without bound and exhaust RAM. No real log line approaches the default, so it normally never triggers.\n\nWhen a line exceeds the cap it is truncated to the cap and a warning (\u{1f538}) reports how many lines were clipped; the run still succeeds (exit 0). With --strict an over-limit line is a hard error (exit 1) instead. Accepts a byte count or an IEC/SI suffix (64MiB, 1GiB, 1048576); 0/off/unlimited disables the cap.\n\nReading is streamed, so a large multi-line compressed file is unaffected — only a single over-long line trips this."
    )]
    pub max_line_bytes: Option<String>,

    /// Show detailed error information (use multiple times for more verbosity: -v, -vv, -vvv)
    #[arg(short = 'v', long = "verbose", action = clap::ArgAction::Count, help_heading = "Error Handling")]
    pub verbose: u8,

    /// Include only events with these log levels.
    #[arg(
        short = 'l',
        long = "levels",
        help_heading = "Filtering Options",
        help = "Include only events with these log levels (comma-separated, case-insensitive).\n\nUse comma-separated values for OR logic: --levels ERROR,WARN\nMultiple flags create sequential AND filters (advanced)."
    )]
    pub levels: Vec<String>,

    /// Exclude events with these log levels.
    #[arg(
        short = 'L',
        long = "exclude-levels",
        help_heading = "Filtering Options",
        help = "Exclude events with these log levels (comma-separated, case-insensitive)."
    )]
    pub exclude_levels: Vec<String>,

    /// Output only specific fields.
    #[arg(
        short = 'k',
        long = "keys",
        value_delimiter = ',',
        help_heading = "Filtering Options",
        help = "Output only these fields (comma-separated list).",
        long_help = "Output only these fields, in the order given (comma-separated list).\n\nActs on whole top-level fields. Nested values that --discover prints as dotted or bracketed paths (e.g. api.queries, tags[]) are NOT selectable here — flatten them first, e.g. --exec 'e.val = e.get_path(\"api.queries\")' then -k val. (A top-level field whose literal name contains a dot is matched as-is.)"
    )]
    pub keys: Vec<String>,

    /// Exclude specific fields from output.
    #[arg(
        short = 'K',
        long = "exclude-keys",
        value_delimiter = ',',
        help_heading = "Filtering Options",
        help = "Exclude these fields from output (comma-separated list).",
        long_help = "Exclude these fields from output (comma-separated list).\n\nActs on whole top-level fields; nested values (the dotted/bracketed paths shown by --discover) cannot be dropped here — remove them in an --exec stage instead."
    )]
    pub exclude_keys: Vec<String>,

    /// Start showing entries on or newer than the specified date.
    #[arg(
        long = "since",
        value_name = "TIME",
        help_heading = "Filtering Options",
        allow_hyphen_values = true,
        help = "Keep only events at or after this time.\n\nAccepts journalctl-style timestamps (e.g., 2024-01-15T12:00:00Z, '2024-01-15 12:00', '1h', '-30m', 'yesterday'). Can also use 'until+DURATION', 'until-DURATION', 'now+DURATION', or 'now-DURATION' anchors. See --help-time."
    )]
    pub since: Option<String>,

    /// Stop showing entries on or older than the specified date.
    #[arg(
        long = "until",
        value_name = "TIME",
        help_heading = "Filtering Options",
        allow_hyphen_values = true,
        help = "Keep only events at or before this time.\n\nAccepts journalctl-style timestamps (e.g., 2024-01-15T12:00:00Z, '2024-01-15 12:00', '1h', '+30m', 'tomorrow'). Can also use 'since+DURATION', 'since-DURATION', 'now+DURATION', or 'now-DURATION' anchors. See --help-time."
    )]
    pub until: Option<String>,

    /// Limit output to the first N events.
    #[arg(
        short = 'n',
        long = "take",
        value_name = "N",
        help_heading = "Filtering Options"
    )]
    pub take: Option<usize>,

    /// Show N lines before each match (requires filtering)
    #[arg(
        short = 'B',
        long = "before-context",
        value_name = "N",
        help_heading = "Filtering Options"
    )]
    pub before_context: Option<usize>,

    /// Show N lines after each match (requires filtering)
    #[arg(
        short = 'A',
        long = "after-context",
        value_name = "N",
        help_heading = "Filtering Options"
    )]
    pub after_context: Option<usize>,

    /// Show N lines before and after each match (requires filtering)
    #[arg(
        short = 'C',
        long = "context",
        value_name = "N",
        help_heading = "Filtering Options"
    )]
    pub context: Option<usize>,

    /// Output format.
    #[arg(
        short = 'F',
        long = "output-format",
        value_enum,
        value_name = "FORMAT",
        default_value = "default",
        help = "Output format.\n\nFormats:\n  default   Colored key-value output\n  json      JSON Lines (one object per line)\n  logfmt    Key=value pairs on one line\n  inspect   Debug view with type information\n  levelmap  Compact level timeline\n  keymap    First-character map for one selected field\n  tailmap   Percentile map for one numeric field\n  csv       Comma-separated with header row\n  tsv       Tab-separated with header row\n  csvnh     CSV without header row\n  tsvnh     TSV without header row\n\nSee --help-formats for requirements, extracted fields, and examples.",
        help_heading = "Output Options"
    )]
    pub output_format: OutputFormat,

    /// Shortcut for -F json.
    #[arg(
        short = 'J',
        help_heading = "Output Options",
        conflicts_with = "output_format"
    )]
    pub json_output: bool,

    /// Output only core fields.
    #[arg(short = 'c', long = "core", help_heading = "Output Options")]
    pub core: bool,

    /// Output file for formatted events.
    #[arg(
        short = 'o',
        long = "output-file",
        value_name = "FILE",
        help_heading = "Output Options"
    )]
    pub output_file: Option<String>,

    /// Suppress events (formatter output)
    #[arg(short = 'q', long = "quiet", help_heading = "Output Options")]
    pub quiet: bool,

    /// Show warnings (🔸) — problems that did not stop the run. Overrides a
    /// KELORA_NO_WARNINGS / config default and re-enables warnings in data-only
    /// modes like --metrics/--drain/--discover.
    #[arg(long = "warnings", help_heading = "Output Options", overrides_with_all = ["no_warnings", "warnings"])]
    pub warnings: bool,

    /// Suppress warnings (🔸). Errors still print; use --silent to hide those too.
    #[arg(long = "no-warnings", help_heading = "Output Options", overrides_with_all = ["warnings", "no_warnings"])]
    pub no_warnings: bool,

    /// Show hints (💡) — advisory suggestions (zero-result/typo/format tips).
    /// Overrides a KELORA_NO_HINTS / config default and re-enables hints in
    /// data-only modes like --metrics/--drain/--discover.
    #[arg(long = "hints", help_heading = "Output Options", overrides_with_all = ["no_hints", "hints"])]
    pub hints: bool,

    /// Suppress hints (💡, zero-result hints, format/typo tips).
    #[arg(long = "no-hints", help_heading = "Output Options", overrides_with_all = ["hints", "no_hints"])]
    pub no_hints: bool,

    /// Show both warnings and hints (shortcut for --warnings --hints), even in
    /// data-only modes. Error summaries always show unless --silent.
    #[arg(long = "diagnostics", help_heading = "Output Options", overrides_with_all = ["no_diagnostics", "diagnostics"])]
    pub diagnostics: bool,

    /// Suppress both warnings and hints (shortcut for --no-warnings --no-hints),
    /// plus per-line verbose errors. Error and parse summaries still print; use
    /// --silent to hide those too.
    #[arg(long = "no-diagnostics", help_heading = "Output Options", overrides_with_all = ["diagnostics", "no_diagnostics"])]
    pub no_diagnostics: bool,

    /// Silence pipeline stdout/stderr emitters (events/diagnostics/stats/terminal metrics); script output still allowed. Metrics files still write. Use --no-silent to override a config default.
    #[arg(long = "silent", help_heading = "Output Options")]
    pub silent: bool,

    /// Disable a silent default coming from config.
    #[arg(long = "no-silent", hide = true, help_heading = "Output Options")]
    pub no_silent: bool,

    /// Enable Rhai print/eprint output.
    #[arg(long = "script-output", help_heading = "Output Options", overrides_with_all = ["no_script_output", "script_output"])]
    pub script_output: bool,

    /// Suppress Rhai print/eprint and side-effect warnings (implied by data-only modes).
    #[arg(long = "no-script-output", hide = true, help_heading = "Output Options", overrides_with_all = ["script_output", "no_script_output"])]
    pub no_script_output: bool,

    /// Append a legend to map outputs (levelmap/keymap/tailmap) even when piped.
    /// By default the legend shows only when stdout is a terminal.
    #[arg(long = "legend", help_heading = "Output Options", overrides_with_all = ["no_legend", "legend"])]
    pub legend: bool,

    /// Suppress the legend on map outputs (levelmap/keymap/tailmap).
    #[arg(long = "no-legend", help_heading = "Output Options", overrides_with_all = ["legend", "no_legend"])]
    pub no_legend: bool,

    /// Output only field values (omit keys).
    #[arg(short = 'b', long = "brief", help_heading = "Default Format Options")]
    pub brief: bool,

    /// Expand nested structures (maps/arrays) with indentation.
    #[arg(long = "expand-nested", help_heading = "Default Format Options")]
    pub expand_nested: bool,

    /// Always word-wrap wide events onto indented continuation lines, even when
    /// piped or redirected. By default wrapping is enabled only when stdout is a
    /// terminal, so piped output stays one line per event.
    #[arg(
        long = "wrap",
        help_heading = "Default Format Options",
        overrides_with_all = ["no_wrap", "wrap"]
    )]
    pub wrap: bool,

    /// Never word-wrap; keep each event on a single line.
    #[arg(
        long = "no-wrap",
        help_heading = "Default Format Options",
        overrides_with_all = ["wrap", "no_wrap"]
    )]
    pub no_wrap: bool,

    /// Normalize the primary timestamp field to RFC3339 (ISO 8601 compatible).
    /// Modifies event data - affects all output formats.
    #[arg(long = "normalize-ts", help_heading = "Processing Options")]
    pub normalize_ts: bool,

    /// Display timestamps as local RFC3339 (ISO 8601 compatible).
    /// Display-only - only affects default formatter output.
    #[arg(
        short = 'z',
        long = "show-ts-local",
        help_heading = "Default Format Options"
    )]
    pub format_timestamps_local: bool,

    /// Display timestamps as UTC RFC3339 (ISO 8601 compatible).
    /// Display-only - only affects default formatter output.
    #[arg(
        short = 'Z',
        long = "show-ts-utc",
        help_heading = "Default Format Options"
    )]
    pub format_timestamps_utc: bool,

    /// Force colored output.
    #[arg(long = "force-color", help_heading = "Display Options", overrides_with_all = ["no_color", "force_color"])]
    pub force_color: bool,

    /// Disable colored output.
    #[arg(long = "no-color", help_heading = "Display Options", overrides_with_all = ["force_color", "no_color"])]
    pub no_color: bool,

    /// Insert a centered marker when time gaps grow large.
    #[arg(
        long = "mark-gaps",
        value_name = "DURATION",
        help_heading = "Display Options",
        help = "Insert a centered marker when the time delta between events exceeds the given duration.\nExample: --mark-gaps 30s prints a divider when consecutive events are separated by >=30s."
    )]
    pub mark_gaps: Option<String>,

    /// Force emoji prefixes (override auto-detection)
    #[arg(long = "force-emoji", help_heading = "Display Options", overrides_with_all = ["no_emoji", "force_emoji"])]
    pub force_emoji: bool,

    /// Disable emoji prefixes.
    #[arg(long = "no-emoji", help_heading = "Display Options", overrides_with_all = ["force_emoji", "no_emoji"])]
    pub no_emoji: bool,

    /// Enable parallel processing (default: sequential processing). Use --no-parallel to force sequential, overriding a config default.
    #[arg(short = 'P', long = "parallel", help_heading = "Performance Options")]
    pub parallel: bool,

    /// Disable parallel processing explicitly (default mode is sequential).
    #[arg(
        long = "no-parallel",
        hide = true,
        help_heading = "Performance Options",
        overrides_with = "parallel"
    )]
    pub no_parallel: bool,

    /// Number of worker threads (0 = auto-detect, one per available core)
    #[arg(
        long = "threads",
        value_name = "N",
        default_value_t = 0,
        help_heading = "Performance Options"
    )]
    pub threads: usize,

    /// Batch size (events per batch) for parallel processing.
    #[arg(
        long = "batch-size",
        value_name = "N",
        help_heading = "Performance Options"
    )]
    pub batch_size: Option<usize>,

    /// Batch timeout in milliseconds.
    #[arg(
        long = "batch-timeout",
        value_name = "MS",
        default_value_t = 200,
        help_heading = "Performance Options",
        help = "Flush partially full parallel batches after this idle period (milliseconds). Lower values reduce latency; higher values improve throughput."
    )]
    pub batch_timeout: u64,

    /// Disable ordered output.
    #[arg(long = "unordered", help_heading = "Performance Options")]
    pub no_preserve_order: bool,

    /// Show stats only (implies -q/--quiet). Use -s for default (table), or --stats=FORMAT for explicit format.
    #[arg(
        short = 's',
        long = "stats",
        value_enum,
        value_name = "FORMAT",
        require_equals = true,
        num_args = 0..=1,
        default_missing_value = "table",
        help_heading = "Metrics and Stats",
        help = "Show stats only (implies -q/--quiet).\n\nFormats: table, json\n\nExamples:\n  -s              Default table format\n  --stats=json    JSON output\n\nUse --no-stats to override a config default. Note the '=': --stats=json (a space, as in '-s json', is read as a filename)."
    )]
    pub stats: Option<StatsFormat>,

    /// Disable processing statistics explicitly (default: off).
    #[arg(
        long = "no-stats",
        hide = true,
        help_heading = "Metrics and Stats",
        overrides_with = "stats"
    )]
    pub no_stats: bool,

    /// Show stats alongside events (rare case).
    #[arg(long = "with-stats", help_heading = "Metrics and Stats")]
    pub with_stats: bool,

    /// Show metrics only (implies -q/--quiet). Use -m for default (table), or --metrics=FORMAT for explicit format.
    #[arg(
        short = 'm',
        long = "metrics",
        value_enum,
        value_name = "FORMAT",
        require_equals = true,
        num_args = 0..=1,
        default_missing_value = "auto",
        help_heading = "Metrics and Stats",
        help = "Show metrics only (implies -q/--quiet).\n\nFormats: short (first 5), full, tsv, json. Bare -m auto-selects: the\nhuman-readable table on a terminal, tsv when piped or redirected (like ls).\n\ntsv emits one tab-separated 'metric<TAB>key<TAB>value' record per line, sorted\nby count descending, so | head is top-N and | tail is bottom-N.\n\nExamples:\n  -m               Auto (table on a TTY, tsv when piped)\n  --metrics=full   Force the table even through a pipe\n  --metrics=tsv    Force the record stream even to a TTY\n  --metrics=short  Abbreviated (first 5 items)\n  --metrics=json   JSON output\n\nUse --no-metrics to override a config default. Note the '=': --metrics=json (a space, as in '-m json', is read as a filename)."
    )]
    pub metrics: Option<MetricsFormat>,

    /// Disable tracked metrics explicitly (default: off).
    #[arg(
        long = "no-metrics",
        hide = true,
        help_heading = "Metrics and Stats",
        overrides_with = "metrics"
    )]
    pub no_metrics: bool,

    /// Show metrics alongside events (rare case).
    #[arg(long = "with-metrics", help_heading = "Metrics and Stats")]
    pub with_metrics: bool,

    /// Write metrics to file (JSON format). Can combine with -m for both table and file.
    #[arg(
        long = "metrics-file",
        value_name = "FILE",
        help_heading = "Metrics and Stats",
        help = "Persist the metrics map (populated by track_*()) to disk as JSON."
    )]
    pub metrics_file: Option<String>,

    /// Frequency table: count occurrences per distinct value of FIELD. Shorthand for track_freq.
    #[arg(
        long = "freq",
        value_name = "FIELD",
        help_heading = "Metrics and Stats",
        help = "Frequency table: count occurrences per distinct value of FIELD.\n\nShorthand for track_freq(\"FIELD\", e.FIELD). Runs after all filters/transforms\nand implies -m. Repeatable. Nested fields use dotted paths (e.g. user.id).\nResults are sorted by count descending, so piping the tsv output to head gives\nthe top-N and tail gives the bottom-N (no --top/--bottom flags needed).\nControl output with --metrics=short|full|tsv|json or --metrics-file.\n\nExamples:\n  --freq level\n  --filter 'e.status>=500' --freq url\n  --freq url | head        # top URLs (tsv auto-selected when piped)\n  --freq url | tail        # rarest URLs"
    )]
    pub freq: Vec<String>,

    /// Summarize a numeric FIELD: count, min, max, avg, p50/p95/p99. Shorthand for track_stats.
    #[arg(
        long = "describe",
        value_name = "FIELD",
        help_heading = "Metrics and Stats",
        help = "Summarize a numeric FIELD: count, min, max, avg, p50/p95/p99.\n\nShorthand for track_stats(\"FIELD\", e.FIELD). Runs after all filters/transforms\nand implies -m. Repeatable. Non-numeric/missing values are skipped.\nControl output with --metrics=short|full|tsv|json or --metrics-file.\n\nExample:\n  --describe duration_ms"
    )]
    pub describe: Vec<String>,

    /// Estimate the number of distinct values of FIELD. Shorthand for track_cardinality.
    #[arg(
        long = "card",
        value_name = "FIELD",
        help_heading = "Metrics and Stats",
        help = "Estimate the number of distinct values of FIELD (HyperLogLog).\n\nShorthand for track_cardinality(\"FIELD\", e.FIELD). Runs after all\nfilters/transforms and implies -m. Repeatable. Missing values are skipped.\nThe count is approximate (~1% error) but uses constant memory, so it scales to\nhigh-cardinality fields where track_freq/track_unique would not.\nControl output with --metrics=short|full|tsv|json or --metrics-file.\n\nExamples:\n  --card user.id\n  --filter 'e.status>=500' --card client_ip"
    )]
    pub card: Vec<String>,

    /// Summarize log templates using Drain (summary-only, requires --keys with exactly one field).
    #[arg(
        long = "drain",
        value_enum,
        value_name = "FORMAT",
        require_equals = true,
        num_args = 0..=1,
        default_missing_value = "table",
        help_heading = "Template Discovery",
        help = "Summarize log templates using Drain (summary-only; requires --keys with exactly one field; sequential mode only).\n\nFormats:\n  table (default)  Clean output: count + template\n  full             Detailed: adds line ranges + sample + template ID\n  id               Stable output: template_id + template (sorted by ID)\n  json             JSON with all metadata\n\nExamples:\n  --drain          Clean table (count + template)\n  --drain=full     With line numbers, samples, and IDs\n  --drain=id       Stable ID list for diffs\n  --drain=json     JSON output for scripting"
    )]
    pub drain: Option<DrainFormat>,

    /// Discover field names, types, and cardinality from the log stream.
    #[arg(
        short = 'd',
        long = "discover",
        value_enum,
        value_name = "FORMAT",
        require_equals = true,
        num_args = 0..=1,
        default_missing_value = "table",
        conflicts_with = "discover_final_fields",
        help_heading = "Field Discovery",
        help = "Profile fields: names, inferred types, cardinality estimates, and sample values.\nNested maps and arrays are flattened to 3 levels (e.g. user.name, user.roles[]).\nImplies -q/--quiet (events suppressed). Sequential mode only.\n\nFormats: table (default), json\n\nExamples:\n  -d, --discover      Table summary\n  -d=json, --discover=json     Machine-readable JSON"
    )]
    pub discover_fields: Option<DiscoverFieldsFormat>,

    /// Profile final emitted fields instead of parsed input fields.
    #[arg(
        short = 'D',
        long = "discover-final",
        value_enum,
        value_name = "FORMAT",
        require_equals = true,
        num_args = 0..=1,
        default_missing_value = "table",
        conflicts_with = "discover_fields",
        help_heading = "Field Discovery",
        help = "Profile final emitted fields after scripts and filters.\nImplies -q/--quiet (events suppressed). Sequential mode only.\n\nFormats: table (default), json\n\nExamples:\n  -D, --discover-final          Table summary of final fields\n  -D=json, --discover-final=json     Machine-readable JSON of final fields"
    )]
    pub discover_final_fields: Option<DiscoverFieldsFormat>,

    /// Maximum depth for flattening nested maps/arrays during field discovery.
    #[arg(
        long = "discover-depth",
        value_name = "N",
        help_heading = "Field Discovery",
        help = "Maximum depth for flattening nested maps/arrays into dotted keys (default: 3).\nDepth counts descents from the event root: a.b.c is depth 3.\nUse a higher value to inspect deeply nested JSON; use 1 to see only top-level fields; use 0 for unlimited depth.\n\nExamples:\n  --discover --discover-depth=5    Descend up to 5 levels deep\n  --discover --discover-depth=1    Top-level fields only\n  --discover --discover-depth=0    Unlimited depth"
    )]
    pub discover_depth: Option<usize>,

    /// Specify custom configuration file path.
    #[arg(
        long = "config-file",
        value_name = "FILE",
        help_heading = "Configuration Options"
    )]
    pub config_file: Option<String>,

    /// Ignore configuration file.
    #[arg(long = "ignore-config", help_heading = "Configuration Options")]
    pub ignore_config: bool,

    /// Use alias from configuration file.
    #[arg(
        short = 'a',
        long = "alias",
        value_name = "NAME",
        help_heading = "Configuration Options"
    )]
    pub alias: Vec<String>,

    /// Save current command as alias to configuration file.
    #[arg(
        long = "save-alias",
        value_name = "NAME",
        help_heading = "Configuration Options"
    )]
    pub save_alias: Option<String>,

    /// Show configuration file and exit.
    #[arg(long = "show-config", help_heading = "Configuration Options")]
    pub show_config: bool,

    /// Edit configuration file in default editor and exit.
    #[arg(long = "edit-config", help_heading = "Configuration Options")]
    pub edit_config: bool,

    /// Show Rhai scripting guide and exit.
    #[arg(long = "help-rhai", help_heading = "Help Options")]
    pub help_rhai: bool,

    /// Show available Rhai functions and exit (optional KEYWORD filters by name/description; smartcase: lowercase matches any case, uppercase is exact)
    #[arg(long = "help-functions", value_name = "KEYWORD", num_args = 0..=1, default_missing_value = "", help_heading = "Help Options")]
    pub help_functions: Option<String>,

    /// Show practical Rhai examples and exit.
    #[arg(long = "help-examples", help_heading = "Help Options")]
    pub help_examples: bool,

    /// Show time format help and exit.
    #[arg(long = "help-time", help_heading = "Help Options")]
    pub help_time: bool,

    /// Show multiline strategy help and exit.
    #[arg(long = "help-multiline", help_heading = "Help Options")]
    pub help_multiline: bool,

    /// Show regex format help and exit.
    #[arg(long = "help-regex", help_heading = "Help Options")]
    pub help_regex: bool,

    /// Show format reference and exit.
    #[arg(long = "help-formats", help_heading = "Help Options")]
    pub help_formats: bool,

    /// Generate shell completion script and exit.
    #[arg(long = "completions", value_enum, help_heading = "Help Options")]
    pub completions: Option<ShellCompletion>,
}

impl Cli {
    /// Fold the `--no-*` override flags into the settings they negate.
    ///
    /// After this call `stats`/`metrics` are `None` whenever the matching
    /// `no_stats`/`no_metrics` flag is set, and `parallel`/`strict` are `false`
    /// whenever `no_parallel`/`no_strict` is set. Settings whose negative flag
    /// is not set are left exactly as they were.
    pub fn resolve_boolean_flags(&mut self) {
        // Plain switches: the negative flag can only ever turn the switch off.
        self.parallel = self.parallel && !self.no_parallel;
        self.strict = self.strict && !self.no_strict;

        // Format-valued options: keep the selected format only while the
        // corresponding --no-* flag is absent.
        let keep_stats = !self.no_stats;
        self.stats = self.stats.take().filter(|_| keep_stats);

        let keep_metrics = !self.no_metrics;
        self.metrics = self.metrics.take().filter(|_| keep_metrics);
    }
}

/// Build the effective script text by placing every include file's contents
/// in front of `script`.
///
/// Include files are read in the order given; each one is followed by a single
/// newline so that adjacent files (and the main script) never run together on
/// one line.
///
/// # Errors
/// Fails with the offending path in the message as soon as one include file
/// cannot be read.
fn preprocess_script_with_includes(script: &str, includes: &[String]) -> Result<String> {
    let mut combined = String::new();

    for path in includes {
        match std::fs::read_to_string(path) {
            Ok(text) => {
                combined += &text;
                // Separator between this file and whatever comes next.
                combined += "\n";
            }
            Err(e) => {
                return Err(anyhow::anyhow!(
                    "Failed to read include file '{}': {}",
                    path,
                    e
                ));
            }
        }
    }

    // The user's own script always comes last.
    combined += script;
    Ok(combined)
}

/// Read every include file and return it as an `IncludeFile` (path + content),
/// preserving the order of `includes`.
///
/// # Errors
/// Stops at the first file that cannot be read and reports its path.
fn load_include_files(includes: &[String]) -> Result<Vec<crate::config::IncludeFile>> {
    let mut loaded = Vec::with_capacity(includes.len());

    for path in includes {
        let content = match std::fs::read_to_string(path) {
            Ok(text) => text,
            Err(e) => {
                return Err(anyhow::anyhow!(
                    "Failed to read include file '{}': {}",
                    path,
                    e
                ));
            }
        };
        loaded.push(crate::config::IncludeFile {
            path: path.clone(),
            content,
        });
    }

    Ok(loaded)
}

/// Split the `includes` values into those belonging to the begin stage and
/// those belonging to the end stage, judged purely by command-line position.
///
/// - begin: includes whose argument index is lower than every
///   `filters`/`execs`/`exec_files` index
/// - end: includes whose argument index is higher than every such index
///
/// Includes that sit between script stages appear in neither list. When the
/// command line has no filter/exec/exec-file stage at all, every include is
/// returned as a begin include.
///
/// NOTE(review): `asserts` positions are not considered here, although
/// `get_ordered_script_stages` does count them as stage positions — confirm
/// this difference is intended.
fn get_begin_end_includes(matches: &ArgMatches) -> Result<(Vec<String>, Vec<String>)> {
    let mut begin_includes = Vec::new();
    let mut end_includes = Vec::new();

    let include_indices = match matches.indices_of("includes") {
        Some(indices) => indices,
        None => return Ok((begin_includes, end_includes)),
    };
    // Present whenever indices are present, so the unwrap cannot fail here.
    let include_values = matches.get_many::<String>("includes").unwrap();

    // Argument indices of every script stage that can "own" an include.
    let stage_positions: Vec<usize> = ["filters", "execs", "exec_files"]
        .iter()
        .filter_map(|id| matches.indices_of(*id))
        .flatten()
        .collect();
    let first_stage = stage_positions.iter().copied().min();
    let last_stage = stage_positions.iter().copied().max();

    for (include_index, include_file) in include_indices.zip(include_values) {
        match (first_stage, last_stage) {
            (Some(first), Some(last)) => {
                if include_index < first {
                    begin_includes.push(include_file.clone());
                } else if include_index > last {
                    end_includes.push(include_file.clone());
                }
                // Anything in between is attached to a stage by
                // get_ordered_script_stages instead.
            }
            // No script stages at all: everything counts as a begin include.
            _ => begin_includes.push(include_file.clone()),
        }
    }

    Ok((begin_includes, end_includes))
}

impl Cli {
    /// Build the ordered list of pipeline stages from the parsed command line.
    ///
    /// Positional stages (`filters`, `asserts`, `levels`, `exclude_levels`,
    /// `execs`, `exec_files`) are collected together with their clap argument
    /// indices and then sorted by index, so they come out in the order they
    /// were given. Each `includes` value is attached to the first
    /// filter/assert/exec/exec-file stage that follows it on the command line;
    /// an include with no following stage is dropped here. The metrics-sugar
    /// stages for `freq`, `describe` and `card` are appended after all
    /// positional stages.
    ///
    /// # Errors
    /// Returns an error when an include file or exec file cannot be read, when
    /// a level list contains no non-empty entry, or when one of the
    /// `synthesize_*_stage` helpers rejects a field name.
    ///
    /// # Process exit
    /// An include that precedes an assert stage is not reported through the
    /// returned `Result`: a message is printed to stderr and the process exits
    /// with status 2.
    pub fn get_ordered_script_stages(&self, matches: &ArgMatches) -> Result<Vec<ScriptStageType>> {
        use std::collections::HashMap;

        // (argument index, stage) pairs; sorted by index further down.
        let mut stages_with_indices = Vec::new();
        // Stage argument index -> include paths that precede that stage.
        let mut include_map: HashMap<usize, Vec<String>> = HashMap::new();

        // Split a comma-separated level list, trimming entries and dropping
        // empty ones. Shared by the include and exclude level stages, so the
        // error text below mentions --levels in both cases.
        let parse_level_list = |raw: &str| -> Result<Vec<String>> {
            let levels: Vec<String> = raw
                .split(',')
                .map(|s| s.trim())
                .filter(|s| !s.is_empty())
                .map(|s| s.to_string())
                .collect();

            if levels.is_empty() {
                Err(anyhow::anyhow!(
                    "Level filters require at least one level (e.g. --levels error,critical)"
                ))
            } else {
                Ok(levels)
            }
        };

        // First, collect all include arguments and map them to the next script stage
        if let Some(include_indices) = matches.indices_of("includes") {
            let include_values: Vec<&String> =
                matches.get_many::<String>("includes").unwrap().collect();

            // Collect all script stage positions (level stages are not
            // script stages and never receive includes)
            let mut script_positions = Vec::new();

            if let Some(filter_indices) = matches.indices_of("filters") {
                script_positions.extend(filter_indices);
            }
            if let Some(assert_indices) = matches.indices_of("asserts") {
                script_positions.extend(assert_indices);
            }
            if let Some(exec_indices) = matches.indices_of("execs") {
                script_positions.extend(exec_indices);
            }
            if let Some(exec_file_indices) = matches.indices_of("exec_files") {
                script_positions.extend(exec_file_indices);
            }

            // Sorted so that `find` below returns the nearest following stage
            script_positions.sort();

            // Associate each include with the next script stage
            for (pos, include_index) in include_indices.enumerate() {
                let include_file = include_values[pos].clone();

                // Find the next script stage position after this include
                if let Some(&next_script_pos) = script_positions
                    .iter()
                    .find(|&&script_pos| script_pos > include_index)
                {
                    include_map
                        .entry(next_script_pos)
                        .or_default()
                        .push(include_file);
                }
                // If no script stage follows, the include will be ignored (could warn here in future)
            }
        }

        // Get filter stages with their indices; their includes are loaded as
        // separate IncludeFile values rather than prepended to the script text
        if let Some(filter_indices) = matches.indices_of("filters") {
            let filter_values: Vec<&String> =
                matches.get_many::<String>("filters").unwrap().collect();
            for (pos, index) in filter_indices.enumerate() {
                let script = filter_values[pos].clone();
                let empty_includes = Vec::new();
                let includes = include_map.get(&index).unwrap_or(&empty_includes);
                let include_files = load_include_files(includes)?;
                stages_with_indices.push((
                    index,
                    ScriptStageType::Filter {
                        script,
                        includes: include_files,
                    },
                ));
            }
        }

        // Get assert stages with their indices
        if let Some(assert_indices) = matches.indices_of("asserts") {
            let assert_values: Vec<&String> =
                matches.get_many::<String>("asserts").unwrap().collect();
            for (pos, index) in assert_indices.enumerate() {
                let script = assert_values[pos].clone();
                let empty_includes = Vec::new();
                let includes = include_map.get(&index).unwrap_or(&empty_includes);
                // Assertions don't support includes (unlike filters, which load
                // theirs above). This is reported directly and terminates the
                // process with status 2 instead of returning Err.
                if !includes.is_empty() {
                    eprintln!(
                        "{}",
                        crate::config::format_error_message_auto(
                            "--include is not supported with --assert (assertions must be pure expressions)"
                        )
                    );
                    std::process::exit(2);
                }
                stages_with_indices.push((index, ScriptStageType::Assert(script)));
            }
        }

        // Get level filter stages (includes)
        if let Some(level_indices) = matches.indices_of("levels") {
            let level_values: Vec<&String> =
                matches.get_many::<String>("levels").unwrap().collect();
            for (pos, index) in level_indices.enumerate() {
                let raw = level_values[pos];
                let include_levels = parse_level_list(raw)?;
                stages_with_indices.push((
                    index,
                    ScriptStageType::LevelFilter {
                        include: include_levels,
                        exclude: Vec::new(),
                    },
                ));
            }
        }

        // Get level filter stages (exclusions)
        if let Some(exclude_indices) = matches.indices_of("exclude_levels") {
            let exclude_values: Vec<&String> = matches
                .get_many::<String>("exclude_levels")
                .unwrap()
                .collect();
            for (pos, index) in exclude_indices.enumerate() {
                let raw = exclude_values[pos];
                let exclude_levels = parse_level_list(raw)?;
                stages_with_indices.push((
                    index,
                    ScriptStageType::LevelFilter {
                        include: Vec::new(),
                        exclude: exclude_levels,
                    },
                ));
            }
        }

        // Get exec stages with their indices; include contents are prepended
        // to the script text
        if let Some(exec_indices) = matches.indices_of("execs") {
            let exec_values: Vec<&String> = matches.get_many::<String>("execs").unwrap().collect();
            for (pos, index) in exec_indices.enumerate() {
                let script = exec_values[pos].clone();
                let empty_includes = Vec::new();
                let includes = include_map.get(&index).unwrap_or(&empty_includes);
                let preprocessed_script = preprocess_script_with_includes(&script, includes)?;
                stages_with_indices.push((index, ScriptStageType::Exec(preprocessed_script)));
            }
        }

        // Get exec-file stages with their indices: the file is read here and
        // from then on treated exactly like an inline exec script
        if let Some(exec_file_indices) = matches.indices_of("exec_files") {
            let exec_file_values: Vec<&String> =
                matches.get_many::<String>("exec_files").unwrap().collect();
            for (pos, index) in exec_file_indices.enumerate() {
                let file_path = &exec_file_values[pos];
                let script_content = std::fs::read_to_string(file_path).map_err(|e| {
                    anyhow::anyhow!("Failed to read exec file '{}': {}", file_path, e)
                })?;
                let empty_includes = Vec::new();
                let includes = include_map.get(&index).unwrap_or(&empty_includes);
                let preprocessed_script =
                    preprocess_script_with_includes(&script_content, includes)?;
                stages_with_indices.push((index, ScriptStageType::Exec(preprocessed_script)));
            }
        }

        // Sort by original command line position
        stages_with_indices.sort_by_key(|(index, _)| *index);

        // Detect consecutive --levels flags (likely user mistake). Only a hint
        // is printed to stderr; the stages themselves are left untouched.
        let mut prev_was_level_include = false;
        for (_, stage) in &stages_with_indices {
            match stage {
                ScriptStageType::LevelFilter {
                    include,
                    exclude: _,
                } => {
                    if !include.is_empty() {
                        if prev_was_level_include {
                            // Found consecutive --levels flags
                            let hint = crate::config::format_hint_message_auto(
                                "Multiple --levels flags create sequential filters (AND). Use comma-separated for OR: --levels ERROR,WARN"
                            );
                            eprintln!("{}", hint);
                            break; // Only show hint once
                        }
                        prev_was_level_include = true;
                    } else {
                        // This is an exclude-only stage, reset the flag
                        prev_was_level_include = false;
                    }
                }
                _ => {
                    // Non-level stage, reset the flag
                    prev_was_level_include = false;
                }
            }
        }

        // Extract just the stages
        let mut stages: Vec<ScriptStageType> = stages_with_indices
            .into_iter()
            .map(|(_, stage)| stage)
            .collect();

        // Metrics-sugar flags (--freq / --describe / --card) are non-positional,
        // so they always run LAST — after every --filter/-l/-e stage. That gives
        // them the same post-pipeline vantage as --discover-final: they see fields
        // created/renamed by earlier stages and only events that survived filtering.
        // Among themselves they are appended in a fixed order: all freq stages,
        // then all describe stages, then all card stages.
        for field in &self.freq {
            stages.push(ScriptStageType::Exec(synthesize_freq_stage(field)?));
        }
        for field in &self.describe {
            stages.push(ScriptStageType::Exec(synthesize_describe_stage(field)?));
        }
        for field in &self.card {
            stages.push(ScriptStageType::Exec(synthesize_card_stage(field)?));
        }

        Ok(stages)
    }

    /// Return the begin and end scripts with their positional includes
    /// prepended.
    ///
    /// Each element of the returned pair is `None` when the corresponding
    /// `begin`/`end` script was not supplied; in that case the includes for
    /// that side are never read.
    ///
    /// # Errors
    /// Propagates failures from `get_begin_end_includes` and from reading an
    /// include file.
    pub fn get_processed_begin_end(
        &self,
        matches: &ArgMatches,
    ) -> Result<(Option<String>, Option<String>)> {
        let (begin_includes, end_includes) = get_begin_end_includes(matches)?;

        // Includes are only ever attached to an explicit begin/end script; a
        // begin/end stage is deliberately NOT synthesized from includes alone:
        //   - Every stage has its own function namespace, and an include that
        //     precedes the first filter/exec stage is already loaded into that
        //     stage by get_ordered_script_stages. Turning it into a phantom
        //     begin stage as well created a second copy whose only visible
        //     effect was running the include's top-level statements again at
        //     startup (double execution).
        //   - Without a begin/end script, the functions defined by such a
        //     phantom stage would live in a namespace no user code runs in, so
        //     the stage had no purpose.
        let processed_begin = self
            .begin
            .as_ref()
            .map(|begin_script| preprocess_script_with_includes(begin_script, &begin_includes))
            .transpose()?;

        let processed_end = self
            .end
            .as_ref()
            .map(|end_script| preprocess_script_with_includes(end_script, &end_includes))
            .transpose()?;

        Ok((processed_begin, processed_end))
    }
}

/// Render a string as a double-quoted Rhai string literal.
///
/// `\` and `"` are backslash-escaped. Newline, carriage return and tab are
/// emitted as the escape sequences `\n`, `\r` and `\t` rather than as raw
/// characters: a raw line break inside a double-quoted literal would leave the
/// string unterminated in the generated script. All other characters are
/// copied through unchanged, so the output for ordinary field names is the
/// same as before.
fn rhai_string_literal(s: &str) -> String {
    // +2 for the surrounding quotes; escapes may grow it further.
    let mut literal = String::with_capacity(s.len() + 2);
    literal.push('"');
    for c in s.chars() {
        match c {
            '\\' => literal.push_str("\\\\"),
            '"' => literal.push_str("\\\""),
            '\n' => literal.push_str("\\n"),
            '\r' => literal.push_str("\\r"),
            '\t' => literal.push_str("\\t"),
            other => literal.push(other),
        }
    }
    literal.push('"');
    literal
}

/// Whether `s` is a bare top-level identifier (so `e.<s>` is safe and cheap).
/// Anything else (dotted paths, brackets, dashes) goes through `get_path`.
///
/// A name qualifies when it starts with an ASCII letter or `_`, continues with
/// ASCII letters, digits or `_`, and is not one of Rhai's active or reserved
/// keywords. Keywords are excluded because Rhai does not accept them as the
/// property name in dot notation, so fields such as `thread`, `module` or
/// `default` must not be rendered as `e.thread` etc. Listing a word here that
/// a given Rhai version would in fact accept is harmless: the name merely
/// takes the string-keyed accessor path instead.
fn is_simple_field_ident(s: &str) -> bool {
    // Active and reserved keywords as listed in the Rhai book.
    const RHAI_KEYWORDS: &[&str] = &[
        // active
        "true", "false", "let", "const", "if", "else", "switch", "do", "while", "until",
        "loop", "for", "in", "continue", "break", "fn", "private", "is_def_fn", "this",
        "return", "throw", "try", "catch", "import", "export", "as", "global", "Fn",
        "call", "curry", "type_of", "print", "debug", "eval", "is_shared", "is_def_var",
        // reserved for future use
        "var", "static", "shared", "is", "goto", "exit", "match", "case", "public",
        "protected", "new", "use", "with", "module", "package", "super", "thread",
        "spawn", "go", "await", "async", "sync", "yield", "default", "void", "null", "nil",
    ];

    let mut chars = s.chars();
    let starts_like_ident =
        matches!(chars.next(), Some(c) if c.is_ascii_alphabetic() || c == '_');

    starts_like_ident
        && chars.all(|c| c.is_ascii_alphanumeric() || c == '_')
        && !RHAI_KEYWORDS.contains(&s)
}

/// Produce the Rhai expression that reads `field` from the event `e` for the
/// metrics-sugar stages.
///
/// A simple identifier becomes plain property access (`e.field`), which is
/// cheap and yields `()` when the field is missing.
///
/// Every other spelling (dots, brackets, special characters) is ambiguous: a
/// column that is literally named `ip.dst` (common in CSV/TSV headers) looks
/// the same as the nested path `ip.dst`. The ambiguity is resolved the way
/// `-k` resolves it: a literal top-level key with that exact name wins, and
/// nested traversal via `get_path` is only the fallback. Events stay nested
/// maps (JSON is never flattened into dotted keys), so `field in e` holds
/// *only* for a literal dotted column and nested-JSON access keeps going
/// through `get_path` as documented. Either branch evaluates to `()` for an
/// absent value, so the track_* functions skip missing values in both cases.
fn field_value_accessor(field: &str) -> String {
    if !is_simple_field_ident(field) {
        let key = rhai_string_literal(field);
        return format!("(if {key} in e {{ e[{key}] }} else {{ get_path(e, {key}) }})");
    }

    format!("e.{field}")
}

/// Generate the exec-stage script behind `--freq FIELD`: a single
/// `track_freq(<name>, <value>)` call, where `<name>` is the field name as a
/// Rhai string literal and `<value>` is the accessor expression for it.
///
/// # Errors
/// Rejects an empty field name.
fn synthesize_freq_stage(field: &str) -> Result<String> {
    if !field.is_empty() {
        let metric_name = rhai_string_literal(field);
        let value_expr = field_value_accessor(field);
        return Ok(format!("track_freq({metric_name}, {value_expr})"));
    }

    Err(anyhow::anyhow!(
        "--freq requires a field name, e.g. --freq level"
    ))
}

/// Generate the exec-stage script behind `--describe FIELD`: a single
/// `track_stats(<name>, <value>)` call, where `<name>` is the field name as a
/// Rhai string literal and `<value>` is the accessor expression for it.
///
/// # Errors
/// Rejects an empty field name.
fn synthesize_describe_stage(field: &str) -> Result<String> {
    if !field.is_empty() {
        let metric_name = rhai_string_literal(field);
        let value_expr = field_value_accessor(field);
        return Ok(format!("track_stats({metric_name}, {value_expr})"));
    }

    Err(anyhow::anyhow!(
        "--describe requires a field name, e.g. --describe duration_ms"
    ))
}

/// Generate the exec-stage script behind `--card FIELD`: a single
/// `track_cardinality(<name>, <value>)` call, where `<name>` is the field name
/// as a Rhai string literal and `<value>` is the accessor expression for it.
///
/// # Errors
/// Rejects an empty field name.
fn synthesize_card_stage(field: &str) -> Result<String> {
    if !field.is_empty() {
        let metric_name = rhai_string_literal(field);
        let value_expr = field_value_accessor(field);
        return Ok(format!("track_cardinality({metric_name}, {value_expr})"));
    }

    Err(anyhow::anyhow!(
        "--card requires a field name, e.g. --card user.id"
    ))
}

/// Validate a format value and hand it back unchanged when it is acceptable.
///
/// Accepted shapes, checked in this order:
/// - `regex:<pattern>` and `cols:<spec>`, where the part after the prefix must
///   not be blank;
/// - anything starting with `csv:`, `csv `, `tsv:` or `tsv ` (CSV/TSV with
///   field specs / type annotations);
/// - a comma-separated cascade whose entries are each one of the simple
///   formats or a built-in application-log format;
/// - a single standard format name (matched case-insensitively) or a built-in
///   application-log format.
///
/// # Errors
///
/// Returns a human-readable message describing why the value was rejected.
fn parse_format_value(s: &str) -> Result<String, String> {
    // `regex:<pattern>` — only the presence of a non-blank pattern is checked here.
    if let Some(pattern) = s.strip_prefix("regex:") {
        return if pattern.trim().is_empty() {
            Err("regex format requires a pattern, e.g., 'regex:(?P<field>\\d+)'".to_string())
        } else {
            Ok(s.to_string())
        };
    }

    // `cols:<spec>` — likewise, the spec just has to be non-blank.
    if let Some(spec) = s.strip_prefix("cols:") {
        return if spec.trim().is_empty() {
            Err("cols format requires a specification, e.g., 'cols:ts level *msg'".to_string())
        } else {
            Ok(s.to_string())
        };
    }

    // CSV/TSV followed by field specs (type annotations) is accepted as-is.
    let has_field_specs = ["csv:", "csv ", "tsv:", "tsv "]
        .iter()
        .any(|prefix| s.starts_with(*prefix));
    if has_field_specs {
        return Ok(s.to_string());
    }

    // Cascade (comma-separated list of simple formats). Full validation
    // happens in parse_input_format_spec; this only catches obvious mistakes.
    if s.contains(',') {
        for entry in s.split(',') {
            let shown = entry.trim();
            let name = shown.to_lowercase();
            if name.is_empty() {
                return Err(format!("Empty entry in cascade format list: '{}'", s));
            }

            // Simple formats first; built-in application-log formats (adapted
            // from lnav) are only looked up when the name is not a simple one.
            let is_simple = matches!(
                name.as_str(),
                "json" | "line" | "raw" | "logfmt" | "syslog" | "cef" | "combined"
            );
            if is_simple || crate::parsers::lnav_formats::by_name(&name).is_some() {
                continue;
            }

            // cols/regex cannot appear inside a comma list; point at the
            // repeated-flag form instead.
            let wants_cols_or_regex = matches!(name.as_str(), "cols" | "regex")
                || name.starts_with("cols:")
                || name.starts_with("regex:");
            let hint = if wants_cols_or_regex {
                " To use cols:/regex: in a cascade, pass repeated -f flags instead, e.g. -f json -f 'cols:ts level *msg'."
            } else {
                ""
            };
            return Err(format!(
                "Unknown or unsupported format '{}' in cascade list '{}'. \
Allowed in a comma list: json, line, raw, logfmt, syslog, cef, combined, and built-in application-log formats ({}).{}",
                shown,
                s,
                crate::parsers::lnav_formats::names_csv(),
                hint
            ));
        }
        return Ok(s.to_string());
    }

    // Single name: a standard format, or a built-in application-log format
    // (adapted from lnav), e.g. -f log4j. Matching ignores case, but the
    // caller's original spelling is what gets returned.
    let lowered = s.to_lowercase();
    let is_standard = matches!(
        lowered.as_str(),
        "auto"
            | "auto-per-file"
            | "json"
            | "line"
            | "raw"
            | "logfmt"
            | "syslog"
            | "cef"
            | "csv"
            | "tsv"
            | "csvnh"
            | "tsvnh"
            | "combined"
            | "cols"
    );
    if is_standard || crate::parsers::lnav_formats::by_name(lowered.as_str()).is_some() {
        return Ok(s.to_string());
    }

    Err(format!(
        "Unknown format '{}'. Supported formats: auto, auto-per-file, json, line, raw, logfmt, syslog, cef, csv, tsv, csvnh, tsvnh, combined, cols:<spec>, regex:<pattern>, or a built-in application-log format ({})",
        s,
        crate::parsers::lnav_formats::names_csv()
    ))
}

#[cfg(test)]
mod tests {
    use super::*;
    use clap::CommandFactory;
    use std::io::Write;
    use tempfile::NamedTempFile;

    /// Parse `args` into both the raw `ArgMatches` and the typed `Cli`.
    ///
    /// Panics if `try_get_matches_from` rejects the arguments.
    fn parse_cli(args: &[String]) -> (Cli, ArgMatches) {
        let arg_matches = Cli::command()
            .try_get_matches_from(args.iter().map(String::as_str))
            .expect("failed to build matches");
        let parsed = Cli::parse_from(args.to_vec());
        (parsed, arg_matches)
    }

    #[test]
    fn ordered_script_stages_preserve_cli_sequence() {
        // Back the `-E` stage with a real file so it can be recognised by content.
        let mut script_file = NamedTempFile::new().expect("temp file");
        writeln!(script_file, "meta.count = meta.count + 1;").expect("write script");
        let script_path = script_file.path().to_str().unwrap().to_owned();

        let args: Vec<String> = [
            "kelora",
            "--filter",
            "e.status >= 400",
            "-e",
            "e.alert = true;",
            "--filter",
            "e.status < 500",
            "-E",
            script_path.as_str(),
        ]
        .iter()
        .copied()
        .map(String::from)
        .collect();

        let (cli, matches) = parse_cli(&args);
        let stages = cli
            .get_ordered_script_stages(&matches)
            .expect("stages should be parsed");
        assert_eq!(stages.len(), 4);

        // Stages must come back in flag order: filter, exec, filter, exec-file.
        let first_filter_ok = matches!(
            &stages[0],
            ScriptStageType::Filter { script, .. } if script == "e.status >= 400"
        );
        assert!(first_filter_ok);

        let inline_exec_ok = matches!(
            &stages[1],
            ScriptStageType::Exec(script) if script == "e.alert = true;"
        );
        assert!(inline_exec_ok);

        let second_filter_ok = matches!(
            &stages[2],
            ScriptStageType::Filter { script, .. } if script == "e.status < 500"
        );
        assert!(second_filter_ok);

        let file_exec_ok = matches!(
            &stages[3],
            ScriptStageType::Exec(script) if script.contains("meta.count")
        );
        assert!(file_exec_ok);
    }

    #[test]
    fn ordered_script_stages_error_when_exec_file_missing() {
        use std::time::{SystemTime, UNIX_EPOCH};

        // The process id plus a nanosecond timestamp keeps the name from
        // colliding with a file that really exists.
        let nanos = SystemTime::now()
            .duration_since(UNIX_EPOCH)
            .unwrap()
            .as_nanos();
        let file_name = format!("kelora-missing-{}-{}.rhai", std::process::id(), nanos);
        let missing_path = std::env::temp_dir().join(file_name);
        // Best-effort removal; a failure here is deliberately ignored.
        let _ = std::fs::remove_file(&missing_path);
        let missing = missing_path.to_string_lossy().into_owned();

        let args = [
            String::from("kelora"),
            String::from("-E"),
            missing.clone(),
        ];

        let (cli, matches) = parse_cli(&args);
        let err = cli
            .get_ordered_script_stages(&matches)
            .expect_err("should report missing file");

        let expected = format!("Failed to read exec file '{}':", missing);
        assert!(err.to_string().contains(&expected));
    }

    #[test]
    fn ordered_script_stages_empty_when_no_scripts_specified() {
        // Only the program name: no filter/exec/level flags at all.
        let args = [String::from("kelora")];

        let (cli, matches) = parse_cli(&args);
        let stages = cli
            .get_ordered_script_stages(&matches)
            .expect("empty stages should succeed");

        assert!(stages.is_empty());
    }

    #[test]
    fn ordered_script_stages_capture_level_filters_in_order() {
        let exec_script = "track_freq(\"level\", e.level)";
        let args: Vec<String> = [
            "kelora",
            "-l",
            "error,critical",
            "-e",
            exec_script,
            "--exclude-levels",
            "debug",
        ]
        .iter()
        .copied()
        .map(String::from)
        .collect();

        let (cli, matches) = parse_cli(&args);
        let stages = cli
            .get_ordered_script_stages(&matches)
            .expect("level stages should parse");
        assert_eq!(stages.len(), 3);

        let expected_include = vec!["error".to_string(), "critical".to_string()];
        let expected_exclude = vec!["debug".to_string()];

        // `-l` comes first and carries only the include list.
        let include_stage_ok = matches!(
            &stages[0],
            ScriptStageType::LevelFilter { include, exclude }
                if include == &expected_include && exclude.is_empty()
        );
        assert!(include_stage_ok);

        // The inline exec sits between the two level filters.
        let exec_stage_ok = matches!(
            &stages[1],
            ScriptStageType::Exec(script) if script == exec_script
        );
        assert!(exec_stage_ok);

        // `--exclude-levels` comes last and carries only the exclude list.
        let exclude_stage_ok = matches!(
            &stages[2],
            ScriptStageType::LevelFilter { include, exclude }
                if include.is_empty() && exclude == &expected_exclude
        );
        assert!(exclude_stage_ok);
    }

    #[test]
    fn include_single_file_to_exec_stage() {
        let mut helper_file = NamedTempFile::new().expect("temp file");
        writeln!(helper_file, "fn helper() {{ return 42; }}").expect("write include");
        let helper_path = helper_file.path().to_str().unwrap().to_owned();

        let args: Vec<String> = [
            "kelora",
            "-I",
            helper_path.as_str(),
            "--exec",
            "e.result = helper();",
        ]
        .iter()
        .copied()
        .map(String::from)
        .collect();

        let (cli, matches) = parse_cli(&args);
        let stages = cli
            .get_ordered_script_stages(&matches)
            .expect("stages should be parsed");

        assert_eq!(stages.len(), 1);
        match &stages[0] {
            ScriptStageType::Exec(script) => {
                // The include text is present and placed ahead of the exec body.
                assert!(script.contains("fn helper() { return 42; }"));
                assert!(script.contains("e.result = helper();"));
                assert!(script.starts_with("fn helper()"));
            }
            _ => panic!("Expected Exec stage"),
        }
    }

    #[test]
    fn include_multiple_files_to_single_stage() {
        let mut first_include = NamedTempFile::new().expect("temp file");
        writeln!(first_include, "fn helper1() {{ return 1; }}").expect("write include1");
        let first_path = first_include.path().to_str().unwrap().to_owned();

        let mut second_include = NamedTempFile::new().expect("temp file");
        writeln!(second_include, "fn helper2() {{ return 2; }}").expect("write include2");
        let second_path = second_include.path().to_str().unwrap().to_owned();

        let args: Vec<String> = [
            "kelora",
            "-I",
            first_path.as_str(),
            "-I",
            second_path.as_str(),
            "--exec",
            "e.result = helper1() + helper2();",
        ]
        .iter()
        .copied()
        .map(String::from)
        .collect();

        let (cli, matches) = parse_cli(&args);
        let stages = cli
            .get_ordered_script_stages(&matches)
            .expect("stages should be parsed");

        assert_eq!(stages.len(), 1);
        match &stages[0] {
            ScriptStageType::Exec(script) => {
                // Both includes and the exec body end up in the same stage.
                assert!(script.contains("fn helper1() { return 1; }"));
                assert!(script.contains("fn helper2() { return 2; }"));
                assert!(script.contains("e.result = helper1() + helper2();"));
            }
            _ => panic!("Expected Exec stage"),
        }
    }

    #[test]
    fn includes_apply_to_next_script_stage() {
        let mut first_include = NamedTempFile::new().expect("temp file");
        writeln!(first_include, "fn util1() {{ return 1; }}").expect("write include1");
        let first_path = first_include.path().to_str().unwrap().to_owned();

        let mut second_include = NamedTempFile::new().expect("temp file");
        writeln!(second_include, "fn util2() {{ return 2; }}").expect("write include2");
        let second_path = second_include.path().to_str().unwrap().to_owned();

        let args: Vec<String> = [
            "kelora",
            "-I",
            first_path.as_str(),
            "--exec",
            "e.val1 = util1();",
            "-I",
            second_path.as_str(),
            "--exec",
            "e.val2 = util2();",
        ]
        .iter()
        .copied()
        .map(String::from)
        .collect();

        let (cli, matches) = parse_cli(&args);
        let stages = cli
            .get_ordered_script_stages(&matches)
            .expect("stages should be parsed");

        assert_eq!(stages.len(), 2);

        // The first exec carries only the include that preceded it.
        match &stages[0] {
            ScriptStageType::Exec(script) => {
                assert!(script.contains("fn util1() { return 1; }"));
                assert!(script.contains("e.val1 = util1();"));
                assert!(!script.contains("fn util2() { return 2; }"));
            }
            _ => panic!("Expected Exec stage"),
        }

        // The second exec carries only the include placed between the two execs.
        match &stages[1] {
            ScriptStageType::Exec(script) => {
                assert!(script.contains("fn util2() { return 2; }"));
                assert!(script.contains("e.val2 = util2();"));
                assert!(!script.contains("fn util1() { return 1; }"));
            }
            _ => panic!("Expected Exec stage"),
        }
    }

    #[test]
    fn include_with_exec_file() {
        let mut shared_file = NamedTempFile::new().expect("temp file");
        writeln!(shared_file, "fn shared_util() {{ return 42; }}").expect("write include");
        let shared_path = shared_file.path().to_str().unwrap().to_owned();

        let mut script_file = NamedTempFile::new().expect("temp file");
        writeln!(script_file, "e.value = shared_util();").expect("write exec");
        let script_path = script_file.path().to_str().unwrap().to_owned();

        let args: Vec<String> = [
            "kelora",
            "-I",
            shared_path.as_str(),
            "-E",
            script_path.as_str(),
        ]
        .iter()
        .copied()
        .map(String::from)
        .collect();

        let (cli, matches) = parse_cli(&args);
        let stages = cli
            .get_ordered_script_stages(&matches)
            .expect("stages should be parsed");

        assert_eq!(stages.len(), 1);
        match &stages[0] {
            ScriptStageType::Exec(script) => {
                // The include applies to a file-backed exec stage as well.
                assert!(script.contains("fn shared_util() { return 42; }"));
                assert!(script.contains("e.value = shared_util();"));
            }
            _ => panic!("Expected Exec stage"),
        }
    }

    #[test]
    fn include_with_filter_stage() {
        let mut predicate_file = NamedTempFile::new().expect("temp file");
        writeln!(predicate_file, "fn is_error(level) {{ level == \"ERROR\" }}")
            .expect("write include");
        let predicate_path = predicate_file.path().to_str().unwrap().to_owned();

        let args: Vec<String> = [
            "kelora",
            "-I",
            predicate_path.as_str(),
            "--filter",
            "is_error(e.level)",
        ]
        .iter()
        .copied()
        .map(String::from)
        .collect();

        let (cli, matches) = parse_cli(&args);
        let stages = cli
            .get_ordered_script_stages(&matches)
            .expect("stages should be parsed");

        assert_eq!(stages.len(), 1);
        match &stages[0] {
            ScriptStageType::Filter { script, includes } => {
                // For filters the include is kept in `includes`; the filter
                // script itself stays exactly as typed.
                assert_eq!(includes.len(), 1);
                let include_text = &includes[0].content;
                assert!(include_text.contains("fn is_error(level) { level == \"ERROR\" }"));
                assert_eq!(script, "is_error(e.level)");
            }
            _ => panic!("Expected Filter stage"),
        }
    }

    #[test]
    fn include_error_when_file_missing() {
        let missing_path = "/non/existent/path.rhai";

        let args: Vec<String> = ["kelora", "-I", missing_path, "--exec", "e.test = true;"]
            .iter()
            .copied()
            .map(String::from)
            .collect();

        let (cli, matches) = parse_cli(&args);
        let err = cli
            .get_ordered_script_stages(&matches)
            .expect_err("should report missing include file");

        // The message must name the include path that could not be read.
        let expected = format!("Failed to read include file '{}':", missing_path);
        assert!(err.to_string().contains(&expected));
    }

    #[test]
    fn get_processed_begin_end_with_includes() {
        let mut setup_file = NamedTempFile::new().expect("temp file");
        writeln!(setup_file, "fn setup() {{ print('setup'); }}").expect("write include1");
        let setup_path = setup_file.path().to_str().unwrap().to_owned();

        let mut cleanup_file = NamedTempFile::new().expect("temp file");
        writeln!(cleanup_file, "fn cleanup() {{ print('cleanup'); }}").expect("write include2");
        let cleanup_path = cleanup_file.path().to_str().unwrap().to_owned();

        let args: Vec<String> = [
            "kelora",
            "-I",
            setup_path.as_str(),
            "--begin",
            "setup();",
            "--exec",
            "e.processed = true;",
            "-I",
            cleanup_path.as_str(),
            "--end",
            "cleanup();",
        ]
        .iter()
        .copied()
        .map(String::from)
        .collect();

        let (cli, matches) = parse_cli(&args);
        let (begin, end) = cli
            .get_processed_begin_end(&matches)
            .expect("should process begin/end");

        // The include placed before --begin is folded into the begin script.
        match begin {
            Some(begin_script) => {
                assert!(begin_script.contains("fn setup() { print('setup'); }"));
                assert!(begin_script.contains("setup();"));
            }
            None => panic!("expected a begin script"),
        }

        // The include placed before --end is folded into the end script.
        match end {
            Some(end_script) => {
                assert!(end_script.contains("fn cleanup() { print('cleanup'); }"));
                assert!(end_script.contains("cleanup();"));
            }
            None => panic!("expected an end script"),
        }
    }

    #[test]
    fn include_before_first_stage_without_begin_creates_no_begin_stage() {
        // With no explicit --begin, an include that precedes the first
        // filter/exec stage must not turn into a begin stage of its own. It is
        // already loaded into the exec stage that follows, so a begin copy
        // would run its top-level statements a second time at startup (the
        // double-execution bug).
        let mut setup_file = NamedTempFile::new().expect("temp file");
        writeln!(setup_file, "print('auto setup');").expect("write include");
        let setup_path = setup_file.path().to_str().unwrap().to_owned();

        let args: Vec<String> = [
            "kelora",
            "-I",
            setup_path.as_str(),
            "--exec",
            "e.processed = true;",
        ]
        .iter()
        .copied()
        .map(String::from)
        .collect();

        let (cli, matches) = parse_cli(&args);
        let (begin, end) = cli
            .get_processed_begin_end(&matches)
            .expect("should process begin/end");

        assert!(
            begin.is_none(),
            "include before the first stage must not create a begin stage without --begin"
        );
        assert!(end.is_none());
    }

    #[test]
    fn trailing_include_without_end_creates_no_end_stage() {
        // With no explicit --end, an include that follows every filter/exec
        // stage has nothing to attach to and must not become an end stage.
        let mut teardown_file = NamedTempFile::new().expect("temp file");
        writeln!(teardown_file, "print('auto teardown');").expect("write include");
        let teardown_path = teardown_file.path().to_str().unwrap().to_owned();

        let args: Vec<String> = [
            "kelora",
            "--exec",
            "e.processed = true;",
            "-I",
            teardown_path.as_str(),
        ]
        .iter()
        .copied()
        .map(String::from)
        .collect();

        let (cli, matches) = parse_cli(&args);
        let (begin, end) = cli
            .get_processed_begin_end(&matches)
            .expect("should process begin/end");

        assert!(begin.is_none());
        assert!(
            end.is_none(),
            "trailing include must not create an end stage without --end"
        );
    }

    #[test]
    fn field_value_accessor_simple_ident_uses_property_access() {
        // Plain identifiers map straight to `e.<name>`.
        for (field, expected) in [("level", "e.level"), ("user_id", "e.user_id")] {
            assert_eq!(field_value_accessor(field), expected);
        }
    }

    #[test]
    fn field_value_accessor_dotted_name_prefers_literal_then_nested() {
        // A dotted name may be a flat column literally called `ip.dst` (e.g.
        // from CSV) or a nested JSON path such as `user.id`. The accessor has
        // to try the literal top-level key first and fall back to nested
        // traversal only when that key is absent. That keeps `--freq`,
        // `--describe` and `--card` in line with `-k` for literal dotted
        // columns without changing the documented nested-path behavior.
        let expected = r#"(if "ip.dst" in e { e["ip.dst"] } else { get_path(e, "ip.dst") })"#;
        let actual = field_value_accessor("ip.dst");
        assert_eq!(actual, expected);
    }

    #[test]
    fn synthesize_freq_stage_uses_dotted_accessor() {
        // Dotted names get the literal-then-nested accessor expression.
        let dotted = synthesize_freq_stage("ip.dst").expect("freq stage");
        assert_eq!(
            dotted,
            r#"track_freq("ip.dst", (if "ip.dst" in e { e["ip.dst"] } else { get_path(e, "ip.dst") }))"#
        );

        // Simple identifiers keep the cheap property accessor.
        let simple = synthesize_freq_stage("level").expect("freq stage");
        assert_eq!(simple, r#"track_freq("level", e.level)"#);
    }
}