1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
// Optional allocator override: when the crate is built with the `jemalloc`
// Cargo feature, `tikv_jemallocator::Jemalloc` is registered as the
// process-wide global allocator. With the feature disabled this item is
// compiled out entirely.
#[cfg(feature = "jemalloc")]
#[global_allocator]
static GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc;
mod cli;
mod config;
mod destination;
mod enrich;
mod error;
mod format;
mod init;
mod journal;
mod manifest;
mod notify;
mod pipeline;
mod plan;
mod preflight;
mod quality;
mod redact;
mod resource;
mod scalar;
mod source;
mod sql;
mod state;
mod test_hook;
mod tuning;
mod types;
fn main() {
// F-NEW-F (0.7.5 audit): default log level was `error`, so every
// `log::warn!(...)` in the codebase (unused --param, --force as
// no-op, schema-drift advisories, redaction notices, plaintext
// credentials in URL, ...) was silently dropped unless the
// operator set RUST_LOG=warn. Showing warns by default makes
// these guardrails visible without changing anything for
// operators that already override RUST_LOG.
// Credential-redaction invariant (ADR-0014): the redact module names "logs"
// in its scope, but the `log::*` macros do not pass through the artifact-path
// redaction wired at the error/summary call sites — a `log::warn!("…{e}", e)`
// that captured a `scheme://user:password@host` connect error would print the
// password to stderr (and `main` defaults the filter to `warn`, so these
// lines are shown). Route every formatted line through `redacted_log_line` so
// the sink itself is the chokepoint: no call site has to remember.
use std::io::Write;
env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("warn"))
.format(|buf, record| {
let line = redact::redacted_log_line(
&buf.timestamp().to_string(),
record.level().as_str(),
record.target(),
&record.args().to_string(),
);
writeln!(buf, "{line}")
})
.init();
let cli = cli::parse_cli();
let json_errors = cli.json_errors;
if let Err(e) = cli::dispatch(cli) {
// redact strips credentials; sanitize_terminal strips ANSI/OSC control
// bytes a malicious source DB can embed in an error string (V9/CWE-150)
// so the top-level error line cannot rewrite/clear the operator terminal.
let msg = crate::pipeline::parent_ui::sanitize_terminal(&redact::redact_error(&e));
// Machine-actionable exit-code taxonomy (see `error::ExitClass`): a
// scheduler branches on the code (2=retryable, 3=data-integrity,
// 4=schema-drift, 1=generic) instead of grepping `msg`.
let exit_class = crate::error::classify_exit(&e);
if json_errors {
eprintln!(
"{}",
serde_json::json!({ "error": msg, "exit_class": exit_class })
);
} else {
eprintln!("Error: {msg}");
}
std::process::exit(exit_class);
}
}