noetl_executor/tools_bridge.rs
1//! Bridge from the CLI's YAML-parsed [`crate::playbook::Tool`] enum
2//! onto the [`noetl_tools`] registry's dispatch API.
3//!
4//! Added in R-1.1 PR-2c-1 per § H.10.4 of Appendix H of the global
5//! hybrid cloud blueprint; fleshed out with adapter helpers in
6//! R-1.1 PR-2c-2. This module is the integration surface between
7//! the CLI's parsed playbook and the shared tool registry the
8//! worker (R-1.3) also uses.
9//!
10//! ## Strategy B rollout
11//!
12//! Replacement of the CLI's inline tool implementations happens
13//! incrementally — one tool kind per sub-PR (PR-2c-3 rhai, PR-2c-4
14//! shell, PR-2c-5 http, PR-2c-6 duckdb, PR-2c-7 playbook, PR-2c-8
15//! auth + sink). This module ships the adapter layer in PR-2c-2;
16//! each subsequent sub-PR fills in one [`dispatch_via_registry`]
17//! match arm and replaces the matching CLI call site in
18//! `repos/cli/src/playbook_runner.rs`.
19//!
20//! ## Why a bridge instead of converting the Tool enum directly
21//!
22//! The CLI's [`crate::playbook::Tool`] enum and the registry's
23//! [`noetl_tools::registry::ToolConfig`] carry different invariants:
24//!
25//! - The CLI's `Tool::Auth { provider, scopes, project }` resolves
26//! credentials inline during dispatch. The worker resolves them at
27//! credential-resolution time (before tool dispatch). The bridge
28//! needs to know which mode to use; it's not a trivial enum cast.
29//! - The CLI's `Tool::Sink { target, format }` writes outputs through
30//! the runner's filesystem helpers. The registry would dispatch
31//! sinks through the same `noetl-tools` registry, but the tool kind
32//! doesn't exist on the worker side yet (PR-2c-8 may add it).
33//! - The CLI's `Tool::DuckDb { db, query, params }` opens a fresh
34//! DuckDB connection per call. `noetl-tools::tools::duckdb`
35//! manages a pool. Semantic difference; needs careful migration.
36//!
37//! Keeping the bridge explicit forces these decisions into one place
38//! instead of scattering them across each tool-kind sub-PR.
39//!
40//! ## GCS upload helper (R-3, noetl/ai-meta#31)
41//!
42//! [`gcs_upload`] wraps `object_store::gcp::GoogleCloudStorageBuilder`
43//! so the CLI's `SinkTarget::Gcs` arm no longer shells out to `gsutil`.
44//! Auth flows through the same provider chain as
45//! [`resolve_auth_to_bearer`]: workload identity on GKE, Application
46//! Default Credentials on dev hosts. The helper accepts a pluggable
47//! `Arc<dyn ObjectStore>` so integration tests substitute an
48//! `object_store::memory::InMemory` store without real GCS. See
49//! [`gcs_upload`] for the full credential-chain and error-shape notes.
50
51#![allow(dead_code)] // until PR-2c-4 onwards wires the call sites in.
52
53use std::collections::HashMap;
54use std::sync::Arc;
55use std::time::Instant;
56
57use anyhow::Result;
58use bytes::Bytes;
59use object_store::path::Path as StorePath;
60use object_store::ObjectStore;
61use object_store::PutPayload;
62use noetl_tools::auth::GcpAuth;
63use noetl_tools::context::ExecutionContext as ToolsExecutionContext;
64use noetl_tools::registry::{Tool as ToolsRegistryTool, ToolConfig};
65use noetl_tools::result::{ToolResult, ToolStatus};
66use noetl_tools::tools::{DuckdbTool, HttpTool, RhaiTool, ShellTool};
67use tracing::{info_span, Instrument};
68
69use crate::playbook::{AuthConfig as CliAuthConfig, CmdsList, SinkFormat, Tool};
70
71// ---------------------------------------------------------------------------
72// Bridge outcome — what the dispatch returns back to the caller.
73// ---------------------------------------------------------------------------
74
75/// Outcome of a bridged tool dispatch.
76///
77/// The shape matches the existing CLI surface where
78/// `PlaybookRunner::execute_tool` returns `Result<Option<String>>`:
79/// `result == Some(s)` for a successful tool execution that produced
80/// output the runner stores in `step_results[step].result`; `None`
81/// for tools that do not produce a per-step string result (e.g.
82/// fire-and-forget sinks).
83#[derive(Debug, Clone, PartialEq, Eq)]
84pub struct BridgeOutcome {
85 pub result: Option<String>,
86}
87
88impl BridgeOutcome {
89 pub fn empty() -> Self {
90 Self { result: None }
91 }
92}
93
94// ---------------------------------------------------------------------------
95// Bridge context — what the dispatch needs from the caller.
96// ---------------------------------------------------------------------------
97
98/// Per-call context for the bridge. Groups together what would
99/// otherwise be many parameters threaded through every dispatch site.
100///
101/// The CLI's `ExecutionContext` (`repos/cli/src/playbook_runner.rs`)
102/// has a different shape than [`ToolsExecutionContext`] — the CLI
103/// uses `HashMap<String, String>` for variables and tracks step
104/// results separately; `noetl-tools` uses `HashMap<String,
105/// serde_json::Value>` and bundles many more execution-level fields
106/// (server_url, worker_id, command_id, etc.).
107///
108/// `BridgeContext` is the narrow view the CLI hands to the bridge;
109/// [`to_tools_context`] expands it into the full
110/// [`ToolsExecutionContext`] shape.
111pub struct BridgeContext<'a> {
112 /// Execution id — required by [`ToolsExecutionContext`]. CLI
113 /// local mode synthesises this from the start time / playbook
114 /// path; the worker uses the snowflake id from `noetl.command`.
115 pub execution_id: i64,
116
117 /// Step name the bridged tool is running under.
118 pub step: &'a str,
119
120 /// CLI variables map (workload.*, vars.*, <step>.result, etc.).
121 pub variables: &'a HashMap<String, String>,
122
123 /// Control-plane server URL. Empty string when running in
124 /// CLI local mode without a server backend.
125 pub server_url: String,
126
127 /// Worker id / command id — `None` in CLI local mode.
128 pub worker_id: Option<String>,
129 pub command_id: Option<String>,
130}
131
132// ---------------------------------------------------------------------------
133// Adapters
134// ---------------------------------------------------------------------------
135
136/// Convert a [`BridgeContext`] into the [`ToolsExecutionContext`]
137/// shape `noetl-tools` tools expect. String variables become
138/// [`serde_json::Value::String`] entries; secrets stay empty (CLI
139/// local mode resolves credentials at the credential-resolver layer,
140/// not at tool dispatch).
141///
142/// Variable shape: **flat**. Each CLI variable `workload.region`
143/// becomes a JSON value at the same flat key in the resulting map.
144/// This matches what most `noetl-tools` tools (http / postgres / etc.)
145/// expect from their template engine. The rhai tool needs a
146/// *nested* shape so `workload.region` is reachable as a Rhai field
147/// access on a `workload` map; see [`to_tools_context_for_rhai`] for
148/// the restructured variant used inside the rhai dispatch arm.
149pub fn to_tools_context(bridge: &BridgeContext) -> ToolsExecutionContext {
150 let variables: HashMap<String, serde_json::Value> = bridge
151 .variables
152 .iter()
153 .map(|(k, v)| (k.clone(), serde_json::Value::String(v.clone())))
154 .collect();
155
156 ToolsExecutionContext {
157 execution_id: bridge.execution_id,
158 step: bridge.step.to_string(),
159 variables,
160 server_url: bridge.server_url.clone(),
161 worker_id: bridge.worker_id.clone(),
162 command_id: bridge.command_id.clone(),
163 ..ToolsExecutionContext::default()
164 }
165}
166
167/// Build a [`ToolsExecutionContext`] whose `variables` map matches the
168/// scope shape the CLI's inline `execute_rhai_script` produced — flat
169/// `workload.region` / `vars.x` / `<step>.<field>` keys grouped into
170/// nested objects so Rhai's `workload.region` / `vars.x` / `<step>.<field>`
171/// field-access syntax works.
172///
173/// PR-2c-3 introduces this for the rhai dispatch arm. Other tool
174/// kinds (http, postgres, duckdb, etc.) continue to consume the flat
175/// shape from [`to_tools_context`] because their template engines
176/// expect the `{{workload.region}}` lookup style, not Rhai-style
177/// field navigation.
178pub fn to_tools_context_for_rhai(bridge: &BridgeContext) -> ToolsExecutionContext {
179 let mut variables: HashMap<String, serde_json::Value> = HashMap::new();
180 let mut workload_map: serde_json::Map<String, serde_json::Value> = serde_json::Map::new();
181 let mut vars_map: serde_json::Map<String, serde_json::Value> = serde_json::Map::new();
182 let mut step_maps: HashMap<String, serde_json::Map<String, serde_json::Value>> =
183 HashMap::new();
184
185 for (key, value) in bridge.variables {
186 let val = serde_json::Value::String(value.clone());
187 if let Some(suffix) = key.strip_prefix("workload.") {
188 workload_map.insert(suffix.to_string(), val);
189 } else if let Some(suffix) = key.strip_prefix("vars.") {
190 vars_map.insert(suffix.to_string(), val);
191 } else if let Some((step, field)) = key.split_once('.') {
192 step_maps
193 .entry(step.to_string())
194 .or_default()
195 .insert(field.to_string(), val);
196 } else {
197 // Unprefixed keys land at the top level — same shape as
198 // [`to_tools_context`].
199 variables.insert(key.clone(), val);
200 }
201 }
202
203 if !workload_map.is_empty() {
204 variables.insert(
205 "workload".to_string(),
206 serde_json::Value::Object(workload_map),
207 );
208 }
209 if !vars_map.is_empty() {
210 variables.insert("vars".to_string(), serde_json::Value::Object(vars_map));
211 }
212 for (step, map) in step_maps {
213 variables.insert(step, serde_json::Value::Object(map));
214 }
215
216 ToolsExecutionContext {
217 execution_id: bridge.execution_id,
218 step: bridge.step.to_string(),
219 variables,
220 server_url: bridge.server_url.clone(),
221 worker_id: bridge.worker_id.clone(),
222 command_id: bridge.command_id.clone(),
223 ..ToolsExecutionContext::default()
224 }
225}
226
227/// Build a [`ToolConfig`] from a CLI [`Tool`] enum variant.
228///
229/// The `kind` string matches what [`noetl_tools::registry::ToolRegistry`]
230/// uses for dispatch. The `config` payload is the variant's fields
231/// serialized as JSON; the receiving tool deserializes its own
232/// expected schema from this value (e.g. `noetl_tools::tools::shell`
233/// expects `{"cmds": [...]}`).
234///
235/// `Tool::Unsupported` returns a `ToolConfig` with `kind: "unsupported"`
236/// — dispatch will fail at registry lookup, which matches the CLI's
237/// current behaviour of emitting an error.
238pub fn to_tools_config(tool: &Tool) -> ToolConfig {
239 let (kind, config) = match tool {
240 Tool::Shell { cmds } => {
241 // noetl-tools::ShellConfig expects a single `command`
242 // string. CLI's CmdsList::Multiple becomes a newline-
243 // joined block (one bash invocation with a multi-line
244 // script); CmdsList::Single becomes the string verbatim.
245 //
246 // Important: this is the per-call ToolConfig shape. The
247 // Tool::Shell arm of `dispatch_via_registry` does NOT use
248 // this helper because the CLI's runtime semantics require
249 // one bash invocation PER command (independent process,
250 // no shared cwd/env state) — the dispatch arm loops and
251 // builds per-command ToolConfigs via [`shell_command_config`].
252 (
253 "shell",
254 serde_json::json!({
255 "command": match cmds {
256 CmdsList::Single(s) => s.clone(),
257 CmdsList::Multiple(v) => v.join("\n"),
258 },
259 "shell": "bash",
260 "capture": true,
261 }),
262 )
263 }
264 Tool::Http {
265 method,
266 url,
267 headers,
268 params,
269 body,
270 auth: _, // resolved at dispatch time into a Bearer header; not threaded through ToolConfig.auth (see PR-2c-5)
271 } => (
272 "http",
273 // noetl-tools' HttpConfig deserializes the method via
274 // `#[serde(rename_all = "UPPERCASE")]`, so we emit the
275 // uppercased CLI string here. The body is wrapped as a
276 // JSON Value: if the CLI's body parses as JSON we pass the
277 // parsed Value (so reqwest serialises it as JSON with the
278 // right Content-Type); otherwise we pass it as a JSON
279 // string which noetl-tools sends verbatim as the body.
280 serde_json::json!({
281 "method": method.to_uppercase(),
282 "url": url,
283 "headers": headers,
284 "params": params,
285 "body": body.as_deref().map(http_body_value),
286 }),
287 ),
288 Tool::Playbook { path, args, input } => (
289 "playbook",
290 serde_json::json!({
291 "path": path,
292 "args": args,
293 "input": input,
294 }),
295 ),
296 Tool::DuckDb { db, query, params } => (
297 // noetl-tools' DuckdbConfig schema uses `db_path` (not
298 // `db`), `query` is required (so we substitute an empty
299 // string when the CLI doesn't carry one — the dispatch
300 // arm short-circuits in that case), and params are
301 // `Vec<serde_json::Value>` rather than `Vec<String>`.
302 // Conversion is faithful: a CLI string param becomes a
303 // JSON string value bound at the `?` placeholder by
304 // noetl-tools' DuckdbTool.
305 //
306 // Compatibility note: the CLI's pre-PR-2c-6
307 // `execute_duckdb_query` accepted but **ignored** the
308 // `params` field (signature was `_params: &[String]`).
309 // The bridge now binds them, which is a feature gain
310 // documented in the PR body and on the executor-crate-
311 // architecture wiki page.
312 "duckdb",
313 serde_json::json!({
314 "db_path": db,
315 "query": query.clone().unwrap_or_default(),
316 "params": params
317 .iter()
318 .map(|p| serde_json::Value::String(p.clone()))
319 .collect::<Vec<_>>(),
320 "as_objects": true,
321 }),
322 ),
323 Tool::Rhai { code, args } => (
324 "rhai",
325 serde_json::json!({
326 "code": code,
327 "args": args,
328 }),
329 ),
330 Tool::Auth { provider, scopes, project } => (
331 "auth",
332 serde_json::json!({
333 "provider": provider,
334 "scopes": scopes,
335 "project": project,
336 }),
337 ),
338 Tool::Sink { target, format } => (
339 "sink",
340 serde_json::json!({
341 "target": target_to_value(target),
342 "format": format!("{:?}", format).to_lowercase(),
343 }),
344 ),
345 Tool::Unsupported => ("unsupported", serde_json::json!({})),
346 };
347
348 ToolConfig {
349 kind: kind.to_string(),
350 config,
351 timeout: None,
352 retry: None,
353 auth: None,
354 }
355}
356
357/// Build a single-command ToolConfig for the shell tool. Used by
358/// the `Tool::Shell` dispatch arm to preserve the CLI's per-command
359/// bash-invocation semantics (independent process, no shared
360/// cwd/env state across commands).
361fn shell_command_config(command: &str) -> ToolConfig {
362 ToolConfig {
363 kind: "shell".to_string(),
364 config: serde_json::json!({
365 "command": command,
366 "shell": "bash",
367 "capture": true,
368 }),
369 timeout: None,
370 retry: None,
371 auth: None,
372 }
373}
374
375/// Convert a CLI HTTP body string into a JSON [`serde_json::Value`]
376/// suitable for noetl-tools' `HttpConfig.body` field. If the body
377/// parses as JSON, the parsed value is returned (and `reqwest` sends
378/// it with `Content-Type: application/json`). Otherwise the body
379/// is wrapped as a [`Value::String`] which `reqwest` writes
380/// verbatim as the request body.
381fn http_body_value(body: &str) -> serde_json::Value {
382 serde_json::from_str(body).unwrap_or_else(|_| serde_json::Value::String(body.to_string()))
383}
384
385/// Resolve a CLI [`AuthConfig`] to a Bearer token using noetl-tools'
386/// [`GcpAuth`] provider.
387///
388/// CLI providers `"gcp"`, `"google"`, and `"adc"` all map to GCP
389/// Application Default Credentials. Any other provider value
390/// returns an error matching the CLI's pre-PR-2c-5 behaviour.
391///
392/// This replaces the CLI's inline `get_auth_token` (which shelled
393/// out to `gcloud auth print-access-token`). See semantic
394/// divergence row on the executor-crate-architecture wiki page.
395pub async fn resolve_auth_to_bearer(cfg: &CliAuthConfig) -> Result<String> {
396 match cfg.provider.as_str() {
397 "gcp" | "google" | "adc" => {
398 let gcp = GcpAuth::new();
399 let scopes: Vec<&str> = cfg.scopes.iter().map(|s| s.as_str()).collect();
400 let token = if scopes.is_empty() {
401 gcp.get_default_token()
402 .await
403 .map_err(|e| anyhow::anyhow!("failed to get GCP access token: {}", e))?
404 } else {
405 gcp.get_token(&scopes)
406 .await
407 .map_err(|e| anyhow::anyhow!("failed to get GCP access token: {}", e))?
408 };
409 Ok(token)
410 }
411 other => anyhow::bail!(
412 "unsupported auth provider: {}. Supported: gcp, google, adc",
413 other
414 ),
415 }
416}
417
418/// Build the noetl-tools [`ToolConfig`] for an HTTP request.
419///
420/// Identical to the [`to_tools_config`] `Tool::Http` arm but pulled
421/// out so the dispatch arm can also inject an `Authorization:
422/// Bearer <token>` header when a CLI `AuthConfig` is present
423/// (resolved via [`resolve_auth_to_bearer`]).
424///
425/// CLI's `auth` is intentionally NOT mapped to noetl-tools'
426/// `ToolConfig.auth` field: that field expects an `AuthConfig` with
427/// `credential` / `token` lookup against `ExecutionContext.secrets`,
428/// which CLI local mode does not populate. Pre-resolving the
429/// token and injecting it as a header keeps the CLI's existing
430/// authority semantics (the CLI process's gcloud / ADC chain) and
431/// avoids reshaping the credential resolver path.
432fn http_tool_config(
433 method: &str,
434 url: &str,
435 headers: &HashMap<String, String>,
436 params: &HashMap<String, String>,
437 body: Option<&str>,
438 bearer: Option<&str>,
439) -> ToolConfig {
440 let mut merged_headers = headers.clone();
441 if let Some(token) = bearer {
442 merged_headers.insert(
443 "Authorization".to_string(),
444 format!("Bearer {}", token),
445 );
446 }
447 ToolConfig {
448 kind: "http".to_string(),
449 config: serde_json::json!({
450 "method": method.to_uppercase(),
451 "url": url,
452 "headers": merged_headers,
453 "params": params,
454 "body": body.map(http_body_value),
455 }),
456 timeout: None,
457 retry: None,
458 auth: None,
459 }
460}
461
462/// Reshape noetl-tools' HTTP result envelope back to the CLI's
463/// pre-PR-2c-5 shape.
464///
465/// noetl-tools' HttpTool always packs `data: {"status_code":
466/// u16, "headers": {...}, "body": <json>}` into the ToolResult,
467/// regardless of whether the HTTP response was 2xx (Success) or
468/// 4xx/5xx (Error). The CLI's `execute_http_request` returned the
469/// envelope `{"status": <int>, "body": <json>}` for ALL HTTP
470/// responses (including 4xx/5xx) so playbook steps could branch on
471/// the status code. We preserve that contract here: only network-
472/// transport failures bubble up as `anyhow::Error`; HTTP error
473/// statuses come back inside the JSON envelope.
474fn reshape_http_result(result: ToolResult) -> Result<BridgeOutcome> {
475 if let Some(data) = result.data {
476 let status_code = data
477 .get("status_code")
478 .and_then(|v| v.as_u64())
479 .unwrap_or(0) as i32;
480 let body = data
481 .get("body")
482 .cloned()
483 .unwrap_or(serde_json::Value::Null);
484 let envelope = serde_json::json!({
485 "status": status_code,
486 "body": body,
487 });
488 return Ok(BridgeOutcome {
489 result: Some(envelope.to_string()),
490 });
491 }
492 // No data — fall back to the generic from_tools_result path so
493 // we surface whatever error / stdout the tool emitted.
494 from_tools_result(result)
495}
496
497/// Build a [`ToolConfig`] for a DuckDB query.
498///
499/// Used by the `Tool::DuckDb` dispatch arm. Path resolution
500/// (playbook-relative vs absolute) and `mkdir -p` of the parent
501/// directory are handled at the CLI call site BEFORE the bridge is
502/// invoked, so this helper receives an already-resolved absolute
503/// path string (or `:memory:` for in-memory mode).
504fn duckdb_tool_config(
505 db_path: &str,
506 query: &str,
507 params: &[String],
508) -> ToolConfig {
509 ToolConfig {
510 kind: "duckdb".to_string(),
511 config: serde_json::json!({
512 "db_path": db_path,
513 "query": query,
514 "params": params
515 .iter()
516 .map(|p| serde_json::Value::String(p.clone()))
517 .collect::<Vec<_>>(),
518 // CLI's pre-PR-2c-6 SELECT result shape was an array of
519 // JSON objects keyed by column name; `as_objects: true`
520 // matches that. `reshape_duckdb_result` then unwraps
521 // the noetl-tools envelope back to the raw array.
522 "as_objects": true,
523 }),
524 timeout: None,
525 retry: None,
526 auth: None,
527 }
528}
529
530/// Reshape noetl-tools' DuckDB result envelope back to the CLI's
531/// pre-PR-2c-6 shape.
532///
533/// noetl-tools' DuckdbTool returns:
534/// - SELECT / WITH: `data: {"columns": [...], "rows": [{...}, ...],
535/// "row_count": N}`
536/// - non-SELECT: `data: {"affected_rows": N}`
537///
538/// The CLI's `execute_duckdb_query` returned:
539/// - SELECT / WITH: a JSON array of objects (pretty-printed)
540/// - non-SELECT: the literal string `{"status": "ok"}`
541///
542/// `reshape_duckdb_result` maps the former onto the latter so
543/// playbook steps that read `<step>.result[0].col_name` keep
544/// working. `affected_rows` from the noetl-tools envelope is
545/// dropped on purpose — the CLI never exposed it.
546fn reshape_duckdb_result(result: ToolResult) -> Result<BridgeOutcome> {
547 let data = match result.data {
548 Some(d) => d,
549 None => return from_tools_result(result),
550 };
551
552 if let Some(rows) = data.get("rows").and_then(|v| v.as_array()) {
553 // SELECT path. Return the rows array as a pretty-printed
554 // JSON string — matches the CLI's
555 // `serde_json::to_string_pretty(&results)`.
556 let pretty = serde_json::to_string_pretty(rows)?;
557 return Ok(BridgeOutcome { result: Some(pretty) });
558 }
559
560 if data.get("affected_rows").is_some() {
561 // Non-SELECT path. CLI emitted the literal `{"status":
562 // "ok"}` here; preserve that.
563 return Ok(BridgeOutcome {
564 result: Some(r#"{"status": "ok"}"#.to_string()),
565 });
566 }
567
568 // Unknown shape — fall back to the generic from_tools_result
569 // path so we still surface whatever the tool emitted.
570 from_tools_result(ToolResult {
571 status: result.status,
572 data: Some(data),
573 error: result.error,
574 stdout: result.stdout,
575 stderr: result.stderr,
576 exit_code: result.exit_code,
577 duration_ms: result.duration_ms,
578 // noetl-tools 2.21 added this marker field; the executor
579 // bridge has nothing to attach here (DuckDB doesn't dispatch
580 // async work), so it always falls through as `None`.
581 pending_callback: result.pending_callback,
582 })
583}
584
585/// Prepare the variable map for a sub-playbook invocation.
586///
587/// Used by the CLI's `Tool::Playbook` arm (which keeps owning the
588/// tree-walker recursion per § H.10). The helper merges the
589/// parent context's variables with the sub-playbook's
590/// `input:` (DSL v2) or `args:` (DSL v1 legacy), each rendered
591/// against the parent context via the caller-supplied
592/// `render_template` closure and prefixed with `workload.` to
593/// match the sub-playbook's expected variable shape.
594///
595/// `input` takes precedence over `args` when both are present —
596/// same precedence the CLI's pre-PR-2c-7 inline implementation
597/// applied.
598///
599/// `parent_vars`, `args`, and `input` correspond directly to the
600/// caller's `context.variables`, `Tool::Playbook.args`, and
601/// `Tool::Playbook.input` fields. The `render` closure receives
602/// each template string and is expected to return the rendered
603/// value (the CLI passes `|t| self.render_template(t, context)`).
604///
605/// Returning a fresh `HashMap` rather than mutating in place makes
606/// the helper easy to test and matches how the inline
607/// implementation operated.
608pub fn prepare_sub_playbook_vars<F>(
609 parent_vars: &HashMap<String, String>,
610 args: &HashMap<String, String>,
611 input: &HashMap<String, serde_yaml::Value>,
612 mut render: F,
613) -> Result<HashMap<String, String>>
614where
615 F: FnMut(&str) -> Result<String>,
616{
617 let mut sub_vars = parent_vars.clone();
618
619 if !input.is_empty() {
620 // DSL v2: tool.input takes precedence — render and prefix
621 // with `workload.`.
622 for (key, value_yaml) in input {
623 let template = match value_yaml {
624 serde_yaml::Value::String(s) => s.clone(),
625 serde_yaml::Value::Number(n) => n.to_string(),
626 serde_yaml::Value::Bool(b) => b.to_string(),
627 other => serde_yaml::to_string(other)?.trim().to_string(),
628 };
629 let value = render(&template)?;
630 sub_vars.insert(format!("workload.{}", key), value);
631 }
632 } else if !args.is_empty() {
633 // DSL v1 legacy: args field — prefix with `workload.`.
634 for (key, template) in args {
635 let value = render(template)?;
636 sub_vars.insert(format!("workload.{}", key), value);
637 }
638 }
639
640 Ok(sub_vars)
641}
642
643/// Apply post-resolution `Tool::Auth` side-effects to the CLI's
644/// execution context.
645///
646/// Returns the (key, value) pairs the caller should
647/// `set_variable` on its `ExecutionContext` so subsequent steps
648/// can reference `{{ auth.token }}` etc. Wrapping this in a
649/// helper means future call sites (the worker, integration tests)
650/// don't have to re-derive which keys to set.
651///
652/// `project` is the **already-rendered** project string (the CLI
653/// renders templates against its own context before calling this
654/// helper), or `None` if the playbook didn't supply one.
655///
656/// Output order:
657/// - `auth.project` (only if `project` is `Some` and non-empty)
658/// - `auth.token`
659/// - `auth.provider`
660///
661/// Matching the CLI's pre-PR-2c-8 ordering — `auth.project` set
662/// first by the inline arm, then the token + provider after the
663/// `resolve_auth_to_bearer` call.
664pub fn auth_context_updates(
665 provider: &str,
666 token: &str,
667 project: Option<&str>,
668) -> Vec<(String, String)> {
669 let mut updates: Vec<(String, String)> = Vec::with_capacity(3);
670 if let Some(p) = project {
671 if !p.is_empty() {
672 updates.push(("auth.project".to_string(), p.to_string()));
673 }
674 }
675 updates.push(("auth.token".to_string(), token.to_string()));
676 updates.push(("auth.provider".to_string(), provider.to_string()));
677 updates
678}
679
680/// Format the payload a `Tool::Sink` writes to its target.
681///
682/// Pure transformation lifted from the CLI's inline
683/// `Tool::Sink` arm. The CLI passes the last step's result
684/// (already a JSON-serialized string in `ExecutionContext`) and
685/// the playbook's declared `format:` field; the helper returns
686/// the formatted string ready to write to file / DuckDB / GCS.
687///
688/// Format rules:
689/// - [`SinkFormat::Json`]: pass-through. Same as CLI's
690/// pre-PR-2c-8 behaviour (the raw step-result string).
691/// - [`SinkFormat::Yaml`]: parse the input as JSON, then dump as
692/// YAML. Falls back to pass-through if the input doesn't parse.
693/// - [`SinkFormat::Csv`]: see [`json_to_csv`] for the rules.
694pub fn format_sink_payload(format: &SinkFormat, raw: &str) -> Result<String> {
695 match format {
696 SinkFormat::Json => Ok(raw.to_string()),
697 SinkFormat::Yaml => {
698 if let Ok(json_val) = serde_json::from_str::<serde_json::Value>(raw) {
699 Ok(serde_yaml::to_string(&json_val).unwrap_or_else(|_| raw.to_string()))
700 } else {
701 Ok(raw.to_string())
702 }
703 }
704 SinkFormat::Csv => json_to_csv(raw),
705 }
706}
707
708/// Convert a JSON-array-of-objects string into CSV.
709///
710/// Pure helper lifted from the CLI's inline `json_to_csv`. Returns
711/// the input unchanged if:
712/// - it doesn't parse as JSON,
713/// - it parses as a non-array value, or
714/// - it's an empty array, or
715/// - the first element isn't a JSON object.
716///
717/// Otherwise: emits a header row from the first object's keys
718/// followed by one row per array element. Values are converted
719/// via `Display`; strings that contain `,` or `"` are
720/// double-quoted with embedded `"` doubled — minimal RFC 4180
721/// quoting, matching the CLI's pre-PR-2c-8 implementation.
722pub fn json_to_csv(json_str: &str) -> Result<String> {
723 let value: serde_json::Value =
724 serde_json::from_str(json_str).unwrap_or(serde_json::Value::String(json_str.to_string()));
725
726 match value {
727 serde_json::Value::Array(arr) if !arr.is_empty() => {
728 let headers: Vec<String> = if let Some(serde_json::Value::Object(obj)) = arr.first() {
729 obj.keys().cloned().collect()
730 } else {
731 return Ok(json_str.to_string());
732 };
733
734 let mut csv = headers.join(",") + "\n";
735
736 for item in &arr {
737 if let serde_json::Value::Object(obj) = item {
738 let row: Vec<String> = headers
739 .iter()
740 .map(|h| {
741 obj.get(h)
742 .map(|v| match v {
743 serde_json::Value::String(s) => {
744 if s.contains(',') || s.contains('"') {
745 format!("\"{}\"", s.replace('"', "\"\""))
746 } else {
747 s.clone()
748 }
749 }
750 _ => v.to_string(),
751 })
752 .unwrap_or_default()
753 })
754 .collect();
755 csv.push_str(&row.join(","));
756 csv.push('\n');
757 }
758 }
759 Ok(csv)
760 }
761 _ => Ok(json_str.to_string()),
762 }
763}
764
765// ---------------------------------------------------------------------------
766// GCS upload helper (R-3, noetl/ai-meta#31)
767// ---------------------------------------------------------------------------
768
769/// Upload `data` to `gs://<bucket>/<key>` using the `object_store` crate.
770///
771/// # Credential chain
772///
773/// Authentication defaults to the same Application Default Credentials
774/// (ADC) / workload-identity chain that [`resolve_auth_to_bearer`] uses
775/// via `gcp_auth`. Concretely: `GoogleCloudStorageBuilder::from_env()`
776/// reads (in priority order):
777///
778/// 1. `GOOGLE_SERVICE_ACCOUNT_KEY` env var (JSON service-account key
779/// inline — useful for CI / test containers).
780/// 2. `GOOGLE_SERVICE_ACCOUNT` env var (path to a JSON key file).
781/// 3. The ambient Application Default Credentials
782/// (`~/.config/gcloud/application_default_credentials.json` on dev
783/// hosts; the GKE metadata server on cluster pods).
784///
785/// This matches GKE workload-identity on cluster and `gcloud auth
786/// application-default login` on dev hosts — the same two paths the
787/// former `gsutil cp` subprocess relied on.
788///
789/// # Error shape
790///
791/// Returns `anyhow::Error` with a human-readable message on failure
792/// (instead of a gsutil exit-code string). The CLI's `sink_to_gcs`
793/// wrapper maps this through the usual `?` chain.
794///
795/// # Observability
796///
797/// Wraps the upload in a `gcs.upload` tracing span that carries
798/// `bucket`, `key`, and `bytes` fields so the span is grep-able in
799/// structured logs. Upload duration is emitted as a debug-level event
800/// (`gcs.upload.duration_ms`) so tooling can aggregate latency without
801/// a Prometheus registry in the executor crate. A future PR can
802/// promote this to a proper histogram once the executor crate grows a
803/// metrics registry.
804///
805/// # Pluggable store (testing)
806///
807/// The `store` parameter is `Arc<dyn ObjectStore>`. Production callers
808/// pass `None` (the default GCS store is built from env); integration
809/// tests inject `Arc<object_store::memory::InMemory::new()>` to avoid
810/// real GCS calls. See `gcs_upload_with_store` for the inner
811/// implementation that both paths share.
812pub async fn gcs_upload(bucket: &str, key: &str, data: &str) -> Result<()> {
813 use object_store::gcp::GoogleCloudStorageBuilder;
814
815 let store = GoogleCloudStorageBuilder::from_env()
816 .with_bucket_name(bucket)
817 .build()
818 .map_err(|e| anyhow::anyhow!("failed to build GCS store for bucket {:?}: {}", bucket, e))?;
819
820 gcs_upload_with_store(Arc::new(store), key, data).await
821}
822
823/// Inner upload path shared by production and test callers.
824///
825/// Production: called by [`gcs_upload`] with a real
826/// `GoogleCloudStorage` store.
827/// Tests: called directly with `Arc<InMemory>` — no GCS dependency.
828pub async fn gcs_upload_with_store(
829 store: Arc<dyn ObjectStore>,
830 key: &str,
831 data: &str,
832) -> Result<()> {
833 let bytes = Bytes::from(data.to_string());
834 let byte_len = bytes.len();
835 let path = StorePath::from(key);
836
837 let span = info_span!(
838 "gcs.upload",
839 key = key,
840 bytes = byte_len,
841 );
842
843 async move {
844 let start = Instant::now();
845
846 store
847 .put(&path, PutPayload::from_bytes(bytes))
848 .await
849 .map_err(|e| anyhow::anyhow!("GCS upload failed for key {:?}: {}", key, e))?;
850
851 let elapsed_ms = start.elapsed().as_millis();
852 tracing::debug!(
853 target: "noetl::gcs",
854 duration_ms = elapsed_ms,
855 key = key,
856 bytes = byte_len,
857 "gcs.upload complete"
858 );
859
860 Ok(())
861 }
862 .instrument(span)
863 .await
864}
865
866fn target_to_value(target: &crate::playbook::SinkTarget) -> serde_json::Value {
867 match target {
868 crate::playbook::SinkTarget::File { path } => {
869 serde_json::json!({"type": "file", "path": path})
870 }
871 crate::playbook::SinkTarget::DuckDb { db, table } => {
872 serde_json::json!({"type": "duckdb", "db": db, "table": table})
873 }
874 crate::playbook::SinkTarget::Gcs { bucket, path } => {
875 serde_json::json!({"type": "gcs", "bucket": bucket, "path": path})
876 }
877 }
878}
879
880/// Convert a [`ToolResult`] back into the bridge outcome shape the
881/// CLI consumes. Success results carry `data` (or `stdout` if no
882/// `data` was populated) as the result string; failures bubble up
883/// as `anyhow::Error` so the CLI's existing error-handling chain
884/// continues to work.
885pub fn from_tools_result(result: ToolResult) -> Result<BridgeOutcome> {
886 match result.status {
887 ToolStatus::Success => {
888 let payload = result
889 .data
890 .map(|v| match v {
891 serde_json::Value::String(s) => s,
892 other => other.to_string(),
893 })
894 .or(result.stdout);
895 Ok(BridgeOutcome { result: payload })
896 }
897 ToolStatus::Error => Err(anyhow::anyhow!(
898 "tool execution failed: {}",
899 result.error.unwrap_or_else(|| "unknown error".to_string())
900 )),
901 ToolStatus::Timeout => Err(anyhow::anyhow!(
902 "tool execution timed out after {} ms",
903 result.duration_ms.unwrap_or(0)
904 )),
905 }
906}
907
908// ---------------------------------------------------------------------------
909// Dispatch — per-tool-kind match scaffold.
910// ---------------------------------------------------------------------------
911
912/// Bridge dispatch entry point. Each tool kind is replaced
913/// incrementally in subsequent sub-PRs (PR-2c-3 onwards).
914///
915/// The function is async because every concrete `noetl-tools` tool
916/// implementation is async (`Tool::execute` is `async`). The CLI
917/// adapts via `tokio::runtime::Handle::current().block_on(...)` if
918/// the call site is sync — see PR-2c-3's wiring for the pattern.
919pub async fn dispatch_via_registry(
920 tool: &Tool,
921 bridge: &BridgeContext<'_>,
922) -> Result<BridgeOutcome> {
923 let _config = to_tools_config(tool);
924 let _ctx = to_tools_context(bridge);
925
926 match tool {
927 Tool::Rhai { .. } => {
928 // PR-2c-3: first real tool replacement. Builds a
929 // RhaiTool from noetl-tools, dispatches against the
930 // adapter-converted config + context, and converts the
931 // result back through `from_tools_result`.
932 //
933 // Semantic note documented in the PR body: noetl-tools'
934 // `timestamp()` returns the Unix epoch as a string
935 // (e.g. "1716847425"), whereas the CLI's inline
936 // implementation returned `chrono::Local::now()
937 // .format("%H:%M:%S")` (e.g. "14:23:45"). Other
938 // helpers (log, print, parse_json, contains, http_*,
939 // get_gcp_token, sleep, sleep_ms) match.
940 let rhai_tool = RhaiTool::new();
941 let config = to_tools_config(tool);
942 // rhai needs a nested variable shape so
943 // `workload.region` is a Rhai field-access expression.
944 let ctx = to_tools_context_for_rhai(bridge);
945 let result = rhai_tool
946 .execute(&config, &ctx)
947 .await
948 .map_err(|e| anyhow::anyhow!("rhai dispatch failed: {}", e))?;
949 from_tools_result(result)
950 }
951 Tool::Shell { cmds } => {
952 // PR-2c-4: dispatch through noetl_tools::ShellTool.
953 //
954 // CLI semantics preserved:
955 // - CmdsList::Single splits on newlines into individual
956 // commands; each runs in its own bash invocation.
957 // - CmdsList::Multiple runs each element in its own
958 // bash invocation in order.
959 // - Bails on first non-zero exit (CLI's existing
960 // `anyhow::bail!("Command failed ...")`).
961 // - Returns the last command's stdout as the step result.
962 //
963 // Note vs CLI: noetl-tools' ShellTool collects stdout +
964 // stderr and returns them in the ToolResult at the end
965 // of execution. The CLI's inline implementation
966 // streamed output to the terminal line-by-line as the
967 // command ran. For long-running shell steps users no
968 // longer see real-time output. Documented in the PR
969 // body and on the executor-crate-architecture wiki
970 // page's semantic-divergence table.
971 let commands: Vec<String> = match cmds {
972 CmdsList::Single(cmd) => cmd
973 .lines()
974 .map(|s| s.trim())
975 .filter(|s| !s.is_empty())
976 .map(|s| s.to_string())
977 .collect(),
978 CmdsList::Multiple(c) => c.clone(),
979 };
980
981 let shell_tool = ShellTool::new();
982 let ctx = to_tools_context(bridge);
983 let mut last_outcome = BridgeOutcome::empty();
984 for command in commands {
985 let config = shell_command_config(&command);
986 let result = shell_tool
987 .execute(&config, &ctx)
988 .await
989 .map_err(|e| anyhow::anyhow!("shell dispatch failed: {}", e))?;
990
991 // noetl-tools' shell tool packs the result into
992 // ToolResult.data as a typed JSON object:
993 // {"exit_code": i32, "stdout": String, "stderr": String}
994 // For the CLI's step-result contract (a single
995 // string = the command's stdout), we unwrap stdout
996 // directly here. `from_tools_result` would
997 // otherwise stringify the whole JSON dict.
998 if result.status != ToolStatus::Success {
999 let exit_code = result
1000 .data
1001 .as_ref()
1002 .and_then(|d| d.get("exit_code"))
1003 .and_then(|v| v.as_i64());
1004 anyhow::bail!(
1005 "Command failed with exit code: {:?}",
1006 exit_code
1007 );
1008 }
1009 let stdout = result
1010 .data
1011 .as_ref()
1012 .and_then(|d| d.get("stdout"))
1013 .and_then(|v| v.as_str())
1014 .map(|s| s.trim_end_matches('\n').to_string());
1015 last_outcome = BridgeOutcome { result: stdout };
1016 }
1017 Ok(last_outcome)
1018 }
1019 Tool::Http {
1020 method,
1021 url,
1022 headers,
1023 params,
1024 body,
1025 auth,
1026 } => {
1027 // PR-2c-5: dispatch through noetl_tools::HttpTool.
1028 //
1029 // CLI semantics preserved:
1030 // - Auth resolution via GCP ADC (gcp / google / adc).
1031 // - Step result is the JSON envelope
1032 // `{"status": <int>, "body": <json-or-string>}`
1033 // regardless of HTTP status code (so playbook steps
1034 // can branch on `<step>.body.status`).
1035 //
1036 // Semantic divergences (documented on the executor-crate-
1037 // architecture wiki page):
1038 // - HTTP transport: curl subprocess → reqwest direct.
1039 // - GCP token: `gcloud auth print-access-token` shellout
1040 // → `gcp_auth` crate (workload-identity aware on GKE).
1041 // - Body bytes: CLI sent the body string verbatim via
1042 // `curl -d`. noetl-tools serializes the body as JSON
1043 // when the string parses as JSON (adding Content-Type:
1044 // application/json automatically), otherwise sends it
1045 // verbatim. See `http_body_value`.
1046 let bearer = if let Some(auth_cfg) = auth {
1047 Some(resolve_auth_to_bearer(auth_cfg).await?)
1048 } else {
1049 None
1050 };
1051 let config = http_tool_config(
1052 method,
1053 url,
1054 headers,
1055 params,
1056 body.as_deref(),
1057 bearer.as_deref(),
1058 );
1059 let http_tool = HttpTool::new();
1060 let ctx = to_tools_context(bridge);
1061 let result = http_tool
1062 .execute(&config, &ctx)
1063 .await
1064 .map_err(|e| anyhow::anyhow!("http dispatch failed: {}", e))?;
1065 reshape_http_result(result)
1066 }
1067 Tool::DuckDb { db, query, params } => {
1068 // PR-2c-6: dispatch through noetl_tools::DuckdbTool.
1069 //
1070 // CLI semantics preserved:
1071 // - The CLI's call site already resolved playbook-
1072 // relative paths (`resolve_duckdb_path`) and ran
1073 // `mkdir -p` on the parent directory before invoking
1074 // the bridge, so `db` here is an absolute path
1075 // string ready to hand to DuckDB.
1076 // - SELECT / WITH queries return a JSON array of
1077 // objects (pretty-printed).
1078 // - Non-SELECT queries return the literal envelope
1079 // `{"status": "ok"}` (CLI never exposed
1080 // noetl-tools' `affected_rows`).
1081 // - Empty / missing query short-circuits to an empty
1082 // outcome, matching the CLI arm's
1083 // `if let Some(query_str) = query` guard.
1084 //
1085 // Feature gain: CLI's pre-PR-2c-6 inline impl took a
1086 // `_params: &[String]` and silently ignored it. The
1087 // bridge now binds those params as JSON values at
1088 // `?` placeholders. Playbooks that had a stale
1089 // `params:` list under a query without `?` placeholders
1090 // continue to work (DuckDB ignores extra params); any
1091 // playbook that *intended* the params would now see
1092 // them applied — documented in the PR body.
1093 let query = match query {
1094 Some(q) if !q.trim().is_empty() => q,
1095 _ => return Ok(BridgeOutcome::empty()),
1096 };
1097 let config = duckdb_tool_config(db, query, params);
1098 let duckdb_tool = DuckdbTool::new();
1099 let ctx = to_tools_context(bridge);
1100 let result = duckdb_tool
1101 .execute(&config, &ctx)
1102 .await
1103 .map_err(|e| anyhow::anyhow!("duckdb dispatch failed: {}", e))?;
1104 reshape_duckdb_result(result)
1105 }
1106 Tool::Playbook { .. } => {
1107 // PR-2c-7: encodes the § H.10 architectural finding.
1108 //
1109 // `Tool::Playbook` is the recursion case of the CLI's
1110 // tree walker — it loads a sub-playbook YAML and
1111 // dispatches it through the same `PlaybookRunner` the
1112 // top-level invocation uses. `PlaybookRunner` lives in
1113 // the CLI binary, not in `noetl-executor` or
1114 // `noetl-tools`, so routing this tool through the
1115 // bridge would require either:
1116 // - dragging the tree walker into `noetl-executor`,
1117 // re-opening the § H.10 question that re-scoped
1118 // the crate to a utilities-and-types crate; or
1119 // - adding a callback trait to `noetl-tools` that
1120 // delegates back to the CLI binary, an
1121 // infrastructure layer nothing else in the
1122 // registry uses.
1123 //
1124 // The architecturally honest answer is that this tool
1125 // kind is NOT bridgeable. The CLI's `Tool::Playbook`
1126 // arm stays inline by design. Bailing loudly here
1127 // ensures any future code that tries to dispatch
1128 // `Tool::Playbook` through the bridge gets an
1129 // immediate, descriptive error instead of a silent
1130 // empty outcome.
1131 //
1132 // Sub-playbook variable preparation (the input + args
1133 // merging logic the CLI's call site performs before
1134 // recursing) DOES move into the executor as
1135 // [`prepare_sub_playbook_vars`] — that part is reusable
1136 // and testable independent of the tree walker.
1137 anyhow::bail!(
1138 "Tool::Playbook is not bridgeable: sub-playbook \
1139 execution stays in the CLI's tree walker per \
1140 § H.10 of the Rust migration roadmap. Use \
1141 `PlaybookRunner::new(path).run()` directly from \
1142 the CLI."
1143 );
1144 }
1145 Tool::Auth { .. } => {
1146 // PR-2c-8: `Tool::Auth` does not dispatch through the
1147 // registry. Token resolution lives in
1148 // [`resolve_auth_to_bearer`] (added in PR-2c-5);
1149 // applying the resulting token to the CLI's
1150 // `ExecutionContext` lives in [`auth_context_updates`]
1151 // (added in PR-2c-8). Both are sync helpers the CLI
1152 // calls directly without going through dispatch. The
1153 // arm bails so any future code path that tries to
1154 // route a `Tool::Auth` through the registry gets a
1155 // clear, descriptive error instead of silently
1156 // returning an empty outcome.
1157 anyhow::bail!(
1158 "Tool::Auth is not bridge-dispatched: use \
1159 `resolve_auth_to_bearer` for token resolution and \
1160 `auth_context_updates` for applying the token to \
1161 the caller's execution context. See § H.10 of the \
1162 Rust migration roadmap."
1163 );
1164 }
1165 Tool::Sink { .. } => {
1166 // PR-2c-8: `Tool::Sink` does not dispatch through the
1167 // registry either. noetl-tools' `TransferTool` is
1168 // database-to-database only (snowflake / postgres /
1169 // duckdb / http source → snowflake / postgres /
1170 // duckdb target); it has no file / GCS / object-store
1171 // target. The CLI's three sink targets (File,
1172 // DuckDb, Gcs) each stay inline:
1173 //
1174 // - **File**: `fs::write` is a one-liner; the format
1175 // conversion (json / yaml / csv) DID extract into
1176 // [`format_sink_payload`] so it's reusable and
1177 // testable.
1178 // - **DuckDb**: complex `INSERT INTO ... SELECT FROM
1179 // read_json_auto(...)` with a single-object fallback;
1180 // no `noetl-tools` equivalent. Stays inline by
1181 // design (§ H.10-style finding).
1182 // - **Gcs**: gsutil shellout. A follow-up sub-PR
1183 // (tracked separately) will migrate this to the
1184 // `object_store` crate per § H.4 of Appendix H.
1185 //
1186 // The arm bails so misuse is loud.
1187 anyhow::bail!(
1188 "Tool::Sink is not bridge-dispatched: noetl-tools \
1189 has no file / GCS / object-store target. Use \
1190 `format_sink_payload` for format conversion; the \
1191 CLI's sink targets (file / duckdb / gcs) stay \
1192 inline per § H.10. GCS migration to `object_store` \
1193 is tracked as a separate follow-up."
1194 );
1195 }
1196 Tool::Unsupported => {
1197 anyhow::bail!("unsupported tool kind");
1198 }
1199 }
1200}
1201
1202// ---------------------------------------------------------------------------
1203// Tests
1204// ---------------------------------------------------------------------------
1205
1206#[cfg(test)]
1207mod tests {
1208 use super::*;
1209 use crate::playbook::{AuthConfig as CliAuthConfig, SinkFormat, SinkTarget};
1210
1211 fn empty_vars() -> HashMap<String, String> {
1212 HashMap::new()
1213 }
1214
1215 fn bridge_ctx<'a>(vars: &'a HashMap<String, String>) -> BridgeContext<'a> {
1216 BridgeContext {
1217 execution_id: 12345,
1218 step: "test_step",
1219 variables: vars,
1220 server_url: String::new(),
1221 worker_id: None,
1222 command_id: None,
1223 }
1224 }
1225
1226 #[test]
1227 fn to_tools_context_wraps_string_variables_as_json_value() {
1228 let vars: HashMap<String, String> =
1229 [("workload.region".into(), "us-west-1".into())].into();
1230 let ctx = to_tools_context(&bridge_ctx(&vars));
1231 assert_eq!(ctx.execution_id, 12345);
1232 assert_eq!(ctx.step, "test_step");
1233 assert_eq!(
1234 ctx.variables.get("workload.region"),
1235 Some(&serde_json::Value::String("us-west-1".into()))
1236 );
1237 assert!(ctx.secrets.is_empty(), "secrets stay empty by default");
1238 }
1239
1240 #[test]
1241 fn to_tools_config_shell_single_cmd() {
1242 let tool = Tool::Shell {
1243 cmds: CmdsList::Single("ls -la".into()),
1244 };
1245 let cfg = to_tools_config(&tool);
1246 assert_eq!(cfg.kind, "shell");
1247 assert_eq!(cfg.config["command"], "ls -la");
1248 assert_eq!(cfg.config["shell"], "bash");
1249 assert_eq!(cfg.config["capture"], true);
1250 assert!(cfg.timeout.is_none());
1251 }
1252
1253 #[test]
1254 fn to_tools_config_shell_multiple_cmds_joins_with_newlines() {
1255 // The to_tools_config helper produces a SINGLE-command shape
1256 // by joining; the dispatch arm instead loops per command to
1257 // preserve the CLI's "fresh bash per command" semantics.
1258 let tool = Tool::Shell {
1259 cmds: CmdsList::Multiple(vec!["echo one".into(), "echo two".into()]),
1260 };
1261 let cfg = to_tools_config(&tool);
1262 assert_eq!(cfg.kind, "shell");
1263 assert_eq!(cfg.config["command"], "echo one\necho two");
1264 }
1265
1266 #[test]
1267 fn shell_command_config_emits_per_cmd_shape() {
1268 let cfg = shell_command_config("echo hi");
1269 assert_eq!(cfg.kind, "shell");
1270 assert_eq!(cfg.config["command"], "echo hi");
1271 assert_eq!(cfg.config["shell"], "bash");
1272 assert_eq!(cfg.config["capture"], true);
1273 }
1274
1275 #[test]
1276 fn to_tools_config_http_round_trips_essentials() {
1277 let tool = Tool::Http {
1278 method: "post".into(), // lowercase to verify uppercasing
1279 url: "https://example.com/api".into(),
1280 headers: HashMap::new(),
1281 params: HashMap::new(),
1282 body: Some(r#"{"k":"v"}"#.into()),
1283 auth: None,
1284 };
1285 let cfg = to_tools_config(&tool);
1286 assert_eq!(cfg.kind, "http");
1287 // noetl-tools' HttpConfig.method deserializes via
1288 // #[serde(rename_all = "UPPERCASE")] so the bridge always
1289 // uppercases the CLI's method string.
1290 assert_eq!(cfg.config["method"], "POST");
1291 assert_eq!(cfg.config["url"], "https://example.com/api");
1292 // JSON bodies are parsed into a JSON Value so reqwest
1293 // serialises them with Content-Type: application/json.
1294 assert_eq!(cfg.config["body"], serde_json::json!({"k": "v"}));
1295 }
1296
1297 #[test]
1298 fn to_tools_config_http_keeps_non_json_body_as_string() {
1299 let tool = Tool::Http {
1300 method: "POST".into(),
1301 url: "https://example.com".into(),
1302 headers: HashMap::new(),
1303 params: HashMap::new(),
1304 body: Some("not json at all".into()),
1305 auth: None,
1306 };
1307 let cfg = to_tools_config(&tool);
1308 assert_eq!(cfg.config["body"], "not json at all");
1309 }
1310
1311 #[test]
1312 fn http_body_value_parses_json_strings() {
1313 let v = http_body_value(r#"{"a":1}"#);
1314 assert_eq!(v, serde_json::json!({"a": 1}));
1315 }
1316
1317 #[test]
1318 fn http_body_value_falls_back_to_string() {
1319 let v = http_body_value("plain text body");
1320 assert_eq!(v, serde_json::Value::String("plain text body".into()));
1321 }
1322
1323 #[test]
1324 fn http_tool_config_injects_bearer_header() {
1325 let cfg = http_tool_config(
1326 "GET",
1327 "https://example.com",
1328 &HashMap::new(),
1329 &HashMap::new(),
1330 None,
1331 Some("test-token-123"),
1332 );
1333 assert_eq!(cfg.kind, "http");
1334 assert_eq!(
1335 cfg.config["headers"]["Authorization"],
1336 "Bearer test-token-123"
1337 );
1338 }
1339
1340 #[test]
1341 fn http_tool_config_preserves_caller_headers_with_bearer() {
1342 let mut hdrs = HashMap::new();
1343 hdrs.insert("X-Trace-Id".into(), "abc123".into());
1344 let cfg = http_tool_config(
1345 "POST",
1346 "https://example.com",
1347 &hdrs,
1348 &HashMap::new(),
1349 None,
1350 Some("token"),
1351 );
1352 assert_eq!(cfg.config["headers"]["X-Trace-Id"], "abc123");
1353 assert_eq!(cfg.config["headers"]["Authorization"], "Bearer token");
1354 }
1355
1356 #[test]
1357 fn http_tool_config_no_auth_omits_authorization_header() {
1358 let cfg = http_tool_config(
1359 "GET",
1360 "https://example.com",
1361 &HashMap::new(),
1362 &HashMap::new(),
1363 None,
1364 None,
1365 );
1366 let hdrs = cfg.config["headers"].as_object().unwrap();
1367 assert!(!hdrs.contains_key("Authorization"));
1368 }
1369
1370 #[test]
1371 fn reshape_http_result_extracts_envelope() {
1372 let mut result = ToolResult::success(serde_json::json!({
1373 "status_code": 200,
1374 "headers": {},
1375 "body": {"ok": true},
1376 }));
1377 result.exit_code = Some(0);
1378 let outcome = reshape_http_result(result).unwrap();
1379 let parsed: serde_json::Value =
1380 serde_json::from_str(outcome.result.as_deref().unwrap()).unwrap();
1381 assert_eq!(parsed["status"], 200);
1382 assert_eq!(parsed["body"], serde_json::json!({"ok": true}));
1383 }
1384
1385 #[test]
1386 fn reshape_http_result_preserves_4xx_envelope_without_erroring() {
1387 // CLI contract: HTTP error statuses come back inside the
1388 // `{status, body}` envelope, NOT as anyhow::Error. Only
1389 // network-transport failures bubble up.
1390 let mut result = ToolResult {
1391 status: ToolStatus::Error,
1392 data: Some(serde_json::json!({
1393 "status_code": 404,
1394 "headers": {},
1395 "body": {"error": "not found"},
1396 })),
1397 error: Some("HTTP 404 response".into()),
1398 stdout: None,
1399 stderr: None,
1400 exit_code: Some(1),
1401 duration_ms: Some(5),
1402 pending_callback: None,
1403 };
1404 result.exit_code = Some(1);
1405 let outcome = reshape_http_result(result).unwrap();
1406 let parsed: serde_json::Value =
1407 serde_json::from_str(outcome.result.as_deref().unwrap()).unwrap();
1408 assert_eq!(parsed["status"], 404);
1409 assert_eq!(parsed["body"], serde_json::json!({"error": "not found"}));
1410 }
1411
1412 #[tokio::test]
1413 async fn resolve_auth_to_bearer_rejects_unknown_provider() {
1414 let cfg = CliAuthConfig {
1415 provider: "azure".into(),
1416 scopes: vec![],
1417 };
1418 let err = resolve_auth_to_bearer(&cfg).await.unwrap_err();
1419 assert!(err.to_string().contains("unsupported auth provider"));
1420 }
1421
1422 // ---- PR-2c-6 — Tool::DuckDb bridge integration -------------------
1423
1424 #[test]
1425 fn duckdb_tool_config_emits_noetl_tools_schema() {
1426 let cfg = duckdb_tool_config(
1427 ":memory:",
1428 "SELECT 1",
1429 &["arg1".to_string()],
1430 );
1431 assert_eq!(cfg.kind, "duckdb");
1432 assert_eq!(cfg.config["db_path"], ":memory:");
1433 assert_eq!(cfg.config["query"], "SELECT 1");
1434 assert_eq!(cfg.config["as_objects"], true);
1435 assert_eq!(
1436 cfg.config["params"],
1437 serde_json::json!([serde_json::Value::String("arg1".into())])
1438 );
1439 }
1440
1441 #[test]
1442 fn to_tools_config_duckdb_carries_path_and_query() {
1443 let tool = Tool::DuckDb {
1444 db: "warehouse.db".into(),
1445 query: Some("SELECT count(*) FROM orders".into()),
1446 params: vec![],
1447 };
1448 let cfg = to_tools_config(&tool);
1449 assert_eq!(cfg.kind, "duckdb");
1450 assert_eq!(cfg.config["db_path"], "warehouse.db");
1451 assert_eq!(cfg.config["query"], "SELECT count(*) FROM orders");
1452 assert_eq!(cfg.config["as_objects"], true);
1453 }
1454
1455 #[test]
1456 fn to_tools_config_duckdb_missing_query_becomes_empty_string() {
1457 let tool = Tool::DuckDb {
1458 db: ":memory:".into(),
1459 query: None,
1460 params: vec![],
1461 };
1462 let cfg = to_tools_config(&tool);
1463 assert_eq!(cfg.config["query"], "");
1464 }
1465
1466 #[test]
1467 fn reshape_duckdb_result_select_returns_rows_array() {
1468 let result = ToolResult::success(serde_json::json!({
1469 "columns": ["id", "name"],
1470 "rows": [
1471 {"id": 1, "name": "alice"},
1472 {"id": 2, "name": "bob"},
1473 ],
1474 "row_count": 2
1475 }));
1476 let outcome = reshape_duckdb_result(result).unwrap();
1477 let parsed: serde_json::Value =
1478 serde_json::from_str(outcome.result.as_deref().unwrap()).unwrap();
1479 let arr = parsed.as_array().expect("result is an array");
1480 assert_eq!(arr.len(), 2);
1481 assert_eq!(arr[0]["id"], 1);
1482 assert_eq!(arr[0]["name"], "alice");
1483 assert_eq!(arr[1]["name"], "bob");
1484 }
1485
1486 #[test]
1487 fn reshape_duckdb_result_select_empty_returns_empty_array() {
1488 let result = ToolResult::success(serde_json::json!({
1489 "columns": ["id"],
1490 "rows": [],
1491 "row_count": 0
1492 }));
1493 let outcome = reshape_duckdb_result(result).unwrap();
1494 let parsed: serde_json::Value =
1495 serde_json::from_str(outcome.result.as_deref().unwrap()).unwrap();
1496 assert_eq!(parsed.as_array().unwrap().len(), 0);
1497 }
1498
1499 #[test]
1500 fn reshape_duckdb_result_non_select_returns_status_envelope() {
1501 let result = ToolResult::success(serde_json::json!({
1502 "affected_rows": 3
1503 }));
1504 let outcome = reshape_duckdb_result(result).unwrap();
1505 // CLI returned the literal `{"status": "ok"}` string for
1506 // non-SELECT queries; `affected_rows` is intentionally
1507 // dropped (CLI never exposed it, so playbooks can't depend
1508 // on it).
1509 assert_eq!(outcome.result.as_deref(), Some(r#"{"status": "ok"}"#));
1510 }
1511
1512 #[tokio::test]
1513 async fn dispatch_duckdb_select_returns_rows_array() {
1514 let vars = empty_vars();
1515 let bridge = bridge_ctx(&vars);
1516 let tool = Tool::DuckDb {
1517 db: ":memory:".into(),
1518 query: Some("SELECT 1 AS num, 'hello' AS msg".into()),
1519 params: vec![],
1520 };
1521 let outcome = dispatch_via_registry(&tool, &bridge).await.unwrap();
1522 let parsed: serde_json::Value =
1523 serde_json::from_str(outcome.result.as_deref().unwrap()).unwrap();
1524 let arr = parsed.as_array().expect("result is an array");
1525 assert_eq!(arr.len(), 1);
1526 assert_eq!(arr[0]["num"], 1);
1527 assert_eq!(arr[0]["msg"], "hello");
1528 }
1529
1530 #[tokio::test]
1531 async fn dispatch_duckdb_missing_query_returns_empty_outcome() {
1532 // Mirrors the CLI arm's `if let Some(query_str) = query` guard:
1533 // a Tool::DuckDb with no query falls through to None.
1534 let vars = empty_vars();
1535 let bridge = bridge_ctx(&vars);
1536 let tool = Tool::DuckDb {
1537 db: ":memory:".into(),
1538 query: None,
1539 params: vec![],
1540 };
1541 let outcome = dispatch_via_registry(&tool, &bridge).await.unwrap();
1542 assert!(outcome.result.is_none());
1543 }
1544
1545 #[tokio::test]
1546 async fn dispatch_duckdb_empty_query_returns_empty_outcome() {
1547 let vars = empty_vars();
1548 let bridge = bridge_ctx(&vars);
1549 let tool = Tool::DuckDb {
1550 db: ":memory:".into(),
1551 query: Some(" ".into()), // whitespace only
1552 params: vec![],
1553 };
1554 let outcome = dispatch_via_registry(&tool, &bridge).await.unwrap();
1555 assert!(outcome.result.is_none());
1556 }
1557
1558 // ---- PR-2c-7 — sub-playbook variable preparation ------------------
1559
1560 #[test]
1561 fn prepare_sub_playbook_vars_passes_parent_vars_through() {
1562 let parent: HashMap<String, String> =
1563 [("vars.timeout".into(), "30".into())].into();
1564 let sub = prepare_sub_playbook_vars(
1565 &parent,
1566 &HashMap::new(),
1567 &HashMap::new(),
1568 |t| Ok(t.to_string()),
1569 )
1570 .unwrap();
1571 assert_eq!(sub.get("vars.timeout"), Some(&"30".to_string()));
1572 }
1573
1574 #[test]
1575 fn prepare_sub_playbook_vars_v2_input_takes_precedence_over_v1_args() {
1576 let parent: HashMap<String, String> = HashMap::new();
1577 let mut input = HashMap::new();
1578 input.insert(
1579 "region".into(),
1580 serde_yaml::Value::String("us-east-1".into()),
1581 );
1582 let mut args = HashMap::new();
1583 args.insert("region".into(), "us-west-1".into());
1584
1585 let sub = prepare_sub_playbook_vars(&parent, &args, &input, |t| {
1586 Ok(t.to_string())
1587 })
1588 .unwrap();
1589 // input wins; args ignored when input is non-empty.
1590 assert_eq!(sub.get("workload.region"), Some(&"us-east-1".to_string()));
1591 }
1592
1593 #[test]
1594 fn prepare_sub_playbook_vars_v1_args_used_when_input_empty() {
1595 let parent: HashMap<String, String> = HashMap::new();
1596 let mut args = HashMap::new();
1597 args.insert("tier".into(), "prod".into());
1598 let sub = prepare_sub_playbook_vars(
1599 &parent,
1600 &args,
1601 &HashMap::new(),
1602 |t| Ok(t.to_string()),
1603 )
1604 .unwrap();
1605 assert_eq!(sub.get("workload.tier"), Some(&"prod".to_string()));
1606 }
1607
1608 #[test]
1609 fn prepare_sub_playbook_vars_renders_input_templates() {
1610 let parent: HashMap<String, String> = HashMap::new();
1611 let mut input = HashMap::new();
1612 input.insert(
1613 "url".into(),
1614 serde_yaml::Value::String("{{base}}/api".into()),
1615 );
1616 let sub = prepare_sub_playbook_vars(
1617 &parent,
1618 &HashMap::new(),
1619 &input,
1620 |t| Ok(t.replace("{{base}}", "https://example.com")),
1621 )
1622 .unwrap();
1623 assert_eq!(
1624 sub.get("workload.url"),
1625 Some(&"https://example.com/api".to_string())
1626 );
1627 }
1628
1629 #[test]
1630 fn prepare_sub_playbook_vars_coerces_yaml_numbers_and_bools() {
1631 let parent: HashMap<String, String> = HashMap::new();
1632 let mut input = HashMap::new();
1633 input.insert(
1634 "timeout".into(),
1635 serde_yaml::Value::Number(serde_yaml::Number::from(30)),
1636 );
1637 input.insert("verbose".into(), serde_yaml::Value::Bool(true));
1638 let sub = prepare_sub_playbook_vars(
1639 &parent,
1640 &HashMap::new(),
1641 &input,
1642 |t| Ok(t.to_string()),
1643 )
1644 .unwrap();
1645 assert_eq!(sub.get("workload.timeout"), Some(&"30".to_string()));
1646 assert_eq!(sub.get("workload.verbose"), Some(&"true".to_string()));
1647 }
1648
1649 #[test]
1650 fn prepare_sub_playbook_vars_passes_through_when_both_empty() {
1651 let parent: HashMap<String, String> = [(
1652 "workload.region".into(),
1653 "us-east-1".into(),
1654 )]
1655 .into();
1656 let sub = prepare_sub_playbook_vars(
1657 &parent,
1658 &HashMap::new(),
1659 &HashMap::new(),
1660 |t| Ok(t.to_string()),
1661 )
1662 .unwrap();
1663 // No input or args; parent vars come through unchanged.
1664 assert_eq!(sub.len(), 1);
1665 assert_eq!(
1666 sub.get("workload.region"),
1667 Some(&"us-east-1".to_string())
1668 );
1669 }
1670
1671 #[test]
1672 fn prepare_sub_playbook_vars_render_error_propagates() {
1673 let parent: HashMap<String, String> = HashMap::new();
1674 let mut input = HashMap::new();
1675 input.insert(
1676 "bad".into(),
1677 serde_yaml::Value::String("{{nope}}".into()),
1678 );
1679 let result = prepare_sub_playbook_vars(
1680 &parent,
1681 &HashMap::new(),
1682 &input,
1683 |_| Err(anyhow::anyhow!("render exploded")),
1684 );
1685 assert!(result.unwrap_err().to_string().contains("render exploded"));
1686 }
1687
1688 // ---- PR-2c-8 — Tool::Auth context updates -------------------------
1689
1690 #[test]
1691 fn auth_context_updates_includes_token_and_provider() {
1692 let updates = auth_context_updates("gcp", "tok-123", None);
1693 let map: HashMap<String, String> = updates.into_iter().collect();
1694 assert_eq!(map.get("auth.token"), Some(&"tok-123".to_string()));
1695 assert_eq!(map.get("auth.provider"), Some(&"gcp".to_string()));
1696 assert!(map.get("auth.project").is_none());
1697 }
1698
1699 #[test]
1700 fn auth_context_updates_includes_project_when_set() {
1701 let updates = auth_context_updates("adc", "t", Some("my-project"));
1702 let map: HashMap<String, String> = updates.into_iter().collect();
1703 assert_eq!(
1704 map.get("auth.project"),
1705 Some(&"my-project".to_string())
1706 );
1707 assert_eq!(map.get("auth.token"), Some(&"t".to_string()));
1708 assert_eq!(map.get("auth.provider"), Some(&"adc".to_string()));
1709 }
1710
1711 #[test]
1712 fn auth_context_updates_skips_empty_project() {
1713 let updates = auth_context_updates("gcp", "t", Some(""));
1714 let map: HashMap<String, String> = updates.into_iter().collect();
1715 assert!(map.get("auth.project").is_none());
1716 }
1717
1718 #[test]
1719 fn auth_context_updates_orders_project_before_token() {
1720 // The CLI's pre-PR-2c-8 inline arm set `auth.project` first,
1721 // then the token + provider after the auth call. Preserve
1722 // that ordering so observable side-effects (logs, traces)
1723 // match.
1724 let updates = auth_context_updates("gcp", "t", Some("p"));
1725 assert_eq!(updates[0].0, "auth.project");
1726 assert_eq!(updates[1].0, "auth.token");
1727 assert_eq!(updates[2].0, "auth.provider");
1728 }
1729
1730 // ---- PR-2c-8 — Sink payload formatting + CSV ----------------------
1731
1732 #[test]
1733 fn format_sink_payload_json_passthrough() {
1734 let raw = r#"{"k": "v"}"#;
1735 let out = format_sink_payload(&SinkFormat::Json, raw).unwrap();
1736 assert_eq!(out, raw);
1737 }
1738
1739 #[test]
1740 fn format_sink_payload_yaml_converts_json_object() {
1741 let raw = r#"{"k": "v"}"#;
1742 let out = format_sink_payload(&SinkFormat::Yaml, raw).unwrap();
1743 let reparsed: serde_yaml::Value = serde_yaml::from_str(&out).unwrap();
1744 assert_eq!(reparsed["k"].as_str(), Some("v"));
1745 }
1746
1747 #[test]
1748 fn format_sink_payload_yaml_falls_back_when_not_json() {
1749 let raw = "not even close to json";
1750 let out = format_sink_payload(&SinkFormat::Yaml, raw).unwrap();
1751 assert_eq!(out, raw);
1752 }
1753
1754 #[test]
1755 fn format_sink_payload_csv_uses_json_to_csv() {
1756 let raw = r#"[{"a":1,"b":2},{"a":3,"b":4}]"#;
1757 let out = format_sink_payload(&SinkFormat::Csv, raw).unwrap();
1758 assert!(out.contains("a,b\n") || out.contains("b,a\n"));
1759 // Two data rows + header.
1760 assert_eq!(out.lines().count(), 3);
1761 }
1762
1763 #[test]
1764 fn json_to_csv_returns_input_for_non_array() {
1765 assert_eq!(json_to_csv("not json").unwrap(), "not json");
1766 assert_eq!(json_to_csv(r#"{"k":"v"}"#).unwrap(), r#"{"k":"v"}"#);
1767 }
1768
1769 #[test]
1770 fn json_to_csv_returns_input_for_empty_array() {
1771 assert_eq!(json_to_csv("[]").unwrap(), "[]");
1772 }
1773
1774 #[test]
1775 fn json_to_csv_emits_header_and_rows_for_object_array() {
1776 let raw = r#"[{"name":"alice","age":30},{"name":"bob","age":25}]"#;
1777 let csv = json_to_csv(raw).unwrap();
1778 let lines: Vec<&str> = csv.lines().collect();
1779 assert_eq!(lines.len(), 3);
1780 // Header derived from first object's keys (order
1781 // preserved by serde_json::Map).
1782 assert!(lines[0] == "name,age" || lines[0] == "age,name");
1783 // Each subsequent line should contain both values.
1784 assert!(lines[1].contains("alice") && lines[1].contains("30"));
1785 assert!(lines[2].contains("bob") && lines[2].contains("25"));
1786 }
1787
1788 #[test]
1789 fn json_to_csv_quotes_strings_with_commas() {
1790 let raw = r#"[{"label":"a, b","n":1}]"#;
1791 let csv = json_to_csv(raw).unwrap();
1792 // Quoted field with the comma preserved inside.
1793 assert!(csv.contains("\"a, b\""), "csv: {csv}");
1794 }
1795
1796 #[test]
1797 fn json_to_csv_doubles_embedded_quotes() {
1798 let raw = r#"[{"q":"she said \"hi\""}]"#;
1799 let csv = json_to_csv(raw).unwrap();
1800 // RFC-4180-style: embedded `"` doubled, whole field quoted.
1801 assert!(csv.contains("\"she said \"\"hi\"\"\""), "csv: {csv}");
1802 }
1803
1804 #[test]
1805 fn json_to_csv_missing_field_emits_empty() {
1806 let raw = r#"[{"a":1,"b":2},{"a":3}]"#; // second row missing `b`
1807 let csv = json_to_csv(raw).unwrap();
1808 let lines: Vec<&str> = csv.lines().collect();
1809 // The second data row should end with a trailing comma or
1810 // have an empty field for `b`.
1811 assert!(
1812 lines[2].ends_with(",") || lines[2].contains(",,"),
1813 "csv: {csv}"
1814 );
1815 }
1816
1817 #[test]
1818 fn to_tools_config_rhai_carries_code() {
1819 let tool = Tool::Rhai {
1820 code: "let x = 1; x + 1".into(),
1821 args: HashMap::new(),
1822 };
1823 let cfg = to_tools_config(&tool);
1824 assert_eq!(cfg.kind, "rhai");
1825 assert_eq!(cfg.config["code"], "let x = 1; x + 1");
1826 }
1827
1828 #[test]
1829 fn to_tools_config_sink_emits_typed_target() {
1830 let tool = Tool::Sink {
1831 target: SinkTarget::File {
1832 path: "/tmp/out.json".into(),
1833 },
1834 format: SinkFormat::Json,
1835 };
1836 let cfg = to_tools_config(&tool);
1837 assert_eq!(cfg.kind, "sink");
1838 assert_eq!(cfg.config["target"]["type"], "file");
1839 assert_eq!(cfg.config["target"]["path"], "/tmp/out.json");
1840 assert_eq!(cfg.config["format"], "json");
1841 }
1842
1843 #[test]
1844 fn from_tools_result_success_returns_data_string() {
1845 let result = ToolResult::success(serde_json::Value::String("hello".into()));
1846 let outcome = from_tools_result(result).unwrap();
1847 assert_eq!(outcome.result, Some("hello".into()));
1848 }
1849
1850 #[test]
1851 fn from_tools_result_success_serialises_non_string_data() {
1852 let result = ToolResult::success(serde_json::json!({"k": "v"}));
1853 let outcome = from_tools_result(result).unwrap();
1854 assert_eq!(outcome.result, Some(r#"{"k":"v"}"#.into()));
1855 }
1856
1857 #[test]
1858 fn from_tools_result_success_falls_back_to_stdout() {
1859 let mut result = ToolResult::success(serde_json::Value::Null);
1860 result.data = None;
1861 result.stdout = Some("script output".into());
1862 let outcome = from_tools_result(result).unwrap();
1863 assert_eq!(outcome.result, Some("script output".into()));
1864 }
1865
1866 #[test]
1867 fn from_tools_result_error_propagates_message() {
1868 let result = ToolResult::error("connection refused");
1869 let err = from_tools_result(result).unwrap_err();
1870 assert!(err.to_string().contains("connection refused"));
1871 }
1872
1873 // PR-2c-8 removed the
1874 // `dispatch_via_registry_returns_empty_for_unwired_kind` test:
1875 // every Tool variant now either dispatches through the registry
1876 // (Rhai/Shell/Http/DuckDb), bails with a § H.10 finding
1877 // (Playbook/Auth/Sink), or bails as unsupported. See the
1878 // per-variant dispatch tests for the wired kinds and the bail
1879 // tests for Playbook/Auth/Sink/Unsupported.
1880
1881 #[tokio::test]
1882 async fn dispatch_auth_bails_pointing_at_helper() {
1883 // PR-2c-8: Tool::Auth has no bridge dispatch path. The
1884 // bridge bails with a message pointing at
1885 // `resolve_auth_to_bearer` + `auth_context_updates` so
1886 // misuse is loud rather than silent.
1887 let vars = empty_vars();
1888 let bridge = bridge_ctx(&vars);
1889 let tool = Tool::Auth {
1890 provider: "adc".into(),
1891 scopes: vec![],
1892 project: None,
1893 };
1894 let err = dispatch_via_registry(&tool, &bridge).await.unwrap_err();
1895 let msg = err.to_string();
1896 assert!(
1897 msg.contains("Tool::Auth")
1898 && msg.contains("resolve_auth_to_bearer")
1899 && msg.contains("auth_context_updates"),
1900 "error should point at the helpers: {msg}"
1901 );
1902 }
1903
1904 #[tokio::test]
1905 async fn dispatch_sink_bails_pointing_at_helper() {
1906 // PR-2c-8: Tool::Sink has no bridge dispatch path either —
1907 // noetl-tools' TransferTool is database-to-database only.
1908 // The bridge bails with a message pointing at
1909 // `format_sink_payload` for format conversion.
1910 let vars = empty_vars();
1911 let bridge = bridge_ctx(&vars);
1912 let tool = Tool::Sink {
1913 target: crate::playbook::SinkTarget::File {
1914 path: "/tmp/out.json".into(),
1915 },
1916 format: SinkFormat::Json,
1917 };
1918 let err = dispatch_via_registry(&tool, &bridge).await.unwrap_err();
1919 let msg = err.to_string();
1920 assert!(
1921 msg.contains("Tool::Sink") && msg.contains("format_sink_payload"),
1922 "error should point at the helper: {msg}"
1923 );
1924 }
1925
1926 #[tokio::test]
1927 async fn dispatch_playbook_bails_with_h10_finding() {
1928 // PR-2c-7: `Tool::Playbook` is not bridgeable. Make sure
1929 // the dispatch arm bails with a descriptive error rather
1930 // than silently returning an empty outcome.
1931 let vars = empty_vars();
1932 let bridge = bridge_ctx(&vars);
1933 let tool = Tool::Playbook {
1934 path: "sub.yaml".into(),
1935 args: HashMap::new(),
1936 input: HashMap::new(),
1937 };
1938 let err = dispatch_via_registry(&tool, &bridge).await.unwrap_err();
1939 let msg = err.to_string();
1940 assert!(
1941 msg.contains("Tool::Playbook")
1942 && msg.contains("not bridgeable")
1943 && msg.contains("§ H.10"),
1944 "error message should explain the § H.10 finding: {msg}"
1945 );
1946 }
1947
1948 // ---- PR-2c-4 — Tool::Shell bridge integration --------------------
1949
1950 #[tokio::test]
1951 async fn dispatch_shell_single_command_returns_stdout() {
1952 let vars = empty_vars();
1953 let bridge = bridge_ctx(&vars);
1954 let tool = Tool::Shell {
1955 cmds: CmdsList::Single("echo bridged".into()),
1956 };
1957 let outcome = dispatch_via_registry(&tool, &bridge).await.unwrap();
1958 // The bridge trims the trailing newline that `echo` adds so
1959 // the step result matches the CLI's pre-PR-2c-4 contract
1960 // (per-line stdout joined without trailing whitespace).
1961 assert_eq!(outcome.result, Some("bridged".into()));
1962 }
1963
1964 #[tokio::test]
1965 async fn dispatch_shell_multiple_returns_last_command_stdout() {
1966 // CLI semantic: with CmdsList::Multiple, each command runs
1967 // in its own bash invocation; the step result is the last
1968 // command's stdout.
1969 let vars = empty_vars();
1970 let bridge = bridge_ctx(&vars);
1971 let tool = Tool::Shell {
1972 cmds: CmdsList::Multiple(vec![
1973 "echo first".into(),
1974 "echo second".into(),
1975 "echo third".into(),
1976 ]),
1977 };
1978 let outcome = dispatch_via_registry(&tool, &bridge).await.unwrap();
1979 assert_eq!(outcome.result, Some("third".into()));
1980 }
1981
1982 #[tokio::test]
1983 async fn dispatch_shell_failure_propagates_error() {
1984 let vars = empty_vars();
1985 let bridge = bridge_ctx(&vars);
1986 let tool = Tool::Shell {
1987 cmds: CmdsList::Single("exit 7".into()),
1988 };
1989 let err = dispatch_via_registry(&tool, &bridge).await.unwrap_err();
1990 // noetl-tools' shell tool reports non-zero exit codes by
1991 // surfacing ToolResult.status == Error or by returning
1992 // result with exit_code set; either way the bridge's
1993 // from_tools_result converts that into an anyhow::Error.
1994 assert!(
1995 err.to_string().contains("shell")
1996 || err.to_string().contains("exit")
1997 || err.to_string().contains("failed"),
1998 "error message: {}",
1999 err
2000 );
2001 }
2002
2003 #[tokio::test]
2004 async fn dispatch_shell_single_with_newlines_runs_each_line_independently() {
2005 // CLI semantic: CmdsList::Single splits on newlines into
2006 // separate bash invocations. This means `cd /tmp` on one
2007 // line doesn't change the cwd of the next line.
2008 let vars = empty_vars();
2009 let bridge = bridge_ctx(&vars);
2010 let tool = Tool::Shell {
2011 cmds: CmdsList::Single("echo first_line\necho second_line".into()),
2012 };
2013 let outcome = dispatch_via_registry(&tool, &bridge).await.unwrap();
2014 assert_eq!(outcome.result, Some("second_line".into()));
2015 }
2016
2017 #[tokio::test]
2018 async fn dispatch_via_registry_unsupported_errors() {
2019 let vars = empty_vars();
2020 let bridge = bridge_ctx(&vars);
2021 let tool = Tool::Unsupported;
2022 let err = dispatch_via_registry(&tool, &bridge).await.unwrap_err();
2023 assert!(err.to_string().contains("unsupported"));
2024 }
2025
2026 // ---- PR-2c-3 — Tool::Rhai bridge integration ---------------------
2027
2028 #[tokio::test]
2029 async fn dispatch_rhai_evaluates_simple_arithmetic() {
2030 let vars = empty_vars();
2031 let bridge = bridge_ctx(&vars);
2032 let tool = Tool::Rhai {
2033 code: "let x = 40; let y = 2; (x + y).to_string()".into(),
2034 args: HashMap::new(),
2035 };
2036 let outcome = dispatch_via_registry(&tool, &bridge).await.unwrap();
2037 assert_eq!(outcome.result, Some("42".into()));
2038 }
2039
2040 #[tokio::test]
2041 async fn dispatch_rhai_reads_workload_variable_via_scope() {
2042 // `to_tools_context_for_rhai` groups the CLI's flat
2043 // `workload.region` key into a nested `workload` Map.
2044 // Rhai's `workload.region` then resolves as field access.
2045 let vars: HashMap<String, String> =
2046 [("workload.region".into(), "us-west-1".into())].into();
2047 let bridge = bridge_ctx(&vars);
2048 let tool = Tool::Rhai {
2049 code: r#"workload.region.to_string()"#.into(),
2050 args: HashMap::new(),
2051 };
2052 let outcome = dispatch_via_registry(&tool, &bridge).await.unwrap();
2053 assert_eq!(outcome.result, Some("us-west-1".into()));
2054 }
2055
2056 #[tokio::test]
2057 async fn dispatch_rhai_reads_step_result_via_field_access() {
2058 // Step results in the CLI surface as `<step>.result` keys.
2059 // The nested-shape adapter groups them under a step-named map.
2060 let vars: HashMap<String, String> = [
2061 ("check_health.result".into(), "ok".into()),
2062 ("check_health.status".into(), "200".into()),
2063 ]
2064 .into();
2065 let bridge = bridge_ctx(&vars);
2066 let tool = Tool::Rhai {
2067 code: r#"check_health.result.to_string()"#.into(),
2068 args: HashMap::new(),
2069 };
2070 let outcome = dispatch_via_registry(&tool, &bridge).await.unwrap();
2071 assert_eq!(outcome.result, Some("ok".into()));
2072 }
2073
2074 #[test]
2075 fn to_tools_context_for_rhai_groups_workload_prefix() {
2076 let vars: HashMap<String, String> = [
2077 ("workload.region".into(), "us-west-1".into()),
2078 ("workload.tier".into(), "prod".into()),
2079 ("vars.timeout".into(), "30".into()),
2080 ("step_a.result".into(), "done".into()),
2081 ("toplevel".into(), "kept_at_root".into()),
2082 ]
2083 .into();
2084 let bridge = bridge_ctx(&vars);
2085 let ctx = to_tools_context_for_rhai(&bridge);
2086
2087 let workload = ctx
2088 .variables
2089 .get("workload")
2090 .expect("workload group should exist")
2091 .as_object()
2092 .expect("workload should be an object");
2093 assert_eq!(workload.get("region"), Some(&serde_json::json!("us-west-1")));
2094 assert_eq!(workload.get("tier"), Some(&serde_json::json!("prod")));
2095
2096 let vars_map = ctx.variables.get("vars").and_then(|v| v.as_object()).unwrap();
2097 assert_eq!(vars_map.get("timeout"), Some(&serde_json::json!("30")));
2098
2099 let step_a = ctx.variables.get("step_a").and_then(|v| v.as_object()).unwrap();
2100 assert_eq!(step_a.get("result"), Some(&serde_json::json!("done")));
2101
2102 assert_eq!(
2103 ctx.variables.get("toplevel"),
2104 Some(&serde_json::json!("kept_at_root"))
2105 );
2106 }
2107
2108 #[tokio::test]
2109 async fn dispatch_rhai_string_literal_returns_unquoted() {
2110 let vars = empty_vars();
2111 let bridge = bridge_ctx(&vars);
2112 let tool = Tool::Rhai {
2113 code: r#""hello world""#.into(),
2114 args: HashMap::new(),
2115 };
2116 let outcome = dispatch_via_registry(&tool, &bridge).await.unwrap();
2117 // noetl-tools' RhaiTool returns the result through ToolResult.data
2118 // as a JSON value; for string results that means a JSON-quoted
2119 // string. from_tools_result strips the JSON quotes when data
2120 // is a Value::String.
2121 assert_eq!(outcome.result, Some("hello world".into()));
2122 }
2123
2124 // ---- Compiler proof: AuthConfig from playbook is still constructable
2125 // even though we don't pass it through to the bridge yet. Locks in
2126 // the field surface so PR-2c-5 / PR-2c-8 see a deliberate gap, not
2127 // a missing type.
2128 #[test]
2129 fn cli_auth_config_constructs() {
2130 let _auth = CliAuthConfig {
2131 provider: "adc".into(),
2132 scopes: vec!["https://www.googleapis.com/auth/cloud-platform".into()],
2133 };
2134 }
2135
2136 // ---- gcs_upload helper (R-3, noetl/ai-meta#31) ------------------
2137 //
2138 // These tests exercise `gcs_upload_with_store` — the inner path
2139 // shared by production (real GCS) and test (InMemory) callers.
2140 // The `gcs_upload` function (which builds the real GCS store from
2141 // env) is NOT tested here — real GCS credentials are not available
2142 // in CI. The call shape (bucket → builder → store → put) is the
2143 // same in both paths; the InMemory tests lock in the object_store
2144 // API surface and the helper's error-handling contract.
2145
2146 #[tokio::test]
2147 async fn gcs_upload_with_store_writes_data_to_object_store() {
2148 // Verifies the happy path: data is uploaded and can be read
2149 // back from the same InMemory store — proving gcs_upload_with_store
2150 // calls ObjectStore::put with the correct path + payload.
2151 use object_store::memory::InMemory;
2152 use object_store::ObjectStore;
2153
2154 let store = Arc::new(InMemory::new());
2155 gcs_upload_with_store(Arc::clone(&store) as Arc<dyn ObjectStore>, "output/data.json", r#"{"k":"v"}"#)
2156 .await
2157 .expect("upload should succeed");
2158
2159 let path = StorePath::from("output/data.json");
2160 let retrieved = store.get(&path).await.expect("should read back uploaded object");
2161 let body = retrieved.bytes().await.expect("should get bytes");
2162 assert_eq!(body, bytes::Bytes::from(r#"{"k":"v"}"#));
2163 }
2164
2165 #[tokio::test]
2166 async fn gcs_upload_with_store_overwrites_existing_key() {
2167 // Second upload to the same key must overwrite the first — the
2168 // InMemory store's put is idempotent on the key, which is the
2169 // same contract the real GCS object-level PUT provides.
2170 use object_store::memory::InMemory;
2171 use object_store::ObjectStore;
2172
2173 let store = Arc::new(InMemory::new());
2174 gcs_upload_with_store(Arc::clone(&store) as Arc<dyn ObjectStore>, "data.csv", "first").await.unwrap();
2175 gcs_upload_with_store(Arc::clone(&store) as Arc<dyn ObjectStore>, "data.csv", "second").await.unwrap();
2176
2177 let path = StorePath::from("data.csv");
2178 let body = store.get(&path).await.unwrap().bytes().await.unwrap();
2179 assert_eq!(body, bytes::Bytes::from("second"));
2180 }
2181
2182 #[tokio::test]
2183 async fn gcs_upload_with_store_handles_nested_key_paths() {
2184 // GCS object keys can contain slashes (they are logical paths
2185 // within a bucket, not filesystem paths). StorePath should
2186 // preserve the full slash-separated key.
2187 use object_store::memory::InMemory;
2188 use object_store::ObjectStore;
2189
2190 let store = Arc::new(InMemory::new());
2191 gcs_upload_with_store(
2192 Arc::clone(&store) as Arc<dyn ObjectStore>,
2193 "runs/2026-06-01/output/result.json",
2194 "[]",
2195 )
2196 .await
2197 .unwrap();
2198
2199 let path = StorePath::from("runs/2026-06-01/output/result.json");
2200 let body = store.get(&path).await.unwrap().bytes().await.unwrap();
2201 assert_eq!(body, bytes::Bytes::from("[]"));
2202 }
2203
2204 #[tokio::test]
2205 async fn gcs_upload_with_store_uploads_empty_string() {
2206 // An empty payload is a valid GCS object — the helper must not
2207 // short-circuit or error on empty data.
2208 use object_store::memory::InMemory;
2209 use object_store::ObjectStore;
2210
2211 let store = Arc::new(InMemory::new());
2212 gcs_upload_with_store(Arc::clone(&store) as Arc<dyn ObjectStore>, "empty.txt", "").await.unwrap();
2213
2214 let path = StorePath::from("empty.txt");
2215 let body = store.get(&path).await.unwrap().bytes().await.unwrap();
2216 assert_eq!(body.len(), 0);
2217 }
2218}