---
# =============================================================================
# HTTP Client to Console Pipeline Example
# =============================================================================
#
# This example demonstrates a simple pipeline that:
# 1. Polls an HTTP API at regular intervals (http_client source)
# 2. Prints the JSON response to the console (console sink)
#
# Usage:
#   pipeflow run examples/http_to_console.yaml
#
# =============================================================================
# -----------------------------------------------------------------------------
# System Configuration (Optional)
# -----------------------------------------------------------------------------
# Applies default settings to all pipeline components.
# Individual sources can override output_buffer_size.
system:
  # output_buffer_size: Broadcast channel capacity for sources/transforms
  # (default: 1024).
  # Higher values reduce the chance of slow consumers lagging/dropping
  # messages, but increase memory usage.
  output_buffer_size: 1024
# -----------------------------------------------------------------------------
# Pipeline Definition
# -----------------------------------------------------------------------------
pipeline:
  # ---------------------------------------------------------------------------
  # Sources
  # ---------------------------------------------------------------------------
  # Each source generates messages that flow through the pipeline.
  sources:
    - id: api_poller
      type: http_client
      # Source-specific output buffer size override (Optional)
      # output_buffer_size: 512
      config:
        # =====================================================================
        # HTTP Client Source Configuration Reference
        # =====================================================================
        #
        # Required:
        # ---------
        #   url: Target URL to poll (must be a valid HTTP/HTTPS URL)
        #
        # Optional:
        # ---------
        #   interval: Polling interval (default: "60s")
        #     Supports human-readable formats:
        #       - "10s"    → 10 seconds
        #       - "5m"     → 5 minutes
        #       - "1h"     → 1 hour
        #       - "1h 30m" → 1 hour 30 minutes
        #
        #   schedule: Cron schedule (5 or 6 fields; 5-field schedules assume
        #     "0" seconds). When set, interval is ignored.
        #       - "0 0 * * *"     → Daily at 00:00 (local time)
        #       - "0 */5 * * * *" → Every 5 minutes
        #
        #   method: HTTP method (default: "GET")
        #     Supported: GET | POST | PUT | DELETE
        #
        #   headers: Request headers as key-value pairs (default: {})
        #     Useful for authentication, content-type, etc.
        #
        # =====================================================================

        # --- Required ---
        url: "https://httpbin.org/json"

        # --- Optional (explicit values shown, all have defaults) ---
        interval: "10s"
        # schedule: "0 0 * * *"
        method: "GET"
        # Explicit empty mapping (the documented default). A bare `headers:`
        # would parse as null rather than an empty mapping.
        # To send headers, replace `{}` with a block mapping, for example:
        #   headers:
        #     Accept: "application/json"
        #     Authorization: "Bearer ${API_TOKEN}"
        #     X-Request-ID: "pipeflow-poll"
        headers: {}

  # ---------------------------------------------------------------------------
  # Transforms
  # ---------------------------------------------------------------------------
  # Transforms connect sources to sinks. A transform can have zero steps
  # to act as a simple pass-through.
  transforms:
    - id: pass_through
      # NOTE(review): `inputs` and `outputs` were empty (null) in the original,
      # which leaves the transform connected to nothing. They are wired here to
      # the only source and sink defined in this file, assuming both keys take
      # a list of component ids — confirm against the pipeflow config schema.
      inputs:
        - api_poller
      outputs:
        - console_out

  # ---------------------------------------------------------------------------
  # Sinks
  # ---------------------------------------------------------------------------
  # Each sink receives messages from transforms.
  sinks:
    - id: console_out
      type: console
      config:
        # =====================================================================
        # Console Sink Configuration Reference
        # =====================================================================
        #
        # Optional:
        # ---------
        #   format: Output format for messages (default: "pretty")
        #     Options:
        #       - pretty: Indented, human-readable JSON
        #       - json:   Compact single-line JSON (for log aggregators)
        #       - text:   Plain text format (payload only)
        #
        # =====================================================================
        # `text` prints the payload only; use `pretty` or `json` for JSON output.
        format: text