# pipeflow 0.0.4
#
# A lightweight, configuration-driven data pipeline framework
# Documentation
# =============================================================================
# HTTP Client to Console Pipeline Example
# =============================================================================
#
# This example demonstrates a simple pipeline that:
#   1. Polls an HTTP API at regular intervals (http_client source)
#   2. Prints the JSON response to the console (console sink)
#
# Usage:
#   pipeflow run examples/http_to_console.yaml
#
# =============================================================================

# -----------------------------------------------------------------------------
# System Configuration (Optional)
# -----------------------------------------------------------------------------
# Applies default settings to all pipeline components.
# Individual sources can override output_buffer_size.
system:
  # output_buffer_size: capacity of the broadcast channel used by
  # sources/transforms (default: 1024).
  # Trade-off: a larger buffer makes it less likely that a slow consumer lags
  # behind or drops messages, at the cost of higher memory usage.
  # The value below restates the documented default explicitly.
  output_buffer_size: 1024

# -----------------------------------------------------------------------------
# Pipeline Definition
# -----------------------------------------------------------------------------
pipeline:
  # ---------------------------------------------------------------------------
  # Sources
  # ---------------------------------------------------------------------------
  # Each source generates messages that flow through the pipeline.
  sources:
    - id: api_poller
      type: http_client

      # Optional per-source override of the system-wide output_buffer_size:
      # output_buffer_size: 512

      config:
        # ---------------------------------------------------------------------
        # http_client source: configuration reference
        # ---------------------------------------------------------------------
        #
        # url (required)
        #   Target URL to poll; must be a valid HTTP/HTTPS URL.
        #
        # interval (optional, default: "60s")
        #   Polling interval, written as a human-readable duration:
        #     "10s"     -> 10 seconds
        #     "5m"      -> 5 minutes
        #     "1h"      -> 1 hour
        #     "1h 30m"  -> 1 hour 30 minutes
        #
        # schedule (optional)
        #   Cron schedule with 5 or 6 fields; a 5-field schedule assumes "0"
        #   for the seconds field. When set, interval is ignored.
        #     "0 0 * * *"      -> daily at 00:00 (local time)
        #     "0 */5 * * * *"  -> every 5 minutes
        #
        # method (optional, default: "GET")
        #   HTTP method. Supported: GET | POST | PUT | DELETE
        #
        # headers (optional, default: {})
        #   Request headers as key-value pairs; useful for authentication,
        #   content-type, etc.
        #
        # ---------------------------------------------------------------------

        # Required
        url: 'https://httpbin.org/json'

        # Optional settings, spelled out explicitly (each one has a default)
        interval: '10s'
        # schedule: '0 0 * * *'
        method: 'GET'
        headers: {}
        # Example headers:
        #   Accept: 'application/json'
        #   Authorization: 'Bearer ${API_TOKEN}'
        #   X-Request-ID: 'pipeflow-poll'

  # ---------------------------------------------------------------------------
  # Transforms
  # ---------------------------------------------------------------------------
  # Transforms connect sources to sinks. A transform can have zero steps
  # to act as a simple pass-through.
  transforms:
    # Pass-through: declares no steps, so it only wires the api_poller source
    # to the console_out sink.
    - id: pass_through
      inputs:
        - api_poller
      outputs:
        - console_out

  # ---------------------------------------------------------------------------
  # Sinks
  # ---------------------------------------------------------------------------
  # Each sink receives messages from transforms.
  sinks:
    - id: console_out
      type: console

      config:
        # ---------------------------------------------------------------------
        # console sink: configuration reference
        # ---------------------------------------------------------------------
        #
        # format (optional, default: "pretty")
        #   Output format for messages:
        #     pretty -> indented, human-readable JSON
        #     json   -> compact single-line JSON (for log aggregators)
        #     text   -> plain text format (payload only)
        #
        # ---------------------------------------------------------------------

        # Non-default choice: plain-text output of the payload only.
        format: 'text'