# yomo 2.0.2

# A QUIC-based runtime for AI-LLM tool routing and serverless execution
# Documentation
# Authentication token. Left empty in this template.
# NOTE(review): confirm how an empty token is treated
# (authentication disabled vs. connections rejected).
auth_token: ""

# Network settings for the zipper.
# NOTE(review): presumably the runtime's QUIC endpoint — confirm.
zipper:
  # 127.0.0.1 is the loopback address (local machine only).
  host: "127.0.0.1"
  port: 9000
  # Empty mapping: no TLS options are set in this template.
  tls: {}

# Network settings for the HTTP API.
http_api:
  # 127.0.0.1 is the loopback address (local machine only).
  host: "127.0.0.1"
  port: 9001
  # The tool API is switched off in this template.
  # NOTE(review): what enabling it exposes is not visible here — confirm.
  enable_tool_api: false

# LLM providers: a list of entries, each with `type`, `model_id` and `params`,
# plus the optional fields listed under "Common fields" below.
llm_providers:
  # LLM provider types:
  # - openai-compatible: OpenAI-compatible /v1/chat/completions upstream.
  #   required params: model, api_key, base_url
  # - vllm_deepseek: vLLM DeepSeek specialized provider.
  #   required params: model, api_key, base_url
  # - tokenhub: Tencent TokenHub /v1/chat/completions provider.
  #   required params: model, api_key
  #   optional params: base_url (default https://tokenhub.tencentmaas.com/v1)
  # - vertexai: Google Vertex AI generateContent/streamGenerateContent.
  #   required params: model, project_id, location, credentials_file
  # Common fields:
  # - model_id: external model name used by requests
  # - default: optional; marks the model used when a request names no model
  # - label: optional display label for logs/ops
  #
  # Active entry: an openai-compatible provider, marked as the default.
  - type: "openai-compatible"
    model_id: "gpt-4o-mini"
    default: true
    params:
      model: "gpt-4o-mini"
      # Required for this provider type; empty in this template.
      api_key: ""
      base_url: "https://api.openai.com/v1"
  # Commented-out example entries for the other provider types follow.
  # - type: "vllm_deepseek"
  #   model_id: "deepseek-v4-flash"
  #   params:
  #     model: "DeepSeek-V4-Flash"
  #     api_key: ""
  #     base_url: "http://127.0.0.1:8000/v1"
  # NOTE(review): the next example reuses the model_id of the vllm_deepseek
  # example; presumably model_id must be unique across entries — confirm
  # before enabling both.
  # - type: "tokenhub"
  #   model_id: "deepseek-v4-flash"
  #   params:
  #     model: "deepseek-v4-flash"
  #     api_key: ""
  #     base_url: "https://tokenhub.tencentmaas.com/v1"
  # - type: "vertexai"
  #   model_id: "gemini-2.5-flash"
  #   params:
  #     model: "gemini-2.5-flash"
  #     project_id: "your-gcp-project-id"
  #     location: "global"
  #     credentials_file: "/abs/path/to/service-account.json"

# model_api: a `providers` list and an `endpoints` list.
model_api:
  # model_api provider types:
  # - passthrough: generic proxy for embeddings/rerank/audio/images endpoints.
  #   required params: api_key, base_url, model
  # - responses: OpenAI /v1/responses adapter.
  #   required params: api_key, base_url, model
  # - bedrock-messages: AWS Bedrock Anthropic Messages adapter.
  #   required params: model, aws_region
  #   optional params: anthropic_version (default bedrock-2023-05-31),
  #     max_tokens (default 4096), aws_bearer_token
  # - generate_content: Google Vertex generateContent adapter.
  #   required params: model, project_id, credentials_file
  #   optional params: location (default global)
  providers:
    # Active entry: a responses provider.
    - type: "responses"
      model_id: "gpt-4o-mini"
      params:
        model: "gpt-4o-mini"
        # Required for this provider type; empty in this template.
        api_key: ""
        base_url: "https://api.openai.com/v1"
    # Commented-out example entries follow.
    # - type: "responses"
    #   model_id: "gpt-5.3-codex"
    #   params:
    #     model: "gpt-5.3-codex"
    #     api_key: ""
    #     base_url: "https://api.openai.com/v1"
    # - type: "bedrock-messages"
    #   model_id: "claude-sonnet-4-6"
    #   params:
    #     model: "global.anthropic.claude-sonnet-4-6"
    #     aws_region: "ap-northeast-1"
    #     aws_bearer_token: ""
    #     anthropic_version: "bedrock-2023-05-31"
    #     max_tokens: "4096"
    # - type: "generate_content"
    #   model_id: "gemini-2.5-flash"
    #   params:
    #     model: "gemini-2.5-flash"
    #     project_id: "your-gcp-project-id"
    #     location: "global"
    #     credentials_file: "/abs/path/to/service-account.json"

  endpoints:
    # Supported endpoint paths:
    # /responses, /messages, /models/:generateContent,
    # /embeddings, /rerank,
    # /audio/speech, /audio/transcriptions,
    # /images/generations, /images/edits
    #
    # NOTE(review): `models` entries presumably refer to provider model_id
    # values, and default_model to one of the listed `models` — confirm.
    - path: "/responses"
      models:
        - "gpt-4o-mini"
      default_model: "gpt-4o-mini"