inference-gateway-sdk 0.6.0

---
# Document header: OpenAPI spec version, API metadata and the server list.
openapi: 3.1.0
info:
  title: Inference Gateway API
  description: |
    API for interacting with various language models through the Inference Gateway.
  version: 1.0.0
# A single server entry is declared, pointing at localhost on port 8080.
servers:
  - url: http://localhost:8080
paths:
  # Lists models for all providers. The 200 body is an array of
  # ListModelsResponse objects (each carries a provider and its models).
  /llms:
    get:
      summary: List all language models
      operationId: listModels
      security:
        - bearerAuth: []
      responses:
        "200":
          description: A list of models by provider
          content:
            application/json:
              schema:
                type: array
                items:
                  $ref: "#/components/schemas/ListModelsResponse"
        "401":
          $ref: "#/components/responses/Unauthorized"
  # Lists models for one provider. `provider` must be one of the Providers
  # enum values; the 200 body is a single ListModelsResponse object.
  /llms/{provider}:
    get:
      summary: List all models for a specific provider
      operationId: listModelsByProvider
      parameters:
        - name: provider
          in: path
          required: true
          schema:
            $ref: "#/components/schemas/Providers"
      security:
        - bearerAuth: []
      responses:
        "200":
          description: A list of models
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/ListModelsResponse"
        "400":
          $ref: "#/components/responses/BadRequest"
        "401":
          $ref: "#/components/responses/Unauthorized"
  # Content generation for one provider. The request is a GenerateRequest and
  # the JSON result is a GenerateResponse.
  /llms/{provider}/generate:
    post:
      summary: Generate content with a specific provider's LLM
      operationId: generateContent
      parameters:
        - name: provider
          in: path
          required: true
          schema:
            $ref: "#/components/schemas/Providers"
      security:
        - bearerAuth: []
      requestBody:
        # A request body is optional unless declared otherwise. GenerateRequest
        # has mandatory fields (model, messages), so the body itself is marked
        # required, as ProviderRequest already does.
        required: true
        content:
          application/json:
            schema:
              $ref: "#/components/schemas/GenerateRequest"
      responses:
        "200":
          description: Generated content
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/GenerateResponse"
        "400":
          $ref: "#/components/responses/BadRequest"
        "401":
          $ref: "#/components/responses/Unauthorized"
        "500":
          $ref: "#/components/responses/InternalError"
  # Proxy operations: every HTTP verb below shares the two path parameters,
  # the bearerAuth requirement and the same four responses. POST, PUT and
  # PATCH additionally take the shared ProviderRequest body.
  /proxy/{provider}/{path}:
    parameters:
      - name: provider
        in: path
        required: true
        schema:
          $ref: "#/components/schemas/Providers"
      - name: path
        in: path
        required: true
        description: The remaining path to proxy to the provider
        style: simple
        explode: false
        schema:
          type: string
    get:
      summary: Proxy GET request to provider
      operationId: proxyGet
      security:
        - bearerAuth: []
      responses:
        "200":
          $ref: "#/components/responses/ProviderResponse"
        "400":
          $ref: "#/components/responses/BadRequest"
        "401":
          $ref: "#/components/responses/Unauthorized"
        "500":
          $ref: "#/components/responses/InternalError"
    post:
      summary: Proxy POST request to provider
      operationId: proxyPost
      security:
        - bearerAuth: []
      requestBody:
        $ref: "#/components/requestBodies/ProviderRequest"
      responses:
        "200":
          $ref: "#/components/responses/ProviderResponse"
        "400":
          $ref: "#/components/responses/BadRequest"
        "401":
          $ref: "#/components/responses/Unauthorized"
        "500":
          $ref: "#/components/responses/InternalError"
    put:
      summary: Proxy PUT request to provider
      operationId: proxyPut
      security:
        - bearerAuth: []
      requestBody:
        $ref: "#/components/requestBodies/ProviderRequest"
      responses:
        "200":
          $ref: "#/components/responses/ProviderResponse"
        "400":
          $ref: "#/components/responses/BadRequest"
        "401":
          $ref: "#/components/responses/Unauthorized"
        "500":
          $ref: "#/components/responses/InternalError"
    delete:
      summary: Proxy DELETE request to provider
      operationId: proxyDelete
      security:
        - bearerAuth: []
      responses:
        "200":
          $ref: "#/components/responses/ProviderResponse"
        "400":
          $ref: "#/components/responses/BadRequest"
        "401":
          $ref: "#/components/responses/Unauthorized"
        "500":
          $ref: "#/components/responses/InternalError"
    patch:
      summary: Proxy PATCH request to provider
      operationId: proxyPatch
      security:
        - bearerAuth: []
      requestBody:
        $ref: "#/components/requestBodies/ProviderRequest"
      responses:
        "200":
          $ref: "#/components/responses/ProviderResponse"
        "400":
          $ref: "#/components/responses/BadRequest"
        "401":
          $ref: "#/components/responses/Unauthorized"
        "500":
          $ref: "#/components/responses/InternalError"
  # Health probe. Unlike the other operations in this file it declares no
  # `security` requirement, no operationId and no response body.
  /health:
    get:
      summary: Health check
      responses:
        "200":
          description: Health check successful
components:
  requestBodies:
    # Request body shared by the proxy POST, PUT and PATCH operations.
    ProviderRequest:
      required: true
      description: |
        ProviderRequest depends on the specific provider and endpoint being called
        If you decide to use this approach, please follow the provider-specific documentations.
      content:
        application/json:
          schema:
            type: object
            properties:
              model:
                type: string
              messages:
                type: array
                items:
                  type: object
                  properties:
                    role:
                      type: string
                    content:
                      type: string
              temperature:
                type: number
                format: float64
                default: 0.7
          # Named examples belong on the media type as a map of Example
          # Objects (summary + value). Inside a schema, `examples` is a plain
          # array of sample values, so the summary/value wrappers would be
          # read as part of the payload itself.
          examples:
            openai:
              summary: OpenAI chat completion request
              value:
                model: "gpt-3.5-turbo"
                messages:
                  - role: "user"
                    content: "Hello! How can I assist you today?"
                temperature: 0.7
            anthropic:
              summary: Anthropic Claude request
              value:
                model: "claude-3-opus-20240229"
                messages:
                  - role: "user"
                    content: "Explain quantum computing"
                temperature: 0.5
  responses:
    # The three error responses below all return the Error schema as JSON.
    BadRequest:
      description: Bad request
      content:
        application/json:
          schema:
            $ref: "#/components/schemas/Error"
    Unauthorized:
      description: Unauthorized
      content:
        application/json:
          schema:
            $ref: "#/components/schemas/Error"
    InternalError:
      description: Internal server error
      content:
        application/json:
          schema:
            $ref: "#/components/schemas/Error"
    # 200 response of every proxy operation; the body is described by the
    # open-ended ProviderSpecificResponse schema.
    ProviderResponse:
      description: |
        ProviderResponse depends on the specific provider and endpoint being called
        If you decide to use this approach, please follow the provider-specific documentations.
      content:
        application/json:
          schema:
            $ref: "#/components/schemas/ProviderSpecificResponse"
          examples:
            openai:
              summary: OpenAI API response
              value:
                id: "chatcmpl-123"
                object: "chat.completion"
                created: 1677652288
                model: "gpt-3.5-turbo"
                choices:
                  - index: 0
                    message:
                      role: "assistant"
                      content: "Hello! How can I help you today?"
                    finish_reason: "stop"
  securitySchemes:
    # HTTP bearer scheme carrying a JWT. It is referenced by every operation
    # in this file except /health.
    bearerAuth:
      type: http
      scheme: bearer
      bearerFormat: JWT
      description: |
        Authentication is optional by default.
        To enable authentication, set ENABLE_AUTH to true.
        When enabled, requests must include a valid JWT token in the Authorization header.
  schemas:
    # Closed set of provider identifiers. Used as the schema of the `provider`
    # path parameter and of ListModelsResponse.provider.
    Providers:
      type: string
      enum:
        - ollama
        - groq
        - openai
        - cloudflare
        - cohere
        - anthropic
    # Open-ended object (additionalProperties: true) used as the body of the
    # ProviderResponse response; no fixed properties are declared.
    # NOTE(review): the Anthropic sample in the description (name/description
    # fields) does not match the anthropic list schema under
    # x-provider-configs in this file (type/id/display_name/created_at) -
    # confirm which one reflects the real API.
    ProviderSpecificResponse:
      type: object
      description: |
        Provider-specific response format. Examples:

        OpenAI GET /v1/models response:
        ```json
        {
          "data": [
            {
              "id": "gpt-4",
              "object": "model",
              "created": 1687882410
            }
          ]
        }
        ```

        Anthropic GET /v1/models response:
        ```json
        {
          "models": [
            {
              "name": "claude-3-opus-20240229",
              "description": "Most capable model for highly complex tasks"
            }
          ]
        }
        ```
      additionalProperties: true
    # Enumerates the allowed auth_type values. The x-provider-configs entries
    # in this file use bearer, xheader and none; query is not used by any of
    # them.
    ProviderAuthType:
      type: string
      description: Authentication type for providers
      enum:
        - bearer
        - xheader
        - query
        - none
    # Error payload referenced by the BadRequest, Unauthorized and
    # InternalError responses: an object with a single string field `error`.
    Error:
      type: object
      properties:
        error:
          type: string
    # Allowed values for Message.role.
    MessageRole:
      type: string
      description: Role of the message sender
      enum:
        - system
        - user
        - assistant
    # One chat message: a MessageRole plus free-text content. Neither
    # property is listed as required. Used as the item type of
    # GenerateRequest.messages.
    Message:
      type: object
      description: Message structure for provider requests
      properties:
        role:
          $ref: "#/components/schemas/MessageRole"
        content:
          type: string
    # Provider-independent model descriptor; only the model name is exposed.
    Model:
      type: object
      description: Common model information
      properties:
        name:
          type: string
    # Pairs a provider identifier with the array of Model entries for it.
    # Returned by both model-listing operations (/llms wraps it in an array).
    ListModelsResponse:
      type: object
      description: Response structure for listing models
      properties:
        provider:
          $ref: "#/components/schemas/Providers"
        models:
          type: array
          items:
            $ref: "#/components/schemas/Model"
    # Body of the generate operation. `model` and `messages` are mandatory;
    # the `stream` and `ssevents` flags are optional and both default to false.
    GenerateRequest:
      type: object
      description: Request structure for token generation
      required:
        - model
        - messages
      properties:
        model:
          type: string
        messages:
          type: array
          items:
            $ref: "#/components/schemas/Message"
        stream:
          type: boolean
          default: false
          description: Whether to stream tokens as they are generated in raw json
        ssevents:
          type: boolean
          default: false
          description: |
            Whether to use Server-Sent Events for token generation.
            When enabled, the response will be streamed as SSE with the following event types:
              - message-start: Initial message event with assistant role
              - stream-start: Stream initialization
              - content-start: Content beginning
              - content-delta: Content update with new tokens
              - content-end: Content completion
              - message-end: Message completion
              - stream-end: Stream completion

              **Note:** Depending on the provider, some events may not be present.
    # Generated output: three plain string fields (role, model, content).
    # Embedded in GenerateResponse.response.
    ResponseTokens:
      type: object
      description: Token response structure
      properties:
        role:
          type: string
        model:
          type: string
        content:
          type: string
    # 200 body of the generate operation. `provider` is a plain string here
    # (not a $ref to the Providers enum); `response` is a ResponseTokens.
    GenerateResponse:
      type: object
      description: Response structure for token generation
      properties:
        provider:
          type: string
        response:
          $ref: "#/components/schemas/ResponseTokens"
    Config:
      x-config:
        sections:
          # General application settings. Each entry maps a setting name to
          # an environment variable, a type and a default; all defaults in
          # this section are written as quoted strings, including the bools.
          - general:
              title: "General settings"
              settings:
                - name: application_name
                  env: "APPLICATION_NAME"
                  type: string
                  default: "inference-gateway"
                  description: "The name of the application"
                - name: environment
                  env: "ENVIRONMENT"
                  type: string
                  default: "production"
                  description: "The environment"
                - name: enable_telemetry
                  env: "ENABLE_TELEMETRY"
                  type: bool
                  default: "false"
                  description: "Enable telemetry"
                - name: enable_auth
                  env: "ENABLE_AUTH"
                  type: bool
                  default: "false"
                  description: "Enable authentication"
          # OpenID Connect settings. client_id and client_secret are both
          # flagged `secret: true`; client_secret has no default.
          # NOTE(review): `secret` presumably tells the consumer of x-config
          # to treat the value as sensitive - confirm with that tooling.
          - oidc:
              title: "OpenID Connect"
              settings:
                - name: issuer_url
                  env: "OIDC_ISSUER_URL"
                  type: string
                  default: "http://keycloak:8080/realms/inference-gateway-realm"
                  description: "OIDC issuer URL"
                - name: client_id
                  env: "OIDC_CLIENT_ID"
                  type: string
                  default: "inference-gateway-client"
                  description: "OIDC client ID"
                  secret: true
                - name: client_secret
                  env: "OIDC_CLIENT_SECRET"
                  type: string
                  description: "OIDC client secret"
                  secret: true
          # HTTP server settings: bind host/port, three timeouts typed as
          # time.Duration, and TLS certificate/key paths without defaults.
          # Note that `port` is typed as a string, not an int.
          - server:
              title: "Server settings"
              settings:
                - name: host
                  env: "SERVER_HOST"
                  type: string
                  default: "0.0.0.0"
                  description: "Server host"
                - name: port
                  env: "SERVER_PORT"
                  type: string
                  default: "8080"
                  description: "Server port"
                - name: read_timeout
                  env: "SERVER_READ_TIMEOUT"
                  type: time.Duration
                  default: "30s"
                  description: "Read timeout"
                - name: write_timeout
                  env: "SERVER_WRITE_TIMEOUT"
                  type: time.Duration
                  default: "30s"
                  description: "Write timeout"
                - name: idle_timeout
                  env: "SERVER_IDLE_TIMEOUT"
                  type: time.Duration
                  default: "120s"
                  description: "Idle timeout"
                - name: tls_cert_path
                  env: "SERVER_TLS_CERT_PATH"
                  type: string
                  description: "TLS certificate path"
                - name: tls_key_path
                  env: "SERVER_TLS_KEY_PATH"
                  type: string
                  description: "TLS key path"
          # Client settings: request timeout, idle-connection limits and the
          # minimum TLS version. Every setting in this section has a default.
          - client:
              title: "Client settings"
              settings:
                - name: timeout
                  env: "CLIENT_TIMEOUT"
                  type: time.Duration
                  default: "30s"
                  description: "Client timeout"
                - name: max_idle_conns
                  env: "CLIENT_MAX_IDLE_CONNS"
                  type: int
                  default: "20"
                  description: "Maximum idle connections"
                - name: max_idle_conns_per_host
                  env: "CLIENT_MAX_IDLE_CONNS_PER_HOST"
                  type: int
                  default: "20"
                  description: "Maximum idle connections per host"
                - name: idle_conn_timeout
                  env: "CLIENT_IDLE_CONN_TIMEOUT"
                  type: time.Duration
                  default: "30s"
                  description: "Idle connection timeout"
                - name: tls_min_version
                  env: "CLIENT_TLS_MIN_VERSION"
                  type: string
                  default: "TLS12"
                  description: "Minimum TLS version"
          # Per-provider settings, one <provider>_api_url / <provider>_api_key
          # pair for each value of the Providers enum. URLs carry defaults;
          # keys have no default and are flagged `secret: true`.
          - providers:
              title: "Providers"
              settings:
                - name: anthropic_api_url
                  env: "ANTHROPIC_API_URL"
                  type: string
                  default: "https://api.anthropic.com"
                  description: "Anthropic API URL"
                - name: anthropic_api_key
                  env: "ANTHROPIC_API_KEY"
                  type: string
                  description: "Anthropic API Key"
                  secret: true
                - name: cloudflare_api_url
                  env: "CLOUDFLARE_API_URL"
                  type: string
                  default: "https://api.cloudflare.com/client/v4/accounts/{ACCOUNT_ID}"
                  description: "Cloudflare API URL"
                - name: cloudflare_api_key
                  env: "CLOUDFLARE_API_KEY"
                  type: string
                  description: "Cloudflare API Key"
                  secret: true
                - name: cohere_api_url
                  env: "COHERE_API_URL"
                  type: string
                  default: "https://api.cohere.com"
                  description: "Cohere API URL"
                - name: cohere_api_key
                  env: "COHERE_API_KEY"
                  type: string
                  description: "Cohere API Key"
                  secret: true
                - name: groq_api_url
                  env: "GROQ_API_URL"
                  type: string
                  default: "https://api.groq.com"
                  description: "Groq API URL"
                - name: groq_api_key
                  env: "GROQ_API_KEY"
                  type: string
                  description: "Groq API Key"
                  secret: true
                - name: ollama_api_url
                  env: "OLLAMA_API_URL"
                  type: string
                  default: "http://ollama:8080"
                  description: "Ollama API URL"
                - name: ollama_api_key
                  env: "OLLAMA_API_KEY"
                  type: string
                  description: "Ollama API Key"
                  secret: true
                - name: openai_api_url
                  env: "OPENAI_API_URL"
                  type: string
                  default: "https://api.openai.com"
                  description: "OpenAI API URL"
                - name: openai_api_key
                  env: "OPENAI_API_KEY"
                  type: string
                  description: "OpenAI API Key"
                  secret: true
      x-provider-configs:
        # Ollama: auth_type none. Models are listed with GET /api/tags and
        # content is generated with POST /api/generate (prompt-based request).
        # The generate response schema has the same fields as
        # GenerateResponse/ResponseTokens in this file.
        # NOTE(review): the URL uses port 8080, while a stock Ollama install
        # listens on 11434 - confirm this matches the intended deployment.
        ollama:
          id: "ollama"
          url: "http://ollama:8080"
          auth_type: "none"
          endpoints:
            list:
              endpoint: "/api/tags"
              method: "GET"
              schema:
                response:
                  type: object
                  properties:
                    models:
                      type: array
                      items:
                        type: object
                        properties:
                          name:
                            type: string
                          modified_at:
                            type: string
                          size:
                            type: integer
                          digest:
                            type: string
                          details:
                            type: object
                            properties:
                              format:
                                type: string
                              family:
                                type: string
                              families:
                                type: array
                                items:
                                  type: string
                              parameter_size:
                                type: string
            generate:
              endpoint: "/api/generate"
              method: "POST"
              schema:
                request:
                  type: object
                  properties:
                    model:
                      type: string
                    prompt:
                      type: string
                    stream:
                      type: boolean
                    system:
                      type: string
                    temperature:
                      type: number
                      format: float64
                      default: 0.7
                response:
                  type: object
                  properties:
                    provider:
                      type: string
                    response:
                      type: object
                      properties:
                        role:
                          type: string
                        model:
                          type: string
                        content:
                          type: string
        # OpenAI: bearer auth. Models are listed with GET /v1/models and
        # content is generated with POST /v1/chat/completions. The generate
        # response is modelled as model + choices[].message{role, content}.
        openai:
          id: "openai"
          url: "https://api.openai.com"
          auth_type: "bearer"
          endpoints:
            list:
              endpoint: "/v1/models"
              method: "GET"
              schema:
                response:
                  type: object
                  properties:
                    object:
                      type: string
                    data:
                      type: array
                      items:
                        type: object
                        properties:
                          id:
                            type: string
                          object:
                            type: string
                          created:
                            type: integer
                            format: int64
                          owned_by:
                            type: string
                          permission:
                            type: array
                            items:
                              type: object
                              properties:
                                id:
                                  type: string
                                object:
                                  type: string
                                created:
                                  type: integer
                                  format: int64
                                allow_create_engine:
                                  type: boolean
                                allow_sampling:
                                  type: boolean
                                allow_logprobs:
                                  type: boolean
                                allow_search_indices:
                                  type: boolean
                                allow_view:
                                  type: boolean
                                allow_fine_tuning:
                                  type: boolean
                          root:
                            type: string
                          parent:
                            type: string
            generate:
              endpoint: "/v1/chat/completions"
              method: "POST"
              schema:
                request:
                  type: object
                  properties:
                    model:
                      type: string
                    messages:
                      type: array
                      items:
                        type: object
                        properties:
                          role:
                            type: string
                          content:
                            type: string
                    temperature:
                      type: number
                      format: float64
                      default: 0.7
                response:
                  type: object
                  properties:
                    model:
                      type: string
                    choices:
                      type: array
                      items:
                        type: object
                        properties:
                          message:
                            type: object
                            properties:
                              role:
                                type: string
                              content:
                                type: string
        # Groq: bearer auth. Both endpoints sit under the /openai/v1 prefix,
        # and the generate request/response schemas are identical to the
        # openai entry in this file.
        groq:
          id: "groq"
          url: "https://api.groq.com"
          auth_type: "bearer"
          endpoints:
            list:
              endpoint: "/openai/v1/models"
              method: "GET"
              schema:
                response:
                  type: object
                  properties:
                    object:
                      type: string
                    data:
                      type: array
                      items:
                        type: object
                        properties:
                          id:
                            type: string
                          object:
                            type: string
                          created:
                            type: integer
                            format: int64
                          owned_by:
                            type: string
                          active:
                            type: boolean
                          context_window:
                            type: integer
                          public_apps:
                            type: object
            generate:
              endpoint: "/openai/v1/chat/completions"
              method: "POST"
              schema:
                request:
                  type: object
                  properties:
                    model:
                      type: string
                    messages:
                      type: array
                      items:
                        type: object
                        properties:
                          role:
                            type: string
                          content:
                            type: string
                    temperature:
                      type: number
                      format: float64
                      default: 0.7
                response:
                  type: object
                  properties:
                    model:
                      type: string
                    choices:
                      type: array
                      items:
                        type: object
                        properties:
                          message:
                            type: object
                            properties:
                              role:
                                type: string
                              content:
                                type: string
        # Cloudflare: bearer auth. The base URL contains an {ACCOUNT_ID}
        # placeholder. Models are listed with GET /ai/finetunes/public.
        # NOTE(review): the generate endpoint is /v1/chat/completions without
        # the /ai prefix used by the list endpoint, and its schemas are
        # prompt-based (prompt in, result.response out) rather than
        # messages/choices - confirm the path and shapes against the
        # Cloudflare API actually being called.
        cloudflare:
          id: "cloudflare"
          url: "https://api.cloudflare.com/client/v4/accounts/{ACCOUNT_ID}"
          auth_type: "bearer"
          endpoints:
            list:
              endpoint: "/ai/finetunes/public"
              method: "GET"
              schema:
                response:
                  type: object
                  properties:
                    result:
                      type: array
                      items:
                        type: object
                        properties:
                          id:
                            type: string
                          name:
                            type: string
                          description:
                            type: string
                          created_at:
                            type: string
                          modified_at:
                            type: string
                          public:
                            type: integer
                          model:
                            type: string
            generate:
              endpoint: "/v1/chat/completions"
              method: "POST"
              schema:
                request:
                  type: object
                  properties:
                    prompt:
                      type: string
                    model:
                      type: string
                    temperature:
                      type: number
                      format: float64
                      default: 0.7
                response:
                  type: object
                  properties:
                    result:
                      type: object
                      properties:
                        response:
                          type: string
        # Cohere: bearer auth. Models are listed with GET /v1/models (the
        # response includes next_page_token) and content is generated with
        # POST /v2/chat. The reply's message.content is an array of
        # {type, text} parts, not a single string.
        cohere:
          id: "cohere"
          url: "https://api.cohere.com"
          auth_type: "bearer"
          endpoints:
            list:
              endpoint: "/v1/models"
              method: "GET"
              schema:
                response:
                  type: object
                  properties:
                    models:
                      type: array
                      items:
                        type: object
                        properties:
                          name:
                            type: string
                          endpoints:
                            type: array
                            items:
                              type: string
                          finetuned:
                            type: boolean
                          context_length:
                            type: number
                            format: float64
                          tokenizer_url:
                            type: string
                          default_endpoints:
                            type: array
                            items:
                              type: string
                    next_page_token:
                      type: string
            generate:
              endpoint: "/v2/chat"
              method: "POST"
              schema:
                request:
                  type: object
                  properties:
                    model:
                      type: string
                    messages:
                      type: array
                      items:
                        type: object
                        properties:
                          role:
                            type: string
                          content:
                            type: string
                    temperature:
                      type: number
                      format: float64
                      default: 0.7
                response:
                  type: object
                  properties:
                    message:
                      type: object
                      properties:
                        role:
                          type: string
                        content:
                          type: array
                          items:
                            type: object
                            properties:
                              type:
                                type: string
                              text:
                                type: string
        # Anthropic: auth_type xheader, plus a fixed anthropic-version header
        # supplied through extra_headers. Models are listed with
        # GET /v1/models and content is generated with POST /v1/messages.
        # NOTE(review): the generate response below is the same
        # choices[].message shape used for openai/groq in this file, and the
        # list response wraps items in `models` - confirm both against the
        # Anthropic API, which may use different field names.
        anthropic:
          id: "anthropic"
          url: "https://api.anthropic.com"
          auth_type: "xheader"
          extra_headers:
            anthropic-version: "2023-06-01"
          endpoints:
            list:
              endpoint: "/v1/models"
              method: "GET"
              schema:
                response:
                  type: object
                  properties:
                    models:
                      type: array
                      items:
                        type: object
                        properties:
                          type:
                            type: string
                          id:
                            type: string
                          display_name:
                            type: string
                          created_at:
                            type: string
                    has_more:
                      type: boolean
                    first_id:
                      type: string
                    last_id:
                      type: string
            generate:
              endpoint: "/v1/messages"
              method: "POST"
              schema:
                request:
                  type: object
                  properties:
                    model:
                      type: string
                    messages:
                      type: array
                      items:
                        type: object
                        properties:
                          role:
                            type: string
                          content:
                            type: string
                    temperature:
                      type: number
                      format: float64
                      default: 0.7
                response:
                  type: object
                  properties:
                    model:
                      type: string
                    choices:
                      type: array
                      items:
                        type: object
                        properties:
                          message:
                            type: object
                            properties:
                              role:
                                type: string
                              content:
                                type: string