# FlyLLM Configuration Example
# Copy this file to flyllm.toml and configure your providers
# =============================================================================
# GLOBAL SETTINGS
# =============================================================================
# NOTE(review): the table header and key names were missing from this copy and
# have been restored as `settings`, `strategy` and `max_retries` based on the
# surrounding comments; confirm against the FlyLLM configuration schema.
[settings]
# Load balancing strategy: "lru" (default), "lowest_latency", or "random"
strategy = "lru"
# Maximum retry attempts for failed requests (default: 5)
max_retries = 3
# Optional: Enable debug logging to a folder
# debug_folder = "./debug"
# =============================================================================
# TASK DEFINITIONS
# Define reusable parameter sets that can be assigned to providers
# =============================================================================
# Each [[tasks]] entry is one named parameter set. Provider instances refer to
# these entries by `name` in their `tasks` lists.
# NOTE(review): the array name and the `max_tokens` / `temperature` key names
# were missing from this copy and have been restored by inference from the
# values; confirm against the FlyLLM configuration schema.

# Short, focused output.
[[tasks]]
name = "summary"
max_tokens = 500
temperature = 0.3

# Long-form output with the highest temperature of the four tasks.
[[tasks]]
name = "creative_writing"
max_tokens = 2000
temperature = 0.9

# Largest token budget with the lowest temperature of the four tasks.
[[tasks]]
name = "code_generation"
max_tokens = 4000
temperature = 0.2

# General-purpose conversational settings.
[[tasks]]
name = "chat"
max_tokens = 1000
temperature = 0.7
# =============================================================================
# PROVIDER INSTANCES
# Configure your LLM providers here
# API keys can use environment variables: "${VAR_NAME}"
# =============================================================================
# --- OpenAI ---
[[providers]]
# Provider backend and the model to request from it
type = "openai"
model = "gpt-4-turbo"
# Taken from the OPENAI_API_KEY environment variable; do not commit a literal key
api_key = "${OPENAI_API_KEY}"
# Task definitions (by name) assigned to this instance
tasks = ["summary", "code_generation", "chat"]
enabled = true
# You can add multiple instances of the same provider with different keys
# [[providers]]
# type = "openai"
# model = "gpt-4-turbo"
# api_key = "${OPENAI_API_KEY_SECONDARY}"
# name = "openai-backup" # Optional: friendly name for this instance
# tasks = ["chat"]
# --- Anthropic (Claude) ---
[[providers]]
# Provider backend and the model to request from it
type = "anthropic"
model = "claude-3-sonnet-20240229"
# Taken from the ANTHROPIC_API_KEY environment variable; do not commit a literal key
api_key = "${ANTHROPIC_API_KEY}"
# Task definitions (by name) assigned to this instance
tasks = ["creative_writing", "summary", "chat"]
enabled = true
# --- Mistral ---
# [[providers]]
# type = "mistral"
# model = "mistral-large-latest"
# api_key = "${MISTRAL_API_KEY}"
# tasks = ["summary", "chat"]
# enabled = true
# --- Google (Gemini) ---
# [[providers]]
# type = "google"
# model = "gemini-pro"
# api_key = "${GOOGLE_API_KEY}"
# tasks = ["summary", "chat"]
# enabled = true
# --- Groq (Fast Inference) ---
# [[providers]]
# type = "groq"
# model = "llama-3.1-70b-versatile"
# api_key = "${GROQ_API_KEY}"
# tasks = ["chat", "summary"]
# enabled = true
# --- Together AI (Open Source Models) ---
# [[providers]]
# type = "togetherai"
# model = "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo"
# api_key = "${TOGETHER_API_KEY}"
# tasks = ["chat"]
# enabled = true
# --- Cohere ---
# [[providers]]
# type = "cohere"
# model = "command-r-plus"
# api_key = "${COHERE_API_KEY}"
# tasks = ["summary"]
# enabled = true
# --- Perplexity (Search-Augmented) ---
# [[providers]]
# type = "perplexity"
# model = "sonar"
# api_key = "${PERPLEXITY_API_KEY}"
# tasks = ["chat"]
# enabled = true
# --- Ollama (Local) ---
# No API key required for local Ollama
# [[providers]]
# type = "ollama"
# model = "llama3"
# api_key = ""
# endpoint = "http://localhost:11434" # Custom endpoint (optional)
# tasks = ["chat", "summary"]
# enabled = true
# --- LM Studio (Local) ---
# No API key required for local LM Studio
# [[providers]]
# type = "lmstudio"
# model = "local-model"
# api_key = ""
# endpoint = "http://localhost:1234" # Custom endpoint (optional)
# tasks = ["code_generation"]
# enabled = true