omnillm 0.1.5

Production-grade LLM API gateway with multi-key load balancing, per-key rate limiting, circuit breaking, and cost tracking
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
use crate::provider_registry::SupportLevel;

use super::{
    PrimitiveBudgetClass, PrimitiveEndpointKind, PrimitiveEndpointSupport,
    PrimitiveProviderDescriptor, PrimitiveProviderKind, PrimitiveProviderRegistry,
    PrimitiveStreamMode, PrimitiveSupportTier, ProviderPrimitiveWireFormat,
};

pub fn embedded_primitive_provider_registry() -> PrimitiveProviderRegistry {
    PrimitiveProviderRegistry {
        providers: vec![
            PrimitiveProviderDescriptor {
                kind: PrimitiveProviderKind::OpenAi,
                default_base_url: Some("https://api.openai.com/v1".into()),
                endpoints: vec![
                    support(
                        PrimitiveEndpointKind::Responses,
                        SupportLevel::Native,
                        &[ProviderPrimitiveWireFormat::OpenAiResponses],
                        &[PrimitiveStreamMode::None, PrimitiveStreamMode::Sse],
                    ),
                    support(
                        PrimitiveEndpointKind::ChatCompletions,
                        SupportLevel::Native,
                        &[ProviderPrimitiveWireFormat::OpenAiChatCompletions],
                        &[PrimitiveStreamMode::None, PrimitiveStreamMode::Sse],
                    ),
                    support(
                        PrimitiveEndpointKind::Images,
                        SupportLevel::Native,
                        &[
                            ProviderPrimitiveWireFormat::OpenAiImages,
                            ProviderPrimitiveWireFormat::OpenAiImageEdits,
                            ProviderPrimitiveWireFormat::OpenAiImageVariations,
                        ],
                        &[PrimitiveStreamMode::None],
                    ),
                    support(
                        PrimitiveEndpointKind::Realtime,
                        SupportLevel::Native,
                        &[ProviderPrimitiveWireFormat::OpenAiRealtime],
                        &[PrimitiveStreamMode::WebSocket, PrimitiveStreamMode::WebRtc],
                    ),
                    support(
                        PrimitiveEndpointKind::AudioTranscriptions,
                        SupportLevel::Native,
                        &[ProviderPrimitiveWireFormat::OpenAiAudioTranscriptions],
                        &[PrimitiveStreamMode::None, PrimitiveStreamMode::Sse],
                    ),
                    support(
                        PrimitiveEndpointKind::AudioTranslations,
                        SupportLevel::Native,
                        &[ProviderPrimitiveWireFormat::OpenAiAudioTranslations],
                        &[PrimitiveStreamMode::None],
                    ),
                    support(
                        PrimitiveEndpointKind::AudioSpeech,
                        SupportLevel::Native,
                        &[ProviderPrimitiveWireFormat::OpenAiAudioSpeech],
                        &[PrimitiveStreamMode::None, PrimitiveStreamMode::BinaryChunks],
                    ),
                    support(
                        PrimitiveEndpointKind::Embeddings,
                        SupportLevel::Native,
                        &[ProviderPrimitiveWireFormat::OpenAiEmbeddings],
                        &[PrimitiveStreamMode::None],
                    ),
                    support(
                        PrimitiveEndpointKind::Files,
                        SupportLevel::Native,
                        &[ProviderPrimitiveWireFormat::OpenAiFiles],
                        &[PrimitiveStreamMode::None],
                    ),
                    support(
                        PrimitiveEndpointKind::Uploads,
                        SupportLevel::Native,
                        &[ProviderPrimitiveWireFormat::OpenAiUploads],
                        &[PrimitiveStreamMode::None],
                    ),
                    support(
                        PrimitiveEndpointKind::Models,
                        SupportLevel::Native,
                        &[ProviderPrimitiveWireFormat::OpenAiModels],
                        &[PrimitiveStreamMode::None],
                    ),
                    support(
                        PrimitiveEndpointKind::Batches,
                        SupportLevel::Native,
                        &[ProviderPrimitiveWireFormat::OpenAiBatches],
                        &[PrimitiveStreamMode::None],
                    ),
                ],
            },
            PrimitiveProviderDescriptor {
                kind: PrimitiveProviderKind::Anthropic,
                default_base_url: Some("https://api.anthropic.com/v1".into()),
                endpoints: vec![
                    support(
                        PrimitiveEndpointKind::Messages,
                        SupportLevel::Native,
                        &[ProviderPrimitiveWireFormat::AnthropicMessages],
                        &[PrimitiveStreamMode::None, PrimitiveStreamMode::Sse],
                    ),
                    support(
                        PrimitiveEndpointKind::CountTokens,
                        SupportLevel::Native,
                        &[ProviderPrimitiveWireFormat::AnthropicCountTokens],
                        &[PrimitiveStreamMode::None],
                    ),
                    support(
                        PrimitiveEndpointKind::Batches,
                        SupportLevel::Native,
                        &[ProviderPrimitiveWireFormat::AnthropicMessageBatches],
                        &[PrimitiveStreamMode::None],
                    ),
                    support(
                        PrimitiveEndpointKind::Files,
                        SupportLevel::Native,
                        &[ProviderPrimitiveWireFormat::AnthropicFiles],
                        &[PrimitiveStreamMode::None],
                    ),
                    support(
                        PrimitiveEndpointKind::Models,
                        SupportLevel::Native,
                        &[ProviderPrimitiveWireFormat::AnthropicModels],
                        &[PrimitiveStreamMode::None],
                    ),
                ],
            },
            PrimitiveProviderDescriptor {
                kind: PrimitiveProviderKind::Gemini,
                default_base_url: Some("https://generativelanguage.googleapis.com/v1beta".into()),
                endpoints: vec![
                    support(
                        PrimitiveEndpointKind::Messages,
                        SupportLevel::Native,
                        &[
                            ProviderPrimitiveWireFormat::GeminiGenerateContent,
                            ProviderPrimitiveWireFormat::GeminiStreamGenerateContent,
                        ],
                        &[PrimitiveStreamMode::None, PrimitiveStreamMode::Sse],
                    ),
                    support(
                        PrimitiveEndpointKind::CountTokens,
                        SupportLevel::Native,
                        &[ProviderPrimitiveWireFormat::GeminiCountTokens],
                        &[PrimitiveStreamMode::None],
                    ),
                    support(
                        PrimitiveEndpointKind::Embeddings,
                        SupportLevel::Native,
                        &[ProviderPrimitiveWireFormat::GeminiEmbedContent],
                        &[PrimitiveStreamMode::None],
                    ),
                    support(
                        PrimitiveEndpointKind::Live,
                        SupportLevel::Native,
                        &[ProviderPrimitiveWireFormat::GeminiLive],
                        &[PrimitiveStreamMode::WebSocket],
                    ),
                    support(
                        PrimitiveEndpointKind::Files,
                        SupportLevel::Native,
                        &[ProviderPrimitiveWireFormat::GeminiFiles],
                        &[PrimitiveStreamMode::None],
                    ),
                    support(
                        PrimitiveEndpointKind::Caches,
                        SupportLevel::Native,
                        &[ProviderPrimitiveWireFormat::GeminiCaches],
                        &[PrimitiveStreamMode::None],
                    ),
                    support(
                        PrimitiveEndpointKind::Models,
                        SupportLevel::Native,
                        &[ProviderPrimitiveWireFormat::GeminiModels],
                        &[PrimitiveStreamMode::None],
                    ),
                    support(
                        PrimitiveEndpointKind::Operations,
                        SupportLevel::Native,
                        &[ProviderPrimitiveWireFormat::GeminiOperations],
                        &[PrimitiveStreamMode::None],
                    ),
                    support(
                        PrimitiveEndpointKind::Batches,
                        SupportLevel::Native,
                        &[ProviderPrimitiveWireFormat::GeminiBatches],
                        &[PrimitiveStreamMode::None],
                    ),
                ],
            },
            PrimitiveProviderDescriptor {
                kind: PrimitiveProviderKind::AzureOpenAi,
                default_base_url: None,
                endpoints: vec![
                    support(
                        PrimitiveEndpointKind::Responses,
                        SupportLevel::Compatible,
                        &[ProviderPrimitiveWireFormat::OpenAiResponses],
                        &[PrimitiveStreamMode::None, PrimitiveStreamMode::Sse],
                    ),
                    support(
                        PrimitiveEndpointKind::ChatCompletions,
                        SupportLevel::Compatible,
                        &[ProviderPrimitiveWireFormat::OpenAiChatCompletions],
                        &[PrimitiveStreamMode::None, PrimitiveStreamMode::Sse],
                    ),
                    support(
                        PrimitiveEndpointKind::Images,
                        SupportLevel::Compatible,
                        &[ProviderPrimitiveWireFormat::OpenAiImages],
                        &[PrimitiveStreamMode::None],
                    ),
                    support(
                        PrimitiveEndpointKind::AudioTranscriptions,
                        SupportLevel::Compatible,
                        &[ProviderPrimitiveWireFormat::OpenAiAudioTranscriptions],
                        &[PrimitiveStreamMode::None],
                    ),
                    support(
                        PrimitiveEndpointKind::AudioSpeech,
                        SupportLevel::Compatible,
                        &[ProviderPrimitiveWireFormat::OpenAiAudioSpeech],
                        &[PrimitiveStreamMode::None],
                    ),
                ],
            },
            PrimitiveProviderDescriptor {
                kind: PrimitiveProviderKind::VertexAi,
                default_base_url: None,
                endpoints: vec![
                    support(
                        PrimitiveEndpointKind::Messages,
                        SupportLevel::Compatible,
                        &[
                            ProviderPrimitiveWireFormat::GeminiGenerateContent,
                            ProviderPrimitiveWireFormat::GeminiStreamGenerateContent,
                        ],
                        &[PrimitiveStreamMode::None, PrimitiveStreamMode::Sse],
                    ),
                    support(
                        PrimitiveEndpointKind::CountTokens,
                        SupportLevel::Compatible,
                        &[ProviderPrimitiveWireFormat::GeminiCountTokens],
                        &[PrimitiveStreamMode::None],
                    ),
                    support(
                        PrimitiveEndpointKind::Embeddings,
                        SupportLevel::Compatible,
                        &[ProviderPrimitiveWireFormat::GeminiEmbedContent],
                        &[PrimitiveStreamMode::None],
                    ),
                    support(
                        PrimitiveEndpointKind::Rerank,
                        SupportLevel::Planned,
                        &[],
                        &[],
                    ),
                ],
            },
            PrimitiveProviderDescriptor {
                kind: PrimitiveProviderKind::Bedrock,
                default_base_url: None,
                endpoints: vec![
                    support(
                        PrimitiveEndpointKind::Messages,
                        SupportLevel::Planned,
                        &[],
                        &[],
                    ),
                    support(
                        PrimitiveEndpointKind::Custom,
                        SupportLevel::Planned,
                        &[],
                        &[],
                    ),
                ],
            },
            PrimitiveProviderDescriptor {
                kind: PrimitiveProviderKind::OpenAiCompatible,
                default_base_url: None,
                endpoints: vec![
                    support(
                        PrimitiveEndpointKind::ChatCompletions,
                        SupportLevel::Compatible,
                        &[ProviderPrimitiveWireFormat::OpenAiCompatibleChatCompletions],
                        &[PrimitiveStreamMode::None, PrimitiveStreamMode::Sse],
                    ),
                    support(
                        PrimitiveEndpointKind::Responses,
                        SupportLevel::Compatible,
                        &[ProviderPrimitiveWireFormat::OpenAiResponses],
                        &[PrimitiveStreamMode::None, PrimitiveStreamMode::Sse],
                    ),
                    support(
                        PrimitiveEndpointKind::Embeddings,
                        SupportLevel::Compatible,
                        &[ProviderPrimitiveWireFormat::OpenAiEmbeddings],
                        &[PrimitiveStreamMode::None],
                    ),
                ],
            },
            PrimitiveProviderDescriptor {
                kind: PrimitiveProviderKind::Custom,
                default_base_url: None,
                endpoints: vec![support(
                    PrimitiveEndpointKind::Custom,
                    SupportLevel::Compatible,
                    &[ProviderPrimitiveWireFormat::CustomHttp],
                    &[
                        PrimitiveStreamMode::None,
                        PrimitiveStreamMode::Sse,
                        PrimitiveStreamMode::BinaryChunks,
                    ],
                )],
            },
        ],
    }
}

/// Assembles one endpoint-support record from its explicit attributes.
///
/// The two slices are copied into owned vectors. `scope_tier` and
/// `budget_class` are not supplied by the caller; they are computed by
/// `infer_scope_tier` and `infer_budget_class` from the same inputs.
fn support(
    endpoint: PrimitiveEndpointKind,
    level: SupportLevel,
    wire_formats: &[ProviderPrimitiveWireFormat],
    stream_modes: &[PrimitiveStreamMode],
) -> PrimitiveEndpointSupport {
    // Derive the classification fields first, while the inputs are still borrowed.
    let scope_tier = infer_scope_tier(endpoint, wire_formats, stream_modes);
    let budget_class = infer_budget_class(endpoint, wire_formats);

    PrimitiveEndpointSupport {
        endpoint,
        level,
        wire_formats: Vec::from(wire_formats),
        stream_modes: Vec::from(stream_modes),
        scope_tier,
        budget_class,
    }
}

/// Picks the support tier for an endpoint from its kind, wire formats and
/// stream modes.
///
/// Rules are applied in priority order; the first that matches wins:
/// 1. any `WebSocket`, `WebRtc` or `BinaryChunks` stream mode gives
///    `P3TransportExpansion`;
/// 2. the `Batches` endpoint gives `P2AsyncJobLifecycle`;
/// 3. a `Files`, `Models`, `Operations`, `Caches` or `Uploads` endpoint, or a
///    `GeminiFiles`, `GeminiCaches` or `AnthropicFiles` wire format, gives
///    `P1LowRiskHttpGaps`;
/// 4. everything else is `P0KeepAndHarden`.
fn infer_scope_tier(
    endpoint: PrimitiveEndpointKind,
    wire_formats: &[ProviderPrimitiveWireFormat],
    stream_modes: &[PrimitiveStreamMode],
) -> PrimitiveSupportTier {
    let uses_expanded_transport = stream_modes.iter().any(|stream_mode| {
        matches!(
            stream_mode,
            PrimitiveStreamMode::WebSocket
                | PrimitiveStreamMode::WebRtc
                | PrimitiveStreamMode::BinaryChunks
        )
    });
    let is_async_job = matches!(endpoint, PrimitiveEndpointKind::Batches);
    let is_low_risk_endpoint = matches!(
        endpoint,
        PrimitiveEndpointKind::Files
            | PrimitiveEndpointKind::Models
            | PrimitiveEndpointKind::Operations
            | PrimitiveEndpointKind::Caches
            | PrimitiveEndpointKind::Uploads
    );
    let has_low_risk_wire_format = wire_formats.iter().any(|format| {
        matches!(
            format,
            ProviderPrimitiveWireFormat::GeminiFiles
                | ProviderPrimitiveWireFormat::GeminiCaches
                | ProviderPrimitiveWireFormat::AnthropicFiles
        )
    });

    // The branch order below is the precedence: transport beats batch, batch
    // beats the low-risk HTTP group.
    if uses_expanded_transport {
        PrimitiveSupportTier::P3TransportExpansion
    } else if is_async_job {
        PrimitiveSupportTier::P2AsyncJobLifecycle
    } else if is_low_risk_endpoint || has_low_risk_wire_format {
        PrimitiveSupportTier::P1LowRiskHttpGaps
    } else {
        PrimitiveSupportTier::P0KeepAndHarden
    }
}

/// Classifies how an endpoint is budgeted, from its kind and wire formats.
///
/// Rules are applied in order; the first that matches wins:
/// 1. `Models`, `Operations` and `Caches` endpoints are
///    `MetadataOrControlPlaneZeroCost`;
/// 2. `Files` and `Uploads` endpoints are `UploadOrStorage`;
/// 3. `Images`, `AudioTranscriptions`, `AudioTranslations` and `AudioSpeech`
///    endpoints are `BillableUnitMetered`;
/// 4. any other endpoint that carries an OpenAI image or audio wire format is
///    also `BillableUnitMetered`;
/// 5. everything else is `TokenMetered`.
pub(super) fn infer_budget_class(
    endpoint: PrimitiveEndpointKind,
    wire_formats: &[ProviderPrimitiveWireFormat],
) -> PrimitiveBudgetClass {
    if matches!(
        endpoint,
        PrimitiveEndpointKind::Models
            | PrimitiveEndpointKind::Operations
            | PrimitiveEndpointKind::Caches
    ) {
        return PrimitiveBudgetClass::MetadataOrControlPlaneZeroCost;
    }

    if matches!(
        endpoint,
        PrimitiveEndpointKind::Files | PrimitiveEndpointKind::Uploads
    ) {
        return PrimitiveBudgetClass::UploadOrStorage;
    }

    // `AudioTranslations` belongs here with the other audio endpoints. Without
    // it, a translation endpoint whose wire formats are empty or not in the
    // list below would fall through to `TokenMetered`, disagreeing with how
    // `OpenAiAudioTranslations` is classified by that list.
    if matches!(
        endpoint,
        PrimitiveEndpointKind::Images
            | PrimitiveEndpointKind::AudioTranscriptions
            | PrimitiveEndpointKind::AudioTranslations
            | PrimitiveEndpointKind::AudioSpeech
    ) {
        return PrimitiveBudgetClass::BillableUnitMetered;
    }

    // Fallback for endpoints not named above (for example `Custom`) that still
    // speak one of the unit-metered OpenAI image/audio wire formats.
    if wire_formats.iter().any(|wire_format| {
        matches!(
            wire_format,
            ProviderPrimitiveWireFormat::OpenAiImages
                | ProviderPrimitiveWireFormat::OpenAiImageEdits
                | ProviderPrimitiveWireFormat::OpenAiImageVariations
                | ProviderPrimitiveWireFormat::OpenAiAudioTranscriptions
                | ProviderPrimitiveWireFormat::OpenAiAudioTranslations
                | ProviderPrimitiveWireFormat::OpenAiAudioSpeech
        )
    }) {
        return PrimitiveBudgetClass::BillableUnitMetered;
    }

    PrimitiveBudgetClass::TokenMetered
}