//! EdgeQuake LLM - LLM and Embedding Provider Abstraction
//!
//! # Implements
//!
//! - **FEAT0017**: Multi-Provider LLM Support
//! - **FEAT0018**: Embedding Provider Abstraction
//! - **FEAT0019**: LLM Response Caching
//! - **FEAT0020**: API Rate Limiting
//! - **FEAT0005**: Embedding Generation (via providers)
//!
//! # Enforces
//!
//! - **BR0301**: LLM API rate limits (configurable per provider)
//! - **BR0302**: Document size limits (context window awareness)
//! - **BR0303**: Cost tracking per request
//! - **BR0010**: Embedding dimension validated (1536 default)
//!
//! This crate provides traits and implementations for:
//! - Text completion (LLM providers)
//! - Text embedding (embedding providers)
//! - Token counting and management
//! - Rate limiting for API calls
//! - Response caching for cost reduction
//!
//! # Providers
//!
//! | Provider | FEAT0017 | Chat | Embeddings | Notes |
//! |----------|----------|------|------------|-------|
//! | OpenAI | ✓ | ✓ | ✓ | Primary production provider |
//! | Azure OpenAI | ✓ | ✓ | ✓ | Enterprise deployments |
//! | Ollama | ✓ | ✓ | ✓ | Local/on-prem models |
//! | LM Studio | ✓ | ✓ | ✓ | Local OpenAI-compatible API |
//! | Gemini | ✓ | ✓ | ✓ | Google AI |
//! | Mock | ✓ | ✓ | ✓ | Testing (no API calls) |
//!
//! # Architecture
//!
//! The crate uses trait-based abstraction to support multiple LLM backends:
//! - OpenAI (GPT-4, GPT-3.5)
//! - OpenAI-compatible APIs (Ollama, LM Studio, etc.)
//! - Anthropic (Claude 3.5, Claude 3)
//! - Mistral, xAI Grok, OpenRouter, AWS Bedrock (feature-gated), and more
//!
//! # Example
//!
//! ```ignore
//! use edgequake_llm::{LLMProvider, OpenAIProvider};
//!
//! let provider = OpenAIProvider::new("your-api-key");
//! let response = provider.complete("Hello, world!").await?;
//! ```
//!
//! # See Also
//!
//! - [`crate::traits`] for provider trait definitions
//! - [`crate::providers`] for concrete implementations
//! - [`crate::cache`] for response caching

59pub mod cache;
60pub mod cache_prompt;
61pub mod cost_tracker; // OODA-21: Session-level cost tracking
62pub mod error;
63pub mod factory;
64pub mod inference_metrics; // OODA-33: Unified streaming metrics
65pub mod middleware;
66pub mod model_config;
67pub mod providers;
68pub mod rate_limiter;
69pub mod registry;
70pub mod reranker;
71pub mod retry;
72pub mod tokenizer;
73pub mod traits;
74
75pub use cache::{CacheConfig, CacheStats, CachedProvider, LLMCache};
76pub use cache_prompt::{
77    apply_cache_control, parse_cache_stats, CachePromptConfig, CacheStats as PromptCacheStats,
78};
79pub use cost_tracker::{
80    format_cost, format_tokens, CostEntry, CostSummary, ModelPricing, SessionCostTracker,
81};
82pub use error::{LlmError, Result, RetryStrategy};
83pub use factory::{ProviderFactory, ProviderType};
84pub use inference_metrics::InferenceMetrics; // OODA-33
85pub use middleware::{
86    LLMMiddleware, LLMMiddlewareStack, LLMRequest, LogLevel, LoggingLLMMiddleware,
87    MetricsLLMMiddleware, MetricsSummary,
88};
89pub use model_config::{
90    DefaultsConfig, ModelCapabilities, ModelCard, ModelConfigError, ModelCost, ModelType,
91    ModelsConfig, ProviderConfig, ProviderType as ConfigProviderType,
92};
93pub use providers::azure_openai::AzureOpenAIProvider;
94pub use providers::gemini::GeminiProvider;
95pub use providers::jina::JinaProvider;
96pub use providers::lmstudio::LMStudioProvider;
97pub use providers::mock::MockProvider;
98pub use providers::ollama::{
99    OllamaModelDetails, OllamaModelInfo, OllamaModelsResponse, OllamaProvider,
100};
101pub use providers::openai::OpenAIProvider;
102// FEAT-007: Mistral AI provider
103pub use providers::mistral::MistralProvider;
104// FEAT-020: AWS Bedrock provider (feature-gated)
105#[cfg(feature = "bedrock")]
106pub use providers::bedrock::BedrockProvider;
107// OODA-01: Anthropic (Claude) provider
108pub use providers::anthropic::AnthropicProvider;
109// OODA-02: OpenRouter provider (200+ models)
110// OODA-72: Dynamic model discovery with caching
111pub use providers::openrouter::{
112    ModelArchitecture as OpenRouterModelArchitecture, ModelInfo as OpenRouterModelInfo,
113    ModelPricing as OpenRouterModelPricing, ModelsResponse as OpenRouterModelsResponse,
114    OpenRouterProvider,
115};
116// OODA-200: Configurable OpenAI-compatible provider
117pub use providers::openai_compatible::OpenAICompatibleProvider;
118// OODA-71: xAI Grok provider (api.x.ai)
119pub use providers::vscode::{
120    Model as CopilotModel, ModelsResponse as CopilotModelsResponse, VsCodeCopilotProvider,
121};
122pub use providers::xai::XAIProvider;
123pub use rate_limiter::{RateLimitedProvider, RateLimiter, RateLimiterConfig};
124pub use registry::ProviderRegistry;
125pub use reranker::{
126    BM25Reranker, HttpReranker, HybridReranker, MockReranker, RRFReranker, RerankConfig,
127    RerankResult, Reranker, ScoreAggregation, TermOverlapReranker,
128};
129pub use retry::RetryExecutor;
130pub use tokenizer::Tokenizer;
131pub use traits::{
132    CacheControl, ChatMessage, ChatRole, CompletionOptions, EmbeddingProvider, FunctionCall,
133    FunctionDefinition, ImageData, LLMProvider, LLMResponse, ToolCall, ToolChoice, ToolDefinition,
134    ToolResult,
135};