// spn_core/providers.rs

//! Provider definitions for LLM and MCP services.
//!
//! This module is the **single source of truth** for all provider metadata
//! across the SuperNovae ecosystem.
5
/// The kind of service a provider offers.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum ProviderCategory {
    /// LLM inference providers, e.g. Anthropic or OpenAI.
    Llm,
    /// MCP service providers, e.g. Neo4j or GitHub.
    Mcp,
    /// Local model runners, e.g. Ollama.
    Local,
}
17
18/// Provider metadata.
19#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
20pub struct Provider {
21    /// Unique identifier (e.g., "anthropic", "openai")
22    pub id: &'static str,
23    /// Human-readable name (e.g., "Anthropic Claude")
24    pub name: &'static str,
25    /// Environment variable name (e.g., "ANTHROPIC_API_KEY")
26    pub env_var: &'static str,
27    /// Expected key prefix for validation (e.g., "sk-ant-")
28    pub key_prefix: Option<&'static str>,
29    /// Provider category
30    pub category: ProviderCategory,
31    /// Whether this provider requires an API key
32    pub requires_key: bool,
33    /// Description of the provider
34    pub description: &'static str,
35}
36
37/// All known providers in the SuperNovae ecosystem.
38///
39/// This constant is the **single source of truth** for provider definitions.
40/// It replaces the duplicated PROVIDERS arrays in nika and spn.
41pub static KNOWN_PROVIDERS: &[Provider] = &[
42    // ==================== LLM Providers ====================
43    Provider {
44        id: "anthropic",
45        name: "Anthropic Claude",
46        env_var: "ANTHROPIC_API_KEY",
47        key_prefix: Some("sk-ant-"),
48        category: ProviderCategory::Llm,
49        requires_key: true,
50        description: "Claude models (Opus, Sonnet, Haiku)",
51    },
52    Provider {
53        id: "openai",
54        name: "OpenAI GPT",
55        env_var: "OPENAI_API_KEY",
56        key_prefix: Some("sk-"),
57        category: ProviderCategory::Llm,
58        requires_key: true,
59        description: "GPT-4, GPT-3.5, and other OpenAI models",
60    },
61    Provider {
62        id: "mistral",
63        name: "Mistral AI",
64        env_var: "MISTRAL_API_KEY",
65        key_prefix: None,
66        category: ProviderCategory::Llm,
67        requires_key: true,
68        description: "Mistral and Mixtral models",
69    },
70    Provider {
71        id: "groq",
72        name: "Groq",
73        env_var: "GROQ_API_KEY",
74        key_prefix: Some("gsk_"),
75        category: ProviderCategory::Llm,
76        requires_key: true,
77        description: "Ultra-fast inference with Groq LPU",
78    },
79    Provider {
80        id: "deepseek",
81        name: "DeepSeek",
82        env_var: "DEEPSEEK_API_KEY",
83        key_prefix: Some("sk-"),
84        category: ProviderCategory::Llm,
85        requires_key: true,
86        description: "DeepSeek Coder and Chat models",
87    },
88    Provider {
89        id: "gemini",
90        name: "Google Gemini",
91        env_var: "GEMINI_API_KEY",
92        key_prefix: None,
93        category: ProviderCategory::Llm,
94        requires_key: true,
95        description: "Gemini Pro and Ultra models",
96    },
97    Provider {
98        id: "ollama",
99        name: "Ollama",
100        env_var: "OLLAMA_API_BASE_URL",
101        key_prefix: None,
102        category: ProviderCategory::Local,
103        requires_key: false,
104        description: "Local model runner (llama, mistral, etc.) [DEPRECATED: use native]",
105    },
106    Provider {
107        id: "native",
108        name: "Native (mistral.rs)",
109        env_var: "NATIVE_MODEL_PATH",
110        key_prefix: None,
111        category: ProviderCategory::Local,
112        requires_key: false,
113        description: "Local inference via mistral.rs (in-process)",
114    },
115    // ==================== Cloud LLM Providers (rig-core unsupported) ====================
116    Provider {
117        id: "cohere",
118        name: "Cohere",
119        env_var: "COHERE_API_KEY",
120        key_prefix: None,
121        category: ProviderCategory::Llm,
122        requires_key: true,
123        description: "Cohere Command and Embed models",
124    },
125    Provider {
126        id: "together",
127        name: "Together AI",
128        env_var: "TOGETHER_API_KEY",
129        key_prefix: None,
130        category: ProviderCategory::Llm,
131        requires_key: true,
132        description: "Together AI inference platform",
133    },
134    Provider {
135        id: "fireworks",
136        name: "Fireworks AI",
137        env_var: "FIREWORKS_API_KEY",
138        key_prefix: None,
139        category: ProviderCategory::Llm,
140        requires_key: true,
141        description: "Fireworks AI fast inference",
142    },
143    Provider {
144        id: "cerebras",
145        name: "Cerebras",
146        env_var: "CEREBRAS_API_KEY",
147        key_prefix: None,
148        category: ProviderCategory::Llm,
149        requires_key: true,
150        description: "Cerebras ultra-fast inference",
151    },
152    // NOTE: Replicate and Anyscale NOT included:
153    // - Replicate: NOT a chat API (async prediction polling)
154    // - Anyscale: SERVICE SUNSET June 2024
155    // ==================== MCP Service Providers ====================
156    Provider {
157        id: "neo4j",
158        name: "Neo4j Graph Database",
159        env_var: "NEO4J_PASSWORD",
160        key_prefix: None,
161        category: ProviderCategory::Mcp,
162        requires_key: true,
163        description: "Graph database for knowledge storage",
164    },
165    Provider {
166        id: "github",
167        name: "GitHub API",
168        env_var: "GITHUB_TOKEN",
169        key_prefix: Some("ghp_"),
170        category: ProviderCategory::Mcp,
171        requires_key: true,
172        description: "GitHub API access",
173    },
174    Provider {
175        id: "slack",
176        name: "Slack API",
177        env_var: "SLACK_BOT_TOKEN",
178        key_prefix: Some("xoxb-"),
179        category: ProviderCategory::Mcp,
180        requires_key: true,
181        description: "Slack workspace integration",
182    },
183    Provider {
184        id: "perplexity",
185        name: "Perplexity AI",
186        env_var: "PERPLEXITY_API_KEY",
187        key_prefix: Some("pplx-"),
188        category: ProviderCategory::Mcp,
189        requires_key: true,
190        description: "AI-powered web search",
191    },
192    Provider {
193        id: "firecrawl",
194        name: "Firecrawl",
195        env_var: "FIRECRAWL_API_KEY",
196        key_prefix: Some("fc-"),
197        category: ProviderCategory::Mcp,
198        requires_key: true,
199        description: "Web scraping and crawling",
200    },
201    Provider {
202        id: "supadata",
203        name: "Supadata API",
204        env_var: "SUPADATA_API_KEY",
205        key_prefix: None,
206        category: ProviderCategory::Mcp,
207        requires_key: true,
208        description: "Video transcription and web scraping",
209    },
210    Provider {
211        id: "dataforseo",
212        name: "DataForSEO",
213        env_var: "DATAFORSEO_API_KEY",
214        key_prefix: None,
215        category: ProviderCategory::Mcp,
216        requires_key: true,
217        description: "SEO data, keyword research, SERP analysis",
218    },
219    Provider {
220        id: "ahrefs",
221        name: "Ahrefs API",
222        env_var: "AHREFS_API_KEY",
223        key_prefix: None,
224        category: ProviderCategory::Mcp,
225        requires_key: true,
226        description: "Backlink analysis and SEO metrics",
227    },
228];
229
230/// Find a provider by ID (case-insensitive).
231///
232/// # Example
233///
234/// ```
235/// use spn_core::find_provider;
236///
237/// let provider = find_provider("anthropic").unwrap();
238/// assert_eq!(provider.env_var, "ANTHROPIC_API_KEY");
239///
240/// let provider = find_provider("OPENAI").unwrap();
241/// assert_eq!(provider.id, "openai");
242/// ```
243#[must_use]
244pub fn find_provider(id: &str) -> Option<&'static Provider> {
245    KNOWN_PROVIDERS
246        .iter()
247        .find(|p| p.id.eq_ignore_ascii_case(id))
248}
249
250/// Get the environment variable name for a provider.
251///
252/// # Example
253///
254/// ```
255/// use spn_core::provider_to_env_var;
256///
257/// assert_eq!(provider_to_env_var("anthropic"), Some("ANTHROPIC_API_KEY"));
258/// assert_eq!(provider_to_env_var("unknown"), None);
259/// ```
260pub fn provider_to_env_var(id: &str) -> Option<&'static str> {
261    find_provider(id).map(|p| p.env_var)
262}
263
264/// Get all providers in a specific category.
265///
266/// # Example
267///
268/// ```
269/// use spn_core::{providers_by_category, ProviderCategory};
270///
271/// let llm_providers: Vec<_> = providers_by_category(ProviderCategory::Llm).collect();
272/// assert!(llm_providers.iter().any(|p| p.id == "anthropic"));
273/// ```
274pub fn providers_by_category(
275    category: ProviderCategory,
276) -> impl Iterator<Item = &'static Provider> {
277    KNOWN_PROVIDERS
278        .iter()
279        .filter(move |p| p.category == category)
280}
281
#[cfg(test)]
mod tests {
    //! Unit tests for the provider table and its lookup helpers.

    use super::*;

    /// Lookup succeeds regardless of ASCII case and fails for unknown ids.
    #[test]
    fn test_find_provider() {
        assert!(find_provider("anthropic").is_some());
        assert!(find_provider("ANTHROPIC").is_some());
        assert!(find_provider("unknown").is_none());
    }

    /// Env-var lookup returns the recorded name, or `None` for unknown ids.
    #[test]
    fn test_provider_to_env_var() {
        assert_eq!(provider_to_env_var("anthropic"), Some("ANTHROPIC_API_KEY"));
        assert_eq!(provider_to_env_var("github"), Some("GITHUB_TOKEN"));
        assert_eq!(provider_to_env_var("unknown"), None);
    }

    /// Each category filter yields only matching entries, and enough of them.
    #[test]
    fn test_providers_by_category() {
        let llm: Vec<_> = providers_by_category(ProviderCategory::Llm).collect();
        // 10 LLM providers: anthropic, openai, mistral, groq, deepseek, gemini,
        //                   cohere, together, fireworks, cerebras
        assert!(
            llm.len() >= 10,
            "Expected at least 10 LLM providers, got {}",
            llm.len()
        );
        assert!(llm.iter().all(|p| p.category == ProviderCategory::Llm));

        let mcp: Vec<_> = providers_by_category(ProviderCategory::Mcp).collect();
        assert!(mcp.len() >= 5);
        assert!(mcp.iter().all(|p| p.category == ProviderCategory::Mcp));

        let local: Vec<_> = providers_by_category(ProviderCategory::Local).collect();
        // 2 Local providers: ollama, native
        assert!(
            local.len() >= 2,
            "Expected at least 2 Local providers, got {}",
            local.len()
        );
        assert!(local.iter().all(|p| p.category == ProviderCategory::Local));
    }

    /// No entry may leave its environment variable name blank.
    #[test]
    fn test_all_providers_have_env_var() {
        for provider in KNOWN_PROVIDERS {
            assert!(
                !provider.env_var.is_empty(),
                "Provider {} missing env_var",
                provider.id
            );
        }
    }

    /// No entry may leave its id or display name blank.
    #[test]
    fn test_all_providers_have_id_and_name() {
        for provider in KNOWN_PROVIDERS {
            assert!(!provider.id.is_empty(), "Provider with empty id: {:?}", provider);
            assert!(
                !provider.name.is_empty(),
                "Provider {} missing name",
                provider.id
            );
        }
    }

    /// Every entry must be reachable through `find_provider` under its own id.
    ///
    /// `find_provider` returns the first ASCII-case-insensitive match, so a
    /// duplicated (or case-colliding) id would make the later entry
    /// unreachable. Pointer equality is used so that two entries with
    /// identical contents are still detected as duplicates.
    #[test]
    fn test_provider_ids_round_trip() {
        for provider in KNOWN_PROVIDERS {
            let found = find_provider(provider.id);
            assert!(
                found.map_or(false, |hit| std::ptr::eq(hit, provider)),
                "Provider id {} is shadowed by an earlier entry",
                provider.id
            );
        }
    }

    /// The table must not shrink below the known set of providers.
    #[test]
    fn test_provider_count() {
        // Ensure we have at least 20 providers (10 LLM + 2 Local + 8 MCP)
        // LLM:   anthropic, openai, mistral, groq, deepseek, gemini,
        //        cohere, together, fireworks, cerebras
        // Local: ollama, native
        // MCP:   neo4j, github, slack, perplexity, firecrawl, supadata, dataforseo, ahrefs
        assert!(
            KNOWN_PROVIDERS.len() >= 20,
            "Expected at least 20 providers, got {}",
            KNOWN_PROVIDERS.len()
        );
    }

    /// The more recently added providers are present in the table.
    #[test]
    fn test_new_cloud_providers() {
        for id in ["cohere", "together", "fireworks", "cerebras", "native"] {
            assert!(find_provider(id).is_some(), "Provider {} not found", id);
        }
    }
}