//! Configuration types for the extraction pipeline.

use serde::{Deserialize, Serialize};

3/// Which LLM provider to use for extraction.
4#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
5pub enum LlmProvider {
6    OpenAI,
7    Anthropic,
8    Ollama,
9    Custom,
10}
11
12impl LlmProvider {
13    /// Default API URL for this provider.
14    pub fn default_url(&self) -> &str {
15        match self {
16            LlmProvider::OpenAI => "https://api.openai.com/v1/chat/completions",
17            LlmProvider::Anthropic => "https://api.anthropic.com/v1/messages",
18            LlmProvider::Ollama => "http://localhost:11434/api/chat",
19            LlmProvider::Custom => "http://localhost:8080/v1/chat/completions",
20        }
21    }
22
23    /// Default model name for this provider.
24    pub fn default_model(&self) -> &str {
25        match self {
26            LlmProvider::OpenAI => "gpt-4o-mini",
27            LlmProvider::Anthropic => "claude-sonnet-4-20250514",
28            LlmProvider::Ollama => "llama3",
29            LlmProvider::Custom => "default",
30        }
31    }
32}
33
/// Configuration for the extraction pipeline.
///
/// Construct with [`ExtractionConfig::openai`], [`ExtractionConfig::anthropic`],
/// [`ExtractionConfig::ollama`], or `Default::default()` (OpenAI defaults with
/// no API key).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ExtractionConfig {
    /// Which LLM provider to use.
    pub provider: LlmProvider,
    /// API key for the provider (not needed for Ollama).
    pub api_key: Option<String>,
    /// API endpoint URL (defaults based on provider; see [`LlmProvider::default_url`]).
    pub api_url: String,
    /// Model name to use (see [`LlmProvider::default_model`]).
    pub model: String,
    /// Maximum number of memories to extract from a single conversation.
    pub max_extractions_per_conversation: usize,
    /// Minimum confidence score for a memory to be accepted (0.0 to 1.0).
    pub quality_threshold: f32,
    /// Embedding similarity above which a memory is considered a duplicate (0.0 to 1.0).
    pub deduplication_threshold: f32,
    /// Whether to check new memories against existing ones for contradictions.
    pub enable_contradiction_check: bool,
    /// Whether to check new memories against existing ones for duplicates.
    pub enable_deduplication: bool,
}

57impl ExtractionConfig {
58    /// Create a config for OpenAI with the given API key.
59    pub fn openai(api_key: impl Into<String>) -> Self {
60        Self {
61            provider: LlmProvider::OpenAI,
62            api_key: Some(api_key.into()),
63            api_url: LlmProvider::OpenAI.default_url().to_string(),
64            model: LlmProvider::OpenAI.default_model().to_string(),
65            ..Self::default()
66        }
67    }
68
69    /// Create a config for Anthropic with the given API key.
70    pub fn anthropic(api_key: impl Into<String>) -> Self {
71        Self {
72            provider: LlmProvider::Anthropic,
73            api_key: Some(api_key.into()),
74            api_url: LlmProvider::Anthropic.default_url().to_string(),
75            model: LlmProvider::Anthropic.default_model().to_string(),
76            ..Self::default()
77        }
78    }
79
80    /// Create a config for a local Ollama instance.
81    pub fn ollama() -> Self {
82        Self {
83            provider: LlmProvider::Ollama,
84            api_key: None,
85            api_url: LlmProvider::Ollama.default_url().to_string(),
86            model: LlmProvider::Ollama.default_model().to_string(),
87            ..Self::default()
88        }
89    }
90}
91
92impl Default for ExtractionConfig {
93    fn default() -> Self {
94        Self {
95            provider: LlmProvider::OpenAI,
96            api_key: None,
97            api_url: LlmProvider::OpenAI.default_url().to_string(),
98            model: LlmProvider::OpenAI.default_model().to_string(),
99            max_extractions_per_conversation: 50,
100            quality_threshold: 0.6,
101            deduplication_threshold: 0.85,
102            enable_contradiction_check: true,
103            enable_deduplication: true,
104        }
105    }
106}