memvid_cli/enrich/
claude.rs

1//! Claude (Anthropic) enrichment engine using Claude Haiku 4.5.
2//!
3//! This engine uses the Anthropic API to extract structured memory cards
4//! from text content. Supports parallel batch processing for speed.
5//! Uses Claude Haiku 4.5 - the fastest Claude model.
6
7use anyhow::{anyhow, Result};
8use memvid_core::enrich::{EnrichmentContext, EnrichmentEngine, EnrichmentResult};
9use memvid_core::types::{MemoryCard, MemoryCardBuilder, MemoryKind, Polarity};
10use rayon::iter::{IndexedParallelIterator, IntoParallelIterator, ParallelIterator};
11use reqwest::blocking::Client;
12use serde::{Deserialize, Serialize};
13use std::sync::Arc;
14use std::time::Duration;
15use tracing::{debug, info, warn};
16
/// Instruction preamble sent to Claude ahead of the text to analyse.
///
/// It asks the model to emit one `MEMORY_START` … `MEMORY_END` block per
/// extracted memory, each made of `kind:`, `entity:`, `slot:`, `value:` and
/// `polarity:` lines, or the single marker `MEMORY_NONE` when nothing can be
/// extracted. `ClaudeEngine::parse_output` reads exactly this format, so the
/// two must be changed together.
const EXTRACTION_PROMPT: &str = r#"You are a memory extraction assistant. Extract structured facts from the text.

For each distinct fact, preference, event, or relationship mentioned, output a memory card in this exact format:
MEMORY_START
kind: <Fact|Preference|Event|Profile|Relationship|Other>
entity: <the main entity this memory is about, use "user" for the human in the conversation>
slot: <a short key describing what aspect of the entity>
value: <the actual information>
polarity: <Positive|Negative|Neutral>
MEMORY_END

Only extract information that is explicitly stated. Do not infer or guess.
If there are no clear facts to extract, output MEMORY_NONE.

Extract memories from this text:
"#;
34
35/// Claude API request message
36#[derive(Debug, Serialize, Clone)]
37struct Message {
38    role: String,
39    content: String,
40}
41
42/// Claude API request
43#[derive(Debug, Serialize)]
44struct ClaudeRequest {
45    model: String,
46    max_tokens: u32,
47    messages: Vec<Message>,
48}
49
50/// Claude API response
51#[derive(Debug, Deserialize)]
52struct ClaudeResponse {
53    content: Vec<ContentBlock>,
54}
55
56#[derive(Debug, Deserialize)]
57struct ContentBlock {
58    #[serde(rename = "type")]
59    content_type: String,
60    text: Option<String>,
61}
62
63/// Claude enrichment engine using Claude Haiku 4.5 (fastest) with parallel processing.
64pub struct ClaudeEngine {
65    /// API key
66    api_key: String,
67    /// Model to use
68    model: String,
69    /// Whether the engine is initialized
70    ready: bool,
71    /// Number of parallel workers (default: 20)
72    parallelism: usize,
73    /// Shared HTTP client (built in `init`)
74    client: Option<Client>,
75}
76
77impl ClaudeEngine {
78    /// Create a new Claude engine.
79    pub fn new() -> Self {
80        let api_key = std::env::var("ANTHROPIC_API_KEY").unwrap_or_default();
81        Self {
82            api_key,
83            model: "claude-haiku-4-5-20251001".to_string(),
84            ready: false,
85            parallelism: 20,
86            client: None,
87        }
88    }
89
90    /// Create with a specific model.
91    pub fn with_model(model: &str) -> Self {
92        let api_key = std::env::var("ANTHROPIC_API_KEY").unwrap_or_default();
93        Self {
94            api_key,
95            model: model.to_string(),
96            ready: false,
97            parallelism: 20,
98            client: None,
99        }
100    }
101
102    /// Set parallelism level.
103    pub fn with_parallelism(mut self, n: usize) -> Self {
104        self.parallelism = n;
105        self
106    }
107
108
109    /// Run inference via Claude API (blocking, thread-safe).
110    fn run_inference_blocking(
111        client: &Client,
112        api_key: &str,
113        model: &str,
114        text: &str,
115    ) -> Result<String> {
116        let prompt = format!("{}\n\n{}", EXTRACTION_PROMPT, text);
117
118        let request = ClaudeRequest {
119            model: model.to_string(),
120            max_tokens: 1024,
121            messages: vec![Message {
122                role: "user".to_string(),
123                content: prompt,
124            }],
125        };
126
127        let response = client
128            .post("https://api.anthropic.com/v1/messages")
129            .header("x-api-key", api_key)
130            .header("anthropic-version", "2023-06-01")
131            .header("Content-Type", "application/json")
132            .json(&request)
133            .send()
134            .map_err(|e| anyhow!("Claude API request failed: {}", e))?;
135
136        if !response.status().is_success() {
137            let status = response.status();
138            let body = response.text().unwrap_or_default();
139            return Err(anyhow!("Claude API error {}: {}", status, body));
140        }
141
142        let claude_response: ClaudeResponse = response
143            .json()
144            .map_err(|e| anyhow!("Failed to parse Claude response: {}", e))?;
145
146        claude_response
147            .content
148            .iter()
149            .find(|c| c.content_type == "text")
150            .and_then(|c| c.text.clone())
151            .ok_or_else(|| anyhow!("No text response from Claude"))
152    }
153
154    /// Parse the LLM output into memory cards.
155    fn parse_output(output: &str, frame_id: u64, uri: &str, timestamp: i64) -> Vec<MemoryCard> {
156        let mut cards = Vec::new();
157
158        if output.contains("MEMORY_NONE") {
159            return cards;
160        }
161
162        for block in output.split("MEMORY_START") {
163            let block = block.trim();
164            if block.is_empty() || !block.contains("MEMORY_END") {
165                continue;
166            }
167
168            let block = block.split("MEMORY_END").next().unwrap_or("").trim();
169
170            let mut kind = None;
171            let mut entity = None;
172            let mut slot = None;
173            let mut value = None;
174            let mut polarity = Polarity::Neutral;
175
176            for line in block.lines() {
177                let line = line.trim();
178                if let Some(rest) = line.strip_prefix("kind:") {
179                    kind = parse_memory_kind(rest.trim());
180                } else if let Some(rest) = line.strip_prefix("entity:") {
181                    entity = Some(rest.trim().to_string());
182                } else if let Some(rest) = line.strip_prefix("slot:") {
183                    slot = Some(rest.trim().to_string());
184                } else if let Some(rest) = line.strip_prefix("value:") {
185                    value = Some(rest.trim().to_string());
186                } else if let Some(rest) = line.strip_prefix("polarity:") {
187                    polarity = parse_polarity(rest.trim());
188                }
189            }
190
191            if let (Some(k), Some(e), Some(s), Some(v)) = (kind, entity, slot, value) {
192                if !e.is_empty() && !s.is_empty() && !v.is_empty() {
193                    match MemoryCardBuilder::new()
194                        .kind(k)
195                        .entity(&e)
196                        .slot(&s)
197                        .value(&v)
198                        .polarity(polarity)
199                        .source(frame_id, Some(uri.to_string()))
200                        .document_date(timestamp)
201                        .engine("claude:claude-haiku-4-5", "1.0.0")
202                        .build(0)
203                    {
204                        Ok(card) => cards.push(card),
205                        Err(err) => {
206                            warn!("Failed to build memory card: {}", err);
207                        }
208                    }
209                }
210            }
211        }
212
213        cards
214    }
215
216    /// Process multiple frames in parallel and return all cards.
217    pub fn enrich_batch(
218        &self,
219        contexts: Vec<EnrichmentContext>,
220    ) -> Result<Vec<(u64, Vec<MemoryCard>)>> {
221        let client = self
222            .client
223            .as_ref()
224            .ok_or_else(|| anyhow!("Claude engine not initialized (init() not called)"))?
225            .clone();
226        let client = Arc::new(client);
227        let api_key = Arc::new(self.api_key.clone());
228        let model = Arc::new(self.model.clone());
229        let total = contexts.len();
230
231        info!(
232            "Starting parallel enrichment of {} frames with {} workers",
233            total, self.parallelism
234        );
235
236        let pool = rayon::ThreadPoolBuilder::new()
237            .num_threads(self.parallelism)
238            .build()
239            .map_err(|err| anyhow!("failed to build enrichment thread pool: {err}"))?;
240
241        let results: Vec<(u64, Vec<MemoryCard>)> = pool.install(|| {
242            contexts
243                .into_par_iter()
244                .enumerate()
245                .map(|(i, ctx)| {
246                    if ctx.text.is_empty() {
247                        return (ctx.frame_id, vec![]);
248                    }
249
250                    if i > 0 && i % 50 == 0 {
251                        info!("Enrichment progress: {}/{} frames", i, total);
252                    }
253
254                    match Self::run_inference_blocking(&client, &api_key, &model, &ctx.text) {
255                        Ok(output) => {
256                            debug!(
257                                "Claude output for frame {}: {}",
258                                ctx.frame_id,
259                                &output[..output.len().min(100)]
260                            );
261                            let cards =
262                                Self::parse_output(&output, ctx.frame_id, &ctx.uri, ctx.timestamp);
263                            (ctx.frame_id, cards)
264                        }
265                        Err(err) => {
266                            warn!(
267                                "Claude inference failed for frame {}: {}",
268                                ctx.frame_id, err
269                            );
270                            (ctx.frame_id, vec![])
271                        }
272                    }
273                })
274                .collect()
275        });
276
277        info!(
278            "Parallel enrichment complete: {} frames processed",
279            results.len()
280        );
281        Ok(results)
282    }
283}
284
285fn parse_memory_kind(s: &str) -> Option<MemoryKind> {
286    match s.to_lowercase().as_str() {
287        "fact" => Some(MemoryKind::Fact),
288        "preference" => Some(MemoryKind::Preference),
289        "event" => Some(MemoryKind::Event),
290        "profile" => Some(MemoryKind::Profile),
291        "relationship" => Some(MemoryKind::Relationship),
292        "other" => Some(MemoryKind::Other),
293        _ => None,
294    }
295}
296
297fn parse_polarity(s: &str) -> Polarity {
298    match s.to_lowercase().as_str() {
299        "positive" => Polarity::Positive,
300        "negative" => Polarity::Negative,
301        _ => Polarity::Neutral,
302    }
303}
304
305impl EnrichmentEngine for ClaudeEngine {
306    fn kind(&self) -> &str {
307        "claude:claude-haiku-4-5"
308    }
309
310    fn version(&self) -> &str {
311        "1.0.0"
312    }
313
314    fn init(&mut self) -> memvid_core::Result<()> {
315        if self.api_key.is_empty() {
316            return Err(memvid_core::MemvidError::EmbeddingFailed {
317                reason: "ANTHROPIC_API_KEY environment variable not set".into(),
318            });
319        }
320        let client = crate::http::blocking_client(Duration::from_secs(60)).map_err(|err| {
321            memvid_core::MemvidError::EmbeddingFailed {
322                reason: format!("Failed to create Claude HTTP client: {err}").into(),
323            }
324        })?;
325        self.client = Some(client);
326        self.ready = true;
327        Ok(())
328    }
329
330    fn is_ready(&self) -> bool {
331        self.ready
332    }
333
334    fn enrich(&self, ctx: &EnrichmentContext) -> EnrichmentResult {
335        if ctx.text.is_empty() {
336            return EnrichmentResult::empty();
337        }
338
339        let client = match self.client.as_ref() {
340            Some(client) => client,
341            None => {
342                return EnrichmentResult::failed(
343                    "Claude engine not initialized (init() not called)".to_string(),
344                )
345            }
346        };
347
348        match Self::run_inference_blocking(client, &self.api_key, &self.model, &ctx.text) {
349            Ok(output) => {
350                debug!("Claude output for frame {}: {}", ctx.frame_id, output);
351                let cards = Self::parse_output(&output, ctx.frame_id, &ctx.uri, ctx.timestamp);
352                EnrichmentResult::success(cards)
353            }
354            Err(err) => EnrichmentResult::failed(format!("Claude inference failed: {}", err)),
355        }
356    }
357}
358
359impl Default for ClaudeEngine {
360    fn default() -> Self {
361        Self::new()
362    }
363}