memvid_cli/enrich/
claude.rs

1//! Claude (Anthropic) enrichment engine using Claude Haiku 4.5.
2//!
3//! This engine uses the Anthropic API to extract structured memory cards
4//! from text content. Supports parallel batch processing for speed.
5//! Uses Claude Haiku 4.5 - the fastest Claude model.
6
7use anyhow::{anyhow, Result};
8use memvid_core::enrich::{EnrichmentContext, EnrichmentEngine, EnrichmentResult};
9use memvid_core::types::{MemoryCard, MemoryCardBuilder, MemoryKind, Polarity};
10use rayon::iter::{IndexedParallelIterator, IntoParallelIterator, ParallelIterator};
11use reqwest::blocking::Client;
12use serde::{Deserialize, Serialize};
13use std::sync::Arc;
14use std::time::Duration;
15use tracing::{debug, info, warn};
16
/// Instruction prefix sent to Claude ahead of the text being enriched.
///
/// It defines the `MEMORY_START` / `MEMORY_END` block layout, the
/// `kind` / `entity` / `slot` / `value` / `polarity` field names and the
/// `MEMORY_NONE` sentinel. The response parser in this file matches on exactly
/// those markers and field prefixes, so the two must be changed together.
const EXTRACTION_PROMPT: &str = r#"You are a memory extraction assistant. Extract structured facts from the text.

For each distinct fact, preference, event, or relationship mentioned, output a memory card in this exact format:
MEMORY_START
kind: <Fact|Preference|Event|Profile|Relationship|Other>
entity: <the main entity this memory is about, use "user" for the human in the conversation>
slot: <a short key describing what aspect of the entity>
value: <the actual information>
polarity: <Positive|Negative|Neutral>
MEMORY_END

Only extract information that is explicitly stated. Do not infer or guess.
If there are no clear facts to extract, output MEMORY_NONE.

Extract memories from this text:
"#;
34
35/// Claude API request message
36#[derive(Debug, Serialize, Clone)]
37struct Message {
38    role: String,
39    content: String,
40}
41
42/// Claude API request
43#[derive(Debug, Serialize)]
44struct ClaudeRequest {
45    model: String,
46    max_tokens: u32,
47    messages: Vec<Message>,
48}
49
50/// Claude API response
51#[derive(Debug, Deserialize)]
52struct ClaudeResponse {
53    content: Vec<ContentBlock>,
54}
55
56#[derive(Debug, Deserialize)]
57struct ContentBlock {
58    #[serde(rename = "type")]
59    content_type: String,
60    text: Option<String>,
61}
62
63/// Claude enrichment engine using Claude Haiku 4.5 (fastest) with parallel processing.
64pub struct ClaudeEngine {
65    /// API key
66    api_key: String,
67    /// Model to use
68    model: String,
69    /// Whether the engine is initialized
70    ready: bool,
71    /// Number of parallel workers (default: 20)
72    parallelism: usize,
73    /// Shared HTTP client (built in `init`)
74    client: Option<Client>,
75}
76
77impl ClaudeEngine {
78    /// Create a new Claude engine.
79    pub fn new() -> Self {
80        let api_key = std::env::var("ANTHROPIC_API_KEY").unwrap_or_default();
81        Self {
82            api_key,
83            model: "claude-haiku-4-5-20251001".to_string(),
84            ready: false,
85            parallelism: 20,
86            client: None,
87        }
88    }
89
90    /// Create with a specific model.
91    pub fn with_model(model: &str) -> Self {
92        let api_key = std::env::var("ANTHROPIC_API_KEY").unwrap_or_default();
93        Self {
94            api_key,
95            model: model.to_string(),
96            ready: false,
97            parallelism: 20,
98            client: None,
99        }
100    }
101
102    /// Set parallelism level.
103    pub fn with_parallelism(mut self, n: usize) -> Self {
104        self.parallelism = n;
105        self
106    }
107
108    /// Run inference via Claude API (blocking, thread-safe).
109    fn run_inference_blocking(
110        client: &Client,
111        api_key: &str,
112        model: &str,
113        text: &str,
114    ) -> Result<String> {
115        let prompt = format!("{}\n\n{}", EXTRACTION_PROMPT, text);
116
117        let request = ClaudeRequest {
118            model: model.to_string(),
119            max_tokens: 1024,
120            messages: vec![Message {
121                role: "user".to_string(),
122                content: prompt,
123            }],
124        };
125
126        let response = client
127            .post("https://api.anthropic.com/v1/messages")
128            .header("x-api-key", api_key)
129            .header("anthropic-version", "2023-06-01")
130            .header("Content-Type", "application/json")
131            .json(&request)
132            .send()
133            .map_err(|e| anyhow!("Claude API request failed: {}", e))?;
134
135        if !response.status().is_success() {
136            let status = response.status();
137            let body = response.text().unwrap_or_default();
138            return Err(anyhow!("Claude API error {}: {}", status, body));
139        }
140
141        let claude_response: ClaudeResponse = response
142            .json()
143            .map_err(|e| anyhow!("Failed to parse Claude response: {}", e))?;
144
145        claude_response
146            .content
147            .iter()
148            .find(|c| c.content_type == "text")
149            .and_then(|c| c.text.clone())
150            .ok_or_else(|| anyhow!("No text response from Claude"))
151    }
152
153    /// Parse the LLM output into memory cards.
154    fn parse_output(output: &str, frame_id: u64, uri: &str, timestamp: i64) -> Vec<MemoryCard> {
155        let mut cards = Vec::new();
156
157        if output.contains("MEMORY_NONE") {
158            return cards;
159        }
160
161        for block in output.split("MEMORY_START") {
162            let block = block.trim();
163            if block.is_empty() || !block.contains("MEMORY_END") {
164                continue;
165            }
166
167            let block = block.split("MEMORY_END").next().unwrap_or("").trim();
168
169            let mut kind = None;
170            let mut entity = None;
171            let mut slot = None;
172            let mut value = None;
173            let mut polarity = Polarity::Neutral;
174
175            for line in block.lines() {
176                let line = line.trim();
177                if let Some(rest) = line.strip_prefix("kind:") {
178                    kind = parse_memory_kind(rest.trim());
179                } else if let Some(rest) = line.strip_prefix("entity:") {
180                    entity = Some(rest.trim().to_string());
181                } else if let Some(rest) = line.strip_prefix("slot:") {
182                    slot = Some(rest.trim().to_string());
183                } else if let Some(rest) = line.strip_prefix("value:") {
184                    value = Some(rest.trim().to_string());
185                } else if let Some(rest) = line.strip_prefix("polarity:") {
186                    polarity = parse_polarity(rest.trim());
187                }
188            }
189
190            if let (Some(k), Some(e), Some(s), Some(v)) = (kind, entity, slot, value) {
191                if !e.is_empty() && !s.is_empty() && !v.is_empty() {
192                    match MemoryCardBuilder::new()
193                        .kind(k)
194                        .entity(&e)
195                        .slot(&s)
196                        .value(&v)
197                        .polarity(polarity)
198                        .source(frame_id, Some(uri.to_string()))
199                        .document_date(timestamp)
200                        .engine("claude:claude-haiku-4-5", "1.0.0")
201                        .build(0)
202                    {
203                        Ok(card) => cards.push(card),
204                        Err(err) => {
205                            warn!("Failed to build memory card: {}", err);
206                        }
207                    }
208                }
209            }
210        }
211
212        cards
213    }
214
215    /// Process multiple frames in parallel and return all cards.
216    pub fn enrich_batch(
217        &self,
218        contexts: Vec<EnrichmentContext>,
219    ) -> Result<Vec<(u64, Vec<MemoryCard>)>> {
220        let client = self
221            .client
222            .as_ref()
223            .ok_or_else(|| anyhow!("Claude engine not initialized (init() not called)"))?
224            .clone();
225        let client = Arc::new(client);
226        let api_key = Arc::new(self.api_key.clone());
227        let model = Arc::new(self.model.clone());
228        let total = contexts.len();
229
230        info!(
231            "Starting parallel enrichment of {} frames with {} workers",
232            total, self.parallelism
233        );
234
235        let pool = rayon::ThreadPoolBuilder::new()
236            .num_threads(self.parallelism)
237            .build()
238            .map_err(|err| anyhow!("failed to build enrichment thread pool: {err}"))?;
239
240        let results: Vec<(u64, Vec<MemoryCard>)> = pool.install(|| {
241            contexts
242                .into_par_iter()
243                .enumerate()
244                .map(|(i, ctx)| {
245                    if ctx.text.is_empty() {
246                        return (ctx.frame_id, vec![]);
247                    }
248
249                    if i > 0 && i % 50 == 0 {
250                        info!("Enrichment progress: {}/{} frames", i, total);
251                    }
252
253                    match Self::run_inference_blocking(&client, &api_key, &model, &ctx.text) {
254                        Ok(output) => {
255                            debug!(
256                                "Claude output for frame {}: {}",
257                                ctx.frame_id,
258                                &output[..output.len().min(100)]
259                            );
260                            let cards =
261                                Self::parse_output(&output, ctx.frame_id, &ctx.uri, ctx.timestamp);
262                            (ctx.frame_id, cards)
263                        }
264                        Err(err) => {
265                            warn!(
266                                "Claude inference failed for frame {}: {}",
267                                ctx.frame_id, err
268                            );
269                            (ctx.frame_id, vec![])
270                        }
271                    }
272                })
273                .collect()
274        });
275
276        info!(
277            "Parallel enrichment complete: {} frames processed",
278            results.len()
279        );
280        Ok(results)
281    }
282}
283
284fn parse_memory_kind(s: &str) -> Option<MemoryKind> {
285    match s.to_lowercase().as_str() {
286        "fact" => Some(MemoryKind::Fact),
287        "preference" => Some(MemoryKind::Preference),
288        "event" => Some(MemoryKind::Event),
289        "profile" => Some(MemoryKind::Profile),
290        "relationship" => Some(MemoryKind::Relationship),
291        "other" => Some(MemoryKind::Other),
292        _ => None,
293    }
294}
295
296fn parse_polarity(s: &str) -> Polarity {
297    match s.to_lowercase().as_str() {
298        "positive" => Polarity::Positive,
299        "negative" => Polarity::Negative,
300        _ => Polarity::Neutral,
301    }
302}
303
304impl EnrichmentEngine for ClaudeEngine {
305    fn kind(&self) -> &str {
306        "claude:claude-haiku-4-5"
307    }
308
309    fn version(&self) -> &str {
310        "1.0.0"
311    }
312
313    fn init(&mut self) -> memvid_core::Result<()> {
314        if self.api_key.is_empty() {
315            return Err(memvid_core::MemvidError::EmbeddingFailed {
316                reason: "ANTHROPIC_API_KEY environment variable not set".into(),
317            });
318        }
319        let client = crate::http::blocking_client(Duration::from_secs(60)).map_err(|err| {
320            memvid_core::MemvidError::EmbeddingFailed {
321                reason: format!("Failed to create Claude HTTP client: {err}").into(),
322            }
323        })?;
324        self.client = Some(client);
325        self.ready = true;
326        Ok(())
327    }
328
329    fn is_ready(&self) -> bool {
330        self.ready
331    }
332
333    fn enrich(&self, ctx: &EnrichmentContext) -> EnrichmentResult {
334        if ctx.text.is_empty() {
335            return EnrichmentResult::empty();
336        }
337
338        let client = match self.client.as_ref() {
339            Some(client) => client,
340            None => {
341                return EnrichmentResult::failed(
342                    "Claude engine not initialized (init() not called)".to_string(),
343                )
344            }
345        };
346
347        match Self::run_inference_blocking(client, &self.api_key, &self.model, &ctx.text) {
348            Ok(output) => {
349                debug!("Claude output for frame {}: {}", ctx.frame_id, output);
350                let cards = Self::parse_output(&output, ctx.frame_id, &ctx.uri, ctx.timestamp);
351                EnrichmentResult::success(cards)
352            }
353            Err(err) => EnrichmentResult::failed(format!("Claude inference failed: {}", err)),
354        }
355    }
356}
357
358impl Default for ClaudeEngine {
359    fn default() -> Self {
360        Self::new()
361    }
362}