codex-memory 3.0.15

A simple memory storage service with MCP interface for Claude Desktop
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
use crate::common::TestDatabaseManager;
use anyhow::Result;
use codex_memory::{mcp_server::MCPHandlers, Storage};
use serde_json::json;
use std::sync::Arc;

/// Stores one memory through the `store_memory` tool and, if the call succeeds and
/// returns an `id`, reads the record back to confirm content and tags round-trip.
///
/// An error from the tool call is only logged; the test accepts a failed store as
/// long as the call returns rather than panicking.
#[tokio::test]
async fn test_ollama_service_unavailable() -> Result<()> {
    let mut manager = TestDatabaseManager::new()?;
    let pool = manager.setup_test_database().await?;
    let storage = Arc::new(Storage::new(pool));
    let handlers = MCPHandlers::new(storage.clone());

    // The same text is sent in the request and compared against the stored record.
    let expected_content = "This content should be stored even if Ollama is down";
    let request = json!({
        "content": expected_content,
        "context": "Test context for Ollama service unavailable test",
        "summary": "Test summary for Ollama service unavailable test",
        "tags": ["ollama-down", "fallback"]
    });

    match handlers.handle_tool_call("store_memory", request).await {
        Err(e) => {
            // Tolerated outcome: the error is reported, not asserted on.
            println!("Storage failed when LLM unavailable: {}", e);
        }
        Ok(response) => {
            println!("Content stored successfully despite potential LLM unavailability");

            // Read-back only happens when the response carries a string `id`.
            if let Some(id) = response["id"].as_str() {
                let memory_id = uuid::Uuid::parse_str(id)?;
                let stored = storage
                    .get(memory_id)
                    .await?
                    .expect("Should retrieve stored content");

                assert_eq!(stored.content, expected_content);
                assert_eq!(stored.tags, vec!["ollama-down", "fallback"]);

                // Informational only: no assertion is made on these two fields here.
                println!(
                    "Context: {:?}, Summary: {:?}",
                    stored.context, stored.summary
                );
            }
        }
    }

    manager.cleanup().await?;
    Ok(())
}

/// Feeds several large or awkward payloads to `store_memory`, timing each call.
///
/// On success the stored content length must equal the input length; on failure
/// the error text must mention one of a fixed set of timeout/processing keywords.
#[tokio::test]
async fn test_llm_timeout_scenarios() -> Result<()> {
    let mut manager = TestDatabaseManager::new()?;
    let pool = manager.setup_test_database().await?;
    let storage = Arc::new(Storage::new(pool));
    let handlers = MCPHandlers::new(storage.clone());

    // Calls slower than this only produce a warning line, never a failure.
    let slow_threshold = std::time::Duration::from_secs(60);

    let payloads = [
        // Extremely repetitive content
        "repeat ".repeat(10000),

        // One very long run-on sentence
        "This is an extremely long run-on sentence that goes on and on without any punctuation or breaks and might cause the LLM to struggle with processing or generating appropriate summaries or context ".repeat(500),

        // Several scripts mixed together
        "English text 中文内容 русский текст العربية मराठी ภาษาไทย 한국어 日本語 ".repeat(100),

        // Source-code-like text dense with punctuation
        "fn main() { let mut vec: Vec<Arc<RwLock<HashMap<String, Box<dyn Trait>>>>> = Vec::new(); }".repeat(1000),
    ];

    for (i, payload) in payloads.iter().enumerate() {
        println!("Testing potential timeout content #{}", i);

        let request = json!({
            "content": payload,
            "context": format!("Timeout test context {}", i),
            "summary": format!("Timeout test summary {}", i),
            "tags": [format!("timeout-test-{}", i)]
        });

        let started = std::time::Instant::now();
        let outcome = handlers.handle_tool_call("store_memory", request).await;
        let duration = started.elapsed();

        println!("Processing took: {:?}", duration);

        let response = match outcome {
            Ok(response) => response,
            Err(e) => {
                println!("Content #{} failed (timeout expected): {}", i, e);

                // A failure is accepted only if its message names a known cause.
                let error_msg = e.to_string().to_lowercase();
                let names_known_cause = ["timeout", "processing", "connection", "unavailable"]
                    .iter()
                    .any(|keyword| error_msg.contains(*keyword));
                assert!(
                    names_known_cause,
                    "Error should indicate processing/timeout issue: {}",
                    e
                );
                continue;
            }
        };

        println!("Content #{} processed successfully", i);

        if duration > slow_threshold {
            println!(
                "WARNING: Processing took longer than expected: {:?}",
                duration
            );
        }

        if let Some(id) = response["id"].as_str() {
            let memory_id = uuid::Uuid::parse_str(id)?;
            let stored = storage
                .get(memory_id)
                .await?
                .expect("Should retrieve content");

            assert_eq!(stored.content.len(), payload.len());
        }
    }

    manager.cleanup().await?;
    Ok(())
}

/// Stores inputs full of quotes, JSON-looking text, control characters and
/// whitespace, then checks the record that comes back.
///
/// On success the content must match the input exactly and context/summary must be
/// non-empty and below fixed size caps. On failure the error text must not mention
/// "panic" or "unwrap".
#[tokio::test]
async fn test_llm_malformed_response_handling() -> Result<()> {
    let mut manager = TestDatabaseManager::new()?;
    let pool = manager.setup_test_database().await?;
    let storage = Arc::new(Storage::new(pool));
    let handlers = MCPHandlers::new(storage.clone());

    let tricky_inputs = [
        // Quote characters and backslashes
        r#"Content with "quotes" and 'apostrophes' and `backticks` and \backslashes"#,
        // Text that is itself shaped like a JSON object
        r#"{"this": "looks like json", "but": "it's actually content"}"#,
        // Embedded control characters, including NUL
        "Content with\nnewlines\tand\ttabs\rand\0nulls",
        // Whitespace only
        "\n\n\n   \t\t\t   \n\n",
        // Security-related wording
        "This content discusses security vulnerabilities and potential attack vectors",
    ];

    for (i, input) in tricky_inputs.iter().enumerate() {
        println!(
            "Testing problematic input #{}: {}",
            i,
            input.escape_debug()
        );

        let request = json!({
            "content": input,
            "context": format!("Malformed test context {}", i),
            "summary": format!("Malformed test summary {}", i),
            "tags": [format!("malformed-test-{}", i)]
        });

        let response = match handlers.handle_tool_call("store_memory", request).await {
            Ok(response) => response,
            Err(e) => {
                println!("Problematic input #{} failed: {}", i, e);

                // A rejected input is fine; an error that smells like a crash is not.
                let error_msg = e.to_string();
                assert!(
                    !error_msg.contains("panic"),
                    "Should not panic on malformed input"
                );
                assert!(
                    !error_msg.contains("unwrap"),
                    "Should handle errors gracefully"
                );
                continue;
            }
        };

        println!("Problematic input #{} handled successfully", i);

        if let Some(id) = response["id"].as_str() {
            let memory_id = uuid::Uuid::parse_str(id)?;
            let stored = storage
                .get(memory_id)
                .await?
                .expect("Should retrieve content");

            // The payload must survive the round trip untouched.
            assert_eq!(stored.content, *input);

            // Context: present and bounded.
            assert!(
                !stored.context.is_empty(),
                "Context should not be empty string"
            );
            assert!(
                stored.context.len() < 10000,
                "Context should not be excessively long"
            );

            // Summary: present and bounded.
            assert!(
                !stored.summary.is_empty(),
                "Summary should not be empty string"
            );
            assert!(
                stored.summary.len() < 5000,
                "Summary should not be excessively long"
            );
        }
    }

    manager.cleanup().await?;
    Ok(())
}

/// Stores progressively larger payloads (a fixed paragraph repeated 1000 to 8000
/// times) and times each `store_memory` call.
///
/// On success the stored content length must match and the content must be more
/// than ten times longer than the stored summary. On failure the error text must
/// mention one of a fixed set of size/limit keywords.
#[tokio::test]
async fn test_llm_context_length_exceeded() -> Result<()> {
    let mut manager = TestDatabaseManager::new()?;
    let pool = manager.setup_test_database().await?;
    let storage = Arc::new(Storage::new(pool));
    let handlers = MCPHandlers::new(storage.clone());

    let base_text = "This is a sample paragraph that will be repeated many times to create content that exceeds the context window limits of most language models. It contains various concepts and ideas that might be interesting to summarize. ";

    // All payloads are built up front, paired with the repeat count used for labels.
    let repeat_counts: [usize; 4] = [1000, 2000, 4000, 8000];
    let context_length_tests: Vec<(usize, String)> = repeat_counts
        .iter()
        .map(|&count| (count, base_text.repeat(count)))
        .collect();

    for (repeat_count, content) in context_length_tests {
        println!(
            "Testing context length with {} repeats ({} chars)",
            repeat_count,
            content.len()
        );

        let request = json!({
            "content": content,
            "context": format!("Context length test context {}", repeat_count),
            "summary": format!("Context length test summary {}", repeat_count),
            "tags": [format!("context-length-{}", repeat_count)]
        });

        let started = std::time::Instant::now();
        let outcome = handlers.handle_tool_call("store_memory", request).await;
        let duration = started.elapsed();

        println!("Processing took: {:?}", duration);

        let response = match outcome {
            Ok(response) => response,
            Err(e) => {
                println!("Large content ({} chars) failed: {}", content.len(), e);

                // Only failures that name a size or processing limit are acceptable.
                let error_msg = e.to_string().to_lowercase();
                let names_known_limit = ["too large", "context", "limit", "timeout", "memory"]
                    .iter()
                    .any(|keyword| error_msg.contains(*keyword));
                assert!(
                    names_known_limit,
                    "Error should indicate size/processing limitation: {}",
                    e
                );
                continue;
            }
        };

        println!(
            "Large content ({} chars) processed successfully",
            content.len()
        );

        if let Some(id) = response["id"].as_str() {
            let memory_id = uuid::Uuid::parse_str(id)?;
            let stored = storage
                .get(memory_id)
                .await?
                .expect("Should retrieve content");

            // Nothing may be truncated on the way in.
            assert_eq!(stored.content.len(), content.len());

            // Ratio of payload size to summary size, both measured in bytes.
            let compression_ratio = content.len() as f64 / stored.summary.len() as f64;
            println!("Compression ratio: {:.2}:1", compression_ratio);

            assert!(
                compression_ratio > 10.0,
                "Summary should compress content significantly"
            );
        }
    }

    manager.cleanup().await?;
    Ok(())
}

#[tokio::test]
async fn test_concurrent_llm_requests() -> Result<()> {
    let mut manager = TestDatabaseManager::new()?;
    let pool = manager.setup_test_database().await?;
    let storage = Arc::new(Storage::new(pool));
    let handlers = Arc::new(MCPHandlers::new(storage));

    // Launch many concurrent requests that require LLM processing
    let mut handles = vec![];

    for i in 0..20 {
        let handlers_clone = handlers.clone();
        let handle = tokio::spawn(async move {
            let content = format!(
                "Concurrent LLM test content #{} - This is a longer piece of text that should
                 trigger LLM processing for context and summary generation. It contains multiple
                 sentences and concepts that need to be analyzed and processed by the language model
                 to generate appropriate metadata and summaries.",
                i
            );

            let params = json!({
                "content": content,
                "context": format!("Concurrent LLM test context {}", i),
                "summary": format!("Concurrent LLM test summary {}", i),
                "tags": [format!("concurrent-llm-{}", i), "stress-test"]
            });

            handlers_clone
                .handle_tool_call("store_memory", params)
                .await
        });
        handles.push(handle);
    }

    // Wait for all requests with timeout
    let timeout_duration = std::time::Duration::from_secs(120); // Generous timeout for LLM
    let start = std::time::Instant::now();

    let mut successes = 0;
    let mut failures = 0;
    let mut timeouts = 0;

    for handle in handles {
        match tokio::time::timeout(timeout_duration, handle).await {
            Ok(Ok(Ok(_))) => successes += 1,
            Ok(Ok(Err(e))) => {
                println!("LLM request failed: {}", e);
                failures += 1;
            }
            Ok(Err(e)) => {
                println!("Task failed: {}", e);
                failures += 1;
            }
            Err(_) => {
                println!("Request timed out");
                timeouts += 1;
            }
        }
    }

    let total_duration = start.elapsed();
    println!("Concurrent LLM requests completed in {:?}", total_duration);
    println!(
        "Results: {} succeeded, {} failed, {} timed out",
        successes, failures, timeouts
    );

    // At least some requests should succeed (allow for LLM service issues)
    assert!(
        successes > 0,
        "At least some concurrent LLM requests should succeed"
    );

    // Total time shouldn't be excessive (proper concurrent processing)
    assert!(
        total_duration < std::time::Duration::from_secs(300),
        "Concurrent requests should complete within reasonable time"
    );

    manager.cleanup().await?;
    Ok(())
}

/// Issues two `store_memory` calls two seconds apart and prints which combination
/// of success/failure occurred, then performs a third store without tags and, if it
/// succeeds with an `id`, verifies the stored content.
///
/// None of the tool-call failures fail the test; they are only reported.
#[tokio::test]
async fn test_llm_service_recovery() -> Result<()> {
    let mut manager = TestDatabaseManager::new()?;
    let pool = manager.setup_test_database().await?;
    let storage = Arc::new(Storage::new(pool));
    let handlers = MCPHandlers::new(storage.clone());

    println!("Testing LLM service recovery scenarios");

    // First attempt.
    let first_request = json!({
        "content": "First request to test service availability",
        "context": "Recovery test context for first request",
        "summary": "Recovery test summary for first request",
        "tags": ["recovery-test", "first"]
    });
    let first_attempt = handlers
        .handle_tool_call("store_memory", first_request)
        .await;

    // Pause between the two attempts.
    tokio::time::sleep(std::time::Duration::from_secs(2)).await;

    // Second attempt.
    let second_request = json!({
        "content": "Second request after potential recovery",
        "context": "Recovery test context for second request",
        "summary": "Recovery test summary for second request",
        "tags": ["recovery-test", "second"]
    });
    let second_attempt = handlers
        .handle_tool_call("store_memory", second_request)
        .await;

    // Report the observed pattern; every combination is accepted.
    match (first_attempt, second_attempt) {
        (Err(e1), Err(e2)) => {
            println!("Service unavailable: both failed ({}, {})", e1, e2);
        }
        (Err(e1), Ok(_)) => {
            println!("Service recovered: first failed ({}), second succeeded", e1);
        }
        (Ok(_), Err(e2)) => {
            println!("Service degraded: first succeeded, second failed ({})", e2);
        }
        (Ok(_), Ok(_)) => {
            println!("Both requests succeeded - service is stable");
        }
    }

    // Third store: same tool, no "tags" field in the request.
    let expected_basic_content = "Basic storage test without LLM dependency";
    let basic_request = json!({
        "content": expected_basic_content,
        "context": "Basic storage test context",
        "summary": "Basic storage test summary"
    });

    match handlers
        .handle_tool_call("store_memory", basic_request)
        .await
    {
        Err(e) => {
            // Reported as a warning only.
            println!("WARNING: Basic storage failed: {}", e);
        }
        Ok(response) => {
            println!("Basic storage works independent of LLM service");

            if let Some(id) = response["id"].as_str() {
                let memory_id = uuid::Uuid::parse_str(id)?;
                let stored = storage
                    .get(memory_id)
                    .await?
                    .expect("Should retrieve basic content");
                assert_eq!(stored.content, expected_basic_content);
            }
        }
    }

    manager.cleanup().await?;
    Ok(())
}

/// Stores a set of named edge-case payloads (empty, nested quotes, emoji, code,
/// markdown) and sanity-checks the context and summary of each stored record.
///
/// For every record that is stored and returned with an `id`:
/// - the context must contain no control characters and be under 50000 bytes;
/// - the summary must be under 10000 bytes, and shorter than the content whenever
///   the content is longer than 100 bytes.
///
/// A failed tool call is printed and otherwise ignored.
#[tokio::test]
async fn test_llm_response_validation() -> Result<()> {
    let mut manager = TestDatabaseManager::new()?;
    let pool = manager.setup_test_database().await?;
    let storage = Arc::new(Storage::new(pool));
    let handlers = MCPHandlers::new(storage.clone());

    // (label, payload) pairs; the label is used only in log output.
    let validation_tests = vec![
        ("Empty response trigger", ""),
        (
            "JSON breaking quotes",
            r#"Content with "nested "quotes" and 'mixed quotes'"#,
        ),
        ("Unicode stress test", "🎉🚀💾🔥⭐🎯📝🏷️🌈🔧✨🎨🎪🎭🎪🎨"),
        (
            "Code-like content",
            "fn main() { println!(\"Hello, world!\"); }",
        ),
        ("Markdown-like", "# Header\n- List item\n**bold** *italic*"),
    ];

    for (test_name, content) in validation_tests {
        println!("Testing LLM response validation: {}", test_name);

        let params = json!({
            "content": content,
            "context": "Validation test context",
            "summary": "Validation test summary",
            "tags": ["validation-test"]
        });

        let result = handlers.handle_tool_call("store_memory", params).await;

        match result {
            Ok(response) => {
                if let Some(id) = response["id"].as_str() {
                    let retrieved = storage
                        .get(uuid::Uuid::parse_str(id)?)
                        .await?
                        .expect("Should retrieve content");

                    // Inspect the stored context itself. The previous check passed
                    // for any ASCII context (ASCII includes control characters) and
                    // otherwise examined the input content instead of the context.
                    assert!(
                        !retrieved.context.chars().any(char::is_control),
                        "Context should not contain control characters"
                    );
                    assert!(
                        retrieved.context.len() < 50000,
                        "Context should not be excessively long"
                    );

                    // Very short payloads are exempt from the "shorter than the
                    // original" rule, since a summary cannot reasonably undercut them.
                    if content.len() > 100 {
                        assert!(
                            retrieved.summary.len() < content.len(),
                            "Summary should be shorter than original content"
                        );
                    }
                    assert!(
                        retrieved.summary.len() < 10000,
                        "Summary should not be excessively long"
                    );

                    println!("{} passed validation", test_name);
                }
            }
            Err(e) => {
                // A rejected payload is acceptable here; it is logged, not asserted.
                println!("  ⚠️  {} failed: {}", test_name, e);
            }
        }
    }

    manager.cleanup().await?;
    Ok(())
}