opencrabs 0.3.55

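//! QMD Memory Store Benchmarks
//!
//! Benchmarks for core memory operations:
//! - Store open (cold start)
//! - Single file indexing
//! - Hash-based skip (already indexed, unchanged)
//! - FTS5 search across varying corpus sizes
//! - Bulk reindex
//! - Document deactivation
//! - Vector similarity search across varying corpus sizes
//! - Hybrid search (FTS + vector, fused with RRF)
//! - Single embedding insertion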

#![allow(clippy::all)]

use criterion::{BenchmarkId, Criterion, black_box, criterion_group, criterion_main};
use qmd::Store;
use tempfile::TempDir;

/// Sample memory log content resembling real compaction summaries.
///
/// Ten fixed markdown documents, each opening with a `# Session Summary`
/// heading. The store-seeding helpers and the bulk-index benchmark use entry
/// `i` as the body of document `i` while `i < SAMPLE_ENTRIES.len()` and fall
/// back to generated text beyond that; the single-document benchmarks reuse
/// entry 0.
///
/// The search benchmarks query for `authentication`, `database`, and
/// `integration`, all of which occur in these entries, so keep those words
/// present if the samples are ever edited.
const SAMPLE_ENTRIES: &[&str] = &[
    "# Session Summary\n\nFixed authentication bug in login flow. The JWT token was not being refreshed properly when the session expired. Updated the middleware to check token expiry before each API call.\n\n## Files Modified\n- src/auth/middleware.rs\n- src/auth/token.rs\n- tests/auth_test.rs",
    "# Session Summary\n\nRefactored database connection pooling. Replaced manual connection management with sqlx pool. Added retry logic for transient connection failures.\n\n## Key Decisions\n- Using sqlx instead of diesel for async support\n- Pool size set to 10 connections\n- Retry up to 3 times with exponential backoff",
    "# Session Summary\n\nImplemented Discord bot integration. Added message handling, slash commands, and channel management. The bot uses serenity framework with gateway intents for message content.\n\n## Architecture\n- Event handler pattern for incoming messages\n- Command registry for slash commands\n- Rate limiting per user per channel",
    "# Session Summary\n\nDebugged memory leak in TUI rendering. The issue was caused by accumulating terminal buffers that were never freed. Fixed by implementing proper cleanup in the render loop.\n\n## Root Cause\n- Crossterm alternate screen buffers not being dropped\n- Fixed by adding explicit flush and cleanup on exit",
    "# Session Summary\n\nAdded vector search capability using qmd crate. The FTS5 engine provides BM25 ranking for full-text search. Integrated as a library dependency replacing the CLI subprocess approach.\n\n## Performance\n- Sub-millisecond search latency\n- SHA-256 content hashing for dedup\n- Background reindex on startup",
    "# Session Summary\n\nOptimized context compaction algorithm. Reduced token usage by 40% through smarter summarization prompts. The compaction now preserves code snippets and error messages while condensing narrative.\n\n## Changes\n- New compaction prompt template\n- Preserve code blocks during summarization\n- Track token savings metrics",
    "# Session Summary\n\nImplemented Slack integration with OAuth2 flow. Added workspace installation, event subscriptions, and interactive message components. Uses slack-morphism crate.\n\n## Features\n- Slash command /opencrabs for invoking the agent\n- Thread-based conversations\n- File upload support for code review",
    "# Session Summary\n\nFixed CI pipeline failures. Updated GitHub Actions workflow to use latest Rust nightly for portable_simd. Added caching for Cargo dependencies to speed up builds.\n\n## Issues Resolved\n- nightly-2025-12 broke portable_simd API\n- Added wacore-binary patch for compatibility\n- Build time reduced from 12min to 4min",
    "# Session Summary\n\nAdded WhatsApp integration via whatsapp-rust crate. Implemented QR code pairing, message routing, and media handling. Uses SQLite-backed session store for persistence.\n\n## Components\n- QR code display in terminal\n- Message handler with conversation context\n- Media download and upload support",
    "# Session Summary\n\nRefactored tool system to use trait-based registry. Each tool implements the Tool trait with name, description, schema, and execute methods. Tools are registered at startup and dispatched by name.\n\n## Design\n- ToolCapability enum for permission model\n- Async execute with structured ToolResult\n- Approval flow for dangerous operations",
];

/// Build a store inside a fresh temp directory and seed it with `n` documents.
///
/// Document `idx` takes its body from `SAMPLE_ENTRIES[idx]` when that entry
/// exists and from a generated keyword-bearing paragraph otherwise. Every
/// document is filed under the `"memory"` collection as `entry-{idx}.md`.
///
/// The returned `TempDir` must be kept alive for as long as the store is used.
///
/// # Panics
/// Panics if the temp directory, the store, or any insert fails.
fn setup_store(n: usize) -> (Store, TempDir) {
    const TIMESTAMP: &str = "2026-01-01T00:00:00";

    let tmp = TempDir::new().expect("tempdir");
    let db_path = tmp.path().join("bench.db");
    let store = Store::open(&db_path).expect("store open");

    for idx in 0..n {
        // Real samples first, synthetic filler once they run out.
        let content = match SAMPLE_ENTRIES.get(idx) {
            Some(&sample) => sample.to_string(),
            None => format!(
                "# Session {idx}\n\nAutogenerated benchmark entry number {idx}. \
                 Contains keywords like authentication, database, refactor, \
                 performance, and integration for search testing."
            ),
        };
        let digest = Store::hash_content(&content);
        let heading = Store::extract_title(&content);
        let file_name = format!("entry-{idx}.md");

        store
            .insert_content(&digest, &content, TIMESTAMP)
            .unwrap();
        store
            .insert_document(
                "memory", &file_name, &heading, &digest, TIMESTAMP, TIMESTAMP,
            )
            .unwrap();
    }

    (store, tmp)
}

/// Benchmark: Store::open cold start (empty database).
fn bench_store_open(c: &mut Criterion) {
    c.bench_function("store_open", |b| {
        b.iter(|| {
            let dir = TempDir::new().unwrap();
            let db = dir.path().join("bench.db");
            black_box(Store::open(&db).unwrap());
        });
    });
}

/// Benchmark: index one file, i.e. `insert_content` followed by `insert_document`.
///
/// Each timed iteration derives a unique body (sample entry 0 plus an
/// iteration marker), hashes it, extracts its title, and inserts both the
/// content row and the document row.
///
/// # Panics
/// Panics if the temp directory, the store, or any insert fails.
fn bench_index_file(c: &mut Criterion) {
    use std::sync::atomic::{AtomicU64, Ordering};

    const TIMESTAMP: &str = "2026-01-01T00:00:00";
    // Process-wide sequence number so no two iterations share a body or path.
    static COUNTER: AtomicU64 = AtomicU64::new(0);

    let template = SAMPLE_ENTRIES[0];

    c.bench_function("index_file", |bencher| {
        // A new store each time Criterion invokes this closure, so inserts
        // never collide with rows left over from an earlier invocation.
        let tmp = TempDir::new().unwrap();
        let db_path = tmp.path().join("bench.db");
        let store = Store::open(&db_path).unwrap();

        bencher.iter(|| {
            let seq = COUNTER.fetch_add(1, Ordering::Relaxed);
            let content = format!("{template}\n<!-- iter {seq} -->");
            let digest = Store::hash_content(&content);
            let heading = Store::extract_title(&content);
            let file_name = format!("bench-{seq}.md");

            store
                .insert_content(&digest, &content, TIMESTAMP)
                .unwrap();
            let inserted = store
                .insert_document(
                    "memory", &file_name, &heading, &digest, TIMESTAMP, TIMESTAMP,
                )
                .unwrap();
            black_box(inserted);
        });
    });
}

/// Benchmark: hash-based skip for a file that is already indexed and unchanged.
///
/// One document (`existing.md`, sample entry 0) is inserted up front. Each
/// timed iteration re-hashes the same content, looks up the active document,
/// and compares the stored hash with the fresh one.
///
/// # Panics
/// Panics if the temp directory, the store, the seeding inserts, or the lookup fails.
fn bench_hash_skip(c: &mut Criterion) {
    const TIMESTAMP: &str = "2026-01-01T00:00:00";
    const DOC_PATH: &str = "existing.md";

    let tmp = TempDir::new().unwrap();
    let db_path = tmp.path().join("bench.db");
    let store = Store::open(&db_path).unwrap();

    let content = SAMPLE_ENTRIES[0];
    let stored_hash = Store::hash_content(content);
    let heading = Store::extract_title(content);

    // Seed the single document the benchmark will keep re-checking.
    store
        .insert_content(&stored_hash, content, TIMESTAMP)
        .unwrap();
    store
        .insert_document(
            "memory", DOC_PATH, &heading, &stored_hash, TIMESTAMP, TIMESTAMP,
        )
        .unwrap();

    c.bench_function("hash_skip", |bencher| {
        bencher.iter(|| {
            let fresh_hash = Store::hash_content(content);
            let lookup = store.find_active_document("memory", DOC_PATH).unwrap();
            // True when the document exists and its recorded hash is unchanged.
            let unchanged = matches!(
                lookup,
                Some((_, indexed_hash, _)) if indexed_hash == fresh_hash
            );
            black_box(unchanged);
        });
    });

    drop(store);
}

/// Benchmark: two-term FTS search over corpora of 10, 50, 100, and 500 documents.
///
/// For each size a store is seeded via `setup_store`, then the timed loop runs
/// the same quoted two-term query limited to 5 results in the `"memory"`
/// collection.
///
/// # Panics
/// Panics if store setup or a search fails.
fn bench_search_fts(c: &mut Criterion) {
    const CORPUS_SIZES: [usize; 4] = [10, 50, 100, 500];
    const QUERY: &str = "\"authentication\" \"database\"";

    let mut group = c.benchmark_group("search_fts");

    for &size in CORPUS_SIZES.iter() {
        let id = BenchmarkId::from_parameter(size);
        group.bench_with_input(id, &size, |bencher, &docs| {
            // `_tmp` keeps the backing directory alive while the store is in use.
            let (store, _tmp) = setup_store(docs);

            bencher.iter(|| {
                let hits = store.search_fts(QUERY, 5, Some("memory")).unwrap();
                black_box(hits);
            });
        });
    }

    group.finish();
}

/// Benchmark: single-term FTS search (the common case) on a 50-document store.
///
/// The query is limited to 5 results within the `"memory"` collection.
///
/// # Panics
/// Panics if store setup or a search fails.
fn bench_search_single_term(c: &mut Criterion) {
    const QUERY: &str = "\"authentication\"";

    // `_tmp` keeps the backing directory alive while the store is in use.
    let (store, _tmp) = setup_store(50);

    c.bench_function("search_single_term", |bencher| {
        bencher.iter(|| {
            let hits = store.search_fts(QUERY, 5, Some("memory")).unwrap();
            black_box(hits);
        });
    });
}

/// Benchmark: FTS search with no collection filter on a 50-document store.
///
/// Passes `None` as the collection and asks for up to 10 results.
///
/// # Panics
/// Panics if store setup or a search fails.
fn bench_search_all_collections(c: &mut Criterion) {
    const QUERY: &str = "\"integration\"";

    // `_tmp` keeps the backing directory alive while the store is in use.
    let (store, _tmp) = setup_store(50);

    c.bench_function("search_all_collections", |bencher| {
        bencher.iter(|| {
            let hits = store.search_fts(QUERY, 10, None).unwrap();
            black_box(hits);
        });
    });
}

/// Benchmark: bulk reindex, inserting N files sequentially for N = 10 and 50.
///
/// Each timed iteration creates a temp directory, opens a new store in it,
/// and inserts N documents. Bodies come from `SAMPLE_ENTRIES` while available
/// and from a short generated string afterwards. Store creation and teardown
/// are part of the measured time.
///
/// # Panics
/// Panics if the temp directory, the store, or any insert fails.
fn bench_bulk_index(c: &mut Criterion) {
    const TIMESTAMP: &str = "2026-01-01T00:00:00";
    const BATCH_SIZES: [usize; 2] = [10, 50];

    let mut group = c.benchmark_group("bulk_index");

    for &batch in BATCH_SIZES.iter() {
        let id = BenchmarkId::from_parameter(batch);
        group.bench_with_input(id, &batch, |bencher, &total| {
            bencher.iter(|| {
                let tmp = TempDir::new().unwrap();
                let db_path = tmp.path().join("bench.db");
                let store = Store::open(&db_path).unwrap();

                for idx in 0..total {
                    let content = match SAMPLE_ENTRIES.get(idx) {
                        Some(&sample) => sample.to_string(),
                        None => format!("# Entry {idx}\n\nBenchmark content {idx}."),
                    };
                    let digest = Store::hash_content(&content);
                    let heading = Store::extract_title(&content);
                    let file_name = format!("entry-{idx}.md");

                    store
                        .insert_content(&digest, &content, TIMESTAMP)
                        .unwrap();
                    store
                        .insert_document(
                            "memory", &file_name, &heading, &digest, TIMESTAMP, TIMESTAMP,
                        )
                        .unwrap();
                }

                black_box(&store);
            });
        });
    }

    group.finish();
}

/// Benchmark: Document deactivation (prune deleted file).
fn bench_deactivate(c: &mut Criterion) {
    use std::sync::atomic::{AtomicU64, Ordering};
    static COUNTER: AtomicU64 = AtomicU64::new(0);

    c.bench_function("deactivate_document", |b| {
        let dir = TempDir::new().unwrap();
        let db = dir.path().join("bench.db");
        let store = Store::open(&db).unwrap();
        let body = SAMPLE_ENTRIES[0];
        let hash = Store::hash_content(body);
        let title = Store::extract_title(body);
        let now = "2026-01-01T00:00:00";

        // Insert content once; only documents vary per iteration
        store.insert_content(&hash, body, now).unwrap();

        b.iter(|| {
            let i = COUNTER.fetch_add(1, Ordering::Relaxed);
            let path = format!("deactivate-{i}.md");
            store
                .insert_document("memory", &path, &title, &hash, now, now)
                .unwrap();
            black_box(store.deactivate_document("memory", &path).unwrap());
        });
    });
}

/// Build a seeded store (see `setup_store`) and attach a fake 768-dim embedding
/// to each of its `n` documents, for the vector benchmarks.
///
/// The document bodies are regenerated here with the same rule `setup_store`
/// uses, so the hashes computed below refer to the content inserted there.
/// Each embedding is all zeros except for up to three positions derived from
/// the document index; when positions coincide, the later (smaller) weight wins.
///
/// # Panics
/// Panics if store setup, vector-table creation, or an embedding insert fails.
fn setup_store_with_embeddings(n: usize) -> (Store, TempDir) {
    const TIMESTAMP: &str = "2026-01-01T00:00:00";

    let (store, tmp) = setup_store(n);
    store.ensure_vector_table(768).unwrap();

    for idx in 0..n {
        let content = match SAMPLE_ENTRIES.get(idx) {
            Some(&sample) => sample.to_string(),
            None => format!(
                "# Session {idx}\n\nAutogenerated benchmark entry number {idx}. \
                 Contains keywords like authentication, database, refactor, \
                 performance, and integration for search testing."
            ),
        };
        let digest = Store::hash_content(&content);

        // Deterministic fake embedding: a few non-zero dims chosen from the index.
        // Applied in this exact order so collisions resolve to the last weight.
        let mut embedding = vec![0.0f32; 768];
        let dims = embedding.len();
        let spikes = [
            (idx % dims, 1.0),
            ((idx * 7) % dims, 0.5),
            ((idx * 13) % dims, 0.3),
        ];
        for (slot, weight) in spikes {
            embedding[slot] = weight;
        }

        store
            .insert_embedding(&digest, 0, 0, &embedding, "bench-model", TIMESTAMP)
            .unwrap();
    }

    (store, tmp)
}

/// Benchmark: vector similarity search over corpora of 10, 50, and 100 documents.
///
/// Each corpus is built by `setup_store_with_embeddings`. The timed loop runs a
/// fixed 768-dim query (non-zero only in its first two dimensions), asking for
/// 5 results in the `"memory"` collection.
///
/// # Panics
/// Panics if store setup or a search fails.
fn bench_search_vec(c: &mut Criterion) {
    const CORPUS_SIZES: [usize; 3] = [10, 50, 100];

    let mut group = c.benchmark_group("search_vec");

    for &size in CORPUS_SIZES.iter() {
        let id = BenchmarkId::from_parameter(size);
        group.bench_with_input(id, &size, |bencher, &docs| {
            // `_tmp` keeps the backing directory alive while the store is in use.
            let (store, _tmp) = setup_store_with_embeddings(docs);

            let mut probe = vec![0.0f32; 768];
            probe[0] = 0.9;
            probe[1] = 0.5;

            bencher.iter(|| {
                let hits = store.search_vec(&probe, 5, Some("memory")).unwrap();
                black_box(hits);
            });
        });
    }

    group.finish();
}

/// Benchmark: hybrid search (FTS + vector → RRF) on a 50-document corpus with embeddings.
///
/// Each timed iteration runs one FTS query and one vector query (10 results
/// each, `"memory"` collection), converts both result lists into
/// `(path, path, title, "")` tuples, and fuses them with `hybrid_search_rrf`
/// using the constant 60.
///
/// # Panics
/// Panics if store setup or either search fails.
fn bench_hybrid_rrf(c: &mut Criterion) {
    use qmd::hybrid_search_rrf;

    const TEXT_QUERY: &str = "\"authentication\" \"database\"";

    // `_tmp` keeps the backing directory alive while the store is in use.
    let (store, _tmp) = setup_store_with_embeddings(50);

    let mut probe = vec![0.0f32; 768];
    probe[0] = 0.9;
    probe[1] = 0.5;

    c.bench_function("hybrid_rrf_50", |bencher| {
        bencher.iter(|| {
            let text_hits = store
                .search_fts(TEXT_QUERY, 10, Some("memory"))
                .unwrap();
            let vector_hits = store.search_vec(&probe, 10, Some("memory")).unwrap();

            // The path is used for both of the first two tuple slots.
            let text_rows: Vec<_> = text_hits
                .iter()
                .map(|hit| {
                    let doc = &hit.doc;
                    (
                        doc.path.clone(),
                        doc.path.clone(),
                        doc.title.clone(),
                        String::new(),
                    )
                })
                .collect();
            let vector_rows: Vec<_> = vector_hits
                .iter()
                .map(|hit| {
                    let doc = &hit.doc;
                    (
                        doc.path.clone(),
                        doc.path.clone(),
                        doc.title.clone(),
                        String::new(),
                    )
                })
                .collect();

            let fused = hybrid_search_rrf(text_rows, vector_rows, 60);
            black_box(fused);
        });
    });
}

/// Benchmark: Insert embedding for a single document.
fn bench_insert_embedding(c: &mut Criterion) {
    let dir = TempDir::new().unwrap();
    let db = dir.path().join("bench.db");
    let store = Store::open(&db).unwrap();
    store.ensure_vector_table(768).unwrap();

    use std::sync::atomic::{AtomicU64, Ordering};
    static COUNTER: AtomicU64 = AtomicU64::new(0);

    let now = "2026-01-01T00:00:00";
    let emb = vec![0.1f32; 768];

    c.bench_function("insert_embedding", |b| {
        b.iter(|| {
            let i = COUNTER.fetch_add(1, Ordering::Relaxed);
            let body = format!("# Bench {i}\nContent for embedding benchmark {i}");
            let hash = Store::hash_content(&body);
            store.insert_content(&hash, &body, now).unwrap();
            store
                .insert_document("memory", &format!("emb-{i}.md"), "Bench", &hash, now, now)
                .unwrap();
            black_box(
                store
                    .insert_embedding(&hash, 0, 0, &emb, "bench-model", now)
                    .unwrap(),
            );
        });
    });
}

criterion_group!(
    benches,
    bench_store_open,
    bench_index_file,
    bench_hash_skip,
    bench_search_fts,
    bench_search_single_term,
    bench_search_all_collections,
    bench_bulk_index,
    bench_deactivate,
    bench_search_vec,
    bench_hybrid_rrf,
    bench_insert_embedding,
);
criterion_main!(benches);