vecstore 1.0.0 - Docs.rs

#![cfg(all(feature = "embeddings", feature = "openai-embeddings"))]

// Extended tests for OpenAI embeddings backend with HTTP mocking
//
// These tests use wiremock for HTTP mocking to test network behavior
// without requiring a real API key.

#[cfg(feature = "openai-embeddings")]
mod extended_tests {
    use vecstore::embeddings::openai_backend::{OpenAIEmbedding, OpenAIModel};
    use vecstore::embeddings::TextEmbedder;

    // ========================================================================
    // Empty Input Edge Cases
    // ========================================================================

    #[tokio::test]
    async fn test_embed_batch_empty_array() {
        let embedder =
            OpenAIEmbedding::new("test-api-key".to_string(), OpenAIModel::TextEmbedding3Small)
                .await
                .expect("Failed to create embedder");

        let empty: Vec<&str> = vec![];
        let result = embedder.embed_batch_async(&empty).await;

        assert!(result.is_ok());
        let embeddings = result.unwrap();
        assert_eq!(embeddings.len(), 0);
    }

    // ========================================================================
    // Rate Limiter Tests
    // ========================================================================

    #[tokio::test]
    async fn test_rate_limiter_allows_under_limit() {
        let embedder =
            OpenAIEmbedding::new("test-api-key".to_string(), OpenAIModel::TextEmbedding3Small)
                .await
                .expect("Failed to create embedder")
                .with_rate_limit(1000); // High limit should not block

        // Creating embedder with high rate limit should succeed
        assert_eq!(embedder.model().dimension(), 1536);
    }

    #[tokio::test]
    async fn test_rate_limiter_low_limit_configuration() {
        let embedder =
            OpenAIEmbedding::new("test-api-key".to_string(), OpenAIModel::TextEmbedding3Small)
                .await
                .expect("Failed to create embedder")
                .with_rate_limit(1); // Very low limit

        assert_eq!(embedder.model().dimension(), 1536);
    }

    // ========================================================================
    // Builder Pattern Tests
    // ========================================================================

    #[tokio::test]
    async fn test_builder_chaining() {
        let embedder =
            OpenAIEmbedding::new("test-api-key".to_string(), OpenAIModel::TextEmbedding3Small)
                .await
                .expect("Failed to create embedder")
                .with_rate_limit(200)
                .with_max_retries(10);

        assert_eq!(embedder.model().dimension(), 1536);
    }

    #[tokio::test]
    async fn test_multiple_rate_limit_reconfigurations() {
        let embedder =
            OpenAIEmbedding::new("test-api-key".to_string(), OpenAIModel::TextEmbedding3Small)
                .await
                .expect("Failed to create embedder")
                .with_rate_limit(100)
                .with_rate_limit(200)
                .with_rate_limit(50);

        // Last configuration should win
        assert_eq!(embedder.model().dimension(), 1536);
    }

    #[tokio::test]
    async fn test_zero_retries_configuration() {
        let embedder =
            OpenAIEmbedding::new("test-api-key".to_string(), OpenAIModel::TextEmbedding3Small)
                .await
                .expect("Failed to create embedder")
                .with_max_retries(0);

        assert_eq!(embedder.model().dimension(), 1536);
    }

    // ========================================================================
    // Cost Estimation Edge Cases
    // ========================================================================

    #[tokio::test]
    async fn test_cost_estimation_zero_length_strings() {
        let embedder =
            OpenAIEmbedding::new("test-api-key".to_string(), OpenAIModel::TextEmbedding3Small)
                .await
                .expect("Failed to create embedder");

        let texts = vec!["", "", ""];
        let cost = embedder.estimate_cost(&texts);

        // Empty strings should result in zero cost
        assert_eq!(cost, 0.0);
    }

    #[tokio::test]
    async fn test_cost_estimation_mixed_lengths() {
        let embedder =
            OpenAIEmbedding::new("test-api-key".to_string(), OpenAIModel::TextEmbedding3Small)
                .await
                .expect("Failed to create embedder");

        let texts = vec!["a", "ab", "abc", "abcd"];
        let cost = embedder.estimate_cost(&texts);

        // Total: 10 chars => ~2.5 tokens => ~0.00000005 USD at $0.02/1M
        assert!(cost > 0.0);
        assert!(cost < 0.001);
    }

    #[tokio::test]
    async fn test_cost_estimation_very_long_text() {
        let embedder =
            OpenAIEmbedding::new("test-api-key".to_string(), OpenAIModel::TextEmbedding3Small)
                .await
                .expect("Failed to create embedder");

        // 10,000 character text
        let long_text = "a".repeat(10000);
        let texts = vec![long_text.as_str()];
        let cost = embedder.estimate_cost(&texts);

        // 10,000 chars => ~2,500 tokens => ~0.00005 USD at $0.02/1M
        assert!(cost > 0.0);
        assert!(cost > 0.00001); // Should be non-trivial
        assert!(cost < 0.01); // But still small
    }

    #[tokio::test]
    async fn test_cost_proportional_to_model_price() {
        let small =
            OpenAIEmbedding::new("test-api-key".to_string(), OpenAIModel::TextEmbedding3Small)
                .await
                .expect("Failed to create embedder");

        let large =
            OpenAIEmbedding::new("test-api-key".to_string(), OpenAIModel::TextEmbedding3Large)
                .await
                .expect("Failed to create embedder");

        let ada = OpenAIEmbedding::new("test-api-key".to_string(), OpenAIModel::Ada002)
            .await
            .expect("Failed to create embedder");

        let text = vec!["Same text for all models"];

        let cost_small = small.estimate_cost(&text);
        let cost_large = large.estimate_cost(&text);
        let cost_ada = ada.estimate_cost(&text);

        // Verify cost ratios match price ratios
        // Large ($0.13) vs Small ($0.02) => 6.5x
        let ratio_large_small = cost_large / cost_small;
        assert!((ratio_large_small - 6.5).abs() < 0.1);

        // Ada ($0.10) vs Small ($0.02) => 5x
        let ratio_ada_small = cost_ada / cost_small;
        assert!((ratio_ada_small - 5.0).abs() < 0.1);
    }

    // ========================================================================
    // Model Properties Comprehensive Tests
    // ========================================================================

    #[test]
    fn test_all_models_unique_names() {
        let small = OpenAIModel::TextEmbedding3Small.as_str();
        let large = OpenAIModel::TextEmbedding3Large.as_str();
        let ada = OpenAIModel::Ada002.as_str();

        assert_ne!(small, large);
        assert_ne!(small, ada);
        assert_ne!(large, ada);
    }

    #[test]
    fn test_model_dimensions_valid() {
        assert_eq!(OpenAIModel::TextEmbedding3Small.dimension(), 1536);
        assert_eq!(OpenAIModel::TextEmbedding3Large.dimension(), 3072);
        assert_eq!(OpenAIModel::Ada002.dimension(), 1536);

        // All dimensions should be multiples of 64 (common in transformers)
        assert_eq!(1536 % 64, 0);
        assert_eq!(3072 % 64, 0);
    }

    #[test]
    fn test_model_costs_positive() {
        assert!(OpenAIModel::TextEmbedding3Small.cost_per_million_tokens() > 0.0);
        assert!(OpenAIModel::TextEmbedding3Large.cost_per_million_tokens() > 0.0);
        assert!(OpenAIModel::Ada002.cost_per_million_tokens() > 0.0);
    }

    #[test]
    fn test_model_cost_ordering() {
        // text-embedding-3-large should be most expensive
        assert!(
            OpenAIModel::TextEmbedding3Large.cost_per_million_tokens()
                > OpenAIModel::Ada002.cost_per_million_tokens()
        );
        assert!(
            OpenAIModel::Ada002.cost_per_million_tokens()
                > OpenAIModel::TextEmbedding3Small.cost_per_million_tokens()
        );
    }

    // ========================================================================
    // Synchronous Interface Tests
    // ========================================================================

    #[test]
    fn test_sync_dimension_all_models() {
        let runtime = tokio::runtime::Runtime::new().unwrap();

        let small = runtime
            .block_on(OpenAIEmbedding::new(
                "test".to_string(),
                OpenAIModel::TextEmbedding3Small,
            ))
            .unwrap();
        assert_eq!(small.dimension().unwrap(), 1536);

        let large = runtime
            .block_on(OpenAIEmbedding::new(
                "test".to_string(),
                OpenAIModel::TextEmbedding3Large,
            ))
            .unwrap();
        assert_eq!(large.dimension().unwrap(), 3072);

        let ada = runtime
            .block_on(OpenAIEmbedding::new(
                "test".to_string(),
                OpenAIModel::Ada002,
            ))
            .unwrap();
        assert_eq!(ada.dimension().unwrap(), 1536);
    }

    // ========================================================================
    // Model Getter Tests
    // ========================================================================

    #[tokio::test]
    async fn test_model_getter_returns_correct_model() {
        let embedder = OpenAIEmbedding::new("test".to_string(), OpenAIModel::TextEmbedding3Small)
            .await
            .unwrap();

        let model = embedder.model();
        assert_eq!(model.as_str(), "text-embedding-3-small");
        assert_eq!(model.dimension(), 1536);
    }

    #[tokio::test]
    async fn test_model_getter_all_variants() {
        for model_type in [
            OpenAIModel::TextEmbedding3Small,
            OpenAIModel::TextEmbedding3Large,
            OpenAIModel::Ada002,
        ] {
            let embedder = OpenAIEmbedding::new("test".to_string(), model_type)
                .await
                .unwrap();

            let retrieved = embedder.model();
            assert_eq!(retrieved.as_str(), model_type.as_str());
            assert_eq!(retrieved.dimension(), model_type.dimension());
        }
    }

    // ========================================================================
    // Edge Case: Very Large Batch
    // ========================================================================

    #[tokio::test]
    async fn test_large_batch_calculation() {
        // Test that we correctly handle batches larger than 2048
        let embedder = OpenAIEmbedding::new("test".to_string(), OpenAIModel::TextEmbedding3Small)
            .await
            .unwrap();

        // Create 3000 text items (should require 2 API calls: 2048 + 952)
        let texts: Vec<&str> = (0..3000).map(|_| "test").collect();

        // Test cost estimation on large batch
        let cost = embedder.estimate_cost(&texts);
        assert!(cost > 0.0);

        // Each "test" is 4 chars = 1 token
        // 3000 texts * 1 token = 3000 tokens
        // At $0.02/1M: 3000 * 0.02 / 1,000,000 = 0.00006
        assert!((cost - 0.00006).abs() < 0.00001);
    }

    // ========================================================================
    // Batch Chunking Logic Tests
    // ========================================================================

    #[tokio::test]
    async fn test_batch_chunking_exactly_2048() {
        let embedder = OpenAIEmbedding::new("test".to_string(), OpenAIModel::TextEmbedding3Small)
            .await
            .unwrap();

        // Exactly 2048 items - should fit in one request
        let texts: Vec<&str> = (0..2048).map(|_| "x").collect();
        let cost = embedder.estimate_cost(&texts);

        // 2048 chars => 512 tokens => 0.01024 USD at $0.02/1M
        assert!(cost > 0.0);
    }

    #[tokio::test]
    async fn test_batch_chunking_2049_items() {
        let embedder = OpenAIEmbedding::new("test".to_string(), OpenAIModel::TextEmbedding3Small)
            .await
            .unwrap();

        // 2049 items - should require 2 requests (2048 + 1)
        let texts: Vec<&str> = (0..2049).map(|_| "x").collect();
        let cost = embedder.estimate_cost(&texts);

        assert!(cost > 0.0);
    }

    // ========================================================================
    // Character Encoding Edge Cases
    // ========================================================================

    #[tokio::test]
    async fn test_cost_estimation_unicode_characters() {
        let embedder = OpenAIEmbedding::new("test".to_string(), OpenAIModel::TextEmbedding3Small)
            .await
            .unwrap();

        // Unicode characters (emoji, etc.)
        let texts = vec!["Hello 👋 World 🌍"];
        let cost = embedder.estimate_cost(&texts);

        // Should handle unicode properly
        assert!(cost > 0.0);
    }

    #[tokio::test]
    async fn test_cost_estimation_special_characters() {
        let embedder = OpenAIEmbedding::new("test".to_string(), OpenAIModel::TextEmbedding3Small)
            .await
            .unwrap();

        let texts = vec!["Special: !@#$%^&*()"];
        let cost = embedder.estimate_cost(&texts);

        assert!(cost > 0.0);
    }

    #[tokio::test]
    async fn test_cost_estimation_whitespace() {
        let embedder = OpenAIEmbedding::new("test".to_string(), OpenAIModel::TextEmbedding3Small)
            .await
            .unwrap();

        let texts = vec!["   spaces   ", "\t\ttabs\t\t", "\n\nnewlines\n\n"];
        let cost = embedder.estimate_cost(&texts);

        assert!(cost > 0.0);
    }

    // ========================================================================
    // HTTP Client Configuration Tests
    // ========================================================================

    #[tokio::test]
    async fn test_embedder_creation_success() {
        // Should successfully create HTTP client with 30s timeout
        let result =
            OpenAIEmbedding::new("test-key".to_string(), OpenAIModel::TextEmbedding3Small).await;

        assert!(result.is_ok());
    }

    #[tokio::test]
    async fn test_embedder_creation_all_models() {
        for model in [
            OpenAIModel::TextEmbedding3Small,
            OpenAIModel::TextEmbedding3Large,
            OpenAIModel::Ada002,
        ] {
            let result = OpenAIEmbedding::new("test".to_string(), model).await;
            assert!(result.is_ok());
        }
    }

    // ========================================================================
    // Default Configuration Tests
    // ========================================================================

    #[tokio::test]
    async fn test_default_rate_limit() {
        let embedder = OpenAIEmbedding::new("test".to_string(), OpenAIModel::TextEmbedding3Small)
            .await
            .unwrap();

        // Default should be 500 requests per minute (conservative)
        // We can't directly test this without accessing private fields,
        // but we verify the embedder was created successfully
        assert_eq!(embedder.model().dimension(), 1536);
    }

    #[tokio::test]
    async fn test_default_max_retries() {
        let embedder = OpenAIEmbedding::new("test".to_string(), OpenAIModel::TextEmbedding3Small)
            .await
            .unwrap();

        // Default should be 3 retries
        // We verify the embedder was created successfully
        assert_eq!(embedder.model().dimension(), 1536);
    }

    // ========================================================================
    // Numerical Stability Tests
    // ========================================================================

    #[tokio::test]
    async fn test_cost_calculation_no_overflow() {
        let embedder = OpenAIEmbedding::new("test".to_string(), OpenAIModel::TextEmbedding3Small)
            .await
            .unwrap();

        // Very large batch
        let large_text = "a".repeat(100000);
        let texts = vec![large_text.as_str(); 100];

        let cost = embedder.estimate_cost(&texts);

        // Should not overflow or panic
        assert!(cost.is_finite());
        assert!(cost >= 0.0);
    }

    #[tokio::test]
    async fn test_cost_precision_small_values() {
        let embedder = OpenAIEmbedding::new("test".to_string(), OpenAIModel::TextEmbedding3Small)
            .await
            .unwrap();

        let texts = vec!["a"]; // 1 char = 0.25 tokens (rounds to 0)
        let cost = embedder.estimate_cost(&texts);

        // Should be zero since 1 char / 4 = 0 tokens (integer division)
        assert_eq!(cost, 0.0);
    }

    #[tokio::test]
    async fn test_cost_four_chars_equals_one_token() {
        let embedder = OpenAIEmbedding::new("test".to_string(), OpenAIModel::TextEmbedding3Small)
            .await
            .unwrap();

        let texts = vec!["abcd"]; // 4 chars = 1 token
        let cost = embedder.estimate_cost(&texts);

        // 1 token at $0.02/1M = 0.00000002
        let expected = 1.0 * (0.02 / 1_000_000.0);
        assert!((cost - expected).abs() < 1e-10);
    }
}