oxirs-embed 0.2.4

//! Advanced GraphQL API for embedding queries and management
//!
//! This module provides a comprehensive GraphQL interface for interacting with
//! the embedding system, supporting type-safe queries, nested embeddings,
//! filtering, aggregations, and real-time subscriptions.

use crate::{CacheManager, ModelRegistry};
use async_graphql::{
    Context, Enum, FieldResult, InputObject, Object, Schema, SimpleObject, Subscription, Union, ID,
};
use chrono::Utc;
use futures_util::Stream;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::pin::Pin;
use std::sync::Arc;
use tokio::sync::RwLock;
use tokio_stream::{wrappers::BroadcastStream, StreamExt};
use uuid::Uuid;

/// GraphQL schema type
pub type EmbeddingSchema = Schema<QueryRoot, MutationRoot, SubscriptionRoot>;

/// Root query object
pub struct QueryRoot;

/// Root mutation object  
pub struct MutationRoot;

/// Root subscription object
pub struct SubscriptionRoot;

/// GraphQL context containing services
pub struct GraphQLContext {
    pub model_registry: Arc<ModelRegistry>,
    pub cache_manager: Arc<CacheManager>,
    pub event_broadcaster: Arc<RwLock<tokio::sync::broadcast::Sender<EmbeddingEvent>>>,
}

impl GraphQLContext {
    pub fn new(model_registry: Arc<ModelRegistry>, cache_manager: Arc<CacheManager>) -> Self {
        let (tx, _) = tokio::sync::broadcast::channel(1000);
        Self {
            model_registry,
            cache_manager,
            event_broadcaster: Arc::new(RwLock::new(tx)),
        }
    }
}

/// Embedding query result
#[derive(SimpleObject)]
pub struct EmbeddingResult {
    pub entity_id: String,
    pub embedding: Vec<f32>,
    pub dimensions: i32,
    pub model_name: String,
    pub confidence: Option<f64>,
    pub metadata: Option<HashMap<String, String>>,
    pub timestamp: String, // Use String representation for GraphQL compatibility
}

/// Similarity search result
#[derive(SimpleObject)]
pub struct SimilarityResult {
    pub entity_id: String,
    pub similarity_score: f64,
    pub embedding: Option<Vec<f32>>,
    pub metadata: Option<HashMap<String, String>>,
    pub distance_metric: String,
}

/// Batch embedding result
#[derive(SimpleObject)]
pub struct BatchEmbeddingResult {
    pub job_id: ID,
    pub status: BatchStatus,
    pub progress: f64,
    pub total_entities: i32,
    pub processed_entities: i32,
    pub estimated_completion: Option<String>,
    pub results: Vec<EmbeddingResult>,
    pub errors: Vec<String>,
}

/// Model information
#[derive(SimpleObject)]
pub struct ModelInfo {
    pub id: ID,
    pub name: String,
    pub version: String,
    pub model_type: ModelType,
    pub dimensions: i32,
    pub parameters: HashMap<String, String>,
    pub performance_metrics: Option<PerformanceMetrics>,
    pub created_at: String,
    pub updated_at: String,
}

/// Performance metrics
#[derive(SimpleObject)]
pub struct PerformanceMetrics {
    pub inference_latency_ms: f64,
    pub throughput_embeddings_per_sec: f64,
    pub memory_usage_mb: f64,
    pub accuracy_score: Option<f64>,
    pub quality_metrics: HashMap<String, f64>,
}

/// Aggregation result
#[derive(SimpleObject)]
pub struct AggregationResult {
    pub field: String,
    pub aggregation_type: AggregationType,
    pub value: f64,
    pub count: i32,
    pub metadata: HashMap<String, String>,
}

/// Clustering result
#[derive(SimpleObject)]
pub struct ClusteringResult {
    pub cluster_id: i32,
    pub centroid: Vec<f32>,
    pub entities: Vec<String>,
    pub cohesion_score: f64,
    pub metadata: HashMap<String, String>,
}

/// Embedding analytics
#[derive(SimpleObject)]
pub struct EmbeddingAnalytics {
    pub total_embeddings: i32,
    pub dimensions_distribution: Vec<DimensionStat>,
    pub model_usage: Vec<ModelUsageStat>,
    pub quality_trends: Vec<QualityTrend>,
    pub performance_summary: PerformanceMetrics,
    pub cache_statistics: CacheStats,
}

/// Dimension statistics
#[derive(SimpleObject)]
pub struct DimensionStat {
    pub dimensions: i32,
    pub count: i32,
    pub percentage: f64,
}

/// Model usage statistics
#[derive(SimpleObject)]
pub struct ModelUsageStat {
    pub model_name: String,
    pub usage_count: i32,
    pub success_rate: f64,
    pub average_latency_ms: f64,
}

/// Quality trend data
#[derive(SimpleObject)]
pub struct QualityTrend {
    pub timestamp: String,
    pub quality_score: f64,
    pub metric_name: String,
}

/// Cache statistics
#[derive(SimpleObject)]
pub struct CacheStats {
    pub hit_rate: f64,
    pub total_requests: i32,
    pub cache_size_mb: f64,
    pub evictions: i32,
}

/// Input types for queries
/// Embedding query input
#[derive(InputObject)]
pub struct EmbeddingQueryInput {
    pub entity_ids: Option<Vec<String>>,
    pub model_name: Option<String>,
    pub include_metadata: Option<bool>,
    pub format: Option<EmbeddingFormat>,
    pub filters: Option<EmbeddingFilters>,
}

/// Similarity search input
#[derive(InputObject)]
pub struct SimilaritySearchInput {
    pub query_embedding: Option<Vec<f32>>,
    pub query_entity_id: Option<String>,
    pub model_name: String,
    pub top_k: Option<i32>,
    pub threshold: Option<f64>,
    pub distance_metric: Option<DistanceMetric>,
    pub filters: Option<SimilarityFilters>,
}

/// Batch embedding input
#[derive(InputObject)]
pub struct BatchEmbeddingInput {
    pub entity_ids: Vec<String>,
    pub model_name: String,
    pub chunk_size: Option<i32>,
    pub priority: Option<BatchPriority>,
    pub callback_url: Option<String>,
    pub metadata: Option<HashMap<String, String>>,
}

/// Embedding filters
#[derive(InputObject)]
pub struct EmbeddingFilters {
    pub dimensions: Option<IntRange>,
    pub confidence: Option<FloatRange>,
    pub created_after: Option<String>,
    pub created_before: Option<String>,
    pub has_metadata: Option<bool>,
    pub metadata_filters: Option<HashMap<String, String>>,
}

/// Similarity filters
#[derive(InputObject)]
pub struct SimilarityFilters {
    pub entity_types: Option<Vec<String>>,
    pub exclude_entities: Option<Vec<String>>,
    pub metadata_filters: Option<HashMap<String, String>>,
    pub confidence_threshold: Option<f64>,
}

/// Aggregation input
#[derive(InputObject)]
pub struct AggregationInput {
    pub field: String,
    pub aggregation_type: AggregationType,
    pub group_by: Option<Vec<String>>,
    pub filters: Option<EmbeddingFilters>,
}

/// Clustering input
#[derive(InputObject)]
pub struct ClusteringInput {
    pub entity_ids: Option<Vec<String>>,
    pub model_name: String,
    pub num_clusters: Option<i32>,
    pub algorithm: Option<ClusteringAlgorithm>,
    pub distance_metric: Option<DistanceMetric>,
}

/// Time range input
#[derive(InputObject)]
pub struct TimeRange {
    pub start: String,
    pub end: String,
}

/// Range types
#[derive(InputObject)]
pub struct IntRange {
    pub min: Option<i32>,
    pub max: Option<i32>,
}

#[derive(InputObject)]
pub struct FloatRange {
    pub min: Option<f64>,
    pub max: Option<f64>,
}

/// Enums

#[derive(Enum, Copy, Clone, Eq, PartialEq)]
pub enum ModelType {
    Transformer,
    TransE,
    DistMult,
    ComplEx,
    RotatE,
    QuatE,
    GNN,
    Custom,
}

#[derive(Enum, Copy, Clone, Eq, PartialEq)]
pub enum EmbeddingFormat {
    Dense,
    Sparse,
    Compressed,
    Quantized,
}

#[derive(Enum, Copy, Clone, Eq, PartialEq)]
pub enum DistanceMetric {
    Cosine,
    Euclidean,
    Manhattan,
    Jaccard,
    Hamming,
}

#[derive(Enum, Copy, Clone, Eq, PartialEq, Serialize, Deserialize)]
pub enum BatchStatus {
    Pending,
    Running,
    Completed,
    Failed,
    Cancelled,
}

#[derive(Enum, Copy, Clone, Eq, PartialEq)]
pub enum BatchPriority {
    Low,
    Normal,
    High,
    Critical,
}

#[derive(Enum, Copy, Clone, Eq, PartialEq)]
pub enum AggregationType {
    Count,
    Sum,
    Average,
    Min,
    Max,
    StdDev,
    Percentile,
}

#[derive(Enum, Copy, Clone, Eq, PartialEq)]
pub enum ClusteringAlgorithm {
    KMeans,
    DBSCAN,
    Hierarchical,
    SpectralClustering,
}

/// Event types for subscriptions
#[derive(Clone, Serialize, Deserialize, Union)]
pub enum EmbeddingEvent {
    EmbeddingGenerated(EmbeddingGeneratedEvent),
    BatchCompleted(BatchCompletedEvent),
    ModelUpdated(ModelUpdatedEvent),
    QualityAlert(QualityAlertEvent),
}

#[derive(Clone, Serialize, Deserialize, SimpleObject)]
pub struct EmbeddingGeneratedEvent {
    pub entity_id: String,
    pub model_name: String,
    pub timestamp: String,
    pub quality_score: Option<f64>,
}

#[derive(Clone, Serialize, Deserialize, SimpleObject)]
pub struct BatchCompletedEvent {
    pub job_id: String,
    pub status: BatchStatus,
    pub processed_count: i32,
    pub error_count: i32,
    pub completion_time: String,
}

#[derive(Clone, Serialize, Deserialize, SimpleObject)]
pub struct ModelUpdatedEvent {
    pub model_name: String,
    pub version: String,
    pub update_type: String,
    pub timestamp: String,
}

#[derive(Clone, Serialize, Deserialize, SimpleObject)]
pub struct QualityAlertEvent {
    pub alert_type: String,
    pub severity: String,
    pub message: String,
    pub affected_entities: Vec<String>,
    pub timestamp: String,
}

/// GraphQL resolvers

#[Object]
impl QueryRoot {
    /// Get embeddings for specified entities
    async fn embeddings(
        &self,
        ctx: &Context<'_>,
        input: EmbeddingQueryInput,
    ) -> FieldResult<Vec<EmbeddingResult>> {
        let _context = ctx.data::<GraphQLContext>()?;

        // Implementation logic here
        let mut results = Vec::new();

        if let Some(entity_ids) = input.entity_ids {
            for entity_id in entity_ids {
                // Mock implementation - replace with actual embedding retrieval
                results.push(EmbeddingResult {
                    entity_id: entity_id.clone(),
                    embedding: vec![0.1, 0.2, 0.3], // Mock embedding
                    dimensions: 3,
                    model_name: input
                        .model_name
                        .clone()
                        .unwrap_or_else(|| "default".to_string()),
                    confidence: Some(0.95),
                    metadata: None,
                    timestamp: Utc::now().to_rfc3339(),
                });
            }
        }

        Ok(results)
    }

    /// Search for similar entities
    async fn similarity_search(
        &self,
        ctx: &Context<'_>,
        _input: SimilaritySearchInput,
    ) -> FieldResult<Vec<SimilarityResult>> {
        let _context = ctx.data::<GraphQLContext>()?;

        // Mock implementation
        let results = vec![SimilarityResult {
            entity_id: "similar_entity_1".to_string(),
            similarity_score: 0.92,
            embedding: Some(vec![0.1, 0.2, 0.3]),
            metadata: None,
            distance_metric: "cosine".to_string(),
        }];

        Ok(results)
    }

    /// Get model information
    async fn models(
        &self,
        ctx: &Context<'_>,
        _names: Option<Vec<String>>,
    ) -> FieldResult<Vec<ModelInfo>> {
        let _context = ctx.data::<GraphQLContext>()?;

        // Mock implementation
        let models = vec![ModelInfo {
            id: ID::from("model_1"),
            name: "TransE".to_string(),
            version: "1.0.0".to_string(),
            model_type: ModelType::TransE,
            dimensions: 128,
            parameters: HashMap::new(),
            performance_metrics: None,
            created_at: Utc::now().to_rfc3339(),
            updated_at: Utc::now().to_rfc3339(),
        }];

        Ok(models)
    }

    /// Get aggregated statistics
    async fn aggregation(
        &self,
        ctx: &Context<'_>,
        input: AggregationInput,
    ) -> FieldResult<AggregationResult> {
        let _context = ctx.data::<GraphQLContext>()?;

        // Mock implementation
        Ok(AggregationResult {
            field: input.field,
            aggregation_type: input.aggregation_type,
            value: 42.0,
            count: 100,
            metadata: HashMap::new(),
        })
    }

    /// Perform clustering analysis
    async fn clustering(
        &self,
        ctx: &Context<'_>,
        _input: ClusteringInput,
    ) -> FieldResult<Vec<ClusteringResult>> {
        let _context = ctx.data::<GraphQLContext>()?;

        // Mock implementation
        let results = vec![ClusteringResult {
            cluster_id: 0,
            centroid: vec![0.1, 0.2, 0.3],
            entities: vec!["entity1".to_string(), "entity2".to_string()],
            cohesion_score: 0.85,
            metadata: HashMap::new(),
        }];

        Ok(results)
    }

    /// Get comprehensive analytics
    async fn analytics(
        &self,
        ctx: &Context<'_>,
        _time_range: Option<TimeRange>,
    ) -> FieldResult<EmbeddingAnalytics> {
        let _context = ctx.data::<GraphQLContext>()?;

        // Mock implementation
        Ok(EmbeddingAnalytics {
            total_embeddings: 10000,
            dimensions_distribution: vec![
                DimensionStat {
                    dimensions: 128,
                    count: 7000,
                    percentage: 70.0,
                },
                DimensionStat {
                    dimensions: 256,
                    count: 3000,
                    percentage: 30.0,
                },
            ],
            model_usage: vec![],
            quality_trends: vec![],
            performance_summary: PerformanceMetrics {
                inference_latency_ms: 25.5,
                throughput_embeddings_per_sec: 1000.0,
                memory_usage_mb: 512.0,
                accuracy_score: Some(0.95),
                quality_metrics: HashMap::new(),
            },
            cache_statistics: CacheStats {
                hit_rate: 0.85,
                total_requests: 50000,
                cache_size_mb: 256.0,
                evictions: 100,
            },
        })
    }
}

#[Object]
impl MutationRoot {
    /// Start batch embedding generation
    async fn start_batch_embedding(
        &self,
        ctx: &Context<'_>,
        input: BatchEmbeddingInput,
    ) -> FieldResult<BatchEmbeddingResult> {
        let _context = ctx.data::<GraphQLContext>()?;

        let job_id = Uuid::new_v4();

        // Mock implementation
        Ok(BatchEmbeddingResult {
            job_id: ID::from(job_id.to_string()),
            status: BatchStatus::Pending,
            progress: 0.0,
            total_entities: input.entity_ids.len() as i32,
            processed_entities: 0,
            estimated_completion: Some((Utc::now() + chrono::Duration::minutes(10)).to_rfc3339()),
            results: vec![],
            errors: vec![],
        })
    }

    /// Cancel batch job
    async fn cancel_batch_job(&self, ctx: &Context<'_>, _job_id: ID) -> FieldResult<bool> {
        let _context = ctx.data::<GraphQLContext>()?;

        // Mock implementation
        Ok(true)
    }

    /// Update model configuration
    async fn update_model(
        &self,
        ctx: &Context<'_>,
        model_name: String,
        parameters: HashMap<String, String>,
    ) -> FieldResult<ModelInfo> {
        let _context = ctx.data::<GraphQLContext>()?;

        // Mock implementation
        Ok(ModelInfo {
            id: ID::from("model_1"),
            name: model_name,
            version: "1.1.0".to_string(),
            model_type: ModelType::TransE,
            dimensions: 128,
            parameters,
            performance_metrics: None,
            created_at: Utc::now().to_rfc3339(),
            updated_at: Utc::now().to_rfc3339(),
        })
    }
}

#[Subscription]
impl SubscriptionRoot {
    /// Subscribe to embedding generation events
    async fn embedding_events(
        &self,
        ctx: &Context<'_>,
        _entity_filter: Option<Vec<String>>,
    ) -> Pin<Box<dyn Stream<Item = EmbeddingEvent> + Send>> {
        let context = ctx
            .data::<GraphQLContext>()
            .expect("GraphQLContext should be available in context");
        let rx = context.event_broadcaster.read().await.subscribe();

        let stream = BroadcastStream::new(rx).filter_map(|result| result.ok());

        Box::pin(stream)
    }

    /// Subscribe to batch job updates
    async fn batch_updates(
        &self,
        ctx: &Context<'_>,
        _job_id: Option<ID>,
    ) -> Pin<Box<dyn Stream<Item = BatchCompletedEvent> + Send>> {
        let context = ctx
            .data::<GraphQLContext>()
            .expect("GraphQLContext should be available in context");
        let rx = context.event_broadcaster.read().await.subscribe();

        let stream = BroadcastStream::new(rx).filter_map(|result| match result {
            Ok(EmbeddingEvent::BatchCompleted(event)) => Some(event),
            _ => None,
        });

        Box::pin(stream)
    }

    /// Subscribe to quality alerts
    async fn quality_alerts(
        &self,
        ctx: &Context<'_>,
        _severity_filter: Option<Vec<String>>,
    ) -> Pin<Box<dyn Stream<Item = QualityAlertEvent> + Send>> {
        let context = ctx
            .data::<GraphQLContext>()
            .expect("GraphQLContext should be available in context");
        let rx = context.event_broadcaster.read().await.subscribe();

        let stream = BroadcastStream::new(rx).filter_map(|result| match result {
            Ok(EmbeddingEvent::QualityAlert(event)) => Some(event),
            _ => None,
        });

        Box::pin(stream)
    }
}

/// Create the GraphQL schema
pub fn create_schema(context: GraphQLContext) -> EmbeddingSchema {
    Schema::build(QueryRoot, MutationRoot, SubscriptionRoot)
        .data(context)
        .finish()
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::ModelRegistry;

    #[tokio::test]
    async fn test_graphql_context_creation() {
        let storage_path = tempfile::tempdir()
            .expect("should succeed")
            .path()
            .to_path_buf();
        let model_registry = Arc::new(ModelRegistry::new(storage_path));
        let cache_config = crate::caching::CacheConfig::default();
        let cache_manager = Arc::new(CacheManager::new(cache_config));

        let context = GraphQLContext::new(model_registry, cache_manager);
        assert!(context.event_broadcaster.read().await.receiver_count() == 0);
    }

    #[tokio::test]
    async fn test_schema_creation() {
        let storage_path = tempfile::tempdir()
            .expect("should succeed")
            .path()
            .to_path_buf();
        let model_registry = Arc::new(ModelRegistry::new(storage_path));
        let cache_config = crate::caching::CacheConfig::default();
        let cache_manager = Arc::new(CacheManager::new(cache_config));
        let context = GraphQLContext::new(model_registry, cache_manager);

        let schema = create_schema(context);
        // Note: type_name method doesn't exist in async-graphql 7.0
        // Just verify the schema was created successfully by checking it's not null
        assert!(!schema.sdl().is_empty());
    }
}