oxirs-chat 0.2.4

RAG chat API with LLM integration and natural language to SPARQL translation
Documentation
//! Knowledge graph exploration and entity expansion
//!
//! Provides graph traversal capabilities with entity expansion and relationship discovery.

use super::*;

/// Graph traversal engine for knowledge graph exploration
pub struct GraphTraversal {
    store: Arc<dyn Store>,
}

impl GraphTraversal {
    pub fn new(store: Arc<dyn Store>) -> Self {
        Self { store }
    }

    /// Perform graph search based on extracted entities
    pub async fn perform_graph_search(
        &self,
        _query: &str,
        entities: &[ExtractedEntity],
        max_depth: usize,
    ) -> Result<Vec<RagSearchResult>> {
        let mut results = Vec::new();
        let mut visited_entities = HashSet::new();

        for entity in entities {
            if let Some(ref iri) = entity.iri {
                if visited_entities.contains(iri) {
                    continue;
                }
                visited_entities.insert(iri.clone());

                // Basic entity traversal
                let entity_triples = self.find_entity_triples(iri, max_depth).await?;

                for triple in entity_triples {
                    results.push(RagSearchResult {
                        triple,
                        score: entity.confidence * 0.8, // Slightly lower score than direct matches
                        search_type: SearchType::GraphTraversal,
                    });
                }

                // Enhanced entity expansion
                let expanded_results = self.expand_entity_context(iri, entity.confidence).await?;
                results.extend(expanded_results);
            }
        }

        // Remove duplicates and apply graph-specific ranking
        self.deduplicate_and_rank_graph_results(results)
    }

    /// Find triples related to an entity with given depth
    async fn find_entity_triples(&self, entity_iri: &str, max_depth: usize) -> Result<Vec<Triple>> {
        let mut triples = Vec::new();
        let mut visited = HashSet::new();
        let mut current_entities = vec![entity_iri.to_string()];

        for _depth in 0..max_depth {
            let next_entities = Vec::new();

            for entity in &current_entities {
                if visited.contains(entity) {
                    continue;
                }
                visited.insert(entity.clone());

                // Find triples where entity is subject
                if let Ok(subject_triples) = self.find_triples_with_subject(entity).await {
                    triples.extend(subject_triples);
                }

                // Find triples where entity is object
                if let Ok(object_triples) = self.find_triples_with_object(entity).await {
                    triples.extend(object_triples);
                }
            }

            current_entities = next_entities;
            if current_entities.is_empty() {
                break;
            }
        }

        Ok(triples)
    }

    /// Expand entity context with related entities and properties
    async fn expand_entity_context(
        &self,
        entity_iri: &str,
        base_confidence: f32,
    ) -> Result<Vec<RagSearchResult>> {
        let mut expanded_results = Vec::new();

        // Find type information
        let type_triples = self.find_entity_types(entity_iri).await?;
        for triple in type_triples {
            expanded_results.push(RagSearchResult {
                triple,
                score: base_confidence * 0.9, // High score for type information
                search_type: SearchType::GraphTraversal,
            });
        }

        // Find same-type entities (for entity recommendation)
        let same_type_entities = self.find_same_type_entities(entity_iri, 5).await?;
        for triple in same_type_entities {
            expanded_results.push(RagSearchResult {
                triple,
                score: base_confidence * 0.6, // Lower score for related entities
                search_type: SearchType::GraphTraversal,
            });
        }

        // Find property domains and ranges
        let property_context = self.find_property_context(entity_iri).await?;
        for triple in property_context {
            expanded_results.push(RagSearchResult {
                triple,
                score: base_confidence * 0.7,
                search_type: SearchType::GraphTraversal,
            });
        }

        Ok(expanded_results)
    }

    /// Find type information for an entity
    async fn find_entity_types(&self, entity_iri: &str) -> Result<Vec<Triple>> {
        let type_predicate = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type";
        let mut type_triples = Vec::new();

        if let Ok(subject_triples) = self.find_triples_with_subject(entity_iri).await {
            for triple in subject_triples {
                if triple.predicate().to_string().contains(type_predicate) {
                    type_triples.push(triple);
                }
            }
        }

        Ok(type_triples)
    }

    /// Find entities of the same type
    async fn find_same_type_entities(&self, entity_iri: &str, limit: usize) -> Result<Vec<Triple>> {
        let mut same_type_triples = Vec::new();

        // First, get the types of the input entity
        let entity_types = self.find_entity_types(entity_iri).await?;

        if entity_types.is_empty() {
            return Ok(same_type_triples);
        }

        // For each type, find other entities of the same type (simplified implementation)
        for type_triple in entity_types.iter().take(2) {
            // Limit to 2 types for performance
            if let Ok(entities_of_type) = self
                .find_entities_of_type(&type_triple.object().to_string(), limit)
                .await
            {
                same_type_triples.extend(entities_of_type);
            }
        }

        Ok(same_type_triples)
    }

    /// Find entities of a specific type
    async fn find_entities_of_type(&self, type_iri: &str, limit: usize) -> Result<Vec<Triple>> {
        // This is a simplified implementation
        // In a real implementation, you would query the store for entities of this type
        let _ = (type_iri, limit);
        Ok(Vec::new())
    }

    /// Find property context (domains, ranges)
    async fn find_property_context(&self, entity_iri: &str) -> Result<Vec<Triple>> {
        // Simplified implementation
        let _ = entity_iri;
        Ok(Vec::new())
    }

    /// Find triples with entity as subject
    async fn find_triples_with_subject(&self, entity_iri: &str) -> Result<Vec<Triple>> {
        // This would query the actual store in a real implementation
        let _ = entity_iri;
        Ok(Vec::new())
    }

    /// Find triples with entity as object
    async fn find_triples_with_object(&self, entity_iri: &str) -> Result<Vec<Triple>> {
        // This would query the actual store in a real implementation
        let _ = entity_iri;
        Ok(Vec::new())
    }

    /// Remove duplicates and rank graph results
    fn deduplicate_and_rank_graph_results(
        &self,
        mut results: Vec<RagSearchResult>,
    ) -> Result<Vec<RagSearchResult>> {
        // Remove duplicates based on triple content
        let mut seen = HashSet::new();
        results.retain(|result| {
            let key = format!(
                "{}:{}:{}",
                result.triple.subject(),
                result.triple.predicate(),
                result.triple.object()
            );
            seen.insert(key)
        });

        // Sort by score (descending)
        results.sort_by(|a, b| {
            b.score
                .partial_cmp(&a.score)
                .unwrap_or(std::cmp::Ordering::Equal)
        });

        Ok(results)
    }
}

/// Extracted entity from query analysis
#[derive(Debug, Clone)]
pub struct ExtractedEntity {
    pub text: String,
    pub entity_type: EntityType,
    pub iri: Option<String>,
    pub confidence: f32,
    pub aliases: Vec<String>,
}

/// Entity types for classification
#[derive(Debug, Clone, PartialEq)]
pub enum EntityType {
    Person,
    Organization,
    Location,
    Concept,
    Event,
    Other,
}

/// Extracted relationship from query analysis
#[derive(Debug, Clone)]
pub struct ExtractedRelationship {
    pub subject: String,
    pub predicate: String,
    pub object: String,
    pub confidence: f32,
    pub relation_type: RelationType,
}

/// Relationship types for classification
#[derive(Debug, Clone, PartialEq)]
pub enum RelationType {
    CausalRelation,
    TemporalRelation,
    SpatialRelation,
    ConceptualRelation,
    Other,
}