graphrag-core 0.2.0

Core portable library for GraphRAG - works on native and WASM
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
//! Implicit relationship inference system

use crate::core::{Entity, EntityId, KnowledgeGraph, TextChunk};
use std::collections::HashMap;

/// Represents a relationship inferred between two entities
///
/// This structure contains information about a relationship discovered through
/// co-occurrence analysis and contextual pattern matching.
#[derive(Debug, Clone)]
pub struct InferredRelation {
    /// Source entity in the relationship
    pub source: EntityId,
    /// Target entity in the relationship
    pub target: EntityId,
    /// Type of relationship (e.g., "FRIENDS", "DISCUSSES", "LOCATED_IN")
    pub relation_type: String,
    /// Confidence score for this inference (0.0-1.0)
    pub confidence: f32,
    /// Number of text chunks providing evidence for this relationship
    pub evidence_count: usize,
}

/// Configuration for the relationship inference engine
///
/// Controls the behavior and thresholds used when inferring implicit relationships
/// between entities based on their co-occurrence in text.
#[derive(Debug, Clone)]
pub struct InferenceConfig {
    /// Minimum confidence threshold for accepting an inferred relationship
    pub min_confidence: f32,
    /// Maximum number of candidate relationships to return per query
    pub max_candidates: usize,
    /// Threshold for determining if entities co-occur frequently enough
    pub co_occurrence_threshold: f32,
}

impl Default for InferenceConfig {
    fn default() -> Self {
        Self {
            min_confidence: 0.3,
            max_candidates: 10,
            co_occurrence_threshold: 0.4,
        }
    }
}

/// Engine for inferring implicit relationships between entities
///
/// The inference engine analyzes entity co-occurrence patterns, proximity,
/// and contextual clues to discover relationships that may not be explicitly
/// stated in the text.
pub struct InferenceEngine {
    /// Configuration controlling inference behavior
    config: InferenceConfig,
}

impl InferenceEngine {
    /// Create a new inference engine with the given configuration
    ///
    /// # Arguments
    ///
    /// * `config` - Configuration controlling inference thresholds and limits
    pub fn new(config: InferenceConfig) -> Self {
        Self { config }
    }

    /// Infer relationships for a target entity
    ///
    /// Analyzes the knowledge graph to find entities that frequently co-occur with
    /// the target entity and have contextual evidence of a relationship.
    ///
    /// # Arguments
    ///
    /// * `target_entity` - The entity to find relationships for
    /// * `relation_type` - The type of relationship to infer (e.g., "FRIENDS")
    /// * `knowledge_graph` - The knowledge graph containing entities and chunks
    ///
    /// # Returns
    ///
    /// Returns a vector of inferred relationships, sorted by confidence score,
    /// limited to `max_candidates` from the configuration.
    pub fn infer_relationships(
        &self,
        target_entity: &EntityId,
        relation_type: &str,
        knowledge_graph: &KnowledgeGraph,
    ) -> Vec<InferredRelation> {
        let mut inferred_relations = Vec::new();

        // Find target entity
        let target_ent = knowledge_graph.entities().find(|e| &e.id == target_entity);

        if target_ent.is_none() {
            return inferred_relations;
        }

        // Get chunks containing target entity
        let target_chunks: Vec<_> = knowledge_graph
            .chunks()
            .filter(|chunk| chunk.entities.contains(target_entity))
            .collect();

        // Find co-occurring entities
        let mut entity_scores: HashMap<EntityId, f32> = HashMap::new();

        for chunk in &target_chunks {
            for entity_id in &chunk.entities {
                if entity_id != target_entity {
                    let evidence_score =
                        self.calculate_evidence_score(chunk, target_entity, entity_id);
                    *entity_scores.entry(entity_id.clone()).or_insert(0.0) += evidence_score;
                }
            }
        }

        // Create inferred relations for high-scoring entities
        for (entity_id, score) in entity_scores {
            let normalized_score = (score / target_chunks.len() as f32).min(1.0);

            if normalized_score >= self.config.min_confidence {
                inferred_relations.push(InferredRelation {
                    source: target_entity.clone(),
                    target: entity_id,
                    relation_type: relation_type.to_string(),
                    confidence: normalized_score,
                    evidence_count: target_chunks.len(),
                });
            }
        }

        // Sort by confidence and limit results
        inferred_relations.sort_by(|a, b| {
            b.confidence
                .partial_cmp(&a.confidence)
                .unwrap_or(std::cmp::Ordering::Equal)
        });
        inferred_relations.truncate(self.config.max_candidates);

        inferred_relations
    }

    /// Calculate evidence score for a potential relationship
    ///
    /// Analyzes a text chunk to determine how strongly it suggests a relationship
    /// between two entities. Uses proximity analysis, pattern matching, and
    /// contextual clues.
    ///
    /// # Arguments
    ///
    /// * `chunk` - The text chunk containing both entities
    /// * `entity_a` - First entity ID
    /// * `entity_b` - Second entity ID
    ///
    /// # Returns
    ///
    /// Returns a score between 0.0 and 1.0 indicating relationship strength.
    /// Higher scores indicate stronger evidence of a relationship.
    fn calculate_evidence_score(
        &self,
        chunk: &TextChunk,
        entity_a: &EntityId,
        entity_b: &EntityId,
    ) -> f32 {
        let content = &chunk.content.to_lowercase();
        let mut score: f32 = 0.2; // Lower base co-occurrence score

        // Get entity names for contextual analysis
        let entity_a_name = self.extract_entity_name(entity_a);
        let entity_b_name = self.extract_entity_name(entity_b);

        // Calculate proximity score between entities in text
        let proximity_bonus =
            self.calculate_proximity_score(content, &entity_a_name, &entity_b_name);
        score += proximity_bonus;

        // Enhanced friendship indicators with contextual patterns
        let friendship_patterns = [
            // Direct friendship terms
            ("best friend", 0.8),
            ("close friend", 0.7),
            ("good friend", 0.6),
            ("friend", 0.4),
            ("friends", 0.4),
            ("friendship", 0.5),
            // Activity-based friendship indicators
            ("played together", 0.6),
            ("went together", 0.5),
            ("talked with", 0.4),
            ("helped each other", 0.7),
            ("shared", 0.3),
            ("together", 0.3),
            // Emotional bonding indicators
            ("trusted", 0.6),
            ("loyal", 0.5),
            ("bond", 0.5),
            ("close", 0.4),
            ("cared for", 0.6),
            ("looked after", 0.5),
            ("protected", 0.6),
            // Adventure/activity companionship
            ("adventure", 0.4),
            ("explore", 0.3),
            ("journey", 0.3),
            ("companion", 0.6),
            ("partner", 0.5),
            ("ally", 0.5),
        ];

        // Contextual pattern matching with weighted scores
        for (pattern, weight) in &friendship_patterns {
            if content.contains(pattern) {
                // Additional context bonus if entities are mentioned near the pattern
                let context_bonus =
                    if self.entities_near_pattern(content, &entity_a_name, &entity_b_name, pattern)
                    {
                        weight * 0.5
                    } else {
                        *weight * 0.3
                    };
                score += context_bonus;
            }
        }

        // Enhanced negative indicators with contextual analysis
        let negative_patterns = [
            ("enemy", -0.8),
            ("enemies", -0.8),
            ("rival", -0.6),
            ("rivals", -0.6),
            ("fought", -0.5),
            ("fight", -0.4),
            ("battle", -0.4),
            ("conflict", -0.5),
            ("angry at", -0.6),
            ("hate", -0.7),
            ("hated", -0.7),
            ("despise", -0.6),
            ("betrayed", -0.8),
            ("betrayal", -0.7),
            ("argued", -0.3),
            ("quarrel", -0.4),
            ("against", -0.2),
            ("opposed", -0.4),
            ("disagree", -0.2),
        ];

        for (pattern, weight) in &negative_patterns {
            if content.contains(pattern) {
                let context_penalty =
                    if self.entities_near_pattern(content, &entity_a_name, &entity_b_name, pattern)
                    {
                        weight * 1.2
                    } else {
                        weight * 0.8
                    };
                score += context_penalty; // weight is already negative
            }
        }

        // Family relationship indicators (neutral for friendship)
        let family_patterns = ["brother", "sister", "cousin", "aunt", "uncle", "family"];
        let mut has_family_relation = false;
        for pattern in &family_patterns {
            if content.contains(pattern) {
                has_family_relation = true;
                break;
            }
        }

        // Family relations can still be friendships, but lower weight
        if has_family_relation {
            score *= 0.8;
        }

        score.clamp(0.0, 1.0)
    }

    /// Extract clean entity name from an entity ID
    ///
    /// Entity IDs typically have format "TYPE_normalized_name". This method
    /// extracts just the name portion and formats it for matching.
    ///
    /// # Arguments
    ///
    /// * `entity_id` - The entity ID to extract the name from
    ///
    /// # Returns
    ///
    /// Returns the cleaned, lowercase entity name with underscores replaced by spaces
    fn extract_entity_name(&self, entity_id: &EntityId) -> String {
        // EntityId format is typically "TYPE_normalized_name"
        let id_str = &entity_id.0;
        if let Some(underscore_pos) = id_str.find('_') {
            id_str[underscore_pos + 1..]
                .replace('_', " ")
                .to_lowercase()
        } else {
            id_str.to_lowercase()
        }
    }

    /// Calculate proximity score between entities in text
    ///
    /// Determines how close two entities are mentioned in the text. Closer proximity
    /// suggests a stronger relationship between the entities.
    ///
    /// # Arguments
    ///
    /// * `content` - The text content to analyze
    /// * `entity_a` - Name of the first entity
    /// * `entity_b` - Name of the second entity
    ///
    /// # Returns
    ///
    /// Returns a proximity score:
    /// - 0.4 for very close (0-2 words apart)
    /// - 0.3 for close (3-5 words apart)
    /// - 0.2 for medium distance (6-10 words apart)
    /// - 0.1 for far (11-20 words apart)
    /// - 0.05 for very far (20+ words apart)
    fn calculate_proximity_score(&self, content: &str, entity_a: &str, entity_b: &str) -> f32 {
        let words: Vec<&str> = content.split_whitespace().collect();
        let mut positions_a = Vec::new();
        let mut positions_b = Vec::new();

        // Find all positions of entity mentions
        for (i, word) in words.iter().enumerate() {
            if word.to_lowercase().contains(entity_a) {
                positions_a.push(i);
            }
            if word.to_lowercase().contains(entity_b) {
                positions_b.push(i);
            }
        }

        if positions_a.is_empty() || positions_b.is_empty() {
            return 0.0;
        }

        // Find minimum distance between any mentions
        let mut min_distance = usize::MAX;
        for &pos_a in &positions_a {
            for &pos_b in &positions_b {
                let distance = pos_a.abs_diff(pos_b);
                min_distance = min_distance.min(distance);
            }
        }

        // Convert distance to proximity score (closer = higher score)
        match min_distance {
            0..=2 => 0.4,   // Very close (same sentence likely)
            3..=5 => 0.3,   // Close
            6..=10 => 0.2,  // Medium distance
            11..=20 => 0.1, // Far
            _ => 0.05,      // Very far
        }
    }

    /// Check if entities are mentioned near a relationship pattern
    ///
    /// Determines if both entities appear within a 200-character window
    /// around a relationship keyword or pattern. This helps determine if
    /// a relationship pattern actually applies to these specific entities.
    ///
    /// # Arguments
    ///
    /// * `content` - The text content to search
    /// * `entity_a` - Name of the first entity
    /// * `entity_b` - Name of the second entity
    /// * `pattern` - The relationship pattern to search for (e.g., "friend", "enemy")
    ///
    /// # Returns
    ///
    /// Returns `true` if both entities are found within 100 characters before and
    /// after the pattern, `false` otherwise.
    fn entities_near_pattern(
        &self,
        content: &str,
        entity_a: &str,
        entity_b: &str,
        pattern: &str,
    ) -> bool {
        if let Some(pattern_pos) = content.find(pattern) {
            let start = pattern_pos.saturating_sub(100); // 100 chars before
            let end = (pattern_pos + pattern.len() + 100).min(content.len()); // 100 chars after
            let context = &content[start..end];

            context.contains(entity_a) && context.contains(entity_b)
        } else {
            false
        }
    }

    /// Find an entity in the knowledge graph by name
    ///
    /// Performs a case-insensitive substring search to find an entity whose
    /// name contains the given search string.
    ///
    /// # Arguments
    ///
    /// * `knowledge_graph` - The knowledge graph to search
    /// * `name` - The name (or partial name) to search for
    ///
    /// # Returns
    ///
    /// Returns `Some(&Entity)` if a matching entity is found, `None` otherwise.
    pub fn find_entity_by_name<'a>(
        &self,
        knowledge_graph: &'a KnowledgeGraph,
        name: &str,
    ) -> Option<&'a Entity> {
        knowledge_graph
            .entities()
            .find(|e| e.name.to_lowercase().contains(&name.to_lowercase()))
    }
}