1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
# GraphRAG Advanced Features Configuration Example
# This file demonstrates the Phase 1 foundation features and all advanced features from Phases 2-3
# Copy and modify this file to enable state-of-the-art GraphRAG capabilities
# ============================================================================
# BASIC CONFIGURATION
# ============================================================================
= "output"
= 400
= 50
= "semantic" # or "algorithmic" or "hybrid"
# ============================================================================
# PHASE 1 FEATURES (Foundation)
# ============================================================================
[entities]
= 0.5
= true
= 2
# Phase 1.1: Triple Reflection - Validate extracted relationships
enable_triple_reflection = true
validation_min_confidence = 0.7 # Minimum confidence to keep a triple (0.0-1.0)
# Phase 1.2: Temporal Fields - Enable temporal reasoning
# Temporal fields are automatically extracted when present in text
# No additional configuration needed
# Phase 1.3: ATOM Atomic Fact Extraction (Simplified)
use_atomic_facts = false # Set to true for fine-grained fact extraction
= 400 # Maximum tokens per atomic fact
# ============================================================================
# PHASE 2 FEATURES (Retrieval Enhancements)
# ============================================================================
[advanced_features]
# Phase 2.1: Symbolic Anchoring (CatRAG-style)
# Automatically applied for conceptual queries (e.g., "What is love?")
[]
= 0.3 # Minimum relevance score for anchors (0.0-1.0)
max_anchors = 5 # Maximum anchors to extract per query
= 10 # Maximum entities grounded per anchor
# Phase 2.2: Dynamic Edge Weighting
# Query-aware relationship weight adjustment
[advanced_features.dynamic_weighting]
enable_semantic_boost = true # Boost relationships semantically similar to query
= true # Boost recent/relevant temporal relationships
= true # Boost relationships matching query concepts
= true # Boost strong causal relationships
# Phase 2.3: Causal Chain Analysis
# Multi-step causal reasoning (e.g., "What caused X to lead to Y?")
[advanced_features.causal_analysis]
= 0.3 # Minimum confidence for causal chains (0.0-1.0)
min_causal_strength = 0.5 # Minimum causal strength to consider (0.0-1.0)
max_chain_depth = 5 # Maximum chain depth to search
require_temporal_consistency = true # Require chronological ordering in chains
# ============================================================================
# PHASE 3 FEATURES (Advanced Optimizations)
# ============================================================================
# Phase 3.1: Hierarchical Relationship Clustering
# Multi-level relationship organization using Leiden algorithm
[]
= 3 # Number of hierarchy levels (2-5)
= [0.8, 1.0, 1.5] # Resolution parameters (higher = more clusters)
= 3 # Minimum relationships per cluster
generate_summaries = true # Generate LLM summaries for clusters (requires Ollama)
# Phase 3.2: Graph Weight Optimization (DW-GRPO)
# Heuristic optimization of relationship weights based on query performance
[advanced_features.weight_optimization]
= 0.05 # Learning rate for weight adjustments (0.01-0.5)
= 20 # Maximum optimization iterations
= 5 # Window size for slope calculation
= 0.01 # Minimum slope to avoid stagnation
use_llm_eval = true # Use LLM for quality evaluation (requires Ollama)
# Objective weights (must sum to ~1.0)
[]
= 0.4 # Weight for relevance objective
= 0.4 # Weight for faithfulness objective
= 0.2 # Weight for conciseness objective
# ============================================================================
# EMBEDDINGS CONFIGURATION
# ============================================================================
[]
= "nomic-embed-text" # Ollama embedding model
= 768
= "http://localhost:11434"
= 32
# ============================================================================
# GRAPH CONFIGURATION
# ============================================================================
[]
= 10
= 0.7
= true
= 0.6
[]
= 3
= "bfs" # or "dfs"
= 50
# ============================================================================
# RETRIEVAL CONFIGURATION
# ============================================================================
[]
= 10
= "hybrid" # "vector", "graph", or "hybrid"
# ============================================================================
# OLLAMA CONFIGURATION
# ============================================================================
[]
= "http://localhost:11434"
= "llama3.2" # Model for relationship extraction and summaries
= "nomic-embed-text"
= 300
= 3
# ============================================================================
# PARALLEL PROCESSING
# ============================================================================
[]
= true
= 4
= 10
= 4
= true
= true
= true
# ============================================================================
# USAGE NOTES
# ============================================================================
# 1. Triple Reflection: Improves quality by validating relationships against source text
# - Best for: High-precision applications where accuracy matters
# - Cost: +30-50% processing time
# - Enable: entities.enable_triple_reflection = true
# 2. Atomic Fact Extraction: Fine-grained fact extraction (ATOM-style)
# - Best for: Scientific texts, detailed analysis
# - Cost: +50-100% processing time
# - Enable: entities.use_atomic_facts = true
# 3. Symbolic Anchoring: Better conceptual/abstract query handling
# - Best for: "What is X?" philosophical/conceptual queries
# - Cost: Minimal (only affects retrieval)
# - Auto-enabled for conceptual queries
# 4. Dynamic Edge Weighting: Query-aware relationship scoring
# - Best for: Complex queries requiring context-aware ranking
# - Cost: Minimal (only affects retrieval)
# - Enable: advanced_features.dynamic_weighting.*
# 5. Causal Chain Analysis: Multi-step causal reasoning
# - Best for: "Why did X cause Y?" causal queries
# - Cost: Moderate (only for causal queries)
# - Enable: advanced_features.causal_analysis.*
# 6. Hierarchical Clustering: Multi-level relationship organization
# - Best for: Large graphs needing structure
# - Cost: One-time build cost
# - Enable: Call build_relationship_hierarchy() after graph construction
# 7. Weight Optimization: Improve retrieval quality over time
# - Best for: Production systems with test queries
# - Cost: One-time optimization phase
# - Enable: advanced_features.weight_optimization.*
# ============================================================================
# RECOMMENDED CONFIGURATIONS
# ============================================================================
# HIGH PRECISION (research, accuracy-critical):
# - enable_triple_reflection = true
# - use_atomic_facts = true
# - validation_min_confidence = 0.8
# - min_confidence = 0.7
# BALANCED (general purpose):
# - enable_triple_reflection = true
# - use_atomic_facts = false
# - All dynamic weighting enabled
# - Causal analysis enabled
# HIGH THROUGHPUT (large-scale, performance-critical):
# - enable_triple_reflection = false
# - use_atomic_facts = false
# - generate_summaries = false
# - use_llm_eval = false
# - Rely on dynamic weighting only
# CONCEPTUAL/PHILOSOPHICAL QUERIES:
# - Symbolic anchoring enabled (default)
# - max_anchors = 10
# - enable_semantic_boost = true
# CAUSAL REASONING:
# - require_temporal_consistency = true
# - min_causal_strength = 0.7
# - max_chain_depth = 7