1use serde::{Deserialize, Serialize};
6
/// Prompt template for the first extraction pass over a document.
///
/// Placeholders filled by `PromptBuilder::build_extraction_prompt`:
/// `{entity_types}`, `{tuple_delimiter}` and `{input_text}`.
/// Literal braces of the JSON example are written doubled (`{{` / `}}`) in
/// this template.
pub const ENTITY_EXTRACTION_PROMPT: &str = r#"-Goal-
Given a text document that is potentially relevant to this activity and a list of entity types, identify all entities of those types from the text and all relationships among the identified entities.

-Steps-
1. Identify all entities. For each identified entity, extract the following information:
- entity_name: Name of the entity, capitalized
- entity_type: One of the following types: [{entity_types}]
- entity_description: Comprehensive description of the entity's attributes and activities
Format each entity as ("entity"{tuple_delimiter}<entity_name>{tuple_delimiter}<entity_type>{tuple_delimiter}<entity_description>)

2. From the entities identified in step 1, identify all pairs of (source_entity, target_entity) that are *clearly related* to each other.
For each pair of related entities, extract the following information:
- source_entity: name of the source entity, as identified in step 1
- target_entity: name of the target entity, as identified in step 1
- relationship_description: explanation as to why you think the source entity and the target entity are related to each other
- relationship_strength: a numeric score indicating strength of the relationship between the source entity and target entity
Format each relationship as ("relationship"{tuple_delimiter}<source_entity>{tuple_delimiter}<target_entity>{tuple_delimiter}<relationship_description>{tuple_delimiter}<relationship_strength>)

3. Return output in JSON format with the following structure:
{{
 "entities": [
 {{
 "name": "entity name",
 "type": "entity type",
 "description": "entity description"
 }}
 ],
 "relationships": [
 {{
 "source": "source entity name",
 "target": "target entity name",
 "description": "relationship description",
 "strength": 0.8
 }}
 ]
}}

-Real Data-
######################
Entity Types: {entity_types}
Text: {input_text}
######################
Output:
"#;
52
/// Prompt template for a follow-up ("gleaning") pass that asks only for
/// entities and relationships missed by an earlier extraction.
///
/// Placeholders filled by `PromptBuilder::build_continuation_prompt`:
/// `{previous_entities}`, `{previous_relationships}`, `{entity_types}` and
/// `{input_text}`.
/// Literal braces of the JSON example are written doubled (`{{` / `}}`) in
/// this template.
pub const GLEANING_CONTINUATION_PROMPT: &str = r#"-Goal-
You previously extracted entities and relationships from a text document. Review your previous extraction and the original text to identify any additional entities or relationships you may have missed in the first pass.

-Steps-
1. Review the entities you previously identified:
{previous_entities}

2. Review the relationships you previously identified:
{previous_relationships}

3. Carefully review the original text again and identify:
- Any entities you may have missed
- Any relationships between entities you may have overlooked
- Any entities that need better descriptions

4. Return ONLY the NEW entities and relationships you discovered in this pass, using the same JSON format:
{{
 "entities": [
 {{
 "name": "entity name",
 "type": "entity type",
 "description": "entity description"
 }}
 ],
 "relationships": [
 {{
 "source": "source entity name",
 "target": "target entity name",
 "description": "relationship description",
 "strength": 0.8
 }}
 ]
}}

If you found no additional entities or relationships, return empty arrays.

-Real Data-
######################
Entity Types: {entity_types}
Text: {input_text}
######################
Output:
"#;
97
/// Prompt template asking for a bare "YES"/"NO" verdict on whether the
/// extraction gathered so far is complete.
///
/// Placeholders filled by `PromptBuilder::build_completion_prompt`:
/// `{input_text}`, `{entity_count}`, `{entities_summary}`,
/// `{relationship_count}` and `{relationships_summary}`.
pub const COMPLETION_CHECK_PROMPT: &str = r#"Based on the text below and the entities/relationships already extracted, are there any significant entities or relationships that have been missed?

Text:
{input_text}

Current Entities ({entity_count}):
{entities_summary}

Current Relationships ({relationship_count}):
{relationships_summary}

Think carefully about:
1. Are all important characters, people, organizations mentioned in the text captured?
2. Are all significant locations, places, settings identified?
3. Are all key events, objects, concepts extracted?
4. Are all meaningful relationships between entities documented?

Respond with ONLY "YES" if the extraction is complete and thorough, or "NO" if there are still significant entities or relationships missing.

Answer (YES or NO):"#;
119
/// JSON Schema text describing the extraction response: an object with
/// required `entities` and `relationships` arrays.
///
/// Entity items require `name`, `type` and `description` (all strings);
/// relationship items require `source`, `target`, `description` (strings)
/// and `strength` (number). These keys match the serialized field names of
/// `ExtractionOutput`, `EntityData` and `RelationshipData`.
pub const ENTITY_EXTRACTION_JSON_SCHEMA: &str = r#"{
 "type": "object",
 "properties": {
 "entities": {
 "type": "array",
 "items": {
 "type": "object",
 "properties": {
 "name": {"type": "string"},
 "type": {"type": "string"},
 "description": {"type": "string"}
 },
 "required": ["name", "type", "description"]
 }
 },
 "relationships": {
 "type": "array",
 "items": {
 "type": "object",
 "properties": {
 "source": {"type": "string"},
 "target": {"type": "string"},
 "description": {"type": "string"},
 "strength": {"type": "number"}
 },
 "required": ["source", "target", "description", "strength"]
 }
 }
 },
 "required": ["entities", "relationships"]
}"#;
152
/// Top-level extraction result: the entities and relationships found in a
/// document, (de)serializable with serde under the keys `entities` and
/// `relationships`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ExtractionOutput {
 /// Extracted entities.
 pub entities: Vec<EntityData>,
 /// Extracted relationships between entities.
 pub relationships: Vec<RelationshipData>,
}
164
/// A single extracted entity.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EntityData {
 /// Entity name.
 pub name: String,
 /// Entity type label; serialized under the key `type` because `type` is a
 /// reserved word in Rust.
 #[serde(rename = "type")]
 pub entity_type: String,
 /// Free-text description of the entity.
 pub description: String,
}
179
/// A single extracted relationship between two entities.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RelationshipData {
 /// Name of the source entity.
 pub source: String,
 /// Name of the target entity.
 pub target: String,
 /// Free-text explanation of how the two entities are related.
 pub description: String,
 /// Numeric strength score of the relationship (the prompt templates show
 /// `0.8` as an example value).
 pub strength: f64,
}
195
/// Fills the prompt templates in this module with concrete values.
///
/// Derives `Debug` and `Clone` so a configured builder can be logged and
/// shared between call sites.
#[derive(Debug, Clone)]
pub struct PromptBuilder {
    /// Entity type labels; joined with `", "` wherever a template references
    /// `{entity_types}`.
    entity_types: Vec<String>,
    /// Text substituted for `{tuple_delimiter}` in the extraction template.
    tuple_delimiter: String,
}
201
202impl PromptBuilder {
203 pub fn new(entity_types: Vec<String>) -> Self {
205 Self {
206 entity_types,
207 tuple_delimiter: "|".to_string(),
208 }
209 }
210
211 pub fn build_extraction_prompt(&self, text: &str) -> String {
213 let entity_types_str = self.entity_types.join(", ");
214
215 ENTITY_EXTRACTION_PROMPT
216 .replace("{entity_types}", &entity_types_str)
217 .replace("{tuple_delimiter}", &self.tuple_delimiter)
218 .replace("{input_text}", text)
219 }
220
221 pub fn build_continuation_prompt(
223 &self,
224 text: &str,
225 previous_entities: &[EntityData],
226 previous_relationships: &[RelationshipData],
227 ) -> String {
228 let entity_types_str = self.entity_types.join(", ");
229
230 let entities_summary = previous_entities
232 .iter()
233 .map(|e| format!("- {} ({}): {}", e.name, e.entity_type, e.description))
234 .collect::<Vec<_>>()
235 .join("\n");
236
237 let relationships_summary = previous_relationships
239 .iter()
240 .map(|r| format!("- {} -> {}: {} (strength: {:.2})", r.source, r.target, r.description, r.strength))
241 .collect::<Vec<_>>()
242 .join("\n");
243
244 GLEANING_CONTINUATION_PROMPT
245 .replace("{entity_types}", &entity_types_str)
246 .replace("{input_text}", text)
247 .replace("{previous_entities}", &entities_summary)
248 .replace("{previous_relationships}", &relationships_summary)
249 }
250
251 pub fn build_completion_prompt(
253 &self,
254 text: &str,
255 entities: &[EntityData],
256 relationships: &[RelationshipData],
257 ) -> String {
258 let entities_summary = entities
260 .iter()
261 .take(20) .map(|e| format!("- {} ({})", e.name, e.entity_type))
263 .collect::<Vec<_>>()
264 .join("\n");
265
266 let entities_summary = if entities.len() > 20 {
267 format!("{}...\n(showing 20 of {} entities)", entities_summary, entities.len())
268 } else {
269 entities_summary
270 };
271
272 let relationships_summary = relationships
274 .iter()
275 .take(20) .map(|r| format!("- {} -> {}", r.source, r.target))
277 .collect::<Vec<_>>()
278 .join("\n");
279
280 let relationships_summary = if relationships.len() > 20 {
281 format!("{}...\n(showing 20 of {} relationships)", relationships_summary, relationships.len())
282 } else {
283 relationships_summary
284 };
285
286 COMPLETION_CHECK_PROMPT
287 .replace("{input_text}", text)
288 .replace("{entity_count}", &entities.len().to_string())
289 .replace("{entities_summary}", &entities_summary)
290 .replace("{relationship_count}", &relationships.len().to_string())
291 .replace("{relationships_summary}", &relationships_summary)
292 }
293}
294
#[cfg(test)]
mod tests {
    use super::*;

    /// Convenience constructor for a PERSON entity used across tests.
    fn person(name: &str, description: &str) -> EntityData {
        EntityData {
            name: name.to_string(),
            entity_type: "PERSON".to_string(),
            description: description.to_string(),
        }
    }

    /// Convenience constructor for the Tom -> Huck relationship.
    fn tom_and_huck(description: &str, strength: f64) -> RelationshipData {
        RelationshipData {
            source: "Tom".to_string(),
            target: "Huck".to_string(),
            description: description.to_string(),
            strength,
        }
    }

    #[test]
    fn test_build_extraction_prompt() {
        let builder = PromptBuilder::new(vec![
            "PERSON".to_string(),
            "LOCATION".to_string(),
            "ORGANIZATION".to_string(),
        ]);

        let prompt = builder.build_extraction_prompt("Tom and Huck went to the cave.");

        assert!(prompt.contains("PERSON"));
        assert!(prompt.contains("LOCATION"));
        assert!(prompt.contains("ORGANIZATION"));
        assert!(prompt.contains("Tom and Huck went to the cave."));

        // The joined type list and the tuple delimiter must actually be substituted.
        assert!(prompt.contains("[PERSON, LOCATION, ORGANIZATION]"));
        assert!(prompt.contains("(\"entity\"|<entity_name>|<entity_type>|<entity_description>)"));

        // No placeholder may survive rendering.
        assert!(!prompt.contains("{entity_types}"));
        assert!(!prompt.contains("{tuple_delimiter}"));
        assert!(!prompt.contains("{input_text}"));
    }

    #[test]
    fn test_build_continuation_prompt() {
        let builder = PromptBuilder::new(vec!["PERSON".to_string()]);

        let previous_entities = vec![person("Tom", "A young boy")];
        let previous_relationships = vec![tom_and_huck("friends", 0.9)];

        let prompt = builder.build_continuation_prompt(
            "Tom and Huck are best friends.",
            &previous_entities,
            &previous_relationships,
        );

        assert!(prompt.contains("Tom and Huck are best friends."));
        assert!(prompt.contains("Entity Types: PERSON"));

        // Pin the summary lines themselves: "Tom", "Huck" and "friends" also
        // occur in the input text, so checking for them alone proves nothing.
        assert!(prompt.contains("- Tom (PERSON): A young boy"));
        assert!(prompt.contains("- Tom -> Huck: friends (strength: 0.90)"));

        assert!(!prompt.contains("{previous_entities}"));
        assert!(!prompt.contains("{previous_relationships}"));
        assert!(!prompt.contains("{input_text}"));
    }

    #[test]
    fn test_build_completion_prompt() {
        let builder = PromptBuilder::new(vec!["PERSON".to_string()]);

        let entities = vec![person("Tom", "A young boy")];
        let relationships = vec![tom_and_huck("friends", 0.9)];

        let prompt = builder.build_completion_prompt("Test text", &entities, &relationships);

        assert!(prompt.contains("Test text"));
        assert!(prompt.contains("Current Entities (1):"));
        assert!(prompt.contains("- Tom (PERSON)"));
        assert!(prompt.contains("Current Relationships (1):"));
        assert!(prompt.contains("- Tom -> Huck"));
        assert!(prompt.contains("YES or NO"));
    }

    #[test]
    fn test_build_completion_prompt_truncates_long_lists() {
        let builder = PromptBuilder::new(vec!["PERSON".to_string()]);

        let entities: Vec<EntityData> = (0..25)
            .map(|i| person(&format!("E{}", i), "filler"))
            .collect();

        let prompt = builder.build_completion_prompt("Test text", &entities, &[]);

        // The count reflects every entity, the listing only the first 20.
        assert!(prompt.contains("Current Entities (25):"));
        assert!(prompt.contains("- E19 (PERSON)...\n(showing 20 of 25 entities)"));
        assert!(!prompt.contains("- E20 (PERSON)"));
        assert!(prompt.contains("Current Relationships (0):"));
    }

    #[test]
    fn test_extraction_output_serialization() {
        let output = ExtractionOutput {
            entities: vec![person("Tom Sawyer", "The protagonist")],
            relationships: vec![RelationshipData {
                source: "Tom Sawyer".to_string(),
                target: "Huck Finn".to_string(),
                description: "best friends".to_string(),
                strength: 0.95,
            }],
        };

        let json = serde_json::to_string(&output).unwrap();
        assert!(json.contains("Tom Sawyer"));
        assert!(json.contains("PERSON"));
        // `entity_type` must be serialized under the key `type`.
        assert!(json.contains("\"type\":\"PERSON\""));

        let deserialized: ExtractionOutput = serde_json::from_str(&json).unwrap();
        assert_eq!(deserialized.entities.len(), 1);
        assert_eq!(deserialized.relationships.len(), 1);
        assert_eq!(deserialized.entities[0].name, "Tom Sawyer");
        assert_eq!(deserialized.entities[0].entity_type, "PERSON");
        assert_eq!(deserialized.relationships[0].target, "Huck Finn");
        assert!((deserialized.relationships[0].strength - 0.95).abs() < 1e-9);
    }
}