cognee_models/triplet.rs
1//! Triplet data model for relationship-based embeddings.
2//!
3//! Mirrors Python's `cognee/modules/engine/models/Triplet.py`
4//! Triplets represent semantic relationships between entities in a format suitable for embedding.
5
6use serde::{Deserialize, Serialize};
7use uuid::Uuid;
8
9/// A triplet representing a semantic relationship between two entities.
10///
11/// Triplets are embedded as text in the format:
12/// "source_text-›relationship_text-›target_text"
13///
14/// Example: "Steve Jobs: Co-founder of Apple-›founded-›Apple Inc.: Technology company"
15///
16/// Python reference: cognee/modules/engine/models/Triplet.py
17//
18// `Triplet` intentionally does NOT implement `HasDataPoint`: it does
19// not embed a `DataPoint` (it has its own `id: Uuid` field and is
20// constructed deterministically via UUID v5 from the edge key). Its
21// provenance lands via the vector-store payload helper in
22// `cognee_core::provenance` indirectly when the originating edge is
23// stamped, not via `stamp_tree` recursion. See gap-05 task 05-04 §4.4.
24#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
25pub struct Triplet {
26 /// Unique identifier for this triplet.
27 /// Generated as uuid5 from edge key (deterministic).
28 pub id: Uuid,
29
30 /// Source entity ID.
31 pub source_entity_id: Uuid,
32
33 /// Target entity ID.
34 pub target_entity_id: Uuid,
35
36 /// Relationship name (edge type).
37 pub relationship_name: String,
38
39 /// Embeddable text representation.
40 /// Format: "{source_text}-›{relationship_text}-›{target_text}"
41 /// This is the text that gets embedded for semantic search.
42 pub text: String,
43
44 /// Optional: Source entity name for display/debugging.
45 pub source_name: Option<String>,
46
47 /// Optional: Target entity name for display/debugging.
48 pub target_name: Option<String>,
49}
50
51impl Triplet {
52 /// Create a new triplet with deterministic ID.
53 ///
54 /// The ID is generated using UUID v5 from the edge key, matching Python's behavior.
55 ///
56 /// # Arguments
57 /// * `source_entity_id` - Source entity UUID
58 /// * `target_entity_id` - Target entity UUID
59 /// * `relationship_name` - Relationship/edge type name
60 /// * `text` - Formatted text for embedding
61 ///
62 /// # Example
63 /// ```
64 /// use cognee_models::Triplet;
65 /// use uuid::Uuid;
66 ///
67 /// let source_id = Uuid::new_v4();
68 /// let target_id = Uuid::new_v4();
69 /// let triplet = Triplet::new(
70 /// source_id,
71 /// target_id,
72 /// "founded".to_string(),
73 /// "Steve Jobs-›founded-›Apple Inc.".to_string(),
74 /// );
75 /// ```
76 pub fn new(
77 source_entity_id: Uuid,
78 target_entity_id: Uuid,
79 relationship_name: String,
80 text: String,
81 ) -> Self {
82 // Generate deterministic ID matching Python's generate_node_id():
83 // uuid5(NAMESPACE_OID, (src + rel + tgt).lower().replace(" ", "_").replace("'", ""))
84 // Python reference: cognee/modules/engine/utils/generate_node_id.py
85 let raw = format!("{source_entity_id}{relationship_name}{target_entity_id}");
86 let normalized = raw.to_lowercase().replace(' ', "_").replace('\'', "");
87 let id = Uuid::new_v5(&Uuid::NAMESPACE_OID, normalized.as_bytes());
88
89 Self {
90 id,
91 source_entity_id,
92 target_entity_id,
93 relationship_name,
94 text,
95 source_name: None,
96 target_name: None,
97 }
98 }
99
100 /// Set source and target names for display purposes.
101 ///
102 /// # Arguments
103 /// * `source_name` - Source entity name
104 /// * `target_name` - Target entity name
105 pub fn with_names(mut self, source_name: String, target_name: String) -> Self {
106 self.source_name = Some(source_name);
107 self.target_name = Some(target_name);
108 self
109 }
110
111 /// Get embeddable text (for consistency with other models).
112 pub fn get_text(&self) -> &str {
113 &self.text
114 }
115}
116
#[cfg(test)]
#[allow(
    clippy::unwrap_used,
    clippy::expect_used,
    reason = "test code — panics are acceptable failures"
)]
mod tests {
    use super::*;

    /// Fixed (source, target) IDs so ID-related assertions are reproducible.
    fn fixed_ids() -> (Uuid, Uuid) {
        (
            Uuid::parse_str("550e8400-e29b-41d4-a716-446655440000").unwrap(),
            Uuid::parse_str("550e8400-e29b-41d4-a716-446655440001").unwrap(),
        )
    }

    /// ID of a triplet built from the fixed endpoints and the given relationship.
    fn id_for_relationship(relationship: &str) -> Uuid {
        let (source_id, target_id) = fixed_ids();
        Triplet::new(
            source_id,
            target_id,
            relationship.to_string(),
            "test".to_string(),
        )
        .id
    }

    #[test]
    fn test_triplet_creation() {
        let source_id = Uuid::new_v4();
        let target_id = Uuid::new_v4();
        let triplet = Triplet::new(
            source_id,
            target_id,
            "founded".to_string(),
            "Steve Jobs-›founded-›Apple Inc.".to_string(),
        );

        assert_eq!(triplet.source_entity_id, source_id);
        assert_eq!(triplet.target_entity_id, target_id);
        assert_eq!(triplet.relationship_name, "founded");
        assert!(triplet.text.contains("-›"));
        assert_eq!(triplet.source_name, None);
        assert_eq!(triplet.target_name, None);
    }

    #[test]
    fn test_triplet_with_names() {
        let source_id = Uuid::new_v4();
        let target_id = Uuid::new_v4();
        let triplet = Triplet::new(
            source_id,
            target_id,
            "works_at".to_string(),
            "Alice-›works at-›TechCorp".to_string(),
        )
        .with_names("Alice".to_string(), "TechCorp".to_string());

        assert_eq!(triplet.source_name, Some("Alice".to_string()));
        assert_eq!(triplet.target_name, Some("TechCorp".to_string()));
    }

    #[test]
    fn test_triplet_deterministic_id() {
        // Same inputs should produce same ID (UUID v5)
        let (source_id, target_id) = fixed_ids();

        let triplet1 = Triplet::new(
            source_id,
            target_id,
            "relates".to_string(),
            "A-›relates-›B".to_string(),
        );

        let triplet2 = Triplet::new(
            source_id,
            target_id,
            "relates".to_string(),
            "A-›relates-›B".to_string(),
        );

        assert_eq!(triplet1.id, triplet2.id, "IDs should be deterministic");
    }

    #[test]
    fn test_triplet_id_matches_python_generate_node_id() {
        // Verify ID generation matches Python's generate_node_id():
        // uuid5(NAMESPACE_OID, (src + rel + tgt).lower().replace(" ", "_").replace("'", ""))
        let (source_id, target_id) = fixed_ids();
        let relationship = "founded";

        let triplet = Triplet::new(
            source_id,
            target_id,
            relationship.to_string(),
            "test".to_string(),
        );

        // Manually compute expected ID using Python's formula:
        // raw = str(source_id) + relationship_name + str(target_id)
        // normalized = raw.lower().replace(" ", "_").replace("'", "")
        let raw = format!("{source_id}{relationship}{target_id}");
        let normalized = raw.to_lowercase().replace(' ', "_").replace('\'', "");
        let expected_id = Uuid::new_v5(&Uuid::NAMESPACE_OID, normalized.as_bytes());

        assert_eq!(
            triplet.id, expected_id,
            "ID should match Python generate_node_id formula"
        );
    }

    #[test]
    fn test_triplet_id_normalizes_relationship_name() {
        // The other ID tests use a relationship that needs no normalization,
        // so exercise each normalization rule explicitly here.
        assert_eq!(
            id_for_relationship("Works At"),
            id_for_relationship("works_at"),
            "case and spaces should be normalized before hashing"
        );
        assert_eq!(
            id_for_relationship("it's"),
            id_for_relationship("its"),
            "apostrophes should be stripped before hashing"
        );
    }

    #[test]
    fn test_triplet_id_depends_on_edge_key() {
        let (source_id, target_id) = fixed_ids();

        assert_ne!(
            id_for_relationship("founded"),
            id_for_relationship("acquired"),
            "different relationships should yield different IDs"
        );

        // The key is src + rel + tgt, so reversing the edge changes the ID.
        let forward = Triplet::new(
            source_id,
            target_id,
            "founded".to_string(),
            "test".to_string(),
        );
        let reverse = Triplet::new(
            target_id,
            source_id,
            "founded".to_string(),
            "test".to_string(),
        );
        assert_ne!(forward.id, reverse.id, "edge direction should affect the ID");
    }

    #[test]
    fn test_triplet_id_ignores_text() {
        // Only the edge key feeds the ID; the embeddable text does not.
        let (source_id, target_id) = fixed_ids();
        let first = Triplet::new(
            source_id,
            target_id,
            "relates".to_string(),
            "A-›relates-›B".to_string(),
        );
        let second = Triplet::new(
            source_id,
            target_id,
            "relates".to_string(),
            "something else entirely".to_string(),
        );

        assert_eq!(first.id, second.id, "text should not influence the ID");
    }

    #[test]
    fn test_triplet_get_text() {
        let triplet = Triplet::new(
            Uuid::new_v4(),
            Uuid::new_v4(),
            "test".to_string(),
            "test text".to_string(),
        );

        assert_eq!(triplet.get_text(), "test text");
    }
}