// cognee_models/triplet.rs

//! Triplet data model for relationship-based embeddings.
//!
//! Mirrors Python's `cognee/modules/engine/models/Triplet.py`.
//! Triplets represent semantic relationships between entities in a format suitable for embedding.
5
6use serde::{Deserialize, Serialize};
7use uuid::Uuid;
8
9/// A triplet representing a semantic relationship between two entities.
10///
11/// Triplets are embedded as text in the format:
12/// "source_text-›relationship_text-›target_text"
13///
14/// Example: "Steve Jobs: Co-founder of Apple-›founded-›Apple Inc.: Technology company"
15///
16/// Python reference: cognee/modules/engine/models/Triplet.py
17//
18// `Triplet` intentionally does NOT implement `HasDataPoint`: it does
19// not embed a `DataPoint` (it has its own `id: Uuid` field and is
20// constructed deterministically via UUID v5 from the edge key). Its
21// provenance lands via the vector-store payload helper in
22// `cognee_core::provenance` indirectly when the originating edge is
23// stamped, not via `stamp_tree` recursion. See gap-05 task 05-04 §4.4.
24#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
25pub struct Triplet {
26    /// Unique identifier for this triplet.
27    /// Generated as uuid5 from edge key (deterministic).
28    pub id: Uuid,
29
30    /// Source entity ID.
31    pub source_entity_id: Uuid,
32
33    /// Target entity ID.
34    pub target_entity_id: Uuid,
35
36    /// Relationship name (edge type).
37    pub relationship_name: String,
38
39    /// Embeddable text representation.
40    /// Format: "{source_text}-›{relationship_text}-›{target_text}"
41    /// This is the text that gets embedded for semantic search.
42    pub text: String,
43
44    /// Optional: Source entity name for display/debugging.
45    pub source_name: Option<String>,
46
47    /// Optional: Target entity name for display/debugging.
48    pub target_name: Option<String>,
49}
50
51impl Triplet {
52    /// Create a new triplet with deterministic ID.
53    ///
54    /// The ID is generated using UUID v5 from the edge key, matching Python's behavior.
55    ///
56    /// # Arguments
57    /// * `source_entity_id` - Source entity UUID
58    /// * `target_entity_id` - Target entity UUID
59    /// * `relationship_name` - Relationship/edge type name
60    /// * `text` - Formatted text for embedding
61    ///
62    /// # Example
63    /// ```
64    /// use cognee_models::Triplet;
65    /// use uuid::Uuid;
66    ///
67    /// let source_id = Uuid::new_v4();
68    /// let target_id = Uuid::new_v4();
69    /// let triplet = Triplet::new(
70    ///     source_id,
71    ///     target_id,
72    ///     "founded".to_string(),
73    ///     "Steve Jobs-›founded-›Apple Inc.".to_string(),
74    /// );
75    /// ```
76    pub fn new(
77        source_entity_id: Uuid,
78        target_entity_id: Uuid,
79        relationship_name: String,
80        text: String,
81    ) -> Self {
82        // Generate deterministic ID matching Python's generate_node_id():
83        //   uuid5(NAMESPACE_OID, (src + rel + tgt).lower().replace(" ", "_").replace("'", ""))
84        // Python reference: cognee/modules/engine/utils/generate_node_id.py
85        let raw = format!("{source_entity_id}{relationship_name}{target_entity_id}");
86        let normalized = raw.to_lowercase().replace(' ', "_").replace('\'', "");
87        let id = Uuid::new_v5(&Uuid::NAMESPACE_OID, normalized.as_bytes());
88
89        Self {
90            id,
91            source_entity_id,
92            target_entity_id,
93            relationship_name,
94            text,
95            source_name: None,
96            target_name: None,
97        }
98    }
99
100    /// Set source and target names for display purposes.
101    ///
102    /// # Arguments
103    /// * `source_name` - Source entity name
104    /// * `target_name` - Target entity name
105    pub fn with_names(mut self, source_name: String, target_name: String) -> Self {
106        self.source_name = Some(source_name);
107        self.target_name = Some(target_name);
108        self
109    }
110
111    /// Get embeddable text (for consistency with other models).
112    pub fn get_text(&self) -> &str {
113        &self.text
114    }
115}
116
#[cfg(test)]
#[allow(
    clippy::unwrap_used,
    clippy::expect_used,
    reason = "test code — panics are acceptable failures"
)]
mod tests {
    use super::*;

    /// Fixed source/target UUID pair so that ID assertions are reproducible.
    fn fixed_ids() -> (Uuid, Uuid) {
        (
            Uuid::parse_str("550e8400-e29b-41d4-a716-446655440000").unwrap(),
            Uuid::parse_str("550e8400-e29b-41d4-a716-446655440001").unwrap(),
        )
    }

    /// `new` stores its arguments verbatim and leaves both names unset.
    #[test]
    fn test_triplet_creation() {
        let (src, dst) = (Uuid::new_v4(), Uuid::new_v4());
        let triplet = Triplet::new(
            src,
            dst,
            String::from("founded"),
            String::from("Steve Jobs-›founded-›Apple Inc."),
        );

        assert_eq!(triplet.source_entity_id, src);
        assert_eq!(triplet.target_entity_id, dst);
        assert_eq!(triplet.relationship_name, "founded");
        assert!(triplet.text.contains("-›"));
        assert!(triplet.source_name.is_none());
        assert!(triplet.target_name.is_none());
    }

    /// `with_names` fills in both optional display names.
    #[test]
    fn test_triplet_with_names() {
        let base = Triplet::new(
            Uuid::new_v4(),
            Uuid::new_v4(),
            String::from("works_at"),
            String::from("Alice-›works at-›TechCorp"),
        );
        let triplet = base.with_names(String::from("Alice"), String::from("TechCorp"));

        assert_eq!(triplet.source_name.as_deref(), Some("Alice"));
        assert_eq!(triplet.target_name.as_deref(), Some("TechCorp"));
    }

    /// Identical inputs must yield the identical UUID v5.
    #[test]
    fn test_triplet_deterministic_id() {
        let (src, dst) = fixed_ids();
        let build = || {
            Triplet::new(
                src,
                dst,
                String::from("relates"),
                String::from("A-›relates-›B"),
            )
        };

        let first = build();
        let second = build();

        assert_eq!(first.id, second.id, "IDs should be deterministic");
    }

    /// The ID follows Python's generate_node_id():
    ///   uuid5(NAMESPACE_OID, (src + rel + tgt).lower().replace(" ", "_").replace("'", ""))
    //
    // NOTE(review): the expected value is re-derived here with the same formula
    // rather than pinned to a UUID captured from the Python implementation, so
    // this guards against accidental drift but not against a shared mistake.
    #[test]
    fn test_triplet_id_matches_python_generate_node_id() {
        let (src, dst) = fixed_ids();
        let relationship = "founded";

        let triplet = Triplet::new(src, dst, relationship.to_owned(), "test".to_owned());

        // raw = str(source_id) + relationship_name + str(target_id), then
        // lower-case, spaces -> underscores, apostrophes dropped.
        let edge_key = format!("{src}{relationship}{dst}")
            .to_lowercase()
            .replace(' ', "_")
            .replace('\'', "");
        let expected_id = Uuid::new_v5(&Uuid::NAMESPACE_OID, edge_key.as_bytes());

        assert_eq!(
            triplet.id, expected_id,
            "ID should match Python generate_node_id formula"
        );
    }

    /// `get_text` hands back exactly the text passed to `new`.
    #[test]
    fn test_triplet_get_text() {
        let (src, dst) = (Uuid::new_v4(), Uuid::new_v4());
        let triplet = Triplet::new(src, dst, "test".to_owned(), "test text".to_owned());

        assert_eq!(triplet.get_text(), "test text");
    }
}