Skip to main content

cognee_models/
edge_type.rs

//! EdgeType - Storage-layer edge type model for indexing.
//!
//! Mirrors Python's `cognee/modules/engine/models/EdgeType.py`.
//! Represents a type of relationship (e.g., "works_at", "located_in", "knows").
5
6use chrono::Utc;
7use serde::{Deserialize, Serialize};
8use uuid::Uuid;
9
10use crate::DataPoint;
11use crate::has_datapoint::HasDataPoint;
12
13/// Storage-layer edge type model.
14///
15/// Represents a type of relationship between entities (e.g., "works_at",
16/// "located_in", "knows"). Used for indexing and semantic search of
17/// relationship types.
18#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
19pub struct EdgeType {
20    /// Base data point fields (id, timestamps, metadata, etc.)
21    #[serde(flatten)]
22    pub base: DataPoint,
23
24    /// Relationship name (e.g., "works_at", "located_in")
25    pub relationship_name: String,
26
27    /// Number of edges of this type (for statistics)
28    pub number_of_edges: i32,
29}
30
31impl EdgeType {
32    /// Index fields to embed for vector search.
33    pub const INDEX_FIELDS: &'static [&'static str] = &["relationship_name"];
34
35    /// Compute a deterministic UUID for an EdgeType from its relationship name.
36    ///
37    /// Mirrors Python's `generate_edge_id(edge_id=text)`:
38    /// `uuid5(NAMESPACE_OID, text.lower().replace(" ", "_").replace("'", ""))`
39    pub fn deterministic_id(relationship_name: &str) -> Uuid {
40        let normalized = relationship_name
41            .to_lowercase()
42            .replace(' ', "_")
43            .replace('\'', "");
44        Uuid::new_v5(&Uuid::NAMESPACE_OID, normalized.as_bytes())
45    }
46
47    /// Create a new EdgeType with a random UUID.
48    ///
49    /// # Arguments
50    /// * `relationship_name` - Relationship name (e.g., "works_at")
51    /// * `dataset_id` - Dataset UUID
52    pub fn new(relationship_name: impl Into<String>, dataset_id: Option<Uuid>) -> Self {
53        let mut metadata = std::collections::HashMap::new();
54        metadata.insert(
55            "index_fields".to_string(),
56            serde_json::json!(Self::INDEX_FIELDS),
57        );
58
59        Self {
60            base: DataPoint::with_metadata("EdgeType", dataset_id, metadata),
61            relationship_name: relationship_name.into(),
62            number_of_edges: 0,
63        }
64    }
65
66    /// Create a new EdgeType with a deterministic UUID derived from the
67    /// relationship name, matching Python's `generate_edge_id`.
68    ///
69    /// # Arguments
70    /// * `relationship_name` - Relationship name (e.g., "works_at")
71    /// * `dataset_id` - Dataset UUID
72    pub fn new_deterministic(
73        relationship_name: impl Into<String>,
74        dataset_id: Option<Uuid>,
75    ) -> Self {
76        let name = relationship_name.into();
77        let id = Self::deterministic_id(&name);
78        let now = Utc::now().timestamp_millis();
79
80        let mut metadata = std::collections::HashMap::new();
81        metadata.insert(
82            "index_fields".to_string(),
83            serde_json::json!(Self::INDEX_FIELDS),
84        );
85
86        Self {
87            base: DataPoint {
88                id,
89                created_at: now,
90                updated_at: now,
91                ontology_valid: false,
92                version: 1,
93                topological_rank: None,
94                metadata,
95                data_type: "EdgeType".to_string(),
96                belongs_to_set: dataset_id.map(|ds_id| vec![serde_json::json!(ds_id.to_string())]),
97                source_pipeline: None,
98                source_task: None,
99                source_node_set: None,
100                source_user: None,
101                source_content_hash: None,
102                feedback_weight: 0.5,
103            },
104            relationship_name: name,
105            number_of_edges: 0,
106        }
107    }
108
109    /// Get the relationship name (for embedding).
110    pub fn get_embeddable_text(&self) -> String {
111        self.relationship_name.clone()
112    }
113
114    /// Increment the edge count.
115    pub fn increment_count(&mut self) {
116        self.number_of_edges += 1;
117        self.base.touch();
118    }
119
120    /// Set the edge count.
121    pub fn set_count(&mut self, count: i32) {
122        self.number_of_edges = count;
123        self.base.touch();
124    }
125
126    /// Get the edge count.
127    pub fn count(&self) -> i32 {
128        self.number_of_edges
129    }
130}
131
132impl HasDataPoint for EdgeType {
133    fn data_point(&self) -> &DataPoint {
134        &self.base
135    }
136    fn data_point_mut(&mut self) -> &mut DataPoint {
137        &mut self.base
138    }
139    // for_each_child_mut: default no-op — EdgeType has no nested
140    // `HasDataPoint` children.
141}
142
#[cfg(test)]
mod tests {
    //! Unit tests for `EdgeType`: construction, metadata, counting, and
    //! deterministic id generation.

    use super::*;

    /// `new` stores the name, starts the count at zero, and tags the base
    /// record with the "EdgeType" data type.
    #[test]
    fn test_edge_type_creation() {
        let et = EdgeType::new("works_at", None);

        assert_eq!(et.relationship_name, "works_at");
        assert_eq!(et.number_of_edges, 0);
        assert_eq!(et.base.data_type, "EdgeType");
    }

    /// A dataset id passed to `new` ends up in `belongs_to_set` as a
    /// one-element list holding its string form.
    #[test]
    fn test_edge_type_with_dataset() {
        let dataset_id = Uuid::new_v4();
        let et = EdgeType::new("works_at", Some(dataset_id));

        assert_eq!(
            et.base.belongs_to_set,
            Some(vec![serde_json::json!(dataset_id.to_string())])
        );
    }

    /// The base metadata advertises `relationship_name` as the index field.
    #[test]
    fn test_edge_type_index_fields() {
        let et = EdgeType::new("works_at", None);
        let index_fields = et.base.get_metadata("index_fields");

        assert_eq!(
            index_fields,
            Some(&serde_json::json!(["relationship_name"]))
        );
    }

    /// The embeddable text is exactly the relationship name.
    #[test]
    fn test_edge_type_embeddable_text() {
        let et = EdgeType::new("works_at", None);
        assert_eq!(et.get_embeddable_text(), "works_at");
    }

    /// Each `increment_count` call raises the count by one.
    #[test]
    fn test_edge_type_increment_count() {
        let mut et = EdgeType::new("works_at", None);
        assert_eq!(et.count(), 0);

        et.increment_count();
        assert_eq!(et.count(), 1);

        et.increment_count();
        assert_eq!(et.count(), 2);
    }

    /// `set_count` overwrites the count with the given value.
    #[test]
    fn test_edge_type_set_count() {
        let mut et = EdgeType::new("works_at", None);
        et.set_count(10);
        assert_eq!(et.count(), 10);
    }

    /// `increment_count` must also refresh `updated_at`.
    #[test]
    fn test_edge_type_increment_updates_timestamp() {
        let mut et = EdgeType::new("works_at", None);
        let old_time = et.base.updated_at;

        // updated_at is i64 (millis since epoch); wait long enough that a
        // fresh clock read is guaranteed to land on a later millisecond.
        std::thread::sleep(std::time::Duration::from_millis(10));
        et.increment_count();

        // Strict comparison: `>=` would also pass if touch() never ran, which
        // would make the sleep above pointless.
        // NOTE(review): assumes touch() stores the current wall-clock millis.
        assert!(
            et.base.updated_at > old_time,
            "increment_count() should advance updated_at"
        );
    }

    /// Hashing the same name twice gives the same UUID.
    #[test]
    fn test_deterministic_id_basic() {
        let id1 = EdgeType::deterministic_id("works_at");
        let id2 = EdgeType::deterministic_id("works_at");
        assert_eq!(id1, id2, "same input must produce same UUID");
    }

    /// Case, spaces, and apostrophes are normalized away before hashing.
    #[test]
    fn test_deterministic_id_normalization() {
        // Spaces become underscores, apostrophes removed, lowercased
        let id1 = EdgeType::deterministic_id("Works At");
        let id2 = EdgeType::deterministic_id("works_at");
        assert_eq!(
            id1, id2,
            "normalization should make 'Works At' equal 'works_at'"
        );

        let id3 = EdgeType::deterministic_id("it's_related");
        let id4 = EdgeType::deterministic_id("its_related");
        assert_eq!(id3, id4, "apostrophe removal should match");
    }

    /// The id is a v5 UUID of the normalized name in the OID namespace.
    #[test]
    fn test_deterministic_id_matches_python() {
        // Python: uuid5(NAMESPACE_OID, "works_at") with NAMESPACE_OID = 6ba7b812-...
        // This checks the namespace and hashed bytes; it does not pin a UUID
        // literal produced by an actual Python run.
        let id = EdgeType::deterministic_id("works_at");
        assert_eq!(
            id,
            Uuid::new_v5(&Uuid::NAMESPACE_OID, b"works_at"),
            "deterministic_id('works_at') should equal uuid5(OID, 'works_at')"
        );
    }

    /// `new_deterministic` fills in name, type, derived id, and a zero count.
    #[test]
    fn test_new_deterministic_constructor() {
        let et = EdgeType::new_deterministic("works_at", None);
        assert_eq!(et.relationship_name, "works_at");
        assert_eq!(et.base.data_type, "EdgeType");
        assert_eq!(et.base.id, EdgeType::deterministic_id("works_at"));
        assert_eq!(et.number_of_edges, 0);
    }

    /// `new_deterministic` records the dataset and still derives the id from
    /// the name alone.
    #[test]
    fn test_new_deterministic_with_dataset() {
        let dataset_id = Uuid::new_v4();
        let et = EdgeType::new_deterministic("located_in", Some(dataset_id));
        assert_eq!(
            et.base.belongs_to_set,
            Some(vec![serde_json::json!(dataset_id.to_string())])
        );
        assert_eq!(et.base.id, EdgeType::deterministic_id("located_in"));
    }

    /// The stored name is kept verbatim while the id uses the normalized
    /// form, and the dataset never influences the id.
    #[test]
    fn test_new_deterministic_preserves_name_and_normalizes_id() {
        let without_dataset = EdgeType::new_deterministic("Works At", None);
        let with_dataset = EdgeType::new_deterministic("Works At", Some(Uuid::new_v4()));

        assert_eq!(without_dataset.relationship_name, "Works At");
        assert_eq!(
            without_dataset.base.id,
            EdgeType::deterministic_id("works_at")
        );
        assert_eq!(without_dataset.base.id, with_dataset.base.id);
    }

    /// Distinct names hash to distinct UUIDs.
    #[test]
    fn test_deterministic_id_different_names_differ() {
        let id1 = EdgeType::deterministic_id("works_at");
        let id2 = EdgeType::deterministic_id("located_in");
        assert_ne!(id1, id2, "different names must produce different UUIDs");
    }

    /// Both `HasDataPoint` accessors expose the embedded base record.
    #[test]
    fn edge_type_implements_has_datapoint() {
        let et = EdgeType::new("rel", None);
        let dp_id = et.base.id;
        assert_eq!(et.data_point().id, dp_id);
        let mut et2 = et;
        assert_eq!(et2.data_point_mut().id, dp_id);
    }
}
285}