cognee_cognify/summarization/
models.rs1use cognee_models::DataPoint;
8use cognee_models::HasDataPoint;
9use schemars::JsonSchema;
10use serde::{Deserialize, Serialize};
11use serde_json::json;
12use uuid::Uuid;
13
14#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
19pub struct SummarizedContent {
20 pub summary: String,
22
23 pub description: String,
25}
26
27#[derive(Debug, Clone, Serialize, Deserialize)]
33pub struct TextSummary {
34 #[serde(flatten)]
36 pub base: DataPoint,
37
38 pub made_from: Option<Uuid>,
40
41 pub text: String,
43
44 #[serde(skip_serializing_if = "Option::is_none")]
46 pub description: Option<String>,
47
48 pub model: String,
50}
51
52impl TextSummary {
53 pub fn new(chunk_id: Uuid, text: String, description: Option<String>, model: String) -> Self {
64 let id = Uuid::new_v5(&chunk_id, b"TextSummary");
66
67 let mut base = DataPoint::new("TextSummary", None);
68 base.id = id;
69 base.metadata
70 .insert("index_fields".to_string(), json!(["text"]));
71
72 Self {
73 base,
74 made_from: Some(chunk_id),
75 text,
76 description,
77 model,
78 }
79 }
80
81 pub fn from_summarized_content(
88 chunk_id: Uuid,
89 summarized: SummarizedContent,
90 model: String,
91 ) -> Self {
92 Self::new(
93 chunk_id,
94 summarized.summary,
95 Some(summarized.description),
96 model,
97 )
98 }
99}
100
101impl HasDataPoint for TextSummary {
102 fn data_point(&self) -> &DataPoint {
103 &self.base
104 }
105 fn data_point_mut(&mut self) -> &mut DataPoint {
106 &mut self.base
107 }
108 }
111
#[cfg(test)]
#[allow(
    clippy::unwrap_used,
    clippy::expect_used,
    reason = "test code — panics are acceptable failures"
)]
mod tests {
    use super::*;

    /// Shorthand for building a summary that carries no description.
    fn bare_summary(chunk_id: Uuid, text: &str, model: &str) -> TextSummary {
        TextSummary::new(chunk_id, text.to_owned(), None, model.to_owned())
    }

    /// The id is a function of the chunk id alone: same chunk gives the same
    /// id regardless of text/model, and a different chunk gives a different id.
    #[test]
    fn test_text_summary_deterministic_id() {
        let fixed_chunk = Uuid::parse_str("550e8400-e29b-41d4-a716-446655440000").unwrap();

        let first = bare_summary(fixed_chunk, "Test summary", "gpt-4");
        let second = bare_summary(fixed_chunk, "Different text", "gpt-3.5-turbo");
        assert_eq!(first.base.id, second.base.id);

        let other_chunk = Uuid::new_v4();
        let third = bare_summary(other_chunk, "Test summary", "gpt-4");
        assert_ne!(first.base.id, third.base.id);
    }

    /// `from_summarized_content` maps summary/description onto the new record
    /// and derives the id from the chunk id.
    #[test]
    fn test_from_summarized_content() {
        let chunk_id = Uuid::new_v4();
        let expected_text = "Brief summary".to_owned();
        let expected_description = "Detailed description with key points.".to_owned();

        let built = TextSummary::from_summarized_content(
            chunk_id,
            SummarizedContent {
                summary: expected_text.clone(),
                description: expected_description.clone(),
            },
            "llama3".to_owned(),
        );

        assert_eq!(built.made_from, Some(chunk_id));
        assert_eq!(built.text, expected_text);
        assert_eq!(built.description, Some(expected_description));
        assert_eq!(built.model, "llama3");
        assert_eq!(built.base.id, Uuid::new_v5(&chunk_id, b"TextSummary"));
    }

    /// A JSON round trip keeps the id and all summary-specific fields.
    #[test]
    fn test_serialization() {
        let original = TextSummary::new(
            Uuid::new_v4(),
            "Summary text".to_owned(),
            Some("Description".to_owned()),
            "gpt-4".to_owned(),
        );

        let encoded = serde_json::to_string(&original).unwrap();
        let decoded: TextSummary = serde_json::from_str(&encoded).unwrap();

        assert_eq!(original.base.id, decoded.base.id);
        assert_eq!(original.made_from, decoded.made_from);
        assert_eq!(original.text, decoded.text);
        assert_eq!(original.description, decoded.description);
        assert_eq!(original.model, decoded.model);
    }

    /// The embedded data point carries the expected type tag, index metadata,
    /// timestamps and version.
    #[test]
    fn test_data_point_base_fields() {
        let summary = bare_summary(Uuid::new_v4(), "Test summary", "gpt-4");
        let base = &summary.base;

        assert_eq!(base.data_type, "TextSummary");
        assert_eq!(base.metadata.get("index_fields"), Some(&json!(["text"])));
        assert!(base.created_at > 0);
        assert!(base.updated_at > 0);
        assert_eq!(base.version, 1);
    }

    /// Both `HasDataPoint` accessors hand back the embedded `base`.
    #[test]
    fn text_summary_implements_has_datapoint() {
        let mut summary = bare_summary(Uuid::new_v4(), "Summary text", "gpt-4");
        let expected_id = summary.base.id;

        assert_eq!(summary.data_point().id, expected_id);
        assert_eq!(summary.data_point_mut().id, expected_id);
    }
}