use cognee_models::DataPoint;
use cognee_models::HasDataPoint;
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
use serde_json::json;
use uuid::Uuid;
// A summary/description pair, convertible into a `TextSummary` through
// `TextSummary::from_summarized_content`.
//
// NOTE(review): the `JsonSchema` derive suggests this is the structured shape
// requested from a summarization model — confirm against the caller.
//
// Plain `//` comments are used here on purpose: schemars copies `///` doc
// comments into the generated schema as descriptions, so switching to doc
// comments would change the schema this type produces.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct SummarizedContent {
// Summary text; becomes `TextSummary::text` in `from_summarized_content`.
pub summary: String,
// Accompanying description; becomes `TextSummary::description` (wrapped in
// `Some`) in `from_summarized_content`.
pub description: String,
}
/// A summary record built on top of a [`DataPoint`].
///
/// Instances are normally created through [`TextSummary::new`] or
/// [`TextSummary::from_summarized_content`], which derive the data point id
/// from the originating chunk id.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TextSummary {
/// Shared data point fields. Flattened by serde, so they are (de)serialized
/// at the same level as the fields below rather than under a `base` key.
#[serde(flatten)]
pub base: DataPoint,
/// Id of the chunk this summary was produced from. The constructors in this
/// file always set it to `Some(chunk_id)`.
pub made_from: Option<Uuid>,
/// The summary text. `TextSummary::new` lists this field under the
/// `index_fields` metadata entry.
pub text: String,
/// Optional longer description; left out of the serialized output entirely
/// when it is `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub description: Option<String>,
/// Model name supplied by the caller.
/// NOTE(review): presumably the model that generated the summary — confirm.
pub model: String,
}
impl TextSummary {
pub fn new(chunk_id: Uuid, text: String, description: Option<String>, model: String) -> Self {
let id = Uuid::new_v5(&chunk_id, b"TextSummary");
let mut base = DataPoint::new("TextSummary", None);
base.id = id;
base.metadata
.insert("index_fields".to_string(), json!(["text"]));
Self {
base,
made_from: Some(chunk_id),
text,
description,
model,
}
}
pub fn from_summarized_content(
chunk_id: Uuid,
summarized: SummarizedContent,
model: String,
) -> Self {
Self::new(
chunk_id,
summarized.summary,
Some(summarized.description),
model,
)
}
}
impl HasDataPoint for TextSummary {
    /// Returns a shared reference to the embedded `base` data point.
    fn data_point(&self) -> &DataPoint {
        let Self { base, .. } = self;
        base
    }

    /// Returns an exclusive reference to the embedded `base` data point.
    fn data_point_mut(&mut self) -> &mut DataPoint {
        let Self { base, .. } = self;
        base
    }
}
#[cfg(test)]
#[allow(
    clippy::unwrap_used,
    clippy::expect_used,
    reason = "test code — panics are acceptable failures"
)]
mod tests {
    use super::*;

    /// Builds a summary without a description for the given chunk.
    fn plain_summary(chunk_id: Uuid, text: &str, model: &str) -> TextSummary {
        TextSummary::new(chunk_id, text.to_string(), None, model.to_string())
    }

    /// The id depends only on the chunk id, not on the text or the model.
    #[test]
    fn test_text_summary_deterministic_id() {
        let fixed_chunk = Uuid::parse_str("550e8400-e29b-41d4-a716-446655440000").unwrap();

        let first = plain_summary(fixed_chunk, "Test summary", "gpt-4");
        let second = plain_summary(fixed_chunk, "Different text", "gpt-3.5-turbo");
        assert_eq!(first.base.id, second.base.id);

        let other_chunk = Uuid::new_v4();
        let third = plain_summary(other_chunk, "Test summary", "gpt-4");
        assert_ne!(first.base.id, third.base.id);
    }

    /// `from_summarized_content` maps summary/description onto the new value.
    #[test]
    fn test_from_summarized_content() {
        let source_chunk = Uuid::new_v4();
        let content = SummarizedContent {
            summary: String::from("Brief summary"),
            description: String::from("Detailed description with key points."),
        };

        let built = TextSummary::from_summarized_content(
            source_chunk,
            content.clone(),
            String::from("llama3"),
        );

        assert_eq!(built.made_from, Some(source_chunk));
        assert_eq!(built.text, content.summary);
        assert_eq!(built.description, Some(content.description));
        assert_eq!(built.model, "llama3");
        assert_eq!(built.base.id, Uuid::new_v5(&source_chunk, b"TextSummary"));
    }

    /// A JSON round trip preserves the id and every summary field.
    #[test]
    fn test_serialization() {
        let original = TextSummary::new(
            Uuid::new_v4(),
            String::from("Summary text"),
            Some(String::from("Description")),
            String::from("gpt-4"),
        );

        let encoded = serde_json::to_string(&original).unwrap();
        let decoded: TextSummary = serde_json::from_str(&encoded).unwrap();

        assert_eq!(original.base.id, decoded.base.id);
        assert_eq!(original.made_from, decoded.made_from);
        assert_eq!(original.text, decoded.text);
        assert_eq!(original.description, decoded.description);
        assert_eq!(original.model, decoded.model);
    }

    /// The embedded data point carries the expected type name and metadata.
    #[test]
    fn test_data_point_base_fields() {
        let summary = plain_summary(Uuid::new_v4(), "Test summary", "gpt-4");
        let base = &summary.base;

        assert_eq!(base.data_type, "TextSummary");
        assert_eq!(base.metadata.get("index_fields"), Some(&json!(["text"])));
        assert!(base.created_at > 0);
        assert!(base.updated_at > 0);
        assert_eq!(base.version, 1);
    }

    /// Both `HasDataPoint` accessors expose the embedded data point.
    #[test]
    fn text_summary_implements_has_datapoint() {
        let mut summary = plain_summary(Uuid::new_v4(), "Summary text", "gpt-4");
        let expected_id = summary.base.id;

        assert_eq!(summary.data_point().id, expected_id);
        assert_eq!(summary.data_point_mut().id, expected_id);
    }
}