use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use uuid::Uuid;
/// String-to-string key/value map used for the `metadata` field of [`Document`].
pub type Metadata = HashMap<String, String>;
/// A piece of text content together with an identifier and key/value metadata.
///
/// Derives serde's `Serialize`/`Deserialize` in addition to `Debug` and `Clone`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Document {
    /// Identifier of the document. [`Document::new`] fills this with a freshly
    /// generated UUID v4 rendered as a string; the field is public, so other
    /// values can be assigned directly.
    pub id: String,
    /// The document's text.
    pub content: String,
    /// String key/value pairs attached to the document; empty after
    /// [`Document::new`] and populated via [`Document::with_metadata`].
    pub metadata: Metadata,
}
impl Document {
    /// Creates a document holding `content`, with a freshly generated UUID v4
    /// string as its `id` and no metadata.
    ///
    /// `content` may be anything convertible into a `String` (`&str`, `String`, ...).
    pub fn new(content: impl Into<String>) -> Self {
        Self {
            id: Uuid::new_v4().to_string(),
            content: content.into(),
            metadata: HashMap::new(),
        }
    }

    /// Builder-style helper: stores the metadata entry `k` -> `v` and returns the
    /// updated document so calls can be chained.
    ///
    /// If `k` is already present, its previous value is replaced (standard
    /// `HashMap::insert` behaviour).
    ///
    /// The method takes `self` by value, so a call whose result is discarded
    /// drops the document altogether; `#[must_use]` turns that mistake into a
    /// compiler warning.
    #[must_use = "with_metadata consumes the document and returns the updated one"]
    pub fn with_metadata(mut self, k: impl Into<String>, v: impl Into<String>) -> Self {
        self.metadata.insert(k.into(), v.into());
        self
    }
}
/// A fragment of text tagged with a document id and a position index.
///
/// Derives serde's `Serialize`/`Deserialize` in addition to `Debug` and `Clone`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Chunk {
    /// Presumably the `id` of the [`Document`] this chunk was taken from;
    /// nothing in this file enforces that link — verify against the producer.
    pub document_id: String,
    /// Presumably the chunk's position within its document — TODO confirm
    /// the numbering scheme with the code that creates chunks.
    pub index: usize,
    /// The chunk's text.
    pub text: String,
}
/// A [`Chunk`] paired with an embedding vector and a list of edges.
///
/// Derives serde's `Serialize`/`Deserialize` in addition to `Debug` and `Clone`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Node {
    /// Identifier of the node. [`Node::new`] fills this with a freshly
    /// generated UUID v4 rendered as a string.
    pub id: String,
    /// The chunk carried by this node.
    pub chunk: Chunk,
    /// Embedding values supplied by the caller; this type places no constraint
    /// on the vector's length.
    pub embedding: Vec<f32>,
    /// Edge entries; empty after [`Node::new`].
    /// NOTE(review): presumably these are ids of connected nodes — confirm
    /// against the code that populates them.
    pub edges: Vec<String>,
}
impl Node {
pub fn new(chunk: Chunk, embedding: Vec<f32>) -> Self {
Self {
id: Uuid::new_v4().to_string(),
chunk,
embedding,
edges: Vec::new(),
}
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Identical content must still yield distinct generated ids.
    #[test]
    fn test_document_new_generates_unique_ids() {
        let first = Document::new("hello");
        let second = Document::new("hello");
        assert_ne!(first.id, second.id);
    }

    /// `new` stores the content verbatim and starts with no metadata.
    #[test]
    fn test_document_content_stored() {
        let text = "The Rust programming language.";
        let doc = Document::new(text);
        assert_eq!(doc.content, text);
        assert!(doc.metadata.is_empty());
    }

    /// Chained `with_metadata` calls accumulate entries.
    #[test]
    fn test_document_with_metadata_builder() {
        let doc = Document::new("test content")
            .with_metadata("author", "Suraj")
            .with_metadata("source", "test");
        assert_eq!(doc.metadata.get("author").map(String::as_str), Some("Suraj"));
        assert_eq!(doc.metadata.get("source").map(String::as_str), Some("test"));
        assert_eq!(doc.metadata.len(), 2);
    }

    /// Empty content is accepted and an id is still generated.
    #[test]
    fn test_document_empty_content() {
        let doc = Document::new("");
        assert!(doc.content.is_empty());
        assert!(!doc.id.is_empty());
    }

    /// `Node::new` keeps the chunk and embedding and starts without edges.
    #[test]
    fn test_node_new() {
        let embedding = vec![0.1, 0.2, 0.3];
        let chunk = Chunk {
            document_id: "doc-1".into(),
            index: 0,
            text: "hello world".into(),
        };
        let node = Node::new(chunk, embedding.clone());
        assert!(!node.id.is_empty());
        assert_eq!(node.chunk.text, "hello world");
        assert_eq!(node.embedding, embedding);
        assert!(node.edges.is_empty());
    }

    /// A document survives a JSON round trip with id, content and metadata intact.
    #[test]
    fn test_document_serialization_roundtrip() {
        let original = Document::new("serialize me").with_metadata("key", "val");
        let json = serde_json::to_string(&original).unwrap();
        let restored: Document = serde_json::from_str(&json).unwrap();
        assert_eq!(restored.id, original.id);
        assert_eq!(restored.content, original.content);
        assert_eq!(restored.metadata.get("key").map(String::as_str), Some("val"));
    }

    /// A node survives a JSON round trip with id, chunk index and embedding intact.
    #[test]
    fn test_node_serialization_roundtrip() {
        let original = Node::new(
            Chunk {
                document_id: "d1".into(),
                index: 5,
                text: "chunk text".into(),
            },
            vec![1.0, 2.0, 3.0],
        );
        let json = serde_json::to_string(&original).unwrap();
        let restored: Node = serde_json::from_str(&json).unwrap();
        assert_eq!(restored.id, original.id);
        assert_eq!(restored.chunk.index, 5);
        assert_eq!(restored.embedding, [1.0_f32, 2.0, 3.0]);
    }
}