use crate::error::Result;
use crate::index::ast_chunker::ChunkType;
use async_trait::async_trait;
use serde::{Deserialize, Serialize};
/// Produces [`DocumentMetadata`] for a piece of text content.
///
/// Implementors must be `Send + Sync` (required by the supertrait bounds), and
/// the generating method is asynchronous via `#[async_trait]`.
#[async_trait]
pub trait MetadataGenerator: Send + Sync {
/// Generates metadata for `content`, with `context` supplied as additional
/// information about where the content comes from.
///
/// # Errors
///
/// Returns the crate-level error when generation fails; the concrete
/// failure conditions are defined by each implementation.
async fn generate_metadata(
&self,
content: &str,
context: &MetadataContext,
) -> Result<DocumentMetadata>;
/// Returns the model name reported by this generator.
// NOTE(review): presumably the identifier of the underlying model — confirm
// against the implementors.
fn model_name(&self) -> &str;
}
/// Contextual information passed to a [`MetadataGenerator`] alongside the
/// content itself.
///
/// Only `source_type` and `collection_name` are required by
/// [`MetadataContext::new`]; every other field starts out unset (or empty) and
/// is filled in through the `with_*` builder methods.
#[derive(Debug, Clone)]
pub struct MetadataContext {
/// Kind of source the content came from (the tests in this file use `"file"`).
pub source_type: String,
/// Language of the content, if known (the tests in this file use `"rust"`).
pub language: Option<String>,
/// File extension of the source, if known (the tests in this file use `"rs"`,
/// i.e. without a leading dot).
pub file_extension: Option<String>,
/// Name of the collection the content belongs to.
pub collection_name: String,
/// Optional provider configuration carried as a plain string.
// NOTE(review): the expected format is not established in this file — confirm
// with the code that consumes it.
pub provider_config: Option<String>,
/// Creation timestamp as a string; empty when built via `new`.
// NOTE(review): no format is enforced here — confirm what callers supply.
pub created_at: String,
/// Modification timestamp as a string; empty when built via `new`.
pub modified_at: String,
/// Chunk types associated with the content, if any were supplied.
// NOTE(review): presumably the structure already detected by the AST chunker —
// confirm against callers.
pub existing_structure: Option<Vec<ChunkType>>,
}
impl MetadataContext {
    /// Creates a context from the two required values.
    ///
    /// All optional fields are `None` and both timestamps are empty strings
    /// until set through the `with_*` builders below.
    pub fn new(source_type: String, collection_name: String) -> Self {
        Self {
            // Required values supplied by the caller.
            source_type,
            collection_name,
            // Timestamps start out empty rather than optional.
            created_at: String::new(),
            modified_at: String::new(),
            // Everything else is unset until a builder fills it in.
            language: None,
            file_extension: None,
            provider_config: None,
            existing_structure: None,
        }
    }

    /// Consumes the context and returns it with `language` set.
    pub fn with_language(self, language: String) -> Self {
        Self {
            language: Some(language),
            ..self
        }
    }

    /// Consumes the context and returns it with `file_extension` set.
    pub fn with_extension(self, extension: String) -> Self {
        Self {
            file_extension: Some(extension),
            ..self
        }
    }

    /// Consumes the context and returns it with `provider_config` set.
    pub fn with_provider_config(self, config: String) -> Self {
        Self {
            provider_config: Some(config),
            ..self
        }
    }

    /// Consumes the context and returns it with both timestamps replaced.
    pub fn with_timestamps(self, created_at: String, modified_at: String) -> Self {
        Self {
            created_at,
            modified_at,
            ..self
        }
    }

    /// Consumes the context and returns it with `existing_structure` set.
    pub fn with_structure(self, structure: Vec<ChunkType>) -> Self {
        Self {
            existing_structure: Some(structure),
            ..self
        }
    }
}
/// Descriptive metadata for a document, as returned by a
/// [`MetadataGenerator`].
///
/// The type is serializable with serde; see [`DocumentMetadata::to_json`] and
/// [`DocumentMetadata::from_json`] for the JSON round trip.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct DocumentMetadata {
/// Summary text. Must be non-empty for `is_complete` to return `true`.
pub summary: String,
/// Title for the document. Must be non-empty for `is_complete` to return `true`.
pub semantic_title: String,
/// Keywords. Must be non-empty for `is_complete` to return `true`.
pub keywords: Vec<String>,
/// Category label; `basic` fills this with `"unknown"`. Must be non-empty for
/// `is_complete` to return `true`.
pub category: String,
/// Intent text; not considered by `is_complete`.
// NOTE(review): presumably the purpose of the document — confirm with the
// generators that populate it.
pub intent: String,
/// Concepts; not considered by `is_complete`.
pub concepts: Vec<String>,
/// Difficulty label; `basic` fills this with `"intermediate"`.
// NOTE(review): the set of allowed values is not constrained in this file.
pub difficulty: String,
/// Suggested queries; not considered by `is_complete`.
pub suggested_queries: Vec<String>,
}
impl DocumentMetadata {
    /// Creates metadata in which every string is empty and every list is empty.
    pub fn new() -> Self {
        Self {
            // Free-text fields.
            summary: String::new(),
            semantic_title: String::new(),
            category: String::new(),
            intent: String::new(),
            difficulty: String::new(),
            // List fields.
            keywords: Vec::new(),
            concepts: Vec::new(),
            suggested_queries: Vec::new(),
        }
    }

    /// Creates metadata from just a title and a summary.
    ///
    /// `category` is set to `"unknown"` and `difficulty` to `"intermediate"`;
    /// all remaining fields keep the empty values produced by [`Self::new`].
    pub fn basic(title: String, summary: String) -> Self {
        Self {
            semantic_title: title,
            summary,
            category: String::from("unknown"),
            difficulty: String::from("intermediate"),
            ..Self::new()
        }
    }

    /// Reports whether `summary`, `semantic_title`, `keywords` and `category`
    /// are all non-empty. The other fields are not inspected.
    pub fn is_complete(&self) -> bool {
        let missing_required_field = self.summary.is_empty()
            || self.semantic_title.is_empty()
            || self.keywords.is_empty()
            || self.category.is_empty();
        !missing_required_field
    }

    /// Serializes the metadata to a JSON string.
    ///
    /// # Errors
    ///
    /// Returns the `serde_json` failure converted into the crate error type.
    pub fn to_json(&self) -> Result<String> {
        serde_json::to_string(self).map_err(Into::into)
    }

    /// Parses metadata from a JSON string.
    ///
    /// # Errors
    ///
    /// Returns the `serde_json` failure converted into the crate error type.
    pub fn from_json(json: &str) -> Result<Self> {
        serde_json::from_str(json).map_err(Into::into)
    }
}
impl Default for DocumentMetadata {
fn default() -> Self {
Self::new()
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A freshly constructed context carries only the two required values.
    #[test]
    fn test_metadata_context_new_defaults() {
        let context = MetadataContext::new("file".to_string(), "test-collection".to_string());

        assert_eq!(context.source_type, "file");
        assert_eq!(context.collection_name, "test-collection");
        assert!(context.language.is_none());
        assert!(context.file_extension.is_none());
        assert!(context.provider_config.is_none());
        assert!(context.created_at.is_empty());
        assert!(context.modified_at.is_empty());
        assert!(context.existing_structure.is_none());
    }

    /// Each builder sets its own field and leaves the others untouched.
    #[test]
    fn test_metadata_context_builder() {
        let context = MetadataContext::new("file".to_string(), "test-collection".to_string())
            .with_language("rust".to_string())
            .with_extension("rs".to_string())
            .with_timestamps("2024-01-01".to_string(), "2024-01-02".to_string());

        assert_eq!(context.source_type, "file");
        assert_eq!(context.collection_name, "test-collection");
        assert_eq!(context.language, Some("rust".to_string()));
        assert_eq!(context.file_extension, Some("rs".to_string()));
        // Verify the timestamps too, in the order they were passed.
        assert_eq!(context.created_at, "2024-01-01");
        assert_eq!(context.modified_at, "2024-01-02");
        // Builders that were not called must leave their fields unset.
        assert!(context.provider_config.is_none());
        assert!(context.existing_structure.is_none());
    }

    /// `with_provider_config` stores the supplied string verbatim.
    #[test]
    fn test_metadata_context_with_provider_config() {
        let context = MetadataContext::new("file".to_string(), "test-collection".to_string())
            .with_provider_config("provider-settings".to_string());

        assert_eq!(
            context.provider_config,
            Some("provider-settings".to_string())
        );
    }

    /// `basic` fills in the documented defaults but is not yet complete,
    /// because no keywords are present.
    #[test]
    fn test_document_metadata_basic() {
        let metadata = DocumentMetadata::basic(
            "Test Document".to_string(),
            "This is a test summary.".to_string(),
        );

        assert_eq!(metadata.semantic_title, "Test Document");
        assert_eq!(metadata.summary, "This is a test summary.");
        assert_eq!(metadata.category, "unknown");
        assert_eq!(metadata.difficulty, "intermediate");
        assert!(metadata.keywords.is_empty());
        assert!(!metadata.is_complete());
    }

    /// Metadata with summary, title, keywords and category counts as complete.
    #[test]
    fn test_document_metadata_complete() {
        let metadata = DocumentMetadata {
            summary: "A comprehensive test".to_string(),
            semantic_title: "Test".to_string(),
            keywords: vec!["test".to_string()],
            category: "test".to_string(),
            intent: "Testing".to_string(),
            concepts: vec!["testing".to_string()],
            difficulty: "beginner".to_string(),
            suggested_queries: vec!["how to test".to_string()],
        };

        assert!(metadata.is_complete());
    }

    /// `Default` delegates to `new`, and the empty value is never complete.
    #[test]
    fn test_document_metadata_default_matches_new() {
        let metadata = DocumentMetadata::default();

        assert_eq!(metadata, DocumentMetadata::new());
        assert!(!metadata.is_complete());
    }

    /// Serializing and then parsing yields an equal value.
    #[test]
    fn test_metadata_json_serialization() {
        let metadata = DocumentMetadata {
            summary: "Test summary".to_string(),
            semantic_title: "Test Title".to_string(),
            keywords: vec!["test".to_string(), "rust".to_string()],
            category: "tutorial".to_string(),
            intent: "Learn testing".to_string(),
            concepts: vec!["unit testing".to_string()],
            difficulty: "beginner".to_string(),
            suggested_queries: vec!["rust testing".to_string()],
        };

        let json = metadata.to_json().unwrap();
        let parsed = DocumentMetadata::from_json(&json).unwrap();

        assert_eq!(metadata, parsed);
    }

    /// Malformed input and input with missing fields are reported as errors
    /// rather than producing a partially filled value.
    #[test]
    fn test_metadata_from_json_rejects_invalid_input() {
        assert!(DocumentMetadata::from_json("not valid json").is_err());
        assert!(DocumentMetadata::from_json("{}").is_err());
    }

    /// `with_structure` stores the supplied chunk types unchanged.
    #[test]
    fn test_metadata_context_with_structure() {
        let structure = vec![ChunkType::Function, ChunkType::Struct];
        let context = MetadataContext::new("file".to_string(), "code".to_string())
            .with_structure(structure.clone());

        assert_eq!(context.existing_structure, Some(structure));
    }
}