1use crate::error::Result;
4use crate::index::ast_chunker::ChunkType;
5use async_trait::async_trait;
6use serde::{Deserialize, Serialize};
7
8#[async_trait]
10pub trait MetadataGenerator: Send + Sync {
11 async fn generate_metadata(
13 &self,
14 content: &str,
15 context: &MetadataContext,
16 ) -> Result<DocumentMetadata>;
17
18 fn model_name(&self) -> &str;
20}
21
22#[derive(Debug, Clone)]
24pub struct MetadataContext {
25 pub source_type: String,
27 pub language: Option<String>,
29 pub file_extension: Option<String>,
31 pub collection_name: String,
33 pub provider_config: Option<String>,
35 pub created_at: String,
37 pub modified_at: String,
39 pub existing_structure: Option<Vec<ChunkType>>,
41}
42
43impl MetadataContext {
44 pub fn new(source_type: String, collection_name: String) -> Self {
46 Self {
47 source_type,
48 collection_name,
49 language: None,
50 file_extension: None,
51 provider_config: None,
52 created_at: String::new(),
53 modified_at: String::new(),
54 existing_structure: None,
55 }
56 }
57
58 pub fn with_language(mut self, language: String) -> Self {
60 self.language = Some(language);
61 self
62 }
63
64 pub fn with_extension(mut self, extension: String) -> Self {
66 self.file_extension = Some(extension);
67 self
68 }
69
70 pub fn with_provider_config(mut self, config: String) -> Self {
72 self.provider_config = Some(config);
73 self
74 }
75
76 pub fn with_timestamps(mut self, created_at: String, modified_at: String) -> Self {
78 self.created_at = created_at;
79 self.modified_at = modified_at;
80 self
81 }
82
83 pub fn with_structure(mut self, structure: Vec<ChunkType>) -> Self {
85 self.existing_structure = Some(structure);
86 self
87 }
88}
89
90#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
92pub struct DocumentMetadata {
93 pub summary: String,
95 pub semantic_title: String,
97 pub keywords: Vec<String>,
99 pub category: String,
101 pub intent: String,
103 pub concepts: Vec<String>,
105 pub difficulty: String,
107 pub suggested_queries: Vec<String>,
109}
110
111impl DocumentMetadata {
112 pub fn new() -> Self {
114 Self {
115 summary: String::new(),
116 semantic_title: String::new(),
117 keywords: Vec::new(),
118 category: String::new(),
119 intent: String::new(),
120 concepts: Vec::new(),
121 difficulty: String::new(),
122 suggested_queries: Vec::new(),
123 }
124 }
125
126 pub fn basic(title: String, summary: String) -> Self {
128 Self {
129 summary,
130 semantic_title: title,
131 keywords: Vec::new(),
132 category: "unknown".to_string(),
133 intent: String::new(),
134 concepts: Vec::new(),
135 difficulty: "intermediate".to_string(),
136 suggested_queries: Vec::new(),
137 }
138 }
139
140 pub fn is_complete(&self) -> bool {
142 !self.summary.is_empty()
143 && !self.semantic_title.is_empty()
144 && !self.keywords.is_empty()
145 && !self.category.is_empty()
146 }
147
148 pub fn to_json(&self) -> Result<String> {
150 serde_json::to_string(self).map_err(|e| e.into())
151 }
152
153 pub fn from_json(json: &str) -> Result<Self> {
155 serde_json::from_str(json).map_err(|e| e.into())
156 }
157}
158
159impl Default for DocumentMetadata {
160 fn default() -> Self {
161 Self::new()
162 }
163}
164
/// Unit tests for the context builder and the metadata record.
#[cfg(test)]
mod tests {
    use super::*;

    /// Chained builder calls store their values and leave other fields alone.
    #[test]
    fn test_metadata_context_builder() {
        let context = MetadataContext::new("file".to_string(), "test-collection".to_string())
            .with_language("rust".to_string())
            .with_extension("rs".to_string())
            .with_timestamps("2024-01-01".to_string(), "2024-01-02".to_string());

        assert_eq!(context.source_type, "file");
        assert_eq!(context.collection_name, "test-collection");
        assert_eq!(context.language, Some("rust".to_string()));
        assert_eq!(context.file_extension, Some("rs".to_string()));
        // The timestamps are supplied above, so check they were stored and
        // not swapped with each other.
        assert_eq!(context.created_at, "2024-01-01");
        assert_eq!(context.modified_at, "2024-01-02");
        // Builders that were never called must leave their fields unset.
        assert_eq!(context.provider_config, None);
        assert!(context.existing_structure.is_none());
    }

    /// `with_provider_config` stores the string it is given.
    #[test]
    fn test_metadata_context_with_provider_config() {
        let context = MetadataContext::new("file".to_string(), "test-collection".to_string())
            .with_provider_config("provider-settings".to_string());

        assert_eq!(
            context.provider_config,
            Some("provider-settings".to_string())
        );
        assert_eq!(context.language, None);
        assert_eq!(context.file_extension, None);
    }

    /// `basic` fills in the title, summary and fixed defaults, but is not complete.
    #[test]
    fn test_document_metadata_basic() {
        let metadata = DocumentMetadata::basic(
            "Test Document".to_string(),
            "This is a test summary.".to_string(),
        );

        assert_eq!(metadata.semantic_title, "Test Document");
        assert_eq!(metadata.summary, "This is a test summary.");
        assert_eq!(metadata.difficulty, "intermediate");
        assert_eq!(metadata.category, "unknown");
        // No keywords are set, which is why the metadata is not complete.
        assert!(metadata.keywords.is_empty());
        assert!(!metadata.is_complete());
    }

    /// `Default` matches `new`, and the empty value is not complete.
    #[test]
    fn test_document_metadata_default_is_empty() {
        let metadata = DocumentMetadata::default();

        assert_eq!(metadata, DocumentMetadata::new());
        assert!(metadata.summary.is_empty());
        assert!(!metadata.is_complete());
    }

    /// Metadata with all four required fields populated is complete.
    #[test]
    fn test_document_metadata_complete() {
        let metadata = DocumentMetadata {
            summary: "A comprehensive test".to_string(),
            semantic_title: "Test".to_string(),
            keywords: vec!["test".to_string()],
            category: "test".to_string(),
            intent: "Testing".to_string(),
            concepts: vec!["testing".to_string()],
            difficulty: "beginner".to_string(),
            suggested_queries: vec!["how to test".to_string()],
        };

        assert!(metadata.is_complete());
    }

    /// Serializing and parsing back yields an equal value.
    #[test]
    fn test_metadata_json_serialization() {
        let metadata = DocumentMetadata {
            summary: "Test summary".to_string(),
            semantic_title: "Test Title".to_string(),
            keywords: vec!["test".to_string(), "rust".to_string()],
            category: "tutorial".to_string(),
            intent: "Learn testing".to_string(),
            concepts: vec!["unit testing".to_string()],
            difficulty: "beginner".to_string(),
            suggested_queries: vec!["rust testing".to_string()],
        };

        let json = metadata.to_json().unwrap();
        let parsed = DocumentMetadata::from_json(&json).unwrap();

        assert_eq!(metadata, parsed);
    }

    /// Input that is not JSON is reported as an error rather than a panic.
    #[test]
    fn test_metadata_from_json_rejects_invalid_input() {
        assert!(DocumentMetadata::from_json("not json").is_err());
    }

    /// `with_structure` stores the chunk types it is given.
    #[test]
    fn test_metadata_context_with_structure() {
        let structure = vec![ChunkType::Function, ChunkType::Struct];
        let context = MetadataContext::new("file".to_string(), "code".to_string())
            .with_structure(structure.clone());

        assert_eq!(context.existing_structure, Some(structure));
    }
}