graphrag_core/core/
metadata.rs1use serde::{Deserialize, Serialize};
8
9#[derive(Debug, Clone, Serialize, Deserialize, Default, PartialEq)]
15pub struct ChunkMetadata {
16 pub chapter: Option<String>,
20
21 pub section: Option<String>,
25
26 pub subsection: Option<String>,
30
31 pub topic: Option<String>,
35
36 pub keywords: Vec<String>,
40
41 pub summary: Option<String>,
45
46 pub structural_level: Option<u8>,
50
51 pub position_in_document: Option<f32>,
56
57 pub heading_path: Vec<String>,
62
63 pub confidence: Option<f32>,
67
68 #[serde(default)]
72 pub custom: std::collections::HashMap<String, String>,
73}
74
75impl ChunkMetadata {
76 pub fn new() -> Self {
78 Self::default()
79 }
80
81 pub fn with_chapter(mut self, chapter: String) -> Self {
83 self.chapter = Some(chapter);
84 self
85 }
86
87 pub fn with_section(mut self, section: String) -> Self {
89 self.section = Some(section);
90 self
91 }
92
93 pub fn with_subsection(mut self, subsection: String) -> Self {
95 self.subsection = Some(subsection);
96 self
97 }
98
99 pub fn with_keywords(mut self, keywords: Vec<String>) -> Self {
101 self.keywords = keywords;
102 self
103 }
104
105 pub fn with_summary(mut self, summary: String) -> Self {
107 self.summary = Some(summary);
108 self
109 }
110
111 pub fn with_structural_level(mut self, level: u8) -> Self {
113 self.structural_level = Some(level);
114 self
115 }
116
117 pub fn with_position(mut self, position: f32) -> Self {
119 self.position_in_document = Some(position.clamp(0.0, 1.0));
120 self
121 }
122
123 pub fn with_heading_path(mut self, path: Vec<String>) -> Self {
125 self.heading_path = path;
126 self
127 }
128
129 pub fn add_custom(mut self, key: String, value: String) -> Self {
131 self.custom.insert(key, value);
132 self
133 }
134
135 pub fn has_structure_info(&self) -> bool {
137 self.chapter.is_some() || self.section.is_some() || self.subsection.is_some()
138 }
139
140 pub fn has_semantic_info(&self) -> bool {
142 !self.keywords.is_empty() || self.summary.is_some()
143 }
144
145 pub fn get_deepest_heading(&self) -> Option<&String> {
147 self.subsection
148 .as_ref()
149 .or(self.section.as_ref())
150 .or(self.chapter.as_ref())
151 }
152
153 pub fn get_hierarchy_string(&self) -> Option<String> {
157 if self.heading_path.is_empty() {
158 return None;
159 }
160 Some(self.heading_path.join(" > "))
161 }
162
163 pub fn completeness_score(&self) -> f32 {
167 let mut score = 0.0;
168 let total = 9.0;
169
170 if self.chapter.is_some() {
171 score += 1.0;
172 }
173 if self.section.is_some() {
174 score += 1.0;
175 }
176 if self.subsection.is_some() {
177 score += 1.0;
178 }
179 if self.topic.is_some() {
180 score += 1.0;
181 }
182 if !self.keywords.is_empty() {
183 score += 1.0;
184 }
185 if self.summary.is_some() {
186 score += 1.0;
187 }
188 if self.structural_level.is_some() {
189 score += 1.0;
190 }
191 if self.position_in_document.is_some() {
192 score += 1.0;
193 }
194 if !self.heading_path.is_empty() {
195 score += 1.0;
196 }
197
198 score / total
199 }
200}
201
#[cfg(test)]
mod tests {
    use super::*;

    /// A freshly constructed value has nothing populated.
    #[test]
    fn test_metadata_creation() {
        let empty = ChunkMetadata::new();

        assert!(empty.chapter.is_none());
        assert!(empty.keywords.is_empty());
        assert_eq!(empty.completeness_score(), 0.0);
    }

    /// Chained builder calls populate the matching fields.
    #[test]
    fn test_metadata_builder() {
        let keywords = vec![String::from("test"), String::from("metadata")];
        let built = ChunkMetadata::new()
            .with_chapter(String::from("Chapter 1"))
            .with_section(String::from("Section 1.1"))
            .with_keywords(keywords)
            .with_summary(String::from("This is a test summary."));

        assert_eq!(built.chapter.as_deref(), Some("Chapter 1"));
        assert_eq!(built.section.as_deref(), Some("Section 1.1"));
        assert_eq!(built.keywords.len(), 2);
        assert!(built.has_structure_info());
        assert!(built.has_semantic_info());
    }

    /// The heading path is rendered with " > " between entries.
    #[test]
    fn test_heading_hierarchy() {
        let path = vec![
            String::from("Chapter 1"),
            String::from("Section 1.1"),
            String::from("Subsection 1.1.1"),
        ];
        let with_path = ChunkMetadata::new().with_heading_path(path);

        assert_eq!(
            with_path.get_hierarchy_string().as_deref(),
            Some("Chapter 1 > Section 1.1 > Subsection 1.1.1")
        );
    }

    /// The deepest heading follows subsection > section > chapter.
    #[test]
    fn test_deepest_heading() {
        let mut meta = ChunkMetadata::new();
        assert!(meta.get_deepest_heading().is_none());

        meta.chapter = Some(String::from("Chapter 1"));
        assert_eq!(
            meta.get_deepest_heading().map(String::as_str),
            Some("Chapter 1")
        );

        meta.section = Some(String::from("Section 1.1"));
        assert_eq!(
            meta.get_deepest_heading().map(String::as_str),
            Some("Section 1.1")
        );

        meta.subsection = Some(String::from("Subsection 1.1.1"));
        assert_eq!(
            meta.get_deepest_heading().map(String::as_str),
            Some("Subsection 1.1.1")
        );
    }

    /// A partially populated value scores strictly between 0 and 1.
    #[test]
    fn test_completeness_score() {
        let mut meta = ChunkMetadata::new();
        assert_eq!(meta.completeness_score(), 0.0);

        meta.chapter = Some(String::from("Chapter 1"));
        meta.keywords = vec![String::from("test")];
        meta.summary = Some(String::from("Summary"));

        let partial = meta.completeness_score();
        assert!(partial > 0.0);
        assert!(partial < 1.0);
    }

    /// Out-of-range positions are clamped to the nearest bound.
    #[test]
    fn test_position_clamping() {
        let above = ChunkMetadata::new().with_position(1.5);
        assert_eq!(above.position_in_document, Some(1.0));

        let below = ChunkMetadata::new().with_position(-0.5);
        assert_eq!(below.position_in_document, Some(0.0));
    }

    /// Custom entries are stored and retrievable by key.
    #[test]
    fn test_custom_metadata() {
        let tagged = ChunkMetadata::new()
            .add_custom(String::from("author"), String::from("John Doe"))
            .add_custom(String::from("date"), String::from("2024-01-01"));

        assert_eq!(tagged.custom.len(), 2);
        assert_eq!(
            tagged.custom.get("author").map(String::as_str),
            Some("John Doe")
        );
    }

    /// A JSON round trip yields an equal value.
    #[test]
    fn test_serialization() {
        let original = ChunkMetadata::new()
            .with_chapter(String::from("Chapter 1"))
            .with_keywords(vec![String::from("test")])
            .with_position(0.5);

        let encoded = serde_json::to_string(&original).unwrap();
        let decoded: ChunkMetadata = serde_json::from_str(&encoded).unwrap();

        assert_eq!(original, decoded);
    }
}