1use std::collections::HashMap;
29
30use serde::{Deserialize, Serialize};
31
32use super::hasher::hash_content;
33use super::types::{ChunkContext, ChunkKind, ChunkSource, EmbedChunk};
34
/// Settings that control which containers receive a hierarchy summary chunk
/// and how much detail about their children each summary carries.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HierarchyConfig {
    /// Summarize `Class` containers. `HierarchyBuilder::should_summarize` also
    /// consults this flag for `Interface` and `Trait` containers.
    pub summarize_classes: bool,

    /// Summarize `Struct` containers.
    pub summarize_structs: bool,

    /// Summarize `Module` containers.
    pub summarize_modules: bool,

    /// Minimum number of children a container must have before a summary is
    /// generated for it (the comparison is `>=`).
    pub min_children_for_summary: usize,

    /// When `true`, each child's signature is copied into its member entry.
    pub include_child_signatures: bool,

    /// Upper bound on the number of children listed in a single summary.
    pub max_children_in_summary: usize,
}
56
57impl Default for HierarchyConfig {
58 fn default() -> Self {
59 Self {
60 summarize_classes: true,
61 summarize_structs: true,
62 summarize_modules: false, min_children_for_summary: 2,
64 include_child_signatures: true,
65 max_children_in_summary: 20,
66 }
67 }
68}
69
/// Lightweight description of one child chunk as it appears inside a
/// container's summary.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ChildReference {
    /// Identifier of the child chunk (copied from the chunk's `id`).
    pub id: String,

    /// Symbol name of the child (copied from the chunk's `source.symbol`).
    pub name: String,

    /// Kind of the child chunk.
    pub kind: ChunkKind,

    /// Signature of the child, if one is available and was requested.
    /// Left out of the serialized form when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub signature: Option<String>,

    /// Short description taken from the first line of the child's docstring.
    /// Left out of the serialized form when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub brief: Option<String>,
}
90
/// Structured view of a container and the children that reference it as
/// their parent, as returned by `get_hierarchy_summary`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HierarchySummary {
    /// Identifier of the container chunk.
    pub container_id: String,

    /// Symbol name of the container.
    pub container_name: String,

    /// Kind of the container chunk.
    pub container_kind: ChunkKind,

    /// References to the container's children.
    pub children: Vec<ChildReference>,

    /// Number of children found for the container.
    pub total_children: usize,
}
109
/// Builds summary chunks for containers and tags chunks with their
/// parent/child relationships, according to a `HierarchyConfig`.
pub struct HierarchyBuilder {
    /// Settings consulted by every builder operation.
    config: HierarchyConfig,
}
114
115impl Default for HierarchyBuilder {
116 fn default() -> Self {
117 Self::new()
118 }
119}
120
121impl HierarchyBuilder {
122 pub fn new() -> Self {
124 Self { config: HierarchyConfig::default() }
125 }
126
127 pub fn with_config(config: HierarchyConfig) -> Self {
129 Self { config }
130 }
131
132 pub fn build_hierarchy(&self, chunks: &[EmbedChunk]) -> Vec<EmbedChunk> {
137 let mut parent_children: HashMap<String, Vec<&EmbedChunk>> = HashMap::new();
139
140 for chunk in chunks {
142 if let Some(ref parent) = chunk.source.parent {
143 let key = format!("{}:{}", chunk.source.file, parent);
144 parent_children.entry(key).or_default().push(chunk);
145 }
146 }
147
148 let mut summaries = Vec::new();
150
151 for chunk in chunks {
152 if !self.should_summarize(&chunk.kind) {
153 continue;
154 }
155
156 let key = format!("{}:{}", chunk.source.file, chunk.source.symbol);
157 let children = parent_children.get(&key);
158
159 if let Some(children) = children {
160 if children.len() >= self.config.min_children_for_summary {
161 if let Some(summary) = self.create_summary_chunk(chunk, children) {
162 summaries.push(summary);
163 }
164 }
165 }
166 }
167
168 summaries
169 }
170
171 fn should_summarize(&self, kind: &ChunkKind) -> bool {
173 match kind {
174 ChunkKind::Class => self.config.summarize_classes,
175 ChunkKind::Struct => self.config.summarize_structs,
176 ChunkKind::Module => self.config.summarize_modules,
177 ChunkKind::Interface | ChunkKind::Trait => self.config.summarize_classes,
178 _ => false,
179 }
180 }
181
182 fn create_summary_chunk(
184 &self,
185 container: &EmbedChunk,
186 children: &[&EmbedChunk],
187 ) -> Option<EmbedChunk> {
188 let mut child_refs: Vec<ChildReference> = children
190 .iter()
191 .take(self.config.max_children_in_summary)
192 .map(|child| ChildReference {
193 id: child.id.clone(),
194 name: child.source.symbol.clone(),
195 kind: child.kind,
196 signature: if self.config.include_child_signatures {
197 child.context.signature.clone()
198 } else {
199 None
200 },
201 brief: child.context.docstring.as_ref().and_then(|d| {
202 d.lines().next().map(|s| {
203 let s = s.trim();
204 if s.len() > 100 {
205 format!("{}...", &s[..97])
206 } else {
207 s.to_owned()
208 }
209 })
210 }),
211 })
212 .collect();
213
214 child_refs.sort_by(|a, b| a.name.cmp(&b.name));
216
217 let summary_content = self.build_summary_content(container, &child_refs, children.len());
219
220 let hash = hash_content(&summary_content);
222
223 let mut tags = vec!["summary".to_owned(), "hierarchy".to_owned()];
225 tags.extend(container.context.tags.iter().cloned());
226
227 Some(EmbedChunk {
228 id: hash.short_id,
229 full_hash: hash.full_hash,
230 content: summary_content,
231 tokens: 0, kind: container.kind,
233 source: ChunkSource {
234 repo: container.source.repo.clone(),
235 file: container.source.file.clone(),
236 lines: container.source.lines,
237 symbol: format!("{}_summary", container.source.symbol),
238 fqn: container
239 .source
240 .fqn
241 .as_ref()
242 .map(|f| format!("{}_summary", f)),
243 language: container.source.language.clone(),
244 parent: container.source.parent.clone(),
245 visibility: container.source.visibility,
246 is_test: container.source.is_test,
247 module_path: container.source.module_path.clone(),
248 parent_chunk_id: None,
249 },
250 children_ids: Vec::new(),
251 context: ChunkContext {
252 docstring: container.context.docstring.clone(),
253 comments: Vec::new(),
254 signature: container.context.signature.clone(),
255 calls: Vec::new(), called_by: Vec::new(),
257 imports: container.context.imports.clone(),
258 tags,
259 keywords: container.context.keywords.clone(),
260 context_prefix: container.context.context_prefix.clone(),
261 summary: None,
262 qualified_calls: Vec::new(),
263 unresolved_calls: Vec::new(),
264 identifiers: container.context.identifiers.clone(),
265 type_signature: None,
266 parameter_types: Vec::new(),
267 return_type: None,
268 error_types: Vec::new(),
269 lines_of_code: 0,
270 max_nesting_depth: 0,
271 git: container.context.git.clone(),
272 complexity_score: None,
273 dependents_count: None,
274 },
275 repr: "code".to_owned(),
276 code_chunk_id: None,
277 part: None,
278 })
279 }
280
281 fn build_summary_content(
283 &self,
284 container: &EmbedChunk,
285 child_refs: &[ChildReference],
286 total_children: usize,
287 ) -> String {
288 let mut content = String::new();
289
290 if let Some(ref sig) = container.context.signature {
292 content.push_str(sig);
293 content.push('\n');
294 }
295
296 if let Some(ref doc) = container.context.docstring {
298 content.push('\n');
299 content.push_str(doc);
300 content.push('\n');
301 }
302
303 content.push_str("\n/* Members:\n");
305
306 for child in child_refs {
307 content.push_str(" * - ");
308 content.push_str(&child.name);
309
310 if let Some(ref sig) = child.signature {
311 let sig_line = sig.lines().next().unwrap_or(sig).trim();
313 if sig_line != child.name {
314 content.push_str(": ");
315 content.push_str(sig_line);
316 }
317 }
318
319 if let Some(ref brief) = child.brief {
320 content.push_str(" - ");
321 content.push_str(brief);
322 }
323
324 content.push('\n');
325 }
326
327 if total_children > child_refs.len() {
328 content.push_str(&format!(" * ... and {} more\n", total_children - child_refs.len()));
329 }
330
331 content.push_str(" */\n");
332
333 content
334 }
335
336 pub fn enrich_chunks(&self, chunks: &mut [EmbedChunk]) {
340 let mut parent_children: HashMap<String, Vec<String>> = HashMap::new();
342
343 for chunk in chunks.iter() {
344 if let Some(ref parent) = chunk.source.parent {
345 let key = format!("{}:{}", chunk.source.file, parent);
346 parent_children
347 .entry(key)
348 .or_default()
349 .push(chunk.source.symbol.clone());
350 }
351 }
352
353 for chunk in chunks.iter_mut() {
355 let key = format!("{}:{}", chunk.source.file, chunk.source.symbol);
356 if let Some(children) = parent_children.get(&key) {
357 chunk
358 .context
359 .tags
360 .push(format!("has-children:{}", children.len()));
361 }
362
363 if chunk.source.parent.is_some() {
365 chunk.context.tags.push("has-parent".to_owned());
366 }
367 }
368 }
369}
370
371pub fn get_hierarchy_summary(
373 chunks: &[EmbedChunk],
374 container_symbol: &str,
375 file: &str,
376) -> Option<HierarchySummary> {
377 let container = chunks
379 .iter()
380 .find(|c| c.source.symbol == container_symbol && c.source.file == file)?;
381
382 let children: Vec<ChildReference> = chunks
384 .iter()
385 .filter(|c| c.source.parent.as_deref() == Some(container_symbol) && c.source.file == file)
386 .map(|c| ChildReference {
387 id: c.id.clone(),
388 name: c.source.symbol.clone(),
389 kind: c.kind,
390 signature: c.context.signature.clone(),
391 brief: c
392 .context
393 .docstring
394 .as_ref()
395 .and_then(|d| d.lines().next().map(|s| s.trim().to_owned())),
396 })
397 .collect();
398
399 Some(HierarchySummary {
400 container_id: container.id.clone(),
401 container_name: container.source.symbol.clone(),
402 container_kind: container.kind,
403 total_children: children.len(),
404 children,
405 })
406}
407
#[cfg(test)]
mod tests {
    use super::*;
    use crate::embedding::types::{RepoIdentifier, Visibility};

    /// Builds a minimal public, non-test chunk located in `test.rs` with the
    /// given identity, kind, optional parent, signature and docstring.
    fn create_test_chunk(
        id: &str,
        symbol: &str,
        kind: ChunkKind,
        parent: Option<&str>,
        signature: Option<&str>,
        docstring: Option<&str>,
    ) -> EmbedChunk {
        let source = ChunkSource {
            repo: RepoIdentifier::default(),
            file: String::from("test.rs"),
            lines: (1, 10),
            symbol: String::from(symbol),
            fqn: Some(format!("test::{}", symbol)),
            language: String::from("Rust"),
            parent: parent.map(str::to_owned),
            visibility: Visibility::Public,
            is_test: false,
            module_path: None,
            parent_chunk_id: None,
        };

        let context = ChunkContext {
            docstring: docstring.map(str::to_owned),
            comments: Vec::new(),
            signature: signature.map(str::to_owned),
            calls: Vec::new(),
            called_by: Vec::new(),
            imports: Vec::new(),
            tags: Vec::new(),
            keywords: Vec::new(),
            context_prefix: None,
            summary: None,
            qualified_calls: Vec::new(),
            unresolved_calls: Vec::new(),
            identifiers: None,
            type_signature: None,
            parameter_types: Vec::new(),
            return_type: None,
            error_types: Vec::new(),
            lines_of_code: 10,
            max_nesting_depth: 2,
            git: None,
            complexity_score: None,
            dependents_count: None,
        };

        EmbedChunk {
            id: String::from(id),
            full_hash: format!("{}_full", id),
            content: format!("content of {}", symbol),
            tokens: 100,
            kind,
            source,
            children_ids: Vec::new(),
            context,
            repr: String::from("code"),
            code_chunk_id: None,
            part: None,
        }
    }

    /// A class with three methods yields exactly one summary that lists them all.
    #[test]
    fn test_build_hierarchy_basic() {
        let service = create_test_chunk(
            "c1",
            "UserService",
            ChunkKind::Class,
            None,
            Some("class UserService"),
            Some("Service for user management"),
        );
        let get_user = create_test_chunk(
            "c2",
            "get_user",
            ChunkKind::Method,
            Some("UserService"),
            Some("fn get_user(&self, id: u64) -> User"),
            Some("Get a user by ID"),
        );
        let create_user = create_test_chunk(
            "c3",
            "create_user",
            ChunkKind::Method,
            Some("UserService"),
            Some("fn create_user(&self, data: UserData) -> User"),
            Some("Create a new user"),
        );
        let delete_user = create_test_chunk(
            "c4",
            "delete_user",
            ChunkKind::Method,
            Some("UserService"),
            Some("fn delete_user(&self, id: u64)"),
            Some("Delete a user"),
        );
        let chunks = vec![service, get_user, create_user, delete_user];

        let summaries = HierarchyBuilder::new().build_hierarchy(&chunks);

        assert_eq!(summaries.len(), 1);
        let summary = &summaries[0];
        assert!(summary.source.symbol.contains("summary"));
        assert!(summary.content.contains("Members:"));
        for member in ["get_user", "create_user", "delete_user"].iter() {
            assert!(summary.content.contains(*member));
        }
    }

    /// A container with fewer children than the configured minimum is skipped.
    #[test]
    fn test_hierarchy_min_children() {
        let chunks = vec![
            create_test_chunk(
                "c1",
                "SmallClass",
                ChunkKind::Class,
                None,
                Some("class SmallClass"),
                None,
            ),
            create_test_chunk(
                "c2",
                "only_method",
                ChunkKind::Method,
                Some("SmallClass"),
                None,
                None,
            ),
        ];

        let config = HierarchyConfig {
            min_children_for_summary: 2,
            ..HierarchyConfig::default()
        };
        let builder = HierarchyBuilder::with_config(config);

        assert!(builder.build_hierarchy(&chunks).is_empty());
    }

    /// Enrichment tags the container with a child count and each child with `has-parent`.
    #[test]
    fn test_hierarchy_enrich_chunks() {
        let mut chunks = vec![
            create_test_chunk("c1", "MyClass", ChunkKind::Class, None, None, None),
            create_test_chunk("c2", "method1", ChunkKind::Method, Some("MyClass"), None, None),
            create_test_chunk("c3", "method2", ChunkKind::Method, Some("MyClass"), None, None),
        ];

        HierarchyBuilder::new().enrich_chunks(&mut chunks);

        let container_tagged = chunks[0]
            .context
            .tags
            .iter()
            .any(|tag| tag.starts_with("has-children:"));
        assert!(container_tagged);

        for child in &chunks[1..3] {
            assert!(child.context.tags.iter().any(|tag| tag == "has-parent"));
        }
    }

    /// The structured summary reports the container and both of its children.
    #[test]
    fn test_get_hierarchy_summary() {
        let chunks = vec![
            create_test_chunk(
                "c1",
                "MyStruct",
                ChunkKind::Struct,
                None,
                Some("struct MyStruct"),
                None,
            ),
            create_test_chunk("c2", "field1", ChunkKind::Variable, Some("MyStruct"), None, None),
            create_test_chunk(
                "c3",
                "new",
                ChunkKind::Function,
                Some("MyStruct"),
                Some("fn new() -> Self"),
                Some("Create a new instance"),
            ),
        ];

        let summary = get_hierarchy_summary(&chunks, "MyStruct", "test.rs")
            .expect("container should be found");

        assert_eq!(summary.container_name, "MyStruct");
        assert_eq!(summary.total_children, 2);
        for expected in ["field1", "new"].iter() {
            assert!(summary.children.iter().any(|child| child.name == *expected));
        }
    }

    /// The rendered summary contains the signature, docstring and member block.
    #[test]
    fn test_summary_content_format() {
        let chunks = vec![
            create_test_chunk(
                "c1",
                "Calculator",
                ChunkKind::Class,
                None,
                Some("pub struct Calculator"),
                Some("A simple calculator"),
            ),
            create_test_chunk(
                "c2",
                "add",
                ChunkKind::Method,
                Some("Calculator"),
                Some("fn add(&self, a: i32, b: i32) -> i32"),
                Some("Add two numbers"),
            ),
            create_test_chunk(
                "c3",
                "subtract",
                ChunkKind::Method,
                Some("Calculator"),
                Some("fn subtract(&self, a: i32, b: i32) -> i32"),
                Some("Subtract two numbers"),
            ),
        ];

        let summaries = HierarchyBuilder::new().build_hierarchy(&chunks);
        assert_eq!(summaries.len(), 1);

        let content = &summaries[0].content;
        let expected_fragments = [
            "pub struct Calculator",
            "A simple calculator",
            "/* Members:",
            " * - add",
            " * - subtract",
            " */",
        ];
        for fragment in expected_fragments.iter() {
            assert!(content.contains(*fragment));
        }
    }

    /// Per-kind switches are honored: classes are summarized, structs are not.
    #[test]
    fn test_config_options() {
        let builder = HierarchyBuilder::with_config(HierarchyConfig {
            summarize_classes: true,
            summarize_structs: false,
            summarize_modules: false,
            min_children_for_summary: 1,
            include_child_signatures: false,
            max_children_in_summary: 5,
        });

        let class_chunks = vec![
            create_test_chunk("c1", "MyClass", ChunkKind::Class, None, None, None),
            create_test_chunk("c2", "m1", ChunkKind::Method, Some("MyClass"), None, None),
        ];
        let struct_chunks = vec![
            create_test_chunk("s1", "MyStruct", ChunkKind::Struct, None, None, None),
            create_test_chunk("s2", "f1", ChunkKind::Variable, Some("MyStruct"), None, None),
        ];

        let class_summaries = builder.build_hierarchy(&class_chunks);
        assert_eq!(class_summaries.len(), 1);

        let struct_summaries = builder.build_hierarchy(&struct_chunks);
        assert_eq!(struct_summaries.len(), 0);
    }
}
677}