1use serde::{Deserialize, Serialize};
8use std::collections::{HashMap, HashSet, VecDeque};
9use ucm_core::{Block, BlockId, Content, Document, EdgeType};
10
11use crate::error::Result;
12
13#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
15pub enum NavigateDirection {
16 Down,
18 Up,
20 Both,
22 Siblings,
24 BreadthFirst,
26 DepthFirst,
28}
29
30#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
32pub enum TraversalOutput {
33 StructureOnly,
35 #[default]
37 StructureAndBlocks,
38 StructureWithPreviews,
40}
41
42#[derive(Debug, Clone, Default, Serialize, Deserialize)]
44pub struct TraversalFilter {
45 pub include_roles: Vec<String>,
47 pub exclude_roles: Vec<String>,
49 pub include_tags: Vec<String>,
51 pub exclude_tags: Vec<String>,
53 pub content_pattern: Option<String>,
55 pub edge_types: Vec<EdgeType>,
57}
58
59#[derive(Debug, Clone, Serialize, Deserialize)]
61pub struct TraversalNode {
62 pub id: BlockId,
63 pub depth: usize,
64 pub parent_id: Option<BlockId>,
65 pub content_preview: Option<String>,
66 pub semantic_role: Option<String>,
67 pub child_count: usize,
68 pub edge_count: usize,
69}
70
71#[derive(Debug, Clone, Serialize, Deserialize)]
73pub struct TraversalEdge {
74 pub source: BlockId,
75 pub target: BlockId,
76 pub edge_type: EdgeType,
77}
78
79#[derive(Debug, Clone, Default, Serialize, Deserialize)]
81pub struct TraversalSummary {
82 pub total_nodes: usize,
83 pub total_edges: usize,
84 pub max_depth: usize,
85 pub nodes_by_role: HashMap<String, usize>,
86 pub truncated: bool,
87 pub truncation_reason: Option<String>,
88}
89
90#[derive(Debug, Clone, Default, Serialize, Deserialize)]
92pub struct TraversalMetadata {
93 pub start_id: Option<BlockId>,
94 pub direction: Option<NavigateDirection>,
95 pub max_depth: Option<usize>,
96 pub execution_time_ms: Option<u64>,
97}
98
99#[derive(Debug, Clone, Serialize, Deserialize)]
101pub struct TraversalResult {
102 pub nodes: Vec<TraversalNode>,
103 pub edges: Vec<TraversalEdge>,
104 pub paths: Vec<Vec<BlockId>>,
105 pub summary: TraversalSummary,
106 pub metadata: TraversalMetadata,
107}
108
109impl TraversalResult {
110 pub fn empty() -> Self {
111 Self {
112 nodes: Vec::new(),
113 edges: Vec::new(),
114 paths: Vec::new(),
115 summary: TraversalSummary::default(),
116 metadata: TraversalMetadata::default(),
117 }
118 }
119}
120
121#[derive(Debug, Clone)]
123pub struct TraversalConfig {
124 pub max_depth: usize,
125 pub max_nodes: usize,
126 pub default_preview_length: usize,
127 pub include_orphans: bool,
128 pub cache_enabled: bool,
129}
130
131impl Default for TraversalConfig {
132 fn default() -> Self {
133 Self {
134 max_depth: 100,
135 max_nodes: 10000,
136 default_preview_length: 100,
137 include_orphans: false,
138 cache_enabled: true,
139 }
140 }
141}
142
143pub struct TraversalEngine {
145 config: TraversalConfig,
146}
147
148impl TraversalEngine {
149 pub fn new() -> Self {
151 Self {
152 config: TraversalConfig::default(),
153 }
154 }
155
156 pub fn with_config(config: TraversalConfig) -> Self {
158 Self { config }
159 }
160
161 pub fn navigate(
163 &self,
164 doc: &Document,
165 start_id: Option<BlockId>,
166 direction: NavigateDirection,
167 depth: Option<usize>,
168 filter: Option<TraversalFilter>,
169 output: TraversalOutput,
170 ) -> Result<TraversalResult> {
171 let start = start_id.unwrap_or(doc.root);
172 let max_depth = depth
173 .unwrap_or(self.config.max_depth)
174 .min(self.config.max_depth);
175 let filter = filter.unwrap_or_default();
176
177 #[cfg(not(target_arch = "wasm32"))]
178 let start_time = std::time::Instant::now();
179
180 let result = match direction {
181 NavigateDirection::Down => self.traverse_down(doc, start, max_depth, &filter, output),
182 NavigateDirection::Up => self.traverse_up(doc, start, max_depth, &filter, output),
183 NavigateDirection::Both => self.traverse_both(doc, start, max_depth, &filter, output),
184 NavigateDirection::Siblings => self.traverse_siblings(doc, start, &filter, output),
185 NavigateDirection::BreadthFirst => {
186 self.traverse_bfs(doc, start, max_depth, &filter, output)
187 }
188 NavigateDirection::DepthFirst => {
189 self.traverse_dfs(doc, start, max_depth, &filter, output)
190 }
191 }?;
192
193 let mut result = result;
194 result.metadata.start_id = Some(start);
195 result.metadata.direction = Some(direction);
196 result.metadata.max_depth = Some(max_depth);
197
198 #[cfg(not(target_arch = "wasm32"))]
199 {
200 result.metadata.execution_time_ms = Some(start_time.elapsed().as_millis() as u64);
201 }
202
203 Ok(result)
204 }
205
206 pub fn expand(
208 &self,
209 doc: &Document,
210 node_id: &BlockId,
211 output: TraversalOutput,
212 ) -> Result<TraversalResult> {
213 self.navigate(
214 doc,
215 Some(*node_id),
216 NavigateDirection::Down,
217 Some(1),
218 None,
219 output,
220 )
221 }
222
223 pub fn path_to_root(&self, doc: &Document, node_id: &BlockId) -> Result<Vec<BlockId>> {
225 let mut path = vec![*node_id];
226 let mut current = *node_id;
227
228 while let Some(parent) = doc.parent(¤t) {
229 path.push(*parent);
230 if *parent == doc.root {
231 break;
232 }
233 current = *parent;
234 }
235
236 path.reverse();
237 Ok(path)
238 }
239
240 pub fn find_paths(
242 &self,
243 doc: &Document,
244 from: &BlockId,
245 to: &BlockId,
246 max_paths: usize,
247 ) -> Result<Vec<Vec<BlockId>>> {
248 let mut paths = Vec::new();
249 let mut visited = HashSet::new();
250 let mut current_path = vec![*from];
251
252 self.find_paths_recursive(
253 doc,
254 from,
255 to,
256 &mut visited,
257 &mut current_path,
258 &mut paths,
259 max_paths,
260 )?;
261
262 Ok(paths)
263 }
264
265 #[allow(clippy::too_many_arguments)]
266 fn find_paths_recursive(
267 &self,
268 doc: &Document,
269 current: &BlockId,
270 target: &BlockId,
271 visited: &mut HashSet<BlockId>,
272 current_path: &mut Vec<BlockId>,
273 paths: &mut Vec<Vec<BlockId>>,
274 max_paths: usize,
275 ) -> Result<()> {
276 if paths.len() >= max_paths {
277 return Ok(());
278 }
279
280 if current == target {
281 paths.push(current_path.clone());
282 return Ok(());
283 }
284
285 visited.insert(*current);
286
287 for child in doc.children(current) {
289 if !visited.contains(child) {
290 current_path.push(*child);
291 self.find_paths_recursive(
292 doc,
293 child,
294 target,
295 visited,
296 current_path,
297 paths,
298 max_paths,
299 )?;
300 current_path.pop();
301 }
302 }
303
304 if let Some(block) = doc.get_block(current) {
306 for edge in &block.edges {
307 if !visited.contains(&edge.target) {
308 current_path.push(edge.target);
309 self.find_paths_recursive(
310 doc,
311 &edge.target,
312 target,
313 visited,
314 current_path,
315 paths,
316 max_paths,
317 )?;
318 current_path.pop();
319 }
320 }
321 }
322
323 visited.remove(current);
324 Ok(())
325 }
326
327 fn traverse_down(
329 &self,
330 doc: &Document,
331 start: BlockId,
332 max_depth: usize,
333 filter: &TraversalFilter,
334 output: TraversalOutput,
335 ) -> Result<TraversalResult> {
336 self.traverse_bfs(doc, start, max_depth, filter, output)
337 }
338
339 fn traverse_up(
341 &self,
342 doc: &Document,
343 start: BlockId,
344 max_depth: usize,
345 filter: &TraversalFilter,
346 output: TraversalOutput,
347 ) -> Result<TraversalResult> {
348 let mut nodes = Vec::new();
349 let mut current = start;
350 let mut depth = 0;
351
352 while depth <= max_depth {
353 if let Some(block) = doc.get_block(¤t) {
354 if self.matches_filter(block, filter) {
355 nodes.push(self.create_traversal_node(doc, ¤t, depth, None, output));
356 }
357 }
358
359 if let Some(parent) = doc.parent(¤t) {
360 current = *parent;
361 depth += 1;
362 } else {
363 break;
364 }
365 }
366
367 let summary = TraversalSummary {
368 total_nodes: nodes.len(),
369 max_depth: depth,
370 ..Default::default()
371 };
372
373 Ok(TraversalResult {
374 nodes,
375 edges: Vec::new(),
376 paths: Vec::new(),
377 summary,
378 metadata: TraversalMetadata::default(),
379 })
380 }
381
382 fn traverse_both(
384 &self,
385 doc: &Document,
386 start: BlockId,
387 max_depth: usize,
388 filter: &TraversalFilter,
389 output: TraversalOutput,
390 ) -> Result<TraversalResult> {
391 let up_result = self.traverse_up(doc, start, max_depth, filter, output)?;
392 let down_result = self.traverse_down(doc, start, max_depth, filter, output)?;
393
394 let mut seen = HashSet::new();
396 let mut nodes = Vec::new();
397
398 for node in up_result
399 .nodes
400 .into_iter()
401 .chain(down_result.nodes.into_iter())
402 {
403 if seen.insert(node.id) {
404 nodes.push(node);
405 }
406 }
407
408 let max_depth = nodes.iter().map(|n| n.depth).max().unwrap_or(0);
409 let summary = TraversalSummary {
410 total_nodes: nodes.len(),
411 max_depth,
412 ..Default::default()
413 };
414
415 Ok(TraversalResult {
416 nodes,
417 edges: Vec::new(),
418 paths: Vec::new(),
419 summary,
420 metadata: TraversalMetadata::default(),
421 })
422 }
423
424 fn traverse_siblings(
426 &self,
427 doc: &Document,
428 start: BlockId,
429 filter: &TraversalFilter,
430 output: TraversalOutput,
431 ) -> Result<TraversalResult> {
432 let mut nodes = Vec::new();
433
434 if let Some(parent) = doc.parent(&start) {
435 for sibling in doc.children(parent) {
436 if let Some(block) = doc.get_block(sibling) {
437 if self.matches_filter(block, filter) {
438 nodes.push(self.create_traversal_node(
439 doc,
440 sibling,
441 0,
442 Some(*parent),
443 output,
444 ));
445 }
446 }
447 }
448 }
449
450 let summary = TraversalSummary {
451 total_nodes: nodes.len(),
452 max_depth: 0,
453 ..Default::default()
454 };
455
456 Ok(TraversalResult {
457 nodes,
458 edges: Vec::new(),
459 paths: Vec::new(),
460 summary,
461 metadata: TraversalMetadata::default(),
462 })
463 }
464
465 fn traverse_bfs(
467 &self,
468 doc: &Document,
469 start: BlockId,
470 max_depth: usize,
471 filter: &TraversalFilter,
472 output: TraversalOutput,
473 ) -> Result<TraversalResult> {
474 let mut nodes = Vec::new();
475 let mut edges = Vec::new();
476 let mut visited = HashSet::new();
477 let mut queue = VecDeque::new();
478 let mut nodes_by_role: HashMap<String, usize> = HashMap::new();
479
480 queue.push_back((start, None::<BlockId>, 0usize));
481
482 while let Some((node_id, parent_id, depth)) = queue.pop_front() {
483 if depth > max_depth || nodes.len() >= self.config.max_nodes {
484 break;
485 }
486
487 if visited.contains(&node_id) {
488 continue;
489 }
490 visited.insert(node_id);
491
492 if let Some(block) = doc.get_block(&node_id) {
493 if self.matches_filter(block, filter) {
494 let node = self.create_traversal_node(doc, &node_id, depth, parent_id, output);
495
496 if let Some(role) = &node.semantic_role {
497 *nodes_by_role.entry(role.clone()).or_insert(0) += 1;
498 }
499
500 nodes.push(node);
501
502 for edge in &block.edges {
504 if filter.edge_types.is_empty()
505 || filter.edge_types.contains(&edge.edge_type)
506 {
507 edges.push(TraversalEdge {
508 source: node_id,
509 target: edge.target,
510 edge_type: edge.edge_type.clone(),
511 });
512 }
513 }
514 }
515
516 for child in doc.children(&node_id) {
518 if !visited.contains(child) {
519 queue.push_back((*child, Some(node_id), depth + 1));
520 }
521 }
522 }
523 }
524
525 let max_depth_found = nodes.iter().map(|n| n.depth).max().unwrap_or(0);
526 let truncated = nodes.len() >= self.config.max_nodes;
527
528 let summary = TraversalSummary {
529 total_nodes: nodes.len(),
530 total_edges: edges.len(),
531 max_depth: max_depth_found,
532 nodes_by_role,
533 truncated,
534 truncation_reason: if truncated {
535 Some(format!(
536 "Max nodes limit ({}) reached",
537 self.config.max_nodes
538 ))
539 } else {
540 None
541 },
542 };
543
544 Ok(TraversalResult {
545 nodes,
546 edges,
547 paths: Vec::new(),
548 summary,
549 metadata: TraversalMetadata::default(),
550 })
551 }
552
553 fn traverse_dfs(
555 &self,
556 doc: &Document,
557 start: BlockId,
558 max_depth: usize,
559 filter: &TraversalFilter,
560 output: TraversalOutput,
561 ) -> Result<TraversalResult> {
562 let mut nodes = Vec::new();
563 let mut edges = Vec::new();
564 let mut visited = HashSet::new();
565 let mut nodes_by_role: HashMap<String, usize> = HashMap::new();
566
567 self.dfs_recursive(
568 doc,
569 start,
570 None,
571 0,
572 max_depth,
573 filter,
574 output,
575 &mut visited,
576 &mut nodes,
577 &mut edges,
578 &mut nodes_by_role,
579 )?;
580
581 let max_depth_found = nodes.iter().map(|n| n.depth).max().unwrap_or(0);
582 let truncated = nodes.len() >= self.config.max_nodes;
583
584 let summary = TraversalSummary {
585 total_nodes: nodes.len(),
586 total_edges: edges.len(),
587 max_depth: max_depth_found,
588 nodes_by_role,
589 truncated,
590 truncation_reason: if truncated {
591 Some(format!(
592 "Max nodes limit ({}) reached",
593 self.config.max_nodes
594 ))
595 } else {
596 None
597 },
598 };
599
600 Ok(TraversalResult {
601 nodes,
602 edges,
603 paths: Vec::new(),
604 summary,
605 metadata: TraversalMetadata::default(),
606 })
607 }
608
609 #[allow(clippy::too_many_arguments)]
610 fn dfs_recursive(
611 &self,
612 doc: &Document,
613 node_id: BlockId,
614 parent_id: Option<BlockId>,
615 depth: usize,
616 max_depth: usize,
617 filter: &TraversalFilter,
618 output: TraversalOutput,
619 visited: &mut HashSet<BlockId>,
620 nodes: &mut Vec<TraversalNode>,
621 edges: &mut Vec<TraversalEdge>,
622 nodes_by_role: &mut HashMap<String, usize>,
623 ) -> Result<()> {
624 if depth > max_depth || nodes.len() >= self.config.max_nodes {
625 return Ok(());
626 }
627
628 if visited.contains(&node_id) {
629 return Ok(());
630 }
631 visited.insert(node_id);
632
633 if let Some(block) = doc.get_block(&node_id) {
634 if self.matches_filter(block, filter) {
635 let node = self.create_traversal_node(doc, &node_id, depth, parent_id, output);
636
637 if let Some(role) = &node.semantic_role {
638 *nodes_by_role.entry(role.clone()).or_insert(0) += 1;
639 }
640
641 nodes.push(node);
642
643 for edge in &block.edges {
645 if filter.edge_types.is_empty() || filter.edge_types.contains(&edge.edge_type) {
646 edges.push(TraversalEdge {
647 source: node_id,
648 target: edge.target,
649 edge_type: edge.edge_type.clone(),
650 });
651 }
652 }
653 }
654
655 for child in doc.children(&node_id) {
657 self.dfs_recursive(
658 doc,
659 *child,
660 Some(node_id),
661 depth + 1,
662 max_depth,
663 filter,
664 output,
665 visited,
666 nodes,
667 edges,
668 nodes_by_role,
669 )?;
670 }
671 }
672
673 Ok(())
674 }
675
676 fn matches_filter(&self, block: &Block, filter: &TraversalFilter) -> bool {
678 if !filter.include_roles.is_empty() {
680 let role = block
681 .metadata
682 .semantic_role
683 .as_ref()
684 .map(|r| r.category.as_str().to_string())
685 .unwrap_or_default();
686 if !filter.include_roles.contains(&role) {
687 return false;
688 }
689 }
690
691 if !filter.exclude_roles.is_empty() {
693 let role = block
694 .metadata
695 .semantic_role
696 .as_ref()
697 .map(|r| r.category.as_str().to_string())
698 .unwrap_or_default();
699 if filter.exclude_roles.contains(&role) {
700 return false;
701 }
702 }
703
704 if !filter.include_tags.is_empty() {
706 let has_tag = filter
707 .include_tags
708 .iter()
709 .any(|t| block.metadata.tags.contains(t));
710 if !has_tag {
711 return false;
712 }
713 }
714
715 if !filter.exclude_tags.is_empty() {
717 let has_excluded = filter
718 .exclude_tags
719 .iter()
720 .any(|t| block.metadata.tags.contains(t));
721 if has_excluded {
722 return false;
723 }
724 }
725
726 if let Some(ref pattern) = filter.content_pattern {
728 let content_text = self.extract_content_text(&block.content);
729 if !content_text
730 .to_lowercase()
731 .contains(&pattern.to_lowercase())
732 {
733 return false;
734 }
735 }
736
737 true
738 }
739
740 fn create_traversal_node(
742 &self,
743 doc: &Document,
744 block_id: &BlockId,
745 depth: usize,
746 parent_id: Option<BlockId>,
747 output: TraversalOutput,
748 ) -> TraversalNode {
749 let block = doc.get_block(block_id);
750 let children = doc.children(block_id);
751
752 let content_preview = match output {
753 TraversalOutput::StructureOnly => None,
754 TraversalOutput::StructureWithPreviews | TraversalOutput::StructureAndBlocks => block
755 .map(|b| {
756 let text = self.extract_content_text(&b.content);
757 if text.len() > self.config.default_preview_length {
758 format!("{}...", &text[..self.config.default_preview_length])
759 } else {
760 text
761 }
762 }),
763 };
764
765 let semantic_role = block
766 .and_then(|b| b.metadata.semantic_role.as_ref())
767 .map(|r| r.category.as_str().to_string());
768
769 let edge_count = block.map(|b| b.edges.len()).unwrap_or(0);
770
771 TraversalNode {
772 id: *block_id,
773 depth,
774 parent_id,
775 content_preview,
776 semantic_role,
777 child_count: children.len(),
778 edge_count,
779 }
780 }
781
782 fn extract_content_text(&self, content: &Content) -> String {
784 match content {
785 Content::Text(t) => t.text.clone(),
786 Content::Code(c) => c.source.clone(),
787 Content::Table(t) => format!("Table: {} rows", t.rows.len()),
788 Content::Math(m) => m.expression.clone(),
789 Content::Media(m) => m.alt_text.clone().unwrap_or_else(|| "Media".to_string()),
790 Content::Json { .. } => "JSON data".to_string(),
791 Content::Binary { .. } => "Binary data".to_string(),
792 Content::Composite { children, .. } => {
793 format!("Composite: {} children", children.len())
794 }
795 }
796 }
797}
798
799impl Default for TraversalEngine {
800 fn default() -> Self {
801 Self::new()
802 }
803}
804
805#[cfg(test)]
806mod tests {
807 use super::*;
808 use ucm_core::DocumentId;
809
810 fn create_test_document() -> Document {
811 let mut doc = Document::new(DocumentId::new("test"));
812 let root = doc.root;
813
814 let h1 = Block::new(Content::text("Chapter 1"), Some("heading1"));
816 let h1_id = doc.add_block(h1, &root).unwrap();
817
818 let p1 = Block::new(Content::text("Introduction paragraph"), Some("paragraph"));
819 doc.add_block(p1, &h1_id).unwrap();
820
821 let h2 = Block::new(Content::text("Section 1.1"), Some("heading2"));
822 let h2_id = doc.add_block(h2, &h1_id).unwrap();
823
824 let p2 = Block::new(Content::text("Section content"), Some("paragraph"));
825 doc.add_block(p2, &h2_id).unwrap();
826
827 doc
828 }
829
830 #[test]
831 fn test_bfs_traversal() {
832 let doc = create_test_document();
833 let engine = TraversalEngine::new();
834
835 let result = engine
836 .navigate(
837 &doc,
838 None,
839 NavigateDirection::BreadthFirst,
840 Some(10),
841 None,
842 TraversalOutput::StructureAndBlocks,
843 )
844 .unwrap();
845
846 assert!(!result.nodes.is_empty());
847 assert!(result.summary.total_nodes >= 4);
848 }
849
850 #[test]
851 fn test_dfs_traversal() {
852 let doc = create_test_document();
853 let engine = TraversalEngine::new();
854
855 let result = engine
856 .navigate(
857 &doc,
858 None,
859 NavigateDirection::DepthFirst,
860 Some(10),
861 None,
862 TraversalOutput::StructureAndBlocks,
863 )
864 .unwrap();
865
866 assert!(!result.nodes.is_empty());
867 }
868
869 #[test]
870 fn test_path_to_root() {
871 let doc = create_test_document();
872 let engine = TraversalEngine::new();
873
874 let root_children = doc.children(&doc.root);
876 if let Some(h1_id) = root_children.first() {
877 let h1_children = doc.children(h1_id);
878 if let Some(h2_id) = h1_children.iter().find(|id| {
879 doc.get_block(id)
880 .and_then(|b| b.metadata.semantic_role.as_ref())
881 .map(|r| r.category.as_str() == "heading2")
882 .unwrap_or(false)
883 }) {
884 let path = engine.path_to_root(&doc, h2_id).unwrap();
885 assert!(path.len() >= 2);
886 assert_eq!(path[0], doc.root);
887 }
888 }
889 }
890
891 #[test]
892 fn test_filter_by_role() {
893 let doc = create_test_document();
894 let engine = TraversalEngine::new();
895
896 let filter = TraversalFilter {
897 include_roles: vec!["heading1".to_string(), "heading2".to_string()],
898 ..Default::default()
899 };
900
901 let result = engine
902 .navigate(
903 &doc,
904 None,
905 NavigateDirection::BreadthFirst,
906 Some(10),
907 Some(filter),
908 TraversalOutput::StructureAndBlocks,
909 )
910 .unwrap();
911
912 for node in &result.nodes {
914 if let Some(role) = &node.semantic_role {
915 assert!(role.starts_with("heading"));
916 }
917 }
918 }
919
920 #[test]
921 fn test_expand_node() {
922 let doc = create_test_document();
923 let engine = TraversalEngine::new();
924
925 let result = engine
926 .expand(&doc, &doc.root, TraversalOutput::StructureAndBlocks)
927 .unwrap();
928
929 assert!(!result.nodes.is_empty());
931 }
932
933 #[test]
934 fn test_max_depth_limit() {
935 let doc = create_test_document();
936 let config = TraversalConfig {
937 max_depth: 1,
938 ..Default::default()
939 };
940 let engine = TraversalEngine::with_config(config);
941
942 let result = engine
943 .navigate(
944 &doc,
945 None,
946 NavigateDirection::BreadthFirst,
947 Some(1),
948 None,
949 TraversalOutput::StructureAndBlocks,
950 )
951 .unwrap();
952
953 for node in &result.nodes {
955 assert!(node.depth <= 1);
956 }
957 }
958}