1use crate::block::{Block, BlockState};
4use crate::edge::EdgeIndex;
5use crate::error::{Error, ErrorCode, Result, ValidationIssue};
6use crate::id::BlockId;
7use crate::metadata::TokenModel;
8use crate::version::DocumentVersion;
9use chrono::{DateTime, Utc};
10use serde::{Deserialize, Serialize};
11use std::collections::{BTreeMap, HashMap, HashSet};
12use std::str::FromStr;
13
14#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
16pub struct DocumentId(pub String);
17
18impl DocumentId {
19 pub fn new(id: impl Into<String>) -> Self {
20 Self(id.into())
21 }
22
23 pub fn generate() -> Self {
24 let ts = Utc::now().timestamp_nanos_opt().unwrap_or(0);
26 Self(format!("doc_{:x}", ts))
27 }
28}
29
30impl std::fmt::Display for DocumentId {
31 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
32 write!(f, "{}", self.0)
33 }
34}
35
36#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, Default)]
38pub struct DocumentMetadata {
39 #[serde(skip_serializing_if = "Option::is_none")]
41 pub title: Option<String>,
42
43 #[serde(skip_serializing_if = "Option::is_none")]
45 pub description: Option<String>,
46
47 #[serde(default, skip_serializing_if = "Vec::is_empty")]
49 pub authors: Vec<String>,
50
51 pub created_at: DateTime<Utc>,
53
54 pub modified_at: DateTime<Utc>,
56
57 #[serde(skip_serializing_if = "Option::is_none")]
59 pub language: Option<String>,
60
61 #[serde(default, skip_serializing_if = "HashMap::is_empty")]
63 pub custom: HashMap<String, serde_json::Value>,
64}
65
66impl DocumentMetadata {
67 pub fn new() -> Self {
68 let now = Utc::now();
69 Self {
70 created_at: now,
71 modified_at: now,
72 ..Default::default()
73 }
74 }
75
76 pub fn with_title(mut self, title: impl Into<String>) -> Self {
77 self.title = Some(title.into());
78 self
79 }
80
81 pub fn touch(&mut self) {
82 self.modified_at = Utc::now();
83 }
84}
85
86#[derive(Debug, Clone, Default)]
88pub struct DocumentIndices {
89 pub by_tag: HashMap<String, HashSet<BlockId>>,
91 pub by_role: HashMap<String, HashSet<BlockId>>,
93 pub by_content_type: HashMap<String, HashSet<BlockId>>,
95 pub by_label: HashMap<String, BlockId>,
97}
98
99impl DocumentIndices {
100 pub fn new() -> Self {
101 Self::default()
102 }
103
104 pub fn index_block(&mut self, block: &Block) {
106 let id = &block.id;
107
108 for tag in &block.metadata.tags {
110 self.by_tag.entry(tag.clone()).or_default().insert(*id);
111 }
112
113 if let Some(role) = &block.metadata.semantic_role {
115 self.by_role
116 .entry(role.category.as_str().to_string())
117 .or_default()
118 .insert(*id);
119 }
120
121 self.by_content_type
123 .entry(block.content_type().to_string())
124 .or_default()
125 .insert(*id);
126
127 if let Some(label) = &block.metadata.label {
129 self.by_label.insert(label.clone(), *id);
130 }
131 }
132
133 pub fn remove_block(&mut self, block: &Block) {
135 let id = &block.id;
136
137 for tag in &block.metadata.tags {
138 if let Some(set) = self.by_tag.get_mut(tag) {
139 set.remove(id);
140 }
141 }
142
143 if let Some(role) = &block.metadata.semantic_role {
144 if let Some(set) = self.by_role.get_mut(role.category.as_str()) {
145 set.remove(id);
146 }
147 }
148
149 if let Some(set) = self.by_content_type.get_mut(block.content_type()) {
150 set.remove(id);
151 }
152
153 if let Some(label) = &block.metadata.label {
154 self.by_label.remove(label);
155 }
156 }
157
158 pub fn rebuild(&mut self, blocks: &HashMap<BlockId, Block>) {
160 self.by_tag.clear();
161 self.by_role.clear();
162 self.by_content_type.clear();
163 self.by_label.clear();
164
165 for block in blocks.values() {
166 self.index_block(block);
167 }
168 }
169
170 pub fn find_by_tag(&self, tag: &str) -> HashSet<BlockId> {
172 self.by_tag.get(tag).cloned().unwrap_or_default()
173 }
174
175 pub fn find_by_type(&self, content_type: &str) -> HashSet<BlockId> {
177 self.by_content_type
178 .get(content_type)
179 .cloned()
180 .unwrap_or_default()
181 }
182
183 pub fn find_by_label(&self, label: &str) -> Option<BlockId> {
185 self.by_label.get(label).cloned()
186 }
187}
188
189#[derive(Debug, Clone, Serialize, Deserialize)]
194pub struct PortableDocument {
195 pub id: String,
196 pub root: String,
197 pub structure: BTreeMap<String, Vec<String>>,
198 pub blocks: BTreeMap<String, Block>,
199 pub metadata: DocumentMetadata,
200 pub version: u64,
201}
202
203impl PortableDocument {
204 pub fn from_document(doc: &Document) -> Self {
205 let mut structure = BTreeMap::new();
206 for (parent, children) in &doc.structure {
207 let mut ordered = children.clone();
208 ordered.sort_by_key(|id| id.to_string());
209 structure.insert(
210 parent.to_string(),
211 ordered.into_iter().map(|id| id.to_string()).collect(),
212 );
213 }
214
215 let mut blocks = BTreeMap::new();
216 for (id, block) in &doc.blocks {
217 blocks.insert(id.to_string(), block.clone());
218 }
219
220 Self {
221 id: doc.id.0.clone(),
222 root: doc.root.to_string(),
223 structure,
224 blocks,
225 metadata: doc.metadata.clone(),
226 version: doc.version.counter,
227 }
228 }
229
230 pub fn to_document(&self) -> Result<Document> {
231 let root =
232 BlockId::from_str(&self.root).map_err(|_| Error::InvalidBlockId(self.root.clone()))?;
233
234 let mut structure = HashMap::new();
235 for (parent, children) in &self.structure {
236 let parent_id =
237 BlockId::from_str(parent).map_err(|_| Error::InvalidBlockId(parent.clone()))?;
238 let parsed_children = children
239 .iter()
240 .map(|child| {
241 BlockId::from_str(child).map_err(|_| Error::InvalidBlockId(child.clone()))
242 })
243 .collect::<Result<Vec<_>>>()?;
244 structure.insert(parent_id, parsed_children);
245 }
246
247 let mut blocks = HashMap::new();
248 for (id, block) in &self.blocks {
249 let block_id = BlockId::from_str(id).map_err(|_| Error::InvalidBlockId(id.clone()))?;
250 blocks.insert(block_id, block.clone());
251 }
252
253 let mut doc = Document {
254 id: DocumentId::new(self.id.clone()),
255 root,
256 structure,
257 blocks,
258 metadata: self.metadata.clone(),
259 indices: DocumentIndices::default(),
260 edge_index: EdgeIndex::default(),
261 version: DocumentVersion {
262 counter: self.version,
263 timestamp: Utc::now(),
264 state_hash: [0u8; 8],
265 },
266 };
267 doc.rebuild_indices();
268 Ok(doc)
269 }
270}
271
272#[derive(Debug, Clone)]
274pub struct Document {
275 pub id: DocumentId,
277
278 pub root: BlockId,
280
281 pub structure: HashMap<BlockId, Vec<BlockId>>,
283
284 pub blocks: HashMap<BlockId, Block>,
286
287 pub metadata: DocumentMetadata,
289
290 pub indices: DocumentIndices,
292
293 pub edge_index: EdgeIndex,
295
296 pub version: DocumentVersion,
298}
299
300impl Document {
301 pub fn new(id: DocumentId) -> Self {
303 let root = Block::root();
304 let root_id = root.id;
305
306 let mut blocks = HashMap::new();
307 blocks.insert(root_id, root);
308
309 Self {
310 id,
311 root: root_id,
312 structure: HashMap::new(),
313 blocks,
314 metadata: DocumentMetadata::new(),
315 indices: DocumentIndices::new(),
316 edge_index: EdgeIndex::new(),
317 version: DocumentVersion::initial(),
318 }
319 }
320
321 pub fn create() -> Self {
323 Self::new(DocumentId::generate())
324 }
325
326 pub fn to_portable(&self) -> PortableDocument {
327 PortableDocument::from_document(self)
328 }
329
330 pub fn from_portable(portable: &PortableDocument) -> Result<Self> {
331 portable.to_document()
332 }
333
334 pub fn with_metadata(mut self, metadata: DocumentMetadata) -> Self {
336 self.metadata = metadata;
337 self
338 }
339
340 pub fn get_block(&self, id: &BlockId) -> Option<&Block> {
342 self.blocks.get(id)
343 }
344
345 pub fn get_block_mut(&mut self, id: &BlockId) -> Option<&mut Block> {
347 self.blocks.get_mut(id)
348 }
349
350 pub fn children(&self, parent: &BlockId) -> &[BlockId] {
352 self.structure
353 .get(parent)
354 .map(|v| v.as_slice())
355 .unwrap_or(&[])
356 }
357
358 pub fn parent(&self, child: &BlockId) -> Option<&BlockId> {
360 for (parent, children) in &self.structure {
361 if children.contains(child) {
362 return Some(parent);
363 }
364 }
365 None
366 }
367
368 pub fn parent_of(&self, child: &BlockId) -> Option<&Block> {
370 self.parent(child).and_then(|id| self.blocks.get(id))
371 }
372
373 pub fn add_block(&mut self, block: Block, parent: &BlockId) -> Result<BlockId> {
375 if !self.blocks.contains_key(parent) {
376 return Err(Error::BlockNotFound(parent.to_string()));
377 }
378
379 let id = block.id;
380
381 for edge in &block.edges {
383 self.edge_index.add_edge(&id, edge);
384 }
385
386 self.indices.index_block(&block);
388
389 self.blocks.insert(id, block);
391
392 self.structure.entry(*parent).or_default().push(id);
394
395 self.touch();
396 Ok(id)
397 }
398
399 pub fn add_block_at(
401 &mut self,
402 block: Block,
403 parent: &BlockId,
404 index: usize,
405 ) -> Result<BlockId> {
406 if !self.blocks.contains_key(parent) {
407 return Err(Error::BlockNotFound(parent.to_string()));
408 }
409
410 let id = block.id;
411
412 for edge in &block.edges {
413 self.edge_index.add_edge(&id, edge);
414 }
415
416 self.indices.index_block(&block);
417 self.blocks.insert(id, block);
418
419 let children = self.structure.entry(*parent).or_default();
420 let insert_idx = index.min(children.len());
421 children.insert(insert_idx, id);
422
423 self.touch();
424 Ok(id)
425 }
426
427 pub fn add_edge(
429 &mut self,
430 source: &BlockId,
431 edge_type: crate::edge::EdgeType,
432 target: BlockId,
433 ) {
434 let edge = crate::edge::Edge::new(edge_type, target);
435 if let Some(block) = self.blocks.get_mut(source) {
436 block.edges.push(edge.clone());
437 self.edge_index.add_edge(source, &edge);
438 }
439 }
440
441 pub fn remove_from_structure(&mut self, id: &BlockId) -> bool {
443 let mut removed = false;
444 for children in self.structure.values_mut() {
445 let len_before = children.len();
446 children.retain(|c| c != id);
447 if children.len() < len_before {
448 removed = true;
449 }
450 }
451 if removed {
452 self.touch();
453 }
454 removed
455 }
456
457 pub fn delete_block(&mut self, id: &BlockId) -> Result<Block> {
459 self.remove_from_structure(id);
461
462 self.structure.remove(id);
464
465 self.edge_index.remove_block(id);
467
468 let block = self
470 .blocks
471 .remove(id)
472 .ok_or_else(|| Error::BlockNotFound(id.to_string()))?;
473
474 self.indices.remove_block(&block);
476
477 self.touch();
478 Ok(block)
479 }
480
481 pub fn delete_cascade(&mut self, id: &BlockId) -> Result<Vec<Block>> {
483 let descendants = self.descendants(id);
484 let mut deleted = Vec::new();
485
486 for desc_id in descendants.into_iter().rev() {
488 if let Ok(block) = self.delete_block(&desc_id) {
489 deleted.push(block);
490 }
491 }
492
493 if let Ok(block) = self.delete_block(id) {
494 deleted.push(block);
495 }
496
497 Ok(deleted)
498 }
499
500 pub fn move_block(&mut self, id: &BlockId, new_parent: &BlockId) -> Result<()> {
502 if !self.blocks.contains_key(id) {
503 return Err(Error::BlockNotFound(id.to_string()));
504 }
505 if !self.blocks.contains_key(new_parent) {
506 return Err(Error::BlockNotFound(new_parent.to_string()));
507 }
508
509 if self.is_ancestor(id, new_parent) {
511 return Err(Error::CycleDetected(id.to_string()));
512 }
513
514 self.remove_from_structure(id);
515 self.structure.entry(*new_parent).or_default().push(*id);
516
517 self.touch();
518 Ok(())
519 }
520
521 pub fn move_block_at(
523 &mut self,
524 id: &BlockId,
525 new_parent: &BlockId,
526 index: usize,
527 ) -> Result<()> {
528 if !self.blocks.contains_key(id) {
529 return Err(Error::BlockNotFound(id.to_string()));
530 }
531 if !self.blocks.contains_key(new_parent) {
532 return Err(Error::BlockNotFound(new_parent.to_string()));
533 }
534
535 if self.is_ancestor(id, new_parent) {
536 return Err(Error::CycleDetected(id.to_string()));
537 }
538
539 self.remove_from_structure(id);
540 let children = self.structure.entry(*new_parent).or_default();
541 let insert_idx = index.min(children.len());
542 children.insert(insert_idx, *id);
543
544 self.touch();
545 Ok(())
546 }
547
548 pub fn move_block_before(&mut self, id: &BlockId, before: &BlockId) -> Result<()> {
550 if !self.blocks.contains_key(id) {
551 return Err(Error::BlockNotFound(id.to_string()));
552 }
553 if !self.blocks.contains_key(before) {
554 return Err(Error::BlockNotFound(before.to_string()));
555 }
556
557 let parent = *self
558 .parent(before)
559 .ok_or_else(|| Error::BlockNotFound(format!("parent of {}", before)))?;
560
561 if self.is_ancestor(id, &parent) {
562 return Err(Error::CycleDetected(id.to_string()));
563 }
564
565 self.remove_from_structure(id);
566 let children = self.structure.entry(parent).or_default();
567
568 if let Some(pos) = children.iter().position(|child| child == before) {
569 children.insert(pos, *id);
570 } else {
571 children.push(*id);
572 }
573
574 self.touch();
575 Ok(())
576 }
577
578 pub fn move_block_after(&mut self, id: &BlockId, after: &BlockId) -> Result<()> {
580 if !self.blocks.contains_key(id) {
581 return Err(Error::BlockNotFound(id.to_string()));
582 }
583 if !self.blocks.contains_key(after) {
584 return Err(Error::BlockNotFound(after.to_string()));
585 }
586
587 let parent = *self
588 .parent(after)
589 .ok_or_else(|| Error::BlockNotFound(format!("parent of {}", after)))?;
590
591 if self.is_ancestor(id, &parent) {
592 return Err(Error::CycleDetected(id.to_string()));
593 }
594
595 self.remove_from_structure(id);
596 let children = self.structure.entry(parent).or_default();
597
598 if let Some(pos) = children.iter().position(|child| child == after) {
599 children.insert(pos + 1, *id);
600 } else {
601 children.push(*id);
602 }
603
604 self.touch();
605 Ok(())
606 }
607
608 pub fn is_ancestor(&self, potential_ancestor: &BlockId, block: &BlockId) -> bool {
610 let mut current = Some(*block);
611 while let Some(id) = current {
612 if &id == potential_ancestor {
613 return true;
614 }
615 current = self.parent(&id).cloned();
616 }
617 false
618 }
619
620 pub fn descendants(&self, id: &BlockId) -> Vec<BlockId> {
622 let mut result = Vec::new();
623 let mut stack = vec![*id];
624
625 while let Some(current) = stack.pop() {
626 if let Some(children) = self.structure.get(¤t) {
627 for child in children {
628 result.push(*child);
629 stack.push(*child);
630 }
631 }
632 }
633
634 result
635 }
636
637 pub fn is_reachable(&self, id: &BlockId) -> bool {
639 if id == &self.root {
640 return true;
641 }
642
643 let mut visited = HashSet::new();
644 let mut stack = vec![self.root];
645
646 while let Some(current) = stack.pop() {
647 if ¤t == id {
648 return true;
649 }
650 if visited.insert(current) {
651 if let Some(children) = self.structure.get(¤t) {
652 stack.extend(children.iter().cloned());
653 }
654 }
655 }
656
657 false
658 }
659
660 pub fn find_orphans(&self) -> Vec<BlockId> {
662 let mut reachable = HashSet::new();
663 let mut stack = vec![self.root];
664
665 while let Some(current) = stack.pop() {
666 if reachable.insert(current) {
667 if let Some(children) = self.structure.get(¤t) {
668 stack.extend(children.iter().cloned());
669 }
670 }
671 }
672
673 self.blocks
674 .keys()
675 .filter(|id| !reachable.contains(*id))
676 .cloned()
677 .collect()
678 }
679
680 pub fn block_state(&self, id: &BlockId) -> BlockState {
682 if !self.blocks.contains_key(id) {
683 BlockState::Deleted
684 } else if self.is_reachable(id) {
685 BlockState::Live
686 } else {
687 BlockState::Orphaned
688 }
689 }
690
691 pub fn prune_unreachable(&mut self) -> Vec<Block> {
693 let orphans = self.find_orphans();
694 let mut pruned = Vec::new();
695
696 for id in orphans {
697 if let Ok(block) = self.delete_block(&id) {
698 pruned.push(block);
699 }
700 }
701
702 pruned
703 }
704
705 pub fn block_count(&self) -> usize {
707 self.blocks.len()
708 }
709
710 pub fn total_tokens(&self, model: TokenModel) -> u32 {
712 self.blocks
713 .values()
714 .map(|b| b.token_estimate().for_model(model))
715 .sum()
716 }
717
718 pub fn validate(&self) -> Vec<ValidationIssue> {
720 let mut issues = Vec::new();
721
722 let orphans = self.find_orphans();
724 for orphan in orphans {
725 issues.push(ValidationIssue::warning(
726 ErrorCode::E203OrphanedBlock,
727 format!("Block {} is unreachable from root", orphan),
728 ));
729 }
730
731 if self.has_cycles() {
733 issues.push(ValidationIssue::error(
734 ErrorCode::E201CycleDetected,
735 "Document structure contains a cycle",
736 ));
737 }
738
739 for block in self.blocks.values() {
741 for edge in &block.edges {
742 if !self.blocks.contains_key(&edge.target) {
743 issues.push(ValidationIssue::error(
744 ErrorCode::E001BlockNotFound,
745 format!(
746 "Block {} references non-existent block {}",
747 block.id, edge.target
748 ),
749 ));
750 }
751 }
752 }
753
754 issues
755 }
756
757 fn has_cycles(&self) -> bool {
759 let mut visited = HashSet::new();
760 let mut rec_stack = HashSet::new();
761
762 fn dfs(
763 node: &BlockId,
764 structure: &HashMap<BlockId, Vec<BlockId>>,
765 visited: &mut HashSet<BlockId>,
766 rec_stack: &mut HashSet<BlockId>,
767 ) -> bool {
768 visited.insert(*node);
769 rec_stack.insert(*node);
770
771 if let Some(children) = structure.get(node) {
772 for child in children {
773 if !visited.contains(child) {
774 if dfs(child, structure, visited, rec_stack) {
775 return true;
776 }
777 } else if rec_stack.contains(child) {
778 return true;
779 }
780 }
781 }
782
783 rec_stack.remove(node);
784 false
785 }
786
787 dfs(&self.root, &self.structure, &mut visited, &mut rec_stack)
788 }
789
790 fn touch(&mut self) {
792 self.metadata.touch();
793 self.version.increment([0u8; 8]); }
795
796 pub fn rebuild_indices(&mut self) {
798 self.indices.rebuild(&self.blocks);
799
800 self.edge_index.clear();
801 for block in self.blocks.values() {
802 for edge in &block.edges {
803 self.edge_index.add_edge(&block.id, edge);
804 }
805 }
806 }
807}
808
809#[cfg(test)]
810mod tests {
811 use super::*;
812 use crate::content::Content;
813
814 #[test]
815 fn test_document_creation() {
816 let doc = Document::create();
817 assert_eq!(doc.block_count(), 1); assert!(doc.blocks.contains_key(&doc.root));
819 }
820
821 #[test]
822 fn test_add_block() {
823 let mut doc = Document::create();
824 let block = Block::new(Content::text("Hello"), Some("intro"));
825 let root = doc.root;
826
827 let id = doc.add_block(block, &root).unwrap();
828 assert_eq!(doc.block_count(), 2);
829 assert!(doc.is_reachable(&id));
830 }
831
832 #[test]
833 fn test_move_block() {
834 let mut doc = Document::create();
835 let root = doc.root;
836
837 let parent1 = doc
838 .add_block(Block::new(Content::text("Parent 1"), None), &root)
839 .unwrap();
840 let parent2 = doc
841 .add_block(Block::new(Content::text("Parent 2"), None), &root)
842 .unwrap();
843 let child = doc
844 .add_block(Block::new(Content::text("Child"), None), &parent1)
845 .unwrap();
846
847 assert!(doc.children(&parent1).contains(&child));
848
849 doc.move_block(&child, &parent2).unwrap();
850
851 assert!(!doc.children(&parent1).contains(&child));
852 assert!(doc.children(&parent2).contains(&child));
853 }
854
855 #[test]
856 fn test_cycle_detection() {
857 let mut doc = Document::create();
858 let root = doc.root;
859
860 let a = doc
861 .add_block(Block::new(Content::text("A"), None), &root)
862 .unwrap();
863 let b = doc
864 .add_block(Block::new(Content::text("B"), None), &a)
865 .unwrap();
866
867 let result = doc.move_block(&a, &b);
869 assert!(result.is_err());
870 }
871
872 #[test]
873 fn test_orphan_detection() {
874 let mut doc = Document::create();
875 let root = doc.root;
876
877 let block = Block::new(Content::text("Test"), None);
878 let id = doc.add_block(block, &root).unwrap();
879
880 assert!(doc.find_orphans().is_empty());
881
882 doc.remove_from_structure(&id);
883 assert_eq!(doc.find_orphans(), vec![id]);
884 }
885
886 #[test]
887 fn test_cascade_delete() {
888 let mut doc = Document::create();
889 let root = doc.root;
890
891 let parent = doc
892 .add_block(Block::new(Content::text("Parent"), None), &root)
893 .unwrap();
894 let _child1 = doc
895 .add_block(Block::new(Content::text("Child 1"), None), &parent)
896 .unwrap();
897 let _child2 = doc
898 .add_block(Block::new(Content::text("Child 2"), None), &parent)
899 .unwrap();
900
901 assert_eq!(doc.block_count(), 4);
902
903 let deleted = doc.delete_cascade(&parent).unwrap();
904 assert_eq!(deleted.len(), 3); assert_eq!(doc.block_count(), 1); }
907
908 #[test]
909 fn test_indices() {
910 let mut doc = Document::create();
911 let root = doc.root;
912
913 let block = Block::new(Content::text("Test"), None)
914 .with_tag("important")
915 .with_label("My Block");
916 let id = doc.add_block(block, &root).unwrap();
917
918 assert!(doc.indices.find_by_tag("important").contains(&id));
919 assert_eq!(doc.indices.find_by_label("My Block"), Some(id));
920 }
921}