1use crate::block::{Block, BlockState};
4use crate::edge::EdgeIndex;
5use crate::error::{Error, ErrorCode, Result, ValidationIssue};
6use crate::id::BlockId;
7use crate::metadata::TokenModel;
8use crate::version::DocumentVersion;
9use chrono::{DateTime, Utc};
10use serde::{Deserialize, Serialize};
11use std::collections::{HashMap, HashSet};
12
13#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
15pub struct DocumentId(pub String);
16
17impl DocumentId {
18 pub fn new(id: impl Into<String>) -> Self {
19 Self(id.into())
20 }
21
22 pub fn generate() -> Self {
23 let ts = Utc::now().timestamp_nanos_opt().unwrap_or(0);
25 Self(format!("doc_{:x}", ts))
26 }
27}
28
29impl std::fmt::Display for DocumentId {
30 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
31 write!(f, "{}", self.0)
32 }
33}
34
35#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, Default)]
37pub struct DocumentMetadata {
38 #[serde(skip_serializing_if = "Option::is_none")]
40 pub title: Option<String>,
41
42 #[serde(skip_serializing_if = "Option::is_none")]
44 pub description: Option<String>,
45
46 #[serde(default, skip_serializing_if = "Vec::is_empty")]
48 pub authors: Vec<String>,
49
50 pub created_at: DateTime<Utc>,
52
53 pub modified_at: DateTime<Utc>,
55
56 #[serde(skip_serializing_if = "Option::is_none")]
58 pub language: Option<String>,
59
60 #[serde(default, skip_serializing_if = "HashMap::is_empty")]
62 pub custom: HashMap<String, serde_json::Value>,
63}
64
65impl DocumentMetadata {
66 pub fn new() -> Self {
67 let now = Utc::now();
68 Self {
69 created_at: now,
70 modified_at: now,
71 ..Default::default()
72 }
73 }
74
75 pub fn with_title(mut self, title: impl Into<String>) -> Self {
76 self.title = Some(title.into());
77 self
78 }
79
80 pub fn touch(&mut self) {
81 self.modified_at = Utc::now();
82 }
83}
84
85#[derive(Debug, Clone, Default)]
87pub struct DocumentIndices {
88 pub by_tag: HashMap<String, HashSet<BlockId>>,
90 pub by_role: HashMap<String, HashSet<BlockId>>,
92 pub by_content_type: HashMap<String, HashSet<BlockId>>,
94 pub by_label: HashMap<String, BlockId>,
96}
97
98impl DocumentIndices {
99 pub fn new() -> Self {
100 Self::default()
101 }
102
103 pub fn index_block(&mut self, block: &Block) {
105 let id = &block.id;
106
107 for tag in &block.metadata.tags {
109 self.by_tag.entry(tag.clone()).or_default().insert(*id);
110 }
111
112 if let Some(role) = &block.metadata.semantic_role {
114 self.by_role
115 .entry(role.category.as_str().to_string())
116 .or_default()
117 .insert(*id);
118 }
119
120 self.by_content_type
122 .entry(block.content_type().to_string())
123 .or_default()
124 .insert(*id);
125
126 if let Some(label) = &block.metadata.label {
128 self.by_label.insert(label.clone(), *id);
129 }
130 }
131
132 pub fn remove_block(&mut self, block: &Block) {
134 let id = &block.id;
135
136 for tag in &block.metadata.tags {
137 if let Some(set) = self.by_tag.get_mut(tag) {
138 set.remove(id);
139 }
140 }
141
142 if let Some(role) = &block.metadata.semantic_role {
143 if let Some(set) = self.by_role.get_mut(role.category.as_str()) {
144 set.remove(id);
145 }
146 }
147
148 if let Some(set) = self.by_content_type.get_mut(block.content_type()) {
149 set.remove(id);
150 }
151
152 if let Some(label) = &block.metadata.label {
153 self.by_label.remove(label);
154 }
155 }
156
157 pub fn rebuild(&mut self, blocks: &HashMap<BlockId, Block>) {
159 self.by_tag.clear();
160 self.by_role.clear();
161 self.by_content_type.clear();
162 self.by_label.clear();
163
164 for block in blocks.values() {
165 self.index_block(block);
166 }
167 }
168
169 pub fn find_by_tag(&self, tag: &str) -> HashSet<BlockId> {
171 self.by_tag.get(tag).cloned().unwrap_or_default()
172 }
173
174 pub fn find_by_type(&self, content_type: &str) -> HashSet<BlockId> {
176 self.by_content_type
177 .get(content_type)
178 .cloned()
179 .unwrap_or_default()
180 }
181
182 pub fn find_by_label(&self, label: &str) -> Option<BlockId> {
184 self.by_label.get(label).cloned()
185 }
186}
187
188#[derive(Debug, Clone)]
190pub struct Document {
191 pub id: DocumentId,
193
194 pub root: BlockId,
196
197 pub structure: HashMap<BlockId, Vec<BlockId>>,
199
200 pub blocks: HashMap<BlockId, Block>,
202
203 pub metadata: DocumentMetadata,
205
206 pub indices: DocumentIndices,
208
209 pub edge_index: EdgeIndex,
211
212 pub version: DocumentVersion,
214}
215
216impl Document {
217 pub fn new(id: DocumentId) -> Self {
219 let root = Block::root();
220 let root_id = root.id;
221
222 let mut blocks = HashMap::new();
223 blocks.insert(root_id, root);
224
225 Self {
226 id,
227 root: root_id,
228 structure: HashMap::new(),
229 blocks,
230 metadata: DocumentMetadata::new(),
231 indices: DocumentIndices::new(),
232 edge_index: EdgeIndex::new(),
233 version: DocumentVersion::initial(),
234 }
235 }
236
237 pub fn create() -> Self {
239 Self::new(DocumentId::generate())
240 }
241
242 pub fn with_metadata(mut self, metadata: DocumentMetadata) -> Self {
244 self.metadata = metadata;
245 self
246 }
247
248 pub fn get_block(&self, id: &BlockId) -> Option<&Block> {
250 self.blocks.get(id)
251 }
252
253 pub fn get_block_mut(&mut self, id: &BlockId) -> Option<&mut Block> {
255 self.blocks.get_mut(id)
256 }
257
258 pub fn children(&self, parent: &BlockId) -> &[BlockId] {
260 self.structure
261 .get(parent)
262 .map(|v| v.as_slice())
263 .unwrap_or(&[])
264 }
265
266 pub fn parent(&self, child: &BlockId) -> Option<&BlockId> {
268 for (parent, children) in &self.structure {
269 if children.contains(child) {
270 return Some(parent);
271 }
272 }
273 None
274 }
275
276 pub fn parent_of(&self, child: &BlockId) -> Option<&Block> {
278 self.parent(child).and_then(|id| self.blocks.get(id))
279 }
280
281 pub fn add_block(&mut self, block: Block, parent: &BlockId) -> Result<BlockId> {
283 if !self.blocks.contains_key(parent) {
284 return Err(Error::BlockNotFound(parent.to_string()));
285 }
286
287 let id = block.id;
288
289 for edge in &block.edges {
291 self.edge_index.add_edge(&id, edge);
292 }
293
294 self.indices.index_block(&block);
296
297 self.blocks.insert(id, block);
299
300 self.structure.entry(*parent).or_default().push(id);
302
303 self.touch();
304 Ok(id)
305 }
306
307 pub fn add_block_at(
309 &mut self,
310 block: Block,
311 parent: &BlockId,
312 index: usize,
313 ) -> Result<BlockId> {
314 if !self.blocks.contains_key(parent) {
315 return Err(Error::BlockNotFound(parent.to_string()));
316 }
317
318 let id = block.id;
319
320 for edge in &block.edges {
321 self.edge_index.add_edge(&id, edge);
322 }
323
324 self.indices.index_block(&block);
325 self.blocks.insert(id, block);
326
327 let children = self.structure.entry(*parent).or_default();
328 let insert_idx = index.min(children.len());
329 children.insert(insert_idx, id);
330
331 self.touch();
332 Ok(id)
333 }
334
335 pub fn add_edge(
337 &mut self,
338 source: &BlockId,
339 edge_type: crate::edge::EdgeType,
340 target: BlockId,
341 ) {
342 let edge = crate::edge::Edge::new(edge_type, target);
343 self.edge_index.add_edge(source, &edge);
344 }
345
346 pub fn remove_from_structure(&mut self, id: &BlockId) -> bool {
348 let mut removed = false;
349 for children in self.structure.values_mut() {
350 let len_before = children.len();
351 children.retain(|c| c != id);
352 if children.len() < len_before {
353 removed = true;
354 }
355 }
356 if removed {
357 self.touch();
358 }
359 removed
360 }
361
362 pub fn delete_block(&mut self, id: &BlockId) -> Result<Block> {
364 self.remove_from_structure(id);
366
367 self.structure.remove(id);
369
370 self.edge_index.remove_block(id);
372
373 let block = self
375 .blocks
376 .remove(id)
377 .ok_or_else(|| Error::BlockNotFound(id.to_string()))?;
378
379 self.indices.remove_block(&block);
381
382 self.touch();
383 Ok(block)
384 }
385
386 pub fn delete_cascade(&mut self, id: &BlockId) -> Result<Vec<Block>> {
388 let descendants = self.descendants(id);
389 let mut deleted = Vec::new();
390
391 for desc_id in descendants.into_iter().rev() {
393 if let Ok(block) = self.delete_block(&desc_id) {
394 deleted.push(block);
395 }
396 }
397
398 if let Ok(block) = self.delete_block(id) {
399 deleted.push(block);
400 }
401
402 Ok(deleted)
403 }
404
405 pub fn move_block(&mut self, id: &BlockId, new_parent: &BlockId) -> Result<()> {
407 if !self.blocks.contains_key(id) {
408 return Err(Error::BlockNotFound(id.to_string()));
409 }
410 if !self.blocks.contains_key(new_parent) {
411 return Err(Error::BlockNotFound(new_parent.to_string()));
412 }
413
414 if self.is_ancestor(id, new_parent) {
416 return Err(Error::CycleDetected(id.to_string()));
417 }
418
419 self.remove_from_structure(id);
420 self.structure.entry(*new_parent).or_default().push(*id);
421
422 self.touch();
423 Ok(())
424 }
425
426 pub fn move_block_at(
428 &mut self,
429 id: &BlockId,
430 new_parent: &BlockId,
431 index: usize,
432 ) -> Result<()> {
433 if !self.blocks.contains_key(id) {
434 return Err(Error::BlockNotFound(id.to_string()));
435 }
436 if !self.blocks.contains_key(new_parent) {
437 return Err(Error::BlockNotFound(new_parent.to_string()));
438 }
439
440 if self.is_ancestor(id, new_parent) {
441 return Err(Error::CycleDetected(id.to_string()));
442 }
443
444 self.remove_from_structure(id);
445 let children = self.structure.entry(*new_parent).or_default();
446 let insert_idx = index.min(children.len());
447 children.insert(insert_idx, *id);
448
449 self.touch();
450 Ok(())
451 }
452
453 pub fn move_block_before(&mut self, id: &BlockId, before: &BlockId) -> Result<()> {
455 if !self.blocks.contains_key(id) {
456 return Err(Error::BlockNotFound(id.to_string()));
457 }
458 if !self.blocks.contains_key(before) {
459 return Err(Error::BlockNotFound(before.to_string()));
460 }
461
462 let parent = *self
463 .parent(before)
464 .ok_or_else(|| Error::BlockNotFound(format!("parent of {}", before)))?;
465
466 if self.is_ancestor(id, &parent) {
467 return Err(Error::CycleDetected(id.to_string()));
468 }
469
470 self.remove_from_structure(id);
471 let children = self.structure.entry(parent).or_default();
472
473 if let Some(pos) = children.iter().position(|child| child == before) {
474 children.insert(pos, *id);
475 } else {
476 children.push(*id);
477 }
478
479 self.touch();
480 Ok(())
481 }
482
483 pub fn move_block_after(&mut self, id: &BlockId, after: &BlockId) -> Result<()> {
485 if !self.blocks.contains_key(id) {
486 return Err(Error::BlockNotFound(id.to_string()));
487 }
488 if !self.blocks.contains_key(after) {
489 return Err(Error::BlockNotFound(after.to_string()));
490 }
491
492 let parent = *self
493 .parent(after)
494 .ok_or_else(|| Error::BlockNotFound(format!("parent of {}", after)))?;
495
496 if self.is_ancestor(id, &parent) {
497 return Err(Error::CycleDetected(id.to_string()));
498 }
499
500 self.remove_from_structure(id);
501 let children = self.structure.entry(parent).or_default();
502
503 if let Some(pos) = children.iter().position(|child| child == after) {
504 children.insert(pos + 1, *id);
505 } else {
506 children.push(*id);
507 }
508
509 self.touch();
510 Ok(())
511 }
512
513 pub fn is_ancestor(&self, potential_ancestor: &BlockId, block: &BlockId) -> bool {
515 let mut current = Some(*block);
516 while let Some(id) = current {
517 if &id == potential_ancestor {
518 return true;
519 }
520 current = self.parent(&id).cloned();
521 }
522 false
523 }
524
525 pub fn descendants(&self, id: &BlockId) -> Vec<BlockId> {
527 let mut result = Vec::new();
528 let mut stack = vec![*id];
529
530 while let Some(current) = stack.pop() {
531 if let Some(children) = self.structure.get(¤t) {
532 for child in children {
533 result.push(*child);
534 stack.push(*child);
535 }
536 }
537 }
538
539 result
540 }
541
542 pub fn is_reachable(&self, id: &BlockId) -> bool {
544 if id == &self.root {
545 return true;
546 }
547
548 let mut visited = HashSet::new();
549 let mut stack = vec![self.root];
550
551 while let Some(current) = stack.pop() {
552 if ¤t == id {
553 return true;
554 }
555 if visited.insert(current) {
556 if let Some(children) = self.structure.get(¤t) {
557 stack.extend(children.iter().cloned());
558 }
559 }
560 }
561
562 false
563 }
564
565 pub fn find_orphans(&self) -> Vec<BlockId> {
567 let mut reachable = HashSet::new();
568 let mut stack = vec![self.root];
569
570 while let Some(current) = stack.pop() {
571 if reachable.insert(current) {
572 if let Some(children) = self.structure.get(¤t) {
573 stack.extend(children.iter().cloned());
574 }
575 }
576 }
577
578 self.blocks
579 .keys()
580 .filter(|id| !reachable.contains(*id))
581 .cloned()
582 .collect()
583 }
584
585 pub fn block_state(&self, id: &BlockId) -> BlockState {
587 if !self.blocks.contains_key(id) {
588 BlockState::Deleted
589 } else if self.is_reachable(id) {
590 BlockState::Live
591 } else {
592 BlockState::Orphaned
593 }
594 }
595
596 pub fn prune_unreachable(&mut self) -> Vec<Block> {
598 let orphans = self.find_orphans();
599 let mut pruned = Vec::new();
600
601 for id in orphans {
602 if let Ok(block) = self.delete_block(&id) {
603 pruned.push(block);
604 }
605 }
606
607 pruned
608 }
609
610 pub fn block_count(&self) -> usize {
612 self.blocks.len()
613 }
614
615 pub fn total_tokens(&self, model: TokenModel) -> u32 {
617 self.blocks
618 .values()
619 .map(|b| b.token_estimate().for_model(model))
620 .sum()
621 }
622
623 pub fn validate(&self) -> Vec<ValidationIssue> {
625 let mut issues = Vec::new();
626
627 let orphans = self.find_orphans();
629 for orphan in orphans {
630 issues.push(ValidationIssue::warning(
631 ErrorCode::E203OrphanedBlock,
632 format!("Block {} is unreachable from root", orphan),
633 ));
634 }
635
636 if self.has_cycles() {
638 issues.push(ValidationIssue::error(
639 ErrorCode::E201CycleDetected,
640 "Document structure contains a cycle",
641 ));
642 }
643
644 for block in self.blocks.values() {
646 for edge in &block.edges {
647 if !self.blocks.contains_key(&edge.target) {
648 issues.push(ValidationIssue::error(
649 ErrorCode::E001BlockNotFound,
650 format!(
651 "Block {} references non-existent block {}",
652 block.id, edge.target
653 ),
654 ));
655 }
656 }
657 }
658
659 issues
660 }
661
662 fn has_cycles(&self) -> bool {
664 let mut visited = HashSet::new();
665 let mut rec_stack = HashSet::new();
666
667 fn dfs(
668 node: &BlockId,
669 structure: &HashMap<BlockId, Vec<BlockId>>,
670 visited: &mut HashSet<BlockId>,
671 rec_stack: &mut HashSet<BlockId>,
672 ) -> bool {
673 visited.insert(*node);
674 rec_stack.insert(*node);
675
676 if let Some(children) = structure.get(node) {
677 for child in children {
678 if !visited.contains(child) {
679 if dfs(child, structure, visited, rec_stack) {
680 return true;
681 }
682 } else if rec_stack.contains(child) {
683 return true;
684 }
685 }
686 }
687
688 rec_stack.remove(node);
689 false
690 }
691
692 dfs(&self.root, &self.structure, &mut visited, &mut rec_stack)
693 }
694
695 fn touch(&mut self) {
697 self.metadata.touch();
698 self.version.increment([0u8; 8]); }
700
701 pub fn rebuild_indices(&mut self) {
703 self.indices.rebuild(&self.blocks);
704
705 self.edge_index.clear();
706 for block in self.blocks.values() {
707 for edge in &block.edges {
708 self.edge_index.add_edge(&block.id, edge);
709 }
710 }
711 }
712}
713
714#[cfg(test)]
715mod tests {
716 use super::*;
717 use crate::content::Content;
718
719 #[test]
720 fn test_document_creation() {
721 let doc = Document::create();
722 assert_eq!(doc.block_count(), 1); assert!(doc.blocks.contains_key(&doc.root));
724 }
725
726 #[test]
727 fn test_add_block() {
728 let mut doc = Document::create();
729 let block = Block::new(Content::text("Hello"), Some("intro"));
730 let root = doc.root;
731
732 let id = doc.add_block(block, &root).unwrap();
733 assert_eq!(doc.block_count(), 2);
734 assert!(doc.is_reachable(&id));
735 }
736
737 #[test]
738 fn test_move_block() {
739 let mut doc = Document::create();
740 let root = doc.root;
741
742 let parent1 = doc
743 .add_block(Block::new(Content::text("Parent 1"), None), &root)
744 .unwrap();
745 let parent2 = doc
746 .add_block(Block::new(Content::text("Parent 2"), None), &root)
747 .unwrap();
748 let child = doc
749 .add_block(Block::new(Content::text("Child"), None), &parent1)
750 .unwrap();
751
752 assert!(doc.children(&parent1).contains(&child));
753
754 doc.move_block(&child, &parent2).unwrap();
755
756 assert!(!doc.children(&parent1).contains(&child));
757 assert!(doc.children(&parent2).contains(&child));
758 }
759
760 #[test]
761 fn test_cycle_detection() {
762 let mut doc = Document::create();
763 let root = doc.root;
764
765 let a = doc
766 .add_block(Block::new(Content::text("A"), None), &root)
767 .unwrap();
768 let b = doc
769 .add_block(Block::new(Content::text("B"), None), &a)
770 .unwrap();
771
772 let result = doc.move_block(&a, &b);
774 assert!(result.is_err());
775 }
776
777 #[test]
778 fn test_orphan_detection() {
779 let mut doc = Document::create();
780 let root = doc.root;
781
782 let block = Block::new(Content::text("Test"), None);
783 let id = doc.add_block(block, &root).unwrap();
784
785 assert!(doc.find_orphans().is_empty());
786
787 doc.remove_from_structure(&id);
788 assert_eq!(doc.find_orphans(), vec![id]);
789 }
790
791 #[test]
792 fn test_cascade_delete() {
793 let mut doc = Document::create();
794 let root = doc.root;
795
796 let parent = doc
797 .add_block(Block::new(Content::text("Parent"), None), &root)
798 .unwrap();
799 let _child1 = doc
800 .add_block(Block::new(Content::text("Child 1"), None), &parent)
801 .unwrap();
802 let _child2 = doc
803 .add_block(Block::new(Content::text("Child 2"), None), &parent)
804 .unwrap();
805
806 assert_eq!(doc.block_count(), 4);
807
808 let deleted = doc.delete_cascade(&parent).unwrap();
809 assert_eq!(deleted.len(), 3); assert_eq!(doc.block_count(), 1); }
812
813 #[test]
814 fn test_indices() {
815 let mut doc = Document::create();
816 let root = doc.root;
817
818 let block = Block::new(Content::text("Test"), None)
819 .with_tag("important")
820 .with_label("My Block");
821 let id = doc.add_block(block, &root).unwrap();
822
823 assert!(doc.indices.find_by_tag("important").contains(&id));
824 assert_eq!(doc.indices.find_by_label("My Block"), Some(id));
825 }
826}