Skip to main content

ucm_core/
document.rs

1//! Document - a collection of blocks with hierarchical structure.
2
3use crate::block::{Block, BlockState};
4use crate::edge::EdgeIndex;
5use crate::error::{Error, ErrorCode, Result, ValidationIssue};
6use crate::id::BlockId;
7use crate::metadata::TokenModel;
8use crate::version::DocumentVersion;
9use chrono::{DateTime, Utc};
10use serde::{Deserialize, Serialize};
11use std::collections::{HashMap, HashSet};
12
13/// Document identifier
14#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
15pub struct DocumentId(pub String);
16
17impl DocumentId {
18    pub fn new(id: impl Into<String>) -> Self {
19        Self(id.into())
20    }
21
22    pub fn generate() -> Self {
23        // Use chrono for WASM compatibility (chrono supports wasmbind feature)
24        let ts = Utc::now().timestamp_nanos_opt().unwrap_or(0);
25        Self(format!("doc_{:x}", ts))
26    }
27}
28
29impl std::fmt::Display for DocumentId {
30    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
31        write!(f, "{}", self.0)
32    }
33}
34
35/// Document metadata
36#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, Default)]
37pub struct DocumentMetadata {
38    /// Document title
39    #[serde(skip_serializing_if = "Option::is_none")]
40    pub title: Option<String>,
41
42    /// Document description
43    #[serde(skip_serializing_if = "Option::is_none")]
44    pub description: Option<String>,
45
46    /// Authors
47    #[serde(default, skip_serializing_if = "Vec::is_empty")]
48    pub authors: Vec<String>,
49
50    /// Creation timestamp
51    pub created_at: DateTime<Utc>,
52
53    /// Last modification timestamp
54    pub modified_at: DateTime<Utc>,
55
56    /// Language (ISO 639-1)
57    #[serde(skip_serializing_if = "Option::is_none")]
58    pub language: Option<String>,
59
60    /// Custom metadata
61    #[serde(default, skip_serializing_if = "HashMap::is_empty")]
62    pub custom: HashMap<String, serde_json::Value>,
63}
64
65impl DocumentMetadata {
66    pub fn new() -> Self {
67        let now = Utc::now();
68        Self {
69            created_at: now,
70            modified_at: now,
71            ..Default::default()
72        }
73    }
74
75    pub fn with_title(mut self, title: impl Into<String>) -> Self {
76        self.title = Some(title.into());
77        self
78    }
79
80    pub fn touch(&mut self) {
81        self.modified_at = Utc::now();
82    }
83}
84
85/// Secondary indices for fast lookup
86#[derive(Debug, Clone, Default)]
87pub struct DocumentIndices {
88    /// Blocks by tag
89    pub by_tag: HashMap<String, HashSet<BlockId>>,
90    /// Blocks by semantic role category
91    pub by_role: HashMap<String, HashSet<BlockId>>,
92    /// Blocks by content type
93    pub by_content_type: HashMap<String, HashSet<BlockId>>,
94    /// Blocks by label
95    pub by_label: HashMap<String, BlockId>,
96}
97
98impl DocumentIndices {
99    pub fn new() -> Self {
100        Self::default()
101    }
102
103    /// Index a block
104    pub fn index_block(&mut self, block: &Block) {
105        let id = &block.id;
106
107        // Index by tags
108        for tag in &block.metadata.tags {
109            self.by_tag.entry(tag.clone()).or_default().insert(*id);
110        }
111
112        // Index by semantic role
113        if let Some(role) = &block.metadata.semantic_role {
114            self.by_role
115                .entry(role.category.as_str().to_string())
116                .or_default()
117                .insert(*id);
118        }
119
120        // Index by content type
121        self.by_content_type
122            .entry(block.content_type().to_string())
123            .or_default()
124            .insert(*id);
125
126        // Index by label
127        if let Some(label) = &block.metadata.label {
128            self.by_label.insert(label.clone(), *id);
129        }
130    }
131
132    /// Remove a block from indices
133    pub fn remove_block(&mut self, block: &Block) {
134        let id = &block.id;
135
136        for tag in &block.metadata.tags {
137            if let Some(set) = self.by_tag.get_mut(tag) {
138                set.remove(id);
139            }
140        }
141
142        if let Some(role) = &block.metadata.semantic_role {
143            if let Some(set) = self.by_role.get_mut(role.category.as_str()) {
144                set.remove(id);
145            }
146        }
147
148        if let Some(set) = self.by_content_type.get_mut(block.content_type()) {
149            set.remove(id);
150        }
151
152        if let Some(label) = &block.metadata.label {
153            self.by_label.remove(label);
154        }
155    }
156
157    /// Rebuild all indices from blocks
158    pub fn rebuild(&mut self, blocks: &HashMap<BlockId, Block>) {
159        self.by_tag.clear();
160        self.by_role.clear();
161        self.by_content_type.clear();
162        self.by_label.clear();
163
164        for block in blocks.values() {
165            self.index_block(block);
166        }
167    }
168
169    /// Find blocks by tag
170    pub fn find_by_tag(&self, tag: &str) -> HashSet<BlockId> {
171        self.by_tag.get(tag).cloned().unwrap_or_default()
172    }
173
174    /// Find blocks by content type
175    pub fn find_by_type(&self, content_type: &str) -> HashSet<BlockId> {
176        self.by_content_type
177            .get(content_type)
178            .cloned()
179            .unwrap_or_default()
180    }
181
182    /// Find block by label
183    pub fn find_by_label(&self, label: &str) -> Option<BlockId> {
184        self.by_label.get(label).cloned()
185    }
186}
187
188/// A UCM document is a collection of blocks with hierarchical structure.
189#[derive(Debug, Clone)]
190pub struct Document {
191    /// Document identifier
192    pub id: DocumentId,
193
194    /// Root block ID
195    pub root: BlockId,
196
197    /// Adjacency map: parent -> ordered children
198    pub structure: HashMap<BlockId, Vec<BlockId>>,
199
200    /// All blocks in the document
201    pub blocks: HashMap<BlockId, Block>,
202
203    /// Document-level metadata
204    pub metadata: DocumentMetadata,
205
206    /// Secondary indices for fast lookup
207    pub indices: DocumentIndices,
208
209    /// Edge index for relationship traversal
210    pub edge_index: EdgeIndex,
211
212    /// Document version for concurrency control
213    pub version: DocumentVersion,
214}
215
216impl Document {
217    /// Create a new empty document
218    pub fn new(id: DocumentId) -> Self {
219        let root = Block::root();
220        let root_id = root.id;
221
222        let mut blocks = HashMap::new();
223        blocks.insert(root_id, root);
224
225        Self {
226            id,
227            root: root_id,
228            structure: HashMap::new(),
229            blocks,
230            metadata: DocumentMetadata::new(),
231            indices: DocumentIndices::new(),
232            edge_index: EdgeIndex::new(),
233            version: DocumentVersion::initial(),
234        }
235    }
236
237    /// Create with a generated ID
238    pub fn create() -> Self {
239        Self::new(DocumentId::generate())
240    }
241
242    /// Set document metadata
243    pub fn with_metadata(mut self, metadata: DocumentMetadata) -> Self {
244        self.metadata = metadata;
245        self
246    }
247
248    /// Get a block by ID
249    pub fn get_block(&self, id: &BlockId) -> Option<&Block> {
250        self.blocks.get(id)
251    }
252
253    /// Get a mutable block by ID
254    pub fn get_block_mut(&mut self, id: &BlockId) -> Option<&mut Block> {
255        self.blocks.get_mut(id)
256    }
257
258    /// Get children of a block
259    pub fn children(&self, parent: &BlockId) -> &[BlockId] {
260        self.structure
261            .get(parent)
262            .map(|v| v.as_slice())
263            .unwrap_or(&[])
264    }
265
266    /// Get parent of a block
267    pub fn parent(&self, child: &BlockId) -> Option<&BlockId> {
268        for (parent, children) in &self.structure {
269            if children.contains(child) {
270                return Some(parent);
271            }
272        }
273        None
274    }
275
276    /// Get the parent block (convenience method)
277    pub fn parent_of(&self, child: &BlockId) -> Option<&Block> {
278        self.parent(child).and_then(|id| self.blocks.get(id))
279    }
280
281    /// Add a block to the document
282    pub fn add_block(&mut self, block: Block, parent: &BlockId) -> Result<BlockId> {
283        if !self.blocks.contains_key(parent) {
284            return Err(Error::BlockNotFound(parent.to_string()));
285        }
286
287        let id = block.id;
288
289        // Index edges
290        for edge in &block.edges {
291            self.edge_index.add_edge(&id, edge);
292        }
293
294        // Index block
295        self.indices.index_block(&block);
296
297        // Add to blocks
298        self.blocks.insert(id, block);
299
300        // Add to structure
301        self.structure.entry(*parent).or_default().push(id);
302
303        self.touch();
304        Ok(id)
305    }
306
307    /// Add a block at a specific position
308    pub fn add_block_at(
309        &mut self,
310        block: Block,
311        parent: &BlockId,
312        index: usize,
313    ) -> Result<BlockId> {
314        if !self.blocks.contains_key(parent) {
315            return Err(Error::BlockNotFound(parent.to_string()));
316        }
317
318        let id = block.id;
319
320        for edge in &block.edges {
321            self.edge_index.add_edge(&id, edge);
322        }
323
324        self.indices.index_block(&block);
325        self.blocks.insert(id, block);
326
327        let children = self.structure.entry(*parent).or_default();
328        let insert_idx = index.min(children.len());
329        children.insert(insert_idx, id);
330
331        self.touch();
332        Ok(id)
333    }
334
335    /// Add an edge between two blocks (wrapper for edge_index)
336    pub fn add_edge(
337        &mut self,
338        source: &BlockId,
339        edge_type: crate::edge::EdgeType,
340        target: BlockId,
341    ) {
342        let edge = crate::edge::Edge::new(edge_type, target);
343        self.edge_index.add_edge(source, &edge);
344    }
345
346    /// Remove a block from the structure (makes it orphaned)
347    pub fn remove_from_structure(&mut self, id: &BlockId) -> bool {
348        let mut removed = false;
349        for children in self.structure.values_mut() {
350            let len_before = children.len();
351            children.retain(|c| c != id);
352            if children.len() < len_before {
353                removed = true;
354            }
355        }
356        if removed {
357            self.touch();
358        }
359        removed
360    }
361
362    /// Delete a block completely
363    pub fn delete_block(&mut self, id: &BlockId) -> Result<Block> {
364        // Remove from structure
365        self.remove_from_structure(id);
366
367        // Remove children structure
368        self.structure.remove(id);
369
370        // Remove from edge index
371        self.edge_index.remove_block(id);
372
373        // Remove and return block
374        let block = self
375            .blocks
376            .remove(id)
377            .ok_or_else(|| Error::BlockNotFound(id.to_string()))?;
378
379        // Remove from indices
380        self.indices.remove_block(&block);
381
382        self.touch();
383        Ok(block)
384    }
385
386    /// Delete a block and all its descendants
387    pub fn delete_cascade(&mut self, id: &BlockId) -> Result<Vec<Block>> {
388        let descendants = self.descendants(id);
389        let mut deleted = Vec::new();
390
391        // Delete in reverse order (children first)
392        for desc_id in descendants.into_iter().rev() {
393            if let Ok(block) = self.delete_block(&desc_id) {
394                deleted.push(block);
395            }
396        }
397
398        if let Ok(block) = self.delete_block(id) {
399            deleted.push(block);
400        }
401
402        Ok(deleted)
403    }
404
405    /// Move a block to a new parent
406    pub fn move_block(&mut self, id: &BlockId, new_parent: &BlockId) -> Result<()> {
407        if !self.blocks.contains_key(id) {
408            return Err(Error::BlockNotFound(id.to_string()));
409        }
410        if !self.blocks.contains_key(new_parent) {
411            return Err(Error::BlockNotFound(new_parent.to_string()));
412        }
413
414        // Check for cycle
415        if self.is_ancestor(id, new_parent) {
416            return Err(Error::CycleDetected(id.to_string()));
417        }
418
419        self.remove_from_structure(id);
420        self.structure.entry(*new_parent).or_default().push(*id);
421
422        self.touch();
423        Ok(())
424    }
425
426    /// Move a block to a specific position under a parent
427    pub fn move_block_at(
428        &mut self,
429        id: &BlockId,
430        new_parent: &BlockId,
431        index: usize,
432    ) -> Result<()> {
433        if !self.blocks.contains_key(id) {
434            return Err(Error::BlockNotFound(id.to_string()));
435        }
436        if !self.blocks.contains_key(new_parent) {
437            return Err(Error::BlockNotFound(new_parent.to_string()));
438        }
439
440        if self.is_ancestor(id, new_parent) {
441            return Err(Error::CycleDetected(id.to_string()));
442        }
443
444        self.remove_from_structure(id);
445        let children = self.structure.entry(*new_parent).or_default();
446        let insert_idx = index.min(children.len());
447        children.insert(insert_idx, *id);
448
449        self.touch();
450        Ok(())
451    }
452
453    /// Move a block before another block (sibling ordering)
454    pub fn move_block_before(&mut self, id: &BlockId, before: &BlockId) -> Result<()> {
455        if !self.blocks.contains_key(id) {
456            return Err(Error::BlockNotFound(id.to_string()));
457        }
458        if !self.blocks.contains_key(before) {
459            return Err(Error::BlockNotFound(before.to_string()));
460        }
461
462        let parent = *self
463            .parent(before)
464            .ok_or_else(|| Error::BlockNotFound(format!("parent of {}", before)))?;
465
466        if self.is_ancestor(id, &parent) {
467            return Err(Error::CycleDetected(id.to_string()));
468        }
469
470        self.remove_from_structure(id);
471        let children = self.structure.entry(parent).or_default();
472
473        if let Some(pos) = children.iter().position(|child| child == before) {
474            children.insert(pos, *id);
475        } else {
476            children.push(*id);
477        }
478
479        self.touch();
480        Ok(())
481    }
482
483    /// Move a block after another block (sibling ordering)
484    pub fn move_block_after(&mut self, id: &BlockId, after: &BlockId) -> Result<()> {
485        if !self.blocks.contains_key(id) {
486            return Err(Error::BlockNotFound(id.to_string()));
487        }
488        if !self.blocks.contains_key(after) {
489            return Err(Error::BlockNotFound(after.to_string()));
490        }
491
492        let parent = *self
493            .parent(after)
494            .ok_or_else(|| Error::BlockNotFound(format!("parent of {}", after)))?;
495
496        if self.is_ancestor(id, &parent) {
497            return Err(Error::CycleDetected(id.to_string()));
498        }
499
500        self.remove_from_structure(id);
501        let children = self.structure.entry(parent).or_default();
502
503        if let Some(pos) = children.iter().position(|child| child == after) {
504            children.insert(pos + 1, *id);
505        } else {
506            children.push(*id);
507        }
508
509        self.touch();
510        Ok(())
511    }
512
513    /// Check if a block is an ancestor of another
514    pub fn is_ancestor(&self, potential_ancestor: &BlockId, block: &BlockId) -> bool {
515        let mut current = Some(*block);
516        while let Some(id) = current {
517            if &id == potential_ancestor {
518                return true;
519            }
520            current = self.parent(&id).cloned();
521        }
522        false
523    }
524
525    /// Get all descendants of a block
526    pub fn descendants(&self, id: &BlockId) -> Vec<BlockId> {
527        let mut result = Vec::new();
528        let mut stack = vec![*id];
529
530        while let Some(current) = stack.pop() {
531            if let Some(children) = self.structure.get(&current) {
532                for child in children {
533                    result.push(*child);
534                    stack.push(*child);
535                }
536            }
537        }
538
539        result
540    }
541
542    /// Check if a block is reachable from root
543    pub fn is_reachable(&self, id: &BlockId) -> bool {
544        if id == &self.root {
545            return true;
546        }
547
548        let mut visited = HashSet::new();
549        let mut stack = vec![self.root];
550
551        while let Some(current) = stack.pop() {
552            if &current == id {
553                return true;
554            }
555            if visited.insert(current) {
556                if let Some(children) = self.structure.get(&current) {
557                    stack.extend(children.iter().cloned());
558                }
559            }
560        }
561
562        false
563    }
564
565    /// Find all orphaned blocks
566    pub fn find_orphans(&self) -> Vec<BlockId> {
567        let mut reachable = HashSet::new();
568        let mut stack = vec![self.root];
569
570        while let Some(current) = stack.pop() {
571            if reachable.insert(current) {
572                if let Some(children) = self.structure.get(&current) {
573                    stack.extend(children.iter().cloned());
574                }
575            }
576        }
577
578        self.blocks
579            .keys()
580            .filter(|id| !reachable.contains(*id))
581            .cloned()
582            .collect()
583    }
584
585    /// Get block state
586    pub fn block_state(&self, id: &BlockId) -> BlockState {
587        if !self.blocks.contains_key(id) {
588            BlockState::Deleted
589        } else if self.is_reachable(id) {
590            BlockState::Live
591        } else {
592            BlockState::Orphaned
593        }
594    }
595
596    /// Prune unreachable blocks
597    pub fn prune_unreachable(&mut self) -> Vec<Block> {
598        let orphans = self.find_orphans();
599        let mut pruned = Vec::new();
600
601        for id in orphans {
602            if let Ok(block) = self.delete_block(&id) {
603                pruned.push(block);
604            }
605        }
606
607        pruned
608    }
609
610    /// Get total block count
611    pub fn block_count(&self) -> usize {
612        self.blocks.len()
613    }
614
615    /// Get total token estimate
616    pub fn total_tokens(&self, model: TokenModel) -> u32 {
617        self.blocks
618            .values()
619            .map(|b| b.token_estimate().for_model(model))
620            .sum()
621    }
622
623    /// Validate document structure
624    pub fn validate(&self) -> Vec<ValidationIssue> {
625        let mut issues = Vec::new();
626
627        // Check for orphans
628        let orphans = self.find_orphans();
629        for orphan in orphans {
630            issues.push(ValidationIssue::warning(
631                ErrorCode::E203OrphanedBlock,
632                format!("Block {} is unreachable from root", orphan),
633            ));
634        }
635
636        // Check for cycles
637        if self.has_cycles() {
638            issues.push(ValidationIssue::error(
639                ErrorCode::E201CycleDetected,
640                "Document structure contains a cycle",
641            ));
642        }
643
644        // Check for dangling references
645        for block in self.blocks.values() {
646            for edge in &block.edges {
647                if !self.blocks.contains_key(&edge.target) {
648                    issues.push(ValidationIssue::error(
649                        ErrorCode::E001BlockNotFound,
650                        format!(
651                            "Block {} references non-existent block {}",
652                            block.id, edge.target
653                        ),
654                    ));
655                }
656            }
657        }
658
659        issues
660    }
661
662    /// Check for cycles in structure
663    fn has_cycles(&self) -> bool {
664        let mut visited = HashSet::new();
665        let mut rec_stack = HashSet::new();
666
667        fn dfs(
668            node: &BlockId,
669            structure: &HashMap<BlockId, Vec<BlockId>>,
670            visited: &mut HashSet<BlockId>,
671            rec_stack: &mut HashSet<BlockId>,
672        ) -> bool {
673            visited.insert(*node);
674            rec_stack.insert(*node);
675
676            if let Some(children) = structure.get(node) {
677                for child in children {
678                    if !visited.contains(child) {
679                        if dfs(child, structure, visited, rec_stack) {
680                            return true;
681                        }
682                    } else if rec_stack.contains(child) {
683                        return true;
684                    }
685                }
686            }
687
688            rec_stack.remove(node);
689            false
690        }
691
692        dfs(&self.root, &self.structure, &mut visited, &mut rec_stack)
693    }
694
695    /// Touch document (update modified timestamp and version)
696    fn touch(&mut self) {
697        self.metadata.touch();
698        self.version.increment([0u8; 8]); // TODO: compute actual state hash
699    }
700
701    /// Rebuild all indices
702    pub fn rebuild_indices(&mut self) {
703        self.indices.rebuild(&self.blocks);
704
705        self.edge_index.clear();
706        for block in self.blocks.values() {
707            for edge in &block.edges {
708                self.edge_index.add_edge(&block.id, edge);
709            }
710        }
711    }
712}
713
714#[cfg(test)]
715mod tests {
716    use super::*;
717    use crate::content::Content;
718
719    #[test]
720    fn test_document_creation() {
721        let doc = Document::create();
722        assert_eq!(doc.block_count(), 1); // Just root
723        assert!(doc.blocks.contains_key(&doc.root));
724    }
725
726    #[test]
727    fn test_add_block() {
728        let mut doc = Document::create();
729        let block = Block::new(Content::text("Hello"), Some("intro"));
730        let root = doc.root;
731
732        let id = doc.add_block(block, &root).unwrap();
733        assert_eq!(doc.block_count(), 2);
734        assert!(doc.is_reachable(&id));
735    }
736
737    #[test]
738    fn test_move_block() {
739        let mut doc = Document::create();
740        let root = doc.root;
741
742        let parent1 = doc
743            .add_block(Block::new(Content::text("Parent 1"), None), &root)
744            .unwrap();
745        let parent2 = doc
746            .add_block(Block::new(Content::text("Parent 2"), None), &root)
747            .unwrap();
748        let child = doc
749            .add_block(Block::new(Content::text("Child"), None), &parent1)
750            .unwrap();
751
752        assert!(doc.children(&parent1).contains(&child));
753
754        doc.move_block(&child, &parent2).unwrap();
755
756        assert!(!doc.children(&parent1).contains(&child));
757        assert!(doc.children(&parent2).contains(&child));
758    }
759
760    #[test]
761    fn test_cycle_detection() {
762        let mut doc = Document::create();
763        let root = doc.root;
764
765        let a = doc
766            .add_block(Block::new(Content::text("A"), None), &root)
767            .unwrap();
768        let b = doc
769            .add_block(Block::new(Content::text("B"), None), &a)
770            .unwrap();
771
772        // Try to move A under B (would create cycle)
773        let result = doc.move_block(&a, &b);
774        assert!(result.is_err());
775    }
776
777    #[test]
778    fn test_orphan_detection() {
779        let mut doc = Document::create();
780        let root = doc.root;
781
782        let block = Block::new(Content::text("Test"), None);
783        let id = doc.add_block(block, &root).unwrap();
784
785        assert!(doc.find_orphans().is_empty());
786
787        doc.remove_from_structure(&id);
788        assert_eq!(doc.find_orphans(), vec![id]);
789    }
790
791    #[test]
792    fn test_cascade_delete() {
793        let mut doc = Document::create();
794        let root = doc.root;
795
796        let parent = doc
797            .add_block(Block::new(Content::text("Parent"), None), &root)
798            .unwrap();
799        let _child1 = doc
800            .add_block(Block::new(Content::text("Child 1"), None), &parent)
801            .unwrap();
802        let _child2 = doc
803            .add_block(Block::new(Content::text("Child 2"), None), &parent)
804            .unwrap();
805
806        assert_eq!(doc.block_count(), 4);
807
808        let deleted = doc.delete_cascade(&parent).unwrap();
809        assert_eq!(deleted.len(), 3); // parent + 2 children
810        assert_eq!(doc.block_count(), 1); // just root
811    }
812
813    #[test]
814    fn test_indices() {
815        let mut doc = Document::create();
816        let root = doc.root;
817
818        let block = Block::new(Content::text("Test"), None)
819            .with_tag("important")
820            .with_label("My Block");
821        let id = doc.add_block(block, &root).unwrap();
822
823        assert!(doc.indices.find_by_tag("important").contains(&id));
824        assert_eq!(doc.indices.find_by_label("My Block"), Some(id));
825    }
826}