// cml_rs/lib.rs
//! # CML (Content Markup Language)
//!
//! CML is a semantic markup language designed for long-term interpretable content storage.
//! It separates content from presentation and enables efficient vector-based semantic search.
//!
//! ## CML Structure
//!
//! ```xml
//! <cml profile="core" version="0.2" encoding="utf-8">
//! <header>
//! <title>Document Title</title>
//! <author role="author">Author Name</author>
//! <date type="created" when="2025-12-22"/>
//! </header>
//! <body>
//! <section id="intro">
//! <heading size="1">Introduction</heading>
//! <paragraph>Content here with <em>inline elements</em>.</paragraph>
//! </section>
//! </body>
//! <footer>
//! </footer>
//! </cml>
//! ```
//!
//! ## Features
//!
//! - **Profile-based schemas**: Domain-specific document structures (law, code, edu)
//! - **Pathless references**: `namespace:identifier` format (e.g., `president:47`)
//! - **Active documents**: Currency conversion, date localization (future)
//! - **Validation**: ID uniqueness, reference integrity, structural correctness
//!
//! ## Usage
//!
//! ```rust
//! use cml::{CmlParser, CmlGenerator, CmlValidator};
//!
//! // Parse CML
//! let xml = r#"<cml profile="core" version="0.2" encoding="utf-8">
//! <header><title>Test</title></header>
//! <body><paragraph>Hello!</paragraph></body>
//! <footer></footer>
//! </cml>"#;
//!
//! let doc = CmlParser::parse_str(xml)?;
//!
//! // Validate
//! CmlValidator::validate(&doc)?;
//!
//! // Generate
//! let xml = CmlGenerator::generate(&doc)?;
//! # Ok::<(), cml::CmlError>(())
//! ```

55// Core CML modules
56pub mod generator;
57pub mod parser;
58pub mod types;
59pub mod validator;
60
61// Utilities
62// TODO: Revisit chunker and embedding_store after the semantic compiler move.
63pub mod chunker;
64pub mod embedding_store;
65pub mod id_generator;
66
67// Re-export primary types and functions
68pub use generator::CmlGenerator;
69pub use parser::CmlParser;
70pub use types::*;
71pub use validator::CmlValidator;
72
73// Re-export utilities
74// TODO: Revisit chunker and embedding_store after the semantic compiler move.
75pub use chunker::{Chunk, CmlChunker, CHUNK_OVERLAP_TOKENS, MAX_CHUNK_TOKENS};
76pub use embedding_store::{ChunkMatch, EmbeddingStore, MatchType, EMBEDDING_DIM};
77pub use id_generator::{BookstackIdGenerator, CodeIdGenerator, ElementId, LegalIdGenerator};
78
79/// Result type for CML operations
80pub type Result<T> = std::result::Result<T, CmlError>;
81
82/// Errors that can occur during CML processing
83#[derive(Debug, thiserror::Error)]
84pub enum CmlError {
85 #[error("XML parsing error: {0}")]
86 XmlError(#[from] quick_xml::Error),
87
88 #[error("XML attribute error: {0}")]
89 AttrError(#[from] quick_xml::events::attributes::AttrError),
90
91 #[error("Invalid document structure: {0}")]
92 InvalidStructure(String),
93
94 #[error("Missing required attribute: {0}")]
95 MissingAttribute(String),
96
97 #[error("Invalid attribute value: {0}")]
98 InvalidAttribute(String),
99
100 #[error("Schema validation failed: {0}")]
101 ValidationError(String),
102
103 #[error("Duplicate ID: {0}")]
104 DuplicateId(String),
105
106 #[error("Reference not found: {0}")]
107 ReferenceNotFound(String),
108
109 #[error("IO error: {0}")]
110 IoError(#[from] std::io::Error),
111
112 #[error("UTF-8 error: {0}")]
113 Utf8Error(#[from] std::string::FromUtf8Error),
114}