// cml_rs/lib.rs
//! # CML (Content Markup Language)
//!
//! CML is a semantic markup language designed for long-term interpretable content storage.
//! It separates content from presentation and enables efficient vector-based semantic search.
//!
//! ## CML Structure
//!
//! ```xml
//! <cml profile="core" version="0.2" encoding="utf-8">
//!   <header>
//!     <title>Document Title</title>
//!     <author role="author">Author Name</author>
//!     <date type="created" when="2025-12-22"/>
//!   </header>
//!   <body>
//!     <section id="intro">
//!       <heading size="1">Introduction</heading>
//!       <paragraph>Content here with <em>inline elements</em>.</paragraph>
//!     </section>
//!   </body>
//!   <footer>
//!   </footer>
//! </cml>
//! ```
//!
//! ## Features
//!
//! - **Profile-based schemas**: Domain-specific document structures (law, code, edu)
//! - **Pathless references**: `namespace:identifier` format (e.g., `president:47`)
//! - **Active documents**: Currency conversion, date localization (future)
//! - **Validation**: ID uniqueness, reference integrity, structural correctness
//!
//! ## Usage
//!
//! ```rust
//! use cml_rs::{CmlParser, CmlGenerator, CmlValidator};
//!
//! // Parse CML
//! let xml = r#"<cml profile="core" version="0.2" encoding="utf-8">
//! <header><title>Test</title></header>
//! <body><paragraph>Hello!</paragraph></body>
//! <footer></footer>
//! </cml>"#;
//!
//! let doc = CmlParser::parse_str(xml)?;
//!
//! // Validate
//! CmlValidator::validate(&doc)?;
//!
//! // Generate
//! let xml = CmlGenerator::generate(&doc)?;
//! # Ok::<(), cml_rs::CmlError>(())
//! ```

55// Core CML modules
56pub mod generator;
57pub mod parser;
58pub mod profile;
59pub mod types;
60pub mod validator;
61
62// Utilities
63// TODO: Revisit chunker and embedding_store after the semantic compiler move.
64pub mod chunker;
65pub mod embedding_store;
66pub mod id_generator;
67
68// Re-export primary types and functions
69pub use generator::CmlGenerator;
70pub use parser::CmlParser;
71pub use profile::{Profile, ProfileRegistry, ResolvedProfile};
72pub use types::*;
73pub use validator::CmlValidator;
74
75// Re-export utilities
76// TODO: Revisit chunker and embedding_store after the semantic compiler move.
77pub use chunker::{Chunk, CmlChunker, CHUNK_OVERLAP_TOKENS, MAX_CHUNK_TOKENS};
78pub use embedding_store::{ChunkMatch, EmbeddingStore, MatchType, EMBEDDING_DIM};
79pub use id_generator::{BookstackIdGenerator, CodeIdGenerator, ElementId, LegalIdGenerator};
80
81/// Result type for CML operations
82pub type Result<T> = std::result::Result<T, CmlError>;
83
84/// Errors that can occur during CML processing
85#[derive(Debug, thiserror::Error)]
86pub enum CmlError {
87 #[error("XML parsing error: {0}")]
88 XmlError(#[from] quick_xml::Error),
89
90 #[error("XML attribute error: {0}")]
91 AttrError(#[from] quick_xml::events::attributes::AttrError),
92
93 #[error("Invalid document structure: {0}")]
94 InvalidStructure(String),
95
96 #[error("Missing required attribute: {0}")]
97 MissingAttribute(String),
98
99 #[error("Invalid attribute value: {0}")]
100 InvalidAttribute(String),
101
102 #[error("Schema validation failed: {0}")]
103 ValidationError(String),
104
105 #[error("Duplicate ID: {0}")]
106 DuplicateId(String),
107
108 #[error("Reference not found: {0}")]
109 ReferenceNotFound(String),
110
111 #[error("IO error: {0}")]
112 IoError(#[from] std::io::Error),
113
114 #[error("UTF-8 error: {0}")]
115 Utf8Error(#[from] std::string::FromUtf8Error),
116}