Skip to main content

cml_rs/
id_generator.rs

//! Hybrid ID generation system for CML elements
//!
//! Generates stable, hierarchical IDs based on document structure position,
//! with content hash validation for change detection.
//!
//! # ID Format Examples
//!
//! ## code:api Profile
//! - Module: `std.vec`
//! - Struct: `std.vec.Vec`
//! - Method: `std.vec.Vec.push`
//! - Function: `std.vec.from_elem`
//!
//! ## legal:constitution Profile
//! - Article: `us.constitution.art.1`
//! - Section: `us.constitution.art.1.sec.8`
//! - Clause: `us.constitution.art.1.sec.8.cl.3`
//! - Paragraph: `us.constitution.art.1.sec.8.cl.3.para.a`
//! - Amendment: `us.constitution.amendment.14`
//!
//! ## bookstack:wiki Profile
//! - Shelf: `shelf-programming`
//! - Book: `book-rust-guide`
//! - Chapter: `book-rust-guide.chapter-collections`
//! - Page: `book-rust-guide.chapter-collections.page-vectors`
24
25use sha2::{Digest, Sha256};
26use std::fmt;
27
/// Element ID with content hash validation
///
/// Pairs a position-based hierarchical identifier with a hash of the
/// element's content. Equality and hashing take both fields into account,
/// so two values with the same `id` but different `content_hash` are
/// distinct keys in a `HashSet`/`HashMap`.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct ElementId {
    /// Position-based hierarchical ID
    pub id: String,

    /// SHA-256 hash of element content as `sha256:` followed by lowercase
    /// hex; left empty when the ID was created without hashing any content
    pub content_hash: String,
}
37
38impl ElementId {
39    /// Create a new element ID with content hash
40    pub fn new(id: impl Into<String>, content: &str) -> Self {
41        let id = id.into();
42        let content_hash = Self::hash_content(content);
43        Self { id, content_hash }
44    }
45
46    /// Create an element ID without computing hash (use for parent references)
47    pub fn from_id(id: impl Into<String>) -> Self {
48        Self {
49            id: id.into(),
50            content_hash: String::new(),
51        }
52    }
53
54    /// Compute SHA-256 hash of content
55    fn hash_content(content: &str) -> String {
56        let mut hasher = Sha256::new();
57        hasher.update(content.as_bytes());
58        format!("sha256:{:x}", hasher.finalize())
59    }
60
61    /// Verify that content matches the stored hash
62    pub fn verify(&self, content: &str) -> bool {
63        if self.content_hash.is_empty() {
64            return true; // No hash to verify
65        }
66        Self::hash_content(content) == self.content_hash
67    }
68
69    /// Get just the ID string
70    pub fn as_str(&self) -> &str {
71        &self.id
72    }
73}
74
75impl fmt::Display for ElementId {
76    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
77        write!(f, "{}", self.id)
78    }
79}
80
81impl From<String> for ElementId {
82    fn from(id: String) -> Self {
83        Self::from_id(id)
84    }
85}
86
87impl From<&str> for ElementId {
88    fn from(id: &str) -> Self {
89        Self::from_id(id)
90    }
91}
92
/// ID generator for code:api profile
///
/// Every ID it produces starts with the stored namespace, followed by
/// dot-separated module and item names.
#[derive(Debug, Clone)]
pub struct CodeIdGenerator {
    /// Leading segment of every generated ID
    namespace: String,
}
97
98impl CodeIdGenerator {
99    /// Create a new code ID generator
100    pub fn new(namespace: impl Into<String>) -> Self {
101        Self {
102            namespace: namespace.into(),
103        }
104    }
105
106    /// Generate module ID: `namespace.module`
107    pub fn module_id(&self, module_name: &str, docs: &str) -> ElementId {
108        let id = format!("{}.{}", self.namespace, module_name);
109        ElementId::new(id, docs)
110    }
111
112    /// Generate struct ID: `namespace.module.Struct`
113    pub fn struct_id(&self, module_name: &str, struct_name: &str, docs: &str) -> ElementId {
114        let id = format!("{}.{}.{}", self.namespace, module_name, struct_name);
115        ElementId::new(id, docs)
116    }
117
118    /// Generate method ID: `namespace.module.Struct.method`
119    pub fn method_id(
120        &self,
121        module_name: &str,
122        struct_name: &str,
123        method_name: &str,
124        signature: &str,
125        docs: &str,
126    ) -> ElementId {
127        let id = format!(
128            "{}.{}.{}.{}",
129            self.namespace, module_name, struct_name, method_name
130        );
131        // Hash includes signature for overload detection
132        let content = format!("{}\n{}", signature, docs);
133        ElementId::new(id, &content)
134    }
135
136    /// Generate function ID: `namespace.module.function`
137    pub fn function_id(&self, module_name: &str, function_name: &str, docs: &str) -> ElementId {
138        let id = format!("{}.{}.{}", self.namespace, module_name, function_name);
139        ElementId::new(id, docs)
140    }
141
142    /// Generate enum ID: `namespace.module.Enum`
143    pub fn enum_id(&self, module_name: &str, enum_name: &str, docs: &str) -> ElementId {
144        let id = format!("{}.{}.{}", self.namespace, module_name, enum_name);
145        ElementId::new(id, docs)
146    }
147
148    /// Generate trait ID: `namespace.module.Trait`
149    pub fn trait_id(&self, module_name: &str, trait_name: &str, docs: &str) -> ElementId {
150        let id = format!("{}.{}.{}", self.namespace, module_name, trait_name);
151        ElementId::new(id, docs)
152    }
153}
154
/// ID generator for legal:constitution profile
///
/// Every ID it produces starts with the stored document ID, followed by
/// dot-separated level labels (`art`, `sec`, `cl`, `para`, `amendment`)
/// and their normalized numbers.
#[derive(Debug, Clone)]
pub struct LegalIdGenerator {
    /// Leading segment of every generated ID
    document_id: String,
}
159
160impl LegalIdGenerator {
161    /// Create a new legal ID generator
162    pub fn new(document_id: impl Into<String>) -> Self {
163        Self {
164            document_id: document_id.into(),
165        }
166    }
167
168    /// Generate article ID: `document.art.{num}`
169    pub fn article_id(&self, article_num: &str, content: &str) -> ElementId {
170        let id = format!(
171            "{}.art.{}",
172            self.document_id,
173            Self::normalize_num(article_num)
174        );
175        ElementId::new(id, content)
176    }
177
178    /// Generate section ID: `document.art.{num}.sec.{num}`
179    pub fn section_id(&self, article_num: &str, section_num: &str, content: &str) -> ElementId {
180        let id = format!(
181            "{}.art.{}.sec.{}",
182            self.document_id,
183            Self::normalize_num(article_num),
184            Self::normalize_num(section_num)
185        );
186        ElementId::new(id, content)
187    }
188
189    /// Generate clause ID: `document.art.{num}.sec.{num}.cl.{num}`
190    pub fn clause_id(
191        &self,
192        article_num: &str,
193        section_num: &str,
194        clause_num: &str,
195        content: &str,
196    ) -> ElementId {
197        let id = format!(
198            "{}.art.{}.sec.{}.cl.{}",
199            self.document_id,
200            Self::normalize_num(article_num),
201            Self::normalize_num(section_num),
202            Self::normalize_num(clause_num)
203        );
204        ElementId::new(id, content)
205    }
206
207    /// Generate paragraph ID: `document.art.{num}.sec.{num}.cl.{num}.para.{letter}`
208    pub fn paragraph_id(
209        &self,
210        article_num: &str,
211        section_num: &str,
212        clause_num: &str,
213        para_num: &str,
214        content: &str,
215    ) -> ElementId {
216        let id = format!(
217            "{}.art.{}.sec.{}.cl.{}.para.{}",
218            self.document_id,
219            Self::normalize_num(article_num),
220            Self::normalize_num(section_num),
221            Self::normalize_num(clause_num),
222            Self::normalize_num(para_num)
223        );
224        ElementId::new(id, content)
225    }
226
227    /// Generate amendment ID: `document.amendment.{num}`
228    pub fn amendment_id(&self, amendment_num: &str, content: &str) -> ElementId {
229        let id = format!(
230            "{}.amendment.{}",
231            self.document_id,
232            Self::normalize_num(amendment_num)
233        );
234        ElementId::new(id, content)
235    }
236
237    /// Normalize numbering (Roman numerals, letters, etc. to lowercase)
238    fn normalize_num(num: &str) -> String {
239        num.trim().to_lowercase().replace(' ', "-")
240    }
241}
242
/// ID generator for bookstack:wiki profile
///
/// A field-less marker type; it holds no state.
#[derive(Debug, Clone, Copy)]
pub struct BookstackIdGenerator;
245
246impl BookstackIdGenerator {
247    /// Create a new bookstack ID generator
248    pub fn new() -> Self {
249        Self
250    }
251
252    /// Generate book ID: `book-{slug}`
253    pub fn book_id(title: &str, description: &str) -> ElementId {
254        let slug = Self::slugify(title);
255        let id = format!("book-{}", slug);
256        ElementId::new(id, &format!("{}\n{}", title, description))
257    }
258
259    /// Generate chapter ID: `book-{slug}.chapter-{slug}`
260    pub fn chapter_id(book_slug: &str, chapter_title: &str, content: &str) -> ElementId {
261        let chapter_slug = Self::slugify(chapter_title);
262        let id = format!("{}.chapter-{}", book_slug, chapter_slug);
263        ElementId::new(id, content)
264    }
265
266    /// Generate page ID: `book-{slug}.chapter-{slug}.page-{slug}`
267    pub fn page_id(
268        book_slug: &str,
269        chapter_slug: &str,
270        page_title: &str,
271        content: &str,
272    ) -> ElementId {
273        let page_slug = Self::slugify(page_title);
274        let id = format!("{}.{}.page-{}", book_slug, chapter_slug, page_slug);
275        ElementId::new(id, content)
276    }
277
278    /// Generate shelf ID: `shelf-{slug}`
279    pub fn shelf_id(name: &str, description: &str) -> ElementId {
280        let slug = Self::slugify(name);
281        let id = format!("shelf-{}", slug);
282        ElementId::new(id, &format!("{}\n{}", name, description))
283    }
284
285    /// Convert title to URL-safe slug
286    fn slugify(text: &str) -> String {
287        let slug = text
288            .to_lowercase()
289            .chars()
290            .map(|c| {
291                if c.is_alphanumeric() {
292                    c
293                } else if c.is_whitespace() || c == '-' || c == '_' {
294                    '-'
295                } else {
296                    ' ' // Will be filtered out
297                }
298            })
299            .collect::<String>()
300            .split_whitespace()
301            .collect::<Vec<_>>()
302            .join("-");
303
304        // Remove consecutive dashes
305        let mut result = String::new();
306        let mut last_was_dash = false;
307        for c in slug.chars() {
308            if c == '-' {
309                if !last_was_dash {
310                    result.push(c);
311                    last_was_dash = true;
312                }
313            } else {
314                result.push(c);
315                last_was_dash = false;
316            }
317        }
318        result.trim_matches('-').to_string()
319    }
320}
321
322impl Default for BookstackIdGenerator {
323    fn default() -> Self {
324        Self::new()
325    }
326}
327
/// Unit tests for the ID types and the three profile generators.
///
/// Local generator bindings are named `generator` rather than `gen`
/// because `gen` is a reserved keyword starting with the 2024 edition.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_element_id_creation() {
        let id = ElementId::new("std.vec.Vec.push", "Pushes an item onto the vector");
        assert_eq!(id.id, "std.vec.Vec.push");
        assert!(id.content_hash.starts_with("sha256:"));
        assert!(id.verify("Pushes an item onto the vector"));
        assert!(!id.verify("Different content"));
    }

    #[test]
    fn test_element_id_without_hash() {
        let id = ElementId::from_id("std.vec");
        assert_eq!(id.as_str(), "std.vec");
        assert!(id.content_hash.is_empty());

        // With no stored hash there is nothing to compare, so anything verifies.
        assert!(id.verify("anything"));
        assert!(id.verify(""));

        assert_eq!(ElementId::from("std.vec"), id);
        assert_eq!(ElementId::from(String::from("std.vec")), id);
        assert_eq!(id.to_string(), "std.vec");
    }

    #[test]
    fn test_content_hash_format() {
        // SHA-256 of the empty input is a well-known test vector.
        let id = ElementId::new("test", "");
        assert_eq!(
            id.content_hash,
            "sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
        );

        // Hashing empty content still stores a hash, so verification is strict.
        assert!(id.verify(""));
        assert!(!id.verify("not empty"));
    }

    #[test]
    fn test_code_id_generator() {
        let generator = CodeIdGenerator::new("std");

        let module_id = generator.module_id("vec", "Vector module documentation");
        assert_eq!(module_id.id, "std.vec");

        let struct_id = generator.struct_id("vec", "Vec", "A contiguous growable array");
        assert_eq!(struct_id.id, "std.vec.Vec");

        let method_id = generator.method_id(
            "vec",
            "Vec",
            "push",
            "pub fn push(&mut self, value: T)",
            "Pushes an item",
        );
        assert_eq!(method_id.id, "std.vec.Vec.push");

        let function_id = generator.function_id("vec", "from_elem", "Creates a vector");
        assert_eq!(function_id.id, "std.vec.from_elem");

        let enum_id = generator.enum_id("cmp", "Ordering", "Result of a comparison");
        assert_eq!(enum_id.id, "std.cmp.Ordering");

        let trait_id = generator.trait_id("iter", "Iterator", "An iterator");
        assert_eq!(trait_id.id, "std.iter.Iterator");
    }

    #[test]
    fn test_method_hash_includes_signature() {
        let generator = CodeIdGenerator::new("std");

        let by_ref = generator.method_id("vec", "Vec", "get", "fn get(&self, i: usize)", "Docs");
        let by_mut =
            generator.method_id("vec", "Vec", "get", "fn get(&mut self, i: usize)", "Docs");

        assert_eq!(by_ref.id, by_mut.id);
        assert_ne!(by_ref.content_hash, by_mut.content_hash);
        assert!(by_ref.verify("fn get(&self, i: usize)\nDocs"));
    }

    #[test]
    fn test_legal_id_generator() {
        let generator = LegalIdGenerator::new("us.constitution");

        let article_id = generator.article_id("I", "Article I content");
        assert_eq!(article_id.id, "us.constitution.art.i");

        let section_id = generator.section_id("I", "8", "Section 8 content");
        assert_eq!(section_id.id, "us.constitution.art.i.sec.8");

        let clause_id = generator.clause_id("I", "8", "3", "Commerce Clause");
        assert_eq!(clause_id.id, "us.constitution.art.i.sec.8.cl.3");

        let paragraph_id = generator.paragraph_id("I", "8", "3", "A", "Paragraph content");
        assert_eq!(paragraph_id.id, "us.constitution.art.i.sec.8.cl.3.para.a");

        let amendment_id = generator.amendment_id("XIV", "Amendment XIV content");
        assert_eq!(amendment_id.id, "us.constitution.amendment.xiv");
    }

    #[test]
    fn test_legal_number_normalization() {
        let generator = LegalIdGenerator::new("doc");

        assert_eq!(generator.article_id("  IV ", "c").id, "doc.art.iv");
        assert_eq!(
            generator.amendment_id("Twenty One", "c").id,
            "doc.amendment.twenty-one"
        );
    }

    #[test]
    fn test_bookstack_id_generator() {
        let book_id =
            BookstackIdGenerator::book_id("Rust Programming Guide", "A comprehensive guide");
        assert_eq!(book_id.id, "book-rust-programming-guide");

        let chapter_id = BookstackIdGenerator::chapter_id(
            "book-rust-guide",
            "Getting Started",
            "Chapter content",
        );
        assert_eq!(chapter_id.id, "book-rust-guide.chapter-getting-started");

        let page_id = BookstackIdGenerator::page_id(
            "book-rust-guide",
            "chapter-getting-started",
            "Installation & Setup",
            "Page content",
        );
        assert_eq!(
            page_id.id,
            "book-rust-guide.chapter-getting-started.page-installation-setup"
        );

        let shelf_id = BookstackIdGenerator::shelf_id("My Shelf", "Everything in one place");
        assert_eq!(shelf_id.id, "shelf-my-shelf");
        assert!(shelf_id.verify("My Shelf\nEverything in one place"));
    }

    #[test]
    fn test_slugify() {
        assert_eq!(BookstackIdGenerator::slugify("Hello World"), "hello-world");
        assert_eq!(
            BookstackIdGenerator::slugify("C++ Programming"),
            "c-programming"
        );
        assert_eq!(
            BookstackIdGenerator::slugify("Multiple   Spaces"),
            "multiple-spaces"
        );
        assert_eq!(BookstackIdGenerator::slugify("Trim-Dashes-"), "trim-dashes");
    }

    #[test]
    fn test_slugify_edge_cases() {
        assert_eq!(
            BookstackIdGenerator::slugify("snake_case_title"),
            "snake-case-title"
        );
        assert_eq!(
            BookstackIdGenerator::slugify("--Already--Dashed--"),
            "already-dashed"
        );
        assert_eq!(BookstackIdGenerator::slugify("!!!"), "");
        assert_eq!(BookstackIdGenerator::slugify(""), "");
    }

    #[test]
    fn test_content_hash_deterministic() {
        let id1 = ElementId::new("test", "Same content");
        let id2 = ElementId::new("test", "Same content");
        assert_eq!(id1.content_hash, id2.content_hash);

        let id3 = ElementId::new("test", "Different content");
        assert_ne!(id1.content_hash, id3.content_hash);
    }
}