1use crate::content::Content;
12use crate::error::{Error, ErrorCode, Result};
13use crate::normalize::normalize_content;
14use serde::{Deserialize, Serialize};
15use sha2::{Digest, Sha256};
16use std::fmt;
17use std::str::FromStr;
18
/// Identifier of a block, stored as 12 raw bytes.
///
/// With serde the inner array is routed through the `hex_array` module, so it
/// is written as a hex string rather than as a byte sequence.
#[derive(Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct BlockId(#[serde(with = "hex_array")] pub [u8; 12]);
34
35impl BlockId {
36 pub fn from_bytes(bytes: [u8; 12]) -> Self {
38 Self(bytes)
39 }
40
41 pub fn as_bytes(&self) -> &[u8; 12] {
43 &self.0
44 }
45
46 pub fn root() -> Self {
48 let mut bytes = [0u8; 12];
49 bytes[0] = 0xFF; Self(bytes)
51 }
52
53 pub fn is_root(&self) -> bool {
55 self.0[0] == 0xFF && self.0[1..].iter().all(|&b| b == 0)
56 }
57
58 pub fn from_hex(s: &str) -> Result<Self> {
60 let bytes = hex::decode(s).map_err(|_| {
61 Error::new(
62 ErrorCode::E002InvalidBlockId,
63 format!("Invalid hex string: {}", s),
64 )
65 })?;
66 if bytes.len() > 12 {
67 return Err(Error::new(
68 ErrorCode::E002InvalidBlockId,
69 "Hex string too long",
70 ));
71 }
72 let mut arr = [0u8; 12];
73 let start = 12 - bytes.len();
74 arr[start..].copy_from_slice(&bytes);
75 Ok(Self(arr))
76 }
77}
78
79impl fmt::Debug for BlockId {
80 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
81 write!(f, "BlockId({})", self)
82 }
83}
84
85impl fmt::Display for BlockId {
86 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
87 write!(f, "blk_{}", hex::encode(self.0))
88 }
89}
90
91impl FromStr for BlockId {
92 type Err = Error;
93
94 fn from_str(s: &str) -> Result<Self> {
95 let hex_part = s
96 .strip_prefix("blk_")
97 .ok_or_else(|| Error::InvalidBlockId(format!("missing 'blk_' prefix: {}", s)))?;
98
99 if hex_part.len() != 24 {
100 return Err(Error::InvalidBlockId(format!(
101 "expected 24 hex characters, got {}",
102 hex_part.len()
103 )));
104 }
105
106 let bytes = hex::decode(hex_part)
107 .map_err(|e| Error::InvalidBlockId(format!("invalid hex: {}", e)))?;
108
109 if bytes.len() != 12 {
110 return Err(Error::InvalidBlockId(format!(
111 "expected 12 bytes, got {}",
112 bytes.len()
113 )));
114 }
115
116 let mut arr = [0u8; 12];
117 arr.copy_from_slice(&bytes);
118 Ok(BlockId(arr))
119 }
120}
121
/// A 32-byte content hash.
///
/// With serde the inner array is routed through the `hex_array_32` module, so
/// it is written as a hex string rather than as a byte sequence.
#[derive(Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct ContentHash(#[serde(with = "hex_array_32")] pub [u8; 32]);
125
126impl ContentHash {
127 pub fn from_bytes(bytes: [u8; 32]) -> Self {
128 Self(bytes)
129 }
130
131 pub fn as_bytes(&self) -> &[u8; 32] {
132 &self.0
133 }
134}
135
136impl fmt::Debug for ContentHash {
137 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
138 write!(f, "ContentHash({})", hex::encode(&self.0[..8]))
139 }
140}
141
142impl fmt::Display for ContentHash {
143 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
144 write!(f, "{}", hex::encode(self.0))
145 }
146}
147
/// Settings for ID generation.
///
/// The default value has no namespace and `include_semantic_role == false`.
#[derive(Debug, Clone, Default)]
pub struct IdGeneratorConfig {
    /// Optional namespace string.
    pub namespace: Option<String>,
    /// Flag set through [`IdGeneratorConfig::with_semantic_role`].
    // NOTE(review): presumably meant to control whether the semantic role takes
    // part in ID generation — confirm where (and whether) it is consumed.
    pub include_semantic_role: bool,
}

impl IdGeneratorConfig {
    /// Creates a configuration with default settings.
    pub fn new() -> Self {
        Default::default()
    }

    /// Returns the configuration with `namespace` set to the given value;
    /// the other fields are carried over unchanged.
    pub fn with_namespace(self, namespace: impl Into<String>) -> Self {
        Self {
            namespace: Some(namespace.into()),
            ..self
        }
    }

    /// Returns the configuration with `include_semantic_role` set to
    /// `include`; the other fields are carried over unchanged.
    pub fn with_semantic_role(self, include: bool) -> Self {
        Self {
            include_semantic_role: include,
            ..self
        }
    }
}
172
/// Generates block IDs and content hashes using a stored
/// [`IdGeneratorConfig`].
#[derive(Debug, Clone)]
pub struct IdGenerator {
    // Configuration captured at construction time.
    config: IdGeneratorConfig,
}
178
179impl IdGenerator {
180 pub fn new(config: IdGeneratorConfig) -> Self {
181 Self { config }
182 }
183
184 pub fn with_defaults() -> Self {
185 Self::new(IdGeneratorConfig::default())
186 }
187
188 pub fn generate(&self, content: &Content, semantic_role: Option<&str>) -> BlockId {
190 generate_block_id(content, semantic_role, self.config.namespace.as_deref())
191 }
192
193 pub fn content_hash(&self, content: &Content) -> ContentHash {
195 compute_content_hash(content)
196 }
197}
198
199impl Default for IdGenerator {
200 fn default() -> Self {
201 Self::with_defaults()
202 }
203}
204
205pub fn generate_block_id(
233 content: &Content,
234 semantic_role: Option<&str>,
235 namespace: Option<&str>,
236) -> BlockId {
237 let mut hasher = Sha256::new();
238
239 if let Some(ns) = namespace {
241 hasher.update(ns.as_bytes());
242 hasher.update(b":");
243 }
244
245 hasher.update(content.type_tag().as_bytes());
247 hasher.update(b":");
248
249 if let Some(role) = semantic_role {
251 hasher.update(role.as_bytes());
252 }
253 hasher.update(b":");
254
255 let normalized = normalize_content(content);
257 hasher.update(normalized.as_bytes());
258
259 let hash = hasher.finalize();
261 let mut id_bytes = [0u8; 12];
262 id_bytes.copy_from_slice(&hash[..12]);
263
264 BlockId(id_bytes)
265}
266
267pub fn compute_content_hash(content: &Content) -> ContentHash {
269 let mut hasher = Sha256::new();
270 let normalized = normalize_content(content);
271 hasher.update(normalized.as_bytes());
272 let hash = hasher.finalize();
273 let mut hash_bytes = [0u8; 32];
274 hash_bytes.copy_from_slice(&hash);
275 ContentHash(hash_bytes)
276}
277
278mod hex_array {
280 use serde::{self, Deserialize, Deserializer, Serializer};
281
282 pub fn serialize<S>(bytes: &[u8; 12], serializer: S) -> Result<S::Ok, S::Error>
283 where
284 S: Serializer,
285 {
286 serializer.serialize_str(&hex::encode(bytes))
287 }
288
289 pub fn deserialize<'de, D>(deserializer: D) -> Result<[u8; 12], D::Error>
290 where
291 D: Deserializer<'de>,
292 {
293 let s = String::deserialize(deserializer)?;
294 let bytes = hex::decode(&s).map_err(serde::de::Error::custom)?;
295 if bytes.len() != 12 {
296 return Err(serde::de::Error::custom("expected 12 bytes"));
297 }
298 let mut arr = [0u8; 12];
299 arr.copy_from_slice(&bytes);
300 Ok(arr)
301 }
302}
303
304mod hex_array_32 {
305 use serde::{self, Deserialize, Deserializer, Serializer};
306
307 pub fn serialize<S>(bytes: &[u8; 32], serializer: S) -> Result<S::Ok, S::Error>
308 where
309 S: Serializer,
310 {
311 serializer.serialize_str(&hex::encode(bytes))
312 }
313
314 pub fn deserialize<'de, D>(deserializer: D) -> Result<[u8; 32], D::Error>
315 where
316 D: Deserializer<'de>,
317 {
318 let s = String::deserialize(deserializer)?;
319 let bytes = hex::decode(&s).map_err(serde::de::Error::custom)?;
320 if bytes.len() != 32 {
321 return Err(serde::de::Error::custom("expected 32 bytes"));
322 }
323 let mut arr = [0u8; 32];
324 arr.copy_from_slice(&bytes);
325 Ok(arr)
326 }
327}
328
#[cfg(test)]
mod tests {
    use super::*;

    // Shared fixture: bytes 0x01..=0x0c and their textual form.
    const SAMPLE_BYTES: [u8; 12] = [
        0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c,
    ];
    const SAMPLE_TEXT: &str = "blk_0102030405060708090a0b0c";

    // Display renders the `blk_` prefix followed by the hex of every byte.
    #[test]
    fn test_block_id_display() {
        let rendered = BlockId::from_bytes(SAMPLE_BYTES).to_string();
        assert_eq!(rendered, SAMPLE_TEXT);
    }

    // Parsing the textual form and printing it again round-trips.
    #[test]
    fn test_block_id_parse() {
        let parsed = SAMPLE_TEXT.parse::<BlockId>().unwrap();
        assert_eq!(parsed.to_string(), SAMPLE_TEXT);
    }

    // Missing prefix, non-hex payload and too-short payload are all rejected.
    #[test]
    fn test_block_id_parse_invalid() {
        for bad in ["invalid", "blk_invalid", "blk_0102"].iter() {
            assert!(bad.parse::<BlockId>().is_err());
        }
    }

    // Identical inputs always hash to the same ID.
    #[test]
    fn test_deterministic_id_generation() {
        let text = Content::text("Hello, world!");

        let first = generate_block_id(&text, Some("intro"), None);
        let second = generate_block_id(&text, Some("intro"), None);
        assert_eq!(first, second);
    }

    // Changing only the semantic role changes the ID.
    #[test]
    fn test_different_role_different_id() {
        let text = Content::text("Hello, world!");

        let as_intro = generate_block_id(&text, Some("intro"), None);
        let as_conclusion = generate_block_id(&text, Some("conclusion"), None);
        assert_ne!(as_intro, as_conclusion);
    }

    // Changing only the namespace changes the ID.
    #[test]
    fn test_namespace_isolation() {
        let text = Content::text("Hello");

        let in_a = generate_block_id(&text, None, Some("tenant-a"));
        let in_b = generate_block_id(&text, None, Some("tenant-b"));
        assert_ne!(in_a, in_b);
    }

    // `root()` is recognised by `is_root()`, an arbitrary ID is not.
    #[test]
    fn test_root_block_id() {
        assert!(BlockId::root().is_root());

        let ordinary = BlockId::from_bytes([0x01; 12]);
        assert!(!ordinary.is_root());
    }

    // Hashing the same content twice yields equal hashes.
    #[test]
    fn test_content_hash() {
        let text = Content::text("Hello");

        let first = compute_content_hash(&text);
        let second = compute_content_hash(&text);
        assert_eq!(first, second);
    }

    // A namespaced generator produces a non-root ID for ordinary content.
    #[test]
    fn test_id_generator() {
        let config = IdGeneratorConfig::new().with_namespace("test");
        let generator = IdGenerator::new(config);
        let text = Content::text("Hello");

        let produced = generator.generate(&text, Some("intro"));
        assert!(!produced.is_root());
    }
}