1use anyhow::Result;
2use serde_json::json;
3use sha2::{Digest, Sha256};
4use std::collections::HashMap;
5use ucm_core::{
6 normalize::{canonical_json, normalize_content},
7 Block, BlockId, Document, Edge, EdgeType,
8};
9
10use crate::model::*;
11
12pub fn canonical_codegraph_json(doc: &Document) -> Result<String> {
13 let logical_by_id = logical_key_index(doc);
14
15 let mut node_entries = Vec::new();
16 for (id, block) in &doc.blocks {
17 if *id == doc.root {
18 continue;
19 }
20
21 let logical_key = logical_by_id
22 .get(id)
23 .cloned()
24 .unwrap_or_else(|| id.to_string());
25
26 let class = node_class(block).unwrap_or_else(|| "unknown".to_string());
27 let metadata = normalized_block_metadata(block);
28
29 node_entries.push(json!({
30 "logical_key": logical_key,
31 "node_class": class,
32 "semantic_role": block.metadata.semantic_role.as_ref().map(|r| r.to_string()),
33 "content_type": block.content.type_tag(),
34 "content": normalize_content(&block.content),
35 "metadata": metadata,
36 }));
37 }
38
39 node_entries.sort_by(|a, b| {
40 let ak = a
41 .get("logical_key")
42 .and_then(|v| v.as_str())
43 .unwrap_or_default();
44 let bk = b
45 .get("logical_key")
46 .and_then(|v| v.as_str())
47 .unwrap_or_default();
48 ak.cmp(bk)
49 });
50
51 let mut structure_entries = Vec::new();
52 for (parent, children) in &doc.structure {
53 let parent_key = logical_by_id
54 .get(parent)
55 .cloned()
56 .unwrap_or_else(|| parent.to_string());
57
58 let mut child_keys: Vec<String> = children
59 .iter()
60 .map(|child| {
61 logical_by_id
62 .get(child)
63 .cloned()
64 .unwrap_or_else(|| child.to_string())
65 })
66 .collect();
67 child_keys.sort();
68
69 structure_entries.push(json!({
70 "parent": parent_key,
71 "children": child_keys,
72 }));
73 }
74
75 structure_entries.sort_by(|a, b| {
76 let ak = a.get("parent").and_then(|v| v.as_str()).unwrap_or_default();
77 let bk = b.get("parent").and_then(|v| v.as_str()).unwrap_or_default();
78 ak.cmp(bk)
79 });
80
81 let mut edge_entries = Vec::new();
82 for (source_id, block) in &doc.blocks {
83 let source_key = logical_by_id
84 .get(source_id)
85 .cloned()
86 .unwrap_or_else(|| source_id.to_string());
87
88 for edge in &block.edges {
89 let target_key = logical_by_id
90 .get(&edge.target)
91 .cloned()
92 .unwrap_or_else(|| edge.target.to_string());
93 edge_entries.push(json!({
94 "source": source_key,
95 "edge_type": edge.edge_type.as_str(),
96 "target": target_key,
97 "metadata": normalized_edge_metadata(edge),
98 }));
99 }
100 }
101
102 edge_entries.sort_by(|a, b| {
103 let a_source = a.get("source").and_then(|v| v.as_str()).unwrap_or_default();
104 let b_source = b.get("source").and_then(|v| v.as_str()).unwrap_or_default();
105 a_source
106 .cmp(b_source)
107 .then_with(|| {
108 a.get("edge_type")
109 .and_then(|v| v.as_str())
110 .unwrap_or_default()
111 .cmp(
112 b.get("edge_type")
113 .and_then(|v| v.as_str())
114 .unwrap_or_default(),
115 )
116 })
117 .then_with(|| {
118 a.get("target")
119 .and_then(|v| v.as_str())
120 .unwrap_or_default()
121 .cmp(b.get("target").and_then(|v| v.as_str()).unwrap_or_default())
122 })
123 });
124
125 let canonical = json!({
126 "profile": CODEGRAPH_PROFILE,
127 "profile_version": CODEGRAPH_PROFILE_VERSION,
128 "nodes": node_entries,
129 "structure": structure_entries,
130 "edges": edge_entries,
131 "document_metadata": normalized_document_metadata(doc),
132 });
133
134 Ok(canonical_json(&canonical))
135}
136
137pub fn canonical_fingerprint(doc: &Document) -> Result<String> {
138 let canonical = canonical_codegraph_json(doc)?;
139 let mut hasher = Sha256::new();
140 hasher.update(canonical.as_bytes());
141 let digest = hasher.finalize();
142 Ok(hex::encode(digest))
143}
144
145pub(super) fn normalize_temporal_fields(doc: &mut Document) {
146 let ts = deterministic_timestamp();
147 doc.metadata.created_at = ts;
148 doc.metadata.modified_at = ts;
149 doc.version.timestamp = ts;
150
151 for block in doc.blocks.values_mut() {
152 block.metadata.created_at = ts;
153 block.metadata.modified_at = ts;
154 block.version.timestamp = ts;
155
156 for edge in &mut block.edges {
157 edge.created_at = ts;
158 }
159 }
160}
161
162pub(super) fn deterministic_timestamp() -> chrono::DateTime<chrono::Utc> {
163 chrono::DateTime::parse_from_rfc3339("1970-01-01T00:00:00Z")
164 .unwrap()
165 .with_timezone(&chrono::Utc)
166}
167
168pub(super) fn sort_structure_children_by_logical_key(doc: &mut Document) {
169 let key_index = logical_key_index(doc);
170
171 for children in doc.structure.values_mut() {
172 children.sort_by(|a, b| {
173 let ka = key_index.get(a).cloned().unwrap_or_else(|| a.to_string());
174 let kb = key_index.get(b).cloned().unwrap_or_else(|| b.to_string());
175 ka.cmp(&kb)
176 });
177 }
178}
179
180pub(super) fn sort_edges(doc: &mut Document) {
181 let key_index = logical_key_index(doc);
182
183 for block in doc.blocks.values_mut() {
184 block.edges.sort_by(|a, b| {
185 let at = key_index
186 .get(&a.target)
187 .cloned()
188 .unwrap_or_else(|| a.target.to_string());
189 let bt = key_index
190 .get(&b.target)
191 .cloned()
192 .unwrap_or_else(|| b.target.to_string());
193
194 a.edge_type
195 .as_str()
196 .cmp(&b.edge_type.as_str())
197 .then_with(|| at.cmp(&bt))
198 });
199 }
200}
201
202pub(super) fn compute_stats(doc: &Document) -> CodeGraphStats {
203 let mut stats = CodeGraphStats::default();
204
205 for (id, block) in &doc.blocks {
206 if *id == doc.root {
207 continue;
208 }
209
210 stats.total_nodes += 1;
211
212 match node_class(block).as_deref() {
213 Some("repository") => stats.repository_nodes += 1,
214 Some("directory") => stats.directory_nodes += 1,
215 Some("file") => {
216 stats.file_nodes += 1;
217 if let Some(lang) = block
218 .metadata
219 .custom
220 .get(META_LANGUAGE)
221 .and_then(|v| v.as_str())
222 {
223 *stats.languages.entry(lang.to_string()).or_default() += 1;
224 }
225 }
226 Some("symbol") => stats.symbol_nodes += 1,
227 _ => {}
228 }
229
230 for edge in &block.edges {
231 stats.total_edges += 1;
232 match &edge.edge_type {
233 EdgeType::References => stats.reference_edges += 1,
234 EdgeType::Custom(name) if name == "exports" => stats.export_edges += 1,
235 _ => {}
236 }
237 }
238 }
239
240 stats
241}
242
243pub(super) fn block_logical_key(block: &Block) -> Option<String> {
244 block
245 .metadata
246 .custom
247 .get(META_LOGICAL_KEY)
248 .and_then(|v| v.as_str())
249 .map(|s| s.to_string())
250}
251
252pub(super) fn block_path(block: &Block) -> Option<String> {
253 block
254 .metadata
255 .custom
256 .get(META_CODEREF)
257 .and_then(|v| v.get("path"))
258 .and_then(|v| v.as_str())
259 .map(|s| s.to_string())
260}
261
262pub(super) fn node_class(block: &Block) -> Option<String> {
263 if let Some(class) = block
264 .metadata
265 .custom
266 .get(META_NODE_CLASS)
267 .and_then(|v| v.as_str())
268 {
269 return Some(class.to_string());
270 }
271
272 if let Some(role) = &block.metadata.semantic_role {
273 if role.category == ucm_core::RoleCategory::Custom {
274 if let Some(sub) = &role.subcategory {
275 return Some(sub.to_string());
276 }
277 }
278 }
279
280 None
281}
282
283pub(super) fn validate_required_metadata(
284 class_name: &str,
285 block: &Block,
286 diagnostics: &mut Vec<CodeGraphDiagnostic>,
287) {
288 let required = match class_name {
289 "repository" => vec![META_LOGICAL_KEY, META_CODEREF],
290 "directory" => vec![META_LOGICAL_KEY, META_CODEREF],
291 "file" => vec![META_LOGICAL_KEY, META_CODEREF, META_LANGUAGE],
292 "symbol" => vec![
293 META_LOGICAL_KEY,
294 META_CODEREF,
295 META_LANGUAGE,
296 META_SYMBOL_KIND,
297 META_SYMBOL_NAME,
298 META_EXPORTED,
299 ],
300 _ => {
301 diagnostics.push(CodeGraphDiagnostic::error(
302 "CG1017",
303 format!("invalid node_class '{}'", class_name),
304 ));
305 return;
306 }
307 };
308
309 for key in required {
310 if !block.metadata.custom.contains_key(key) {
311 diagnostics.push(
312 CodeGraphDiagnostic::error(
313 "CG1018",
314 format!(
315 "node class '{}' missing required metadata key '{}'",
316 class_name, key
317 ),
318 )
319 .with_logical_key(block_logical_key(block).unwrap_or_else(|| block.id.to_string())),
320 );
321 }
322 }
323
324 if let Some(logical_key) = block_logical_key(block) {
325 let expected_prefix = match class_name {
326 "repository" => "repository:",
327 "directory" => "directory:",
328 "file" => "file:",
329 "symbol" => "symbol:",
330 _ => "",
331 };
332
333 if !expected_prefix.is_empty() && !logical_key.starts_with(expected_prefix) {
334 diagnostics.push(
335 CodeGraphDiagnostic::error(
336 "CG1019",
337 format!(
338 "logical_key '{}' must start with '{}'",
339 logical_key, expected_prefix
340 ),
341 )
342 .with_logical_key(logical_key),
343 );
344 }
345 }
346}
347
348pub(super) fn logical_key_index(doc: &Document) -> HashMap<BlockId, String> {
349 doc.blocks
350 .iter()
351 .map(|(id, block)| {
352 (
353 *id,
354 block_logical_key(block).unwrap_or_else(|| id.to_string()),
355 )
356 })
357 .collect()
358}
359
360pub(super) fn normalized_document_metadata(doc: &Document) -> serde_json::Value {
361 let mut custom = serde_json::Map::new();
362 let mut custom_entries: Vec<_> = doc.metadata.custom.iter().collect();
363 custom_entries.sort_by(|a, b| a.0.cmp(b.0));
364 for (k, v) in custom_entries {
365 if is_volatile_metadata_key(k) {
366 continue;
367 }
368 custom.insert(k.clone(), v.clone());
369 }
370
371 json!({
372 "title": doc.metadata.title,
373 "description": doc.metadata.description,
374 "authors": doc.metadata.authors,
375 "language": doc.metadata.language,
376 "custom": custom,
377 })
378}
379
380pub(super) fn normalized_block_metadata(block: &Block) -> serde_json::Value {
381 let mut custom = serde_json::Map::new();
382 let mut entries: Vec<_> = block.metadata.custom.iter().collect();
383 entries.sort_by(|a, b| a.0.cmp(b.0));
384 for (k, v) in entries {
385 if is_volatile_metadata_key(k) {
386 continue;
387 }
388 custom.insert(k.clone(), v.clone());
389 }
390
391 json!({
392 "label": block.metadata.label,
393 "semantic_role": block.metadata.semantic_role.as_ref().map(|r| r.to_string()),
394 "tags": block.metadata.tags,
395 "summary": block.metadata.summary,
396 "custom": custom,
397 })
398}
399
400pub(super) fn normalized_edge_metadata(edge: &Edge) -> serde_json::Value {
401 let mut custom = serde_json::Map::new();
402 let mut entries: Vec<_> = edge.metadata.custom.iter().collect();
403 entries.sort_by(|a, b| a.0.cmp(b.0));
404 for (k, v) in entries {
405 if is_volatile_metadata_key(k) {
406 continue;
407 }
408 custom.insert(k.clone(), v.clone());
409 }
410
411 json!({
412 "confidence": edge.metadata.confidence,
413 "description": edge.metadata.description,
414 "custom": custom,
415 })
416}
417
418pub(super) fn is_volatile_metadata_key(key: &str) -> bool {
419 matches!(key, "generated_at" | "runtime" | "session" | "timestamp")
420}