Skip to main content

ucp_codegraph/legacy/
canonical.rs

1use anyhow::Result;
2use serde_json::json;
3use sha2::{Digest, Sha256};
4use std::collections::HashMap;
5use ucm_core::{
6    normalize::{canonical_json, normalize_content},
7    Block, BlockId, Document, Edge, EdgeType,
8};
9
10use crate::model::*;
11
12pub fn canonical_codegraph_json(doc: &Document) -> Result<String> {
13    let logical_by_id = logical_key_index(doc);
14
15    let mut node_entries = Vec::new();
16    for (id, block) in &doc.blocks {
17        if *id == doc.root {
18            continue;
19        }
20
21        let logical_key = logical_by_id
22            .get(id)
23            .cloned()
24            .unwrap_or_else(|| id.to_string());
25
26        let class = node_class(block).unwrap_or_else(|| "unknown".to_string());
27        let metadata = normalized_block_metadata(block);
28
29        node_entries.push(json!({
30            "logical_key": logical_key,
31            "node_class": class,
32            "semantic_role": block.metadata.semantic_role.as_ref().map(|r| r.to_string()),
33            "content_type": block.content.type_tag(),
34            "content": normalize_content(&block.content),
35            "metadata": metadata,
36        }));
37    }
38
39    node_entries.sort_by(|a, b| {
40        let ak = a
41            .get("logical_key")
42            .and_then(|v| v.as_str())
43            .unwrap_or_default();
44        let bk = b
45            .get("logical_key")
46            .and_then(|v| v.as_str())
47            .unwrap_or_default();
48        ak.cmp(bk)
49    });
50
51    let mut structure_entries = Vec::new();
52    for (parent, children) in &doc.structure {
53        let parent_key = logical_by_id
54            .get(parent)
55            .cloned()
56            .unwrap_or_else(|| parent.to_string());
57
58        let mut child_keys: Vec<String> = children
59            .iter()
60            .map(|child| {
61                logical_by_id
62                    .get(child)
63                    .cloned()
64                    .unwrap_or_else(|| child.to_string())
65            })
66            .collect();
67        child_keys.sort();
68
69        structure_entries.push(json!({
70            "parent": parent_key,
71            "children": child_keys,
72        }));
73    }
74
75    structure_entries.sort_by(|a, b| {
76        let ak = a.get("parent").and_then(|v| v.as_str()).unwrap_or_default();
77        let bk = b.get("parent").and_then(|v| v.as_str()).unwrap_or_default();
78        ak.cmp(bk)
79    });
80
81    let mut edge_entries = Vec::new();
82    for (source_id, block) in &doc.blocks {
83        let source_key = logical_by_id
84            .get(source_id)
85            .cloned()
86            .unwrap_or_else(|| source_id.to_string());
87
88        for edge in &block.edges {
89            let target_key = logical_by_id
90                .get(&edge.target)
91                .cloned()
92                .unwrap_or_else(|| edge.target.to_string());
93            edge_entries.push(json!({
94                "source": source_key,
95                "edge_type": edge.edge_type.as_str(),
96                "target": target_key,
97                "metadata": normalized_edge_metadata(edge),
98            }));
99        }
100    }
101
102    edge_entries.sort_by(|a, b| {
103        let a_source = a.get("source").and_then(|v| v.as_str()).unwrap_or_default();
104        let b_source = b.get("source").and_then(|v| v.as_str()).unwrap_or_default();
105        a_source
106            .cmp(b_source)
107            .then_with(|| {
108                a.get("edge_type")
109                    .and_then(|v| v.as_str())
110                    .unwrap_or_default()
111                    .cmp(
112                        b.get("edge_type")
113                            .and_then(|v| v.as_str())
114                            .unwrap_or_default(),
115                    )
116            })
117            .then_with(|| {
118                a.get("target")
119                    .and_then(|v| v.as_str())
120                    .unwrap_or_default()
121                    .cmp(b.get("target").and_then(|v| v.as_str()).unwrap_or_default())
122            })
123    });
124
125    let canonical = json!({
126        "profile": CODEGRAPH_PROFILE,
127        "profile_version": CODEGRAPH_PROFILE_VERSION,
128        "nodes": node_entries,
129        "structure": structure_entries,
130        "edges": edge_entries,
131        "document_metadata": normalized_document_metadata(doc),
132    });
133
134    Ok(canonical_json(&canonical))
135}
136
137pub fn canonical_fingerprint(doc: &Document) -> Result<String> {
138    let canonical = canonical_codegraph_json(doc)?;
139    let mut hasher = Sha256::new();
140    hasher.update(canonical.as_bytes());
141    let digest = hasher.finalize();
142    Ok(hex::encode(digest))
143}
144
145pub(super) fn normalize_temporal_fields(doc: &mut Document) {
146    let ts = deterministic_timestamp();
147    doc.metadata.created_at = ts;
148    doc.metadata.modified_at = ts;
149    doc.version.timestamp = ts;
150
151    for block in doc.blocks.values_mut() {
152        block.metadata.created_at = ts;
153        block.metadata.modified_at = ts;
154        block.version.timestamp = ts;
155
156        for edge in &mut block.edges {
157            edge.created_at = ts;
158        }
159    }
160}
161
162pub(super) fn deterministic_timestamp() -> chrono::DateTime<chrono::Utc> {
163    chrono::DateTime::parse_from_rfc3339("1970-01-01T00:00:00Z")
164        .unwrap()
165        .with_timezone(&chrono::Utc)
166}
167
168pub(super) fn sort_structure_children_by_logical_key(doc: &mut Document) {
169    let key_index = logical_key_index(doc);
170
171    for children in doc.structure.values_mut() {
172        children.sort_by(|a, b| {
173            let ka = key_index.get(a).cloned().unwrap_or_else(|| a.to_string());
174            let kb = key_index.get(b).cloned().unwrap_or_else(|| b.to_string());
175            ka.cmp(&kb)
176        });
177    }
178}
179
180pub(super) fn sort_edges(doc: &mut Document) {
181    let key_index = logical_key_index(doc);
182
183    for block in doc.blocks.values_mut() {
184        block.edges.sort_by(|a, b| {
185            let at = key_index
186                .get(&a.target)
187                .cloned()
188                .unwrap_or_else(|| a.target.to_string());
189            let bt = key_index
190                .get(&b.target)
191                .cloned()
192                .unwrap_or_else(|| b.target.to_string());
193
194            a.edge_type
195                .as_str()
196                .cmp(&b.edge_type.as_str())
197                .then_with(|| at.cmp(&bt))
198        });
199    }
200}
201
202pub(super) fn compute_stats(doc: &Document) -> CodeGraphStats {
203    let mut stats = CodeGraphStats::default();
204
205    for (id, block) in &doc.blocks {
206        if *id == doc.root {
207            continue;
208        }
209
210        stats.total_nodes += 1;
211
212        match node_class(block).as_deref() {
213            Some("repository") => stats.repository_nodes += 1,
214            Some("directory") => stats.directory_nodes += 1,
215            Some("file") => {
216                stats.file_nodes += 1;
217                if let Some(lang) = block
218                    .metadata
219                    .custom
220                    .get(META_LANGUAGE)
221                    .and_then(|v| v.as_str())
222                {
223                    *stats.languages.entry(lang.to_string()).or_default() += 1;
224                }
225            }
226            Some("symbol") => stats.symbol_nodes += 1,
227            _ => {}
228        }
229
230        for edge in &block.edges {
231            stats.total_edges += 1;
232            match &edge.edge_type {
233                EdgeType::References => stats.reference_edges += 1,
234                EdgeType::Custom(name) if name == "exports" => stats.export_edges += 1,
235                _ => {}
236            }
237        }
238    }
239
240    stats
241}
242
243pub(super) fn block_logical_key(block: &Block) -> Option<String> {
244    block
245        .metadata
246        .custom
247        .get(META_LOGICAL_KEY)
248        .and_then(|v| v.as_str())
249        .map(|s| s.to_string())
250}
251
252pub(super) fn block_path(block: &Block) -> Option<String> {
253    block
254        .metadata
255        .custom
256        .get(META_CODEREF)
257        .and_then(|v| v.get("path"))
258        .and_then(|v| v.as_str())
259        .map(|s| s.to_string())
260}
261
262pub(super) fn node_class(block: &Block) -> Option<String> {
263    if let Some(class) = block
264        .metadata
265        .custom
266        .get(META_NODE_CLASS)
267        .and_then(|v| v.as_str())
268    {
269        return Some(class.to_string());
270    }
271
272    if let Some(role) = &block.metadata.semantic_role {
273        if role.category == ucm_core::RoleCategory::Custom {
274            if let Some(sub) = &role.subcategory {
275                return Some(sub.to_string());
276            }
277        }
278    }
279
280    None
281}
282
283pub(super) fn validate_required_metadata(
284    class_name: &str,
285    block: &Block,
286    diagnostics: &mut Vec<CodeGraphDiagnostic>,
287) {
288    let required = match class_name {
289        "repository" => vec![META_LOGICAL_KEY, META_CODEREF],
290        "directory" => vec![META_LOGICAL_KEY, META_CODEREF],
291        "file" => vec![META_LOGICAL_KEY, META_CODEREF, META_LANGUAGE],
292        "symbol" => vec![
293            META_LOGICAL_KEY,
294            META_CODEREF,
295            META_LANGUAGE,
296            META_SYMBOL_KIND,
297            META_SYMBOL_NAME,
298            META_EXPORTED,
299        ],
300        _ => {
301            diagnostics.push(CodeGraphDiagnostic::error(
302                "CG1017",
303                format!("invalid node_class '{}'", class_name),
304            ));
305            return;
306        }
307    };
308
309    for key in required {
310        if !block.metadata.custom.contains_key(key) {
311            diagnostics.push(
312                CodeGraphDiagnostic::error(
313                    "CG1018",
314                    format!(
315                        "node class '{}' missing required metadata key '{}'",
316                        class_name, key
317                    ),
318                )
319                .with_logical_key(block_logical_key(block).unwrap_or_else(|| block.id.to_string())),
320            );
321        }
322    }
323
324    if let Some(logical_key) = block_logical_key(block) {
325        let expected_prefix = match class_name {
326            "repository" => "repository:",
327            "directory" => "directory:",
328            "file" => "file:",
329            "symbol" => "symbol:",
330            _ => "",
331        };
332
333        if !expected_prefix.is_empty() && !logical_key.starts_with(expected_prefix) {
334            diagnostics.push(
335                CodeGraphDiagnostic::error(
336                    "CG1019",
337                    format!(
338                        "logical_key '{}' must start with '{}'",
339                        logical_key, expected_prefix
340                    ),
341                )
342                .with_logical_key(logical_key),
343            );
344        }
345    }
346}
347
348pub(super) fn logical_key_index(doc: &Document) -> HashMap<BlockId, String> {
349    doc.blocks
350        .iter()
351        .map(|(id, block)| {
352            (
353                *id,
354                block_logical_key(block).unwrap_or_else(|| id.to_string()),
355            )
356        })
357        .collect()
358}
359
360pub(super) fn normalized_document_metadata(doc: &Document) -> serde_json::Value {
361    let mut custom = serde_json::Map::new();
362    let mut custom_entries: Vec<_> = doc.metadata.custom.iter().collect();
363    custom_entries.sort_by(|a, b| a.0.cmp(b.0));
364    for (k, v) in custom_entries {
365        if is_volatile_metadata_key(k) {
366            continue;
367        }
368        custom.insert(k.clone(), v.clone());
369    }
370
371    json!({
372        "title": doc.metadata.title,
373        "description": doc.metadata.description,
374        "authors": doc.metadata.authors,
375        "language": doc.metadata.language,
376        "custom": custom,
377    })
378}
379
380pub(super) fn normalized_block_metadata(block: &Block) -> serde_json::Value {
381    let mut custom = serde_json::Map::new();
382    let mut entries: Vec<_> = block.metadata.custom.iter().collect();
383    entries.sort_by(|a, b| a.0.cmp(b.0));
384    for (k, v) in entries {
385        if is_volatile_metadata_key(k) {
386            continue;
387        }
388        custom.insert(k.clone(), v.clone());
389    }
390
391    json!({
392        "label": block.metadata.label,
393        "semantic_role": block.metadata.semantic_role.as_ref().map(|r| r.to_string()),
394        "tags": block.metadata.tags,
395        "summary": block.metadata.summary,
396        "custom": custom,
397    })
398}
399
400pub(super) fn normalized_edge_metadata(edge: &Edge) -> serde_json::Value {
401    let mut custom = serde_json::Map::new();
402    let mut entries: Vec<_> = edge.metadata.custom.iter().collect();
403    entries.sort_by(|a, b| a.0.cmp(b.0));
404    for (k, v) in entries {
405        if is_volatile_metadata_key(k) {
406            continue;
407        }
408        custom.insert(k.clone(), v.clone());
409    }
410
411    json!({
412        "confidence": edge.metadata.confidence,
413        "description": edge.metadata.description,
414        "custom": custom,
415    })
416}
417
418pub(super) fn is_volatile_metadata_key(key: &str) -> bool {
419    matches!(key, "generated_at" | "runtime" | "session" | "timestamp")
420}