Skip to main content

kanonak_codec/
lib.rs

1//! kanonak-codec — the generic, ontology-independent codec runtime (Rust port).
2//!
3//! Given a `CodecSchema` (the per-package metadata a generated SDK embeds) and a
4//! set of typed nodes, it builds the canonical input model and content-addresses
5//! it via `kanonak-canonical` (the same content-form the Python/TypeScript
6//! references and the `kanonak hash` CLI produce). It also (de)serializes the
7//! normalized-JSON wire form. Self-contained: carriers come from the schema's
8//! datatype URIs, and the resolved foundation URIs are embedded by the generator,
9//! so hashing needs no runtime ontology resolution.
10//!
11//! A node is a plain JSON object (`serde_json::Map<String, serde_json::Value>`) —
12//! the `$`-envelope plus alias-collapsed local-name fields. A generated typed
13//! model serializes to one. Note: `serde_json::Value` (the node field model) is
14//! distinct from `kanonak_canonical::Value` (the canonical-input value enum).
15
16use kanonak_canonical::{
17    canonical_form as canonical_form_pkg, canonical_hash as canonical_hash_pkg, carrier_of,
18    CanonError, Package, Statement, Subject, Value,
19};
20use serde_json::{Map, Value as Json};
21
/// The five `$`-envelope keys, which never become statements/predicates.
///
/// `statements` skips any top-level node field whose key appears here; the
/// contents of `$extra` are instead emitted entry-by-entry after the other
/// fields.
const ENVELOPE_KEYS: [&str; 5] = ["$type", "$id", "$contentHash", "$version", "$extra"];
24
/// A node is a JSON object: a string-keyed map of `serde_json` values holding
/// the `$`-envelope keys alongside the node's fields.
pub type Node = Map<String, Json>;
27
/// Errors raised by the codec runtime. Fails loudly — no fallbacks.
///
/// Every fallible function in this module returns one of these two variants.
#[derive(Debug)]
pub enum CodecError {
    /// A node, schema, or package context was malformed. The payload is the
    /// human-readable description of what was missing or wrong.
    Malformed(String),
    /// The underlying canonical library rejected a lexical/value. Wraps the
    /// original `CanonError` unchanged.
    Canon(CanonError),
}
36
37impl std::fmt::Display for CodecError {
38    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
39        match self {
40            CodecError::Malformed(m) => write!(f, "{}", m),
41            CodecError::Canon(e) => write!(f, "{}", e.0),
42        }
43    }
44}
45
// Marker impl so `CodecError` works with `?` / `Box<dyn Error>`; it relies on
// the trait's default methods (no custom `source()` is provided).
impl std::error::Error for CodecError {}
47
48impl From<CanonError> for CodecError {
49    fn from(e: CanonError) -> Self {
50        CodecError::Canon(e)
51    }
52}
53
54fn err<T>(msg: impl Into<String>) -> Result<T, CodecError> {
55    Err(CodecError::Malformed(msg.into()))
56}
57
58/// The raw lexical token of a scalar — the input the canonical form normalizes.
59/// bool -> "true"/"false"; string -> the string; number -> its plain decimal
60/// string (serde_json's `Number::to_string()` gives "5" / "1.5", never
61/// scientific notation). The canonical crate re-normalizes from there.
62fn lexical(value: &Json) -> String {
63    match value {
64        Json::Bool(b) => {
65            if *b {
66                "true".to_string()
67            } else {
68                "false".to_string()
69            }
70        }
71        Json::String(s) => s.clone(),
72        Json::Number(n) => n.to_string(),
73        other => other.to_string(),
74    }
75}
76
77/// Build a single canonical `Value` for one (non-list) field datum, per its
78/// schema prop.
79fn build_value(prop: &Json, raw: &Json) -> Result<Value, CodecError> {
80    let kind = prop
81        .get("kind")
82        .and_then(|k| k.as_str())
83        .ok_or_else(|| CodecError::Malformed("schema prop is missing 'kind'".into()))?;
84    if kind == "object" {
85        if let Some(reference) = raw.get("$ref").and_then(|r| r.as_str()) {
86            return Ok(Value::Reference(reference.to_string()));
87        }
88        return err(
89            "Embedded object values are not yet supported by the codec runtime; \
90             pass a reference ({\"$ref\": ...}).",
91        );
92    }
93    let datatype = prop
94        .get("datatype")
95        .and_then(|d| d.as_str())
96        .ok_or_else(|| CodecError::Malformed("datatype prop is missing 'datatype'".into()))?;
97    match carrier_of(datatype) {
98        None => Ok(Value::Raw(lexical(raw))),
99        Some(carrier) => Ok(Value::Typed {
100            carrier,
101            lexical: lexical(raw),
102        }),
103    }
104}
105
106/// The statements for one node: the rdf:type triple, then each modeled/raw field
107/// (lists collapse to a `Value::List`), then each `$extra` entry.
108fn statements(node: &Node, schema: &Json) -> Result<Vec<Statement>, CodecError> {
109    let type_uri = node
110        .get("$type")
111        .and_then(|t| t.as_str())
112        .filter(|s| !s.is_empty())
113        .ok_or_else(|| CodecError::Malformed("node is missing $type".into()))?;
114
115    let classes = schema
116        .get("classes")
117        .ok_or_else(|| CodecError::Malformed("schema is missing 'classes'".into()))?;
118    let cls = classes
119        .get(type_uri)
120        .ok_or_else(|| CodecError::Malformed(format!("no schema for type {}", type_uri)))?;
121    let props = cls
122        .get("props")
123        .ok_or_else(|| CodecError::Malformed(format!("class {} is missing 'props'", type_uri)))?;
124
125    let type_predicate = schema
126        .get("typePredicate")
127        .and_then(|p| p.as_str())
128        .ok_or_else(|| CodecError::Malformed("schema is missing 'typePredicate'".into()))?;
129
130    let mut out: Vec<Statement> = vec![Statement {
131        predicate: type_predicate.to_string(),
132        value: Value::Reference(type_uri.to_string()),
133    }];
134
135    for (key, raw) in node.iter() {
136        if ENVELOPE_KEYS.contains(&key.as_str()) || raw.is_null() {
137            continue;
138        }
139        match props.get(key) {
140            None => out.push(Statement {
141                predicate: key.clone(),
142                value: Value::Raw(lexical(raw)),
143            }),
144            Some(prop) => {
145                let predicate = prop
146                    .get("predicate")
147                    .and_then(|p| p.as_str())
148                    .ok_or_else(|| {
149                        CodecError::Malformed(format!("prop {} is missing 'predicate'", key))
150                    })?;
151                let value = match raw.as_array() {
152                    Some(items) => {
153                        let mut list = Vec::with_capacity(items.len());
154                        for item in items {
155                            list.push(build_value(prop, item)?);
156                        }
157                        Value::List(list)
158                    }
159                    None => build_value(prop, raw)?,
160                };
161                out.push(Statement {
162                    predicate: predicate.to_string(),
163                    value,
164                });
165            }
166        }
167    }
168
169    if let Some(extra) = node.get("$extra") {
170        let extra = extra.as_object().ok_or_else(|| {
171            CodecError::Malformed("$extra must be an object".into())
172        })?;
173        for (predicate, raw) in extra.iter() {
174            if raw.is_null() {
175                continue;
176            }
177            out.push(Statement {
178                predicate: predicate.clone(),
179                value: Value::Raw(lexical(raw)),
180            });
181        }
182    }
183    Ok(out)
184}
185
186/// Build the canonical input model: a subject per node + the synthesized
187/// package-wrapper subject (raw label + `Package` type), exactly the subject set
188/// `kanonak hash` produces for the equivalent authored package.
189pub fn build_package(nodes: &[Node], schema: &Json, pkg: &Json) -> Result<Package, CodecError> {
190    let mut subjects: Vec<Subject> = Vec::with_capacity(nodes.len() + 1);
191    for node in nodes {
192        let id = node
193            .get("$id")
194            .and_then(|i| i.as_str())
195            .filter(|s| !s.is_empty())
196            .ok_or_else(|| CodecError::Malformed("node is missing $id".into()))?;
197        subjects.push(Subject {
198            uri: id.to_string(),
199            statements: statements(node, schema)?,
200        });
201    }
202
203    let publisher = pkg
204        .get("publisher")
205        .and_then(|p| p.as_str())
206        .ok_or_else(|| CodecError::Malformed("pkg is missing 'publisher'".into()))?;
207    let package_name = pkg
208        .get("packageName")
209        .and_then(|p| p.as_str())
210        .ok_or_else(|| CodecError::Malformed("pkg is missing 'packageName'".into()))?;
211    let version = pkg
212        .get("version")
213        .and_then(|p| p.as_str())
214        .ok_or_else(|| CodecError::Malformed("pkg is missing 'version'".into()))?;
215
216    let pkg_uri = format!(
217        "{}/{}@{}/{}",
218        publisher, package_name, version, package_name
219    );
220
221    let type_predicate = schema
222        .get("typePredicate")
223        .and_then(|p| p.as_str())
224        .ok_or_else(|| CodecError::Malformed("schema is missing 'typePredicate'".into()))?;
225    let label_predicate = schema
226        .get("labelPredicate")
227        .and_then(|p| p.as_str())
228        .ok_or_else(|| CodecError::Malformed("schema is missing 'labelPredicate'".into()))?;
229    let package_type_uri = schema
230        .get("packageTypeUri")
231        .and_then(|p| p.as_str())
232        .ok_or_else(|| CodecError::Malformed("schema is missing 'packageTypeUri'".into()))?;
233
234    let mut pkg_statements: Vec<Statement> = Vec::new();
235    if let Some(label) = pkg.get("label") {
236        if !label.is_null() {
237            let label = label
238                .as_str()
239                .ok_or_else(|| CodecError::Malformed("pkg label must be a string".into()))?;
240            pkg_statements.push(Statement {
241                predicate: label_predicate.to_string(),
242                value: Value::Raw(label.to_string()),
243            });
244        }
245    }
246    pkg_statements.push(Statement {
247        predicate: type_predicate.to_string(),
248        value: Value::Reference(package_type_uri.to_string()),
249    });
250    subjects.push(Subject {
251        uri: pkg_uri,
252        statements: pkg_statements,
253    });
254
255    Ok(Package { subjects })
256}
257
258/// The canonical form (the `{subjects:[...]}` JSON) of a package from nodes.
259pub fn canonical_form(nodes: &[Node], schema: &Json, pkg: &Json) -> Result<String, CodecError> {
260    Ok(canonical_form_pkg(&build_package(nodes, schema, pkg)?)?)
261}
262
263/// The `sha256:` content hash of a package from nodes — matches `kanonak hash`.
264pub fn content_hash(nodes: &[Node], schema: &Json, pkg: &Json) -> Result<String, CodecError> {
265    Ok(canonical_hash_pkg(&build_package(nodes, schema, pkg)?)?)
266}
267
268/// Serialize a typed node to its normalized-JSON wire form. `$extra` entries ride
269/// as sibling fields after the modeled ones; a modeled field wins a name
270/// collision (`[JsonExtensionData]` semantics). No `$extra` key on the wire.
271pub fn serialize(node: &Node) -> Node {
272    let mut out = Map::new();
273    for (key, value) in node.iter() {
274        if key == "$extra" || value.is_null() {
275            continue;
276        }
277        out.insert(key.clone(), value.clone());
278    }
279    if let Some(extra) = node.get("$extra").and_then(|e| e.as_object()) {
280        for (key, value) in extra.iter() {
281            if !value.is_null() && !out.contains_key(key) {
282                out.insert(key.clone(), value.clone());
283            }
284        }
285    }
286    out
287}
288
289/// Parse normalized JSON into a typed node. `$`-envelope keys and fields modeled
290/// on the node's `$type` stay top-level; every other key is collected into
291/// `$extra` so a strongly-typed consumer round-trips it losslessly.
292pub fn deserialize(json_obj: &Node, schema: &Json) -> Result<Node, CodecError> {
293    let type_uri = json_obj
294        .get("$type")
295        .and_then(|t| t.as_str())
296        .ok_or_else(|| CodecError::Malformed("Cannot deserialize: missing string $type".into()))?;
297
298    let classes = schema
299        .get("classes")
300        .ok_or_else(|| CodecError::Malformed("schema is missing 'classes'".into()))?;
301    let cls = classes.get(type_uri).ok_or_else(|| {
302        CodecError::Malformed(format!("Cannot deserialize: no schema for type {}", type_uri))
303    })?;
304    let props = cls
305        .get("props")
306        .ok_or_else(|| CodecError::Malformed(format!("class {} is missing 'props'", type_uri)))?;
307
308    let mut node = Map::new();
309    node.insert("$type".to_string(), Json::String(type_uri.to_string()));
310    let mut extra = Map::new();
311    for (key, value) in json_obj.iter() {
312        if key == "$type" {
313            continue;
314        }
315        if key.starts_with('$') || props.get(key).is_some() {
316            node.insert(key.clone(), value.clone());
317        } else {
318            extra.insert(key.clone(), value.clone());
319        }
320    }
321    if !extra.is_empty() {
322        node.insert("$extra".to_string(), Json::Object(extra));
323    }
324    Ok(node)
325}