Skip to main content

cbor_ld/
processor.rs

1use cbor2::Value;
2use cbor2::value::Integer;
3use std::collections::{BTreeMap, BTreeSet};
4
5use crate::constants::{
6    CBOR_LD_TAG, DEFAULT_REGISTRY_ENTRY_ID, FIRST_CUSTOM_TERM_ID, SECURITY_MULTIBASE, XSD_DATE,
7    XSD_DATETIME, keywords_table,
8};
9use crate::error::{Error, Result};
10use crate::table::{ReverseTypeTable, TableKey, TypeTable};
11
12/// Options for JSON-LD to CBOR-LD encoding.
13#[derive(Clone, Copy, Debug)]
14pub struct EncodeOptions<'a> {
15    /// The CBOR-LD registry entry ID. `0` disables semantic compression.
16    pub registry_entry_id: u64,
17    /// Registry type table material. Required for compressed IDs other than
18    /// `1`; ID `1` uses an empty default table.
19    pub type_table: Option<&'a TypeTable>,
20    /// Whether to use cbor2 deterministic/canonical serialization.
21    pub canonical: bool,
22}
23
24impl Default for EncodeOptions<'_> {
25    fn default() -> Self {
26        Self {
27            registry_entry_id: 0,
28            type_table: None,
29            canonical: true,
30        }
31    }
32}
33
34impl<'a> EncodeOptions<'a> {
35    /// Creates uncompressed CBOR-LD options using registry entry ID `0`.
36    pub fn uncompressed() -> Self {
37        Self::default()
38    }
39
40    /// Creates compressed CBOR-LD options.
41    pub fn compressed(registry_entry_id: u64, type_table: &'a TypeTable) -> Self {
42        Self {
43            registry_entry_id,
44            type_table: Some(type_table),
45            canonical: true,
46        }
47    }
48}
49
50/// Options for CBOR-LD to JSON-LD decoding.
51#[derive(Clone, Copy, Debug, Default)]
52pub struct DecodeOptions<'a> {
53    /// Registry type table material. Required for compressed IDs other than
54    /// `1`; ID `1` uses an empty default table.
55    pub type_table: Option<&'a TypeTable>,
56}
57
58/// Encodes a JSON-LD document as CBOR-LD.
59///
60/// This helper works without a document loader and is sufficient for
61/// uncompressed documents or compressed documents that only use inline
62/// contexts. Use [`encode_with_loader`] when remote context URLs are present.
63pub fn encode(jsonld_document: &Value, options: EncodeOptions<'_>) -> Result<Vec<u8>> {
64    encode_with_loader(jsonld_document, options, |url| {
65        Err(Error::DocumentLoader(format!(
66            "no document loader configured for {url:?}"
67        )))
68    })
69}
70
71/// Encodes a JSON-LD document as CBOR-LD with a document loader.
72///
73/// The loader receives a context URL and must return the loaded JSON-LD
74/// document as a [`Value`] containing an `@context` entry.
75pub fn encode_with_loader<F>(
76    jsonld_document: &Value,
77    options: EncodeOptions<'_>,
78    document_loader: F,
79) -> Result<Vec<u8>>
80where
81    F: FnMut(&str) -> Result<Value>,
82{
83    let registry_entry_id = options.registry_entry_id;
84    let payload = if registry_entry_id == 0 {
85        jsonld_document.clone()
86    } else {
87        let type_table = table_for_registry(registry_entry_id, options.type_table)?;
88        let mut converter = Converter::new_compression(type_table, document_loader);
89        converter.convert(jsonld_document)?
90    };
91
92    let tagged = Value::Tag(
93        CBOR_LD_TAG,
94        Box::new(Value::Array(vec![
95            integer(registry_entry_id.into()),
96            payload,
97        ])),
98    );
99    if options.canonical {
100        cbor2::to_canonical_vec(&tagged).map_err(|e| Error::Cbor(e.to_string()))
101    } else {
102        cbor2::to_vec(&tagged).map_err(|e| Error::Cbor(e.to_string()))
103    }
104}
105
106/// Decodes CBOR-LD bytes into a JSON-LD document.
107///
108/// This helper works without a document loader and is sufficient for
109/// uncompressed documents or compressed documents that only use inline
110/// contexts. Use [`decode_with_loader`] when remote context URLs are present.
111pub fn decode(cborld_bytes: &[u8], options: DecodeOptions<'_>) -> Result<Value> {
112    decode_with_loader(cborld_bytes, options, |url| {
113        Err(Error::DocumentLoader(format!(
114            "no document loader configured for {url:?}"
115        )))
116    })
117}
118
119/// Decodes CBOR-LD bytes into a JSON-LD document with a document loader.
120pub fn decode_with_loader<F>(
121    cborld_bytes: &[u8],
122    options: DecodeOptions<'_>,
123    document_loader: F,
124) -> Result<Value>
125where
126    F: FnMut(&str) -> Result<Value>,
127{
128    cbor2::validate(cborld_bytes).map_err(|e| Error::Cbor(e.to_string()))?;
129    let value: Value = cbor2::from_slice(cborld_bytes).map_err(|e| Error::Cbor(e.to_string()))?;
130    let (registry_entry_id, payload) = parse_cborld_tag(value)?;
131    if registry_entry_id == 0 {
132        return Ok(payload);
133    }
134
135    let type_table = table_for_registry(registry_entry_id, options.type_table)?;
136    let mut converter = Converter::new_decompression(type_table, document_loader);
137    converter.convert(&payload)
138}
139
140fn table_for_registry(registry_entry_id: u64, type_table: Option<&TypeTable>) -> Result<TypeTable> {
141    match (registry_entry_id, type_table) {
142        (DEFAULT_REGISTRY_ENTRY_ID, None) => TypeTable::new().normalized(),
143        (_, Some(table)) => table.normalized(),
144        (id, None) => Err(Error::NoTypeTable(id)),
145    }
146}
147
148fn parse_cborld_tag(value: Value) -> Result<(u64, Value)> {
149    let Value::Tag(tag, boxed) = value else {
150        return Err(Error::NotCborld("missing CBOR-LD tag".to_owned()));
151    };
152    if tag != CBOR_LD_TAG {
153        return Err(Error::NotCborld(format!(
154            "unexpected tag {tag}, expected {CBOR_LD_TAG}"
155        )));
156    }
157    let Value::Array(mut array) = *boxed else {
158        return Err(Error::NotCborld(
159            "tagged CBOR-LD item must be an array".to_owned(),
160        ));
161    };
162    if array.len() != 2 {
163        return Err(Error::NotCborld(
164            "tagged CBOR-LD array must contain registryEntryId and payload".to_owned(),
165        ));
166    }
167    let payload = array.pop().expect("length checked");
168    let id = array.pop().expect("length checked");
169    let id = non_negative_u64(&id).ok_or_else(|| {
170        Error::NotCborld("registryEntryId must be a non-negative integer".to_owned())
171    })?;
172    Ok((id, payload))
173}
174
/// Direction in which a `Converter` transforms a document.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
enum Strategy {
    /// JSON-LD input is turned into compressed CBOR-LD.
    Compression,
    /// Compressed CBOR-LD input is turned back into JSON-LD.
    Decompression,
}
180
181struct Converter<F>
182where
183    F: FnMut(&str) -> Result<Value>,
184{
185    strategy: Strategy,
186    type_table: TypeTable,
187    reverse_type_table: ReverseTypeTable,
188    type_table_encoded_as_bytes: BTreeSet<String>,
189    context_loader: ContextLoader<F>,
190}
191
192impl<F> Converter<F>
193where
194    F: FnMut(&str) -> Result<Value>,
195{
196    fn new_compression(type_table: TypeTable, document_loader: F) -> Self {
197        let context_loader = ContextLoader::new(document_loader, false);
198        Self {
199            strategy: Strategy::Compression,
200            reverse_type_table: BTreeMap::new(),
201            type_table,
202            type_table_encoded_as_bytes: type_table_encoded_as_bytes(),
203            context_loader,
204        }
205    }
206
207    fn new_decompression(type_table: TypeTable, document_loader: F) -> Self {
208        let reverse_type_table = type_table.reverse();
209        let context_loader = ContextLoader::new(document_loader, true);
210        Self {
211            strategy: Strategy::Decompression,
212            type_table,
213            reverse_type_table,
214            type_table_encoded_as_bytes: type_table_encoded_as_bytes(),
215            context_loader,
216        }
217    }
218
219    fn convert(&mut self, input: &Value) -> Result<Value> {
220        let active_ctx = ActiveContext::new(BTreeMap::new(), None);
221        match input {
222            Value::Array(items) => items
223                .iter()
224                .map(|item| self.convert_one(active_ctx.clone(), item))
225                .collect::<Result<Vec<_>>>()
226                .map(Value::Array),
227            _ => self.convert_one(active_ctx, input),
228        }
229    }
230
231    fn convert_one(&mut self, mut active_ctx: ActiveContext, input: &Value) -> Result<Value> {
232        let mut output = Value::Map(Vec::new());
233        active_ctx = match self.strategy {
234            Strategy::Compression => {
235                self.convert_contexts_compression(active_ctx, input, &mut output)?
236            }
237            Strategy::Decompression => {
238                self.convert_contexts_decompression(active_ctx, input, &mut output)?
239            }
240        };
241
242        let object_types = match self.strategy {
243            Strategy::Compression => self.get_object_types_compression(&active_ctx, input)?,
244            Strategy::Decompression => self.get_object_types_decompression(&active_ctx, input)?,
245        };
246        active_ctx =
247            active_ctx.apply_type_scoped_contexts(&object_types, &mut self.context_loader)?;
248
249        let entries = match self.strategy {
250            Strategy::Compression => self.get_input_entries_compression(&active_ctx, input)?,
251            Strategy::Decompression => self.get_input_entries_decompression(&active_ctx, input)?,
252        };
253
254        for (term_info, value) in entries {
255            let value_active_ctx = active_ctx
256                .apply_property_scoped_context(&term_info.term, &mut self.context_loader)?;
257            let values: Vec<&Value> = if term_info.plural {
258                value
259                    .as_array()
260                    .ok_or_else(|| {
261                        Error::InvalidInput(format!(
262                            "plural term {:?} must contain an array",
263                            term_info.term
264                        ))
265                    })?
266                    .iter()
267                    .collect()
268            } else {
269                vec![value]
270            };
271
272            let mut converted = Vec::with_capacity(values.len());
273            for value in values {
274                converted.push(self.convert_value(
275                    value_active_ctx.clone(),
276                    term_info.term_type(),
277                    value,
278                    &term_info,
279                )?);
280            }
281            let output_values = if term_info.plural {
282                Value::Array(converted)
283            } else {
284                converted.into_iter().next().unwrap_or(Value::Array(vec![]))
285            };
286
287            match self.strategy {
288                Strategy::Compression => {
289                    map_set(&mut output, term_info.term_id.clone(), output_values)?;
290                }
291                Strategy::Decompression => {
292                    map_set(
293                        &mut output,
294                        Value::Text(term_info.term.clone()),
295                        output_values,
296                    )?;
297                }
298            }
299        }
300
301        Ok(output)
302    }
303
304    fn convert_value(
305        &mut self,
306        active_ctx: ActiveContext,
307        term_type: Option<&str>,
308        value: &Value,
309        term_info: &TermInfo,
310    ) -> Result<Value> {
311        if matches!(value, Value::Null) {
312            return Ok(Value::Null);
313        }
314
315        let converted = match self.strategy {
316            Strategy::Compression => self.convert_value_compression(term_type, value, term_info)?,
317            Strategy::Decompression => {
318                self.convert_value_decompression(term_type, value, term_info)?
319            }
320        };
321        if let Some(value) = converted {
322            return Ok(value);
323        }
324
325        if let Value::Array(items) = value {
326            return items
327                .iter()
328                .map(|item| self.convert_value(active_ctx.clone(), term_type, item, term_info))
329                .collect::<Result<Vec<_>>>()
330                .map(Value::Array);
331        }
332
333        if matches!(value, Value::Map(_)) {
334            return self.convert_one(active_ctx, value);
335        }
336
337        Ok(value.clone())
338    }
339
340    fn convert_contexts_compression(
341        &mut self,
342        active_ctx: ActiveContext,
343        input: &Value,
344        output: &mut Value,
345    ) -> Result<ActiveContext> {
346        let active_ctx = active_ctx.apply_embedded_contexts(input, &mut self.context_loader)?;
347        let Some(context) = map_get_text(input, "@context") else {
348            return Ok(active_ctx);
349        };
350
351        let is_array = matches!(context, Value::Array(_));
352        let contexts: Vec<&Value> = match context {
353            Value::Array(items) => items.iter().collect(),
354            value => vec![value],
355        };
356        let mut encoded_contexts = Vec::with_capacity(contexts.len());
357        for value in contexts {
358            encoded_contexts.push(self.encode_context(value)?);
359        }
360        let key = integer(if is_array { 1 } else { 0 });
361        let value = if is_array {
362            Value::Array(encoded_contexts)
363        } else {
364            encoded_contexts.into_iter().next().unwrap_or(Value::Null)
365        };
366        map_set(output, key, value)?;
367        Ok(active_ctx)
368    }
369
370    fn convert_contexts_decompression(
371        &mut self,
372        active_ctx: ActiveContext,
373        input: &Value,
374        output: &mut Value,
375    ) -> Result<ActiveContext> {
376        let singular = map_get(input, &integer(0));
377        let plural = map_get(input, &integer(1));
378        if singular.is_some() && plural.is_some() {
379            return Err(Error::InvalidInput(
380                "both singular and plural context IDs were found".to_owned(),
381            ));
382        }
383
384        if let Some(value) = singular {
385            map_set(
386                output,
387                Value::Text("@context".to_owned()),
388                self.decode_context(value)?,
389            )?;
390        } else if let Some(value) = plural {
391            let items = value.as_array().ok_or_else(|| {
392                Error::InvalidInput("encoded plural context value must be an array".to_owned())
393            })?;
394            let mut contexts = Vec::with_capacity(items.len());
395            for item in items {
396                contexts.push(self.decode_context(item)?);
397            }
398            map_set(
399                output,
400                Value::Text("@context".to_owned()),
401                Value::Array(contexts),
402            )?;
403        }
404
405        active_ctx.apply_embedded_contexts(output, &mut self.context_loader)
406    }
407
408    fn encode_context(&self, context: &Value) -> Result<Value> {
409        let Value::Text(context) = context else {
410            return Ok(context.clone());
411        };
412        if let Some(id) = self
413            .type_table
414            .subtable("context")
415            .and_then(|table| table.get(&TableKey::Text(context.clone())))
416        {
417            Ok(integer((*id).into()))
418        } else {
419            Ok(Value::Text(context.clone()))
420        }
421    }
422
423    fn decode_context(&self, value: &Value) -> Result<Value> {
424        if let Some(id) = non_negative_u64(value) {
425            let Some(url) = self
426                .reverse_type_table
427                .get("context")
428                .and_then(|table| table.get(&id))
429            else {
430                return Err(Error::UndefinedCompressedContext(id));
431            };
432            return Ok(url.to_value());
433        }
434        Ok(map_to_json_object(value))
435    }
436
437    fn get_input_entries_compression<'a>(
438        &self,
439        active_ctx: &ActiveContext,
440        input: &'a Value,
441    ) -> Result<Vec<(TermInfo, &'a Value)>> {
442        let mut entries = Vec::new();
443        let map = input.as_map().ok_or_else(|| {
444            Error::InvalidInput("compression input must be a JSON-LD object".to_owned())
445        })?;
446        let mut keyed = Vec::new();
447        for (key, value) in map {
448            let Some(term) = key.as_text() else {
449                return Err(Error::InvalidInput(
450                    "JSON-LD object keys must be text strings".to_owned(),
451                ));
452            };
453            keyed.push((term.to_owned(), value));
454        }
455        keyed.sort_by(|a, b| a.0.cmp(&b.0));
456
457        for (term, value) in keyed {
458            if term == "@context" {
459                continue;
460            }
461            let plural = matches!(value, Value::Array(_));
462            let term_id = active_ctx.get_id_for_term(&term, plural, &self.context_loader);
463            let def = active_ctx.get_term_definition(&term);
464            entries.push((
465                TermInfo {
466                    term,
467                    term_id,
468                    plural,
469                    def,
470                },
471                value,
472            ));
473        }
474        Ok(entries)
475    }
476
477    fn get_input_entries_decompression<'a>(
478        &self,
479        active_ctx: &ActiveContext,
480        input: &'a Value,
481    ) -> Result<Vec<(TermInfo, &'a Value)>> {
482        let mut entries = Vec::new();
483        let map = input.as_map().ok_or_else(|| {
484            Error::InvalidInput("decompression input must be a CBOR-LD map".to_owned())
485        })?;
486        for (key, value) in map {
487            if key == &integer(0) || key == &integer(1) {
488                continue;
489            }
490            let (term, plural) = self.context_loader.get_term_for_id(key)?;
491            let def = active_ctx.get_term_definition(&term);
492            entries.push((
493                TermInfo {
494                    term,
495                    term_id: key.clone(),
496                    plural,
497                    def,
498                },
499                value,
500            ));
501        }
502        entries.sort_by(|a, b| a.0.term.cmp(&b.0.term));
503        Ok(entries)
504    }
505
506    fn get_object_types_compression(
507        &self,
508        active_ctx: &ActiveContext,
509        input: &Value,
510    ) -> Result<BTreeSet<String>> {
511        let mut object_types = BTreeSet::new();
512        for term in &active_ctx.type_terms {
513            if let Some(types) = map_get_text(input, term) {
514                match types {
515                    Value::Array(items) => {
516                        for item in items {
517                            if let Some(text) = item.as_text() {
518                                object_types.insert(text.to_owned());
519                            }
520                        }
521                    }
522                    Value::Text(text) => {
523                        object_types.insert(text.clone());
524                    }
525                    _ => {}
526                }
527            }
528        }
529        Ok(object_types)
530    }
531
532    fn get_object_types_decompression(
533        &mut self,
534        active_ctx: &ActiveContext,
535        input: &Value,
536    ) -> Result<BTreeSet<String>> {
537        let mut object_types = BTreeSet::new();
538        for term in &active_ctx.type_terms {
539            let term_id = active_ctx.get_id_for_term(term, false, &self.context_loader);
540            let plural_id = non_negative_u64(&term_id).map(|id| integer((id + 1).into()));
541            let Some(value) = map_get(input, &term_id).or_else(|| {
542                plural_id
543                    .as_ref()
544                    .and_then(|plural_id| map_get(input, plural_id))
545            }) else {
546                continue;
547            };
548
549            let term_info = self.context_loader.term_info_for_id(&term_id, active_ctx)?;
550            let values: Vec<&Value> = match value {
551                Value::Array(items) => items.iter().collect(),
552                value => vec![value],
553            };
554            for value in values {
555                let decoded = self
556                    .decode_value_with_table(Some("@vocab"), value, &term_info)?
557                    .unwrap_or_else(|| value.clone());
558                if let Value::Text(text) = decoded {
559                    object_types.insert(text);
560                }
561            }
562        }
563        Ok(object_types)
564    }
565
566    fn convert_value_compression(
567        &self,
568        term_type: Option<&str>,
569        value: &Value,
570        term_info: &TermInfo,
571    ) -> Result<Option<Value>> {
572        if matches!(value, Value::Map(_) | Value::Array(_)) {
573            return Ok(None);
574        }
575        self.encode_value(term_type, value, term_info).map(Some)
576    }
577
578    fn convert_value_decompression(
579        &mut self,
580        term_type: Option<&str>,
581        value: &Value,
582        term_info: &TermInfo,
583    ) -> Result<Option<Value>> {
584        if matches!(value, Value::Map(_)) {
585            return Ok(None);
586        }
587        if let Some(decoded) = self.decode_value_with_table(term_type, value, term_info)? {
588            return Ok(Some(decoded));
589        }
590        if !matches!(value, Value::Array(_)) {
591            return Ok(Some(value.clone()));
592        }
593        Ok(None)
594    }
595
596    fn encode_value(
597        &self,
598        term_type: Option<&str>,
599        value: &Value,
600        term_info: &TermInfo,
601    ) -> Result<Value> {
602        let table_type = get_table_type(term_info, term_type);
603        if table_type == "url" && !matches!(value, Value::Text(_)) {
604            return Err(Error::UnsupportedValue(
605                "URL values must be text strings".to_owned(),
606            ));
607        }
608
609        if let Some(subtable) = self.type_table.subtable(&table_type) {
610            let key = TableKey::from_value(value)?;
611            let mut int_value = subtable.get(&key).map(|id| *id as i128);
612            let mut convert_to_bytes = false;
613            let mut include_sign = false;
614
615            if int_value.is_some() {
616                convert_to_bytes = self.type_table_encoded_as_bytes.contains(&table_type);
617            } else if table_type != "none"
618                && let Some(n) = value.as_integer()
619            {
620                int_value = Some(i128::from(n));
621                convert_to_bytes = true;
622                include_sign = true;
623            }
624
625            if let Some(int_value) = int_value {
626                if convert_to_bytes {
627                    let bytes = if include_sign {
628                        bytes_from_int(int_value)?
629                    } else {
630                        bytes_from_uint(u64::try_from(int_value).map_err(|_| {
631                            Error::UnsupportedValue(format!(
632                                "negative table ID cannot be encoded as unsigned bytes: {int_value}"
633                            ))
634                        })?)?
635                    };
636                    return Ok(Value::Bytes(bytes));
637                }
638                return Ok(integer(int_value));
639            }
640        }
641
642        if table_type == "url" {
643            if let Some(encoded) = self.encode_url(value)? {
644                return Ok(encoded);
645            }
646        } else if table_type == SECURITY_MULTIBASE {
647            if let Some(encoded) = encode_multibase(value) {
648                return Ok(encoded);
649            }
650        } else if table_type == XSD_DATE {
651            if let Some(encoded) = encode_xsd_date(value)? {
652                return Ok(encoded);
653            }
654        } else if table_type == XSD_DATETIME
655            && let Some(encoded) = encode_xsd_datetime(value)?
656        {
657            return Ok(encoded);
658        }
659
660        Ok(value.clone())
661    }
662
663    fn decode_value_with_table(
664        &mut self,
665        term_type: Option<&str>,
666        value: &Value,
667        term_info: &TermInfo,
668    ) -> Result<Option<Value>> {
669        let table_type = get_table_type(term_info, term_type);
670        if let Some(subtable) = self.reverse_type_table.get(&table_type) {
671            let is_bytes = matches!(value, Value::Bytes(_));
672            let table_uses_bytes = self.type_table_encoded_as_bytes.contains(&table_type);
673            let mut use_table = false;
674            let mut int_value = None;
675
676            if is_bytes && table_uses_bytes {
677                let bytes = value.as_bytes().expect("checked");
678                use_table = true;
679                int_value = Some(uint_from_bytes(bytes)?);
680            } else if !table_uses_bytes {
681                int_value = non_negative_u64(value);
682                use_table = int_value.is_some();
683            }
684
685            if use_table {
686                let id = int_value.expect("set when table is used");
687                let Some(decoded) = subtable.get(&id) else {
688                    return Err(Error::UnknownCompressedValue(id));
689                };
690                return Ok(Some(decoded.to_value()));
691            }
692
693            if is_bytes && table_type != "none" {
694                let decoded = int_from_bytes(value.as_bytes().expect("checked"))?;
695                return Ok(Some(integer(decoded)));
696            }
697        }
698
699        if table_type == "url" {
700            if let Some(decoded) = self.decode_url(value)? {
701                return Ok(Some(decoded));
702            }
703        } else if table_type == SECURITY_MULTIBASE {
704            if let Some(decoded) = decode_multibase(value) {
705                return Ok(Some(decoded));
706            }
707        } else if table_type == XSD_DATE {
708            if let Some(decoded) = decode_xsd_date(value)? {
709                return Ok(Some(decoded));
710            }
711        } else if table_type == XSD_DATETIME
712            && let Some(decoded) = decode_xsd_datetime(value)?
713        {
714            return Ok(Some(decoded));
715        }
716
717        Ok(None)
718    }
719
720    fn encode_url(&self, value: &Value) -> Result<Option<Value>> {
721        let Value::Text(url) = value else {
722            return Ok(None);
723        };
724
725        let term_id = self.context_loader.get_id_for_term(url, false);
726        if !matches!(term_id, Value::Text(_)) {
727            return Ok(Some(term_id));
728        }
729
730        if let Some(rest) = url.strip_prefix("https://") {
731            return Ok(Some(Value::Array(vec![
732                integer(2),
733                Value::Text(rest.to_owned()),
734            ])));
735        }
736        if let Some(rest) = url.strip_prefix("http://") {
737            return Ok(Some(Value::Array(vec![
738                integer(1),
739                Value::Text(rest.to_owned()),
740            ])));
741        }
742        if let Some(rest) = url.strip_prefix("urn:uuid:") {
743            let encoded = if rest.to_ascii_lowercase() == rest {
744                Value::Bytes(parse_uuid_bytes(rest)?)
745            } else {
746                Value::Text(rest.to_owned())
747            };
748            return Ok(Some(Value::Array(vec![integer(3), encoded])));
749        }
750        if let Some(rest) = url.strip_prefix("data:") {
751            return Ok(Some(encode_data_url(rest)));
752        }
753        for (prefix, id) in [("did:v1:nym:", 1024u64), ("did:key:", 1025u64)] {
754            if let Some(rest) = url.strip_prefix(prefix) {
755                let mut split = rest.splitn(2, '#');
756                let authority = split.next().unwrap_or_default();
757                let fragment = split.next();
758                let mut entries = vec![integer(id.into()), encode_multibase58_part(authority)];
759                if let Some(fragment) = fragment {
760                    entries.push(encode_multibase58_part(fragment));
761                }
762                return Ok(Some(Value::Array(entries)));
763            }
764        }
765
766        Ok(None)
767    }
768
769    fn decode_url(&mut self, value: &Value) -> Result<Option<Value>> {
770        if matches!(value, Value::Text(_)) {
771            return Ok(None);
772        }
773        if let Value::Array(items) = value {
774            let Some(scheme_id) = items.first().and_then(non_negative_u64) else {
775                return Err(Error::UnknownCompressedValue(0));
776            };
777            return match scheme_id {
778                1 | 2 => {
779                    if items.len() == 2 {
780                        if let Some(rest) = items[1].as_text() {
781                            let prefix = if scheme_id == 1 {
782                                "http://"
783                            } else {
784                                "https://"
785                            };
786                            Ok(Some(Value::Text(format!("{prefix}{rest}"))))
787                        } else {
788                            Err(Error::InvalidInput(
789                                "compressed HTTP URL suffix must be text".to_owned(),
790                            ))
791                        }
792                    } else {
793                        Err(Error::UnknownCompressedValue(scheme_id))
794                    }
795                }
796                3 => {
797                    if items.len() != 2 {
798                        return Err(Error::UnknownCompressedValue(scheme_id));
799                    }
800                    let rest = match &items[1] {
801                        Value::Text(text) => text.clone(),
802                        Value::Bytes(bytes) => format_uuid(bytes)?,
803                        _ => {
804                            return Err(Error::InvalidInput(
805                                "compressed UUID value must be text or bytes".to_owned(),
806                            ));
807                        }
808                    };
809                    Ok(Some(Value::Text(format!("urn:uuid:{rest}"))))
810                }
811                4 => decode_data_url(items).map(Some),
812                1024 | 1025 => decode_base58_did_url(scheme_id, items).map(Some),
813                id => Err(Error::UnknownCompressedValue(id)),
814            };
815        }
816
817        let (term, _) = self.context_loader.get_term_for_id(value)?;
818        Ok(Some(Value::Text(term)))
819    }
820}
821
822#[derive(Clone, Debug)]
823struct ActiveContext {
824    term_map: BTreeMap<String, TermDef>,
825    previous: Option<Box<ActiveContext>>,
826    type_terms: Vec<String>,
827}
828
829impl ActiveContext {
830    fn new(term_map: BTreeMap<String, TermDef>, previous: Option<ActiveContext>) -> Self {
831        let mut type_terms = vec!["@type".to_owned()];
832        for (term, def) in &term_map {
833            if def.id.as_deref() == Some("@type") {
834                type_terms.push(term.clone());
835            }
836        }
837        Self {
838            term_map,
839            previous: previous.map(Box::new),
840            type_terms,
841        }
842    }
843
844    fn apply_embedded_contexts<F>(
845        &self,
846        input: &Value,
847        loader: &mut ContextLoader<F>,
848    ) -> Result<Self>
849    where
850        F: FnMut(&str) -> Result<Value>,
851    {
852        let term_map = update_term_map(
853            self.term_map.clone(),
854            map_get_text(input, "@context"),
855            loader,
856            false,
857            false,
858        )?;
859        Ok(Self::new(term_map, Some(self.clone())))
860    }
861
862    fn apply_property_scoped_context<F>(
863        &self,
864        term: &str,
865        loader: &mut ContextLoader<F>,
866    ) -> Result<Self>
867    where
868        F: FnMut(&str) -> Result<Value>,
869    {
870        let reverted = self.revert_term_map();
871        let contexts = self.term_map.get(term).and_then(|def| def.context.as_ref());
872        let term_map = update_term_map(reverted, contexts, loader, true, false)?;
873        Ok(Self::new(term_map, Some(self.clone())))
874    }
875
876    fn apply_type_scoped_contexts<F>(
877        &self,
878        object_types: &BTreeSet<String>,
879        loader: &mut ContextLoader<F>,
880    ) -> Result<Self>
881    where
882        F: FnMut(&str) -> Result<Value>,
883    {
884        let mut term_map = self.term_map.clone();
885        for object_type in object_types {
886            let contexts = term_map
887                .get(object_type)
888                .and_then(|def| def.context.as_ref())
889                .cloned();
890            term_map = update_term_map(term_map, contexts.as_ref(), loader, false, true)?;
891        }
892        Ok(Self::new(term_map, Some(self.clone())))
893    }
894
895    fn revert_term_map(&self) -> BTreeMap<String, TermDef> {
896        let mut new_term_map = BTreeMap::new();
897        let mut non_propagating = Vec::new();
898        for (term, def) in &self.term_map {
899            if def.propagate {
900                new_term_map.insert(term.clone(), def.clone());
901            } else {
902                non_propagating.push(term.clone());
903            }
904        }
905
906        for term in non_propagating {
907            let mut current = self.previous.as_deref();
908            while let Some(ctx) = current {
909                match ctx.term_map.get(&term) {
910                    Some(def) if !def.propagate => {
911                        current = ctx.previous.as_deref();
912                    }
913                    Some(def) => {
914                        new_term_map.insert(term.clone(), def.clone());
915                        break;
916                    }
917                    None => break,
918                }
919            }
920        }
921
922        new_term_map
923    }
924
925    fn get_id_for_term<F>(&self, term: &str, plural: bool, loader: &ContextLoader<F>) -> Value
926    where
927        F: FnMut(&str) -> Result<Value>,
928    {
929        loader.get_id_for_term(term, plural)
930    }
931
932    fn get_term_definition(&self, term: &str) -> TermDef {
933        self.term_map.get(term).cloned().unwrap_or_default()
934    }
935}
936
937fn update_term_map<F>(
938    mut active_term_map: BTreeMap<String, TermDef>,
939    contexts: Option<&Value>,
940    loader: &mut ContextLoader<F>,
941    property_scope: bool,
942    type_scope: bool,
943) -> Result<BTreeMap<String, TermDef>>
944where
945    F: FnMut(&str) -> Result<Value>,
946{
947    let context_values: Vec<Value> = match contexts {
948        Some(Value::Array(items)) => items.clone(),
949        Some(value) => vec![value.clone()],
950        None => return Ok(active_term_map),
951    };
952
953    let allow_protected_override = property_scope;
954    let propagate_default = !type_scope;
955
956    for context_value in context_values {
957        let entry = loader.load(&context_value)?;
958        let context_obj = object_to_string_map(&entry.context)?;
959        let propagate = context_obj
960            .get("@propagate")
961            .and_then(Value::as_bool)
962            .unwrap_or(propagate_default);
963
964        let mut new_term_map = entry.term_map.clone();
965        for def in new_term_map.values_mut() {
966            def.propagate = propagate;
967        }
968
969        resolve_curies(&active_term_map, &context_obj, &mut new_term_map)?;
970
971        for (term, active_def) in &active_term_map {
972            if let Some(def) = new_term_map.get_mut(term) {
973                if active_def.protected {
974                    if !allow_protected_override && def.value != active_def.value {
975                        return Err(Error::ProtectedTermRedefinition(term.clone()));
976                    }
977                    let mut protected = active_def.clone();
978                    protected.propagate = def.propagate;
979                    *def = protected;
980                }
981            } else if !matches!(context_obj.get(term), Some(Value::Null)) {
982                new_term_map.insert(term.clone(), active_def.clone());
983            }
984        }
985
986        active_term_map = new_term_map;
987    }
988
989    Ok(active_term_map)
990}
991
992#[derive(Clone, Debug)]
993struct ContextEntry {
994    context: Value,
995    term_map: BTreeMap<String, TermDef>,
996}
997
998struct ContextLoader<F>
999where
1000    F: FnMut(&str) -> Result<Value>,
1001{
1002    document_loader: F,
1003    context_map: BTreeMap<String, ContextEntry>,
1004    next_term_id: u64,
1005    term_to_id: BTreeMap<String, u64>,
1006    id_to_term: BTreeMap<u64, String>,
1007    build_reverse_map: bool,
1008}
1009
1010impl<F> ContextLoader<F>
1011where
1012    F: FnMut(&str) -> Result<Value>,
1013{
1014    fn new(document_loader: F, build_reverse_map: bool) -> Self {
1015        let term_to_id = keywords_table();
1016        let id_to_term = if build_reverse_map {
1017            term_to_id.iter().map(|(k, v)| (*v, k.clone())).collect()
1018        } else {
1019            BTreeMap::new()
1020        };
1021        Self {
1022            document_loader,
1023            context_map: BTreeMap::new(),
1024            next_term_id: FIRST_CUSTOM_TERM_ID,
1025            term_to_id,
1026            id_to_term,
1027            build_reverse_map,
1028        }
1029    }
1030
1031    fn get_id_for_term(&self, term: &str, plural: bool) -> Value {
1032        match self.term_to_id.get(term) {
1033            Some(id) => integer((id + u64::from(plural)).into()),
1034            None => Value::Text(term.to_owned()),
1035        }
1036    }
1037
1038    fn get_term_for_id(&self, id: &Value) -> Result<(String, bool)> {
1039        if let Some(term) = id.as_text() {
1040            return Ok((term.to_owned(), false));
1041        }
1042        let Some(id) = non_negative_u64(id) else {
1043            return Err(Error::InvalidInput(
1044                "term ID must be text or integer".to_owned(),
1045            ));
1046        };
1047        let plural = (id & 1) == 1;
1048        let base_id = if plural { id - 1 } else { id };
1049        let Some(term) = self.id_to_term.get(&base_id) else {
1050            return Err(Error::UnknownTermId(id));
1051        };
1052        Ok((term.clone(), plural))
1053    }
1054
1055    fn term_info_for_id(&self, id: &Value, active_ctx: &ActiveContext) -> Result<TermInfo> {
1056        let (term, plural) = self.get_term_for_id(id)?;
1057        let def = active_ctx.get_term_definition(&term);
1058        Ok(TermInfo {
1059            term,
1060            term_id: id.clone(),
1061            plural,
1062            def,
1063        })
1064    }
1065
1066    fn load(&mut self, context: &Value) -> Result<ContextEntry> {
1067        match context {
1068            Value::Text(url) => {
1069                if let Some(entry) = self.context_map.get(url) {
1070                    return Ok(entry.clone());
1071                }
1072                let document = (self.document_loader)(url)?;
1073                let loaded_context = map_get_text(&document, "@context").ok_or_else(|| {
1074                    Error::DocumentLoader(format!(
1075                        "loaded document for {url:?} does not contain @context"
1076                    ))
1077                })?;
1078                let entry = self.add_context(loaded_context.clone(), Some(url.clone()))?;
1079                Ok(entry)
1080            }
1081            Value::Null => self.add_context(Value::Map(Vec::new()), None),
1082            Value::Map(_) => self.add_context(context.clone(), None),
1083            other => Err(Error::InvalidTermDefinition(format!(
1084                "context must be a URL, object, array item, or null; got {other}"
1085            ))),
1086        }
1087    }
1088
1089    fn add_context(
1090        &mut self,
1091        mut context: Value,
1092        context_url: Option<String>,
1093    ) -> Result<ContextEntry> {
1094        let mut context_obj = object_to_string_map(&context)?;
1095
1096        if let Some(Value::Text(import_url)) = context_obj.get("@import") {
1097            let import_entry = if let Some(entry) = self.context_map.get(import_url) {
1098                entry.clone()
1099            } else {
1100                let document = (self.document_loader)(import_url)?;
1101                let import_context = map_get_text(&document, "@context").ok_or_else(|| {
1102                    Error::DocumentLoader(format!(
1103                        "imported document for {import_url:?} does not contain @context"
1104                    ))
1105                })?;
1106                self.add_context(import_context.clone(), Some(import_url.clone()))?
1107            };
1108            let mut merged = object_to_string_map(&import_entry.context)?;
1109            merged.extend(context_obj);
1110            context = string_map_to_value(merged.clone());
1111            context_obj = merged;
1112        }
1113
1114        let mut term_map = BTreeMap::new();
1115        let is_protected = context_obj
1116            .get("@protected")
1117            .and_then(Value::as_bool)
1118            .unwrap_or(false);
1119        let mut keys: Vec<_> = context_obj.keys().cloned().collect();
1120        keys.sort();
1121        let keywords = keywords_table();
1122        for key in keys {
1123            if keywords.contains_key(&key) || key == "@import" {
1124                continue;
1125            }
1126            let Some(def_value) = context_obj.get(&key) else {
1127                continue;
1128            };
1129            if matches!(def_value, Value::Null) {
1130                continue;
1131            }
1132            let def = TermDef::from_context_value(&key, def_value, is_protected)?;
1133            term_map.insert(key.clone(), def);
1134
1135            if !self.term_to_id.contains_key(&key) {
1136                let id = self.next_term_id;
1137                self.next_term_id += 2;
1138                self.term_to_id.insert(key.clone(), id);
1139                if self.build_reverse_map {
1140                    self.id_to_term.insert(id, key);
1141                }
1142            }
1143        }
1144
1145        let entry = ContextEntry { context, term_map };
1146        if let Some(url) = context_url {
1147            self.context_map.insert(url, entry.clone());
1148        }
1149        Ok(entry)
1150    }
1151}
1152
1153#[derive(Clone, Debug)]
1154struct TermInfo {
1155    term: String,
1156    term_id: Value,
1157    plural: bool,
1158    def: TermDef,
1159}
1160
1161impl TermInfo {
1162    fn term_type(&self) -> Option<&str> {
1163        self.def.type_.as_deref()
1164    }
1165}
1166
1167#[derive(Clone, Debug, Default)]
1168struct TermDef {
1169    value: BTreeMap<String, Value>,
1170    id: Option<String>,
1171    type_: Option<String>,
1172    context: Option<Value>,
1173    protected: bool,
1174    propagate: bool,
1175}
1176
1177impl TermDef {
1178    fn from_context_value(term: &str, value: &Value, protected: bool) -> Result<Self> {
1179        let mut object = match value {
1180            Value::Text(id) => {
1181                let mut object = BTreeMap::new();
1182                object.insert("@id".to_owned(), Value::Text(id.clone()));
1183                object
1184            }
1185            Value::Map(_) => object_to_string_map(value)?,
1186            _ => {
1187                return Err(Error::InvalidTermDefinition(format!(
1188                    "term {term:?} must be a string or object"
1189                )));
1190            }
1191        };
1192        let id = object
1193            .get("@id")
1194            .and_then(Value::as_text)
1195            .map(str::to_owned);
1196        let type_ = object
1197            .get("@type")
1198            .and_then(Value::as_text)
1199            .map(str::to_owned);
1200        let context = object.get("@context").cloned();
1201        object.insert("protected".to_owned(), Value::Bool(protected));
1202        Ok(Self {
1203            value: object,
1204            id,
1205            type_,
1206            context,
1207            protected,
1208            propagate: true,
1209        })
1210    }
1211
1212    fn set_id(&mut self, id: String) {
1213        self.id = Some(id.clone());
1214        self.value.insert("@id".to_owned(), Value::Text(id));
1215    }
1216
1217    fn set_type(&mut self, type_: String) {
1218        self.type_ = Some(type_.clone());
1219        self.value.insert("@type".to_owned(), Value::Text(type_));
1220    }
1221}
1222
1223fn resolve_curies(
1224    active_term_map: &BTreeMap<String, TermDef>,
1225    context: &BTreeMap<String, Value>,
1226    new_term_map: &mut BTreeMap<String, TermDef>,
1227) -> Result<()> {
1228    let terms: Vec<_> = new_term_map.keys().cloned().collect();
1229    for term in terms {
1230        let def = new_term_map
1231            .get_mut(&term)
1232            .expect("term came from map keys");
1233        if let Some(id) = def.id.clone() {
1234            def.set_id(resolve_curie(active_term_map, context, &id));
1235        } else {
1236            let resolved = resolve_curie(active_term_map, context, &term);
1237            if resolved.contains(':') {
1238                def.set_id(resolved);
1239            }
1240        }
1241        if let Some(type_) = def.type_.clone() {
1242            def.set_type(resolve_curie(active_term_map, context, &type_));
1243        }
1244        if def.id.is_none() {
1245            return Err(Error::InvalidTermDefinition(format!(
1246                "the @id value for term {term:?} could not be determined"
1247            )));
1248        }
1249    }
1250    Ok(())
1251}
1252
1253fn resolve_curie(
1254    active_term_map: &BTreeMap<String, TermDef>,
1255    context: &BTreeMap<String, Value>,
1256    possible_curie: &str,
1257) -> String {
1258    let Some((prefix, suffix)) = possible_curie.split_once(':') else {
1259        return possible_curie.to_owned();
1260    };
1261
1262    let prefix_id = match context.get(prefix) {
1263        Some(Value::Text(id)) => Some(id.clone()),
1264        Some(Value::Map(_)) => object_to_string_map(context.get(prefix).expect("checked"))
1265            .ok()
1266            .and_then(|obj| obj.get("@id").and_then(Value::as_text).map(str::to_owned)),
1267        _ => active_term_map.get(prefix).and_then(|def| def.id.clone()),
1268    };
1269
1270    let Some(prefix_id) = prefix_id else {
1271        return possible_curie.to_owned();
1272    };
1273    let resolved = format!("{prefix_id}{suffix}");
1274    if resolved == possible_curie {
1275        resolved
1276    } else {
1277        resolve_curie(active_term_map, context, &resolved)
1278    }
1279}
1280
1281fn get_table_type(term_info: &TermInfo, term_type: Option<&str>) -> String {
1282    if term_info.term == "@id"
1283        || term_info.term == "@type"
1284        || term_info.def.id.as_deref() == Some("@id")
1285        || term_info.def.id.as_deref() == Some("@type")
1286        || term_type == Some("@id")
1287        || term_type == Some("@vocab")
1288    {
1289        "url".to_owned()
1290    } else {
1291        term_type.unwrap_or("none").to_owned()
1292    }
1293}
1294
1295fn type_table_encoded_as_bytes() -> BTreeSet<String> {
1296    ["none", XSD_DATE, XSD_DATETIME, "url"]
1297        .into_iter()
1298        .map(str::to_owned)
1299        .collect()
1300}
1301
1302fn bytes_from_uint(int_value: u64) -> Result<Vec<u8>> {
1303    if int_value < 0xff {
1304        Ok(vec![int_value as u8])
1305    } else if int_value < 0xffff {
1306        Ok((int_value as u16).to_be_bytes().to_vec())
1307    } else if int_value < 0xffff_ffff {
1308        Ok((int_value as u32).to_be_bytes().to_vec())
1309    } else {
1310        Ok(int_value.to_be_bytes().to_vec())
1311    }
1312}
1313
1314fn bytes_from_int(int_value: i128) -> Result<Vec<u8>> {
1315    if (-128..0x7f).contains(&int_value) {
1316        Ok((int_value as i8).to_be_bytes().to_vec())
1317    } else if (i16::MIN as i128..0x7fff).contains(&int_value) {
1318        Ok((int_value as i16).to_be_bytes().to_vec())
1319    } else if (i32::MIN as i128..0x7fff_ffff).contains(&int_value) {
1320        Ok((int_value as i32).to_be_bytes().to_vec())
1321    } else if (i64::MIN as i128..=i64::MAX as i128).contains(&int_value) {
1322        Ok((int_value as i64).to_be_bytes().to_vec())
1323    } else {
1324        Err(Error::UnsupportedValue(format!(
1325            "compression value {int_value} too large"
1326        )))
1327    }
1328}
1329
1330fn uint_from_bytes(bytes: &[u8]) -> Result<u64> {
1331    match bytes.len() {
1332        1 => Ok(bytes[0].into()),
1333        2 => Ok(u16::from_be_bytes([bytes[0], bytes[1]]).into()),
1334        4 => Ok(u32::from_be_bytes([bytes[0], bytes[1], bytes[2], bytes[3]]).into()),
1335        8 => Ok(u64::from_be_bytes([
1336            bytes[0], bytes[1], bytes[2], bytes[3], bytes[4], bytes[5], bytes[6], bytes[7],
1337        ])),
1338        _ => Err(Error::UnrecognizedBytes(format!("{bytes:?}"))),
1339    }
1340}
1341
1342fn int_from_bytes(bytes: &[u8]) -> Result<i128> {
1343    match bytes.len() {
1344        1 => Ok(i8::from_be_bytes([bytes[0]]).into()),
1345        2 => Ok(i16::from_be_bytes([bytes[0], bytes[1]]).into()),
1346        4 => Ok(i32::from_be_bytes([bytes[0], bytes[1], bytes[2], bytes[3]]).into()),
1347        8 => Ok(i64::from_be_bytes([
1348            bytes[0], bytes[1], bytes[2], bytes[3], bytes[4], bytes[5], bytes[6], bytes[7],
1349        ])
1350        .into()),
1351        _ => Err(Error::UnrecognizedBytes(format!("{bytes:?}"))),
1352    }
1353}
1354
1355fn encode_multibase(value: &Value) -> Option<Value> {
1356    let Value::Text(text) = value else {
1357        return None;
1358    };
1359    let (prefix, suffix) = match text.as_bytes().first().copied() {
1360        Some(b'z') => (b'z', base58_decode(&text[1..])?),
1361        Some(b'u') => (b'u', base64url_decode(&text[1..])?),
1362        Some(b'M') => (b'M', base64_decode_standard(&text[1..])?),
1363        _ => return None,
1364    };
1365    let mut bytes = Vec::with_capacity(1 + suffix.len());
1366    bytes.push(prefix);
1367    bytes.extend(suffix);
1368    Some(Value::Bytes(bytes))
1369}
1370
1371fn decode_multibase(value: &Value) -> Option<Value> {
1372    let Value::Bytes(bytes) = value else {
1373        return None;
1374    };
1375    let (&prefix, suffix) = bytes.split_first()?;
1376    let text = match prefix {
1377        b'z' => format!("z{}", base58_encode(suffix)),
1378        b'u' => format!("u{}", base64url_encode(suffix)),
1379        b'M' => format!("M{}", base64_encode_standard(suffix)),
1380        _ => return None,
1381    };
1382    Some(Value::Text(text))
1383}
1384
1385fn encode_data_url(rest: &str) -> Value {
1386    if let Some((mediatype, data)) = parse_data_base64(rest)
1387        && let Some(bytes) = base64_decode_standard(data)
1388        && base64_encode_standard(&bytes) == data
1389    {
1390        return Value::Array(vec![
1391            integer(4),
1392            Value::Text(mediatype.to_owned()),
1393            Value::Bytes(bytes),
1394        ]);
1395    }
1396    Value::Array(vec![integer(4), Value::Text(rest.to_owned())])
1397}
1398
/// Splits `rest` at the first `;base64,` marker into `(mediatype, data)`.
///
/// Returns `None` when the marker does not occur.
fn parse_data_base64(rest: &str) -> Option<(&str, &str)> {
    rest.split_once(";base64,")
}
1404
1405fn decode_data_url(items: &[Value]) -> Result<Value> {
1406    match items {
1407        [_, Value::Text(rest)] => Ok(Value::Text(format!("data:{rest}"))),
1408        [_, Value::Text(mediatype), Value::Bytes(bytes)] => Ok(Value::Text(format!(
1409            "data:{mediatype};base64,{}",
1410            base64_encode_standard(bytes)
1411        ))),
1412        _ => Err(Error::UnknownCompressedValue(4)),
1413    }
1414}
1415
1416fn encode_multibase58_part(value: &str) -> Value {
1417    if let Some(rest) = value.strip_prefix('z')
1418        && let Some(bytes) = base58_decode(rest)
1419    {
1420        return Value::Bytes(bytes);
1421    }
1422    Value::Text(value.to_owned())
1423}
1424
1425fn decode_base58_did_url(scheme_id: u64, items: &[Value]) -> Result<Value> {
1426    if !(2..=3).contains(&items.len()) {
1427        return Err(Error::UnknownCompressedValue(scheme_id));
1428    }
1429    let prefix = match scheme_id {
1430        1024 => "did:v1:nym:",
1431        1025 => "did:key:",
1432        _ => return Err(Error::UnknownCompressedValue(scheme_id)),
1433    };
1434    let mut url = prefix.to_owned();
1435    url.push_str(&decode_base58_part(&items[1])?);
1436    if let Some(fragment) = items.get(2) {
1437        url.push('#');
1438        url.push_str(&decode_base58_part(fragment)?);
1439    }
1440    Ok(Value::Text(url))
1441}
1442
1443fn decode_base58_part(value: &Value) -> Result<String> {
1444    match value {
1445        Value::Text(text) => Ok(text.clone()),
1446        Value::Bytes(bytes) => Ok(format!("z{}", base58_encode(bytes))),
1447        _ => Err(Error::InvalidInput(
1448            "DID URL component must be text or bytes".to_owned(),
1449        )),
1450    }
1451}
1452
1453fn encode_xsd_date(value: &Value) -> Result<Option<Value>> {
1454    let Value::Text(text) = value else {
1455        return Ok(None);
1456    };
1457    if text.contains('T') {
1458        return Ok(None);
1459    }
1460    let Some((year, month, day)) = parse_date(text) else {
1461        return Ok(None);
1462    };
1463    let seconds = days_from_civil(year, month, day) * 86_400;
1464    if format_date_from_seconds(seconds)? != *text {
1465        return Ok(Some(Value::Text(text.clone())));
1466    }
1467    Ok(Some(integer(seconds.into())))
1468}
1469
1470fn decode_xsd_date(value: &Value) -> Result<Option<Value>> {
1471    let Some(seconds) = integer_i64(value) else {
1472        return Ok(None);
1473    };
1474    Ok(Some(Value::Text(format_date_from_seconds(seconds)?)))
1475}
1476
1477fn encode_xsd_datetime(value: &Value) -> Result<Option<Value>> {
1478    let Value::Text(text) = value else {
1479        return Ok(None);
1480    };
1481    if !text.contains('T') {
1482        return Ok(None);
1483    }
1484    let Some(parsed) = parse_datetime(text) else {
1485        return Ok(None);
1486    };
1487    let seconds = parsed.seconds;
1488    if parsed.millis == 0 && !parsed.had_millis {
1489        if format_datetime(seconds, 0, false)? != *text {
1490            return Ok(Some(Value::Text(text.clone())));
1491        }
1492        return Ok(Some(integer(seconds.into())));
1493    }
1494    if format_datetime(seconds, parsed.millis, true)? != *text {
1495        return Ok(Some(Value::Text(text.clone())));
1496    }
1497    Ok(Some(Value::Array(vec![
1498        integer(seconds.into()),
1499        integer(parsed.millis.into()),
1500    ])))
1501}
1502
1503fn decode_xsd_datetime(value: &Value) -> Result<Option<Value>> {
1504    if let Some(seconds) = integer_i64(value) {
1505        return Ok(Some(Value::Text(format_datetime(seconds, 0, false)?)));
1506    }
1507    if let Value::Array(items) = value
1508        && items.len() == 2
1509        && let (Some(seconds), Some(millis)) = (integer_i64(&items[0]), non_negative_u64(&items[1]))
1510    {
1511        return Ok(Some(Value::Text(format_datetime(
1512            seconds,
1513            u16::try_from(millis).map_err(|_| {
1514                Error::InvalidInput("dateTime millisecond value too large".to_owned())
1515            })?,
1516            true,
1517        )?)));
1518    }
1519    Ok(None)
1520}
1521
/// Result of parsing a date-time string.
#[derive(Copy, Clone)]
struct ParsedDateTime {
    /// Whole seconds relative to 1970-01-01T00:00:00Z.
    seconds: i64,
    /// Millisecond part; `0` when the input had no fractional part.
    millis: u16,
    /// Whether the input spelled out a fractional part at all.
    had_millis: bool,
}
1528
1529fn parse_date(text: &str) -> Option<(i32, u32, u32)> {
1530    if text.len() != 10 {
1531        return None;
1532    }
1533    let year = text[0..4].parse().ok()?;
1534    let month = text[5..7].parse().ok()?;
1535    let day = text[8..10].parse().ok()?;
1536    if &text[4..5] != "-" || &text[7..8] != "-" {
1537        return None;
1538    }
1539    if !(1..=12).contains(&month) || !(1..=days_in_month(year, month)).contains(&day) {
1540        return None;
1541    }
1542    Some((year, month, day))
1543}
1544
1545fn parse_datetime(text: &str) -> Option<ParsedDateTime> {
1546    if !text.ends_with('Z') || text.len() < 20 {
1547        return None;
1548    }
1549    let (date, time) = text[..text.len() - 1].split_once('T')?;
1550    let (year, month, day) = parse_date(date)?;
1551    let hour: u32 = time[0..2].parse().ok()?;
1552    let minute: u32 = time[3..5].parse().ok()?;
1553    let second: u32 = time[6..8].parse().ok()?;
1554    if &time[2..3] != ":" || &time[5..6] != ":" {
1555        return None;
1556    }
1557    if hour > 23 || minute > 59 || second > 59 {
1558        return None;
1559    }
1560    let (millis, had_millis) = if time.len() == 8 {
1561        (0, false)
1562    } else if time.len() == 12 && &time[8..9] == "." {
1563        (time[9..12].parse().ok()?, true)
1564    } else {
1565        return None;
1566    };
1567    let days = days_from_civil(year, month, day);
1568    let seconds = days * 86_400 + i64::from(hour * 3600 + minute * 60 + second);
1569    Some(ParsedDateTime {
1570        seconds,
1571        millis,
1572        had_millis,
1573    })
1574}
1575
1576fn format_date_from_seconds(seconds: i64) -> Result<String> {
1577    if seconds % 86_400 != 0 {
1578        return Err(Error::InvalidInput(
1579            "xsd:date seconds are not aligned to midnight UTC".to_owned(),
1580        ));
1581    }
1582    let (year, month, day) = civil_from_days(seconds.div_euclid(86_400));
1583    Ok(format!("{year:04}-{month:02}-{day:02}"))
1584}
1585
1586fn format_datetime(seconds: i64, millis: u16, include_millis: bool) -> Result<String> {
1587    if millis > 999 {
1588        return Err(Error::InvalidInput(
1589            "dateTime millisecond value must be <= 999".to_owned(),
1590        ));
1591    }
1592    let days = seconds.div_euclid(86_400);
1593    let seconds_of_day = seconds.rem_euclid(86_400);
1594    let (year, month, day) = civil_from_days(days);
1595    let hour = seconds_of_day / 3600;
1596    let minute = (seconds_of_day % 3600) / 60;
1597    let second = seconds_of_day % 60;
1598    if include_millis {
1599        Ok(format!(
1600            "{year:04}-{month:02}-{day:02}T{hour:02}:{minute:02}:{second:02}.{millis:03}Z"
1601        ))
1602    } else {
1603        Ok(format!(
1604            "{year:04}-{month:02}-{day:02}T{hour:02}:{minute:02}:{second:02}Z"
1605        ))
1606    }
1607}
1608
1609fn days_in_month(year: i32, month: u32) -> u32 {
1610    match month {
1611        1 | 3 | 5 | 7 | 8 | 10 | 12 => 31,
1612        4 | 6 | 9 | 11 => 30,
1613        2 if is_leap_year(year) => 29,
1614        2 => 28,
1615        _ => 0,
1616    }
1617}
1618
/// Returns whether `year` is a leap year: divisible by 4, and either not
/// divisible by 100 or divisible by 400.
fn is_leap_year(year: i32) -> bool {
    let divisible_by = |n: i32| year % n == 0;
    divisible_by(4) && (!divisible_by(100) || divisible_by(400))
}
1622
/// Converts a calendar date into the number of days since 1970-01-01
/// (negative for earlier dates).
///
/// The year is treated as starting in March so that the leap day falls at
/// the end of the shifted year. All intermediate arithmetic is done in `i64`,
/// so no `i32` year can overflow it.
fn days_from_civil(year: i32, month: u32, day: u32) -> i64 {
    let month = i64::from(month);
    let day = i64::from(day);
    // January and February belong to the previous March-based year.
    let year = i64::from(year) - i64::from(month <= 2);

    // 400-year cycle number and position of the year within that cycle.
    let era = year.div_euclid(400);
    let year_of_era = year.rem_euclid(400);

    let shifted_month = if month > 2 { month - 3 } else { month + 9 };
    let day_of_year = (153 * shifted_month + 2) / 5 + day - 1;
    let day_of_era = year_of_era * 365 + year_of_era / 4 - year_of_era / 100 + day_of_year;

    // 146 097 days per cycle; 719 468 days from 0000-03-01 to 1970-01-01.
    era * 146_097 + day_of_era - 719_468
}
1632
/// Converts a day count relative to 1970-01-01 into `(year, month, day)`.
///
/// Works on a calendar whose year starts in March, then shifts January and
/// February back into the following year. The year is narrowed to `i32` with
/// a plain cast.
fn civil_from_days(days: i64) -> (i32, u32, u32) {
    // Days since 0000-03-01.
    let shifted = days + 719_468;
    // 400-year cycle number and day offset within that cycle.
    let era = shifted.div_euclid(146_097);
    let day_of_era = shifted.rem_euclid(146_097);

    let year_of_era =
        (day_of_era - day_of_era / 1460 + day_of_era / 36_524 - day_of_era / 146_096) / 365;
    let day_of_year = day_of_era - (365 * year_of_era + year_of_era / 4 - year_of_era / 100);

    // Month index counted from March (0) to February (11).
    let march_based_month = (5 * day_of_year + 2) / 153;
    let day = day_of_year - (153 * march_based_month + 2) / 5 + 1;
    let month = if march_based_month < 10 {
        march_based_month + 3
    } else {
        march_based_month - 9
    };
    let year = year_of_era + era * 400 + i64::from(month <= 2);

    (year as i32, month as u32, day as u32)
}
1646
1647fn parse_uuid_bytes(text: &str) -> Result<Vec<u8>> {
1648    if text.len() != 36 {
1649        return Err(Error::UnsupportedValue(format!("invalid UUID {text:?}")));
1650    }
1651    for idx in [8, 13, 18, 23] {
1652        if text.as_bytes()[idx] != b'-' {
1653            return Err(Error::UnsupportedValue(format!("invalid UUID {text:?}")));
1654        }
1655    }
1656    let mut compact = String::with_capacity(32);
1657    for (idx, ch) in text.chars().enumerate() {
1658        if [8, 13, 18, 23].contains(&idx) {
1659            continue;
1660        }
1661        compact.push(ch);
1662    }
1663    let mut bytes = Vec::with_capacity(16);
1664    for idx in (0..32).step_by(2) {
1665        let byte = u8::from_str_radix(&compact[idx..idx + 2], 16)
1666            .map_err(|_| Error::UnsupportedValue(format!("invalid UUID {text:?}")))?;
1667        bytes.push(byte);
1668    }
1669    Ok(bytes)
1670}
1671
1672fn format_uuid(bytes: &[u8]) -> Result<String> {
1673    if bytes.len() != 16 {
1674        return Err(Error::InvalidInput(
1675            "compressed UUID bytes must be exactly 16 bytes".to_owned(),
1676        ));
1677    }
1678    let hex = bytes.iter().map(|b| format!("{b:02x}")).collect::<String>();
1679    Ok(format!(
1680        "{}-{}-{}-{}-{}",
1681        &hex[0..8],
1682        &hex[8..12],
1683        &hex[12..16],
1684        &hex[16..20],
1685        &hex[20..32]
1686    ))
1687}
1688
/// The base58 alphabet; a symbol's index is its digit value.
const BASE58_ALPHABET: &[u8; 58] = b"123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz";

/// Decodes base58 text into bytes.
///
/// Every leading `1` stands for exactly one leading zero byte; the remaining
/// symbols are read as one big-endian base58 number. The empty string decodes
/// to no bytes. Returns `None` when a character is not in the alphabet.
fn base58_decode(text: &str) -> Option<Vec<u8>> {
    let leading_zeros = text.bytes().take_while(|&b| b == b'1').count();

    // Base-256 accumulator, least significant byte first. It stays empty
    // while the value is zero, so leading `1`s add nothing to it.
    let mut magnitude: Vec<u8> = Vec::with_capacity(text.len());
    for ch in text.bytes() {
        let mut carry = BASE58_ALPHABET.iter().position(|&b| b == ch)? as u32;
        for byte in magnitude.iter_mut() {
            let x = u32::from(*byte) * 58 + carry;
            *byte = (x & 0xff) as u8;
            carry = x >> 8;
        }
        while carry > 0 {
            magnitude.push((carry & 0xff) as u8);
            carry >>= 8;
        }
    }

    let mut bytes = vec![0u8; leading_zeros];
    bytes.extend(magnitude.iter().rev());
    Some(bytes)
}

/// Encodes bytes as base58 text.
///
/// Every leading zero byte becomes exactly one leading `1`; the remaining
/// bytes are written as one big-endian base58 number. Empty input yields the
/// empty string.
fn base58_encode(bytes: &[u8]) -> String {
    let leading_zeros = bytes.iter().take_while(|&&b| b == 0).count();

    // Base-58 accumulator, least significant digit first. It stays empty
    // while the value is zero, so all-zero input emits only the `1`s.
    let mut digits: Vec<u8> = Vec::new();
    for &byte in &bytes[leading_zeros..] {
        let mut carry = u32::from(byte);
        for digit in digits.iter_mut() {
            let x = u32::from(*digit) * 256 + carry;
            *digit = (x % 58) as u8;
            carry = x / 58;
        }
        while carry > 0 {
            digits.push((carry % 58) as u8);
            carry /= 58;
        }
    }

    let mut out = String::with_capacity(leading_zeros + digits.len());
    for _ in 0..leading_zeros {
        out.push('1');
    }
    for &digit in digits.iter().rev() {
        out.push(char::from(BASE58_ALPHABET[usize::from(digit)]));
    }
    out
}
1742
1743const BASE64_STANDARD: &[u8; 64] =
1744    b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
1745const BASE64_URL: &[u8; 64] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_";
1746
1747fn base64_encode_standard(bytes: &[u8]) -> String {
1748    base64_encode(bytes, BASE64_STANDARD, true)
1749}
1750
1751fn base64url_encode(bytes: &[u8]) -> String {
1752    base64_encode(bytes, BASE64_URL, false)
1753}
1754
/// Encodes `bytes` as base64 text using the given 64-symbol `alphabet`.
///
/// Input is consumed three bytes at a time; each group yields four symbols.
/// A final group of one or two bytes yields two or three symbols, followed by
/// `=` fill characters up to four only when `pad` is true.
fn base64_encode(bytes: &[u8], alphabet: &[u8; 64], pad: bool) -> String {
    let symbol_at = |sextet: u32| char::from(alphabet[(sextet & 0x3f) as usize]);

    let mut encoded = String::with_capacity(bytes.len().div_ceil(3) * 4);
    for group in bytes.chunks(3) {
        // Pack up to three bytes into a 24-bit word; absent bytes stay zero.
        let word = group
            .iter()
            .enumerate()
            .fold(0u32, |acc, (i, &byte)| acc | (u32::from(byte) << (16 - 8 * i)));

        // One byte carries 2 significant symbols, two carry 3, three carry 4.
        let significant = group.len() + 1;
        for slot in 0..4 {
            if slot < significant {
                encoded.push(symbol_at(word >> (18 - 6 * slot)));
            } else if pad {
                encoded.push('=');
            }
        }
    }
    encoded
}
1777
1778fn base64_decode_standard(text: &str) -> Option<Vec<u8>> {
1779    if !text.len().is_multiple_of(4) {
1780        return None;
1781    }
1782    base64_decode(text, BASE64_STANDARD, true)
1783}
1784
1785fn base64url_decode(text: &str) -> Option<Vec<u8>> {
1786    base64_decode(text, BASE64_URL, false)
1787}
1788
/// Decodes base64 `text` using the given 64-symbol `alphabet`.
///
/// With `padded == true`, `=` is accepted only in the last two positions of
/// `text`, nothing may follow it, and the total symbol count must be a
/// multiple of four. With `padded == false`, any `=` is rejected and the
/// symbols missing from the final group of four are implied.
///
/// Returns `None` for a symbol outside `alphabet`, for misplaced padding, or
/// for a length that cannot correspond to whole bytes (more than two symbols
/// missing from the final group).
fn base64_decode(text: &str, alphabet: &[u8; 64], padded: bool) -> Option<Vec<u8>> {
    let raw = text.as_bytes();
    // First index at which a '=' is tolerated.
    let pad_start = raw.len().saturating_sub(2);

    let mut sextets: Vec<u8> = Vec::with_capacity(raw.len() + 3);
    let mut pad_count = 0usize;
    for (pos, &symbol) in raw.iter().enumerate() {
        if symbol == b'=' {
            if !padded || pos < pad_start {
                return None;
            }
            pad_count += 1;
            // Padding contributes zero bits; the bytes it produces are dropped below.
            sextets.push(0);
            continue;
        }
        if pad_count != 0 {
            // Data after padding is malformed.
            return None;
        }
        let index = alphabet.iter().position(|&entry| entry == symbol)?;
        sextets.push(index as u8);
    }

    if !padded {
        // Complete the final group with implied zero sextets.
        let missing = (4 - sextets.len() % 4) % 4;
        pad_count += missing;
        sextets.resize(sextets.len() + missing, 0);
    }
    if sextets.len() % 4 != 0 || pad_count > 2 {
        return None;
    }

    let mut out = Vec::with_capacity(sextets.len() / 4 * 3);
    for quad in sextets.chunks_exact(4) {
        let word = quad
            .iter()
            .fold(0u32, |acc, &sextet| (acc << 6) | u32::from(sextet));
        // The low three bytes of the big-endian word are the decoded bytes.
        out.extend_from_slice(&word.to_be_bytes()[1..]);
    }
    // Each padding symbol accounts for one byte that was never in the input.
    out.truncate(out.len().saturating_sub(pad_count));
    Some(out)
}
1834
1835fn map_get_text<'a>(value: &'a Value, key: &str) -> Option<&'a Value> {
1836    map_get(value, &Value::Text(key.to_owned()))
1837}
1838
1839fn map_get<'a>(value: &'a Value, key: &Value) -> Option<&'a Value> {
1840    let Value::Map(map) = value else {
1841        return None;
1842    };
1843    map.iter()
1844        .find_map(|(k, v)| if k == key { Some(v) } else { None })
1845}
1846
1847fn map_set(map_value: &mut Value, key: Value, value: Value) -> Result<()> {
1848    let Value::Map(map) = map_value else {
1849        return Err(Error::InvalidInput("output must be a map".to_owned()));
1850    };
1851    if let Some((_, existing)) = map.iter_mut().find(|(k, _)| k == &key) {
1852        *existing = value;
1853    } else {
1854        map.push((key, value));
1855    }
1856    Ok(())
1857}
1858
1859fn map_to_json_object(value: &Value) -> Value {
1860    match value {
1861        Value::Array(items) => Value::Array(items.iter().map(map_to_json_object).collect()),
1862        Value::Map(map) => Value::Map(
1863            map.iter()
1864                .map(|(key, value)| (map_to_json_object(key), map_to_json_object(value)))
1865                .collect(),
1866        ),
1867        other => other.clone(),
1868    }
1869}
1870
1871fn object_to_string_map(value: &Value) -> Result<BTreeMap<String, Value>> {
1872    let Value::Map(map) = value else {
1873        return Err(Error::InvalidTermDefinition(
1874            "context must be an object".to_owned(),
1875        ));
1876    };
1877    let mut out = BTreeMap::new();
1878    for (key, value) in map {
1879        let Some(key) = key.as_text() else {
1880            return Err(Error::InvalidTermDefinition(
1881                "context keys must be text strings".to_owned(),
1882            ));
1883        };
1884        out.insert(key.to_owned(), value.clone());
1885    }
1886    Ok(out)
1887}
1888
1889fn string_map_to_value(map: BTreeMap<String, Value>) -> Value {
1890    Value::Map(
1891        map.into_iter()
1892            .map(|(key, value)| (Value::Text(key), value))
1893            .collect(),
1894    )
1895}
1896
1897fn integer(value: i128) -> Value {
1898    Value::Integer(Integer::try_from(value).expect("integer value fits CBOR major type 0/1"))
1899}
1900
1901fn non_negative_u64(value: &Value) -> Option<u64> {
1902    let n = value.as_integer()?;
1903    u64::try_from(n).ok()
1904}
1905
1906fn integer_i64(value: &Value) -> Option<i64> {
1907    let n = value.as_integer()?;
1908    i64::try_from(n).ok()
1909}