Skip to main content

prikk_object/
canonical.rs

1//! Deterministic canonical encoding helpers.
2//!
3//! The encoding is a minimal TLV seed for Prikk identity bytes. Field tags are local to each
4//! payload definition. This module is intentionally small and explicit: callers must encode fields
5//! in the order defined by their payload contracts.
6
7use prikk_error::{PrikkError, Result};
8
/// One-byte code identifying how a canonical field's value bytes are laid out.
///
/// The discriminant of each variant is the byte written after the field tag.
#[repr(u8)]
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum WireType {
    /// Text carried as UTF-8 bytes.
    String = 1,
    /// Uninterpreted byte string.
    Bytes = 2,
    /// 32-bit unsigned integer in big-endian byte order.
    U32 = 3,
    /// 64-bit unsigned integer in big-endian byte order.
    U64 = 4,
    /// Boolean stored in a single byte: 0 or 1.
    Bool = 5,
    /// Bytes of a nested canonical record.
    Record = 6,
}
26
27/// Trait for values that can emit Prikk canonical bytes.
28pub trait CanonicalEncode {
29    /// Encode this value into canonical bytes.
30    fn encode_canonical(&self, writer: &mut CanonicalWriter) -> Result<()>;
31
32    /// Return canonical bytes for this value.
33    fn to_canonical_bytes(&self) -> Result<Vec<u8>> {
34        let mut writer = CanonicalWriter::new();
35        self.encode_canonical(&mut writer)?;
36        Ok(writer.finish())
37    }
38}
39
40/// Canonical TLV writer.
41#[derive(Debug, Default)]
42pub struct CanonicalWriter {
43    bytes: Vec<u8>,
44    last_tag: Option<u16>,
45}
46
47impl CanonicalWriter {
48    /// Create an empty writer.
49    #[must_use]
50    pub fn new() -> Self {
51        Self::default()
52    }
53
54    /// Finish and return the canonical byte buffer.
55    #[must_use]
56    pub fn finish(self) -> Vec<u8> {
57        self.bytes
58    }
59
60    /// Emit a string field.
61    pub fn field_string(&mut self, tag: u16, value: &str) -> Result<()> {
62        self.field_raw(tag, WireType::String, value.as_bytes())
63    }
64
65    /// Emit an optional string field.
66    pub fn field_string_opt(&mut self, tag: u16, value: Option<&str>) -> Result<()> {
67        if let Some(value) = value {
68            self.field_string(tag, value)?;
69        }
70        Ok(())
71    }
72
73    /// Emit a bytes field.
74    pub fn field_bytes(&mut self, tag: u16, value: &[u8]) -> Result<()> {
75        self.field_raw(tag, WireType::Bytes, value)
76    }
77
78    /// Emit a u32 field.
79    pub fn field_u32(&mut self, tag: u16, value: u32) -> Result<()> {
80        self.field_raw(tag, WireType::U32, &value.to_be_bytes())
81    }
82
83    /// Emit a u64 field.
84    pub fn field_u64(&mut self, tag: u16, value: u64) -> Result<()> {
85        self.field_raw(tag, WireType::U64, &value.to_be_bytes())
86    }
87
88    /// Emit a bool field.
89    pub fn field_bool(&mut self, tag: u16, value: bool) -> Result<()> {
90        let encoded = if value { [1_u8] } else { [0_u8] };
91        self.field_raw(tag, WireType::Bool, &encoded)
92    }
93
94    /// Emit a nested canonical record.
95    pub fn field_record<T: CanonicalEncode>(&mut self, tag: u16, value: &T) -> Result<()> {
96        let mut nested = CanonicalWriter::new();
97        value.encode_canonical(&mut nested)?;
98        self.field_raw(tag, WireType::Record, &nested.finish())
99    }
100
101    /// Emit a repeated nested record field. Each item is emitted with the same tag.
102    pub fn repeated_record<T: CanonicalEncode>(&mut self, tag: u16, values: &[T]) -> Result<()> {
103        for value in values {
104            self.field_record(tag, value)?;
105        }
106        Ok(())
107    }
108
109    /// Emit a repeated string field. Each item is emitted with the same tag.
110    pub fn repeated_string(&mut self, tag: u16, values: &[String]) -> Result<()> {
111        for value in values {
112            self.field_string(tag, value)?;
113        }
114        Ok(())
115    }
116
117    /// Emit a repeated object-id field. Each item is emitted with the same tag.
118    pub fn repeated_object_id(&mut self, tag: u16, values: &[crate::ObjectId]) -> Result<()> {
119        for value in values {
120            self.field_bytes(tag, value.as_bytes())?;
121        }
122        Ok(())
123    }
124
125    /// Emit a raw field.
126    pub fn field_raw(&mut self, tag: u16, wire_type: WireType, value: &[u8]) -> Result<()> {
127        if tag == 0 {
128            return Err(PrikkError::CanonicalEncoding(
129                "field tag 0 is reserved".to_string(),
130            ));
131        }
132        if let Some(last) = self.last_tag {
133            if tag < last {
134                return Err(PrikkError::CanonicalEncoding(format!(
135                    "field tag order violation: {tag} after {last}"
136                )));
137            }
138        }
139        self.last_tag = Some(tag);
140        self.bytes.extend_from_slice(&tag.to_be_bytes());
141        self.bytes.push(wire_type as u8);
142        self.bytes
143            .extend_from_slice(&(value.len() as u64).to_be_bytes());
144        self.bytes.extend_from_slice(value);
145        Ok(())
146    }
147}
148
/// Report whether every element is strictly less than the element that follows it.
///
/// Equal neighbours count as a failure, so a `true` result also means the slice has no
/// duplicates. Slices with fewer than two elements yield `true`.
#[must_use]
pub fn is_strictly_sorted<T: Ord>(values: &[T]) -> bool {
    // Pair each element with its successor by zipping the slice against itself shifted by one.
    values
        .iter()
        .zip(values.iter().skip(1))
        .all(|(earlier, later)| earlier < later)
}
160
/// Report whether `values` is exactly the sequence `1, 2, ..., n` for `n = values.len()`.
///
/// An empty slice yields `true`, since it matches the empty range `1..=0`.
///
/// The comparison widens each `u32` to `u64` instead of casting it to `usize`, so no value is
/// truncated on targets where `usize` is narrower than 32 bits.
#[must_use]
pub fn is_contiguous_op_seq(values: &[u32]) -> bool {
    // Walk the slice alongside the expected counter 1, 2, 3, ...
    values
        .iter()
        .zip(1_u64..)
        .all(|(&value, expected)| u64::from(value) == expected)
}
169
#[cfg(test)]
mod tests {
    use super::CanonicalWriter;

    /// Writing tag 1 after tag 2 must be refused, while the first write succeeds.
    #[test]
    fn rejects_decreasing_tags() {
        let mut out = CanonicalWriter::new();

        let higher_tag_first = out.field_u32(2, 1);
        assert!(higher_tag_first.is_ok());

        let lower_tag_second = out.field_u32(1, 1);
        assert!(lower_tag_second.is_err());
    }
}