Skip to main content

prikk_object/
canonical.rs

1//! Deterministic canonical encoding helpers.
2//!
3//! The encoding is a minimal TLV seed for Prikk identity bytes. Field tags are local to each
4//! payload definition. This module is intentionally small and explicit: callers must encode fields
5//! in the order defined by their payload contracts.
6
7use prikk_error::{PrikkError, Result};
8
/// Value-type code of a canonical field: the `value_type` of FDD-03 §7.1.
///
/// `WireType` is the historical Rust name for this type; the discriminants and
/// their meaning follow the normative §7.1 table. Every field record carries the
/// `u8` code of its value type, which makes the codes part of object identity.
#[repr(u8)]
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum WireType {
    /// One-byte boolean: `0` or `1`.
    Bool = 0x01,
    /// Big-endian unsigned 16-bit integer.
    U16 = 0x02,
    /// Big-endian unsigned 32-bit integer.
    U32 = 0x03,
    /// Big-endian unsigned 64-bit integer.
    U64 = 0x04,
    /// Big-endian discriminated `enum_u16` value.
    EnumU16 = 0x05,
    /// String carried as its UTF-8 bytes.
    String = 0x10,
    /// Opaque byte string.
    Bytes = 0x11,
    /// Object identifier, 32 bytes long.
    ObjectId = 0x12,
    /// Repo-relative path, normalized, as UTF-8.
    RepoPath = 0x13,
    /// Bytes of a nested canonical record.
    Record = 0x20,
    /// One item of a repeated record list.
    RecordListItem = 0x21,
}
39
40/// Trait for values that can emit Prikk canonical bytes.
41pub trait CanonicalEncode {
42    /// Encode this value into canonical bytes.
43    fn encode_canonical(&self, writer: &mut CanonicalWriter) -> Result<()>;
44
45    /// Return canonical bytes for this value.
46    fn to_canonical_bytes(&self) -> Result<Vec<u8>> {
47        let mut writer = CanonicalWriter::new();
48        self.encode_canonical(&mut writer)?;
49        Ok(writer.finish())
50    }
51}
52
/// Writer that accumulates canonical TLV field records in an in-memory buffer.
#[derive(Default, Debug)]
pub struct CanonicalWriter {
    /// Encoded bytes accumulated so far.
    bytes: Vec<u8>,
    /// Tag of the most recently written field; `None` until a field has been written.
    last_tag: Option<u16>,
}
59
60impl CanonicalWriter {
61    /// Create an empty writer.
62    #[must_use]
63    pub fn new() -> Self {
64        Self::default()
65    }
66
67    /// Finish and return the canonical byte buffer.
68    #[must_use]
69    pub fn finish(self) -> Vec<u8> {
70        self.bytes
71    }
72
73    /// Emit a string field.
74    pub fn field_string(&mut self, tag: u16, value: &str) -> Result<()> {
75        self.field_raw(tag, WireType::String, value.as_bytes())
76    }
77
78    /// Emit an optional string field.
79    pub fn field_string_opt(&mut self, tag: u16, value: Option<&str>) -> Result<()> {
80        if let Some(value) = value {
81            self.field_string(tag, value)?;
82        }
83        Ok(())
84    }
85
86    /// Emit a bytes field.
87    pub fn field_bytes(&mut self, tag: u16, value: &[u8]) -> Result<()> {
88        self.field_raw(tag, WireType::Bytes, value)
89    }
90
91    /// Emit a u32 field.
92    pub fn field_u32(&mut self, tag: u16, value: u32) -> Result<()> {
93        self.field_raw(tag, WireType::U32, &value.to_be_bytes())
94    }
95
96    /// Emit a u64 field.
97    pub fn field_u64(&mut self, tag: u16, value: u64) -> Result<()> {
98        self.field_raw(tag, WireType::U64, &value.to_be_bytes())
99    }
100
101    /// Emit a bool field.
102    pub fn field_bool(&mut self, tag: u16, value: bool) -> Result<()> {
103        let encoded = if value { [1_u8] } else { [0_u8] };
104        self.field_raw(tag, WireType::Bool, &encoded)
105    }
106
107    /// Emit a u16 field.
108    pub fn field_u16(&mut self, tag: u16, value: u16) -> Result<()> {
109        self.field_raw(tag, WireType::U16, &value.to_be_bytes())
110    }
111
112    /// Emit an `enum_u16` field (discriminated enum value).
113    pub fn field_enum_u16(&mut self, tag: u16, value: u16) -> Result<()> {
114        self.field_raw(tag, WireType::EnumU16, &value.to_be_bytes())
115    }
116
117    /// Emit an object-id field (32 bytes, `value_type` `object_id`).
118    pub fn field_object_id(&mut self, tag: u16, value: &crate::ObjectId) -> Result<()> {
119        self.field_raw(tag, WireType::ObjectId, value.as_bytes())
120    }
121
122    /// Emit a repo-path field. The caller passes an already-normalized
123    /// repo-relative UTF-8 path.
124    pub fn field_repo_path(&mut self, tag: u16, value: &str) -> Result<()> {
125        self.field_raw(tag, WireType::RepoPath, value.as_bytes())
126    }
127
128    /// Emit a nested canonical record.
129    pub fn field_record<T: CanonicalEncode>(&mut self, tag: u16, value: &T) -> Result<()> {
130        let mut nested = CanonicalWriter::new();
131        value.encode_canonical(&mut nested)?;
132        self.field_raw(tag, WireType::Record, &nested.finish())
133    }
134
135    /// Emit a repeated nested record field. Each item is emitted with the same tag.
136    pub fn repeated_record<T: CanonicalEncode>(&mut self, tag: u16, values: &[T]) -> Result<()> {
137        for value in values {
138            self.field_record(tag, value)?;
139        }
140        Ok(())
141    }
142
143    /// Emit a single record-list item (`value_type` `record_list_item`).
144    pub fn field_record_list_item<T: CanonicalEncode>(
145        &mut self,
146        tag: u16,
147        value: &T,
148    ) -> Result<()> {
149        let mut nested = CanonicalWriter::new();
150        value.encode_canonical(&mut nested)?;
151        self.field_raw(tag, WireType::RecordListItem, &nested.finish())
152    }
153
154    /// Emit a repeated record list using `record_list_item` framing. Each item is
155    /// emitted with the same tag.
156    pub fn repeated_record_list<T: CanonicalEncode>(
157        &mut self,
158        tag: u16,
159        values: &[T],
160    ) -> Result<()> {
161        for value in values {
162            self.field_record_list_item(tag, value)?;
163        }
164        Ok(())
165    }
166
167    /// Emit a repeated string field. Each item is emitted with the same tag.
168    pub fn repeated_string(&mut self, tag: u16, values: &[String]) -> Result<()> {
169        for value in values {
170            self.field_string(tag, value)?;
171        }
172        Ok(())
173    }
174
175    /// Emit a repeated object-id field. Each item is emitted with the same tag,
176    /// using the `object_id` value_type (FDD-03 §7.1).
177    pub fn repeated_object_id(&mut self, tag: u16, values: &[crate::ObjectId]) -> Result<()> {
178        for value in values {
179            self.field_object_id(tag, value)?;
180        }
181        Ok(())
182    }
183
184    /// Emit a raw field.
185    pub fn field_raw(&mut self, tag: u16, wire_type: WireType, value: &[u8]) -> Result<()> {
186        if tag == 0 {
187            return Err(PrikkError::CanonicalEncoding(
188                "field tag 0 is reserved".to_string(),
189            ));
190        }
191        if let Some(last) = self.last_tag {
192            if tag < last {
193                return Err(PrikkError::CanonicalEncoding(format!(
194                    "field tag order violation: {tag} after {last}"
195                )));
196            }
197        }
198        self.last_tag = Some(tag);
199        self.bytes.extend_from_slice(&tag.to_be_bytes());
200        self.bytes.push(wire_type as u8);
201        self.bytes
202            .extend_from_slice(&(value.len() as u64).to_be_bytes());
203        self.bytes.extend_from_slice(value);
204        Ok(())
205    }
206}
207
/// Report whether `values` is in strictly ascending order.
///
/// Every element must compare less than its successor, so equal neighbours
/// (duplicates) make the result `false`. Slices with fewer than two elements
/// have no neighbouring pair to violate the order and yield `true`.
#[must_use]
pub fn is_strictly_sorted<T: Ord>(values: &[T]) -> bool {
    // Pair each element with the one that follows it.
    values
        .iter()
        .zip(values.iter().skip(1))
        .all(|(current, next)| current < next)
}
219
/// Report whether `values` is exactly the sequence `1, 2, …, n` for `n = values.len()`.
///
/// An empty slice is the sequence for `n = 0` and yields `true`.
#[must_use]
pub fn is_contiguous_op_seq(values: &[u32]) -> bool {
    // Compare in u64: widening a u32 is lossless on every target, whereas casting
    // it to `usize` would truncate where `usize` is narrower than 32 bits and could
    // accept a value such as 65537 in place of 1.
    values
        .iter()
        .zip(1_u64..)
        .all(|(&value, expected)| u64::from(value) == expected)
}
228
#[cfg(test)]
mod tests {
    use super::CanonicalWriter;

    /// A tag lower than the previously written one must be refused.
    #[test]
    fn rejects_decreasing_tags() {
        let mut writer = CanonicalWriter::new();
        assert!(writer.field_u32(2, 1).is_ok());
        assert!(writer.field_u32(1, 1).is_err());
    }

    /// Tag 0 is reserved, and a refused field must not leave bytes behind.
    #[test]
    fn rejects_reserved_tag_zero() {
        let mut writer = CanonicalWriter::new();
        assert!(writer.field_u32(0, 1).is_err());
        assert!(writer.finish().is_empty());
    }

    /// Repeated fields reuse one tag, so an equal tag must be accepted.
    #[test]
    fn accepts_repeated_tag() {
        let mut writer = CanonicalWriter::new();
        assert!(writer.field_u32(3, 1).is_ok());
        assert!(writer.field_u32(3, 2).is_ok());
    }

    /// A field is framed as tag (u16 BE), type code, length (u64 BE), payload.
    #[test]
    fn emits_tag_type_length_value() {
        let mut writer = CanonicalWriter::new();
        assert!(writer.field_u32(1, 7).is_ok());
        assert_eq!(
            writer.finish(),
            [0_u8, 1, 0x03, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 7]
        );
    }
}
239}