git_bug/replica/entity/operation/
mod.rs

1// git-bug-rs - A rust library for interfacing with git-bug repositories
2//
3// Copyright (C) 2025 Benedikt Peetz <benedikt.peetz@b-peetz.de>
4// SPDX-License-Identifier: GPL-3.0-or-later
5//
6// This file is part of git-bug-rs/git-gub.
7//
8// You should have received a copy of the License along with this program.
9// If not, see <https://www.gnu.org/licenses/agpl.txt>.
10
//! A generic representation of an operation. This contains the data that every
//! operation needs to contain (e.g., author, nonce).
13
14use std::fmt::Display;
15
16use serde::{Deserialize, Serialize};
17use sha2::{Digest, Sha256};
18use simd_json::{
19    borrowed::{self, Value},
20    derived::{ValueTryAsScalar, ValueTryIntoObject, ValueTryIntoString},
21    owned,
22    value::prelude::base::Writable,
23};
24
25use super::{
26    Entity,
27    id::{Id, entity_id::EntityId},
28    identity::IdentityStub,
29    nonce::Nonce,
30    timestamp::TimeStamp,
31};
32use crate::replica::entity::operation::operation_data::OperationData;
33
34pub mod operation_data;
35pub mod operation_pack;
36pub mod operations;
37
/// A collection of the shared data that every Operation tracks, and the specific
/// Operation data needed for an [`Entity's`][`Entity`] Operation.
// As explained in the toplevel doc comment, this Derive is only an implementation detail.
#[allow(clippy::unsafe_derive_deserialize)]
#[derive(Debug, Deserialize, Serialize)]
pub struct Operation<E: Entity> {
    /// The author of this operation.
    pub(super) author: IdentityStub,
    /// The time stamp of this operation's creation.
    pub(super) creation_time: TimeStamp,
    /// Optional key/value metadata attached to this operation.
    // Use a vec here, so that we can keep the order of insertion
    pub(super) metadata: Option<Vec<(String, String)>>,

    /// Mandatory random bytes to ensure a uniqueness of the data used to later
    /// generate the ID.
    ///
    /// It has no functional purpose and should be ignored.
    nonce: Nonce,

    /// The ID of this operation.
    ///
    /// Always set instead of calculated on the fly, to allow it to be cached in the disk cache.
    #[serde(bound = "EntityId<E>: serde::Serialize + serde::de::DeserializeOwned")]
    id: EntityId<E>,

    /// The entity-specific data of this operation.
    #[serde(bound = "E::OperationData: serde::Serialize + serde::de::DeserializeOwned")]
    pub(super) data: E::OperationData,
}
62
63impl<E: Entity> Display for Operation<E> {
64    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
65        <Self as std::fmt::Debug>::fmt(self, f)
66    }
67}
68
69impl<E: Entity> Operation<E> {
70    /// Return the author of this [`Operation`].
71    pub fn author(&self) -> IdentityStub {
72        self.author
73    }
74
75    /// Return the operation data of this [`Operation`].
76    pub fn operation_data(&self) -> &E::OperationData {
77        &self.data
78    }
79
80    /// Return the Unix time stamp of this [`Operations`][`Operation`] creation.
81    pub fn creation_time(&self) -> TimeStamp {
82        self.creation_time
83    }
84
85    /// Return the metadata of this [`Operation`].
86    pub fn metadata(&self) -> impl Iterator<Item = &(String, String)> {
87        self.metadata.iter().flat_map(|a| a.iter())
88    }
89
90    /// Encodes this Operation to it's JSON value.
91    pub fn as_value(&self) -> borrowed::Object<'_> {
92        Self::as_value_parts(
93            &self.data,
94            // Safety:
95            // Only used for storage, we are not trusting it.
96            unsafe { self.creation_time.to_unsafe() }.value,
97            self.nonce,
98            self.metadata.as_ref(),
99        )
100    }
101
102    fn as_value_parts<'a>(
103        data: &'a E::OperationData,
104        creation_time: u64,
105        nonce: Nonce,
106        metadata: Option<&'a Vec<(String, String)>>,
107    ) -> borrowed::Object<'a> {
108        // HACK(@bpeetz): This function preserves order, only because the underlying halfbrown,
109        // type keeps the order for the first 32 elements (they use a vec, for performance
110        // reasons.)
111        // See [1] for a real solution. <2025-05-28>
112        //
113        // [1]: https://github.com/simd-lite/simd-json/issues/378
114
115        let mut object = borrowed::Object::new();
116
117        // Safety:
118        //  This hashmap is new. As such, no duplicated keys should be inserted.
119        unsafe {
120            object.insert_nocheck("type".into(), data.to_json_type().into());
121            object.insert_nocheck("timestamp".into(), creation_time.into());
122            object.insert_nocheck("nonce".into(), Into::<String>::into(nonce).into());
123        }
124
125        if let Some(meta) = metadata {
126            let mut metadata = borrowed::Object::new();
127
128            for (k, v) in meta {
129                assert_eq!(
130                    metadata.insert(k.into(), v.as_str().into()),
131                    None,
132                    "No duplicate name expected"
133                );
134            }
135
136            unsafe {
137                // Safety:
138                //  This key was not inserted before.
139                object.insert_nocheck("metadata".into(), metadata.into());
140            }
141        }
142
143        for (k, v) in data.as_value() {
144            assert_eq!(object.insert(k, v), None, "No duplicate name expected");
145        }
146
147        object
148    }
149
150    /// Parses this Operation from an JSON value.
151    ///
152    /// # Errors
153    /// If the value does not conform to the expected JSON representation.
154    pub fn from_value(raw: owned::Value, author: IdentityStub) -> Result<Self, decode::Error> {
155        {
156            struct BaseOp {
157                r#type: u64,
158                timestamp: u64,
159                nonce: Nonce,
160                metadata: Option<Vec<(String, String)>>,
161            }
162
163            let base_op: BaseOp = {
164                use crate::replica::entity::operation::operation_data::get;
165
166                let mut object = raw.clone().try_into_object()?;
167
168                let r#type = get! {object, "type", try_as_u64, decode::Error};
169                let timestamp = get! {object, "timestamp", try_as_u64, decode::Error};
170                let nonce =
171                    Nonce::try_from(get! {object, "nonce", try_into_string, decode::Error})?;
172                let metadata = get! {@option[next] object, "metadata", |some: owned::Value| {
173                    let object = some.try_into_object()?;
174
175                    Ok::<_, decode::Error>(
176                        Some(get! {@mk_map object, try_into_string, decode::Error}))
177                }, read::Error};
178
179                BaseOp {
180                    r#type,
181                    timestamp,
182                    nonce,
183                    metadata,
184                }
185            };
186
187            let operation_data =
188                E::OperationData::from_value(raw, base_op.r#type).map_err(|err| {
189                    // FIXME(@bpeetz): Use the actual error instead of this string. <2025-04-19>
190                    decode::Error::DateDecode(err.to_string())
191                })?;
192
193            // Calculate the id eagerly, so that we can cache it.
194            let id = {
195                /// Escape html sequences in JSON.
196                ///
197                /// # Note
198                /// This follows the go JSON package:
199                /// <https://pkg.go.dev/encoding/json#Marshal>.
200                ///
201                /// We need this, as git-bug's go json package does this, and we have to keep
202                /// bit-to-bit reproducibility.
203                fn html_escape(input: &str) -> String {
204                    let mut output = String::new();
205
206                    for ch in input.chars() {
207                        let next = match ch {
208                            '<' => &['\\', 'u', '0', '0', '3', 'c'][..],
209                            '>' => &['\\', 'u', '0', '0', '3', 'e'][..],
210                            '&' => &['\\', 'u', '0', '0', '2', '6'][..],
211                            '\u{2028}' => &['\\', 'u', '2', '0', '2', '8'][..],
212                            '\u{2029}' => &['\\', 'u', '2', '0', '2', '9'][..],
213                            _ => &[ch][..],
214                        };
215
216                        for ch in next {
217                            output.push(*ch);
218                        }
219                    }
220
221                    output
222                }
223
224                let mut hasher = Sha256::new();
225
226                let object = Value::Object(Box::new(Self::as_value_parts(
227                    &operation_data,
228                    base_op.timestamp,
229                    base_op.nonce,
230                    base_op.metadata.as_ref(),
231                )));
232
233                // NOTE(@bpeetz): We cannot escape the string before running
234                // it through [`encode`], as our escapes would otherwise be escaped again.
235                // Thus, this is the best way to ensure that they are actually
236                // meaningful. <2025-05-30>
237                let str_escaped = { html_escape(&object.encode()) };
238
239                hasher.update(str_escaped);
240                let result = hasher.finalize();
241                let id = Id::from_sha256_hash(&result);
242
243                unsafe {
244                    // Safety:
245                    // We have just decoded the id and must just hope that it matches.
246                    EntityId::from_id(id)
247                }
248            };
249
250            Ok(Self {
251                author,
252                creation_time: TimeStamp::from(base_op.timestamp),
253                metadata: base_op.metadata,
254                nonce: base_op.nonce,
255                data: operation_data,
256                id,
257            })
258        }
259    }
260
261    /// Return the ID of this operation.
262    ///
263    /// This would first serialize the [`Operation`] to it's JSON encoding and then
264    /// calculate the sha256 hash of the resulting string.
265    pub fn id(&self) -> EntityId<E> {
266        self.id
267    }
268}
269
/// The errors that can occur while decoding an [`Operation`][`super::Operation`].
#[allow(missing_docs)]
pub mod decode {
    /// The Error returned by
    /// [`Operation::from_value`][`super::Operation::from_value`].
    #[derive(Debug, thiserror::Error)]
    pub enum Error {
        /// Decoding the entity-specific operation data failed. Carries the
        /// underlying error, rendered as a string.
        // NOTE(review): The name looks like a typo of `DataDecode`, but renaming
        // a public variant would be a breaking change.
        #[error("Failed to read this operation's specific data: {0}")]
        DateDecode(String),

        /// A JSON value that should have been an object had another type.
        #[error("Expected the value to be object: {0}")]
        ValueNotObject(#[from] simd_json::TryTypeError),

        /// A required field was not present in the JSON object.
        #[error("Object was missing the '{field}' field")]
        MissingJsonField {
            /// The name of the missing field.
            field: &'static str,
        },

        /// A field was present in the JSON object, but had an unexpected type.
        #[error("Expected the '{field}' field to be a certain type, but was it not: {err}.")]
        WrongJsonType {
            /// The type error reported by `simd_json`.
            err: simd_json::TryTypeError,
            /// The name of the offending field.
            field: &'static str,
        },

        /// The nonce could not be decoded from its base64 representation.
        #[error("Failed to decode the Nonce as base64: {0}")]
        NonceParse(#[from] base64::DecodeSliceError),
    }
}
295
296#[cfg(test)]
297mod test {
298    use simd_json::prelude::Writable;
299
300    use super::Operation;
301    use crate::{
302        entities::issue::{Issue, issue_operation::IssueOperationData},
303        replica::entity::{
304            id::{Id, entity_id::EntityId},
305            identity::IdentityStub,
306            nonce::Nonce,
307            timestamp::TimeStamp,
308        },
309    };
310
311    /// Send an operation through a round-trip.
312    fn roundtrip(start: &Operation<Issue>) -> Operation<Issue> {
313        let mut string: String =
314            simd_json::borrowed::Value::Object(Box::new(start.as_value())).encode();
315        eprintln!("Encoded: {string}");
316
317        let end = Operation::<Issue>::from_value(
318            simd_json::to_owned_value(unsafe { string.as_bytes_mut() }).unwrap(),
319            start.author,
320        )
321        .unwrap();
322
323        end
324    }
325
326    /// Assert, that both operations are equal, while taking things
327    /// like the unsafe timestamp into account.
328    fn assert_equal(start: &Operation<Issue>, end: &Operation<Issue>) {
329        assert_eq!(start.author, end.author);
330        assert_eq!(unsafe { start.creation_time.to_unsafe() }, unsafe {
331            end.creation_time.to_unsafe()
332        });
333        assert_eq!(start.metadata, end.metadata);
334        assert_eq!(start.nonce, end.nonce);
335        assert_eq!(start.id, end.id);
336        assert_eq!(start.data, end.data);
337    }
338
339    #[test]
340    fn operation_round_trip_simple() {
341        let start = Operation::<Issue> {
342            author: IdentityStub {
343                id: unsafe {
344                    EntityId::from_id(
345                        Id::from_hex(
346                            b"1df6ca7c48f3e061c9659887a651e02154307c18d56607a50828280255415e21",
347                        )
348                        .unwrap(),
349                    )
350                },
351            },
352            creation_time: TimeStamp::from(1_745_068_324),
353            metadata: None,
354            nonce: Nonce::try_from("YdUYiTWowuc/QkH3hKK3ewjqi1s=").unwrap(),
355            id: unsafe {
356                EntityId::from_id(
357                    Id::from_hex(
358                        b"ff28595c4f5236549cab1cfc7fd7c42b7c37352a8a59a70e3b0b4a82b821c735",
359                    )
360                    .unwrap(),
361                )
362            },
363            data: IssueOperationData::Create {
364                title: "test 73".to_owned(),
365                message: "test1".to_owned(),
366                files: vec![],
367            },
368        };
369
370        let end = roundtrip(&start);
371
372        assert_equal(&start, &end);
373    }
374
375    #[test]
376    fn operation_round_trip_html_triggers() {
377        let start = Operation::<Issue> {
378            author: IdentityStub {
379                id: unsafe {
380                    EntityId::from_id(
381                        Id::from_hex(
382                            b"1df6ca7c48f3e061c9659887a651e02154307c18d56607a50828280255415e21",
383                        )
384                        .unwrap(),
385                    )
386                },
387            },
388            creation_time: TimeStamp::from(1_748_601_272),
389            metadata: None,
390            nonce: Nonce::try_from("YZjlOqrXSFy/OZiAJS3y5CrBxgg=").unwrap(),
391            id: unsafe {
392                EntityId::from_id(
393                    Id::from_hex(
394                        b"dc872211c65d3fb533d0d303b658261b5b0ed287ba728d305d0014e1b19ac027",
395                    )
396                    .unwrap(),
397                )
398            },
399            data: IssueOperationData::Create {
400                title: "<>".to_owned(),
401                message: String::new(),
402                files: vec![],
403            },
404        };
405
406        let end = roundtrip(&start);
407
408        assert_equal(&start, &end);
409    }
410
411    #[test]
412    fn operation_round_trip_long() {
413        let start = Operation::<Issue> {
414            author: IdentityStub {
415                id: unsafe {
416                    EntityId::from_id(
417                        Id::from_hex(
418                            b"7f24a6ff7ee2ed2c60904026359f0f4818e6466ccb0582fedd8eaa04edabbdd5",
419                        )
420                        .unwrap(),
421                    )
422                },
423            },
424            creation_time: TimeStamp::from(1_537_546_348),
425            metadata: Some(vec![
426                (
427                    "github-id".to_owned(),
428                    "MDU6SXNzdWUzNjI2ODM2Mzk=".to_owned(),
429                ),
430                (
431                    "github-url".to_owned(),
432                    "https://github.com/rust-lang/rust/issues/54437".to_owned(),
433                ),
434                ("origin".to_owned(), "github".to_owned()),
435            ]),
436            nonce: Nonce::try_from("Q2M2mXgBZaBQKKUnS5QBxky00P8=").unwrap(),
437            id: unsafe {
438                EntityId::from_id(
439                    Id::from_hex(
440                        b"b8f4a62333974e95eb69e6956d07e799662acefd28b00ab83fcd72d1ef6522eb",
441                    )
442                    .unwrap(),
443                )
444            },
445            data: IssueOperationData::Create {
446                title: "In beta 1.30.0-beta.2, `cargo test` runs rustdoc with \
447                        `-Zunstable-options`, which errors"
448                    .to_owned(),
449                message: "In beta 1.30.0-beta.2, `cargo test` runs `rustdoc -Zunstable-options \
450                          --edition=2018 ...` which errors out with \n\n> error: the option `Z` \
451                          is only accepted on the nightly compiler\n\nUsing `rustdoc \
452                          --edition=2018 ...`, omitting the `-Zunstable-options` flag, works as \
453                          expected.\n\n## Meta\n```\ncargo --version --verbose\ncargo 1.30.0-beta \
454                          (308b1eabd 2018-09-19)\nrelease: 1.30.0\ncommit-hash: \
455                          308b1eabd6195812b91d646a0292224bb014b449\ncommit-date: \
456                          2018-09-19\n\nrustdoc --version --verbose\nrustdoc 1.30.0-beta.2 \
457                          (7a0062e46 2018-09-19)\nbinary: rustdoc\ncommit-hash: \
458                          7a0062e46844def0edcd86da1abafafd9cdbbeaf\ncommit-date: \
459                          2018-09-19\nhost: x86_64-apple-darwin\nrelease: 1.30.0-beta.2\nLLVM \
460                          version: 8.0\n```"
461                    .to_owned(),
462                files: vec![],
463            },
464        };
465
466        let end = roundtrip(&start);
467
468        assert_equal(&start, &end);
469    }
470}