obj-db 1.0.2

Embedded document database. Stable file format, full ACID, single-file portability.
Documentation
//! Cross-language interop fixtures for the Phase 6.5 Python typed
//! ergonomic layer (issue #12).
//!
//! This test pins the postcard wire shape Rust's
//! `#[derive(obj::Document)]` produces for a known logical schema.
//! The Python-side test in `crates/obj-py/tests/test_interop.py`
//! asserts the same byte sequence so the cross-language byte-
//! identity claim has a single, machine-verifiable anchor.
//!
//! # Schema
//!
//! `Person { name: String, age: u64 }` — the simplest two-field
//! shape that exercises one variable-length type (`String`) and
//! one varint-encoded scalar (`u64`).
//!
//! For `Person { name: "Ada", age: 36 }` the postcard payload is:
//!
//! - `name` (String, varint length + UTF-8 bytes):
//!     - length 3 → `0x03`
//!     - bytes `b"Ada"` → `0x41 0x64 0x61`
//! - `age` (U64, unsigned LEB128 varint):
//!     - 36 → `0x24` (fits in 1 byte)
//!
//! Total payload: `[0x03, 0x41, 0x64, 0x61, 0x24]` (5 bytes).
//!
//! This sequence is the load-bearing byte-identity fixture. If
//! either side drifts, the cross-language interop claim breaks.

#![forbid(unsafe_code)]

use serde::{Deserialize, Serialize};

use obj::Document;
use obj_core::codec::{encode, DocumentHeader, DOC_HEADER_SIZE};

#[derive(Debug, Serialize, Deserialize, PartialEq, Eq)]
#[allow(clippy::module_name_repetitions)] // mirrors the Python class name
struct Person {
    name: String,
    age: u64,
}

impl Document for Person {
    const COLLECTION: &'static str = "people";
    const VERSION: u32 = 1;
}

/// The byte-identity fixture for `Person { name: "Ada", age: 36 }`
/// where `age` is `u64` on the Rust side and
/// `Annotated[int, "u64"]` on the Python side.
///
/// Bytes: `[0x03, 0x41, 0x64, 0x61, 0x24]` — varint length 3 +
/// "Ada" + unsigned varint 36.
pub const PERSON_ADA_36_POSTCARD_U64: &[u8] = &[0x03, 0x41, 0x64, 0x61, 0x24];

/// Byte-identity fixture for the I64 variant. Python's default int
/// maps to `Dynamic::I64`; this fixture pins the matching Rust
/// shape (signed i64 + zigzag varint).
///
/// Bytes: `[0x03, 0x41, 0x64, 0x61, 0x48]` — varint length 3 +
/// "Ada" + zigzag varint 36 (`zigzag(36) = 72 = 0x48`).
pub const PERSON_ADA_36_POSTCARD_I64: &[u8] = &[0x03, 0x41, 0x64, 0x61, 0x48];

/// Version-2 schema used by the header-level byte-identity test
/// (issue #13). Mirrors `Person`'s field layout so the postcard
/// payload bytes are identical to the v1 U64 fixture; only the
/// `type_version` field in the record header differs.
#[derive(Debug, Serialize, Deserialize, PartialEq, Eq)]
struct PersonV2 {
    name: String,
    age: u64,
}

impl Document for PersonV2 {
    /// Distinct collection name so the v2 fixture does not collide
    /// with the v1 `Person` collection used elsewhere in this test
    /// file.
    const COLLECTION: &'static str = "people_v2";
    const VERSION: u32 = 2;
}

#[derive(Debug, Serialize, Deserialize, PartialEq, Eq)]
struct PersonI64 {
    name: String,
    age: i64,
}

impl Document for PersonI64 {
    const COLLECTION: &'static str = "people_i64";
    const VERSION: u32 = 1;
}

#[test]
fn rust_derive_emits_known_postcard_bytes_u64() {
    let person = Person {
        name: "Ada".to_owned(),
        age: 36,
    };
    let bytes = postcard::to_allocvec(&person).expect("postcard encode");
    assert_eq!(
        bytes, PERSON_ADA_36_POSTCARD_U64,
        "Rust U64 postcard payload drifted from the cross-language fixture"
    );
}

#[test]
fn rust_derive_emits_known_postcard_bytes_i64() {
    let person = PersonI64 {
        name: "Ada".to_owned(),
        age: 36,
    };
    let bytes = postcard::to_allocvec(&person).expect("postcard encode");
    assert_eq!(
        bytes, PERSON_ADA_36_POSTCARD_I64,
        "Rust I64 postcard payload drifted from the cross-language fixture"
    );
}

#[test]
fn rust_derive_round_trips() {
    let person = Person {
        name: "Ada".to_owned(),
        age: 36,
    };
    let bytes = postcard::to_allocvec(&person).expect("encode");
    let back: Person = postcard::from_bytes(&bytes).expect("decode");
    assert_eq!(back, person);
}

#[test]
fn db_write_then_read_round_trip() {
    // End-to-end: open a fresh DB, insert a Person via the typed
    // API, read it back, verify equality.
    let tmpdir = tempfile::tempdir().expect("tempdir");
    let path = tmpdir.path().join("interop.obj");
    let db = obj::Db::open(&path).expect("open");
    let id = db
        .insert(Person {
            name: "Ada".to_owned(),
            age: 36,
        })
        .expect("insert");
    let got: Person = db.get(id).expect("get").expect("present");
    assert_eq!(
        got,
        Person {
            name: "Ada".to_owned(),
            age: 36,
        }
    );
}

/// Compute the expected on-disk record bytes (header + payload) for
/// `PersonV2 { name: "Ada", age: 36 }` written into a fresh
/// collection.
///
/// `collection_id` is `1` because the catalog allocates ids from
/// `1` upwards and the test inserts into a single fresh collection
/// per DB (issue #13 — header-level byte-identity fixture).
fn person_v2_expected_record_bytes() -> Vec<u8> {
    let doc = PersonV2 {
        name: "Ada".to_owned(),
        age: 36,
    };
    // `encode` is the codec entry point Rust `#[derive(Document)]`
    // writes go through; it assembles `header + payload` in one
    // allocation. This is the exact byte sequence the Python typed
    // path must produce for a `@document(version=2)` write into the
    // same logical schema.
    encode(&doc, 1).expect("codec encode")
}

#[test]
fn rust_v2_record_header_bytes_are_pinned() {
    // The header is 16 bytes laid out as four little-endian u32s.
    // For PersonV2 at collection_id=1, version=2, payload =
    // PERSON_ADA_36_POSTCARD_U64 (5 bytes), the header fields are:
    //   collection_id  : 0x00000001 LE -> 01 00 00 00
    //   type_version   : 0x00000002 LE -> 02 00 00 00
    //   payload_len    : 0x00000005 LE -> 05 00 00 00
    //   payload_crc32c : crc32c(payload) LE
    let bytes = person_v2_expected_record_bytes();
    assert_eq!(
        bytes.len(),
        DOC_HEADER_SIZE + PERSON_ADA_36_POSTCARD_U64.len(),
        "expected record = 16-byte header + 5-byte payload"
    );
    // collection_id
    assert_eq!(&bytes[0..4], &[0x01, 0x00, 0x00, 0x00]);
    // type_version
    assert_eq!(
        &bytes[4..8],
        &[0x02, 0x00, 0x00, 0x00],
        "type_version field must carry T::VERSION (=2) in LE u32"
    );
    // payload_len
    assert_eq!(&bytes[8..12], &[0x05, 0x00, 0x00, 0x00]);
    // payload tail
    assert_eq!(&bytes[DOC_HEADER_SIZE..], PERSON_ADA_36_POSTCARD_U64);
    // Re-decode the header through DocumentHeader::read_from for a
    // single-call sanity check.
    let header = DocumentHeader::read_from(&bytes).expect("header decode");
    assert_eq!(header.collection_id, 1);
    assert_eq!(header.type_version, 2);
    assert_eq!(header.payload_len, 5);
    // Surface the actual hex bytes when `--nocapture` is set so a
    // future maintainer can grep for the load-bearing fixture
    // without re-deriving it.
    eprintln!(
        "PersonV2 v2 record bytes (hex): {}",
        bytes
            .iter()
            .map(|b| format!("{b:02x}"))
            .collect::<Vec<_>>()
            .join(" ")
    );
}

#[test]
fn rust_v2_engine_write_matches_codec_encode() {
    // End-to-end: write a PersonV2 via the typed Db API into a fresh
    // DB and verify the on-disk record bytes are byte-identical to
    // `encode(&doc, 1)`. This pins the writer pipeline against the
    // codec contract — `WriteTxn::collection::<T>().insert(doc)`
    // and `obj_core::codec::encode(&doc, collection_id)` must
    // produce the same bytes for the same logical document.
    let tmpdir = tempfile::tempdir().expect("tempdir");
    let path = tmpdir.path().join("v2_record.obj");
    let db = obj::Db::open(&path).expect("open");
    let _ = db
        .insert(PersonV2 {
            name: "Ada".to_owned(),
            age: 36,
        })
        .expect("insert");
    // Use the typed `dump_raw` API to retrieve the raw record bytes
    // back from disk (header + payload) and assert byte-identity.
    let dump = db.dump_raw("people_v2", 0).expect("dump_raw");
    let records: Vec<_> = dump.map(|step| step.expect("dump step")).collect();
    assert_eq!(records.len(), 1, "expected exactly one record");
    let record = &records[0];
    assert_eq!(record.header.collection_id, 1);
    assert_eq!(record.header.type_version, 2);
    assert_eq!(record.header.payload_len, 5);
    assert_eq!(record.payload.as_slice(), PERSON_ADA_36_POSTCARD_U64);
    // Reassemble the on-disk record bytes (header + payload) and
    // assert against `encode`. The two paths share
    // `wrap_raw_payload_with_version` semantics on the engine side
    // and `codec::encode` semantics on the codec side; this test
    // pins them to the same bytes.
    let mut reassembled = Vec::with_capacity(DOC_HEADER_SIZE + record.payload.len());
    record.header.write_to(&mut reassembled);
    reassembled.extend_from_slice(&record.payload);
    let expected = person_v2_expected_record_bytes();
    assert_eq!(
        reassembled, expected,
        "Rust-derive on-disk record drifted from codec::encode output"
    );
}