// obj-cli 1.0.0 - Docs.rs

//! `obj dump <path> --collection <name>` — type-erased record walk.
//!
//! Prints one record per doc. The default `--format header` prints
//! the four header fields (collection id, type version, payload
//! length, payload CRC32C) + the primary id. `--format hex`
//! appends the payload bytes in hex (one block per record).
//!
//! No schema-aware decode is attempted; the CLI does not have
//! access to user-defined `Document` impls. See `--help` for the
//! documented limitation.
//!
//! Exit codes: 0 on success, 2 on any failure (database open error,
//! missing collection, per-record decode failure, or other I/O error).

use std::io::{self, Write};
use std::path::Path;

use obj::{Db, DumpRecord};

use crate::DumpFormat;

/// Run `obj dump` and return the process exit code.
///
/// Power-of-ten Rule 4 — the body stays under 60 lines by
/// deferring per-record formatting to the helpers below.
pub(crate) fn run(path: &Path, collection: &str, limit: usize, format: DumpFormat) -> i32 {
    let db = match Db::open(path) {
        Ok(db) => db,
        Err(err) => {
            eprintln!("error: failed to open {}: {err}", path.display());
            return 2;
        }
    };
    let iter = match db.dump_raw(collection, limit) {
        Ok(it) => it,
        Err(err) => {
            eprintln!("error: dump failed: {err}");
            return 2;
        }
    };
    println!("## {collection}");
    println!("limit: {}", if limit == 0 { "none" } else { "set" });
    println!();
    let mut printed: u64 = 0;
    for step in iter {
        match step {
            Ok(record) => {
                print_record(&record, format);
                printed = printed.saturating_add(1);
            }
            Err(err) => {
                eprintln!("error: per-doc decode failure: {err}");
                return 2;
            }
        }
    }
    println!("# emitted: {printed}");
    0
}

/// Print one record. Power-of-ten Rule 4 keeps the per-record
/// printer short — it is the hot path for large dumps.
fn print_record(record: &DumpRecord, format: DumpFormat) {
    println!("- id: {}", record.id.get());
    println!("  collection_id: {}", record.header.collection_id);
    println!("  type_version: {}", record.header.type_version);
    println!("  payload_len: {}", record.header.payload_len);
    println!("  payload_crc32c: {:#010x}", record.header.payload_crc32c);
    if matches!(format, DumpFormat::Hex) {
        println!("  payload_hex: {}", encode_hex(&record.payload));
    }
}

/// Encode `bytes` as lowercase hexadecimal text.
///
/// Each input byte yields exactly two characters (high nibble first)
/// and no `0x` prefix is added; an empty slice yields an empty
/// string. Power-of-ten Rule 3: the output `String` is reserved up
/// front at twice the input length, so a single allocation suffices.
/// Rule 7: digits are looked up in a static table and pushed as
/// `char`s, so there is no `Result` to handle and no `.unwrap()` on
/// the production path.
fn encode_hex(bytes: &[u8]) -> String {
    const HEX_DIGITS: &[u8; 16] = b"0123456789abcdef";
    let mut encoded = String::with_capacity(bytes.len().saturating_mul(2));
    for &byte in bytes {
        // Both indices are at most 15, so the table lookups cannot go
        // out of bounds; every table entry is an ASCII digit.
        let high = HEX_DIGITS[usize::from(byte >> 4)];
        let low = HEX_DIGITS[usize::from(byte & 0x0F)];
        encoded.push(char::from(high));
        encoded.push(char::from(low));
    }
    encoded
}