fmtview 0.4.2

Fast CLI viewer for highlighting, search, and diffs across JSON, JSONL, markup, Markdown, TOML, text, and Jinja
Documentation
use std::{io::Write, time::Instant};

use tempfile::NamedTempFile;

use crate::{
    input::InputSource,
    transform::{FormatKind, FormatOptions, format_record_to_string, format_source_to_temp},
};

use super::{
    fixtures::{
        HUGE_STRING_FRAGMENT, generated_huge_object_array_record,
        generated_huge_string_field_record, generated_json_document_source,
        generated_jsonl_records, generated_xml_document_source,
    },
    runner::{BenchCase, BenchSample},
};

/// Benchmark cases defined in this file, exposed to the parent module.
///
/// Each entry pairs a human-readable `label`, a `shape` tag, and a `layer` tag with the
/// zero-argument function (`run`) that executes the benchmark and returns a `BenchSample`.
pub(super) const CASES: &[BenchCase] = &[
    // Formats generated JSONL records one by one, entirely in memory.
    BenchCase {
        label: "jsonl record batch CPU",
        shape: "record-stream",
        layer: "transform",
        run: bench_jsonl_record_batch_format,
    },
    // Formats a JSONL temp file end to end through `format_source_to_temp`.
    BenchCase {
        label: "jsonl source full format",
        shape: "record-stream",
        layer: "transform+write",
        run: bench_jsonl_source_full_format,
    },
    // Formats one record built by `generated_huge_object_array_record`.
    BenchCase {
        label: "single huge object-array record format",
        shape: "record-stream/huge-record",
        layer: "transform",
        run: bench_single_huge_object_array_record_format,
    },
    // Formats one record built by `generated_huge_string_field_record`.
    BenchCase {
        label: "single huge string field record format",
        shape: "record-stream/huge-record",
        layer: "transform",
        run: bench_single_huge_string_field_record_format,
    },
    // Formats a generated JSON document source as `FormatKind::Json`.
    BenchCase {
        label: "json whole-document format",
        shape: "whole-document",
        layer: "transform",
        run: bench_json_whole_document_format,
    },
    // Formats a generated XML document source as `FormatKind::Xml`.
    BenchCase {
        label: "xml whole-document format",
        shape: "whole-document",
        layer: "transform",
        run: bench_xml_whole_document_format,
    },
];
/// Times in-memory formatting of a batch of generated JSONL records.
///
/// Every record from `generated_jsonl_records(16_384, 512)` is passed through
/// `format_record_to_string` with `FormatKind::Jsonl` and an indent of 2. Only the formatting
/// loop is timed; fixture generation and the input-size tally happen before the clock starts.
///
/// # Panics
///
/// Panics if any record fails to format, or if the combined output is not strictly larger
/// than the combined input.
fn bench_jsonl_record_batch_format() -> BenchSample {
    let batch = generated_jsonl_records(16_384, 512);
    let input_bytes: usize = batch.iter().map(|record| record.len()).sum();

    let timer = Instant::now();
    // Saturating accumulation: the total is a reporting figure and must never wrap.
    let output_bytes = batch.iter().fold(0_usize, |total, record| {
        let pretty = format_record_to_string(record, FormatKind::Jsonl, 2).unwrap();
        total.saturating_add(pretty.len())
    });
    let elapsed = timer.elapsed();

    assert!(output_bytes > input_bytes);

    BenchSample {
        elapsed,
        records: batch.len(),
        items: 0,
        string_bytes: 0,
        lines: 0,
        indexed_lines: 0,
        window_lines: 0,
        input_bytes,
        output_bytes,
    }
}

/// Times `format_source_to_temp` over a JSONL file written to a temporary path.
///
/// The records from `generated_jsonl_records(16_384, 512)` are written one per line to a
/// `NamedTempFile`, which is then opened through `InputSource::from_arg`. Only the
/// `format_source_to_temp` call sits inside the timed region. The output size is read from
/// the resulting file's metadata afterwards.
///
/// # Panics
///
/// Panics on any I/O or formatting failure, if the temp path is not valid UTF-8, or if the
/// formatted output is not strictly larger than the input.
fn bench_jsonl_source_full_format() -> BenchSample {
    let mut scratch = NamedTempFile::new().unwrap();
    let batch = generated_jsonl_records(16_384, 512);

    for line in &batch {
        scratch.write_all(line).unwrap();
        scratch.write_all(b"\n").unwrap();
    }
    scratch.flush().unwrap();

    // Each record contributes its own length plus the newline terminator written above.
    let input_bytes = batch
        .iter()
        .fold(0_usize, |total, line| total.saturating_add(line.len()).saturating_add(1));

    let path = scratch.path().to_str().unwrap();
    let source = InputSource::from_arg(path, None).unwrap();
    let options = FormatOptions {
        kind: FormatKind::Jsonl,
        indent: 2,
    };

    let timer = Instant::now();
    let formatted = format_source_to_temp(&source, &options).unwrap();
    let elapsed = timer.elapsed();

    let metadata = formatted.as_file().metadata().unwrap();
    let output_bytes = metadata.len() as usize;
    assert!(output_bytes > input_bytes);

    BenchSample {
        elapsed,
        records: batch.len(),
        items: 0,
        string_bytes: 0,
        lines: 0,
        indexed_lines: 0,
        window_lines: 0,
        input_bytes,
        output_bytes,
    }
}

/// Times `format_source_to_temp` on a generated JSON document formatted as `FormatKind::Json`.
///
/// The source, its input size, and its item count all come from
/// `generated_json_document_source(32_768, 128)`. Only the formatting call is timed. The
/// output size is taken from the formatted file's metadata afterwards.
///
/// # Panics
///
/// Panics if formatting or the metadata lookup fails, or if the output is not strictly larger
/// than the input.
fn bench_json_whole_document_format() -> BenchSample {
    // The first tuple element is bound (not discarded) so it stays alive for the whole
    // function; presumably it owns the backing temp file — verify against the fixture.
    let (_fixture, source, input_bytes, items) = generated_json_document_source(32_768, 128);
    let options = FormatOptions {
        kind: FormatKind::Json,
        indent: 2,
    };

    let timer = Instant::now();
    let formatted = format_source_to_temp(&source, &options).unwrap();
    let elapsed = timer.elapsed();

    let metadata = formatted.as_file().metadata().unwrap();
    let output_bytes = metadata.len() as usize;
    assert!(output_bytes > input_bytes);

    BenchSample {
        elapsed,
        records: 1,
        items,
        string_bytes: 0,
        lines: 0,
        indexed_lines: 0,
        window_lines: 0,
        input_bytes,
        output_bytes,
    }
}

/// Times `format_source_to_temp` on a generated XML document formatted as `FormatKind::Xml`.
///
/// The source, its input size, and its item count all come from
/// `generated_xml_document_source(65_536)`. Only the formatting call is timed. The output
/// size is taken from the formatted file's metadata afterwards.
///
/// # Panics
///
/// Panics if formatting or the metadata lookup fails, or if the output is smaller than the
/// input (equal sizes are accepted here).
fn bench_xml_whole_document_format() -> BenchSample {
    // The first tuple element is bound (not discarded) so it stays alive for the whole
    // function; presumably it owns the backing temp file — verify against the fixture.
    let (_fixture, source, input_bytes, items) = generated_xml_document_source(65_536);
    let options = FormatOptions {
        kind: FormatKind::Xml,
        indent: 2,
    };

    let timer = Instant::now();
    let formatted = format_source_to_temp(&source, &options).unwrap();
    let elapsed = timer.elapsed();

    let metadata = formatted.as_file().metadata().unwrap();
    let output_bytes = metadata.len() as usize;
    assert!(output_bytes >= input_bytes);

    BenchSample {
        elapsed,
        records: 1,
        items,
        string_bytes: 0,
        lines: 0,
        indexed_lines: 0,
        window_lines: 0,
        input_bytes,
        output_bytes,
    }
}
/// Times formatting of a single record built by `generated_huge_object_array_record`.
///
/// The record is generated with an item count of 32_768 and a second argument of 128, then
/// formatted with `FormatKind::Jsonl` and an indent of 2. Only the `format_record_to_string`
/// call is timed; line counting happens after the clock stops.
///
/// # Panics
///
/// Panics if formatting fails or if the rendered output does not have more lines than the
/// requested item count.
fn bench_single_huge_object_array_record_format() -> BenchSample {
    let items = 32_768;
    let payload = generated_huge_object_array_record(items, 128);
    let input_bytes = payload.len();

    let timer = Instant::now();
    let pretty = format_record_to_string(&payload, FormatKind::Jsonl, 2).unwrap();
    let elapsed = timer.elapsed();

    let lines = pretty.lines().count();
    assert!(lines > items);

    BenchSample {
        elapsed,
        records: 1,
        items,
        string_bytes: 0,
        lines,
        indexed_lines: 0,
        window_lines: 0,
        input_bytes,
        output_bytes: pretty.len(),
    }
}

/// Times formatting of a single record built by `generated_huge_string_field_record`.
///
/// The record is generated with a repeat count of 600_000 and formatted with
/// `FormatKind::Jsonl` and an indent of 2. Only the `format_record_to_string` call is timed.
/// The reported `string_bytes` is `HUGE_STRING_FRAGMENT.len()` multiplied by the repeat count.
///
/// # Panics
///
/// Panics if formatting fails or if the rendered output is not exactly 5 lines long.
fn bench_single_huge_string_field_record_format() -> BenchSample {
    let repeats = 600_000;
    let payload = generated_huge_string_field_record(repeats);
    let input_bytes = payload.len();

    let timer = Instant::now();
    let pretty = format_record_to_string(&payload, FormatKind::Jsonl, 2).unwrap();
    let elapsed = timer.elapsed();

    let lines = pretty.lines().count();
    assert_eq!(lines, 5);

    BenchSample {
        elapsed,
        records: 1,
        items: 0,
        string_bytes: HUGE_STRING_FRAGMENT.len() * repeats,
        lines,
        indexed_lines: 0,
        window_lines: 0,
        input_bytes,
        output_bytes: pretty.len(),
    }
}