Skip to main content

copybook_cli_determinism/
lib.rs

1#![cfg_attr(not(test), deny(clippy::unwrap_used, clippy::expect_used))]
2// SPDX-License-Identifier: AGPL-3.0-or-later
3//! Determinism command execution for copybook CLI workflows.
4//!
5//! This crate focuses on one responsibility:
6//! parsing CLI command arguments and producing formatted determinism output
7//! for decode/encode/round-trip checks.
8
9use anyhow::Context;
10use clap::{Args, Subcommand, ValueEnum};
11use copybook_codec::{
12    Codepage, DecodeOptions, EncodeOptions, JsonNumberMode, RecordFormat,
13    determinism::{
14        DeterminismResult, check_decode_determinism, check_encode_determinism,
15        check_round_trip_determinism,
16    },
17};
18use copybook_core::{Schema, parse_copybook};
19use std::fmt::Write as _;
20use std::fs;
21use std::io::{self, Read};
22use std::path::{Path, PathBuf};
23
24pub use copybook_codec::determinism::DeterminismMode as DeterminismCheckMode;
25pub use copybook_determinism::{BLAKE3_HEX_LEN, DEFAULT_MAX_DIFFS};
26
// Top-level argument type for the determinism command. It carries only the
// selected subcommand, which `run` dispatches on.
//
// Maintainer notes are written as plain `//` comments here because clap's
// derive macros can surface `///` text in `--help` output.
/// Determinism command façade for CLI surface.
#[derive(Args, Debug, Clone)]
pub struct DeterminismCommand {
    /// The determinism check mode (decode, encode, or round-trip).
    #[command(subcommand)]
    pub mode: DeterminismMode,
}
34
// Subcommand selector: `run` dispatches each variant to its own runner
// (`run_decode`, `run_encode`, `run_round_trip`).
//
// The `///` lines are left verbatim because clap derive uses variant doc
// comments as the subcommand descriptions shown in `--help`.
/// Determinism check modes.
#[derive(Subcommand, Debug, Clone)]
pub enum DeterminismMode {
    /// Run determinism check for decode (binary → JSON).
    Decode(DecodeDeterminismArgs),

    /// Run determinism check for encode (JSON → binary).
    Encode(EncodeDeterminismArgs),

    /// Run determinism check for full round-trip (binary → JSON → binary → JSON).
    RoundTrip(RoundTripDeterminismArgs),
}
47
/// Backward-compatible alias retained for callers importing the command mode by name.
///
/// This is the same type as [`DeterminismMode`], so variants can be named
/// through either path (e.g. `DeterminismModeCommand::Decode`).
pub type DeterminismModeCommand = DeterminismMode;
50
// Options shared by every determinism subcommand; each subcommand's argument
// struct embeds this one via `#[command(flatten)]`.
//
// How the fields are consumed in this file:
// - `copybook` is read and parsed by `load_schema`; the literal path `-`
//   makes the reader take the copybook text from stdin.
// - `format`, `codepage`, `json_number`, and `emit_meta` are forwarded to
//   `DecodeOptions` by `build_decode_options`.
// - `build_encode_options` forwards only `codepage`.
// - `output` selects the renderer in `render_result`; `max_diffs` is passed
//   only to the human renderer, so it does not limit the JSON output.
//
// The `///` comments below are left verbatim: clap derive uses field doc
// comments as `--help` text.
/// Shared determinism arguments.
#[derive(Args, Debug, Clone)]
pub struct CommonDeterminismArgs {
    /// Copybook schema file.
    #[arg(value_name = "COPYBOOK")]
    pub copybook: PathBuf,

    /// Record format.
    #[arg(long, default_value = "fixed")]
    pub format: RecordFormat,

    /// EBCDIC codepage.
    #[arg(long, default_value = "cp037")]
    pub codepage: Codepage,

    /// JSON number handling mode.
    #[arg(long, value_name = "MODE", default_value = "lossless")]
    pub json_number: JsonNumberMode,

    /// Include metadata in JSON output.
    #[arg(long)]
    pub emit_meta: bool,

    /// Output format: human or json.
    #[arg(long, value_name = "FORMAT", default_value = "human")]
    pub output: OutputFormat,

    /// Maximum number of byte diffs to report.
    #[arg(long, value_name = "N", default_value_t = DEFAULT_MAX_DIFFS)]
    pub max_diffs: usize,
}
82
// Value type of the `--output` option. `render_result` matches on it:
// `Json` goes through `render_json_result`, `Human` through
// `render_human_result`.
//
// Variant `///` text is left verbatim because clap's `ValueEnum` derive can
// show it as the help text for each possible value.
/// Available output rendering modes.
///
/// # Examples
///
/// ```
/// use copybook_cli_determinism::OutputFormat;
///
/// let fmt = OutputFormat::Human;
/// assert_eq!(fmt, OutputFormat::Human);
/// assert_ne!(fmt, OutputFormat::Json);
/// ```
#[derive(ValueEnum, Debug, Clone, Copy, PartialEq, Eq)]
pub enum OutputFormat {
    /// Human-readable output with symbols and diff table.
    Human,
    /// Structured JSON output for CI integration.
    Json,
}
101
// Arguments for the `Decode` subcommand, consumed by `run_decode`.
// `data` is read as raw bytes by `read_bytes_or_stdin`; the literal path `-`
// reads from stdin instead of a file.
//
// `///` text is left verbatim because clap derive uses it as `--help` text.
/// Decode command arguments.
#[derive(Args, Debug, Clone)]
pub struct DecodeDeterminismArgs {
    /// Shared determinism arguments.
    #[command(flatten)]
    pub common: CommonDeterminismArgs,

    /// Binary input file (single record or sampled payload).
    #[arg(value_name = "DATA")]
    pub data: PathBuf,
}
113
// Arguments for the `Encode` subcommand, consumed by `run_encode`.
// `json` is read as text by `read_text_or_stdin` (`-` reads stdin); only the
// first line of that text is parsed as JSON, and input with no lines is
// rejected with an error.
//
// `///` text is left verbatim because clap derive uses it as `--help` text.
/// Encode command arguments.
#[derive(Args, Debug, Clone)]
pub struct EncodeDeterminismArgs {
    /// Shared determinism arguments.
    #[command(flatten)]
    pub common: CommonDeterminismArgs,

    /// JSON input file (first line is used).
    #[arg(value_name = "JSON")]
    pub json: PathBuf,
}
125
// Arguments for the `RoundTrip` subcommand, consumed by `run_round_trip`.
// `data` is read as raw bytes by `read_bytes_or_stdin`; the literal path `-`
// reads from stdin instead of a file.
//
// `///` text is left verbatim because clap derive uses it as `--help` text.
/// Round-trip command arguments.
#[derive(Args, Debug, Clone)]
pub struct RoundTripDeterminismArgs {
    /// Shared determinism arguments.
    #[command(flatten)]
    pub common: CommonDeterminismArgs,

    /// Binary input file.
    #[arg(value_name = "DATA")]
    pub data: PathBuf,
}
137
138/// Result of running a determinism command execution.
139#[derive(Debug, Clone)]
140pub struct DeterminismRun {
141    /// The determinism verdict (pass or fail).
142    pub verdict: DeterminismVerdict,
143    /// Formatted output text for display.
144    pub output: String,
145}
146
147impl DeterminismRun {
148    /// Convert command verdict to CLI exit code semantics.
149    #[inline]
150    #[must_use]
151    pub const fn exit_code(&self) -> i32 {
152        self.verdict.exit_code()
153    }
154}
155
156/// Executable verdict from determinism checks.
157///
158/// # Examples
159///
160/// ```
161/// use copybook_cli_determinism::DeterminismVerdict;
162///
163/// let pass = DeterminismVerdict::Deterministic;
164/// assert_eq!(pass.exit_code(), 0);
165///
166/// let fail = DeterminismVerdict::NonDeterministic;
167/// assert_eq!(fail.exit_code(), 2);
168/// ```
169#[derive(Debug, Clone, Copy, PartialEq, Eq)]
170pub enum DeterminismVerdict {
171    /// Both runs were byte-for-byte identical.
172    Deterministic,
173    /// A difference was detected and data diverged.
174    NonDeterministic,
175}
176
177impl DeterminismVerdict {
178    const fn from_result(result: &DeterminismResult) -> Self {
179        if result.is_deterministic {
180            Self::Deterministic
181        } else {
182            Self::NonDeterministic
183        }
184    }
185
186    /// Convert verdict to a CLI-style exit code.
187    #[inline]
188    #[must_use]
189    pub const fn exit_code(&self) -> i32 {
190        match self {
191            Self::Deterministic => 0,
192            Self::NonDeterministic => 2,
193        }
194    }
195}
196
197/// Execute a determinism subcommand and return output plus verdict.
198///
199/// # Errors
200///
201/// Returns an error if schema loading, data reading, or determinism checks fail.
202#[inline]
203pub fn run(cmd: &DeterminismCommand) -> anyhow::Result<DeterminismRun> {
204    let result = match &cmd.mode {
205        DeterminismModeCommand::Decode(args) => run_decode(args),
206        DeterminismModeCommand::Encode(args) => run_encode(args),
207        DeterminismModeCommand::RoundTrip(args) => run_round_trip(args),
208    }?;
209
210    Ok(result)
211}
212
213/// Run decode determinism check.
214fn run_decode(args: &DecodeDeterminismArgs) -> anyhow::Result<DeterminismRun> {
215    let schema = load_schema(&args.common.copybook)?;
216    let decode_opts = build_decode_options(&args.common);
217    let data = read_bytes_or_stdin(&args.data).with_context(|| {
218        format!(
219            "Failed to read data file for determinism check: {}",
220            args.data.display()
221        )
222    })?;
223
224    let result = check_decode_determinism(&schema, &data, &decode_opts)
225        .context("Decode determinism check failed")?;
226
227    render_result(&result, &args.common)
228}
229
230/// Run encode determinism check.
231fn run_encode(args: &EncodeDeterminismArgs) -> anyhow::Result<DeterminismRun> {
232    let schema = load_schema(&args.common.copybook)?;
233    let encode_opts = build_encode_options(&args.common);
234    let json_text = read_text_or_stdin(&args.json).with_context(|| {
235        format!(
236            "Failed to read JSON input for determinism check: {}",
237            args.json.display()
238        )
239    })?;
240
241    let first_line = json_text
242        .lines()
243        .next()
244        .ok_or_else(|| anyhow::anyhow!("JSON input file is empty"))?;
245    let value: serde_json::Value =
246        serde_json::from_str(first_line).context("Failed to parse JSON input")?;
247
248    let result = check_encode_determinism(&schema, &value, &encode_opts)
249        .context("Encode determinism check failed")?;
250
251    render_result(&result, &args.common)
252}
253
254/// Run round-trip determinism check.
255fn run_round_trip(args: &RoundTripDeterminismArgs) -> anyhow::Result<DeterminismRun> {
256    let schema = load_schema(&args.common.copybook)?;
257    let decode_opts = build_decode_options(&args.common);
258    let encode_opts = build_encode_options(&args.common);
259    let data = read_bytes_or_stdin(&args.data).with_context(|| {
260        format!(
261            "Failed to read data file for round-trip determinism check: {}",
262            args.data.display()
263        )
264    })?;
265
266    let result = check_round_trip_determinism(&schema, &data, &decode_opts, &encode_opts)
267        .context("Round-trip determinism check failed")?;
268
269    render_result(&result, &args.common)
270}
271
272/// Common renderer for result + status.
273fn render_result(
274    result: &DeterminismResult,
275    common: &CommonDeterminismArgs,
276) -> anyhow::Result<DeterminismRun> {
277    let output = match common.output {
278        OutputFormat::Json => render_json_result(result),
279        OutputFormat::Human => Ok(render_human_result(result, common.max_diffs)),
280    }?;
281
282    Ok(DeterminismRun {
283        verdict: DeterminismVerdict::from_result(result),
284        output,
285    })
286}
287
288/// Create JSON formatted output string.
289///
290/// # Errors
291///
292/// Returns an error if JSON serialization fails.
293#[inline]
294pub fn render_json_result(result: &DeterminismResult) -> anyhow::Result<String> {
295    serde_json::to_string_pretty(result).context("Failed to serialize determinism result to JSON")
296}
297
298/// Create human-readable output string.
299#[inline]
300#[must_use]
301pub fn render_human_result(result: &DeterminismResult, max_diffs: usize) -> String {
302    let mut output = String::new();
303
304    let _ = writeln!(&mut output, "Determinism mode: {:?}", result.mode);
305    let _ = writeln!(
306        &mut output,
307        "Round 1 hash: {}",
308        truncate_hash(&result.round1_hash)
309    );
310    let _ = writeln!(
311        &mut output,
312        "Round 2 hash: {}",
313        truncate_hash(&result.round2_hash)
314    );
315
316    if result.is_deterministic {
317        let _ = writeln!(&mut output, "\n✅ DETERMINISTIC");
318    } else {
319        let _ = writeln!(&mut output, "\n❌ NON-DETERMINISTIC");
320    }
321
322    if let Some(diffs) = &result.byte_differences {
323        let count = diffs.len();
324        let shown = diffs.iter().take(max_diffs);
325
326        let _ = writeln!(&mut output, "\nByte differences: {count} total");
327        if count > 0 {
328            output.push_str("\n  Offset  Round1  Round2\n");
329            output.push_str("  ------  ------  ------\n");
330            for diff in shown {
331                let _ = writeln!(
332                    &mut output,
333                    "  0x{:04X}  0x{:02X}    0x{:02X}",
334                    diff.offset, diff.round1_byte, diff.round2_byte
335                );
336            }
337
338            if count > max_diffs {
339                let _ = writeln!(
340                    &mut output,
341                    "\n  ... {} more differences not shown",
342                    count - max_diffs
343                );
344            }
345        }
346    } else {
347        output.push_str("\nByte differences: none");
348    }
349
350    output
351}
352
353/// Build `DecodeOptions` from shared arguments.
354#[inline]
355#[must_use]
356pub fn build_decode_options(common: &CommonDeterminismArgs) -> DecodeOptions {
357    DecodeOptions::new()
358        .with_codepage(common.codepage)
359        .with_format(common.format)
360        .with_json_number_mode(common.json_number)
361        .with_emit_meta(common.emit_meta)
362}
363
364/// Build `EncodeOptions` from shared arguments.
365#[inline]
366#[must_use]
367pub fn build_encode_options(common: &CommonDeterminismArgs) -> EncodeOptions {
368    EncodeOptions::new().with_codepage(common.codepage)
369}
370
371/// Load and parse schema from a file or stdin.
372///
373/// # Errors
374///
375/// Returns an error if the file cannot be read or parsed.
376#[inline]
377pub fn load_schema(path: &Path) -> anyhow::Result<Schema> {
378    let text = read_text_or_stdin(path)?;
379    let schema = parse_copybook(&text)
380        .with_context(|| format!("Failed to parse copybook: {}", path.display()))?;
381    Ok(schema)
382}
383
/// Truncate BLAKE3 hash for human output.
///
/// Strings of at most 16 bytes are returned unchanged. Longer strings are cut
/// to their first 16 bytes and suffixed with `...`.
///
/// Hex digests are ASCII, so the cut normally lands exactly at byte 16. For
/// any other input the cut backs off to the nearest earlier UTF-8 character
/// boundary, so this function never panics on multi-byte text.
#[inline]
#[must_use]
pub fn truncate_hash(hash: &str) -> String {
    /// Number of leading bytes kept before the ellipsis.
    const PREFIX_LEN: usize = 16;

    if hash.len() <= PREFIX_LEN {
        return hash.to_string();
    }

    // Slicing a `str` off a character boundary panics. Index 0 is always a
    // boundary, so this loop terminates.
    let mut cut = PREFIX_LEN;
    while !hash.is_char_boundary(cut) {
        cut -= 1;
    }

    format!("{}...", &hash[..cut])
}
394
395fn read_text_or_stdin(path: &Path) -> anyhow::Result<String> {
396    if path.as_os_str() == "-" {
397        let mut text = String::new();
398        io::stdin().read_to_string(&mut text)?;
399        return Ok(text);
400    }
401
402    let text = fs::read_to_string(path)?;
403    Ok(text)
404}
405
406fn read_bytes_or_stdin(path: &Path) -> anyhow::Result<Vec<u8>> {
407    if path.as_os_str() == "-" {
408        let mut data = Vec::new();
409        io::stdin().read_to_end(&mut data)?;
410        return Ok(data);
411    }
412
413    Ok(fs::read(path)?)
414}
415
// Unit and property tests for report rendering, verdict exit codes, and hash
// truncation. `unwrap`/`expect` are allowed here because a failure should
// abort the test.
#[cfg(test)]
#[allow(clippy::expect_used)]
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;
    use copybook_determinism::ByteDiff;
    use copybook_determinism::DeterminismMode as CodecDeterminismMode;
    use proptest::prelude::*;

    /// Long hashes are cut to 16 characters plus `...`; short ones pass through.
    #[test]
    fn hash_truncation_works() {
        let long_hash = "7a3f9e2b1c4d5e6f7a3f9e2b1c4d5e6f";
        assert_eq!(truncate_hash(long_hash), "7a3f9e2b1c4d5e6f...");

        let short_hash = "7a3f9e2b";
        assert_eq!(truncate_hash(short_hash), "7a3f9e2b");
    }

    /// With two diffs and `max_diffs == 1`, one row is shown and one is summarized.
    #[test]
    fn human_result_includes_diff_metadata() {
        let result = DeterminismResult {
            mode: CodecDeterminismMode::DecodeOnly,
            round1_hash: "1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef12"
                .to_string(),
            round2_hash: "1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdeff1"
                .to_string(),
            is_deterministic: false,
            byte_differences: Some(vec![
                ByteDiff {
                    offset: 16,
                    round1_byte: 0xAA,
                    round2_byte: 0x55,
                },
                ByteDiff {
                    offset: 17,
                    round1_byte: 0xBB,
                    round2_byte: 0x44,
                },
            ]),
        };
        let output = render_human_result(&result, 1);
        assert!(output.contains("Determinism mode:"));
        assert!(output.contains("Byte differences: 2 total"));
        assert!(output.contains("0x0010"));
        assert!(output.contains("... 1 more differences not shown"));
    }

    /// A deterministic result without a diff list shows the pass marker and "none".
    #[test]
    fn human_result_deterministic_shows_checkmark() {
        let result = DeterminismResult {
            mode: CodecDeterminismMode::DecodeOnly,
            round1_hash: "a".repeat(64),
            round2_hash: "a".repeat(64),
            is_deterministic: true,
            byte_differences: None,
        };
        let output = render_human_result(&result, 100);
        assert!(output.contains("✅ DETERMINISTIC"));
        assert!(!output.contains("NON-DETERMINISTIC"));
        assert!(output.contains("Byte differences: none"));
    }

    /// A non-deterministic result shows the fail marker and the diff row.
    #[test]
    fn human_result_non_deterministic_shows_cross() {
        let result = DeterminismResult {
            mode: CodecDeterminismMode::EncodeOnly,
            round1_hash: "a".repeat(64),
            round2_hash: "b".repeat(64),
            is_deterministic: false,
            byte_differences: Some(vec![ByteDiff {
                offset: 0,
                round1_byte: 0x41,
                round2_byte: 0x42,
            }]),
        };
        let output = render_human_result(&result, 100);
        assert!(output.contains("❌ NON-DETERMINISTIC"));
        assert!(output.contains("Byte differences: 1 total"));
        assert!(output.contains("0x0000"));
        assert!(output.contains("0x41"));
        assert!(output.contains("0x42"));
    }

    /// JSON output for a passing result parses back and omits `byte_differences`.
    #[test]
    fn render_json_deterministic_round_trips_correctly() {
        let result = DeterminismResult {
            mode: CodecDeterminismMode::RoundTrip,
            round1_hash: "c".repeat(64),
            round2_hash: "c".repeat(64),
            is_deterministic: true,
            byte_differences: None,
        };
        let json = render_json_result(&result).unwrap();
        let parsed: serde_json::Value = serde_json::from_str(&json).unwrap();
        assert_eq!(parsed["is_deterministic"], true);
        assert_eq!(parsed["mode"], "round_trip");
        assert!(parsed.get("byte_differences").is_none());
    }

    /// JSON output for a failing result carries the diff list.
    #[test]
    fn render_json_non_deterministic_includes_diffs() {
        let result = DeterminismResult {
            mode: CodecDeterminismMode::DecodeOnly,
            round1_hash: "d".repeat(64),
            round2_hash: "e".repeat(64),
            is_deterministic: false,
            byte_differences: Some(vec![ByteDiff {
                offset: 5,
                round1_byte: 0x10,
                round2_byte: 0x20,
            }]),
        };
        let json = render_json_result(&result).unwrap();
        let parsed: serde_json::Value = serde_json::from_str(&json).unwrap();
        assert_eq!(parsed["is_deterministic"], false);
        let diffs = parsed["byte_differences"].as_array().unwrap();
        assert_eq!(diffs.len(), 1);
        assert_eq!(diffs[0]["offset"], 5);
    }

    /// Exit codes are part of the CLI contract: 0 for pass, 2 for fail.
    #[test]
    fn verdict_exit_codes_are_stable() {
        assert_eq!(DeterminismVerdict::Deterministic.exit_code(), 0);
        assert_eq!(DeterminismVerdict::NonDeterministic.exit_code(), 2);
    }

    /// `DeterminismRun::exit_code` returns the verdict's exit code.
    #[test]
    fn determinism_run_exit_code_delegates_to_verdict() {
        let run_pass = DeterminismRun {
            verdict: DeterminismVerdict::Deterministic,
            output: String::new(),
        };
        assert_eq!(run_pass.exit_code(), 0);

        let run_fail = DeterminismRun {
            verdict: DeterminismVerdict::NonDeterministic,
            output: String::new(),
        };
        assert_eq!(run_fail.exit_code(), 2);
    }

    /// Boundary case: exactly 16 characters is not truncated.
    #[test]
    fn truncate_hash_exactly_16_chars() {
        let hash = "0123456789abcdef";
        assert_eq!(truncate_hash(hash), "0123456789abcdef");
    }

    /// Boundary case: the empty string passes through unchanged.
    #[test]
    fn truncate_hash_empty_string() {
        assert_eq!(truncate_hash(""), "");
    }

    /// Every mode's `Debug` name appears on the "Determinism mode:" line,
    /// together with both hash lines.
    #[test]
    fn human_result_shows_mode_name() {
        for mode in [
            CodecDeterminismMode::DecodeOnly,
            CodecDeterminismMode::EncodeOnly,
            CodecDeterminismMode::RoundTrip,
        ] {
            // Built before `mode` is moved into the result below.
            let expected_mode_line = format!("Determinism mode: {mode:?}\n");
            let result = DeterminismResult {
                mode,
                round1_hash: "f".repeat(64),
                round2_hash: "f".repeat(64),
                is_deterministic: true,
                byte_differences: None,
            };
            let output = render_human_result(&result, 100);
            assert!(output.contains(&expected_mode_line));
            assert!(output.contains("Round 1 hash:"));
            assert!(output.contains("Round 2 hash:"));
        }
    }

    proptest! {
        // For any hex string, truncation either returns it unchanged (<= 16
        // chars) or yields the 16-char prefix followed by "...".
        #[test]
        fn prop_hash_truncation_is_prefix_plus_ellipsis(bytes in prop::collection::vec(any::<u8>(), 0..128)) {
            let mut raw = String::with_capacity(bytes.len());
            for byte in bytes {
                let digit = byte % 16;
                let ch = if digit < 10 {
                    (b'0' + digit) as char
                } else {
                    (b'a' + (digit - 10)) as char
                };
                raw.push(ch);
            }

            let rendered = truncate_hash(&raw);
            if raw.len() <= 16 {
                prop_assert_eq!(rendered, raw);
            } else {
                prop_assert_eq!(&rendered[16..], "...");
                prop_assert_eq!(rendered.len(), 19);
                prop_assert_eq!(&rendered[..16], &raw[..16]);
            }
        }

        // JSON output deserializes back into a `DeterminismResult` with the
        // same mode and verdict flag, for arbitrary hash contents.
        #[test]
        fn prop_json_output_is_parseable(
            hash_a in prop::collection::vec(any::<u8>(), 0..64),
            hash_b in prop::collection::vec(any::<u8>(), 0..64),
            deterministic in any::<bool>(),
        ) {
            let make_hash = |bytes: &[u8]| {
                let mut out = String::with_capacity(bytes.len() * 2);
                for byte in bytes {
                    out.push_str(&format!("{byte:02x}"));
                }
                out
            };

            let result = DeterminismResult {
                mode: CodecDeterminismMode::RoundTrip,
                round1_hash: make_hash(&hash_a),
                round2_hash: if deterministic {
                    make_hash(&hash_a)
                } else {
                    make_hash(&hash_b)
                },
                is_deterministic: deterministic,
                byte_differences: if deterministic {
                    None
                } else {
                    Some(vec![ByteDiff {
                        offset: 1,
                        round1_byte: 1,
                        round2_byte: 2,
                    }])
                },
            };
            let json = render_json_result(&result).expect("json output");
            let de = serde_json::from_str::<DeterminismResult>(&json).expect("round-trip decode");
            // `prop_assert_eq!` reports through proptest's failure path, as in
            // the property above; references avoid moving out of the structs.
            prop_assert_eq!(&de.mode, &result.mode);
            prop_assert_eq!(de.is_deterministic, result.is_deterministic);
        }
    }
}