use super::verify_report::{VerifyCliEcho, VerifyError, VerifyReport, VerifySample};
use crate::exit_codes::ExitCode;
use crate::utils::{
ParseOptionsConfig, apply_field_projection, atomic_write, build_parse_options,
read_file_or_stdin,
};
use crate::write_stdout_all;
use anyhow::bail;
use copybook_codec::{
Codepage, DecodeOptions, JsonNumberMode, RawMode, RecordFormat, RecordIterator,
UnmappablePolicy,
};
use copybook_core::parse_copybook_with_options;
use std::fmt::Write as _;
use std::fs::{File, metadata};
use std::io::{BufReader, Read, Seek, SeekFrom};
use std::path::PathBuf;
use tracing::{error, info, warn};
// Number of bytes passed to `hex_window` as context on each side of a reported error offset.
const HEX_CTX: usize = 16;
// Maximum number of leading record bytes hex-dumped when no usable error offset is available.
const HEX_FALLBACK: usize = 64;
/// Renders the leading bytes of `bytes` as contiguous uppercase hexadecimal.
///
/// At most `max` bytes are rendered; each byte becomes exactly two hex digits
/// with no separator. An empty slice or `max == 0` yields an empty string.
fn hex_bytes(bytes: &[u8], max: usize) -> String {
    let shown = &bytes[..bytes.len().min(max)];
    let mut rendered = String::with_capacity(shown.len() * 2);
    for byte in shown {
        // Formatting into a `String` cannot fail, so the result is ignored.
        let _ = write!(rendered, "{byte:02X}");
    }
    rendered
}
/// Renders the bytes surrounding `offset` as contiguous uppercase hexadecimal.
///
/// The window covers `[offset - ctx, offset + ctx)`, clamped to the bounds of
/// `bytes`. If `offset` lies so far past the end of `bytes` that the window
/// contains no bytes, an empty string is returned instead of panicking.
// The `format!` + `collect` form is kept for readability on this short, cold path.
#[allow(clippy::format_collect)]
fn hex_window(bytes: &[u8], offset: usize, ctx: usize) -> String {
    // Saturating add guards against overflow for very large offsets.
    let end = offset.saturating_add(ctx).min(bytes.len());
    // Clamp the start to `end` so the range is never inverted (which would panic).
    let start = offset.saturating_sub(ctx).min(end);
    bytes[start..end]
        .iter()
        .map(|b| format!("{b:02X}"))
        .collect::<String>()
}
/// Options controlling a `verify` run; consumed by [`run`].
pub struct VerifyOptions<'a> {
/// Record format of the input file; forwarded to the decoder and used to
/// decide whether a fixed LRECL is required from the schema.
pub format: RecordFormat,
/// Codepage forwarded to both the copybook parse options and the decoder.
pub codepage: Codepage,
/// Strict mode flag, forwarded to both the copybook parse options and the decoder.
pub strict: bool,
/// Error limit forwarded to the decoder (`with_max_errors`) and echoed in the report.
pub max_errors: u32,
/// Sample setting; within this file it is only echoed into the report's CLI options.
pub sample: u32,
/// Forwarded to the copybook parse options; when set, `run` logs that
/// inline comments (`*>`) are disabled.
pub strict_comments: bool,
/// Copybook dialect forwarded to the copybook parse options.
pub dialect: copybook_core::dialect::Dialect,
/// Field selection passed to `apply_field_projection` on the parsed schema.
pub select: &'a [String],
}
#[allow(clippy::too_many_lines)]
pub fn run(
copybook_path: &PathBuf,
input: &PathBuf,
report: Option<PathBuf>,
opts: &VerifyOptions,
) -> anyhow::Result<ExitCode> {
info!("Verifying data file: {:?}", input);
if opts.strict_comments {
info!("Inline comments (*>) disabled (COBOL-85 compatibility)");
}
let copybook_text = read_file_or_stdin(copybook_path)?;
let parse_options = build_parse_options(&ParseOptionsConfig {
strict: opts.strict,
strict_comments: opts.strict_comments,
codepage: &opts.codepage.to_string(),
emit_filler: false,
dialect: opts.dialect,
});
let schema = parse_copybook_with_options(©book_text, &parse_options)?;
let working_schema = apply_field_projection(schema, opts.select)?;
let file_metadata = metadata(input)?;
let file_size = file_metadata.len();
let decode_options = DecodeOptions::new()
.with_format(opts.format)
.with_codepage(opts.codepage)
.with_json_number_mode(JsonNumberMode::Native) .with_emit_filler(false)
.with_emit_meta(false)
.with_emit_raw(RawMode::Off)
.with_strict_mode(opts.strict)
.with_max_errors(Some(u64::from(opts.max_errors)))
.with_unmappable_policy(UnmappablePolicy::Error)
.with_threads(1) .with_preserve_zoned_encoding(false)
.with_preferred_zoned_encoding(copybook_codec::ZonedEncodingFormat::Auto);
match opts.format {
RecordFormat::Fixed => {
if let Some(lrecl) = working_schema.lrecl_fixed {
if file_size % u64::from(lrecl) != 0 {
warn!(
"File size {} is not a multiple of LRECL {}",
file_size, lrecl
);
}
} else {
bail!("Fixed format requires LRECL from schema, but schema has no fixed length");
}
}
RecordFormat::RDW => {
}
}
let schema_fingerprint = format!("{:x}", md5::compute(copybook_text.as_bytes()));
let cli_opts = VerifyCliEcho {
codepage: format!("{:?}", opts.codepage),
strict: opts.strict,
max_errors: opts.max_errors,
sample: opts.sample,
strict_comments: opts.strict_comments,
};
let mut verify_report = VerifyReport::new(
schema_fingerprint,
format!("{:?}", opts.format).to_lowercase(),
input.to_string_lossy().to_string(),
file_size,
cli_opts,
);
let mut records_total = 0u64;
let file = File::open(input)?;
let reader = BufReader::new(file);
let mut file_raw = File::open(input)?;
let record_iter = RecordIterator::new(reader, &working_schema, &decode_options)?;
for record_result in record_iter {
records_total += 1;
match record_result {
Ok(_json_value) => {
}
Err(error) => {
let record_bytes = if let Some(lrecl) = working_schema.lrecl_fixed {
let record_offset = (records_total - 1) * u64::from(lrecl);
match file_raw.seek(SeekFrom::Start(record_offset)) {
Ok(_) => {
let mut rec = vec![0u8; lrecl as usize];
match file_raw.read_exact(&mut rec) {
Ok(()) => Some(rec),
Err(_) => None, }
}
Err(_) => None,
}
} else {
None
};
let error_offset = error.context.as_ref().and_then(|ctx| ctx.byte_offset);
let hex_data = record_bytes.as_ref().map(|bytes| {
if let Some(off) = error_offset {
match usize::try_from(off) {
Ok(offset) => hex_window(bytes, offset, HEX_CTX),
Err(_) => hex_bytes(bytes, HEX_FALLBACK),
}
} else {
hex_bytes(bytes, HEX_FALLBACK)
}
});
let error_entry = VerifyError {
index: records_total - 1, code: format!("{:?}", error.code),
field: error
.context
.as_ref()
.and_then(|ctx| ctx.field_path.clone()),
offset: error_offset,
msg: error.message.clone(),
hex: hex_data,
};
verify_report.add_error(error_entry);
if let Some(ref bytes) = record_bytes {
let sample = VerifySample {
index: records_total - 1,
hex: hex_bytes(bytes, 256), };
verify_report.add_sample(sample);
}
error!(
"Record {}: {} - {}",
records_total - 1,
error.code,
error.message
);
}
}
}
verify_report.set_records_total(records_total);
let mut summary_output = String::new();
writeln!(&mut summary_output, "Verification Summary:")?;
writeln!(&mut summary_output, " File: {}", input.display())?;
writeln!(&mut summary_output, " Format: {:?}", opts.format)?;
writeln!(&mut summary_output, " Codepage: {:?}", opts.codepage)?;
writeln!(&mut summary_output, " File Size: {file_size} bytes")?;
writeln!(
&mut summary_output,
" Records Total: {}",
verify_report.records_total
)?;
if verify_report.errors_total > 0 {
writeln!(
&mut summary_output,
" Errors: {} (showing first {})",
verify_report.errors_total,
verify_report.errors.len()
)?;
if verify_report.truncated {
writeln!(
&mut summary_output,
" Warning: Error list truncated at {} errors",
verify_report.cli_opts.max_errors
)?;
}
for error in &verify_report.errors {
writeln!(
&mut summary_output,
" Record {}: {} - {}",
error.index, error.code, error.msg
)?;
}
} else {
writeln!(&mut summary_output, " Status: PASS - No validation errors")?;
}
write_stdout_all(summary_output.as_bytes())?;
if let Some(report_path) = report {
let report_content = serde_json::to_string_pretty(&verify_report)?;
atomic_write(&report_path, |writer| {
writer.write_all(report_content.as_bytes())
})?;
info!("Verification report written to: {:?}", report_path);
}
let exit_code = verify_report.exit_code();
info!("Verify completed with exit code: {}", exit_code);
Ok(exit_code)
}