use crate::error::{MarcError, Result};
use crate::iso2709::{
self, is_control_field_tag, parse_4digits, parse_5digits, parse_data_field, read_leader_bytes,
read_record_data, DataFieldParseConfig, ParseContext, FIELD_TERMINATOR, LEADER_LEN,
};
use crate::leader::Leader;
use crate::record::Field;
use crate::recovery::{RecoveryCap, RecoveryMode, ValidationLevel};
use std::io::Read;
/// Customisation points used by [`parse_iso2709_record`] to assemble a concrete record type.
///
/// The parser owns the leader/directory walk; an implementor supplies the parse configuration,
/// receives each decoded field, and may override the validation and recovery hooks whose
/// default implementations accept everything or decline to recover.
pub trait Iso2709Builder: Sized {
    /// Value produced by [`Iso2709Builder::finalize`] once every field has been added.
    type Output;

    /// Returns the data-field parse configuration to use at the given validation `level`.
    fn parse_config(level: ValidationLevel) -> DataFieldParseConfig;

    /// Checks whether `leader` describes a record this builder is willing to handle.
    ///
    /// The default implementation ignores both arguments and accepts every leader.
    ///
    /// # Errors
    ///
    /// The default never fails; overriding implementations decide their own error conditions.
    fn validate_record_type(leader: &Leader, ctx: &ParseContext) -> Result<()> {
        let _ = leader;
        let _ = ctx;
        Ok(())
    }

    /// Leader check that the parser runs only at [`ValidationLevel::StrictMarc`].
    ///
    /// The default delegates to `crate::RecordStructureValidator::validate_leader`.
    ///
    /// # Errors
    ///
    /// Propagates whatever error the delegated validator reports.
    fn validate_leader_strict_marc(leader: &Leader) -> Result<()> {
        crate::RecordStructureValidator::validate_leader(leader)
    }

    /// Creates an empty builder for the record described by `leader`.
    fn new_for(leader: Leader) -> Self;

    /// Stores a decoded control field under `tag`.
    fn add_control_field(&mut self, tag: String, value: String);

    /// Stores a parsed data field under `tag`.
    fn add_data_field(&mut self, tag: String, field: Field);

    /// Turns the raw bytes of a control field into a `String`.
    ///
    /// The final byte of `field_bytes` is dropped before decoding (presumably the trailing
    /// field terminator); an empty slice stays empty. At [`ValidationLevel::Structural`]
    /// invalid UTF-8 is replaced lossily, at [`ValidationLevel::StrictMarc`] it is rejected.
    ///
    /// # Errors
    ///
    /// At `StrictMarc`, returns the error built by `ctx.err_encoding` when the payload is not
    /// valid UTF-8.
    #[inline]
    fn decode_control_field_value(
        field_bytes: &[u8],
        tag: &str,
        ctx: &ParseContext,
        level: ValidationLevel,
    ) -> Result<String> {
        // Everything except the last byte is payload; nothing to strip from an empty slice.
        let payload = match field_bytes.split_last() {
            Some((_, body)) => body,
            None => field_bytes,
        };
        match level {
            ValidationLevel::StrictMarc => match std::str::from_utf8(payload) {
                Ok(text) => Ok(text.to_owned()),
                Err(e) => {
                    Err(ctx.err_encoding(format!("Invalid UTF-8 in control field {tag}: {e}")))
                },
            },
            ValidationLevel::Structural => Ok(String::from_utf8_lossy(payload).into_owned()),
        }
    }

    /// Hook for inspecting the raw bytes of a data field before they are parsed.
    ///
    /// The default implementation ignores its arguments and accepts every field.
    ///
    /// # Errors
    ///
    /// The default never fails; overriding implementations decide their own error conditions.
    #[inline]
    fn validate_data_field_bytes(field_bytes: &[u8], tag: &str, ctx: &ParseContext) -> Result<()> {
        let _ = field_bytes;
        let _ = tag;
        let _ = ctx;
        Ok(())
    }

    /// Hook that lets a builder take over when fewer record bytes were read than expected.
    ///
    /// Returning `Some(result)` makes the parser return that result for the record instead of
    /// continuing with its own directory walk; returning `None` lets the parser carry on.
    /// The default implementation ignores its arguments and returns `None`.
    #[must_use]
    fn try_recover_truncated(
        leader: Leader,
        partial_data: &[u8],
        base_address: usize,
        mode: RecoveryMode,
        ctx: &ParseContext,
        errors: &mut Vec<MarcError>,
    ) -> Option<Result<Self::Output>> {
        let _ = leader;
        let _ = partial_data;
        let _ = base_address;
        let _ = mode;
        let _ = ctx;
        let _ = errors;
        None
    }

    /// Consumes the builder and yields the finished record.
    fn finalize(self) -> Self::Output;
}
#[allow(clippy::too_many_lines, clippy::cognitive_complexity)]
pub fn parse_iso2709_record<R, B>(
reader: &mut R,
ctx: &mut ParseContext,
cap: &mut RecoveryCap,
recovery_mode: RecoveryMode,
validation_level: ValidationLevel,
errors: &mut Vec<MarcError>,
) -> Result<Option<B::Output>>
where
R: Read,
B: Iso2709Builder,
{
if cap.is_exhausted() {
return Ok(None);
}
let Some(leader_bytes) = read_leader_bytes(reader)? else {
return Ok(None);
};
ctx.begin_record();
let leader_offset = ctx.stream_byte_offset;
let leader = Leader::from_bytes(&leader_bytes).map_err(|e| {
e.with_position(ctx)
.with_bytes_near(&leader_bytes, leader_offset)
})?;
leader.validate_for_reading().map_err(|e| {
e.with_position(ctx)
.with_bytes_near(&leader_bytes, leader_offset)
})?;
if validation_level == ValidationLevel::StrictMarc {
if let Err(e) = B::validate_leader_strict_marc(&leader) {
let enriched = e
.with_position(ctx)
.with_bytes_near(&leader_bytes, leader_offset);
if recovery_mode == RecoveryMode::Strict {
return Err(enriched);
}
errors.push(enriched);
cap.note(ctx)?;
}
}
B::validate_record_type(&leader, ctx)?;
let record_length = leader.record_length as usize;
let base_address = leader.data_base_address as usize;
let directory_size = base_address - 24;
ctx.advance(LEADER_LEN);
let (record_data, bytes_read) = read_record_data(reader, record_length, recovery_mode, ctx)?;
let expected_data_len = record_length.saturating_sub(LEADER_LEN);
let record_data_offset = ctx.stream_byte_offset;
ctx.set_parse_buffer(&record_data, record_data_offset);
if bytes_read < expected_data_len {
let err = ctx.err_truncated_record(Some(expected_data_len), Some(bytes_read));
errors.push(err);
cap.note(ctx)?;
if let Some(result) = B::try_recover_truncated(
leader.clone(),
&record_data,
base_address,
recovery_mode,
ctx,
errors,
) {
return result.map(Some);
}
}
if recovery_mode == RecoveryMode::Strict
&& record_data.len() == record_length - LEADER_LEN
&& record_data.last() != Some(&iso2709::RECORD_TERMINATOR)
{
ctx.stream_byte_offset = record_data_offset + record_data.len() - 1;
return Err(ctx.err_end_of_record_not_found());
}
let directory_end = std::cmp::min(directory_size, record_data.len());
let directory: &[u8] = if directory_end > 0 {
&record_data[..directory_end]
} else {
&[]
};
let data_start = std::cmp::min(base_address - 24, record_data.len());
let data: &[u8] = if data_start < record_data.len() {
&record_data[data_start..]
} else {
&[]
};
let mut builder = B::new_for(leader);
let mut pos = 0;
while pos < directory.len() {
ctx.stream_byte_offset = record_data_offset + pos;
if directory[pos] == FIELD_TERMINATOR {
break;
}
if pos + 12 > directory.len() {
let err = ctx
.err_directory_invalid(Some(&directory[pos..]), "complete 12-byte directory entry");
if recovery_mode == RecoveryMode::Strict {
return Err(err);
}
errors.push(err);
cap.note(ctx)?;
break;
}
let entry_chunk = &directory[pos..pos + 12];
let tag_bytes: &[u8; 3] = entry_chunk[0..3]
.try_into()
.expect("entry_chunk guaranteed >= 12 bytes by the slice above");
if !tag_bytes.iter().all(u8::is_ascii) {
let err =
ctx.err_directory_invalid(Some(tag_bytes), "3 ASCII bytes (directory entry tag)");
if recovery_mode == RecoveryMode::Strict {
return Err(err);
}
errors.push(err);
cap.note(ctx)?;
pos += 12;
continue;
}
let tag = std::str::from_utf8(tag_bytes)
.expect("ASCII bytes are valid UTF-8")
.to_string();
let Ok(field_length) = parse_4digits(&entry_chunk[3..7]) else {
ctx.current_field_tag = tag.as_bytes().try_into().ok();
ctx.stream_byte_offset = record_data_offset + pos + 3;
let err = ctx.err_directory_invalid(
Some(&entry_chunk[3..7]),
"4 ASCII digits (directory entry length)",
);
ctx.current_field_tag = None;
if recovery_mode == RecoveryMode::Strict {
return Err(err);
}
errors.push(err);
cap.note(ctx)?;
pos += 12;
continue;
};
let Ok(start_position) = parse_5digits(&entry_chunk[7..12]) else {
ctx.current_field_tag = tag.as_bytes().try_into().ok();
ctx.stream_byte_offset = record_data_offset + pos + 7;
let err = ctx.err_directory_invalid(
Some(&entry_chunk[7..12]),
"5 ASCII digits (directory entry start position)",
);
ctx.current_field_tag = None;
if recovery_mode == RecoveryMode::Strict {
return Err(err);
}
errors.push(err);
cap.note(ctx)?;
pos += 12;
continue;
};
pos += 12;
let end_position = start_position + field_length;
if end_position > data.len() {
ctx.current_field_tag = tag.as_bytes().try_into().ok();
let err = ctx.err_invalid_field(format!(
"Field {tag} exceeds data area (end {end_position} > {})",
data.len()
));
if recovery_mode == RecoveryMode::Strict {
return Err(err);
}
errors.push(err);
cap.note(ctx)?;
let available_end = std::cmp::min(end_position, data.len());
if available_end > start_position {
let field_data = &data[start_position..available_end];
if tag != "LDR" {
if is_control_field_tag(&tag) {
if let Ok(value) =
B::decode_control_field_value(field_data, &tag, ctx, validation_level)
{
if tag == "001" {
ctx.record_control_number = Some(value.clone());
}
builder.add_control_field(tag.clone(), value);
}
} else if B::validate_data_field_bytes(field_data, &tag, ctx).is_ok() {
ctx.current_field_tag = tag.as_bytes().try_into().ok();
ctx.stream_byte_offset = record_data_offset + data_start + start_position;
if let Ok(field) = parse_data_field(
field_data,
&tag,
B::parse_config(validation_level),
ctx,
) {
builder.add_data_field(tag, field);
}
ctx.current_field_tag = None;
}
}
}
continue;
}
let field_data = &data[start_position..end_position];
if tag == "LDR" {
continue;
}
if is_control_field_tag(&tag) {
let value = match B::decode_control_field_value(field_data, &tag, ctx, validation_level)
{
Ok(v) => v,
Err(e) => {
if recovery_mode == RecoveryMode::Strict {
return Err(e);
}
errors.push(e);
cap.note(ctx)?;
continue;
},
};
if tag == "001" {
ctx.record_control_number = Some(value.clone());
}
builder.add_control_field(tag, value);
continue;
}
ctx.current_field_tag = tag.as_bytes().try_into().ok();
ctx.stream_byte_offset = record_data_offset + data_start + start_position;
if let Err(e) = B::validate_data_field_bytes(field_data, &tag, ctx) {
ctx.current_field_tag = None;
if recovery_mode == RecoveryMode::Strict {
return Err(e);
}
errors.push(e);
cap.note(ctx)?;
continue;
}
let parsed = parse_data_field(field_data, &tag, B::parse_config(validation_level), ctx);
ctx.current_field_tag = None;
match parsed {
Ok(field) => builder.add_data_field(tag, field),
Err(e) => {
if recovery_mode == RecoveryMode::Strict {
return Err(e);
}
errors.push(e);
cap.note(ctx)?;
},
}
}
ctx.stream_byte_offset = record_data_offset + record_length.saturating_sub(LEADER_LEN);
Ok(Some(builder.finalize()))
}
pub use iso2709::{DataFieldParseConfig as ParseConfig, FIELD_TERMINATOR as DIRECTORY_TERMINATOR};