use crate::error::{MarcError, Result};
use crate::iso2709::ParseContext;
use crate::leader::Leader;
use crate::record::Record;
/// Error ceiling that [`RecoveryCap::new`] installs as the initial `max_errors`.
pub const DEFAULT_MAX_ERRORS: usize = 10_000;
/// Running tally of errors measured against a configurable ceiling.
///
/// [`RecoveryCap::note`] advances the tally; once the tally goes past the
/// ceiling the cap is flagged as exhausted.
#[derive(Debug, Clone)]
pub struct RecoveryCap {
// Ceiling on counted errors; `note` treats 0 as "no limit".
max_errors: usize,
// Number of errors `note` has counted so far.
error_count: usize,
// Set by `note` once `error_count` has gone past `max_errors`.
exceeded: bool,
}
impl Default for RecoveryCap {
fn default() -> Self {
Self::new()
}
}
impl RecoveryCap {
    /// Creates a cap with the ceiling set to [`DEFAULT_MAX_ERRORS`], a zero
    /// error count, and the exhausted flag cleared.
    #[must_use]
    pub fn new() -> Self {
        Self {
            exceeded: false,
            error_count: 0,
            max_errors: DEFAULT_MAX_ERRORS,
        }
    }

    /// Replaces the ceiling with `n`. A ceiling of `0` makes [`Self::note`]
    /// a no-op, i.e. the cap is disabled.
    pub fn set_max(&mut self, n: usize) {
        self.max_errors = n;
    }

    /// Reports whether [`Self::note`] has ever pushed the count past the ceiling.
    #[must_use]
    pub fn is_exhausted(&self) -> bool {
        self.exceeded
    }

    /// Counts one more error against the ceiling.
    ///
    /// With a ceiling of `0` nothing is counted and the call always succeeds.
    ///
    /// # Errors
    ///
    /// Returns [`MarcError::FatalReaderError`] once the count is greater than
    /// the ceiling. The error carries the ceiling, the count, the source name
    /// from `ctx`, and the record index from `ctx` (reported as `None` when
    /// that index is `0`).
    pub fn note(&mut self, ctx: &ParseContext) -> Result<()> {
        if self.max_errors == 0 {
            return Ok(());
        }

        self.error_count += 1;
        if self.error_count <= self.max_errors {
            return Ok(());
        }

        // Past the ceiling: remember that, then report where it happened.
        self.exceeded = true;
        let idx = ctx.record_index;
        Err(MarcError::FatalReaderError {
            cap: self.max_errors,
            errors_seen: self.error_count,
            record_index: (idx != 0).then_some(idx),
            source_name: ctx.source_name.clone(),
        })
    }
}
/// Selects how [`try_recover_record`] reacts to damaged input.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum RecoveryMode {
/// Default mode. An unparsable directory entry, or a field whose byte range
/// is not available, makes `try_recover_record` return an error.
#[default]
Strict,
/// Non-strict: `try_recover_record` skips unparsable directory entries and
/// keeps whatever part of an unavailable field still parses.
Lenient,
/// Treated exactly like `Lenient` by `try_recover_record`.
/// NOTE(review): any difference from `Lenient` is presumably applied elsewhere — confirm.
Permissive,
}
/// Byte (0x1E) at which `try_recover_record` stops scanning the directory; also
/// passed to `try_parse_field` as the end-of-field marker.
const FIELD_TERMINATOR: u8 = 0x1E;
/// Byte (0x1F) passed to `try_parse_field` as the marker that starts each subfield.
const SUBFIELD_DELIMITER: u8 = 0x1F;
/// Rebuilds as much of a [`Record`] as possible from a partial record body.
///
/// The directory is read from the start of `partial_data` and is taken to span
/// `base_address - 24` bytes, clamped to the bytes actually present. It is
/// walked in 12-byte entries (3-byte tag, 4-digit length, 5-digit start) until
/// an entry begins with the field terminator or no complete entry remains.
/// Each entry's start and length are used as offsets into `partial_data`.
///
/// For an entry whose byte range is fully available:
/// * the tag `LDR` is ignored;
/// * an all-numeric tag below `010` becomes a control field whose value is the
///   range minus its final byte (presumably the field terminator); a declared
///   length of zero yields an empty value;
/// * any other tag is parsed as a data field and silently dropped if it does
///   not parse.
///
/// For an entry whose range starts inside the directory area or runs past the
/// end of `partial_data`, non-strict modes keep whatever prefix is present and
/// still parses as a data field; a start offset beyond the available bytes is
/// skipped.
///
/// # Errors
///
/// * A truncated-record error when `base_address` leaves no room for a
///   directory (`base_address <= 24`).
/// * In [`RecoveryMode::Strict`] only: the parse error for a non-numeric length
///   or start, or an invalid-field error naming the tag when the entry's byte
///   range is not fully available.
#[allow(clippy::too_many_lines)]
pub fn try_recover_record(
    leader: Leader,
    partial_data: &[u8],
    base_address: usize,
    mode: RecoveryMode,
    ctx: &ParseContext,
) -> Result<Record> {
    let mut record = Record::new(leader);

    let directory_size = base_address.saturating_sub(24);
    if directory_size == 0 {
        return Err(ctx.err_truncated_record(None, Some(0)));
    }
    let directory_end = std::cmp::min(directory_size, partial_data.len());
    let directory = &partial_data[..directory_end];

    let strict = mode == RecoveryMode::Strict;
    // Field data is expected to begin right after the directory.
    let data_start = directory_size;

    // `chunks_exact` yields only complete 12-byte entries, so a trailing
    // partial entry ends the walk just like the terminator does.
    for entry in directory.chunks_exact(12) {
        if entry[0] == FIELD_TERMINATOR {
            break;
        }

        let tag = String::from_utf8_lossy(&entry[0..3]).to_string();
        let field_length = match parse_4digits(&entry[3..7]) {
            Ok(len) => len,
            Err(err) if strict => return Err(err),
            Err(_) => continue,
        };
        let start_position = match parse_digits(&entry[7..12]) {
            Ok(start) => start,
            Err(err) if strict => return Err(err),
            Err(_) => continue,
        };
        let end_position = start_position + field_length;

        if start_position < data_start || end_position > partial_data.len() {
            if strict {
                let mut err_ctx = ctx.clone();
                err_ctx.current_field_tag = tag.as_bytes().try_into().ok();
                return Err(err_ctx.err_invalid_field(format!(
                    "Field {tag} data not available (truncated record)"
                )));
            }

            let available_end = std::cmp::min(end_position, partial_data.len());
            if available_end > data_start {
                // Checked slicing: a damaged directory can name a start offset
                // beyond the bytes we hold, which would panic with `[..]`.
                if let Some(available) = partial_data.get(start_position..available_end) {
                    if let Ok(field) =
                        try_parse_field(available, &tag, SUBFIELD_DELIMITER, FIELD_TERMINATOR)
                    {
                        record.add_field(field);
                    }
                }
            }
            continue;
        }

        // From here on `data_start <= start_position <= end_position <= len`.
        if tag == "LDR" {
            continue;
        }

        if tag.starts_with('0') && tag.chars().all(char::is_numeric) && tag.as_str() < "010" {
            // Drop the final byte of the range, but never let the end fall
            // before the start (a declared length of 0 would otherwise panic).
            let value_end = end_position.saturating_sub(1).max(start_position);
            let value =
                String::from_utf8_lossy(&partial_data[start_position..value_end]).to_string();
            record.add_control_field(tag, value);
        } else if let Ok(field) = try_parse_field(
            &partial_data[start_position..end_position],
            &tag,
            SUBFIELD_DELIMITER,
            FIELD_TERMINATOR,
        ) {
            record.add_field(field);
        }
    }

    Ok(record)
}
/// Parses the bytes of one data field into a `Field`.
///
/// `data` must start with the two indicator bytes. What follows is read as a
/// run of subfields, each introduced by `subfield_delim` and a one-byte code,
/// with the value running up to the next `subfield_delim`, `field_term`, or
/// the end of `data`. Parsing stops at `field_term`, or when a delimiter is the
/// very last byte. Values are decoded as lossy UTF-8.
///
/// # Errors
///
/// Returns an invalid-field error when `data` is empty, when it is too short
/// to hold both indicators, or when a byte that should start a subfield is
/// neither `subfield_delim` nor `field_term`.
fn try_parse_field(
    data: &[u8],
    tag: &str,
    subfield_delim: u8,
    field_term: u8,
) -> Result<crate::record::Field> {
    use crate::record::Field;

    let (ind1, ind2) = match data {
        [] => return Err(MarcError::invalid_field_msg("Empty field data".to_string())),
        [_] => {
            return Err(MarcError::invalid_field_msg(
                "Data field too short (needs indicators)".to_string(),
            ))
        }
        [first, second, ..] => (*first as char, *second as char),
    };
    let mut field = Field::new(tag.to_string(), ind1, ind2);

    // `rest` always begins at the byte that should open the next subfield.
    let mut rest = &data[2..];
    while let Some((&lead, after_lead)) = rest.split_first() {
        if lead == field_term {
            break;
        }
        if lead != subfield_delim {
            return Err(MarcError::invalid_field_msg(
                "Expected subfield delimiter".to_string(),
            ));
        }

        // The byte after the delimiter is taken as the code, whatever it is.
        let (code, body) = match after_lead.split_first() {
            Some((&code_byte, body)) => (code_byte as char, body),
            None => break,
        };
        let value_len = body
            .iter()
            .position(|&byte| byte == subfield_delim || byte == field_term)
            .unwrap_or(body.len());
        let (value_bytes, tail) = body.split_at(value_len);

        field.add_subfield(code, String::from_utf8_lossy(value_bytes).to_string());
        rest = tail;
    }

    Ok(field)
}
/// Parses a 4-byte ASCII decimal value, as used for a directory entry's field length.
///
/// # Errors
///
/// Returns an invalid-field error when `bytes` is not exactly four bytes long
/// or contains anything other than ASCII digits. A sign such as `+` is
/// rejected here; `str::parse::<usize>` on its own would accept it.
fn parse_4digits(bytes: &[u8]) -> Result<usize> {
    if bytes.len() != 4 {
        return Err(MarcError::invalid_field_msg(format!(
            "Expected 4-digit field, got {} bytes",
            bytes.len()
        )));
    }
    if !bytes.iter().all(u8::is_ascii_digit) {
        return Err(MarcError::invalid_field_msg(format!(
            "Invalid numeric field: '{}'",
            String::from_utf8_lossy(bytes)
        )));
    }
    // Four decimal digits top out at 9999, so the accumulation cannot overflow.
    Ok(bytes
        .iter()
        .fold(0, |acc, &digit| acc * 10 + usize::from(digit - b'0')))
}
/// Parses a 5-byte ASCII decimal value, as used for a directory entry's start position.
///
/// # Errors
///
/// Returns an invalid-field error when `bytes` is not exactly five bytes long
/// or contains anything other than ASCII digits. A sign such as `+` is
/// rejected here; `str::parse::<usize>` on its own would accept it.
fn parse_digits(bytes: &[u8]) -> Result<usize> {
    if bytes.len() != 5 {
        return Err(MarcError::invalid_field_msg(format!(
            "Expected 5-digit field, got {} bytes",
            bytes.len()
        )));
    }
    if !bytes.iter().all(u8::is_ascii_digit) {
        return Err(MarcError::invalid_field_msg(format!(
            "Invalid numeric field: '{}'",
            String::from_utf8_lossy(bytes)
        )));
    }
    // Five decimal digits top out at 99999, so the accumulation cannot overflow.
    Ok(bytes
        .iter()
        .fold(0, |acc, &digit| acc * 10 + usize::from(digit - b'0')))
}