use crate::authority_record::AuthorityRecord;
use crate::error::{MarcError, Result};
use crate::leader::Leader;
use crate::record::Field;
use crate::recovery::RecoveryMode;
use std::io::Read;
/// Byte (0x1E) that ends the directory and ends each field's data; parsing of
/// the directory and of a field's subfields stops when it is encountered.
const FIELD_TERMINATOR: u8 = 0x1E;
/// Byte (0x1F) that introduces a subfield inside a data field; the byte that
/// follows it is taken as the subfield code.
const SUBFIELD_DELIMITER: u8 = 0x1F;
/// Reads authority records from any [`Read`] source, one record per call to
/// `read_record`.
///
/// The reader starts in `RecoveryMode::Strict`; use `with_recovery_mode` to
/// change how truncated or malformed input is handled.
#[derive(Debug)]
pub struct AuthorityMarcReader<R: Read> {
// Underlying byte source the records are pulled from.
reader: R,
// Policy consulted when a record is truncated or its directory is incomplete.
recovery_mode: RecoveryMode,
}
impl<R: Read> AuthorityMarcReader<R> {
#[must_use]
pub fn new(reader: R) -> Self {
AuthorityMarcReader {
reader,
recovery_mode: RecoveryMode::Strict,
}
}
#[must_use]
pub fn with_recovery_mode(mut self, mode: RecoveryMode) -> Self {
self.recovery_mode = mode;
self
}
#[allow(clippy::too_many_lines)]
pub fn read_record(&mut self) -> Result<Option<AuthorityRecord>> {
let mut leader_bytes = vec![0u8; 24];
match self.reader.read_exact(&mut leader_bytes) {
Ok(()) => {},
Err(e) if e.kind() == std::io::ErrorKind::UnexpectedEof => {
return Ok(None);
},
Err(e) => return Err(MarcError::IoError(e)),
}
let leader = Leader::from_bytes(&leader_bytes)?;
leader.validate_for_reading()?;
if leader.record_type != 'z' {
return Err(MarcError::InvalidRecord(format!(
"Expected authority record type 'z', got '{}'",
leader.record_type
)));
}
let record_length = leader.record_length as usize;
let base_address = leader.data_base_address as usize;
let directory_size = base_address - 24;
let mut record_data = vec![0u8; record_length - 24];
match self.reader.read_exact(&mut record_data) {
Ok(()) => {},
Err(e) if e.kind() == std::io::ErrorKind::UnexpectedEof => {
if self.recovery_mode == RecoveryMode::Strict {
return Err(MarcError::TruncatedRecord(
"Unexpected end of file while reading record data".to_string(),
));
}
},
Err(e) => return Err(MarcError::IoError(e)),
}
if record_data.len() < (record_length - 24) && self.recovery_mode != RecoveryMode::Strict {
return Err(MarcError::TruncatedRecord(
"Authority record is truncated".to_string(),
));
}
let directory_end = std::cmp::min(directory_size, record_data.len());
let directory = if directory_end > 0 {
&record_data[..directory_end]
} else {
&[]
};
let data_start = std::cmp::min(base_address - 24, record_data.len());
let data = if data_start < record_data.len() {
&record_data[data_start..]
} else {
&[]
};
let _ = data;
let mut record = AuthorityRecord::new(leader);
let mut pos = 0;
while pos < directory.len() {
if directory[pos] == FIELD_TERMINATOR {
break;
}
if pos + 12 > directory.len() {
if self.recovery_mode == RecoveryMode::Strict {
return Err(MarcError::InvalidRecord(
"Incomplete directory entry".to_string(),
));
}
break;
}
let tag = std::str::from_utf8(&directory[pos..pos + 3])
.map_err(|_| MarcError::InvalidRecord("Invalid field tag".to_string()))?
.to_string();
let length = std::str::from_utf8(&directory[pos + 3..pos + 7])
.ok()
.and_then(|s| s.parse::<usize>().ok())
.ok_or_else(|| MarcError::InvalidRecord("Invalid field length".to_string()))?;
let start = std::str::from_utf8(&directory[pos + 7..pos + 12])
.ok()
.and_then(|s| s.parse::<usize>().ok())
.ok_or_else(|| {
MarcError::InvalidRecord("Invalid field start position".to_string())
})?;
pos += 12;
let field_data_start = data_start + start;
let field_data_end = std::cmp::min(field_data_start + length, record_data.len());
if field_data_start >= record_data.len() {
continue;
}
let field_bytes = &record_data[field_data_start..field_data_end];
if tag.len() == 3 && tag.as_str() < "010" {
let value = String::from_utf8_lossy(field_bytes)
.trim_end_matches(['\x1E', '\x1F'])
.to_string();
record.add_control_field(tag, value);
} else {
if field_bytes.len() < 2 {
continue;
}
let indicator1 = field_bytes[0] as char;
let indicator2 = field_bytes[1] as char;
let mut field = Field {
tag: tag.clone(),
indicator1,
indicator2,
subfields: smallvec::SmallVec::new(),
};
let mut subfield_pos = 2;
while subfield_pos < field_bytes.len() {
if field_bytes[subfield_pos] == SUBFIELD_DELIMITER {
if subfield_pos + 1 < field_bytes.len() {
let code = field_bytes[subfield_pos + 1] as char;
subfield_pos += 2;
let mut subfield_end = subfield_pos;
while subfield_end < field_bytes.len()
&& field_bytes[subfield_end] != SUBFIELD_DELIMITER
&& field_bytes[subfield_end] != FIELD_TERMINATOR
{
subfield_end += 1;
}
let value =
String::from_utf8_lossy(&field_bytes[subfield_pos..subfield_end])
.to_string();
field
.subfields
.push(crate::record::Subfield { code, value });
subfield_pos = subfield_end;
} else {
break;
}
} else if field_bytes[subfield_pos] == FIELD_TERMINATOR {
break;
} else {
subfield_pos += 1;
}
}
match tag.as_str() {
"100" | "110" | "111" | "130" | "148" | "150" | "151" | "155" => {
record.set_heading(field);
},
"400" | "410" | "411" | "430" | "448" | "450" | "451" | "455" => {
record.add_see_from_tracing(field);
},
"500" | "510" | "511" | "530" | "548" | "550" | "551" | "555" => {
record.add_see_also_tracing(field);
},
"660" | "661" | "662" | "663" | "664" | "665" | "666" | "667" | "668"
| "669" | "670" | "671" | "672" | "673" | "674" | "675" | "676" | "677"
| "678" | "679" | "680" | "681" | "682" | "683" | "684" | "685" | "686"
| "687" | "688" | "689" => {
record.add_note(field);
},
"700" | "710" | "711" | "730" | "748" | "750" | "751" | "755" => {
record.add_linking_entry(field);
},
_ => {
record.add_field(field);
},
}
}
}
Ok(Some(record))
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Cursor;

    /// Constructing a reader over an empty buffer must not panic.
    #[test]
    fn test_authority_reader_creation() {
        let _reader = AuthorityMarcReader::new(Cursor::new(Vec::<u8>::new()));
    }

    /// Empty input yields `Ok(None)`: neither a record nor an error.
    #[test]
    fn test_authority_reader_empty() {
        let mut reader = AuthorityMarcReader::new(Cursor::new(Vec::<u8>::new()));
        match reader.read_record() {
            Err(e) => panic!("Unexpected error: {e}"),
            Ok(record) => assert!(record.is_none(), "Should not have read a record"),
        }
    }

    /// A record whose leader does not carry the authority type is rejected
    /// with an `InvalidRecord` error that names the type mismatch.
    #[test]
    fn test_authority_reader_wrong_type() {
        // 24-byte leader followed by a field terminator and the 0x1D byte.
        let mut bytes = b"00029nam  2200025 a 4500".to_vec();
        bytes.extend_from_slice(&[FIELD_TERMINATOR, 0x1D]);

        let mut reader = AuthorityMarcReader::new(Cursor::new(bytes));
        let other = reader.read_record();
        let is_type_mismatch = matches!(
            &other,
            Err(MarcError::InvalidRecord(msg)) if msg.contains("Expected authority record type")
        );
        assert!(
            is_type_mismatch,
            "Should have returned type mismatch error, got: {other:?}"
        );
    }
}