pub mod marc8;
mod parser;
use core::str;
use std::{
borrow::Cow,
fmt::{Display, Write},
str::Utf8Error,
};
use marc8::Marc8Decoder;
use thiserror::Error;
use winnow::{
combinator::repeat,
error::{ContextError, ParseError, StrContext},
Parser,
};
/// Parses `data` as a sequence of zero or more records.
///
/// The per-record parser is applied repeatedly and the results are collected
/// into a `Vec`. `Parser::parse` is used, so the whole input has to be
/// accepted for the call to succeed.
///
/// # Errors
///
/// Returns [`Error::ParseFailed`] (via the `From<ParseError<..>>` conversion)
/// when the parser rejects the input.
pub fn parse_records(data: &[u8]) -> Result<Vec<Record>, Error> {
    // Label the inner parser so failures are reported as belonging to a "record".
    let single_record = parser::parse_record.context(StrContext::Label("record"));
    let records: Vec<Record> = repeat(0.., single_record).parse(data)?;
    Ok(records)
}
/// Error returned by [`parse_records`].
#[derive(thiserror::Error, Debug)]
pub enum Error {
/// The input could not be parsed.
///
/// `reason` holds the rendered inner parser error and `offset` the position
/// reported by the parser; both are filled in by the
/// `From<ParseError<&[u8], ContextError>>` impl in this file.
#[error("Failed to parse: {} at byte offset `{}`", reason, offset)]
ParseFailed { reason: String, offset: usize },
}
/// Converts a winnow parse failure into this crate's [`Error`], so that `?`
/// can be used on parser results.
impl From<ParseError<&[u8], ContextError>> for Error {
    fn from(value: ParseError<&[u8], ContextError>) -> Self {
        // Only the inner `ContextError` is rendered into the message; the
        // offset is carried separately.
        let offset = value.offset();
        let reason = value.inner().to_string();
        Self::ParseFailed { reason, offset }
    }
}
/// One parsed record: its [`Leader`] plus its list of [`Field`]s.
///
/// `Debug` is derived (both field types already implement it) so that a
/// `Result<Vec<Record>, Error>` can be used with `unwrap_err`, `expect_err`,
/// `dbg!` and `{:?}` formatting, all of which require the `Ok` type to be
/// `Debug`.
#[derive(Debug)]
pub struct Record {
    /// The record's leader.
    pub leader: Leader,
    /// The record's fields (control and data).
    pub fields: Vec<Field>,
}
/// Typed representation of a record's leader, stored in [`Record::leader`].
///
/// NOTE(review): the code that fills these fields lives outside this file;
/// the per-field descriptions below are inferred from the field and type
/// names — confirm against the parser.
#[derive(Debug)]
pub struct Leader {
/// Record length.
///
/// NOTE(review): MARC 21 reserves five digits for this value (up to 99999),
/// which does not fit in `u16` (max 65535) — confirm how longer records are
/// handled.
pub record_length: u16,
/// Record status.
pub status: Status,
/// Type of record.
pub record_type: RecordType,
/// Bibliographic level.
pub bibliographical_level: BibliographicalLevel,
/// Type of control.
pub control_type: ControlType,
/// Character coding scheme; `CodingScheme::decoder` turns it into a text decoder.
pub coding_scheme: CodingScheme,
/// Base address of data.
///
/// NOTE(review): also a five-digit value in MARC 21; same `u16` range
/// concern as `record_length`.
pub data_base_address: u16,
/// Encoding level.
pub encoding_level: EncodingLevel,
/// Descriptive cataloging form.
pub descriptive_cataloging_form: CatalogingForm,
/// Multipart resource record level.
pub multipart_resource_record_level: MultipartResourceRecordLevel,
}
/// Record status, as stored in [`Leader::status`].
///
/// NOTE(review): variant names appear to mirror the MARC 21 leader/05 code
/// list (presumably `a`, `c`, `d`, `n`, `p` in declaration order); the
/// byte-to-variant mapping is not visible in this file — confirm.
#[derive(Debug, PartialEq, Eq)]
pub enum Status {
IncreaseInEncoding,
Corrected,
Deleted,
New,
IncreaseFromPrepublication,
}
/// Type of record, as stored in [`Leader::record_type`].
///
/// NOTE(review): variant names appear to mirror the MARC 21 leader/06 code
/// list; the byte-to-variant mapping is not visible in this file — confirm.
#[derive(Debug, PartialEq, Eq)]
pub enum RecordType {
LanguageMaterial,
NotatedMusic,
ManuscriptNotatedMusic,
CartographicMaterial,
ManuscriptCartographicMaterial,
ProjectedMedium,
NonmusicalSoundRecording,
MusicalSoundRecording,
TwoDimensionalNonprojectableGraphic,
ComputerFile,
Kit,
MixedMaterials,
ThreeDimensionalArtifact,
ManuscriptLanguageMaterial,
}
/// Bibliographic level, as stored in [`Leader::bibliographical_level`].
///
/// NOTE(review): variant names appear to mirror the MARC 21 leader/07 code
/// list, with `Unknown` as an extra catch-all; the byte-to-variant mapping
/// (and when `Unknown` is produced) is not visible in this file — confirm.
#[derive(Debug, PartialEq, Eq)]
pub enum BibliographicalLevel {
MonographicComponentPart,
SerialComponentPart,
Collection,
Subunit,
IntegratingResource,
Monograph,
Serial,
Unknown,
}
/// Type of control, as stored in [`Leader::control_type`].
///
/// NOTE(review): presumably MARC 21 leader/08 (blank = no specified type,
/// `a` = archival); the mapping is not visible in this file — confirm.
#[derive(Debug, PartialEq, Eq)]
pub enum ControlType {
Unspecified,
Archival,
}
/// Character coding scheme of a record, as stored in [`Leader::coding_scheme`].
///
/// The private `decoder` method on this type selects the matching text decoder.
#[derive(Debug, PartialEq, Eq)]
pub enum CodingScheme {
/// Text is decoded with the MARC-8 decoder from the `marc8` module.
Marc8,
/// Text is decoded as UTF-8.
Ucs,
}
impl CodingScheme {
    /// Builds the text decoder that corresponds to this coding scheme:
    /// `Marc8` yields the MARC-8 decoder, `Ucs` yields the UTF-8 decoder.
    fn decoder(&self) -> Decoder {
        match *self {
            Self::Marc8 => Decoder::Marc8(marc8::Marc8Decoder {}),
            Self::Ucs => Decoder::Utf8(Utf8Decoder {}),
        }
    }
}
/// Encoding level, as stored in [`Leader::encoding_level`].
///
/// NOTE(review): variant names appear to mirror the MARC 21 leader/17 code
/// list; `ObsoleteFull`, `ObsoleteMinimal` and `AddedFromBatch` look like
/// vendor-specific extensions. The byte-to-variant mapping is not visible in
/// this file — confirm.
#[derive(Debug, PartialEq, Eq)]
pub enum EncodingLevel {
Full,
FullMaterialNotExamined,
LessThanFullMaterialNotExamined,
Abbreviated,
Core,
Partial,
Minimal,
Prepublication,
Unknown,
NotApplicable,
ObsoleteFull,
ObsoleteMinimal,
AddedFromBatch,
}
/// Descriptive cataloging form, as stored in
/// [`Leader::descriptive_cataloging_form`].
///
/// NOTE(review): variant names appear to mirror the MARC 21 leader/18 code
/// list; the byte-to-variant mapping is not visible in this file — confirm.
#[derive(Debug, PartialEq, Eq)]
pub enum CatalogingForm {
NonIsbd,
Aacr2,
IsbdPunctuationOmitted,
IsbdPunctuationIncluded,
NonIsbdPunctuationOmitted,
Unknown,
}
/// Multipart resource record level, as stored in
/// [`Leader::multipart_resource_record_level`].
///
/// NOTE(review): variant names appear to mirror the MARC 21 leader/19 code
/// list; the byte-to-variant mapping is not visible in this file — confirm.
#[derive(Debug, PartialEq, Eq)]
pub enum MultipartResourceRecordLevel {
NotApplicable,
Set,
PartWithIndependentTitle,
// NOTE(review): lowercase "with" is inconsistent with `PartWithIndependentTitle`
// above; renaming would break the public API, so it is left as-is.
PartwithDependentTitle,
}
/// Closed set of text decoders, one per [`CodingScheme`] variant.
///
/// Built by `CodingScheme::decoder`; its [`TextDecoder`] impl forwards to the
/// wrapped decoder.
enum Decoder {
/// Wraps the MARC-8 decoder.
Marc8(Marc8Decoder),
/// Wraps the UTF-8 decoder.
Utf8(Utf8Decoder),
}
/// Dispatches decoding to whichever concrete decoder this value wraps.
impl TextDecoder for Decoder {
    fn decode<'a>(&self, text: &'a [u8]) -> Result<Cow<'a, str>, DecodeError> {
        match self {
            Self::Marc8(inner) => inner.decode(text),
            Self::Utf8(inner) => inner.decode(text),
        }
    }
}
// Delimiter bytes of the record format.
// NOTE(review): none of these is referenced in this file; presumably they are
// consumed by the `parser` module — confirm.
/// Byte `0x1D`; by its name, the delimiter between records.
const RECORD_SEPARATOR: u8 = 0x1D;
/// Byte `0x1E`; by its name, the delimiter between fields.
const FIELD_SEPARATOR: u8 = 0x1E;
/// Byte `0x1F`; by its name, the delimiter between subfields.
const SUBFIELD_SEPARATOR: u8 = 0x1F;
/// A directory entry: a three-character tag together with a length and a
/// starting position.
///
/// NOTE(review): presumably `field_length` and `starting_pos` are byte
/// quantities locating the field's data within the record — confirm against
/// the parser, which is outside this file.
#[derive(Debug)]
pub struct DirectoryEntry {
    /// Three-character field tag.
    pub tag: [char; 3],
    /// Length of the field this entry refers to.
    pub field_length: usize,
    /// Starting position of the field this entry refers to.
    pub starting_pos: usize,
}

impl DirectoryEntry {
    /// Returns `true` when the first two tag characters are both `'0'`
    /// (i.e. the tag has the form `00x`); the third character is ignored.
    fn is_control(&self) -> bool {
        matches!(self.tag, ['0', '0', _])
    }
}
/// Failure modes of [`TextDecoder::decode`].
#[derive(Debug, Error)]
enum DecodeError {
/// The bytes were not valid UTF-8; wraps the standard-library error.
#[error("UTF-8 error: {0}")]
Utf(Utf8Error),
/// A byte the decoder does not recognise.
/// NOTE(review): not produced in this file; presumably raised by the MARC-8 decoder.
#[error("Unknown char: {0}")]
Unknown(u8),
/// A base character and a combining character that cannot be paired.
/// NOTE(review): not produced in this file; presumably raised by the MARC-8 decoder.
#[error("Invalid pair: base `{0}` with combining `{1}`")]
InvalidPair(char, char),
/// The input contained a sequence the decoder could not interpret.
/// NOTE(review): not produced in this file; presumably raised by the MARC-8 decoder.
#[error("Invalid character sequence")]
InvalidSequence,
}
/// Converts raw bytes into text.
trait TextDecoder {
/// Decodes `text` into a string.
///
/// The result is a `Cow` tied to the input's lifetime, so an implementation
/// may return a borrow of `text` (as the UTF-8 decoder in this file does) or
/// an owned string. Fails with a [`DecodeError`] when the bytes cannot be
/// decoded.
fn decode<'a>(&self, text: &'a [u8]) -> Result<Cow<'a, str>, DecodeError>;
}
/// Lets a boxed decoder be used wherever a [`TextDecoder`] is expected by
/// forwarding to the boxed value.
impl<T: TextDecoder> TextDecoder for Box<T> {
    fn decode<'a>(&self, text: &'a [u8]) -> Result<Cow<'a, str>, DecodeError> {
        // `&**self` goes from `&Box<T>` to `&T`, so the inner impl is called directly.
        T::decode(&**self, text)
    }
}
/// Three-character tag identifying a field.
#[derive(Debug, PartialEq, Eq)]
pub struct FieldTag([char; 3]);

impl Display for FieldTag {
    /// Writes the three tag characters back-to-back. Characters are written
    /// straight to the formatter, so width/fill flags have no effect.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self(chars) = self;
        chars.iter().try_for_each(|&c| f.write_char(c))
    }
}
/// A field of a [`Record`]: either a control field or a data field.
#[derive(Debug, PartialEq, Eq)]
pub enum Field {
/// A control field: a tag plus a single string of data.
Control(ControlField),
/// A data field: a tag, indicators and a list of subfields.
Data(DataField),
}
impl Field {
    /// Returns the inner [`ControlField`] if this is a control field,
    /// otherwise `None`.
    pub fn control(&self) -> Option<&ControlField> {
        if let Field::Control(inner) = self {
            Some(inner)
        } else {
            None
        }
    }

    /// Returns the inner [`DataField`] if this is a data field, otherwise
    /// `None`.
    pub fn data(&self) -> Option<&DataField> {
        if let Field::Data(inner) = self {
            Some(inner)
        } else {
            None
        }
    }
}
/// A control field: a tag and its data as one string, with no indicators or
/// subfields.
#[derive(Debug, PartialEq, Eq)]
pub struct ControlField {
/// The field's tag.
pub tag: FieldTag,
/// The field's content.
pub data: String,
}
/// A data field: a tag, its indicator characters and its subfields.
#[derive(Debug, PartialEq, Eq)]
pub struct DataField {
/// The field's tag.
pub tag: FieldTag,
/// The field's indicator characters.
/// NOTE(review): the number of indicators is not fixed by this type — confirm
/// what the parser produces.
pub indicator: Vec<char>,
/// The field's subfields.
pub subfields: Vec<Subfield>,
}
/// Single-character code identifying a subfield.
#[derive(Debug, PartialEq, Eq)]
pub struct SubfieldTag(char);

impl Display for SubfieldTag {
    /// Writes the single tag character straight to the formatter, so
    /// width/fill flags have no effect.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let &Self(code) = self;
        f.write_char(code)
    }
}
/// One subfield of a [`DataField`]: a tag and its text.
#[derive(Debug, PartialEq, Eq)]
pub struct Subfield {
/// The subfield's tag.
pub tag: SubfieldTag,
/// The subfield's content.
pub data: String,
}
/// Stateless decoder for text that is already UTF-8.
struct Utf8Decoder {}

impl TextDecoder for Utf8Decoder {
    /// Validates `text` as UTF-8 and returns it as a borrowed string (no
    /// copy). Invalid input yields `DecodeError::Utf` wrapping the
    /// standard-library error.
    fn decode<'a>(&self, text: &'a [u8]) -> Result<Cow<'a, str>, DecodeError> {
        match str::from_utf8(text) {
            Ok(valid) => Ok(Cow::Borrowed(valid)),
            Err(err) => Err(DecodeError::Utf(err)),
        }
    }
}