1pub mod marc8;
33mod parser;
35
36use core::str;
37use std::{
38 borrow::Cow,
39 fmt::{Display, Write},
40 str::Utf8Error,
41};
42
43use marc8::Marc8Decoder;
44use thiserror::Error;
45use winnow::{
46 combinator::repeat,
47 error::{ContextError, ParseError, StrContext},
48 Parser,
49};
50
51pub fn parse_records(data: &[u8]) -> Result<Vec<Record>, Error> {
65 Ok(repeat(
66 0..,
67 parser::parse_record.context(StrContext::Label("record")),
68 )
69 .parse(data)?)
70}
71
72#[derive(thiserror::Error, Debug)]
74pub enum Error {
75 #[error("Failed to parse: {} at byte offset `{}`", reason, offset)]
76 ParseFailed { reason: String, offset: usize },
77}
78
79impl From<ParseError<&[u8], ContextError>> for Error {
80 fn from(value: ParseError<&[u8], ContextError>) -> Self {
81 Self::ParseFailed {
82 reason: value.inner().to_string(),
83 offset: value.offset(),
84 }
85 }
86}
87
88pub struct Record {
94 pub leader: Leader,
96 pub fields: Vec<Field>,
98}
99
100#[derive(Debug)]
103pub struct Leader {
104 pub record_length: u16,
106
107 pub status: Status,
109
110 pub record_type: RecordType,
112
113 pub bibliographical_level: BibliographicalLevel,
115
116 pub control_type: ControlType,
117
118 pub coding_scheme: CodingScheme,
120
121 pub data_base_address: u16,
123
124 pub encoding_level: EncodingLevel,
126 pub descriptive_cataloging_form: CatalogingForm,
127 pub multipart_resource_record_level: MultipartResourceRecordLevel,
128}
129
/// Record status value stored in [`Leader::status`].
///
/// The enum is fieldless, so it derives `Clone` and `Copy`; this lets callers
/// read the value out of a borrowed leader without matching by reference.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Status {
    /// Increase in encoding level.
    IncreaseInEncoding,
    /// Corrected record.
    Corrected,
    /// Deleted record.
    Deleted,
    /// New record.
    New,
    /// Increase in encoding level from prepublication.
    IncreaseFromPrepublication,
}
138
/// Type-of-record value stored in [`Leader::record_type`].
///
/// The enum is fieldless, so it derives `Clone` and `Copy`; this lets callers
/// read the value out of a borrowed leader without matching by reference.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RecordType {
    LanguageMaterial,
    NotatedMusic,
    ManuscriptNotatedMusic,
    CartographicMaterial,
    ManuscriptCartographicMaterial,
    ProjectedMedium,
    NonmusicalSoundRecording,
    MusicalSoundRecording,
    TwoDimensionalNonprojectableGraphic,
    ComputerFile,
    Kit,
    MixedMaterials,
    ThreeDimensionalArtifact,
    ManuscriptLanguageMaterial,
}
156
/// Bibliographical level value stored in [`Leader::bibliographical_level`].
///
/// The enum is fieldless, so it derives `Clone` and `Copy`; this lets callers
/// read the value out of a borrowed leader without matching by reference.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BibliographicalLevel {
    MonographicComponentPart,
    SerialComponentPart,
    Collection,
    Subunit,
    IntegratingResource,
    Monograph,
    Serial,
    Unknown,
}
168
/// Type-of-control value stored in [`Leader::control_type`].
///
/// The enum is fieldless, so it derives `Clone` and `Copy`; this lets callers
/// read the value out of a borrowed leader without matching by reference.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ControlType {
    /// No specific type of control.
    Unspecified,
    /// Archival control.
    Archival,
}
174
/// Character coding scheme stored in [`Leader::coding_scheme`].
///
/// The scheme determines which text decoder is used: `Marc8` selects the
/// MARC-8 decoder and `Ucs` selects the UTF-8 decoder.
///
/// The enum is fieldless, so it derives `Clone` and `Copy`; this lets callers
/// read the value out of a borrowed leader without matching by reference.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CodingScheme {
    /// Text is decoded with the MARC-8 decoder.
    Marc8,
    /// Text is decoded as UTF-8.
    Ucs,
}
182
183impl CodingScheme {
184 fn decoder(&self) -> Decoder {
185 match self {
186 CodingScheme::Marc8 => Decoder::Marc8(marc8::Marc8Decoder {}),
187 CodingScheme::Ucs => Decoder::Utf8(Utf8Decoder {}),
188 }
189 }
190}
191
/// Encoding level value stored in [`Leader::encoding_level`].
///
/// The enum is fieldless, so it derives `Clone` and `Copy`; this lets callers
/// read the value out of a borrowed leader without matching by reference.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum EncodingLevel {
    Full,
    FullMaterialNotExamined,
    LessThanFullMaterialNotExamined,
    Abbreviated,
    Core,
    Partial,
    Minimal,
    Prepublication,
    Unknown,
    NotApplicable,
    ObsoleteFull,
    ObsoleteMinimal,
    AddedFromBatch,
}
208
/// Descriptive cataloging form stored in
/// [`Leader::descriptive_cataloging_form`].
///
/// The enum is fieldless, so it derives `Clone` and `Copy`; this lets callers
/// read the value out of a borrowed leader without matching by reference.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CatalogingForm {
    NonIsbd,
    Aacr2,
    IsbdPunctuationOmitted,
    IsbdPunctuationIncluded,
    NonIsbdPunctuationOmitted,
    Unknown,
}
218
/// Multipart resource record level stored in
/// [`Leader::multipart_resource_record_level`].
///
/// The enum is fieldless, so it derives `Clone` and `Copy`; this lets callers
/// read the value out of a borrowed leader without matching by reference.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum MultipartResourceRecordLevel {
    NotApplicable,
    Set,
    PartWithIndependentTitle,
    // NOTE(review): the lowercase "with" is inconsistent with
    // `PartWithIndependentTitle`; renaming would be a breaking change for
    // callers, so the name is kept as-is.
    PartwithDependentTitle,
}
226
227enum Decoder {
228 Marc8(Marc8Decoder),
229 Utf8(Utf8Decoder),
230}
231
232impl TextDecoder for Decoder {
233 fn decode<'a>(&self, text: &'a [u8]) -> Result<Cow<'a, str>, DecodeError> {
234 match self {
235 Decoder::Marc8(marc8_decoder) => marc8_decoder.decode(text),
236 Decoder::Utf8(utf8_decoder) => utf8_decoder.decode(text),
237 }
238 }
239}
240
/// Byte `0x1D`, used as the record separator.
const RECORD_SEPARATOR: u8 = b'\x1D';
/// Byte `0x1E`, used as the field separator.
const FIELD_SEPARATOR: u8 = b'\x1E';
/// Byte `0x1F`, used as the subfield separator.
const SUBFIELD_SEPARATOR: u8 = b'\x1F';
244
/// One directory entry: a field tag plus the length and starting position
/// recorded for that field.
#[derive(Debug)]
pub struct DirectoryEntry {
    /// Three-character field tag.
    pub tag: [char; 3],
    /// Length of the field.
    pub field_length: usize,
    /// Starting position of the field.
    pub starting_pos: usize,
}

impl DirectoryEntry {
    /// Returns `true` when the tag starts with `"00"`, i.e. the entry refers
    /// to a control field.
    fn is_control(&self) -> bool {
        matches!(self.tag, ['0', '0', _])
    }
}
257
258#[derive(Debug, Error)]
259enum DecodeError {
260 #[error("UTF-8 error: {0}")]
261 Utf(Utf8Error),
262 #[error("Unknown char: {0}")]
263 Unknown(u8),
264 #[error("Invalid pair: base `{0}` with combining `{1}`")]
265 InvalidPair(char, char),
266 #[error("Invalid character sequence")]
267 InvalidSequence,
268}
269
270trait TextDecoder {
271 fn decode<'a>(&self, text: &'a [u8]) -> Result<Cow<'a, str>, DecodeError>;
272}
273
274impl<T> TextDecoder for Box<T>
276where
277 T: TextDecoder + Sized,
278{
279 fn decode<'a>(&self, text: &'a [u8]) -> Result<Cow<'a, str>, DecodeError> {
280 self.as_ref().decode(text)
281 }
282}
283
/// Three-character tag identifying a field.
#[derive(Debug, PartialEq, Eq)]
pub struct FieldTag([char; 3]);

impl Display for FieldTag {
    /// Writes the three tag characters in order, with no separator.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Stops at, and returns, the first write error.
        self.0.iter().try_for_each(|&ch| f.write_char(ch))
    }
}
294
295#[derive(Debug, PartialEq, Eq)]
296pub enum Field {
297 Control(ControlField),
298 Data(DataField),
299}
300
301impl Field {
302 pub fn control(&self) -> Option<&ControlField> {
304 match self {
305 Field::Control(control_field) => Some(control_field),
306 Field::Data(_) => None,
307 }
308 }
309
310 pub fn data(&self) -> Option<&DataField> {
312 match self {
313 Field::Control(_) => None,
314 Field::Data(data_field) => Some(data_field),
315 }
316 }
317}
318
319#[derive(Debug, PartialEq, Eq)]
323pub struct ControlField {
324 pub tag: FieldTag,
325 pub data: String,
326}
327
328#[derive(Debug, PartialEq, Eq)]
332pub struct DataField {
333 pub tag: FieldTag,
334 pub indicator: Vec<char>,
335 pub subfields: Vec<Subfield>,
336}
337
/// Single-character tag identifying a subfield.
#[derive(Debug, PartialEq, Eq)]
pub struct SubfieldTag(char);

impl Display for SubfieldTag {
    /// Writes the tag character on its own.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self(code) = self;
        f.write_char(*code)
    }
}
348
349#[derive(Debug, PartialEq, Eq)]
350pub struct Subfield {
351 pub tag: SubfieldTag,
352 pub data: String,
353}
354
355struct Utf8Decoder {}
356
357impl TextDecoder for Utf8Decoder {
358 fn decode<'a>(&self, text: &'a [u8]) -> Result<Cow<'a, str>, DecodeError> {
359 str::from_utf8(&text)
360 .map(|s| Cow::Borrowed(s))
361 .map_err(|e| DecodeError::Utf(e))
362 }
363}