import sys
from pathlib import Path
sys.path.insert(0, str(Path(__file__).parent.parent))
try:
from mrrc import (
MARCReader, Record, Field, Leader,
record_to_csv, records_to_csv, records_to_csv_filtered
)
except ImportError:
print("Error: mrrc not installed")
print("Install with: pip install mrrc")
sys.exit(1)
def create_sample_record():
leader = Leader(
record_type='a',
bibliographic_level='m',
character_coding='a', )
record = Record(leader)
record.add_control_field('001', 'ocm12345678')
record.add_control_field('008', '200101s2020 xxu||||||||||||||||eng||')
title = Field('245', '1', '0')
title.add_subfield('a', 'Rust systems programming /')
title.add_subfield('c', 'Jane Smith and Bob Jones.')
record.add_field(title)
author = Field('100', '1', ' ')
author.add_subfield('a', 'Smith, Jane,')
author.add_subfield('d', '1975-')
author.add_subfield('e', 'author.')
record.add_field(author)
contributor = Field('700', '1', ' ')
contributor.add_subfield('a', 'Jones, Bob,')
contributor.add_subfield('d', '1980-')
contributor.add_subfield('e', 'author.')
record.add_field(contributor)
pub = Field('260', ' ', ' ')
pub.add_subfield('a', 'San Francisco :')
pub.add_subfield('b', 'O\'Reilly Media,')
pub.add_subfield('c', '2020.')
record.add_field(pub)
phys = Field('300', ' ', ' ')
phys.add_subfield('a', 'xv, 450 pages ;')
phys.add_subfield('c', '24 cm.')
record.add_field(phys)
isbn = Field('020', ' ', ' ')
isbn.add_subfield('a', '9781491927285')
record.add_field(isbn)
for subject_text in [
'Rust (Computer program language)',
'Systems programming (Computer science)',
'C (Computer program language)',
]:
subject = Field('650', ' ', '0')
subject.add_subfield('a', subject_text)
record.add_field(subject)
genre = Field('655', ' ', '7')
genre.add_subfield('a', 'Handbooks and manuals.')
genre.add_subfield('2', 'lcgft')
record.add_field(genre)
return record
def demonstrate_record_structure(record):
print("\n" + "=" * 70)
print("ORIGINAL RECORD")
print("=" * 70 + "\n")
print(f"Title: {record.title}")
print(f"Author: {record.author}")
print(f"All Authors: {', '.join(record.authors())}")
print(f"ISBN: {', '.join(record.isbns())}")
print(f"Subjects: {', '.join(record.subjects[:2])}... ({len(record.subjects)} total)")
if record.publication_info():
pub = record.publication_info()
print(f"Published: {pub.date} by {pub.publisher} in {pub.place}")
print()
def convert_to_json(record):
print("=" * 70)
print("1. JSON FORMAT (mrrc-specific)")
print("=" * 70 + "\n")
try:
json_str = record.to_json()
import json
json_obj = json.loads(json_str)
print("Structure:")
print(f" Leader: record_type={json_obj['leader']['record_type']}, "
f"bibliographic_level={json_obj['leader']['bibliographic_level']}")
print(f" Fields: {len(json_obj['fields'])} fields")
print()
print("Sample JSON output (first 300 chars):")
print(f" {json_str[:300]}...")
print()
field_types = {}
for field in json_obj.get('fields', []):
field_type = 'control' if 'value' in field else 'data'
field_types[field_type] = field_types.get(field_type, 0) + 1
print("Field breakdown:")
for field_type, count in field_types.items():
print(f" {field_type}: {count} field(s)")
except Exception as e:
print(f"Error: {e}")
print()
def convert_to_marcjson(record):
print("=" * 70)
print("2. MARCJSON FORMAT (Standard MARC-JSON)")
print("=" * 70 + "\n")
try:
marcjson_str = record.to_marcjson()
import json
marcjson_obj = json.loads(marcjson_str)
print("Structure:")
print(f" Records in object: {len(marcjson_obj) if isinstance(marcjson_obj, list) else 1}")
if isinstance(marcjson_obj, list) and marcjson_obj:
first = marcjson_obj[0]
else:
first = marcjson_obj
if isinstance(first, dict):
print(f" Fields: {len(first) if not isinstance(first, list) else 'N/A'}")
for key in sorted(list(first.keys())[:5]): value = first[key]
if isinstance(value, list) and value:
print(f" {key}: array with {len(value)} item(s)")
elif isinstance(value, dict):
print(f" {key}: object")
else:
print(f" {key}: {type(value).__name__}")
print()
print("Sample MARCJSON output (first 300 chars):")
print(f" {marcjson_str[:300]}...")
print()
print("Use cases:")
print(" - Library system integration (standard format)")
print(" - Linked Data / RDF conversion")
print(" - MARC21 XML conversion tools")
print(" - Other library software")
except Exception as e:
print(f"Error: {e}")
print()
def convert_to_xml(record):
print("=" * 70)
print("3. MARCXML FORMAT")
print("=" * 70 + "\n")
try:
xml_str = record.to_xml()
print("Structure (first 500 chars):")
print(f" {xml_str[:500]}...")
print()
import xml.etree.ElementTree as ET
root = ET.fromstring(xml_str)
element_counts = {}
for elem in root.iter():
tag = elem.tag.split('}')[-1] element_counts[tag] = element_counts.get(tag, 0) + 1
print("MARCXML structure breakdown:")
for tag in sorted(element_counts.keys()):
print(f" <{tag}>: {element_counts[tag]} element(s)")
print()
print("Use cases:")
print(" - XML processing pipelines (XSLT, XPath)")
print(" - Document management systems")
print(" - Data warehousing")
print(" - LOC tools and services")
except Exception as e:
print(f"Error: {e}")
print()
def convert_to_csv(record):
print("=" * 70)
print("4. CSV FORMAT (Tabular)")
print("=" * 70 + "\n")
try:
records = [record]
record2 = Record(record.leader)
record2.add_control_field('001', 'test002')
record2.add_control_field('008', '210115s2021 xxu||||||||||||||||eng||')
title2 = Field('245', '1', '0')
title2.add_subfield('a', 'Python systems programming /')
title2.add_subfield('c', 'Alice Brown.')
record2.add_field(title2)
author2 = Field('100', '1', ' ')
author2.add_subfield('a', 'Brown, Alice,')
author2.add_subfield('e', 'author.')
record2.add_field(author2)
records.append(record2)
print("Full CSV export (all fields):")
csv_str = records_to_csv(records)
lines = csv_str.split('\n')
for line in lines[:10]:
print(f" {line}")
print()
print("Filtered CSV export (only 245, 100, 650 fields):")
csv_filtered = records_to_csv_filtered(
records,
lambda tag: tag in ('245', '100', '650')
)
lines_filtered = csv_filtered.split('\n')
for line in lines_filtered[:10]:
print(f" {line}")
print()
print("Use cases:")
print(" - Data analysis in Excel/Sheets")
print(" - SQL database import")
print(" - Python pandas DataFrames")
print(" - Statistical analysis tools")
print(" - Field-level filtering for reporting")
except Exception as e:
print(f"Error: {e}")
print()
def round_trip_conversion(record):
print("=" * 70)
print("5. ROUND-TRIP CONVERSION (Fidelity Check)")
print("=" * 70 + "\n")
try:
json1 = record.to_json()
import json as json_module
json_obj = json_module.loads(json1)
record2 = __import__('mrrc').json.json_to_record(json_obj)
json2 = record2.to_json()
match = json1 == json2
print(f"Original JSON → Record → JSON: {'✓ Match' if match else '✗ Differs'}")
print(f" Original size: {len(json1)} chars")
print(f" Round-trip size: {len(json2)} chars")
print()
original_title = record.title
restored_title = record2.title
print(f"Fidelity verification:")
print(f" Title match: {original_title == restored_title}")
print(f" Author match: {record.author == record2.author}")
print(f" Subject count: {len(record.subjects)} → {len(record2.subjects)}")
except Exception as e:
print(f"Round-trip conversion not fully supported: {e}")
print("(This is expected - not all conversions support round-trip.)")
print()
def comparison_table():
print("=" * 70)
print("FORMAT COMPARISON")
print("=" * 70 + "\n")
print("""
FORMAT | NATIVE | SIZE | USE CASE
─────────────┼────────┼─────────┼──────────────────────────────────
JSON | mrrc | Medium | API responses, document stores
MARCJSON | Std | Medium | Library system integration
MARCXML | Std | Large | XML pipelines, XSLT transforms
CSV | N/A | Varies | Spreadsheets, data analysis
ISO 2709 | Binary | Small | Archive storage, interchange
─────────────┼────────┼─────────┼──────────────────────────────────
JSON:
Pros: Compact, language-neutral, easy to parse
Cons: Not standard for MARC
MARCJSON:
Pros: Standard MARC JSON format, widely supported
Cons: Slightly larger than binary
MARCXML:
Pros: Standard format, XSLT support, widely supported
Cons: Larger file size, slower parsing
CSV:
Pros: Spreadsheet-friendly, simple structure
Cons: Loses MARC structure, subfield complexity
ISO 2709:
Pros: Compact binary, original MARC format
Cons: Binary (not human-readable), requires MARC parser
""")
print()
def main():
print("\n" + "=" * 70)
print("MRRC: Format Conversion Examples (Python/pymarc-compatible)")
print("=" * 70)
record = create_sample_record()
demonstrate_record_structure(record)
convert_to_json(record)
convert_to_marcjson(record)
convert_to_xml(record)
convert_to_csv(record)
round_trip_conversion(record)
comparison_table()
print("=" * 70)
print("RECOMMENDATIONS")
print("=" * 70)
print("""
CHOOSE YOUR FORMAT BASED ON YOUR USE CASE:
1. Archival/Storage:
→ ISO 2709 binary (most compact, original format)
2. Web APIs / JSON Services:
→ JSON (mrrc format - compact and clean)
→ MARCJSON (if interoperability needed)
3. Data Analysis / Reporting:
→ CSV (simple, spreadsheet-friendly)
4. XML Processing / XSLT:
→ MARCXML (standard, tool support)
5. Linked Data / RDF:
→ MARCJSON or custom RDF conversion
PERFORMANCE TIPS:
- JSON conversions are fastest
- MARCXML is slower due to parsing overhead
- CSV generation can be done incrementally
- Keep ISO 2709 for long-term storage
MIGRATION FROM PYMARC:
All these methods work identically in pymarc:
record.to_json()
record.to_marcjson()
record.to_xml()
Just swap the import and you're ready to go!
""")
print()
if __name__ == '__main__':
main()