mrrc 0.7.6

A Rust library for reading, writing, and manipulating MARC bibliographic records in ISO 2709 binary format
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
#!/usr/bin/env python3
"""
Format conversion examples (Python/pymarc-compatible API)

This example demonstrates how to convert MARC records to various
serialization formats using the mrrc Python wrapper:
- JSON (mrrc-specific format)
- MARCJSON (standard MARC-JSON format)
- MARCXML (standard LOC MARCXML format)
- CSV (tabular format)

These conversions are useful for:
- Data interchange with other systems
- Integration with web APIs (JSON)
- Search indexing (various formats)
- Data analysis and reporting (CSV)
"""

import sys
from pathlib import Path

# Add parent directory to path for imports
sys.path.insert(0, str(Path(__file__).parent.parent))

try:
    from mrrc import (
        MARCReader, Record, Field, Leader,
        record_to_csv, records_to_csv, records_to_csv_filtered
    )
except ImportError:
    print("Error: mrrc not installed")
    print("Install with: pip install mrrc")
    sys.exit(1)


def create_sample_record():
    """Create a sample record for format conversion demonstrations."""
    leader = Leader(
        record_type='a',
        bibliographic_level='m',
        character_coding='a',  # UTF-8
    )
    
    record = Record(leader)
    
    # Control fields
    record.add_control_field('001', 'ocm12345678')
    record.add_control_field('008', '200101s2020    xxu||||||||||||||||eng||')
    
    # Title
    title = Field('245', '1', '0')
    title.add_subfield('a', 'Rust systems programming /')
    title.add_subfield('c', 'Jane Smith and Bob Jones.')
    record.add_field(title)
    
    # Author
    author = Field('100', '1', ' ')
    author.add_subfield('a', 'Smith, Jane,')
    author.add_subfield('d', '1975-')
    author.add_subfield('e', 'author.')
    record.add_field(author)
    
    # Additional author
    contributor = Field('700', '1', ' ')
    contributor.add_subfield('a', 'Jones, Bob,')
    contributor.add_subfield('d', '1980-')
    contributor.add_subfield('e', 'author.')
    record.add_field(contributor)
    
    # Publication
    pub = Field('260', ' ', ' ')
    pub.add_subfield('a', 'San Francisco :')
    pub.add_subfield('b', 'O\'Reilly Media,')
    pub.add_subfield('c', '2020.')
    record.add_field(pub)
    
    # Physical description
    phys = Field('300', ' ', ' ')
    phys.add_subfield('a', 'xv, 450 pages ;')
    phys.add_subfield('c', '24 cm.')
    record.add_field(phys)
    
    # ISBN
    isbn = Field('020', ' ', ' ')
    isbn.add_subfield('a', '9781491927285')
    record.add_field(isbn)
    
    # Subjects
    for subject_text in [
        'Rust (Computer program language)',
        'Systems programming (Computer science)',
        'C (Computer program language)',
    ]:
        subject = Field('650', ' ', '0')
        subject.add_subfield('a', subject_text)
        record.add_field(subject)
    
    # Genre/form
    genre = Field('655', ' ', '7')
    genre.add_subfield('a', 'Handbooks and manuals.')
    genre.add_subfield('2', 'lcgft')
    record.add_field(genre)
    
    return record


def demonstrate_record_structure(record):
    """Show basic record information."""
    print("\n" + "=" * 70)
    print("ORIGINAL RECORD")
    print("=" * 70 + "\n")
    
    print(f"Title:           {record.title}")
    print(f"Author:          {record.author}")
    print(f"All Authors:     {', '.join(record.authors())}")
    print(f"ISBN:            {', '.join(record.isbns())}")
    print(f"Subjects:        {', '.join(record.subjects[:2])}... ({len(record.subjects)} total)")
    
    if record.publication_info():
        pub = record.publication_info()
        print(f"Published:       {pub.date} by {pub.publisher} in {pub.place}")
    
    print()


def convert_to_json(record):
    """
    Convert record to JSON format (mrrc-specific).
    
    This is a flat JSON representation useful for:
    - Data interchange
    - API responses
    - Document storage (MongoDB, etc.)
    """
    print("=" * 70)
    print("1. JSON FORMAT (mrrc-specific)")
    print("=" * 70 + "\n")
    
    try:
        json_str = record.to_json()
        
        # Show pretty-printed version (limit length)
        import json
        json_obj = json.loads(json_str)
        
        print("Structure:")
        print(f"  Leader: record_type={json_obj['leader']['record_type']}, "
              f"bibliographic_level={json_obj['leader']['bibliographic_level']}")
        print(f"  Fields: {len(json_obj['fields'])} fields")
        print()
        
        # Show sample fields
        print("Sample JSON output (first 300 chars):")
        print(f"  {json_str[:300]}...")
        print()
        
        # Count field types
        field_types = {}
        for field in json_obj.get('fields', []):
            field_type = 'control' if 'value' in field else 'data'
            field_types[field_type] = field_types.get(field_type, 0) + 1
        
        print("Field breakdown:")
        for field_type, count in field_types.items():
            print(f"  {field_type}: {count} field(s)")
        
    except Exception as e:
        print(f"Error: {e}")
    
    print()


def convert_to_marcjson(record):
    """
    Convert record to MARCJSON format (standard MARC-JSON).
    
    MARCJSON is the standard JSON representation for MARC records.
    It's useful for:
    - Standard data interchange
    - Library system integration
    - Compatibility with other MARC tools
    """
    print("=" * 70)
    print("2. MARCJSON FORMAT (Standard MARC-JSON)")
    print("=" * 70 + "\n")
    
    try:
        marcjson_str = record.to_marcjson()
        
        # Parse and analyze
        import json
        marcjson_obj = json.loads(marcjson_str)
        
        print("Structure:")
        print(f"  Records in object: {len(marcjson_obj) if isinstance(marcjson_obj, list) else 1}")
        
        # Get first record (MARCJSON is typically an array)
        if isinstance(marcjson_obj, list) and marcjson_obj:
            first = marcjson_obj[0]
        else:
            first = marcjson_obj
        
        if isinstance(first, dict):
            print(f"  Fields: {len(first) if not isinstance(first, list) else 'N/A'}")
            
            # Show structure
            for key in sorted(list(first.keys())[:5]):  # Show first 5 fields
                value = first[key]
                if isinstance(value, list) and value:
                    print(f"    {key}: array with {len(value)} item(s)")
                elif isinstance(value, dict):
                    print(f"    {key}: object")
                else:
                    print(f"    {key}: {type(value).__name__}")
        
        print()
        print("Sample MARCJSON output (first 300 chars):")
        print(f"  {marcjson_str[:300]}...")
        print()
        
        # Show use case
        print("Use cases:")
        print("  - Library system integration (standard format)")
        print("  - Linked Data / RDF conversion")
        print("  - MARC21 XML conversion tools")
        print("  - Other library software")
        
    except Exception as e:
        print(f"Error: {e}")
    
    print()


def convert_to_xml(record):
    """
    Convert record to MARCXML format.

    MARCXML is the standard XML representation for MARC records.
    Useful for:
    - XML-based processing pipelines
    - XSLT transformations
    - Document management systems
    - Web service integration
    """
    print("=" * 70)
    print("3. MARCXML FORMAT")
    print("=" * 70 + "\n")
    
    try:
        xml_str = record.to_xml()
        
        print("Structure (first 500 chars):")
        print(f"  {xml_str[:500]}...")
        print()
        
        # Count elements
        import xml.etree.ElementTree as ET
        root = ET.fromstring(xml_str)
        
        # Count different element types
        element_counts = {}
        for elem in root.iter():
            tag = elem.tag.split('}')[-1]  # Remove namespace
            element_counts[tag] = element_counts.get(tag, 0) + 1
        
        print("MARCXML structure breakdown:")
        for tag in sorted(element_counts.keys()):
            print(f"  <{tag}>: {element_counts[tag]} element(s)")
        
        print()
        print("Use cases:")
        print("  - XML processing pipelines (XSLT, XPath)")
        print("  - Document management systems")
        print("  - Data warehousing")
        print("  - LOC tools and services")
        
    except Exception as e:
        print(f"Error: {e}")
    
    print()


def convert_to_csv(record):
    """
    Convert records to CSV format for tabular analysis.
    
    CSV is useful for:
    - Data analysis and reporting
    - Spreadsheet import
    - SQL database loading
    - BI tools integration
    """
    print("=" * 70)
    print("4. CSV FORMAT (Tabular)")
    print("=" * 70 + "\n")
    
    try:
        # Create multiple records for CSV output
        records = [record]
        
        # Add a second record for demonstration
        record2 = Record(record.leader)
        record2.add_control_field('001', 'test002')
        record2.add_control_field('008', '210115s2021    xxu||||||||||||||||eng||')
        
        title2 = Field('245', '1', '0')
        title2.add_subfield('a', 'Python systems programming /')
        title2.add_subfield('c', 'Alice Brown.')
        record2.add_field(title2)
        
        author2 = Field('100', '1', ' ')
        author2.add_subfield('a', 'Brown, Alice,')
        author2.add_subfield('e', 'author.')
        record2.add_field(author2)
        
        records.append(record2)
        
        # Convert to CSV - full tabular output
        print("Full CSV export (all fields):")
        csv_str = records_to_csv(records)
        lines = csv_str.split('\n')
        for line in lines[:10]:
            print(f"  {line}")
        print()
        
        # Filtered CSV export
        print("Filtered CSV export (only 245, 100, 650 fields):")
        csv_filtered = records_to_csv_filtered(
            records, 
            lambda tag: tag in ('245', '100', '650')
        )
        lines_filtered = csv_filtered.split('\n')
        for line in lines_filtered[:10]:
            print(f"  {line}")
        print()
        
        print("Use cases:")
        print("  - Data analysis in Excel/Sheets")
        print("  - SQL database import")
        print("  - Python pandas DataFrames")
        print("  - Statistical analysis tools")
        print("  - Field-level filtering for reporting")
        
    except Exception as e:
        print(f"Error: {e}")
    
    print()


def round_trip_conversion(record):
    """
    Demonstrate round-trip conversion (JSON → Record → JSON).
    
    Shows that conversions preserve data fidelity.
    """
    print("=" * 70)
    print("5. ROUND-TRIP CONVERSION (Fidelity Check)")
    print("=" * 70 + "\n")
    
    try:
        # Convert to JSON
        json1 = record.to_json()
        
        # Parse and convert back
        import json as json_module
        json_obj = json_module.loads(json1)
        
        # Convert back to Record (if supported)
        record2 = __import__('mrrc').json.json_to_record(json_obj)
        
        # Convert again to compare
        json2 = record2.to_json()
        
        # Compare
        match = json1 == json2
        print(f"Original JSON → Record → JSON: {'✓ Match' if match else '✗ Differs'}")
        print(f"  Original size: {len(json1)} chars")
        print(f"  Round-trip size: {len(json2)} chars")
        print()
        
        # Verify field counts
        original_title = record.title
        restored_title = record2.title
        
        print(f"Fidelity verification:")
        print(f"  Title match: {original_title == restored_title}")
        print(f"  Author match: {record.author == record2.author}")
        print(f"  Subject count: {len(record.subjects)}{len(record2.subjects)}")
        
    except Exception as e:
        print(f"Round-trip conversion not fully supported: {e}")
        print("(This is expected - not all conversions support round-trip.)")
    
    print()


def comparison_table():
    """Show a comparison of different formats."""
    print("=" * 70)
    print("FORMAT COMPARISON")
    print("=" * 70 + "\n")
    
    print("""
FORMAT       | NATIVE | SIZE    | USE CASE
─────────────┼────────┼─────────┼──────────────────────────────────
JSON         | mrrc   | Medium  | API responses, document stores
MARCJSON     | Std    | Medium  | Library system integration
MARCXML      | Std    | Large   | XML pipelines, XSLT transforms
CSV          | N/A    | Varies  | Spreadsheets, data analysis
ISO 2709     | Binary | Small   | Archive storage, interchange
─────────────┼────────┼─────────┼──────────────────────────────────

JSON:
  Pros: Compact, language-neutral, easy to parse
  Cons: Not standard for MARC
  
MARCJSON:
  Pros: Standard MARC JSON format, widely supported
  Cons: Slightly larger than binary
  
MARCXML:
  Pros: Standard format, XSLT support, widely supported
  Cons: Larger file size, slower parsing
  
CSV:
  Pros: Spreadsheet-friendly, simple structure
  Cons: Loses MARC structure, subfield complexity

ISO 2709:
  Pros: Compact binary, original MARC format
  Cons: Binary (not human-readable), requires MARC parser
    """)
    print()


def main():
    """Main example runner."""
    
    print("\n" + "=" * 70)
    print("MRRC: Format Conversion Examples (Python/pymarc-compatible)")
    print("=" * 70)
    
    # Create sample record
    record = create_sample_record()
    
    # Show original
    demonstrate_record_structure(record)
    
    # Show all conversions
    convert_to_json(record)
    convert_to_marcjson(record)
    convert_to_xml(record)
    convert_to_csv(record)
    round_trip_conversion(record)
    comparison_table()
    
    print("=" * 70)
    print("RECOMMENDATIONS")
    print("=" * 70)
    print("""
CHOOSE YOUR FORMAT BASED ON YOUR USE CASE:

1. Archival/Storage:
   → ISO 2709 binary (most compact, original format)

2. Web APIs / JSON Services:
   → JSON (mrrc format - compact and clean)
   → MARCJSON (if interoperability needed)

3. Data Analysis / Reporting:
   → CSV (simple, spreadsheet-friendly)

4. XML Processing / XSLT:
   → MARCXML (standard, tool support)

5. Linked Data / RDF:
   → MARCJSON or custom RDF conversion

PERFORMANCE TIPS:
- JSON conversions are fastest
- MARCXML is slower due to parsing overhead
- CSV generation can be done incrementally
- Keep ISO 2709 for long-term storage

MIGRATION FROM PYMARC:
All these methods work identically in pymarc:
  record.to_json()
  record.to_marcjson()
  record.to_xml()
Just swap the import and you're ready to go!
    """)
    print()


if __name__ == '__main__':
    main()