import sys
from pathlib import Path
sys.path.insert(0, str(Path(__file__).parent.parent / 'src-python'))
from pymarc import Record, Field, Indicators, MARCReader, MARCWriter
def create_field(tag: str, ind1: str, ind2: str, subfields: list) -> Field:
    """Assemble a data field from a tag, two indicators and subfield pairs.

    Args:
        tag: Field tag, e.g. ``'245'``.
        ind1: First indicator.
        ind2: Second indicator.
        subfields: Sequence of ``(code, value)`` pairs; they are added to the
            field in the order given.

    Returns:
        The populated ``Field``.
    """
    field = Field(tag, Indicators(ind1, ind2))
    for subfield_code, subfield_value in subfields:
        field.add_subfield(subfield_code, subfield_value)
    return field
def create_edge_case_cjk() -> Record:
    """Build a record whose 245 and 650 fields contain Chinese text.

    Returns:
        A ``Record`` with a 245 field (subfields a, c) and a 650 field
        (subfield a).
    """
    record = Record()
    # A MARC 21 leader is exactly 24 characters; positions 17-19 are blank.
    record.leader = '00000cam a2200000   4500'
    record.add_field(create_field('245', ' ', '1', [
        ('a', '中文标题'),
        ('c', '作者'),
    ]))
    record.add_field(create_field('650', ' ', '0', [
        ('a', '中国'),
    ]))
    return record
def create_edge_case_rtl() -> Record:
    """Build a record whose 245 and 650 fields contain Arabic (right-to-left) text.

    Returns:
        A ``Record`` with a 245 field (subfields a, c) and a 650 field
        (subfield a).
    """
    record = Record()
    # A MARC 21 leader is exactly 24 characters; positions 17-19 are blank.
    record.leader = '00000cam a2200000   4500'
    record.add_field(create_field('245', ' ', '1', [
        ('a', 'العنوان العربي'),
        ('c', 'المؤلف'),
    ]))
    record.add_field(create_field('650', ' ', '0', [
        ('a', 'مصر'),
    ]))
    return record
def create_edge_case_combining_marks() -> Record:
    """Build a record whose 245 field contains Unicode combining marks.

    Subfield a ends with ``e`` followed by U+0301; subfield c contains ``a``
    followed by U+0300, U+0301 and U+0302 stacked on the same base letter.

    Returns:
        A ``Record`` with a single 245 field (subfields a, c).
    """
    record = Record()
    # A MARC 21 leader is exactly 24 characters; positions 17-19 are blank.
    record.leader = '00000cam a2200000   4500'
    # Kept as decomposed sequences (base letter + combining code points).
    combining_e = 'e\u0301'
    combining_multi = 'a\u0300\u0301\u0302'
    record.add_field(create_field('245', ' ', '1', [
        ('a', f'Title with {combining_e}'),
        ('c', f'Aut{combining_multi}r'),
    ]))
    return record
def create_edge_case_max_field_length() -> Record:
    """Build a record with one very long 520 field.

    Subfield a holds 9000 ``x`` characters.

    Returns:
        A ``Record`` with a single 520 field (subfield a).
    """
    record = Record()
    # A MARC 21 leader is exactly 24 characters; positions 17-19 are blank.
    record.leader = '00000cam a2200000   4500'
    large_data = 'x' * 9000
    record.add_field(create_field('520', ' ', ' ', [
        ('a', large_data),
    ]))
    return record
def create_edge_case_many_fields() -> Record:
    """Build a record with a 001 control field and 100 numbered 650 fields.

    Each 650 carries ``Subject NNN`` in subfield a and ``Subdivision NNN`` in
    subfield x, where ``NNN`` runs from 000 to 099.

    Returns:
        A ``Record`` with 101 fields in total.
    """
    record = Record()
    # A MARC 21 leader is exactly 24 characters; positions 17-19 are blank.
    record.leader = '00000cam a2200000   4500'
    record.add_field(Field('001', data='12345678901234'))
    for i in range(100):
        record.add_field(create_field('650', ' ', '0', [
            ('a', f'Subject {i:03d}'),
            ('x', f'Subdivision {i:03d}'),
        ]))
    return record
def create_edge_case_many_subfields() -> Record:
    """Build a record with one 653 field holding 50 subfields.

    Subfield codes cycle through ``a``-``z``, so codes repeat after the 26th
    subfield; values are ``Subfield value NN`` for NN from 00 to 49.

    Returns:
        A ``Record`` with a single 653 field.
    """
    record = Record()
    # A MARC 21 leader is exactly 24 characters; positions 17-19 are blank.
    record.leader = '00000cam a2200000   4500'
    field = Field('653', Indicators(' ', ' '))
    for i in range(50):
        # 97 is ord('a'); the modulo wraps the code back to 'a' after 'z'.
        field.add_subfield(chr(97 + (i % 26)), f'Subfield value {i:02d}')
    record.add_field(field)
    return record
def create_edge_case_empty_subfield() -> Record:
    """Build a record whose 245 field has an empty-string subfield b.

    Returns:
        A ``Record`` with a single 245 field (subfields a, b, c) where
        subfield b is ``''``.
    """
    record = Record()
    # A MARC 21 leader is exactly 24 characters; positions 17-19 are blank.
    record.leader = '00000cam a2200000   4500'
    record.add_field(create_field('245', ' ', '1', [
        ('a', 'Title'),
        ('b', ''),
        ('c', 'Author'),
    ]))
    return record
def create_edge_case_repeating_subfields() -> Record:
    """Build a record whose 650 field repeats subfield a three times.

    Returns:
        A ``Record`` with a single 650 field: three ``a`` subfields followed
        by one ``x`` subfield.
    """
    record = Record()
    # A MARC 21 leader is exactly 24 characters; positions 17-19 are blank.
    record.leader = '00000cam a2200000   4500'
    record.add_field(create_field('650', ' ', '0', [
        ('a', 'First subject'),
        ('a', 'Second subject'),
        ('a', 'Third subject'),
        ('x', 'Subdivision'),
    ]))
    return record
def create_edge_case_whitespace_preservation() -> Record:
    """Build a record whose 245 subfields start and end with spaces.

    Returns:
        A ``Record`` with a single 245 field (subfields a, c) whose values
        carry leading and trailing whitespace.
    """
    record = Record()
    # A MARC 21 leader is exactly 24 characters; positions 17-19 are blank.
    record.leader = '00000cam a2200000   4500'
    record.add_field(create_field('245', ' ', '1', [
        ('a', ' leading and trailing spaces '),
        ('c', ' Author Name '),
    ]))
    return record
def create_edge_case_multiple_245() -> Record:
    """Build a record that contains two 245 fields.

    Returns:
        A ``Record`` with two 245 fields, each with subfields a and c, added
        in the order "First title" then "Second title".
    """
    record = Record()
    # A MARC 21 leader is exactly 24 characters; positions 17-19 are blank.
    record.leader = '00000cam a2200000   4500'
    record.add_field(create_field('245', ' ', '1', [
        ('a', 'First title'),
        ('c', 'Author 1'),
    ]))
    record.add_field(create_field('245', ' ', '1', [
        ('a', 'Second title'),
        ('c', 'Author 2'),
    ]))
    return record
def create_edge_case_field_reordering() -> Record:
    """Build a record whose fields are added out of ascending tag order.

    Fields are added in the order 001, 650, 245, 260.

    Returns:
        A ``Record`` with those four fields.
    """
    record = Record()
    # A MARC 21 leader is exactly 24 characters; positions 17-19 are blank.
    record.leader = '00000cam a2200000   4500'
    record.add_field(Field('001', data='001value'))
    # 650 is deliberately added before 245 and 260.
    record.add_field(create_field('650', ' ', '0', [('a', 'Subject')]))
    record.add_field(create_field('245', ' ', '1', [('a', 'Title'), ('c', 'Author')]))
    record.add_field(create_field('260', ' ', ' ', [
        ('a', 'Place'),
        ('b', 'Publisher'),
        ('c', 'Date'),
    ]))
    return record
def create_edge_case_subfield_reordering() -> Record:
    """Build a record whose 700 field lists subfields in non-alphabetical order.

    Subfields are added in the order d, c, a, q.

    Returns:
        A ``Record`` with a single 700 field.
    """
    record = Record()
    # A MARC 21 leader is exactly 24 characters; positions 17-19 are blank.
    record.leader = '00000cam a2200000   4500'
    record.add_field(create_field('700', '1', ' ', [
        ('d', '1950-'),
        ('c', 'Musician'),
        ('a', 'Name, First'),
        ('q', '(Full name)'),
    ]))
    return record
def create_edge_case_mixed_script() -> Record:
    """Build a record whose 650 subfield mixes Latin, Arabic and Hebrew text.

    Returns:
        A ``Record`` with a single 650 field (subfield a).
    """
    record = Record()
    # A MARC 21 leader is exactly 24 characters; positions 17-19 are blank.
    record.leader = '00000cam a2200000   4500'
    mixed_text = 'English مصر English עברית English'
    record.add_field(create_field('650', ' ', '0', [
        ('a', mixed_text),
    ]))
    return record
def create_edge_case_control_field_validity() -> Record:
    """Build a record with a 001 control field followed by a 245 data field.

    Returns:
        A ``Record`` with a 001 field (data ``'123456789012'``) and a 245
        field (subfield a).
    """
    record = Record()
    # A MARC 21 leader is exactly 24 characters; positions 17-19 are blank.
    record.leader = '00000cam a2200000   4500'
    record.add_field(Field('001', data='123456789012'))
    record.add_field(create_field('245', ' ', '1', [('a', 'Test Title')]))
    return record
def create_edge_case_blank_indicators() -> Record:
    """Build a record that pairs blank indicators with non-blank ones.

    The 245 field has both indicators blank; the 650 field uses indicators
    ``1`` and ``0``.

    Returns:
        A ``Record`` with a 245 field (subfield a) and a 650 field
        (subfields a, x).
    """
    record = Record()
    # A MARC 21 leader is exactly 24 characters; positions 17-19 are blank.
    record.leader = '00000cam a2200000   4500'
    record.add_field(create_field('245', ' ', ' ', [('a', 'Title with blanks')]))
    record.add_field(create_field('650', '1', '0', [
        ('a', 'Subject'),
        ('x', 'Subdivision'),
    ]))
    return record
def create_edge_case_control_chars() -> Record:
    """Build a record whose 520 subfield contains tab characters.

    Returns:
        A ``Record`` with a single 520 field (subfield a).
    """
    record = Record()
    # A MARC 21 leader is exactly 24 characters; positions 17-19 are blank.
    record.leader = '00000cam a2200000   4500'
    control_data = 'Data\twith\ttabs'
    record.add_field(create_field('520', ' ', ' ', [
        ('a', control_data),
    ]))
    return record
def main():
    """Generate the fidelity test fixture from real and synthetic records.

    Reads ``10k_records.mrc`` from ``tests/data/fixtures``, takes its first 90
    records, appends the synthetic edge-case records, writes at most 105
    records to ``fidelity_test_100.mrc`` in the same directory, then reads the
    output back and compares the record count.

    Exits with status 1 when the source fixture is missing or when the number
    of records read back differs from the number written.
    """
    fixtures_dir = Path(__file__).parent.parent / 'tests' / 'data' / 'fixtures'
    fixture_10k = fixtures_dir / '10k_records.mrc'
    output_path = fixtures_dir / 'fidelity_test_100.mrc'

    if not fixture_10k.exists():
        print(f"Error: {fixture_10k} not found")
        sys.exit(1)

    output_path.parent.mkdir(parents=True, exist_ok=True)

    print(f"Extracting records from {fixture_10k.name}...")
    with open(fixture_10k, 'rb') as f:
        # MARCReader yields None for a record it cannot decode; drop those so
        # a placeholder is never passed to MARCWriter.
        all_records = [rec for rec in MARCReader(f) if rec is not None]
    print(f"Total records in 10k fixture: {len(all_records)}")

    records_to_write = list(all_records[:75])
    print(f"Extracted {len(records_to_write)} real records")

    # NOTE(review): create_edge_case_control_chars() is defined in this file
    # but is not part of this list - confirm whether the omission is intended.
    edge_cases = [
        create_edge_case_cjk(),
        create_edge_case_rtl(),
        create_edge_case_combining_marks(),
        create_edge_case_max_field_length(),
        create_edge_case_many_fields(),
        create_edge_case_many_subfields(),
        create_edge_case_empty_subfield(),
        create_edge_case_repeating_subfields(),
        create_edge_case_whitespace_preservation(),
        create_edge_case_multiple_245(),
        create_edge_case_field_reordering(),
        create_edge_case_subfield_reordering(),
        create_edge_case_mixed_script(),
        create_edge_case_control_field_validity(),
        create_edge_case_blank_indicators(),
    ]
    print(f"Created {len(edge_cases)} edge case records")

    # Final order: 90 real records first, then the synthetic ones, capped at 105.
    records_to_write.extend(all_records[75:90])
    records_to_write.extend(edge_cases)
    records_to_write = records_to_write[:105]

    print(f"Writing {len(records_to_write)} records to {output_path}...")
    with open(output_path, 'wb') as f:
        writer = MARCWriter(f)
        for record in records_to_write:
            writer.write(record)

    print(f"✓ Fidelity test set created: {output_path}")
    print(f" Total records: {len(records_to_write)}")
    print(f" File size: {output_path.stat().st_size:,} bytes")

    with open(output_path, 'rb') as f:
        # Only count records that actually decoded; a None placeholder for a
        # corrupt record must not satisfy the count check below.
        verified = [rec for rec in MARCReader(f) if rec is not None]
    print(f" Verified: {len(verified)} records read back")

    if len(verified) == len(records_to_write):
        print("✓ Validation PASSED: Record count matches")
    else:
        print(f"✗ Validation FAILED: Expected {len(records_to_write)}, got {len(verified)}")
        sys.exit(1)
# Run the fixture generator only when this file is executed as a script.
if __name__ == '__main__':
    main()