import json
import os
from pathlib import Path
from typing import Dict, List, Optional
# Test stems that extract_test_cases() leaves out of the generated output;
# it reports each of them as "not yet supported".
SKIP_TESTS = {
    'unicode-version-diagnostic',
    'version-decl', 'version-decl.2',
    'ws-and-delim',
}
# Display category -> stems of the tests that belong to it.
# get_category() walks this mapping in insertion order and answers 'Other'
# for any stem that is not listed in one of these lists.
CATEGORIES = {
    'Basic Examples': ['aaa', 'test', 'empty-group', 'lf', 'tab'],
    'Arithmetic & Math': [
        'arith',
        'expr', 'expr1', 'expr2', 'expr3', 'expr4', 'expr5', 'expr6',
        'poly',
        'hash',
    ],
    'Data Formats': [
        'json', 'json1',
        'xml', 'xml1',
        'vcard',
        'diary', 'diary2', 'diary3',
    ],
    'Email & Addresses': ['email', 'address'],
    'Text Processing': [
        'string', 'marked', 'para-test',
        'nested-comment', 'range-comments',
        'element-content', 'attribute-value',
    ],
    'Character Classes': [
        'range', 'ranges', 'ranges1',
        'hex', 'hex1', 'hex3',
        'unicode-classes',
        'unicode-range', 'unicode-range1', 'unicode-range2',
    ],
    'Programming Languages': ['program', 'xpath'],
}
def get_category(test_name: str) -> str:
    """Return the display category that lists *test_name*.

    CATEGORIES is searched in its definition order and the first category
    whose list contains the name wins. Names that appear in no list are
    reported as 'Other'.
    """
    return next(
        (label for label, members in CATEGORIES.items() if test_name in members),
        'Other',
    )
def read_file_safe(path: Path, strip: bool = True) -> Optional[str]:
    """Read *path* as UTF-8 text, returning ``None`` when it cannot be read.

    Args:
        path: File to read.
        strip: When true (the default), leading and trailing whitespace is
            removed from the returned text; when false the text is returned
            exactly as read.

    Returns:
        The file's text, or ``None`` if the file is missing, cannot be
        opened or read (any ``OSError``, e.g. the path is a directory or
        access is denied), or is not valid UTF-8.
    """
    try:
        text = path.read_text(encoding='utf-8')
    except (OSError, UnicodeDecodeError):
        # OSError is the parent of FileNotFoundError, so a missing file is
        # still handled; it additionally covers directories and permission
        # errors, which would otherwise abort the caller's whole scan.
        return None
    return text.strip() if strip else text
# Hand-written descriptions, keyed by test stem. Stems without an entry get
# a description derived from the stem itself (see get_description).
_DESCRIPTIONS = {
    'arith': 'Parenthesized arithmetic expression with operators',
    'email': 'Email address validation with complex character classes',
    'address': 'Postal address parsing',
    'json': 'Simple JSON parser',
    'json1': 'JSON with nested objects',
    'xml': 'Basic XML parser',
    'xml1': 'XML with attributes',
    'expr': 'Expression with precedence',
    'diary': 'Diary entry with date parsing',
    'vcard': 'vCard contact format',
    'program': 'Simple programming language',
    'xpath': 'XPath expression parsing',
    'hex': 'Hexadecimal number parsing',
    'string': 'String literal parsing',
    'range': 'Character range matching',
    'unicode-classes': 'Unicode character class support',
}


def get_description(test_name: str) -> str:
    """Return a human-readable description for the test *test_name*.

    Known tests use their hand-written description. Any other name is
    turned into a title: hyphens become spaces and the result is
    title-cased (``'nested-comment'`` -> ``'Nested Comment'``).
    """
    try:
        return _DESCRIPTIONS[test_name]
    except KeyError:
        return test_name.replace('-', ' ').title()
def extract_test_cases(tests_dir: Path) -> Dict[str, List[Dict]]:
    """Collect grammar/input pairs from *tests_dir*, grouped by category.

    Every ``*.ixml`` file directly inside *tests_dir* is visited in sorted
    order. Its stem is the test id, and the input is read from the sibling
    file with the ``.inp`` suffix. A test is skipped (with a printed note)
    when its stem is in SKIP_TESTS, when its grammar is missing or empty
    after stripping, or when its input is missing or empty.

    Args:
        tests_dir: Directory holding the ``.ixml`` / ``.inp`` files.

    Returns:
        A mapping from category name to a list of test-case dicts with the
        keys ``name``, ``id``, ``grammar``, ``input`` and ``description``.
        Only categories with at least one test appear.
    """
    grouped: Dict[str, List[Dict]] = {}

    for grammar_path in sorted(tests_dir.glob('*.ixml')):
        stem = grammar_path.stem

        if stem in SKIP_TESTS:
            print(f"⏭️ Skipping {stem} (not yet supported)")
            continue

        grammar_text = read_file_safe(grammar_path, strip=True)
        if not grammar_text:
            print(f"⚠️ Skipping {stem} (no grammar)")
            continue

        # The input is kept verbatim (no stripping); note that the falsy
        # check below also skips an input file that exists but is empty.
        raw_input = read_file_safe(grammar_path.with_suffix('.inp'), strip=False)
        if not raw_input:
            print(f"⚠️ Skipping {stem} (no input)")
            continue

        label = get_category(stem)
        entry = {
            'name': stem.replace('-', ' ').title(),
            'id': stem,
            'grammar': grammar_text,
            'input': raw_input,
            'description': get_description(stem),
        }
        grouped.setdefault(label, []).append(entry)
        print(f"✅ Added {stem} to '{label}'")

    return grouped
def main():
    """Generate ``docs/test-cases.json`` from the ``ixml_tests/correct`` suite.

    The repository root is taken to be the parent of the directory that
    contains this script. Test cases are collected with
    extract_test_cases(), categories are emitted in a fixed display order,
    and the result (with a version string, a description and simple
    statistics) is written as UTF-8 JSON. Progress and a per-category
    summary are printed to stdout.
    """
    # resolve() first: when __file__ is a bare relative name (e.g. the script
    # is started from its own directory on an interpreter that leaves
    # __file__ relative), Path('x.py').parent.parent collapses to '.', which
    # would point at the script's directory instead of the repository root.
    repo_root = Path(__file__).resolve().parent.parent
    tests_dir = repo_root / 'ixml_tests' / 'correct'
    output_file = repo_root / 'docs' / 'test-cases.json'
    print(f"📁 Reading tests from: {tests_dir}")
    print(f"📝 Output file: {output_file}")
    print()

    categories = extract_test_cases(tests_dir)

    # Fixed presentation order; categories with no tests are left out.
    category_order = ['Basic Examples', 'Arithmetic & Math', 'Data Formats',
                      'Email & Addresses', 'Text Processing', 'Character Classes',
                      'Programming Languages', 'Other']
    sorted_categories = {
        cat: categories[cat]
        for cat in category_order
        if cat in categories
    }

    output = {
        'version': '0.2.0',
        'description': 'Passing test cases from the iXML test suite',
        'categories': sorted_categories,
        'stats': {
            'total_tests': sum(len(tests) for tests in categories.values()),
            'categories': len(categories),
        }
    }

    # parents=True so the output directory is created even if intermediate
    # directories are missing; exist_ok=True keeps reruns from failing.
    output_file.parent.mkdir(parents=True, exist_ok=True)
    with open(output_file, 'w', encoding='utf-8') as f:
        # ensure_ascii=False writes non-ASCII characters as-is rather than
        # as \u escapes.
        json.dump(output, f, indent=2, ensure_ascii=False)

    print()
    print(f"✨ Generated {output['stats']['total_tests']} test cases in {output['stats']['categories']} categories")
    print(f"📦 Saved to: {output_file}")
    print("\n📊 Summary by category:")
    for category, tests in sorted_categories.items():
        print(f" {category}: {len(tests)} tests")
# Run the generator only when this file is executed as a script, so that
# importing the module does not read the tests or write the output file.
if __name__ == '__main__':
    main()