# Configuration for testing pattern-based field generation.
# This configuration demonstrates the pattern generator's capabilities.
# Core generation settings for the test run.
# NOTE(review): the table header and key names in this section were missing
# from the file (it read `[]` / `= value`, which is not valid TOML). They have
# been reconstructed from the inline comments; confirm them against the
# data_generator configuration schema.
[generation]
entity_count = 100 # Generate 100 entities for testing
seed = 12345 # Fixed seed for consistent results
entity_distribution = "Equal" # Distribute entities equally
cardinality_strategy = "Balanced" # Balanced cardinality
# Defaults applied to generated field values.
# NOTE(review): the table header and key names were missing from the file and
# have been reconstructed from the inline comments; confirm them against the
# data_generator configuration schema.
[field_generators.default]
locale = "en" # English locale
quality = "Medium" # Medium quality data
# ============================================================================
# PATTERN-BASED GENERATORS - Demonstrate regex pattern generation
# ============================================================================
# Email with regex pattern (will auto-extract from schema or use this config).
# NOTE(review): the table headers and key names were missing from the file.
# Their shape follows the commented-out examples in the ADDITIONAL PATTERN
# EXAMPLES section; the property IRI is an assumption, so confirm it matches
# the predicate used in the schema.
[field_generators.properties."http://example.org/email"]
generator = "pattern"
[field_generators.properties."http://example.org/email".parameters]
# Backslashes are doubled because this is a TOML basic string; the regex
# itself contains a single `\.`.
pattern = "[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}"
# Phone number with pattern (three digits, three digits, four digits,
# separated by hyphens).
# NOTE(review): the table headers and key names were missing from the file.
# Their shape follows the commented-out examples in the ADDITIONAL PATTERN
# EXAMPLES section; the property IRI is an assumption, so confirm it matches
# the predicate used in the schema.
[field_generators.properties."http://example.org/phone"]
generator = "pattern"
[field_generators.properties."http://example.org/phone".parameters]
pattern = "\\d{3}-\\d{3}-\\d{4}"
# Student ID with pattern (2-3 uppercase letters followed by 4-6 digits).
# NOTE(review): the table headers and key names were missing from the file.
# Their shape follows the commented-out examples in the ADDITIONAL PATTERN
# EXAMPLES section; the property IRI is an assumption, so confirm it matches
# the predicate used in the schema.
[field_generators.properties."http://example.org/studentId"]
generator = "pattern"
[field_generators.properties."http://example.org/studentId".parameters]
pattern = "[A-Z]{2,3}\\d{4,6}"
# Website URL with pattern (http or https scheme, host, and a TLD of at least
# two letters; no path component).
# NOTE(review): the table headers and key names were missing from the file.
# Their shape follows the commented-out examples in the ADDITIONAL PATTERN
# EXAMPLES section; the property IRI is an assumption, so confirm it matches
# the predicate used in the schema.
[field_generators.properties."http://example.org/website"]
generator = "pattern"
[field_generators.properties."http://example.org/website".parameters]
pattern = "https?://[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}"
# Birth date as string with pattern (YYYY-MM-DD shape).
# The regex only constrains digit counts; it does not restrict month or day
# to valid calendar ranges.
# NOTE(review): the table headers and key names were missing from the file.
# Their shape follows the commented-out examples in the ADDITIONAL PATTERN
# EXAMPLES section; the property IRI is an assumption, so confirm it matches
# the predicate used in the schema.
[field_generators.properties."http://example.org/birthDate"]
generator = "pattern"
[field_generators.properties."http://example.org/birthDate".parameters]
pattern = "\\d{4}-\\d{2}-\\d{2}"
# IP address with pattern (four dot-separated groups of 1-3 digits).
# The regex does not cap each group at 255, so it also matches strings that
# are not valid IPv4 addresses.
# NOTE(review): the table headers and key names were missing from the file.
# Their shape follows the commented-out examples in the ADDITIONAL PATTERN
# EXAMPLES section; the property IRI is an assumption, so confirm it matches
# the predicate used in the schema.
[field_generators.properties."http://example.org/ipAddress"]
generator = "pattern"
[field_generators.properties."http://example.org/ipAddress".parameters]
pattern = "\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}"
# Name property uses heuristic generation (no pattern specified).
# The parameters table is intentionally left empty.
# NOTE(review): the table header and key names were missing from the file.
# Their shape follows the commented-out examples in the ADDITIONAL PATTERN
# EXAMPLES section; the property IRI is an assumption, so confirm it matches
# the predicate used in the schema.
[field_generators.properties."http://example.org/name"]
generator = "pattern" # Will use heuristic generation based on property name
parameters = {}
# ============================================================================
# ADDITIONAL PATTERN EXAMPLES (commented out)
# ============================================================================
# US phone number with country code
# [field_generators.properties."http://example.org/usPhone"]
# generator = "pattern"
# [field_generators.properties."http://example.org/usPhone".parameters]
# pattern = "\\+1-\\d{3}-\\d{3}-\\d{4}"
# Social Security Number
# [field_generators.properties."http://example.org/ssn"]
# generator = "pattern"
# [field_generators.properties."http://example.org/ssn".parameters]
# pattern = "\\d{3}-\\d{2}-\\d{4}"
# License plate
# [field_generators.properties."http://example.org/licensePlate"]
# generator = "pattern"
# [field_generators.properties."http://example.org/licensePlate".parameters]
# pattern = "[A-Z]{3}\\d{3}"
# Credit card number (simplified)
# [field_generators.properties."http://example.org/creditCard"]
# generator = "pattern"
# [field_generators.properties."http://example.org/creditCard".parameters]
# pattern = "\\d{4}-\\d{4}-\\d{4}-\\d{4}"
# Postal code (US ZIP)
# [field_generators.properties."http://example.org/zipCode"]
# generator = "pattern"
# [field_generators.properties."http://example.org/zipCode".parameters]
# pattern = "\\d{5}(-\\d{4})?"
# Where and how the generated data is written.
# NOTE(review): the table header and key names were missing from the file and
# have been reconstructed from the inline comments; confirm them against the
# data_generator configuration schema.
[output]
path = "pattern_test_data.ttl" # Output file for pattern testing
format = "Turtle" # Turtle format for readability
compress = false # No compression for testing
write_stats = true # Include generation statistics
# Parallelism settings, kept small for a test run.
# NOTE(review): the table header and key names were missing from the file and
# have been reconstructed from the inline comments; confirm them against the
# data_generator configuration schema.
[parallel]
worker_threads = 2 # Light threading for testing
batch_size = 50 # Small batches
parallel_shapes = true # Enable shape parallelism
parallel_fields = true # Enable field parallelism
# ============================================================================
# USAGE INSTRUCTIONS
# ============================================================================
#
# To test pattern generation:
# 1. Run with the pattern_schema.shex file:
#      cargo run -p data_generator -- --config pattern_test_config.toml --schema pattern_schema.shex
#
# 2. Or run with any existing schema; the patterns from this config will be used:
#      cargo run -p data_generator -- --config pattern_test_config.toml --schema examples/simple.shex
#
# 3. Check the output file (pattern_test_data.ttl) to verify pattern compliance
#
# Expected output examples:
# - Email: user42@example.com, admin123@test.org
# - Phone: 555-123-4567, 800-555-9999
# - Student ID: AB1234, XYZ123456
# - Website: https://example.com, http://test.org
# - Birth date: 1985-03-15, 2001-12-25
# - IP address: 192.168.1.100, 10.0.0.1