# data_generator 0.1.119
#
# RDF data shapes implementation in Rust
# Documentation
# Example configuration: automatic detection of the parallel output file count.
# With parallel_file_count = 0 in [output], the number of parallel output files
# is determined automatically instead of being fixed in this file.

# Core generation settings: how many entities to produce and how they are
# distributed.
[generation]
# Total number of entities to generate.
entity_count = 1_000
# Fixed seed so repeated runs give reproducible results.
seed = 42
# Distribute entities equally across shapes.
entity_distribution = "Equal"
# Use the balanced cardinality strategy.
cardinality_strategy = "Balanced"

# Default field-generator settings (per the table name).
# NOTE(review): how these defaults interact with the datatype- and
# property-specific tables in this file is not visible here — confirm against
# the generator.
[field_generators.default]
locale = "en"                         # Generate English text
quality = "Medium"                    # Medium quality data

# Configure specific data types
# Generator settings for values typed as xsd:integer.
[field_generators.datatypes."http://www.w3.org/2001/XMLSchema#integer"]
generator = "integer"
# NOTE(review): presumably the bounds of generated values; whether they are
# inclusive is not visible here — confirm.
parameters.min = 1
parameters.max = 10_000

# Generator settings for values typed as xsd:decimal.
[field_generators.datatypes."http://www.w3.org/2001/XMLSchema#decimal"]
generator = "decimal"
parameters.min = 0.0
parameters.max = 1000.0
# NOTE(review): presumably the number of digits after the decimal point — confirm.
parameters.precision = 2

# Generator settings for values typed as xsd:date.
[field_generators.datatypes."http://www.w3.org/2001/XMLSchema#date"]
generator = "date"
# NOTE(review): presumably the year range of generated dates; inclusiveness of
# end_year is not visible here — confirm.
parameters.start_year = 1980
parameters.end_year = 2024

# Configure specific properties
# The http://example.org/name property is assigned the "string" generator.
[field_generators.properties."http://example.org/name"]
generator = "string"
# No generator parameters are set for this property.
parameters = {}

# The http://example.org/legalName property is assigned the "string" generator.
[field_generators.properties."http://example.org/legalName"]
generator = "string"
# No generator parameters are set for this property.
parameters = {}

# Output settings: destination, serialization format, and parallel writing.
# NOTE(review): with parallel writing enabled, `path` presumably serves as the
# base name for several files rather than a single file — confirm.
[output]
path = "auto_parallel.ttl"           # Output file name
format = "Turtle"                    # Use Turtle format
compress = false                     # Don't compress output
write_stats = true                   # Include generation statistics
parallel_writing = true             # Enable parallel writing
# NOTE(review): presumably only consulted when parallel_writing is true — confirm.
parallel_file_count = 0             # 0 = auto-detect optimal count based on system and dataset

# Parallel processing settings: thread count, batch size, and which stages
# run in parallel.
# NOTE(review): the thread count is pinned to 4 here while the output file
# count in [output] is auto-detected — confirm this combination is intended.
[parallel]
worker_threads = 4                   # Use 4 parallel threads
batch_size = 100                     # Process entities in batches of 100
parallel_shapes = true              # Process shapes in parallel
parallel_fields = true              # Generate fields in parallel