import os
import random
from datetime import datetime, timezone
from pathlib import Path
def generate_random_dna_sequence(length):
    """Build a random DNA string containing ``length`` bases.

    Every base is picked independently with ``random.choice`` from
    A, T, C and G, so the output is reproducible under ``random.seed``.
    A ``length`` of zero or less produces an empty string.
    """
    alphabet = ('A', 'T', 'C', 'G')
    picked = []
    for _ in range(length):
        picked.append(random.choice(alphabet))
    return ''.join(picked)
def generate_fasta_file(filename, num_sequences, seq_length_range=(100, 1000)):
    """Write a FASTA file of randomly generated DNA records.

    Each record consists of a ``>sequence_NNNN_length_L`` header, the
    sequence wrapped at 80 characters per line, and one trailing blank line.

    Args:
        filename: Path (string or path-like) of the file to create. An
            existing file is overwritten.
        num_sequences: Number of records to write.
        seq_length_range: Inclusive ``(min, max)`` bounds for the length of
            each sequence, unpacked into ``random.randint``.
    """
    line_width = 80
    with open(filename, 'w', encoding='utf-8') as f:
        for record_number in range(1, num_sequences + 1):
            # Draw the length exactly once so the length advertised in the
            # header is the length of the sequence that is actually written.
            seq_length = random.randint(*seq_length_range)
            sequence = generate_random_dna_sequence(seq_length)
            f.write(f">sequence_{record_number:04d}_length_{seq_length}\n")
            for start in range(0, len(sequence), line_width):
                f.write(sequence[start:start + line_width] + '\n')
            # Blank separator line between records.
            f.write('\n')
def main():
    """Generate the FASTA test fixtures and a README next to this script.

    Writes four FASTA files of different sizes into the directory that
    contains this file, prints the size of each one (with a warning when a
    file is larger than 500 KB), and then writes a ``README.md`` describing
    the generated files.
    """
    size_limit_kb = 500
    test_data_dir = Path(__file__).parent
    print(f"Generating test data in: {test_data_dir}")

    # (filename, number of sequences, (min length, max length))
    files_to_generate = [
        ("small_test.fasta", 50, (50, 200)),
        ("medium_test.fasta", 100, (100, 500)),
        ("large_test.fasta", 200, (200, 1000)),
        ("tiny_test.fasta", 10, (20, 100)),
    ]

    for filename, num_seqs, length_range in files_to_generate:
        filepath = test_data_dir / filename
        print(f"Generating {filename}...")
        generate_fasta_file(filepath, num_seqs, length_range)
        size_kb = filepath.stat().st_size / 1024
        print(f" Created: {filename} ({size_kb:.1f} KB)")
        if size_kb > size_limit_kb:
            print(f" Warning: {filename} exceeds 500KB limit!")

    # Record when the data was generated as a readable UTC timestamp, rather
    # than the raw modification time of this script.
    generated_at = datetime.now(timezone.utc).isoformat(timespec="seconds")

    # Built from a list of lines so the README text does not depend on the
    # indentation of this function.
    readme_lines = [
        "# Test FASTA Data",
        "This directory contains randomly generated FASTA files for testing rustkmer.",
        "## Files:",
        "- `tiny_test.fasta`: Very small file (10 sequences, 20-100bp each)",
        "- `small_test.fasta`: Small file (50 sequences, 50-200bp each)",
        "- `medium_test.fasta`: Medium file (100 sequences, 100-500bp each)",
        "- `large_test.fasta`: Large file (200 sequences, 200-1000bp each)",
        "All files contain randomly generated DNA sequences (A, T, C, G only).",
        "## Usage:",
        "```python",
        "from rustkmer import Database",
        "# Load a test database",
        'db = Database("path/to/test_data/large_test.fasta")',
        "```",
        f"Generated on: {generated_at}",
    ]
    readme_content = "\n".join(readme_lines) + "\n"

    readme_path = test_data_dir / "README.md"
    with open(readme_path, 'w', encoding='utf-8') as f:
        f.write(readme_content)

    print("\nTest data generation complete!")
    # The extra one accounts for README.md.
    print(f"Total files generated: {len(files_to_generate) + 1}")
# Run the generator only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()