import subprocess
import os
import sys
import pandas as pd
def run_command(cmd, description, *, timeout=30):
    """Run a shell command and report whether it exited successfully.

    A short report is printed before and after the run; on a non-zero exit
    status the captured stdout and stderr are printed as well.

    Args:
        cmd: Command line to execute. It is handed to the shell unchanged
            (``shell=True``), so it must only ever be a trusted string.
        description: Human-readable label used in the printed report.
        timeout: Seconds to wait before the command is abandoned.

    Returns:
        True when the command exits with status 0; False on a non-zero
        exit status, on timeout, or when launching the command raises.
    """
    print(f"\n🧪 Testing: {description}")
    print(f"Command: {cmd}")

    # Only the subprocess call sits inside the try block, so the handlers
    # below cannot hide a problem in the reporting code.
    try:
        result = subprocess.run(
            cmd, shell=True, capture_output=True, text=True, timeout=timeout
        )
    except subprocess.TimeoutExpired:
        print(f"⏰ TIMEOUT: {description}")
        return False
    except Exception as e:
        # Deliberately broad: any launch failure is reported as a failed
        # check instead of aborting the whole validation run.
        print(f"❌ ERROR: {description} - {e}")
        return False

    if result.returncode != 0:
        print(f"❌ FAILED: {description}")
        print(f"STDOUT: {result.stdout}")
        print(f"STDERR: {result.stderr}")
        return False

    print(f"✅ PASSED: {description}")
    return True
def test_cache_only_mode():
    """Exercise the ``--cache-only`` flag of the cgdist binary.

    Three checks are performed, each worth one point:

    1. A ``--cache-only`` run succeeds and creates the cache file.
    2. A normal run that is given that cache file succeeds and writes the
       output matrix.
    3. ``--cache-only`` without ``--cache-file`` exits with a non-zero
       status and mentions "requires --cache-file" on stderr.

    All paths are relative, so the result depends on the current working
    directory.

    Returns:
        Tuple ``(tests_passed, total_tests)``.
    """
    print("\n" + "=" * 70)
    print("TESTING CACHE-ONLY MODE")
    print("=" * 70)

    cache_path = "results/test_cache_only_new.lz4"
    matrix_path = "results/test_from_cache_new.tsv"

    # Remove leftovers from earlier runs so that the existence checks below
    # can only be satisfied by files produced during this run.
    for stale in (cache_path, matrix_path):
        try:
            os.remove(stale)
        except FileNotFoundError:
            pass

    tests_passed = 0
    total_tests = 3

    common = ("../target/release/cgdist --schema schema_crc32 "
              "--profiles profiles/test_profiles_crc32.tsv ")

    # Check 1: build the cache without producing a distance matrix.
    cmd = (common
           + "--mode snps-indel-bases --hasher-type crc32 "
           + f"--cache-file {cache_path} --cache-only "
           + "--cache-note 'Testing cache-only mode'")
    if run_command(cmd, "Cache-only mode execution"):
        if os.path.exists(cache_path):
            print("✅ Cache file created successfully")
            tests_passed += 1
        else:
            print("❌ Cache file not found")

    # Check 2: run again, this time with an output matrix and the cache.
    cmd = (common
           + f"--output {matrix_path} "
           + "--mode snps-indel-bases --hasher-type crc32 "
           + f"--cache-file {cache_path}")
    if run_command(cmd, "Using cached data"):
        if os.path.exists(matrix_path):
            print("✅ Matrix created from cache successfully")
            tests_passed += 1
        else:
            print("❌ Output matrix not found")

    # Check 3: the command is expected to fail here, so it is run directly
    # (run_command treats a non-zero exit status as a failure). The timeout
    # keeps a hung binary from blocking the whole script.
    cmd = common + "--mode snps-indel-bases --hasher-type crc32 --cache-only"
    try:
        result = subprocess.run(
            cmd, shell=True, capture_output=True, text=True, timeout=30
        )
    except subprocess.TimeoutExpired:
        result = None
        print("⏰ TIMEOUT: Cache-only validation")

    rejected = (
        result is not None
        and result.returncode != 0
        and "requires --cache-file" in result.stderr
    )
    if rejected:
        print("✅ Cache-only validation works correctly")
        tests_passed += 1
    else:
        print("❌ Cache-only validation failed")

    return tests_passed, total_tests
def test_recombination_detection():
    """Exercise the recombination-log options of the cgdist binary.

    Four checks are performed, each worth one point:

    1. A run with ``--recombination-threshold 3`` succeeds and its log
       contains at least one row.
    2. A run with ``--recombination-threshold 50`` succeeds and its log can
       be parsed as CSV.
    3. The low-threshold log contains every expected column.
    4. Every ``divergence_percent`` value in the low-threshold log lies in
       the range 0..100 (an empty log also counts as a pass).

    Checks 3 and 4 depend only on the low-threshold log existing and being
    parseable. The log is parsed once and shared by checks 1, 3 and 4. All
    paths are relative, so the result depends on the current working
    directory.

    Returns:
        Tuple ``(tests_passed, total_tests)``.
    """
    print("\n" + "=" * 70)
    print("TESTING RECOMBINATION DETECTION")
    print("=" * 70)

    tests_passed = 0
    total_tests = 4

    low_log = "results/recomb_low_threshold.csv"
    high_log = "results/recomb_high_threshold.csv"
    matrix_path = "results/test_recomb_matrix.tsv"

    # Remove leftovers from earlier runs so that the existence checks below
    # can only be satisfied by files produced during this run.
    for stale in (low_log, high_log, matrix_path):
        try:
            os.remove(stale)
        except FileNotFoundError:
            pass

    common = ("../target/release/cgdist --schema schema_crc32 "
              "--profiles profiles/test_profiles_crc32.tsv "
              f"--output {matrix_path} "
              "--mode snps-indel-bases --hasher-type crc32 ")

    # Low-threshold run; its log feeds checks 1, 3 and 4.
    cmd = common + f"--recombination-log {low_log} --recombination-threshold 3"
    low_run_ok = run_command(cmd, "Recombination detection (low threshold)")

    low_events = None
    if os.path.exists(low_log):
        try:
            low_events = pd.read_csv(low_log)
        except Exception as e:
            print(f"❌ Error reading recombination log: {e}")
    else:
        print("❌ Recombination log not created")

    # Check 1: the low threshold must flag at least one event.
    if low_run_ok and low_events is not None:
        if len(low_events) > 0:
            print(f"✅ Detected {len(low_events)} recombination events (threshold=3)")
            tests_passed += 1
        else:
            print("❌ No events detected with low threshold")

    # Check 2: the high-threshold run only has to yield a parseable log.
    cmd = common + f"--recombination-log {high_log} --recombination-threshold 50"
    if run_command(cmd, "Recombination detection (high threshold)"):
        if os.path.exists(high_log):
            try:
                high_events = pd.read_csv(high_log)
            except Exception as e:
                print(f"❌ Error reading recombination log: {e}")
            else:
                print(f"✅ Detected {len(high_events)} recombination events (threshold=50)")
                tests_passed += 1
        else:
            print("❌ Recombination log not created")

    # Without a parsed low-threshold log, checks 3 and 4 cannot pass; the
    # reason has already been printed above.
    if low_events is None:
        return tests_passed, total_tests

    # Check 3: column layout of the log.
    expected_columns = [
        'locus', 'sample1', 'sample2', 'allele1_hash', 'allele2_hash',
        'snps_indel_bases', 'threshold', 'seq_length1', 'seq_length2',
        'divergence_percent'
    ]
    if all(col in low_events.columns for col in expected_columns):
        print("✅ CSV format is correct")
        tests_passed += 1
    else:
        print("❌ CSV format is incorrect")
        print(f"Expected: {expected_columns}")
        print(f"Found: {list(low_events.columns)}")

    # Check 4: divergence percentages must be within 0..100 inclusive.
    try:
        if len(low_events) > 0:
            in_range = low_events['divergence_percent'].between(0, 100)
            if in_range.all():
                print("✅ Divergence percentages are valid")
                tests_passed += 1
            else:
                print("❌ Invalid divergence percentages found")
        else:
            print("⚠️ No data to validate divergence percentages")
            tests_passed += 1
    except Exception as e:
        # A missing or non-numeric column surfaces here.
        print(f"❌ Error validating divergence percentages: {e}")

    return tests_passed, total_tests
def main():
    """Run both feature suites and print a combined summary.

    Returns:
        True when every check in both suites passed, False otherwise.
    """
    print("🧪 cgDist New Features Validation Test")
    print("=" * 70)

    cache_passed, cache_total = test_cache_only_mode()
    recomb_passed, recomb_total = test_recombination_detection()

    total_passed = cache_passed + recomb_passed
    total_tests = cache_total + recomb_total

    print("\n" + "=" * 70)
    print("TEST SUMMARY")
    print("=" * 70)
    print(f"Cache-only mode: {cache_passed}/{cache_total} tests passed")
    print(f"Recombination detection: {recomb_passed}/{recomb_total} tests passed")
    print(f"\nOverall: {total_passed}/{total_tests} tests passed")

    if total_passed == total_tests:
        print("🎉 ALL NEW FEATURE TESTS PASSED!")
        return True

    print(f"⚠️ {total_tests - total_passed} tests failed")
    return False
if __name__ == '__main__':
    # Exit status 0 when every check passed, 1 otherwise.
    sys.exit(0 if main() else 1)