import json
import os
from pathlib import Path
import numpy as np
import pandas as pd
import pytest
from qslib import Experiment
from qslib.data import _filterdata_df_v2, _parse_multicomponent_data_v2
from qslib.experiment import _get_manifest_info
@pytest.fixture(scope="module")
def v2_test_eds_file():
    """Return the path of ``v2_test.eds`` located in this module's directory."""
    this_dir = Path(__file__).parent
    return this_dir / "v2_test.eds"
@pytest.fixture(scope="module")
def v2_test_experiment(v2_test_eds_file):
    """Load the test EDS file with ``Experiment.from_file``, once per module."""
    experiment = Experiment.from_file(v2_test_eds_file)
    return experiment
@pytest.fixture(scope="module")
def v2_manifest_data(v2_test_experiment):
    """Return the manifest info for the experiment's root directory.

    ``_get_manifest_info`` is called with ``checkinfo=False``.
    """
    root = v2_test_experiment.root_dir
    return _get_manifest_info(root, checkinfo=False)
@pytest.fixture(scope="module")
def v2_multicomponent_data(v2_test_experiment):
    """Return the decoded ``primary/multicomponent_data.json`` of the test experiment.

    The file is looked up relative to the experiment's ``root_dir`` and parsed
    with ``json.load``; the result is shared by all tests in this module.
    """
    mdp = os.path.join(v2_test_experiment.root_dir, "primary/multicomponent_data.json")
    # Pin the encoding: without it, open() uses the locale's default, which is
    # not UTF-8 on every platform, so the same file could decode differently.
    with open(mdp, "r", encoding="utf-8") as f:
        return json.load(f)
@pytest.fixture(scope="module")
def v2_filter_data(v2_test_experiment):
    """Return the decoded ``run/filter_data.json`` of the test experiment.

    The file is looked up relative to the experiment's ``root_dir`` and parsed
    with ``json.load``; the result is shared by all tests in this module.
    """
    fdp = os.path.join(v2_test_experiment.root_dir, "run/filter_data.json")
    # Pin the encoding: without it, open() uses the locale's default, which is
    # not UTF-8 on every platform, so the same file could decode differently.
    with open(fdp, "r", encoding="utf-8") as f:
        return json.load(f)
@pytest.fixture(scope="module")
def v2_analysis_result(v2_test_experiment):
    """Expose the experiment's ``_analysis_dict_v2`` attribute to the tests."""
    analysis = v2_test_experiment._analysis_dict_v2
    return analysis
@pytest.fixture(scope="module")
def v2_summary_data(v2_test_experiment):
    """Return the decoded ``summary.json`` of the test experiment.

    The file is looked up relative to the experiment's ``root_dir`` and parsed
    with ``json.load``; the result is shared by all tests in this module.
    """
    sdp = os.path.join(v2_test_experiment.root_dir, "summary.json")
    # Pin the encoding: without it, open() uses the locale's default, which is
    # not UTF-8 on every platform, so the same file could decode differently.
    with open(sdp, "r", encoding="utf-8") as f:
        return json.load(f)
class TestV2ManifestParsing:
def test_manifest_info_parsing(self, v2_manifest_data):
assert v2_manifest_data["Specification-Title"] == "Experiment Document Specification"
assert v2_manifest_data["Specification-Version"] == "2.0.0"
assert v2_manifest_data["Implementation-Title"] == "qPCR File API"
assert v2_manifest_data["Implementation-Version"] == "2.5.6"
def test_manifest_version_detection(self, v2_manifest_data):
assert v2_manifest_data["Specification-Version"].startswith("2")
def test_summary_json_loading(self, v2_summary_data):
summary = v2_summary_data
assert summary["name"] == "Test_Experiment_001"
assert summary["blockType"] == "BLOCK_384W"
assert summary["instrumentType"] == "QS6PRO"
assert summary["runStatus"] == "COMPLETED"
assert summary["regulatoryLabel"] == "RUO"
assert summary["analysis"]["primary"]["status"] == "COMPLETED"
assert summary["analysis"]["primary"]["id"] == "algo.primary"
class TestV2MulticomponentData:
def test_multicomponent_data_structure(self, v2_multicomponent_data):
assert "collectionPoints" in v2_multicomponent_data
assert "wellData" in v2_multicomponent_data
assert len(v2_multicomponent_data["collectionPoints"]) == 5 assert len(v2_multicomponent_data["wellData"]) == 8
def test_collection_points_format(self, v2_multicomponent_data):
cp = v2_multicomponent_data["collectionPoints"][0]
assert "cycle" in cp
assert "point" in cp
assert "stage" in cp
assert "step" in cp
assert isinstance(cp["cycle"], int)
assert isinstance(cp["point"], int)
assert isinstance(cp["stage"], int)
assert isinstance(cp["step"], int)
def test_well_data_format(self, v2_multicomponent_data):
well = v2_multicomponent_data["wellData"][0]
assert "wellIndex" in well
assert "dyeData" in well
assert "temperatures" in well
assert isinstance(well["wellIndex"], int)
assert isinstance(well["dyeData"], list)
assert isinstance(well["temperatures"], list)
dye = well["dyeData"][0]
assert "dyeName" in dye
assert "fluorescences" in dye
assert isinstance(dye["fluorescences"], list)
def test_multicomponent_data_parsing_function(self, v2_multicomponent_data):
df = _parse_multicomponent_data_v2(v2_multicomponent_data, 384)
assert isinstance(df, pd.DataFrame)
assert df.index.names == ["well", "collection_cycle"]
expected_cols = ["cycle", "point", "stage", "step", "temperature"]
for col in expected_cols:
assert col in df.columns
dye_cols = [col for col in df.columns if col not in ["cycle", "point", "stage", "step", "temperature"]]
assert len(dye_cols) > 0
assert df["temperature"].dtype in [np.float64, np.float32]
for dye_col in dye_cols:
assert df[dye_col].dtype in [np.float64, np.float32]
class TestV2FilterData:
def test_filter_data_structure(self, v2_filter_data):
assert isinstance(v2_filter_data, list)
assert len(v2_filter_data) == 3
item = v2_filter_data[0]
assert "collectionPoint" in item
assert "filterData" in item
assert "zoneTemperatures" in item
def test_collection_point_format(self, v2_filter_data):
cp = v2_filter_data[0]["collectionPoint"]
assert "cycle" in cp
assert "point" in cp
assert "stage" in cp
assert "step" in cp
def test_filter_data_format(self, v2_filter_data):
filter_item = v2_filter_data[0]["filterData"][0]
assert "exposure" in filter_item
assert "filterSet" in filter_item
assert "wellFluorescences" in filter_item
assert isinstance(filter_item["wellFluorescences"], list)
assert len(filter_item["wellFluorescences"]) == 384
def test_filterdata_df_v2_function(self, v2_filter_data, v2_test_experiment):
df = _filterdata_df_v2(v2_filter_data, 384, quant_files_path=(Path(v2_test_experiment.root_dir) / "run/quant"))
assert isinstance(df, pd.DataFrame)
assert df.index.names == ["filter_set", "stage", "cycle", "step", "point"]
fl_cols = [col for col in df.columns if col[1] == "fl"]
assert len(fl_cols) == 384
rt_cols = [col for col in df.columns if col[1] == "rt"]
assert len(rt_cols) == 384
assert ("exposure", "exposure") in df.columns
assert ("time", "timestamp") in df.columns
def test_filterdata_without_quant_files(self, v2_filter_data):
df = _filterdata_df_v2(v2_filter_data, 384, quant_files_path=None)
assert isinstance(df, pd.DataFrame)
time_cols = [col for col in df.columns if col[0] == "time"]
assert len(time_cols) == 0
class TestV2AnalysisResults:
def test_analysis_result_structure(self, v2_analysis_result):
assert "replicateGroupResults" in v2_analysis_result
assert isinstance(v2_analysis_result["replicateGroupResults"], list)
assert len(v2_analysis_result["replicateGroupResults"]) == 10
def test_replicate_group_format(self, v2_analysis_result):
group = v2_analysis_result["replicateGroupResults"][0]
expected_fields = [
"cqMean",
"cqSD",
"cqSE",
"flags",
"numberOfReplicates",
"quantity",
"resultQCIssues",
"sampleName",
"targetName",
]
for field in expected_fields:
assert field in group
assert isinstance(group["cqMean"], (int, float))
assert isinstance(group["cqSD"], (int, float))
assert isinstance(group["cqSE"], (int, float))
assert isinstance(group["numberOfReplicates"], int)
assert isinstance(group["quantity"], (int, float))
assert isinstance(group["sampleName"], str)
assert isinstance(group["targetName"], str)
assert isinstance(group["flags"], list)
assert isinstance(group["resultQCIssues"], list)
def test_anonymized_sample_names(self, v2_analysis_result):
sample_names = [group["sampleName"] for group in v2_analysis_result["replicateGroupResults"]]
for name in sample_names:
assert name.startswith("Sample_")
target_names = [group["targetName"] for group in v2_analysis_result["replicateGroupResults"]]
for name in target_names:
assert name.startswith("Target_")
class TestV2ExperimentLoading:
    """Tests that exercise the loaded v2 ``Experiment`` object directly."""

    def test_experiment_from_eds_file(self, v2_test_experiment):
        """The loaded experiment reports spec version 2 and exposes analysis data."""
        loaded = v2_test_experiment
        assert loaded.spec_major_version == 2
        assert loaded.name == "Test_Experiment_001"

        analysis = loaded._analysis_dict_v2
        assert isinstance(analysis, dict)
        assert "replicateGroupResults" in analysis

        try:
            multicomponent = loaded.multicomponent_data
        except ValueError:
            # A ValueError from this access is deliberately tolerated here.
            # NOTE(review): presumably raised while the plate type is not yet
            # known (the next test sets it explicitly) - confirm.
            pass
        else:
            assert isinstance(multicomponent, pd.DataFrame)

    def test_v2_experiment_properties(self, v2_test_experiment):
        """With ``plate_type`` set to 384, multicomponent data loads and is non-empty."""
        loaded = v2_test_experiment
        # NOTE: this assigns to the fixture object itself, which is shared by
        # every test that uses ``v2_test_experiment``.
        loaded.plate_type = 384

        multicomponent = loaded.multicomponent_data
        assert isinstance(multicomponent, pd.DataFrame)
        assert not multicomponent.empty

        groups = loaded._analysis_dict_v2["replicateGroupResults"]
        assert len(groups) == 10
class TestV2ErrorHandling:
    """Tests for the errors raised on missing or unsupported inputs."""

    def test_missing_manifest_file(self, tmp_path):
        """An empty directory makes ``_get_manifest_info`` raise ``ValueError``."""
        with pytest.raises(ValueError, match="No EDS manifest file found"):
            _get_manifest_info(tmp_path)

    def test_invalid_specification_version(self, tmp_path):
        """A manifest declaring specification version 3.0.0 is rejected."""
        manifest_lines = [
            "Manifest-Version: 1.0\n",
            "Specification-Title: Experiment Document Specification\n",
            "Specification-Version: 3.0.0\n",
        ]
        (tmp_path / "Manifest.mf").write_text("".join(manifest_lines))

        with pytest.raises(ValueError, match="QSLib does not support EDS files of specification version"):
            _get_manifest_info(tmp_path)

    def test_multicomponent_data_unsupported_plate_type(self, v2_multicomponent_data):
        """Passing 48 to the multicomponent parser raises ``ValueError``."""
        with pytest.raises(ValueError, match="Unsupported number of wells"):
            _parse_multicomponent_data_v2(v2_multicomponent_data, 48)

    def test_filterdata_unsupported_plate_type(self, v2_filter_data):
        """Passing 48 to the filter-data parser raises ``ValueError``."""
        with pytest.raises(ValueError):
            _filterdata_df_v2(v2_filter_data, 48)

    def test_missing_quant_files(self, v2_filter_data, tmp_path):
        """A quant-files path that does not exist raises ``FileNotFoundError``."""
        missing_dir = tmp_path / "nonexistent"
        with pytest.raises(FileNotFoundError):
            _filterdata_df_v2(v2_filter_data, 384, quant_files_path=missing_dir)
class TestV2DataValidation:
def test_multicomponent_data_consistency(self, v2_multicomponent_data):
collection_points = v2_multicomponent_data["collectionPoints"]
well_data = v2_multicomponent_data["wellData"]
for well in well_data:
assert len(well["temperatures"]) == len(collection_points)
for dye in well["dyeData"]:
assert len(dye["fluorescences"]) == len(collection_points)
def test_filter_data_consistency(self, v2_filter_data):
for item in v2_filter_data:
well_counts = [len(fd["wellFluorescences"]) for fd in item["filterData"]]
assert len(set(well_counts)) == 1
def test_data_types_and_ranges(self, v2_multicomponent_data, v2_filter_data, v2_analysis_result):
for well in v2_multicomponent_data["wellData"]:
for temp in well["temperatures"]:
assert isinstance(temp, (int, float))
assert 0 <= temp <= 100
for dye in well["dyeData"]:
for fl in dye["fluorescences"]:
assert isinstance(fl, (int, float))
assert -10000 <= fl <= 1000000
for item in v2_filter_data:
for fd in item["filterData"]:
assert isinstance(fd["exposure"], int)
assert fd["exposure"] > 0
for fl in fd["wellFluorescences"]:
assert isinstance(fl, (int, float))
assert -10000 <= fl <= 1000000
for group in v2_analysis_result["replicateGroupResults"]:
assert group["numberOfReplicates"] > 0
assert group["cqSD"] >= 0 assert group["cqSE"] >= 0