from __future__ import annotations
import json
import subprocess
import sys
from pathlib import Path
import pytest
import dtcs
# Directory that contains this test module.
PACKAGE_ROOT = Path(__file__).resolve().parent
# Repository root: two directory levels above PACKAGE_ROOT.
REPO_ROOT = PACKAGE_ROOT.parents[1]
# Shared fixture documents used by the tests below.
FIXTURES = REPO_ROOT.joinpath("tests", "fixtures")
# Example contract shipped with the repository.
EXAMPLE = REPO_ROOT.joinpath("examples", "customer_normalize.dtcs.yaml")
# JSON manifest listing the expected outcome for each fixture.
MANIFEST = REPO_ROOT.joinpath("tests", "fixture_expectations.json")
def _fixture_dir() -> Path:
    """Return the directory that holds the shared fixture files (``FIXTURES``)."""
    fixtures_root = FIXTURES
    return fixtures_root
def _fixture(name: str) -> bytes:
    """Read the fixture file called *name* and return its raw bytes."""
    fixture_path = _fixture_dir() / name
    return fixture_path.read_bytes()
def _fixture_format(name: str) -> str:
return "json" if name.endswith(".json") else "yaml"
def _load_manifest() -> list[dict]:
    """Load the ``fixtures`` list from the JSON expectations manifest."""
    raw_text = MANIFEST.read_text(encoding="utf-8")
    manifest = json.loads(raw_text)
    return manifest["fixtures"]
def test_spec_version() -> None:
    """The spec version ends in ``draft`` and the package version is truthy."""
    spec_version = dtcs.SPEC_VERSION
    assert spec_version.endswith("draft")
    package_version = dtcs.__version__
    assert package_version
def test_parse_valid_yaml_fixture() -> None:
    """Parsing ``valid_customer.yaml`` gives a valid report and the expected id."""
    parsed = dtcs.parse(_fixture("valid_customer.yaml"), "yaml")
    report = parsed["report"]
    assert dtcs.is_valid(report)
    parsed_contract = parsed["contract"]
    assert parsed_contract is not None
    assert parsed_contract["id"] == "customer.normalize"
def test_parse_valid_json_fixture() -> None:
    """Parsing ``valid_minimal.json`` gives a valid report and the expected id."""
    parsed = dtcs.parse(_fixture("valid_minimal.json"), "json")
    report = parsed["report"]
    assert dtcs.is_valid(report)
    parsed_contract = parsed["contract"]
    assert parsed_contract is not None
    assert parsed_contract["id"] == "json.example"
def test_parse_and_validate_repo_example() -> None:
    """``parse_and_validate`` reports the repository example as valid."""
    example_bytes = EXAMPLE.read_bytes()
    outcome = dtcs.parse_and_validate(example_bytes, "yaml")
    assert dtcs.is_valid(outcome)
def test_parse_file_repo_example() -> None:
    """``parse_file`` reads the repository example from its path string."""
    example_path = str(EXAMPLE)
    parsed = dtcs.parse_file(example_path)
    assert dtcs.is_valid(parsed["report"])
    parsed_contract = parsed["contract"]
    assert parsed_contract is not None
    assert parsed_contract["id"] == "customer.normalize"
def test_validate_result_merges_parse_and_validation_diagnostics() -> None:
    """``validate_result`` on the missing-lineage fixture is invalid and
    includes the ``dtcs:missing-lineage`` diagnostic."""
    parsed = dtcs.parse(_fixture("missing_lineage.yaml"), "yaml")
    merged = dtcs.validate_result(parsed)
    assert not dtcs.is_valid(merged)
    reported_ids = set()
    for diagnostic in merged["diagnostics"]:
        reported_ids.add(diagnostic["id"])
    assert "dtcs:missing-lineage" in reported_ids
def test_validate_contract_round_trip() -> None:
    """A contract returned by ``parse`` validates cleanly via ``validate``."""
    parsed = dtcs.parse(_fixture("valid_customer.yaml"), "yaml")
    parsed_contract = parsed["contract"]
    outcome = dtcs.validate(parsed_contract)
    assert dtcs.is_valid(outcome)
def test_validate_none_contract_raises() -> None:
    """Passing ``None`` to ``validate`` raises ``TypeError`` with the expected message."""
    expected_message = "contract must be a dict"
    with pytest.raises(TypeError, match=expected_message):
        dtcs.validate(None)
def test_parse_malformed_yaml_has_no_contract() -> None:
    """Malformed YAML yields no contract and a ``dtcs:parse-error`` diagnostic."""
    parsed = dtcs.parse(_fixture("malformed.yaml"), "yaml")
    assert parsed["contract"] is None
    diagnostics = parsed["report"]["diagnostics"]
    assert any(item["id"] == "dtcs:parse-error" for item in diagnostics)
def test_parse_malformed_json_has_no_contract() -> None:
    """Malformed JSON yields no contract and a ``dtcs:parse-error`` diagnostic."""
    parsed = dtcs.parse(_fixture("malformed.json"), "json")
    assert parsed["contract"] is None
    diagnostics = parsed["report"]["diagnostics"]
    assert any(item["id"] == "dtcs:parse-error" for item in diagnostics)
def test_parse_yml_format_alias() -> None:
    """The format string ``"yml"`` is accepted for a YAML fixture."""
    document = _fixture("valid_customer.yaml")
    parsed = dtcs.parse(document, "yml")
    assert dtcs.is_valid(parsed["report"])
def test_parse_bytearray_content() -> None:
    """``parse`` accepts the document as a ``bytearray`` as well as ``bytes``."""
    mutable_document = bytearray(_fixture("valid_customer.yaml"))
    parsed = dtcs.parse(mutable_document, "yaml")
    assert dtcs.is_valid(parsed["report"])
def test_inspect_summary() -> None:
    """``inspect`` output mentions the contract id and an ``inputs:`` section."""
    parsed = dtcs.parse(_fixture("valid_customer.yaml"), "yaml")
    text = dtcs.inspect(parsed["contract"])
    for expected_fragment in ("customer.normalize", "inputs:"):
        assert expected_fragment in text
def test_metadata_validate_matches_full_validate() -> None:
    """Both metadata-only and full validation flag ``dtcs:invalid-metadata``
    for the invalid-timestamp fixture, and the metadata report is invalid."""
    parsed = dtcs.parse(_fixture("invalid_metadata_timestamp.yaml"), "yaml")
    parsed_contract = parsed["contract"]
    metadata_only = dtcs.metadata_validate(parsed_contract)
    everything = dtcs.validate(parsed_contract)
    assert not dtcs.is_valid(metadata_only)
    # The same diagnostic id must show up in both reports.
    for report in (metadata_only, everything):
        assert any(item["id"] == "dtcs:invalid-metadata" for item in report["diagnostics"])
def test_metadata_validate_is_subset_of_metadata_codes() -> None:
    """Every diagnostic id from ``metadata_validate`` also appears in the full
    ``validate`` report, which additionally contains ``dtcs:missing-lineage``."""
    parsed = dtcs.parse(_fixture("missing_lineage.yaml"), "yaml")
    parsed_contract = parsed["contract"]
    metadata_only = dtcs.metadata_validate(parsed_contract)
    everything = dtcs.validate(parsed_contract)
    metadata_codes = {item["id"] for item in metadata_only["diagnostics"]}
    all_codes = {item["id"] for item in everything["diagnostics"]}
    assert metadata_codes <= all_codes
    assert "dtcs:missing-lineage" in all_codes
def test_preserves_extension_fields() -> None:
    """A namespaced extension key survives parsing and does not break validation.

    The inline contract carries a top-level ``acme:featureFlag`` key; the test
    asserts that the key is still present on the parsed contract and that the
    contract validates.
    """
    # YAML block nesting is significant: without it ``schema``, ``fields``,
    # ``type`` and ``nullable`` would collapse into repeated top-level keys.
    # NOTE(review): nesting reconstructed as inputs/outputs -> schema -> fields
    # and lineage -> mappings; confirm against the DTCS contract schema.
    document = b"""
dtcsVersion: "1.0.0"
id: "ext.example"
name: "Extension Example"
version: "0.1.0"
acme:featureFlag: true
inputs:
  - id: "in"
    schema:
      fields:
        - name: "value"
          type: "string"
          nullable: false
outputs:
  - id: "out"
    schema:
      fields:
        - name: "value"
          type: "string"
          nullable: false
lineage:
  mappings:
    - output: "out"
      inputs: ["in"]
"""
    parsed = dtcs.parse(document, "yaml")
    contract = parsed["contract"]
    assert contract is not None
    assert "acme:featureFlag" in contract
    assert dtcs.is_valid(dtcs.validate(contract))
def test_diagnostics_are_deterministic() -> None:
    """Two ``parse_and_validate`` runs on the same bytes give equal diagnostics."""
    document = _fixture("invalid_type.yaml")
    runs = [dtcs.parse_and_validate(document, "yaml") for _ in range(2)]
    assert runs[0]["diagnostics"] == runs[1]["diagnostics"]
@pytest.mark.parametrize("entry", _load_manifest(), ids=lambda case: case["file"])
def test_fixture_expectations(entry: dict) -> None:
    """Check one manifest entry against the actual parse/validate outcome.

    The entry's ``parse_valid``, ``contract`` and ``validate_valid`` flags are
    compared by identity with the observed booleans. Any ``codes`` listed must
    all appear among the final report's diagnostic ids.
    """
    file_name = entry["file"]
    parsed = dtcs.parse(_fixture(file_name), _fixture_format(file_name))
    assert dtcs.is_valid(parsed["report"]) is entry["parse_valid"]
    contract = parsed["contract"]
    assert (contract is not None) is entry["contract"]
    # Without a contract there is nothing to validate; fall back to the parse report.
    report = parsed["report"] if contract is None else dtcs.validate_result(parsed)
    assert dtcs.is_valid(report) is entry["validate_valid"]
    expected_codes = entry.get("codes")
    if expected_codes:
        seen_ids = {diagnostic["id"] for diagnostic in report["diagnostics"]}
        missing = [code for code in expected_codes if code not in seen_ids]
        assert not missing
def _python_dtcs(*args: str) -> subprocess.CompletedProcess[str]:
return subprocess.run(
[sys.executable, "-m", "dtcs", *args],
capture_output=True,
text=True,
check=False,
)
def test_cli_validate_succeeds_on_example() -> None:
    """``validate`` on the repository example exits 0 and prints ``valid``."""
    proc = _python_dtcs("validate", str(EXAMPLE))
    exit_code, printed = proc.returncode, proc.stdout
    assert exit_code == 0
    assert "valid" in printed
def test_cli_validate_succeeds_on_phase_0_2_fixture() -> None:
    """``validate`` on ``valid_metadata.yaml`` exits 0 and prints ``valid``."""
    fixture_path = _fixture_dir().joinpath("valid_metadata.yaml")
    proc = _python_dtcs("validate", str(fixture_path))
    exit_code, printed = proc.returncode, proc.stdout
    assert exit_code == 0
    assert "valid" in printed
def test_cli_validate_fails_on_invalid_contract() -> None:
    """``validate`` on the missing-lineage fixture exits with a non-zero status."""
    fixture_path = _fixture_dir().joinpath("missing_lineage.yaml")
    proc = _python_dtcs("validate", str(fixture_path))
    assert proc.returncode != 0
def test_cli_inspect_fails_on_invalid_contract() -> None:
    """``inspect`` on the unresolved-reference fixture exits with a non-zero status."""
    fixture_path = _fixture_dir().joinpath("unresolved_reference.yaml")
    proc = _python_dtcs("inspect", str(fixture_path))
    assert proc.returncode != 0
def test_cli_inspect_succeeds_on_valid_contract() -> None:
    """``inspect`` on ``valid_customer.yaml`` exits 0 and prints the contract id."""
    fixture_path = _fixture_dir().joinpath("valid_customer.yaml")
    proc = _python_dtcs("inspect", str(fixture_path))
    exit_code, printed = proc.returncode, proc.stdout
    assert exit_code == 0
    assert "customer.normalize" in printed
def test_cli_diagnostics_json_output() -> None:
    """``diagnostics --json`` on an invalid contract exits non-zero and still
    prints JSON with a non-empty ``diagnostics`` value."""
    fixture_path = _fixture_dir().joinpath("missing_lineage.yaml")
    proc = _python_dtcs("diagnostics", "--json", str(fixture_path))
    assert proc.returncode != 0
    decoded = json.loads(proc.stdout)
    assert decoded["diagnostics"]
def test_cli_version_json_output() -> None:
    """``version --json`` reports the same versions the Python module exposes."""
    proc = _python_dtcs("version", "--json")
    assert proc.returncode == 0
    decoded = json.loads(proc.stdout)
    crate_version = decoded["crateVersion"]
    assert crate_version == dtcs.__version__
    spec_version = decoded["specVersion"]
    assert spec_version == dtcs.SPEC_VERSION
def test_unsupported_format_raises() -> None:
    """An unknown format string (``"xml"``) makes ``parse`` raise ``ValueError``."""
    document = _fixture("valid_customer.yaml")
    with pytest.raises(ValueError, match="unsupported format"):
        dtcs.parse(document, "xml")
def test_parse_file_missing_path_raises(tmp_path: Path) -> None:
    """``parse_file`` raises ``ValueError`` when the path does not exist."""
    # A name inside pytest's fresh per-test directory cannot already exist,
    # unlike a fixed path under /tmp that any other process could create.
    missing_path = tmp_path / "does-not-exist-dtcs-fixture.yaml"
    with pytest.raises(ValueError):
        dtcs.parse_file(str(missing_path))
def test_cli_missing_file_exits_cleanly(tmp_path: Path) -> None:
    """``validate`` on a missing file exits 1 with an error message and no traceback."""
    # A name inside pytest's fresh per-test directory cannot already exist,
    # unlike a fixed path under /tmp that any other process could create.
    missing_path = tmp_path / "does-not-exist-dtcs-fixture.yaml"
    output = _python_dtcs("validate", str(missing_path))
    assert output.returncode == 1
    # The CLI must report the problem itself rather than leak a Python traceback.
    assert "traceback" not in output.stderr.lower()
    assert output.stderr.strip()
def test_cli_validate_json_output() -> None:
    """``validate --json`` on the example prints ``valid: true`` and a diagnostics list."""
    proc = _python_dtcs("validate", "--json", str(EXAMPLE))
    assert proc.returncode == 0
    decoded = json.loads(proc.stdout)
    assert decoded["valid"] is True
    diagnostics = decoded["diagnostics"]
    assert isinstance(diagnostics, list)
def test_cli_inspect_json_output() -> None:
    """``inspect --json`` on the example reports the contract id and an
    ``inputs`` value of at least 1."""
    proc = _python_dtcs("inspect", "--json", str(EXAMPLE))
    assert proc.returncode == 0
    decoded = json.loads(proc.stdout)
    contract_id = decoded["id"]
    assert contract_id == "customer.normalize"
    input_count = decoded["inputs"]
    assert input_count >= 1