# odcs 0.8.0
#
# Reference implementation of the Open Data Contract Standard (ODCS).
# Documentation
from __future__ import annotations

import json
import subprocess
import sys
from pathlib import Path

import pytest
import pyodcs

# Directory that contains this file.
PACKAGE_ROOT = Path(__file__).resolve().parent
# Fixture directory: "tests/fixtures" under the third ancestor directory of
# this file (presumably the repository root -- confirm against the layout).
FIXTURES = Path(__file__).resolve().parents[2] / "tests" / "fixtures"
# Example contract: "examples/minimal.odcs.yaml" under that same ancestor.
EXAMPLE = Path(__file__).resolve().parents[2] / "examples" / "minimal.odcs.yaml"


def _fixture(name: str) -> bytes:
    """Return the raw bytes of the file called *name* inside ``FIXTURES``."""
    fixture_path = FIXTURES / name
    return fixture_path.read_bytes()


def test_upstream_spec_version() -> None:
    """Pin the upstream spec version, the package version and one code value."""
    expected_spec = "3.1.0"
    expected_package = "0.8.0"
    expected_invalid_kind = "odcs:invalid-kind"

    assert pyodcs.UPSTREAM_SPEC_VERSION == expected_spec
    assert pyodcs.__version__ == expected_package
    assert pyodcs.CODES["INVALID_KIND"] == expected_invalid_kind


def test_validation_phases_constants() -> None:
    """Check the values stored under the DOCUMENT and JSON_SCHEMA phase keys."""
    phases = pyodcs.VALIDATION_PHASES
    assert phases["DOCUMENT"] == "document"
    assert phases["JSON_SCHEMA"] == "jsonSchema"


def test_validation_diagnostics_include_validation_phase() -> None:
    """Validation-stage diagnostics must each carry a ``validationPhase`` key.

    The ``invalid-kind.yaml`` fixture has to yield at least one such
    diagnostic, and at least one of them must be tagged with the DOCUMENT
    phase value.
    """
    report = pyodcs.parse_and_validate(_fixture("invalid-kind.yaml"), "yaml")

    from_validation = []
    for entry in report["diagnostics"]:
        if entry.get("stage") == "validation":
            from_validation.append(entry)

    assert from_validation
    for entry in from_validation:
        assert "validationPhase" in entry

    document_phase = pyodcs.VALIDATION_PHASES["DOCUMENT"]
    assert any(
        entry.get("validationPhase") == document_phase for entry in from_validation
    )


def test_parse_diagnostics_omit_validation_phase() -> None:
    """Diagnostics returned by ``parse`` alone never contain ``validationPhase``."""
    parsed = pyodcs.parse(_fixture("invalid-nested-duplicate-key.yaml"), "yaml")
    diagnostics = parsed["report"]["diagnostics"]
    assert all("validationPhase" not in entry for entry in diagnostics)


def test_parse_valid_yaml_fixture() -> None:
    """The minimal YAML fixture parses as valid and exposes its contract name."""
    parsed = pyodcs.parse(_fixture("minimal.odcs.yaml"), "yaml")
    assert pyodcs.is_valid(parsed["report"])

    document = parsed["contract"]
    assert document is not None
    assert document["name"] == "customer_data_contract"


def test_parse_and_validate_repo_example() -> None:
    """The example contract at ``EXAMPLE`` passes ``parse_and_validate`` as YAML."""
    outcome = pyodcs.parse_and_validate(EXAMPLE.read_bytes(), "yaml")
    assert pyodcs.is_valid(outcome)


def test_parse_file_repo_example() -> None:
    """``parse_file`` on the example path yields a valid ``DataContract``."""
    example_path = str(EXAMPLE)
    parsed = pyodcs.parse_file(example_path)
    assert pyodcs.is_valid(parsed["report"])

    document = parsed["contract"]
    assert document is not None
    assert document["kind"] == "DataContract"


def test_validate_result_merges_parse_and_validation_diagnostics() -> None:
    """``validate_result`` on a parsed inline contract yields an invalid report.

    Only the overall verdict of the resulting report is asserted here.
    """
    payload = (
        b"version: '3.1.0'\napiVersion: v9.9.9\nkind: wrong\nid: ''\nstatus: draft\n"
    )
    parsed = pyodcs.parse(payload, "yaml")
    merged = pyodcs.validate_result(parsed)
    assert not pyodcs.is_valid(merged)


def test_validate_result_accepts_validation_report_shape() -> None:
    """Passing a ``parse_and_validate`` report to ``validate_result`` keeps it.

    The diagnostics of the returned report equal those of the input report,
    and the returned report is still invalid.
    """
    payload = (
        b"version: '3.1.0'\napiVersion: v3.1.0\nkind: wrong\nid: x\nstatus: draft\n"
    )
    report = pyodcs.parse_and_validate(payload, "yaml")
    rewrapped = pyodcs.validate_result(report)

    assert rewrapped["diagnostics"] == report["diagnostics"]
    assert not pyodcs.is_valid(rewrapped)


def test_validate_result_is_idempotent() -> None:
    """Two ``validate_result`` calls on one parse result give equal diagnostics."""
    parsed = pyodcs.parse(_fixture("invalid-kind.yaml"), "yaml")
    runs = [pyodcs.validate_result(parsed) for _ in range(2)]
    assert runs[0]["diagnostics"] == runs[1]["diagnostics"]


def test_validate_result_rejects_wrong_shape() -> None:
    """``validate_result`` raises ``TypeError`` for ``{"unexpected": True}``."""
    # Relies on the module-level ``pytest`` import; no local re-import needed.
    with pytest.raises(TypeError):
        pyodcs.validate_result({"unexpected": True})


def test_parse_file_missing_raises_file_not_found() -> None:
    """``parse_file`` raises ``FileNotFoundError`` for ``does-not-exist.yaml``."""
    # Relies on the module-level ``pytest`` import; no local re-import needed.
    missing_path = FIXTURES / "does-not-exist.yaml"
    with pytest.raises(FileNotFoundError):
        pyodcs.parse_file(str(missing_path))


def test_quality_rules_count_includes_items() -> None:
    """``quality_rules_count`` is 1 for the ``with-schema-quality-items`` fixture."""
    parsed = pyodcs.parse(_fixture("with-schema-quality-items.yaml"), "yaml")
    document = parsed["contract"]
    assert document is not None

    rule_total = pyodcs.quality_rules_count(document)
    assert rule_total == 1


def test_inspect_contract() -> None:
    """``inspect`` output for the minimal fixture contains both expected names."""
    parsed = pyodcs.parse(_fixture("minimal.odcs.yaml"), "yaml")
    document = parsed["contract"]
    assert document is not None

    rendered = pyodcs.inspect(document)
    for needle in ("customer_data_contract", "customer-data-contract"):
        assert needle in rendered


def test_inspect_summary_matches_rust_fields() -> None:
    """``inspect_summary`` for the minimal fixture exposes four expected fields."""
    parsed = pyodcs.parse(_fixture("minimal.odcs.yaml"), "yaml")
    document = parsed["contract"]
    assert document is not None

    overview = pyodcs.inspect_summary(document)
    expected_fields = {
        "id": "customer-data-contract",
        "apiVersion": "v3.1.0",
        "qualityCount": 1,
        "schemaCount": 1,
    }
    # Checked one key at a time, in the order listed above.
    for field_name, expected_value in expected_fields.items():
        assert overview[field_name] == expected_value


def test_quality_rules_count() -> None:
    """``quality_rules_count`` is 1 for the minimal fixture."""
    parsed = pyodcs.parse(_fixture("minimal.odcs.yaml"), "yaml")
    document = parsed["contract"]
    assert document is not None

    rule_total = pyodcs.quality_rules_count(document)
    assert rule_total == 1


def _run_pyodcs_cli(*args: str) -> subprocess.CompletedProcess[str]:
    """Run ``python -m pyodcs`` with *args* and return the completed process.

    The current interpreter is used, the working directory is the third
    ancestor directory of this file, stdout and stderr are captured as text,
    and a non-zero exit status does not raise.
    """
    command = [sys.executable, "-m", "pyodcs"]
    command.extend(args)
    working_dir = Path(__file__).resolve().parents[2]
    return subprocess.run(
        command,
        cwd=working_dir,
        capture_output=True,
        text=True,
        check=False,
    )


def test_cli_validate_success() -> None:
    """``validate`` on the minimal fixture exits 0 and prints "valid"."""
    fixture_path = FIXTURES / "minimal.odcs.yaml"
    completed = _run_pyodcs_cli("validate", str(fixture_path))
    assert completed.returncode == 0
    assert "valid" in completed.stdout


def test_cli_validate_invalid_contract_exits_1() -> None:
    """``validate`` on the ``invalid-kind.yaml`` fixture exits with status 1."""
    fixture_path = FIXTURES / "invalid-kind.yaml"
    completed = _run_pyodcs_cli("validate", str(fixture_path))
    assert completed.returncode == 1


def test_cli_validate_text_includes_validation_phase() -> None:
    """``validate`` text output names the structural phase and exits 1."""
    fixture_path = FIXTURES / "invalid-structural-duplicate-schema-name.yaml"
    completed = _run_pyodcs_cli("validate", str(fixture_path))
    assert completed.returncode == 1
    assert "phase: structural" in completed.stdout


def test_cli_validate_parse_failure_exits_2() -> None:
    """``validate`` on the ``malformed.yaml`` fixture exits with status 2."""
    fixture_path = FIXTURES / "malformed.yaml"
    completed = _run_pyodcs_cli("validate", str(fixture_path))
    assert completed.returncode == 2


def test_cli_missing_file_exits_2() -> None:
    """``validate`` on the path ``does-not-exist.yaml`` exits with status 2."""
    missing_path = FIXTURES / "does-not-exist.yaml"
    completed = _run_pyodcs_cli("validate", str(missing_path))
    assert completed.returncode == 2


def test_cli_inspect_json_output() -> None:
    """``inspect --json`` exits 0 and prints JSON with the expected fields."""
    fixture_path = FIXTURES / "minimal.odcs.yaml"
    completed = _run_pyodcs_cli("inspect", "--json", str(fixture_path))
    assert completed.returncode == 0

    decoded = json.loads(completed.stdout)
    assert decoded["qualityCount"] == 1
    assert decoded["id"] == "customer-data-contract"


def test_cli_schema_command() -> None:
    """``schema`` exits 0 and prints JSON containing ``$schema`` or ``title``."""
    completed = _run_pyodcs_cli("schema")
    assert completed.returncode == 0

    decoded = json.loads(completed.stdout)
    assert any(marker in decoded for marker in ("$schema", "title"))


def test_cli_schema_url_only() -> None:
    """``schema --url-only`` exits 0 and prints the upstream schema label."""
    completed = _run_pyodcs_cli("schema", "--url-only")
    exit_status, printed = completed.returncode, completed.stdout
    assert exit_status == 0
    assert "Upstream ODCS JSON Schema" in printed


def test_cli_validate_strict_json_schema_violation() -> None:
    """``validate --strict`` reports the JSON-schema violation code and exits 1."""
    fixture_path = FIXTURES / "invalid-json-schema-only.yaml"
    cli_args = ["validate", "--strict", str(fixture_path)]
    completed = _run_pyodcs_cli(*cli_args)
    assert completed.returncode == 1
    assert "odcs:json-schema-violation" in completed.stdout


def test_validate_strict_is_deprecated_alias() -> None:
    """``validate`` returns equal reports with and without ``strict=True``."""
    document = pyodcs.parse(_fixture("minimal.odcs.yaml"), "yaml")["contract"]
    assert document is not None
    # Left operand (default call) is evaluated before the strict call.
    assert pyodcs.validate(document) == pyodcs.validate(document, strict=True)


def test_invalid_json_schema_fixture_fails_default_validation() -> None:
    """Default ``validate`` rejects the ``invalid-json-schema-only`` fixture."""
    parsed = pyodcs.parse(_fixture("invalid-json-schema-only.yaml"), "yaml")
    document = parsed["contract"]
    assert document is not None

    verdict = pyodcs.validate(document)
    assert not pyodcs.is_valid(verdict)


def test_parse_rejects_nested_yaml_duplicate_key() -> None:
    """Parsing the nested-duplicate-key YAML fixture reports the duplicate.

    The parse result must be invalid and contain a diagnostic whose ``id`` is
    the DUPLICATE_KEY code and whose ``object_ref`` is ``schema[0].name``.
    """
    result = pyodcs.parse(_fixture("invalid-nested-duplicate-key.yaml"), "yaml")
    assert not pyodcs.is_valid(result)

    duplicate_code = pyodcs.CODES["DUPLICATE_KEY"]
    # A default of None makes a missing diagnostic fail with a readable
    # assertion instead of a bare StopIteration escaping from next().
    duplicate = next(
        (
            diagnostic
            for diagnostic in result["report"]["diagnostics"]
            if diagnostic.get("id") == duplicate_code
        ),
        None,
    )
    assert duplicate is not None, f"no {duplicate_code!r} diagnostic was reported"
    assert duplicate.get("object_ref") == "schema[0].name"


def test_parse_rejects_nested_json_duplicate_key() -> None:
    """Parsing the nested-duplicate-key JSON fixture reports the duplicate.

    The parse result must be invalid and contain a diagnostic whose ``id`` is
    the DUPLICATE_KEY code and whose ``object_ref`` is ``schema[0].name``.
    """
    result = pyodcs.parse(_fixture("invalid-nested-duplicate-key.json"), "json")
    assert not pyodcs.is_valid(result)

    duplicate_code = pyodcs.CODES["DUPLICATE_KEY"]
    # A default of None makes a missing diagnostic fail with a readable
    # assertion instead of a bare StopIteration escaping from next().
    duplicate = next(
        (
            diagnostic
            for diagnostic in result["report"]["diagnostics"]
            if diagnostic.get("id") == duplicate_code
        ),
        None,
    )
    assert duplicate is not None, f"no {duplicate_code!r} diagnostic was reported"
    assert duplicate.get("object_ref") == "schema[0].name"


def test_is_valid_accepts_parse_result_shape() -> None:
    """``is_valid`` accepts a whole parse result and reports it as not valid."""
    fixture_bytes = _fixture("invalid-nested-duplicate-key.yaml")
    parse_result = pyodcs.parse(fixture_bytes, "yaml")
    assert not pyodcs.is_valid(parse_result)


def test_parse_file_unsupported_extension_raises_value_error(tmp_path: Path) -> None:
    """``parse_file`` raises ``ValueError`` for a ``.txt`` file.

    The error message must match "unsupported file extension".
    """
    unsupported = tmp_path.joinpath("contract.txt")
    unsupported.write_text("version: 1", encoding="utf-8")

    with pytest.raises(ValueError, match="unsupported file extension"):
        pyodcs.parse_file(str(unsupported))


def test_cli_validate_duplicate_key_exits_2() -> None:
    """``validate`` on the nested-duplicate-key YAML fixture exits with status 2."""
    fixture_path = FIXTURES / "invalid-nested-duplicate-key.yaml"
    completed = _run_pyodcs_cli("validate", str(fixture_path))
    assert completed.returncode == 2


def test_pinned_schema_export() -> None:
    """``pinned_schema`` returns a dict, and metadata when ``json_metadata=True``.

    The metadata form must report schema version "3.1.0" and contain a
    ``schema`` key.
    """
    assert isinstance(pyodcs.pinned_schema(), dict)

    with_metadata = pyodcs.pinned_schema(json_metadata=True)
    assert with_metadata["schemaVersion"] == "3.1.0"
    assert "schema" in with_metadata