# ie-schema 0.1.5
#
# A flexible schema specification and parser for information extraction tasks.
# Documentation
"""Python tests for IESchema.loads with str, dataclass, and Pydantic types."""

from __future__ import annotations

import re
from dataclasses import dataclass

import pytest

import ie_schema


def _single_structure_task(schema: ie_schema.IESchema) -> ie_schema.JSONStructureTask:
    """Return the sole task of ``schema`` after checking its count and type.

    Iterates ``schema`` to collect its tasks, asserts that there is exactly
    one, asserts that it is an ``ie_schema.JSONStructureTask``, and returns it.
    """
    collected = [*schema]
    assert len(collected) == 1
    only_task = collected[0]
    assert isinstance(only_task, ie_schema.JSONStructureTask)
    return only_task


def test_loads_string_unchanged():
    """An IE JSON string loads into one structure task exposing its declared child."""
    ie_json = '{"json_structures":[{"name":"Biz","a":{"dtype":"str"}}]}'
    task = _single_structure_task(ie_schema.IESchema.loads(ie_json))
    # The input spells the name "Biz"; the loaded task is expected to report "biz".
    assert task.name == "biz"
    properties = [child.property for child in task.children]
    assert properties == ["a"]


def test_loads_json_schema_string():
    """A root JSON Schema document given as a str loads into one structure task.

    The payload is plain JSON Schema, not the IE ingest format; it is expected
    to be accepted as a str after strict IE ingest rejects it.
    """
    json_schema_text = """{
        "$id": "Doc",
        "type": "object",
        "properties": {"title": {"type": "string"}, "score": {"type": "number"}}
    }"""
    task = _single_structure_task(ie_schema.IESchema.loads(json_schema_text))
    # "$id" is "Doc" in the input; the task name is expected to be "doc".
    assert task.name == "doc"
    property_names = {child.property for child in task.children}
    assert property_names == {"title", "score"}


def test_loads_ie_json_rejects_unknown_top_level_key():
    """IE JSON carrying an unrecognized top-level key must raise ``ValueError``."""
    payload = '{"entities": ["gene"], "not_an_ie_field": true}'
    loads = ie_schema.IESchema.loads
    with pytest.raises(ValueError):
        loads(payload)


# Two-field dataclass used as input to IESchema.loads in the dataclass tests,
# both as a type and as an instance.
@dataclass
class _SampleRecord:
    # Field order matters: one test constructs this positionally as ("x", 1).
    title: str
    count: int


def test_loads_dataclass_type():
    """Passing the dataclass type itself yields one task with a child per field."""
    loaded = ie_schema.IESchema.loads(_SampleRecord)
    structure = _single_structure_task(loaded)
    assert {child.property for child in structure.children} == {"title", "count"}


def test_loads_dataclass_instance():
    """Passing a dataclass instance yields one task with a child per field."""
    record = _SampleRecord("x", 1)
    structure = _single_structure_task(ie_schema.IESchema.loads(record))
    properties = {child.property for child in structure.children}
    assert properties == {"title", "count"}


def test_loads_pydantic_model():
    """A Pydantic model class loads into one task with a child per model field.

    The test is skipped when pydantic cannot be imported.
    """
    # importorskip returns the imported module, so it doubles as the import.
    pydantic = pytest.importorskip("pydantic")

    class M(pydantic.BaseModel):
        title: str
        score: float

    structure = _single_structure_task(ie_schema.IESchema.loads(M))
    property_names = {child.property for child in structure.children}
    assert property_names == {"title", "score"}


def test_loads_unsupported_raises():
    """An int and a plain class are both rejected with an "unsupported" ValueError."""

    class Plain:
        pass

    loads = ie_schema.IESchema.loads

    # The int case matches the message case-sensitively.
    with pytest.raises(ValueError, match="unsupported"):
        loads(42)

    # The plain-class case matches the message ignoring case.
    any_case_unsupported = re.compile("unsupported", re.I)
    with pytest.raises(ValueError, match=any_case_unsupported):
        loads(Plain)


def test_loads_plain_instance_unsupported():
    """An instance of a plain class is rejected with an "unsupported" ValueError."""

    class Plain:
        pass

    instance = Plain()
    any_case_unsupported = re.compile("unsupported", re.I)
    with pytest.raises(ValueError, match=any_case_unsupported):
        ie_schema.IESchema.loads(instance)