1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
# This file is automatically generated by pyo3_stub_gen
# ruff: noqa: E501, F401, F403, F405
=
r"""
Classification task definition with labels and threshold metadata.
"""
r"""
Classification task name.
"""
r"""
Ordered list of class labels.
"""
r"""
Optional confidence threshold for the classification.
"""
r"""
Whether multiple labels may be assigned.
"""
r"""
Entity extraction task definition.
"""
r"""
Entity labels that should be extracted.
"""
r"""
Information-extraction schema loaded from JSON (or from a dataclass / Pydantic model type).

Build an `IESchema` from a JSON string with `loads()` (IE ingest JSON or a root JSON Schema
object), from a path with `load()`, or from a stdlib dataclass / Pydantic v2 `BaseModel` by
passing the class (or an instance) to `loads()`. Iterating over the object yields task instances
in schema order.

Example:
>>> import ie_schema
>>> _j = '{"json_structures":[{"name":"Business","business_name":{"dtype":"str"}}]}'
>>> schema = ie_schema.IESchema.loads(_j)
>>> isinstance(schema, ie_schema.IESchema)
True
>>> len(list(schema))
1
"""
r"""
Parse an `IESchema` from a JSON string or from a dataclass / Pydantic v2 `BaseModel` type.

String input must be either IE ingest JSON (top-level keys such as `json_structures`,
`entities`, …) or a root JSON Schema object (`type`, `properties`, …). Unknown top-level
keys are rejected when parsing the IE ingest shape, so that a JSON Schema document is not
misread as an empty ingest.

For a dataclass or `BaseModel` type (or instance), Pydantic v2 builds a JSON Schema
(`TypeAdapter` for dataclasses, `model_json_schema()` for `BaseModel` subclasses), which is
then parsed in the same way as JSON Schema string input.

Example:
>>> import ie_schema
>>> schema = ie_schema.IESchema.loads('{"json_structures":[{"name":"Business","business_name":{"dtype":"str"}}]}')
>>> len(list(schema))
1
"""
r"""
Parse an `IESchema` from a JSON file path.
"""
r"""
Return an iterator over planned extraction tasks.
"""
r"""
Return the next planned task, or `None` at the end.
"""
r"""
Render the generated extraction prompt as a debug string.

Example:
>>> import ie_schema
>>> schema = ie_schema.IESchema.loads('{"json_structures":[{"name":"Business","business_name":{"dtype":"str"}}]}')
>>> s = schema.prompt()
>>> ("[P]" in s) and ("business_name" in s)
True
"""
r"""
Structured JSON extraction task with named children.
"""
r"""
Structure name.
"""
r"""
Child fields that belong to this structure.
"""
r"""
Relation extraction task between head and tail entity types.
"""
r"""
Relation name.
"""
r"""
Head entity type.
"""
r"""
Tail entity type.
"""
r"""
Optional human-readable relation description.
"""
r"""
Child field in a `JSONStructureTask`.
"""
r"""
Property name for this child field.
"""
r"""
Allowed string choices for this property.
"""
r"""
Optional child-field description.
"""
r"""
Base class for all extraction tasks yielded by `IESchema`.
"""
...