wme_models/structured.rs
1//! Structured content types (BETA).
2//!
3//! This module provides types for parsed article content including infoboxes,
4//! sections, and tables. These are part of the Structured Contents BETA API.
5//!
6//! # Structured Article Content
7//!
8//! Unlike the raw HTML/wikitext, structured content provides semantically parsed
9//! data that can be programmatically analyzed:
10//!
11//! - **Infoboxes**: Structured data boxes (e.g., taxobox, person infobox)
12//! - **Sections**: Article sections with paragraphs and subsections
13//! - **Tables**: Data tables with headers and rows
14//!
15//! # BETA Status
16//!
17//! These types are part of the experimental Structured Contents endpoints.
18//! They are not covered by SLA and may change as the API evolves.
19
20use crate::Link;
21use serde::{Deserialize, Serialize};
22
23/// Infobox structured content.
24///
25/// Infoboxes are structured data boxes displayed on article pages.
26/// Common examples include taxoboxes for species, person infoboxes for biographies,
27/// and infoboxes for cities, films, etc.
28///
29/// Infoboxes have a tree-like structure with nested parts for complex data.
30///
31/// # Example
32///
33/// ```ignore
34/// use wme_models::Infobox;
35///
36/// // An infobox might contain:
37/// // - Name: "Automatic taxobox"
38/// // - Type: Infobox
39/// // - Parts: [
40/// // { name: "Kingdom:", type: "field", value: "Animalia" },
41/// // { name: "Phylum:", type: "field", value: "Chordata" }
42/// // ]
43/// ```
44#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
45pub struct Infobox {
46 /// Infobox name (e.g., "Automatic taxobox")
47 pub name: Option<String>,
48 /// Infobox type
49 #[serde(rename = "type")]
50 pub infobox_type: InfoboxType,
51 /// Value (for field types)
52 pub value: Option<String>,
53 /// Nested parts (for complex infoboxes)
54 pub has_parts: Option<Vec<InfoboxPart>>,
55 /// Links within infobox
56 pub links: Option<Vec<Link>>,
57 /// Images within infobox
58 pub images: Option<Vec<crate::content::Image>>,
59}
60
61/// Infobox types.
62///
63/// The type determines how the infobox part should be rendered and interpreted.
64#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
65#[serde(rename_all = "snake_case")]
66pub enum InfoboxType {
67 /// Infobox container (root infobox)
68 #[serde(rename = "infobox")]
69 Infobox,
70 /// Field within infobox (name-value pair)
71 #[serde(rename = "field")]
72 Field,
73 /// Section within infobox (group of fields)
74 #[serde(rename = "section")]
75 Section,
76 /// Image in infobox
77 #[serde(rename = "image")]
78 Image,
79}
80
81/// Part of an infobox.
82///
83/// Infobox parts can be nested to create hierarchical data structures.
84/// For example, a taxobox might have sections for classification and
85/// characteristics.
86#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
87pub struct InfoboxPart {
88 /// Part name
89 pub name: Option<String>,
90 /// Part type
91 #[serde(rename = "type")]
92 pub part_type: InfoboxPartType,
93 /// Single value (for field types)
94 pub value: Option<String>,
95 /// Multiple values (for list type)
96 pub values: Option<Vec<String>>,
97 /// Nested parts
98 pub has_parts: Option<Vec<InfoboxPart>>,
99 /// Links
100 pub links: Option<Vec<Link>>,
101 /// Images
102 pub images: Option<Vec<crate::content::Image>>,
103}
104
105/// Infobox part types.
106///
107/// Extended types for parts within infoboxes, including lists.
108#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
109#[serde(rename_all = "snake_case")]
110pub enum InfoboxPartType {
111 /// Infobox container
112 #[serde(rename = "infobox")]
113 Infobox,
114 /// Field (name-value pair)
115 #[serde(rename = "field")]
116 Field,
117 /// Section (grouping of fields)
118 #[serde(rename = "section")]
119 Section,
120 /// Image
121 #[serde(rename = "image")]
122 Image,
123 /// List (multiple values)
124 #[serde(rename = "list")]
125 List,
126}
127
128/// Section with structured content.
129///
130/// Articles are organized into sections. Each section can contain paragraphs,
131/// links, citations, and nested subsections. Sections form a tree structure
132/// with the article root.
133///
134/// # Example
135///
136/// ```ignore
137/// Section {
138/// name: Some("Personal life".to_string()),
139/// section_type: SectionType::Section,
140/// value: None,
141/// has_parts: Some(vec![
142/// Section { name: Some("Relationships".to_string()), ... },
143/// Section { type: Paragraph, value: Some("Baker's first marriage...".to_string()), ... },
144/// ]),
145/// }
146/// ```
147#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
148pub struct Section {
149 /// Section name (from header h2-h6)
150 pub name: Option<String>,
151 /// Section type
152 #[serde(rename = "type")]
153 pub section_type: SectionType,
154 /// Content value (plain text)
155 pub value: Option<String>,
156 /// Links within section
157 pub links: Option<Vec<Link>>,
158 /// Citations within section
159 pub citations: Option<Vec<crate::reference::Citation>>,
160 /// Nested sections
161 pub has_parts: Option<Vec<Section>>,
162 /// Table references (tables live at article root)
163 pub table_references: Option<Vec<TableReference>>,
164}
165
166/// Section types.
167///
168/// Sections can be containers (with subsections) or paragraphs (with text).
169#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
170#[serde(rename_all = "snake_case")]
171pub enum SectionType {
172 /// Section container (has subsections)
173 #[serde(rename = "section")]
174 Section,
175 /// Paragraph (has text content)
176 #[serde(rename = "paragraph")]
177 Paragraph,
178}
179
180/// Table reference (links to table at article root).
181///
182/// Sections don't contain tables directly. Instead, they reference tables
183/// that are stored at the article root level. This allows the same table
184/// to be referenced from multiple locations.
185#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
186pub struct TableReference {
187 /// Table identifier (unique within article)
188 pub identifier: String,
189 /// Confidence score (parser confidence in extraction)
190 pub confidence_score: f64,
191}
192
193/// Table structured content.
194///
195/// Tables extracted from article content with headers and rows.
196/// Tables are stored at the article root level and referenced from sections.
197///
198/// # Example
199///
200/// ```ignore
201/// Table {
202/// identifier: "demographics_table1".to_string(),
203/// headers: vec![vec![
204/// TableCell { value: "Year".to_string() },
205/// TableCell { value: "Pop.".to_string() },
206/// ]],
207/// rows: vec![
208/// vec![
209/// TableCell { value: "1666".to_string() },
210/// TableCell { value: "625".to_string() },
211/// ],
212/// ],
213/// confidence_score: 0.9,
214/// }
215/// ```
216#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
217pub struct Table {
218 /// Table identifier (unique within article)
219 pub identifier: String,
220 /// Table headers (array of arrays for multi-row headers)
221 pub headers: Vec<Vec<TableCell>>,
222 /// Table rows (array of arrays of cells)
223 pub rows: Vec<Vec<TableCell>>,
224 /// Confidence score (parser confidence)
225 pub confidence_score: f64,
226}
227
228/// Table cell.
229///
230/// A single cell within a table row.
231#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
232pub struct TableCell {
233 /// Cell value (text content)
234 pub value: String,
235}
236
#[cfg(test)]
mod tests {
    use super::*;

    /// Wraps `text` as a present, owned string field.
    fn some(text: &str) -> Option<String> {
        Some(text.to_string())
    }

    /// Builds a table cell holding `text`.
    fn cell(text: &str) -> TableCell {
        TableCell {
            value: text.to_string(),
        }
    }

    /// An infobox built with one nested field keeps its name and its parts.
    #[test]
    fn test_infobox_creation() {
        let kingdom = InfoboxPart {
            name: some("Kingdom:"),
            part_type: InfoboxPartType::Field,
            value: some("Animalia"),
            values: None,
            has_parts: None,
            links: None,
            images: None,
        };
        let infobox = Infobox {
            name: some("Automatic taxobox"),
            infobox_type: InfoboxType::Infobox,
            value: None,
            has_parts: Some(vec![kingdom]),
            links: None,
            images: None,
        };

        assert_eq!(infobox.name, some("Automatic taxobox"));

        let parts = infobox.has_parts.as_ref().unwrap();
        assert!(!parts.is_empty());
    }

    /// A section built with one nested subsection keeps its name.
    #[test]
    fn test_section_creation() {
        let relationships = Section {
            name: some("Relationships"),
            section_type: SectionType::Section,
            value: some("Baker's first marriage..."),
            links: None,
            citations: None,
            has_parts: None,
            table_references: None,
        };
        let section = Section {
            name: some("Personal life"),
            section_type: SectionType::Section,
            value: None,
            links: None,
            citations: None,
            has_parts: Some(vec![relationships]),
            table_references: None,
        };

        assert_eq!(section.name, some("Personal life"));
    }

    /// A table built with one header row and one data row keeps both.
    #[test]
    fn test_table_creation() {
        let header_row = vec![cell("Year"), cell("Pop.")];
        let data_row = vec![cell("1666"), cell("625")];
        let table = Table {
            identifier: "demographics_table1".to_string(),
            headers: vec![header_row],
            rows: vec![data_row],
            confidence_score: 0.9,
        };

        assert_eq!(table.identifier, "demographics_table1");
        assert_eq!(table.rows.len(), 1);
    }

    /// A cell exposes the text it was built with.
    #[test]
    fn test_table_cell() {
        let sample = cell("Test value");

        assert_eq!(sample.value, "Test value");
    }
}