wme_models/structured.rs
1//! Structured content types (BETA).
2//!
3//! This module provides types for parsed article content including infoboxes,
4//! sections, and tables. These are part of the Structured Contents BETA API.
5//!
6//! # Structured Article Content
7//!
8//! Unlike the raw HTML/wikitext, structured content provides semantically parsed
9//! data that can be programmatically analyzed:
10//!
11//! - **Infoboxes**: Structured data boxes (e.g., taxobox, person infobox)
12//! - **Sections**: Article sections with paragraphs and subsections
13//! - **Tables**: Data tables with headers and rows
14//!
15//! # BETA Status
16//!
17//! These types are part of the experimental Structured Contents endpoints.
18//! They are not covered by SLA and may change as the API evolves.
19
20use crate::content::Image;
21use crate::reference::Citation;
22use crate::Link;
23use serde::{Deserialize, Serialize};
24
25/// Infobox structured content.
26///
27/// Infoboxes are structured data boxes displayed on article pages.
28/// Common examples include taxoboxes for species, person infoboxes for biographies,
29/// and infoboxes for cities, films, etc.
30///
31/// Infoboxes have a tree-like structure with nested parts for complex data.
32///
33/// # Example
34///
35/// ```ignore
36/// use wme_models::Infobox;
37///
38/// // An infobox might contain:
39/// // - Name: "Automatic taxobox"
40/// // - Type: Infobox
41/// // - Parts: [
42/// // { name: "Kingdom:", type: "field", value: "Animalia" },
43/// // { name: "Phylum:", type: "field", value: "Chordata" }
44/// // ]
45/// ```
46#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
47pub struct Infobox {
48 /// Infobox name (e.g., "Automatic taxobox")
49 pub name: Option<String>,
50 /// Infobox type
51 #[serde(rename = "type")]
52 pub infobox_type: InfoboxType,
53 /// Value (for field types)
54 pub value: Option<String>,
55 /// Nested parts (for complex infoboxes)
56 pub has_parts: Option<Vec<InfoboxPart>>,
57 /// Links within infobox
58 pub links: Option<Vec<Link>>,
59 /// Images within infobox
60 pub images: Option<Vec<Image>>,
61}
62
63/// Infobox types.
64///
65/// The type determines how the infobox part should be rendered and interpreted.
66#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
67#[serde(rename_all = "snake_case")]
68pub enum InfoboxType {
69 /// Infobox container (root infobox)
70 #[serde(rename = "infobox")]
71 Infobox,
72 /// Field within infobox (name-value pair)
73 #[serde(rename = "field")]
74 Field,
75 /// Section within infobox (group of fields)
76 #[serde(rename = "section")]
77 Section,
78 /// Image in infobox
79 #[serde(rename = "image")]
80 Image,
81}
82
83/// Part of an infobox.
84///
85/// Infobox parts can be nested to create hierarchical data structures.
86/// For example, a taxobox might have sections for classification and
87/// characteristics.
88#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
89pub struct InfoboxPart {
90 /// Part name
91 pub name: Option<String>,
92 /// Part type
93 #[serde(rename = "type")]
94 pub part_type: InfoboxPartType,
95 /// Single value (for field types)
96 pub value: Option<String>,
97 /// Multiple values (for list type)
98 pub values: Option<Vec<String>>,
99 /// Nested parts
100 pub has_parts: Option<Vec<InfoboxPart>>,
101 /// Links
102 pub links: Option<Vec<Link>>,
103 /// Images
104 pub images: Option<Vec<Image>>,
105}
106
107/// Infobox part types.
108///
109/// Extended types for parts within infoboxes, including lists.
110#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
111#[serde(rename_all = "snake_case")]
112pub enum InfoboxPartType {
113 /// Infobox container
114 #[serde(rename = "infobox")]
115 Infobox,
116 /// Field (name-value pair)
117 #[serde(rename = "field")]
118 Field,
119 /// Section (grouping of fields)
120 #[serde(rename = "section")]
121 Section,
122 /// Image
123 #[serde(rename = "image")]
124 Image,
125 /// List (multiple values)
126 #[serde(rename = "list")]
127 List,
128}
129
130/// Section with structured content.
131///
132/// Articles are organized into sections. Each section can contain paragraphs,
133/// links, citations, and nested subsections. Sections form a tree structure
134/// with the article root.
135///
136/// # Example
137///
138/// ```ignore
139/// Section {
140/// name: Some("Personal life".to_string()),
141/// section_type: SectionType::Section,
142/// value: None,
143/// has_parts: Some(vec![
144/// Section { name: Some("Relationships".to_string()), ... },
145/// Section { type: Paragraph, value: Some("Baker's first marriage...".to_string()), ... },
146/// ]),
147/// }
148/// ```
149#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
150pub struct Section {
151 /// Section name (from header h2-h6)
152 pub name: Option<String>,
153 /// Section type
154 #[serde(rename = "type")]
155 pub section_type: SectionType,
156 /// Content value (plain text)
157 pub value: Option<String>,
158 /// Links within section
159 pub links: Option<Vec<Link>>,
160 /// Citations within section
161 pub citations: Option<Vec<Citation>>,
162 /// Nested sections
163 pub has_parts: Option<Vec<Section>>,
164 /// Table references (tables live at article root)
165 pub table_references: Option<Vec<TableReference>>,
166}
167
168/// Section types.
169///
170/// Sections can be containers (with subsections) or paragraphs (with text).
171#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
172#[serde(rename_all = "snake_case")]
173pub enum SectionType {
174 /// Section container (has subsections)
175 #[serde(rename = "section")]
176 Section,
177 /// Paragraph (has text content)
178 #[serde(rename = "paragraph")]
179 Paragraph,
180}
181
182/// Table reference (links to table at article root).
183///
184/// Sections don't contain tables directly. Instead, they reference tables
185/// that are stored at the article root level. This allows the same table
186/// to be referenced from multiple locations.
187#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
188pub struct TableReference {
189 /// Table identifier (unique within article)
190 pub identifier: String,
191 /// Confidence score (parser confidence in extraction)
192 pub confidence_score: f64,
193}
194
195/// Table structured content.
196///
197/// Tables extracted from article content with headers and rows.
198/// Tables are stored at the article root level and referenced from sections.
199///
200/// # Example
201///
202/// ```ignore
203/// Table {
204/// identifier: "demographics_table1".to_string(),
205/// headers: vec![vec![
206/// TableCell { value: "Year".to_string() },
207/// TableCell { value: "Pop.".to_string() },
208/// ]],
209/// rows: vec![
210/// vec![
211/// TableCell { value: "1666".to_string() },
212/// TableCell { value: "625".to_string() },
213/// ],
214/// ],
215/// confidence_score: 0.9,
216/// }
217/// ```
218#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
219pub struct Table {
220 /// Table identifier (unique within article)
221 pub identifier: String,
222 /// Table headers (array of arrays for multi-row headers)
223 pub headers: Vec<Vec<TableCell>>,
224 /// Table rows (array of arrays of cells)
225 pub rows: Vec<Vec<TableCell>>,
226 /// Confidence score (parser confidence)
227 pub confidence_score: f64,
228}
229
230/// Table cell.
231///
232/// A single cell within a table row.
233#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
234pub struct TableCell {
235 /// Cell value (text content)
236 pub value: String,
237}
238
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for a table cell holding `text`.
    fn cell(text: &str) -> TableCell {
        TableCell {
            value: text.to_string(),
        }
    }

    /// Builds a section with no links, citations, or table references.
    fn make_section(
        name: Option<&str>,
        section_type: SectionType,
        value: Option<&str>,
        has_parts: Option<Vec<Section>>,
    ) -> Section {
        Section {
            name: name.map(String::from),
            section_type,
            value: value.map(String::from),
            links: None,
            citations: None,
            has_parts,
            table_references: None,
        }
    }

    #[test]
    fn test_infobox_creation() {
        let infobox = Infobox {
            name: Some("Automatic taxobox".to_string()),
            infobox_type: InfoboxType::Infobox,
            value: None,
            has_parts: Some(vec![InfoboxPart {
                name: Some("Kingdom:".to_string()),
                part_type: InfoboxPartType::Field,
                value: Some("Animalia".to_string()),
                values: None,
                has_parts: None,
                links: None,
                images: None,
            }]),
            links: None,
            images: None,
        };

        assert_eq!(infobox.name.as_deref(), Some("Automatic taxobox"));
        assert_eq!(infobox.infobox_type, InfoboxType::Infobox);
        assert!(infobox.value.is_none());

        // Inspect the nested part rather than only checking it exists.
        let parts = infobox
            .has_parts
            .as_deref()
            .expect("infobox was built with parts");
        assert_eq!(parts.len(), 1);
        assert_eq!(parts[0].name.as_deref(), Some("Kingdom:"));
        assert_eq!(parts[0].part_type, InfoboxPartType::Field);
        assert_eq!(parts[0].value.as_deref(), Some("Animalia"));

        // `Clone` and `PartialEq` must agree on the whole tree.
        assert_eq!(infobox.clone(), infobox);
    }

    #[test]
    fn test_section_creation() {
        let root = make_section(
            Some("Personal life"),
            SectionType::Section,
            None,
            Some(vec![
                make_section(Some("Relationships"), SectionType::Section, None, None),
                make_section(
                    None,
                    SectionType::Paragraph,
                    Some("Baker's first marriage..."),
                    None,
                ),
            ]),
        );

        assert_eq!(root.name.as_deref(), Some("Personal life"));
        assert_eq!(root.section_type, SectionType::Section);
        assert!(root.value.is_none());

        let children = root
            .has_parts
            .as_deref()
            .expect("root was built with children");
        assert_eq!(children.len(), 2);
        assert_eq!(children[0].name.as_deref(), Some("Relationships"));
        assert_eq!(children[0].section_type, SectionType::Section);
        assert_eq!(children[1].section_type, SectionType::Paragraph);
        assert_eq!(
            children[1].value.as_deref(),
            Some("Baker's first marriage...")
        );

        assert_eq!(root.clone(), root);
    }

    #[test]
    fn test_table_creation() {
        let table = Table {
            identifier: "demographics_table1".to_string(),
            headers: vec![vec![cell("Year"), cell("Pop.")]],
            rows: vec![vec![cell("1666"), cell("625")]],
            confidence_score: 0.9,
        };

        assert_eq!(table.identifier, "demographics_table1");
        assert_eq!(table.headers.len(), 1);
        assert_eq!(table.headers[0], vec![cell("Year"), cell("Pop.")]);
        assert_eq!(table.rows.len(), 1);
        assert_eq!(table.rows[0], vec![cell("1666"), cell("625")]);
        // Every data row should be as wide as the header row.
        assert!(table
            .rows
            .iter()
            .all(|row| row.len() == table.headers[0].len()));
        assert!((table.confidence_score - 0.9).abs() < f64::EPSILON);

        assert_eq!(table.clone(), table);
    }

    #[test]
    fn test_table_cell() {
        let cell = cell("Test value");

        assert_eq!(cell.value, "Test value");
        assert_eq!(cell.clone(), cell);
    }
}