Skip to main content

wme_models/
structured.rs

1//! Structured content types (BETA).
2//!
3//! This module provides types for parsed article content including infoboxes,
4//! sections, and tables. These are part of the Structured Contents BETA API.
5//!
6//! # Structured Article Content
7//!
8//! Unlike the raw HTML/wikitext, structured content provides semantically parsed
9//! data that can be programmatically analyzed:
10//!
11//! - **Infoboxes**: Structured data boxes (e.g., taxobox, person infobox)
12//! - **Sections**: Article sections with paragraphs and subsections
13//! - **Tables**: Data tables with headers and rows
14//!
15//! # BETA Status
16//!
17//! These types are part of the experimental Structured Contents endpoints.
18//! They are not covered by SLA and may change as the API evolves.
19
20use crate::Link;
21use serde::{Deserialize, Serialize};
22
23/// Infobox structured content.
24///
25/// Infoboxes are structured data boxes displayed on article pages.
26/// Common examples include taxoboxes for species, person infoboxes for biographies,
27/// and infoboxes for cities, films, etc.
28///
29/// Infoboxes have a tree-like structure with nested parts for complex data.
30///
31/// # Example
32///
33/// ```ignore
34/// use wme_models::Infobox;
35///
36/// // An infobox might contain:
37/// // - Name: "Automatic taxobox"
38/// // - Type: Infobox
39/// // - Parts: [
40/// //     { name: "Kingdom:", type: "field", value: "Animalia" },
41/// //     { name: "Phylum:", type: "field", value: "Chordata" }
42/// //   ]
43/// ```
44#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
45pub struct Infobox {
46    /// Infobox name (e.g., "Automatic taxobox")
47    pub name: Option<String>,
48    /// Infobox type
49    #[serde(rename = "type")]
50    pub infobox_type: InfoboxType,
51    /// Value (for field types)
52    pub value: Option<String>,
53    /// Nested parts (for complex infoboxes)
54    pub has_parts: Option<Vec<InfoboxPart>>,
55    /// Links within infobox
56    pub links: Option<Vec<Link>>,
57    /// Images within infobox
58    pub images: Option<Vec<crate::content::Image>>,
59}
60
61/// Infobox types.
62///
63/// The type determines how the infobox part should be rendered and interpreted.
64#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
65#[serde(rename_all = "snake_case")]
66pub enum InfoboxType {
67    /// Infobox container (root infobox)
68    #[serde(rename = "infobox")]
69    Infobox,
70    /// Field within infobox (name-value pair)
71    #[serde(rename = "field")]
72    Field,
73    /// Section within infobox (group of fields)
74    #[serde(rename = "section")]
75    Section,
76    /// Image in infobox
77    #[serde(rename = "image")]
78    Image,
79}
80
81/// Part of an infobox.
82///
83/// Infobox parts can be nested to create hierarchical data structures.
84/// For example, a taxobox might have sections for classification and
85/// characteristics.
86#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
87pub struct InfoboxPart {
88    /// Part name
89    pub name: Option<String>,
90    /// Part type
91    #[serde(rename = "type")]
92    pub part_type: InfoboxPartType,
93    /// Single value (for field types)
94    pub value: Option<String>,
95    /// Multiple values (for list type)
96    pub values: Option<Vec<String>>,
97    /// Nested parts
98    pub has_parts: Option<Vec<InfoboxPart>>,
99    /// Links
100    pub links: Option<Vec<Link>>,
101    /// Images
102    pub images: Option<Vec<crate::content::Image>>,
103}
104
105/// Infobox part types.
106///
107/// Extended types for parts within infoboxes, including lists.
108#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
109#[serde(rename_all = "snake_case")]
110pub enum InfoboxPartType {
111    /// Infobox container
112    #[serde(rename = "infobox")]
113    Infobox,
114    /// Field (name-value pair)
115    #[serde(rename = "field")]
116    Field,
117    /// Section (grouping of fields)
118    #[serde(rename = "section")]
119    Section,
120    /// Image
121    #[serde(rename = "image")]
122    Image,
123    /// List (multiple values)
124    #[serde(rename = "list")]
125    List,
126}
127
128/// Section with structured content.
129///
130/// Articles are organized into sections. Each section can contain paragraphs,
131/// links, citations, and nested subsections. Sections form a tree structure
132/// with the article root.
133///
134/// # Example
135///
136/// ```ignore
137/// Section {
138///     name: Some("Personal life".to_string()),
139///     section_type: SectionType::Section,
140///     value: None,
141///     has_parts: Some(vec![
142///         Section { name: Some("Relationships".to_string()), ... },
143///         Section { type: Paragraph, value: Some("Baker's first marriage...".to_string()), ... },
144///     ]),
145/// }
146/// ```
147#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
148pub struct Section {
149    /// Section name (from header h2-h6)
150    pub name: Option<String>,
151    /// Section type
152    #[serde(rename = "type")]
153    pub section_type: SectionType,
154    /// Content value (plain text)
155    pub value: Option<String>,
156    /// Links within section
157    pub links: Option<Vec<Link>>,
158    /// Citations within section
159    pub citations: Option<Vec<crate::reference::Citation>>,
160    /// Nested sections
161    pub has_parts: Option<Vec<Section>>,
162    /// Table references (tables live at article root)
163    pub table_references: Option<Vec<TableReference>>,
164}
165
166/// Section types.
167///
168/// Sections can be containers (with subsections) or paragraphs (with text).
169#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
170#[serde(rename_all = "snake_case")]
171pub enum SectionType {
172    /// Section container (has subsections)
173    #[serde(rename = "section")]
174    Section,
175    /// Paragraph (has text content)
176    #[serde(rename = "paragraph")]
177    Paragraph,
178}
179
180/// Table reference (links to table at article root).
181///
182/// Sections don't contain tables directly. Instead, they reference tables
183/// that are stored at the article root level. This allows the same table
184/// to be referenced from multiple locations.
185#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
186pub struct TableReference {
187    /// Table identifier (unique within article)
188    pub identifier: String,
189    /// Confidence score (parser confidence in extraction)
190    pub confidence_score: f64,
191}
192
193/// Table structured content.
194///
195/// Tables extracted from article content with headers and rows.
196/// Tables are stored at the article root level and referenced from sections.
197///
198/// # Example
199///
200/// ```ignore
201/// Table {
202///     identifier: "demographics_table1".to_string(),
203///     headers: vec![vec![
204///         TableCell { value: "Year".to_string() },
205///         TableCell { value: "Pop.".to_string() },
206///     ]],
207///     rows: vec![
208///         vec![
209///             TableCell { value: "1666".to_string() },
210///             TableCell { value: "625".to_string() },
211///         ],
212///     ],
213///     confidence_score: 0.9,
214/// }
215/// ```
216#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
217pub struct Table {
218    /// Table identifier (unique within article)
219    pub identifier: String,
220    /// Table headers (array of arrays for multi-row headers)
221    pub headers: Vec<Vec<TableCell>>,
222    /// Table rows (array of arrays of cells)
223    pub rows: Vec<Vec<TableCell>>,
224    /// Confidence score (parser confidence)
225    pub confidence_score: f64,
226}
227
228/// Table cell.
229///
230/// A single cell within a table row.
231#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
232pub struct TableCell {
233    /// Cell value (text content)
234    pub value: String,
235}
236
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a [`TableCell`] holding `text`.
    fn make_cell(text: &str) -> TableCell {
        TableCell {
            value: String::from(text),
        }
    }

    /// An infobox built with one field part keeps its name and its parts.
    #[test]
    fn test_infobox_creation() {
        let kingdom = InfoboxPart {
            name: Some(String::from("Kingdom:")),
            part_type: InfoboxPartType::Field,
            value: Some(String::from("Animalia")),
            values: None,
            has_parts: None,
            links: None,
            images: None,
        };
        let infobox = Infobox {
            name: Some(String::from("Automatic taxobox")),
            infobox_type: InfoboxType::Infobox,
            value: None,
            has_parts: Some(vec![kingdom]),
            links: None,
            images: None,
        };

        assert_eq!(infobox.name.as_deref(), Some("Automatic taxobox"));

        let parts = infobox
            .has_parts
            .as_deref()
            .expect("infobox was built with parts");
        assert!(!parts.is_empty());
    }

    /// A section holding one nested section keeps its own name.
    #[test]
    fn test_section_creation() {
        let relationships = Section {
            name: Some(String::from("Relationships")),
            section_type: SectionType::Section,
            value: Some(String::from("Baker's first marriage...")),
            links: None,
            citations: None,
            has_parts: None,
            table_references: None,
        };
        let section = Section {
            name: Some(String::from("Personal life")),
            section_type: SectionType::Section,
            value: None,
            links: None,
            citations: None,
            has_parts: Some(vec![relationships]),
            table_references: None,
        };

        assert_eq!(section.name.as_deref(), Some("Personal life"));
    }

    /// A table built with one header row and one data row keeps both.
    #[test]
    fn test_table_creation() {
        let header_row = vec![make_cell("Year"), make_cell("Pop.")];
        let data_row = vec![make_cell("1666"), make_cell("625")];
        let table = Table {
            identifier: String::from("demographics_table1"),
            headers: vec![header_row],
            rows: vec![data_row],
            confidence_score: 0.9,
        };

        assert_eq!(table.identifier, "demographics_table1");
        assert_eq!(table.rows.len(), 1);
    }

    /// A cell exposes the text it was built with.
    #[test]
    fn test_table_cell() {
        let cell = make_cell("Test value");

        assert_eq!(cell.value, "Test value");
    }
}