Skip to main content

wme_models/
structured.rs

1//! Structured content types (BETA).
2//!
3//! This module provides types for parsed article content including infoboxes,
4//! sections, and tables. These are part of the Structured Contents BETA API.
5//!
6//! # Structured Article Content
7//!
8//! Unlike the raw HTML/wikitext, structured content provides semantically parsed
9//! data that can be programmatically analyzed:
10//!
11//! - **Infoboxes**: Structured data boxes (e.g., taxobox, person infobox)
12//! - **Sections**: Article sections with paragraphs and subsections
13//! - **Tables**: Data tables with headers and rows
14//!
15//! # BETA Status
16//!
17//! These types are part of the experimental Structured Contents endpoints.
18//! They are not covered by SLA and may change as the API evolves.
19
20use crate::content::Image;
21use crate::reference::Citation;
22use crate::Link;
23use serde::{Deserialize, Serialize};
24
25/// Infobox structured content.
26///
27/// Infoboxes are structured data boxes displayed on article pages.
28/// Common examples include taxoboxes for species, person infoboxes for biographies,
29/// and infoboxes for cities, films, etc.
30///
31/// Infoboxes have a tree-like structure with nested parts for complex data.
32///
33/// # Example
34///
35/// ```ignore
36/// use wme_models::Infobox;
37///
38/// // An infobox might contain:
39/// // - Name: "Automatic taxobox"
40/// // - Type: Infobox
41/// // - Parts: [
42/// //     { name: "Kingdom:", type: "field", value: "Animalia" },
43/// //     { name: "Phylum:", type: "field", value: "Chordata" }
44/// //   ]
45/// ```
46#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
47pub struct Infobox {
48    /// Infobox name (e.g., "Automatic taxobox")
49    pub name: Option<String>,
50    /// Infobox type
51    #[serde(rename = "type")]
52    pub infobox_type: InfoboxType,
53    /// Value (for field types)
54    pub value: Option<String>,
55    /// Nested parts (for complex infoboxes)
56    pub has_parts: Option<Vec<InfoboxPart>>,
57    /// Links within infobox
58    pub links: Option<Vec<Link>>,
59    /// Images within infobox
60    pub images: Option<Vec<Image>>,
61}
62
63/// Infobox types.
64///
65/// The type determines how the infobox part should be rendered and interpreted.
66#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
67#[serde(rename_all = "snake_case")]
68pub enum InfoboxType {
69    /// Infobox container (root infobox)
70    #[serde(rename = "infobox")]
71    Infobox,
72    /// Field within infobox (name-value pair)
73    #[serde(rename = "field")]
74    Field,
75    /// Section within infobox (group of fields)
76    #[serde(rename = "section")]
77    Section,
78    /// Image in infobox
79    #[serde(rename = "image")]
80    Image,
81}
82
83/// Part of an infobox.
84///
85/// Infobox parts can be nested to create hierarchical data structures.
86/// For example, a taxobox might have sections for classification and
87/// characteristics.
88#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
89pub struct InfoboxPart {
90    /// Part name
91    pub name: Option<String>,
92    /// Part type
93    #[serde(rename = "type")]
94    pub part_type: InfoboxPartType,
95    /// Single value (for field types)
96    pub value: Option<String>,
97    /// Multiple values (for list type)
98    pub values: Option<Vec<String>>,
99    /// Nested parts
100    pub has_parts: Option<Vec<InfoboxPart>>,
101    /// Links
102    pub links: Option<Vec<Link>>,
103    /// Images
104    pub images: Option<Vec<Image>>,
105}
106
107/// Infobox part types.
108///
109/// Extended types for parts within infoboxes, including lists.
110#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
111#[serde(rename_all = "snake_case")]
112pub enum InfoboxPartType {
113    /// Infobox container
114    #[serde(rename = "infobox")]
115    Infobox,
116    /// Field (name-value pair)
117    #[serde(rename = "field")]
118    Field,
119    /// Section (grouping of fields)
120    #[serde(rename = "section")]
121    Section,
122    /// Image
123    #[serde(rename = "image")]
124    Image,
125    /// List (multiple values)
126    #[serde(rename = "list")]
127    List,
128}
129
130/// Section with structured content.
131///
132/// Articles are organized into sections. Each section can contain paragraphs,
133/// links, citations, and nested subsections. Sections form a tree structure
134/// with the article root.
135///
136/// # Example
137///
138/// ```ignore
139/// Section {
140///     name: Some("Personal life".to_string()),
141///     section_type: SectionType::Section,
142///     value: None,
143///     has_parts: Some(vec![
144///         Section { name: Some("Relationships".to_string()), ... },
145///         Section { type: Paragraph, value: Some("Baker's first marriage...".to_string()), ... },
146///     ]),
147/// }
148/// ```
149#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
150pub struct Section {
151    /// Section name (from header h2-h6)
152    pub name: Option<String>,
153    /// Section type
154    #[serde(rename = "type")]
155    pub section_type: SectionType,
156    /// Content value (plain text)
157    pub value: Option<String>,
158    /// Links within section
159    pub links: Option<Vec<Link>>,
160    /// Citations within section
161    pub citations: Option<Vec<Citation>>,
162    /// Nested sections
163    pub has_parts: Option<Vec<Section>>,
164    /// Table references (tables live at article root)
165    pub table_references: Option<Vec<TableReference>>,
166}
167
168/// Section types.
169///
170/// Sections can be containers (with subsections) or paragraphs (with text).
171#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
172#[serde(rename_all = "snake_case")]
173pub enum SectionType {
174    /// Section container (has subsections)
175    #[serde(rename = "section")]
176    Section,
177    /// Paragraph (has text content)
178    #[serde(rename = "paragraph")]
179    Paragraph,
180}
181
182/// Table reference (links to table at article root).
183///
184/// Sections don't contain tables directly. Instead, they reference tables
185/// that are stored at the article root level. This allows the same table
186/// to be referenced from multiple locations.
187#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
188pub struct TableReference {
189    /// Table identifier (unique within article)
190    pub identifier: String,
191    /// Confidence score (parser confidence in extraction)
192    pub confidence_score: f64,
193}
194
195/// Table structured content.
196///
197/// Tables extracted from article content with headers and rows.
198/// Tables are stored at the article root level and referenced from sections.
199///
200/// # Example
201///
202/// ```ignore
203/// Table {
204///     identifier: "demographics_table1".to_string(),
205///     headers: vec![vec![
206///         TableCell { value: "Year".to_string() },
207///         TableCell { value: "Pop.".to_string() },
208///     ]],
209///     rows: vec![
210///         vec![
211///             TableCell { value: "1666".to_string() },
212///             TableCell { value: "625".to_string() },
213///         ],
214///     ],
215///     confidence_score: 0.9,
216/// }
217/// ```
218#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
219pub struct Table {
220    /// Table identifier (unique within article)
221    pub identifier: String,
222    /// Table headers (array of arrays for multi-row headers)
223    pub headers: Vec<Vec<TableCell>>,
224    /// Table rows (array of arrays of cells)
225    pub rows: Vec<Vec<TableCell>>,
226    /// Confidence score (parser confidence)
227    pub confidence_score: f64,
228}
229
230/// Table cell.
231///
232/// A single cell within a table row.
233#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
234pub struct TableCell {
235    /// Cell value (text content)
236    pub value: String,
237}
238
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a [`TableCell`] from a string slice to keep table fixtures short.
    fn cell(text: &str) -> TableCell {
        TableCell {
            value: text.to_string(),
        }
    }

    /// An infobox with one nested field keeps its name, type and part data,
    /// and its derived `Clone`/`PartialEq` agree with each other.
    #[test]
    fn test_infobox_creation() {
        let infobox = Infobox {
            name: Some("Automatic taxobox".to_string()),
            infobox_type: InfoboxType::Infobox,
            value: None,
            has_parts: Some(vec![InfoboxPart {
                name: Some("Kingdom:".to_string()),
                part_type: InfoboxPartType::Field,
                value: Some("Animalia".to_string()),
                values: None,
                has_parts: None,
                links: None,
                images: None,
            }]),
            links: None,
            images: None,
        };

        assert_eq!(infobox.name.as_deref(), Some("Automatic taxobox"));
        assert_eq!(infobox.infobox_type, InfoboxType::Infobox);

        let parts = infobox
            .has_parts
            .as_deref()
            .expect("fixture was built with exactly one part");
        assert_eq!(parts.len(), 1);
        assert_eq!(parts[0].name.as_deref(), Some("Kingdom:"));
        assert_eq!(parts[0].part_type, InfoboxPartType::Field);
        assert_eq!(parts[0].value.as_deref(), Some("Animalia"));

        assert_eq!(infobox.clone(), infobox);
    }

    /// A container section holds a subsection and a paragraph; only the
    /// paragraph carries text, matching the documented section model.
    #[test]
    fn test_section_creation() {
        let relationships = Section {
            name: Some("Relationships".to_string()),
            section_type: SectionType::Section,
            value: None,
            links: None,
            citations: None,
            has_parts: None,
            table_references: None,
        };
        let paragraph = Section {
            name: None,
            section_type: SectionType::Paragraph,
            value: Some("Baker's first marriage...".to_string()),
            links: None,
            citations: None,
            has_parts: None,
            table_references: None,
        };
        let section = Section {
            name: Some("Personal life".to_string()),
            section_type: SectionType::Section,
            value: None,
            links: None,
            citations: None,
            has_parts: Some(vec![relationships, paragraph]),
            table_references: None,
        };

        assert_eq!(section.name.as_deref(), Some("Personal life"));
        assert_eq!(section.section_type, SectionType::Section);
        assert!(section.value.is_none());

        let children = section
            .has_parts
            .as_deref()
            .expect("fixture was built with two children");
        assert_eq!(children.len(), 2);

        assert_eq!(children[0].name.as_deref(), Some("Relationships"));
        assert_eq!(children[0].section_type, SectionType::Section);
        assert!(children[0].value.is_none());

        assert_eq!(children[1].section_type, SectionType::Paragraph);
        assert_eq!(
            children[1].value.as_deref(),
            Some("Baker's first marriage...")
        );

        assert_eq!(section.clone(), section);
    }

    /// A table keeps its identifier, header row and data rows, and every data
    /// row in the fixture is as wide as the header row.
    #[test]
    fn test_table_creation() {
        let table = Table {
            identifier: "demographics_table1".to_string(),
            headers: vec![vec![cell("Year"), cell("Pop.")]],
            rows: vec![vec![cell("1666"), cell("625")]],
            confidence_score: 0.9,
        };

        assert_eq!(table.identifier, "demographics_table1");

        assert_eq!(table.headers.len(), 1);
        assert_eq!(table.headers[0], vec![cell("Year"), cell("Pop.")]);

        assert_eq!(table.rows.len(), 1);
        let width = table.headers[0].len();
        assert!(table.rows.iter().all(|row| row.len() == width));
        assert_eq!(table.rows[0][0].value, "1666");
        assert_eq!(table.rows[0][1].value, "625");

        assert!((table.confidence_score - 0.9).abs() < f64::EPSILON);
    }

    /// A cell stores its text verbatim and compares equal to its clone.
    #[test]
    fn test_table_cell() {
        let cell = TableCell {
            value: "Test value".to_string(),
        };

        assert_eq!(cell.value, "Test value");
        assert_eq!(cell.clone(), cell);
    }
}