unstructured_client/
element.rs

1use crate::metadata::Metadata;
2use serde::{Deserialize, Serialize};
3
4/// Enum representing various types of elements in a document.
5#[derive(Debug, Deserialize, Serialize, PartialEq)]
6pub enum ElementType {
7    /// An element containing formulas in a document.
8    Formula,
9
10    /// An element for capturing text associated with figure captions.
11    FigureCaption,
12
13    /// NarrativeText is an element consisting of multiple, well-formulated sentences.
14    /// This excludes elements such as titles, headers, footers, and captions.
15    NarrativeText,
16
17    /// ListItem is a NarrativeText element that is part of a list.
18    ListItem,
19
20    /// A text element for capturing titles.
21    Title,
22
23    /// A text element for capturing physical addresses.
24    Address,
25
26    /// A text element for capturing email addresses.
27    EmailAddress,
28
29    /// A text element for capturing image metadata.
30    Image,
31
32    /// An element for capturing page breaks.
33    PageBreak,
34
35    /// An element for capturing tables.
36    Table,
37
38    /// An element for capturing document headers.
39    Header,
40
41    /// An element for capturing document footers.
42    Footer,
43
44    /// An element for capturing code snippets.
45    CodeSnippet,
46
47    /// An element for capturing page numbers.
48    PageNumber,
49
50    /// Base element for capturing free text from within the document.
51    UncategorizedText,
52
53    /// A chunk formed from text (non-Table) elements. It is only produced by chunking.
54    CompositeElement,
55}
56
57#[derive(Debug, Deserialize, Serialize, PartialEq)]
58pub struct Element {
59    pub r#type: ElementType,
60    pub element_id: String,
61    pub text: String,
62    pub metadata: Option<Metadata>,
63}
64
65pub type ElementList = Vec<Element>;
66
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds an [`Element`] with no metadata, used as the expected value in
    /// the tests below.
    fn plain_element(kind: ElementType, id: &str, text: &str) -> Element {
        Element {
            r#type: kind,
            element_id: id.to_string(),
            text: text.to_string(),
            metadata: None,
        }
    }

    /// An explicit `"metadata": null` deserializes to `None`.
    #[test]
    fn test_deserialize_simple() {
        let json_str = r#"
        {
          "type": "NarrativeText",
          "element_id": "1",
          "text": "Hello, world!",
          "metadata": null
        }
        "#;

        let expected = plain_element(ElementType::NarrativeText, "1", "Hello, world!");

        let element: Element = serde_json::from_str(json_str).unwrap();
        assert_eq!(element, expected);
    }

    /// An `Image` element whose JSON omits the `metadata` key deserializes
    /// with `metadata: None`.
    ///
    /// This test was previously named `test_deserialize_with_metadata`, but its
    /// input never contained any metadata.
    // TODO: add a case with a populated `metadata` object once a
    // representative `Metadata` fixture is available.
    #[test]
    fn test_deserialize_image_without_metadata() {
        let json_str = r#"
        {
          "type": "Image",
          "element_id": "2",
          "text": "An image element"
        }
        "#;

        let expected = plain_element(ElementType::Image, "2", "An image element");

        let element: Element = serde_json::from_str(json_str).unwrap();
        assert_eq!(element, expected);
    }

    /// A missing `metadata` key deserializes to `None`.
    #[test]
    fn test_deserialize_without_metadata() {
        let json_str = r#"
        {
          "type": "ListItem",
          "element_id": "3",
          "text": "A list element."
        }
        "#;

        let expected = plain_element(ElementType::ListItem, "3", "A list element.");

        let element: Element = serde_json::from_str(json_str).unwrap();
        assert_eq!(element, expected);
    }

    /// A `type` string that names no `ElementType` variant is a
    /// deserialization error rather than being silently accepted.
    #[test]
    fn test_deserialize_unknown_type_is_rejected() {
        let json_str = r#"
        {
          "type": "NotARealElementType",
          "element_id": "4",
          "text": "???"
        }
        "#;

        assert!(serde_json::from_str::<Element>(json_str).is_err());
    }

    /// Fields are emitted in declaration order and `None` becomes `null`.
    #[test]
    fn test_serialize() {
        let element = plain_element(ElementType::NarrativeText, "1", "Hello, world!");

        let expected_json =
            r#"{"type":"NarrativeText","element_id":"1","text":"Hello, world!","metadata":null}"#;
        let json_str = serde_json::to_string(&element).unwrap();
        assert_eq!(json_str, expected_json);
    }

    /// Serializing and then deserializing yields an equal value.
    #[test]
    fn test_round_trip() {
        let element = plain_element(ElementType::Table, "5", "a | b");

        let json_str = serde_json::to_string(&element).unwrap();
        let decoded: Element = serde_json::from_str(&json_str).unwrap();
        assert_eq!(decoded, element);
    }
}