1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
//! Article data structure representing the parsed output.
//!
//! This module defines the [`Article`] struct, which contains all extracted content
//! and metadata from a successfully parsed web page.
//!
//! ## Example
//!
//! ```rust,no_run
//! use readabilityrs::{Readability, ReadabilityOptions};
//!
//! let html = r#"<html><body><article><h1>My Article</h1><p>Content...</p></article></body></html>"#;
//! let readability = Readability::new(html, Some("https://example.com"), None).unwrap();
//!
//! if let Some(article) = readability.parse() {
//!     // Access article fields
//!     println!("Title: {:?}", article.title);
//!     println!("Length: {} characters", article.length);
//!     println!("Author: {:?}", article.byline);
//!
//!     // Get cleaned HTML content
//!     if let Some(content) = article.content {
//!         println!("HTML: {}", content);
//!     }
//!
//!     // Or get plain text
//!     if let Some(text) = article.text_content {
//!         println!("Text: {}", text);
//!     }
//! }
//! ```
use ;
/// Represents a successfully parsed article with extracted content and metadata.
///
/// The `Article` struct contains all the extracted information from a web page,
/// including the main content (both HTML and plain text), metadata (title, author,
/// publish date), and other article properties.
///
/// ## Fields
///
/// Most fields are optional (`Option<String>`) because not every web page provides
/// every piece of metadata. The `length` field is the exception: it is always present
/// and holds the character count of the extracted text.
///
/// ## Serialization
///
/// This struct implements `Serialize` and `Deserialize` from serde, making it
/// easy to save articles to JSON or other formats:
///
/// ```rust,no_run
/// use readabilityrs::{Readability, Article};
/// # let html = "<html></html>";
/// # let readability = Readability::new(html, None, None).unwrap();
///
/// if let Some(article) = readability.parse() {
///     let json = serde_json::to_string_pretty(&article).unwrap();
///     println!("{}", json);
/// }
/// ```