wme_models/version.rs
1//! Version and editor information types.
2//!
3//! This module contains types related to article revisions (versions), including:
4//! - [`Version`] - Complete revision metadata with credibility signals
5//! - [`Editor`] - Editor information and user groups
6//! - [`Scores`] - Quality scores (revert risk, reference risk, reference need)
7//! - [`Protection`] - Page protection settings
8//! - [`MaintenanceTags`] - Template counts for maintenance needs
9//!
10//! # Credibility Signals
11//!
12//! Several fields are marked as "Credibility Signals" in the API documentation.
13//! These provide qualitative metadata to help make informed decisions about data handling:
14//!
15//! - **Revert Risk Score**: Predicts whether a revision may be reverted
16//! - **Reference Risk Score**: Probability that references remain in the article
17//! - **Reference Need Score**: Proportion of uncited sentences needing citations
18//! - **Editor Information**: Edit count, user groups, registration date
19//! - **Maintenance Tags**: Counts of citation needed, POV, clarification, update templates
20//!
21//! # Example
22//!
23//! ```
24//! use wme_models::{Version, Editor};
25//! use chrono::Utc;
26//!
27//! let version = Version {
28//! identifier: 1182847293,
29//! editor: Some(Editor {
30//! identifier: Some(12345),
31//! name: Some("ExampleUser".to_string()),
32//! is_bot: Some(false),
33//! is_anonymous: Some(false),
34//! date_started: Some(Utc::now()),
35//! edit_count: Some(1500),
36//! groups: Some(vec!["user".to_string(), "autoconfirmed".to_string()]),
37//! is_admin: Some(false),
38//! is_patroller: Some(false),
39//! has_advanced_rights: Some(false),
40//! }),
41//! comment: Some("Fixed typo".to_string()),
42//! tags: Some(vec!["mobile edit".to_string()]),
43//! has_tag_needs_citation: Some(false),
44//! is_minor_edit: Some(true),
45//! is_flagged_stable: Some(true),
46//! is_breaking_news: Some(false),
47//! noindex: Some(false),
48//! number_of_characters: Some(5000),
49//! size: Some(wme_models::ArticleSize {
50//! value: 15000,
51//! unit_text: "B".to_string(),
52//! }),
53//! maintenance_tags: None,
54//! scores: None,
55//! };
56//! ```
57
58use chrono::{DateTime, Utc};
59use serde::{Deserialize, Serialize};
60
61/// Version information for an article.
62///
63/// Represents a single revision of an article with comprehensive metadata
64/// including editor information, credibility signals, and quality scores.
65///
66/// # Key Fields
67///
68/// - `identifier` - Unique revision ID (different from article ID)
69/// - `editor` - Editor who made this revision
70/// - `scores` - Quality predictions from LiftWing models
71/// - `maintenance_tags` - Counts of maintenance templates
72/// - `is_flagged_stable` - Community-approved revision flag
73#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
74pub struct Version {
75 /// Revision identifier (unique for each edit)
76 pub identifier: u64,
77 /// Editor information
78 pub editor: Option<Editor>,
79 /// Edit comment
80 pub comment: Option<String>,
81 /// MediaWiki change tags
82 pub tags: Option<Vec<String>>,
83 /// Has "citation needed" tag
84 pub has_tag_needs_citation: Option<bool>,
85 /// Was this a minor edit
86 pub is_minor_edit: Option<bool>,
87 /// Community-approved revision
88 pub is_flagged_stable: Option<bool>,
89 /// Breaking news flag
90 pub is_breaking_news: Option<bool>,
91 /// Non-indexable to search engines
92 pub noindex: Option<bool>,
93 /// Character count from wikitext
94 pub number_of_characters: Option<u64>,
95 /// Article size
96 pub size: Option<ArticleSize>,
97 /// Maintenance template counts
98 pub maintenance_tags: Option<MaintenanceTags>,
99 /// Quality scores
100 pub scores: Option<Scores>,
101}
102
103/// Previous version reference.
104///
105/// Lightweight reference to the revision prior to the current one.
106#[derive(Debug, Clone, Serialize, PartialEq)]
107pub struct PreviousVersion {
108 /// Revision identifier
109 pub identifier: u64,
110 /// Editor information
111 pub editor: Option<Editor>,
112 /// Number of characters in the previous revision
113 #[serde(skip_serializing_if = "Option::is_none")]
114 pub number_of_characters: Option<u64>,
115}
116
117impl<'de> Deserialize<'de> for PreviousVersion {
118 fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
119 where
120 D: serde::Deserializer<'de>,
121 {
122 #[derive(Deserialize)]
123 struct RawPreviousVersion {
124 identifier: Option<u64>,
125 #[serde(default)]
126 number_of_characters: Option<u64>,
127 }
128
129 let raw = RawPreviousVersion::deserialize(deserializer)?;
130
131 match raw.identifier {
132 Some(id) => Ok(PreviousVersion {
133 identifier: id,
134 editor: None,
135 number_of_characters: raw.number_of_characters,
136 }),
137 None => Err(serde::de::Error::custom(
138 "previous_version must have an identifier field",
139 )),
140 }
141 }
142}
143
144/// Wrapper for optional previous_version that handles empty objects.
145///
146/// In the API, `previous_version` can be:
147/// - Missing or null → None
148/// - Empty object `{}` → None
149/// - Object with identifier → Some(PreviousVersion)
150#[derive(Debug, Clone, PartialEq, Default)]
151pub struct OptionalPreviousVersion(pub Option<PreviousVersion>);
152
153impl<'de> Deserialize<'de> for OptionalPreviousVersion {
154 fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
155 where
156 D: serde::Deserializer<'de>,
157 {
158 #[derive(Deserialize)]
159 struct RawPreviousVersion {
160 identifier: Option<u64>,
161 #[serde(default)]
162 number_of_characters: Option<u64>,
163 }
164
165 #[derive(Deserialize)]
166 #[serde(untagged)]
167 enum RawOptPreviousVersion {
168 None,
169 Some(RawPreviousVersion),
170 }
171
172 match RawOptPreviousVersion::deserialize(deserializer)? {
173 RawOptPreviousVersion::None => Ok(OptionalPreviousVersion(None)),
174 RawOptPreviousVersion::Some(raw) => match raw.identifier {
175 Some(id) => Ok(OptionalPreviousVersion(Some(PreviousVersion {
176 identifier: id,
177 editor: None,
178 number_of_characters: raw.number_of_characters,
179 }))),
180 None => Ok(OptionalPreviousVersion(None)),
181 },
182 }
183 }
184}
185
186impl Serialize for OptionalPreviousVersion {
187 fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
188 where
189 S: serde::Serializer,
190 {
191 match &self.0 {
192 Some(pv) => pv.serialize(serializer),
193 None => serializer.serialize_none(),
194 }
195 }
196}
197
198/// Editor information.
199///
200/// Provides context about who made a revision. Anonymous editors (IP addresses)
201/// have no identifier. Temporary accounts (since Dec 2025) have identifiers
202/// but `is_anonymous` will be false.
203///
204/// # Editor Name Format
205///
206/// - **Registered users**: Username (e.g., "ExampleUser")
207/// - **Anonymous (legacy)**: IP address (e.g., "192.168.1.1")
208/// - **Temporary accounts**: `~YYYY-SERIAL` (e.g., "~2026-59431-3")
209#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
210pub struct Editor {
211 /// Editor identifier (none for anonymous users)
212 pub identifier: Option<u64>,
213 /// Editor name or IP address
214 pub name: Option<String>,
215 /// Is a bot
216 pub is_bot: Option<bool>,
217 /// Is an anonymous (IP) editor
218 pub is_anonymous: Option<bool>,
219 /// User registration timestamp
220 pub date_started: Option<DateTime<Utc>>,
221 /// Total edit count
222 pub edit_count: Option<u64>,
223 /// User groups (e.g., "admin", "autoconfirmed")
224 pub groups: Option<Vec<String>>,
225 /// Is an admin
226 pub is_admin: Option<bool>,
227 /// Is a patroller
228 pub is_patroller: Option<bool>,
229 /// Has advanced rights
230 pub has_advanced_rights: Option<bool>,
231}
232
233/// Article size information.
234///
235/// Size of the article in wikitext format.
236#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
237pub struct ArticleSize {
238 /// Size value in bytes
239 pub value: u64,
240 /// Unit text (usually "B")
241 pub unit_text: String,
242}
243
244/// Maintenance template counts.
245///
246/// Counts of occurrences of certain templates in the article body.
247/// These indicate areas that may need editor attention.
248#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
249pub struct MaintenanceTags {
250 /// Citation needed count
251 pub citation_needed_count: Option<u64>,
252 /// POV tag count
253 pub pov_count: Option<u64>,
254 /// Clarification needed count
255 pub clarification_needed_count: Option<u64>,
256 /// Update needed count
257 pub update_count: Option<u64>,
258}
259
260/// Quality scores.
261///
262/// Scores calculated as part of Wikimedia's LiftWing project.
263/// These provide credibility signals for revision quality.
264#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
265pub struct Scores {
266 /// Revert risk score (may revision be reverted?)
267 pub revertrisk: Option<RevertRisk>,
268 /// Reference risk score (will references remain?)
269 pub referencerisk: Option<ReferenceRisk>,
270 /// Reference need score (what needs citations?)
271 pub referenceneed: Option<ReferenceNeed>,
272}
273
274/// Revert risk score.
275///
276/// Predicts whether a revision may be reverted based on edit patterns.
277#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
278pub struct RevertRisk {
279 /// Revert risk prediction (true = likely to be reverted)
280 pub prediction: Option<bool>,
281 /// Revert risk probability details
282 pub probability: Option<serde_json::Value>,
283}
284
285/// Reference risk score.
286///
287/// Probability of references remaining in the article based on
288/// historical editorial activity on web domains used as references.
289/// Serves as a proxy for "source reliability".
290#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
291pub struct ReferenceRisk {
292 /// Reference risk score (0.0 to 1.0)
293 pub reference_risk_score: Option<f64>,
294}
295
296/// Reference need score.
297///
298/// Proportion of uncited sentences that need citations.
299/// Available for these Wikipedia languages: fa, it, zh, ru, pt, es, ja, de, fr, en.
300/// Only available for articles (Namespace 0).
301#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
302pub struct ReferenceNeed {
303 /// Reference need score (0.0 to 1.0)
304 pub reference_need_score: Option<f64>,
305}
306
307/// Protection settings.
308///
309/// Community-specific protections and restrictions on the article.
310/// Indicates which editor permissions are needed to edit or move the page.
311#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
312pub struct Protection {
313 /// Protection type (e.g., "edit", "move")
314 #[serde(rename = "type")]
315 pub protection_type: String,
316 /// Protection level (e.g., "autoconfirmed", "sysop")
317 pub level: String,
318 /// Expiration timestamp (None for never-expiring)
319 #[serde(deserialize_with = "deserialize_expiry")]
320 pub expiry: Option<DateTime<Utc>>,
321}
322
323fn deserialize_expiry<'de, D>(deserializer: D) -> Result<Option<DateTime<Utc>>, D::Error>
324where
325 D: serde::Deserializer<'de>,
326{
327 #[derive(Deserialize)]
328 #[serde(untagged)]
329 enum RawExpiry {
330 DateTime(DateTime<Utc>),
331 Infinity(String),
332 Null,
333 }
334
335 match RawExpiry::deserialize(deserializer)? {
336 RawExpiry::DateTime(dt) => Ok(Some(dt)),
337 RawExpiry::Infinity(s) if s == "infinity" => Ok(None),
338 RawExpiry::Infinity(s) => Err(serde::de::Error::custom(format!(
339 "invalid expiry value: {}",
340 s
341 ))),
342 RawExpiry::Null => Ok(None),
343 }
344}
345
/// Unit tests for the version models.
///
/// The first group checks plain struct construction. The second group
/// exercises the hand-written (de)serialization logic in this module through
/// `serde_json`.
#[cfg(test)]
mod tests {
    use super::*;
    use chrono::Utc;

    #[test]
    fn test_version_creation() {
        let version = Version {
            identifier: 1182847293,
            editor: Some(Editor {
                identifier: Some(12345),
                name: Some("TestUser".to_string()),
                is_bot: Some(false),
                is_anonymous: Some(false),
                date_started: Some(Utc::now()),
                edit_count: Some(1000),
                groups: Some(vec!["user".to_string()]),
                is_admin: Some(false),
                is_patroller: Some(false),
                has_advanced_rights: Some(false),
            }),
            comment: Some("Test edit".to_string()),
            tags: Some(vec!["mobile edit".to_string()]),
            has_tag_needs_citation: Some(false),
            is_minor_edit: Some(false),
            is_flagged_stable: Some(true),
            is_breaking_news: Some(false),
            noindex: Some(false),
            number_of_characters: Some(5000),
            size: Some(ArticleSize {
                value: 15000,
                unit_text: "B".to_string(),
            }),
            maintenance_tags: None,
            scores: None,
        };

        assert_eq!(version.identifier, 1182847293);
        assert!(version.is_flagged_stable.unwrap());
    }

    #[test]
    fn test_editor_groups() {
        let editor = Editor {
            identifier: Some(12345),
            name: Some("AdminUser".to_string()),
            is_bot: Some(false),
            is_anonymous: Some(false),
            date_started: Some(Utc::now()),
            edit_count: Some(5000),
            groups: Some(vec![
                "user".to_string(),
                "autoconfirmed".to_string(),
                "extendedconfirmed".to_string(),
            ]),
            is_admin: Some(true),
            is_patroller: Some(true),
            has_advanced_rights: Some(true),
        };

        let groups = editor.groups.as_ref().unwrap();
        assert!(groups.contains(&"user".to_string()));
        assert!(groups.contains(&"autoconfirmed".to_string()));
        assert!(editor.is_admin.unwrap());
    }

    #[test]
    fn test_maintenance_tags() {
        let tags = MaintenanceTags {
            citation_needed_count: Some(5),
            pov_count: Some(1),
            clarification_needed_count: Some(2),
            update_count: Some(10),
        };

        assert_eq!(tags.citation_needed_count, Some(5));
        assert_eq!(tags.pov_count, Some(1));
    }

    #[test]
    fn test_protection() {
        let protection = Protection {
            protection_type: "edit".to_string(),
            level: "autoconfirmed".to_string(),
            expiry: None, // Never expires
        };

        assert_eq!(protection.protection_type, "edit");
        assert_eq!(protection.level, "autoconfirmed");
        assert!(protection.expiry.is_none());
    }

    #[test]
    fn test_scores() {
        let scores = Scores {
            revertrisk: Some(RevertRisk {
                prediction: Some(false),
                probability: None,
            }),
            referencerisk: Some(ReferenceRisk {
                reference_risk_score: Some(0.15),
            }),
            referenceneed: Some(ReferenceNeed {
                reference_need_score: Some(0.25),
            }),
        };

        assert_eq!(scores.revertrisk.unwrap().prediction, Some(false));
        assert_eq!(
            scores.referencerisk.unwrap().reference_risk_score,
            Some(0.15)
        );
    }

    // --- Custom (de)serialization -------------------------------------

    #[test]
    fn test_protection_expiry_infinity_is_none() {
        let protection: Protection =
            serde_json::from_str(r#"{"type":"edit","level":"sysop","expiry":"infinity"}"#)
                .unwrap();

        assert_eq!(protection.protection_type, "edit");
        assert_eq!(protection.level, "sysop");
        assert!(protection.expiry.is_none());
    }

    #[test]
    fn test_protection_expiry_null_is_none() {
        let protection: Protection =
            serde_json::from_str(r#"{"type":"move","level":"sysop","expiry":null}"#).unwrap();

        assert!(protection.expiry.is_none());
    }

    #[test]
    fn test_protection_expiry_timestamp() {
        let protection: Protection = serde_json::from_str(
            r#"{"type":"edit","level":"autoconfirmed","expiry":"2030-01-02T03:04:05Z"}"#,
        )
        .unwrap();

        let expected: DateTime<Utc> = "2030-01-02T03:04:05Z".parse().unwrap();
        assert_eq!(protection.expiry, Some(expected));
    }

    #[test]
    fn test_protection_expiry_rejects_unknown_string() {
        let result: Result<Protection, _> =
            serde_json::from_str(r#"{"type":"edit","level":"sysop","expiry":"soon"}"#);

        assert!(result.is_err());
    }

    #[test]
    fn test_previous_version_requires_identifier() {
        let result: Result<PreviousVersion, _> = serde_json::from_str("{}");
        assert!(result.is_err());

        let previous: PreviousVersion =
            serde_json::from_str(r#"{"identifier": 42, "number_of_characters": 7}"#).unwrap();
        assert_eq!(previous.identifier, 42);
        assert_eq!(previous.number_of_characters, Some(7));
    }

    #[test]
    fn test_optional_previous_version_empty_inputs() {
        let from_null: OptionalPreviousVersion = serde_json::from_str("null").unwrap();
        assert_eq!(from_null, OptionalPreviousVersion(None));

        let from_empty: OptionalPreviousVersion = serde_json::from_str("{}").unwrap();
        assert_eq!(from_empty, OptionalPreviousVersion(None));
    }

    #[test]
    fn test_optional_previous_version_with_identifier() {
        let parsed: OptionalPreviousVersion =
            serde_json::from_str(r#"{"identifier": 42, "number_of_characters": 7}"#).unwrap();

        let previous = parsed.0.expect("identifier was present in the input");
        assert_eq!(previous.identifier, 42);
        assert_eq!(previous.number_of_characters, Some(7));
    }

    #[test]
    fn test_optional_previous_version_none_serializes_as_null() {
        let json = serde_json::to_string(&OptionalPreviousVersion(None)).unwrap();
        assert_eq!(json, "null");
    }
}
459}