Skip to main content

wme_models/
version.rs

1//! Version and editor information types.
2//!
3//! This module contains types related to article revisions (versions), including:
4//! - [`Version`] - Complete revision metadata with credibility signals
5//! - [`Editor`] - Editor information and user groups
6//! - [`Scores`] - Quality scores (revert risk, reference risk, reference need)
7//! - [`Protection`] - Page protection settings
8//! - [`MaintenanceTags`] - Template counts for maintenance needs
9//!
10//! # Credibility Signals
11//!
12//! Several fields are marked as "Credibility Signals" in the API documentation.
13//! These provide qualitative metadata to help make informed decisions about data handling:
14//!
15//! - **Revert Risk Score**: Predicts whether a revision may be reverted
16//! - **Reference Risk Score**: Probability that references remain in the article
17//! - **Reference Need Score**: Proportion of uncited sentences needing citations
18//! - **Editor Information**: Edit count, user groups, registration date
19//! - **Maintenance Tags**: Counts of citation needed, POV, clarification, update templates
20//!
21//! # Example
22//!
23//! ```
24//! use wme_models::{Version, Editor};
25//! use chrono::Utc;
26//!
27//! let version = Version {
28//!     identifier: 1182847293,
29//!     editor: Some(Editor {
30//!         identifier: Some(12345),
31//!         name: Some("ExampleUser".to_string()),
32//!         is_bot: Some(false),
33//!         is_anonymous: Some(false),
34//!         date_started: Some(Utc::now()),
35//!         edit_count: Some(1500),
36//!         groups: Some(vec!["user".to_string(), "autoconfirmed".to_string()]),
37//!         is_admin: Some(false),
38//!         is_patroller: Some(false),
39//!         has_advanced_rights: Some(false),
40//!     }),
41//!     comment: Some("Fixed typo".to_string()),
42//!     tags: Some(vec!["mobile edit".to_string()]),
43//!     has_tag_needs_citation: Some(false),
44//!     is_minor_edit: Some(true),
45//!     is_flagged_stable: Some(true),
46//!     is_breaking_news: Some(false),
47//!     noindex: Some(false),
48//!     number_of_characters: Some(5000),
49//!     size: Some(wme_models::ArticleSize {
50//!         value: 15000,
51//!         unit_text: "B".to_string(),
52//!     }),
53//!     maintenance_tags: None,
54//!     scores: None,
55//! };
56//! ```
57
58use chrono::{DateTime, Utc};
59use serde::{Deserialize, Serialize};
60
61/// Version information for an article.
62///
63/// Represents a single revision of an article with comprehensive metadata
64/// including editor information, credibility signals, and quality scores.
65///
66/// # Key Fields
67///
68/// - `identifier` - Unique revision ID (different from article ID)
69/// - `editor` - Editor who made this revision
70/// - `scores` - Quality predictions from LiftWing models
71/// - `maintenance_tags` - Counts of maintenance templates
72/// - `is_flagged_stable` - Community-approved revision flag
73#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
74pub struct Version {
75    /// Revision identifier (unique for each edit)
76    pub identifier: u64,
77    /// Editor information
78    pub editor: Option<Editor>,
79    /// Edit comment
80    pub comment: Option<String>,
81    /// MediaWiki change tags
82    pub tags: Option<Vec<String>>,
83    /// Has "citation needed" tag
84    pub has_tag_needs_citation: Option<bool>,
85    /// Was this a minor edit
86    pub is_minor_edit: Option<bool>,
87    /// Community-approved revision
88    pub is_flagged_stable: Option<bool>,
89    /// Breaking news flag
90    pub is_breaking_news: Option<bool>,
91    /// Non-indexable to search engines
92    pub noindex: Option<bool>,
93    /// Character count from wikitext
94    pub number_of_characters: Option<u64>,
95    /// Article size
96    pub size: Option<ArticleSize>,
97    /// Maintenance template counts
98    pub maintenance_tags: Option<MaintenanceTags>,
99    /// Quality scores
100    pub scores: Option<Scores>,
101}
102
103/// Previous version reference.
104///
105/// Lightweight reference to the revision prior to the current one.
106#[derive(Debug, Clone, Serialize, PartialEq)]
107pub struct PreviousVersion {
108    /// Revision identifier
109    pub identifier: u64,
110    /// Editor information
111    pub editor: Option<Editor>,
112    /// Number of characters in the previous revision
113    #[serde(skip_serializing_if = "Option::is_none")]
114    pub number_of_characters: Option<u64>,
115}
116
117impl<'de> Deserialize<'de> for PreviousVersion {
118    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
119    where
120        D: serde::Deserializer<'de>,
121    {
122        #[derive(Deserialize)]
123        struct RawPreviousVersion {
124            identifier: Option<u64>,
125            #[serde(default)]
126            number_of_characters: Option<u64>,
127        }
128
129        let raw = RawPreviousVersion::deserialize(deserializer)?;
130
131        match raw.identifier {
132            Some(id) => Ok(PreviousVersion {
133                identifier: id,
134                editor: None,
135                number_of_characters: raw.number_of_characters,
136            }),
137            None => Err(serde::de::Error::custom(
138                "previous_version must have an identifier field",
139            )),
140        }
141    }
142}
143
144/// Wrapper for optional previous_version that handles empty objects.
145///
146/// In the API, `previous_version` can be:
147/// - Missing or null → None
148/// - Empty object `{}` → None
149/// - Object with identifier → Some(PreviousVersion)
150#[derive(Debug, Clone, PartialEq, Default)]
151pub struct OptionalPreviousVersion(pub Option<PreviousVersion>);
152
153impl<'de> Deserialize<'de> for OptionalPreviousVersion {
154    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
155    where
156        D: serde::Deserializer<'de>,
157    {
158        #[derive(Deserialize)]
159        struct RawPreviousVersion {
160            identifier: Option<u64>,
161            #[serde(default)]
162            number_of_characters: Option<u64>,
163        }
164
165        #[derive(Deserialize)]
166        #[serde(untagged)]
167        enum RawOptPreviousVersion {
168            None,
169            Some(RawPreviousVersion),
170        }
171
172        match RawOptPreviousVersion::deserialize(deserializer)? {
173            RawOptPreviousVersion::None => Ok(OptionalPreviousVersion(None)),
174            RawOptPreviousVersion::Some(raw) => match raw.identifier {
175                Some(id) => Ok(OptionalPreviousVersion(Some(PreviousVersion {
176                    identifier: id,
177                    editor: None,
178                    number_of_characters: raw.number_of_characters,
179                }))),
180                None => Ok(OptionalPreviousVersion(None)),
181            },
182        }
183    }
184}
185
186impl Serialize for OptionalPreviousVersion {
187    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
188    where
189        S: serde::Serializer,
190    {
191        match &self.0 {
192            Some(pv) => pv.serialize(serializer),
193            None => serializer.serialize_none(),
194        }
195    }
196}
197
198/// Editor information.
199///
200/// Provides context about who made a revision. Anonymous editors (IP addresses)
201/// have no identifier. Temporary accounts (since Dec 2025) have identifiers
202/// but `is_anonymous` will be false.
203///
204/// # Editor Name Format
205///
206/// - **Registered users**: Username (e.g., "ExampleUser")
207/// - **Anonymous (legacy)**: IP address (e.g., "192.168.1.1")
208/// - **Temporary accounts**: `~YYYY-SERIAL` (e.g., "~2026-59431-3")
209#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
210pub struct Editor {
211    /// Editor identifier (none for anonymous users)
212    pub identifier: Option<u64>,
213    /// Editor name or IP address
214    pub name: Option<String>,
215    /// Is a bot
216    pub is_bot: Option<bool>,
217    /// Is an anonymous (IP) editor
218    pub is_anonymous: Option<bool>,
219    /// User registration timestamp
220    pub date_started: Option<DateTime<Utc>>,
221    /// Total edit count
222    pub edit_count: Option<u64>,
223    /// User groups (e.g., "admin", "autoconfirmed")
224    pub groups: Option<Vec<String>>,
225    /// Is an admin
226    pub is_admin: Option<bool>,
227    /// Is a patroller
228    pub is_patroller: Option<bool>,
229    /// Has advanced rights
230    pub has_advanced_rights: Option<bool>,
231}
232
233/// Article size information.
234///
235/// Size of the article in wikitext format.
236#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
237pub struct ArticleSize {
238    /// Size value in bytes
239    pub value: u64,
240    /// Unit text (usually "B")
241    pub unit_text: String,
242}
243
244/// Maintenance template counts.
245///
246/// Counts of occurrences of certain templates in the article body.
247/// These indicate areas that may need editor attention.
248#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
249pub struct MaintenanceTags {
250    /// Citation needed count
251    pub citation_needed_count: Option<u64>,
252    /// POV tag count
253    pub pov_count: Option<u64>,
254    /// Clarification needed count
255    pub clarification_needed_count: Option<u64>,
256    /// Update needed count
257    pub update_count: Option<u64>,
258}
259
260/// Quality scores.
261///
262/// Scores calculated as part of Wikimedia's LiftWing project.
263/// These provide credibility signals for revision quality.
264#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
265pub struct Scores {
266    /// Revert risk score (may revision be reverted?)
267    pub revertrisk: Option<RevertRisk>,
268    /// Reference risk score (will references remain?)
269    pub referencerisk: Option<ReferenceRisk>,
270    /// Reference need score (what needs citations?)
271    pub referenceneed: Option<ReferenceNeed>,
272}
273
274/// Revert risk score.
275///
276/// Predicts whether a revision may be reverted based on edit patterns.
277#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
278pub struct RevertRisk {
279    /// Revert risk prediction (true = likely to be reverted)
280    pub prediction: Option<bool>,
281    /// Revert risk probability details
282    pub probability: Option<serde_json::Value>,
283}
284
285/// Reference risk score.
286///
287/// Probability of references remaining in the article based on
288/// historical editorial activity on web domains used as references.
289/// Serves as a proxy for "source reliability".
290#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
291pub struct ReferenceRisk {
292    /// Reference risk score (0.0 to 1.0)
293    pub reference_risk_score: Option<f64>,
294}
295
296/// Reference need score.
297///
298/// Proportion of uncited sentences that need citations.
299/// Available for these Wikipedia languages: fa, it, zh, ru, pt, es, ja, de, fr, en.
300/// Only available for articles (Namespace 0).
301#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
302pub struct ReferenceNeed {
303    /// Reference need score (0.0 to 1.0)
304    pub reference_need_score: Option<f64>,
305}
306
307/// Protection settings.
308///
309/// Community-specific protections and restrictions on the article.
310/// Indicates which editor permissions are needed to edit or move the page.
311#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
312pub struct Protection {
313    /// Protection type (e.g., "edit", "move")
314    #[serde(rename = "type")]
315    pub protection_type: String,
316    /// Protection level (e.g., "autoconfirmed", "sysop")
317    pub level: String,
318    /// Expiration timestamp (None for never-expiring)
319    pub expiry: Option<DateTime<Utc>>,
320}
321
#[cfg(test)]
mod tests {
    use super::*;
    use chrono::Utc;

    /// A fully populated `Version` keeps the identifier and stable flag it
    /// was built with.
    #[test]
    fn test_version_creation() {
        let author = Editor {
            identifier: Some(12345),
            name: Some(String::from("TestUser")),
            is_bot: Some(false),
            is_anonymous: Some(false),
            date_started: Some(Utc::now()),
            edit_count: Some(1000),
            groups: Some(vec![String::from("user")]),
            is_admin: Some(false),
            is_patroller: Some(false),
            has_advanced_rights: Some(false),
        };
        let article_size = ArticleSize {
            value: 15000,
            unit_text: String::from("B"),
        };

        let revision = Version {
            identifier: 1182847293,
            editor: Some(author),
            comment: Some(String::from("Test edit")),
            tags: Some(vec![String::from("mobile edit")]),
            has_tag_needs_citation: Some(false),
            is_minor_edit: Some(false),
            is_flagged_stable: Some(true),
            is_breaking_news: Some(false),
            noindex: Some(false),
            number_of_characters: Some(5000),
            size: Some(article_size),
            maintenance_tags: None,
            scores: None,
        };

        assert_eq!(revision.identifier, 1182847293);
        assert_eq!(revision.is_flagged_stable, Some(true));
    }

    /// Group membership and the admin flag are readable from an `Editor`.
    #[test]
    fn test_editor_groups() {
        let memberships: Vec<String> = ["user", "autoconfirmed", "extendedconfirmed"]
            .iter()
            .map(|group| group.to_string())
            .collect();

        let admin = Editor {
            identifier: Some(12345),
            name: Some(String::from("AdminUser")),
            is_bot: Some(false),
            is_anonymous: Some(false),
            date_started: Some(Utc::now()),
            edit_count: Some(5000),
            groups: Some(memberships),
            is_admin: Some(true),
            is_patroller: Some(true),
            has_advanced_rights: Some(true),
        };

        let listed = admin.groups.as_ref().expect("groups were set above");
        assert!(listed.iter().any(|group| group == "user"));
        assert!(listed.iter().any(|group| group == "autoconfirmed"));
        assert_eq!(admin.is_admin, Some(true));
    }

    /// Maintenance counts are stored as given.
    #[test]
    fn test_maintenance_tags() {
        let counts = MaintenanceTags {
            citation_needed_count: Some(5),
            pov_count: Some(1),
            clarification_needed_count: Some(2),
            update_count: Some(10),
        };

        assert_eq!(counts.citation_needed_count, Some(5));
        assert_eq!(counts.pov_count, Some(1));
    }

    /// A protection without an expiry keeps its type and level.
    #[test]
    fn test_protection() {
        // `expiry: None` models a protection that never expires.
        let guard = Protection {
            protection_type: String::from("edit"),
            level: String::from("autoconfirmed"),
            expiry: None,
        };

        assert_eq!(guard.protection_type, "edit");
        assert_eq!(guard.level, "autoconfirmed");
        assert!(guard.expiry.is_none());
    }

    /// Nested score values are reachable through the `Scores` container.
    #[test]
    fn test_scores() {
        let revert = RevertRisk {
            prediction: Some(false),
            probability: None,
        };
        let risk = ReferenceRisk {
            reference_risk_score: Some(0.15),
        };
        let need = ReferenceNeed {
            reference_need_score: Some(0.25),
        };

        let quality = Scores {
            revertrisk: Some(revert),
            referencerisk: Some(risk),
            referenceneed: Some(need),
        };

        let revert_risk = quality.revertrisk.unwrap();
        assert_eq!(revert_risk.prediction, Some(false));

        let reference_risk = quality.referencerisk.unwrap();
        assert_eq!(reference_risk.reference_risk_score, Some(0.15));
    }
}