wme_models/metadata.rs
1//! Metadata types for the Wikimedia Enterprise API.
2//!
3//! This module provides types for reference data, snapshots, and event metadata
4//! used across all Enterprise APIs. These types describe the context around
5//! articles rather than article content itself.
6//!
7//! # Namespaces
8//!
9//! Wikimedia projects use namespaces to organize different types of pages:
10//! - **0 (Main)**: Article content
11//! - **6 (File)**: Media files
12//! - **10 (Template)**: Reusable wikitext
13//! - **14 (Category)**: Topic groupings
14//!
15//! See [`Namespace`] for namespace information.
16//!
17//! # Snapshots
18//!
19//! Snapshots provide complete dumps of Wikimedia projects. Use [`SnapshotInfo`]
20//! to discover available snapshots and their download chunks.
21//!
22//! # Events
23//!
24//! The Realtime API uses event-based updates. See [`EventMetadata`] for
25//! partition/offset tracking to enable resume functionality.
26
27use chrono::{DateTime, Utc};
28use serde::{Deserialize, Serialize};
29
30/// Event types for article changes.
31///
32/// The Realtime API streams events of these types as they happen.
33/// Event types indicate what kind of change occurred to an article.
34#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
35pub enum EventType {
36 /// Article was created or updated (content changed)
37 #[serde(rename = "update")]
38 Update,
39 /// Article was deleted
40 #[serde(rename = "delete")]
41 Delete,
42 /// Article visibility changed (content/editor/comment hidden)
43 #[serde(rename = "visibility-change")]
44 VisibilityChange,
45}
46
47/// Event metadata for articles.
48///
49/// Tracks events through the Wikimedia Enterprise system. The partition and
50/// offset fields enable resuming streams after disconnections.
51///
52/// # Resuming Streams
53///
54/// ```ignore
55/// // Store the last processed offset for each partition
56/// let offsets: HashMap<u32, u64> = load_checkpoints();
57///
58/// // When reconnecting, pass these offsets
59/// client.connect_with_offsets(offsets).await?;
60/// ```
61#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
62pub struct EventMetadata {
63 /// Event UUID (for tracking through the system)
64 pub identifier: String,
65 /// Event type (update, delete, visibility-change)
66 #[serde(rename = "type")]
67 pub event_type: EventType,
68 /// Event creation timestamp (when entered Enterprise system)
69 pub date_created: DateTime<Utc>,
70 /// Event publication timestamp (Realtime API only)
71 pub date_published: Option<DateTime<Utc>>,
72 /// Partition number (Realtime API only, for resume)
73 pub partition: Option<u32>,
74 /// Offset within partition (Realtime API only, for resume)
75 pub offset: Option<u64>,
76}
77
78/// Namespace information.
79///
80/// Namespaces organize different types of pages in Wikimedia projects.
81/// The identifier is the namespace number (0, 6, 10, 14, etc.).
82///
83/// # Common Namespaces
84///
85/// - `0` - Main (article content)
86/// - `6` - File (media uploads)
87/// - `10` - Template (reusable wikitext)
88/// - `14` - Category (topic groupings)
89#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
90pub struct Namespace {
91 /// Namespace ID (0 = Main, 6 = File, 10 = Template, 14 = Category)
92 pub identifier: u32,
93 /// Namespace name
94 pub name: Option<String>,
95 /// Namespace description
96 pub description: Option<String>,
97}
98
99/// Language information.
100///
101/// Describes a language supported by Wikimedia projects.
102#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
103pub struct Language {
104 /// Language code (e.g., "en", "de", "fr")
105 pub identifier: Option<String>,
106 /// Language name in English
107 pub name: Option<String>,
108 /// Alternate language name (native script)
109 pub alternate_name: Option<String>,
110 /// Text direction ("ltr" for left-to-right, "rtl" for right-to-left)
111 pub direction: Option<String>,
112}
113
114/// Project information.
115///
116/// Simple project reference with basic metadata.
117/// For full project metadata including language, see [`ProjectInfo`].
118#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
119pub struct Project {
120 /// Project identifier (e.g., "enwiki")
121 pub identifier: String,
122 /// Project URL
123 pub url: String,
124 /// Project type (e.g., "Wikipedia")
125 #[serde(rename = "type")]
126 pub project_type: Option<String>,
127}
128
129/// Project info for discovery API.
130///
131/// Full project metadata returned by the metadata endpoints.
132#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
133pub struct ProjectInfo {
134 /// Project identifier (e.g., "enwiki")
135 pub identifier: String,
136 /// Project code (e.g., "wikipedia")
137 pub code: Option<String>,
138 /// Project name
139 pub name: Option<String>,
140 /// Project URL
141 pub url: Option<String>,
142 /// Project language
143 pub in_language: Option<Language>,
144}
145
146/// Project type (code) information.
147///
148/// Describes project types like "wiki" (Wikipedia) or "wiktionary".
149#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
150pub struct ProjectType {
151 /// Type identifier (e.g., "wiki", "wiktionary")
152 pub identifier: String,
153 /// Type name
154 pub name: String,
155 /// Type description
156 pub description: Option<String>,
157}
158
159/// File size information with unit.
160///
161/// Used in snapshot and batch metadata to describe download sizes.
162#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
163pub struct Size {
164 /// Unit of measurement (e.g., "bytes", "MB", "GB")
165 pub unit_text: String,
166 /// Size value (can be fractional for small sizes)
167 pub value: f64,
168}
169
170/// Snapshot information.
171///
172/// Metadata about available project snapshots including download chunks
173/// for parallel downloading.
174///
175/// # Example
176///
177/// ```ignore
178/// // Get snapshot info and download chunks in parallel
179/// for chunk in snapshot.chunks {
180/// let data = download_chunk(chunk.url).await?;
181/// process_chunk(data).await?;
182/// }
183/// ```
184#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
185pub struct SnapshotInfo {
186 /// Snapshot identifier (e.g., "enwiki_namespace_0")
187 pub identifier: String,
188 /// Snapshot version
189 pub version: String,
190 /// Last modification timestamp
191 pub date_modified: DateTime<Utc>,
192 /// Language of the snapshot content
193 pub in_language: Language,
194 /// Project this snapshot belongs to
195 pub is_part_of: ProjectInfo,
196 /// Namespace of the snapshot
197 pub namespace: Namespace,
198 /// Snapshot size information
199 pub size: Size,
200 /// Downloadable chunk identifiers for parallel download
201 pub chunks: Vec<String>,
202}
203
204/// Chunk information for parallel downloads.
205///
206/// Large snapshots are split into chunks for parallel downloading.
207/// Each chunk can be downloaded independently and contains a subset
208/// of the articles.
209#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
210pub struct ChunkInfo {
211 /// Chunk identifier (e.g., "enwiki_namespace_0_chunk_0")
212 pub identifier: String,
213 /// Chunk version
214 pub version: String,
215 /// Last modification timestamp
216 pub date_modified: DateTime<Utc>,
217 /// Language of the chunk content
218 pub in_language: Language,
219 /// Project this chunk belongs to
220 pub is_part_of: ProjectInfo,
221 /// Namespace of the chunk
222 pub namespace: Namespace,
223 /// Chunk size information
224 pub size: Size,
225 /// Download URL for the chunk
226 pub url: Option<String>,
227}
228
229/// Simplified language information (used in Realtime API).
230///
231/// Realtime API uses a minimal language representation.
232#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
233pub struct SimplifiedLanguage {
234 /// Language code (e.g., "en", "de")
235 pub identifier: String,
236 /// Language name
237 pub name: String,
238}
239
240/// Realtime project information (used in streaming API).
241///
242/// Realtime API uses a different project structure with version
243/// and size information for the stream.
244#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
245pub struct RealtimeProject {
246 /// Project identifier (e.g., "enwiki")
247 pub identifier: String,
248 /// Project name
249 pub name: String,
250 /// Project URL
251 pub url: String,
252 /// Project version
253 pub version: String,
254 /// Last modification timestamp
255 pub date_modified: DateTime<Utc>,
256 /// Project size
257 pub size: Size,
258 /// Language information (simplified)
259 pub in_language: SimplifiedLanguage,
260}
261
262/// Simplified namespace (used in Realtime API).
263///
264/// Realtime API uses minimal namespace representation.
265#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
266pub struct SimplifiedNamespace {
267 /// Namespace ID
268 pub identifier: u32,
269 /// Namespace name
270 pub name: Option<String>,
271}
272
273/// Realtime batch info (hourly batches).
274///
275/// Metadata for hourly batch files available through the Realtime API.
276#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
277pub struct RealtimeBatchInfo {
278 /// Batch identifier
279 pub identifier: String,
280 /// Batch name
281 pub name: String,
282 /// Batch version
283 pub version: String,
284 /// Language information
285 pub in_language: Language,
286 /// Project this batch belongs to
287 pub is_part_of: ProjectInfo,
288 /// Namespace information
289 pub namespace: Namespace,
290 /// Batch size information
291 pub size: Size,
292}
293
294/// Batch information for Realtime API.
295///
296/// Basic batch metadata with event count.
297#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
298pub struct BatchInfo {
299 /// Batch identifier
300 pub identifier: String,
301 /// Batch creation timestamp
302 pub date_created: DateTime<Utc>,
303 /// Number of events in batch
304 pub event_count: u64,
305}
306
307/// Article update event for Realtime API.
308///
309/// Combines article data with event metadata for Realtime streams.
310#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
311pub struct ArticleUpdate {
312 /// Event type
313 pub event_type: EventType,
314 /// Article data
315 pub article: crate::Article,
316 /// Event metadata
317 pub event_metadata: EventMetadata,
318}
319
#[cfg(test)]
mod tests {
    use super::*;
    use chrono::Utc;

    /// English, left-to-right language fixture shared by several tests.
    fn english() -> Language {
        Language {
            identifier: Some("en".to_string()),
            name: Some("English".to_string()),
            alternate_name: None,
            direction: Some("ltr".to_string()),
        }
    }

    /// English Wikipedia project fixture.
    fn english_wikipedia() -> ProjectInfo {
        ProjectInfo {
            identifier: "enwiki".to_string(),
            code: Some("wikipedia".to_string()),
            name: Some("English Wikipedia".to_string()),
            url: Some("https://en.wikipedia.org".to_string()),
            in_language: Some(english()),
        }
    }

    /// Main namespace fixture (identifier 0, empty name).
    fn main_namespace() -> Namespace {
        Namespace {
            identifier: 0,
            name: Some("".to_string()),
            description: Some("Main namespace".to_string()),
        }
    }

    #[test]
    fn test_event_type_variants() {
        // Every variant can be constructed and matched against itself.
        assert!(matches!(EventType::Update, EventType::Update));
        assert!(matches!(EventType::Delete, EventType::Delete));
        assert!(matches!(
            EventType::VisibilityChange,
            EventType::VisibilityChange
        ));
    }

    #[test]
    fn test_event_metadata() {
        let created = Utc::now();
        let published = Some(Utc::now());
        let metadata = EventMetadata {
            identifier: "evt-12345".to_string(),
            event_type: EventType::Update,
            date_created: created,
            date_published: published,
            partition: Some(4),
            offset: Some(3593806),
        };

        let EventMetadata {
            identifier,
            partition,
            offset,
            ..
        } = metadata;
        assert_eq!(identifier, "evt-12345");
        assert_eq!(partition, Some(4));
        assert_eq!(offset, Some(3593806));
    }

    #[test]
    fn test_namespace_creation() {
        let main = main_namespace();

        assert_eq!(main.identifier, 0);
        assert_eq!(main.name.as_deref(), Some(""));
    }

    #[test]
    fn test_language_creation() {
        let language = english();

        assert_eq!(language.identifier.as_deref(), Some("en"));
        assert_eq!(language.direction.as_deref(), Some("ltr"));
    }

    #[test]
    fn test_rtl_language() {
        let arabic = Language {
            identifier: Some("ar".to_string()),
            name: Some("Arabic".to_string()),
            alternate_name: Some("العربية".to_string()),
            direction: Some("rtl".to_string()),
        };

        assert_eq!(arabic.identifier.as_deref(), Some("ar"));
        assert_eq!(arabic.direction.as_deref(), Some("rtl"));
    }

    #[test]
    fn test_project_info() {
        let wiki = english_wikipedia();

        assert_eq!(wiki.identifier, "enwiki");
        assert_eq!(wiki.code.as_deref(), Some("wikipedia"));
    }

    #[test]
    fn test_size() {
        let megabytes = Size {
            unit_text: "MB".to_string(),
            value: 1500.0,
        };

        assert_eq!(megabytes.value, 1500.0);
        assert_eq!(megabytes.unit_text, "MB");
    }

    #[test]
    fn test_snapshot_info() {
        let info = SnapshotInfo {
            identifier: "enwiki_namespace_0".to_string(),
            version: "2024-01-15".to_string(),
            date_modified: Utc::now(),
            in_language: english(),
            is_part_of: english_wikipedia(),
            namespace: main_namespace(),
            size: Size {
                unit_text: "GB".to_string(),
                value: 25.0,
            },
            chunks: Vec::new(),
        };

        assert_eq!(info.identifier, "enwiki_namespace_0");
        assert!(info.chunks.is_empty());
    }
}
459}