wikitext_parser/
wikitext.rs

1#[cfg(feature = "serde")]
2use serde::{Deserialize, Serialize};
3use std::cmp::Ordering;
4use std::fmt;
5use std::fmt::{Display, Formatter};
6
7/// The root of a wikitext document.
8#[derive(Debug, Eq, PartialEq, Clone)]
9#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
10pub struct Wikitext {
11    /// The root of the section tree of the page.
12    pub root_section: Section,
13}
14
15impl Wikitext {
16    /// Print the headlines of the text.
17    pub fn print_headlines(&self) {
18        self.root_section.print_headlines();
19    }
20
21    /// List the headlines of the text.
22    pub fn list_headlines(&self) -> Vec<Headline> {
23        let mut result = Vec::new();
24        self.root_section.list_headlines(&mut result);
25        result
26    }
27
28    /// List the double brace expressions of the text.
29    pub fn list_double_brace_expressions(&self) -> Vec<TextPiece> {
30        let mut result = Vec::new();
31        self.root_section.list_double_brace_expressions(&mut result);
32        result
33    }
34
35    /// List the plain parts of the text.
36    pub fn list_plain_text(&self) -> Vec<TextPiece> {
37        let mut result = Vec::new();
38        self.root_section.list_plain_text(&mut result);
39        result
40    }
41}
42
43/// A section of wikitext.
44#[derive(Debug, Clone, Eq, PartialEq)]
45#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
46pub struct Section {
47    /// The headline of the section.
48    pub headline: Headline,
49    /// The paragraphs of the section.
50    pub paragraphs: Vec<Paragraph>,
51    /// The subsections of the section.
52    pub subsections: Vec<Section>,
53}
54
55impl Section {
56    /// Print the headlines of the text.
57    pub fn print_headlines(&self) {
58        println!(
59            "{0} {1} {0}",
60            "=".repeat(self.headline.level.into()),
61            self.headline.label
62        );
63        for subsection in &self.subsections {
64            subsection.print_headlines();
65        }
66    }
67
68    /// List the headlines of the text.
69    pub fn list_headlines(&self, result: &mut Vec<Headline>) {
70        result.push(self.headline.clone());
71        for subsection in &self.subsections {
72            subsection.list_headlines(result);
73        }
74    }
75
76    /// Iterate over all text pieces in the wikitext.
77    pub fn iter_text_pieces(&self) -> impl Iterator<Item = &'_ TextPiece> {
78        self.paragraphs
79            .iter()
80            .flat_map(|paragraph| paragraph.lines.iter())
81            .flat_map(|line| match line {
82                Line::Normal { text } => text.pieces.iter(),
83                Line::List { text, .. } => text.pieces.iter(),
84            })
85    }
86
87    /// List the double brace expressions of the text.
88    pub fn list_double_brace_expressions(&self, result: &mut Vec<TextPiece>) {
89        for text_piece in self.iter_text_pieces() {
90            if matches!(text_piece, TextPiece::DoubleBraceExpression { .. }) {
91                result.push(text_piece.clone());
92            }
93        }
94        for subsection in &self.subsections {
95            subsection.list_double_brace_expressions(result);
96        }
97    }
98
99    /// List the plain parts of the text.
100    pub fn list_plain_text(&self, result: &mut Vec<TextPiece>) {
101        for text_piece in self.iter_text_pieces() {
102            if matches!(text_piece, TextPiece::Text { .. }) {
103                result.push(text_piece.clone());
104            }
105        }
106        for subsection in &self.subsections {
107            subsection.list_plain_text(result);
108        }
109    }
110}
111
/// The headline of a wikitext section.
#[derive(Clone, Debug, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct Headline {
    /// The text shown as the headline.
    pub label: String,
    /// The level of the headline.
    pub level: u8,
}
121
122impl Headline {
123    /// Create a new headline with the given label and level.
124    pub fn new(label: impl Into<String>, level: u8) -> Self {
125        Self {
126            label: label.into(),
127            level,
128        }
129    }
130}
131
132/// A paragraph of a section.
133#[derive(Debug, Clone, Eq, PartialEq, Default)]
134#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
135pub struct Paragraph {
136    /// The lines of the paragraph.
137    pub lines: Vec<Line>,
138}
139
140/// A line of a paragraph.
141#[derive(Debug, Clone, Eq, PartialEq)]
142#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
143pub enum Line {
144    Normal { text: Text },
145    List { list_prefix: String, text: Text },
146}
147
148impl Line {
149    /// Returns true if the line would be ignored by the wikitext renderer.
150    pub fn is_empty(&self) -> bool {
151        match self {
152            Line::Normal { text } => text.is_empty(),
153            Line::List { .. } => false,
154        }
155    }
156}
157
158/// Some text, either a line or an argument to an expression.
159#[derive(Debug, Clone, Eq, PartialEq, Default)]
160#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
161pub struct Text {
162    /// The pieces of the text.
163    pub pieces: Vec<TextPiece>,
164}
165
166impl Text {
167    /// Create a new empty text.
168    pub fn new() -> Self {
169        Default::default()
170    }
171
172    /// Returns `true` if this `Text` contains no pieces.
173    pub fn is_empty(&self) -> bool {
174        self.pieces.is_empty()
175    }
176
177    /// Extend the current last text piece with the given string,
178    /// or append a new text piece created from the given string if there is no text piece
179    /// or the last text piece is not of variant [`Text`](TextPiece::Text) or has different formatting.
180    pub fn extend_with_formatted_text(&mut self, text_formatting: TextFormatting, text: &str) {
181        if let Some(TextPiece::Text {
182            formatting: last_formatting,
183            text: last,
184        }) = self.pieces.last_mut()
185        {
186            if text_formatting == *last_formatting {
187                last.push_str(text);
188                return;
189            }
190        }
191
192        self.pieces.push(TextPiece::Text {
193            formatting: text_formatting,
194            text: text.to_string(),
195        });
196    }
197
198    /// Trim whitespace from the beginning and the end of the text.
199    pub fn trim_self(&mut self) {
200        self.trim_self_start();
201        self.trim_self_end();
202    }
203
204    /// Trim whitespace from the beginning of the text.
205    pub fn trim_self_start(&mut self) {
206        let mut offset = 0;
207        while offset < self.pieces.len() {
208            match &mut self.pieces[offset] {
209                TextPiece::Text { text, .. } => {
210                    *text = text.trim_start().to_string();
211                    if !text.is_empty() {
212                        break;
213                    }
214                }
215                TextPiece::DoubleBraceExpression { .. }
216                | TextPiece::InternalLink { .. }
217                | TextPiece::ListItem { .. } => break,
218            }
219            offset += 1;
220        }
221        self.pieces.drain(..offset);
222    }
223
224    /// Trim whitespace from the end of the text.
225    pub fn trim_self_end(&mut self) {
226        let mut limit = self.pieces.len();
227        while limit > 0 {
228            match &mut self.pieces[limit - 1] {
229                TextPiece::Text { text, .. } => {
230                    *text = text.trim_end().to_string();
231                    if !text.is_empty() {
232                        break;
233                    }
234                }
235                TextPiece::DoubleBraceExpression { .. } | TextPiece::InternalLink { .. } => break,
236                TextPiece::ListItem { text, .. } => {
237                    text.trim_self_end();
238                    break;
239                }
240            }
241            limit -= 1;
242        }
243        self.pieces.drain(limit..);
244    }
245}
246
247/// A piece of text of a section of wikitext.
248#[derive(Debug, Clone, Eq, PartialEq)]
249#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
250pub enum TextPiece {
251    /// Plain text to be rendered as is, with the given formatting.
252    Text {
253        /// The formatting applied to the text.
254        formatting: TextFormatting,
255        /// The text.
256        text: String,
257    },
258    /// A double brace expression.
259    DoubleBraceExpression {
260        /// The tag of the expression.
261        tag: Text,
262        /// The attributes of the expression.
263        attributes: Vec<Attribute>,
264    },
265    /// An internal link.
266    InternalLink {
267        /// The link target.
268        target: Text,
269        /// The link options.
270        options: Vec<Text>,
271        /// The label of the link.
272        label: Option<Text>,
273    },
274    /// A list item.
275    ListItem {
276        /// The prefix deciding the level and numbering of the list.
277        list_prefix: String,
278        /// The text of the list item.
279        text: Text,
280    },
281}
282
283/// An attribute of e.g. a double brace expression.
284#[derive(Debug, Clone, Eq, PartialEq)]
285#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
286pub struct Attribute {
287    /// The name of the attribute.
288    pub name: Option<String>,
289    /// The value of the attribute.
290    pub value: Text,
291}
292
/// The formatting of a piece of formatted text.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum TextFormatting {
    /// Neither italic nor bold.
    Normal,
    /// Italic only.
    Italic,
    /// Bold only.
    Bold,
    /// Both italic and bold.
    ItalicBold,
}
303
304impl TextFormatting {
305    /// Returns the new formatting to use after encountering an apostrophe run of length `apostrophe_length`.
306    pub fn next_formatting(&self, apostrophe_length: usize) -> Self {
307        match (self, apostrophe_length) {
308            (TextFormatting::Normal, 2) => TextFormatting::Italic,
309            (TextFormatting::Normal, 3) => TextFormatting::Bold,
310            (TextFormatting::Normal, 5) => TextFormatting::ItalicBold,
311            (TextFormatting::Italic, 2) => TextFormatting::Normal,
312            (TextFormatting::Italic, 3) => TextFormatting::ItalicBold,
313            (TextFormatting::Italic, 5) => TextFormatting::Bold,
314            (TextFormatting::Bold, 2) => TextFormatting::ItalicBold,
315            (TextFormatting::Bold, 3) => TextFormatting::Normal,
316            (TextFormatting::Bold, 5) => TextFormatting::Italic,
317            (TextFormatting::ItalicBold, 2) => TextFormatting::Bold,
318            (TextFormatting::ItalicBold, 3) => TextFormatting::Italic,
319            (TextFormatting::ItalicBold, 5) => TextFormatting::Normal,
320            (_, apostrophe_length) => unreachable!("Unused apostrophe length: {apostrophe_length}"),
321        }
322    }
323}
324
325impl PartialOrd for TextFormatting {
326    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
327        match (self, other) {
328            (TextFormatting::Normal, TextFormatting::Normal) => Some(Ordering::Equal),
329            (TextFormatting::Normal, TextFormatting::Italic) => Some(Ordering::Less),
330            (TextFormatting::Normal, TextFormatting::Bold) => Some(Ordering::Less),
331            (TextFormatting::Normal, TextFormatting::ItalicBold) => Some(Ordering::Less),
332            (TextFormatting::Italic, TextFormatting::Normal) => Some(Ordering::Greater),
333            (TextFormatting::Italic, TextFormatting::Italic) => Some(Ordering::Equal),
334            (TextFormatting::Italic, TextFormatting::Bold) => None,
335            (TextFormatting::Italic, TextFormatting::ItalicBold) => Some(Ordering::Less),
336            (TextFormatting::Bold, TextFormatting::Normal) => Some(Ordering::Greater),
337            (TextFormatting::Bold, TextFormatting::Italic) => None,
338            (TextFormatting::Bold, TextFormatting::Bold) => Some(Ordering::Equal),
339            (TextFormatting::Bold, TextFormatting::ItalicBold) => Some(Ordering::Less),
340            (TextFormatting::ItalicBold, TextFormatting::Normal) => Some(Ordering::Greater),
341            (TextFormatting::ItalicBold, TextFormatting::Italic) => Some(Ordering::Greater),
342            (TextFormatting::ItalicBold, TextFormatting::Bold) => Some(Ordering::Greater),
343            (TextFormatting::ItalicBold, TextFormatting::ItalicBold) => Some(Ordering::Equal),
344        }
345    }
346}
347
348impl Display for Text {
349    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
350        for text_piece in &self.pieces {
351            write!(fmt, "{text_piece}")?;
352        }
353        Ok(())
354    }
355}
356
357impl Display for TextPiece {
358    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
359        match self {
360            TextPiece::Text { text, formatting } => {
361                write!(fmt, "{}", formatting)?;
362                write!(fmt, "{text}")?;
363                write!(fmt, "{}", formatting)
364            }
365            TextPiece::DoubleBraceExpression {
366                tag,
367                attributes: parameters,
368            } => {
369                write!(fmt, "{{{{{tag}")?;
370
371                for parameter in parameters {
372                    write!(fmt, "|{parameter}")?;
373                }
374
375                write!(fmt, "}}}}")
376            }
377            TextPiece::InternalLink {
378                target: url,
379                options,
380                label,
381            } => {
382                write!(fmt, "[[{url}")?;
383                for option in options {
384                    write!(fmt, "|{option}")?;
385                }
386                if let Some(label) = label {
387                    write!(fmt, "|{label}")?;
388                }
389                write!(fmt, "]]")
390            }
391            TextPiece::ListItem { list_prefix, text } => {
392                write!(fmt, "{list_prefix} {text}")
393            }
394        }
395    }
396}
397
398impl Display for Attribute {
399    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
400        if let Some(name) = &self.name {
401            write!(fmt, "{name}=")?;
402        }
403
404        write!(fmt, "{}", self.value)
405    }
406}
407
408impl Display for TextFormatting {
409    fn fmt(&self, fmt: &mut Formatter<'_>) -> fmt::Result {
410        match self {
411            TextFormatting::Normal => Ok(()),
412            TextFormatting::Italic => write!(fmt, "''"),
413            TextFormatting::Bold => write!(fmt, "'''"),
414            TextFormatting::ItalicBold => write!(fmt, "'''''"),
415        }
416    }
417}