Skip to main content

ftml/parsing/rule/impls/
list.rs

1/*
2 * parsing/rule/impls/list.rs
3 *
4 * ftml - Library to parse Wikidot text
5 * Copyright (C) 2019-2026 Wikijump Team
6 *
7 * This program is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU Affero General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU Affero General Public License for more details.
16 *
17 * You should have received a copy of the GNU Affero General Public License
18 * along with this program. If not, see <http://www.gnu.org/licenses/>.
19 */
20
21use super::prelude::*;
22use crate::parsing::{DepthItem, DepthList, process_depths};
23use crate::tree::{AttributeMap, ListItem, ListType};
24
/// Maximum depth a single list item may have.
///
/// The depth of an item is the byte length of the whitespace token that
/// precedes its bullet. An item deeper than this makes the list rule fail
/// with `ParseErrorKind::ListDepthExceeded`, as a guard against
/// denial-of-service via stack overflow on maliciously deep input.
const MAX_LIST_DEPTH: usize = 20;
26
27const fn get_list_type(token: Token) -> Option<ListType> {
28    match token {
29        Token::BulletItem => Some(ListType::Bullet),
30        Token::NumberedItem => Some(ListType::Numbered),
31        _ => None,
32    }
33}
34
35pub const RULE_LIST: Rule = Rule {
36    name: "list",
37    position: LineRequirement::StartOfLine,
38    try_consume_fn,
39};
40
41fn try_consume_fn<'r, 't>(
42    parser: &mut Parser<'r, 't>,
43) -> ParseResult<'r, 't, Elements<'t>> {
44    // We don't know the list type(s) yet, so just log that we're starting
45    debug!("Parsing a list");
46
47    // Context variables
48    let mut depths = Vec::new();
49    let mut errors = Vec::new();
50
51    // Blockquotes are always paragraph-unsafe,
52    // but we need this binding for chain().
53    let mut paragraph_safe = false;
54
55    // Produce a depth list with elements
56    loop {
57        let current = parser.current();
58        let depth = match current.token {
59            // Count the number of spaces for its depth
60            Token::Whitespace => {
61                let spaces = parser.current().slice;
62                parser.step()?;
63
64                // Since these are only ASCII spaces a byte count is fine
65                spaces.len()
66            }
67
68            // No depth, just the bullet
69            Token::BulletItem | Token::NumberedItem => 0,
70
71            // Invalid token, bail
72            _ => {
73                warn!(
74                    "Didn't find correct bullet token or couldn't determine list depth, ending list iteration"
75                );
76                break;
77            }
78        };
79
80        // Check that the depth isn't obscenely deep, to avoid DOS attacks via stack overflow.
81        if depth > MAX_LIST_DEPTH {
82            warn!(
83                "List item has a depth {depth} greater than the maximum ({MAX_LIST_DEPTH})! Failing"
84            );
85            return Err(parser.make_err(ParseErrorKind::ListDepthExceeded));
86        }
87
88        // Check that we're processing a bullet, and get the type
89        let current = parser.current();
90        let list_type = match get_list_type(current.token) {
91            Some(ltype) => ltype,
92            None => {
93                trace!(
94                    "Didn't find bullet token, couldn't determine list type, ending list iteration"
95                );
96                break;
97            }
98        };
99        parser.step()?;
100
101        trace!("Parsing list item '{}'", list_type.name());
102
103        // For now, always expect whitespace after the bullet
104        let current = parser.current();
105        if current.token != Token::Whitespace {
106            warn!("Didn't find whitespace after bullet token, ending list iteration");
107            break;
108        }
109        parser.step()?;
110
111        // Parse elements until we hit the end of the line
112        let elements = collect_consume(
113            parser,
114            RULE_LIST,
115            &[
116                ParseCondition::current(Token::LineBreak),
117                ParseCondition::current(Token::ParagraphBreak),
118                ParseCondition::current(Token::InputEnd),
119            ],
120            &[],
121            None,
122        )?
123        .chain(&mut errors, &mut paragraph_safe);
124
125        // Empty list lines are ignored
126        if elements.is_empty() {
127            trace!("Skipping empty list line");
128            continue;
129        }
130
131        // Append list line
132        depths.push((depth, list_type, elements));
133    }
134
135    // This list has no rows, so the rule fails
136    if depths.is_empty() {
137        return Err(parser.make_err(ParseErrorKind::RuleFailed));
138    }
139
140    let depth_lists = process_depths(ListType::Generic, depths);
141    let elements: Vec<Element> = depth_lists
142        .into_iter()
143        .map(|(ltype, depth_list)| build_list_element(ltype, depth_list))
144        .collect();
145
146    ok!(paragraph_safe; elements, errors)
147}
148
149fn build_list_element(
150    top_ltype: ListType,
151    list: DepthList<ListType, Vec<Element>>,
152) -> Element {
153    let build_item = |item| match item {
154        DepthItem::Item(elements) => ListItem::Elements {
155            elements,
156            attributes: AttributeMap::new(),
157        },
158        DepthItem::List(ltype, list) => ListItem::SubList {
159            element: Box::new(build_list_element(ltype, list)),
160        },
161    };
162
163    let items = list.into_iter().map(build_item).collect();
164    let attributes = AttributeMap::new();
165
166    // Return the Element::List object
167    Element::List {
168        ltype: top_ltype,
169        items,
170        attributes,
171    }
172}