1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185
/*
* includes/mod.rs
*
* ftml - Library to parse Wikidot text
* Copyright (C) 2019-2024 Wikijump Team
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
//! This module implements "messy includes", or Wikidot native includes.
//!
//! It is an annoying but necessary hack that parses the pseudoblock
//! `[[include-messy]]` and directly replaces that part with the
//! foreign page's wikitext.
#[warn(missing_docs)]
#[cfg(test)]
mod test;
mod include_ref;
mod includer;
mod parse;
pub use self::include_ref::IncludeRef;
pub use self::includer::{DebugIncluder, FetchedPage, Includer, NullIncluder};
use self::parse::parse_include_block;
use crate::data::PageRef;
use crate::settings::WikitextSettings;
use crate::tree::VariableMap;
use once_cell::sync::Lazy;
use regex::{Regex, RegexBuilder};
/// Matches the opening of an `[[include-messy` block, up to and including the
/// whitespace that follows the block name.
///
/// The pattern is case-insensitive and compiled in multi-line mode, so the
/// `^` anchor matches at the beginning of every line rather than only at the
/// beginning of the input.
static INCLUDE_REGEX: Lazy<Regex> = Lazy::new(|| {
    let mut builder = RegexBuilder::new(r"^\[\[\s*include-messy\s+");

    builder
        .case_insensitive(true)
        .multi_line(true)
        .dot_matches_new_line(true);

    // The pattern is a constant, so failing to compile it is a programming error.
    builder.build().unwrap()
});
/// Matches a variable placeholder of the form `{$name}`.
///
/// The variable name (ASCII letters, digits, `_` and `-`) is exposed through
/// the `name` capture group.
static VARIABLE_REGEX: Lazy<Regex> = Lazy::new(|| {
    let pattern = r"\{\$(?P<name>[a-zA-Z0-9_\-]+)\}";

    // The pattern is a constant, so failing to compile it is a programming error.
    Regex::new(pattern).unwrap()
});
/// Replaces the include blocks in a string with the content of the pages referenced by those
/// blocks.
///
/// Every line-start occurrence of `[[include-messy` is handed to
/// `parse_include_block`. Blocks that fail to parse are logged and left
/// untouched in the text. All successfully parsed includes are then passed to
/// `includer.include_pages()` in a single call, and each block is replaced by
/// the fetched content (after variable substitution), or by the result of
/// `includer.no_such_include()` when the fetched page has no content.
///
/// If `settings.enable_page_syntax` is false, the input is returned unchanged
/// along with an empty page list, and the includer is never called.
///
/// # Returns
///
/// The text with all include blocks substituted, and the page references of
/// the substituted includes in the order they appear in `input`.
///
/// # Errors
///
/// * Any error returned by `includer.include_pages()` or
///   `includer.no_such_include()` is propagated.
/// * The value produced by `invalid_return` is returned if the includer yields
///   a different number of pages than were requested, or if a fetched page's
///   reference does not match the corresponding request.
pub fn include<'t, I, E, F>(
    input: &'t str,
    settings: &WikitextSettings,
    mut includer: I,
    invalid_return: F,
) -> Result<(String, Vec<PageRef<'t>>), E>
where
    I: Includer<'t, Error = E>,
    F: FnOnce() -> E,
{
    if !settings.enable_page_syntax {
        info!("Includes are disabled for this input, skipping");

        let output = str!(input);
        let pages = vec![];
        return Ok((output, pages));
    }

    info!("Finding and replacing all instances of include blocks in text");

    let mut ranges = Vec::new();
    let mut includes = Vec::new();

    // End offset of the most recently accepted include block.
    //
    // The regex matches at every line start, so a match can begin inside a
    // block that was already parsed (e.g. within a multi-line argument).
    // Recording such a match would yield overlapping ranges, and the
    // substitution pass below would then call `replace_range()` with stale
    // offsets, which can panic or corrupt the output.
    let mut previous_end = 0;

    // Get include references
    for mtch in INCLUDE_REGEX.find_iter(input) {
        let start = mtch.start();

        debug!(
            "Found include regex match (start {}, slice '{}')",
            start,
            mtch.as_str(),
        );

        if start < previous_end {
            warn!("Include regex match lies inside a previous include block, skipping");
            continue;
        }

        match parse_include_block(input, start, settings) {
            Ok((include, end)) => {
                ranges.push(start..end);
                includes.push(include);
                previous_end = end;
            }
            Err(_) => warn!("Unable to parse include regex match"),
        }
    }

    // Retrieve included pages
    let fetched_pages = includer.include_pages(&includes)?;

    // Ensure it matches up with the request
    if includes.len() != fetched_pages.len() {
        return Err(invalid_return());
    }

    // Substitute inclusions
    //
    // We must iterate backwards for all the indices to be valid:
    // replacing a later range never shifts the offsets of an earlier one.
    let ranges_iter = ranges.into_iter();
    let includes_iter = includes.into_iter();
    let fetched_iter = fetched_pages.into_iter();
    let joined_iter = ranges_iter.zip(includes_iter).zip(fetched_iter).rev();

    // Borrowing from the original text and doing in-place insertions
    // will not work here. We are trying to both return the page names
    // (slices from the input string), and replace it with new content.
    let mut output = String::from(input);
    let mut pages = Vec::new();

    for ((range, include), fetched) in joined_iter {
        let (page_ref, variables) = include.into();

        info!(
            "Replacing range for included page ({}..{})",
            range.start, range.end,
        );

        // Ensure the returned page reference matches
        if page_ref != fetched.page_ref {
            return Err(invalid_return());
        }

        // Get replaced content, or error message
        let replace_with = match fetched.content {
            // Take fetched content, replace variables
            Some(mut content) => {
                replace_variables(content.to_mut(), &variables);
                content
            }

            // Include not found, return premade template
            None => includer.no_such_include(&page_ref)?,
        };

        // Append page to final list
        pages.push(page_ref);

        // Perform the substitution
        output.replace_range(range, &replace_with);
    }

    // Since we iterate in reverse order, the pages are reversed.
    pages.reverse();

    // Return
    Ok((output, pages))
}
/// Substitutes `{$name}` placeholders in the included content, in place.
///
/// Only placeholders whose name has an entry in `variables` are replaced;
/// any other placeholder is left in the text as-is.
///
/// Read <https://www.wikidot.com/doc-wiki-syntax:include> for more details.
fn replace_variables(content: &mut String, variables: &VariableMap) {
    // Gather every placeholder that has a known value, along with the
    // byte span of the full `{$name}` match.
    let substitutions: Vec<_> = VARIABLE_REGEX
        .captures_iter(content)
        .filter_map(|capture| {
            let whole = capture.get(0).unwrap();

            variables
                .get(&capture["name"])
                .map(|value| (value, whole.range()))
        })
        .collect();

    // Apply the replacements from last to first, so the spans which
    // have not been processed yet still point at the right bytes.
    for (value, span) in substitutions.into_iter().rev() {
        content.replace_range(span, value);
    }
}