ftml/settings/interwiki.rs
1/*
2 * settings/interwiki.rs
3 *
4 * ftml - Library to parse Wikidot text
5 * Copyright (C) 2019-2025 Wikijump Team
6 *
7 * This program is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU Affero General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU Affero General Public License for more details.
16 *
17 * You should have received a copy of the GNU Affero General Public License
18 * along with this program. If not, see <http://www.gnu.org/licenses/>.
19 */
20
21use std::borrow::Cow;
22use std::collections::HashMap;
23use std::sync::LazyLock;
24
25/// An [`InterwikiSettings`] instance that has no prefixes.
26pub static EMPTY_INTERWIKI: LazyLock<InterwikiSettings> =
27 LazyLock::new(|| InterwikiSettings {
28 prefixes: hashmap! {},
29 });
30
31#[allow(rustdoc::bare_urls)]
32/// An [`InterwikiSettings`] instance that has the default prefixes.
33///
34/// These prefixes are:
35/// - `wikipedia:path` => `https://wikipedia.org/wiki/path`
36/// - `wp:path` => `https://wikipedia.org/wiki/path`
37/// - `commons:path` => `https://commons.wikimedia.org/wiki/path`
38/// - `google:path` => `https://google.com/search?q=path`
39/// - `duckduckgo:path` => `https://duckduckgo.com/?q=path`
40/// - `ddg:path` => `https://duckduckgo.com/?q=path`
41/// - `dictionary:path` => `https://dictionary.com/browse/path`
42/// - `thesaurus:path` => `https://thesaurus.com/browse/path`
43pub static DEFAULT_INTERWIKI: LazyLock<InterwikiSettings> =
44 LazyLock::new(|| InterwikiSettings {
45 prefixes: hashmap! {
46 cow!("wikipedia") => cow!("https://wikipedia.org/wiki/$$"),
47 cow!("wp") => cow!("https://wikipedia.org/wiki/$$"),
48 cow!("commons") => cow!("https://commons.wikimedia.org/wiki/$$"),
49 cow!("google") => cow!("https://google.com/search?q=$$"),
50 cow!("duckduckgo") => cow!("https://duckduckgo.com/?q=$$"),
51 cow!("ddg") => cow!("https://duckduckgo.com/?q=$$"),
52 cow!("dictionary") => cow!("https://dictionary.com/browse/$$"),
53 cow!("thesaurus") => cow!("https://thesaurus.com/browse/$$"),
54 },
55 });
56
57/// Settings that determine how to turn [`interwiki links`](http://org.wikidot.com/doc:wiki-syntax#toc21)
58/// into full URLs.
59#[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq, Eq)]
60pub struct InterwikiSettings {
61 #[serde(flatten)]
62 /// A map from each interwiki prefix to the interwiki URL. A '$$' in the URL indicates where the path specified in
63 /// the Wikijump interwiki block should go.
64 pub prefixes: HashMap<Cow<'static, str>, Cow<'static, str>>,
65}
66
67impl InterwikiSettings {
68 /// Creates a new instance with no prefixes.
69 #[inline]
70 pub fn new() -> Self {
71 InterwikiSettings::default()
72 }
73
74 /// Creates a full URL from an interwiki link.
75 /// # Example
76 /// ```
77 /// # use ftml::settings::*;
78 /// assert_eq!(DEFAULT_INTERWIKI.build("wikipedia:Mallard").unwrap(), "https://wikipedia.org/wiki/Mallard");
79 /// ```
80 ///
81 /// Returns None if:
82 /// - The link starts with a colon
83 /// - There is no colon in the link
84 /// - There is nothing after the colon
85 /// - The interwiki prefix is not found
86 pub fn build(&self, link: &str) -> Option<String> {
87 match link.find(':') {
88 // Starting with a colon is not interwiki, skip.
89 // Or, if no colon, no interwiki.
90 Some(0) | None => None,
91
92 // Split at first colon, any further are treated as part of the link contents.
93 Some(idx) => {
94 let (prefix, rest) = link.split_at(idx);
95 let path = &rest[1..]; // Safe because we're splitting on ':', an ASCII character.
96
97 // Special handling, if it's empty then fail
98 if path.is_empty() {
99 return None;
100 }
101
102 // If there's an interwiki prefix, apply the template.
103 self.prefixes.get(prefix).map(|template| {
104 // Substitute all $$s in the URL templates.
105 let mut url = template.replace("$$", path);
106
107 // Substitute all spaces into url-encoded form.
108 while let Some(idx) = url.find(' ') {
109 url.replace_range(idx..idx + 1, "%20");
110 }
111
112 url
113 })
114 }
115 }
116 }
117}
118
/// Checks `InterwikiSettings::build` on `DEFAULT_INTERWIKI` for every default
/// prefix, for links containing extra colons or spaces, and for the inputs
/// that must be rejected.
#[test]
fn interwiki_prefixes() {
    // (link, expected URL) pairs; `None` means the link must be rejected.
    const CASES: &[(&str, Option<&str>)] = &[
        ("my-link", None),
        ("wikipedia:Mallard", Some("https://wikipedia.org/wiki/Mallard")),
        (
            "wikipedia:SCP_Foundation",
            Some("https://wikipedia.org/wiki/SCP_Foundation"),
        ),
        (
            "wikipedia:Special:RecentChanges",
            Some("https://wikipedia.org/wiki/Special:RecentChanges"),
        ),
        (
            "wp:SCP_Foundation",
            Some("https://wikipedia.org/wiki/SCP_Foundation"),
        ),
        (
            "wp:it:SCP_Foundation",
            Some("https://wikipedia.org/wiki/it:SCP_Foundation"),
        ),
        (
            "commons:File:SCP-682.jpg",
            Some("https://commons.wikimedia.org/wiki/File:SCP-682.jpg"),
        ),
        (
            "commons:Category:SCP_Foundation",
            Some("https://commons.wikimedia.org/wiki/Category:SCP_Foundation"),
        ),
        (
            "google:what's+my+ip",
            Some("https://google.com/search?q=what's+my+ip"),
        ),
        (
            "duckduckgo:what's+my+ip",
            Some("https://duckduckgo.com/?q=what's+my+ip"),
        ),
        (
            "ddg:what's+my+ip",
            Some("https://duckduckgo.com/?q=what's+my+ip"),
        ),
        ("dictionary:oak", Some("https://dictionary.com/browse/oak")),
        ("thesaurus:oak", Some("https://thesaurus.com/browse/oak")),
        // Spaces in the path are url-encoded.
        (
            "wikipedia:SCP Foundation",
            Some("https://wikipedia.org/wiki/SCP%20Foundation"),
        ),
        (
            "google:what is my ip",
            Some("https://google.com/search?q=what%20is%20my%20ip"),
        ),
        // Rejected inputs: unknown prefix, leading colon, empty path, empty link.
        ("banana:fruit-salad", None),
        (":empty", None),
        ("no-link:", None),
        ("wikipedia:", None),
        ("", None),
    ];

    for &(link, expected) in CASES {
        let actual = DEFAULT_INTERWIKI.build(link);

        // Every case shares this assertion site, so name the link in the message.
        assert_eq!(
            actual.as_deref(),
            expected,
            "Actual interwiki result doesn't match expected for link {link:?}",
        );
    }
}