ftml/settings/
interwiki.rs

1/*
2 * settings/interwiki.rs
3 *
4 * ftml - Library to parse Wikidot text
5 * Copyright (C) 2019-2025 Wikijump Team
6 *
7 * This program is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU Affero General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU Affero General Public License for more details.
16 *
17 * You should have received a copy of the GNU Affero General Public License
18 * along with this program. If not, see <http://www.gnu.org/licenses/>.
19 */
20
21use std::borrow::Cow;
22use std::collections::HashMap;
23use std::sync::LazyLock;
24
25/// An [`InterwikiSettings`] instance that has no prefixes.
26pub static EMPTY_INTERWIKI: LazyLock<InterwikiSettings> =
27    LazyLock::new(|| InterwikiSettings {
28        prefixes: hashmap! {},
29    });
30
31#[allow(rustdoc::bare_urls)]
32/// An [`InterwikiSettings`] instance that has the default prefixes.
33///
34/// These prefixes are:
35/// - `wikipedia:path` => `https://wikipedia.org/wiki/path`
36/// - `wp:path` => `https://wikipedia.org/wiki/path`
37/// - `commons:path` => `https://commons.wikimedia.org/wiki/path`
38/// - `google:path` => `https://google.com/search?q=path`
39/// - `duckduckgo:path` => `https://duckduckgo.com/?q=path`
40/// - `ddg:path` => `https://duckduckgo.com/?q=path`
41/// - `dictionary:path` => `https://dictionary.com/browse/path`
42/// - `thesaurus:path` => `https://thesaurus.com/browse/path`
43pub static DEFAULT_INTERWIKI: LazyLock<InterwikiSettings> =
44    LazyLock::new(|| InterwikiSettings {
45        prefixes: hashmap! {
46            cow!("wikipedia") => cow!("https://wikipedia.org/wiki/$$"),
47            cow!("wp") => cow!("https://wikipedia.org/wiki/$$"),
48            cow!("commons") => cow!("https://commons.wikimedia.org/wiki/$$"),
49            cow!("google") => cow!("https://google.com/search?q=$$"),
50            cow!("duckduckgo") => cow!("https://duckduckgo.com/?q=$$"),
51            cow!("ddg") => cow!("https://duckduckgo.com/?q=$$"),
52            cow!("dictionary") => cow!("https://dictionary.com/browse/$$"),
53            cow!("thesaurus") => cow!("https://thesaurus.com/browse/$$"),
54        },
55    });
56
57/// Settings that determine how to turn [`interwiki links`](http://org.wikidot.com/doc:wiki-syntax#toc21)
58/// into full URLs.
59#[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq, Eq)]
60pub struct InterwikiSettings {
61    #[serde(flatten)]
62    /// A map from each interwiki prefix to the interwiki URL. A '$$' in the URL indicates where the path specified in
63    /// the Wikijump interwiki block should go.
64    pub prefixes: HashMap<Cow<'static, str>, Cow<'static, str>>,
65}
66
67impl InterwikiSettings {
68    /// Creates a new instance with no prefixes.
69    #[inline]
70    pub fn new() -> Self {
71        InterwikiSettings::default()
72    }
73
74    /// Creates a full URL from an interwiki link.
75    /// # Example
76    /// ```
77    /// # use ftml::settings::*;
78    /// assert_eq!(DEFAULT_INTERWIKI.build("wikipedia:Mallard").unwrap(), "https://wikipedia.org/wiki/Mallard");
79    /// ```
80    ///
81    /// Returns None if:
82    /// - The link starts with a colon
83    /// - There is no colon in the link
84    /// - There is nothing after the colon
85    /// - The interwiki prefix is not found
86    pub fn build(&self, link: &str) -> Option<String> {
87        match link.find(':') {
88            // Starting with a colon is not interwiki, skip.
89            // Or, if no colon, no interwiki.
90            Some(0) | None => None,
91
92            // Split at first colon, any further are treated as part of the link contents.
93            Some(idx) => {
94                let (prefix, rest) = link.split_at(idx);
95                let path = &rest[1..]; // Safe because we're splitting on ':', an ASCII character.
96
97                // Special handling, if it's empty then fail
98                if path.is_empty() {
99                    return None;
100                }
101
102                // If there's an interwiki prefix, apply the template.
103                self.prefixes.get(prefix).map(|template| {
104                    // Substitute all $$s in the URL templates.
105                    let mut url = template.replace("$$", path);
106
107                    // Substitute all spaces into url-encoded form.
108                    while let Some(idx) = url.find(' ') {
109                        url.replace_range(idx..idx + 1, "%20");
110                    }
111
112                    url
113                })
114            }
115        }
116    }
117}
118
#[test]
fn interwiki_prefixes() {
    // Each entry pairs an interwiki link with the URL the default settings
    // should build for it, or `None` when the link must be rejected.
    const CASES: &[(&str, Option<&str>)] = &[
        ("my-link", None),
        (
            "wikipedia:Mallard",
            Some("https://wikipedia.org/wiki/Mallard"),
        ),
        (
            "wikipedia:SCP_Foundation",
            Some("https://wikipedia.org/wiki/SCP_Foundation"),
        ),
        (
            "wikipedia:Special:RecentChanges",
            Some("https://wikipedia.org/wiki/Special:RecentChanges"),
        ),
        (
            "wp:SCP_Foundation",
            Some("https://wikipedia.org/wiki/SCP_Foundation"),
        ),
        (
            "wp:it:SCP_Foundation",
            Some("https://wikipedia.org/wiki/it:SCP_Foundation"),
        ),
        (
            "commons:File:SCP-682.jpg",
            Some("https://commons.wikimedia.org/wiki/File:SCP-682.jpg"),
        ),
        (
            "commons:Category:SCP_Foundation",
            Some("https://commons.wikimedia.org/wiki/Category:SCP_Foundation"),
        ),
        (
            "google:what's+my+ip",
            Some("https://google.com/search?q=what's+my+ip"),
        ),
        (
            "duckduckgo:what's+my+ip",
            Some("https://duckduckgo.com/?q=what's+my+ip"),
        ),
        (
            "ddg:what's+my+ip",
            Some("https://duckduckgo.com/?q=what's+my+ip"),
        ),
        ("dictionary:oak", Some("https://dictionary.com/browse/oak")),
        ("thesaurus:oak", Some("https://thesaurus.com/browse/oak")),
        ("banana:fruit-salad", None),
        (":empty", None),
        ("no-link:", None),
    ];

    for &(link, expected) in CASES {
        let actual = DEFAULT_INTERWIKI.build(link);

        assert_eq!(
            actual.as_deref(),
            expected,
            "Actual interwiki result doesn't match expected",
        );
    }
}