parse_wiki_text_2/
configuration.rs

// Copyright 2019 Fredrik Portström <https://portstrom.com>
// This is free software distributed under the terms specified in
// the file LICENSE at the top-level directory of this distribution.
4
/// Site specific configuration of a wiki.
///
/// This is generated using the program [`fetch_mediawiki_configuration`](https://github.com/portstrom/fetch_mediawiki_configuration).
///
/// The struct only borrows its data (every field is a shared reference), so it
/// is cheap to copy and can be built entirely from `'static` literals.
#[derive(Clone, Copy, Debug)]
pub struct ConfigurationSource<'a> {
	/// Aliases of the category namespace.
	///
	/// Given without the trailing `:`; `Configuration::new` appends it when
	/// registering each alias.
	pub category_namespaces: &'a [&'a str],

	/// Tag names of extension tags.
	pub extension_tags: &'a [&'a str],

	/// Aliases of the file namespace.
	///
	/// Given without the trailing `:`; `Configuration::new` appends it when
	/// registering each alias.
	pub file_namespaces: &'a [&'a str],

	/// Characters that can appear in link trails.
	///
	/// Every `char` of this string is treated as one allowed character.
	pub link_trail: &'a str,

	/// Magic words that can appear between `__` and `__`.
	pub magic_words: &'a [&'a str],

	/// Protocols that can be used for external links.
	pub protocols: &'a [&'a str],

	/// Magic words that can be used for redirects.
	pub redirect_magic_words: &'a [&'a str],
}
30
/// Kind of namespace that `Configuration::new` registers aliases for.
///
/// Each alias listed in the site configuration is stored together with one of
/// these values so a matched alias can be told apart as a category or a file.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Namespace {
	/// An alias taken from `ConfigurationSource::category_namespaces`.
	Category,
	/// An alias taken from `ConfigurationSource::file_namespaces`.
	File,
}
36
37impl crate::Configuration {
38	/// Allocates and returns a new configuration based on the given site specific configuration.
39	#[must_use]
40	pub fn new(source: &ConfigurationSource) -> Self {
41		let mut configuration = crate::Configuration {
42			character_entities: crate::Trie::new(),
43			link_trail_character_set: crate::HashSet::new(),
44			magic_words: crate::Trie::new(),
45			namespaces: crate::Trie::new(),
46			protocols: crate::Trie::new(),
47			redirect_magic_words: crate::Trie::new(),
48			tag_name_map: crate::HashMap::new(),
49		};
50		for (name, character) in crate::html_entities::HTML_ENTITIES {
51			configuration
52				.character_entities
53				.add_case_sensitive_term(&format!("{};", name), *character);
54		}
55		for character in source.link_trail.chars() {
56			configuration.link_trail_character_set.insert(character);
57		}
58		for protocol in source.protocols {
59			configuration.protocols.add_term(protocol, ());
60		}
61		for magic_word in source.magic_words {
62			configuration.magic_words.add_term(magic_word, ());
63		}
64		for namespace in source.category_namespaces {
65			configuration
66				.namespaces
67				.add_term(&format!("{}:", namespace), Namespace::Category);
68		}
69		for namespace in source.file_namespaces {
70			configuration
71				.namespaces
72				.add_term(&format!("{}:", namespace), Namespace::File);
73		}
74		for redirect_magic_word in source.redirect_magic_words {
75			configuration
76				.redirect_magic_words
77				.add_term(redirect_magic_word, ());
78		}
79		for tag_name in source.extension_tags {
80			configuration
81				.tag_name_map
82				.insert(tag_name.to_string(), crate::TagClass::ExtensionTag);
83		}
84		for tag_name in [
85			"abbr",
86			"b",
87			"bdi",
88			"bdo",
89			"blockquote",
90			"br",
91			"caption",
92			"center",
93			"cite",
94			"code",
95			"data",
96			"dd",
97			"del",
98			"dfn",
99			"div",
100			"dl",
101			"dt",
102			"em",
103			"font",
104			"h1",
105			"h2",
106			"h3",
107			"h4",
108			"h5",
109			"h6",
110			"hr",
111			"i",
112			"ins",
113			"kbd",
114			"li",
115			"mark",
116			"ol",
117			"p",
118			"pre",
119			"q",
120			"rb",
121			"rp",
122			"rt",
123			"ruby",
124			"s",
125			"samp",
126			"small",
127			"span",
128			"strike",
129			"strong",
130			"sub",
131			"sup",
132			"table",
133			"td",
134			"th",
135			"time",
136			"tr",
137			"tt",
138			"u",
139			"ul",
140			"var",
141			"wbr",
142		]
143		.iter()
144		{
145			configuration
146				.tag_name_map
147				.insert(tag_name.to_string(), crate::TagClass::Tag);
148		}
149		configuration
150	}
151
152	/// Parses wiki text into structured data with a user defined timeout.
153	#[must_use]
154	pub fn parse_with_timeout<'a>(
155		&self,
156		wiki_text: &'a str,
157		max_duration: std::time::Duration,
158	) -> Result<crate::Output<'a>, crate::parse::ParseError<'a>> {
159		crate::parse::parse(self, wiki_text, max_duration)
160	}
161	/// Parses wiki text into structured data with a default timeout of 5 seconds.
162	#[must_use]
163	pub fn parse<'a>(
164		&self,
165		wiki_text: &'a str,
166	) -> Result<crate::Output<'a>, crate::parse::ParseError<'a>> {
167		crate::parse::parse(self, wiki_text, std::time::Duration::from_secs(5))
168	}
169	/// Parses wiki text into structured data with no time out.
170	/// This function may run for extremely long lengths of time on certain articles
171	#[must_use]
172	pub fn parse_without_timeout<'a>(
173		&self,
174		wiki_text: &'a str,
175	) -> Result<crate::Output<'a>, crate::parse::ParseError<'a>> {
176		crate::parse::parse(self, wiki_text, std::time::Duration::ZERO)
177	}
178}
179
180impl Default for crate::Configuration {
181	/// Allocates and returns a configuration suitable for testing and quick and dirty prototyping. For correctly parsing an actual wiki, please get the correct site configuration for that particular wiki.
182	fn default() -> Self {
183		crate::default::create_configuration()
184	}
185}