1use crate::{
2 document::{Document, HeaderKind},
3 parser::{Parser, WikipediaParser},
4 Endpoint,
5};
6use anyhow::{anyhow, Context, Result};
7use reqwest::{Client, Response};
8use scraper::Html;
9use serde::{Deserialize, Serialize};
10use std::fmt::Display;
11use tracing::{debug, warn};
12use url::Url;
13use uuid::Uuid;
14
15use super::languages::Language;
16
17pub mod link_data {
18 use crate::{languages::Language, search::Namespace, Endpoint};
19 use serde::{Deserialize, Serialize};
20 use url::Url;
21
22 #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
23 pub struct InternalData {
24 pub namespace: Namespace,
25 pub page: String,
26 pub title: String,
27 pub endpoint: Endpoint,
28 pub language: Language,
29 pub anchor: Option<AnchorData>,
30 }
31
32 #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
33 pub struct AnchorData {
34 pub anchor: String,
35 pub title: String,
36 }
37
38 #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
39 pub struct RedLinkData {
40 pub url: Url,
41 pub title: String,
42 }
43
44 #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
45 pub struct MediaData {
46 pub url: Url,
47 pub title: String,
48 }
49
50 #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
51 pub struct ExternalData {
52 pub url: Url,
53 }
54
55 #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
56 pub struct ExternalToInteralData {}
57}
58
59#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
60pub enum Link {
61 Internal(link_data::InternalData),
63 Anchor(link_data::AnchorData),
67 RedLink(link_data::RedLinkData),
69 MediaLink(link_data::MediaData),
71 External(link_data::ExternalData),
73 ExternalToInternal(link_data::ExternalToInteralData),
75}
76
77impl Link {
78 pub fn title(&self) -> Option<&str> {
79 match self {
80 Link::Anchor(link_data) => Some(&link_data.title),
81 Link::RedLink(link_data) => Some(&link_data.title),
82 &Link::External(_) => None,
83 &Link::ExternalToInternal(_) => None,
84 Link::MediaLink(link_data) => Some(&link_data.title),
85 Link::Internal(link_data) => Some(&link_data.title),
86 }
87 }
88}
89
90#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
92pub struct LanguageLink {
93 #[serde(rename = "langname")]
94 pub name: String,
95 #[serde(rename = "lang")]
96 pub language: Language,
97 pub autonym: String,
98 pub title: String,
99 pub url: Url,
100 pub endpoint: Endpoint,
101}
102
103#[derive(Debug, Deserialize, Clone, PartialEq, Eq, Serialize)]
104pub struct Section {
105 #[serde(skip_deserializing)]
106 pub index: usize,
107 #[serde(rename = "toclevel")]
108 pub header_kind: HeaderKind,
109 #[serde(rename = "line")]
110 pub text: String,
111 pub number: String,
112 pub anchor: String,
113}
114
115#[derive(Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
116pub struct Page {
117 pub title: String,
118 pub pageid: usize,
119 pub content: Document,
120 pub language: Language,
121 pub language_links: Option<Vec<LanguageLink>>,
122 pub sections: Option<Vec<Section>>,
123 pub revision_id: Option<usize>,
124 pub uuid: Uuid,
125}
126
127impl Page {
128 #[cfg(debug_assertions)]
129 pub fn from_path(path: &std::path::PathBuf) -> Option<Page> {
130 if !path.exists() {
131 return None;
132 }
133
134 let content = std::fs::read_to_string(path).ok()?;
135 let nodes = WikipediaParser::parse_document(
136 &content,
137 url::Url::parse("https://en.wikipedia.org/w/api.php").ok()?,
138 Language::default(),
139 )
140 .nodes();
141
142 Some(Page {
143 title: "DEBUG: FILE".to_string(),
144 pageid: 0,
145 content: Document { nodes },
146 language: Language::default(),
147 language_links: None,
148 sections: None,
149 revision_id: None,
150 uuid: Uuid::new_v4(),
151 })
152 }
153
154 pub fn builder() -> PageBuilder<NoPageID, NoPage, NoEndpoint, NoLanguage> {
155 PageBuilder::default()
156 }
157
158 pub fn available_languages(&self) -> Option<usize> {
159 if let Some(ref links) = self.language_links {
160 return Some(links.len());
161 }
162 None
163 }
164
165 pub fn sections(&self) -> Option<&Vec<Section>> {
166 if let Some(ref sections) = self.sections {
167 return Some(sections);
168 }
169 None
170 }
171}
172
173impl std::fmt::Debug for Page {
174 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
175 f.debug_struct("Page")
176 .field("title", &self.title)
177 .field("pageid", &self.pageid)
178 .field("content", &self.content)
179 .field("language", &self.language)
180 .field("language_links", &self.language_links.is_some())
181 .field("sections", &self.sections.is_some())
182 .field("revision_id", &self.revision_id)
183 .finish()
184 }
185}
186
/// A piece of information that can be requested for a page via the `prop` parameter of the
/// parse request.
///
/// Each variant is rendered to its API name by the `Display` implementation in this file;
/// the name is given in the variant's documentation.
///
/// `Debug`, `PartialEq` and `Eq` are derived so that values can be logged, compared in
/// assertions and embedded in other `Debug` types.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Property {
    /// API name `text`.
    Text,
    /// API name `langlinks`.
    LangLinks,
    /// API name `categories`.
    Categories,
    /// API name `categorieshtml`.
    CategoriesHTML,
    /// API name `templates`.
    Templates,
    /// API name `images`.
    Images,
    /// API name `externallinks`.
    ExternalLinks,
    /// API name `sections`.
    Sections,
    /// API name `revid`.
    RevID,
    /// API name `displaytitle`.
    DisplayTitle,
    /// API name `subtitle`.
    Subtitle,
    /// API name `headhtml`.
    HeadHTML,
    /// API name `indicators`.
    Indicators,
    /// API name `iwlinks`.
    InterwikiLinks,
    /// API name `wikitext`.
    Wikitext,
    /// API name `properties`.
    Properties,
    /// API name `limitreportdata`.
    LimitReportData,
    /// API name `limitreporthtml`.
    LimitReportHTML,
    /// API name `parsetree`.
    ParseTree,
    /// API name `parsewarnings`.
    ParseWarnings,
    /// API name `parsewarningshtml`.
    ParseWarningsHTML,
}
233
234impl Display for Property {
235 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
236 match self {
237 Property::Text => write!(f, "text"),
238 Property::LangLinks => write!(f, "langlinks"),
239 Property::Categories => write!(f, "categories"),
240 Property::CategoriesHTML => write!(f, "categorieshtml"),
241 Property::Templates => write!(f, "templates"),
242 Property::Images => write!(f, "images"),
243 Property::ExternalLinks => write!(f, "externallinks"),
244 Property::Sections => write!(f, "sections"),
245 Property::RevID => write!(f, "revid"),
246 Property::DisplayTitle => write!(f, "displaytitle"),
247 Property::Subtitle => write!(f, "subtitle"),
248 Property::HeadHTML => write!(f, "headhtml"),
249 Property::Indicators => write!(f, "indicators"),
250 Property::InterwikiLinks => write!(f, "iwlinks"),
251 Property::Wikitext => write!(f, "wikitext"),
252 Property::Properties => write!(f, "properties"),
253 Property::LimitReportData => write!(f, "limitreportdata"),
254 Property::LimitReportHTML => write!(f, "limitreporthtml"),
255 Property::ParseTree => write!(f, "parsetree"),
256 Property::ParseWarnings => write!(f, "parsewarnings"),
257 Property::ParseWarningsHTML => write!(f, "parsewarningshtml"),
258 }
259 }
260}
261
262pub struct WithPageID(usize);
263#[derive(Default)]
264pub struct NoPageID;
265
266pub struct WithPage(String);
267#[derive(Default)]
268pub struct NoPage;
269
270pub struct WithEndpoint(Url);
271#[derive(Default)]
272pub struct NoEndpoint;
273
274pub struct WithLanguage(Language);
275#[derive(Default)]
276pub struct NoLanguage;
277
278#[derive(Default)]
279pub struct PageBuilder<I, P, E, L> {
280 pageid: I,
281 page: P,
282 endpoint: E,
283 language: L,
284 revision: Option<usize>,
285 redirects: Option<bool>,
286 properties: Option<Vec<Property>>,
287}
288
289pub type PageRequest = PageBuilder<NoPageID, WithPage, WithEndpoint, WithLanguage>;
290pub type PageRequestID = PageBuilder<WithPageID, NoPage, WithEndpoint, WithLanguage>;
291
292impl<E, L> PageBuilder<NoPageID, NoPage, E, L> {
293 pub fn pageid(self, pageid: usize) -> PageBuilder<WithPageID, NoPage, E, L> {
295 PageBuilder {
296 pageid: WithPageID(pageid),
297 page: self.page,
298 endpoint: self.endpoint,
299 revision: self.revision,
300 redirects: self.redirects,
301 properties: self.properties,
302 language: self.language,
303 }
304 }
305
306 pub fn page(self, page: impl Into<String>) -> PageBuilder<NoPageID, WithPage, E, L> {
308 PageBuilder {
309 pageid: self.pageid,
310 page: WithPage(page.into()),
311 endpoint: self.endpoint,
312 revision: self.revision,
313 redirects: self.redirects,
314 properties: self.properties,
315 language: self.language,
316 }
317 }
318}
319
320impl<I, P, L> PageBuilder<I, P, NoEndpoint, L> {
321 pub fn url(self, url: impl Into<Url>) -> PageBuilder<I, P, WithEndpoint, L> {
322 PageBuilder {
323 pageid: self.pageid,
324 page: self.page,
325 endpoint: WithEndpoint(url.into()),
326 revision: self.revision,
327 redirects: self.redirects,
328 properties: self.properties,
329 language: self.language,
330 }
331 }
332
333 pub fn endpoint(self, endpoint: Url) -> PageBuilder<I, P, WithEndpoint, L> {
334 PageBuilder {
335 pageid: self.pageid,
336 page: self.page,
337 endpoint: WithEndpoint(endpoint),
338 revision: self.revision,
339 redirects: self.redirects,
340 properties: self.properties,
341 language: self.language,
342 }
343 }
344}
345
346impl<I, P, E> PageBuilder<I, P, E, NoLanguage> {
347 pub fn language(self, language: Language) -> PageBuilder<I, P, E, WithLanguage> {
348 PageBuilder {
349 pageid: self.pageid,
350 page: self.page,
351 endpoint: self.endpoint,
352 language: WithLanguage(language),
353 revision: self.revision,
354 redirects: self.redirects,
355 properties: self.properties,
356 }
357 }
358}
359
360impl<I, P, U, L> PageBuilder<I, P, U, L> {
361 pub fn revision(mut self, revision: usize) -> Self {
363 self.revision = Some(revision);
364 self
365 }
366
367 pub fn redirects(mut self, redirects: bool) -> Self {
369 self.redirects = Some(redirects);
370 self
371 }
372
373 pub fn properties(mut self, properties: Vec<Property>) -> Self {
375 self.properties = Some(properties);
376 self
377 }
378}
379
380impl<I, P> PageBuilder<I, P, WithEndpoint, WithLanguage> {
381 async fn fetch_with_params(self, mut params: Vec<(&str, String)>) -> Result<Page> {
382 async fn action_parse(params: Vec<(&str, String)>, endpoint: Url) -> Result<Response> {
383 Client::new()
384 .get(endpoint)
385 .header(
386 "User-Agent",
387 format!(
388 "wiki-tui/{} (https://github.com/Builditluc/wiki-tui)",
389 env!("CARGO_PKG_VERSION")
390 ),
391 )
392 .query(&[
393 ("action", "parse"),
394 ("format", "json"),
395 ("formatversion", "2"),
396 ("parsoid", "true"),
397 ])
398 .query(¶ms)
399 .send()
400 .await
401 .inspect(|response| {
402 debug!("response url: '{}'", response.url().as_str());
403 })
404 .context("failed sending the request")
405 }
406
407 if let Some(revision) = self.revision {
408 params.push(("revid", revision.to_string()));
409 }
410
411 if let Some(redirects) = self.redirects {
412 params.push(("redirects", redirects.to_string()));
413 }
414
415 if let Some(ref prop) = self.properties {
416 let mut prop_str = String::new();
417 for prop in prop {
418 prop_str.push('|');
419 prop_str.push_str(&prop.to_string())
420 }
421 params.push(("prop", prop_str));
422 }
423
424 let response = action_parse(params, self.endpoint.0.clone())
425 .await?
426 .error_for_status()
427 .context("the server returned an error")?;
428
429 let res_json: serde_json::Value = serde_json::from_str(
430 &response
431 .text()
432 .await
433 .context("failed reading the response")?,
434 )
435 .context("failed interpreting the response as json")?;
436
437 self.serialize_result(res_json)
438 .context("failed serializing the returned response")
439 }
440
441 fn serialize_result(self, res_json: serde_json::Value) -> Result<Page> {
442 let title = res_json
443 .get("parse")
444 .and_then(|x| x.get("title"))
445 .and_then(|x| x.as_str())
446 .map(|x| x.to_string())
447 .ok_or_else(|| anyhow!("missing the title"))?;
448
449 let pageid = res_json
450 .get("parse")
451 .and_then(|x| x.get("pageid"))
452 .and_then(|x| x.as_u64())
453 .map(|x| x as usize)
454 .ok_or_else(|| anyhow!("missing the pageid"))?;
455
456 let endpoint = self.endpoint.0;
457 let language = self.language.0;
458 let content = res_json
459 .get("parse")
460 .and_then(|x| x.get("text"))
461 .and_then(|x| x.as_str())
462 .map(|x| {
463 let parser = WikipediaParser::parse_document(x, endpoint.clone(), language);
464 Document {
465 nodes: parser.nodes(),
466 }
467 })
468 .ok_or(anyhow!("missing the content or failed parsing the content"))?;
470
471 let language_links = res_json
472 .get("parse")
473 .and_then(|x| x.get("langlinks"))
474 .and_then(|x| x.as_array())
475 .map(|x| x.to_owned())
476 .map(|x| {
477 x.into_iter()
478 .filter_map(|x| {
479 let mut language_link: LanguageLink = serde_json::from_value(x)
480 .map_err(|err| warn!("language_link parsing error: {:?}", err))
481 .ok()?;
482 let mut endpoint = endpoint.clone();
483 let _ = endpoint.set_host(Some(language_link.url.host_str().unwrap()));
484 language_link.endpoint = endpoint;
485 Some(language_link)
486 })
487 .collect::<Vec<LanguageLink>>()
488 })
489 .inspect(|x| {
490 debug!("language_links: '{}'", x.len());
491 });
492
493 let sections = res_json
494 .get("parse")
495 .and_then(|x| x.get("sections"))
496 .and_then(|x| x.as_array())
497 .map(|x| x.to_owned())
498 .map(|x| {
499 x.into_iter()
500 .enumerate()
501 .filter_map(|(i, x)| {
502 serde_json::from_value(x).ok().map(|mut x: Section| {
503 x.index = i + 1;
504 let fragment = Html::parse_document(&x.text);
506 x.text = fragment.root_element().text().collect();
507 x
508 })
509 })
510 .collect::<Vec<Section>>()
511 })
512 .map(|mut x| {
513 x.insert(
514 0,
515 Section {
516 index: 0,
517 header_kind: HeaderKind::Main,
518 text: "(Top)".to_string(),
519 number: "".to_string(),
520 anchor: "Content_Top".to_string(),
521 },
522 );
523 x
524 });
525
526 let revision_id = res_json
527 .get("parse")
528 .and_then(|x| x.get("revid"))
529 .and_then(|x| x.as_u64())
530 .map(|x| x as usize);
531
532 Ok(Page {
533 title,
534 pageid,
535 content,
536 language,
537 language_links,
538 sections,
539 revision_id,
540 uuid: Uuid::new_v4(),
541 })
542 }
543}
544
545impl PageBuilder<WithPageID, NoPage, WithEndpoint, WithLanguage> {
546 pub async fn fetch(self) -> Result<Page> {
547 let param = vec![("pageid", self.pageid.0.to_string())];
548 self.fetch_with_params(param).await
549 }
550}
551
552impl PageBuilder<NoPageID, WithPage, WithEndpoint, WithLanguage> {
553 pub async fn fetch(self) -> Result<Page> {
554 let param = vec![("page", self.page.0.to_string())];
555 self.fetch_with_params(param).await
556 }
557}