#[cfg(feature = "http")]
mod api;
pub mod error;
mod iter;
/// Re-exports `indexmap::IndexMap` so callers can name the map type through
/// this crate (`crate::map::IndexMap`).
pub mod map {
pub use indexmap::IndexMap;
}
pub mod node;
/// Convenience re-exports of this crate's node types, the `Template` type,
/// the `WikinodeIterator` trait, the `map` module, and `Result`/`Wikicode`.
pub mod prelude {
// `Client` is only re-exported when the `http` feature is enabled.
#[cfg(feature = "http")]
pub use crate::api::Client;
pub use crate::iter::WikinodeIterator;
pub use crate::map;
pub use crate::node::{
BehaviorSwitch, Category, Comment, ExtLink, Heading, HtmlEntity,
InterwikiLink, LanguageLink, Nowiki, Redirect, Section, WikiLink,
Wikinode,
};
pub use crate::template::Template;
pub use crate::{Result, Wikicode};
}
pub mod template;
#[cfg(feature = "http")]
pub use crate::api::Client;
pub use crate::iter::WikinodeIterator;
use crate::node::{Comment, ExtLink, Redirect, Section, WikiLink, Wikinode};
use crate::template::Template;
use kuchiki::traits::*;
use kuchiki::NodeRef;
use markup5ever::QualName;
use std::ops::Deref;
/// Shorthand for `std::result::Result` with this crate's `error::Error` as
/// the error type.
pub type Result<T> = std::result::Result<T, crate::error::Error>;
#[macro_use]
extern crate markup5ever;
/// Builds a prefix-less [`QualName`] in the HTML namespace whose local name
/// is `tag`.
fn build_qual_name(tag: &str) -> QualName {
    let local_name = tag.into();
    QualName::new(None, ns!(html), local_name)
}
/// A parsed HTML document (or fragment) held as a kuchiki node, together
/// with an optional etag string.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Wikicode {
// Root node of the parsed tree; this is what `Deref` and `as_node` expose.
document: NodeRef,
// Etag stored via `set_etag`; `None` for every constructor in this file.
etag: Option<String>,
}
// Dereferencing a `Wikicode` yields its underlying document node, so
// `NodeRef` methods can be called on it directly.
impl Deref for Wikicode {
    type Target = NodeRef;

    fn deref(&self) -> &NodeRef {
        &self.document
    }
}
impl Wikicode {
pub fn new(body: &str) -> Self {
Wikicode {
document: kuchiki::parse_html().one(body),
etag: None,
}
}
pub fn new_fragment(frag: &str) -> Self {
let ctx_name = build_qual_name("span");
Wikicode {
document: kuchiki::parse_fragment(ctx_name, vec![])
.one(frag)
.first_child()
.unwrap()
.first_child()
.unwrap(),
etag: None,
}
}
fn new_from_node(node: &NodeRef) -> Self {
Wikicode {
document: node.clone(),
etag: None,
}
}
/// Stores an owned copy of `etag`, replacing any previously stored value.
pub fn set_etag(&mut self, etag: &str) {
    let owned = etag.to_owned();
    self.etag = Some(owned);
}
/// Returns the stored etag, or `None` if none has been set.
pub fn get_etag(&self) -> Option<&String> {
    Option::as_ref(&self.etag)
}
/// Returns the revision ID encoded in the `about` attribute of the
/// document's `<html>` element.
///
/// The ID is the text after the last `/` in the attribute value, parsed as
/// a `u32`.
///
/// Returns `None` when there is no `<html>` element, when it has no `about`
/// attribute, or when the trailing segment is not a valid `u32`. A malformed
/// attribute is reported as `None` rather than panicking, since the return
/// type is already optional.
pub fn revision_id(&self) -> Option<u32> {
    let html = self.html_element()?;
    let element = html.as_element()?;
    let attributes = element.attributes.borrow();
    let about = attributes.get("about")?;
    // `rsplit` always yields at least one item, so this is the same segment
    // that `split('/').last()` would produce.
    let id_segment = about.rsplit('/').next()?;
    id_segment.parse().ok()
}
/// Returns the text content of the first element matching the `title`
/// selector, or `None` if the selection fails.
pub fn title(&self) -> Option<String> {
    self.document
        .select_first("title")
        .ok()
        .map(|element| element.as_node().text_contents())
}
/// Returns a `Redirect` built from the first node matching
/// `Redirect::SELECTOR`, or `None` if the selection fails.
pub fn get_redirect(&self) -> Option<Redirect> {
    let found = self.document.select_first(Redirect::SELECTOR).ok()?;
    Some(Redirect::new_from_node(found.as_node()))
}
/// Returns a clone of the first node matching the `html` selector, or
/// `None` if the selection fails.
fn html_element(&self) -> Option<NodeRef> {
    if let Ok(element) = self.document.select_first("html") {
        Some(element.as_node().clone())
    } else {
        None
    }
}
/// Returns the first node matching the `body` selector as a `Wikinode`.
///
/// If the selection fails, falls back to a `Wikinode::Generic` wrapping a
/// clone of the whole `Wikicode`.
fn body_element(&self) -> Wikinode {
    if let Ok(body) = self.document.select_first("body") {
        return Wikinode::new_from_node(body.as_node());
    }
    Wikinode::Generic(self.clone())
}
/// Returns the text contents of the node produced by `body_element`.
pub fn text_contents(&self) -> String {
    let body = self.body_element();
    body.text_contents()
}
/// Collects a `WikiLink` for every node matching `WikiLink::SELECTOR`.
///
/// A failed selection yields an empty vector; this function never returns
/// `Err`.
pub fn filter_links(&self) -> Result<Vec<WikiLink>> {
    let links: Vec<WikiLink> = self
        .document
        .select(WikiLink::SELECTOR)
        .map(|matches| {
            matches
                .map(|found| WikiLink::new_from_node(found.as_node()))
                .collect()
        })
        .unwrap_or_default();
    Ok(links)
}
/// Collects an `ExtLink` for every node matching `ExtLink::SELECTOR`.
///
/// A failed selection yields an empty vector; this function never returns
/// `Err`.
pub fn filter_external_links(&self) -> Result<Vec<ExtLink>> {
    let selection = match self.document.select(ExtLink::SELECTOR) {
        Ok(selection) => selection,
        Err(_) => return Ok(Vec::new()),
    };
    Ok(selection
        .map(|found| ExtLink::new_from_node(found.as_node()))
        .collect())
}
/// Collects a `Template` for every template part of every node matching
/// `Template::SELECTOR`.
///
/// Each matched node's `data-mw` attribute is deserialized as a
/// `template::Transclusion`. One `Template` is created per part that is a
/// `TransclusionPart::Template`, tagged with that part's index. A failed
/// selection yields an empty vector.
///
/// # Errors
///
/// Returns an error if a `data-mw` attribute cannot be deserialized.
///
/// # Panics
///
/// Panics if a matched node is not an element or has no `data-mw`
/// attribute.
pub fn filter_templates(&self) -> Result<Vec<Template>> {
    let selection = match self.document.select(Template::SELECTOR) {
        Ok(selection) => selection,
        Err(_) => return Ok(Vec::new()),
    };

    let mut found = Vec::new();
    for matched in selection {
        let element = matched.as_node();
        // Keep the attribute borrow confined to the parse step.
        let data: template::Transclusion = {
            let attributes = element.as_element().unwrap().attributes.borrow();
            let raw = attributes.get("data-mw").unwrap();
            serde_json::from_str(raw)?
        };
        found.extend(data.parts.iter().enumerate().filter_map(
            |(index, part)| match part {
                template::TransclusionPart::Template { .. } => {
                    Some(Template::new_from_node(element, index))
                }
                _ => None,
            },
        ));
    }
    Ok(found)
}
/// Collects every comment among the inclusive descendants of the node
/// returned by `body_element`.
///
/// This function never returns `Err`.
pub fn filter_comments(&self) -> Result<Vec<Comment>> {
    let body = self.body_element();
    let comments: Vec<Comment> = body
        .inclusive_descendants()
        .filter_map(|node| node.as_comment())
        .collect();
    Ok(comments)
}
/// Collects a `Section` for every node matching `Section::SELECTOR`.
///
/// A failed selection yields an empty vector.
pub fn iter_sections(&self) -> Vec<Section> {
    self.document
        .select(Section::SELECTOR)
        .map(|matches| {
            matches
                .map(|found| Section::new_from_node(found.as_node()))
                .collect::<Vec<Section>>()
        })
        .unwrap_or_default()
}
}
impl From<Wikinode> for Wikicode {
fn from(node: Wikinode) -> Self {
Wikicode::new_from_node(node.as_node())
}
}
impl WikinodeIterator for Wikicode {
fn as_node(&self) -> &NodeRef {
&self.document
}
}
#[cfg(test)]
mod tests {
use crate::prelude::*;
use crate::{map::IndexMap, Result};
/// Builds the `Client` shared by the tests in this module.
///
/// # Panics
///
/// Panics if `Client::new` returns an error.
fn build_client() -> Client {
    let endpoint = "https://www.mediawiki.org/api/rest_v1";
    // NOTE(review): presumably the user agent — confirm against `Client::new`.
    let identifier = "parsoid-rs testing";
    Client::new(endpoint, identifier).unwrap()
}
/// A fragment serializes back to exactly the markup it was built from, for
/// both a bare text node and a single element.
#[test]
fn test_fragment() {
    let cases = ["foo", "<b>bar</b>"];
    for markup in cases.iter() {
        assert_eq!(
            Wikicode::new_fragment(markup).to_string(),
            markup.to_string()
        );
    }
}
/// Fetches raw HTML, parses it, and compares the serialized result with
/// the input.
///
/// Marked `#[should_panic]`, so the test passes only when something in the
/// body panics.
/// NOTE(review): presumably the equality assertion is the expected failure
/// (re-serialized HTML differing from the raw response), but a failing
/// `unwrap` on the request would satisfy it too — confirm intent.
#[tokio::test]
#[should_panic]
async fn test_serialize() {
    let client = build_client();
    let raw_html = client.get_raw("User:Legoktm").await.unwrap();
    let serialized = Wikicode::new(&raw_html).to_string();
    assert_eq!(serialized, raw_html);
}
/// The "MediaWiki" page contains at least one template named "Main page".
#[tokio::test]
async fn test_templates() -> Result<()> {
    let client = build_client();
    let code = client.get("MediaWiki").await?;
    let templates = code.filter_templates()?;
    // Count rather than short-circuit so every template's name is read.
    let matching = templates
        .iter()
        .filter(|template| template.name() == "Main page")
        .count();
    assert!(matching > 0);
    Ok(())
}
/// Checks template vs. parser-function detection, name normalization, and
/// parameter lookup (including a parameter name that contains a comment).
#[tokio::test]
async fn test_more_cases() -> Result<()> {
    let client = build_client();
    let wikitext =
        "{{1x|param<!--comment-->name=value|normal=value2}}{{#if:{{{1}}}|foo|bar}}";
    let code = client.transform_to_html(wikitext).await?;
    let templates = code.filter_templates()?;

    // First entry: the `1x` template.
    let template = &templates[0];
    assert!(template.is_template());
    assert!(!template.is_parser_function());
    assert_eq!(template.normalized_name(), "./Template:1x");

    let mut expected_params = IndexMap::new();
    expected_params.insert(String::from("normal"), String::from("value2"));
    expected_params.insert(String::from("paramname"), String::from("value"));
    assert_eq!(template.get_params(), expected_params);

    assert_eq!(
        template.get_param("paramname"),
        Some(String::from("value"))
    );
    assert_eq!(template.get_param("notset"), None);
    assert_eq!(
        template.get_param_in_wikitext("paramname"),
        Some(String::from("param<!--comment-->name"))
    );
    assert_eq!(
        template.get_param_in_wikitext("normal"),
        Some(String::from("normal"))
    );
    assert_eq!(template.get_param_in_wikitext("notset"), None);

    // Second entry: the `#if` parser function.
    let parser_function = &templates[1];
    assert!(parser_function.is_parser_function());
    assert!(!parser_function.is_template());
    assert_eq!(parser_function.normalized_name(), "if");
    Ok(())
}
/// Adding a template parameter and then removing it round-trips back to
/// the original wikitext.
#[tokio::test]
async fn test_template_mutation() -> Result<()> {
    let client = build_client();
    let original = "{{1x|foo=bar}}";
    let code = client.transform_to_html(original).await?;
    let mut templates = code.filter_templates()?;
    let template = &mut templates[0];

    template.set_param("new", "wikitext")?;
    let with_param = client.transform_to_wikitext(&code).await?;
    assert_eq!(with_param, String::from("{{1x|foo=bar|new=wikitext}}"));

    template.remove_param("new")?;
    let without_param = client.transform_to_wikitext(&code).await?;
    assert_eq!(without_param, String::from(original));
    Ok(())
}
/// `text_contents` on a fetched page returns the expected plain text.
#[tokio::test]
async fn test_text_contents() -> Result<()> {
    let client = build_client();
    let code = client.get("User:Legoktm/parsoid-rs/strip_code").await?;
    let text = code.text_contents();
    assert_eq!(
        text,
        String::from("This is some formatted code. Also a link.")
    );
    Ok(())
}
/// Reads a wikilink's target, text and HTML, then retargets it and checks
/// that the change shows up in both the HTML and the converted wikitext.
#[tokio::test]
async fn test_wikilinks() -> Result<()> {
    let client = build_client();
    let code = client.transform_to_html("[[Main Page|link text]]").await?;
    let links = code.filter_links()?;
    let link = &links[0];

    assert_eq!(link.target(), String::from("./Main_Page"));
    assert_eq!(link.text_contents(), String::from("link text"));
    let expected_html = "<a class=\"mw-redirect\" href=\"./Main_Page\" id=\"mwAw\" rel=\"mw:WikiLink\" title=\"Main Page\">link text</a>";
    assert_eq!(&link.to_string(), expected_html);

    link.set_target("./MediaWiki");
    assert_eq!(link.target(), String::from("./MediaWiki"));
    assert!(code.to_string().contains("href=\"./MediaWiki\""));

    let html = code.to_string();
    let wikitext = client.transform_to_wikitext_raw(&html).await?;
    assert_eq!(wikitext, String::from("[[MediaWiki|link text]]"));
    Ok(())
}
/// A `WikiLink` built from scratch serializes to the expected HTML and,
/// once appended to an empty document, converts to the expected wikitext.
#[tokio::test]
async fn test_new_link() -> Result<()> {
    let client = build_client();
    let label = Wikicode::new_fragment("bar");
    let link = WikiLink::new("./Foo", &label);
    assert_eq!(
        &link.to_string(),
        "<a href=\"./Foo\" rel=\"mw:WikiLink\">bar</a>"
    );

    let document = Wikicode::new("");
    document.append(&link);
    let wikitext = client.transform_to_wikitext(&document).await?;
    assert_eq!(wikitext, String::from("[[Foo|bar]]"));
    Ok(())
}
/// Reads an external link's target, text and HTML, then retargets it and
/// checks the converted wikitext.
#[tokio::test]
async fn test_external_links() -> Result<()> {
    let client = build_client();
    let source = "[https://example.com Link content] ";
    let code = client.transform_to_html(source).await?;
    let links = code.filter_external_links()?;
    let link = &links[0];

    assert_eq!(link.target(), String::from("https://example.com"));
    assert_eq!(link.text_contents(), String::from("Link content"));
    let expected_html = "<a class=\"external text\" href=\"https://example.com\" id=\"mwAw\" rel=\"mw:ExtLink\">Link content</a>";
    assert_eq!(&link.to_string(), expected_html);

    link.set_target("https://wiki.example.org/foo?query=1");
    assert_eq!(
        link.target(),
        String::from("https://wiki.example.org/foo?query=1")
    );

    let html = code.to_string();
    let wikitext = client.transform_to_wikitext_raw(&html).await?;
    assert_eq!(
        wikitext,
        String::from("[https://wiki.example.org/foo?query=1 Link content] ")
    );
    Ok(())
}
/// Reads a comment's text, rewrites it, and checks that the new text
/// appears in the serialized document.
#[tokio::test]
async fn test_comments() -> Result<()> {
    let client = build_client();
    let code = client.transform_to_html("<!--comment-->").await?;
    let comments = code.filter_comments()?;
    let first = &comments[0];
    assert_eq!(first.text(), String::from("comment"));

    first.set_text(" new ");
    assert_eq!(first.text(), String::from(" new "));
    let serialized = code.to_string();
    assert!(serialized.contains("<!-- new -->"));
    Ok(())
}
/// A fetched page exposes the expected revision ID and title.
#[tokio::test]
async fn test_properties() -> Result<()> {
    let client = build_client();
    let code = client.get("User:Legoktm/archive.txt").await?;
    let revision = code.revision_id();
    assert_eq!(revision, Some(2016428));
    let title = code.title();
    assert_eq!(title, Some(String::from("User:Legoktm/archive.txt")));
    Ok(())
}
/// Walks the document's descendants and checks the first wikilink found.
#[tokio::test]
async fn test_iterators() -> Result<()> {
    let client = build_client();
    let code = client.transform_to_html("This is a [[sentence]].").await?;
    // Stop at the first descendant that converts to a wikilink.
    let link = code
        .descendants()
        .find_map(|node| node.as_wikilink())
        .unwrap();
    assert_eq!(link.target(), "./Sentence".to_string());
    assert_eq!(link.text_contents(), "sentence".to_string());
    Ok(())
}
/// A fetched page reports the title it was requested under.
#[tokio::test]
async fn test_title() -> Result<()> {
    let client = build_client();
    let code = client.get("Project:Requests").await?;
    let title = code.title().unwrap();
    assert_eq!(title, String::from("Project:Requests"));
    Ok(())
}
/// Checks the lead pseudo-section plus one top-level and one nested
/// section: their IDs, pseudo-section flags, and heading text.
#[tokio::test]
async fn test_sections() -> Result<()> {
    let client = build_client();
    // The raw string's lines must stay at column 0; they are part of the
    // wikitext sent for conversion.
    let wikitext = r#"
...lead section contents...
== foo=bar ==
...section contents...
=== nested ===
...section contents...
"#;
    let code = client.transform_to_html(wikitext).await?;
    let sections = code.iter_sections();
    {
        // Lead content before the first heading.
        let section = &sections[0];
        assert!(section.is_pseudo_section());
        assert_eq!(section.section_id(), 0);
        assert!(section.heading().is_none());
    }
    {
        let section = &sections[1];
        assert!(!section.is_pseudo_section());
        assert_eq!(section.section_id(), 1);
        let heading = section.heading().unwrap();
        assert_eq!(heading.text_contents(), "foo=bar");
    }
    {
        let section = &sections[2];
        assert!(!section.is_pseudo_section());
        assert_eq!(section.section_id(), 2);
        let heading = section.heading().unwrap();
        assert_eq!(heading.text_contents(), "nested");
    }
    Ok(())
}
/// A level-2 `Heading` appended to an empty document converts to the
/// expected wikitext.
#[tokio::test]
async fn test_heading() -> Result<()> {
    let client = build_client();
    let document = Wikicode::new("");
    let heading = Heading::new(2, "Some text")?;
    document.append(&heading);
    let converted = client.transform_to_wikitext(&document).await?;
    assert_eq!(&converted, "== Some text ==\n");
    Ok(())
}
/// A `Category` with a second argument, appended to an empty document,
/// converts to the expected wikitext.
#[tokio::test]
async fn test_category() -> Result<()> {
    let client = build_client();
    let document = Wikicode::new("");
    let category = Category::new("Category:Foo", Some("Bar baz#quux"));
    document.append(&category);
    let converted = client.transform_to_wikitext(&document).await?;
    assert_eq!(&converted, "[[Category:Foo|Bar baz#quux]]");
    Ok(())
}
/// A `LanguageLink` built from a full URL, appended to an empty document,
/// converts to the expected wikitext.
#[tokio::test]
async fn test_language_link() -> Result<()> {
    let client = build_client();
    let document = Wikicode::new("");
    let language_link = LanguageLink::new("https://en.wikipedia.org/wiki/Foo");
    document.append(&language_link);
    let converted = client.transform_to_wikitext(&document).await?;
    assert_eq!(&converted, "[[en:Foo]]");
    Ok(())
}
/// Two `BehaviorSwitch` nodes (one without and one with a value), appended
/// to an empty document, convert to the expected wikitext.
#[tokio::test]
async fn test_behavior_switch() -> Result<()> {
    let client = build_client();
    let document = Wikicode::new("");
    let toc = BehaviorSwitch::new("toc", None);
    document.append(&toc);
    let display_title = BehaviorSwitch::new("displaytitle", Some("foo"));
    document.append(&display_title);
    let converted = client.transform_to_wikitext(&document).await?;
    assert_eq!(&converted, "__TOC__\n{{DISPLAYTITLE:foo}}\n");
    Ok(())
}
/// A `Redirect` appended to an empty document is found by `get_redirect`
/// and converts to the expected wikitext.
#[tokio::test]
async fn test_redirect() -> Result<()> {
    let client = build_client();
    let document = Wikicode::new("");
    let redirect = Redirect::new("./Foo");
    document.append(&redirect);

    let found = document.get_redirect().unwrap();
    assert_eq!(found.target(), String::from("./Foo"));

    let converted = client.transform_to_wikitext(&document).await?;
    assert_eq!(&converted, "#REDIRECT [[Foo]]");
    Ok(())
}
}