parsoid 0.10.1

Wrapper around Parsoid HTML that provides convenient accessors for processing and manipulation
Documentation
// SPDX-FileCopyrightText: 2023-2024 Kunal Mehta <legoktm@debian.org>
// SPDX-License-Identifier: GPL-3.0-or-later
//! `<ref>` and `<references/>` tags are used to generate citation footnotes.
//!
//! There are three nodes here:
//! * [`Reference`]: the contents of the citation, i.e. the content inside `<ref>`
//! * [`ReferenceLink`]: the `[1]` link that goes to the citation, aka the "[cue](https://en.wikipedia.org/wiki/Note_(typography)#Location)"
//! * [`ReferenceList`]: the list of citations, i.e. `<references/>`
//!
//! See the [specification](https://www.mediawiki.org/wiki/Specs/HTML/2.8.0/Extensions/Cite) for more details.

use crate::{
    assert_element, inner_data, node::Wikinode, set_inner_data, Result,
    WikinodeIterator,
};
use kuchikikiki::NodeRef;
use serde::{Deserialize, Serialize};

/// Represents the citation contained by a ref tag (`<ref>`)
///
/// This is a thin wrapper around the underlying [`NodeRef`]. The citation
/// text is available through [`Reference::contents()`], its ID through
/// [`Reference::id()`], and the IDs of the links pointing back at it through
/// [`Reference::referenced_by_ids()`].
///
/// See the [spec](https://www.mediawiki.org/wiki/Specs/HTML/2.8.0/Extensions/Cite) for more details.
#[derive(Debug, Clone)]
pub struct Reference(pub(crate) NodeRef);

impl Reference {
    pub(crate) fn new_from_node(element: &NodeRef) -> Self {
        assert_element(element);
        Self(element.clone())
    }

    /// Contents of this reference, i.e. what is in between the `<ref></ref>` tags.
    pub fn contents(&self) -> Wikinode {
        self.select_first(".mw-reference-text")
            .expect("no .mw-reference-text found")
    }

    /// Get the ID of this reference. It correponds to [`ReferenceLink::reference_id()`].
    pub fn id(&self) -> String {
        self.contents()
            .as_element()
            .unwrap()
            .attributes
            .borrow()
            .get("id")
            .unwrap()
            .to_string()
    }

    /// IDs that point back to the links to this reference, corresponds with [`ReferenceLink::id()`].
    pub fn referenced_by_ids(&self) -> Vec<String> {
        // Find the backlink nodes, then parse the anchor out of the href
        self.select(".mw-cite-backlink > a")
            .into_iter()
            .map(|node| {
                node.as_element()
                    .unwrap()
                    .attributes
                    .borrow()
                    .get("href")
                    .unwrap()
                    .split_once('#')
                    .unwrap()
                    .1
                    .to_string()
            })
            .collect()
    }
}

/// Represents the link generated by a ref tag (`<ref>`) that points to the citation
///
/// This is a thin wrapper around the underlying [`NodeRef`]. Besides the ID
/// accessors, it exposes the `name` and `group` attributes of the tag for
/// reading and modification.
///
/// See the [spec](https://www.mediawiki.org/wiki/Specs/HTML/2.8.0/Extensions/Cite) for more details.
#[derive(Debug, Clone)]
pub struct ReferenceLink(pub(crate) NodeRef);

impl ReferenceLink {
    pub(crate) const TYPEOF: &'static str = "mw:Extension/ref";
    pub(crate) const SELECTOR: &'static str = "[typeof=\"mw:Extension/ref\"]";

    pub(crate) fn new_from_node(element: &NodeRef) -> Self {
        assert_element(element);
        Self(element.clone())
    }

    /// Get the ID of this link, corresponds to [`Reference::referenced_by_ids()`].
    pub fn id(&self) -> String {
        self.as_element()
            .unwrap()
            .attributes
            .borrow()
            .get("id")
            .unwrap()
            .to_string()
    }

    /// Name of the reference, if one is set
    pub fn name(&self) -> Result<Option<String>> {
        Ok(self.inner()?.attrs.name)
    }

    /// Set the name for this reference
    pub fn set_name(&self, name: String) -> Result<()> {
        let mut inner = self.inner()?;
        inner.attrs.name = Some(name);
        self.set_inner(inner)?;
        Ok(())
    }

    /// Remove the name for this reference (if set)
    pub fn remove_name(&self) -> Result<()> {
        let mut inner = self.inner()?;
        inner.attrs.name = None;
        self.set_inner(inner)?;
        Ok(())
    }

    /// Group the reference is in, if one is set
    pub fn group(&self) -> Result<Option<String>> {
        Ok(self.inner()?.attrs.group)
    }

    /// Set the group for this reference
    pub fn set_group(&self, group: String) -> Result<()> {
        let mut inner = self.inner()?;
        inner.attrs.group = Some(group);
        self.set_inner(inner)?;
        Ok(())
    }

    /// Remove the group for this reference
    pub fn remove_group(&self) -> Result<()> {
        let mut inner = self.inner()?;
        inner.attrs.group = None;
        self.set_inner(inner)?;
        Ok(())
    }

    /// Whether this reference link is reusing one that came earlier
    pub fn is_reused(&self) -> Result<bool> {
        Ok(self.inner()?.body.is_none())
    }

    /// ID of the Reference that corresponds to this link, aka [`Reference::id()`].
    pub fn reference_id(&self) -> Result<String> {
        let id = match self.inner()?.body {
            Some(body) => body.id,
            None => {
                // Since we have no body (reused), we have to grab the ID
                // from the href of the link
                let part = self
                    .select_first("a[href]")
                    .unwrap()
                    .as_element()
                    .unwrap()
                    .attributes
                    .borrow()
                    .get("href")
                    .unwrap()
                    .split_once('#')
                    .unwrap()
                    .1
                    .to_string();
                format!("mw-reference-text-{part}")
            }
        };
        Ok(id)
    }

    fn inner(&self) -> Result<RefDataMw> {
        inner_data(self)
    }

    fn set_inner(&self, data: RefDataMw) -> Result<()> {
        set_inner_data(self, data)
    }
}

/// Inner data of a `ReferenceLink`, as loaded by `inner_data` and written
/// back by `set_inner_data`.
///
/// NOTE(review): presumably this mirrors the JSON stored in the node's
/// `data-mw` attribute — confirm against `inner_data`.
#[derive(Deserialize, Serialize)]
struct RefDataMw {
    /// NOTE(review): presumably the extension tag name — confirm against the spec.
    name: String,
    /// Attributes of the tag (`group` and `name`).
    attrs: RefAttrs,
    /// Body of the reference. `ReferenceLink::is_reused()` reports `true` when
    /// this is `None`. Left out of the serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    body: Option<RefBody>,
}

/// Attributes carried in the inner data of a `ReferenceLink`.
///
/// Both fields are optional and are left out of the serialized output
/// when `None`.
#[derive(Deserialize, Serialize)]
struct RefAttrs {
    /// Group the reference belongs to, if any.
    #[serde(skip_serializing_if = "Option::is_none")]
    group: Option<String>,
    /// Name of the reference, if any.
    #[serde(skip_serializing_if = "Option::is_none")]
    name: Option<String>,
}

/// Body entry in the inner data of a `ReferenceLink`.
#[derive(Deserialize, Serialize)]
struct RefBody {
    /// ID that `ReferenceLink::reference_id()` returns when a body is present.
    id: String,
}

/// Represents a reference list tag (`<references/>`)
///
/// This is a thin wrapper around the underlying [`NodeRef`]. The citations
/// it contains are available through [`ReferenceList::references()`], and a
/// single one can be looked up by ID with [`ReferenceList::find()`].
///
/// See the [spec](https://www.mediawiki.org/wiki/Specs/HTML/2.8.0/Extensions/Cite) for more details.
#[derive(Debug, Clone)]
pub struct ReferenceList(pub(crate) NodeRef);

impl ReferenceList {
    pub(crate) const TYPEOF: &'static str = "mw:Extension/references";
    pub(crate) const SELECTOR: &'static str =
        "[typeof=\"mw:Extension/references\"]";

    pub(crate) fn new_from_node(element: &NodeRef) -> Self {
        assert_element(element);
        Self(element.clone())
    }

    /// Get a list of References in this list
    pub fn references(&self) -> Vec<Reference> {
        self.select("ol.mw-references > li")
            .into_iter()
            .map(|node| Reference::new_from_node(&node))
            .collect()
    }

    /// Find a specific `Reference`, given its ID
    pub fn find(&self, id: &str) -> Option<Reference> {
        self.references().into_iter().find(|ref_| ref_.id() == id)
    }

    /// Group this reference list is showing, if one is set
    pub fn group(&self) -> Result<Option<String>> {
        Ok(self.inner()?.attrs.group)
    }

    /// Set the group for this reference list
    pub fn set_group(&self, group: String) -> Result<()> {
        let mut inner = self.inner()?;
        inner.attrs.group = Some(group);
        self.set_inner(inner)?;
        Ok(())
    }

    /// Remove the group for this reference list
    pub fn remove_group(&self) -> Result<()> {
        let mut inner = self.inner()?;
        inner.attrs.group = None;
        self.set_inner(inner)?;
        Ok(())
    }

    /// If this reference list is automatically generated or explicitly in the wikitext
    pub fn is_auto_generated(&self) -> Result<bool> {
        Ok(self.inner()?.auto_generated)
    }

    fn inner(&self) -> Result<ReferencesListDataMw> {
        inner_data(self)
    }

    fn set_inner(&self, data: ReferencesListDataMw) -> Result<()> {
        set_inner_data(self, data)
    }
}

/// Inner data of a `ReferenceList`, as loaded by `inner_data` and written
/// back by `set_inner_data`.
///
/// NOTE(review): presumably this mirrors the JSON stored in the node's
/// `data-mw` attribute — confirm against `inner_data`.
#[derive(Deserialize, Serialize)]
pub(crate) struct ReferencesListDataMw {
    /// NOTE(review): presumably the extension tag name — confirm against the spec.
    pub(crate) name: String,
    /// Attributes of the tag (currently only `group`).
    pub(crate) attrs: ReferencesListAttrs,
    /// Whether the list was automatically generated rather than written
    /// explicitly in the wikitext. Serialized under the key `autoGenerated`;
    /// falls back to `false` when the key is absent on deserialization.
    #[serde(rename = "autoGenerated")]
    #[serde(default)]
    pub(crate) auto_generated: bool,
}

/// Attributes carried in the inner data of a `ReferenceList`.
#[derive(Deserialize, Serialize)]
pub(crate) struct ReferencesListAttrs {
    /// Group the list is showing, if any. Left out of the serialized output
    /// when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub(crate) group: Option<String>,
}