use std::borrow::Cow;
use std::collections::HashMap;
use ego_tree::NodeId;
use crate::options::Options;
/// Marker information recorded for a single list item (`li`).
///
/// Values are filled in by `Context::annotate_lists`, which stores one entry
/// per `li` that is a direct child of a `ul` or `ol`.
#[derive(Debug, Clone)]
pub struct ListMetadata {
    /// Marker text emitted before the item, trailing space included
    /// (for example `"1. "` for an ordered item).
    prefix: String,
    /// Length of `prefix` in bytes.
    prefix_width: usize,
    /// Accumulated prefix widths of the list items enclosing this one.
    parent_indent: usize,
}

impl ListMetadata {
    /// Returns the marker text for this item, including its trailing space.
    #[inline]
    #[must_use]
    pub fn prefix(&self) -> &str {
        self.prefix.as_str()
    }

    /// Returns the stored width of the marker text.
    #[inline]
    #[must_use]
    pub const fn prefix_width(&self) -> usize {
        self.prefix_width
    }

    /// Returns the indentation contributed by the enclosing list items.
    #[inline]
    #[must_use]
    pub const fn parent_indent(&self) -> usize {
        self.parent_indent
    }
}
/// Mutable state shared while converting a document.
///
/// Holds the conversion options, the per-`li` list metadata produced by
/// `annotate_lists`, the optional base domain used by `resolve_url`, and the
/// reference strings collected through `push_reference`.
#[derive(Debug)]
pub struct Context<'a> {
/// Conversion options supplied to `Context::new`.
pub(crate) options: Options,
/// Metadata keyed by the node id of each annotated `li` element.
pub(crate) list_metadata: HashMap<NodeId, ListMetadata>,
/// Flag toggled through `set_in_pre`.
/// NOTE(review): presumably true while inside a `<pre>` element — confirm against callers.
pub(crate) in_pre: bool,
/// Base domain (with or without a scheme) that `resolve_url` resolves against.
pub(crate) domain: Option<&'a str>,
/// Reference strings accumulated by `push_reference` until `take_references` drains them.
pub(crate) references: Vec<String>,
/// Counter incremented by `push_reference` and reset to zero by `take_references`.
pub(crate) link_index: usize,
}
impl<'a> Context<'a> {
    /// Creates a context from `options` and an optional base `domain`.
    ///
    /// The list metadata map and the reference list start empty, `in_pre` is
    /// `false` and the link counter is zero.
    pub(crate) fn new(options: Options, domain: Option<&'a str>) -> Self {
        Self {
            options,
            list_metadata: HashMap::new(),
            in_pre: false,
            domain,
            references: Vec::new(),
            link_index: 0,
        }
    }

    /// Returns the conversion options this context was created with.
    #[inline]
    #[must_use]
    pub const fn options(&self) -> &Options {
        &self.options
    }

    /// Returns the current value of the `in_pre` flag.
    #[inline]
    #[must_use]
    pub const fn in_pre(&self) -> bool {
        self.in_pre
    }

    /// Sets the `in_pre` flag to `value`.
    #[inline]
    pub(crate) const fn set_in_pre(&mut self, value: bool) {
        self.in_pre = value;
    }

    /// Looks up the metadata recorded for the list item with node id `id`.
    ///
    /// Returns `None` when no entry was recorded for that node.
    #[inline]
    #[must_use]
    pub fn list_metadata(&self, id: NodeId) -> Option<&ListMetadata> {
        self.list_metadata.get(&id)
    }

    /// Returns the base domain given at construction, if any.
    #[inline]
    #[must_use]
    pub const fn domain(&self) -> Option<&str> {
        self.domain
    }

    /// Resolves `raw_url` against the configured domain.
    ///
    /// `raw_url` is returned unchanged (borrowed) when:
    /// - no domain is configured, or the domain is empty;
    /// - `raw_url` already parses as an absolute URL;
    /// - the base URL built from the domain cannot be parsed, or joining fails.
    ///
    /// A domain that does not contain `://` is prefixed with `https://` before
    /// it is parsed. On success the joined URL is returned as an owned string.
    #[must_use]
    pub fn resolve_url<'u>(&self, raw_url: &'u str) -> Cow<'u, str> {
        let Some(domain) = self.domain else {
            return Cow::Borrowed(raw_url);
        };
        if domain.is_empty() {
            return Cow::Borrowed(raw_url);
        }
        // Anything that parses on its own is already absolute; leave it alone.
        if url::Url::parse(raw_url).is_ok() {
            return Cow::Borrowed(raw_url);
        }
        let base_str = if domain.contains("://") {
            Cow::Borrowed(domain)
        } else {
            Cow::Owned(format!("https://{domain}"))
        };
        let Ok(base) = url::Url::parse(&base_str) else {
            return Cow::Borrowed(raw_url);
        };
        base.join(raw_url)
            .map_or(Cow::Borrowed(raw_url), |u| Cow::Owned(u.to_string()))
    }

    /// Returns the value the next call to `push_reference` will return,
    /// without modifying the context.
    #[inline]
    #[must_use]
    pub(crate) const fn next_link_index(&self) -> usize {
        self.link_index + 1
    }

    /// Stores `reference` and returns the incremented link counter.
    ///
    /// The counter starts at zero, so the first reference pushed after
    /// construction or after `take_references` gets index 1.
    pub fn push_reference(&mut self, reference: String) -> usize {
        self.link_index += 1;
        self.references.push(reference);
        self.link_index
    }

    /// Returns `true` when at least one reference is currently stored.
    #[inline]
    #[must_use]
    pub(crate) const fn has_references(&self) -> bool {
        !self.references.is_empty()
    }

    /// Drains the stored references into a single newline-separated string.
    ///
    /// The reference list is cleared and the link counter reset to zero.
    /// Returns an empty string when no references are stored.
    #[must_use]
    pub(crate) fn take_references(&mut self) -> String {
        let result = self.references.join("\n");
        self.references.clear();
        self.link_index = 0;
        result
    }

    /// Records a `ListMetadata` entry for the list items found in the subtree
    /// rooted at `root_id`, starting with a parent indent of zero.
    pub(crate) fn annotate_lists(&mut self, root_id: NodeId, document: &scraper::Html) {
        Self::annotate_list_node(root_id, document, self, 0);
    }

    /// Recursive worker behind `annotate_lists`.
    ///
    /// - If `node_id` is not a `ul`/`ol`, every child is visited with the same
    ///   `parent_indent`.
    /// - If it is a list, each direct `li` child gets a metadata entry and is
    ///   then visited with `parent_indent` increased by its prefix width.
    ///   Children of a list that are not `li` elements are skipped and not
    ///   descended into.
    ///
    /// Ordered items are numbered from the list's `start` attribute (default 1
    /// when missing or not parseable as `usize`) and right-aligned to the
    /// width of the largest number in the list.
    fn annotate_list_node(
        node_id: NodeId,
        document: &scraper::Html,
        ctx: &mut Self,
        parent_indent: usize,
    ) {
        let Some(node_ref) = document.tree.get(node_id) else {
            return;
        };

        let is_list = node_ref.value().as_element().is_some_and(|el| {
            let name = el.name();
            name == "ul" || name == "ol"
        });
        if !is_list {
            for child in node_ref.children() {
                Self::annotate_list_node(child.id(), document, ctx, parent_indent);
            }
            return;
        }

        let el = node_ref.value().as_element();
        let is_ordered = el.is_some_and(|e| e.name() == "ol");
        let start: usize = el
            .and_then(|e| e.attr("start"))
            .and_then(|s| s.parse().ok())
            .unwrap_or(1);

        // Only ordered lists need the item count, so skip the extra pass otherwise.
        let number_width = if is_ordered {
            let li_count = node_ref
                .children()
                .filter(|c| c.value().as_element().is_some_and(|e| e.name() == "li"))
                .count();
            // `start` comes from document markup and may be close to `usize::MAX`;
            // saturate instead of overflowing.
            digit_count(start.saturating_add(li_count.saturating_sub(1)))
        } else {
            0
        };

        let items = node_ref
            .children()
            .filter(|c| c.value().as_element().is_some_and(|e| e.name() == "li"));
        for (item_index, child) in items.enumerate() {
            let prefix = if is_ordered {
                let num = start.saturating_add(item_index);
                format!("{num:>number_width$}. ")
            } else {
                format!("{} ", ctx.options.bullet_marker().char())
            };
            // Byte length of the marker.
            // NOTE(review): equals the display width only if the bullet marker is ASCII — confirm.
            let prefix_width = prefix.len();
            ctx.list_metadata.insert(
                child.id(),
                ListMetadata {
                    prefix,
                    prefix_width,
                    parent_indent,
                },
            );
            Self::annotate_list_node(child.id(), document, ctx, parent_indent + prefix_width);
        }
    }
}
/// Returns the number of decimal digits needed to print `n`.
///
/// Zero is treated as a one-digit number.
#[inline]
const fn digit_count(n: usize) -> usize {
    // `checked_ilog10` yields `None` only for zero, which still prints as one digit.
    let Some(magnitude) = n.checked_ilog10() else {
        return 1;
    };
    magnitude as usize + 1
}
#[cfg(test)]
mod tests {
    use std::borrow::Cow;

    use super::*;

    /// Builds a context with default options and the given base domain.
    fn make_ctx(domain: Option<&str>) -> Context<'_> {
        Context::new(Options::default(), domain)
    }

    /// Without a domain the input comes back untouched and borrowed.
    #[test]
    fn resolve_no_domain() {
        let ctx = make_ctx(None);
        let resolved = ctx.resolve_url("/about");
        assert_eq!(resolved, "/about");
        assert!(matches!(resolved, Cow::Borrowed(_)));
    }

    /// An empty domain behaves like no domain at all.
    #[test]
    fn resolve_empty_domain() {
        let ctx = make_ctx(Some(""));
        let resolved = ctx.resolve_url("/about");
        assert_eq!(resolved, "/about");
    }

    /// Absolute URLs are passed through without allocation.
    #[test]
    fn resolve_absolute_url_unchanged() {
        let ctx = make_ctx(Some("example.com"));
        let resolved = ctx.resolve_url("https://other.com/page");
        assert_eq!(resolved, "https://other.com/page");
        assert!(matches!(resolved, Cow::Borrowed(_)));
    }

    /// A relative path is joined onto a scheme-less domain.
    #[test]
    fn resolve_relative_with_bare_domain() {
        let ctx = make_ctx(Some("example.com"));
        let resolved = ctx.resolve_url("/about");
        assert_eq!(resolved, "https://example.com/about");
    }

    /// A relative path is joined onto a domain that already carries a scheme.
    #[test]
    fn resolve_relative_with_protocol() {
        let ctx = make_ctx(Some("https://example.com"));
        let resolved = ctx.resolve_url("/about");
        assert_eq!(resolved, "https://example.com/about");
    }

    /// Scheme-less domains default to `https`.
    #[test]
    fn resolve_bare_domain_uses_https() {
        let ctx = make_ctx(Some("example.com"));
        let resolved = ctx.resolve_url("/path");
        assert_eq!(resolved, "https://example.com/path");
    }

    /// Protocol-relative URLs inherit the scheme of the base.
    #[test]
    fn resolve_protocol_relative_url() {
        let ctx = make_ctx(Some("https://example.com"));
        let resolved = ctx.resolve_url("//cdn.example.com/a.js");
        assert_eq!(resolved, "https://cdn.example.com/a.js");
    }

    /// Each pushed reference receives the next index, starting at 1.
    #[test]
    fn push_reference_increments_index() {
        let mut ctx = make_ctx(None);
        let first = ctx.push_reference(String::from("[1]: https://a.com"));
        let second = ctx.push_reference(String::from("[2]: https://b.com"));
        assert_eq!(first, 1);
        assert_eq!(second, 2);
        assert!(ctx.has_references());
    }

    /// Taking references joins them with newlines and resets the state.
    #[test]
    fn take_references_joins_and_resets() {
        let mut ctx = make_ctx(None);
        for entry in ["[1]: https://a.com", "[2]: https://b.com"] {
            ctx.push_reference(entry.to_owned());
        }
        let joined = ctx.take_references();
        assert_eq!(joined, "[1]: https://a.com\n[2]: https://b.com");
        assert!(!ctx.has_references());
        assert_eq!(ctx.link_index, 0);
    }

    /// Taking references from a fresh context yields an empty string.
    #[test]
    fn take_references_empty() {
        let mut ctx = make_ctx(None);
        assert!(!ctx.has_references());
        let joined = ctx.take_references();
        assert_eq!(joined, "");
    }
}