use super::helpers::escape_attr;
use crate::plugin::{Plugin, PluginContext};
use anyhow::Result;
use std::path::Path;
/// Plugin that gives each HTML page a `<link rel="canonical">` tag.
///
/// The canonical URL is formed from the configured base URL and the page's
/// path; see the `Plugin` implementation for the details.
#[derive(Debug, Clone)]
pub struct CanonicalPlugin {
    /// Site root URL that page paths are appended to. Stored exactly as
    /// supplied; trailing slashes are dealt with at the point of use.
    base_url: String,
}

impl CanonicalPlugin {
    /// Creates a plugin that roots canonical URLs at `base_url`.
    ///
    /// Accepts anything convertible into a `String` (`&str`, `String`, ...).
    pub fn new(base_url: impl Into<String>) -> Self {
        let base_url = base_url.into();
        Self { base_url }
    }
}
impl Plugin for CanonicalPlugin {
    /// Returns the fixed plugin identifier, `"canonical"`.
    fn name(&self) -> &'static str {
        "canonical"
    }

    /// Always `true`: this plugin does its work in `transform_html`.
    fn has_transform(&self) -> bool {
        true
    }

    /// Replaces any canonical link in `html` with one derived from `path`.
    ///
    /// The canonical URL is the configured base URL (trailing slashes
    /// trimmed) joined with `path` made relative to `ctx.site_dir`. When
    /// `path` is not inside `ctx.site_dir` the path is used as given.
    /// Backslashes in the path are turned into forward slashes.
    ///
    /// Existing canonical links are stripped first. The new tag, followed by
    /// a newline, is then inserted immediately before the first closing
    /// `head` tag. Tag names are case-insensitive in HTML, so `</HEAD>` is
    /// recognised as well as `</head>`. If the document has no closing
    /// `head` tag, nothing is inserted and the stripped document is returned.
    ///
    /// # Errors
    ///
    /// The current implementation never returns `Err`.
    fn transform_html(
        &self,
        html: &str,
        path: &Path,
        ctx: &PluginContext,
    ) -> Result<String> {
        let base = self.base_url.trim_end_matches('/');
        let rel_path = path
            .strip_prefix(&ctx.site_dir)
            .unwrap_or(path)
            .to_string_lossy()
            .replace('\\', "/");
        let tag = build_canonical_tag(base, &rel_path);

        let mut result = remove_existing_canonicals(html);

        // ASCII lowercasing never changes byte lengths, so an offset found
        // in the lowered copy is valid (and on a char boundary) in `result`.
        let head_end = result.to_ascii_lowercase().find("</head>");
        if let Some(pos) = head_end {
            result.insert_str(pos, &format!("{tag}\n"));
        }
        Ok(result)
    }

    /// No post-compile work is needed; always succeeds.
    fn after_compile(&self, _ctx: &PluginContext) -> Result<()> {
        Ok(())
    }
}
/// Builds the `<link rel="canonical">` tag for `rel_path` under `base`.
///
/// Slashes at the seam are collapsed so that exactly one `/` separates the
/// two parts, even when `base` ends with `/` or `rel_path` starts with one
/// (which happens when the caller falls back to an absolute path). The
/// joined URL is passed through `escape_attr` before being placed in `href`.
fn build_canonical_tag(base: &str, rel_path: &str) -> String {
    let canonical_url = format!(
        "{}/{}",
        base.trim_end_matches('/'),
        rel_path.trim_start_matches('/')
    );
    format!(
        "<link rel=\"canonical\" href=\"{}\">",
        escape_attr(&canonical_url)
    )
}
/// Returns `html` with every `<link ... rel=canonical ...>` tag removed.
///
/// Matching is ASCII case-insensitive and covers the double-quoted,
/// single-quoted and unquoted spellings of the attribute. A single newline
/// directly after a removed tag is removed with it, so stripping a tag that
/// sat on its own line does not leave a blank line behind.
///
/// Only real `<link>` start tags are touched. An occurrence of
/// `rel="canonical"` in body text, in escaped markup such as
/// `&lt;link rel="canonical"&gt;`, or on another element is left alone.
fn remove_existing_canonicals(html: &str) -> String {
    const PATTERNS: [&str; 3] = ["rel=\"canonical\"", "rel='canonical'", "rel=canonical"];

    // ASCII lowercasing never changes byte lengths, so offsets found in
    // `lower` are valid (and on char boundaries) in `html`.
    let lower = html.to_ascii_lowercase();
    let mut out = String::with_capacity(html.len());
    // Everything before `kept` has already been copied to `out` or dropped.
    let mut kept = 0;
    // Where the next pattern search starts; always >= `kept`.
    let mut search = 0;

    loop {
        // Earliest occurrence of any spelling at or after `search`.
        let next = PATTERNS
            .iter()
            .filter_map(|pat| lower[search..].find(*pat).map(|i| (search + i, pat.len())))
            .min_by_key(|&(pos, _)| pos);
        let (pos, len) = match next {
            Some(found) => found,
            None => break,
        };
        search = pos + len;

        // The match only counts when the nearest preceding `<` opens a
        // `<link` tag that is still open at the match position.
        let start = match lower[kept..pos].rfind('<') {
            Some(i) if is_open_link_tag(&lower[kept + i..pos]) => kept + i,
            _ => continue,
        };

        // The tag runs to the next `>`, or to the end of input if unterminated.
        let end = lower[pos..].find('>').map_or(lower.len(), |i| pos + i + 1);
        let end = if lower.as_bytes().get(end) == Some(&b'\n') {
            end + 1
        } else {
            end
        };

        out.push_str(&html[kept..start]);
        kept = end;
        search = end;
    }

    out.push_str(&html[kept..]);
    out
}

/// Reports whether `span` is the still-open beginning of a `<link>` tag.
///
/// `span` is lowercased text running from a `<` up to (not including) a
/// canonical `rel` match. It qualifies when it starts with `<link` followed
/// by whitespace or `/`, and contains no `>` outside a quoted attribute
/// value (such a `>` would mean the tag already closed before the match).
fn is_open_link_tag(span: &str) -> bool {
    let rest = match span.strip_prefix("<link") {
        Some(rest) => rest,
        None => return false,
    };
    // Reject longer element names that merely begin with "link".
    if !rest.starts_with(|c: char| c.is_ascii_whitespace() || c == '/') {
        return false;
    }

    let mut quote: Option<u8> = None;
    for b in rest.bytes() {
        match (quote, b) {
            (Some(q), _) if b == q => quote = None,
            (Some(_), _) => {}
            (None, b'"' | b'\'') => quote = Some(b),
            (None, b'>') => return false,
            _ => {}
        }
    }
    true
}
#[cfg(test)]
// Panicking on an unexpected `Err`/`None` is the desired failure mode in tests.
#[allow(clippy::unwrap_used, clippy::expect_used)]
mod tests {
    use super::*;
    use crate::plugin::PluginContext;
    use std::path::Path;
    use tempfile::tempdir;

    /// Builds a context with `site` as the third constructor argument; the
    /// remaining directories are placeholders.
    fn ctx(site: &Path) -> PluginContext {
        PluginContext::new(
            Path::new("content"),
            Path::new("build"),
            site,
            Path::new("templates"),
        )
    }

    #[test]
    fn name_is_stable() {
        assert_eq!(CanonicalPlugin::new("https://x").name(), "canonical");
    }

    #[test]
    fn new_accepts_string_or_str() {
        let _ = CanonicalPlugin::new("https://a");
        let _ = CanonicalPlugin::new(String::from("https://b"));
    }

    #[test]
    fn no_op_when_site_dir_missing() {
        let dir = tempdir().unwrap();
        CanonicalPlugin::new("https://x")
            .after_compile(&ctx(&dir.path().join("nope")))
            .unwrap();
    }

    #[test]
    fn build_canonical_tag_joins_base_and_rel_path() {
        let tag = build_canonical_tag("https://example.com", "blog/post.html");
        assert_eq!(
            tag,
            r#"<link rel="canonical" href="https://example.com/blog/post.html">"#
        );
    }

    #[test]
    fn build_canonical_tag_escapes_href_attribute_value() {
        let tag = build_canonical_tag("https://example.com", "x?a=1&b=2");
        // Checking for a bare `&` would pass with or without escaping, so
        // assert that the raw sequence is gone instead. This holds for any
        // entity spelling the escaper may use.
        assert!(
            !tag.contains("a=1&b=2"),
            "ampersand in URL must be HTML-escaped: {tag}"
        );
        assert!(
            tag.contains("a=1") && tag.contains("b=2"),
            "query parameters must survive escaping: {tag}"
        );
    }

    #[test]
    fn remove_existing_canonicals_no_op_when_none_present() {
        let html = "<head><title>x</title></head>";
        assert_eq!(remove_existing_canonicals(html), html);
    }

    #[test]
    fn remove_existing_canonicals_strips_double_quoted() {
        let html = r#"<head><link rel="canonical" href="/old"><title>x</title></head>"#;
        let out = remove_existing_canonicals(html);
        assert!(!out.contains("rel=\"canonical\""));
        assert!(out.contains("<title>x</title>"));
    }

    #[test]
    fn remove_existing_canonicals_strips_single_quoted() {
        let html = "<head><link rel='canonical' href='/old'></head>";
        let out = remove_existing_canonicals(html);
        assert!(!out.contains("rel='canonical'"));
    }

    #[test]
    fn remove_existing_canonicals_strips_unquoted() {
        let html = "<head><link rel=canonical href=/old></head>";
        let out = remove_existing_canonicals(html);
        assert!(!out.contains("rel=canonical"));
    }

    #[test]
    fn remove_existing_canonicals_strips_multiple() {
        // Written with explicit `\n` escapes so source indentation cannot
        // leak into the fixture.
        let html = "<head>\n<link rel=\"canonical\" href=\"/a\">\n<link rel=\"canonical\" href=\"/b\">\n</head>";
        let out = remove_existing_canonicals(html);
        assert!(!out.contains("rel=\"canonical\""));
    }

    #[test]
    fn transform_html_injects_canonical() {
        let dir = tempdir().unwrap();
        let c = ctx(dir.path());
        let html = "<html><head></head><body></body></html>";
        let page_path = dir.path().join("page.html");
        let after = CanonicalPlugin::new("https://example.com")
            .transform_html(html, &page_path, &c)
            .unwrap();
        assert!(
            after.contains(r#"<link rel="canonical""#),
            "canonical link should be injected: {after}"
        );
    }

    #[test]
    fn transform_html_replaces_existing_canonical_with_correct_one() {
        let dir = tempdir().unwrap();
        let c = ctx(dir.path());
        let html = r#"<html><head><link rel="canonical" href="/wrong"></head></html>"#;
        let page_path = dir.path().join("page.html");
        let after = CanonicalPlugin::new("https://example.com")
            .transform_html(html, &page_path, &c)
            .unwrap();
        assert!(
            after.contains("https://example.com"),
            "wrong canonical replaced with correct: {after}"
        );
        assert!(
            !after.contains("/wrong"),
            "old canonical should be gone: {after}"
        );
    }

    #[test]
    fn transform_html_trims_trailing_slash_on_base_url() {
        let dir = tempdir().unwrap();
        let c = ctx(dir.path());
        let html = "<html><head></head></html>";
        let page_path = dir.path().join("page.html");
        let after = CanonicalPlugin::new("https://example.com/")
            .transform_html(html, &page_path, &c)
            .unwrap();
        assert!(
            !after.contains("com//page.html"),
            "no double-slash after trim: {after}"
        );
    }

    #[test]
    fn transform_html_handles_html_without_head_tag() {
        let dir = tempdir().unwrap();
        let c = ctx(dir.path());
        let raw = "<!doctype html><html><body>only</body></html>";
        let page_path = dir.path().join("frag.html");
        let after = CanonicalPlugin::new("https://example.com")
            .transform_html(raw, &page_path, &c)
            .unwrap();
        assert_eq!(after, raw);
    }
}