use std::sync::LazyLock;
use regex::Regex;
use scraper::{Html, Selector};
/// Article content recovered from BBCode embedded in a page's data attributes.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct BbcodeContent {
    /// HTML fragment produced by converting the event's BBCode body.
    pub html: String,
    /// The event's `event_name`, when present and a string.
    pub title: Option<String>,
    /// The `group_name` found in the page's group info attribute, if any.
    pub author: Option<String>,
}
/// Reads the first event stored in a `div[data-partnereventstore]` element and
/// converts its BBCode announcement body to HTML.
///
/// The attribute value is parsed as a JSON array; only its first entry is used.
/// Returns `None` when the element or attribute is missing, the JSON does not
/// parse, the array is empty, or `/announcement_body/body` is not a string.
/// The title (`event_name`) and author (group name) are optional.
pub fn extract_bbcode_content(html: &Html) -> Option<BbcodeContent> {
    let selector = Selector::parse("div[data-partnereventstore]").ok()?;
    let node = html.select(&selector).next()?;
    let payload = node.value().attr("data-partnereventstore")?;

    let parsed = serde_json::from_str::<serde_json::Value>(payload).ok()?;
    let first_event = parsed.as_array()?.first()?;

    // The body is mandatory: without it there is nothing to convert.
    let bbcode = first_event.pointer("/announcement_body/body")?.as_str()?;
    let title = first_event
        .get("event_name")
        .and_then(serde_json::Value::as_str)
        .map(str::to_owned);

    Some(BbcodeContent {
        html: bbcode_to_html(bbcode),
        title,
        author: extract_group_name(html),
    })
}
/// Looks up the `group_name` of the first entry in the JSON array stored in a
/// `div[data-groupvanityinfo]` element.
///
/// Returns `None` if the element, attribute, array entry or string field is
/// missing, or if the attribute value is not valid JSON.
fn extract_group_name(html: &Html) -> Option<String> {
    let selector = Selector::parse("div[data-groupvanityinfo]").ok()?;
    let node = html.select(&selector).next()?;
    let payload = node.value().attr("data-groupvanityinfo")?;

    let parsed = serde_json::from_str::<serde_json::Value>(payload).ok()?;
    let name = parsed.as_array()?.first()?.get("group_name")?.as_str()?;
    Some(name.to_owned())
}
/// Matches `[url=TARGET]text[/url]`, with optional double quotes around
/// `TARGET`. Capture 1 is the target, capture 2 the (lazily matched) link text.
///
/// The `=` after `url` is required: without it the pattern could never match
/// the `[url="https://…"]` form, because the quote would have to follow `[url`
/// directly.
static BBCODE_URL_RE: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(r#"\[url="?([^"\]]+)"?\](.*?)\[/url\]"#).expect("bbcode url regex is valid")
});
/// Matches `[previewyoutube=ID;options][/previewyoutube]`, with optional double
/// quotes around the value. Capture 1 is the video id (everything up to the
/// first `;`, quote or `]`).
///
/// The `=` after the tag name is required for the tag to match at all, and the
/// option tail excludes `]` so that an unquoted tag cannot greedily run past
/// its own closing bracket into a later embed.
static BBCODE_YOUTUBE_RE: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(r#"\[previewyoutube="?([^;"\]]+)[^"\]]*"?\]\[/previewyoutube\]"#)
        .expect("bbcode youtube regex is valid")
});
/// Matches `[img]URL[/img]`; capture 1 is the lazily matched image URL.
static BBCODE_IMG_RE: LazyLock<Regex> = LazyLock::new(|| {
    let pattern = r"\[img\](.*?)\[/img\]";
    Regex::new(pattern).expect("bbcode img regex is valid")
});
/// Reports whether `url` may be emitted into an `href`/`src` attribute.
///
/// After trimming surrounding whitespace, the URL is accepted when it starts
/// with `/` or with an `http://` / `https://` scheme (ASCII case-insensitive).
/// Everything else, including the empty string, is rejected.
fn is_safe_url(url: &str) -> bool {
    let candidate = url.trim();
    // Compare raw bytes so no lowercase copy of the URL is needed.
    let has_scheme = |scheme: &str| {
        candidate
            .as_bytes()
            .get(..scheme.len())
            .is_some_and(|prefix| prefix.eq_ignore_ascii_case(scheme.as_bytes()))
    };
    candidate.starts_with('/') || has_scheme("http://") || has_scheme("https://")
}
fn bbcode_to_html(bbcode: &str) -> String {
let mut out = bbcode.to_string();
out = out.replace('\n', "<br>");
out = out.replace("[p]", "<p>").replace("[/p]", "</p>");
out = out.replace("[b]", "<strong>").replace("[/b]", "</strong>");
out = out.replace("[i]", "<em>").replace("[/i]", "</em>");
out = out.replace("[u]", "<u>").replace("[/u]", "</u>");
out = out.replace("[h1]", "<h1>").replace("[/h1]", "</h1>");
out = out.replace("[h2]", "<h2>").replace("[/h2]", "</h2>");
out = out.replace("[h3]", "<h3>").replace("[/h3]", "</h3>");
out = out.replace("[list]", "<ul>").replace("[/list]", "</ul>");
out = out.replace("[olist]", "<ol>").replace("[/olist]", "</ol>");
out = out.replace("[*]", "<li>");
out = BBCODE_URL_RE
.replace_all(&out, |caps: ®ex::Captures| {
let url = &caps[1];
let text = &caps[2];
if is_safe_url(url) {
let escaped_url = crate::dom::html_attr_escape(url);
let escaped_text = crate::dom::html_escape(text);
format!("<a href=\"{escaped_url}\">{escaped_text}</a>")
} else {
crate::dom::html_escape(text)
}
})
.to_string();
out = BBCODE_YOUTUBE_RE
.replace_all(
&out,
r#"<iframe src="https://www.youtube.com/embed/$1"></iframe>"#,
)
.to_string();
out = BBCODE_IMG_RE
.replace_all(&out, |caps: ®ex::Captures| {
let url = &caps[1];
if is_safe_url(url) {
let escaped = crate::dom::html_attr_escape(url);
format!("<img src=\"{escaped}\">")
} else {
crate::dom::html_escape(&caps[0])
}
})
.to_string();
out
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Paragraph and bold tags map onto `<p>` and `<strong>`.
    #[test]
    fn convert_basic_bbcode() {
        let input = "[p]Hello [b]world[/b]![/p]";
        let result = bbcode_to_html(input);
        assert_eq!(result, "<p>Hello <strong>world</strong>!</p>");
    }

    /// A quoted `[url=…]` tag becomes an anchor.
    #[test]
    fn convert_url_tag() {
        let input = r#"[url="https://example.com"]link[/url]"#;
        let result = bbcode_to_html(input);
        assert_eq!(result, r#"<a href="https://example.com">link</a>"#);
    }

    /// The video id (the part before `;`) ends up in the embed URL.
    #[test]
    fn convert_youtube_preview() {
        let input = r#"[previewyoutube="dQw4w9WgXcQ;full"][/previewyoutube]"#;
        let result = bbcode_to_html(input);
        assert!(result.contains("youtube.com/embed/dQw4w9WgXcQ"));
    }

    /// The presence of the data attribute is enough for extraction to succeed.
    #[test]
    fn detect_bbcode_by_data_attribute() {
        let doc = Html::parse_document(
            r#"<html><body>
<div data-partnereventstore='[{"event_name":"Test Event","announcement_body":{"body":"[p]Hello[/p]"}}]'></div>
</body></html>"#,
        );
        let result = extract_bbcode_content(&doc);
        assert!(result.is_some());
    }

    /// The JSON payload yields both the title and the converted body.
    #[test]
    fn extract_bbcode_parses_json_and_converts() {
        let doc = Html::parse_document(
            r#"<html><body>
<div data-partnereventstore='[{"event_name":"Launch Day","announcement_body":{"body":"[b]Welcome[/b] to the event"}}]'></div>
</body></html>"#,
        );
        let result = extract_bbcode_content(&doc).expect("should extract bbcode");
        assert_eq!(result.title.as_deref(), Some("Launch Day"));
        assert!(result.html.contains("<strong>Welcome</strong>"));
        assert!(result.html.contains("to the event"));
    }

    /// A quote inside an image URL must not be able to close the `src` attribute.
    #[test]
    fn img_tag_escapes_url() {
        let input = r#"[img]https://x.com" onerror="alert(1)[/img]"#;
        let result = bbcode_to_html(input);
        assert!(result.contains("&quot;"));
        assert!(
            result.contains(r#"src="https://x.com&quot;"#),
            "expected escaped quote in src attr, got: {result}"
        );
    }

    /// Markup inside link text is escaped rather than emitted as HTML.
    #[test]
    fn url_tag_escapes_text() {
        let input = r#"[url="https://x.com"]<script>alert(1)</script>[/url]"#;
        let result = bbcode_to_html(input);
        assert!(!result.contains("<script>"));
        assert!(result.contains("&lt;script&gt;"));
    }
}