use scraper::{Html, Selector};
use crate::dom;
use super::ExtractorResult;
use super::comments::{CommentData, build_comment_tree, build_content_html};
/// Reports whether the document looks like a GitHub issue or pull-request page.
///
/// Two conditions must both hold:
/// * the page is on GitHub, judged by GitHub-specific `<meta>` tags or a URL
///   containing `github.com/`;
/// * the page is an issue or a pull request, judged by the URL path or by
///   characteristic DOM markers.
#[must_use]
pub fn is_github(html: &Html, url: Option<&str>) -> bool {
    let on_github = has_github_meta(html) || url.is_some_and(|u| u.contains("github.com/"));
    if !on_github {
        return false;
    }

    let looks_like_issue = url.is_some_and(is_issue_url) || has_issue_markers(html);
    let looks_like_pr = url.is_some_and(is_pr_url) || has_pr_markers(html);
    looks_like_issue || looks_like_pr
}
/// Extracts the content of a GitHub issue or pull-request page.
///
/// Returns `None` when [`is_github`] rejects the document. Otherwise the page
/// is parsed from the DOM; if that yields fewer than 10 words of content, the
/// GitHub REST API is tried and its result is preferred when available.
///
/// `include_replies` controls whether comments are appended to the content.
#[must_use]
pub fn extract_github(
    html: &Html,
    url: Option<&str>,
    include_replies: bool,
) -> Option<ExtractorResult> {
    if !is_github(html, url) {
        return None;
    }

    let treat_as_pr = url.is_some_and(is_pr_url) || has_pr_markers(html);
    let from_dom = if treat_as_pr {
        extract_pr(html, url, include_replies)
    } else {
        extract_issue(html, url, include_replies)
    };

    // Enough content was found in the markup: no need to hit the network.
    if dom::count_words_html(&from_dom.content) >= 10 {
        return Some(from_dom);
    }

    // Sparse DOM result: prefer the API answer, but keep the DOM result when
    // the API cannot be reached or the URL cannot be parsed.
    Some(try_api_fetch(url, include_replies).unwrap_or(from_dom))
}
/// Returns `true` when the document contains at least one of the
/// GitHub-specific `<meta>` tags listed below.
fn has_github_meta(html: &Html) -> bool {
    const META_SELECTORS: [&str; 3] = [
        "meta[name=\"expected-hostname\"][content=\"github.com\"]",
        "meta[name=\"octolytics-url\"]",
        "meta[name=\"github-keyboard-shortcuts\"]",
    ];

    for css in META_SELECTORS {
        // A selector that fails to parse simply counts as "no match".
        let Ok(selector) = Selector::parse(css) else {
            continue;
        };
        if html.select(&selector).next().is_some() {
            return true;
        }
    }
    false
}
/// Returns `true` when `url` contains the `/issues/` path segment.
fn is_issue_url(url: &str) -> bool {
    const ISSUE_SEGMENT: &str = "/issues/";
    url.contains(ISSUE_SEGMENT)
}
/// Returns `true` when `url` contains the `/pull/` path segment.
fn is_pr_url(url: &str) -> bool {
    const PULL_SEGMENT: &str = "/pull/";
    url.contains(PULL_SEGMENT)
}
/// Returns `true` when the document contains one of the `data-testid`
/// elements that this extractor treats as issue-page markers.
fn has_issue_markers(html: &Html) -> bool {
    const ISSUE_MARKERS: [&str; 2] = [
        "[data-testid=\"issue-metadata-sticky\"]",
        "[data-testid=\"issue-title\"]",
    ];

    ISSUE_MARKERS
        .iter()
        // Selectors that fail to parse are skipped rather than treated as errors.
        .filter_map(|css| Selector::parse(css).ok())
        .any(|selector| html.select(&selector).next().is_some())
}
/// Returns `true` when the document contains one of the CSS classes that this
/// extractor treats as pull-request page markers.
fn has_pr_markers(html: &Html) -> bool {
    const PR_MARKERS: [&str; 3] = [
        ".pull-discussion-timeline",
        ".discussion-timeline",
        ".gh-header-title",
    ];

    PR_MARKERS.into_iter().any(|css| {
        // An unparsable selector counts as "not present".
        Selector::parse(css).is_ok_and(|selector| html.select(&selector).next().is_some())
    })
}
/// Extracts the `(owner, repo)` pair from a GitHub URL.
///
/// Returns a pair of empty strings when `url` is `None` or does not contain a
/// `github.com/<owner>/<repo>` path.
fn extract_repo_info(url: Option<&str>) -> (String, String) {
    use std::sync::LazyLock;
    // Path segments end at `/`, `?` or `#`, so a query string or fragment that
    // directly follows the repository name (`.../repo?tab=readme`,
    // `.../repo#readme`) is not captured as part of the name.
    static REPO_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
        regex::Regex::new(r"github\.com/([^/?#]+)/([^/?#]+)").expect("github repo regex is valid")
    });

    url.and_then(|u| REPO_RE.captures(u))
        .map(|caps| {
            let group = |index: usize| {
                caps.get(index)
                    .map_or_else(String::new, |m| m.as_str().to_owned())
            };
            (group(1), group(2))
        })
        .unwrap_or_default()
}
fn extract_issue(html: &Html, url: Option<&str>, include_replies: bool) -> ExtractorResult {
let (_owner, _repo) = extract_repo_info(url);
let title = extract_title(html);
let (body, author) = extract_issue_body(html);
let comments = if include_replies {
extract_issue_comments(html)
} else {
String::new()
};
let content = build_content_html("github", &body, &comments);
ExtractorResult {
content,
title: Some(title),
author: if author.is_empty() {
None
} else {
Some(author)
},
site: Some("GitHub".to_string()),
published: None,
image: None,
description: None,
}
}
/// Returns the trimmed text of the document's `<title>` element.
///
/// When the part after the last `" · "` separator contains a `/` (as in an
/// `owner/repo` suffix), that suffix and its separator are removed. Returns an
/// empty string when there is no `<title>` element.
fn extract_title(html: &Html) -> String {
    const SEPARATOR: &str = " · ";

    let Ok(selector) = Selector::parse("title") else {
        return String::new();
    };
    let Some(element) = html.select(&selector).next() else {
        return String::new();
    };
    let mut full = dom::text_content(html, element.id()).trim().to_string();

    // Length of the title without its trailing "owner/repo"-like segment, if any.
    let keep = full
        .rsplit_once(SEPARATOR)
        .filter(|(_, tail)| tail.contains('/'))
        .map(|(head, _)| head.len());
    if let Some(len) = keep {
        full.truncate(len);
    }
    full
}
/// Returns `(body_html, author)` for the issue shown on the page.
///
/// Both strings are empty when the issue container element is missing. The
/// body is the trimmed inner HTML of the first markdown body found inside the
/// issue body viewer, or empty when there is none.
fn extract_issue_body(html: &Html) -> (String, String) {
    let containers = dom::select_ids(html, "[data-testid=\"issue-viewer-issue-container\"]");
    match containers.first() {
        None => (String::new(), String::new()),
        Some(&container) => {
            let author = extract_issue_author(html, container);
            let body_html = dom::select_ids(
                html,
                "[data-testid=\"issue-body-viewer\"] [data-testid=\"markdown-body\"]",
            )
            .first()
            .map(|&node| dom::inner_html(html, node))
            .unwrap_or_default();
            (body_html.trim().to_string(), author)
        }
    }
}
/// Finds the issue author's name inside the element `container_id`.
///
/// Each candidate selector is tried in order. For the first element it matches,
/// the trimmed text content is used; if that is empty, the `href` attribute is
/// used with a leading `https://github.com/` or `/` removed. Returns an empty
/// string when no candidate yields a name.
fn extract_issue_author(html: &Html, container_id: ego_tree::NodeId) -> String {
    const CANDIDATES: [&str; 2] = [
        "[data-testid=\"issue-body-header-author\"]",
        "a[data-testid=\"avatar-link\"]",
    ];

    for css in CANDIDATES {
        let matches = dom::select_within(html, container_id, css);
        let Some(&node) = matches.first() else {
            continue;
        };

        let label = dom::text_content(html, node);
        let label = label.trim();
        if !label.is_empty() {
            return label.to_string();
        }

        // No visible text: fall back to the link target.
        let Some(href) = dom::get_attr(html, node, "href") else {
            continue;
        };
        let login = href
            .strip_prefix("https://github.com/")
            .or_else(|| href.strip_prefix('/'))
            .unwrap_or(&href);
        if !login.is_empty() {
            return login.to_string();
        }
    }
    String::new()
}
fn extract_issue_comments(html: &Html) -> String {
let timeline_sel = "[data-wrapper-timeline-id]";
let timeline_ids = dom::select_ids(html, timeline_sel);
let mut comments = Vec::new();
for &timeline_id in &timeline_ids {
if let Some(comment) = extract_single_issue_comment(html, timeline_id) {
comments.push(comment);
}
}
if comments.is_empty() {
return String::new();
}
build_comment_tree(&comments)
}
/// Extracts one comment from the timeline element `timeline_id`.
///
/// The search is scoped to the first `.react-issue-comment` descendant when
/// there is one, and to the timeline element itself otherwise. Returns `None`
/// when no markdown body is found or its trimmed HTML is empty.
fn extract_single_issue_comment(html: &Html, timeline_id: ego_tree::NodeId) -> Option<CommentData> {
    let scope = dom::select_within(html, timeline_id, ".react-issue-comment")
        .first()
        .copied()
        .unwrap_or(timeline_id);

    let author = extract_comment_author(html, scope);
    let date = extract_relative_time(html, scope);
    let content = dom::select_within(html, scope, "[data-testid=\"markdown-body\"]")
        .first()
        .map(|&node| dom::inner_html(html, node).trim().to_string())
        .filter(|markup| !markup.is_empty())?;

    Some(CommentData {
        author,
        date,
        content,
        depth: 0,
        score: None,
        url: None,
    })
}
/// Finds the author of a comment inside the element `container_id`.
///
/// Each selector is tried in order; the `href` of the first element it matches
/// is turned into a name by removing a leading `https://github.com/` or `/`.
/// Returns `"Unknown"` when no selector yields a non-empty name.
fn extract_comment_author(html: &Html, container_id: ego_tree::NodeId) -> String {
    let selectors = [
        "[data-testid=\"avatar-link\"]",
        "a[href^=\"/\"][data-hovercard-url]",
    ];
    for sel_str in selectors {
        let ids = dom::select_within(html, container_id, sel_str);
        let Some(&id) = ids.first() else {
            continue;
        };
        let Some(href) = dom::get_attr(html, id, "href") else {
            continue;
        };
        // Avatar links may be absolute; strip the host the same way
        // `extract_issue_author` does so the full URL is never used as a name.
        let name = href
            .strip_prefix("https://github.com/")
            .or_else(|| href.strip_prefix('/'))
            .unwrap_or(&href);
        if !name.is_empty() {
            return name.to_string();
        }
    }
    "Unknown".to_string()
}
/// Returns the date part of the first `<relative-time>` element inside
/// `container_id`.
///
/// The `datetime` attribute is cut at the first `T`. Returns an empty string
/// when the element or the attribute is missing.
fn extract_relative_time(html: &Html, container_id: ego_tree::NodeId) -> String {
    let stamps = dom::select_within(html, container_id, "relative-time");
    let Some(&first) = stamps.first() else {
        return String::new();
    };
    match dom::get_attr(html, first, "datetime") {
        Some(datetime) => datetime
            .split('T')
            .next()
            .map(String::from)
            .unwrap_or_default(),
        None => String::new(),
    }
}
/// Builds an [`ExtractorResult`] for a pull-request page from its DOM.
///
/// The description always goes into the content; comments are appended only
/// when `include_replies` is set. The author is `None` when none was found.
fn extract_pr(html: &Html, url: Option<&str>, include_replies: bool) -> ExtractorResult {
    // The repository coordinates are parsed but not used by this extractor.
    let _ = extract_repo_info(url);

    let title = extract_title(html);
    let (description, opener) = extract_pr_body(html);
    let discussion = if include_replies {
        extract_pr_comments(html)
    } else {
        String::new()
    };

    ExtractorResult {
        content: build_content_html("github", &description, &discussion),
        title: Some(title),
        author: (!opener.is_empty()).then_some(opener),
        site: Some("GitHub".to_string()),
        published: None,
        image: None,
        description: None,
    }
}
fn extract_pr_body(html: &Html) -> (String, String) {
let pr_sel = "[id^=\"pullrequest-\"]";
let pr_ids = dom::select_ids(html, pr_sel);
let pr_container = pr_ids.first().copied();
let body_sel = ".comment-body.markdown-body";
let body = if let Some(container_id) = pr_container {
let ids = dom::select_within(html, container_id, body_sel);
ids.first()
.map(|&id| dom::inner_html(html, id).trim().to_string())
} else {
let ids = dom::select_ids(html, body_sel);
ids.first()
.map(|&id| dom::inner_html(html, id).trim().to_string())
}
.unwrap_or_default();
let author = pr_container
.map(|cid| extract_pr_author(html, cid))
.unwrap_or_default();
(body, author)
}
/// Returns the trimmed text of the first `.author` element inside
/// `container_id`, or an empty string when there is none.
fn extract_pr_author(html: &Html, container_id: ego_tree::NodeId) -> String {
    match dom::select_within(html, container_id, ".author").first() {
        Some(&node) => dom::text_content(html, node).trim().to_string(),
        None => String::new(),
    }
}
fn extract_pr_comments(html: &Html) -> String {
let comment_sel = ".timeline-comment, .review-comment";
let all_ids = dom::select_ids(html, comment_sel);
let pr_body_ids = dom::select_ids(html, "[id^=\"pullrequest-\"]");
let pr_body_id = pr_body_ids.first().copied();
let mut comments = Vec::new();
for &cid in &all_ids {
if pr_body_id.is_some_and(|pb| pb == cid || dom::is_ancestor(html, cid, pb)) {
continue;
}
if let Some(comment) = extract_single_pr_comment(html, cid) {
comments.push(comment);
}
}
if comments.is_empty() {
return String::new();
}
build_comment_tree(&comments)
}
/// Extracts one comment from the pull-request comment element `comment_id`.
///
/// Returns `None` when the element has no `.comment-body.markdown-body`
/// descendant or when that body's trimmed HTML is empty.
fn extract_single_pr_comment(html: &Html, comment_id: ego_tree::NodeId) -> Option<CommentData> {
    let author = extract_pr_author(html, comment_id);
    let date = extract_relative_time(html, comment_id);

    let bodies = dom::select_within(html, comment_id, ".comment-body.markdown-body");
    let &body_node = bodies.first()?;
    let content = dom::inner_html(html, body_node).trim().to_string();
    if content.is_empty() {
        return None;
    }

    Some(CommentData {
        author,
        date,
        content,
        depth: 0,
        score: None,
        url: None,
    })
}
/// Splits a GitHub issue or pull-request URL into its parts.
///
/// Returns `(owner, repo, number, is_pr)`, where `is_pr` is `true` for
/// `/pull/<n>` URLs and `false` for `/issues/<n>` URLs. Returns `None` when the
/// URL does not match either form.
fn parse_github_url(url: &str) -> Option<(String, String, String, bool)> {
    use std::sync::LazyLock;
    static RE: LazyLock<regex::Regex> = LazyLock::new(|| {
        regex::Regex::new(r"github\.com/([^/]+)/([^/]+)/(issues|pull)/(\d+)")
            .expect("github url regex is valid")
    });

    RE.captures(url).and_then(|caps| {
        let group = |index: usize| caps.get(index).map(|m| m.as_str());
        let is_pull = group(3)? == "pull";
        Some((
            group(1)?.to_owned(),
            group(2)?.to_owned(),
            group(4)?.to_owned(),
            is_pull,
        ))
    })
}
/// Fetches an issue or pull request from `api.github.com` and builds an
/// [`ExtractorResult`] from the JSON response.
///
/// Returns `None` when `url` is missing, is not an issue/PR URL, or the request
/// or JSON parsing fails. The content starts with optional metadata lines
/// (status for pull requests, labels, milestone) followed by the rendered
/// markdown body. Comments are fetched only when `include_replies` is set; for
/// pull requests this also includes reviews and review comments.
fn try_api_fetch(url: Option<&str>, include_replies: bool) -> Option<ExtractorResult> {
    use std::fmt::Write;

    let (owner, repo, number, is_pr) = parse_github_url(url?)?;
    let resource = if is_pr { "pulls" } else { "issues" };
    let payload = fetch_github_json(&format!(
        "https://api.github.com/repos/{owner}/{repo}/{resource}/{number}"
    ))?;

    let title = json_str(&payload, "title");
    let markdown_body = json_str(&payload, "body");
    let author = payload
        .get("user")
        .and_then(|user| user.get("login"))
        .and_then(serde_json::Value::as_str)
        .unwrap_or_default()
        .to_owned();
    // Keep only the date part of the timestamp (everything before the first `T`).
    let created_at = json_str(&payload, "created_at");
    let published = created_at.split('T').next().unwrap_or("").to_owned();

    let label_names: Vec<&str> = payload
        .get("labels")
        .and_then(serde_json::Value::as_array)
        .into_iter()
        .flatten()
        .filter_map(|label| label.get("name").and_then(serde_json::Value::as_str))
        .collect();
    let milestone = payload
        .get("milestone")
        .and_then(|m| m.get("title"))
        .and_then(serde_json::Value::as_str)
        .unwrap_or("");
    let merged =
        is_pr && payload.get("merged").and_then(serde_json::Value::as_bool) == Some(true);
    let state = json_str(&payload, "state");

    // Metadata header; writing into a `String` cannot fail, so results are ignored.
    let mut body_html = String::new();
    if is_pr {
        let status = if merged { "merged" } else { state.as_str() };
        if !status.is_empty() {
            let _ = writeln!(
                body_html,
                "<p><strong>Status:</strong> {}</p>",
                dom::html_escape(status)
            );
        }
    }
    if !label_names.is_empty() {
        let joined = label_names.join(", ");
        let _ = writeln!(
            body_html,
            "<p><strong>Labels:</strong> {}</p>",
            dom::html_escape(&joined)
        );
    }
    if !milestone.is_empty() {
        let _ = writeln!(
            body_html,
            "<p><strong>Milestone:</strong> {}</p>",
            dom::html_escape(milestone)
        );
    }
    body_html.push_str(&markdown_to_html(&markdown_body));

    let comments_html = if include_replies {
        let discussion = fetch_api_comments(&owner, &repo, &number);
        let reviews = if is_pr {
            fetch_pr_review_comments(&owner, &repo, &number)
        } else {
            String::new()
        };
        match (discussion.is_empty(), reviews.is_empty()) {
            (true, _) => reviews,
            (false, true) => discussion,
            (false, false) => format!("{discussion}\n{reviews}"),
        }
    } else {
        String::new()
    };

    let content = build_content_html("github", &body_html, &comments_html);
    let non_empty = |text: String| (!text.is_empty()).then_some(text);

    Some(ExtractorResult {
        content,
        title: non_empty(title),
        author: non_empty(author),
        site: Some("GitHub".to_string()),
        published: non_empty(published),
        image: None,
        description: None,
    })
}
fn fetch_api_comments(owner: &str, repo: &str, number: &str) -> String {
let url = format!("https://api.github.com/repos/{owner}/{repo}/issues/{number}/comments");
let items = fetch_github_json_paginated(&url);
let comments: Vec<CommentData> = items
.iter()
.filter_map(|c| {
let body = c.get("body")?.as_str()?;
if body.trim().is_empty() {
return None;
}
let author = c
.get("user")
.and_then(|u| u.get("login"))
.and_then(serde_json::Value::as_str)
.unwrap_or("Unknown")
.to_string();
let date = c
.get("created_at")
.and_then(serde_json::Value::as_str)
.and_then(|d| d.split('T').next())
.unwrap_or("")
.to_string();
Some(CommentData {
author,
date,
content: markdown_to_html(body),
depth: 0,
score: None,
url: None,
})
})
.collect();
if comments.is_empty() {
return String::new();
}
build_comment_tree(&comments)
}
fn fetch_pr_review_comments(owner: &str, repo: &str, number: &str) -> String {
let mut comments = Vec::new();
let reviews_url = format!("https://api.github.com/repos/{owner}/{repo}/pulls/{number}/reviews");
for review in fetch_github_json_paginated(&reviews_url) {
let body = review
.get("body")
.and_then(serde_json::Value::as_str)
.unwrap_or("");
if body.trim().is_empty() {
continue;
}
let author = review
.get("user")
.and_then(|u| u.get("login"))
.and_then(serde_json::Value::as_str)
.unwrap_or("Unknown")
.to_string();
let date = review
.get("submitted_at")
.and_then(serde_json::Value::as_str)
.and_then(|d| d.split('T').next())
.unwrap_or("")
.to_string();
comments.push(CommentData {
author,
date,
content: markdown_to_html(body),
depth: 0,
score: None,
url: None,
});
}
let line_url = format!("https://api.github.com/repos/{owner}/{repo}/pulls/{number}/comments");
for c in fetch_github_json_paginated(&line_url) {
let body = c
.get("body")
.and_then(serde_json::Value::as_str)
.unwrap_or("");
if body.trim().is_empty() {
continue;
}
let author = c
.get("user")
.and_then(|u| u.get("login"))
.and_then(serde_json::Value::as_str)
.unwrap_or("Unknown")
.to_string();
let date = c
.get("created_at")
.and_then(serde_json::Value::as_str)
.and_then(|d| d.split('T').next())
.unwrap_or("")
.to_string();
let path = c
.get("path")
.and_then(serde_json::Value::as_str)
.unwrap_or("");
let prefix = if path.is_empty() {
String::new()
} else {
let escaped = dom::html_escape(path);
format!("<p><code>{escaped}</code></p>\n")
};
comments.push(CommentData {
author,
date,
content: format!("{prefix}{}", markdown_to_html(body)),
depth: 0,
score: None,
url: None,
});
}
if comments.is_empty() {
return String::new();
}
build_comment_tree(&comments)
}
/// Requests `url` with the GitHub JSON `Accept` header and parses the response.
///
/// Returns `None` when the request yields no body or the body is not valid JSON.
fn fetch_github_json(url: &str) -> Option<serde_json::Value> {
    const ACCEPT_GITHUB_JSON: (&str, &str) = ("Accept", "application/vnd.github+json");

    crate::http::get_with_headers(url, &[ACCEPT_GITHUB_JSON])
        .and_then(|raw| serde_json::from_str(&raw).ok())
}
/// Fetches a paginated GitHub API list endpoint and returns all items.
///
/// Pages of 100 items are requested, starting at page 1, until a request
/// fails, the response is not a JSON array, a page comes back short (or
/// empty), or `MAX_PAGES` pages have been read. Items collected before a
/// failure are still returned.
fn fetch_github_json_paginated(url: &str) -> Vec<serde_json::Value> {
    const MAX_PAGES: u32 = 10;
    const PER_PAGE: usize = 100;

    // The separator depends only on `url`, so it is computed once.
    let separator = if url.contains('?') { '&' } else { '?' };

    let mut all_items = Vec::new();
    for page in 1..=MAX_PAGES {
        let page_url = format!("{url}{separator}per_page={PER_PAGE}&page={page}");
        // Take ownership of the parsed array so its items are moved, not cloned.
        let Some(serde_json::Value::Array(items)) = fetch_github_json(&page_url) else {
            break;
        };
        let fetched = items.len();
        all_items.extend(items);
        // A short (or empty) page is the last one.
        if fetched < PER_PAGE {
            break;
        }
    }
    all_items
}
/// Returns a copy of the string stored under `key` in `json`, or an empty
/// string when the key is absent or its value is not a JSON string.
fn json_str(json: &serde_json::Value, key: &str) -> String {
    match json.get(key) {
        Some(serde_json::Value::String(text)) => text.clone(),
        _ => String::new(),
    }
}
/// Returns `true` when `url` uses a scheme that must not end up in a link
/// (`javascript:`, `vbscript:` or `data:`).
///
/// The comparison is ASCII case-insensitive. Before comparing, the URL is
/// normalised the way browsers do it: surrounding whitespace and control
/// characters are ignored, and tabs, carriage returns and line feeds are
/// dropped wherever they occur, so `"java\tscript:…"` is recognised too.
fn is_dangerous_link(url: &str) -> bool {
    const BLOCKED_SCHEMES: [&str; 3] = ["javascript:", "vbscript:", "data:"];

    let trimmed = url.trim_matches(|c: char| c.is_whitespace() || c.is_ascii_control());
    // Lazily normalised view of the URL; nothing is allocated, which matters
    // for potentially large `data:` URLs.
    let normalized = || {
        trimmed
            .chars()
            .filter(|c| !matches!(c, '\t' | '\n' | '\r'))
            .map(|c| c.to_ascii_lowercase())
    };

    // Schemes are ASCII, so their byte length equals their character count.
    BLOCKED_SCHEMES
        .iter()
        .any(|scheme| normalized().take(scheme.len()).eq(scheme.chars()))
}
/// Renders GitHub-flavoured markdown to HTML.
///
/// Bare URLs are turned into links first. Tables, strikethrough, task lists
/// and footnotes are enabled. Raw HTML in the markdown is emitted as text
/// instead of markup, and links whose destination is rejected by
/// `is_dangerous_link` get an empty destination.
fn markdown_to_html(md: &str) -> String {
    use pulldown_cmark::{Event, Options, Parser, Tag, html};

    let mut extensions = Options::empty();
    extensions.insert(Options::ENABLE_TABLES);
    extensions.insert(Options::ENABLE_STRIKETHROUGH);
    extensions.insert(Options::ENABLE_TASKLISTS);
    extensions.insert(Options::ENABLE_FOOTNOTES);

    let source = linkify_bare_urls(md);
    let events = Parser::new_ext(&source, extensions).map(|event| match event {
        // Raw HTML becomes plain text, so the renderer escapes it.
        Event::Html(raw) | Event::InlineHtml(raw) => Event::Text(raw),
        Event::Start(Tag::Link {
            link_type,
            dest_url,
            title,
            id,
        }) => {
            let dest_url = if is_dangerous_link(&dest_url) {
                "".into()
            } else {
                dest_url
            };
            Event::Start(Tag::Link {
                link_type,
                dest_url,
                title,
                id,
            })
        }
        other => other,
    });

    let mut rendered = String::with_capacity(md.len() * 2);
    html::push_html(&mut rendered, events);
    rendered
}
/// Rewrites bare `http://` / `https://` URLs in markdown as `[url](url)` links.
///
/// The text is processed line by line and joined with `\n`. A URL is left
/// untouched when it is:
/// * inside a fenced code block (``` or `~~~`, optionally indented);
/// * on a link reference definition line (`[label]: http…`);
/// * directly preceded by `(`, `[`, `]` or `<`, i.e. already part of link syntax;
/// * inside an inline code span (odd number of backticks before it).
///
/// Trailing `.`, `,`, `;`, `:`, `!` and `?` are kept as text after the link
/// rather than becoming part of it.
fn linkify_bare_urls(md: &str) -> String {
    use regex::Regex;
    use std::sync::LazyLock;
    static URL_RE: LazyLock<Regex> =
        LazyLock::new(|| Regex::new(r"https?://[^\s<>\[\])\]]+").expect("valid regex"));
    const FENCES: [&str; 2] = ["```", "~~~"];

    let mut result = String::with_capacity(md.len());
    // Marker that opened the code block we are currently in, if any. Tracking
    // the marker means a `~~~` line inside a ``` block does not close it.
    let mut open_fence: Option<&'static str> = None;

    for line in md.lines() {
        if !result.is_empty() {
            result.push('\n');
        }

        // Leading whitespace is ignored when looking for fences, so fences
        // nested in list items are recognised as well.
        let unindented = line.trim_start();

        if let Some(fence) = open_fence {
            if unindented.starts_with(fence) {
                open_fence = None;
            }
            result.push_str(line);
            continue;
        }
        if let Some(fence) = FENCES
            .into_iter()
            .find(|fence| unindented.starts_with(*fence))
        {
            open_fence = Some(fence);
            result.push_str(line);
            continue;
        }

        // Link reference definitions already carry their URL in link syntax.
        if unindented.starts_with('[') && line.contains("]: http") {
            result.push_str(line);
            continue;
        }

        let line_bytes = line.as_bytes();
        let mut last_end = 0;
        for m in URL_RE.find_iter(line) {
            let before = if m.start() > 0 {
                line_bytes[m.start() - 1]
            } else {
                b' '
            };
            if matches!(before, b'(' | b'[' | b']' | b'<') {
                continue;
            }

            // Text between the previously emitted link and this URL; an odd
            // number of backticks means the URL sits inside inline code.
            let gap = &line[last_end..m.start()];
            if gap.bytes().filter(|&b| b == b'`').count() % 2 != 0 {
                continue;
            }

            let matched = m.as_str();
            let url = matched.trim_end_matches(['.', ',', ';', ':', '!', '?']);
            result.push_str(gap);
            result.push('[');
            result.push_str(url);
            result.push_str("](");
            result.push_str(url);
            result.push(')');
            // Re-emit the punctuation that was trimmed off the URL.
            result.push_str(&matched[url.len()..]);
            last_end = m.end();
        }
        result.push_str(&line[last_end..]);
    }
    result
}
/// Unit tests for the GitHub extractor.
///
/// Fixture-based tests read HTML files from `tests/fixtures`. The
/// `api_fetch_live_*` tests and the pagination test call the live GitHub API
/// and only assert on the result when a response was obtained.
#[cfg(test)]
#[expect(clippy::unwrap_used, clippy::panic)]
mod tests {
    use super::*;

    /// Reads a fixture file, panicking with the path when it is missing.
    fn load_fixture(name: &str) -> String {
        let path = format!("{}/tests/fixtures/{name}", env!("CARGO_MANIFEST_DIR"));
        std::fs::read_to_string(&path)
            .unwrap_or_else(|e| panic!("fixture not found at {path}: {e}"))
    }

    /// Extracts the `url` field from the JSON stored in the fixture's leading
    /// `<!-- {...} -->` comment.
    fn url_from_fixture(html_str: &str) -> Option<String> {
        let start = html_str.find("<!-- {")?;
        let comment_start = start + "<!-- ".len();
        let end = html_str[comment_start..].find(" -->")?;
        let json_str = &html_str[comment_start..comment_start + end];
        let val: serde_json::Value = serde_json::from_str(json_str).ok()?;
        val.get("url").and_then(|v| v.as_str()).map(String::from)
    }

    #[test]
    fn extract_github_issue() {
        let html_str = load_fixture("general--github.com-issue-56.html");
        let url = url_from_fixture(&html_str);
        let html = Html::parse_document(&html_str);
        assert!(is_github(&html, url.as_deref()));
        let result = extract_github(&html, url.as_deref(), true).unwrap();
        assert!(result.title.as_ref().unwrap().contains("Issue #56"));
        assert!(result.site.as_ref().unwrap().contains("GitHub"));
        assert!(result.content.contains("defuddle-cloudflare"));
        assert!(result.content.contains("Comments"));
    }

    #[test]
    fn parse_github_issue_url() {
        let result = parse_github_url("https://github.com/owner/repo/issues/123");
        let (o, r, n, is_pr) = result.unwrap();
        assert_eq!(o, "owner");
        assert_eq!(r, "repo");
        assert_eq!(n, "123");
        assert!(!is_pr);
    }

    #[test]
    fn parse_github_pr_url() {
        let result = parse_github_url("https://github.com/owner/repo/pull/42");
        let (o, r, n, is_pr) = result.unwrap();
        assert_eq!(o, "owner");
        assert_eq!(r, "repo");
        assert_eq!(n, "42");
        assert!(is_pr);
    }

    #[test]
    fn parse_github_url_invalid() {
        assert!(parse_github_url("https://github.com/owner/repo").is_none());
        assert!(parse_github_url("https://example.com").is_none());
    }

    #[test]
    fn markdown_to_html_basic() {
        let md = "Hello **world**\n\nA paragraph.\n\n## Header\n\n```\ncode\n```";
        let html = markdown_to_html(md);
        assert!(html.contains("<strong>world</strong>"));
        assert!(html.contains("<p>A paragraph.</p>"));
        assert!(html.contains("<h2>Header</h2>"));
        assert!(html.contains("<pre><code>"));
        assert!(html.contains("code"));
    }

    #[test]
    fn markdown_to_html_gfm_features() {
        let md = "- [x] done\n- [ ] todo\n\n~~strike~~\n\n| a | b |\n|---|---|\n| 1 | 2 |";
        let html = markdown_to_html(md);
        assert!(html.contains("<del>strike</del>"));
        assert!(html.contains("<table>"));
        assert!(html.contains("checked"));
    }

    #[test]
    fn markdown_to_html_autolinks() {
        let md = "See https://example.com for info\n\nAlready [linked](https://other.com)";
        let html = markdown_to_html(md);
        assert!(html.contains("<a href=\"https://example.com\">"));
        assert!(html.contains("<a href=\"https://other.com\">"));
    }

    #[test]
    fn markdown_to_html_autolinks_skip_code_blocks() {
        let md = "```\nhttps://example.com\n```";
        let html = markdown_to_html(md);
        assert!(!html.contains("<a href"));
        assert!(html.contains("https://example.com"));
    }

    #[test]
    fn markdown_to_html_escapes_raw_html() {
        let md = "<script>alert('xss')</script>\n\n<b>bold</b>";
        let html = markdown_to_html(md);
        // The raw tags must not survive as markup...
        assert!(!html.contains("<script>"));
        assert!(!html.contains("</script>"));
        // ...but must still be visible as escaped text.
        assert!(html.contains("&lt;script&gt;"));
        assert!(!html.contains("<b>"));
        assert!(html.contains("&lt;b&gt;"));
        assert!(html.contains("alert"));
    }

    #[test]
    fn markdown_to_html_sanitizes_javascript_links() {
        let md = "[click](javascript:alert(1))";
        let html = markdown_to_html(md);
        assert!(!html.contains("javascript:"));
    }

    #[test]
    fn markdown_to_html_sanitizes_javascript_case_variants() {
        for proto in ["JavaScript:", "JAVASCRIPT:", "jAvAsCrIpT:"] {
            let md = format!("[click]({proto}alert(1))");
            let html = markdown_to_html(&md);
            assert!(
                !html.contains(proto),
                "should block {proto} but got: {html}"
            );
        }
    }

    #[test]
    fn markdown_to_html_sanitizes_data_uri_links() {
        let md = "[click](data:text/html,<script>alert(1)</script>)";
        let html = markdown_to_html(md);
        assert!(!html.contains("data:text/html"));
    }

    #[test]
    fn markdown_to_html_autolinks_skip_angle_brackets() {
        let md = "See <https://example.com> for info";
        let html = markdown_to_html(md);
        assert!(html.contains("<a href=\"https://example.com\">"));
        assert!(!html.contains("[https://"));
    }

    #[test]
    fn markdown_to_html_autolinks_trim_trailing_punctuation() {
        let md = "Visit https://example.com. And https://other.com, too!";
        let html = markdown_to_html(md);
        assert!(html.contains("href=\"https://example.com\""));
        assert!(html.contains("href=\"https://other.com\""));
        assert!(!html.contains("href=\"https://example.com.\""));
        assert!(!html.contains("href=\"https://other.com,\""));
    }

    #[test]
    fn markdown_to_html_autolinks_skip_inline_code() {
        let md = "Use `https://example.com` as the base URL";
        let html = markdown_to_html(md);
        assert!(html.contains("<code>https://example.com</code>"));
        assert!(!html.contains("<a href=\"https://example.com\">"));
    }

    #[test]
    fn markdown_to_html_autolinks_skip_reference_links() {
        let md = "[example]: https://example.com\n\nSee [example] for details.";
        let html = markdown_to_html(md);
        assert!(html.contains("href=\"https://example.com\""));
        assert!(!html.contains("[https://example.com](https://example.com)"));
    }

    // Live API test: only asserts when the request succeeded.
    #[test]
    fn api_fetch_live_issue() {
        let url = "https://github.com/rust-lang/rust/issues/1";
        let result = try_api_fetch(Some(url), false);
        if let Some(r) = result {
            assert!(r.title.is_some());
            assert!(r.author.is_some());
            assert_eq!(r.site.as_deref(), Some("GitHub"));
        }
    }

    #[test]
    fn extract_github_pr() {
        let html_str = load_fixture("general--github.com-test-owner-test-repo-pull-42.html");
        let url = url_from_fixture(&html_str);
        let html = Html::parse_document(&html_str);
        assert!(is_github(&html, url.as_deref()));
        let result = extract_github(&html, url.as_deref(), true).unwrap();
        assert!(result.title.unwrap().contains("Pull Request #42"));
        assert_eq!(result.author.as_deref(), Some("author-one"));
        assert_eq!(result.site.as_deref(), Some("GitHub"));
        assert!(result.content.contains("Summary"));
        assert!(result.content.contains("regression"));
        assert!(result.content.contains("Comments"));
        assert!(result.content.contains("reviewer-bot"));
    }

    // Live API test: only asserts when the request succeeded.
    #[test]
    fn api_fetch_live_pr_includes_labels_and_status() {
        let url = "https://github.com/rust-lang/rust/pull/2";
        let result = try_api_fetch(Some(url), false);
        if let Some(r) = result {
            assert!(
                r.content.contains("<strong>Status:</strong>"),
                "PR content should include status"
            );
        }
    }

    // Live API test: only asserts when the request succeeded.
    #[test]
    fn api_fetch_live_issue_with_labels() {
        let url = "https://github.com/rust-lang/rust/issues/1";
        let result = try_api_fetch(Some(url), false);
        if let Some(r) = result {
            assert!(r.title.is_some());
            assert!(!r.content.is_empty());
        }
    }

    // Live API test: only asserts when the request succeeded.
    #[test]
    fn api_fetch_live_pr_includes_review_comments() {
        let url = "https://github.com/rust-lang/rust/pull/2";
        let result = try_api_fetch(Some(url), true);
        if let Some(r) = result {
            assert!(r.title.is_some());
            assert!(!r.content.is_empty());
        }
    }

    // Live API test: the bound holds whether or not the request succeeds.
    #[test]
    fn pagination_helper_returns_empty_for_invalid_url() {
        let items = fetch_github_json_paginated(
            "https://api.github.com/repos/nonexistent/repo/issues/99999/comments",
        );
        assert!(items.len() <= 100);
    }
}