use serde::Deserialize;
use super::{
ContextSection, ContextSnippet, ContextSourceError, SNIPPET_BODY_CHARS,
truncate_on_char_boundary,
};
/// Source identifier reported in the `src` field of `ContextSourceError::Parse`
/// when the response body handed to `parse_section` cannot be deserialized.
const SOURCE_NAME: &str = "confluence";
pub fn strip_html(html: &str) -> String {
let mut text = String::with_capacity(html.len());
let mut in_tag = false;
for ch in html.chars() {
match ch {
'<' => in_tag = true,
'>' => in_tag = false,
_ if !in_tag => text.push(ch),
_ => {}
}
}
let text = text
.replace(" ", " ")
.replace("&", "&")
.replace("<", "<")
.replace(">", ">")
.replace(""", "\"")
.replace("'", "'");
let collapsed = text.split_whitespace().collect::<Vec<_>>().join(" ");
truncate_on_char_boundary(&collapsed, SNIPPET_BODY_CHARS).to_string()
}
/// Top-level JSON object that `parse_section` deserializes the response body into.
#[derive(Debug, Deserialize)]
pub struct ConfluenceSearchResponse {
/// Pages under the `results` key; an empty list when the key is absent.
#[serde(default)]
results: Vec<ConfluencePage>,
}
/// One entry of the `results` array. Every field is optional in the JSON;
/// `parse_section` maps each page to one `ContextSnippet`.
#[derive(Debug, Deserialize)]
struct ConfluencePage {
/// Page title; the empty string when the key is missing. Becomes the snippet title.
#[serde(default)]
title: String,
/// Space the page belongs to; used to build the snippet subtitle.
#[serde(default)]
space: Option<ConfluenceSpace>,
/// Link collection, read from the JSON key `_links`; used to build the snippet link.
#[serde(default, rename = "_links")]
links: Option<ConfluenceLinks>,
/// Page body container; its `view.value` HTML is reduced to a text excerpt by `strip_html`.
#[serde(default)]
body: Option<ConfluenceBody>,
}
/// The `space` object of a page. `parse_section` labels the snippet with
/// `name` when present and falls back to `key` otherwise.
#[derive(Debug, Deserialize)]
struct ConfluenceSpace {
/// Space name, e.g. `"Engineering"` in this module's tests.
#[serde(default)]
name: Option<String>,
/// Space key, e.g. `"ENG"` in this module's tests.
#[serde(default)]
key: Option<String>,
}
/// The `_links` object of a page.
#[derive(Debug, Deserialize)]
struct ConfluenceLinks {
/// Path of the page's web UI; `parse_section` appends it to `{base_url}/wiki`,
/// so it is expected to start with `/` (as in `/spaces/ENG/pages/123/Auth`).
#[serde(default)]
webui: Option<String>,
}
/// The `body` object of a page; only its `view` member is read.
#[derive(Debug, Deserialize)]
struct ConfluenceBody {
/// The `view` representation of the body, if the response includes one.
#[serde(default)]
view: Option<ConfluenceBodyView>,
}
/// The `body.view` object of a page.
#[derive(Debug, Deserialize)]
struct ConfluenceBodyView {
/// HTML markup of the page body; `parse_section` feeds it to `strip_html`.
#[serde(default)]
value: Option<String>,
}
/// Parses a Confluence search response into a "Related Confluence docs" section.
///
/// * `body` - raw JSON text of the response.
/// * `base_url` - site root such as `https://acme.atlassian.net`. Trailing
///   slashes are ignored so the generated links never contain `//wiki`.
///
/// Each entry of `results` becomes one `ContextSnippet`:
/// * `title` - the page title (empty when the JSON omits it);
/// * `subtitle` - `space: <name>`, falling back to the space key, or `None`
///   when neither is present;
/// * `link` - `<base_url>/wiki<webui>` when `_links.webui` is present;
/// * `body` - the text produced by `strip_html` from `body.view.value`, or
///   `None` when there is no body or the stripped text is empty.
///
/// # Errors
///
/// Returns `ContextSourceError::Parse` (with `src` set to `SOURCE_NAME` and
/// `detail` set to the deserializer's message) when `body` cannot be
/// deserialized as a `ConfluenceSearchResponse`.
pub fn parse_section(body: &str, base_url: &str) -> Result<ContextSection, ContextSourceError> {
    let resp: ConfluenceSearchResponse =
        serde_json::from_str(body).map_err(|e| ContextSourceError::Parse {
            src: SOURCE_NAME,
            detail: e.to_string(),
        })?;

    // Normalize once, outside the per-page closure: a configured base URL
    // ending in `/` would otherwise yield `https://host//wiki/...`.
    let base = base_url.trim_end_matches('/');

    let snippets = resp
        .results
        .into_iter()
        .map(|page| {
            // Prefer the human-readable space name over the short key.
            let subtitle = page
                .space
                .and_then(|s| s.name.or(s.key))
                .map(|s| format!("space: {s}"));
            let link = page
                .links
                .and_then(|l| l.webui)
                .map(|webui| format!("{base}/wiki{webui}"));
            // A body that strips down to nothing is reported as absent.
            let body_excerpt = page
                .body
                .and_then(|b| b.view)
                .and_then(|v| v.value)
                .map(|html| strip_html(&html))
                .filter(|s| !s.is_empty());
            ContextSnippet {
                title: page.title,
                subtitle,
                body: body_excerpt,
                link,
            }
        })
        .collect();

    Ok(ContextSection {
        heading: "Related Confluence docs".to_string(),
        snippets,
    })
}
/// Unit tests for `strip_html` and `parse_section`.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn strip_html_basic() {
        let html = "<p>Hello <b>world</b></p>";
        assert_eq!(strip_html(html), "Hello world");
    }

    #[test]
    fn strip_html_collapses_whitespace() {
        let html = "<div>\n line one\n\n line two </div>";
        assert_eq!(strip_html(html), "line one line two");
    }

    #[test]
    fn strip_html_entities() {
        // The ampersand is spelled `\u{26}` so the entity references in this
        // input stay intact even if the file passes through a tool that
        // decodes HTML entities. The input exercises the amp, lt, gt, quot
        // and #39 references.
        let html = "<p>a \u{26}amp; b \u{26}lt;tag\u{26}gt; \u{26}quot;q\u{26}quot; \u{26}#39;x\u{26}#39; end</p>";
        assert_eq!(strip_html(html), "a & b <tag> \"q\" 'x' end");
    }

    #[test]
    fn strip_html_truncates() {
        let html = format!("<p>{}</p>", "x".repeat(SNIPPET_BODY_CHARS + 100));
        assert_eq!(strip_html(&html).chars().count(), SNIPPET_BODY_CHARS);
    }

    #[test]
    fn parse_pages_to_section() {
        let body = r#"{
            "results": [
                {"title": "Auth Architecture", "space": {"name": "Engineering"},
                 "_links": {"webui": "/spaces/ENG/pages/123/Auth"}},
                {"title": "Sessions", "space": {"key": "ENG"}}
            ]
        }"#;
        let section = parse_section(body, "https://acme.atlassian.net").unwrap();
        assert_eq!(section.heading, "Related Confluence docs");
        assert_eq!(section.snippets.len(), 2);
        assert_eq!(section.snippets[0].title, "Auth Architecture");
        assert_eq!(
            section.snippets[0].subtitle.as_deref(),
            Some("space: Engineering")
        );
        assert_eq!(
            section.snippets[0].link.as_deref(),
            Some("https://acme.atlassian.net/wiki/spaces/ENG/pages/123/Auth")
        );
        assert!(section.snippets[0].body.is_none());
        assert_eq!(section.snippets[1].subtitle.as_deref(), Some("space: ENG"));
        assert!(section.snippets[1].link.is_none());
    }

    #[test]
    fn parse_embeds_body_excerpt() {
        let body = r#"{
            "results": [
                {"title": "Design", "space": {"name": "Eng"},
                 "body": {"view": {"value": "<p>The <b>session</b> token expires.</p>"}}}
            ]
        }"#;
        let section = parse_section(body, "https://acme.atlassian.net").unwrap();
        assert_eq!(
            section.snippets[0].body.as_deref(),
            Some("The session token expires.")
        );
    }

    #[test]
    fn parse_drops_empty_body() {
        // Markup that strips down to nothing must be reported as no body at all.
        let body = r#"{
            "results": [
                {"title": "Blank", "body": {"view": {"value": "<p> </p>"}}}
            ]
        }"#;
        let section = parse_section(body, "https://acme.atlassian.net").unwrap();
        assert_eq!(section.snippets.len(), 1);
        assert!(section.snippets[0].body.is_none());
        assert!(section.snippets[0].subtitle.is_none());
    }

    #[test]
    fn parse_error_on_garbage() {
        let r = parse_section("xx", "https://acme.atlassian.net");
        assert!(matches!(r, Err(ContextSourceError::Parse { .. })));
    }
}