use std::collections::HashMap;
use regex::Regex;
use serde::{Deserialize, Serialize};
use tracing::warn;
use super::{ConfluenceSourceConfig, ExternalSignal};
/// Confidence value attached to every [`ExternalSignal`] that `classify_page`
/// produces from a Confluence label match.
pub const CONFLUENCE_CONFIDENCE: f64 = 0.80;
/// Returns the pattern that captures the numeric page id (group 1) from a
/// Confluence page URL path of the form `/wiki/spaces/<space>/pages/<id>`.
///
/// The pattern is compiled once per process and cached; every call hands back
/// a clone of the cached [`Regex`] instead of recompiling the pattern.
///
/// # Panics
///
/// Panics only if the hard-coded pattern is invalid, which would be a bug.
fn url_regex() -> Regex {
    // Compiling a regex is far more expensive than cloning a compiled one, so
    // keep a single compiled instance around for the lifetime of the process.
    static PATTERN: std::sync::OnceLock<Regex> = std::sync::OnceLock::new();
    PATTERN
        .get_or_init(|| {
            Regex::new(r"/wiki/spaces/[^/]+/pages/(\d+)").expect("static regex is valid")
        })
        .clone()
}
/// Returns the pattern that captures the numeric page id (group 1) from a
/// bracketed `[CONF-<id>]` reference.
///
/// The pattern is compiled once per process and cached; every call hands back
/// a clone of the cached [`Regex`] instead of recompiling the pattern.
///
/// # Panics
///
/// Panics only if the hard-coded pattern is invalid, which would be a bug.
fn smart_commit_regex() -> Regex {
    // Compiling a regex is far more expensive than cloning a compiled one, so
    // keep a single compiled instance around for the lifetime of the process.
    static PATTERN: std::sync::OnceLock<Regex> = std::sync::OnceLock::new();
    PATTERN
        .get_or_init(|| Regex::new(r"\[CONF-(\d+)\]").expect("static regex is valid"))
        .clone()
}
/// Collects the Confluence page ids referenced in `message`.
///
/// Two reference forms are recognised: page URLs matched by `url_regex` and
/// bracketed tags matched by `smart_commit_regex`. All URL matches are emitted
/// first (in order of appearance), followed by tag matches; an id is reported
/// only the first time it is seen. Captures that do not parse as `u64`, and
/// the id `0`, are dropped.
pub fn extract_confluence_ids(message: &str) -> Vec<u64> {
    // Bind the patterns to locals so the match iterators can borrow them.
    let by_url = url_regex();
    let by_tag = smart_commit_regex();
    let mut already = std::collections::HashSet::new();

    by_url
        .captures_iter(message)
        .chain(by_tag.captures_iter(message))
        .filter_map(|caps| caps.get(1))
        .filter_map(|digits| digits.as_str().parse::<u64>().ok())
        // `insert` returns false for ids that were already emitted.
        .filter(|&page_id| page_id > 0 && already.insert(page_id))
        .collect()
}
/// The portion of a Confluence content record this module works with; it is
/// the type `fetch_page` deserializes the API response body into.
#[derive(Debug, Deserialize, Serialize)]
pub struct ConfluencePage {
/// Page identifier, carried as a string.
pub id: String,
/// Page title; falls back to `String::default()` (empty) when the field is absent.
#[serde(default)]
pub title: String,
/// Optional metadata section; `classify_page` reads labels from here.
pub metadata: Option<ConfluenceMetadata>,
}
/// Metadata section of a [`ConfluencePage`]; only the label list is modelled.
#[derive(Debug, Deserialize, Serialize)]
pub struct ConfluenceMetadata {
/// Optional list of labels attached to the page.
pub labels: Option<ConfluenceLabelList>,
}
/// Wrapper object holding a page's labels under a `results` array.
#[derive(Debug, Deserialize, Serialize)]
pub struct ConfluenceLabelList {
/// The labels themselves; falls back to an empty vector when the field is absent.
#[serde(default)]
pub results: Vec<ConfluenceLabel>,
}
/// A single label attached to a Confluence page.
#[derive(Debug, Deserialize, Serialize)]
pub struct ConfluenceLabel {
/// Label prefix (the tests in this file use values such as `"global"` and
/// `"team"`); falls back to an empty string when absent. `classify_page`
/// does not consult it.
#[serde(default)]
pub prefix: String,
/// Label name; this is the key `classify_page` looks up in `label_mappings`.
pub name: String,
}
/// Maps a page to an [`ExternalSignal`] using its labels.
///
/// Labels are checked in the order they appear on the page; the first one
/// whose name is a key in `config.label_mappings` decides the category. The
/// signal carries [`CONFLUENCE_CONFIDENCE`] and a `confluence:label:<name>`
/// source string.
///
/// Returns `None` when the page has no metadata, no label list, or no label
/// with a configured mapping.
pub fn classify_page(
    page: &ConfluencePage,
    config: &ConfluenceSourceConfig,
) -> Option<ExternalSignal> {
    let metadata = page.metadata.as_ref()?;
    let label_list = metadata.labels.as_ref()?;

    label_list.results.iter().find_map(|label| {
        let category = config.label_mappings.get(label.name.as_str())?;
        Some(ExternalSignal {
            category: category.clone(),
            confidence: CONFLUENCE_CONFIDENCE,
            source: format!("confluence:label:{}", label.name),
        })
    })
}
/// Fetches one Confluence page, with its labels expanded, over the REST API.
///
/// Credentials come from the environment variables named by
/// `config.token_env` and `config.email_env` and are sent as HTTP basic auth
/// (email as the user name, token as the password). The request goes to
/// `<base>/wiki/rest/api/content/<id>?expand=metadata.labels`, where `<base>`
/// is `base_url_override` when given and `config.base_url` otherwise. Trailing
/// slashes on the base are ignored so the path never starts with `//`.
///
/// Returns `None`, after logging a warning, when either credential variable
/// is unset or empty, the request fails, the response status is not a
/// success, or the body cannot be deserialized into a [`ConfluencePage`].
pub async fn fetch_page(
    client: &reqwest::Client,
    config: &ConfluenceSourceConfig,
    id: u64,
    base_url_override: Option<&str>,
) -> Option<ConfluencePage> {
    // An empty variable is treated the same as a missing one.
    let Some(token) = std::env::var(&config.token_env)
        .ok()
        .filter(|value| !value.is_empty())
    else {
        warn!(
            token_env = %config.token_env,
            "Confluence token env var `{}` is not set — skipping Confluence lookups",
            config.token_env,
        );
        return None;
    };

    let Some(email) = std::env::var(&config.email_env)
        .ok()
        .filter(|value| !value.is_empty())
    else {
        warn!(
            email_env = %config.email_env,
            "Confluence email env var `{}` is not set — skipping Confluence lookups",
            config.email_env,
        );
        return None;
    };

    // Strip trailing slashes so a base such as `https://host/` does not yield
    // `https://host//wiki/...`.
    let base = base_url_override
        .unwrap_or(&config.base_url)
        .trim_end_matches('/');
    let url = format!("{base}/wiki/rest/api/content/{id}?expand=metadata.labels");

    let resp = match client
        .get(&url)
        .basic_auth(&email, Some(&token))
        .send()
        .await
    {
        Ok(resp) => resp,
        Err(e) => {
            warn!(id, error = %e, "Confluence API request failed; skipping");
            return None;
        }
    };

    let status = resp.status();
    if !status.is_success() {
        warn!(
            id,
            status = %status,
            "Confluence API returned non-success status; skipping"
        );
        return None;
    }

    match resp.json::<ConfluencePage>().await {
        Ok(page) => Some(page),
        Err(e) => {
            warn!(id, error = %e, "failed to parse Confluence page response; skipping");
            None
        }
    }
}
/// Fetches and classifies each page in `ids`, one request at a time.
///
/// The result is keyed by the decimal string form of the page id. A value of
/// `None` means the page could not be fetched or none of its labels had a
/// configured mapping. An id that appears more than once in `ids` is fetched
/// only the first time.
pub async fn fetch_pages_batch(
    client: &reqwest::Client,
    config: &ConfluenceSourceConfig,
    ids: &[u64],
    base_url_override: Option<&str>,
) -> HashMap<String, Option<ExternalSignal>> {
    use std::collections::hash_map::Entry;

    let mut signals = HashMap::new();
    for page_id in ids.iter().copied() {
        // Skip ids already handled so each page is requested at most once.
        let slot = match signals.entry(page_id.to_string()) {
            Entry::Occupied(_) => continue,
            Entry::Vacant(slot) => slot,
        };
        let fetched = fetch_page(client, config, page_id, base_url_override).await;
        slot.insert(fetched.and_then(|page| classify_page(&page, config)));
    }
    signals
}
// Unit tests for id extraction, label classification, config deserialization,
// and an end-to-end fetch against a local mock HTTP server.
#[cfg(test)]
mod tests {
use super::*;
// A full page URL yields the numeric id from its `/pages/<id>` segment.
#[test]
fn extract_confluence_refs_url() {
let ids = extract_confluence_ids(
"see https://myco.atlassian.net/wiki/spaces/ENG/pages/123456789 for context",
);
assert_eq!(ids, vec![123456789u64]);
}
// A bracketed `[CONF-<id>]` tag yields its numeric id.
#[test]
fn extract_confluence_refs_smart_commit() {
let ids = extract_confluence_ids("deploy: [CONF-4567] runbook followed");
assert_eq!(ids, vec![4567u64]);
}
// Both reference forms are recognised in one message, URL matches are listed
// before tag matches regardless of their position in the text, and an id is
// reported only once even when both forms mention it.
#[test]
fn extract_confluence_refs_both_forms_and_dedup() {
let ids = extract_confluence_ids(
"[CONF-100] and /wiki/spaces/ENG/pages/200 and [CONF-100] again",
);
// 200 comes first because URL matches are collected before tag matches.
assert_eq!(ids, vec![200u64, 100u64]);
let ids2 = extract_confluence_ids("/wiki/spaces/ENG/pages/500 and [CONF-300]");
assert_eq!(ids2, vec![500u64, 300u64]);
let ids3 = extract_confluence_ids("/wiki/spaces/ENG/pages/777 and [CONF-777]");
assert_eq!(ids3, vec![777u64]);
}
// Messages without either reference form (including a bare `PROJ-123` key)
// produce no ids.
#[test]
fn extract_confluence_refs_no_match() {
assert!(extract_confluence_ids("feat: add login flow").is_empty());
assert!(extract_confluence_ids("fix: PROJ-123 jira style").is_empty());
}
// When several labels have mappings, the first label on the page decides the
// category, and the signal carries the fixed confidence and the label name.
#[test]
fn classify_page_matches_label() {
use std::collections::HashMap;
let page = ConfluencePage {
id: "123".to_string(),
title: "Deployment Runbook".to_string(),
metadata: Some(ConfluenceMetadata {
labels: Some(ConfluenceLabelList {
results: vec![
ConfluenceLabel {
prefix: "global".to_string(),
name: "runbook".to_string(),
},
ConfluenceLabel {
prefix: "global".to_string(),
name: "rfc".to_string(),
},
],
}),
}),
};
let config = ConfluenceSourceConfig {
base_url: "https://myco.atlassian.net".to_string(),
token_env: "CONF_TOKEN".to_string(), email_env: "CONF_EMAIL".to_string(),
label_mappings: {
let mut m = HashMap::new();
m.insert("runbook".to_string(), "devops".to_string());
m.insert("rfc".to_string(), "tech_debt_refactoring".to_string());
m
},
};
let signal = classify_page(&page, &config).expect("should match");
// "runbook" is listed before "rfc" on the page, so its mapping wins.
assert_eq!(signal.category, "devops");
assert!(
(signal.confidence - CONFLUENCE_CONFIDENCE).abs() < f64::EPSILON,
"confidence should be CONFLUENCE_CONFIDENCE ({CONFLUENCE_CONFIDENCE})"
);
assert!(signal.source.contains("runbook"));
}
// A page whose labels have no configured mapping produces no signal.
#[test]
fn classify_page_returns_none_on_no_match() {
use std::collections::HashMap;
let page = ConfluencePage {
id: "456".to_string(),
title: "Untitled".to_string(),
metadata: Some(ConfluenceMetadata {
labels: Some(ConfluenceLabelList {
results: vec![ConfluenceLabel {
prefix: "global".to_string(),
name: "unlabeled".to_string(),
}],
}),
}),
};
let config = ConfluenceSourceConfig {
base_url: "https://myco.atlassian.net".to_string(),
token_env: "CONF_TOKEN".to_string(), email_env: "CONF_EMAIL".to_string(),
label_mappings: HashMap::new(),
};
assert!(classify_page(&page, &config).is_none());
}
// A page without a metadata section produces no signal even when mappings exist.
#[test]
fn classify_page_with_no_metadata_returns_none() {
use std::collections::HashMap;
let page = ConfluencePage {
id: "789".to_string(),
title: "Empty".to_string(),
metadata: None,
};
let config = ConfluenceSourceConfig {
base_url: "https://myco.atlassian.net".to_string(),
token_env: "CONF_TOKEN".to_string(), email_env: "CONF_EMAIL".to_string(),
label_mappings: {
let mut m = HashMap::new();
m.insert("runbook".to_string(), "devops".to_string());
m
},
};
assert!(classify_page(&page, &config).is_none());
}
// A YAML document tagged `type: confluence` deserializes into the
// `SourceConfig::Confluence` variant with all fields populated.
#[test]
fn confluence_source_config_deserializes() {
use crate::classify::sources::SourceConfig;
let yaml = r#"
type: confluence
base_url: "https://myco.atlassian.net/wiki"
token_env: CONFLUENCE_API_TOKEN
email_env: CONFLUENCE_EMAIL
label_mappings:
runbook: devops
rfc: tech_debt_refactoring
incident: bug_fix
"#;
let cfg: SourceConfig = serde_yaml::from_str(yaml).expect("deserialize");
match cfg {
SourceConfig::Confluence(c) => {
assert_eq!(c.base_url, "https://myco.atlassian.net/wiki");
assert_eq!(c.token_env, "CONFLUENCE_API_TOKEN"); assert_eq!(c.email_env, "CONFLUENCE_EMAIL");
assert_eq!(c.label_mappings.get("runbook"), Some(&"devops".to_string()));
assert_eq!(
c.label_mappings.get("rfc"),
Some(&"tech_debt_refactoring".to_string())
);
}
other => panic!("expected Confluence variant, got {other:?}"),
}
}
// A key that is not part of the config (`api_token_env`) must make
// deserialization fail rather than be silently ignored.
#[test]
fn confluence_source_config_unknown_field_is_rejected() {
let yaml = r#"
type: confluence
base_url: "https://myco.atlassian.net/wiki"
token_env: CONFLUENCE_API_TOKEN
email_env: CONFLUENCE_EMAIL
api_token_env: CONFLUENCE_API_TOKEN
label_mappings: {}
"#;
let result: Result<crate::classify::sources::SourceConfig, _> = serde_yaml::from_str(yaml);
assert!(result.is_err(), "unknown field must be rejected");
}
// End-to-end check: `fetch_page` retrieves a page from a local mock server
// and `classify_page` maps its "runbook" label to the configured category.
#[tokio::test]
async fn fetch_and_classify_via_wiremock() {
use wiremock::matchers::{method, path};
use wiremock::{Mock, MockServer, ResponseTemplate};
let server = MockServer::start().await;
let body = serde_json::json!({
"id": "99",
"title": "Deployment Runbook",
"metadata": {
"labels": {
"results": [
{"prefix": "global", "name": "runbook"},
{"prefix": "team", "name": "platform"}
]
}
}
});
// The mock matches on HTTP method and path only; the query string and
// credentials are not checked.
Mock::given(method("GET"))
.and(path("/wiki/rest/api/content/99"))
.respond_with(ResponseTemplate::new(200).set_body_json(body))
.mount(&server)
.await;
// SAFETY (assumed — TODO confirm): mutating the process environment is only
// sound while no other thread is reading or writing it. Within this file the
// `_WT`-suffixed variable names are used by this test alone.
unsafe { std::env::set_var("CONF_TOKEN_WT", "test-token") }; unsafe { std::env::set_var("CONF_EMAIL_WT", "test@example.com") };
use std::collections::HashMap;
let config = ConfluenceSourceConfig {
base_url: server.uri(),
token_env: "CONF_TOKEN_WT".to_string(), email_env: "CONF_EMAIL_WT".to_string(),
label_mappings: {
let mut m = HashMap::new();
m.insert("runbook".to_string(), "devops".to_string());
m
},
};
let client = reqwest::Client::new();
let page = fetch_page(&client, &config, 99, Some(&server.uri()))
.await
.expect("fetch should succeed");
let signal = classify_page(&page, &config).expect("should classify");
assert_eq!(signal.category, "devops");
assert!((signal.confidence - CONFLUENCE_CONFIDENCE).abs() < f64::EPSILON);
// NOTE(review): this cleanup does not run if an assertion above panics.
// SAFETY (assumed — TODO confirm): same reasoning as for `set_var` above.
unsafe { std::env::remove_var("CONF_TOKEN_WT") };
unsafe { std::env::remove_var("CONF_EMAIL_WT") };
}
}