synaptic_confluence/
lib.rs1use async_trait::async_trait;
2use base64::Engine;
3use serde_json::Value;
4use std::collections::HashMap;
5use synaptic_core::{Document, Loader, SynapticError};
6
/// Connection settings and page selection for loading Confluence content.
#[derive(Clone, Debug)]
pub struct ConfluenceConfig {
    /// Host name of the Confluence site; it is interpolated into
    /// `https://{domain}/wiki/api/v2/...` request URLs.
    pub domain: String,
    /// Account e-mail, used as the user part of the HTTP Basic credentials.
    pub email: String,
    /// API token, used as the password part of the HTTP Basic credentials.
    pub api_token: String,
    /// Optional space whose pages are loaded in addition to `page_ids`.
    pub space_key: Option<String>,
    /// Explicit IDs of pages to load.
    pub page_ids: Vec<String>,
}
15
16impl ConfluenceConfig {
17 pub fn new(
18 domain: impl Into<String>,
19 email: impl Into<String>,
20 api_token: impl Into<String>,
21 ) -> Self {
22 Self {
23 domain: domain.into(),
24 email: email.into(),
25 api_token: api_token.into(),
26 space_key: None,
27 page_ids: vec![],
28 }
29 }
30
31 pub fn with_space_key(mut self, key: impl Into<String>) -> Self {
32 self.space_key = Some(key.into());
33 self
34 }
35
36 pub fn with_page_ids(mut self, ids: Vec<String>) -> Self {
37 self.page_ids = ids;
38 self
39 }
40}
41
42pub struct ConfluenceLoader {
43 config: ConfluenceConfig,
44 client: reqwest::Client,
45}
46
47impl ConfluenceLoader {
48 pub fn new(config: ConfluenceConfig) -> Self {
49 Self {
50 config,
51 client: reqwest::Client::new(),
52 }
53 }
54
55 fn auth_header(&self) -> String {
56 let credentials = format!("{}:{}", self.config.email, self.config.api_token);
57 format!(
58 "Basic {}",
59 base64::engine::general_purpose::STANDARD.encode(credentials.as_bytes())
60 )
61 }
62
63 async fn fetch_page(&self, page_id: &str) -> Result<Document, SynapticError> {
64 let url = format!(
65 "https://{}/wiki/api/v2/pages/{}?body-format=storage",
66 self.config.domain, page_id
67 );
68 let resp = self
69 .client
70 .get(&url)
71 .header("Authorization", self.auth_header())
72 .header("Accept", "application/json")
73 .send()
74 .await
75 .map_err(|e| SynapticError::Loader(format!("Confluence fetch page: {e}")))?;
76 let body: Value = resp
77 .json()
78 .await
79 .map_err(|e| SynapticError::Loader(format!("Confluence parse page: {e}")))?;
80
81 let title = body["title"].as_str().unwrap_or("").to_string();
82 let content_html = body["body"]["storage"]["value"].as_str().unwrap_or("");
83 let content = strip_html_tags(content_html);
84
85 let mut metadata = HashMap::new();
86 metadata.insert("title".to_string(), Value::String(title));
87 metadata.insert(
88 "source".to_string(),
89 Value::String(format!("confluence:{}", page_id)),
90 );
91 if let Some(space_id) = body["spaceId"].as_str() {
92 metadata.insert("space_id".to_string(), Value::String(space_id.to_string()));
93 }
94
95 Ok(Document {
96 id: page_id.to_string(),
97 content,
98 metadata,
99 })
100 }
101
102 async fn fetch_space_pages(&self, space_key: &str) -> Result<Vec<String>, SynapticError> {
103 let url = format!(
104 "https://{}/wiki/api/v2/spaces/{}/pages?limit=50",
105 self.config.domain, space_key
106 );
107 let resp = self
108 .client
109 .get(&url)
110 .header("Authorization", self.auth_header())
111 .header("Accept", "application/json")
112 .send()
113 .await
114 .map_err(|e| SynapticError::Loader(format!("Confluence fetch space: {e}")))?;
115 let body: Value = resp
116 .json()
117 .await
118 .map_err(|e| SynapticError::Loader(format!("Confluence parse space: {e}")))?;
119
120 let ids = body["results"]
121 .as_array()
122 .unwrap_or(&vec![])
123 .iter()
124 .filter_map(|p| p["id"].as_str().map(|s| s.to_string()))
125 .collect();
126 Ok(ids)
127 }
128}
129
/// Converts an HTML/XHTML fragment to plain text.
///
/// * Everything from `<` up to the matching `>` is dropped.
/// * Block-level tags (`<p>`, `<br/>`, `<li>`, headings, table cells, ...) act
///   as word separators, so text from adjacent blocks is not fused together;
///   inline tags such as `<b>` leave the surrounding text untouched.
/// * A `>` that is not closing a tag is kept as literal text.
/// * The entities `&nbsp;`, `&lt;`, `&gt;`, `&quot;`, `&#39;`, `&apos;` and
///   `&amp;` are decoded.
/// * Runs of whitespace are collapsed to single spaces and the result is trimmed.
fn strip_html_tags(html: &str) -> String {
    let mut text = String::with_capacity(html.len());
    let mut tag = String::new();
    let mut in_tag = false;

    for c in html.chars() {
        match c {
            '<' => {
                in_tag = true;
                tag.clear();
            }
            '>' if in_tag => {
                in_tag = false;
                if is_block_tag(&tag) {
                    text.push(' ');
                }
            }
            _ if in_tag => tag.push(c),
            // Includes a stray `>` outside any tag, which is ordinary text.
            _ => text.push(c),
        }
    }

    // Entities are decoded only after tag removal so that an escaped `&lt;b&gt;`
    // is not mistaken for markup. `&amp;` goes last to avoid double-decoding
    // sequences such as `&amp;lt;`.
    let text = text
        .replace("&nbsp;", " ")
        .replace("&lt;", "<")
        .replace("&gt;", ">")
        .replace("&quot;", "\"")
        .replace("&#39;", "'")
        .replace("&apos;", "'")
        .replace("&amp;", "&");

    text.split_whitespace().collect::<Vec<_>>().join(" ")
}

/// Reports whether the text between `<` and `>` names a block-level element,
/// for either an opening, closing or self-closing tag (case-insensitive).
fn is_block_tag(tag: &str) -> bool {
    const BLOCK_TAGS: &[&str] = &[
        "p", "div", "br", "hr", "li", "ul", "ol", "table", "tr", "td", "th", "h1", "h2", "h3",
        "h4", "h5", "h6", "blockquote", "pre",
    ];
    let name = tag.trim_start().trim_start_matches('/');
    let end = name
        .find(|c: char| !c.is_ascii_alphanumeric())
        .unwrap_or(name.len());
    let name = &name[..end];
    BLOCK_TAGS.iter().any(|block| block.eq_ignore_ascii_case(name))
}
143
144#[async_trait]
145impl Loader for ConfluenceLoader {
146 async fn load(&self) -> Result<Vec<Document>, SynapticError> {
147 let mut page_ids = self.config.page_ids.clone();
148 if let Some(ref space_key) = self.config.space_key {
149 let space_ids = self.fetch_space_pages(space_key).await?;
150 page_ids.extend(space_ids);
151 }
152 let mut documents = Vec::new();
153 for page_id in &page_ids {
154 match self.fetch_page(page_id).await {
155 Ok(doc) => documents.push(doc),
156 Err(e) => eprintln!("Warning: failed to load Confluence page {}: {}", page_id, e),
157 }
158 }
159 Ok(documents)
160 }
161}