// synaptic_confluence/lib.rs

1use async_trait::async_trait;
2use base64::Engine;
3use serde_json::Value;
4use std::collections::HashMap;
5use synaptic_core::{Document, Loader, SynapticError};
6
/// Connection and selection settings for loading pages from a Confluence site.
///
/// `Debug` is implemented by hand so that `api_token` is never written to
/// logs or panic messages.
#[derive(Clone)]
pub struct ConfluenceConfig {
    /// Host name of the Confluence site; request URLs are built as
    /// `https://{domain}/wiki/api/v2/...`.
    pub domain: String,
    /// Account e-mail, used as the user name for HTTP Basic authentication.
    pub email: String,
    /// API token, used as the password for HTTP Basic authentication.
    pub api_token: String,
    /// Optional space whose pages are loaded in addition to `page_ids`.
    pub space_key: Option<String>,
    /// Explicit page ids to load.
    pub page_ids: Vec<String>,
}

impl std::fmt::Debug for ConfluenceConfig {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("ConfluenceConfig")
            .field("domain", &self.domain)
            .field("email", &self.email)
            // The token is a credential: show that the field exists, not its value.
            .field("api_token", &"<redacted>")
            .field("space_key", &self.space_key)
            .field("page_ids", &self.page_ids)
            .finish()
    }
}
15
16impl ConfluenceConfig {
17    pub fn new(
18        domain: impl Into<String>,
19        email: impl Into<String>,
20        api_token: impl Into<String>,
21    ) -> Self {
22        Self {
23            domain: domain.into(),
24            email: email.into(),
25            api_token: api_token.into(),
26            space_key: None,
27            page_ids: vec![],
28        }
29    }
30
31    pub fn with_space_key(mut self, key: impl Into<String>) -> Self {
32        self.space_key = Some(key.into());
33        self
34    }
35
36    pub fn with_page_ids(mut self, ids: Vec<String>) -> Self {
37        self.page_ids = ids;
38        self
39    }
40}
41
/// Loader that fetches Confluence pages over HTTP and turns them into
/// `Document`s.
pub struct ConfluenceLoader {
    /// Site, credentials and page selection used for every request.
    config: ConfluenceConfig,
    /// HTTP client used to send the requests.
    client: reqwest::Client,
}
46
47impl ConfluenceLoader {
48    pub fn new(config: ConfluenceConfig) -> Self {
49        Self {
50            config,
51            client: reqwest::Client::new(),
52        }
53    }
54
55    fn auth_header(&self) -> String {
56        let credentials = format!("{}:{}", self.config.email, self.config.api_token);
57        format!(
58            "Basic {}",
59            base64::engine::general_purpose::STANDARD.encode(credentials.as_bytes())
60        )
61    }
62
63    async fn fetch_page(&self, page_id: &str) -> Result<Document, SynapticError> {
64        let url = format!(
65            "https://{}/wiki/api/v2/pages/{}?body-format=storage",
66            self.config.domain, page_id
67        );
68        let resp = self
69            .client
70            .get(&url)
71            .header("Authorization", self.auth_header())
72            .header("Accept", "application/json")
73            .send()
74            .await
75            .map_err(|e| SynapticError::Loader(format!("Confluence fetch page: {e}")))?;
76        let body: Value = resp
77            .json()
78            .await
79            .map_err(|e| SynapticError::Loader(format!("Confluence parse page: {e}")))?;
80
81        let title = body["title"].as_str().unwrap_or("").to_string();
82        let content_html = body["body"]["storage"]["value"].as_str().unwrap_or("");
83        let content = strip_html_tags(content_html);
84
85        let mut metadata = HashMap::new();
86        metadata.insert("title".to_string(), Value::String(title));
87        metadata.insert(
88            "source".to_string(),
89            Value::String(format!("confluence:{}", page_id)),
90        );
91        if let Some(space_id) = body["spaceId"].as_str() {
92            metadata.insert("space_id".to_string(), Value::String(space_id.to_string()));
93        }
94
95        Ok(Document {
96            id: page_id.to_string(),
97            content,
98            metadata,
99        })
100    }
101
102    async fn fetch_space_pages(&self, space_key: &str) -> Result<Vec<String>, SynapticError> {
103        let url = format!(
104            "https://{}/wiki/api/v2/spaces/{}/pages?limit=50",
105            self.config.domain, space_key
106        );
107        let resp = self
108            .client
109            .get(&url)
110            .header("Authorization", self.auth_header())
111            .header("Accept", "application/json")
112            .send()
113            .await
114            .map_err(|e| SynapticError::Loader(format!("Confluence fetch space: {e}")))?;
115        let body: Value = resp
116            .json()
117            .await
118            .map_err(|e| SynapticError::Loader(format!("Confluence parse space: {e}")))?;
119
120        let ids = body["results"]
121            .as_array()
122            .unwrap_or(&vec![])
123            .iter()
124            .filter_map(|p| p["id"].as_str().map(|s| s.to_string()))
125            .collect();
126        Ok(ids)
127    }
128}
129
/// Tags whose boundaries separate words in rendered text. Inline tags such as
/// `<b>` or `<span>` are deliberately absent so `<b>Hel</b>lo` stays `Hello`.
const BLOCK_LEVEL_TAGS: &[&str] = &[
    "p", "br", "hr", "div", "li", "ul", "ol", "dl", "dt", "dd", "table", "thead", "tbody", "tr",
    "td", "th", "h1", "h2", "h3", "h4", "h5", "h6", "blockquote", "pre",
];

/// Reports whether `tag` (the text between `<` and `>`) names a block-level element.
fn is_block_level_tag(tag: &str) -> bool {
    // Closing tags start with '/'; attributes or a self-closing '/' follow the name.
    let name = tag.trim_start_matches('/');
    let end = name
        .find(|c: char| c.is_whitespace() || c == '/')
        .unwrap_or(name.len());
    let name = &name[..end];
    BLOCK_LEVEL_TAGS
        .iter()
        .any(|block| block.eq_ignore_ascii_case(name))
}

/// Resolves one character reference body (the text between `&` and `;`).
fn decode_entity(name: &str) -> Option<char> {
    match name {
        "amp" => Some('&'),
        "lt" => Some('<'),
        "gt" => Some('>'),
        "quot" => Some('"'),
        "apos" => Some('\''),
        "nbsp" => Some(' '),
        _ => {
            let digits = name.strip_prefix('#')?;
            let code = match digits
                .strip_prefix('x')
                .or_else(|| digits.strip_prefix('X'))
            {
                Some(hex) => u32::from_str_radix(hex, 16).ok()?,
                None => digits.parse::<u32>().ok()?,
            };
            char::from_u32(code).filter(|c| *c != '\0')
        }
    }
}

/// Replaces the common named and numeric character references in `text`
/// in a single pass; anything unrecognised is copied through unchanged.
fn decode_html_entities(text: &str) -> String {
    // Longest reference worth looking for; bounds the scan after a stray '&'.
    const MAX_ENTITY_LEN: usize = 10;

    let mut out = String::with_capacity(text.len());
    let mut rest = text;
    while let Some(amp) = rest.find('&') {
        out.push_str(&rest[..amp]);
        let candidate = &rest[amp + 1..];
        let decoded = candidate
            .char_indices()
            .take(MAX_ENTITY_LEN + 1)
            .find(|&(_, c)| c == ';')
            .and_then(|(semi, _)| decode_entity(&candidate[..semi]).map(|ch| (ch, semi + 1)));
        match decoded {
            Some((ch, consumed)) => {
                out.push(ch);
                rest = &candidate[consumed..];
            }
            None => {
                out.push('&');
                rest = candidate;
            }
        }
    }
    out.push_str(rest);
    out
}

/// Extracts plain text from an HTML/XML fragment.
///
/// Everything between `<` and `>` is dropped. Block-level tags (`<p>`,
/// `<br/>`, `<li>`, headings, table cells, ...) leave a word break behind so
/// that adjacent paragraphs do not fuse into one word. After tag removal the
/// common character references (`&amp;`, `&lt;`, `&gt;`, `&quot;`, `&apos;`,
/// `&nbsp;`, `&#NN;`, `&#xHH;`) are decoded, and all runs of whitespace are
/// collapsed to single spaces with leading/trailing whitespace removed.
///
/// Text after an unterminated `<` is discarded, and a `>` outside a tag is
/// dropped.
fn strip_html_tags(html: &str) -> String {
    let mut text = String::with_capacity(html.len());
    let mut tag = String::new();
    let mut in_tag = false;
    for c in html.chars() {
        match c {
            '<' => {
                in_tag = true;
                tag.clear();
            }
            '>' => {
                if in_tag && is_block_level_tag(&tag) {
                    text.push(' ');
                }
                in_tag = false;
            }
            _ if in_tag => tag.push(c),
            _ => text.push(c),
        }
    }
    // Decode only after the tags are gone so that `&lt;b&gt;` survives as literal text.
    decode_html_entities(&text)
        .split_whitespace()
        .collect::<Vec<_>>()
        .join(" ")
}
143
144#[async_trait]
145impl Loader for ConfluenceLoader {
146    async fn load(&self) -> Result<Vec<Document>, SynapticError> {
147        let mut page_ids = self.config.page_ids.clone();
148        if let Some(ref space_key) = self.config.space_key {
149            let space_ids = self.fetch_space_pages(space_key).await?;
150            page_ids.extend(space_ids);
151        }
152        let mut documents = Vec::new();
153        for page_id in &page_ids {
154            match self.fetch_page(page_id).await {
155                Ok(doc) => documents.push(doc),
156                Err(e) => eprintln!("Warning: failed to load Confluence page {}: {}", page_id, e),
157            }
158        }
159        Ok(documents)
160    }
161}