Skip to main content

synaptic_loaders/
notion.rs

1use async_trait::async_trait;
2use serde_json::Value;
3use std::collections::HashMap;
4use synaptic_core::{Document, Loader, SynapticError};
5
6/// Loader for Notion pages via the Notion API.
7pub struct NotionLoader {
8    client: reqwest::Client,
9    token: String,
10    page_ids: Vec<String>,
11}
12
13impl NotionLoader {
14    pub fn new(token: impl Into<String>, page_ids: Vec<String>) -> Self {
15        Self {
16            client: reqwest::Client::new(),
17            token: token.into(),
18            page_ids,
19        }
20    }
21
22    async fn fetch_page_title(&self, page_id: &str) -> Result<String, SynapticError> {
23        let url = format!("https://api.notion.com/v1/pages/{}", page_id);
24        let resp = self
25            .client
26            .get(&url)
27            .header("Authorization", format!("Bearer {}", self.token))
28            .header("Notion-Version", "2022-06-28")
29            .send()
30            .await
31            .map_err(|e| SynapticError::Loader(format!("Notion fetch page: {e}")))?;
32        let body: Value = resp
33            .json()
34            .await
35            .map_err(|e| SynapticError::Loader(format!("Notion parse page: {e}")))?;
36
37        let title = body["properties"]["title"]["title"][0]["plain_text"]
38            .as_str()
39            .or_else(|| body["properties"]["Name"]["title"][0]["plain_text"].as_str())
40            .unwrap_or("Untitled")
41            .to_string();
42        Ok(title)
43    }
44
45    async fn fetch_blocks(&self, block_id: &str) -> Result<Vec<Value>, SynapticError> {
46        let url = format!(
47            "https://api.notion.com/v1/blocks/{}/children?page_size=100",
48            block_id
49        );
50        let resp = self
51            .client
52            .get(&url)
53            .header("Authorization", format!("Bearer {}", self.token))
54            .header("Notion-Version", "2022-06-28")
55            .send()
56            .await
57            .map_err(|e| SynapticError::Loader(format!("Notion fetch blocks: {e}")))?;
58        let body: Value = resp
59            .json()
60            .await
61            .map_err(|e| SynapticError::Loader(format!("Notion parse blocks: {e}")))?;
62
63        Ok(body["results"].as_array().cloned().unwrap_or_default())
64    }
65
66    fn extract_rich_text(rich_text: &Value) -> String {
67        rich_text
68            .as_array()
69            .map(|arr| {
70                arr.iter()
71                    .filter_map(|t| t["plain_text"].as_str())
72                    .collect::<Vec<_>>()
73                    .join("")
74            })
75            .unwrap_or_default()
76    }
77
78    fn block_to_text(block: &Value) -> Option<String> {
79        let block_type = block["type"].as_str()?;
80        match block_type {
81            "paragraph" => Some(Self::extract_rich_text(&block["paragraph"]["rich_text"])),
82            "heading_1" => Some(format!(
83                "# {}",
84                Self::extract_rich_text(&block["heading_1"]["rich_text"])
85            )),
86            "heading_2" => Some(format!(
87                "## {}",
88                Self::extract_rich_text(&block["heading_2"]["rich_text"])
89            )),
90            "heading_3" => Some(format!(
91                "### {}",
92                Self::extract_rich_text(&block["heading_3"]["rich_text"])
93            )),
94            "bulleted_list_item" => Some(format!(
95                "- {}",
96                Self::extract_rich_text(&block["bulleted_list_item"]["rich_text"])
97            )),
98            "numbered_list_item" => Some(format!(
99                "1. {}",
100                Self::extract_rich_text(&block["numbered_list_item"]["rich_text"])
101            )),
102            "quote" => Some(format!(
103                "> {}",
104                Self::extract_rich_text(&block["quote"]["rich_text"])
105            )),
106            "callout" => Some(Self::extract_rich_text(&block["callout"]["rich_text"])),
107            "code" => {
108                let lang = block["code"]["language"].as_str().unwrap_or("");
109                let code = Self::extract_rich_text(&block["code"]["rich_text"]);
110                Some(format!("```{}\n{}\n```", lang, code))
111            }
112            _ => None,
113        }
114    }
115}
116
117#[async_trait]
118impl Loader for NotionLoader {
119    async fn load(&self) -> Result<Vec<Document>, SynapticError> {
120        let mut documents = Vec::new();
121        for page_id in &self.page_ids {
122            let title = self
123                .fetch_page_title(page_id)
124                .await
125                .unwrap_or_else(|_| "Untitled".to_string());
126            let blocks = self.fetch_blocks(page_id).await?;
127            let content = blocks
128                .iter()
129                .filter_map(Self::block_to_text)
130                .filter(|s| !s.trim().is_empty())
131                .collect::<Vec<_>>()
132                .join("\n\n");
133            let mut metadata = HashMap::new();
134            metadata.insert(
135                "source".to_string(),
136                Value::String(format!("notion:{}", page_id)),
137            );
138            metadata.insert("title".to_string(), Value::String(title));
139            documents.push(Document {
140                id: page_id.clone(),
141                content,
142                metadata,
143            });
144        }
145        Ok(documents)
146    }
147}