synaptic_loaders/
notion.rs1use async_trait::async_trait;
2use serde_json::Value;
3use std::collections::HashMap;
4use synaptic_core::{Document, Loader, SynapticError};
5
6pub struct NotionLoader {
8 client: reqwest::Client,
9 token: String,
10 page_ids: Vec<String>,
11}
12
13impl NotionLoader {
14 pub fn new(token: impl Into<String>, page_ids: Vec<String>) -> Self {
15 Self {
16 client: reqwest::Client::new(),
17 token: token.into(),
18 page_ids,
19 }
20 }
21
22 async fn fetch_page_title(&self, page_id: &str) -> Result<String, SynapticError> {
23 let url = format!("https://api.notion.com/v1/pages/{}", page_id);
24 let resp = self
25 .client
26 .get(&url)
27 .header("Authorization", format!("Bearer {}", self.token))
28 .header("Notion-Version", "2022-06-28")
29 .send()
30 .await
31 .map_err(|e| SynapticError::Loader(format!("Notion fetch page: {e}")))?;
32 let body: Value = resp
33 .json()
34 .await
35 .map_err(|e| SynapticError::Loader(format!("Notion parse page: {e}")))?;
36
37 let title = body["properties"]["title"]["title"][0]["plain_text"]
38 .as_str()
39 .or_else(|| body["properties"]["Name"]["title"][0]["plain_text"].as_str())
40 .unwrap_or("Untitled")
41 .to_string();
42 Ok(title)
43 }
44
45 async fn fetch_blocks(&self, block_id: &str) -> Result<Vec<Value>, SynapticError> {
46 let url = format!(
47 "https://api.notion.com/v1/blocks/{}/children?page_size=100",
48 block_id
49 );
50 let resp = self
51 .client
52 .get(&url)
53 .header("Authorization", format!("Bearer {}", self.token))
54 .header("Notion-Version", "2022-06-28")
55 .send()
56 .await
57 .map_err(|e| SynapticError::Loader(format!("Notion fetch blocks: {e}")))?;
58 let body: Value = resp
59 .json()
60 .await
61 .map_err(|e| SynapticError::Loader(format!("Notion parse blocks: {e}")))?;
62
63 Ok(body["results"].as_array().cloned().unwrap_or_default())
64 }
65
66 fn extract_rich_text(rich_text: &Value) -> String {
67 rich_text
68 .as_array()
69 .map(|arr| {
70 arr.iter()
71 .filter_map(|t| t["plain_text"].as_str())
72 .collect::<Vec<_>>()
73 .join("")
74 })
75 .unwrap_or_default()
76 }
77
78 fn block_to_text(block: &Value) -> Option<String> {
79 let block_type = block["type"].as_str()?;
80 match block_type {
81 "paragraph" => Some(Self::extract_rich_text(&block["paragraph"]["rich_text"])),
82 "heading_1" => Some(format!(
83 "# {}",
84 Self::extract_rich_text(&block["heading_1"]["rich_text"])
85 )),
86 "heading_2" => Some(format!(
87 "## {}",
88 Self::extract_rich_text(&block["heading_2"]["rich_text"])
89 )),
90 "heading_3" => Some(format!(
91 "### {}",
92 Self::extract_rich_text(&block["heading_3"]["rich_text"])
93 )),
94 "bulleted_list_item" => Some(format!(
95 "- {}",
96 Self::extract_rich_text(&block["bulleted_list_item"]["rich_text"])
97 )),
98 "numbered_list_item" => Some(format!(
99 "1. {}",
100 Self::extract_rich_text(&block["numbered_list_item"]["rich_text"])
101 )),
102 "quote" => Some(format!(
103 "> {}",
104 Self::extract_rich_text(&block["quote"]["rich_text"])
105 )),
106 "callout" => Some(Self::extract_rich_text(&block["callout"]["rich_text"])),
107 "code" => {
108 let lang = block["code"]["language"].as_str().unwrap_or("");
109 let code = Self::extract_rich_text(&block["code"]["rich_text"]);
110 Some(format!("```{}\n{}\n```", lang, code))
111 }
112 _ => None,
113 }
114 }
115}
116
117#[async_trait]
118impl Loader for NotionLoader {
119 async fn load(&self) -> Result<Vec<Document>, SynapticError> {
120 let mut documents = Vec::new();
121 for page_id in &self.page_ids {
122 let title = self
123 .fetch_page_title(page_id)
124 .await
125 .unwrap_or_else(|_| "Untitled".to_string());
126 let blocks = self.fetch_blocks(page_id).await?;
127 let content = blocks
128 .iter()
129 .filter_map(Self::block_to_text)
130 .filter(|s| !s.trim().is_empty())
131 .collect::<Vec<_>>()
132 .join("\n\n");
133 let mut metadata = HashMap::new();
134 metadata.insert(
135 "source".to_string(),
136 Value::String(format!("notion:{}", page_id)),
137 );
138 metadata.insert("title".to_string(), Value::String(title));
139 documents.push(Document {
140 id: page_id.clone(),
141 content,
142 metadata,
143 });
144 }
145 Ok(documents)
146 }
147}