synaptic_loaders/
github.rs1use async_trait::async_trait;
2use base64::Engine;
3use serde_json::Value;
4use std::collections::HashMap;
5use synaptic_core::{Document, Loader, SynapticError};
6
7pub struct GitHubLoader {
9 client: reqwest::Client,
10 owner: String,
11 repo: String,
12 paths: Vec<String>,
13 token: Option<String>,
14 branch: Option<String>,
15 extensions: Vec<String>,
16}
17
18impl GitHubLoader {
19 pub fn new(owner: impl Into<String>, repo: impl Into<String>, paths: Vec<String>) -> Self {
20 Self {
21 client: reqwest::Client::new(),
22 owner: owner.into(),
23 repo: repo.into(),
24 paths,
25 token: None,
26 branch: None,
27 extensions: vec![],
28 }
29 }
30
31 pub fn with_token(mut self, token: impl Into<String>) -> Self {
32 self.token = Some(token.into());
33 self
34 }
35
36 pub fn with_branch(mut self, branch: impl Into<String>) -> Self {
37 self.branch = Some(branch.into());
38 self
39 }
40
41 pub fn with_extensions(mut self, exts: Vec<String>) -> Self {
42 self.extensions = exts;
43 self
44 }
45
46 fn matches_extension(&self, path: &str) -> bool {
47 if self.extensions.is_empty() {
48 return true;
49 }
50 self.extensions
51 .iter()
52 .any(|ext| path.ends_with(ext.as_str()))
53 }
54
55 async fn fetch_path(&self, path: &str, docs: &mut Vec<Document>) -> Result<(), SynapticError> {
56 let mut url = format!(
57 "https://api.github.com/repos/{}/{}/contents/{}",
58 self.owner, self.repo, path
59 );
60 if let Some(ref branch) = self.branch {
61 url.push_str(&format!("?ref={}", branch));
62 }
63 let mut req = self
64 .client
65 .get(&url)
66 .header("Accept", "application/vnd.github.v3+json")
67 .header("User-Agent", "synaptic-github-loader");
68 if let Some(ref token) = self.token {
69 req = req.header("Authorization", format!("token {}", token));
70 }
71 let resp = req
72 .send()
73 .await
74 .map_err(|e| SynapticError::Loader(format!("GitHub fetch: {e}")))?;
75 let body: Value = resp
76 .json()
77 .await
78 .map_err(|e| SynapticError::Loader(format!("GitHub parse: {e}")))?;
79
80 if body.is_array() {
81 for item in body.as_array().unwrap() {
83 let item_type = item["type"].as_str().unwrap_or("");
84 let item_path = item["path"].as_str().unwrap_or("").to_string();
85 match item_type {
86 "file" if self.matches_extension(&item_path) => {
87 Box::pin(self.fetch_path(&item_path, docs)).await?;
88 }
89 "dir" => {
90 Box::pin(self.fetch_path(&item_path, docs)).await?;
91 }
92 _ => {}
93 }
94 }
95 } else if let Some("file") = body["type"].as_str() {
96 let encoded = body["content"].as_str().unwrap_or("").replace('\n', "");
97 let content = base64::engine::general_purpose::STANDARD
98 .decode(&encoded)
99 .ok()
100 .and_then(|b| String::from_utf8(b).ok())
101 .unwrap_or_default();
102 let file_path = body["path"].as_str().unwrap_or(path).to_string();
103 let mut metadata = HashMap::new();
104 metadata.insert(
105 "source".to_string(),
106 Value::String(format!("github:{}/{}/{}", self.owner, self.repo, file_path)),
107 );
108 metadata.insert(
109 "sha".to_string(),
110 Value::String(body["sha"].as_str().unwrap_or("").to_string()),
111 );
112 if let Some(ref branch) = self.branch {
113 metadata.insert("branch".to_string(), Value::String(branch.clone()));
114 }
115 docs.push(Document {
116 id: file_path.clone(),
117 content,
118 metadata,
119 });
120 }
121 Ok(())
122 }
123}
124
125#[async_trait]
126impl Loader for GitHubLoader {
127 async fn load(&self) -> Result<Vec<Document>, SynapticError> {
128 let mut documents = Vec::new();
129 for path in &self.paths {
130 self.fetch_path(path, &mut documents).await?;
131 }
132 Ok(documents)
133 }
134}