Skip to main content

synaptic_loaders/
github.rs

1use async_trait::async_trait;
2use base64::Engine;
3use serde_json::Value;
4use std::collections::HashMap;
5use synaptic_core::{Document, Loader, SynapticError};
6
7/// Loader for GitHub repository files via the GitHub API.
8pub struct GitHubLoader {
9    client: reqwest::Client,
10    owner: String,
11    repo: String,
12    paths: Vec<String>,
13    token: Option<String>,
14    branch: Option<String>,
15    extensions: Vec<String>,
16}
17
18impl GitHubLoader {
19    pub fn new(owner: impl Into<String>, repo: impl Into<String>, paths: Vec<String>) -> Self {
20        Self {
21            client: reqwest::Client::new(),
22            owner: owner.into(),
23            repo: repo.into(),
24            paths,
25            token: None,
26            branch: None,
27            extensions: vec![],
28        }
29    }
30
31    pub fn with_token(mut self, token: impl Into<String>) -> Self {
32        self.token = Some(token.into());
33        self
34    }
35
36    pub fn with_branch(mut self, branch: impl Into<String>) -> Self {
37        self.branch = Some(branch.into());
38        self
39    }
40
41    pub fn with_extensions(mut self, exts: Vec<String>) -> Self {
42        self.extensions = exts;
43        self
44    }
45
46    fn matches_extension(&self, path: &str) -> bool {
47        if self.extensions.is_empty() {
48            return true;
49        }
50        self.extensions
51            .iter()
52            .any(|ext| path.ends_with(ext.as_str()))
53    }
54
55    async fn fetch_path(&self, path: &str, docs: &mut Vec<Document>) -> Result<(), SynapticError> {
56        let mut url = format!(
57            "https://api.github.com/repos/{}/{}/contents/{}",
58            self.owner, self.repo, path
59        );
60        if let Some(ref branch) = self.branch {
61            url.push_str(&format!("?ref={}", branch));
62        }
63        let mut req = self
64            .client
65            .get(&url)
66            .header("Accept", "application/vnd.github.v3+json")
67            .header("User-Agent", "synaptic-github-loader");
68        if let Some(ref token) = self.token {
69            req = req.header("Authorization", format!("token {}", token));
70        }
71        let resp = req
72            .send()
73            .await
74            .map_err(|e| SynapticError::Loader(format!("GitHub fetch: {e}")))?;
75        let body: Value = resp
76            .json()
77            .await
78            .map_err(|e| SynapticError::Loader(format!("GitHub parse: {e}")))?;
79
80        if body.is_array() {
81            // Directory — recurse
82            for item in body.as_array().unwrap() {
83                let item_type = item["type"].as_str().unwrap_or("");
84                let item_path = item["path"].as_str().unwrap_or("").to_string();
85                match item_type {
86                    "file" if self.matches_extension(&item_path) => {
87                        Box::pin(self.fetch_path(&item_path, docs)).await?;
88                    }
89                    "dir" => {
90                        Box::pin(self.fetch_path(&item_path, docs)).await?;
91                    }
92                    _ => {}
93                }
94            }
95        } else if let Some("file") = body["type"].as_str() {
96            let encoded = body["content"].as_str().unwrap_or("").replace('\n', "");
97            let content = base64::engine::general_purpose::STANDARD
98                .decode(&encoded)
99                .ok()
100                .and_then(|b| String::from_utf8(b).ok())
101                .unwrap_or_default();
102            let file_path = body["path"].as_str().unwrap_or(path).to_string();
103            let mut metadata = HashMap::new();
104            metadata.insert(
105                "source".to_string(),
106                Value::String(format!("github:{}/{}/{}", self.owner, self.repo, file_path)),
107            );
108            metadata.insert(
109                "sha".to_string(),
110                Value::String(body["sha"].as_str().unwrap_or("").to_string()),
111            );
112            if let Some(ref branch) = self.branch {
113                metadata.insert("branch".to_string(), Value::String(branch.clone()));
114            }
115            docs.push(Document {
116                id: file_path.clone(),
117                content,
118                metadata,
119            });
120        }
121        Ok(())
122    }
123}
124
125#[async_trait]
126impl Loader for GitHubLoader {
127    async fn load(&self) -> Result<Vec<Document>, SynapticError> {
128        let mut documents = Vec::new();
129        for path in &self.paths {
130            self.fetch_path(path, &mut documents).await?;
131        }
132        Ok(documents)
133    }
134}