synaptic_loaders/
web_loader.rs1use std::collections::HashMap;
2
3use crate::Document;
4use async_trait::async_trait;
5use serde_json::Value;
6use synaptic_core::SynapticError;
7
8use crate::Loader;
9
/// Loader that retrieves the content behind a single URL.
///
/// The URL is stored as given; no validation happens at construction time,
/// so a malformed URL only surfaces as an error when the loader is run.
#[derive(Debug, Clone)]
pub struct WebBaseLoader {
    /// Address to fetch, kept exactly as supplied to [`WebBaseLoader::new`].
    url: String,
}

impl WebBaseLoader {
    /// Creates a loader for `url`.
    ///
    /// Accepts anything convertible into a `String` (`&str`, `String`, ...).
    pub fn new(url: impl Into<String>) -> Self {
        Self { url: url.into() }
    }
}
24
25#[async_trait]
26impl Loader for WebBaseLoader {
27 async fn load(&self) -> Result<Vec<Document>, SynapticError> {
28 let response = reqwest::get(&self.url).await.map_err(|e| {
29 SynapticError::Loader(format!("HTTP request failed for {}: {e}", self.url))
30 })?;
31
32 let content_type = response
33 .headers()
34 .get(reqwest::header::CONTENT_TYPE)
35 .and_then(|v| v.to_str().ok())
36 .unwrap_or("unknown")
37 .to_string();
38
39 let text = response
40 .text()
41 .await
42 .map_err(|e| SynapticError::Loader(format!("failed to read response body: {e}")))?;
43
44 let mut metadata = HashMap::new();
45 metadata.insert("source".to_string(), Value::String(self.url.clone()));
46 metadata.insert("content_type".to_string(), Value::String(content_type));
47
48 Ok(vec![Document::with_metadata(
49 self.url.clone(),
50 text,
51 metadata,
52 )])
53 }
54}