Skip to main content

chub_core/
fetch.rs

1use std::fs;
2use std::path::PathBuf;
3
4use crate::cache::{
5    get_source_data_dir, get_source_dir, get_source_registry_path, get_source_search_index_path,
6    read_cached_doc, read_meta, save_cached_doc, save_source_registry,
7    should_fetch_remote_registry, write_meta,
8};
9use crate::config::{load_config, SourceConfig};
10use crate::error::{Error, Result};
11
/// Timeout, in seconds, handed to `reqwest::ClientBuilder::timeout` by the fetch
/// functions in this file (registry, bundle and single-document downloads).
12const FETCH_TIMEOUT_SECS: u64 = 30;
13
14/// Fetch registry for a single remote source.
15pub async fn fetch_remote_registry(source: &SourceConfig, force: bool) -> Result<()> {
16    if !force && !should_fetch_remote_registry(&source.name) {
17        return Ok(());
18    }
19
20    let url = format!(
21        "{}/registry.json",
22        source.url.as_deref().unwrap_or_default()
23    );
24
25    let client = reqwest::Client::builder()
26        .timeout(std::time::Duration::from_secs(FETCH_TIMEOUT_SECS))
27        .build()
28        .map_err(|e| Error::Config(format!("HTTP client error: {}", e)))?;
29
30    let res = client.get(&url).send().await.map_err(|e| {
31        Error::Config(format!(
32            "Failed to fetch registry from {}: {}",
33            source.name, e
34        ))
35    })?;
36
37    if !res.status().is_success() {
38        return Err(Error::Config(format!(
39            "Failed to fetch registry from {}: {} {}",
40            source.name,
41            res.status().as_u16(),
42            res.status().canonical_reason().unwrap_or("")
43        )));
44    }
45
46    let data = res
47        .text()
48        .await
49        .map_err(|e| Error::Config(format!("Failed to read registry body: {}", e)))?;
50
51    save_source_registry(&source.name, &data);
52    crate::cache::touch_source_meta(&source.name);
53    Ok(())
54}
55
56/// Fetch registries for all configured sources.
57pub async fn fetch_all_registries(force: bool) -> Vec<FetchError> {
58    let config = load_config();
59    let mut errors = Vec::new();
60
61    for source in &config.sources {
62        if source.path.is_some() {
63            continue;
64        }
65        if let Err(e) = fetch_remote_registry(source, force).await {
66            errors.push(FetchError {
67                source: source.name.clone(),
68                error: e.to_string(),
69            });
70        }
71    }
72
73    errors
74}
75
/// A registry fetch failure for one source, as collected by `fetch_all_registries`.
///
/// The error is stored as its rendered string form so the record derives
/// `serde::Serialize` without constraining the underlying error type.
76#[derive(Debug, Clone, serde::Serialize)]
77pub struct FetchError {
    /// Name of the source whose registry fetch failed.
78    pub source: String,
    /// The failure rendered with `to_string()`.
79    pub error: String,
80}
81
82/// Download full bundle for a remote source.
83pub async fn fetch_full_bundle(source_name: &str) -> Result<()> {
84    let config = load_config();
85    let source = config
86        .sources
87        .iter()
88        .find(|s| s.name == source_name)
89        .ok_or_else(|| Error::Config(format!("Source \"{}\" not found", source_name)))?;
90
91    if source.path.is_some() {
92        return Err(Error::Config(format!(
93            "Source \"{}\" is not a remote source.",
94            source_name
95        )));
96    }
97
98    let url = format!(
99        "{}/bundle.tar.gz",
100        source.url.as_deref().unwrap_or_default()
101    );
102
103    let client = reqwest::Client::builder()
104        .timeout(std::time::Duration::from_secs(FETCH_TIMEOUT_SECS))
105        .build()
106        .map_err(|e| Error::Config(format!("HTTP client error: {}", e)))?;
107
108    let res = client.get(&url).send().await.map_err(|e| {
109        Error::Config(format!(
110            "Failed to fetch bundle from {}: {}",
111            source_name, e
112        ))
113    })?;
114
115    if !res.status().is_success() {
116        return Err(Error::Config(format!(
117            "Failed to fetch bundle from {}: {} {}",
118            source_name,
119            res.status().as_u16(),
120            res.status().canonical_reason().unwrap_or("")
121        )));
122    }
123
124    let bytes = res
125        .bytes()
126        .await
127        .map_err(|e| Error::Config(format!("Failed to read bundle body: {}", e)))?;
128
129    let source_dir = get_source_dir(source_name);
130    fs::create_dir_all(&source_dir)?;
131
132    let tmp_path = source_dir.join("bundle.tar.gz");
133    fs::write(&tmp_path, &bytes)?;
134
135    // Extract tar.gz
136    let data_dir = get_source_data_dir(source_name);
137    fs::create_dir_all(&data_dir)?;
138
139    let file = fs::File::open(&tmp_path)?;
140    let gz = flate2::read::GzDecoder::new(file);
141    let mut archive = tar::Archive::new(gz);
142    archive.unpack(&data_dir)?;
143
144    // Copy registry.json from extracted bundle if present
145    let extracted_registry = data_dir.join("registry.json");
146    if extracted_registry.exists() {
147        let reg_data = fs::read_to_string(&extracted_registry)?;
148        fs::write(get_source_registry_path(source_name), &reg_data)?;
149    }
150
151    // Copy search-index.json from extracted bundle if present
152    let extracted_search_index = data_dir.join("search-index.json");
153    if extracted_search_index.exists() {
154        let idx_data = fs::read_to_string(&extracted_search_index)?;
155        fs::write(get_source_search_index_path(source_name), &idx_data)?;
156    } else {
157        let _ = fs::remove_file(get_source_search_index_path(source_name));
158    }
159
160    // Update meta
161    let mut meta = read_meta(source_name);
162    meta.last_updated = Some(
163        std::time::SystemTime::now()
164            .duration_since(std::time::UNIX_EPOCH)
165            .unwrap_or_default()
166            .as_millis() as u64,
167    );
168    meta.full_bundle = true;
169    write_meta(source_name, &meta);
170
171    // Clean up temp file
172    let _ = fs::remove_file(&tmp_path);
173
174    Ok(())
175}
176
177/// Fetch a single doc. Source must have name + (url or path).
178pub async fn fetch_doc(source: &SourceConfig, doc_path: &str) -> Result<String> {
179    // Local source: read directly
180    if let Some(ref local_path) = source.path {
181        let full_path = PathBuf::from(local_path).join(doc_path);
182        if !full_path.exists() {
183            return Err(Error::NotFound(format!(
184                "File not found: {}",
185                full_path.display()
186            )));
187        }
188        return Ok(fs::read_to_string(&full_path)?);
189    }
190
191    // Remote source: check cache first
192    if let Some(content) = read_cached_doc(&source.name, doc_path) {
193        return Ok(content);
194    }
195
196    // Fetch from CDN
197    let url = format!("{}/{}", source.url.as_deref().unwrap_or_default(), doc_path);
198
199    let client = reqwest::Client::builder()
200        .timeout(std::time::Duration::from_secs(FETCH_TIMEOUT_SECS))
201        .build()
202        .map_err(|e| Error::Config(format!("HTTP client error: {}", e)))?;
203
204    let res = client.get(&url).send().await.map_err(|e| {
205        Error::Config(format!(
206            "Failed to fetch {} from {}: {}",
207            doc_path, source.name, e
208        ))
209    })?;
210
211    if !res.status().is_success() {
212        return Err(Error::Config(format!(
213            "Failed to fetch {} from {}: {} {}",
214            doc_path,
215            source.name,
216            res.status().as_u16(),
217            res.status().canonical_reason().unwrap_or("")
218        )));
219    }
220
221    let content = res
222        .text()
223        .await
224        .map_err(|e| Error::Config(format!("Failed to read body: {}", e)))?;
225
226    // Cache locally
227    save_cached_doc(&source.name, doc_path, &content);
228
229    Ok(content)
230}
231
232/// Fetch all files in an entry directory. Returns vec of (filename, content).
233pub async fn fetch_doc_full(
234    source: &SourceConfig,
235    base_path: &str,
236    files: &[String],
237) -> Result<Vec<(String, String)>> {
238    let mut results = Vec::new();
239    for file in files {
240        let file_path = format!("{}/{}", base_path, file);
241        let content = fetch_doc(source, &file_path).await?;
242        results.push((file.clone(), content));
243    }
244    Ok(results)
245}
246
247/// Ensure at least one registry is available.
248pub async fn ensure_registry() -> Result<()> {
249    if crate::cache::has_any_registry() {
250        // Auto-refresh stale remote registries (best-effort)
251        let config = load_config();
252        for source in &config.sources {
253            if source.path.is_some() {
254                continue;
255            }
256            if should_fetch_remote_registry(&source.name) {
257                let _ = fetch_remote_registry(source, false).await;
258            }
259        }
260        return Ok(());
261    }
262
263    // No registries at all — must download from remote
264    let errors = fetch_all_registries(true).await;
265    if !errors.is_empty() && !crate::cache::has_any_registry() {
266        return Err(Error::Config(format!(
267            "Failed to fetch registries: {}",
268            errors
269                .iter()
270                .map(|e| format!("{}: {}", e.source, e.error))
271                .collect::<Vec<_>>()
272                .join("; ")
273        )));
274    }
275
276    Ok(())
277}