Skip to main content

bv_index/
git.rs

1use std::fs;
2use std::path::PathBuf;
3use std::process::{Command, Stdio};
4
5use bv_core::data::DataManifest;
6use bv_core::error::{BvError, Result};
7use bv_core::manifest::Manifest;
8use semver::{Version, VersionReq};
9
10use crate::backend::{IndexBackend, ToolSummary};
11
/// A registry index backed by a git repository cloned onto local disk.
///
/// The repository at `url` is cloned into `local_path`; lookups then read
/// manifest files from that local checkout.
#[derive(Debug, Clone)]
pub struct GitIndex {
    /// Remote URL handed to `git clone` and kept as the clone's `origin`.
    pub url: String,
    /// Directory holding the local clone of the registry.
    pub local_path: PathBuf,
}
16
17impl GitIndex {
18    pub fn new(url: impl Into<String>, local_path: impl Into<PathBuf>) -> Self {
19        Self {
20            url: url.into(),
21            local_path: local_path.into(),
22        }
23    }
24
25    /// Refresh only if the local clone is older than `ttl`.
26    /// Returns `true` when an actual network fetch was performed.
27    pub fn refresh_if_stale(&self, ttl: std::time::Duration) -> Result<bool> {
28        let stamp = self.local_path.join(".bv-refresh");
29        let is_fresh = stamp
30            .metadata()
31            .and_then(|m| m.modified())
32            .ok()
33            .and_then(|t| t.elapsed().ok())
34            .map(|elapsed| elapsed < ttl)
35            .unwrap_or(false);
36
37        if is_fresh {
38            return Ok(false);
39        }
40
41        self.git_refresh()?;
42        self.touch_stamp();
43        Ok(true)
44    }
45
46    /// True when the local clone exists and has been fetched at least once.
47    pub fn is_available(&self) -> bool {
48        self.local_path.join(".bv-refresh").exists() || self.local_path.join(".git").exists()
49    }
50
51    /// Path to the local clone of this registry.
52    pub fn local_path(&self) -> &std::path::Path {
53        &self.local_path
54    }
55
56    fn git_refresh(&self) -> Result<()> {
57        if self.local_path.exists() {
58            // Re-point the remote if the cached clone is from a different URL.
59            // This handles users who had the old default registry cloned.
60            self.maybe_update_remote()?;
61
62            let out = Command::new("git")
63                .args([
64                    "-C",
65                    &self.local_path.to_string_lossy(),
66                    "pull",
67                    "--ff-only",
68                ])
69                .env("GIT_TERMINAL_PROMPT", "0")
70                .stdin(Stdio::null())
71                .stdout(Stdio::null())
72                .stderr(Stdio::piped())
73                .output()?;
74            if !out.status.success() {
75                let msg = String::from_utf8_lossy(&out.stderr);
76                return Err(BvError::IndexError(format!(
77                    "git pull failed in {}: {}",
78                    self.local_path.display(),
79                    msg.trim()
80                )));
81            }
82        } else {
83            if let Some(parent) = self.local_path.parent() {
84                fs::create_dir_all(parent)?;
85            }
86            let out = Command::new("git")
87                .args([
88                    "clone",
89                    "--depth",
90                    "1",
91                    &self.url,
92                    &self.local_path.to_string_lossy(),
93                ])
94                .env("GIT_TERMINAL_PROMPT", "0")
95                .stdin(Stdio::null())
96                .stdout(Stdio::null())
97                .stderr(Stdio::piped())
98                .output()?;
99            if !out.status.success() {
100                let msg = String::from_utf8_lossy(&out.stderr);
101                return Err(BvError::IndexError(format!(
102                    "git clone failed for '{}': {}",
103                    self.url,
104                    msg.trim()
105                )));
106            }
107        }
108        Ok(())
109    }
110
111    /// If the existing clone's remote URL doesn't match `self.url`, update it.
112    fn maybe_update_remote(&self) -> Result<()> {
113        let out = Command::new("git")
114            .args([
115                "-C",
116                &self.local_path.to_string_lossy(),
117                "remote",
118                "get-url",
119                "origin",
120            ])
121            .stdin(Stdio::null())
122            .stdout(Stdio::piped())
123            .stderr(Stdio::null())
124            .output()?;
125
126        let current = String::from_utf8_lossy(&out.stdout).trim().to_string();
127        if current != self.url {
128            Command::new("git")
129                .args([
130                    "-C",
131                    &self.local_path.to_string_lossy(),
132                    "remote",
133                    "set-url",
134                    "origin",
135                    &self.url,
136                ])
137                .stdin(Stdio::null())
138                .stdout(Stdio::null())
139                .stderr(Stdio::null())
140                .status()?;
141        }
142        Ok(())
143    }
144
145    fn touch_stamp(&self) {
146        let stamp = self.local_path.join(".bv-refresh");
147        let _ = fs::write(&stamp, "");
148    }
149}
150
151impl IndexBackend for GitIndex {
152    fn name(&self) -> &str {
153        "git"
154    }
155
156    fn refresh(&self) -> Result<()> {
157        self.git_refresh()?;
158        self.touch_stamp();
159        Ok(())
160    }
161
162    fn get_manifest(&self, tool: &str, version: &VersionReq) -> Result<Manifest> {
163        let tool_dir = self.local_path.join("tools").join(tool);
164        if !tool_dir.exists() {
165            return Err(BvError::IndexError(format!(
166                "tool '{tool}' not found in registry"
167            )));
168        }
169
170        let versions = self.list_versions(tool)?;
171        if versions.is_empty() {
172            return Err(BvError::IndexError(format!(
173                "no versions of '{tool}' found in registry"
174            )));
175        }
176
177        let best = versions
178            .iter()
179            .filter(|v| version.matches(v))
180            .max()
181            .ok_or_else(|| {
182                BvError::IndexError(format!(
183                    "no version of '{tool}' satisfies '{version}' (available: {})",
184                    versions
185                        .iter()
186                        .map(|v| v.to_string())
187                        .collect::<Vec<_>>()
188                        .join(", ")
189                ))
190            })?;
191
192        let manifest_path = tool_dir.join(format!("{best}.toml"));
193        let s = fs::read_to_string(&manifest_path).map_err(|e| {
194            BvError::IndexError(format!("could not read manifest for '{tool}@{best}': {e}"))
195        })?;
196
197        Manifest::from_toml_str(&s)
198    }
199
200    fn list_versions(&self, tool: &str) -> Result<Vec<Version>> {
201        let tool_dir = self.local_path.join("tools").join(tool);
202        if !tool_dir.exists() {
203            return Err(BvError::IndexError(format!(
204                "tool '{tool}' not found in registry"
205            )));
206        }
207
208        let mut versions = Vec::new();
209        let mut dropped: Vec<String> = Vec::new();
210        for entry in fs::read_dir(&tool_dir)? {
211            let entry = entry?;
212            let path = entry.path();
213            if path.extension().is_some_and(|e| e == "toml")
214                && let Some(stem) = path.file_stem().and_then(|s| s.to_str())
215            {
216                match stem.parse::<Version>() {
217                    Ok(v) => versions.push(v),
218                    Err(_) => dropped.push(stem.to_string()),
219                }
220            }
221        }
222
223        if !dropped.is_empty() {
224            tracing::warn!(
225                tool = %tool,
226                dropped = ?dropped,
227                "ignoring tool manifest files with non-semver names (expected MAJOR.MINOR.PATCH; \
228                 calver like 2024.01.0 is not valid semver)"
229            );
230        }
231
232        versions.sort();
233        Ok(versions)
234    }
235
236    fn list_tools(&self) -> Result<Vec<ToolSummary>> {
237        let tools_dir = self.local_path.join("tools");
238        if !tools_dir.exists() {
239            return Ok(vec![]);
240        }
241
242        let mut tools = Vec::new();
243        for entry in fs::read_dir(&tools_dir)? {
244            let entry = entry?;
245            if !entry.file_type()?.is_dir() {
246                continue;
247            }
248
249            let id = entry.file_name().to_string_lossy().to_string();
250            let versions = self.list_versions(&id).unwrap_or_default();
251
252            let latest_manifest = versions.last().and_then(|v| {
253                let p = tools_dir.join(&id).join(format!("{v}.toml"));
254                fs::read_to_string(p)
255                    .ok()
256                    .and_then(|s| Manifest::from_toml_str(&s).ok())
257            });
258
259            let description = latest_manifest
260                .as_ref()
261                .and_then(|m| m.tool.description.clone());
262            let tier = latest_manifest
263                .as_ref()
264                .map(|m| m.tool.tier.clone())
265                .unwrap_or_default();
266            let deprecated = latest_manifest
267                .as_ref()
268                .map(|m| m.tool.deprecated)
269                .unwrap_or(false);
270            let input_types = latest_manifest
271                .as_ref()
272                .map(|m| m.tool.inputs.iter().map(|i| i.r#type.to_string()).collect())
273                .unwrap_or_default();
274            let output_types = latest_manifest
275                .as_ref()
276                .map(|m| {
277                    m.tool
278                        .outputs
279                        .iter()
280                        .map(|o| o.r#type.to_string())
281                        .collect()
282                })
283                .unwrap_or_default();
284
285            tools.push(ToolSummary {
286                id,
287                latest_version: versions.last().map(|v| v.to_string()).unwrap_or_default(),
288                description,
289                tier,
290                deprecated,
291                input_types,
292                output_types,
293            });
294        }
295
296        tools.sort_by(|a, b| a.id.cmp(&b.id));
297        Ok(tools)
298    }
299
300    fn get_data_manifest(&self, dataset: &str, version: Option<&str>) -> Result<DataManifest> {
301        let data_dir = self.local_path.join("data").join(dataset);
302        if !data_dir.exists() {
303            return Err(BvError::IndexError(format!(
304                "dataset '{dataset}' not found in registry"
305            )));
306        }
307
308        let ver = if let Some(v) = version {
309            v.to_string()
310        } else {
311            let mut versions = self.list_data_versions(dataset)?;
312            versions.sort();
313            versions.into_iter().last().ok_or_else(|| {
314                BvError::IndexError(format!("no versions of '{dataset}' found in registry"))
315            })?
316        };
317
318        let manifest_path = data_dir.join(format!("{ver}.toml"));
319        let s = fs::read_to_string(&manifest_path).map_err(|e| {
320            BvError::IndexError(format!(
321                "could not read data manifest for '{dataset}@{ver}': {e}"
322            ))
323        })?;
324
325        DataManifest::from_toml_str(&s)
326    }
327
328    fn list_data_versions(&self, dataset: &str) -> Result<Vec<String>> {
329        let data_dir = self.local_path.join("data").join(dataset);
330        if !data_dir.exists() {
331            return Err(BvError::IndexError(format!(
332                "dataset '{dataset}' not found in registry"
333            )));
334        }
335
336        let mut versions = Vec::new();
337        let mut dropped: Vec<String> = Vec::new();
338        for entry in fs::read_dir(&data_dir)? {
339            let entry = entry?;
340            let path = entry.path();
341            if path.extension().is_some_and(|e| e == "toml")
342                && let Some(stem) = path.file_stem().and_then(|s| s.to_str())
343            {
344                if stem.parse::<Version>().is_ok() {
345                    versions.push(stem.to_string());
346                } else {
347                    dropped.push(stem.to_string());
348                }
349            }
350        }
351
352        if !dropped.is_empty() {
353            tracing::warn!(
354                dataset = %dataset,
355                dropped = ?dropped,
356                "ignoring dataset manifest files with non-semver names (expected MAJOR.MINOR.PATCH; \
357                 calver like 2024.01.0 is not valid semver)"
358            );
359        }
360
361        versions.sort();
362        Ok(versions)
363    }
364
365    fn list_datasets(&self) -> Result<Vec<String>> {
366        let data_dir = self.local_path.join("data");
367        if !data_dir.exists() {
368            return Ok(Vec::new());
369        }
370        let mut ids = Vec::new();
371        for entry in fs::read_dir(&data_dir)? {
372            let entry = entry?;
373            let path = entry.path();
374            if path.is_dir()
375                && let Some(name) = path.file_name().and_then(|s| s.to_str())
376            {
377                ids.push(name.to_string());
378            }
379        }
380        ids.sort();
381        Ok(ids)
382    }
383}
384
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::tempdir;

    /// Starts a `git` command with the given arguments, stdin detached and
    /// stderr discarded; callers choose what to do with stdout.
    fn quiet_git(args: &[&str]) -> Command {
        let mut cmd = Command::new("git");
        cmd.args(args).stdin(Stdio::null()).stderr(Stdio::null());
        cmd
    }

    /// Cloning must fail fast instead of waiting on a credential prompt.
    /// The clone targets a repository that does not exist; simply getting a
    /// result back shows the process did not block on stdin.
    #[test]
    fn git_refresh_does_not_prompt_for_credentials() {
        let scratch = tempdir().unwrap();
        let index = GitIndex::new(
            "https://github.com/tejasprabhune/definitely-does-not-exist-bv-test",
            scratch.path().join("clone"),
        );

        // Returning at all is the point: a prompt would hang here.
        let err = match index.git_refresh() {
            Ok(()) => panic!("expected clone of nonexistent repo to fail"),
            Err(err) => err,
        };
        assert!(
            !err.to_string().is_empty(),
            "expected a non-empty error message, not a silent hang"
        );
    }

    /// A clone whose `origin` points somewhere else gets re-pointed at the
    /// index's URL.
    #[test]
    fn maybe_update_remote_fixes_stale_url() {
        let scratch = tempdir().unwrap();
        let repo_path = scratch.path().join("repo");
        let repo = repo_path.to_str().unwrap();

        // A freshly initialised repository with an outdated `origin`.
        quiet_git(&["init", repo])
            .stdout(Stdio::null())
            .status()
            .unwrap();
        quiet_git(&[
            "-C",
            repo,
            "remote",
            "add",
            "origin",
            "https://github.com/old-org/old-repo",
        ])
        .stdout(Stdio::null())
        .status()
        .unwrap();

        let expected = "https://github.com/tejasprabhune/bv-registry";
        let index = GitIndex::new(expected, repo_path.clone());
        index.maybe_update_remote().unwrap();

        let out = quiet_git(&["-C", repo, "remote", "get-url", "origin"])
            .stdout(Stdio::piped())
            .output()
            .unwrap();
        let url = String::from_utf8_lossy(&out.stdout);
        assert_eq!(url.trim(), expected);
    }

    /// Only manifest files named with valid semver are reported as tool versions.
    #[test]
    fn list_versions_returns_only_valid_semver() {
        let root = tempdir().unwrap();
        let tool_dir = root.path().join("tools").join("tmalign");
        fs::create_dir_all(&tool_dir).unwrap();
        for file in ["1.2.3.toml", "20240303.toml", "2024.01.0.toml"] {
            fs::write(tool_dir.join(file), "").unwrap();
        }

        let index = GitIndex::new("unused", root.path().to_path_buf());
        let versions = index.list_versions("tmalign").unwrap();

        assert_eq!(versions, vec![Version::new(1, 2, 3)]);
    }

    /// Only manifest files named with valid semver are reported as dataset versions.
    #[test]
    fn list_data_versions_returns_only_valid_semver() {
        let root = tempdir().unwrap();
        let data_dir = root.path().join("data").join("uniref50");
        fs::create_dir_all(&data_dir).unwrap();
        for file in ["0.1.0.toml", "2024.01.0.toml"] {
            fs::write(data_dir.join(file), "").unwrap();
        }

        let index = GitIndex::new("unused", root.path().to_path_buf());
        let versions = index.list_data_versions("uniref50").unwrap();

        assert_eq!(versions, vec![String::from("0.1.0")]);
    }
}
493}