Skip to main content

normalize_package_index/index/
docker.rs

//! Docker container registry index fetcher.
//!
//! Fetches image metadata from container registries.
//!
//! ## API Strategy
//! - **fetch**: `hub.docker.com/v2/repositories/{namespace}/{name}` - Docker Hub API
//!   (Quay.io: `quay.io/api/v1/repository/{namespace}/{name}`)
//! - **fetch_versions**: `hub.docker.com/v2/repositories/{namespace}/{name}/tags`
//!   (Quay.io: `quay.io/api/v1/repository/{namespace}/{name}/tag/`)
//! - **search**: `hub.docker.com/v2/search/repositories?query=`
//!   (Quay.io: `quay.io/api/v1/find/repositories?query=`)
//! - **fetch_all**: Not supported (millions of images)
//!
//! GHCR and GCR are not queried over the network: `fetch` returns placeholder
//! metadata for them, and they are skipped for version listing and search.
//!
//! ## Multi-registry Support
//! ```rust,ignore
//! use normalize_packages::index::docker::{Docker, DockerRegistry};
//!
//! // All registries (default)
//! let all = Docker::all();
//!
//! // Docker Hub only
//! let hub = Docker::hub();
//!
//! // GitHub Container Registry
//! let ghcr = Docker::ghcr();
//! ```
24
25use super::{IndexError, PackageIndex, PackageMeta, VersionMeta};
26use std::collections::HashMap;
27
/// Container registries this index knows how to address.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum DockerRegistry {
    /// Docker Hub, the main public registry.
    DockerHub,
    /// GitHub Container Registry (`ghcr.io`).
    Ghcr,
    /// Quay.io (Red Hat).
    Quay,
    /// Google Container Registry (`gcr.io`).
    Gcr,
}

impl DockerRegistry {
    // Backing storage for the slice-returning selectors below.
    const EVERY: &'static [DockerRegistry] = &[
        DockerRegistry::DockerHub,
        DockerRegistry::Ghcr,
        DockerRegistry::Quay,
        DockerRegistry::Gcr,
    ];
    const HUB_ONLY: &'static [DockerRegistry] = &[DockerRegistry::DockerHub];
    const GHCR_ONLY: &'static [DockerRegistry] = &[DockerRegistry::Ghcr];
    const CLOUD_NATIVE: &'static [DockerRegistry] = &[DockerRegistry::Quay, DockerRegistry::Gcr];

    /// Short identifier used to tag results with the registry they came from.
    pub fn name(&self) -> &'static str {
        match *self {
            DockerRegistry::DockerHub => "docker-hub",
            DockerRegistry::Ghcr => "ghcr",
            DockerRegistry::Quay => "quay",
            DockerRegistry::Gcr => "gcr",
        }
    }

    /// Host prefix that image names on this registry start with.
    ///
    /// Docker Hub images carry no prefix, so it maps to the empty string.
    pub fn prefix(&self) -> &'static str {
        match *self {
            DockerRegistry::DockerHub => "",
            DockerRegistry::Ghcr => "ghcr.io/",
            DockerRegistry::Quay => "quay.io/",
            DockerRegistry::Gcr => "gcr.io/",
        }
    }

    /// Every registry, in declaration order.
    pub fn all() -> &'static [DockerRegistry] {
        Self::EVERY
    }

    /// Just Docker Hub.
    pub fn docker_hub() -> &'static [DockerRegistry] {
        Self::HUB_ONLY
    }

    /// Just the GitHub Container Registry.
    pub fn ghcr() -> &'static [DockerRegistry] {
        Self::GHCR_ONLY
    }

    /// The cloud-native pair: Quay followed by GCR.
    pub fn cloud() -> &'static [DockerRegistry] {
        Self::CLOUD_NATIVE
    }
}
82
83/// Docker container registry fetcher with configurable registries.
84pub struct Docker {
85    registries: Vec<DockerRegistry>,
86}
87
88impl Docker {
89    /// Create a fetcher with all registries.
90    pub fn all() -> Self {
91        Self {
92            registries: DockerRegistry::all().to_vec(),
93        }
94    }
95
96    /// Create a fetcher with Docker Hub only.
97    pub fn hub() -> Self {
98        Self {
99            registries: DockerRegistry::docker_hub().to_vec(),
100        }
101    }
102
103    /// Create a fetcher with GitHub Container Registry only.
104    pub fn ghcr() -> Self {
105        Self {
106            registries: DockerRegistry::ghcr().to_vec(),
107        }
108    }
109
110    /// Create a fetcher with cloud registries (Quay + GCR).
111    pub fn cloud() -> Self {
112        Self {
113            registries: DockerRegistry::cloud().to_vec(),
114        }
115    }
116
117    /// Create a fetcher with custom registry selection.
118    pub fn with_registries(registries: &[DockerRegistry]) -> Self {
119        Self {
120            registries: registries.to_vec(),
121        }
122    }
123
124    /// Detect which registry an image name refers to.
125    fn detect_registry(name: &str) -> (DockerRegistry, String) {
126        if name.starts_with("ghcr.io/") {
127            (
128                DockerRegistry::Ghcr,
129                name.trim_start_matches("ghcr.io/").to_string(),
130            )
131        } else if name.starts_with("quay.io/") {
132            (
133                DockerRegistry::Quay,
134                name.trim_start_matches("quay.io/").to_string(),
135            )
136        } else if name.starts_with("gcr.io/") {
137            (
138                DockerRegistry::Gcr,
139                name.trim_start_matches("gcr.io/").to_string(),
140            )
141        } else {
142            (DockerRegistry::DockerHub, name.to_string())
143        }
144    }
145
146    /// Fetch from Docker Hub.
147    fn fetch_from_dockerhub(name: &str) -> Result<(PackageMeta, DockerRegistry), IndexError> {
148        let (namespace, repo) = if name.contains('/') {
149            let parts: Vec<&str> = name.splitn(2, '/').collect();
150            (parts[0], parts[1])
151        } else {
152            ("library", name)
153        };
154
155        let url = format!(
156            "https://hub.docker.com/v2/repositories/{}/{}/",
157            namespace, repo
158        );
159        let response: serde_json::Value = ureq::get(&url)
160            .call()
161            .map_err(|_| IndexError::NotFound(name.to_string()))?
162            .into_json()?;
163
164        // Get latest tag info
165        let tags_url = format!(
166            "https://hub.docker.com/v2/repositories/{}/{}/tags?page_size=1&ordering=-last_updated",
167            namespace, repo
168        );
169        let tags: serde_json::Value = ureq::get(&tags_url)
170            .call()
171            .map_err(|_| IndexError::NotFound(name.to_string()))?
172            .into_json()?;
173
174        let latest_tag = tags["results"]
175            .as_array()
176            .and_then(|arr| arr.first())
177            .and_then(|t| t["name"].as_str())
178            .unwrap_or("latest");
179
180        let keywords: Vec<String> = response["categories"]
181            .as_array()
182            .map(|arr| {
183                arr.iter()
184                    .filter_map(|c| c["slug"].as_str().map(String::from))
185                    .collect()
186            })
187            .unwrap_or_default();
188
189        let mut extra = HashMap::new();
190        extra.insert(
191            "source_repo".to_string(),
192            serde_json::Value::String("docker-hub".to_string()),
193        );
194
195        Ok((
196            PackageMeta {
197                name: format!(
198                    "{}/{}",
199                    namespace,
200                    response["name"].as_str().unwrap_or(repo)
201                ),
202                version: latest_tag.to_string(),
203                description: response["description"].as_str().map(String::from),
204                homepage: None,
205                repository: None,
206                license: None,
207                binaries: Vec::new(),
208                keywords,
209                maintainers: vec![
210                    response["namespace"]
211                        .as_str()
212                        .unwrap_or(namespace)
213                        .to_string(),
214                ],
215                published: response["last_updated"].as_str().map(String::from),
216                downloads: response["pull_count"].as_u64(),
217                archive_url: None,
218                checksum: None,
219                extra,
220            },
221            DockerRegistry::DockerHub,
222        ))
223    }
224
225    /// Fetch tags from Docker Hub.
226    fn fetch_versions_dockerhub(name: &str) -> Result<Vec<VersionMeta>, IndexError> {
227        let (namespace, repo) = if name.contains('/') {
228            let parts: Vec<&str> = name.splitn(2, '/').collect();
229            (parts[0], parts[1])
230        } else {
231            ("library", name)
232        };
233
234        let url = format!(
235            "https://hub.docker.com/v2/repositories/{}/{}/tags?page_size=50&ordering=-last_updated",
236            namespace, repo
237        );
238        let response: serde_json::Value = ureq::get(&url)
239            .call()
240            .map_err(|_| IndexError::NotFound(name.to_string()))?
241            .into_json()?;
242
243        let tags = response["results"]
244            .as_array()
245            .ok_or_else(|| IndexError::NotFound(name.to_string()))?;
246
247        Ok(tags
248            .iter()
249            .filter_map(|t| {
250                Some(VersionMeta {
251                    version: format!("{} (docker-hub)", t["name"].as_str()?),
252                    released: t["last_updated"].as_str().map(String::from),
253                    yanked: false,
254                })
255            })
256            .collect())
257    }
258
259    /// Fetch all tags with full metadata from Docker Hub.
260    fn fetch_all_versions_dockerhub(name: &str) -> Result<Vec<PackageMeta>, IndexError> {
261        let (namespace, repo) = if name.contains('/') {
262            let parts: Vec<&str> = name.splitn(2, '/').collect();
263            (parts[0], parts[1])
264        } else {
265            ("library", name)
266        };
267
268        // Get repository info for shared metadata
269        let repo_url = format!(
270            "https://hub.docker.com/v2/repositories/{}/{}/",
271            namespace, repo
272        );
273        let repo_info: serde_json::Value = ureq::get(&repo_url)
274            .call()
275            .map_err(|_| IndexError::NotFound(name.to_string()))?
276            .into_json()?;
277
278        let description = repo_info["description"].as_str().map(String::from);
279        let pull_count = repo_info["pull_count"].as_u64();
280
281        // Get tags with full metadata
282        let tags_url = format!(
283            "https://hub.docker.com/v2/repositories/{}/{}/tags?page_size=100&ordering=-last_updated",
284            namespace, repo
285        );
286        let response: serde_json::Value = ureq::get(&tags_url)
287            .call()
288            .map_err(|_| IndexError::NotFound(name.to_string()))?
289            .into_json()?;
290
291        let tags = response["results"]
292            .as_array()
293            .ok_or_else(|| IndexError::NotFound(name.to_string()))?;
294
295        let full_name = format!("{}/{}", namespace, repo);
296
297        Ok(tags
298            .iter()
299            .filter_map(|t| {
300                let tag_name = t["name"].as_str()?;
301                let mut extra = HashMap::new();
302
303                extra.insert(
304                    "source_repo".to_string(),
305                    serde_json::Value::String("docker-hub".to_string()),
306                );
307
308                // Digest
309                if let Some(digest) = t["digest"].as_str() {
310                    extra.insert(
311                        "digest".to_string(),
312                        serde_json::Value::String(digest.to_string()),
313                    );
314                }
315
316                // Full size in bytes
317                if let Some(size) = t["full_size"].as_u64() {
318                    extra.insert("size".to_string(), serde_json::Value::Number(size.into()));
319                }
320
321                // Architecture info from images array
322                if let Some(images) = t["images"].as_array() {
323                    let archs: Vec<serde_json::Value> = images
324                        .iter()
325                        .filter_map(|img| {
326                            img["architecture"]
327                                .as_str()
328                                .map(|a| serde_json::Value::String(a.to_string()))
329                        })
330                        .collect();
331                    if !archs.is_empty() {
332                        extra.insert("architectures".to_string(), serde_json::Value::Array(archs));
333                    }
334
335                    // OS info
336                    let os_list: Vec<serde_json::Value> = images
337                        .iter()
338                        .filter_map(|img| {
339                            img["os"]
340                                .as_str()
341                                .map(|o| serde_json::Value::String(o.to_string()))
342                        })
343                        .collect();
344                    if !os_list.is_empty() {
345                        // Dedupe
346                        let unique: std::collections::HashSet<_> =
347                            os_list.iter().filter_map(|v| v.as_str()).collect();
348                        let unique_vec: Vec<serde_json::Value> = unique
349                            .into_iter()
350                            .map(|s| serde_json::Value::String(s.to_string()))
351                            .collect();
352                        extra.insert("os".to_string(), serde_json::Value::Array(unique_vec));
353                    }
354                }
355
356                Some(PackageMeta {
357                    name: full_name.clone(),
358                    version: tag_name.to_string(),
359                    description: description.clone(),
360                    homepage: None,
361                    repository: None,
362                    license: None,
363                    binaries: Vec::new(),
364                    keywords: Vec::new(),
365                    maintainers: vec![namespace.to_string()],
366                    published: t["last_updated"].as_str().map(String::from),
367                    downloads: pull_count,
368                    archive_url: None,
369                    checksum: t["digest"].as_str().map(String::from),
370                    extra,
371                })
372            })
373            .collect())
374    }
375
376    /// Fetch from Quay.io.
377    fn fetch_from_quay(name: &str) -> Result<(PackageMeta, DockerRegistry), IndexError> {
378        let (namespace, repo) = if name.contains('/') {
379            let parts: Vec<&str> = name.splitn(2, '/').collect();
380            (parts[0], parts[1])
381        } else {
382            return Err(IndexError::Parse(
383                "Quay.io requires namespace/repo format".into(),
384            ));
385        };
386
387        let url = format!("https://quay.io/api/v1/repository/{}/{}", namespace, repo);
388        let response: serde_json::Value = ureq::get(&url)
389            .call()
390            .map_err(|_| IndexError::NotFound(name.to_string()))?
391            .into_json()?;
392
393        let latest_tag = response["tags"]
394            .as_object()
395            .and_then(|tags| tags.keys().next())
396            .map(|s| s.as_str())
397            .unwrap_or("latest");
398
399        let mut extra = HashMap::new();
400        extra.insert(
401            "source_repo".to_string(),
402            serde_json::Value::String("quay".to_string()),
403        );
404
405        Ok((
406            PackageMeta {
407                name: format!("quay.io/{}/{}", namespace, repo),
408                version: latest_tag.to_string(),
409                description: response["description"].as_str().map(String::from),
410                homepage: None,
411                repository: None,
412                license: None,
413                binaries: Vec::new(),
414                keywords: Vec::new(),
415                maintainers: vec![namespace.to_string()],
416                published: None,
417                downloads: None,
418                archive_url: None,
419                checksum: None,
420                extra,
421            },
422            DockerRegistry::Quay,
423        ))
424    }
425
426    /// Fetch tags from Quay.io.
427    fn fetch_versions_quay(name: &str) -> Result<Vec<VersionMeta>, IndexError> {
428        let (namespace, repo) = if name.contains('/') {
429            let parts: Vec<&str> = name.splitn(2, '/').collect();
430            (parts[0], parts[1])
431        } else {
432            return Err(IndexError::Parse(
433                "Quay.io requires namespace/repo format".into(),
434            ));
435        };
436
437        let url = format!(
438            "https://quay.io/api/v1/repository/{}/{}/tag/",
439            namespace, repo
440        );
441        let response: serde_json::Value = ureq::get(&url)
442            .call()
443            .map_err(|_| IndexError::NotFound(name.to_string()))?
444            .into_json()?;
445
446        let tags = response["tags"]
447            .as_array()
448            .ok_or_else(|| IndexError::NotFound(name.to_string()))?;
449
450        Ok(tags
451            .iter()
452            .filter_map(|t| {
453                Some(VersionMeta {
454                    version: format!("{} (quay)", t["name"].as_str()?),
455                    released: t["last_modified"].as_str().map(String::from),
456                    yanked: false,
457                })
458            })
459            .collect())
460    }
461
462    /// Search Docker Hub.
463    fn search_dockerhub(query: &str) -> Result<Vec<PackageMeta>, IndexError> {
464        let url = format!(
465            "https://hub.docker.com/v2/search/repositories?query={}&page_size=25",
466            query
467        );
468        let response: serde_json::Value = ureq::get(&url).call()?.into_json()?;
469
470        let results = response["results"]
471            .as_array()
472            .ok_or_else(|| IndexError::Parse("Invalid search response".into()))?;
473
474        let mut extra = HashMap::new();
475        extra.insert(
476            "source_repo".to_string(),
477            serde_json::Value::String("docker-hub".to_string()),
478        );
479
480        Ok(results
481            .iter()
482            .filter_map(|img| {
483                let name = if img["is_official"].as_bool().unwrap_or(false) {
484                    format!("library/{}", img["repo_name"].as_str()?)
485                } else {
486                    img["repo_name"].as_str()?.to_string()
487                };
488
489                Some(PackageMeta {
490                    name,
491                    version: "latest".to_string(),
492                    description: img["short_description"].as_str().map(String::from),
493                    homepage: None,
494                    repository: None,
495                    license: None,
496                    binaries: Vec::new(),
497                    keywords: Vec::new(),
498                    maintainers: Vec::new(),
499                    published: None,
500                    downloads: img["pull_count"].as_u64(),
501                    archive_url: None,
502                    checksum: None,
503                    extra: extra.clone(),
504                })
505            })
506            .collect())
507    }
508
509    /// Search Quay.io.
510    fn search_quay(query: &str) -> Result<Vec<PackageMeta>, IndexError> {
511        let url = format!("https://quay.io/api/v1/find/repositories?query={}", query);
512        let response: serde_json::Value = ureq::get(&url).call()?.into_json()?;
513
514        let results = response["results"]
515            .as_array()
516            .ok_or_else(|| IndexError::Parse("Invalid search response".into()))?;
517
518        let mut extra = HashMap::new();
519        extra.insert(
520            "source_repo".to_string(),
521            serde_json::Value::String("quay".to_string()),
522        );
523
524        Ok(results
525            .iter()
526            .filter_map(|repo| {
527                let namespace = repo["namespace"]["name"].as_str()?;
528                let name = repo["name"].as_str()?;
529
530                Some(PackageMeta {
531                    name: format!("quay.io/{}/{}", namespace, name),
532                    version: "latest".to_string(),
533                    description: repo["description"].as_str().map(String::from),
534                    homepage: None,
535                    repository: None,
536                    license: None,
537                    binaries: Vec::new(),
538                    keywords: Vec::new(),
539                    maintainers: vec![namespace.to_string()],
540                    published: None,
541                    downloads: None,
542                    archive_url: None,
543                    checksum: None,
544                    extra: extra.clone(),
545                })
546            })
547            .collect())
548    }
549}
550
551impl PackageIndex for Docker {
552    fn ecosystem(&self) -> &'static str {
553        "docker"
554    }
555
556    fn display_name(&self) -> &'static str {
557        "Container Registries (Docker)"
558    }
559
560    fn fetch(&self, name: &str) -> Result<PackageMeta, IndexError> {
561        let (detected_registry, clean_name) = Self::detect_registry(name);
562
563        // If the detected registry is in our configured list, use it
564        if self.registries.contains(&detected_registry) {
565            return match detected_registry {
566                DockerRegistry::DockerHub => {
567                    Self::fetch_from_dockerhub(&clean_name).map(|(p, _)| p)
568                }
569                DockerRegistry::Quay => Self::fetch_from_quay(&clean_name).map(|(p, _)| p),
570                DockerRegistry::Ghcr | DockerRegistry::Gcr => {
571                    // GHCR and GCR require authentication for most operations
572                    // Return basic metadata from what we know
573                    let mut extra = HashMap::new();
574                    extra.insert(
575                        "source_repo".to_string(),
576                        serde_json::Value::String(detected_registry.name().to_string()),
577                    );
578                    Ok(PackageMeta {
579                        name: format!("{}{}", detected_registry.prefix(), clean_name),
580                        version: "latest".to_string(),
581                        description: None,
582                        homepage: None,
583                        repository: None,
584                        license: None,
585                        binaries: Vec::new(),
586                        keywords: Vec::new(),
587                        maintainers: Vec::new(),
588                        published: None,
589                        downloads: None,
590                        archive_url: None,
591                        checksum: None,
592                        extra,
593                    })
594                }
595            };
596        }
597
598        // Try each configured registry
599        for &registry in &self.registries {
600            let result = match registry {
601                DockerRegistry::DockerHub => Self::fetch_from_dockerhub(name),
602                DockerRegistry::Quay => Self::fetch_from_quay(name),
603                DockerRegistry::Ghcr | DockerRegistry::Gcr => continue, // Skip auth-required registries
604            };
605
606            if let Ok((pkg, _)) = result {
607                return Ok(pkg);
608            }
609        }
610
611        Err(IndexError::NotFound(name.to_string()))
612    }
613
614    fn fetch_versions(&self, name: &str) -> Result<Vec<VersionMeta>, IndexError> {
615        let (detected_registry, clean_name) = Self::detect_registry(name);
616        let mut all_versions = Vec::new();
617
618        // If the detected registry is in our configured list, use it
619        if self.registries.contains(&detected_registry) {
620            let versions = match detected_registry {
621                DockerRegistry::DockerHub => Self::fetch_versions_dockerhub(&clean_name),
622                DockerRegistry::Quay => Self::fetch_versions_quay(&clean_name),
623                DockerRegistry::Ghcr | DockerRegistry::Gcr => {
624                    // These require authentication
625                    Err(IndexError::Parse("Registry requires authentication".into()))
626                }
627            };
628
629            if let Ok(v) = versions {
630                return Ok(v);
631            }
632        }
633
634        // Try each configured registry
635        for &registry in &self.registries {
636            let result = match registry {
637                DockerRegistry::DockerHub => Self::fetch_versions_dockerhub(name),
638                DockerRegistry::Quay => Self::fetch_versions_quay(name),
639                DockerRegistry::Ghcr | DockerRegistry::Gcr => continue,
640            };
641
642            if let Ok(versions) = result {
643                all_versions.extend(versions);
644            }
645        }
646
647        if all_versions.is_empty() {
648            return Err(IndexError::NotFound(name.to_string()));
649        }
650
651        Ok(all_versions)
652    }
653
654    fn fetch_all_versions(&self, name: &str) -> Result<Vec<PackageMeta>, IndexError> {
655        let (detected_registry, clean_name) = Self::detect_registry(name);
656
657        // If the detected registry is in our configured list, use it
658        if self.registries.contains(&detected_registry) {
659            return match detected_registry {
660                DockerRegistry::DockerHub => Self::fetch_all_versions_dockerhub(&clean_name),
661                DockerRegistry::Quay | DockerRegistry::Ghcr | DockerRegistry::Gcr => {
662                    // Fall back to default implementation for other registries
663                    let versions = self.fetch_versions(name)?;
664                    Ok(versions
665                        .into_iter()
666                        .map(|v| PackageMeta {
667                            name: name.to_string(),
668                            version: v.version,
669                            published: v.released,
670                            ..Default::default()
671                        })
672                        .collect())
673                }
674            };
675        }
676
677        // Try Docker Hub if configured
678        if self.registries.contains(&DockerRegistry::DockerHub) {
679            if let Ok(versions) = Self::fetch_all_versions_dockerhub(name) {
680                return Ok(versions);
681            }
682        }
683
684        Err(IndexError::NotFound(name.to_string()))
685    }
686
687    fn search(&self, query: &str) -> Result<Vec<PackageMeta>, IndexError> {
688        let mut results = Vec::new();
689
690        // Search Docker Hub if configured
691        if self.registries.contains(&DockerRegistry::DockerHub) {
692            if let Ok(packages) = Self::search_dockerhub(query) {
693                results.extend(packages);
694            }
695        }
696
697        // Search Quay if configured
698        if self.registries.contains(&DockerRegistry::Quay) {
699            if let Ok(packages) = Self::search_quay(query) {
700                results.extend(packages);
701            }
702        }
703
704        // GHCR and GCR don't have public search APIs
705
706        Ok(results)
707    }
708}