Skip to main content

normalize_package_index/index/
docker.rs

//! Docker container registry index fetcher.
//!
//! Fetches image metadata from container registries.
//!
//! ## API Strategy
//! - **fetch**: `hub.docker.com/v2/repositories/{namespace}/{name}` - Docker Hub API
//! - **fetch_versions**: `hub.docker.com/v2/repositories/{namespace}/{name}/tags`
//! - **search**: `hub.docker.com/v2/search/repositories?query=`
//! - **fetch_all**: Not supported (millions of images)
//!
//! ## Multi-registry Support
//! ```rust,ignore
//! use normalize_packages::index::docker::{Docker, DockerRegistry};
//!
//! // All registries (default)
//! let all = Docker::all();
//!
//! // Docker Hub only
//! let hub = Docker::hub();
//!
//! // GitHub Container Registry
//! let ghcr = Docker::ghcr();
//! ```
24
25use super::{IndexError, PackageIndex, PackageMeta, VersionMeta};
26use std::collections::HashMap;
27
/// Container registries this module can address.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum DockerRegistry {
    /// Docker Hub - the main public registry
    DockerHub,
    /// GitHub Container Registry (ghcr.io)
    Ghcr,
    /// Quay.io (Red Hat)
    Quay,
    /// Google Container Registry (gcr.io)
    Gcr,
}

impl DockerRegistry {
    /// Short identifier for the registry, used when tagging results.
    pub fn name(&self) -> &'static str {
        match *self {
            DockerRegistry::DockerHub => "docker-hub",
            DockerRegistry::Ghcr => "ghcr",
            DockerRegistry::Quay => "quay",
            DockerRegistry::Gcr => "gcr",
        }
    }

    /// Host prefix that image names on this registry start with.
    ///
    /// Docker Hub images carry no prefix, so the empty string is returned.
    pub fn prefix(&self) -> &'static str {
        match *self {
            DockerRegistry::DockerHub => "",
            DockerRegistry::Ghcr => "ghcr.io/",
            DockerRegistry::Quay => "quay.io/",
            DockerRegistry::Gcr => "gcr.io/",
        }
    }

    /// Every registry, in the order Docker Hub, GHCR, Quay, GCR.
    pub fn all() -> &'static [DockerRegistry] {
        const ALL: &[DockerRegistry] = &[
            DockerRegistry::DockerHub,
            DockerRegistry::Ghcr,
            DockerRegistry::Quay,
            DockerRegistry::Gcr,
        ];
        ALL
    }

    /// A single-element selection containing Docker Hub.
    pub fn docker_hub() -> &'static [DockerRegistry] {
        const HUB_ONLY: &[DockerRegistry] = &[DockerRegistry::DockerHub];
        HUB_ONLY
    }

    /// A single-element selection containing the GitHub Container Registry.
    pub fn ghcr() -> &'static [DockerRegistry] {
        const GHCR_ONLY: &[DockerRegistry] = &[DockerRegistry::Ghcr];
        GHCR_ONLY
    }

    /// The cloud-native selection: Quay followed by GCR.
    pub fn cloud() -> &'static [DockerRegistry] {
        const CLOUD: &[DockerRegistry] = &[DockerRegistry::Quay, DockerRegistry::Gcr];
        CLOUD
    }
}
82
/// Result of registry detection: the registry an image name points at and
/// the name with any registry host prefix removed.
struct DetectedRegistry {
    /// Registry chosen from the name's prefix; Docker Hub when no known prefix matches.
    registry: DockerRegistry,
    /// Image name without the `ghcr.io/`, `quay.io/` or `gcr.io/` prefix.
    clean_name: String,
}
87
/// Package metadata paired with the registry that produced it.
struct FetchedPackage {
    /// Metadata assembled from the registry's API response.
    package: PackageMeta,
    // Callers in this file only read `package`; the lint is silenced so the
    // originating registry can stay recorded alongside it.
    #[allow(dead_code)]
    registry: DockerRegistry,
}
93
/// Docker container registry fetcher with configurable registries.
pub struct Docker {
    /// Registries this fetcher may consult; fallback lookups walk them in this order.
    registries: Vec<DockerRegistry>,
}
98
99impl Docker {
100    /// Create a fetcher with all registries.
101    pub fn all() -> Self {
102        Self {
103            registries: DockerRegistry::all().to_vec(),
104        }
105    }
106
107    /// Create a fetcher with Docker Hub only.
108    pub fn hub() -> Self {
109        Self {
110            registries: DockerRegistry::docker_hub().to_vec(),
111        }
112    }
113
114    /// Create a fetcher with GitHub Container Registry only.
115    pub fn ghcr() -> Self {
116        Self {
117            registries: DockerRegistry::ghcr().to_vec(),
118        }
119    }
120
121    /// Create a fetcher with cloud registries (Quay + GCR).
122    pub fn cloud() -> Self {
123        Self {
124            registries: DockerRegistry::cloud().to_vec(),
125        }
126    }
127
128    /// Create a fetcher with custom registry selection.
129    pub fn with_registries(registries: &[DockerRegistry]) -> Self {
130        Self {
131            registries: registries.to_vec(),
132        }
133    }
134
135    /// Detect which registry an image name refers to.
136    fn detect_registry(name: &str) -> DetectedRegistry {
137        if name.starts_with("ghcr.io/") {
138            DetectedRegistry {
139                registry: DockerRegistry::Ghcr,
140                clean_name: name.trim_start_matches("ghcr.io/").to_string(),
141            }
142        } else if name.starts_with("quay.io/") {
143            DetectedRegistry {
144                registry: DockerRegistry::Quay,
145                clean_name: name.trim_start_matches("quay.io/").to_string(),
146            }
147        } else if name.starts_with("gcr.io/") {
148            DetectedRegistry {
149                registry: DockerRegistry::Gcr,
150                clean_name: name.trim_start_matches("gcr.io/").to_string(),
151            }
152        } else {
153            DetectedRegistry {
154                registry: DockerRegistry::DockerHub,
155                clean_name: name.to_string(),
156            }
157        }
158    }
159
160    /// Fetch from Docker Hub.
161    fn fetch_from_dockerhub(name: &str) -> Result<FetchedPackage, IndexError> {
162        let (namespace, repo) = if name.contains('/') {
163            let parts: Vec<&str> = name.splitn(2, '/').collect();
164            (parts[0], parts[1])
165        } else {
166            ("library", name)
167        };
168
169        let url = format!(
170            "https://hub.docker.com/v2/repositories/{}/{}/",
171            namespace, repo
172        );
173        let response: serde_json::Value = ureq::get(&url)
174            .call()
175            .map_err(|_| IndexError::NotFound(name.to_string()))?
176            .into_json()?;
177
178        // Get latest tag info
179        let tags_url = format!(
180            "https://hub.docker.com/v2/repositories/{}/{}/tags?page_size=1&ordering=-last_updated",
181            namespace, repo
182        );
183        let tags: serde_json::Value = ureq::get(&tags_url)
184            .call()
185            .map_err(|_| IndexError::NotFound(name.to_string()))?
186            .into_json()?;
187
188        let latest_tag = tags["results"]
189            .as_array()
190            .and_then(|arr| arr.first())
191            .and_then(|t| t["name"].as_str())
192            .unwrap_or("latest");
193
194        let keywords: Vec<String> = response["categories"]
195            .as_array()
196            .map(|arr| {
197                arr.iter()
198                    .filter_map(|c| c["slug"].as_str().map(String::from))
199                    .collect()
200            })
201            .unwrap_or_default();
202
203        let mut extra = HashMap::new();
204        extra.insert(
205            "source_repo".to_string(),
206            serde_json::Value::String("docker-hub".to_string()),
207        );
208
209        Ok(FetchedPackage {
210            package: PackageMeta {
211                name: format!(
212                    "{}/{}",
213                    namespace,
214                    response["name"].as_str().unwrap_or(repo)
215                ),
216                version: latest_tag.to_string(),
217                description: response["description"].as_str().map(String::from),
218                homepage: None,
219                repository: None,
220                license: None,
221                binaries: Vec::new(),
222                keywords,
223                maintainers: vec![
224                    response["namespace"]
225                        .as_str()
226                        .unwrap_or(namespace)
227                        .to_string(),
228                ],
229                published: response["last_updated"].as_str().map(String::from),
230                downloads: response["pull_count"].as_u64(),
231                archive_url: None,
232                checksum: None,
233                extra,
234            },
235            registry: DockerRegistry::DockerHub,
236        })
237    }
238
239    /// Fetch tags from Docker Hub.
240    fn fetch_versions_dockerhub(name: &str) -> Result<Vec<VersionMeta>, IndexError> {
241        let (namespace, repo) = if name.contains('/') {
242            let parts: Vec<&str> = name.splitn(2, '/').collect();
243            (parts[0], parts[1])
244        } else {
245            ("library", name)
246        };
247
248        let url = format!(
249            "https://hub.docker.com/v2/repositories/{}/{}/tags?page_size=50&ordering=-last_updated",
250            namespace, repo
251        );
252        let response: serde_json::Value = ureq::get(&url)
253            .call()
254            .map_err(|_| IndexError::NotFound(name.to_string()))?
255            .into_json()?;
256
257        let tags = response["results"]
258            .as_array()
259            .ok_or_else(|| IndexError::NotFound(name.to_string()))?;
260
261        Ok(tags
262            .iter()
263            .filter_map(|t| {
264                Some(VersionMeta {
265                    version: format!("{} (docker-hub)", t["name"].as_str()?),
266                    released: t["last_updated"].as_str().map(String::from),
267                    yanked: false,
268                })
269            })
270            .collect())
271    }
272
273    /// Fetch all tags with full metadata from Docker Hub.
274    fn fetch_all_versions_dockerhub(name: &str) -> Result<Vec<PackageMeta>, IndexError> {
275        let (namespace, repo) = if name.contains('/') {
276            let parts: Vec<&str> = name.splitn(2, '/').collect();
277            (parts[0], parts[1])
278        } else {
279            ("library", name)
280        };
281
282        // Get repository info for shared metadata
283        let repo_url = format!(
284            "https://hub.docker.com/v2/repositories/{}/{}/",
285            namespace, repo
286        );
287        let repo_info: serde_json::Value = ureq::get(&repo_url)
288            .call()
289            .map_err(|_| IndexError::NotFound(name.to_string()))?
290            .into_json()?;
291
292        let description = repo_info["description"].as_str().map(String::from);
293        let pull_count = repo_info["pull_count"].as_u64();
294
295        // Get tags with full metadata
296        let tags_url = format!(
297            "https://hub.docker.com/v2/repositories/{}/{}/tags?page_size=100&ordering=-last_updated",
298            namespace, repo
299        );
300        let response: serde_json::Value = ureq::get(&tags_url)
301            .call()
302            .map_err(|_| IndexError::NotFound(name.to_string()))?
303            .into_json()?;
304
305        let tags = response["results"]
306            .as_array()
307            .ok_or_else(|| IndexError::NotFound(name.to_string()))?;
308
309        let full_name = format!("{}/{}", namespace, repo);
310
311        Ok(tags
312            .iter()
313            .filter_map(|t| {
314                let tag_name = t["name"].as_str()?;
315                let mut extra = HashMap::new();
316
317                extra.insert(
318                    "source_repo".to_string(),
319                    serde_json::Value::String("docker-hub".to_string()),
320                );
321
322                // Digest
323                if let Some(digest) = t["digest"].as_str() {
324                    extra.insert(
325                        "digest".to_string(),
326                        serde_json::Value::String(digest.to_string()),
327                    );
328                }
329
330                // Full size in bytes
331                if let Some(size) = t["full_size"].as_u64() {
332                    extra.insert("size".to_string(), serde_json::Value::Number(size.into()));
333                }
334
335                // Architecture info from images array
336                if let Some(images) = t["images"].as_array() {
337                    let archs: Vec<serde_json::Value> = images
338                        .iter()
339                        .filter_map(|img| {
340                            img["architecture"]
341                                .as_str()
342                                .map(|a| serde_json::Value::String(a.to_string()))
343                        })
344                        .collect();
345                    if !archs.is_empty() {
346                        extra.insert("architectures".to_string(), serde_json::Value::Array(archs));
347                    }
348
349                    // OS info
350                    let os_list: Vec<serde_json::Value> = images
351                        .iter()
352                        .filter_map(|img| {
353                            img["os"]
354                                .as_str()
355                                .map(|o| serde_json::Value::String(o.to_string()))
356                        })
357                        .collect();
358                    if !os_list.is_empty() {
359                        // Dedupe
360                        let unique: std::collections::HashSet<_> =
361                            os_list.iter().filter_map(|v| v.as_str()).collect();
362                        let unique_vec: Vec<serde_json::Value> = unique
363                            .into_iter()
364                            .map(|s| serde_json::Value::String(s.to_string()))
365                            .collect();
366                        extra.insert("os".to_string(), serde_json::Value::Array(unique_vec));
367                    }
368                }
369
370                Some(PackageMeta {
371                    name: full_name.clone(),
372                    version: tag_name.to_string(),
373                    description: description.clone(),
374                    homepage: None,
375                    repository: None,
376                    license: None,
377                    binaries: Vec::new(),
378                    keywords: Vec::new(),
379                    maintainers: vec![namespace.to_string()],
380                    published: t["last_updated"].as_str().map(String::from),
381                    downloads: pull_count,
382                    archive_url: None,
383                    checksum: t["digest"].as_str().map(String::from),
384                    extra,
385                })
386            })
387            .collect())
388    }
389
390    /// Fetch from Quay.io.
391    fn fetch_from_quay(name: &str) -> Result<FetchedPackage, IndexError> {
392        let (namespace, repo) = if name.contains('/') {
393            let parts: Vec<&str> = name.splitn(2, '/').collect();
394            (parts[0], parts[1])
395        } else {
396            return Err(IndexError::Parse(
397                "Quay.io requires namespace/repo format".into(),
398            ));
399        };
400
401        let url = format!("https://quay.io/api/v1/repository/{}/{}", namespace, repo);
402        let response: serde_json::Value = ureq::get(&url)
403            .call()
404            .map_err(|_| IndexError::NotFound(name.to_string()))?
405            .into_json()?;
406
407        let latest_tag = response["tags"]
408            .as_object()
409            .and_then(|tags| tags.keys().next())
410            .map(|s| s.as_str())
411            .unwrap_or("latest");
412
413        let mut extra = HashMap::new();
414        extra.insert(
415            "source_repo".to_string(),
416            serde_json::Value::String("quay".to_string()),
417        );
418
419        Ok(FetchedPackage {
420            package: PackageMeta {
421                name: format!("quay.io/{}/{}", namespace, repo),
422                version: latest_tag.to_string(),
423                description: response["description"].as_str().map(String::from),
424                homepage: None,
425                repository: None,
426                license: None,
427                binaries: Vec::new(),
428                keywords: Vec::new(),
429                maintainers: vec![namespace.to_string()],
430                published: None,
431                downloads: None,
432                archive_url: None,
433                checksum: None,
434                extra,
435            },
436            registry: DockerRegistry::Quay,
437        })
438    }
439
440    /// Fetch tags from Quay.io.
441    fn fetch_versions_quay(name: &str) -> Result<Vec<VersionMeta>, IndexError> {
442        let (namespace, repo) = if name.contains('/') {
443            let parts: Vec<&str> = name.splitn(2, '/').collect();
444            (parts[0], parts[1])
445        } else {
446            return Err(IndexError::Parse(
447                "Quay.io requires namespace/repo format".into(),
448            ));
449        };
450
451        let url = format!(
452            "https://quay.io/api/v1/repository/{}/{}/tag/",
453            namespace, repo
454        );
455        let response: serde_json::Value = ureq::get(&url)
456            .call()
457            .map_err(|_| IndexError::NotFound(name.to_string()))?
458            .into_json()?;
459
460        let tags = response["tags"]
461            .as_array()
462            .ok_or_else(|| IndexError::NotFound(name.to_string()))?;
463
464        Ok(tags
465            .iter()
466            .filter_map(|t| {
467                Some(VersionMeta {
468                    version: format!("{} (quay)", t["name"].as_str()?),
469                    released: t["last_modified"].as_str().map(String::from),
470                    yanked: false,
471                })
472            })
473            .collect())
474    }
475
476    /// Search Docker Hub.
477    fn search_dockerhub(query: &str) -> Result<Vec<PackageMeta>, IndexError> {
478        let url = format!(
479            "https://hub.docker.com/v2/search/repositories?query={}&page_size=25",
480            query
481        );
482        let response: serde_json::Value = ureq::get(&url).call()?.into_json()?;
483
484        let results = response["results"]
485            .as_array()
486            .ok_or_else(|| IndexError::Parse("Invalid search response".into()))?;
487
488        let mut extra = HashMap::new();
489        extra.insert(
490            "source_repo".to_string(),
491            serde_json::Value::String("docker-hub".to_string()),
492        );
493
494        Ok(results
495            .iter()
496            .filter_map(|img| {
497                let name = if img["is_official"].as_bool().unwrap_or(false) {
498                    format!("library/{}", img["repo_name"].as_str()?)
499                } else {
500                    img["repo_name"].as_str()?.to_string()
501                };
502
503                Some(PackageMeta {
504                    name,
505                    version: "latest".to_string(),
506                    description: img["short_description"].as_str().map(String::from),
507                    homepage: None,
508                    repository: None,
509                    license: None,
510                    binaries: Vec::new(),
511                    keywords: Vec::new(),
512                    maintainers: Vec::new(),
513                    published: None,
514                    downloads: img["pull_count"].as_u64(),
515                    archive_url: None,
516                    checksum: None,
517                    extra: extra.clone(),
518                })
519            })
520            .collect())
521    }
522
523    /// Search Quay.io.
524    fn search_quay(query: &str) -> Result<Vec<PackageMeta>, IndexError> {
525        let url = format!("https://quay.io/api/v1/find/repositories?query={}", query);
526        let response: serde_json::Value = ureq::get(&url).call()?.into_json()?;
527
528        let results = response["results"]
529            .as_array()
530            .ok_or_else(|| IndexError::Parse("Invalid search response".into()))?;
531
532        let mut extra = HashMap::new();
533        extra.insert(
534            "source_repo".to_string(),
535            serde_json::Value::String("quay".to_string()),
536        );
537
538        Ok(results
539            .iter()
540            .filter_map(|repo| {
541                let namespace = repo["namespace"]["name"].as_str()?;
542                let name = repo["name"].as_str()?;
543
544                Some(PackageMeta {
545                    name: format!("quay.io/{}/{}", namespace, name),
546                    version: "latest".to_string(),
547                    description: repo["description"].as_str().map(String::from),
548                    homepage: None,
549                    repository: None,
550                    license: None,
551                    binaries: Vec::new(),
552                    keywords: Vec::new(),
553                    maintainers: vec![namespace.to_string()],
554                    published: None,
555                    downloads: None,
556                    archive_url: None,
557                    checksum: None,
558                    extra: extra.clone(),
559                })
560            })
561            .collect())
562    }
563}
564
565impl PackageIndex for Docker {
566    fn ecosystem(&self) -> &'static str {
567        "docker"
568    }
569
570    fn display_name(&self) -> &'static str {
571        "Container Registries (Docker)"
572    }
573
574    fn fetch(&self, name: &str) -> Result<PackageMeta, IndexError> {
575        let detected = Self::detect_registry(name);
576
577        // If the detected registry is in our configured list, use it
578        if self.registries.contains(&detected.registry) {
579            return match detected.registry {
580                DockerRegistry::DockerHub => {
581                    Self::fetch_from_dockerhub(&detected.clean_name).map(|f| f.package)
582                }
583                DockerRegistry::Quay => {
584                    Self::fetch_from_quay(&detected.clean_name).map(|f| f.package)
585                }
586                DockerRegistry::Ghcr | DockerRegistry::Gcr => {
587                    // GHCR and GCR require authentication for most operations
588                    // Return basic metadata from what we know
589                    let mut extra = HashMap::new();
590                    extra.insert(
591                        "source_repo".to_string(),
592                        serde_json::Value::String(detected.registry.name().to_string()),
593                    );
594                    Ok(PackageMeta {
595                        name: format!("{}{}", detected.registry.prefix(), detected.clean_name),
596                        version: "latest".to_string(),
597                        description: None,
598                        homepage: None,
599                        repository: None,
600                        license: None,
601                        binaries: Vec::new(),
602                        keywords: Vec::new(),
603                        maintainers: Vec::new(),
604                        published: None,
605                        downloads: None,
606                        archive_url: None,
607                        checksum: None,
608                        extra,
609                    })
610                }
611            };
612        }
613
614        // Try each configured registry
615        for &registry in &self.registries {
616            let result = match registry {
617                DockerRegistry::DockerHub => Self::fetch_from_dockerhub(name),
618                DockerRegistry::Quay => Self::fetch_from_quay(name),
619                DockerRegistry::Ghcr | DockerRegistry::Gcr => continue, // Skip auth-required registries
620            };
621
622            if let Ok(fetched) = result {
623                return Ok(fetched.package);
624            }
625        }
626
627        Err(IndexError::NotFound(name.to_string()))
628    }
629
630    fn fetch_versions(&self, name: &str) -> Result<Vec<VersionMeta>, IndexError> {
631        let detected = Self::detect_registry(name);
632        let mut all_versions = Vec::new();
633
634        // If the detected registry is in our configured list, use it
635        if self.registries.contains(&detected.registry) {
636            let versions = match detected.registry {
637                DockerRegistry::DockerHub => Self::fetch_versions_dockerhub(&detected.clean_name),
638                DockerRegistry::Quay => Self::fetch_versions_quay(&detected.clean_name),
639                DockerRegistry::Ghcr | DockerRegistry::Gcr => {
640                    // These require authentication
641                    Err(IndexError::Parse("Registry requires authentication".into()))
642                }
643            };
644
645            if let Ok(v) = versions {
646                return Ok(v);
647            }
648        }
649
650        // Try each configured registry
651        for &registry in &self.registries {
652            let result = match registry {
653                DockerRegistry::DockerHub => Self::fetch_versions_dockerhub(name),
654                DockerRegistry::Quay => Self::fetch_versions_quay(name),
655                DockerRegistry::Ghcr | DockerRegistry::Gcr => continue,
656            };
657
658            if let Ok(versions) = result {
659                all_versions.extend(versions);
660            }
661        }
662
663        if all_versions.is_empty() {
664            return Err(IndexError::NotFound(name.to_string()));
665        }
666
667        Ok(all_versions)
668    }
669
670    fn fetch_all_versions(&self, name: &str) -> Result<Vec<PackageMeta>, IndexError> {
671        let detected = Self::detect_registry(name);
672
673        // If the detected registry is in our configured list, use it
674        if self.registries.contains(&detected.registry) {
675            return match detected.registry {
676                DockerRegistry::DockerHub => {
677                    Self::fetch_all_versions_dockerhub(&detected.clean_name)
678                }
679                DockerRegistry::Quay | DockerRegistry::Ghcr | DockerRegistry::Gcr => {
680                    // Fall back to default implementation for other registries
681                    let versions = self.fetch_versions(name)?;
682                    Ok(versions
683                        .into_iter()
684                        .map(|v| PackageMeta {
685                            name: name.to_string(),
686                            version: v.version,
687                            published: v.released,
688                            ..Default::default()
689                        })
690                        .collect())
691                }
692            };
693        }
694
695        // Try Docker Hub if configured
696        if self.registries.contains(&DockerRegistry::DockerHub)
697            && let Ok(versions) = Self::fetch_all_versions_dockerhub(name)
698        {
699            return Ok(versions);
700        }
701
702        Err(IndexError::NotFound(name.to_string()))
703    }
704
705    fn search(&self, query: &str) -> Result<Vec<PackageMeta>, IndexError> {
706        let mut results = Vec::new();
707
708        // Search Docker Hub if configured
709        if self.registries.contains(&DockerRegistry::DockerHub)
710            && let Ok(packages) = Self::search_dockerhub(query)
711        {
712            results.extend(packages);
713        }
714
715        // Search Quay if configured
716        if self.registries.contains(&DockerRegistry::Quay)
717            && let Ok(packages) = Self::search_quay(query)
718        {
719            results.extend(packages);
720        }
721
722        // GHCR and GCR don't have public search APIs
723
724        Ok(results)
725    }
726}