Skip to main content

kellnr_common/
index_metadata.rs

1use std::collections::BTreeMap;
2use std::fmt::{Display, Formatter, Write};
3use std::path::{Path, PathBuf};
4
5use chrono::{DateTime, Utc};
6use serde::{Deserialize, Deserializer, Serialize, Serializer};
7
8mod pubtime_format {
9    use chrono::{DateTime, Utc};
10    use serde::{self, Deserialize, Deserializer, Serializer};
11
12    const FORMAT: &str = "%Y-%m-%dT%H:%M:%SZ";
13
14    #[allow(clippy::ref_option)] // signature required by serde's `with` attribute
15    pub fn serialize<S>(date: &Option<DateTime<Utc>>, serializer: S) -> Result<S::Ok, S::Error>
16    where
17        S: Serializer,
18    {
19        match date {
20            Some(dt) => serializer.serialize_str(&dt.format(FORMAT).to_string()),
21            None => serializer.serialize_none(),
22        }
23    }
24
25    pub fn deserialize<'de, D>(deserializer: D) -> Result<Option<DateTime<Utc>>, D::Error>
26    where
27        D: Deserializer<'de>,
28    {
29        let s: Option<String> = Option::deserialize(deserializer)?;
30        match s {
31            Some(s) => DateTime::parse_from_rfc3339(&s)
32                .map(|dt| Some(dt.with_timezone(&Utc)))
33                .map_err(serde::de::Error::custom),
34            None => Ok(None),
35        }
36    }
37}
38use tokio::fs::File;
39use tokio::io::AsyncReadExt;
40
41use crate::publish_metadata::{PublishMetadata, RegistryDep};
42use crate::version::Version;
43
44// This Metadata struct defined here is the one saved in the index.
// It is different from the one sent by Cargo to the registry.
46// See: https://doc.rust-lang.org/cargo/reference/registries.html#index-format
47// Crates.io implementation: https://github.com/rust-lang/crates.io/blob/main/crates/crates_io_index/data.rs
48
// One entry of a crate's index file: the metadata stored in the index for a
// single published version. Fields are serialized in declaration order;
// optional fields marked with `skip_serializing_if` are left out of the JSON
// entirely when they are `None`.
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
pub struct IndexMetadata {
    // The name of the package.
    // This must only contain alphanumeric, `-`, or `_` characters.
    pub name: String,
    // The version of the package this row is describing.
    // This must be a valid version number according to the Semantic
    // Versioning 2.0.0 spec at https://semver.org/.
    pub vers: String,
    // Array of direct dependencies of the package.
    pub deps: Vec<IndexDep>,
    // A SHA256 checksum of the `.crate` file.
    pub cksum: String,
    // Set of features defined for the package.
    // Each feature maps to an array of features or dependencies it enables.
    // #[serde(
    //     skip_serializing_if = "Option::is_none",
    //     serialize_with = "option_sorted_map"
    // )]
    pub features: BTreeMap<String, Vec<String>>,
    // Boolean of whether or not this version has been yanked.
    pub yanked: bool,
    // The `links` string value from the package's manifest, or null if not
    // specified. This field is optional and defaults to null.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub links: Option<String>,
    // The time the package was published.
    // `default` lets entries without a `pubtime` key deserialize to `None`;
    // the `pubtime_format` module controls the textual representation.
    #[serde(
        default,
        skip_serializing_if = "Option::is_none",
        with = "pubtime_format"
    )]
    pub pubtime: Option<DateTime<Utc>>,
    // An unsigned 32-bit integer value indicating the schema version of this
    // entry.
    //
    // If this is not specified, it should be interpreted as the default of 1.
    //
    // Cargo (starting with version 1.51) will ignore versions it does not
    // recognize. This provides a method to safely introduce changes to index
    // entries and allow older versions of cargo to ignore newer entries it
    // doesn't understand. Versions older than 1.51 ignore this field, and
    // thus may misinterpret the meaning of the index entry.
    //
    // The current values are:
    //
    // * 1: The schema as documented here, not including newer additions.
    //      This is honored in Rust version 1.51 and newer.
    // * 2: The addition of the `features2` field.
    //      This is honored in Rust version 1.60 and newer.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub v: Option<u32>,
    // This optional field contains features with new, extended syntax.
    // Specifically, namespaced features (`dep:`) and weak dependencies
    // (`pkg?/feat`).
    //
    // This is separated from `features` because versions older than 1.19
    // will fail to load due to not being able to parse the new syntax, even
    // with a `Cargo.lock` file.
    //
    // Cargo will merge any values listed here with the "features" field.
    //
    // If this field is included, the "v" field should be set to at least 2.
    //
    // Registries are not required to use this field for extended feature
    // syntax, they are allowed to include those in the "features" field.
    // Using this is only necessary if the registry wants to support cargo
    // versions older than 1.19, which in practice is only crates.io since
    // those older versions do not support other registries.
    // "features2": {
    // "serde": ["dep:serde", "chrono?/serde"]
    // }
    #[serde(skip_serializing_if = "Option::is_none")]
    pub features2: Option<BTreeMap<String, Vec<String>>>,
}
124
125impl IndexMetadata {
126    pub async fn from_max_version(path: &Path) -> Result<Self, std::io::Error> {
127        let mut file = File::open(path).await?;
128        let mut content = String::new();
129        file.read_to_string(&mut content).await?;
130
131        let mut metadata: Vec<IndexMetadata> = content
132            .lines()
133            .filter_map(|m| serde_json::from_str::<IndexMetadata>(m).ok())
134            .collect();
135
136        metadata.sort_by(|a, b| {
137            let sv1 = Version::from_unchecked_str(&a.vers);
138            let sv2 = Version::from_unchecked_str(&b.vers);
139            sv1.cmp(&sv2)
140        });
141
142        metadata.last().cloned().ok_or_else(|| {
143            std::io::Error::new(
144                std::io::ErrorKind::InvalidData,
145                "Unable to read metadata file.",
146            )
147        })
148    }
149
150    pub async fn from_version(path: &Path, version: &Version) -> Result<Self, std::io::Error> {
151        let mut file = File::open(path).await?;
152        let mut content = String::new();
153        file.read_to_string(&mut content).await?;
154
155        let metadata: Vec<IndexMetadata> = content
156            .lines()
157            .filter_map(|m| serde_json::from_str::<IndexMetadata>(m).ok())
158            .collect();
159
160        metadata
161            .iter()
162            .find(|m| {
163                let sv = Version::try_from(&m.vers).unwrap_or_default();
164                sv == *version
165            })
166            .cloned()
167            .ok_or_else(|| {
168                std::io::Error::new(
169                    std::io::ErrorKind::InvalidData,
170                    "Unable to read metadata file.",
171                )
172            })
173    }
174
175    pub fn to_json(&self) -> Result<String, serde_json::Error> {
176        serde_json::to_string(&self)
177    }
178
179    pub fn metadata_path(&self, index_path: &Path) -> PathBuf {
180        metadata_path(index_path, &self.name)
181    }
182
183    pub fn from_reg_meta(registry_metadata: &PublishMetadata, cksum: &str) -> Self {
184        IndexMetadata {
185            name: registry_metadata.name.clone(),
186            vers: registry_metadata.vers.clone(),
187            deps: registry_metadata
188                .deps
189                .clone()
190                .into_iter()
191                .map(IndexDep::from)
192                .collect(),
193            cksum: cksum.to_string(),
194            pubtime: Some(Utc::now()),
195            features: registry_metadata.features.clone(),
196            yanked: false,
197            links: registry_metadata.links.clone(),
198            v: Some(1),
199            features2: None,
200        }
201    }
202
203    pub fn minimal(name: &str, vers: &str, cksum: &str) -> Self {
204        Self {
205            name: name.to_string(),
206            vers: vers.to_string(),
207            cksum: cksum.to_string(),
208            deps: vec![],
209            features: BTreeMap::default(),
210            yanked: false,
211            links: None,
212            pubtime: None,
213            v: Some(1),
214            features2: None,
215        }
216    }
217
218    pub fn serialize_indices(indices: &[IndexMetadata]) -> Result<String, serde_json::Error> {
219        let indices = indices
220            .iter()
221            .map(serde_json::to_string)
222            .collect::<Result<Vec<_>, serde_json::Error>>()?;
223        let mut index = String::new();
224        for ix in &indices {
225            writeln!(&mut index, "{ix}").unwrap();
226        }
227        Ok(index)
228    }
229}
230
// A single dependency as stored in the `deps` array of an index entry.
// `package` is left out of the JSON when it is `None`; the other optional
// fields carry no `skip_serializing_if` attribute.
#[derive(Deserialize, Serialize, Debug, Clone, PartialEq, Eq)]
pub struct IndexDep {
    // Name of the dependency.
    // If the dependency is renamed from the original package name,
    // this is the new name. The original package name is stored in
    // the `package` field.
    pub name: String,
    // The SemVer requirement for this dependency.
    // This must be a valid version requirement defined at
    // https://doc.rust-lang.org/cargo/reference/specifying-dependencies.html.
    pub req: String,
    // Array of features (as strings) enabled for this dependency.
    pub features: Vec<String>,
    // Boolean of whether or not this is an optional dependency.
    pub optional: bool,
    // Boolean of whether or not default features are enabled.
    pub default_features: bool,
    // The target platform for the dependency.
    // null if not a target dependency.
    // Otherwise, a string such as "cfg(windows)".
    pub target: Option<String>,
    // The dependency kind.
    // "dev", "build", or "normal".
    // Note: this is a required field, but a small number of entries
    // exist in the crates.io index with either a missing or null
    // `kind` field due to implementation bugs.
    pub kind: Option<DependencyKind>,
    // The URL of the index of the registry where this dependency is
    // from as a string. If not specified or null, it is assumed the
    // dependency is in the current registry.
    pub registry: Option<String>,
    // If the dependency is renamed, this is a string of the actual
    // package name. If not specified or null, this dependency is not
    // renamed.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub package: Option<String>,
}
268
// The kind of a dependency as it appears in the index.
// Serde support is implemented by hand further down in this file so that the
// variants map to plain lowercase strings.
#[derive(Clone, Debug, PartialEq, PartialOrd, Ord, Eq)]
pub enum DependencyKind {
    // Written and read as "normal".
    Normal,
    // Written and read as "build".
    Build,
    // Written and read as "dev".
    Dev,
    // Any other string; it is kept verbatim and written back unchanged.
    Other(String),
}
276
277impl<'de> Deserialize<'de> for DependencyKind {
278    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
279    where
280        D: Deserializer<'de>,
281    {
282        let s = String::deserialize(deserializer)?;
283        match s.as_str() {
284            "normal" => Ok(DependencyKind::Normal),
285            "build" => Ok(DependencyKind::Build),
286            "dev" => Ok(DependencyKind::Dev),
287            _ => Ok(DependencyKind::Other(s)),
288        }
289    }
290}
291
292impl Serialize for DependencyKind {
293    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
294    where
295        S: Serializer,
296    {
297        match self {
298            DependencyKind::Normal => serializer.serialize_str("normal"),
299            DependencyKind::Build => serializer.serialize_str("build"),
300            DependencyKind::Dev => serializer.serialize_str("dev"),
301            DependencyKind::Other(s) => serializer.serialize_str(s),
302        }
303    }
304}
305
306impl Display for DependencyKind {
307    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
308        match self {
309            DependencyKind::Normal => write!(f, "normal"),
310            DependencyKind::Build => write!(f, "build"),
311            DependencyKind::Dev => write!(f, "dev"),
312            DependencyKind::Other(s) => write!(f, "{s}"),
313        }
314    }
315}
316
317impl From<String> for DependencyKind {
318    fn from(kind: String) -> Self {
319        match kind.as_str() {
320            "normal" => DependencyKind::Normal,
321            "build" => DependencyKind::Build,
322            "dev" => DependencyKind::Dev,
323            _ => DependencyKind::Other(kind),
324        }
325    }
326}
327
328impl From<RegistryDep> for IndexDep {
329    fn from(registry_dep: RegistryDep) -> Self {
330        IndexDep {
331            name: match registry_dep.explicit_name_in_toml {
332                Some(ref name) => name.clone(),
333                None => registry_dep.name.clone(),
334            },
335            req: registry_dep.version_req,
336            features: registry_dep.features.unwrap_or_default(),
337            optional: registry_dep.optional,
338            default_features: registry_dep.default_features,
339            target: registry_dep.target,
340            kind: registry_dep.kind.map(DependencyKind::from),
341            registry: registry_dep.registry,
342            package: match registry_dep.explicit_name_in_toml {
343                Some(_) => Some(registry_dep.name),
344                None => None,
345            },
346        }
347    }
348}
349
/// Returns the location of the index file for crate `name` below `index_path`.
///
/// The layout depends on the byte length of `name`, and every path component
/// derived from the name is lowercased:
///
/// * 1 byte: `<index_path>/1/<name>`
/// * 2 bytes: `<index_path>/2/<name>`
/// * 3 bytes: `<index_path>/3/<first byte>/<name>`
/// * otherwise: `<index_path>/<bytes 0..2>/<bytes 2..4>/<name>`
///
/// This function never panics. Prefix offsets that would fall inside a
/// multi-byte character are moved forward to the next character boundary, and
/// an empty `name` yields a path equivalent to `index_path` itself. Neither
/// case is a valid crate name; callers are expected to validate names first.
pub fn metadata_path(index_path: &Path, name: &str) -> PathBuf {
    /// Byte-range slice of `name` with both offsets clamped to its length and
    /// advanced to the next char boundary, so slicing cannot panic.
    fn segment(name: &str, start: usize, end: usize) -> &str {
        let snap = |offset: usize| {
            let mut offset = offset.min(name.len());
            // `name.len()` is always a boundary, so this terminates.
            while !name.is_char_boundary(offset) {
                offset += 1;
            }
            offset
        };
        &name[snap(start)..snap(end)]
    }

    let file_name = name.to_lowercase();
    match name.len() {
        1 => index_path.join("1").join(file_name),
        2 => index_path.join("2").join(file_name),
        3 => index_path
            .join("3")
            .join(segment(name, 0, 1).to_lowercase())
            .join(file_name),
        _ => index_path
            .join(segment(name, 0, 2).to_lowercase())
            .join(segment(name, 2, 4).to_lowercase())
            .join(file_name),
    }
}
370
#[cfg(test)]
mod tests {
    use super::*;

    /// A non-optional `^0.1.0` dependency on `name`, optionally renamed in
    /// the manifest to `rename`.
    fn registry_dep(name: &str, rename: Option<&str>) -> RegistryDep {
        RegistryDep {
            name: name.to_string(),
            version_req: "^0.1.0".to_string(),
            features: None,
            optional: false,
            default_features: true,
            target: None,
            kind: None,
            registry: None,
            explicit_name_in_toml: rename.map(str::to_string),
        }
    }

    /// An un-yanked schema-v1 entry without dependencies or features.
    fn index_entry(
        name: &str,
        vers: &str,
        cksum: &str,
        pubtime: Option<DateTime<Utc>>,
    ) -> IndexMetadata {
        IndexMetadata {
            name: name.to_string(),
            vers: vers.to_string(),
            deps: vec![],
            cksum: cksum.to_string(),
            features: BTreeMap::new(),
            yanked: false,
            links: None,
            pubtime,
            v: Some(1),
            features2: None,
        }
    }

    /// Splits a timestamp into (year, month, day, hour, minute, second).
    fn date_time_parts(timestamp: &DateTime<Utc>) -> (i32, u32, u32, u32, u32, u32) {
        use chrono::{Datelike, Timelike};

        (
            timestamp.year(),
            timestamp.month(),
            timestamp.day(),
            timestamp.hour(),
            timestamp.minute(),
            timestamp.second(),
        )
    }

    #[test]
    fn transitive_dependency_rename() {
        let reg_meta = PublishMetadata {
            name: "foo".to_string(),
            vers: "0.1.0".to_string(),
            deps: vec![registry_dep("bar", None), registry_dep("baz", Some("qux"))],
            features: BTreeMap::default(),
            links: None,
            description: None,
            authors: None,
            documentation: None,
            homepage: None,
            readme: None,
            readme_file: None,
            keywords: Vec::default(),
            categories: Vec::default(),
            license: None,
            license_file: None,
            repository: None,
            badges: None,
            rust_version: None,
        };

        let index_meta = IndexMetadata::from_reg_meta(&reg_meta, "1234");

        assert_eq!(index_meta.deps.len(), 2);

        let plain = &index_meta.deps[0];
        assert_eq!(plain.name, "bar");
        assert_eq!(plain.package, None);

        let renamed = &index_meta.deps[1];
        assert_eq!(renamed.name, "qux");
        assert_eq!(renamed.package, Some("baz".to_string()));
    }

    #[test]
    fn metadata_path_one_letter() {
        let expected = Path::new("ip").join("1").join("a");
        assert_eq!(metadata_path(Path::new("ip"), "A"), expected);
    }

    #[test]
    fn metadata_path_two_letters() {
        let expected = Path::new("ip").join("2").join("cb");
        assert_eq!(metadata_path(Path::new("ip"), "cB"), expected);
    }

    #[test]
    fn metadata_path_three_letters() {
        let expected = Path::new("ip").join("3").join("c").join("cab");
        assert_eq!(metadata_path(Path::new("ip"), "cAb"), expected);
    }

    #[test]
    fn metadata_path_four_or_more_letters() {
        let expected = Path::new("ip").join("fo").join("o_").join("foo_bar");
        assert_eq!(metadata_path(Path::new("ip"), "foo_bAr"), expected);
    }

    #[test]
    fn pubtime_serializes_without_fractional_seconds() {
        use chrono::TimeZone;

        let pubtime = Utc.with_ymd_and_hms(2025, 1, 2, 9, 5, 7).unwrap();
        let json = index_entry("test", "1.0.0", "abc123", Some(pubtime))
            .to_json()
            .unwrap();

        // Verify format is exactly "2025-01-02T09:05:07Z" (zero-padded, no fractional seconds)
        assert!(
            json.contains(r#""pubtime":"2025-01-02T09:05:07Z""#),
            "Expected pubtime to be serialized as '2025-01-02T09:05:07Z', got: {json}"
        );
    }

    #[test]
    fn pubtime_none_is_omitted_from_serialization() {
        let json = index_entry("test", "1.0.0", "abc123", None)
            .to_json()
            .unwrap();

        assert!(
            !json.contains("pubtime"),
            "Expected pubtime to be omitted when None, got: {json}"
        );
    }

    #[test]
    fn pubtime_deserializes_from_rfc3339() {
        let json = r#"{"name":"test","vers":"1.0.0","deps":[],"cksum":"abc","features":{},"yanked":false,"pubtime":"2025-01-02T09:05:07Z","v":1}"#;

        let metadata: IndexMetadata = serde_json::from_str(json).unwrap();

        let pubtime = metadata.pubtime.expect("pubtime should be present");
        assert_eq!(date_time_parts(&pubtime), (2025, 1, 2, 9, 5, 7));
    }

    #[test]
    fn pubtime_deserializes_from_rfc3339_with_fractional_seconds() {
        // Should also handle input with fractional seconds (from crates.io or other sources)
        let json = r#"{"name":"test","vers":"1.0.0","deps":[],"cksum":"abc","features":{},"yanked":false,"pubtime":"2025-01-02T09:05:07.123456Z","v":1}"#;

        let metadata: IndexMetadata = serde_json::from_str(json).unwrap();

        let pubtime = metadata.pubtime.expect("pubtime should be present");
        assert_eq!(date_time_parts(&pubtime), (2025, 1, 2, 9, 5, 7));
    }

    #[test]
    fn serialize_indices_ends_with_newline_and_one_line_per_entry() {
        let indices: Vec<IndexMetadata> = ["1.0.0", "2.0.0"]
            .into_iter()
            .map(|vers| index_entry("crate", vers, "cksum", None))
            .collect();

        let out = IndexMetadata::serialize_indices(&indices).unwrap();

        // crates.io terminates the sparse-index body with a trailing newline,
        // including after the final entry. Cargo tolerates a missing final
        // newline, but `cargo install-update` / `cargo outdated` stream the body
        // and treat an unterminated last line as "trailing garbage". Guard
        // against regressing to `write!` (no newline) on the last line.
        assert!(
            out.ends_with('\n'),
            "sparse index body must end with a newline, got: {out:?}"
        );

        // Exactly one newline-terminated JSON line per entry; no blank trailing line.
        assert_eq!(out.lines().count(), indices.len());
        for (line, meta) in out.lines().zip(&indices) {
            let parsed: IndexMetadata = serde_json::from_str(line).unwrap();
            assert_eq!(parsed.vers, meta.vers);
        }
    }
}