Skip to main content

kellnr_common/
index_metadata.rs

1use std::collections::BTreeMap;
2use std::fmt::{Display, Formatter, Write};
3use std::path::{Path, PathBuf};
4
5use chrono::{DateTime, Utc};
6use serde::{Deserialize, Deserializer, Serialize, Serializer};
7
8mod pubtime_format {
9    use chrono::{DateTime, Utc};
10    use serde::{self, Deserialize, Deserializer, Serializer};
11
12    const FORMAT: &str = "%Y-%m-%dT%H:%M:%SZ";
13
14    #[allow(clippy::ref_option)] // signature required by serde's `with` attribute
15    pub fn serialize<S>(date: &Option<DateTime<Utc>>, serializer: S) -> Result<S::Ok, S::Error>
16    where
17        S: Serializer,
18    {
19        match date {
20            Some(dt) => serializer.serialize_str(&dt.format(FORMAT).to_string()),
21            None => serializer.serialize_none(),
22        }
23    }
24
25    pub fn deserialize<'de, D>(deserializer: D) -> Result<Option<DateTime<Utc>>, D::Error>
26    where
27        D: Deserializer<'de>,
28    {
29        let s: Option<String> = Option::deserialize(deserializer)?;
30        match s {
31            Some(s) => DateTime::parse_from_rfc3339(&s)
32                .map(|dt| Some(dt.with_timezone(&Utc)))
33                .map_err(serde::de::Error::custom),
34            None => Ok(None),
35        }
36    }
37}
38use tokio::fs::File;
39use tokio::io::AsyncReadExt;
40
41use crate::publish_metadata::{PublishMetadata, RegistryDep};
42use crate::version::Version;
43
// The metadata struct defined here is the one saved in the index.
// It is different from the one sent by Cargo to the registry.
46// See: https://doc.rust-lang.org/cargo/reference/registries.html#index-format
47// Crates.io implementation: https://github.com/rust-lang/crates.io/blob/main/crates/crates_io_index/data.rs
48
// One entry of a crate's index file: the metadata describing a single
// published version. Each entry is serialized as one JSON object, with the
// fields written in the order they are declared below.
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
pub struct IndexMetadata {
    // The name of the package.
    // This must only contain alphanumeric, `-`, or `_` characters.
    pub name: String,
    // The version of the package this row is describing.
    // This must be a valid version number according to the Semantic
    // Versioning 2.0.0 spec at https://semver.org/.
    pub vers: String,
    // Array of direct dependencies of the package.
    pub deps: Vec<IndexDep>,
    // A SHA256 checksum of the `.crate` file.
    pub cksum: String,
    // Set of features defined for the package.
    // Each feature maps to an array of features or dependencies it enables.
    // NOTE(review): the attribute below is disabled; as written it would not
    // apply anyway, because `features` is not an `Option`.
    // #[serde(
    //     skip_serializing_if = "Option::is_none",
    //     serialize_with = "option_sorted_map"
    // )]
    pub features: BTreeMap<String, Vec<String>>,
    // Boolean of whether or not this version has been yanked.
    pub yanked: bool,
    // The `links` string value from the package's manifest, or null if not
    // specified. This field is optional and defaults to null.
    // Omitted from the serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub links: Option<String>,
    // The time the package was published.
    // Missing on input means `None`; `None` is omitted on output. The value
    // is read and written through the `pubtime_format` module.
    #[serde(
        default,
        skip_serializing_if = "Option::is_none",
        with = "pubtime_format"
    )]
    pub pubtime: Option<DateTime<Utc>>,
    // An unsigned 32-bit integer value indicating the schema version of this
    // entry.
    //
    // If this is not specified, it should be interpreted as the default of 1.
    //
    // Cargo (starting with version 1.51) will ignore versions it does not
    // recognize. This provides a method to safely introduce changes to index
    // entries and allow older versions of cargo to ignore newer entries it
    // doesn't understand. Versions older than 1.51 ignore this field, and
    // thus may misinterpret the meaning of the index entry.
    //
    // The current values are:
    //
    // * 1: The schema as documented here, not including newer additions.
    //      This is honored in Rust version 1.51 and newer.
    // * 2: The addition of the `features2` field.
    //      This is honored in Rust version 1.60 and newer.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub v: Option<u32>,
    // This optional field contains features with new, extended syntax.
    // Specifically, namespaced features (`dep:`) and weak dependencies
    // (`pkg?/feat`).
    //
    // This is separated from `features` because versions older than 1.19
    // will fail to load due to not being able to parse the new syntax, even
    // with a `Cargo.lock` file.
    //
    // Cargo will merge any values listed here with the "features" field.
    //
    // If this field is included, the "v" field should be set to at least 2.
    //
    // Registries are not required to use this field for extended feature
    // syntax, they are allowed to include those in the "features" field.
    // Using this is only necessary if the registry wants to support cargo
    // versions older than 1.19, which in practice is only crates.io since
    // those older versions do not support other registries.
    //
    // Example of the serialized form:
    // "features2": {
    // "serde": ["dep:serde", "chrono?/serde"]
    // }
    #[serde(skip_serializing_if = "Option::is_none")]
    pub features2: Option<BTreeMap<String, Vec<String>>>,
}
124
125impl IndexMetadata {
126    pub async fn from_max_version(path: &Path) -> Result<Self, std::io::Error> {
127        let mut file = File::open(path).await?;
128        let mut content = String::new();
129        file.read_to_string(&mut content).await?;
130
131        let mut metadata: Vec<IndexMetadata> = content
132            .lines()
133            .filter_map(|m| serde_json::from_str::<IndexMetadata>(m).ok())
134            .collect();
135
136        metadata.sort_by(|a, b| {
137            let sv1 = Version::from_unchecked_str(&a.vers);
138            let sv2 = Version::from_unchecked_str(&b.vers);
139            sv1.cmp(&sv2)
140        });
141
142        metadata.last().cloned().ok_or_else(|| {
143            std::io::Error::new(
144                std::io::ErrorKind::InvalidData,
145                "Unable to read metadata file.",
146            )
147        })
148    }
149
150    pub async fn from_version(path: &Path, version: &Version) -> Result<Self, std::io::Error> {
151        let mut file = File::open(path).await?;
152        let mut content = String::new();
153        file.read_to_string(&mut content).await?;
154
155        let metadata: Vec<IndexMetadata> = content
156            .lines()
157            .filter_map(|m| serde_json::from_str::<IndexMetadata>(m).ok())
158            .collect();
159
160        metadata
161            .iter()
162            .find(|m| {
163                let sv = Version::try_from(&m.vers).unwrap_or_default();
164                sv == *version
165            })
166            .cloned()
167            .ok_or_else(|| {
168                std::io::Error::new(
169                    std::io::ErrorKind::InvalidData,
170                    "Unable to read metadata file.",
171                )
172            })
173    }
174
175    pub fn to_json(&self) -> Result<String, serde_json::Error> {
176        serde_json::to_string(&self)
177    }
178
179    pub fn metadata_path(&self, index_path: &Path) -> PathBuf {
180        metadata_path(index_path, &self.name)
181    }
182
183    pub fn from_reg_meta(registry_metadata: &PublishMetadata, cksum: &str) -> Self {
184        IndexMetadata {
185            name: registry_metadata.name.clone(),
186            vers: registry_metadata.vers.clone(),
187            deps: registry_metadata
188                .deps
189                .clone()
190                .into_iter()
191                .map(IndexDep::from)
192                .collect(),
193            cksum: cksum.to_string(),
194            pubtime: Some(Utc::now()),
195            features: registry_metadata.features.clone(),
196            yanked: false,
197            links: registry_metadata.links.clone(),
198            v: Some(1),
199            features2: None,
200        }
201    }
202
203    pub fn minimal(name: &str, vers: &str, cksum: &str) -> Self {
204        Self {
205            name: name.to_string(),
206            vers: vers.to_string(),
207            cksum: cksum.to_string(),
208            deps: vec![],
209            features: BTreeMap::default(),
210            yanked: false,
211            links: None,
212            pubtime: None,
213            v: Some(1),
214            features2: None,
215        }
216    }
217
218    pub fn serialize_indices(indices: &[IndexMetadata]) -> Result<String, serde_json::Error> {
219        let mut index = String::new();
220        for ix in indices {
221            writeln!(&mut index, "{}", serde_json::to_string(ix)?).unwrap();
222        }
223        Ok(index)
224    }
225}
226
// A single dependency of a package version, in the form stored in the
// `deps` array of an index entry.
#[derive(Deserialize, Serialize, Debug, Clone, PartialEq, Eq)]
pub struct IndexDep {
    // Name of the dependency.
    // If the dependency is renamed from the original package name,
    // this is the new name. The original package name is stored in
    // the `package` field.
    pub name: String,
    // The SemVer requirement for this dependency.
    // This must be a valid version requirement defined at
    // https://doc.rust-lang.org/cargo/reference/specifying-dependencies.html.
    pub req: String,
    // Array of features (as strings) enabled for this dependency.
    pub features: Vec<String>,
    // Boolean of whether or not this is an optional dependency.
    pub optional: bool,
    // Boolean of whether or not default features are enabled.
    pub default_features: bool,
    // The target platform for the dependency.
    // null if not a target dependency.
    // Otherwise, a string such as "cfg(windows)".
    pub target: Option<String>,
    // The dependency kind.
    // "dev", "build", or "normal".
    // Note: this is a required field, but a small number of entries
    // exist in the crates.io index with either a missing or null
    // `kind` field due to implementation bugs.
    pub kind: Option<DependencyKind>,
    // The URL of the index of the registry where this dependency is
    // from as a string. If not specified or null, it is assumed the
    // dependency is in the current registry.
    pub registry: Option<String>,
    // If the dependency is renamed, this is a string of the actual
    // package name. If not specified or null, this dependency is not
    // renamed.
    // Omitted from the serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub package: Option<String>,
}
264
// The kind of a dependency as stored in the index.
// The three kinds written as "normal", "build" and "dev" have dedicated
// variants; any other string is kept verbatim in `Other`.
// The derived ordering follows the declaration order of the variants.
#[derive(Clone, Debug, PartialEq, PartialOrd, Ord, Eq)]
pub enum DependencyKind {
    // Written as "normal".
    Normal,
    // Written as "build".
    Build,
    // Written as "dev".
    Dev,
    // Any kind string other than the three above, preserved as received.
    Other(String),
}
272
273impl<'de> Deserialize<'de> for DependencyKind {
274    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
275    where
276        D: Deserializer<'de>,
277    {
278        let s = String::deserialize(deserializer)?;
279        match s.as_str() {
280            "normal" => Ok(DependencyKind::Normal),
281            "build" => Ok(DependencyKind::Build),
282            "dev" => Ok(DependencyKind::Dev),
283            _ => Ok(DependencyKind::Other(s)),
284        }
285    }
286}
287
288impl Serialize for DependencyKind {
289    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
290    where
291        S: Serializer,
292    {
293        match self {
294            DependencyKind::Normal => serializer.serialize_str("normal"),
295            DependencyKind::Build => serializer.serialize_str("build"),
296            DependencyKind::Dev => serializer.serialize_str("dev"),
297            DependencyKind::Other(s) => serializer.serialize_str(s),
298        }
299    }
300}
301
302impl Display for DependencyKind {
303    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
304        match self {
305            DependencyKind::Normal => write!(f, "normal"),
306            DependencyKind::Build => write!(f, "build"),
307            DependencyKind::Dev => write!(f, "dev"),
308            DependencyKind::Other(s) => write!(f, "{s}"),
309        }
310    }
311}
312
313impl From<String> for DependencyKind {
314    fn from(kind: String) -> Self {
315        match kind.as_str() {
316            "normal" => DependencyKind::Normal,
317            "build" => DependencyKind::Build,
318            "dev" => DependencyKind::Dev,
319            _ => DependencyKind::Other(kind),
320        }
321    }
322}
323
324impl From<RegistryDep> for IndexDep {
325    fn from(registry_dep: RegistryDep) -> Self {
326        IndexDep {
327            name: match registry_dep.explicit_name_in_toml {
328                Some(ref name) => name.clone(),
329                None => registry_dep.name.clone(),
330            },
331            req: registry_dep.version_req,
332            features: registry_dep.features.unwrap_or_default(),
333            optional: registry_dep.optional,
334            default_features: registry_dep.default_features,
335            target: registry_dep.target,
336            kind: registry_dep.kind.map(DependencyKind::from),
337            registry: registry_dep.registry,
338            package: match registry_dep.explicit_name_in_toml {
339                Some(_) => Some(registry_dep.name),
340                None => None,
341            },
342        }
343    }
344}
345
/// Computes where the index file for the crate `name` lives below
/// `index_path`.
///
/// The layout depends on the byte length of `name`; every path component
/// derived from the name is lowercased:
///
/// * 1 byte: `1/<name>`
/// * 2 bytes: `2/<name>`
/// * 3 bytes: `3/<first byte>/<name>`
/// * otherwise: `<bytes 0..2>/<bytes 2..4>/<name>`
///
/// # Panics
///
/// Panics if `name` is empty, or if one of the byte ranges above does not
/// fall on a character boundary (possible only for non-ASCII names).
pub fn metadata_path(index_path: &Path, name: &str) -> PathBuf {
    let file_name = name.to_lowercase();

    match name.len() {
        1 => index_path.join("1").join(file_name),
        2 => index_path.join("2").join(file_name),
        3 => {
            let shard = name[..1].to_lowercase();
            index_path.join("3").join(shard).join(file_name)
        }
        _ => {
            let outer = name[..2].to_lowercase();
            let inner = name[2..4].to_lowercase();
            index_path.join(outer).join(inner).join(file_name)
        }
    }
}
366
#[cfg(test)]
mod tests {
    use chrono::{Datelike, TimeZone, Timelike};

    use super::*;

    // Dependency fixture: requirement `^0.1.0`, default features enabled, and
    // an optional rename as it would appear in Cargo.toml.
    fn registry_dep(name: &str, renamed_to: Option<&str>) -> RegistryDep {
        RegistryDep {
            name: name.to_string(),
            version_req: "^0.1.0".to_string(),
            features: None,
            optional: false,
            default_features: true,
            target: None,
            kind: None,
            registry: None,
            explicit_name_in_toml: renamed_to.map(str::to_string),
        }
    }

    // Index entry fixture that differs from `IndexMetadata::minimal` only in
    // its publication time.
    fn entry_with_pubtime(pubtime: Option<DateTime<Utc>>) -> IndexMetadata {
        IndexMetadata {
            pubtime,
            ..IndexMetadata::minimal("test", "1.0.0", "abc123")
        }
    }

    // Parses `json` as an index entry and checks that its publication time is
    // 2025-01-02 09:05:07, ignoring any sub-second part.
    fn assert_pubtime_is_2025_01_02_09_05_07(json: &str) {
        let metadata: IndexMetadata = serde_json::from_str(json).unwrap();

        let pubtime = metadata.pubtime.expect("pubtime should be present");
        assert_eq!(
            (pubtime.year(), pubtime.month(), pubtime.day()),
            (2025, 1, 2)
        );
        assert_eq!(
            (pubtime.hour(), pubtime.minute(), pubtime.second()),
            (9, 5, 7)
        );
    }

    #[test]
    fn transitive_dependency_rename() {
        let reg_meta = PublishMetadata {
            name: "foo".to_string(),
            vers: "0.1.0".to_string(),
            deps: vec![registry_dep("bar", None), registry_dep("baz", Some("qux"))],
            features: BTreeMap::default(),
            links: None,
            description: None,
            authors: None,
            documentation: None,
            homepage: None,
            readme: None,
            readme_file: None,
            keywords: Vec::default(),
            categories: Vec::default(),
            license: None,
            license_file: None,
            repository: None,
            badges: None,
            rust_version: None,
        };

        let index_meta = IndexMetadata::from_reg_meta(&reg_meta, "1234");

        assert_eq!(index_meta.deps.len(), 2);

        // A dependency without a rename keeps its name and has no `package`.
        let plain = &index_meta.deps[0];
        assert_eq!(plain.name, "bar");
        assert_eq!(plain.package, None);

        // A renamed dependency exposes the alias and remembers the real name.
        let renamed = &index_meta.deps[1];
        assert_eq!(renamed.name, "qux");
        assert_eq!(renamed.package, Some("baz".to_string()));
    }

    #[test]
    fn metadata_path_one_letter() {
        let index = Path::new("ip");
        assert_eq!(metadata_path(index, "A"), index.join("1").join("a"));
    }

    #[test]
    fn metadata_path_two_letters() {
        let index = Path::new("ip");
        assert_eq!(metadata_path(index, "cB"), index.join("2").join("cb"));
    }

    #[test]
    fn metadata_path_three_letters() {
        let index = Path::new("ip");
        assert_eq!(
            metadata_path(index, "cAb"),
            index.join("3").join("c").join("cab")
        );
    }

    #[test]
    fn metadata_path_four_or_more_letters() {
        let index = Path::new("ip");
        assert_eq!(
            metadata_path(index, "foo_bAr"),
            index.join("fo").join("o_").join("foo_bar")
        );
    }

    #[test]
    fn pubtime_serializes_without_fractional_seconds() {
        let pubtime = Utc.with_ymd_and_hms(2025, 1, 2, 9, 5, 7).unwrap();

        let json = entry_with_pubtime(Some(pubtime)).to_json().unwrap();

        // Verify format is exactly "2025-01-02T09:05:07Z" (zero-padded, no fractional seconds)
        assert!(
            json.contains(r#""pubtime":"2025-01-02T09:05:07Z""#),
            "Expected pubtime to be serialized as '2025-01-02T09:05:07Z', got: {json}"
        );
    }

    #[test]
    fn pubtime_none_is_omitted_from_serialization() {
        let json = entry_with_pubtime(None).to_json().unwrap();

        assert!(
            !json.contains("pubtime"),
            "Expected pubtime to be omitted when None, got: {json}"
        );
    }

    #[test]
    fn pubtime_deserializes_from_rfc3339() {
        assert_pubtime_is_2025_01_02_09_05_07(
            r#"{"name":"test","vers":"1.0.0","deps":[],"cksum":"abc","features":{},"yanked":false,"pubtime":"2025-01-02T09:05:07Z","v":1}"#,
        );
    }

    #[test]
    fn pubtime_deserializes_from_rfc3339_with_fractional_seconds() {
        // Should also handle input with fractional seconds (from crates.io or other sources)
        assert_pubtime_is_2025_01_02_09_05_07(
            r#"{"name":"test","vers":"1.0.0","deps":[],"cksum":"abc","features":{},"yanked":false,"pubtime":"2025-01-02T09:05:07.123456Z","v":1}"#,
        );
    }

    #[test]
    fn serialize_indices_ends_with_newline_and_one_line_per_entry() {
        let indices: Vec<IndexMetadata> = ["1.0.0", "2.0.0"]
            .into_iter()
            .map(|vers| IndexMetadata::minimal("crate", vers, "cksum"))
            .collect();

        let out = IndexMetadata::serialize_indices(&indices).unwrap();

        // crates.io terminates the sparse-index body with a trailing newline,
        // including after the final entry. Cargo tolerates a missing final
        // newline, but `cargo install-update` / `cargo outdated` stream the body
        // and treat an unterminated last line as "trailing garbage". Guard
        // against regressing to `write!` (no newline) on the last line.
        assert!(
            out.ends_with('\n'),
            "sparse index body must end with a newline, got: {out:?}"
        );

        // Exactly one newline-terminated JSON line per entry; no blank trailing line.
        assert_eq!(out.lines().count(), indices.len());
        for (line, expected) in out.lines().zip(&indices) {
            let parsed: IndexMetadata = serde_json::from_str(line).unwrap();
            assert_eq!(parsed.vers, expected.vers);
        }
    }
}
585}