Skip to main content

kellnr_common/
index_metadata.rs

1use std::collections::BTreeMap;
2use std::fmt::{Display, Formatter, Write};
3use std::path::{Path, PathBuf};
4
5use chrono::{DateTime, Utc};
6use serde::{Deserialize, Deserializer, Serialize, Serializer};
7
8mod pubtime_format {
9    use chrono::{DateTime, Utc};
10    use serde::{self, Deserialize, Deserializer, Serializer};
11
12    const FORMAT: &str = "%Y-%m-%dT%H:%M:%SZ";
13
14    #[allow(clippy::ref_option)] // signature required by serde's `with` attribute
15    pub fn serialize<S>(date: &Option<DateTime<Utc>>, serializer: S) -> Result<S::Ok, S::Error>
16    where
17        S: Serializer,
18    {
19        match date {
20            Some(dt) => serializer.serialize_str(&dt.format(FORMAT).to_string()),
21            None => serializer.serialize_none(),
22        }
23    }
24
25    pub fn deserialize<'de, D>(deserializer: D) -> Result<Option<DateTime<Utc>>, D::Error>
26    where
27        D: Deserializer<'de>,
28    {
29        let s: Option<String> = Option::deserialize(deserializer)?;
30        match s {
31            Some(s) => DateTime::parse_from_rfc3339(&s)
32                .map(|dt| Some(dt.with_timezone(&Utc)))
33                .map_err(serde::de::Error::custom),
34            None => Ok(None),
35        }
36    }
37}
38use tokio::fs::File;
39use tokio::io::AsyncReadExt;
40
41use crate::publish_metadata::{PublishMetadata, RegistryDep};
42use crate::version::Version;
43
// The metadata struct defined here is the one saved in the index.
// It is different from the one sent by Cargo to the registry.
46// See: https://doc.rust-lang.org/cargo/reference/registries.html#index-format
47// Crates.io implementation: https://github.com/rust-lang/crates.io/blob/main/crates/crates_io_index/data.rs
48
49#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
50pub struct IndexMetadata {
51    // The name of the package.
52    // This must only contain alphanumeric, `-`, or `_` characters.
53    pub name: String,
54    // The version of the package this row is describing.
55    // This must be a valid version number according to the Semantic
56    // Versioning 2.0.0 spec at https://semver.org/.
57    pub vers: String,
58    // Array of direct dependencies of the package.
59    pub deps: Vec<IndexDep>,
60    // A SHA256 checksum of the `.crate` file.
61    pub cksum: String,
62    // Set of features defined for the package.
63    // Each feature maps to an array of features or dependencies it enables.
64    // #[serde(
65    //     skip_serializing_if = "Option::is_none",
66    //     serialize_with = "option_sorted_map"
67    // )]
68    pub features: BTreeMap<String, Vec<String>>,
69    // Boolean of whether or not this version has been yanked.
70    pub yanked: bool,
71    // The `links` string value from the package's manifest, or null if not
72    // specified. This field is optional and defaults to null.
73    #[serde(skip_serializing_if = "Option::is_none")]
74    pub links: Option<String>,
75    // The time the package was published
76    #[serde(
77        default,
78        skip_serializing_if = "Option::is_none",
79        with = "pubtime_format"
80    )]
81    pub pubtime: Option<DateTime<Utc>>,
82    // An unsigned 32-bit integer value indicating the schema version of this
83    // entry.
84    //
85    // If this not specified, it should be interpreted as the default of 1.
86    //
87    // Cargo (starting with version 1.51) will ignore versions it does not
88    // recognize. This provides a method to safely introduce changes to index
89    // entries and allow older versions of cargo to ignore newer entries it
90    // doesn't understand. Versions older than 1.51 ignore this field, and
91    // thus may misinterpret the meaning of the index entry.
92    //
93    // The current values are:
94    //
95    // * 1: The schema as documented here, not including newer additions.
96    //      This is honored in Rust version 1.51 and newer.
97    // * 2: The addition of the `features2` field.
98    //      This is honored in Rust version 1.60 and newer.
99    #[serde(skip_serializing_if = "Option::is_none")]
100    pub v: Option<u32>,
101    // This optional field contains features with new, extended syntax.
102    // Specifically, namespaced features (`dep:`) and weak dependencies
103    // (`pkg?/feat`).
104    //
105    // This is separated from `features` because versions older than 1.19
106    // will fail to load due to not being able to parse the new syntax, even
107    // with a `Cargo.lock` file.
108    //
109    // Cargo will merge any values listed here with the "features" field.
110    //
111    // If this field is included, the "v" field should be set to at least 2.
112    //
113    // Registries are not required to use this field for extended feature
114    // syntax, they are allowed to include those in the "features" field.
115    // Using this is only necessary if the registry wants to support cargo
116    // versions older than 1.19, which in practice is only crates.io since
117    // those older versions do not support other registries.
118    // "features2": {
119    // "serde": ["dep:serde", "chrono?/serde"]
120    // }
121    #[serde(skip_serializing_if = "Option::is_none")]
122    pub features2: Option<BTreeMap<String, Vec<String>>>,
123}
124
125impl IndexMetadata {
126    pub async fn from_max_version(path: &Path) -> Result<Self, std::io::Error> {
127        let mut file = File::open(path).await?;
128        let mut content = String::new();
129        file.read_to_string(&mut content).await?;
130
131        let mut metadata: Vec<IndexMetadata> = content
132            .lines()
133            .filter_map(|m| serde_json::from_str::<IndexMetadata>(m).ok())
134            .collect();
135
136        metadata.sort_by(|a, b| {
137            let sv1 = Version::from_unchecked_str(&a.vers);
138            let sv2 = Version::from_unchecked_str(&b.vers);
139            sv1.cmp(&sv2)
140        });
141
142        metadata.last().cloned().ok_or_else(|| {
143            std::io::Error::new(
144                std::io::ErrorKind::InvalidData,
145                "Unable to read metadata file.",
146            )
147        })
148    }
149
150    pub async fn from_version(path: &Path, version: &Version) -> Result<Self, std::io::Error> {
151        let mut file = File::open(path).await?;
152        let mut content = String::new();
153        file.read_to_string(&mut content).await?;
154
155        let metadata: Vec<IndexMetadata> = content
156            .lines()
157            .filter_map(|m| serde_json::from_str::<IndexMetadata>(m).ok())
158            .collect();
159
160        metadata
161            .iter()
162            .find(|m| {
163                let sv = Version::try_from(&m.vers).unwrap_or_default();
164                sv == *version
165            })
166            .cloned()
167            .ok_or_else(|| {
168                std::io::Error::new(
169                    std::io::ErrorKind::InvalidData,
170                    "Unable to read metadata file.",
171                )
172            })
173    }
174
175    pub fn to_json(&self) -> Result<String, serde_json::Error> {
176        serde_json::to_string(&self)
177    }
178
179    pub fn metadata_path(&self, index_path: &Path) -> PathBuf {
180        metadata_path(index_path, &self.name)
181    }
182
183    pub fn from_reg_meta(registry_metadata: &PublishMetadata, cksum: &str) -> Self {
184        IndexMetadata {
185            name: registry_metadata.name.clone(),
186            vers: registry_metadata.vers.clone(),
187            deps: registry_metadata
188                .deps
189                .clone()
190                .into_iter()
191                .map(IndexDep::from)
192                .collect(),
193            cksum: cksum.to_string(),
194            pubtime: Some(Utc::now()),
195            features: registry_metadata.features.clone(),
196            yanked: false,
197            links: registry_metadata.links.clone(),
198            v: Some(1),
199            features2: None,
200        }
201    }
202
203    pub fn minimal(name: &str, vers: &str, cksum: &str) -> Self {
204        Self {
205            name: name.to_string(),
206            vers: vers.to_string(),
207            cksum: cksum.to_string(),
208            deps: vec![],
209            features: BTreeMap::default(),
210            yanked: false,
211            links: None,
212            pubtime: None,
213            v: Some(1),
214            features2: None,
215        }
216    }
217
218    pub fn serialize_indices(indices: &[IndexMetadata]) -> Result<String, serde_json::Error> {
219        let indices = indices
220            .iter()
221            .map(serde_json::to_string)
222            .collect::<Result<Vec<_>, serde_json::Error>>()?;
223        let mut index = String::new();
224        for (i, ix) in indices.iter().enumerate() {
225            if i == indices.len() - 1 {
226                write!(&mut index, "{ix}").unwrap();
227            } else {
228                writeln!(&mut index, "{ix}").unwrap();
229            }
230        }
231        Ok(index)
232    }
233}
234
235#[derive(Deserialize, Serialize, Debug, Clone, PartialEq, Eq)]
236pub struct IndexDep {
237    // Name of the dependency.
238    // If the dependency is renamed from the original package name,
239    // this is the new name. The original package name is stored in
240    // the `package` field.
241    pub name: String,
242    // The SemVer requirement for this dependency.
243    // This must be a valid version requirement defined at
244    // https://doc.rust-lang.org/cargo/reference/specifying-dependencies.html.
245    pub req: String,
246    // Array of features (as strings) enabled for this dependency.
247    pub features: Vec<String>,
248    // Boolean of whether or not this is an optional dependency.
249    pub optional: bool,
250    // Boolean of whether or not default features are enabled.
251    pub default_features: bool,
252    // The target platform for the dependency.
253    // null if not a target dependency.
254    // Otherwise, a string such as "cfg(windows)".
255    pub target: Option<String>,
256    // The dependency kind.
257    // "dev", "build", or "normal".
258    // Note: this is a required field, but a small number of entries
259    // exist in the crates.io index with either a missing or null
260    // `kind` field due to implementation bugs.
261    pub kind: Option<DependencyKind>,
262    // The URL of the index of the registry where this dependency is
263    // from as a string. If not specified or null, it is assumed the
264    // dependency is in the current registry.
265    pub registry: Option<String>,
266    // If the dependency is renamed, this is a string of the actual
267    // package name. If not specified or null, this dependency is not
268    // renamed.
269    #[serde(skip_serializing_if = "Option::is_none")]
270    pub package: Option<String>,
271}
272
/// The kind of a dependency as recorded in the index.
///
/// The three known kinds have dedicated variants; any other string is kept
/// verbatim in [`DependencyKind::Other`]. Variant order defines the derived
/// ordering.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
pub enum DependencyKind {
    /// Written as `"normal"`.
    Normal,
    /// Written as `"build"`.
    Build,
    /// Written as `"dev"`.
    Dev,
    /// Any kind string other than the three above, stored unchanged.
    Other(String),
}
280
281impl<'de> Deserialize<'de> for DependencyKind {
282    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
283    where
284        D: Deserializer<'de>,
285    {
286        let s = String::deserialize(deserializer)?;
287        match s.as_str() {
288            "normal" => Ok(DependencyKind::Normal),
289            "build" => Ok(DependencyKind::Build),
290            "dev" => Ok(DependencyKind::Dev),
291            _ => Ok(DependencyKind::Other(s)),
292        }
293    }
294}
295
296impl Serialize for DependencyKind {
297    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
298    where
299        S: Serializer,
300    {
301        match self {
302            DependencyKind::Normal => serializer.serialize_str("normal"),
303            DependencyKind::Build => serializer.serialize_str("build"),
304            DependencyKind::Dev => serializer.serialize_str("dev"),
305            DependencyKind::Other(s) => serializer.serialize_str(s),
306        }
307    }
308}
309
310impl Display for DependencyKind {
311    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
312        match self {
313            DependencyKind::Normal => write!(f, "normal"),
314            DependencyKind::Build => write!(f, "build"),
315            DependencyKind::Dev => write!(f, "dev"),
316            DependencyKind::Other(s) => write!(f, "{s}"),
317        }
318    }
319}
320
321impl From<String> for DependencyKind {
322    fn from(kind: String) -> Self {
323        match kind.as_str() {
324            "normal" => DependencyKind::Normal,
325            "build" => DependencyKind::Build,
326            "dev" => DependencyKind::Dev,
327            _ => DependencyKind::Other(kind),
328        }
329    }
330}
331
332impl From<RegistryDep> for IndexDep {
333    fn from(registry_dep: RegistryDep) -> Self {
334        IndexDep {
335            name: match registry_dep.explicit_name_in_toml {
336                Some(ref name) => name.clone(),
337                None => registry_dep.name.clone(),
338            },
339            req: registry_dep.version_req,
340            features: registry_dep.features.unwrap_or_default(),
341            optional: registry_dep.optional,
342            default_features: registry_dep.default_features,
343            target: registry_dep.target,
344            kind: registry_dep.kind.map(DependencyKind::from),
345            registry: registry_dep.registry,
346            package: match registry_dep.explicit_name_in_toml {
347                Some(_) => Some(registry_dep.name),
348                None => None,
349            },
350        }
351    }
352}
353
/// Returns the location of the index file for the package `name` below
/// `index_path`.
///
/// The directory is chosen by the byte length of `name`, and every path
/// component taken from the name is lowercased:
///
/// * 1 byte: `1/<name>`
/// * 2 bytes: `2/<name>`
/// * 3 bytes: `3/<first character>/<name>`
/// * 4 or more bytes: `<bytes 0..2>/<bytes 2..4>/<name>`
///
/// Package names are expected to be non-empty ASCII. Other input no longer
/// panics: an empty name is placed in the `1` directory, and a split point
/// that would fall inside a multi-byte character is moved forward to the end
/// of that character. Such paths do not name a valid index file, so callers
/// get an ordinary I/O error instead of a panic.
pub fn metadata_path(index_path: &Path, name: &str) -> PathBuf {
    /// Smallest char boundary of `s` at or after byte offset `at`, clamped to
    /// `s.len()` (which is always a boundary, so the loop terminates).
    fn boundary_at_or_after(s: &str, at: usize) -> usize {
        let mut idx = at.min(s.len());
        while !s.is_char_boundary(idx) {
            idx += 1;
        }
        idx
    }

    let file_name = name.to_lowercase();
    match name.len() {
        0 | 1 => index_path.join("1").join(file_name),
        2 => index_path.join("2").join(file_name),
        3 => {
            let first_char = &name[..boundary_at_or_after(name, 1)];
            index_path
                .join("3")
                .join(first_char.to_lowercase())
                .join(file_name)
        }
        _ => {
            // `second_end >= first_end` because the helper is monotonic.
            let first_end = boundary_at_or_after(name, 2);
            let second_end = boundary_at_or_after(name, 4);
            index_path
                .join(name[..first_end].to_lowercase())
                .join(name[first_end..second_end].to_lowercase())
                .join(file_name)
        }
    }
}
374
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a non-optional `^0.1.0` dependency on `name` with default
    /// features, optionally renamed in the manifest.
    fn registry_dep(name: &str, explicit_name_in_toml: Option<&str>) -> RegistryDep {
        RegistryDep {
            name: name.to_string(),
            version_req: "^0.1.0".to_string(),
            features: None,
            optional: false,
            default_features: true,
            target: None,
            kind: None,
            registry: None,
            explicit_name_in_toml: explicit_name_in_toml.map(String::from),
        }
    }

    /// Builds the entry `test` 1.0.0 (checksum `abc123`, schema version 1)
    /// with the given publish time and nothing else set.
    fn entry_with_pubtime(pubtime: Option<DateTime<Utc>>) -> IndexMetadata {
        IndexMetadata {
            pubtime,
            ..IndexMetadata::minimal("test", "1.0.0", "abc123")
        }
    }

    /// Joins `components` into the path expected from `metadata_path`.
    fn path_of(components: &[&str]) -> PathBuf {
        components.iter().collect()
    }

    /// Asserts that `pubtime` is 2025-01-02 09:05:07, ignoring any
    /// sub-second part.
    fn assert_expected_pubtime(pubtime: DateTime<Utc>) {
        use chrono::{Datelike, Timelike};

        assert_eq!(pubtime.year(), 2025);
        assert_eq!(pubtime.month(), 1);
        assert_eq!(pubtime.day(), 2);
        assert_eq!(pubtime.hour(), 9);
        assert_eq!(pubtime.minute(), 5);
        assert_eq!(pubtime.second(), 7);
    }

    #[test]
    fn transitive_dependency_rename() {
        let reg_meta = PublishMetadata {
            name: "foo".to_string(),
            vers: "0.1.0".to_string(),
            deps: vec![registry_dep("bar", None), registry_dep("baz", Some("qux"))],
            features: BTreeMap::default(),
            links: None,
            description: None,
            authors: None,
            documentation: None,
            homepage: None,
            readme: None,
            readme_file: None,
            keywords: Vec::default(),
            categories: Vec::default(),
            license: None,
            license_file: None,
            repository: None,
            badges: None,
            rust_version: None,
        };

        let index_meta = IndexMetadata::from_reg_meta(&reg_meta, "1234");

        assert_eq!(index_meta.deps.len(), 2);

        // Not renamed: the package name is used directly.
        let plain = &index_meta.deps[0];
        assert_eq!(plain.name, "bar");
        assert_eq!(plain.package, None);

        // Renamed: the alias becomes the name, the real name moves to `package`.
        let renamed = &index_meta.deps[1];
        assert_eq!(renamed.name, "qux");
        assert_eq!(renamed.package, Some("baz".to_string()));
    }

    #[test]
    fn metadata_path_one_letter() {
        assert_eq!(
            metadata_path(Path::new("ip"), "A"),
            path_of(&["ip", "1", "a"])
        );
    }

    #[test]
    fn metadata_path_two_letters() {
        assert_eq!(
            metadata_path(Path::new("ip"), "cB"),
            path_of(&["ip", "2", "cb"])
        );
    }

    #[test]
    fn metadata_path_three_letters() {
        assert_eq!(
            metadata_path(Path::new("ip"), "cAb"),
            path_of(&["ip", "3", "c", "cab"])
        );
    }

    #[test]
    fn metadata_path_four_or_more_letters() {
        assert_eq!(
            metadata_path(Path::new("ip"), "foo_bAr"),
            path_of(&["ip", "fo", "o_", "foo_bar"])
        );
    }

    #[test]
    fn pubtime_serializes_without_fractional_seconds() {
        use chrono::TimeZone;

        let pubtime = Utc.with_ymd_and_hms(2025, 1, 2, 9, 5, 7).unwrap();
        let json = entry_with_pubtime(Some(pubtime)).to_json().unwrap();

        // Verify format is exactly "2025-01-02T09:05:07Z" (zero-padded, no fractional seconds)
        assert!(
            json.contains(r#""pubtime":"2025-01-02T09:05:07Z""#),
            "Expected pubtime to be serialized as '2025-01-02T09:05:07Z', got: {json}"
        );
    }

    #[test]
    fn pubtime_none_is_omitted_from_serialization() {
        let json = entry_with_pubtime(None).to_json().unwrap();

        assert!(
            !json.contains("pubtime"),
            "Expected pubtime to be omitted when None, got: {json}"
        );
    }

    #[test]
    fn pubtime_deserializes_from_rfc3339() {
        let json = r#"{"name":"test","vers":"1.0.0","deps":[],"cksum":"abc","features":{},"yanked":false,"pubtime":"2025-01-02T09:05:07Z","v":1}"#;

        let metadata: IndexMetadata = serde_json::from_str(json).unwrap();

        assert!(metadata.pubtime.is_some());
        assert_expected_pubtime(metadata.pubtime.unwrap());
    }

    #[test]
    fn pubtime_deserializes_from_rfc3339_with_fractional_seconds() {
        // Should also handle input with fractional seconds (from crates.io or other sources)
        let json = r#"{"name":"test","vers":"1.0.0","deps":[],"cksum":"abc","features":{},"yanked":false,"pubtime":"2025-01-02T09:05:07.123456Z","v":1}"#;

        let metadata: IndexMetadata = serde_json::from_str(json).unwrap();

        assert!(metadata.pubtime.is_some());
        assert_expected_pubtime(metadata.pubtime.unwrap());
    }
}