Skip to main content

shipper_sparse_index/
lib.rs

1//! Cargo sparse-index helpers.
2//!
3//! This crate owns two focused concerns:
4//! - Converting crate names to sparse-index paths
5//! - Checking JSONL sparse-index content for a target version
6
7use serde::Deserialize;
8
/// Map a crate name to its relative path inside a Cargo sparse index.
///
/// The name is lowercased with ASCII rules first; the directory prefix is then
/// picked from the byte length of the lowercased name:
///
/// - length 1: `1/{name}`
/// - length 2: `2/{name}`
/// - length 3: `3/{first byte}/{name}`
/// - length 4 or more: `{bytes 0..2}/{bytes 2..4}/{name}`
///
/// An empty name does not panic; it yields the placeholder path `0/`.
///
/// # Panics
///
/// The prefix is cut out by byte offset, so a name containing multi-byte
/// UTF-8 characters panics when one of the cut points (offset 1 for length 3,
/// offsets 2 and 4 for longer names) falls inside a character.
pub fn sparse_index_path(crate_name: &str) -> String {
    let name = crate_name.to_ascii_lowercase();

    // Work out the directory part on its own, then append the file name once.
    let prefix = match name.len() {
        0 => return String::from("0/"),
        n @ (1 | 2) => n.to_string(),
        3 => format!("3/{}", &name[..1]),
        _ => format!("{}/{}", &name[..2], &name[2..4]),
    };

    format!("{prefix}/{name}")
}
28
/// Minimal deserialization target for one line of sparse-index JSONL.
///
/// Only `vers` is declared. The tests in this file feed lines that carry
/// additional keys (`name`, `deps`, `yanked`, ...) and still expect a match,
/// while a line without `vers` is expected to be skipped.
#[derive(Debug, Deserialize)]
struct SparseIndexEntry {
    /// Version string of the entry; `contains_version` compares it verbatim.
    vers: String,
}
33
34/// Returns `true` if JSONL sparse-index content contains the exact version.
35///
36/// Invalid lines are ignored.
37pub fn contains_version(content: &str, version: &str) -> bool {
38    content
39        .lines()
40        .filter(|line| !line.trim().is_empty())
41        .filter_map(|line| serde_json::from_str::<SparseIndexEntry>(line).ok())
42        .any(|entry| entry.vers == version)
43}
44
// Unit tests for `sparse_index_path` and `contains_version`.
//
// The path tests pin the layout for each name-length category, ASCII
// lowercasing, and the panic on misaligned multi-byte names. The version tests
// pin exact string matching and the "skip anything unparsable" behaviour.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn sparse_index_path_matches_cargo_layout() {
        assert_eq!(sparse_index_path("a"), "1/a");
        assert_eq!(sparse_index_path("ab"), "2/ab");
        assert_eq!(sparse_index_path("abc"), "3/a/abc");
        assert_eq!(sparse_index_path("demo"), "de/mo/demo");
    }

    #[test]
    fn sparse_index_path_lowercases_ascii_names() {
        assert_eq!(sparse_index_path("Serde"), "se/rd/serde");
        assert_eq!(sparse_index_path("A"), "1/a");
    }

    #[test]
    fn sparse_index_path_handles_empty_name_without_panicking() {
        // The empty name maps to the placeholder "0/" instead of slicing.
        assert_eq!(sparse_index_path(""), "0/");
    }

    #[test]
    fn contains_version_finds_exact_match() {
        let content = r#"{"vers":"0.1.0"}
{"vers":"1.0.0"}
{"vers":"2.0.0"}"#;
        assert!(contains_version(content, "1.0.0"));
        assert!(!contains_version(content, "3.0.0"));
    }

    #[test]
    fn contains_version_ignores_invalid_lines() {
        // Line 2 is not JSON and line 3 lacks `vers`; both must be skipped
        // without preventing the match on line 4.
        let content = r#"{"vers":"0.1.0"}
not json
{"oops":"missing-vers"}
{"vers":"1.2.3"}"#;
        assert!(contains_version(content, "1.2.3"));
    }

    #[test]
    fn contains_version_requires_exact_match() {
        let content = r#"{"vers":"1.2.3"}"#;
        assert!(!contains_version(content, "1.2"));
    }

    // ── Index URL construction: boundary lengths ──

    #[test]
    fn sparse_index_path_exact_four_char_boundary() {
        // Length 4 is the first length that uses the two-level `ab/cd/` layout.
        assert_eq!(sparse_index_path("abcd"), "ab/cd/abcd");
    }

    #[test]
    fn sparse_index_path_five_chars() {
        assert_eq!(sparse_index_path("hello"), "he/ll/hello");
    }

    #[test]
    fn sparse_index_path_long_name() {
        // 100-byte name: only the first four bytes feed the directory prefix.
        let name = "a".to_string() + &"b".repeat(99);
        let path = sparse_index_path(&name);
        assert!(path.starts_with("ab/bb/"));
        assert!(path.ends_with(&name));
    }

    // ── Crate name edge cases ──

    #[test]
    fn sparse_index_path_with_hyphens() {
        assert_eq!(sparse_index_path("my-crate"), "my/-c/my-crate");
    }

    #[test]
    fn sparse_index_path_with_underscores() {
        assert_eq!(sparse_index_path("my_crate"), "my/_c/my_crate");
    }

    #[test]
    fn sparse_index_path_hyphen_underscore_produce_different_paths() {
        // No `-`/`_` normalisation is performed by `sparse_index_path`.
        let hyphen = sparse_index_path("my-crate");
        let underscore = sparse_index_path("my_crate");
        assert_ne!(hyphen, underscore);
    }

    #[test]
    fn sparse_index_path_digits_in_name() {
        assert_eq!(sparse_index_path("h264"), "h2/64/h264");
        assert_eq!(sparse_index_path("3d"), "2/3d");
    }

    #[test]
    fn sparse_index_path_all_digits() {
        assert_eq!(sparse_index_path("1234"), "12/34/1234");
    }

    #[test]
    #[should_panic(expected = "byte index")]
    fn sparse_index_path_panics_on_multibyte_unicode() {
        // Crate names must be ASCII; multi-byte chars cause an indexing panic
        // ("café" is 5 bytes, and byte offset 4 lands inside the 2-byte 'é').
        let _ = sparse_index_path("café");
    }

    #[test]
    fn sparse_index_path_ascii_only_unicode_safe() {
        // Pure ASCII with non-alpha chars does not panic
        let path = sparse_index_path("a-b_c");
        assert_eq!(path, "a-/b_/a-b_c");
    }

    #[test]
    fn sparse_index_path_mixed_case_three_char() {
        assert_eq!(sparse_index_path("SYN"), "3/s/syn");
        assert_eq!(sparse_index_path("Syn"), "3/s/syn");
    }

    #[test]
    fn sparse_index_path_already_lowercase() {
        // Lowercase input and its uppercase form must resolve to the same path.
        assert_eq!(sparse_index_path("serde"), sparse_index_path("SERDE"));
    }

    #[test]
    fn sparse_index_path_single_char_variants() {
        // Every uppercase ASCII letter maps to `1/<lowercase letter>`.
        for c in b'A'..=b'Z' {
            let upper = String::from(c as char);
            let lower = upper.to_ascii_lowercase();
            assert_eq!(sparse_index_path(&upper), format!("1/{lower}"));
        }
    }

    // ── Response parsing edge cases ──

    #[test]
    fn contains_version_empty_content() {
        assert!(!contains_version("", "1.0.0"));
    }

    #[test]
    fn contains_version_whitespace_only_content() {
        assert!(!contains_version("   \t  \n  \n  ", "1.0.0"));
    }

    #[test]
    fn contains_version_single_entry() {
        assert!(contains_version(r#"{"vers":"0.1.0"}"#, "0.1.0"));
    }

    #[test]
    fn contains_version_many_versions() {
        // 200 generated entries, `0.0.0` through `0.199.0`, one per line.
        let content: String = (0..200)
            .map(|i| format!("{{\"vers\":\"0.{i}.0\"}}"))
            .collect::<Vec<_>>()
            .join("\n");
        assert!(contains_version(&content, "0.99.0"));
        assert!(contains_version(&content, "0.0.0"));
        assert!(contains_version(&content, "0.199.0"));
        assert!(!contains_version(&content, "0.200.0"));
    }

    #[test]
    fn contains_version_prerelease() {
        let content = r#"{"vers":"1.0.0-alpha.1"}
{"vers":"1.0.0-beta.2"}
{"vers":"1.0.0"}"#;
        assert!(contains_version(content, "1.0.0-alpha.1"));
        assert!(contains_version(content, "1.0.0-beta.2"));
        assert!(contains_version(content, "1.0.0"));
        assert!(!contains_version(content, "1.0.0-rc.1"));
    }

    #[test]
    fn contains_version_build_metadata() {
        // Build metadata is part of the compared string, so the bare version
        // does not match.
        let content = r#"{"vers":"1.0.0+build.123"}"#;
        assert!(contains_version(content, "1.0.0+build.123"));
        assert!(!contains_version(content, "1.0.0"));
    }

    #[test]
    fn contains_version_trailing_newline() {
        let content = "{\"vers\":\"1.0.0\"}\n";
        assert!(contains_version(content, "1.0.0"));
    }

    #[test]
    fn contains_version_blank_lines_between_entries() {
        let content = "{\"vers\":\"0.1.0\"}\n\n\n{\"vers\":\"0.2.0\"}\n\n";
        assert!(contains_version(content, "0.1.0"));
        assert!(contains_version(content, "0.2.0"));
    }

    #[test]
    fn contains_version_windows_line_endings() {
        let content = "{\"vers\":\"0.1.0\"}\r\n{\"vers\":\"0.2.0\"}\r\n";
        assert!(contains_version(content, "0.1.0"));
        assert!(contains_version(content, "0.2.0"));
    }

    #[test]
    fn contains_version_empty_version_query() {
        let content = r#"{"vers":"1.0.0"}"#;
        assert!(!contains_version(content, ""));
    }

    #[test]
    fn contains_version_duplicate_versions() {
        let content = "{\"vers\":\"1.0.0\"}\n{\"vers\":\"1.0.0\"}\n{\"vers\":\"1.0.0\"}";
        assert!(contains_version(content, "1.0.0"));
    }

    #[test]
    fn contains_version_realistic_full_entry() {
        // A full index record: keys other than `vers` must not break parsing.
        let content = r#"{"name":"serde","vers":"1.0.210","deps":[{"name":"serde_derive","req":"^1.0","features":["default"],"optional":true,"default_features":false,"target":null,"kind":"normal"}],"cksum":"abcdef1234567890","features":{"default":["std"],"derive":["serde_derive"],"std":[]},"yanked":false,"links":null,"v":2}"#;
        assert!(contains_version(content, "1.0.210"));
        assert!(!contains_version(content, "1.0.211"));
    }

    #[test]
    fn contains_version_yanked_entry_still_matches() {
        // `yanked` is not inspected; presence of the version is all that counts.
        let content = r#"{"vers":"0.1.0","yanked":true}"#;
        assert!(contains_version(content, "0.1.0"));
    }

    // ── Simulated error responses (non-JSON content) ──

    #[test]
    fn contains_version_html_error_page() {
        let content = "<html><body>404 Not Found</body></html>";
        assert!(!contains_version(content, "1.0.0"));
    }

    #[test]
    fn contains_version_plain_text_error() {
        let content = "rate limit exceeded";
        assert!(!contains_version(content, "1.0.0"));
    }

    #[test]
    fn contains_version_json_error_object() {
        // Valid JSON, but without a `vers` key it is not an index entry.
        let content = r#"{"errors":[{"detail":"Not Found"}]}"#;
        assert!(!contains_version(content, "1.0.0"));
    }

    // ── Version filtering precision ──

    #[test]
    fn contains_version_does_not_match_prefix() {
        let content = r#"{"vers":"1.0.0"}"#;
        assert!(!contains_version(content, "1.0"));
        assert!(!contains_version(content, "1"));
    }

    #[test]
    fn contains_version_does_not_match_suffix() {
        let content = r#"{"vers":"1.0.0"}"#;
        assert!(!contains_version(content, "0.0"));
        assert!(!contains_version(content, "1.0.0.0"));
    }

    #[test]
    fn contains_version_distinguishes_similar_versions() {
        let content = r#"{"vers":"1.10.0"}
{"vers":"1.1.0"}
{"vers":"10.1.0"}"#;
        assert!(contains_version(content, "1.10.0"));
        assert!(contains_version(content, "1.1.0"));
        assert!(contains_version(content, "10.1.0"));
        assert!(!contains_version(content, "1.0.0"));
        assert!(!contains_version(content, "1.100.0"));
    }
}
316
// Snapshot tests built on `insta::assert_snapshot!`.
//
// Assertions with an `@"..."` literal carry their expected value inline.
// Assertions without one do not show their expected value in this file;
// presumably it lives in an external snapshot file keyed by the test name
// (TODO confirm against the insta setup), so test names are left untouched.
#[cfg(test)]
mod snapshot_tests {
    use super::*;
    use insta::assert_snapshot;

    // ── sparse_index_path: all length categories ──

    #[test]
    fn snapshot_path_empty_name() {
        assert_snapshot!(sparse_index_path(""), @"0/");
    }

    #[test]
    fn snapshot_path_one_char() {
        assert_snapshot!(sparse_index_path("a"), @"1/a");
    }

    #[test]
    fn snapshot_path_two_chars() {
        assert_snapshot!(sparse_index_path("ab"), @"2/ab");
    }

    #[test]
    fn snapshot_path_three_chars() {
        assert_snapshot!(sparse_index_path("abc"), @"3/a/abc");
    }

    #[test]
    fn snapshot_path_four_chars() {
        assert_snapshot!(sparse_index_path("demo"), @"de/mo/demo");
    }

    // ── sparse_index_path: real-world crates ──

    #[test]
    fn snapshot_path_real_world_crates() {
        let crates = [
            "serde",
            "tokio",
            "clap",
            "anyhow",
            "rand",
            "syn",
            "proc-macro2",
            "quote",
            "libc",
            "regex",
        ];
        // One `name -> path` line per crate, joined into a single snapshot.
        let paths: Vec<String> = crates
            .iter()
            .map(|c| format!("{c} -> {}", sparse_index_path(c)))
            .collect();
        assert_snapshot!(paths.join("\n"));
    }

    // ── sparse_index_path: case normalisation ──

    #[test]
    fn snapshot_path_mixed_case() {
        assert_snapshot!(sparse_index_path("Serde"), @"se/rd/serde");
    }

    #[test]
    fn snapshot_path_all_upper() {
        assert_snapshot!(sparse_index_path("TOKIO"), @"to/ki/tokio");
    }

    // ── sparse_index_path: index URL construction ──

    #[test]
    fn snapshot_full_sparse_index_url() {
        // `base` ends with `/`, so the path is appended without a separator.
        let base = "https://index.crates.io/";
        let crates = ["serde", "a", "ab", "syn", "rand_core"];
        let urls: Vec<String> = crates
            .iter()
            .map(|c| format!("{base}{}", sparse_index_path(c)))
            .collect();
        assert_snapshot!(urls.join("\n"));
    }

    // ── contains_version: parsed entry snapshots ──

    #[test]
    fn snapshot_version_found() {
        let content = r#"{"vers":"0.1.0"}
{"vers":"1.0.0"}
{"vers":"2.0.0"}"#;
        assert_snapshot!(contains_version(content, "1.0.0").to_string(), @"true");
    }

    #[test]
    fn snapshot_version_not_found() {
        let content = r#"{"vers":"0.1.0"}
{"vers":"1.0.0"}"#;
        assert_snapshot!(contains_version(content, "3.0.0").to_string(), @"false");
    }

    #[test]
    fn snapshot_version_with_extra_fields() {
        let content = r#"{"name":"serde","vers":"1.0.210","deps":[],"cksum":"abc","features":{},"yanked":false}
{"name":"serde","vers":"1.0.211","deps":[],"cksum":"def","features":{},"yanked":false}"#;
        assert_snapshot!(contains_version(content, "1.0.210").to_string(), @"true");
    }

    #[test]
    fn snapshot_version_with_invalid_lines() {
        // Two parsable entries surrounded by a non-JSON line and a line
        // without `vers`; three queries are recorded as `version -> bool`.
        let content = r#"not-json
{"vers":"0.5.0"}
{"oops":"missing"}
{"vers":"1.2.3"}"#;
        let results: Vec<String> = ["0.5.0", "1.2.3", "9.9.9"]
            .iter()
            .map(|v| format!("{v} -> {}", contains_version(content, v)))
            .collect();
        assert_snapshot!(results.join("\n"));
    }

    #[test]
    fn snapshot_version_empty_content() {
        assert_snapshot!(contains_version("", "1.0.0").to_string(), @"false");
    }

    // ── Additional snapshot tests ──

    #[test]
    fn snapshot_path_hyphenated_and_underscored_crates() {
        let crates = [
            "my-crate",
            "my_crate",
            "proc-macro2",
            "rand_core",
            "serde_json",
            "async-trait",
        ];
        let paths: Vec<String> = crates
            .iter()
            .map(|c| format!("{c} -> {}", sparse_index_path(c)))
            .collect();
        assert_snapshot!(paths.join("\n"));
    }

    #[test]
    fn snapshot_path_boundary_lengths() {
        // Names of length 1, 2, 3, 4, 5 and 10; the byte length is recorded
        // alongside each path.
        let names = ["x", "ab", "syn", "clap", "tokio", "serde_json"];
        let paths: Vec<String> = names
            .iter()
            .map(|c| format!("len={} {c} -> {}", c.len(), sparse_index_path(c)))
            .collect();
        assert_snapshot!(paths.join("\n"));
    }

    #[test]
    fn snapshot_multiversion_lookup_results() {
        let content = r#"{"vers":"0.1.0"}
{"vers":"0.2.0"}
{"vers":"1.0.0-alpha"}
{"vers":"1.0.0"}
{"vers":"1.0.1"}
{"vers":"2.0.0"}"#;
        // Mix of versions present in `content` and versions absent from it
        // ("0.3.0", "1.0.2", "3.0.0").
        let queries = [
            "0.1.0",
            "0.2.0",
            "0.3.0",
            "1.0.0-alpha",
            "1.0.0",
            "1.0.1",
            "1.0.2",
            "2.0.0",
            "3.0.0",
        ];
        let results: Vec<String> = queries
            .iter()
            .map(|v| format!("{v} -> {}", contains_version(content, v)))
            .collect();
        assert_snapshot!(results.join("\n"));
    }

    #[test]
    fn snapshot_index_url_all_length_categories() {
        let base = "https://index.crates.io/";
        let names = ["x", "ab", "syn", "rand", "serde", "my-crate", "proc-macro2"];
        let urls: Vec<String> = names
            .iter()
            .map(|c| format!("{c} -> {base}{}", sparse_index_path(c)))
            .collect();
        assert_snapshot!(urls.join("\n"));
    }
}
505
// Property-based tests (proptest) for `sparse_index_path` and
// `contains_version`. Inputs are generated from the regex strategies given in
// each test signature; all name strategies are ASCII-only.
#[cfg(test)]
mod property_tests {
    use std::collections::BTreeSet;

    use proptest::prelude::*;

    use super::*;

    proptest! {
        // Calling the function twice on the same name yields the same path.
        #[test]
        fn sparse_index_path_is_deterministic(name in "[A-Za-z0-9_-]{0,32}") {
            let first = sparse_index_path(&name);
            let second = sparse_index_path(&name);
            prop_assert_eq!(first, second);
        }

        // The final path segment is always the ASCII-lowercased name.
        #[test]
        fn sparse_index_path_ends_with_lowercase_name_for_non_empty_inputs(name in "[A-Za-z0-9_-]{1,32}") {
            let lower = name.to_ascii_lowercase();
            let path = sparse_index_path(&name);
            prop_assert!(path.ends_with(&lower));
        }

        // A version that is written into the content must be found, regardless
        // of which other versions accompany it.
        #[test]
        fn contains_version_returns_true_when_version_is_present(
            target in "[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}",
            others in prop::collection::vec("[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}", 0..16),
        ) {
            let mut versions = Vec::with_capacity(others.len() + 1);
            versions.push(target.clone());
            versions.extend(others);

            let content = versions
                .iter()
                .map(|v| format!("{{\"vers\":\"{}\"}}", v))
                .collect::<Vec<_>>()
                .join("\n");

            prop_assert!(contains_version(&content, &target));
        }

        #[test]
        fn contains_version_returns_false_when_version_is_absent(
            target in "[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}",
            versions in prop::collection::vec("[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}", 0..16),
        ) {
            // Drop any generated version equal to `target` so it is guaranteed
            // to be missing from the content.
            let unique: BTreeSet<String> = versions.into_iter().filter(|v| v != &target).collect();
            let content = unique
                .iter()
                .map(|v| format!("{{\"vers\":\"{}\"}}", v))
                .collect::<Vec<_>>()
                .join("\n");

            // NOTE(review): `unique` can never contain `target` after the
            // filter above, so the expected value here is always `false`.
            prop_assert_eq!(contains_version(&content, &target), unique.contains(&target));
        }

        // The strategy always yields at least one character, so the `_` arm
        // below (length 0) is never reached by generated inputs.
        #[test]
        fn sparse_index_path_correct_prefix_by_length(name in "[a-z][a-z0-9]{0,31}") {
            let path = sparse_index_path(&name);
            match name.len() {
                1 => prop_assert!(path.starts_with("1/"), "expected '1/' for len=1, got {path}"),
                2 => prop_assert!(path.starts_with("2/"), "expected '2/' for len=2, got {path}"),
                3 => {
                    let expected = format!("3/{}/", &name[..1]);
                    prop_assert!(path.starts_with(&expected), "expected '{expected}', got {path}");
                }
                n if n >= 4 => {
                    let expected = format!("{}/{}/", &name[..2], &name[2..4]);
                    prop_assert!(path.starts_with(&expected), "expected '{expected}', got {path}");
                }
                _ => {}
            }
        }

        // A single-entry document always contains its own version.
        #[test]
        fn contains_version_roundtrip_single(ver in "[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}") {
            let content = format!("{{\"vers\":\"{ver}\"}}");
            prop_assert!(contains_version(&content, &ver));
        }
    }
}
586}