gem_index_filter/
lib.rs

1//! gem-index-filter - Fast streaming filter for RubyGems versions index
2//!
3//! This library provides efficient streaming filtering of the RubyGems
4//! versions file (<https://rubygems.org/versions>). It is designed to run in
5//! memory-constrained environments like edge workers while handling 20+ MB index files.
6//!
7//! # Key Features
8//!
9//! - **True streaming**: Processes the input line by line without retaining earlier lines in memory
10//! - **Flexible filtering**: Allow mode, block mode, or passthrough (no filtering)
11//! - **Order preservation**: Maintains exact original order from input file
12//! - **Fast filtering**: Uses HashSet for O(1) gem name lookups
13//! - **Version stripping**: Optionally replaces version lists with `0` to reduce size
14//! - **Digest computation**: Optionally computes a checksum (SHA-256 or SHA-512) of the filtered output
15//!
16//! # Examples
17//!
18//! **Allow mode** - include only specific gems:
19//!
20//! ```no_run
21//! use gem_index_filter::{filter_versions_streaming, FilterMode, VersionOutput};
22//! use std::collections::HashSet;
23//! use std::fs::File;
24//!
25//! let input = File::open("versions").unwrap();
26//! let mut output = File::create("versions.filtered").unwrap();
27//! let mut allowlist = HashSet::new();
28//! allowlist.insert("rails");
29//! allowlist.insert("sinatra");
30//! filter_versions_streaming(input, &mut output, FilterMode::Allow(&allowlist), VersionOutput::Preserve, None).unwrap();
31//! ```
32//!
33//! **Block mode** - exclude specific gems:
34//!
35//! ```no_run
36//! # use gem_index_filter::{filter_versions_streaming, FilterMode, VersionOutput};
37//! # use std::collections::HashSet;
38//! # use std::fs::File;
39//! let input = File::open("versions").unwrap();
40//! let mut output = File::create("versions.filtered").unwrap();
41//! let mut blocklist = HashSet::new();
42//! blocklist.insert("big-gem");
43//! filter_versions_streaming(input, &mut output, FilterMode::Block(&blocklist), VersionOutput::Preserve, None).unwrap();
44//! ```
45//!
46//! **With digest computation**:
47//!
48//! ```no_run
49//! # use gem_index_filter::{filter_versions_streaming, FilterMode, VersionOutput, DigestAlgorithm};
50//! # use std::fs::File;
51//! let input = File::open("versions").unwrap();
52//! let mut output = File::create("versions.filtered").unwrap();
53//! let digest = filter_versions_streaming(
54//!     input,
55//!     &mut output,
56//!     FilterMode::Passthrough,
57//!     VersionOutput::Preserve,
58//!     Some(DigestAlgorithm::Sha256)
59//! ).unwrap();
60//! if let Some(checksum) = digest {
61//!     println!("SHA-256: {}", checksum);
62//! }
63//! ```
64
65pub mod filter;
66
67pub use filter::{filter_versions_streaming, DigestAlgorithm, FilterMode, VersionOutput};