use super::{Bound, BoundSet, Operation, Predicate, Range};
use crate::{Identifier, Identifiers, MAX_SAFE_INTEGER, Version};
const SHORT_SCAN_MAX: usize = 16;
const BYTE_LANE_LOW_BITS: u128 = 0x0101_0101_0101_0101_0101_0101_0101_0101;
const BYTE_LANE_HIGH_BITS: u128 = 0x8080_8080_8080_8080_8080_8080_8080_8080;
#[derive(Clone, Copy)]
struct ShortWord {
word: u128,
len: usize,
}
impl ShortWord {
#[inline(always)]
fn new(bytes: &[u8]) -> Option<Self> {
if bytes.len() > SHORT_SCAN_MAX {
return None;
}
let mut padded = [0; SHORT_SCAN_MAX];
padded[..bytes.len()].copy_from_slice(bytes);
Some(Self {
word: u128::from_le_bytes(padded),
len: bytes.len(),
})
}
#[inline(always)]
fn contains_byte(self, needle: u8) -> bool {
self.matching_byte_lanes(needle) & active_byte_mask(self.len) != 0
}
#[inline(always)]
fn contains_repeated_pair(self, needle: u8) -> bool {
if self.len < 2 {
return false;
}
let byte_matches = self.matching_byte_lanes(needle);
let valid_pair_starts = active_byte_mask(self.len - 1);
(byte_matches & (byte_matches >> 8) & valid_pair_starts) != 0
}
#[inline(always)]
fn matching_byte_lanes(self, needle: u8) -> u128 {
matching_byte_lanes(self.word, needle)
}
}
#[inline(always)]
fn contains_byte_fast(bytes: &[u8], needle: u8) -> bool {
if let Some(word) = ShortWord::new(bytes) {
return word.contains_byte(needle);
}
bytes.contains(&needle)
}
#[inline(always)]
fn contains_or_fast(input: &str) -> bool {
let bytes = input.as_bytes();
if let Some(word) = ShortWord::new(bytes) {
return word.contains_repeated_pair(b'|');
}
input.contains("||")
}
#[inline(always)]
fn active_byte_mask(len: usize) -> u128 {
if len >= SHORT_SCAN_MAX {
u128::MAX
} else if len == 0 {
0
} else {
(1u128 << (len * 8)) - 1
}
}
#[inline(always)]
fn repeated_byte(byte: u8) -> u128 {
u128::from_le_bytes([byte; SHORT_SCAN_MAX])
}
#[inline(always)]
fn matching_byte_lanes(word: u128, needle: u8) -> u128 {
let zero_bytes = word ^ repeated_byte(needle);
zero_bytes.wrapping_sub(BYTE_LANE_LOW_BITS) & !zero_bytes & BYTE_LANE_HIGH_BITS
}
pub(super) fn parse(input: &str) -> Option<Range> {
let bytes = input.as_bytes();
let first = bytes.first().copied();
if first == Some(b'^') {
return parse_caret(input).or_else(|| parse_or_if_present(input));
}
if first == Some(b'~') {
return parse_tilde(input).or_else(|| parse_or_if_present(input));
}
if matches!(first, Some(b'>' | b'<' | b'=')) {
return parse_comparator_set(input, contains_byte_fast(bytes, b'+'))
.or_else(|| parse_or_if_present(input));
}
if let Some(range) = parse_exact_version(bytes)
.and_then(|version| BoundSet::exact(version).map(Range::from_bound_set))
{
return Some(range);
}
if matches!(first, Some(b'x' | b'X' | b'*')) {
return parse_partial_wildcard(input).or_else(|| parse_or_if_present(input));
}
if matches!(first, Some(b'v' | b'V' | b'0'..=b'9')) {
if contains_byte_fast(bytes, b'-') {
if let Some(range) = parse_hyphen(input, contains_byte_fast(bytes, b'+')) {
return Some(range);
}
}
return parse_partial_wildcard(input).or_else(|| parse_or_if_present(input));
}
None
}
pub(super) fn parse_garbage(input: &str) -> Option<Range> {
if !input.bytes().any(is_space_byte) {
return None;
}
let mut sets = Vec::new();
for part in input.split("||") {
let mut current: Option<BoundSet> = None;
for token in part.split_whitespace() {
if token_requires_fallback(token) {
return None;
}
let Some(bound_set) = parse_single_token(token) else {
continue;
};
current = Some(if let Some(current) = current {
if let Some(bound) = current.intersect(&bound_set) {
bound
} else {
sets.push(current);
bound_set
}
} else {
bound_set
});
}
if let Some(bound_set) = current {
sets.push(bound_set);
}
}
Range::from_bound_sets(sets)
}
fn parse_single_token(token: &str) -> Option<BoundSet> {
let mut bound_sets = Vec::new();
parse(token)?.append_bound_sets_to(&mut bound_sets);
if bound_sets.len() == 1 {
bound_sets.pop()
} else {
None
}
}
fn token_requires_fallback(token: &str) -> bool {
matches!(
token,
"-" | ">" | ">=" | "<" | "<=" | "=" | "^" | "~" | "~>"
) || token.bytes().any(|ch| matches!(ch, b'-' | b'+'))
}
fn parse_or_if_present(input: &str) -> Option<Range> {
contains_or_fast(input).then(|| parse_or(input)).flatten()
}
fn parse_or(input: &str) -> Option<Range> {
let mut sets = Vec::new();
for part in input.split("||") {
let part = part.trim();
if part.is_empty() {
return None;
}
parse(part)?.append_bound_sets_to(&mut sets);
}
Range::from_bound_sets(sets)
}
fn parse_hyphen(input: &str, has_plus: bool) -> Option<Range> {
if has_plus {
return None;
}
let bytes = input.as_bytes();
let (lower, mut i) = parse_partial_loose(input, 0)?;
i = skip_spaces1(bytes, i)?;
if bytes.get(i) != Some(&b'-') {
return None;
}
i += 1;
i = skip_spaces1(bytes, i)?;
let (upper, i) = parse_partial_loose(input, i)?;
if i != bytes.len() {
return None;
}
BoundSet::new(
Bound::Lower(Predicate::Including(partial_to_version(lower))),
Bound::Upper(hyphen_upper(upper)),
)
.map(Range::from_bound_set)
}
fn hyphen_upper(partial: Partial) -> Predicate {
match partial {
Partial {
major: None,
minor: None,
patch: None,
..
} => Predicate::Excluding(Version::from((0, 0, 0, 0))),
Partial {
major: Some(major),
minor: None,
patch: None,
..
} => Predicate::Excluding(Version::from((major + 1, 0, 0, 0))),
Partial {
major: Some(major),
minor: Some(minor),
patch: None,
..
} => Predicate::Excluding(Version::from((major, minor + 1, 0, 0))),
partial => Predicate::Including(partial_to_version(partial)),
}
}
fn parse_comparator_set(input: &str, has_plus: bool) -> Option<Range> {
if has_plus {
return None;
}
let bytes = input.as_bytes();
let mut i = 0;
let mut current: Option<BoundSet> = None;
loop {
let (bound_set, next) = parse_comparator(input, i)?;
current = Some(if let Some(current) = current {
current.intersect(&bound_set)?
} else {
bound_set
});
i = next;
if i == bytes.len() {
break;
}
if !is_space(bytes.get(i).copied()) {
return None;
}
while is_space(bytes.get(i).copied()) {
i += 1;
}
if i == bytes.len() {
return None;
}
}
current.map(Range::from_bound_set)
}
fn parse_comparator(input: &str, start: usize) -> Option<(BoundSet, usize)> {
let bytes = input.as_bytes();
let (operation, mut i) = parse_operation(bytes, start)?;
while is_space(bytes.get(i).copied()) {
i += 1;
}
let (partial, i) = parse_partial_loose(input, i)?;
primitive_range(operation, partial).map(|bound| (bound, i))
}
fn parse_operation(bytes: &[u8], start: usize) -> Option<(Operation, usize)> {
match (bytes.get(start).copied()?, bytes.get(start + 1).copied()) {
(b'>', Some(b'=')) => Some((Operation::GreaterThanEquals, start + 2)),
(b'>', _) => Some((Operation::GreaterThan, start + 1)),
(b'=', _) => Some((Operation::Exact, start + 1)),
(b'<', Some(b'=')) => Some((Operation::LessThanEquals, start + 2)),
(b'<', _) => Some((Operation::LessThan, start + 1)),
_ => None,
}
}
fn primitive_range(operation: Operation, partial: Partial) -> Option<BoundSet> {
use Operation::*;
match (operation, partial) {
(GreaterThanEquals, partial) => {
BoundSet::at_least(Predicate::Including(partial_to_version(partial)))
}
(
GreaterThan,
Partial {
major: Some(major),
minor: Some(minor),
patch: None,
..
},
) => BoundSet::at_least(Predicate::Including(Version::from((major, minor + 1, 0)))),
(
GreaterThan,
Partial {
major: Some(major),
minor: None,
patch: None,
..
},
) => BoundSet::at_least(Predicate::Including(Version::from((major + 1, 0, 0)))),
(GreaterThan, partial) => {
BoundSet::at_least(Predicate::Excluding(partial_to_version(partial)))
}
(
LessThan,
Partial {
major: Some(major),
minor: Some(minor),
patch: None,
..
},
) => BoundSet::at_most(Predicate::Excluding(Version::from((major, minor, 0, 0)))),
(LessThan, partial) => BoundSet::at_most(Predicate::Excluding(partial_to_version(partial))),
(
LessThanEquals,
Partial {
major,
minor: None,
patch: None,
..
},
) => BoundSet::at_most(Predicate::Including(Version::from((
major.unwrap_or(0),
MAX_SAFE_INTEGER,
MAX_SAFE_INTEGER,
)))),
(
LessThanEquals,
Partial {
major,
minor,
patch: None,
..
},
) => BoundSet::at_most(Predicate::Including(Version::from((
major.unwrap_or(0),
minor.unwrap_or(0),
MAX_SAFE_INTEGER,
)))),
(LessThanEquals, partial) => {
BoundSet::at_most(Predicate::Including(partial_to_version(partial)))
}
(
Exact,
Partial {
major: Some(major),
minor: Some(minor),
patch: Some(patch),
pre_release,
},
) => BoundSet::exact(Version::new_with_identifiers(
major,
minor,
patch,
pre_release,
Identifiers::Empty,
)),
(
Exact,
Partial {
major: Some(major),
minor: Some(minor),
..
},
) => BoundSet::new(
Bound::Lower(Predicate::Including(Version::from((major, minor, 0)))),
Bound::Upper(Predicate::Excluding(Version::from((
major,
minor + 1,
0,
0,
)))),
),
(
Exact,
Partial {
major: Some(major), ..
},
) => BoundSet::new(
Bound::Lower(Predicate::Including(Version::from((major, 0, 0)))),
Bound::Upper(Predicate::Excluding(Version::from((major + 1, 0, 0, 0)))),
),
_ => None,
}
}
fn partial_to_version(partial: Partial) -> Version {
Version::new_with_identifiers(
partial.major.unwrap_or(0),
partial.minor.unwrap_or(0),
partial.patch.unwrap_or(0),
partial.pre_release,
Identifiers::Empty,
)
}
fn parse_caret(input: &str) -> Option<Range> {
let bytes = input.as_bytes();
let mut i = 1;
while matches!(
bytes.get(i),
Some(b' ' | b'\t' | b'\n' | b'\r' | 0x0B | 0x0C)
) {
i += 1;
}
let (partial, i) = parse_partial_loose(input, i)?;
if i != bytes.len() {
return None;
}
caret_range(partial).map(Range::from_bound_set)
}
fn parse_tilde(input: &str) -> Option<Range> {
let bytes = input.as_bytes();
let mut i = 1;
while is_space(bytes.get(i).copied()) {
i += 1;
}
if bytes.get(i) == Some(&b'>') {
i += 1;
while is_space(bytes.get(i).copied()) {
i += 1;
}
}
let (partial, i) = parse_partial_loose(input, i)?;
if i != bytes.len() {
return None;
}
tilde_range(partial).map(Range::from_bound_set)
}
fn parse_partial_wildcard(input: &str) -> Option<Range> {
let (partial, i) = parse_partial(input, 0)?;
if i != input.len() {
return None;
}
partial_wildcard_range(partial).map(Range::from_bound_set)
}
fn partial_wildcard_range(partial: Partial) -> Option<BoundSet> {
match partial {
Partial { major: None, .. } => {
BoundSet::at_least(Predicate::Including(Version::from((0, 0, 0))))
}
Partial {
major: Some(major),
minor: None,
..
} => BoundSet::new(
Bound::Lower(Predicate::Including(Version::from((major, 0, 0)))),
Bound::Upper(Predicate::Excluding(Version::from((major + 1, 0, 0, 0)))),
),
Partial {
major: Some(major),
minor: Some(minor),
patch: None,
..
} => BoundSet::new(
Bound::Lower(Predicate::Including(Version::from((major, minor, 0)))),
Bound::Upper(Predicate::Excluding(Version::from((
major,
minor + 1,
0,
0,
)))),
),
_ => None,
}
}
fn tilde_range(partial: Partial) -> Option<BoundSet> {
match partial {
Partial {
major: Some(major),
minor: None,
..
} => BoundSet::new(
Bound::Lower(Predicate::Including(Version::from((major, 0, 0)))),
Bound::Upper(Predicate::Excluding(Version::from((major + 1, 0, 0, 0)))),
),
Partial {
major: Some(major),
minor: Some(minor),
patch,
pre_release,
} => BoundSet::new(
Bound::Lower(Predicate::Including(Version::new_with_identifiers(
major,
minor,
patch.unwrap_or(0),
pre_release,
Identifiers::Empty,
))),
Bound::Upper(Predicate::Excluding(Version::from((
major,
minor + 1,
0,
0,
)))),
),
_ => None,
}
}
fn caret_range(partial: Partial) -> Option<BoundSet> {
match partial {
Partial {
major: Some(0),
minor: None,
patch: None,
..
} => BoundSet::at_most(Predicate::Excluding(Version::from((1, 0, 0, 0)))),
Partial {
major: Some(0),
minor: Some(minor),
patch: None,
..
} => BoundSet::new(
Bound::Lower(Predicate::Including(Version::from((0, minor, 0)))),
Bound::Upper(Predicate::Excluding(Version::from((0, minor + 1, 0, 0)))),
),
Partial {
major: Some(major),
minor: None,
patch: None,
..
} => BoundSet::new(
Bound::Lower(Predicate::Including(Version::from((major, 0, 0)))),
Bound::Upper(Predicate::Excluding(Version::from((major + 1, 0, 0, 0)))),
),
Partial {
major: Some(major),
minor: Some(minor),
patch: None,
..
} => BoundSet::new(
Bound::Lower(Predicate::Including(Version::from((major, minor, 0)))),
Bound::Upper(Predicate::Excluding(Version::from((major + 1, 0, 0, 0)))),
),
Partial {
major: Some(major),
minor: Some(minor),
patch: Some(patch),
pre_release,
} => BoundSet::new(
Bound::Lower(Predicate::Including(Version::new_with_identifiers(
major,
minor,
patch,
pre_release,
Identifiers::Empty,
))),
Bound::Upper(Predicate::Excluding(match (major, minor, patch) {
(0, 0, n) => Version::from((0, 0, n + 1, 0)),
(0, n, _) => Version::from((0, n + 1, 0, 0)),
(n, _, _) => Version::from((n + 1, 0, 0, 0)),
})),
),
_ => None,
}
}
fn parse_exact_version(bytes: &[u8]) -> Option<Version> {
let (major, mut i) = parse_number(bytes, 0)?;
if bytes.get(i) != Some(&b'.') {
return None;
}
i += 1;
let (minor, next) = parse_number(bytes, i)?;
i = next;
if bytes.get(i) != Some(&b'.') {
return None;
}
i += 1;
let (patch, i) = parse_number(bytes, i)?;
if i != bytes.len() {
return None;
}
Some(Version::from((major, minor, patch)))
}
fn parse_number(bytes: &[u8], start: usize) -> Option<(u64, usize)> {
let mut i = start;
let mut value = 0u64;
while let Some(ch @ b'0'..=b'9') = bytes.get(i).copied() {
value = value.checked_mul(10)?.checked_add(u64::from(ch - b'0'))?;
if value > MAX_SAFE_INTEGER {
return None;
}
i += 1;
}
(i > start).then_some((value, i))
}
fn is_space(ch: Option<u8>) -> bool {
ch.is_some_and(is_space_byte)
}
fn is_space_byte(ch: u8) -> bool {
matches!(ch, b' ' | b'\t' | b'\n' | b'\r' | 0x0B | 0x0C)
}
fn skip_spaces1(bytes: &[u8], start: usize) -> Option<usize> {
if !is_space(bytes.get(start).copied()) {
return None;
}
let mut i = start + 1;
while is_space(bytes.get(i).copied()) {
i += 1;
}
Some(i)
}
#[derive(Debug)]
struct Partial {
major: Option<u64>,
minor: Option<u64>,
patch: Option<u64>,
pre_release: Identifiers,
}
fn parse_partial(input: &str, start: usize) -> Option<(Partial, usize)> {
parse_partial_inner(input, start, false)
}
fn parse_partial_loose(input: &str, start: usize) -> Option<(Partial, usize)> {
parse_partial_inner(input, start, true)
}
fn parse_partial_inner(
input: &str,
start: usize,
allow_loose_suffix: bool,
) -> Option<(Partial, usize)> {
let bytes = input.as_bytes();
let mut i = start;
if bytes.get(i) == Some(&b'v') {
i += 1;
}
while matches!(
bytes.get(i),
Some(b' ' | b'\t' | b'\n' | b'\r' | 0x0B | 0x0C)
) {
i += 1;
}
let (major, next) = parse_component(bytes, i)?;
i = next;
let mut minor = None;
let mut patch = None;
let mut pre_release = Identifiers::Empty;
if bytes.get(i) == Some(&b'.') {
let (parsed_minor, next) = parse_component(bytes, i + 1)?;
minor = parsed_minor;
i = next;
if bytes.get(i) == Some(&b'.') {
let (parsed_patch, next) = parse_component(bytes, i + 1)?;
patch = parsed_patch;
i = next;
if patch.is_some() {
if bytes.get(i) == Some(&b'-') {
let (parsed_pre, next) = parse_identifiers(input, i + 1)?;
pre_release = parsed_pre;
i = next;
} else if allow_loose_suffix
&& bytes.get(i).is_some_and(|ch| ch.is_ascii_alphanumeric())
{
let (parsed_pre, next) = parse_identifiers(input, i)?;
pre_release = parsed_pre;
i = next;
}
if bytes.get(i) == Some(&b'+') {
let (_, next) = parse_identifiers(input, i + 1)?;
i = next;
}
}
}
}
Some((
Partial {
major,
minor,
patch,
pre_release,
},
i,
))
}
fn parse_component(bytes: &[u8], start: usize) -> Option<(Option<u64>, usize)> {
match bytes.get(start).copied()? {
b'x' | b'X' | b'*' => Some((None, start + 1)),
b'0'..=b'9' => parse_number(bytes, start).map(|(value, i)| (Some(value), i)),
_ => None,
}
}
fn parse_identifiers(input: &str, start: usize) -> Option<(Identifiers, usize)> {
let bytes = input.as_bytes();
let mut i = start;
let mut identifiers = Identifiers::Empty;
loop {
let ident_start = i;
while matches!(
bytes.get(i),
Some(b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9' | b'-')
) {
i += 1;
}
if i == ident_start {
return None;
}
let ident = &input[ident_start..i];
if ident.bytes().all(|ch| ch.is_ascii_digit()) {
identifiers.push(match ident.parse::<u64>() {
Ok(value) => Identifier::Numeric(value),
Err(_) => Identifier::AlphaNumeric(ident.to_string()),
});
} else {
identifiers.push(Identifier::AlphaNumeric(ident.to_string()));
}
if bytes.get(i) != Some(&b'.') {
return Some((identifiers, i));
}
i += 1;
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn short_word_accepts_only_inputs_up_to_one_word() {
assert!(ShortWord::new(b"").is_some());
assert!(ShortWord::new(b"1234567890123456").is_some());
assert!(ShortWord::new(b"12345678901234567").is_none());
}
#[test]
fn short_byte_search_ignores_zero_padding() {
assert!(!contains_byte_fast(b"abc", 0));
assert!(contains_byte_fast(b"a\0c", 0));
assert!(contains_byte_fast(b">=1.2.3", b'>'));
assert!(!contains_byte_fast(b">=1.2.3", b'+'));
}
#[test]
fn short_or_search_requires_adjacent_pipes() {
assert!(contains_or_fast("12345678901234||"));
assert!(contains_or_fast("^1 || ^2"));
assert!(!contains_or_fast("123456789012345|"));
assert!(!contains_or_fast("^1 | ^2"));
assert!(!contains_or_fast("|"));
assert!(!contains_or_fast(""));
}
#[test]
fn scanners_fall_back_for_long_inputs() {
assert!(contains_byte_fast(b"12345678901234567+meta", b'+'));
assert!(!contains_byte_fast(b"12345678901234567-meta", b'+'));
assert!(contains_or_fast("12345678901234567||next"));
assert!(!contains_or_fast("12345678901234567|next"));
}
#[test]
fn parses_tilde_ranges() {
let cases = [
("~1", ">=1.0.0 <2.0.0-0"),
("~1.2", ">=1.2.0 <1.3.0-0"),
("~1.2.3", ">=1.2.3 <1.3.0-0"),
("~1.2.3-beta", ">=1.2.3-beta <1.3.0-0"),
("~>3.2.1", ">=3.2.1 <3.3.0-0"),
("~> 1", ">=1.0.0 <2.0.0-0"),
("~ > 1.2.3", ">=1.2.3 <1.3.0-0"),
];
for (input, expected) in cases {
assert_eq!(parse(input).unwrap().to_string(), expected);
}
}
#[test]
fn parses_loose_prerelease_suffixes() {
let cases = [
("~1.2.3beta", ">=1.2.3-beta <1.3.0-0"),
("^1.0.0alpha", ">=1.0.0-alpha <2.0.0-0"),
];
for (input, expected) in cases {
assert_eq!(parse(input).unwrap().to_string(), expected);
}
assert!(parse("1.2.3beta").is_none());
}
#[test]
fn parses_simple_garbage_ranges() {
let cases = [
("1.2.3 foo", "1.2.3"),
("foo 1.2.3", "1.2.3"),
("~1.y 1.2.3", "1.2.3"),
("1.2.3 ~1.y", "1.2.3"),
];
for (input, expected) in cases {
assert_eq!(parse_garbage(input).unwrap().to_string(), expected);
}
}
#[test]
fn leaves_complex_garbage_ranges_to_fallback() {
assert!(parse_garbage("foo >= 1.2.3").is_none());
assert!(parse_garbage("foo 1.2.3-beta").is_none());
}
}