#![allow(clippy::module_name_repetitions)]
use crate::Error;
/// One parsed piece of a URL glob pattern.
#[derive(Debug, Clone, PartialEq, Eq)]
enum Segment {
    /// Text that is copied unchanged into every expanded URL.
    Literal(String),
    /// Alternatives of a `{a,b,c}` set, in the order they were written.
    Set(Vec<String>),
    /// Integer range such as `[1-10]` or `[1-10:2]`.
    NumericRange {
        /// First value produced.
        start: i64,
        /// Bound of the range on the far side of `start`.
        end: i64,
        /// Signed increment between consecutive values.
        step: i64,
        /// Minimum zero-padded width of each rendered value; `0` disables padding.
        width: usize,
    },
    /// Character range such as `[a-z]` or `[a-z:2]`.
    AlphaRange { start: char, end: char, step: u32 },
}

impl Segment {
    /// Returns how many distinct values this segment expands to.
    ///
    /// A range whose `step` is zero yields `0`. The result saturates at
    /// `usize::MAX` rather than overflowing when the range spans (almost) the
    /// whole integer domain.
    fn count(&self) -> usize {
        match self {
            Self::Literal(_) => 1,
            Self::Set(items) => items.len(),
            Self::NumericRange { start, end, step, .. } => {
                if *step == 0 {
                    return 0;
                }
                let steps = start.abs_diff(*end) / step.unsigned_abs();
                // `steps` can be `u64::MAX`; adding one must not wrap or panic.
                usize::try_from(steps).map_or(usize::MAX, |n| n.saturating_add(1))
            }
            Self::AlphaRange { start, end, step } => {
                if *step == 0 {
                    return 0;
                }
                let steps = u32::from(*start).abs_diff(u32::from(*end)) / step;
                usize::try_from(steps).map_or(usize::MAX, |n| n.saturating_add(1))
            }
        }
    }

    /// Renders the value at position `index` (zero-based) of this segment.
    ///
    /// `Literal` ignores `index`. Numeric values are zero-padded to `width`
    /// with the sign kept in front of the padding. An alphabetic offset that
    /// does not map to a valid `char` is rendered as `"?"`.
    ///
    /// # Panics
    ///
    /// Panics if `index` is out of bounds for a `Set`.
    fn value_at(&self, index: usize) -> String {
        match self {
            Self::Literal(s) => s.clone(),
            Self::Set(items) => items[index].clone(),
            Self::NumericRange { start, step, width, .. } => {
                // Widen to i128: `index * step` may exceed i64 even when the
                // final value fits, and usize * i64 + i64 always fits in i128.
                let val = i128::from(*start) + (index as i128) * i128::from(*step);
                if *width > 0 {
                    // The `0` flag is sign-aware, so -1 at width 3 is "-01", not "0-1".
                    format!("{val:0width$}", width = *width)
                } else {
                    val.to_string()
                }
            }
            Self::AlphaRange { start, step, .. } => u32::try_from(index)
                .ok()
                .and_then(|i| i.checked_mul(*step))
                .and_then(|offset| u32::from(*start).checked_add(offset))
                .and_then(char::from_u32)
                .map_or_else(|| "?".to_string(), |c| c.to_string()),
        }
    }
}
/// Upper bound on the segments `parse_glob` will count before it fails with a
/// "too many sets" error.
const MAX_GLOB_SEGMENTS: usize = 255;
/// Shortens `pattern` so that it fits next to a "too many sets" error header.
///
/// The budget is 511 minus the byte length of the header text built for
/// `display_pos`; at most that many *characters* of `pattern` are kept.
fn glob_truncate_url(pattern: &str, display_pos: usize) -> String {
    let prefix_len = format!("too many {{}} sets in URL position {display_pos}:\n").len();
    let budget = 511_usize.saturating_sub(prefix_len);
    // The byte offset of the character just past the budget is the cut point;
    // if there is no such character the pattern already fits.
    match pattern.char_indices().nth(budget) {
        Some((cut, _)) => pattern[..cut].to_string(),
        None => pattern.to_string(),
    }
}
#[allow(clippy::too_many_lines)]
fn parse_glob(pattern: &str) -> Result<Vec<Segment>, Error> {
let mut segments = Vec::new();
let mut chars = pattern.chars().peekable();
let mut literal = String::new();
let mut pos: usize = 0;
let mut segment_count: usize = 0;
while let Some(&ch) = chars.peek() {
match ch {
'\\' => {
let _ = chars.next(); pos += 1;
if let Some(&next_ch) = chars.peek() {
if matches!(next_ch, '{' | '}' | '[' | ']') {
literal.push(next_ch);
let _ = chars.next();
pos += next_ch.len_utf8();
} else {
literal.push('\\');
}
} else {
literal.push('\\');
}
}
'{' => {
if !literal.is_empty() {
segment_count += 1;
if segment_count > MAX_GLOB_SEGMENTS {
let display_pos = pos + 1;
let truncated = glob_truncate_url(pattern, display_pos);
return Err(Error::UrlGlob {
message: format!("too many {{}} sets in URL position {display_pos}:"),
url: truncated,
position: pos,
});
}
segments.push(Segment::Literal(std::mem::take(&mut literal)));
}
let _ = chars.next(); pos += 1;
let (set, consumed) = parse_set_with_len(&mut chars)?;
pos += consumed;
segment_count += 1;
if segment_count > MAX_GLOB_SEGMENTS {
let display_pos = pos + 1;
let truncated = glob_truncate_url(pattern, display_pos);
return Err(Error::UrlGlob {
message: format!("too many {{}} sets in URL position {display_pos}:"),
url: truncated,
position: pos,
});
}
segments.push(set);
}
'[' => {
#[allow(clippy::unused_peekable)]
let treat_as_literal = {
let mut scan = chars.clone();
let _ = scan.next(); let mut bracket_content = String::new();
let mut found_close = false;
for c in scan {
if c == ']' {
found_close = true;
break;
}
bracket_content.push(c);
}
if !found_close {
false } else if bracket_content.is_empty() {
true } else {
let colon_count = bracket_content.chars().filter(|&c| c == ':').count();
bracket_content.starts_with(':')
|| colon_count >= 2
|| bracket_content.contains('%')
}
};
if treat_as_literal {
literal.push('[');
let _ = chars.next(); pos += 1;
while let Some(&c) = chars.peek() {
literal.push(c);
let _ = chars.next();
pos += c.len_utf8();
if c == ']' {
break;
}
}
} else {
if !literal.is_empty() {
segment_count += 1;
if segment_count > MAX_GLOB_SEGMENTS {
let display_pos = pos + 1;
let truncated = glob_truncate_url(pattern, display_pos);
return Err(Error::UrlGlob {
message: format!("too many [] sets in URL position {display_pos}:"),
url: truncated,
position: pos,
});
}
segments.push(Segment::Literal(std::mem::take(&mut literal)));
}
let open_pos = pos;
let _ = chars.next(); pos += 1;
let (range, consumed) = parse_range_with_len(&mut chars, pattern, open_pos)?;
pos += consumed;
segment_count += 1;
if segment_count > MAX_GLOB_SEGMENTS {
let display_pos = pos + 1;
let truncated = glob_truncate_url(pattern, display_pos);
return Err(Error::UrlGlob {
message: format!("too many [] sets in URL position {display_pos}:"),
url: truncated,
position: pos,
});
}
segments.push(range);
}
}
_ => {
literal.push(ch);
let _ = chars.next();
pos += ch.len_utf8();
}
}
}
if !literal.is_empty() {
segments.push(Segment::Literal(literal));
}
Ok(segments)
}
/// Parses the body of a `{...}` set; the opening `{` must already be consumed.
///
/// Items are separated by top-level commas. Nested braces are tracked by depth
/// and kept verbatim inside the current item. Empty items such as the middle
/// one in `{a,,c}` are allowed.
///
/// Returns the set together with a length that is one less than the number of
/// bytes read, i.e. it does not include the closing `}`.
///
/// # Errors
///
/// Returns an error for a completely empty set (`{}`) and for input that ends
/// before the set is closed.
fn parse_set_with_len(
    chars: &mut std::iter::Peekable<std::str::Chars<'_>>,
) -> Result<(Segment, usize), Error> {
    let mut items = Vec::new();
    let mut current = String::new();
    let mut depth = 1;
    let mut consumed: usize = 0;
    for ch in chars.by_ref() {
        consumed += ch.len_utf8();
        match ch {
            '{' => {
                depth += 1;
                current.push(ch);
            }
            '}' => {
                depth -= 1;
                if depth == 0 {
                    // Must be checked before the final push: once `current`
                    // is pushed, `items` can never be empty.
                    if items.is_empty() && current.is_empty() {
                        return Err(Error::Http("empty glob set {}".to_string()));
                    }
                    items.push(current);
                    return Ok((Segment::Set(items), consumed - 1));
                }
                current.push(ch);
            }
            ',' if depth == 1 => {
                items.push(std::mem::take(&mut current));
            }
            _ => {
                current.push(ch);
            }
        }
    }
    Err(Error::Http("unclosed glob set '{'".to_string()))
}
fn parse_range_with_len(
chars: &mut std::iter::Peekable<std::str::Chars<'_>>,
url: &str,
open_pos: usize,
) -> Result<(Segment, usize), Error> {
let mut content = String::new();
let mut consumed: usize = 0;
for ch in chars.by_ref() {
consumed += ch.len_utf8();
if ch == ']' {
let end_pos = open_pos + 1 + consumed;
let seg = parse_range_content_with_pos(&content, url, end_pos)?;
return Ok((seg, consumed));
}
content.push(ch);
}
Err(Error::Http("unclosed glob range '['".to_string()))
}
/// Parses the text between `[` and `]` into a numeric or alphabetic range.
///
/// The accepted form is `start-end` with an optional `:step` suffix. When both
/// bounds are single ASCII letters the result is an `AlphaRange`; otherwise the
/// bounds are parsed as `i64`. A bound written with a leading zero (and more
/// than one character) fixes the zero-padded output width.
///
/// `url` and `end_pos` are only used to build the "bad range" error.
///
/// # Errors
///
/// Returns an error when the dash is missing, a bound or step does not parse,
/// the step is zero, or the bounds run against the direction of the step.
fn parse_range_content_with_pos(
    content: &str,
    url: &str,
    end_pos: usize,
) -> Result<Segment, Error> {
    // Everything after the last ':' is the step.
    let (range_part, step_str) = content
        .rsplit_once(':')
        .map_or((content, None), |(range, step)| (range, Some(step)));
    let dash_pos = find_range_dash(range_part)
        .ok_or_else(|| Error::Http(format!("invalid glob range: [{content}]")))?;
    let start_str = &range_part[..dash_pos];
    let end_str = &range_part[dash_pos + 1..];

    if start_str.len() == 1 && end_str.len() == 1 {
        let start_ch = start_str.chars().next().unwrap_or('a');
        let end_ch = end_str.chars().next().unwrap_or('z');
        if start_ch.is_ascii_alphabetic() && end_ch.is_ascii_alphabetic() {
            let step = step_str
                .map(str::parse::<u32>)
                .transpose()
                .map_err(|_| Error::Http(format!("invalid glob range step: [{content}]")))?
                .unwrap_or(1);
            // A zero step would otherwise yield a zero-count segment and an
            // empty expansion; reject it exactly like the numeric case below.
            if step == 0 {
                return Err(Error::Http("glob range step cannot be zero".to_string()));
            }
            if start_ch > end_ch {
                return Err(bad_range_error(url, end_pos));
            }
            return Ok(Segment::AlphaRange { start: start_ch, end: end_ch, step });
        }
    }

    let start: i64 = start_str
        .parse()
        .map_err(|_| Error::Http(format!("invalid glob range start: [{content}]")))?;
    let end: i64 =
        end_str.parse().map_err(|_| Error::Http(format!("invalid glob range end: [{content}]")))?;
    // The start bound takes precedence over the end bound when choosing the width.
    let width = if start_str.len() > 1 && start_str.starts_with('0') {
        start_str.len()
    } else if end_str.len() > 1 && end_str.starts_with('0') {
        end_str.len()
    } else {
        0
    };
    let step = step_str
        .map(str::parse::<i64>)
        .transpose()
        .map_err(|_| Error::Http(format!("invalid glob range step: [{content}]")))?
        .unwrap_or(1);
    if step == 0 {
        return Err(Error::Http("glob range step cannot be zero".to_string()));
    }
    if (step > 0 && start > end) || (step < 0 && start < end) {
        return Err(bad_range_error(url, end_pos));
    }
    Ok(Segment::NumericRange { start, end, step, width })
}
fn bad_range_error(url: &str, end_pos: usize) -> Error {
if url.is_empty() {
return Error::UrlGlob {
message: "bad range in URL".to_string(),
url: String::new(),
position: 0,
};
}
let display_pos = end_pos + 1;
Error::UrlGlob {
message: format!("bad range in URL position {display_pos}:"),
url: url.to_string(),
position: end_pos,
}
}
/// Finds the byte index of the dash that separates the two range bounds.
///
/// A `-` in the very first position is treated as the sign of the start bound
/// and skipped, so `"-5-10"` yields the index of the second dash.
fn find_range_dash(s: &str) -> Option<usize> {
    match s.strip_prefix('-') {
        // Offsets found in the remainder are shifted back by the skipped sign.
        Some(rest) => rest.find('-').map(|idx| idx + 1),
        None => s.find('-'),
    }
}
/// Largest number of URLs a single pattern may expand to; `expand_glob` and
/// `expand_glob_with_values` return an error above this.
const MAX_EXPANSION: usize = 100_000;
/// Expands a URL glob pattern into every concrete URL it describes.
///
/// URLs are produced with the right-most glob varying fastest. A pattern with
/// no glob segments yields exactly one URL.
///
/// # Errors
///
/// Returns an error when the pattern cannot be parsed or when it would expand
/// to more than `MAX_EXPANSION` URLs. If the true count does not fit in a
/// `usize`, the count shown in the error is saturated at `usize::MAX`.
pub fn expand_glob(pattern: &str) -> Result<Vec<String>, Error> {
    let segments = parse_glob(pattern)?;
    if segments.iter().all(|s| matches!(s, Segment::Literal(_))) {
        let joined: String = segments.iter().map(|s| s.value_at(0)).collect();
        return Ok(vec![joined]);
    }

    let counts: Vec<usize> = segments.iter().map(Segment::count).collect();
    // Saturate instead of multiplying blindly: a plain product can overflow
    // `usize`, which panics in debug builds and wraps in release builds
    // (possibly to a value below the cap).
    let total = counts.iter().fold(1_usize, |acc, &n| acc.saturating_mul(n));
    if total > MAX_EXPANSION {
        return Err(Error::Http(format!(
            "glob expansion too large: {total} URLs (max {MAX_EXPANSION})"
        )));
    }

    let mut results = Vec::with_capacity(total);
    let mut indices = vec![0usize; segments.len()];
    for _ in 0..total {
        let url: String =
            segments.iter().zip(&indices).map(|(segment, &i)| segment.value_at(i)).collect();
        results.push(url);

        // Advance like an odometer: bump the last index and carry leftwards
        // while a position rolls over.
        for (index, &limit) in indices.iter_mut().zip(&counts).rev() {
            *index += 1;
            if *index < limit {
                break;
            }
            *index = 0;
        }
    }
    Ok(results)
}
/// Expands a URL glob pattern and reports the value each glob took per URL.
///
/// Every entry pairs an expanded URL with the values of its non-literal
/// segments, in pattern order. URLs are produced with the right-most glob
/// varying fastest. A pattern with no glob segments yields one URL with an
/// empty value list.
///
/// # Errors
///
/// Returns an error when the pattern cannot be parsed or when it would expand
/// to more than `MAX_EXPANSION` URLs. If the true count does not fit in a
/// `usize`, the count shown in the error is saturated at `usize::MAX`.
pub fn expand_glob_with_values(pattern: &str) -> Result<Vec<(String, Vec<String>)>, Error> {
    let segments = parse_glob(pattern)?;
    let glob_segment_indices: Vec<usize> = segments
        .iter()
        .enumerate()
        .filter(|(_, s)| !matches!(s, Segment::Literal(_)))
        .map(|(i, _)| i)
        .collect();
    if glob_segment_indices.is_empty() {
        let joined: String = segments.iter().map(|s| s.value_at(0)).collect();
        return Ok(vec![(joined, Vec::new())]);
    }

    let counts: Vec<usize> = segments.iter().map(Segment::count).collect();
    // Saturate instead of multiplying blindly: a plain product can overflow
    // `usize`, which panics in debug builds and wraps in release builds
    // (possibly to a value below the cap).
    let total = counts.iter().fold(1_usize, |acc, &n| acc.saturating_mul(n));
    if total > MAX_EXPANSION {
        return Err(Error::Http(format!(
            "glob expansion too large: {total} URLs (max {MAX_EXPANSION})"
        )));
    }

    let mut results = Vec::with_capacity(total);
    let mut indices = vec![0usize; segments.len()];
    for _ in 0..total {
        let url: String =
            segments.iter().zip(&indices).map(|(segment, &i)| segment.value_at(i)).collect();
        let values: Vec<String> =
            glob_segment_indices.iter().map(|&i| segments[i].value_at(indices[i])).collect();
        results.push((url, values));

        // Advance like an odometer: bump the last index and carry leftwards
        // while a position rolls over.
        for (index, &limit) in indices.iter_mut().zip(&counts).rev() {
            *index += 1;
            if *index < limit {
                break;
            }
            *index = 0;
        }
    }
    Ok(results)
}
/// Counts the glob segments (sets and ranges) in `pattern`; literal text is not counted.
///
/// # Errors
///
/// Returns an error when the pattern cannot be parsed.
pub fn glob_pattern_count(pattern: &str) -> Result<usize, Error> {
    parse_glob(pattern).map(|segments| {
        segments.into_iter().filter(|segment| !matches!(segment, Segment::Literal(_))).count()
    })
}
#[cfg(test)]
mod tests {
    #![allow(clippy::unwrap_used, clippy::expect_used)]
    use super::*;

    /// Expands `pattern`, panicking if it is rejected.
    fn expanded(pattern: &str) -> Vec<String> {
        expand_glob(pattern).unwrap()
    }

    /// Returns the display text of the error that `pattern` is rejected with.
    fn rejection(pattern: &str) -> String {
        expand_glob(pattern).unwrap_err().to_string()
    }

    #[test]
    fn set_expansion_basic() {
        assert_eq!(
            expanded("http://example.com/{a,b,c}"),
            ["http://example.com/a", "http://example.com/b", "http://example.com/c"]
        );
    }

    #[test]
    fn set_expansion_multiple_sets() {
        assert_eq!(
            expanded("http://{a,b}.example.com/{x,y}"),
            [
                "http://a.example.com/x",
                "http://a.example.com/y",
                "http://b.example.com/x",
                "http://b.example.com/y",
            ]
        );
    }

    #[test]
    fn set_expansion_single_item() {
        assert_eq!(expanded("http://example.com/{only}"), ["http://example.com/only"]);
    }

    #[test]
    fn set_expansion_empty_items() {
        assert_eq!(
            expanded("http://example.com/{a,,c}"),
            ["http://example.com/a", "http://example.com/", "http://example.com/c"]
        );
    }

    #[test]
    fn numeric_range_basic() {
        assert_eq!(
            expanded("http://example.com/[1-5]"),
            [
                "http://example.com/1",
                "http://example.com/2",
                "http://example.com/3",
                "http://example.com/4",
                "http://example.com/5",
            ]
        );
    }

    #[test]
    fn numeric_range_with_step() {
        assert_eq!(
            expanded("http://example.com/[0-10:5]"),
            ["http://example.com/0", "http://example.com/5", "http://example.com/10"]
        );
    }

    #[test]
    fn numeric_range_zero_padded() {
        assert_eq!(
            expanded("http://example.com/[01-03]"),
            ["http://example.com/01", "http://example.com/02", "http://example.com/03"]
        );
    }

    #[test]
    fn numeric_range_single_value() {
        assert_eq!(expanded("http://example.com/[5-5]"), ["http://example.com/5"]);
    }

    #[test]
    fn alpha_range_basic() {
        assert_eq!(
            expanded("http://example.com/[a-d]"),
            [
                "http://example.com/a",
                "http://example.com/b",
                "http://example.com/c",
                "http://example.com/d",
            ]
        );
    }

    #[test]
    fn alpha_range_with_step() {
        assert_eq!(
            expanded("http://example.com/[a-g:2]"),
            [
                "http://example.com/a",
                "http://example.com/c",
                "http://example.com/e",
                "http://example.com/g",
            ]
        );
    }

    #[test]
    fn alpha_range_uppercase() {
        assert_eq!(
            expanded("http://example.com/[A-C]"),
            ["http://example.com/A", "http://example.com/B", "http://example.com/C"]
        );
    }

    #[test]
    fn combined_set_and_range() {
        assert_eq!(
            expanded("http://{foo,bar}.com/[1-2]"),
            ["http://foo.com/1", "http://foo.com/2", "http://bar.com/1", "http://bar.com/2"]
        );
    }

    #[test]
    fn no_glob_returns_original() {
        assert_eq!(expanded("http://example.com/path"), ["http://example.com/path"]);
    }

    #[test]
    fn unclosed_brace_error() {
        assert!(rejection("http://example.com/{a,b").contains("unclosed"));
    }

    #[test]
    fn unclosed_bracket_error() {
        assert!(rejection("http://example.com/[1-5").contains("unclosed"));
    }

    #[test]
    fn zero_step_error() {
        assert!(rejection("http://example.com/[1-5:0]").contains("zero"));
    }

    #[test]
    fn invalid_range_error() {
        assert!(rejection("http://example.com/[abc]").contains("invalid"));
    }

    #[test]
    fn pattern_count_none() {
        assert_eq!(glob_pattern_count("http://example.com/").unwrap(), 0);
    }

    #[test]
    fn pattern_count_one() {
        assert_eq!(glob_pattern_count("http://example.com/{a,b}").unwrap(), 1);
    }

    #[test]
    fn pattern_count_two() {
        assert_eq!(glob_pattern_count("http://{a,b}.com/[1-3]").unwrap(), 2);
    }

    #[test]
    fn large_expansion_capped() {
        assert!(rejection("http://[0-999]/[0-999]").contains("too large"));
    }

    // Despite the name, this checks that an unterminated `{` is rejected.
    #[test]
    fn literal_braces_in_url() {
        assert!(rejection("http://example.com/{unclosed").contains("unclosed"));
    }

    // The step exercised here is 3; 9 is not reached because 7 + 3 overshoots it.
    #[test]
    fn numeric_range_step_2() {
        assert_eq!(
            expanded("http://example.com/[1-9:3]"),
            ["http://example.com/1", "http://example.com/4", "http://example.com/7"]
        );
    }
}