use super::bytes::{
find_bytes, is_base64_char, is_boundary, is_email_domain, is_email_local, is_uri_char,
starts_ci,
};
use super::decode::{parse_ipv4, parse_ipv6, parse_jwt};
use super::{to_u32, validate_input, DetectionError};
/// A contiguous region of a scanned byte buffer.
///
/// Both fields are stored as `u32`; [`span`] builds values of this type
/// from `usize` positions.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct ByteSpan {
    /// Index of the first byte of the region within the scanned input.
    pub offset: u32,
    /// Number of bytes covered by the region.
    pub len: u32,
}
/// Builds a [`ByteSpan`] from `usize` coordinates.
///
/// # Errors
///
/// Propagates the error from `to_u32` if either `offset` (labelled
/// `"offset"`) or `len` (labelled `"length"`) is rejected by the conversion.
/// The offset is converted first, so its error takes precedence.
pub fn span(offset: usize, len: usize) -> Result<ByteSpan, DetectionError> {
    let offset = to_u32(offset, "offset")?;
    let len = to_u32(len, "length")?;
    Ok(ByteSpan { offset, len })
}
/// Scans forward from `index` and returns the position of the first byte
/// that does not satisfy `pred`.
///
/// Returns `input.len()` when every byte from `index` onwards matches, and
/// returns `index` unchanged when it already lies at or beyond the end of
/// `input`.
#[must_use]
pub fn advance_while(input: &[u8], mut index: usize, pred: fn(u8) -> bool) -> usize {
    // `skip` past the end simply yields nothing, so no bounds check is needed.
    for &byte in input.iter().skip(index) {
        if !pred(byte) {
            break;
        }
        index += 1;
    }
    index
}
/// Scans backward from `index` and returns the start of the run of bytes
/// satisfying `pred` that ends immediately before `index`.
///
/// Returns `0` when every byte before `index` matches, and `index` itself
/// when the byte directly before it does not match (or `index` is `0`).
///
/// An `index` greater than `input.len()` has no byte directly before it
/// inside `input`, so it is returned unchanged instead of panicking. This
/// mirrors how [`advance_while`] treats an out-of-range starting position.
#[must_use]
pub fn rewind_while(input: &[u8], mut index: usize, pred: fn(u8) -> bool) -> usize {
    // `checked_sub` guards `index == 0`; `get` guards `index > input.len()`.
    while let Some(&byte) = index.checked_sub(1).and_then(|prev| input.get(prev)) {
        if !pred(byte) {
            break;
        }
        index -= 1;
    }
    index
}
/// Records the start of the run `start..end` in `offsets` when the run is at
/// least `min` bytes long.
///
/// Only the start position is pushed. A run where `end < start` is treated
/// as having length zero.
///
/// # Errors
///
/// Propagates the error from `to_u32` if `start` is rejected by the
/// conversion. Nothing is converted (and no error can occur) when the run is
/// shorter than `min`.
pub fn push_run(
    offsets: &mut Vec<u32>,
    start: usize,
    end: usize,
    min: usize,
) -> Result<(), DetectionError> {
    let run_len = end.saturating_sub(start);
    if run_len < min {
        return Ok(());
    }
    let offset = to_u32(start, "offset")?;
    offsets.push(offset);
    Ok(())
}
/// Locates every maximal run of bytes satisfying `predicate` that is at
/// least `min_run_len` bytes long.
///
/// The result is a flat list of pairs: for each qualifying run, the index of
/// its first byte followed by the index one past its last byte.
///
/// # Errors
///
/// Returns an error if `validate_input` rejects `input`, or if `to_u32`
/// rejects a run boundary.
pub fn run_offsets(
    input: &[u8],
    min_run_len: u32,
    predicate: fn(&u8) -> bool,
) -> Result<Vec<u32>, DetectionError> {
    validate_input(input)?;
    let threshold = min_run_len as usize;
    let mut boundaries = Vec::new();
    let mut bytes = input.iter().enumerate();
    while let Some((run_start, first)) = bytes.next() {
        if !predicate(first) {
            continue;
        }
        // Consume the rest of the run. `find` also swallows the byte that
        // terminates it, so each byte is tested exactly once. A run that
        // reaches the end of the buffer ends at `input.len()`.
        let run_end = bytes
            .by_ref()
            .find(|&(_, byte)| !predicate(byte))
            .map_or(input.len(), |(position, _)| position);
        if run_end - run_start >= threshold {
            boundaries.push(to_u32(run_start, "run_start")?);
            boundaries.push(to_u32(run_end, "run_end")?);
        }
    }
    Ok(boundaries)
}
/// Runs [`run_offsets`] with `is_base64_char` as the byte predicate.
///
/// # Errors
///
/// Propagates any error returned by [`run_offsets`].
pub fn base64_run_offsets(input: &[u8], min_run_len: u32) -> Result<Vec<u32>, DetectionError> {
    let predicate: fn(&u8) -> bool = is_base64_char;
    run_offsets(input, min_run_len, predicate)
}
/// Runs [`run_offsets`] with `u8::is_ascii_hexdigit` as the byte predicate,
/// i.e. reports runs of ASCII hexadecimal digits.
///
/// # Errors
///
/// Propagates any error returned by [`run_offsets`].
pub fn hex_run_offsets(input: &[u8], min_run_len: u32) -> Result<Vec<u32>, DetectionError> {
    let predicate: fn(&u8) -> bool = u8::is_ascii_hexdigit;
    run_offsets(input, min_run_len, predicate)
}
/// Finds `http://` and `https://` URLs in `input`.
///
/// A candidate starts where `starts_ci` matches one of the two schemes and
/// the preceding byte (or the start of the input) satisfies `is_boundary`.
/// It extends over the following bytes accepted by `is_uri_char`, and is
/// reported only when that tail is non-empty and contains a `.`.
/// Scanning resumes after the end of each candidate, so reported spans do not
/// overlap.
///
/// # Errors
///
/// Returns an error if `validate_input` rejects `input`, or if [`span`]
/// cannot represent a match.
pub fn url_spans(input: &[u8]) -> Result<Vec<ByteSpan>, DetectionError> {
    validate_input(input)?;
    let mut found = Vec::new();
    let mut cursor = 0usize;
    while cursor < input.len() {
        let scheme = if starts_ci(input, cursor, b"http://") {
            Some(7usize)
        } else if starts_ci(input, cursor, b"https://") {
            Some(8usize)
        } else {
            None
        };
        // The boundary check only runs once a scheme has matched.
        let Some(scheme_len) = scheme.filter(|_| {
            let before = cursor.checked_sub(1).and_then(|prev| input.get(prev).copied());
            is_boundary(before)
        }) else {
            cursor += 1;
            continue;
        };

        let tail_start = cursor + scheme_len;
        let tail_len = input
            .iter()
            .skip(tail_start)
            .take_while(|&&byte| is_uri_char(byte))
            .count();
        let end = tail_start + tail_len;

        // An empty tail cannot contain a dot, so this also rejects a bare scheme.
        if matches!(input.get(tail_start..end), Some(tail) if tail.contains(&b'.')) {
            found.push(span(cursor, end - cursor)?);
        }
        cursor = usize::max(end, cursor + 1);
    }
    Ok(found)
}
/// Finds IPv4 addresses in `input`.
///
/// Every position whose preceding byte (or the start of the input) satisfies
/// `is_boundary` is handed to `parse_ipv4`. A parsed length is reported as a
/// span when the byte following it (or the end of the input) also satisfies
/// `is_boundary`.
///
/// # Errors
///
/// Returns an error if `validate_input` rejects `input`, or if [`span`]
/// cannot represent a match.
pub fn ipv4_spans(input: &[u8]) -> Result<Vec<ByteSpan>, DetectionError> {
    validate_input(input)?;
    let mut found = Vec::new();
    let mut previous: Option<u8> = None;
    for (start, &current) in input.iter().enumerate() {
        // `replace` yields the byte before `start` (`None` at index 0) and
        // stores the current byte for the next iteration.
        let before = previous.replace(current);
        if !is_boundary(before) {
            continue;
        }
        let Some(len) = parse_ipv4(&input[start..]) else {
            continue;
        };
        if is_boundary(input.get(start + len).copied()) {
            found.push(span(start, len)?);
        }
    }
    Ok(found)
}
/// Finds IPv6 addresses in `input`.
///
/// Every position whose preceding byte (or the start of the input) satisfies
/// `is_boundary` is handed to `parse_ipv6`. A parsed length is reported as a
/// span when it is at least 3 bytes and the byte following it (or the end of
/// the input) also satisfies `is_boundary`.
///
/// # Errors
///
/// Returns an error if `validate_input` rejects `input`, or if [`span`]
/// cannot represent a match.
pub fn ipv6_spans(input: &[u8]) -> Result<Vec<ByteSpan>, DetectionError> {
    validate_input(input)?;
    let mut found = Vec::new();
    for start in 0..input.len() {
        let before = start.checked_sub(1).and_then(|prev| input.get(prev).copied());
        if !is_boundary(before) {
            continue;
        }
        let accepted = parse_ipv6(&input[start..])
            .filter(|&len| len >= 3 && is_boundary(input.get(start + len).copied()));
        if let Some(len) = accepted {
            found.push(span(start, len)?);
        }
    }
    Ok(found)
}
/// Finds email-address-shaped spans in `input`.
///
/// Each `@` that is neither the first nor the last byte is treated as a
/// pivot. The local part is the run of `is_email_local` bytes ending at the
/// `@`; the domain is the run of `is_email_domain` bytes starting after it.
/// A span is reported when:
///
/// * the local part is non-empty,
/// * the domain is at least three bytes long and contains a `.`,
/// * the bytes on either side of the candidate (or the input edges) satisfy
///   `is_boundary`.
///
/// # Errors
///
/// Returns an error if `validate_input` rejects `input`, or if [`span`]
/// cannot represent a match.
pub fn email_spans(input: &[u8]) -> Result<Vec<ByteSpan>, DetectionError> {
    validate_input(input)?;
    let mut found = Vec::new();
    let pivots = (1..input.len().saturating_sub(1)).filter(|&at| input[at] == b'@');
    for at in pivots {
        let local_start = rewind_while(input, at, is_email_local);
        let domain_end = advance_while(input, at + 1, is_email_domain);

        // Guards run in the same order as the checks they replace, so the
        // boundary lookups only happen for structurally valid candidates.
        if local_start >= at {
            continue;
        }
        if domain_end <= at + 3 {
            continue;
        }
        if !input[at + 1..domain_end].contains(&b'.') {
            continue;
        }
        let before = local_start
            .checked_sub(1)
            .and_then(|prev| input.get(prev).copied());
        if !is_boundary(before) {
            continue;
        }
        if !is_boundary(input.get(domain_end).copied()) {
            continue;
        }
        found.push(span(local_start, domain_end - local_start)?);
    }
    Ok(found)
}
/// Returns `true` when `bytes` is exactly a textual UUID in the
/// `8-4-4-4-12` layout: 36 bytes, with `-` at indices 8, 13, 18 and 23 and
/// ASCII hexadecimal digits (either case) everywhere else.
///
/// Slices of any other length are rejected. Without the length check an
/// empty slice, or any prefix of a valid UUID, would be accepted.
#[must_use]
pub fn matches_uuid(bytes: &[u8]) -> bool {
    const UUID_TEXT_LEN: usize = 36;

    bytes.len() == UUID_TEXT_LEN
        && bytes.iter().enumerate().all(|(index, byte)| {
            if matches!(index, 8 | 13 | 18 | 23) {
                *byte == b'-'
            } else {
                byte.is_ascii_hexdigit()
            }
        })
}
/// Finds textual UUIDs in `input`.
///
/// Every 36-byte window accepted by [`matches_uuid`] is reported when the
/// bytes on either side of it (or the input edges) satisfy `is_boundary`.
///
/// # Errors
///
/// Returns an error if `validate_input` rejects `input`, or if [`span`]
/// cannot represent a match.
pub fn uuid_spans(input: &[u8]) -> Result<Vec<ByteSpan>, DetectionError> {
    const UUID_TEXT_LEN: usize = 36;

    validate_input(input)?;
    let mut found = Vec::new();
    // `windows` yields nothing for inputs shorter than one UUID.
    for (start, window) in input.windows(UUID_TEXT_LEN).enumerate() {
        if !matches_uuid(window) {
            continue;
        }
        let before = start.checked_sub(1).and_then(|prev| input.get(prev).copied());
        if is_boundary(before) && is_boundary(input.get(start + UUID_TEXT_LEN).copied()) {
            found.push(span(start, UUID_TEXT_LEN)?);
        }
    }
    Ok(found)
}
/// Finds JWT-shaped tokens in `input`.
///
/// Every position whose preceding byte (or the start of the input) satisfies
/// `is_boundary` is handed to `parse_jwt`. A parsed length is reported as a
/// span when the byte following it (or the end of the input) also satisfies
/// `is_boundary`.
///
/// # Errors
///
/// Returns an error if `validate_input` rejects `input`, or if [`span`]
/// cannot represent a match.
pub fn jwt_spans(input: &[u8]) -> Result<Vec<ByteSpan>, DetectionError> {
    validate_input(input)?;
    let mut found = Vec::new();
    for start in 0..input.len() {
        let before = match start {
            0 => None,
            position => input.get(position - 1).copied(),
        };
        if !is_boundary(before) {
            continue;
        }
        match parse_jwt(&input[start..]) {
            Some(len) if is_boundary(input.get(start + len).copied()) => {
                found.push(span(start, len)?);
            }
            _ => {}
        }
    }
    Ok(found)
}
/// Finds PEM-style blocks in `input`.
///
/// A block starts at `-----BEGIN `, followed by a non-empty label of ASCII
/// uppercase letters and spaces terminated by `-----`, and ends at the first
/// subsequent `-----END <label>-----` carrying the same label. The reported
/// span covers everything from the opening dashes through the closing dashes.
///
/// Scanning rules:
///
/// * an invalid label resumes the search just after the `-----BEGIN ` prefix;
/// * a header with no matching footer resumes the search at the dashes that
///   closed the label;
/// * a `-----BEGIN ` with no closing `-----` anywhere after it ends the scan.
///
/// # Errors
///
/// Returns an error if `validate_input` rejects `input`, or if [`span`]
/// cannot represent a match.
pub fn pem_spans(input: &[u8]) -> Result<Vec<ByteSpan>, DetectionError> {
    const BEGIN_PREFIX: &[u8] = b"-----BEGIN ";
    const END_PREFIX: &[u8] = b"-----END ";
    const DASHES: &[u8] = b"-----";

    validate_input(input)?;
    let mut blocks = Vec::new();
    let mut cursor = 0usize;
    loop {
        let Some(begin_rel) = find_bytes(&input[cursor..], BEGIN_PREFIX) else {
            break;
        };
        let begin = cursor + begin_rel;
        let label_start = begin + BEGIN_PREFIX.len();
        let Some(label_len) = find_bytes(&input[label_start..], DASHES) else {
            break;
        };
        let label_end = label_start + label_len;
        let label = &input[label_start..label_end];

        let label_is_valid = !label.is_empty()
            && label
                .iter()
                .all(|&byte| byte == b' ' || byte.is_ascii_uppercase());
        if !label_is_valid {
            cursor = label_start;
            continue;
        }

        // The footer must repeat the header's label exactly.
        let footer = [END_PREFIX, label, DASHES].concat();
        cursor = match find_bytes(&input[label_end..], &footer) {
            Some(footer_rel) => {
                let end = label_end + footer_rel + footer.len();
                blocks.push(span(begin, end - begin)?);
                end
            }
            None => label_end,
        };
    }
    Ok(blocks)
}