use crate::fs::FileSystem;
use crate::parsers::frontmatter::FrontmatterParts;
use std::collections::HashSet;
use std::path::Path;
use super::{
PathMatch, SkillFrontmatter, reference_path_regex, windows_path_regex, windows_path_token_regex,
};
pub(super) fn parse_frontmatter_fields(
frontmatter: &str,
) -> Result<SkillFrontmatter, serde_yaml::Error> {
if frontmatter.trim().is_empty() {
return Ok(SkillFrontmatter::default());
}
serde_yaml::from_str(frontmatter)
}
pub(super) fn extract_reference_paths(body: &str) -> Vec<PathMatch> {
let re = reference_path_regex();
let mut paths = Vec::new();
let mut seen = HashSet::new();
for m in re.find_iter(body) {
#[allow(clippy::collapsible_if)]
if let Some((trimmed, delta)) = trim_path_token_with_offset(m.as_str()) {
if seen.insert(trimmed.clone()) {
paths.push(PathMatch {
path: trimmed,
start: m.start() + delta,
});
}
}
}
paths
}
pub(super) fn is_regex_escape(s: &str) -> bool {
static REGEX_ESCAPE_CHARS: &[char] = &[
'n', 's', 'd', 'w', 't', 'r', 'b', '|', '.', '/', '$', '^', '+', '*', '?', '{', '}', '[',
']', '(', ')', 'S', 'D', 'W', 'B', 'K', 'A', 'Z', 'z', 'G', 'p', 'P', 'x', 'X',
];
let parts: Vec<&str> = s.split('\\').collect();
if parts.len() < 2 {
return false;
}
let regex_like_count = parts[1..]
.iter()
.filter(|part| {
part.chars()
.next()
.map(|c| REGEX_ESCAPE_CHARS.contains(&c))
.unwrap_or(false)
})
.count();
regex_like_count > 0 && regex_like_count >= (parts.len() - 1) / 2
}
pub(super) fn extract_windows_paths(body: &str) -> Vec<PathMatch> {
let re = windows_path_regex();
let token_re = windows_path_token_regex();
let mut paths = Vec::new();
let mut seen = HashSet::new();
for m in re.find_iter(body) {
if let Some((trimmed, delta)) = trim_path_token_with_offset(m.as_str()) {
if is_regex_escape(&trimmed) {
continue;
}
if seen.insert(trimmed.clone()) {
paths.push(PathMatch {
path: trimmed,
start: m.start() + delta,
});
}
}
}
for m in token_re.find_iter(body) {
if let Some((trimmed, delta)) = trim_path_token_with_offset(m.as_str()) {
if is_regex_escape(&trimmed) {
continue;
}
if seen.insert(trimmed.clone()) {
paths.push(PathMatch {
path: trimmed,
start: m.start() + delta,
});
}
}
}
paths
}
pub(super) fn reference_path_too_deep(path: &str) -> bool {
let normalized = path.replace('\\', "/");
let mut parts = normalized.split('/').filter(|part| !part.is_empty());
let Some(prefix) = parts.next() else {
return false;
};
if !prefix.eq_ignore_ascii_case("references")
&& !prefix.eq_ignore_ascii_case("reference")
&& !prefix.eq_ignore_ascii_case("refs")
{
return false;
}
if prefix.eq_ignore_ascii_case("refs") {
if let Some(second) = parts.next() {
if second.eq_ignore_ascii_case("remotes")
|| second.eq_ignore_ascii_case("heads")
|| second.eq_ignore_ascii_case("tags")
|| second.eq_ignore_ascii_case("stash")
{
return false; }
}
let parts = normalized.split('/').filter(|part| !part.is_empty());
return parts.skip(1).count() > 1;
}
parts.count() > 1
}
pub(super) fn trim_path_token(token: &str) -> &str {
token
.trim_start_matches(['(', '[', '{', '<', '"', '\''])
.trim_end_matches(['.', ',', ';', ':', ')', ']', '}', '>', '"', '\''])
}
pub(super) fn trim_path_token_with_offset(token: &str) -> Option<(String, usize)> {
let trimmed = trim_path_token(token);
if trimmed.is_empty() {
return None;
}
let offset = token.find(trimmed).unwrap_or(0);
Some((trimmed.to_string(), offset))
}
pub(super) fn compute_line_starts(content: &str) -> Vec<usize> {
let mut starts = vec![0];
for (idx, ch) in content.char_indices() {
if ch == '\n' {
starts.push(idx + 1);
}
}
starts
}
pub(super) fn line_col_at(offset: usize, line_starts: &[usize]) -> (usize, usize) {
let mut low = 0usize;
let mut high = line_starts.len();
while low + 1 < high {
let mid = (low + high) / 2;
if line_starts[mid] <= offset {
low = mid;
} else {
high = mid;
}
}
let line_start = line_starts[low];
(low + 1, offset.saturating_sub(line_start) + 1)
}
pub(super) fn frontmatter_key_line_col(
parts: &FrontmatterParts,
key: &str,
line_starts: &[usize],
) -> (usize, usize) {
let offset = frontmatter_key_offset(&parts.frontmatter, key)
.map(|local| parts.frontmatter_start + local)
.unwrap_or(parts.frontmatter_start);
line_col_at(offset, line_starts)
}
pub(super) fn frontmatter_key_offset(frontmatter: &str, key: &str) -> Option<usize> {
let mut offset = 0usize;
let bytes = frontmatter.as_bytes();
for line in frontmatter.lines() {
let trimmed = line.trim_start();
if trimmed.starts_with('#') || trimmed.is_empty() {
let line_end = offset + line.len();
if line_end < bytes.len() {
if bytes[line_end] == b'\n' {
offset = line_end + 1; } else if line_end + 1 < bytes.len()
&& bytes[line_end] == b'\r'
&& bytes[line_end + 1] == b'\n'
{
offset = line_end + 2; } else {
offset = line_end; }
} else {
offset = line_end; }
continue;
}
if let Some(rest) = trimmed.strip_prefix(key) {
if rest.trim_start().starts_with(':') {
let column = line.len() - trimmed.len();
return Some(offset + column);
}
}
let line_end = offset + line.len();
if line_end < bytes.len() {
if bytes[line_end] == b'\n' {
offset = line_end + 1; } else if line_end + 1 < bytes.len()
&& bytes[line_end] == b'\r'
&& bytes[line_end + 1] == b'\n'
{
offset = line_end + 2; } else {
offset = line_end; }
} else {
offset = line_end; }
}
None
}
pub(super) fn frontmatter_value_byte_range(
_content: &str,
parts: &FrontmatterParts,
key: &str,
) -> Option<(usize, usize)> {
let frontmatter = &parts.frontmatter;
let mut offset = 0usize;
let bytes = frontmatter.as_bytes();
for line in frontmatter.lines() {
let trimmed = line.trim_start();
if trimmed.starts_with('#') || trimmed.is_empty() {
let line_end = offset + line.len();
if line_end < bytes.len() {
if bytes[line_end] == b'\n' {
offset = line_end + 1; } else if line_end + 1 < bytes.len()
&& bytes[line_end] == b'\r'
&& bytes[line_end + 1] == b'\n'
{
offset = line_end + 2; } else {
offset = line_end; }
} else {
offset = line_end; }
continue;
}
if let Some(rest) = trimmed.strip_prefix(key) {
if let Some(after_colon) = rest.trim_start().strip_prefix(':') {
let leading_ws = line.len() - trimmed.len();
let ws_after_key = rest.len() - rest.trim_start().len();
let key_end = leading_ws + key.len() + ws_after_key + 1;
let value_str = after_colon.trim_start();
if value_str.is_empty() {
return None;
}
let value_offset_in_line = key_end + (after_colon.len() - value_str.len());
let (value_start, value_len) = if let Some(inner) = value_str.strip_prefix('"') {
if let Some(end_quote) = inner.find('"') {
(value_offset_in_line + 1, end_quote) } else {
return None;
}
} else if let Some(inner) = value_str.strip_prefix('\'') {
if let Some(end_quote) = inner.find('\'') {
(value_offset_in_line + 1, end_quote) } else {
return None;
}
} else {
let value_end = value_str
.find(" #")
.or_else(|| value_str.find("\t#"))
.unwrap_or(value_str.len());
(value_offset_in_line, value_end)
};
let abs_start = parts.frontmatter_start + offset + value_start;
let abs_end = abs_start + value_len;
return Some((abs_start, abs_end));
}
}
let line_end = offset + line.len();
if line_end < bytes.len() {
if bytes[line_end] == b'\n' {
offset = line_end + 1; } else if line_end + 1 < bytes.len()
&& bytes[line_end] == b'\r'
&& bytes[line_end + 1] == b'\n'
{
offset = line_end + 2; } else {
offset = line_end; }
} else {
offset = line_end; }
}
None
}
pub(super) fn frontmatter_key_line_byte_range(
content: &str,
parts: &FrontmatterParts,
key: &str,
) -> Option<(usize, usize)> {
let local_key_start = frontmatter_key_offset(&parts.frontmatter, key)?;
let abs_start = parts.frontmatter_start + local_key_start;
if abs_start >= content.len() {
return None;
}
let bytes = content.as_bytes();
let mut end = abs_start;
while end < content.len() && bytes[end] != b'\n' {
end += 1;
}
if end < content.len() {
end += 1;
}
Some((abs_start, end))
}
pub(super) fn directory_size_until(path: &Path, max_bytes: u64, fs: &dyn FileSystem) -> u64 {
let mut total = 0u64;
let mut stack = vec![path.to_path_buf()];
while let Some(current) = stack.pop() {
let entries = match fs.read_dir(¤t) {
Ok(entries) => entries,
Err(_) => continue,
};
for entry in entries {
if entry.metadata.is_symlink {
continue;
}
if entry.metadata.is_dir {
stack.push(entry.path.clone());
continue;
}
if entry.metadata.is_file {
total = total.saturating_add(entry.metadata.len);
if total > max_bytes {
return total;
}
}
}
}
total
}