use std::collections::HashMap;
use std::sync::LazyLock;
use regex::Regex;
use serde_json::Value;
use crate::link_provenance;
use crate::sanitize::sanitize_url;
use crate::types::{Config, Link, Macro, Tier};
use crate::validate::validate_regex;
/// Object keys that are never copied out of the raw JSON.
///
/// Entries keyed by any of these names are skipped when reading `allLinks`,
/// link `meta`, `settings`, `macros` and `searchPatterns`.
const BLOCKED_KEYS: &[&str] = &[
    // Names tied to JavaScript's prototype chain.
    "__proto__", "constructor", "prototype",
    // Python class-introspection attribute names.
    "__class__", "__bases__", "__mro__", "__subclasses__",
];
/// JSON field names recognised on a link object.
///
/// Any other key found on a link only produces an "ignoring unknown field"
/// warning; it does not cause the link to be rejected.
const LINK_FIELDS: &[&str] = &[
    "url", "label", "tags", "cssClass",
    "image", "altText", "targetWindow", "description",
    "thumbnail", "hooks", "guid", "createdAt",
    "meta",
];
/// Matches `meta` keys that end in "url", ignoring case (e.g. `fooUrl`, `FOO_URL`).
///
/// Compiled once on first use; string values under matching keys are sanitized
/// as URLs.
static META_URL_KEY_RE: LazyLock<Regex> = LazyLock::new(|| {
    let compiled = Regex::new(r"(?i)url$");
    compiled.expect("META_URL_KEY_RE is a valid regex")
});
/// Options accepted by `validate_config_with_options`.
#[derive(Clone, Debug)]
pub struct ValidateOptions {
    /// Tier stamped onto every validated link. Links whose tier is not
    /// `Tier::Author` have their hooks filtered against `settings.hooks`.
    pub provenance: Tier,
}

impl Default for ValidateOptions {
    /// Defaults to author-tier provenance.
    fn default() -> Self {
        let provenance = Tier::Author;
        ValidateOptions { provenance }
    }
}
/// Validates `value` using `ValidateOptions::default()`.
///
/// # Errors
///
/// Forwards whatever error `validate_config_with_options` returns.
pub fn validate_config(value: Value) -> Result<Config, String> {
    let default_opts = ValidateOptions::default();
    validate_config_with_options(value, default_opts)
}
pub fn validate_config_with_options(
value: Value,
opts: ValidateOptions,
) -> Result<Config, String> {
if !opts.provenance.is_valid() {
return Err(format!(
"invalid ValidateOptions.provenance: {:?}",
opts.provenance
));
}
let obj = match value {
Value::Object(map) => map,
_ => return Err("Invalid config: expected an object".into()),
};
let hook_allowlist: Option<Vec<String>> = obj
.get("settings")
.and_then(Value::as_object)
.and_then(|s| s.get("hooks"))
.and_then(Value::as_array)
.map(|arr| {
arr.iter()
.filter_map(|v| v.as_str().map(String::from))
.collect()
});
let raw_links = match obj.get("allLinks") {
Some(Value::Object(map)) => map,
Some(Value::Array(_)) | None => {
return Err("Invalid config: allLinks must be a non-null object".into());
}
Some(_) => {
return Err("Invalid config: allLinks must be a non-null object".into());
}
};
let mut all_links: HashMap<String, Link> = HashMap::new();
for (key, link_val) in raw_links {
if BLOCKED_KEYS.contains(&key.as_str()) {
continue;
}
if key.contains('-') {
eprintln!(
"validateConfig: skipping allLinks[\"{key}\"] \
— hyphens are not allowed in item IDs. Use underscores instead. \
The \"-\" character is the WITHOUT operator in expressions."
);
continue;
}
let raw_link = match link_val {
Value::Object(map) => map,
_ => {
eprintln!(
"validateConfig: skipping allLinks[\"{key}\"] — not a valid link object"
);
continue;
}
};
let url = match raw_link.get("url") {
Some(Value::String(s)) => s.clone(),
_ => {
eprintln!(
"validateConfig: skipping allLinks[\"{key}\"] — missing or invalid url"
);
continue;
}
};
let tags = match raw_link.get("tags") {
Some(Value::Array(arr)) => {
let mut filtered = Vec::new();
for t in arr {
if let Value::String(s) = t {
if s.contains('-') {
eprintln!(
"validateConfig: allLinks[\"{key}\"] \
— stripping tag \"{s}\" (hyphens not allowed in tags). \
Use underscores instead."
);
} else {
filtered.push(s.clone());
}
}
}
filtered
}
Some(_) => {
eprintln!("validateConfig: allLinks[\"{key}\"].tags is not an array — ignoring");
Vec::new()
}
None => Vec::new(),
};
let label = string_field(raw_link, "label");
let css_class = string_field(raw_link, "cssClass");
let image = string_field(raw_link, "image");
let alt_text = string_field(raw_link, "altText");
let target_window = string_field(raw_link, "targetWindow");
let description = string_field(raw_link, "description");
let thumbnail = string_field(raw_link, "thumbnail");
let guid = string_field(raw_link, "guid");
let created_at = raw_link.get("createdAt").cloned();
let hooks = match raw_link.get("hooks") {
Some(Value::Array(arr)) => {
let string_hooks: Vec<String> = arr
.iter()
.filter_map(|v| v.as_str().map(String::from))
.collect();
tier_aware_hooks(&opts.provenance, hook_allowlist.as_ref(), &string_hooks, key)
}
_ => None,
};
let meta = match raw_link.get("meta") {
Some(Value::Object(meta_map)) => {
let mut out = HashMap::new();
for (mk, mv) in meta_map {
if BLOCKED_KEYS.contains(&mk.as_str()) {
continue;
}
out.insert(mk.clone(), mv.clone());
}
Some(out)
}
_ => None,
};
for field_key in raw_link.keys() {
if !LINK_FIELDS.contains(&field_key.as_str()) {
eprintln!(
"validateConfig: allLinks[\"{key}\"] — ignoring unknown field \"{field_key}\""
);
}
}
let mut link = Link {
url,
label,
tags,
css_class,
image,
alt_text,
target_window,
description,
thumbnail,
hooks,
guid,
created_at,
meta,
provenance: None,
};
sanitize_link_urls(&mut link);
link_provenance::must_stamp(&mut link, opts.provenance.clone());
all_links.insert(key.clone(), link);
}
let settings = match obj.get("settings") {
Some(Value::Object(map)) => {
let mut out = HashMap::new();
for (k, v) in map {
if !BLOCKED_KEYS.contains(&k.as_str()) {
out.insert(k.clone(), v.clone());
}
}
out
}
_ => HashMap::new(),
};
let macros = match obj.get("macros") {
Some(Value::Object(map)) => {
let mut out = HashMap::new();
for (k, v) in map {
if BLOCKED_KEYS.contains(&k.as_str()) {
continue;
}
if k.contains('-') {
eprintln!(
"validateConfig: skipping macro \"{k}\" \
— hyphens are not allowed in macro names. Use underscores instead. \
The \"-\" character is the WITHOUT operator in expressions."
);
continue;
}
match v {
Value::Object(macro_map) => {
if let Some(Value::String(link_items)) = macro_map.get("linkItems") {
out.insert(
k.clone(),
Macro {
link_items: link_items.clone(),
config: macro_map.get("config").cloned(),
},
);
} else {
eprintln!("validateConfig: skipping macro \"{k}\" — invalid shape");
}
}
_ => {
eprintln!("validateConfig: skipping macro \"{k}\" — invalid shape");
}
}
}
out
}
_ => HashMap::new(),
};
let search_patterns = match obj.get("searchPatterns") {
Some(Value::Object(map)) => {
let mut out = HashMap::new();
for (k, v) in map {
if BLOCKED_KEYS.contains(&k.as_str()) {
continue;
}
if k.contains('-') {
eprintln!(
"validateConfig: skipping searchPattern \"{k}\" \
— hyphens are not allowed in pattern keys. Use underscores instead. \
The \"-\" character is the WITHOUT operator in expressions."
);
continue;
}
if let Value::String(pattern) = v {
let validation = validate_regex(pattern);
if validation.safe {
out.insert(k.clone(), v.clone());
} else {
eprintln!(
"validateConfig: removing searchPattern \"{k}\" — {}",
validation.reason.unwrap_or_default()
);
}
continue;
}
if let Value::Object(entry) = v {
if let Some(Value::String(pattern)) = entry.get("pattern") {
let validation = validate_regex(pattern);
if validation.safe {
out.insert(k.clone(), v.clone());
} else {
eprintln!(
"validateConfig: removing searchPattern \"{k}\" — {}",
validation.reason.unwrap_or_default()
);
}
continue;
}
}
eprintln!("validateConfig: skipping searchPattern \"{k}\" — invalid shape");
}
out
}
_ => HashMap::new(),
};
Ok(Config {
settings,
macros,
all_links,
search_patterns,
protocols: None,
})
}
/// Sanitizes every URL-bearing field of `link` in place.
///
/// `url` is sanitized only when non-empty; `image` and `thumbnail` are
/// sanitized when present. In `meta`, blocked keys are removed and any string
/// value whose key matches `META_URL_KEY_RE` is sanitized; non-string values
/// are left untouched.
pub fn sanitize_link_urls(link: &mut Link) {
    if !link.url.is_empty() {
        let cleaned = sanitize_url(&link.url);
        link.url = cleaned;
    }
    link.image = link.image.as_ref().map(|raw| sanitize_url(raw));
    link.thumbnail = link.thumbnail.as_ref().map(|raw| sanitize_url(raw));

    let Some(entries) = link.meta.as_mut() else {
        return;
    };
    entries.retain(|name, _| !BLOCKED_KEYS.contains(&name.as_str()));
    for (name, value) in entries.iter_mut() {
        if !META_URL_KEY_RE.is_match(name) {
            continue;
        }
        if let Value::String(raw) = value {
            let cleaned = sanitize_url(&*raw);
            *raw = cleaned;
        }
    }
}
/// Decides which hooks a link may keep, given its provenance tier.
///
/// * No hooks: `None`.
/// * `Tier::Author`: all hooks are kept.
/// * Any other tier with an allowlist: only allowlisted hooks are kept, and a
///   warning is printed for each one stripped; `None` if nothing survives.
/// * Any other tier without an allowlist: all hooks are dropped with one warning.
fn tier_aware_hooks(
    provenance: &Tier,
    allowlist: Option<&Vec<String>>,
    string_hooks: &[String],
    link_key: &str,
) -> Option<Vec<String>> {
    if string_hooks.is_empty() {
        return None;
    }
    let is_author = matches!(provenance, Tier::Author);
    if is_author {
        return Some(string_hooks.to_vec());
    }

    let Some(permitted) = allowlist else {
        eprintln!(
            "validateConfig: allLinks[\"{link_key}\"] — dropping {count} hook(s) on \
             {provenance:?}-tier link; declare settings.hooks to allow specific keys",
            count = string_hooks.len()
        );
        return None;
    };

    let mut kept: Vec<String> = Vec::new();
    for h in string_hooks {
        if permitted.contains(h) {
            kept.push(h.clone());
        } else {
            eprintln!(
                "validateConfig: allLinks[\"{link_key}\"] — stripping hook \"{h}\" \
                 not in settings.hooks allowlist (tier: {provenance:?})"
            );
        }
    }
    if kept.is_empty() {
        None
    } else {
        Some(kept)
    }
}
/// Returns an owned copy of `map[key]` when it is a JSON string, otherwise `None`.
fn string_field(map: &serde_json::Map<String, Value>, key: &str) -> Option<String> {
    map.get(key).and_then(Value::as_str).map(str::to_owned)
}