use std::collections::HashSet;
/// A `source:value` reference together with optional display text.
///
/// The `source` and `value` fields hold whatever was handed to the
/// constructor; the lowercased / normalized forms used in URLs are derived on
/// demand by [`ParsedWikilink::url_source`] and [`ParsedWikilink::url_value`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParsedWikilink {
    /// Source name, stored as given.
    pub source: String,
    /// Value, stored as given.
    pub value: String,
    /// Text to show instead of `value`; `None` means "show the value".
    pub display_text: Option<String>,
}

impl ParsedWikilink {
    /// Creates a link without custom display text.
    pub fn new(source: impl Into<String>, value: impl Into<String>) -> Self {
        let (source, value) = (source.into(), value.into());
        Self {
            source,
            value,
            display_text: None,
        }
    }

    /// Creates a link whose rendered text is `display` rather than the value.
    pub fn with_display(
        source: impl Into<String>,
        value: impl Into<String>,
        display: impl Into<String>,
    ) -> Self {
        let mut link = Self::new(source, value);
        link.display_text = Some(display.into());
        link
    }

    /// Returns the source lowercased, as used in the URL path.
    pub fn url_source(&self) -> String {
        self.source.as_str().to_lowercase()
    }

    /// Returns the value passed through `normalize_tag_value`, as used in the URL path.
    pub fn url_value(&self) -> String {
        let raw: &str = &self.value;
        normalize_tag_value(raw)
    }

    /// Returns the site path `/{url_source}/{url_value}/`.
    pub fn url_path(&self) -> String {
        let source = self.url_source();
        let value = self.url_value();
        let mut path = String::with_capacity(source.len() + value.len() + 3);
        path.push('/');
        path.push_str(&source);
        path.push('/');
        path.push_str(&value);
        path.push('/');
        path
    }

    /// Returns the text to render: the display text if set, otherwise the value.
    pub fn display(&self) -> &str {
        match &self.display_text {
            Some(text) => text.as_str(),
            None => self.value.as_str(),
        }
    }

    /// Renders the link as Markdown: `[display](url_path)`.
    pub fn to_markdown_link(&self) -> String {
        format!(
            "[{text}]({target})",
            text = self.display(),
            target = self.url_path()
        )
    }
}
/// Makes `value` safe to embed in a URL path.
///
/// Control characters are dropped first. The remainder is split on `/`, and
/// empty, `.` and `..` segments are discarded; the surviving segments are
/// re-joined with single `/` separators. The result therefore never starts or
/// ends with `/` and may be empty.
pub fn sanitize_path_component(value: &str) -> String {
    let printable: String = value.chars().filter(|ch| !ch.is_control()).collect();

    let mut cleaned = String::with_capacity(printable.len());
    for segment in printable.split('/') {
        if segment.is_empty() || segment == "." || segment == ".." {
            continue;
        }
        // Kept segments are never empty, so a non-empty buffer means one was already written.
        if !cleaned.is_empty() {
            cleaned.push('/');
        }
        cleaned.push_str(segment);
    }
    cleaned
}
/// Converts a tag value into its URL form.
///
/// Surrounding whitespace is trimmed, the text is lowercased, every space
/// becomes `_`, and the result is passed through `sanitize_path_component`.
pub fn normalize_tag_value(value: &str) -> String {
    let lowered = value.trim().to_lowercase();
    let underscored: String = lowered
        .chars()
        .map(|ch| if ch == ' ' { '_' } else { ch })
        .collect();
    sanitize_path_component(&underscored)
}
/// URL schemes recognised by [`is_url_scheme`]. All entries are lowercase ASCII.
const URL_SCHEMES: &[&str] = &[
    "http",
    "https",
    "mailto",
    "tel",
    "ftp",
    "ftps",
    "file",
    "data",
    "javascript",
    "ssh",
    "git",
    "svn",
    "magnet",
];

/// Returns `true` when `source` equals one of [`URL_SCHEMES`], ignoring ASCII case.
fn is_url_scheme(source: &str) -> bool {
    for &scheme in URL_SCHEMES {
        if scheme.eq_ignore_ascii_case(source) {
            return true;
        }
    }
    false
}
/// Rewrites every recognised `[[source:value]]` wikilink in `input` as a
/// Markdown link and returns the resulting text.
///
/// A wikilink is recognised when `parse_wikilink_inner` accepts the text
/// between `[[` and the next `]]`. Everything else, including bracket pairs
/// that do not parse and unterminated `[[`, is copied through unchanged.
///
/// If the text between an opener and its `]]` itself contains `[[`, the link
/// is taken to start at the innermost opener, so a stray `[[` earlier in the
/// text cannot hide a valid link that follows it.
pub fn transform_wikilinks(input: &str, valid_sources: &HashSet<String>) -> String {
    let mut result = String::with_capacity(input.len());
    let mut remaining = input;

    while let Some(start) = remaining.find("[[") {
        let after_open = &remaining[start + 2..];
        let end = match after_open.find("]]") {
            Some(end) => end,
            // No terminator is left anywhere in the tail, so no later opener
            // can match either. Stop here instead of re-scanning the tail for
            // each remaining `[[` (which is quadratic on inputs like "[[[[[[…").
            None => break,
        };

        let candidate = &after_open[..end];
        // `skipped` is copied verbatim; `inner` is the text of the link proper.
        let (skipped, inner) = match candidate.rfind("[[") {
            Some(pos) => (&remaining[..start + 2 + pos], &candidate[pos + 2..]),
            None => (&remaining[..start], candidate),
        };
        result.push_str(skipped);

        match parse_wikilink_inner(inner, valid_sources) {
            Some(wikilink) => result.push_str(&wikilink.to_markdown_link()),
            None => {
                // Not a wikilink: reproduce the bracketed text exactly.
                result.push_str("[[");
                result.push_str(inner);
                result.push_str("]]");
            }
        }
        remaining = &after_open[end + 2..];
    }

    result.push_str(remaining);
    result
}
fn parse_wikilink_inner(inner: &str, valid_sources: &HashSet<String>) -> Option<ParsedWikilink> {
let colon_pos = inner.find(':')?;
let source = inner[..colon_pos].trim();
let value = inner[colon_pos + 1..].trim();
if source.is_empty() || value.is_empty() {
return None;
}
if is_url_scheme(source) {
return None;
}
let source_lower = source.to_lowercase();
if !valid_sources
.iter()
.any(|s| s.to_lowercase() == source_lower)
{
return None;
}
Some(ParsedWikilink::new(source, value))
}
/// Parses a link destination of the form `source:value`.
///
/// This is a public entry point that delegates directly to
/// `parse_wikilink_inner`; it returns `None` whenever that parser does not
/// accept `dest` for the given `valid_sources`.
pub fn parse_tag_link(dest: &str, valid_sources: &HashSet<String>) -> Option<ParsedWikilink> {
parse_wikilink_inner(dest, valid_sources)
}
/// Converts a `source:value` link destination into its site path.
///
/// Returns `Some(url_path)` when `parse_tag_link` accepts `dest`, and `None`
/// otherwise so the caller can leave the destination untouched.
pub fn transform_tag_link_dest(dest: &str, valid_sources: &HashSet<String>) -> Option<String> {
    let link = parse_tag_link(dest, valid_sources)?;
    Some(link.url_path())
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds an owned source set from string literals.
    fn make_sources(sources: &[&str]) -> HashSet<String> {
        let mut set = HashSet::with_capacity(sources.len());
        for name in sources {
            set.insert((*name).to_string());
        }
        set
    }

    /// Asserts `normalize_tag_value(input) == expected` for every pair.
    fn check_normalize(cases: &[(&str, &str)]) {
        for &(input, expected) in cases {
            assert_eq!(normalize_tag_value(input), expected, "input: {:?}", input);
        }
    }

    /// Asserts `sanitize_path_component(input) == expected` for every pair.
    fn check_sanitize(cases: &[(&str, &str)]) {
        for &(input, expected) in cases {
            assert_eq!(
                sanitize_path_component(input),
                expected,
                "input: {:?}",
                input
            );
        }
    }

    /// Asserts that transforming `input` with the given sources yields `expected`.
    fn check_transform(sources: &[&str], input: &str, expected: &str) {
        let set = make_sources(sources);
        assert_eq!(transform_wikilinks(input, &set), expected);
    }

    // --- normalize_tag_value -------------------------------------------------

    #[test]
    fn test_normalize_tag_value_basic() {
        check_normalize(&[("rust", "rust"), ("Rust", "rust"), ("RUST", "rust")]);
    }

    #[test]
    fn test_normalize_tag_value_spaces() {
        check_normalize(&[
            ("Joshua Jay", "joshua_jay"),
            ("hello world", "hello_world"),
            ("a b c", "a_b_c"),
        ]);
    }

    #[test]
    fn test_normalize_tag_value_trims() {
        check_normalize(&[(" rust ", "rust"), ("\tspaced\t", "spaced")]);
    }

    // --- is_url_scheme -------------------------------------------------------

    #[test]
    fn test_is_url_scheme() {
        for scheme in &["http", "HTTP", "https", "mailto", "file"] {
            assert!(is_url_scheme(scheme));
        }
        for name in &["tags", "performers", "category"] {
            assert!(!is_url_scheme(name));
        }
    }

    // --- ParsedWikilink ------------------------------------------------------

    #[test]
    fn test_parsed_wikilink_url_path() {
        let cases = [
            ("Tags", "Rust", "/tags/rust/"),
            ("performers", "Joshua Jay", "/performers/joshua_jay/"),
        ];
        for &(source, value, expected) in &cases {
            assert_eq!(ParsedWikilink::new(source, value).url_path(), expected);
        }
    }

    #[test]
    fn test_parsed_wikilink_display() {
        let plain = ParsedWikilink::new("tags", "rust");
        assert_eq!(plain.display(), "rust");

        let custom = ParsedWikilink::with_display("tags", "rust", "Rust Programming");
        assert_eq!(custom.display(), "Rust Programming");
    }

    #[test]
    fn test_parsed_wikilink_to_markdown() {
        let cases = [
            ("Tags", "rust", "[rust](/tags/rust/)"),
            (
                "performers",
                "Joshua Jay",
                "[Joshua Jay](/performers/joshua_jay/)",
            ),
        ];
        for &(source, value, expected) in &cases {
            assert_eq!(
                ParsedWikilink::new(source, value).to_markdown_link(),
                expected
            );
        }
    }

    // --- transform_wikilinks -------------------------------------------------

    #[test]
    fn test_transform_wikilinks_basic() {
        check_transform(
            &["tags"],
            "See [[Tags:rust]] for more.",
            "See [rust](/tags/rust/) for more.",
        );
    }

    #[test]
    fn test_transform_wikilinks_multiple() {
        check_transform(
            &["tags"],
            "[[Tags:rust]] and [[Tags:programming]] are great.",
            "[rust](/tags/rust/) and [programming](/tags/programming/) are great.",
        );
    }

    #[test]
    fn test_transform_wikilinks_with_spaces() {
        check_transform(
            &["performers"],
            "Watch [[performers:Joshua Jay]] perform!",
            "Watch [Joshua Jay](/performers/joshua_jay/) perform!",
        );
    }

    #[test]
    fn test_transform_wikilinks_case_insensitive_source() {
        for input in &["[[Tags:rust]]", "[[TAGS:rust]]", "[[tags:rust]]"] {
            check_transform(&["tags"], input, "[rust](/tags/rust/)");
        }
    }

    #[test]
    fn test_transform_wikilinks_unknown_source() {
        // Sources outside the valid set are left untouched.
        check_transform(&["tags"], "[[category:books]]", "[[category:books]]");
    }

    #[test]
    fn test_transform_wikilinks_url_scheme_not_matched() {
        // A URL scheme is rejected even when it is listed as a valid source.
        check_transform(
            &["tags", "http"],
            "[[http://example.com]]",
            "[[http://example.com]]",
        );
    }

    #[test]
    fn test_transform_wikilinks_nested_source() {
        check_transform(
            &["taxonomy.tags"],
            "[[taxonomy.tags:rust]]",
            "[rust](/taxonomy.tags/rust/)",
        );
    }

    #[test]
    fn test_transform_wikilinks_no_closing() {
        // An unterminated opener is copied through verbatim.
        check_transform(&["tags"], "[[Tags:rust is broken", "[[Tags:rust is broken");
    }

    #[test]
    fn test_transform_wikilinks_empty_value() {
        // A reference without a value is not a link.
        check_transform(&["tags"], "[[Tags:]]", "[[Tags:]]");
    }

    // --- parse_tag_link ------------------------------------------------------

    #[test]
    fn test_parse_tag_link_valid() {
        let sources = make_sources(&["tags", "performers"]);
        let wl = parse_tag_link("Tags:rust", &sources).expect("Tags:rust should parse");
        assert_eq!(wl.source, "Tags");
        assert_eq!(wl.value, "rust");
        assert_eq!(wl.url_path(), "/tags/rust/");
    }

    #[test]
    fn test_parse_tag_link_with_spaces() {
        let sources = make_sources(&["performers"]);
        let wl = parse_tag_link("performers:Joshua Jay", &sources)
            .expect("performers:Joshua Jay should parse");
        assert_eq!(wl.value, "Joshua Jay");
        assert_eq!(wl.url_path(), "/performers/joshua_jay/");
    }

    #[test]
    fn test_parse_tag_link_url_scheme() {
        let sources = make_sources(&["tags", "https"]);
        let urls = [
            "https://example.com",
            "mailto:test@example.com",
            "file:///path/to/file",
        ];
        for dest in &urls {
            assert!(parse_tag_link(dest, &sources).is_none());
        }
    }

    #[test]
    fn test_parse_tag_link_unknown_source() {
        let sources = make_sources(&["tags"]);
        assert!(parse_tag_link("category:books", &sources).is_none());
    }

    #[test]
    fn test_parse_tag_link_no_colon() {
        let sources = make_sources(&["tags"]);
        for dest in &["just-a-path", "/absolute/path"] {
            assert!(parse_tag_link(dest, &sources).is_none());
        }
    }

    // --- transform_tag_link_dest ---------------------------------------------

    #[test]
    fn test_transform_tag_link_dest() {
        let sources = make_sources(&["tags"]);
        assert_eq!(
            transform_tag_link_dest("Tags:rust", &sources),
            Some("/tags/rust/".to_string())
        );
        assert_eq!(
            transform_tag_link_dest("https://example.com", &sources),
            None
        );
        assert_eq!(transform_tag_link_dest("/regular/path/", &sources), None);
    }

    // --- sanitize_path_component ---------------------------------------------

    #[test]
    fn test_sanitize_path_component_normal_values() {
        check_sanitize(&[
            ("rust", "rust"),
            ("hello_world", "hello_world"),
            ("foo/bar", "foo/bar"),
        ]);
    }

    #[test]
    fn test_sanitize_path_component_strips_leading_slash() {
        check_sanitize(&[
            ("/etc/passwd", "etc/passwd"),
            ("//absolute", "absolute"),
            ("///triple", "triple"),
        ]);
    }

    #[test]
    fn test_sanitize_path_component_removes_dotdot() {
        check_sanitize(&[
            ("../../secret", "secret"),
            ("foo/../bar", "foo/bar"),
            ("../..", ""),
            ("a/../../b", "a/b"),
        ]);
    }

    #[test]
    fn test_sanitize_path_component_removes_single_dot() {
        check_sanitize(&[("./foo", "foo"), ("foo/./bar", "foo/bar")]);
    }

    #[test]
    fn test_sanitize_path_component_null_bytes() {
        check_sanitize(&[("foo\0bar", "foobar"), ("\0", "")]);
    }

    #[test]
    fn test_sanitize_path_component_control_chars() {
        check_sanitize(&[("foo\x01bar", "foobar"), ("hello\nworld", "helloworld")]);
    }

    #[test]
    fn test_sanitize_path_component_complex_attacks() {
        check_sanitize(&[
            ("/pol/_phenomena", "pol/_phenomena"),
            ("/../../../etc/shadow", "etc/shadow"),
        ]);
    }

    #[test]
    fn test_sanitize_path_component_empty() {
        check_sanitize(&[("", ""), ("/", ""), ("//", "")]);
    }

    #[test]
    fn test_normalize_tag_value_sanitizes_paths() {
        check_normalize(&[
            ("/etc/passwd", "etc/passwd"),
            ("../../secret", "secret"),
            ("/pol/_phenomena", "pol/_phenomena"),
        ]);
    }
}