use crate::{SpecmanError, WorkspacePaths};
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
use std::collections::{BTreeMap, BTreeSet, VecDeque};
use std::fs;
use std::path::{Path, PathBuf};
use std::time::Duration;
use unicode_normalization::UnicodeNormalization;
use url::Url;
/// Serializable options for a reference-validation run.
///
/// Converts to and from [`ValidationMode`]; the `Default` impl is derived from
/// `ValidationMode::default()`.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
pub struct ReferenceValidationOptions {
/// How `https://` destinations are checked (syntax only, or with a network request).
pub https: HttpsValidationOptions,
/// Whether linked markdown documents are followed, and the traversal limit.
pub transitive: TransitiveOptions,
}
// Unit tests for slug generation, heading-slug collection and option serialization.
#[cfg(test)]
mod tests {
use super::*;
// NFKD decomposition splits the accent off as a combining mark, which the slug filter drops.
#[test]
fn slugify_nfkd_filters_combining_marks() {
assert_eq!(slugify_heading("café"), Some("cafe".to_string()));
}
// Punctuation is removed; whitespace becomes a hyphen.
#[test]
fn slugify_removes_punctuation_and_hyphenates_spaces() {
assert_eq!(
slugify_heading("Hello, world!"),
Some("hello-world".to_string())
);
}
// Runs of hyphens/spaces collapse to one hyphen; leading/trailing hyphens are trimmed.
#[test]
fn slugify_collapses_hyphens_and_trims() {
assert_eq!(slugify_heading("-- A -- B --"), Some("a-b".to_string()));
}
// Repeated headings get `-1`, `-2`, ... suffixes; the first occurrence is unsuffixed.
#[test]
fn heading_slug_deduplication_appends_suffix_starting_at_1() {
let md = "# Overview\n\n## Overview\n\n## Overview\n";
let root = markdown::to_mdast(md, &markdown::ParseOptions::default()).unwrap();
let mut issues = Vec::new();
let slugs = collect_heading_slugs(&root, &mut issues, "doc");
assert!(issues.is_empty());
assert!(slugs.contains("overview"));
assert!(slugs.contains("overview-1"));
assert!(slugs.contains("overview-2"));
}
// The visible text of a reference-style link inside a heading contributes to the slug.
#[test]
fn heading_plain_text_includes_link_reference_text() {
let md = "## Hello [World][x]\n\n[x]: https://example.com\n";
let root = markdown::to_mdast(md, &markdown::ParseOptions::default()).unwrap();
let mut issues = Vec::new();
let slugs = collect_heading_slugs(&root, &mut issues, "doc");
assert!(issues.is_empty());
assert!(slugs.contains("hello-world"));
}
// Default options enable transitive traversal and serialize with both top-level keys.
#[test]
fn reference_validation_options_default_shape_matches_schema() {
let options = ReferenceValidationOptions::default();
assert!(options.transitive.enabled);
assert!(options.transitive.max_documents > 0);
let json = serde_json::to_value(&options).expect("serialize options");
assert!(json.get("https").is_some());
assert!(json.get("transitive").is_some());
}
// The serde renames on `HttpsValidationMode` must appear in the serialized JSON.
#[test]
fn https_mode_serialization_uses_spec_enum_labels() {
let syntax_only = ReferenceValidationOptions {
https: HttpsValidationOptions {
mode: HttpsValidationMode::SyntaxOnly,
},
transitive: TransitiveOptions {
enabled: false,
max_documents: 1,
},
};
let syntax_json = serde_json::to_string(&syntax_only).expect("serialize syntax-only");
assert!(syntax_json.contains("check-syntax"));
let reachability = ReferenceValidationOptions {
https: HttpsValidationOptions {
mode: HttpsValidationMode::Reachability {
timeout: Duration::from_secs(1),
max_redirects: 2,
method: HttpsMethod::Get,
},
},
transitive: TransitiveOptions {
enabled: true,
max_documents: 4,
},
};
let reachability_json =
serde_json::to_string(&reachability).expect("serialize reachability");
assert!(reachability_json.contains("check-reachability"));
}
}
/// Options for validating `https://` destinations.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
pub struct HttpsValidationOptions {
/// Selects between syntax-only checking and a reachability request.
pub mode: HttpsValidationMode,
}
impl Default for HttpsValidationOptions {
fn default() -> Self {
Self {
mode: HttpsValidationMode::Reachability {
timeout: Duration::from_secs(30),
max_redirects: 10,
method: HttpsMethod::Head,
},
}
}
}
/// How an `https://` destination is validated.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
pub enum HttpsValidationMode {
/// Only parse the URL; no network request is made. Serialized as `check-syntax`.
#[serde(rename = "check-syntax")]
SyntaxOnly,
/// Issue an HTTP request to the URL. Serialized as `check-reachability`.
#[serde(rename = "check-reachability")]
Reachability {
/// Timeout handed to the HTTP agent for the request.
timeout: Duration,
/// Maximum number of redirects the HTTP agent follows.
max_redirects: u32,
/// HTTP method used for the request.
method: HttpsMethod,
},
}
/// HTTP method used for reachability requests.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
pub enum HttpsMethod {
/// `HEAD` request; a successful response yields an empty body string.
Head,
/// `GET` request; the response body is read as text.
Get,
}
impl Default for HttpsValidationMode {
fn default() -> Self {
HttpsValidationMode::Reachability {
timeout: Duration::from_secs(30),
max_redirects: 10,
method: HttpsMethod::Head,
}
}
}
/// Controls whether markdown documents linked from the root are validated too.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
pub struct TransitiveOptions {
/// When true, linked markdown files are queued and validated as well.
pub enabled: bool,
/// Traversal stops with a `TraversalLimit` diagnostic once the number of
/// visited documents exceeds this value (only enforced when `enabled`).
pub max_documents: usize,
}
impl Default for TransitiveOptions {
fn default() -> Self {
Self {
enabled: true,
max_documents: 64,
}
}
}
/// In-process validation settings used by [`ReferenceValidator`].
///
/// Unlike [`ReferenceValidationOptions`] this type is not serializable and
/// additionally carries the `resolve_fragments` switch.
#[derive(Clone, Debug)]
pub struct ValidationMode {
/// When true, heading slugs are collected and `#fragment` destinations are
/// checked against them.
pub resolve_fragments: bool,
/// Whether `https://` destinations are requested over the network.
pub reachability: ReachabilityPolicy,
/// Whether linked markdown documents are validated as well.
pub transitive: bool,
/// Document limit for transitive traversal.
pub max_documents: usize,
}
impl Default for ValidationMode {
fn default() -> Self {
Self {
resolve_fragments: true,
reachability: ReachabilityPolicy::Online {
timeout: Duration::from_secs(30),
max_redirects: 10,
method: HttpsMethod::Head,
},
transitive: true,
max_documents: 64,
}
}
}
/// Reachability setting of a [`ValidationMode`].
///
/// `Disabled` converts to `HttpsValidationMode::SyntaxOnly` and `Online` to
/// `HttpsValidationMode::Reachability` (and vice versa).
#[derive(Clone, Debug)]
pub enum ReachabilityPolicy {
/// No network requests; URLs are only parsed.
Disabled,
/// URLs are requested with the given parameters.
Online {
/// Timeout handed to the HTTP agent.
timeout: Duration,
/// Maximum number of redirects followed.
max_redirects: u32,
/// HTTP method used for the request.
method: HttpsMethod,
},
}
/// Overall outcome of a validation run.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq, JsonSchema)]
pub enum ReferenceValidationStatus {
/// No issue with `IssueSeverity::Error` was recorded.
Success,
/// At least one issue with `IssueSeverity::Error` was recorded.
Failure,
}
/// Result of a validation run.
///
/// `records`/`discovered` and `errors`/`issues` are parallel fields: during
/// validation only `discovered` and `issues` are filled, and `finalize` copies
/// them into `records` and `errors` when those are still empty.
#[derive(Clone, Debug, Serialize, Deserialize, JsonSchema)]
pub struct ReferenceValidationReport {
/// `Failure` if any entry in `errors` has error severity after finalization.
pub status: ReferenceValidationStatus,
/// Copy of `discovered`, filled in by `finalize`.
pub records: Vec<ReferenceRecord>,
/// Every reference found in the validated documents.
pub discovered: Vec<DiscoveredReference>,
/// Copy of `issues`, filled in by `finalize` (includes diagnostics, not only errors).
pub errors: Vec<ReferenceValidationIssue>,
/// Every issue recorded during validation, of any severity.
pub issues: Vec<ReferenceValidationIssue>,
/// Number of distinct documents dequeued during traversal.
pub processed_documents: usize,
}
impl ReferenceValidationReport {
fn new() -> Self {
Self {
status: ReferenceValidationStatus::Success,
records: Vec::new(),
discovered: Vec::new(),
errors: Vec::new(),
issues: Vec::new(),
processed_documents: 0,
}
}
fn finalize(&mut self) {
if self.records.is_empty() {
self.records = self.discovered.clone();
}
if self.errors.is_empty() {
self.errors = self.issues.clone();
}
if self
.errors
.iter()
.any(|i| i.severity == IssueSeverity::Error)
{
self.status = ReferenceValidationStatus::Failure;
}
}
}
/// Severity of a [`ReferenceValidationIssue`].
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq, JsonSchema)]
pub enum IssueSeverity {
/// Causes the report status to become `Failure`.
Error,
/// Informational; does not affect the report status.
Diagnostic,
}
/// A single problem or diagnostic found while validating references.
#[derive(Clone, Debug, Serialize, Deserialize, JsonSchema)]
pub struct ReferenceValidationIssue {
/// Category of the issue; falls back to `Unknown` when absent on deserialization.
#[serde(default)]
pub kind: ReferenceIssueKind,
/// Whether the issue fails the report or is informational only.
pub severity: IssueSeverity,
/// Human-readable description.
pub message: String,
/// Document (and optional range) the issue is attributed to.
pub source: ReferenceSource,
/// The link destination involved, when there is one.
pub destination: Option<String>,
}
/// Category of a [`ReferenceValidationIssue`].
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq, JsonSchema)]
pub enum ReferenceIssueKind {
/// Destination uses an unsupported form (e.g. `http://`, backslashes, absolute path).
UnsupportedScheme,
/// A `spec://`, `impl://` or `scratch://` handle was used as a markdown destination.
DisallowedHandle,
/// A filesystem destination could not be resolved inside the workspace.
WorkspaceBoundary,
/// The resolved filesystem target does not exist.
FileMissing,
/// A fragment is empty or matches no heading slug; also used for headings with an empty slug.
InvalidFragment,
/// An `https://` destination failed to parse.
MalformedUrl,
/// The HTTP request failed or returned an error status.
UnreachableUrl,
/// The markdown source could not be parsed.
ParseFailure,
/// Transitive traversal stopped at `max_documents`.
TraversalLimit,
/// Reading a document from disk failed.
Io,
/// A reference-style link has no matching definition.
UnresolvedReference,
/// The path part of a destination is empty.
EmptyDestination,
/// A fragment into another document was not checked because traversal is disabled.
CrossDocumentFragmentSkipped,
/// Fetching or reading a URL body failed outside of reachability status handling.
Fetch,
/// Fallback used when the kind is missing from serialized data.
Unknown,
}
impl Default for ReferenceIssueKind {
fn default() -> Self {
ReferenceIssueKind::Unknown
}
}
/// One link destination discovered in a document.
#[derive(Clone, Debug, Serialize, Deserialize, JsonSchema)]
pub struct ReferenceRecord {
/// Where the reference was found.
pub source: ReferenceSource,
/// The destination exactly as written in the markdown.
pub destination: String,
/// Normalized form: the resolved path (forward slashes) for workspace paths,
/// the destination itself for URLs and fragments, `None` otherwise.
pub normalized: Option<String>,
/// Classification of the destination.
pub kind: ReferenceKind,
}
/// Classification of a link destination (see `classify_destination`).
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq, JsonSchema)]
pub enum ReferenceKind {
/// A relative filesystem path; the fallback classification.
WorkspacePath,
/// Destination starting with `https://`.
HttpsUrl,
/// Destination starting with `#`.
FragmentOnly,
/// Destination starting with `spec://`, `impl://` or `scratch://`.
SpecHandle,
/// `http://`, any other `://` scheme, backslash paths or drive-letter paths.
UnsupportedScheme,
}
/// Alias of [`ReferenceKind`].
pub type DestinationKind = ReferenceKind;
/// Alias of [`ReferenceRecord`]; the element type of `ReferenceValidationReport::discovered`.
pub type DiscoveredReference = ReferenceRecord;
/// Location a reference or issue is attributed to.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord, JsonSchema)]
pub struct ReferenceSource {
/// Document description: a forward-slash file path or a URL string.
pub document: String,
/// Position inside the document, when known.
pub range: Option<SourceRange>,
}
/// Start and end position of a node in the markdown source.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord, JsonSchema)]
pub struct SourceRange {
/// Position where the node starts.
pub start: SourcePoint,
/// Position where the node ends.
pub end: SourcePoint,
}
/// A line/column pair copied from the markdown parser's position data.
// NOTE(review): indexing base (0- or 1-based) follows the `markdown` crate — confirm.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord, JsonSchema)]
pub struct SourcePoint {
/// Line number as reported by the parser.
pub line: usize,
/// Column number as reported by the parser.
pub column: usize,
}
// Identity of a document during traversal; used for the visited set and as the
// key of pending cross-document fragment checks.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
enum DocumentKey {
// A file on disk, keyed by its resolved path.
File(PathBuf),
// A remote document, keyed by its URL string.
Url(String),
}
impl From<&ValidationMode> for ReferenceValidationOptions {
    /// Builds serializable options from a validation mode.
    ///
    /// `resolve_fragments` has no counterpart in the options type and is not carried over.
    fn from(mode: &ValidationMode) -> Self {
        let https = HttpsValidationOptions {
            mode: match mode.reachability.clone() {
                ReachabilityPolicy::Disabled => HttpsValidationMode::SyntaxOnly,
                ReachabilityPolicy::Online {
                    timeout,
                    max_redirects,
                    method,
                } => HttpsValidationMode::Reachability {
                    timeout,
                    max_redirects,
                    method,
                },
            },
        };
        let transitive = TransitiveOptions {
            enabled: mode.transitive,
            max_documents: mode.max_documents,
        };
        Self { https, transitive }
    }
}
impl From<ValidationMode> for ReferenceValidationOptions {
fn from(mode: ValidationMode) -> Self {
ReferenceValidationOptions::from(&mode)
}
}
impl From<&ReferenceValidationOptions> for ValidationMode {
fn from(options: &ReferenceValidationOptions) -> Self {
let reachability = match &options.https.mode {
HttpsValidationMode::SyntaxOnly => ReachabilityPolicy::Disabled,
HttpsValidationMode::Reachability {
timeout,
max_redirects,
method,
} => ReachabilityPolicy::Online {
timeout: *timeout,
max_redirects: *max_redirects,
method: method.clone(),
},
};
ValidationMode {
resolve_fragments: true,
reachability,
transitive: options.transitive.enabled,
max_documents: options.transitive.max_documents,
}
}
}
impl From<ReferenceValidationOptions> for ValidationMode {
fn from(options: ReferenceValidationOptions) -> Self {
ValidationMode::from(&options)
}
}
impl Default for ReferenceValidationOptions {
    /// Mirrors `ValidationMode::default()` so both option types share one set of defaults.
    fn default() -> Self {
        let mode = ValidationMode::default();
        Self::from(&mode)
    }
}
/// Validates markdown references against a borrowed workspace using a fixed
/// [`ValidationMode`].
#[derive(Clone, Debug)]
pub struct ReferenceValidator<'a> {
// Workspace used to resolve locators and to enforce the workspace boundary.
workspace: &'a WorkspacePaths,
// Settings applied to every `validate` call.
mode: ValidationMode,
}
impl<'a> ReferenceValidator<'a> {
pub fn new(workspace: &'a WorkspacePaths) -> Self {
Self {
workspace,
mode: ValidationMode::default(),
}
}
pub fn with_mode(workspace: &'a WorkspacePaths, mode: ValidationMode) -> Self {
Self { workspace, mode }
}
pub fn mode(&self) -> &ValidationMode {
&self.mode
}
pub fn validate(&self, locator: &str) -> Result<ReferenceValidationReport, SpecmanError> {
validate_references_internal(
locator,
self.workspace,
self.mode.clone().into(),
self.mode.resolve_fragments,
)
}
}
/// Validates the references of the document named by `locator`.
///
/// Fragment resolution is always enabled on this entry point; use
/// [`ReferenceValidator`] with a [`ValidationMode`] to turn it off.
pub fn validate_references(
    locator: &str,
    workspace: &WorkspacePaths,
    options: ReferenceValidationOptions,
) -> Result<ReferenceValidationReport, SpecmanError> {
    let resolve_fragments = true;
    validate_references_internal(locator, workspace, options, resolve_fragments)
}
fn validate_references_internal(
locator: &str,
workspace: &WorkspacePaths,
options: ReferenceValidationOptions,
resolve_fragments: bool,
) -> Result<ReferenceValidationReport, SpecmanError> {
let mut report = ReferenceValidationReport::new();
let root = resolve_input_locator(locator, workspace);
let mut queue = VecDeque::new();
queue.push_back(root);
let mut visited = BTreeSet::new();
let mut pending_fragments: BTreeMap<DocumentKey, BTreeSet<PendingFragmentCheck>> =
BTreeMap::new();
while let Some(target) = queue.pop_front() {
let key = match &target {
ResolvedDocument::File { path, .. } => DocumentKey::File(path.clone()),
ResolvedDocument::Url { url } => DocumentKey::Url(url.as_str().to_string()),
};
if !visited.insert(key.clone()) {
continue;
}
report.processed_documents += 1;
if options.transitive.enabled
&& report.processed_documents > options.transitive.max_documents
{
report.issues.push(ReferenceValidationIssue {
kind: ReferenceIssueKind::TraversalLimit,
severity: IssueSeverity::Diagnostic,
message: format!(
"transitive traversal stopped after {} documents (max_documents)",
options.transitive.max_documents
),
source: ReferenceSource {
document: target.describe(),
range: None,
},
destination: None,
});
break;
}
let content = match load_document_content(&target, &options.https.mode) {
Ok(Some(content)) => content,
Ok(None) => continue,
Err(issue) => {
report.issues.push(issue);
continue;
}
};
let (discovered, issues, next_docs, heading_slugs) = validate_document(
&target,
&content,
workspace,
&options,
resolve_fragments,
&mut pending_fragments,
);
report.discovered.extend(discovered);
report.issues.extend(issues);
if resolve_fragments {
if let Some(checks) = pending_fragments.remove(&key) {
for check in checks {
if !heading_slugs.contains(&check.fragment) {
report.issues.push(ReferenceValidationIssue {
kind: ReferenceIssueKind::InvalidFragment,
severity: IssueSeverity::Error,
message: format!(
"fragment '#{}' does not match any heading slug in {}",
check.fragment,
target.describe()
),
source: check.source,
destination: Some(check.destination),
});
}
}
}
}
if options.transitive.enabled {
for doc in next_docs {
queue.push_back(doc);
}
}
}
report.finalize();
Ok(report)
}
// A cross-document fragment reference waiting for the target document's
// heading slugs to become available.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
struct PendingFragmentCheck {
// Where the link was found; becomes the source of any resulting issue.
source: ReferenceSource,
// Fragment text without the leading `#`.
fragment: String,
// The full destination as written in the markdown.
destination: String,
}
// A document scheduled for validation: either a local file or an https URL.
#[derive(Clone, Debug)]
enum ResolvedDocument {
File {
// Path of the document (canonicalized when the file exists).
path: PathBuf,
// Directory relative links in the document are resolved against.
base_dir: PathBuf,
},
Url {
// Parsed URL of the remote document.
url: Url,
},
}
impl ResolvedDocument {
    /// Returns the text used to identify this document in reports: the path
    /// with forward slashes for files, the URL string for remote documents.
    fn describe(&self) -> String {
        match self {
            Self::Url { url } => String::from(url.as_str()),
            Self::File { path, .. } => path_to_forward_slashes(path),
        }
    }
}
/// Turns a user-supplied locator into a document to validate.
///
/// Accepted forms, in order of precedence: an `https://` URL, a `spec://`,
/// `impl://` or `scratch://` handle (resolved below the matching workspace
/// directory), an absolute path, or a path relative to the workspace root.
/// Existing files are canonicalized; missing files are kept as given so the
/// read error surfaces later.
fn resolve_input_locator(locator: &str, workspace: &WorkspacePaths) -> ResolvedDocument {
    if let Ok(url) = parse_https_url(locator) {
        return ResolvedDocument::Url { url };
    }

    let requested = if let Some(slug) = locator.strip_prefix("spec://") {
        resolve_handle_path(workspace.spec_dir(), slug, "spec.md")
    } else if let Some(slug) = locator.strip_prefix("impl://") {
        resolve_handle_path(workspace.impl_dir(), slug, "impl.md")
    } else if let Some(slug) = locator.strip_prefix("scratch://") {
        resolve_handle_path(workspace.scratchpad_dir(), slug, "scratch.md")
    } else if Path::new(locator).is_absolute() {
        PathBuf::from(locator)
    } else {
        workspace.root().join(locator)
    };

    // Prefer the canonical path when the file exists; fall back to the
    // requested path if canonicalization fails.
    let path = if requested.exists() {
        fs::canonicalize(&requested).unwrap_or(requested)
    } else {
        requested
    };
    let base_dir = match path.parent() {
        Some(parent) => parent.to_path_buf(),
        None => PathBuf::from("."),
    };
    ResolvedDocument::File { path, base_dir }
}
/// Builds `<base>/<slug>/<leaf>` where the slug is `raw_slug` trimmed and
/// ASCII-lowercased.
fn resolve_handle_path(base: PathBuf, raw_slug: &str, leaf: &str) -> PathBuf {
    let mut resolved = base;
    resolved.push(raw_slug.trim().to_ascii_lowercase());
    resolved.push(leaf);
    resolved
}
/// Loads the text of a document.
///
/// Returns `Ok(None)` for URL documents in syntax-only mode (nothing is
/// fetched), `Ok(Some(text))` on success, and an issue describing the failure
/// otherwise. File read failures are reported as `Io` errors.
fn load_document_content(
    target: &ResolvedDocument,
    https_mode: &HttpsValidationMode,
) -> Result<Option<String>, ReferenceValidationIssue> {
    let url = match target {
        ResolvedDocument::Url { url } => url,
        ResolvedDocument::File { path, .. } => {
            return fs::read_to_string(path)
                .map(Some)
                .map_err(|err| ReferenceValidationIssue {
                    kind: ReferenceIssueKind::Io,
                    severity: IssueSeverity::Error,
                    message: format!("failed to read file {}: {err}", path.display()),
                    source: ReferenceSource {
                        document: path_to_forward_slashes(path),
                        range: None,
                    },
                    destination: None,
                });
        }
    };

    match https_mode {
        HttpsValidationMode::Reachability { .. } => fetch_url(url, https_mode).map(Some),
        HttpsValidationMode::SyntaxOnly => Ok(None),
    }
}
fn fetch_url(url: &Url, mode: &HttpsValidationMode) -> Result<String, ReferenceValidationIssue> {
let (timeout, max_redirects, method) = match mode {
HttpsValidationMode::Reachability {
timeout,
max_redirects,
method,
} => (*timeout, *max_redirects, method.clone()),
HttpsValidationMode::SyntaxOnly => {
return Err(ReferenceValidationIssue {
kind: ReferenceIssueKind::Fetch,
severity: IssueSeverity::Diagnostic,
message: "fetch_url called in syntax-only mode".into(),
source: ReferenceSource {
document: url.as_str().to_string(),
range: None,
},
destination: Some(url.as_str().to_string()),
});
}
};
let agent = ureq::AgentBuilder::new()
.timeout_read(timeout)
.timeout_write(timeout)
.redirects(max_redirects)
.build();
let request = match method {
HttpsMethod::Head => agent.head(url.as_str()),
HttpsMethod::Get => agent.get(url.as_str()),
};
match request.call() {
Ok(response) => {
if let HttpsMethod::Head = method {
return Ok(String::new());
}
match response.into_string() {
Ok(s) => Ok(s),
Err(err) => Err(ReferenceValidationIssue {
kind: ReferenceIssueKind::Fetch,
severity: IssueSeverity::Diagnostic,
message: format!("failed reading response body: {err}"),
source: ReferenceSource {
document: url.as_str().to_string(),
range: None,
},
destination: Some(url.as_str().to_string()),
}),
}
}
Err(ureq::Error::Status(code, response)) => {
let severity = if (400..500).contains(&code) {
IssueSeverity::Error
} else {
IssueSeverity::Diagnostic
};
let hint = response.status_text().to_string();
Err(ReferenceValidationIssue {
kind: ReferenceIssueKind::UnreachableUrl,
severity,
message: format!("https reachability failed with status {code} ({hint})"),
source: ReferenceSource {
document: url.as_str().to_string(),
range: None,
},
destination: Some(url.as_str().to_string()),
})
}
Err(err) => Err(ReferenceValidationIssue {
kind: ReferenceIssueKind::UnreachableUrl,
severity: IssueSeverity::Diagnostic,
message: format!("https request failed: {err}"),
source: ReferenceSource {
document: url.as_str().to_string(),
range: None,
},
destination: Some(url.as_str().to_string()),
}),
}
}
/// Validates every link destination in one markdown document.
///
/// Returns, in order:
/// 1. a record for every destination found,
/// 2. the issues raised for this document,
/// 3. the markdown files to visit next (empty unless transitive traversal is
///    enabled; de-duplicated by path),
/// 4. the heading slugs of this document (empty when `resolve_fragments` is
///    false or the document failed to parse).
///
/// Fragment references into *other* markdown files cannot be checked here;
/// they are added to `pending_fragments` under the target document's key and
/// resolved by the caller.
///
/// A link whose target file does not exist is reported once as `FileMissing`
/// and is neither queued for traversal nor registered for a fragment check.
/// Queuing it would only produce a second, redundant read error for the same
/// broken link.
fn validate_document(
    target: &ResolvedDocument,
    markdown_source: &str,
    workspace: &WorkspacePaths,
    options: &ReferenceValidationOptions,
    resolve_fragments: bool,
    pending_fragments: &mut BTreeMap<DocumentKey, BTreeSet<PendingFragmentCheck>>,
) -> (
    Vec<DiscoveredReference>,
    Vec<ReferenceValidationIssue>,
    Vec<ResolvedDocument>,
    BTreeSet<String>,
) {
    let document_id = target.describe();
    let mut discovered = Vec::new();
    let mut issues = Vec::new();
    let mut markdown_targets = BTreeMap::<String, ResolvedDocument>::new();

    let root = match markdown::to_mdast(markdown_source, &markdown::ParseOptions::default()) {
        Ok(node) => node,
        Err(message) => {
            issues.push(ReferenceValidationIssue {
                kind: ReferenceIssueKind::ParseFailure,
                severity: IssueSeverity::Error,
                message: format!("markdown parse error: {message}"),
                source: ReferenceSource {
                    document: document_id,
                    range: None,
                },
                destination: None,
            });
            return (discovered, issues, Vec::new(), BTreeSet::new());
        }
    };

    let definition_map = collect_definitions(&root);
    let heading_slugs = if resolve_fragments {
        collect_heading_slugs(&root, &mut issues, &document_id)
    } else {
        BTreeSet::new()
    };

    // Canonical path of this document, used to recognise links that point
    // back at the document itself. Computed once instead of once per link.
    let own_canonical_path = match target {
        ResolvedDocument::File { path, .. } => fs::canonicalize(path).ok(),
        ResolvedDocument::Url { .. } => None,
    };
    let check_reachability = matches!(
        options.https.mode,
        HttpsValidationMode::Reachability { .. }
    );

    for extracted in extract_destinations(&root, &definition_map) {
        if let Some(missing) = extracted.unresolved_reference_identifier {
            issues.push(ReferenceValidationIssue {
                kind: ReferenceIssueKind::UnresolvedReference,
                severity: IssueSeverity::Error,
                message: format!("unresolved link reference identifier: {missing:?}"),
                source: ReferenceSource {
                    document: document_id.clone(),
                    range: extracted.range,
                },
                destination: None,
            });
            continue;
        }

        let destination = extracted.destination;
        let source = ReferenceSource {
            document: document_id.clone(),
            range: extracted.range,
        };
        let kind = classify_destination(&destination);
        discovered.push(ReferenceRecord {
            source: source.clone(),
            destination: destination.clone(),
            normalized: None,
            kind: kind.clone(),
        });
        let record = discovered
            .last_mut()
            .expect("record inserted for discovered reference");

        match kind {
            ReferenceKind::FragmentOnly => {
                record.normalized = Some(destination.clone());
                if resolve_fragments {
                    let fragment = destination.trim_start_matches('#');
                    let fragment = fragment.split('?').next().unwrap_or(fragment);
                    if fragment.is_empty() {
                        issues.push(ReferenceValidationIssue {
                            kind: ReferenceIssueKind::InvalidFragment,
                            severity: IssueSeverity::Error,
                            message: "empty fragment is invalid".into(),
                            source,
                            destination: Some(destination),
                        });
                    } else if !heading_slugs.contains(fragment) {
                        issues.push(ReferenceValidationIssue {
                            kind: ReferenceIssueKind::InvalidFragment,
                            severity: IssueSeverity::Error,
                            message: format!(
                                "fragment '#{fragment}' does not match any heading slug"
                            ),
                            source,
                            destination: Some(destination),
                        });
                    }
                }
            }
            ReferenceKind::HttpsUrl => {
                record.normalized = Some(destination.clone());
                match parse_https_url(&destination) {
                    Err(()) => issues.push(ReferenceValidationIssue {
                        kind: ReferenceIssueKind::MalformedUrl,
                        severity: IssueSeverity::Error,
                        message: "invalid https url".into(),
                        source,
                        destination: Some(destination),
                    }),
                    Ok(url) => {
                        if check_reachability {
                            if let Err(issue) = fetch_url(&url, &options.https.mode) {
                                // Re-attribute the fetch issue to the link's
                                // location in this document.
                                issues.push(ReferenceValidationIssue {
                                    kind: issue.kind,
                                    severity: issue.severity,
                                    message: issue.message,
                                    source,
                                    destination: Some(destination),
                                });
                            }
                        }
                    }
                }
            }
            ReferenceKind::WorkspacePath => {
                let (path_part, fragment_opt) = split_path_and_fragment(&destination);
                let resolved =
                    match resolve_markdown_destination_path(target, &path_part, workspace) {
                        Ok(resolved) => resolved,
                        Err(msg) => {
                            // The resolver reports failures as text; map the
                            // known messages onto issue kinds.
                            let issue_kind = if msg.contains("empty path destination") {
                                ReferenceIssueKind::EmptyDestination
                            } else if msg.contains("absolute paths are not supported") {
                                ReferenceIssueKind::UnsupportedScheme
                            } else {
                                ReferenceIssueKind::WorkspaceBoundary
                            };
                            issues.push(ReferenceValidationIssue {
                                kind: issue_kind,
                                severity: IssueSeverity::Error,
                                message: msg,
                                source,
                                destination: Some(destination),
                            });
                            continue;
                        }
                    };

                record.normalized = Some(path_to_forward_slashes(&resolved));
                let target_exists = resolved.exists();
                if !target_exists {
                    issues.push(ReferenceValidationIssue {
                        kind: ReferenceIssueKind::FileMissing,
                        severity: IssueSeverity::Error,
                        message: format!(
                            "missing filesystem target {}",
                            path_to_forward_slashes(&resolved)
                        ),
                        source: source.clone(),
                        destination: Some(destination.clone()),
                    });
                }
                if !is_markdown_path(&resolved) {
                    continue;
                }

                if resolve_fragments {
                    if let Some(fragment) = fragment_opt {
                        if fragment.is_empty() {
                            issues.push(ReferenceValidationIssue {
                                kind: ReferenceIssueKind::InvalidFragment,
                                severity: IssueSeverity::Error,
                                message: "empty fragment is invalid".into(),
                                source: source.clone(),
                                destination: Some(destination.clone()),
                            });
                        } else if own_canonical_path.as_ref() == Some(&resolved) {
                            // Link back into this very document: check now.
                            if !heading_slugs.contains(&fragment) {
                                issues.push(ReferenceValidationIssue {
                                    kind: ReferenceIssueKind::InvalidFragment,
                                    severity: IssueSeverity::Error,
                                    message: format!(
                                        "fragment '#{fragment}' does not match any heading slug"
                                    ),
                                    source: source.clone(),
                                    destination: Some(destination.clone()),
                                });
                            }
                        } else if !target_exists {
                            // Already reported as FileMissing; there are no
                            // headings to check the fragment against.
                        } else if options.transitive.enabled {
                            pending_fragments
                                .entry(DocumentKey::File(resolved.clone()))
                                .or_default()
                                .insert(PendingFragmentCheck {
                                    source: source.clone(),
                                    fragment,
                                    destination: destination.clone(),
                                });
                        } else {
                            issues.push(ReferenceValidationIssue {
                                kind: ReferenceIssueKind::CrossDocumentFragmentSkipped,
                                severity: IssueSeverity::Diagnostic,
                                message: "cross-document fragment validation skipped (transitive disabled)".into(),
                                source: source.clone(),
                                destination: Some(destination.clone()),
                            });
                        }
                    }
                }

                if options.transitive.enabled && target_exists {
                    markdown_targets.insert(
                        path_to_forward_slashes(&resolved),
                        ResolvedDocument::File {
                            base_dir: resolved
                                .parent()
                                .unwrap_or_else(|| Path::new("."))
                                .to_path_buf(),
                            path: resolved,
                        },
                    );
                }
            }
            ReferenceKind::SpecHandle => {
                issues.push(ReferenceValidationIssue {
                    kind: ReferenceIssueKind::DisallowedHandle,
                    severity: IssueSeverity::Error,
                    message: "SpecMan handles are not valid markdown destinations".into(),
                    source,
                    destination: Some(destination),
                });
            }
            ReferenceKind::UnsupportedScheme => {
                issues.push(ReferenceValidationIssue {
                    kind: ReferenceIssueKind::UnsupportedScheme,
                    severity: IssueSeverity::Error,
                    message: "unsupported or invalid destination".into(),
                    source,
                    destination: Some(destination),
                });
            }
        }
    }

    let next_docs = markdown_targets.into_values().collect::<Vec<_>>();
    (discovered, issues, next_docs, heading_slugs)
}
/// Parses `input` as a URL, accepting it only when it starts with the exact
/// lowercase prefix `https://`.
fn parse_https_url(input: &str) -> Result<Url, ()> {
    if !input.starts_with("https://") {
        return Err(());
    }
    Url::parse(input).map_err(|_| ())
}
/// Classifies a link destination by its textual form.
///
/// Checks are applied in order: SpecMan handles, `http://` (unsupported),
/// `#fragment`, `https://`, then anything containing a backslash, a drive
/// letter prefix or any other `://` scheme (unsupported). Everything else is
/// treated as a workspace path.
fn classify_destination(destination: &str) -> ReferenceKind {
    const HANDLE_PREFIXES: [&str; 3] = ["spec://", "impl://", "scratch://"];

    let dest = destination.trim();
    if HANDLE_PREFIXES.iter().any(|prefix| dest.starts_with(*prefix)) {
        ReferenceKind::SpecHandle
    } else if dest.starts_with("http://") {
        ReferenceKind::UnsupportedScheme
    } else if dest.starts_with('#') {
        ReferenceKind::FragmentOnly
    } else if dest.starts_with("https://") {
        ReferenceKind::HttpsUrl
    } else if dest.contains('\\') || looks_like_windows_drive_path(dest) || dest.contains("://") {
        ReferenceKind::UnsupportedScheme
    } else {
        ReferenceKind::WorkspacePath
    }
}
/// Returns true when `s` starts with an ASCII letter followed by `:` (e.g. `C:`).
fn looks_like_windows_drive_path(s: &str) -> bool {
    matches!(s.as_bytes(), [drive, b':', ..] if drive.is_ascii_alphabetic())
}
/// Renders `path` as text (lossily for non-UTF-8 paths) with every backslash
/// replaced by a forward slash.
fn path_to_forward_slashes(path: &Path) -> String {
    path.to_string_lossy()
        .chars()
        .map(|ch| if ch == '\\' { '/' } else { ch })
        .collect()
}
/// Returns true when the path's extension is exactly `md` (case-sensitive).
fn is_markdown_path(path: &Path) -> bool {
    path.extension().map_or(false, |ext| ext == "md")
}
/// Splits a destination into its path and optional fragment.
///
/// The destination is cut at the first `#`. Anything from the first `?`
/// onwards is dropped from both the path part and the fragment part. A
/// destination ending in `#` yields `Some("")` as fragment.
fn split_path_and_fragment(dest: &str) -> (String, Option<String>) {
    /// Returns the portion of `text` before the first `?`.
    fn before_query(text: &str) -> &str {
        text.split('?').next().unwrap_or(text)
    }

    let (raw_path, raw_fragment) = match dest.find('#') {
        Some(hash) => (&dest[..hash], Some(&dest[hash + 1..])),
        None => (dest, None),
    };
    (
        before_query(raw_path).to_string(),
        raw_fragment.map(|fragment| before_query(fragment).to_string()),
    )
}
/// Resolves a relative markdown destination against the source document's
/// directory and verifies that the result stays inside the workspace.
///
/// Existing targets are canonicalized; targets that cannot be canonicalized
/// (typically because they do not exist) are normalized lexically instead.
///
/// # Errors
///
/// Returns a message when the destination is empty, starts with `/`, the
/// source is a URL, or the resolved path lies outside the workspace root.
/// Callers match on the wording of these messages, so it must not change.
fn resolve_markdown_destination_path(
    source: &ResolvedDocument,
    dest: &str,
    workspace: &WorkspacePaths,
) -> Result<PathBuf, String> {
    let dest = dest.trim();
    if dest.is_empty() {
        return Err("empty path destination".into());
    }
    if dest.starts_with('/') {
        return Err("absolute paths are not supported in markdown destinations".into());
    }

    let source_dir = match source {
        ResolvedDocument::File { base_dir, .. } => base_dir.as_path(),
        ResolvedDocument::Url { .. } => {
            return Err(
                "filesystem destinations cannot be resolved relative to a URL source".into(),
            );
        }
    };

    let workspace_root = match fs::canonicalize(workspace.root()) {
        Ok(canonical) => canonical,
        Err(_) => workspace.root().to_path_buf(),
    };
    let source_dir = match fs::canonicalize(source_dir) {
        Ok(canonical) => canonical,
        Err(_) => source_dir.to_path_buf(),
    };

    let relative = Path::new(dest);
    let joined = if relative.is_absolute() {
        relative.to_path_buf()
    } else {
        source_dir.join(relative)
    };

    // Prefer the real path; fall back to a purely lexical clean-up when the
    // target cannot be canonicalized.
    let candidate = match fs::canonicalize(&joined) {
        Ok(canonical) => canonical,
        Err(_) => normalize_path_lexical(&joined),
    };
    if candidate.starts_with(&workspace_root) {
        Ok(candidate)
    } else {
        Err(format!(
            "filesystem destination escapes workspace: {}",
            path_to_forward_slashes(&candidate)
        ))
    }
}
/// Normalizes a path without touching the filesystem.
///
/// `.` components are dropped and each `..` removes the preceding normal
/// component. For anchored paths (those with a root or prefix) a `..` at the
/// root is discarded. For unanchored paths, leading `..` components are
/// preserved — including several in a row, so `../../a` stays `../../a`
/// rather than collapsing to `a`.
fn normalize_path_lexical(path: &Path) -> PathBuf {
    use std::ffi::{OsStr, OsString};
    use std::path::Component;

    let parent = OsStr::new("..");
    let mut anchored = PathBuf::new();
    let mut parts: Vec<OsString> = Vec::new();
    let mut is_anchored = false;

    for component in path.components() {
        match component {
            component @ (Component::Prefix(_) | Component::RootDir) => {
                anchored.push(component.as_os_str());
                is_anchored = true;
            }
            Component::CurDir => {}
            Component::ParentDir => {
                // Only a normal component may be cancelled by `..`; a kept
                // leading `..` must never be popped by a following `..`.
                let can_pop = parts
                    .last()
                    .map_or(false, |last| last.as_os_str() != parent);
                if can_pop {
                    parts.pop();
                } else if !is_anchored {
                    parts.push(parent.to_os_string());
                }
            }
            Component::Normal(s) => parts.push(s.to_os_string()),
        }
    }

    anchored.extend(parts);
    anchored
}
/// Collects link reference definitions (`[id]: url`) from the whole tree,
/// keyed by their normalized identifier.
///
/// When an identifier is defined more than once, the first definition in
/// document order wins, matching CommonMark's rule for duplicate link
/// reference definitions. (Previously the last definition silently replaced
/// earlier ones, so a reference could be validated against a different URL
/// than the one a renderer would use.)
fn collect_definitions(root: &markdown::mdast::Node) -> BTreeMap<String, String> {
    use markdown::mdast::Node;

    fn walk(node: &Node, defs: &mut BTreeMap<String, String>) {
        match node {
            Node::Definition(def) => {
                defs.entry(def.identifier.clone())
                    .or_insert_with(|| def.url.clone());
            }
            _ => {
                for child in node.children().into_iter().flatten() {
                    walk(child, defs);
                }
            }
        }
    }

    let mut defs = BTreeMap::new();
    walk(root, &mut defs);
    defs
}
// One link found in the markdown tree, before classification.
struct ExtractedDestination {
// Destination URL/path; empty when the reference could not be resolved.
destination: String,
// Position of the link node in the source, when the parser provides it.
range: Option<SourceRange>,
// Set to the reference identifier when no matching definition exists.
unresolved_reference_identifier: Option<String>,
}
/// Walks the tree and returns every link destination in document order.
///
/// Inline links contribute their URL directly. Reference-style links are
/// looked up in `defs`; a missing definition produces an entry with an empty
/// destination and the unresolved identifier set. Images and image references
/// are ignored, and the children of link nodes are not descended into.
fn extract_destinations(
    root: &markdown::mdast::Node,
    defs: &BTreeMap<String, String>,
) -> Vec<ExtractedDestination> {
    use markdown::mdast::Node;

    /// Converts the parser's optional position into a `SourceRange`.
    fn to_source_range(position: Option<&markdown::unist::Position>) -> Option<SourceRange> {
        position.map(|pos| SourceRange {
            start: SourcePoint {
                line: pos.start.line as usize,
                column: pos.start.column as usize,
            },
            end: SourcePoint {
                line: pos.end.line as usize,
                column: pos.end.column as usize,
            },
        })
    }

    fn visit(
        node: &Node,
        defs: &BTreeMap<String, String>,
        found: &mut Vec<ExtractedDestination>,
    ) {
        match node {
            Node::Link(link) => found.push(ExtractedDestination {
                destination: link.url.clone(),
                range: to_source_range(link.position.as_ref()),
                unresolved_reference_identifier: None,
            }),
            Node::LinkReference(reference) => {
                let range = to_source_range(reference.position.as_ref());
                // Full, collapsed and shortcut references all resolve through
                // the same identifier lookup.
                let entry = match defs.get(&reference.identifier) {
                    Some(url) => ExtractedDestination {
                        destination: url.clone(),
                        range,
                        unresolved_reference_identifier: None,
                    },
                    None => ExtractedDestination {
                        destination: String::new(),
                        range,
                        unresolved_reference_identifier: Some(reference.identifier.clone()),
                    },
                };
                found.push(entry);
            }
            Node::Image(_) | Node::ImageReference(_) => {}
            other => {
                for child in other.children().into_iter().flatten() {
                    visit(child, defs, found);
                }
            }
        }
    }

    let mut found = Vec::new();
    visit(root, defs, &mut found);
    found
}
fn collect_heading_slugs(
root: &markdown::mdast::Node,
issues: &mut Vec<ReferenceValidationIssue>,
document_id: &str,
) -> BTreeSet<String> {
use markdown::mdast::Node;
let mut slugs = BTreeSet::new();
let mut occurrences = BTreeMap::<String, usize>::new();
fn plain_text(node: &Node, out: &mut String) {
use markdown::mdast::Node;
match node {
Node::Text(t) => out.push_str(&t.value),
Node::InlineCode(c) => out.push_str(&c.value),
Node::Link(l) => {
for child in &l.children {
plain_text(child, out);
}
}
Node::LinkReference(lr) => {
for child in &lr.children {
plain_text(child, out);
}
}
Node::Emphasis(e) => {
for child in &e.children {
plain_text(child, out);
}
}
Node::Strong(s) => {
for child in &s.children {
plain_text(child, out);
}
}
Node::Delete(d) => {
for child in &d.children {
plain_text(child, out);
}
}
_ => {
if let Some(children) = node.children() {
for child in children {
plain_text(child, out);
}
}
}
}
}
fn walk(
node: &Node,
slugs: &mut BTreeSet<String>,
occurrences: &mut BTreeMap<String, usize>,
issues: &mut Vec<ReferenceValidationIssue>,
document_id: &str,
) {
match node {
Node::Heading(h) => {
let mut title = String::new();
for child in &h.children {
plain_text(child, &mut title);
}
match slugify_heading(&title) {
Some(base) => {
let count = occurrences.entry(base.clone()).or_insert(0);
let slug = if *count == 0 {
base.clone()
} else {
format!("{base}-{count}")
};
*count += 1;
slugs.insert(slug);
}
None => {
issues.push(ReferenceValidationIssue {
kind: ReferenceIssueKind::InvalidFragment,
severity: IssueSeverity::Diagnostic,
message: format!("heading produces empty slug (title: {title:?})"),
source: ReferenceSource {
document: document_id.to_string(),
range: None,
},
destination: None,
});
}
}
}
_ => {
if let Some(children) = node.children() {
for child in children {
walk(child, slugs, occurrences, issues, document_id);
}
}
}
}
}
walk(root, &mut slugs, &mut occurrences, issues, document_id);
slugs
}
/// Converts heading text into a slug, or `None` when nothing remains.
///
/// The text is NFKD-normalized and lowercased. Alphabetic and numeric
/// characters are kept, every run of whitespace and/or hyphens becomes a
/// single `-`, and all other characters are dropped. Leading and trailing
/// hyphens are removed.
fn slugify_heading(title: &str) -> Option<String> {
    let lowered = title.nfkd().collect::<String>().to_lowercase();

    let mut slug = String::with_capacity(lowered.len());
    // True while a separator run has been seen but not yet written. It is
    // only written once another kept character follows, which also takes
    // care of trimming separators at both ends.
    let mut separator_pending = false;
    for ch in lowered.chars() {
        if ch.is_whitespace() || ch == '-' {
            separator_pending = true;
        } else if ch.is_alphabetic() || ch.is_numeric() {
            if separator_pending && !slug.is_empty() {
                slug.push('-');
            }
            separator_pending = false;
            slug.push(ch);
        }
        // Anything else (punctuation, combining marks, ...) is dropped and
        // does not interrupt a separator run.
    }

    if slug.is_empty() {
        None
    } else {
        Some(slug)
    }
}